Source code for featuretools.primitives.standard.transform.file_extension

from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import Filepath

from featuretools.primitives.base import TransformPrimitive


[docs]class FileExtension(TransformPrimitive): """Determines the extension of a filepath. Description: Given a list of filepaths, return the extension suffix of each one. If the filepath is missing or invalid, return `NaN`. Examples: >>> file_extension = FileExtension() >>> file_extension(['doc.txt', '~/documents/data.json', 'file']).tolist() ['.txt', '.json', nan] """ name = "file_extension" input_types = [ColumnSchema(logical_type=Filepath)] return_type = ColumnSchema(semantic_tags={"category"}) def get_function(self): def file_extension(x): p = r"(\.[a-z|A-Z]+$)" return x.str.extract(p, expand=False).str.lower() return file_extension