Source code for featuretools.primitives.standard.transform.full_name_to_first_name

import pandas as pd
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import Categorical, PersonFullName

from featuretools.primitives.base import TransformPrimitive


[docs]class FullNameToFirstName(TransformPrimitive): """Determines the first name from a person's name. Description: Given a list of names, determines the first name. If only a single name is provided, assume this is a first name. If only a title and a single name is provided return `nan`. This assumes all titles will be followed by a period. Please note, in the current implementation, last names containing spaces may result in improper first name matches. Examples: >>> full_name_to_first_name = FullNameToFirstName() >>> names = ['Woolf Spector', 'Oliva y Ocana, Dona. Fermina', ... 'Ware, Mr. Frederick', 'Peter, Michael J', 'Mr. Brown'] >>> full_name_to_first_name(names).to_list() ['Woolf', 'Oliva', 'Frederick', 'Michael', nan] """ name = "full_name_to_first_name" input_types = [ColumnSchema(logical_type=PersonFullName)] return_type = ColumnSchema(logical_type=Categorical, semantic_tags={"category"}) def get_function(self): def full_name_to_first_name(x): title_with_last_pattern = r"(^[A-Z][a-z]+\. [A-Z][a-z]+$)" titles_pattern = r"([A-Z][a-z]+)\. " df = pd.DataFrame({"names": x}) # remove any entries with just a title and a name df["names"] = df["names"].str.replace( title_with_last_pattern, "", regex=True, ) # remove any known titles df["names"] = df["names"].str.replace(titles_pattern, "", regex=True) # extract first names pattern = r"([A-Z][a-z]+ |, [A-Z][a-z]+$|^[A-Z][a-z]+$)" df["first_name"] = df["names"].str.extract(pattern) # clean up white space and leftover commas df["first_name"] = df["first_name"].str.replace(",", "").str.strip() return df["first_name"] return full_name_to_first_name