Source code for featuretools.primitives.standard.transform.natural_language.num_characters

import pandas as pd
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import IntegerNullable, NaturalLanguage

from featuretools.primitives.base import TransformPrimitive
from featuretools.utils.gen_utils import Library


class NumCharacters(TransformPrimitive):
    """Calculates the number of characters in a given string, including whitespace and punctuation.

    Description:
        Returns the number of characters in a string. This is
        equivalent to the length of a string. Null entries are not
        measured; they produce `pd.NA` in the output instead.

    Examples:
        >>> num_characters = NumCharacters()
        >>> num_characters(['This is a string',
        ...                 'second item',
        ...                 'final1']).tolist()
        [16, 11, 6]
    """

    name = "num_characters"
    input_types = [ColumnSchema(logical_type=NaturalLanguage)]
    # IntegerNullable (rather than a plain integer type) because null inputs
    # are passed through as pd.NA by the counting function below.
    return_type = ColumnSchema(logical_type=IntegerNullable, semantic_tags={"numeric"})
    compatibility = [Library.PANDAS, Library.DASK, Library.SPARK]
    description_template = "the number of characters in {}"

    def get_function(self):
        """Build the callable that performs the character count.

        Returns:
            A function taking one argument, ``array``, on which ``.apply``
            is called with a per-element length function. The result of
            that ``.apply`` call is returned unchanged.
        """

        def character_counter(array):
            """Apply the per-element character count across ``array``."""

            def _get_num_characters(elem):
                """Returns the length of elem, or pd.NA given null input"""
                # Check for null first: len() would raise on NaN/None/pd.NA.
                if pd.isna(elem):
                    return pd.NA
                return len(elem)

            return array.apply(_get_num_characters)

        return character_counter