Source code for featuretools.primitives.standard.transform.natural_language.upper_case_word_count

import re
from string import punctuation

import pandas as pd
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import IntegerNullable, NaturalLanguage

from featuretools.primitives.base import TransformPrimitive
from featuretools.primitives.standard.transform.natural_language.constants import (
    DELIMITERS,
)


class UpperCaseWordCount(TransformPrimitive):
    """Determines the number of words in a string that are entirely capitalized.

    Description:
        Given list of strings, determine the number of words in each string
        that are entirely capitalized. Each string is split into words on
        `DELIMITERS`. A word is counted only when it contains at least one
        letter and every letter in it is upper case, so tokens without any
        letters (for example purely numeric tokens) are not counted.
        If a string is missing, return `pd.NA`.

    Examples:
        >>> x = ['This IS a string.', 'This is a string', 'AAA']
        >>> upper_case_word_count = UpperCaseWordCount()
        >>> upper_case_word_count(x).tolist()
        [1, 0, 1]
    """

    name = "upper_case_word_count"
    input_types = [ColumnSchema(logical_type=NaturalLanguage)]
    return_type = ColumnSchema(logical_type=IntegerNullable, semantic_tags={"numeric"})
    default_value = 0

    def get_function(self):
        """Build the callable that counts upper-case words per element.

        Returns:
            A function taking ``x`` (an object exposing ``.apply``, presumably
            a pandas Series of strings) and returning ``x.apply(...)`` with,
            for each element, the number of upper-case words or ``pd.NA``
            when the element is missing.
        """
        # Compile the delimiter pattern once instead of resolving it for
        # every element of the input.
        split_words = re.compile(DELIMITERS).split

        def upper_case_word_count(x):
            def _count_upper_case_words(elem):
                if pd.isna(elem):
                    return pd.NA
                return sum(
                    1
                    for word in split_words(elem)
                    # Skip empty tokens and tokens made only of punctuation.
                    # ``str.isupper`` requires at least one cased character,
                    # so caseless tokens such as "2020" are not miscounted
                    # (``word.upper() == word`` is true for them).
                    if word.strip(punctuation) and word.isupper()
                )

            return x.apply(_count_upper_case_words)

        return upper_case_word_count