Source code for featuretools.primitives.standard.aggregation.percent_unique
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import Double
from featuretools.primitives.base import AggregationPrimitive
class PercentUnique(AggregationPrimitive):
    """Determines the percent of unique values.

    Description:
        Given a list of values, determine what percent of the
        list is made up of unique values. When `NaN` values are
        not skipped (`skipna=False`), multiple `NaN` values are
        treated as one unique value. The denominator is always the
        total number of values, `NaN` entries included. An empty
        input yields `default_value` rather than dividing by zero.

    Args:
        skipna (bool): Determines whether to ignore `NaN` values
            when counting unique values. Defaults to True.

    Examples:
        >>> percent_unique = PercentUnique()
        >>> percent_unique([1, 1, 2, 2, 3, 4, 5, 6, 7, 8])
        0.8

        We can control whether or not `NaN` values are ignored.

        >>> percent_unique = PercentUnique()
        >>> percent_unique([1, 1, 2, None])
        0.5
        >>> percent_unique_skipna = PercentUnique(skipna=False)
        >>> percent_unique_skipna([1, 1, 2, None])
        0.75
    """

    name = "percent_unique"
    input_types = [ColumnSchema(semantic_tags={"category"})]
    return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"})
    default_value = 0

    def __init__(self, skipna=True):
        self.skipna = skipna

    def get_function(self):
        """Return the callable that computes the unique-value ratio of `x`."""

        def percent_unique(x):
            total = x.shape[0]
            # Guard the empty case: 0 / 0 would raise ZeroDivisionError.
            if total == 0:
                return self.default_value
            # True division already yields a float; no `* 1.0` coercion needed.
            return x.nunique(dropna=self.skipna) / total

        return percent_unique