# Source code for featuretools.primitives.standard.aggregation.entropy
from scipy import stats
from woodwork.column_schema import ColumnSchema
from featuretools.primitives.base.aggregation_primitive_base import AggregationPrimitive
class Entropy(AggregationPrimitive):
    """Calculates the entropy for a categorical column

    Description:
        Given a list of observations from a categorical
        column return the entropy of the distribution.
        NaN values can be treated as a category or
        dropped.

    Args:
        dropna (bool): Whether to drop NaN values before computing the
            distribution. If False, NaN is counted as its own category;
            if True, NaN observations are excluded.
            Defaults to False.
        base (float): The logarithmic base to use.
            Defaults to None, in which case the natural logarithm
            (base e) is used.

    Examples:
        >>> pd_entropy = Entropy()
        >>> pd_entropy([1, 2, 3, 4])
        1.3862943611198906
    """

    # Primitive metadata; how these attributes are consumed is defined by
    # AggregationPrimitive and the surrounding framework, not in this class.
    name = "entropy"
    input_types = [ColumnSchema(semantic_tags={"category"})]
    return_type = ColumnSchema(semantic_tags={"numeric"})
    stack_on_self = False
    description_template = "the entropy of {}"

    def __init__(self, dropna=False, base=None):
        """Store the NaN-handling flag and the logarithm base.

        Args:
            dropna (bool): Forwarded to ``Series.value_counts(dropna=...)``.
            base (float): Forwarded to ``scipy.stats.entropy(base=...)``.
        """
        self.dropna = dropna
        self.base = base

    def get_function(self):
        """Return the callable that computes the entropy of a series.

        Returns:
            callable: A function taking a pandas Series and returning the
            entropy of its normalized value distribution.
        """

        def pd_entropy(s):
            # Relative frequency of each distinct value; NaN is kept as a
            # category unless self.dropna is True.
            distribution = s.value_counts(normalize=True, dropna=self.dropna)
            # Cast pandas' nullable "Float64" to NumPy float64 — presumably so
            # to_numpy() hands scipy a plain float ndarray (TODO confirm).
            if distribution.dtype == "Float64":
                distribution = distribution.astype("float64")
            return stats.entropy(distribution.to_numpy(), base=self.base)

        return pd_entropy