from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import Double
from featuretools.primitives.base import TransformPrimitive
[docs]class PercentChange(TransformPrimitive):
    """Determines the percent difference between values in a list.
    Description:
        Given a list of numbers, return the percent difference
        between each subsequent number. Percentages are shown in
        decimal form (not multiplied by 100). Uses pandas' pct_change
        function.
    Args:
        periods (int): Periods to shift for calculating percent change.
            Default is 1.
        fill_method (str): Method for filling gaps in reindexed
            Series. Valid options are `backfill`, `bfill`, `pad`, `ffill`.
            `pad / ffill`: fill gap with last valid observation.
            `backfill / bfill`: fill gap with next valid observation.
            Default is `pad`.
        limit (int): The max number of consecutive NaN values in a gap that
            can be filled. Default is None.
        freq (DateOffset, timedelta, or offset alias string):
            If `freq` is specified, instead of calcualting change between subsequent
            points, PercentChange will calculate change between points with a
            certain interval between their date indices. `freq` defines the
            desired interval. When freq is used, the resulting index will also be
            filled to include any missing dates from the specified interval.
            If the index is not date/datetime and freq is used, it will raise a
            NotImplementedError.
            If freq is None, no changes will be applied. Default is None.
    Examples:
        >>> percent_change = PercentChange()
        >>> percent_change([2, 5, 15, 3, 3, 9, 4.5]).to_list()
        [nan, 1.5, 2.0, -0.8, 0.0, 2.0, -0.5]
        We can control the number of periods to return the percent
            difference between points further from one another.
        >>> percent_change_2 = PercentChange(periods=2)
        >>> percent_change_2([2, 5, 15, 3, 3, 9, 4.5]).to_list()
        [nan, nan, 6.5, -0.4, -0.8, 2.0, 0.5]
        We can control the method used to handle gaps in data.
        >>> percent_change = PercentChange()
        >>> percent_change([2, 4, 8, None, 16, None, 32, None]).to_list()
        [nan, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
        >>> percent_change_backfill = PercentChange(fill_method='backfill')
        >>> percent_change_backfill([2, 4, 8, None, 16, None, 32, None]).to_list()
        [nan, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, nan]
        We can also control the maximum number of NaN values to fill in a gap.
        >>> percent_change = PercentChange()
        >>> percent_change([2, None, None, None, 4]).to_list()
        [nan, 0.0, 0.0, 0.0, 1.0]
        >>> percent_change_limited = PercentChange(limit=2)
        >>> percent_change_limited([2, None, None, None, 4]).to_list()
        [nan, 0.0, 0.0, nan, nan]
        Finally, we can specify a date frequency on which to calculate percent
            change.
        >>> import pandas as pd
        >>> dates = pd.DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-05'])
        >>> x_indexed = pd.Series([1, 2, 3, 4], index=dates)
        >>> percent_change = PercentChange()
        >>> percent_change(x_indexed).to_list()
        [nan, 1.0, 0.5, 0.33333333333333326]
        >>> date_offset = pd.tseries.offsets.DateOffset(days=1)
        >>> percent_change_freq = PercentChange(freq=date_offset)
        >>> percent_change_freq(x_indexed).to_list()
        [nan, 1.0, 0.5, nan]
    """
    name = "percent_change"
    input_types = [ColumnSchema(semantic_tags={"numeric"})]
    return_type = ColumnSchema(logical_type=Double, semantic_tags={"numeric"})
[docs]    def __init__(self, periods=1, fill_method="pad", limit=None, freq=None):
        if fill_method not in ["backfill", "bfill", "pad", "ffill"]:
            raise ValueError("Invalid fill_method")
        self.periods = periods
        self.fill_method = fill_method
        self.limit = limit
        self.freq = freq 
    def get_function(self):
        def percent_change(data):
            return data.pct_change(
                self.periods,
                self.fill_method,
                self.limit,
                self.freq,
            )
        return percent_change