Source code for featuretools.primitives.standard.transform.datetime.distance_to_holiday

import pandas as pd
from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import Datetime

from featuretools.primitives.base import TransformPrimitive
from featuretools.primitives.standard.transform.datetime.utils import HolidayUtil


[docs]class DistanceToHoliday(TransformPrimitive): """Computes the number of days before or after a given holiday. Description: For a list of dates, return the distance from the nearest occurrence of a chosen holiday. The distance is returned in days. If the closest occurrence is prior to the date given, return a negative number. If a date is missing, return `NaN`. Currently only works with dates between 1950 and 2100. Args: holiday (str): Name of the holiday. Defaults to New Year's Day. country (str): Specifies which country's calendar to use for the given holiday. Default is `US`. Examples: >>> from datetime import datetime >>> distance_to_holiday = DistanceToHoliday("New Year's Day") >>> dates = [datetime(2010, 1, 1), ... datetime(2012, 5, 31), ... datetime(2017, 7, 31), ... datetime(2020, 12, 31)] >>> distance_to_holiday(dates).tolist() [0, -151, 154, 1] We can also control the country in which we're searching for a holiday. >>> distance_to_holiday = DistanceToHoliday("Canada Day", country='Canada') >>> dates = [datetime(2010, 1, 1), ... datetime(2012, 5, 31), ... datetime(2017, 7, 31), ... datetime(2020, 12, 31)] >>> distance_to_holiday(dates).tolist() [181, 31, -30, 182] """ name = "distance_to_holiday" input_types = [ColumnSchema(logical_type=Datetime)] return_type = ColumnSchema(semantic_tags={"numeric"}) default_value = 0
[docs] def __init__(self, holiday="New Year's Day", country="US"): self.country = country self.holiday = holiday self.holidayUtil = HolidayUtil(country) available_holidays = list(set(self.holidayUtil.federal_holidays.values())) if self.holiday not in available_holidays: error = "must be one of the available holidays:\n%s" % available_holidays raise ValueError(error)
def get_function(self): def distance_to_holiday(x): holiday_df = self.holidayUtil.to_df() holiday_df = holiday_df[holiday_df.names == self.holiday] df = pd.DataFrame({"date": x}) df["x_index"] = df.index # store original index as a column df = df.dropna() df = df.sort_values("date") df["date"] = df["date"].dt.date.astype("datetime64[ns]") matches = pd.merge_asof( df, holiday_df, left_on="date", right_on="holiday_date", direction="nearest", tolerance=pd.Timedelta("365d"), ) matches = matches.set_index("x_index") matches["days_diff"] = (matches.holiday_date - matches.date).dt.days return matches.days_diff.reindex_like(x) return distance_to_holiday