import pandas as pd
def make_temporal_cutoffs(instance_ids,
                          cutoffs,
                          window_size=None,
                          num_windows=None,
                          start=None):
    '''Makes a set of equally spaced cutoff times prior to a set of input cutoffs and instance ids.

    If window_size and num_windows are provided, then num_windows of size window_size will be created
    prior to each cutoff time

    If window_size and a start list is provided, then a variable number of windows will be created prior
    to each cutoff time, with the corresponding start time as the first cutoff.

    If num_windows and a start list is provided, then num_windows of variable size will be created prior
    to each cutoff time, with the corresponding start time as the first cutoff

    Args:
        instance_ids (list, np.ndarray, or pd.Series): list of instance ids. This function will make a
            new datetime series of multiple cutoff times for each value in this array.
        cutoffs (list, np.ndarray, or pd.Series): list of datetime objects associated with each instance id.
            Each one of these will be the last time in the new datetime series for each instance id
        window_size (pd.Timedelta, optional): amount of time between each datetime in each new cutoff series
        num_windows (int, optional): number of windows in each new cutoff series
        start (list, optional): list of start times for each instance id. Entries are matched to
            instance_ids and cutoffs by position.

    Returns:
        pd.DataFrame: one row per generated cutoff time, with a "time" column and an
            "instance_id" column, indexed 0..n-1. Empty (with the same two columns) when
            no instance ids are given.

    Raises:
        ValueError: if all three optional args are supplied, if too few are supplied to
            define the windows, or if num_windows is 1 while the window size has to be
            derived from start.
    '''
    if (window_size is not None and
            num_windows is not None and
            start is not None):
        raise ValueError("Only supply 2 of the 3 optional args, window_size, num_windows and start")
    # Without num_windows, both window_size and start are needed to define a range.
    # num_windows on its own is deliberately still passed through to pd.date_range,
    # which then falls back to its default (daily) frequency.
    if num_windows is None and (window_size is None or start is None):
        raise ValueError("Supply 2 of the 3 optional args, window_size, num_windows and start")

    derive_window_size = start is not None and window_size is None
    if derive_window_size and num_windows == 1:
        # (cutoff - start) / (num_windows - 1) would divide by zero.
        raise ValueError("num_windows must not be 1 when the window size is derived from start")

    # Index start by position, like zip() does for instance_ids and cutoffs; a
    # pd.Series with a non-default index would otherwise be looked up by label.
    starts = None if start is None else list(start)

    out = []
    for i, (_id, time) in enumerate(zip(instance_ids, cutoffs)):
        _window_size = window_size
        _start = None
        if starts is not None:
            if derive_window_size:
                # Spread num_windows points evenly over [start, cutoff].
                _window_size = (time - starts[i]) / (num_windows - 1)
            else:
                _start = starts[i]
        times = pd.date_range(end=time,
                              periods=num_windows,
                              freq=_window_size,
                              start=_start)
        out.append(pd.DataFrame({'time': times,
                                 'instance_id': [_id] * len(times)}))

    if not out:
        # pd.concat refuses an empty list; return an empty frame with the same columns.
        return pd.DataFrame(columns=['time', 'instance_id'])
    return pd.concat(out).reset_index(drop=True)
def convert_time_units(secs,
                       unit):
    '''
    Converts a time specified in seconds to a time in the given units

    Args:
        secs (integer): number of seconds. This function will convert the units of this number.
        unit(str): units to be converted to.
            acceptable values: years, months, days, hours, minutes, seconds, milliseconds,
            microseconds, nanoseconds

    Returns:
        the value of secs expressed in the requested unit

    Raises:
        ValueError: if unit is not one of the acceptable values
    '''
    # Number of seconds in one unit, for units of a second or longer.
    # 'years' and 'months' are fixed approximations: 2628000 is 365 * 86400 / 12,
    # and 31540000 is 365 * 86400 rounded to four significant figures.
    seconds_per_unit = {'years': 31540000,
                        'months': 2628000,
                        'days': 86400,
                        'hours': 3600,
                        'minutes': 60,
                        'seconds': 1}
    # Number of units in one second, for sub-second units. Multiplying by these
    # exact values avoids the rounding error of dividing by 0.001 or 0.000000001,
    # which have no exact binary floating point representation. They are floats
    # so that integer input still produces a float, as true division does.
    units_per_second = {'milliseconds': 1000.0,
                        'microseconds': 1000000.0,
                        'nanoseconds': 1000000000.0}
    if unit in seconds_per_unit:
        return secs / seconds_per_unit[unit]
    if unit in units_per_second:
        return secs * units_per_second[unit]
    raise ValueError("Invalid unit given, make sure it is plural")