def __init__(
    self,
    outcome_var: Union[SeriesSpecs, pd.Series],
    # Model class and optionally initialization parameters,
    # can be set later with set_model
    model: Optional[Union[Type, Tuple[Type, dict]]],
    start_of_training: datetime,
    end_of_testing: datetime,
    frequency: timedelta,
    horizon: timedelta,
    lags: Optional[List[int]] = None,
    regressors: Optional[Union[List[SeriesSpecs], List[pd.Series]]] = None,
    ratio_training_testing_data=DEFAULT_RATIO_TRAINING_TESTING_DATA,
    remodel_frequency: Union[str, timedelta] = DEFAULT_REMODELING_FREQUENCY,
    model_filename: Optional[str] = None,
    creation_time: Optional[datetime] = None,
):
    """Create a ModelSpecs instance.

    :param outcome_var: specs (or a pandas Series) for the outcome variable;
        it is passed through parse_series_specs under the name "y".
    :param model: model class, or a (class, init-parameters dict) tuple.
        If None, set_model is not called here and the model has to be set
        later via set_model.
    :param start_of_training: start of the combined training & testing period.
    :param end_of_testing: end of the combined training & testing period.
    :param frequency: time resolution; must fit into
        (end_of_testing - start_of_training).
    :param horizon: stored as given.
    :param lags: stored as given; None becomes an empty list.
    :param regressors: specs (or pandas Series) for regressors. Each is passed
        through parse_series_specs under the name "Regressor<i>", where i is
        its 1-based position in the list. None becomes an empty list.
    :param ratio_training_testing_data: stored as given.
    :param remodel_frequency: stored as given.
    :param model_filename: stored as given.
    :param creation_time: defaults to tz_aware_utc_now() when None.
    :raises IncompatibleModelSpecs: if the frequency does not fit into the
        training & testing period.
    """
    self.outcome_var = parse_series_specs(outcome_var, "y")
    if model is not None:
        self.set_model(model)
    self.frequency = frequency
    self.horizon = horizon
    self.lags = [] if lags is None else lags

    if regressors is None:
        self.regressors = []
    else:
        # Number regressors by position. Looking the position up with
        # regressors.index(r) is quadratic, gives duplicates the same name and
        # relies on ==, which is ambiguous for pandas Series.
        self.regressors = [
            parse_series_specs(regressor, "Regressor%d" % position)
            for position, regressor in enumerate(regressors, start=1)
        ]

    self.start_of_training = start_of_training
    self.end_of_testing = end_of_testing
    self.ratio_training_testing_data = ratio_training_testing_data

    # check if training + testing period is compatible with frequency
    if not timedelta_fits_into(
        self.frequency, self.end_of_testing - self.start_of_training
    ):
        raise IncompatibleModelSpecs(
            "Training & testing period (%s to %s) does not fit with frequency (%s)"
            % (self.start_of_training, self.end_of_testing, self.frequency)
        )

    if creation_time is None:
        self.creation_time = tz_aware_utc_now()
    else:
        self.creation_time = creation_time
    self.model_filename = model_filename
    self.remodel_frequency = remodel_frequency
def get_time_steps(
    time_range: Union[str, datetime, Tuple[datetime, datetime]], specs: ModelSpecs
) -> pd.DatetimeIndex:
    """
    Get relevant datetime indices to build features for.

    The time_range parameter can be one or two datetime objects, in which case this function
    builds a DateTimeIndex:
      * one datetime: an index containing just that time step;
      * a (start, end) tuple: all steps from start up to, but not including, end.
    It can also be one of two strings: "train" or "test". In this situation, this function creates a
    training or testing period from model specs (both ends included), splitting the period between
    specs.start_of_training and specs.end_of_testing by specs.ratio_training_testing_data.

    Raises an Exception if time_range is none of the above, or if a (start, end) tuple does not span
    a multiple of specs.frequency.

    TODO: we can check (and complain) if datetime objects are incompatible to specs.frequency
          e.g. if round_datetime(dt, by_seconds=specs.frequency.total_seconds()) != dt:
                   raise Exception("%s is not compatible with frequency %s." % (dt, specs.frequency))
          We have to discuss if we allow to use any time to start intervals or rather 15:00, 15:15, 15:30 etc ...
    """
    # check valid time_range parameter
    is_single_datetime = isinstance(time_range, datetime)
    is_datetime_pair = (
        isinstance(time_range, tuple)
        and len(time_range) == 2  # avoids an IndexError on too-short tuples
        and isinstance(time_range[0], datetime)
        and isinstance(time_range[1], datetime)
    )
    is_named_period = isinstance(time_range, str) and time_range in ("train", "test")
    if not (is_single_datetime or is_datetime_pair or is_named_period):
        raise Exception(
            "Goal for DateTimeIndex construction needs to be either a string ('train', 'test'), "
            "a tuple of two datetime objects or one datetime object."
        )

    pd_frequency = timedelta_to_pandas_freq_str(specs.frequency)

    # easy cases: one or two datetime objects
    if is_single_datetime:
        # Excluding the right end here (as closed="left" did) would always drop
        # the only step and return an empty index.
        return pd.date_range(time_range, time_range, freq=pd_frequency)
    if is_datetime_pair:
        start, end = time_range
        if not timedelta_fits_into(specs.frequency, end - start):
            raise Exception(
                "Start & end period (%s to %s) does not cleanly fit a multiple of the model frequency (%s)"
                % (start, end, specs.frequency)
            )
        steps = pd.date_range(start, end, freq=pd_frequency)
        # Left-closed interval: drop the end point. Done by hand because the
        # `closed` keyword of date_range is not available in all pandas versions.
        if len(steps) > 0 and steps[-1] == end:
            steps = steps[:-1]
        return steps

    # special cases: "train" or "test" - we have to calculate from model specs
    length_of_data = specs.end_of_testing - specs.start_of_training
    if time_range == "train":
        end_of_training = (
            specs.start_of_training
            + length_of_data * specs.ratio_training_testing_data
        )
        end_of_training = round_datetime(
            end_of_training, specs.frequency.total_seconds()
        )
        logger.debug("Start of training: %s", specs.start_of_training)
        logger.debug("End of training: %s", end_of_training)
        return pd.date_range(
            specs.start_of_training, end_of_training, freq=pd_frequency
        )
    elif time_range == "test":
        # testing starts one step after the (unrounded) end of training
        start_of_testing = (
            specs.start_of_training
            + (length_of_data * specs.ratio_training_testing_data)
            + specs.frequency
        )
        start_of_testing = round_datetime(
            start_of_testing, specs.frequency.total_seconds()
        )
        logger.debug("Start of testing: %s", start_of_testing)
        logger.debug("End of testing: %s", specs.end_of_testing)
        return pd.date_range(start_of_testing, specs.end_of_testing, freq=pd_frequency)
def test_timedelta_fits():
    """Exercise timedelta_fits_into on pairs expected to fit and pairs expected not to."""
    # (first argument, second argument, whether a truthy result is expected)
    cases = [
        (timedelta(seconds=11), timedelta(minutes=4), False),
        (timedelta(minutes=10), timedelta(hours=2), True),
        (timedelta(minutes=3), timedelta(hours=1), True),
        (timedelta(minutes=15), timedelta(hours=1), True),
        (timedelta(minutes=15), timedelta(days=4), True),
        (timedelta(hours=12), timedelta(days=2), True),
        (timedelta(hours=16), timedelta(days=3), False),
        (timedelta(hours=16), timedelta(days=6), True),
        (timedelta(minutes=15), timedelta(weeks=1), True),
        (timedelta(minutes=11), timedelta(hours=1), False),
        (timedelta(minutes=11), timedelta(hours=11), True),
    ]
    for step, duration, should_fit in cases:
        outcome = timedelta_fits_into(step, duration)
        if should_fit:
            assert outcome
        else:
            assert not outcome