示例#1
0
    def __init__(
        self,
        outcome_var: Union[SeriesSpecs, pd.Series],
        model: Optional[Union[Type, Tuple[Type, dict]]],
        start_of_training: datetime,
        end_of_testing: datetime,
        frequency: timedelta,
        horizon: timedelta,
        lags: Optional[List[int]] = None,
        regressors: Optional[Union[List[SeriesSpecs], List[pd.Series]]] = None,
        ratio_training_testing_data=DEFAULT_RATIO_TRAINING_TESTING_DATA,
        remodel_frequency: Union[str, timedelta] = DEFAULT_REMODELING_FREQUENCY,
        model_filename: Optional[str] = None,
        creation_time: Optional[datetime] = None,
    ):
        """Create a ModelSpecs instance.

        Args:
            outcome_var: Specs or series of the outcome variable; passed
                through parse_series_specs together with the label "y".
            model: Model class, or a (class, init-parameters dict) tuple.
                May be None, in which case set_model is not called here and
                the model can be set later with set_model.
            start_of_training: Start of the combined training & testing period.
            end_of_testing: End of the combined training & testing period.
            frequency: Step size; the training & testing period has to fit it
                (as decided by timedelta_fits_into).
            horizon: Stored as-is on the instance.
            lags: Stored as-is; None becomes an empty list.
            regressors: Specs or series of the regressors; None becomes an
                empty list. Each one is passed through parse_series_specs
                with the label "Regressor<N>", N being its 1-based position.
            ratio_training_testing_data: Stored as-is on the instance.
            remodel_frequency: Stored as-is on the instance.
            model_filename: Stored as-is on the instance.
            creation_time: Defaults to tz_aware_utc_now() when None.

        Raises:
            IncompatibleModelSpecs: If the training & testing period does not
                fit with the frequency.
        """
        self.outcome_var = parse_series_specs(outcome_var, "y")
        if model is not None:
            self.set_model(model)
        self.frequency = frequency
        self.horizon = horizon
        self.lags = [] if lags is None else lags

        # Label regressors by position. Looking the position up with
        # list.index() compares elements with ==, which is quadratic, gives
        # duplicates the same label and fails outright for pd.Series (the
        # element-wise comparison result has no single truth value).
        self.regressors = [
            parse_series_specs(regressor, "Regressor%d" % position)
            for position, regressor in enumerate(regressors or [], start=1)
        ]

        self.start_of_training = start_of_training
        self.end_of_testing = end_of_testing
        self.ratio_training_testing_data = ratio_training_testing_data

        # check if training + testing period is compatible with frequency
        if not timedelta_fits_into(
                self.frequency, self.end_of_testing - self.start_of_training):
            raise IncompatibleModelSpecs(
                "Training & testing period (%s to %s) does not fit with frequency (%s)"
                %
                (self.start_of_training, self.end_of_testing, self.frequency))

        if creation_time is None:
            creation_time = tz_aware_utc_now()
        self.creation_time = creation_time
        self.model_filename = model_filename
        self.remodel_frequency = remodel_frequency
示例#2
0
def get_time_steps(time_range: Union[str, datetime, Tuple[datetime, datetime]],
                   specs: ModelSpecs) -> pd.DatetimeIndex:
    """Get relevant datetime indices to build features for.

    The time_range parameter can be one datetime object or a tuple of two
    datetime objects, in which case this function builds a DatetimeIndex from
    them. It can also be one of two strings: "train" or "test". In this
    situation, this function creates a training or testing period from the
    model specs (start_of_training, end_of_testing and
    ratio_training_testing_data).

    Args:
        time_range: A datetime, a (start, end) tuple of datetimes, or one of
            the strings "train" / "test".
        specs: Model specs; frequency is always read, the training/testing
            attributes only for "train" / "test".

    Returns:
        A DatetimeIndex in steps of specs.frequency.

    Raises:
        ValueError: If time_range is none of the accepted forms, or if a
            (start, end) tuple does not fit the model frequency (as decided
            by timedelta_fits_into).

    TODO: we can check (and complain) if datetime objects are incompatible to specs.frequency
          e.g. if round_datetime(dt, by_seconds=specs.frequency.total_seconds()) != dt:
                   raise Exception("%s is not compatible with frequency %s." % (dt, specs.frequency))
          We have to discuss if we allow to use any time to start intervals or rather 15:00, 15:15, 15:30 etc ...
    """
    # check valid time_range parameter
    is_single = isinstance(time_range, datetime)
    # The length check keeps malformed tuples from escaping as an IndexError.
    is_pair = (isinstance(time_range, tuple) and len(time_range) == 2
               and all(isinstance(t, datetime) for t in time_range))
    is_keyword = isinstance(time_range, str) and time_range in ("train", "test")
    if not (is_single or is_pair or is_keyword):
        raise ValueError(
            "Goal for DateTimeIndex construction needs to be either a string ('train', 'test'), "
            "a tuple of two datetime objects or one datetime object.")

    pd_frequency = timedelta_to_pandas_freq_str(specs.frequency)

    # NOTE(review): `closed=` was deprecated in pandas 1.4 and removed in 2.0
    # in favor of `inclusive=`. It is kept here because the pandas version
    # this file targets is not visible from this function.

    # easy cases: one or two datetime objects
    if is_single:
        # NOTE(review): start == end with a left-closed range; confirm that
        # the installed pandas yields the intended number of steps here.
        return pd.date_range(time_range,
                             time_range,
                             closed="left",
                             freq=pd_frequency)
    if is_pair:
        start, end = time_range
        if not timedelta_fits_into(specs.frequency, end - start):
            raise ValueError(
                "Start & end period (%s to %s) does not cleanly fit a multiple of the model frequency (%s)"
                % (start, end, specs.frequency))
        return pd.date_range(start, end, closed="left", freq=pd_frequency)

    # special cases: "train" or "test" - we have to calculate from model specs
    length_of_data = specs.end_of_testing - specs.start_of_training
    split_point = (specs.start_of_training +
                   length_of_data * specs.ratio_training_testing_data)
    frequency_in_seconds = specs.frequency.total_seconds()

    if time_range == "train":
        end_of_training = round_datetime(split_point, frequency_in_seconds)
        logger.debug("Start of training: %s", specs.start_of_training)
        logger.debug("End of training: %s", end_of_training)
        return pd.date_range(specs.start_of_training,
                             end_of_training,
                             freq=pd_frequency)

    # Only "test" is left: testing starts one frequency step after the split.
    start_of_testing = round_datetime(split_point + specs.frequency,
                                      frequency_in_seconds)
    logger.debug("Start of testing: %s", start_of_testing)
    logger.debug("End of testing: %s", specs.end_of_testing)
    return pd.date_range(start_of_testing,
                         specs.end_of_testing,
                         freq=pd_frequency)
示例#3
0
def test_timedelta_fits():
    """Run timedelta_fits_into over a table of (step, span, expected) cases.

    In every case below, the expected value is True exactly when the step
    duration divides the span duration without remainder.
    """
    cases = [
        (timedelta(seconds=11), timedelta(minutes=4), False),
        (timedelta(minutes=10), timedelta(hours=2), True),
        (timedelta(minutes=3), timedelta(hours=1), True),
        (timedelta(minutes=15), timedelta(hours=1), True),
        (timedelta(minutes=15), timedelta(days=4), True),
        (timedelta(hours=12), timedelta(days=2), True),
        (timedelta(hours=16), timedelta(days=3), False),
        (timedelta(hours=16), timedelta(days=6), True),
        (timedelta(minutes=15), timedelta(weeks=1), True),
        (timedelta(minutes=11), timedelta(hours=1), False),
        (timedelta(minutes=11), timedelta(hours=11), True),
    ]
    for step, span, expected in cases:
        # Compare truthiness only, like a plain `assert` / `assert not`.
        assert bool(timedelta_fits_into(step, span)) is expected