Example #1
0
 def setup_class(self):
     """Build the shared fixture: a two-timeseries frame and its train/test gluon datasets."""
     columns = {
         "date": [
             "2018-01-06", "2018-01-07", "2018-01-08",
             "2018-01-06", "2018-01-07", "2018-01-08",
         ],
         "volume": [2, 4, 2, 5, 2, 5],
         "revenue": [12, 13, 14, 15, 11, 10],
         "store": [1, 1, 1, 1, 1, 1],
         "item": [1, 1, 1, 2, 2, 2],
         "is_holiday": [0, 0, 0, 0, 1, 0],
         "is_weekend": [1, 0, 0, 1, 0, 0],
     }
     self.df = pd.DataFrame(columns)
     # Parse the date strings and keep them timezone-naive.
     self.df["date"] = pd.to_datetime(self.df["date"]).dt.tz_localize(tz=None)
     self.gluon_dataset = GluonDataset(
         dataframe=self.df,
         time_column_name="date",
         frequency="D",
         target_columns_names=["volume", "revenue"],
         timeseries_identifiers_names=["store", "item"],
         external_features_columns_names=["is_holiday", "is_weekend"],
         min_length=2,
     )
     self.prediction_length = 1
     # First dataset is shortened by prediction_length (train); second is full (test).
     datasets = self.gluon_dataset.create_list_datasets(
         cut_lengths=[self.prediction_length, 0])
     self.train_list_dataset = datasets[0]
     self.test_list_dataset = datasets[1]
Example #2
0
    def create_gluon_datasets(self):
        """Create train and test gluon list datasets.

        The last prediction_length time steps are removed from each timeseries
        of the train dataset. Also computes the optimal num_batches_per_epoch
        value based on the train dataset size when the user did not set one.
        """
        gluon_dataset = GluonDataset(
            dataframe=self.training_df,
            time_column_name=self.time_column_name,
            frequency=self.frequency,
            target_columns_names=self.target_columns_names,
            timeseries_identifiers_names=self.timeseries_identifiers_names,
            external_features_columns_names=self.external_features_columns_names,
            # Assuming that context_length = prediction_length
            min_length=2 * self.prediction_length,
        )

        # First dataset is cut by prediction_length (evaluation/train split),
        # second keeps the full history.
        gluon_list_datasets = gluon_dataset.create_list_datasets(
            cut_lengths=[self.prediction_length, 0])
        self.evaluation_train_list_dataset = gluon_list_datasets[0]
        self.full_list_dataset = gluon_list_datasets[1]

        # -1 acts as "auto": derive batches-per-epoch from the dataset size.
        if self.user_num_batches_per_epoch == -1:
            self.num_batches_per_epoch = self._compute_optimal_num_batches_per_epoch()
        else:
            self.num_batches_per_epoch = self.user_num_batches_per_epoch
Example #3
0
 def setup_method(self):
     """Rebuild the gluon dataset from the class-level frame before every test."""
     dataset = GluonDataset(
         dataframe=self.df,
         time_column_name="date",
         frequency="D",
         target_columns_names=["volume", "revenue"],
         timeseries_identifiers_names=["store", "item"],
         external_features_columns_names=["is_holiday", "is_weekend"],
         min_length=2,
     )
     self.gluon_dataset = dataset
     # A single uncut dataset (cut length 0) is enough for these tests.
     self.gluon_list_dataset = dataset.create_list_datasets(cut_lengths=[0])[0]
    def setup_class(self):
        """Shared fixture: two short timeseries plus a configured feed-forward model."""
        raw = {
            "date": [
                "2018-01-06", "2018-01-07", "2018-01-08", "2018-01-09",
                "2018-01-08", "2018-01-09", "2018-01-10", "2018-01-11", "2018-01-12",
            ],
            "target": [2, 4, 2, 2, 5, 2, 3, 2, 3],
            "key": [1, 1, 1, 1, 2, 2, 2, 2, 2],
            "ext_feat": [0, 0, 0, 0, 0, 1, 0, 1, 1],
        }
        df = pd.DataFrame(raw)
        # Parse the date strings and keep them timezone-naive.
        df["date"] = pd.to_datetime(df["date"]).dt.tz_localize(tz=None)

        self.frequency = "D"
        self.prediction_length = 2

        gluon_dataset = GluonDataset(
            dataframe=df,
            time_column_name="date",
            frequency=self.frequency,
            target_columns_names=["target"],
            timeseries_identifiers_names=["key"],
            external_features_columns_names=["ext_feat"],
            min_length=2,
        )

        # Train dataset is shortened by prediction_length; test keeps the full series.
        datasets = gluon_dataset.create_list_datasets(
            cut_lengths=[self.prediction_length, 0])
        self.train_list_dataset = datasets[0]
        self.test_list_dataset = datasets[1]

        self.model_name = "simplefeedforward"
        self.model = Model(
            self.model_name,
            model_parameters={"activated": True, "kwargs": {}},
            frequency=self.frequency,
            prediction_length=self.prediction_length,
            epoch=1,
            use_external_features=True,
            batch_size=32,
            num_batches_per_epoch=50,
        )
Example #5
0
class TestGluonDataset:
    """Checks that GluonDataset splits a long-format frame into per-identifier timeseries."""

    def setup_class(self):
        """Build the shared long-format frame with two targets and two identifier columns."""
        columns = {
            "date": [
                "2018-01-06", "2018-01-07", "2018-01-08",
                "2018-01-06", "2018-01-07", "2018-01-08",
            ],
            "volume": [2, 4, 2, 5, 2, 5],
            "revenue": [12, 13, 14, 15, 11, 10],
            "store": [1, 1, 1, 1, 1, 1],
            "item": [1, 1, 1, 2, 2, 2],
            "is_holiday": [0, 0, 0, 0, 1, 0],
            "is_weekend": [1, 0, 0, 1, 0, 0],
        }
        self.df = pd.DataFrame(columns)
        # Parse the date strings and keep them timezone-naive.
        self.df["date"] = pd.to_datetime(self.df["date"]).dt.tz_localize(tz=None)

    def setup_method(self):
        """Rebuild an uncut gluon list dataset before every test."""
        self.gluon_dataset = GluonDataset(
            dataframe=self.df,
            time_column_name="date",
            frequency="D",
            target_columns_names=["volume", "revenue"],
            timeseries_identifiers_names=["store", "item"],
            external_features_columns_names=["is_holiday", "is_weekend"],
            min_length=2,
        )
        self.gluon_list_dataset = self.gluon_dataset.create_list_datasets(
            cut_lengths=[0])[0]

    def test_start_date(self):
        entry = self.gluon_list_dataset.list_data[1]
        assert entry[TIMESERIES_KEYS.START] == pd.Timestamp("2018-01-06")

    def test_target(self):
        expected = np.array([12, 13, 14])
        entry = self.gluon_list_dataset.list_data[1]
        assert (entry[TIMESERIES_KEYS.TARGET] == expected).all()

    def test_external_features(self):
        # One row per external feature, one column per time step.
        expected = np.array([[0, 0, 0], [1, 0, 0]])
        entry = self.gluon_list_dataset.list_data[1]
        assert (entry[TIMESERIES_KEYS.FEAT_DYNAMIC_REAL] == expected).all()

    def test_timeseries_identifiers(self):
        entry = self.gluon_list_dataset.list_data[2]
        assert entry[TIMESERIES_KEYS.IDENTIFIERS] == {"store": 1, "item": 2}
Example #6
0
class TestModel:
    """Integration tests: train each supported model on a tiny dataset and check its outputs.

    test_deepar / test_transformer / test_seasonal_naive previously triplicated
    the build-model / train_evaluate / assert boilerplate; it is factored into
    the private _train_evaluate_and_assert helper.
    """

    def setup_class(self):
        """Build a two-target, two-identifier dataset split into train/test gluon datasets."""
        self.df = pd.DataFrame({
            "date": [
                "2018-01-06", "2018-01-07", "2018-01-08", "2018-01-06",
                "2018-01-07", "2018-01-08"
            ],
            "volume": [2, 4, 2, 5, 2, 5],
            "revenue": [12, 13, 14, 15, 11, 10],
            "store": [1, 1, 1, 1, 1, 1],
            "item": [1, 1, 1, 2, 2, 2],
            "is_holiday": [0, 0, 0, 0, 1, 0],
            "is_weekend": [1, 0, 0, 1, 0, 0],
        })
        # Parse the date strings and keep them timezone-naive.
        self.df["date"] = pd.to_datetime(
            self.df["date"]).dt.tz_localize(tz=None)
        self.gluon_dataset = GluonDataset(
            dataframe=self.df,
            time_column_name="date",
            frequency="D",
            target_columns_names=["volume", "revenue"],
            timeseries_identifiers_names=["store", "item"],
            external_features_columns_names=["is_holiday", "is_weekend"],
            min_length=2,
        )
        self.prediction_length = 1
        # Train dataset is shortened by prediction_length; test keeps the full series.
        gluon_list_datasets = self.gluon_dataset.create_list_datasets(
            cut_lengths=[self.prediction_length, 0])
        self.train_list_dataset = gluon_list_datasets[0]
        self.test_list_dataset = gluon_list_datasets[1]

    def _train_evaluate_and_assert(self, model_name, model_kwargs,
                                   use_external_features):
        """Build `model_name`, train/evaluate it on the shared datasets, run common assertions."""
        model = Model(
            model_name,
            model_parameters={
                "activated": True,
                "kwargs": model_kwargs,
            },
            frequency="D",
            prediction_length=self.prediction_length,
            epoch=1,
            use_external_features=use_external_features,
            batch_size=32,
            num_batches_per_epoch=50,
        )
        metrics, identifiers_columns, forecasts_df = model.train_evaluate(
            self.train_list_dataset,
            self.test_list_dataset,
            make_forecasts=True)

        TestModel.metrics_assertions(metrics, model_name)
        TestModel.forecasts_assertions(
            forecasts_df, model_name, prediction_length=self.prediction_length)

    def test_deepar(self):
        self._train_evaluate_and_assert(
            "deepar",
            model_kwargs={"dropout_rate": "0.3", "cell_type": "gru"},
            use_external_features=True,
        )

    def test_transformer(self):
        self._train_evaluate_and_assert(
            "transformer",
            model_kwargs={"model_dim": 16},
            use_external_features=True,
        )

    def test_seasonal_naive(self):
        self._train_evaluate_and_assert(
            "seasonal_naive",
            model_kwargs={},
            use_external_features=False,
        )

    def test_mqcnn(self):
        # mqcnn is only trained (no evaluation), so it does not go through the helper.
        model_name = "mqcnn"
        model = Model(
            model_name,
            model_parameters={
                "activated": True,
                "kwargs": {}
            },
            frequency="D",
            prediction_length=self.prediction_length,
            epoch=1,
            use_external_features=False,
            batch_size=32,
            num_batches_per_epoch=50,
        )
        model.train(self.test_list_dataset)
        assert model.predictor is not None

    @staticmethod
    def metrics_assertions(metrics, model_name):
        """Check the metrics frame has the expected rows, columns and model label."""
        expected_metrics_columns = ["store", "item"]
        expected_metrics_columns += [
            METRICS_DATASET.TARGET_COLUMN,
            METRICS_DATASET.MODEL_COLUMN,
            METRICS_DATASET.MODEL_PARAMETERS,
            METRICS_DATASET.TRAINING_TIME,
        ]
        expected_metrics_columns += list(
            EVALUATION_METRICS_DESCRIPTIONS.keys())
        # 2 targets x 2 identifier combinations + 1 aggregated row.
        assert len(metrics.index) == 5
        assert set(metrics.columns) == set(expected_metrics_columns)
        # unique() yields a single-element array here, so == compares against the label.
        assert metrics[METRICS_DATASET.MODEL_COLUMN].unique(
        ) == MODEL_DESCRIPTORS[model_name][LABEL]

    @staticmethod
    def forecasts_assertions(forecasts_df, model_name, prediction_length=1):
        """Check the forecasts frame covers both identifier pairs over the horizon."""
        assert len(forecasts_df.index) == 2
        assert forecasts_df["index"].nunique() == prediction_length