def test_simple_forecast(self):
    """An SSA forecaster trained on a repeating 0..4 season should
    predict the next values of the cycle for each input row.
    """
    seasonality_size = 5
    one_season = np.arange(seasonality_size)
    # Three full seasons of the 0,1,2,3,4 pattern.
    X_train = pd.Series(np.tile(one_season, 3), name="ts")

    num_training_seasons = 3
    training_size = seasonality_size * num_training_seasons

    forecaster = SsaForecaster(series_length=8,
                               train_size=training_size,
                               window_size=seasonality_size + 1,
                               horizon=2) << {'fc': 'ts'}
    forecaster.fit(X_train, verbose=1)
    predictions = forecaster.transform(X_train)

    # Row 0 holds the value 0; the next two points of the season
    # are 1 and 2, which the horizon=2 forecast should reproduce.
    self.assertEqual(round(predictions.loc[0, 'fc.0']), 1.0)
    self.assertEqual(round(predictions.loc[0, 'fc.1']), 2.0)
    # One forecast row per input row (15 = 5 * 3).
    self.assertEqual(len(predictions['fc.0']), 15)
def get_uni_standard_trained_on_entire_data_ssa_pipeline():
    """Build a one-step univariate SSA forecasting pipeline whose
    train_size (20) is intended to cover the entire input series.

    Forecasts the 'T (degC)' column into 'T (degC)_fc'.
    """
    forecaster = SsaForecaster(series_length=6,
                               train_size=20,
                               window_size=3,
                               horizon=1,
                               columns={'T (degC)_fc': 'T (degC)'})
    return Pipeline([forecaster])
def test_multiple_user_specified_columns_is_not_allowed(self):
    """SsaForecaster accepts exactly one input column; passing two
    must raise a RuntimeError that mentions the single-column limit.
    """
    path = get_dataset('timeseries').as_filepath()
    data = FileDataStream.read_csv(path)

    pipeline = Pipeline([
        SsaForecaster(series_length=8,
                      train_size=15,
                      window_size=5,
                      horizon=2,
                      columns=['t2', 't3'])
    ])

    # assertRaisesRegex verifies both the exception type and that the
    # message explains the restriction; it replaces the original
    # try/except/self.fail() pattern, which could silently pass if the
    # substring check itself were wrong.
    with self.assertRaisesRegex(RuntimeError, 'Only one column is allowed'):
        pipeline.fit_transform(data)
# Row filters and time-series transforms under test, each built with
# minimal parameters.
'SkipFilter': SkipFilter(count=5),
'TakeFilter': TakeFilter(count=100000),
'IidSpikeDetector': IidSpikeDetector(columns=['F0']),
'IidChangePointDetector': IidChangePointDetector(columns=['F0']),
'SsaSpikeDetector': SsaSpikeDetector(columns=['F0'],
                                     seasonal_window_size=2),
'SsaChangePointDetector': SsaChangePointDetector(columns=['F0'],
                                                 seasonal_window_size=2),
'SsaForecaster': SsaForecaster(columns=['F0'],
                               window_size=2,
                               series_length=5,
                               train_size=5,
                               horizon=1),
# Scores inputs 'a' and 'b' through a pre-frozen TensorFlow graph
# shipped with the examples.
'TensorFlowScorer': TensorFlowScorer(
    model_location=os.path.join(this, '..', 'nimbusml', 'examples',
                                'frozen_saved_model.pb'),
    columns={'c': ['a', 'b']}),
}

# NOTE(review): presumably estimators whose output spans multiple
# columns, given the name — confirm against how this list is consumed.
MULTI_OUTPUT_EX = [
    'FastLinearClassifier',
    'FastLinearRegressor',
    'LogisticRegressionClassifier',
    'FastTreesRegressor',
    'FastForestRegressor',
    'FastTreesTweedieRegressor',
    'OneClassSvmAnomalyDetector',
    'NaiveBayesClassifier',
    'GamBinaryClassifier',
    'GamRegressor',
    'OnlineGradientDescentRegressor',
# Extract raw pixel values from the loaded image column.
PixelExtractor(columns={'ImgPixels': 'ImgPath'}),
]),
'PrefixColumnConcatenator': PrefixColumnConcatenator(
    columns={'Features': 'Sepal_'}),
# Load an image from a path column, then resize it to 227x227.
'Resizer': Pipeline([
    Loader(columns={'ImgPath': 'Path'}),
    Resizer(image_width=227,
            image_height=227,
            columns={'ImgResize': 'ImgPath'})
]),
'SkipFilter': SkipFilter(count=5),
'SsaSpikeDetector': SsaSpikeDetector(columns=['Sepal_Length'],
                                     seasonal_window_size=2),
'SsaChangePointDetector': SsaChangePointDetector(columns=['Sepal_Length'],
                                                 seasonal_window_size=2),
'SsaForecaster': SsaForecaster(columns=['Sepal_Length'],
                               window_size=2,
                               series_length=5,
                               train_size=5,
                               horizon=1),
'RangeFilter': RangeFilter(min=5.0, max=5.1, columns=['Sepal_Length']),
'TakeFilter': TakeFilter(count=100),
# Scores inputs 'a' and 'b' through a pre-frozen TensorFlow graph
# shipped with the examples.
'TensorFlowScorer': TensorFlowScorer(
    model_location=os.path.join(
        script_dir, '..', 'nimbusml', 'examples',
        'frozen_saved_model.pb'),
    columns={'c': ['a', 'b']}),
'ToKey': ToKey(columns={'edu_1': 'education_str'}),
'TypeConverter': TypeConverter(columns=['group'], result_type='R4'),
'WordTokenizer': WordTokenizer(char_array_term_separators=[" "]) << {'wt': 'SentimentText'}
# 5 0 # 6 1 # 7 2 # 8 3 # 9 4 # 10 0 # 11 1 # 12 100 # 13 110 # 14 120 training_seasons = 3 training_size = seasonality_size * training_seasons forecaster = SsaForecaster(series_length=8, train_size=training_size, window_size=seasonality_size + 1, horizon=4) << { 'fc': 'ts' } forecaster.fit(X_train, verbose=1) data = forecaster.transform(x_test) pd.set_option('display.float_format', lambda x: '%.2f' % x) print(data) # The fc.x columns are the forecasts # given the input in the ts column. # # ts fc.0 fc.1 fc.2 fc.3 # 0 0 1.00 2.00 3.00 4.00
# Example: forecast the 't2' column two steps ahead with an SSA
# forecaster wrapped in a Pipeline.
path = get_dataset('timeseries').as_filepath()
data = FileDataStream.read_csv(path)
print(data.head())
#      t1    t2      t3
# 0  0.01  0.01  0.0100
# 1  0.02  0.02  0.0200
# 2  0.03  0.03  0.0200
# 3  0.03  0.03  0.0250
# 4  0.03  0.03  0.0005

# define the training pipeline
pipeline = Pipeline([
    SsaForecaster(series_length=6,
                  train_size=8,
                  window_size=3,
                  horizon=2,
                  columns={'t2_fc': 't2'})
])

result = pipeline.fit_transform(data)

# Two-decimal display so the printed forecasts line up readably.
pd.set_option('display.float_format', lambda x: '%.2f' % x)
print(result)

# Output
#
#      t1    t2    t3  t2_fc.0  t2_fc.1
# 0  0.01  0.01  0.01     0.10     0.12
# 1  0.02  0.02  0.02     0.06     0.08
# 2  0.03  0.03  0.02     0.04     0.05