def test_TimeSeriesForest_predictions(n_estimators, n_intervals):
    """Check that a TSF built from an explicit pipeline estimator produces
    exactly the same class probabilities as the default implementation.

    NOTE(review): ``n_intervals`` is accepted but never forwarded to the
    extractor — presumably intentional so that both forests use the same
    default interval setting; confirm against the parametrize decorator.
    """
    seed = 1234
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)

    # Fully explicit variant: interval-feature extraction + decision tree.
    feature_funcs = [np.mean, np.std, time_series_slope]
    pipeline = Pipeline(
        [
            (
                "transform",
                RandomIntervalFeatureExtractor(
                    random_state=seed, features=feature_funcs
                ),
            ),
            ("clf", DecisionTreeClassifier()),
        ]
    )
    explicit_forest = TimeSeriesForestClassifier(
        estimator=pipeline, random_state=seed, n_estimators=n_estimators
    )
    explicit_forest.fit(X_train, y_train)
    proba_explicit = explicit_forest.predict_proba(X_test)

    # Default, semi-modular implementation using
    # RandomIntervalFeatureExtractor internally.
    default_forest = TimeSeriesForestClassifier(
        random_state=seed, n_estimators=n_estimators
    )
    default_forest.fit(X_train, y_train)
    proba_default = default_forest.predict_proba(X_test)

    np.testing.assert_array_equal(proba_explicit, proba_default)
def test_predict_proba():
    """predict_proba yields one valid distribution per sample and keeps
    correct output shapes for single-row input."""
    forest = TimeSeriesForestClassifier(n_estimators=2)
    forest.fit(X, y)

    # Full set: (n_samples, n_classes) matrix whose rows each sum to 1.
    probas = forest.predict_proba(X)
    assert probas.shape == (X.shape[0], n_classes)
    np.testing.assert_array_equal(probas.sum(axis=1), np.ones(X.shape[0]))

    # Single-row input: outputs stay 2-D (proba) and 1-D (labels).
    one_row = X.iloc[[0], :]
    single_proba = forest.predict_proba(one_row)
    assert single_proba.shape == (1, n_classes)
    single_pred = forest.predict(one_row)
    assert single_pred.shape == (1,)
def main():
    """Train a TimeSeriesForestClassifier on generated labelled data and
    pickle the fitted model to ``<save_file_name>.pickle``."""
    data_gen = DataGenerator(labeled_data_file=args.labeled_data_file,
                             data_util_file=args.data_util_file,
                             threshold=args.threshold,
                             dt=args.dt,
                             L=args.L,
                             tmin=args.tmin,
                             tmax=args.tmax)
    training_data, test_data = data_gen.get_data(
        ts_nth_element=args.ts_nth_element, training_frac=0.7)

    # Base estimator for the forest: random-interval summary features
    # (mean / std / slope) feeding a single decision tree.
    base_tree = Pipeline([
        ('extract',
         RandomIntervalFeatureExtractor(
             n_intervals='sqrt',
             features=[np.mean, np.std, time_series_slope])),
        ('clf', DecisionTreeClassifier()),
    ])
    forest = TimeSeriesForestClassifier(
        estimator=base_tree,
        n_estimators=args.n_estimators,
        criterion='entropy' if args.criterion == 'entropy' else 'gini',
        bootstrap=True,
        oob_score=True,
        random_state=1,
        # n_jobs=4,
        verbose=1,
    )

    # Column 0 carries the labels; the remaining columns are the series
    # values — presumably one time step per column (verify in DataGenerator).
    x_train = detabularize(pd.DataFrame(training_data[:, 1:]))
    try:
        with parallel_backend('threading', n_jobs=args.n_jobs):
            forest = forest.fit(x_train, training_data[:, 0])
        out_path = '{save_file_name}.pickle'.format(
            save_file_name=args.save_file_name)
        with open(out_path, 'wb') as model_file:
            pickle.dump(forest, model_file,
                        protocol=pickle.HIGHEST_PROTOCOL)
    except Exception as ex:
        # Best-effort script: report any failure instead of crashing.
        print(ex)
def main():
    """Demo: classify the Italy power demand dataset with several
    sklearn/sktime pipelines and print the accuracy of each approach."""
    # 1. Loading and splitting the dataset
    X_train, y_train = load_italy_power_demand(split='train', return_X_y=True)
    X_test, y_test = load_italy_power_demand(split='test', return_X_y=True)
    print('Shape of X, y train and test dataset', X_train.shape,
          y_train.shape, X_test.shape, y_test.shape, '\n')
    print('X_train:', X_train.head(), '\n')
    print('\nX_train info', X_train.info(), '\n')
    # Label distribution of the training split
    labels, counts = np.unique(y_train, return_counts=True)
    print(
        '\nThere are', labels,
        'labels in this dataset, one corresponds to winter and the other to summer. The counter of each one is',
        counts, '\n')

    # 2. Creating a Model, Fit and Predict Sklearn Classifier
    # Sktime: tabularize the nested series into a flat 2-D table
    X_train_tab = tabularize(X_train)
    X_test_tab = tabularize(X_test)
    print('\n X_train tabularized\n', X_train_tab.head(), '\n')

    # 2.1 SKlearn RandomForest Classifier on the tabularized data
    classifier = RandomForestClassifier(n_estimators=100)
    classifier.fit(X_train_tab, y_train)
    y_pred = classifier.predict(X_test_tab)
    print('Accuracy sklearn RandomForestClassifier',
          round(accuracy_score(y_test, y_pred), 4), '\n')

    # 2.2 Same SKlearn as above but using make_pipeline w/ Sktime Tabularizer
    classifier = make_pipeline(Tabularizer(),
                               RandomForestClassifier(n_estimators=100),
                               verbose=True)
    classifier.fit(X_train, y_train)
    print(
        'Accuracy sklearn RandomForestClassifier using sklearn make_pipeline in which the first step is to sktime Tabularize()',
        round(classifier.score(X_test, y_test), 4), '\n')

    # 3 Sklearn using make_pipeline w/ Sktime TSFreshFeatureExtractor
    classifier = make_pipeline(TSFreshFeatureExtractor(show_warnings=False),
                               RandomForestClassifier(n_estimators=100))
    classifier.fit(X_train, y_train)
    print(
        'Accuracy sklearn RandomForestClassifier using sklearn make_pipeline in which the first step is to sktime TSFreshFeatureExtractor that automatically extracts and filters several key statistical features from the nested X_train time series',
        round(classifier.score(X_test, y_test), 4), '\n')

    # 4. Using Time series algorithms and classifiers from sklearn/sktime:
    # segment into random intervals, extract mean/std/slope per interval,
    # then classify with a plain decision tree.
    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals='sqrt')),  # Sktime
        (
            'transform',
            FeatureUnion([  # Sklearn
                ('mean', RowTransformer(
                    FunctionTransformer(func=np.mean, validate=False))),  # sktime
                ('std', RowTransformer(
                    FunctionTransformer(func=np.std, validate=False))),  # sktime
                ('slope', RowTransformer(
                    FunctionTransformer(func=time_series_slope,
                                        validate=False)))  # sktime
            ])),
        ('clf', DecisionTreeClassifier())  # From Sklearn
    ]
    time_series_tree = Pipeline(steps, verbose=True)  # sklearn
    time_series_tree.fit(X_train, y_train)
    print(
        'Accuracy sklearn DecisionTreeClassifier using sklearn Pipeline() as well as segmentation and transformation techniques from sktime and sklearn',
        round(time_series_tree.score(X_test, y_test), 4))

    # 5. Using Time series Sktime: the built-in time-series forest ensemble
    tsf = TimeSeriesForestClassifier(n_estimators=100, verbose=True)
    tsf.fit(X_train, y_train)
    print('Accuracy sktime TimeSeriesForestClassifier',
          round(tsf.score(X_test, y_test), 4))
# Merge train and validation labels into one flat training label vector.
y_train = np.vstack([y_train, y_val])
y_train = pd.Series(y_train.reshape(-1))
y_test = pd.Series(y_test.reshape(-1))

# Time-series random forest for every column
# (only the first two columns are evaluated here).
for i, col in enumerate(col_names[:2]):
    print(col)
    # Choose one feature (single-column DataFrame keeps sktime's nested format)
    X_train_step = X_train.iloc[:, [i]]
    X_test_step = X_test.iloc[:, [i]]
    # Time series forest clf
    classifier = TimeSeriesForestClassifier()
    classifier.fit(X_train_step, y_train)
    y_pred = classifier.predict(X_test_step)
    # Metrics
    print(f'accuracy_test: {accuracy_score(y_test, y_pred)}')
    print(f"recall_test: {recall_score(y_test, y_pred)}")
    # fix: label was previously misspelled "precisoin_test"
    print(f"precision_test: {precision_score(y_test, y_pred)}")
    print(f"f1_test: {f1_score(y_test, y_pred)}")

# clf2 = pickle.loads(s)
# clf2.predict(X_test[0:1])

# # KNeighbors Classifier
# Forecast via reduction to regression: a random forest fitted on sliding
# windows of length 12 over the training series.
regressor = RandomForestRegressor()
forecaster = ReducedRegressionForecaster(regressor, window_length=12)
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)
plot_ys(y_train, y_test, y_pred, labels=['y_train', 'y_test', 'y_pred'])
# NOTE(review): notebook-style residue — the loss value is computed but not
# stored or printed.
smape_loss(y_test, y_pred)

"""Forcasting with autoarima"""

from sktime.forecasting.arima import AutoARIMA

# Seasonal AutoARIMA; sp=12 presumably reflects monthly data — confirm
# against how y_train is loaded.
forecaster = AutoARIMA(sp=12)
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)
plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"]);
smape_loss(y_test, y_pred)

"""Time Series Classification"""

from sktime.datasets import load_arrow_head
from sktime.classification.compose import TimeSeriesForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Classify the arrow-head dataset with a default time-series forest using
# sklearn's default train/test split.
X, y = load_arrow_head(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y)
classifier = TimeSeriesForestClassifier()
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
# Result of accuracy_score is not captured — notebook residue.
accuracy_score(y_test, y_pred)