def test_TimeSeriesForest_predictions(n_estimators, n_intervals):
    """Check that an explicit tree pipeline reproduces the default forest.

    A ``TimeSeriesForestClassifier`` wrapped around a hand-assembled
    ``RandomIntervalFeatureExtractor`` + ``DecisionTreeClassifier`` pipeline
    must produce exactly the same class probabilities on the GunPoint test
    split as a classifier created without an ``estimator`` argument, given
    the same ``random_state`` and ``n_estimators``.

    NOTE(review): ``n_intervals`` is accepted but never read in the body, so
    every value runs the extractor with its default setting -- confirm that
    this is intended.
    """
    seed = 1234
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    # Only the test features are needed; the test labels are not compared.
    X_test, _ = load_gunpoint(split="test", return_X_y=True)

    # Fully modular variant: the single-tree pipeline is spelled out by hand.
    extractor = RandomIntervalFeatureExtractor(
        random_state=seed,
        features=[np.mean, np.std, time_series_slope],
    )
    tree_pipeline = Pipeline(
        [("transform", extractor), ("clf", DecisionTreeClassifier())]
    )
    modular_forest = TimeSeriesForestClassifier(
        estimator=tree_pipeline, random_state=seed, n_estimators=n_estimators
    )
    modular_forest.fit(X_train, y_train)
    proba_modular = modular_forest.predict_proba(X_test)

    # Default, semi-modular variant that uses
    # RandomIntervalFeatureExtractor internally.
    default_forest = TimeSeriesForestClassifier(
        random_state=seed, n_estimators=n_estimators
    )
    default_forest.fit(X_train, y_train)
    proba_default = default_forest.predict_proba(X_test)

    np.testing.assert_array_equal(proba_modular, proba_default)
def test_predict_proba():
    """Check the shape and normalisation of ``predict_proba`` output.

    Fits a two-estimator ``TimeSeriesForestClassifier`` on the ``X`` / ``y``
    data defined outside this function and verifies that:

    * the probability matrix has one row per instance and ``n_classes``
      columns,
    * every row sums to one,
    * a single-row input yields a ``(1, n_classes)`` probability matrix and a
      length-one prediction.
    """
    clf = TimeSeriesForestClassifier(n_estimators=2)
    clf.fit(X, y)
    proba = clf.predict_proba(X)

    n_instances = X.shape[0]
    assert proba.shape == (n_instances, n_classes)
    # Row sums are floating-point sums, so compare with a tolerance rather
    # than bit-for-bit equality to avoid spurious rounding failures.
    np.testing.assert_allclose(np.sum(proba, axis=1), np.ones(n_instances))

    # test single row input
    first_row = X.iloc[[0], :]
    y_proba = clf.predict_proba(first_row)
    assert y_proba.shape == (1, n_classes)

    y_pred = clf.predict(first_row)
    assert y_pred.shape == (1,)
示例#3
0
def main():
    """Train a time series forest on generated data and pickle the model.

    All settings are read from the ``args`` object defined outside this
    function. The data returned by ``DataGenerator.get_data`` is split 70/30;
    only the training part is used here. Column 0 of the training array is
    passed as the target and the remaining columns as the series values.

    The fitted classifier is written to ``<args.save_file_name>.pickle``.
    Any exception raised while fitting or saving is reported (message on
    stdout, full traceback on stderr) and is not re-raised.
    """
    import traceback

    generator = DataGenerator(labeled_data_file=args.labeled_data_file, data_util_file=args.data_util_file,
                              threshold=args.threshold, dt=args.dt, L=args.L, tmin=args.tmin, tmax=args.tmax)
    # The held-out part of the split is not needed for training.
    training_data, _ = generator.get_data(ts_nth_element=args.ts_nth_element,
                                          training_frac=0.7)

    # Base estimator of the forest: interval features fed to a decision tree.
    steps = [
        ('extract', RandomIntervalFeatureExtractor(n_intervals='sqrt',
                                                   features=[np.mean, np.std, time_series_slope])),
        ('clf', DecisionTreeClassifier())
    ]
    time_series_tree = Pipeline(steps)
    tsf = TimeSeriesForestClassifier(
        estimator=time_series_tree,
        n_estimators=args.n_estimators,
        # Anything other than 'entropy' falls back to 'gini'.
        criterion='entropy' if args.criterion == 'entropy' else 'gini',
        bootstrap=True,
        oob_score=True,
        random_state=1,
        verbose=1
    )

    x = detabularize(pd.DataFrame(training_data[:, 1:]))
    try:
        with parallel_backend('threading', n_jobs=args.n_jobs):
            tsf = tsf.fit(x, training_data[:, 0])
        with open(f'{args.save_file_name}.pickle', 'wb') as model_file:
            pickle.dump(tsf, model_file, protocol=pickle.HIGHEST_PROTOCOL)
    except Exception as ex:
        # Best-effort behaviour is kept (no re-raise), but the message alone
        # hides the exception type and origin, so also emit the traceback.
        print(ex)
        traceback.print_exc()
示例#4
0
def main():
    """Walk through several ways of classifying the Italy power demand data.

    Loads the train and test splits, prints a short summary of them, and then
    fits and scores in turn:

    1. a ``RandomForestClassifier`` on tabularized data,
    2. the same forest behind a ``Tabularizer`` step in ``make_pipeline``,
    3. a forest on ``TSFreshFeatureExtractor`` features,
    4. a hand-built pipeline of random interval segmentation, mean/std/slope
       features and a ``DecisionTreeClassifier``,
    5. a ``TimeSeriesForestClassifier``.

    Every accuracy is printed; nothing is returned.
    """
    import io

    # 1. Load the predefined train/test splits and summarise them.
    X_train, y_train = load_italy_power_demand(split='train', return_X_y=True)
    X_test, y_test = load_italy_power_demand(split='test', return_X_y=True)
    print('Shape of X, y train and test dataset', X_train.shape, y_train.shape,
          X_test.shape, y_test.shape, '\n')
    print('X_train:', X_train.head(), '\n')
    # DataFrame.info() writes its report to a stream and returns None, so
    # capture the report in a buffer instead of printing the return value.
    info_buffer = io.StringIO()
    X_train.info(buf=info_buffer)
    print('\nX_train info', info_buffer.getvalue(), '\n')

    labels, counts = np.unique(y_train, return_counts=True)
    print(
        '\nThere are', labels,
        'labels in this dataset, one corresponds to winter and the other to summer. The counter of each one is',
        counts, '\n')

    # 2. Plain scikit-learn classifier on tabularized data.
    X_train_tab = tabularize(X_train)
    X_test_tab = tabularize(X_test)
    print('\n X_train tabularized\n', X_train_tab.head(), '\n')

    # 2.1 Random forest fitted directly on the tabular frame.
    classifier = RandomForestClassifier(n_estimators=100)
    classifier.fit(X_train_tab, y_train)
    y_pred = classifier.predict(X_test_tab)
    print('Accuracy sklearn RandomForestClassifier',
          round(accuracy_score(y_test, y_pred), 4), '\n')

    # 2.2 Same forest, with the tabularization done by a pipeline step so the
    # nested frames can be passed in unchanged.
    classifier = make_pipeline(Tabularizer(),
                               RandomForestClassifier(n_estimators=100),
                               verbose=True)
    classifier.fit(X_train, y_train)
    print(
        'Accuracy sklearn RandomForestClassifier using sklearn make_pipeline in which the first step is to sktime Tabularize()',
        round(classifier.score(X_test, y_test), 4), '\n')

    # 3. Random forest on features produced by TSFreshFeatureExtractor.
    classifier = make_pipeline(TSFreshFeatureExtractor(show_warnings=False),
                               RandomForestClassifier(n_estimators=100))
    classifier.fit(X_train, y_train)
    print(
        'Accuracy sklearn RandomForestClassifier using sklearn make_pipeline in which the first step is to sktime TSFreshFeatureExtractor that automatically extracts and filters several key statistical features from the nested X_train time series',
        round(classifier.score(X_test, y_test), 4), '\n')

    # 4. Hand-built time series tree: random intervals -> per-row
    # mean/std/slope features -> decision tree.
    interval_features = (
        ('mean', np.mean),
        ('std', np.std),
        ('slope', time_series_slope),
    )
    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals='sqrt')),
        (
            'transform',
            FeatureUnion([
                (name,
                 RowTransformer(
                     FunctionTransformer(func=func, validate=False)))
                for name, func in interval_features
            ])),
        ('clf', DecisionTreeClassifier())
    ]
    time_series_tree = Pipeline(steps, verbose=True)
    time_series_tree.fit(X_train, y_train)
    print(
        'Accuracy sklearn DecisionTreeClassifier using sklearn Pipeline() as well as segmentation and transformation techniques from sktime and sklearn',
        round(time_series_tree.score(X_test, y_test), 4))

    # 5. Ready-made time series forest.
    tsf = TimeSeriesForestClassifier(n_estimators=100, verbose=True)
    tsf.fit(X_train, y_train)
    print('Accuracy sktime TimeSeriesForestClassifier',
          round(tsf.score(X_test, y_test), 4))
示例#5
0
# Stack the validation labels under the training labels, then flatten both
# label arrays into 1-D pandas Series.
y_train = np.vstack([y_train, y_val])
y_train = pd.Series(y_train.reshape(-1))
y_test = pd.Series(y_test.reshape(-1))


# Time series random forest, fitted separately on each of the first two columns
for i, col in enumerate(col_names[:2]):
    print(col)

    # Choose one feature; the list index keeps the selection a DataFrame.
    X_train_step = X_train.iloc[:, [i]]
    X_test_step = X_test.iloc[:, [i]]

    # Time series forest clf
    classifier = TimeSeriesForestClassifier()
    classifier.fit(X_train_step, y_train)
    y_pred = classifier.predict(X_test_step)

    # Metrics
    print(f'accuracy_test: {accuracy_score(y_test, y_pred)}')
    print(f"recall_test: {recall_score(y_test, y_pred)}")
    print(f"precision_test: {precision_score(y_test, y_pred)}")
    print(f"f1_test: {f1_score(y_test, y_pred)}")



# clf2 = pickle.loads(s)
# clf2.predict(X_test[0:1])


# Forecasting with a random forest regressor wrapped in
# ReducedRegressionForecaster (window_length=12).
regressor = RandomForestRegressor()
forecaster = ReducedRegressionForecaster(regressor, window_length=12)
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)

plot_ys(y_train, y_test, y_pred, labels=['y_train', 'y_test', 'y_pred'])
# A bare expression is only echoed in a notebook; print the loss so that it
# is also reported when this file runs as a script.
print(f"smape_loss: {smape_loss(y_test, y_pred)}")

"""Forcasting with autoarima"""

from sktime.forecasting.arima import AutoARIMA
# Fit AutoARIMA with sp=12 and forecast over the horizon `fh`.
forecaster = AutoARIMA(sp=12)
forecaster.fit(y_train)

y_pred = forecaster.predict(fh)
plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"])
# A bare expression is only echoed in a notebook; print the loss so that it
# is also reported when this file runs as a script.
print(f"smape_loss: {smape_loss(y_test, y_pred)}")

"""Time Series Classification"""

from sktime.datasets import load_arrow_head
from sktime.classification.compose import TimeSeriesForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load ArrowHead, make a random train/test split, fit a time series forest
# with default settings and report its accuracy on the held-out part.
X, y = load_arrow_head(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y)
classifier = TimeSeriesForestClassifier()
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
# A bare expression is only echoed in a notebook; print the score so that it
# is also reported when this file runs as a script.
print(f"accuracy: {accuracy_score(y_test, y_pred)}")