def main():
    """Train a time-series forest on generated data and pickle the fitted model.

    All configuration is read from the module-level ``args`` object. The test
    split returned by the data generator is not used here. Any exception raised
    while fitting or saving is printed rather than propagated.
    """
    data_source = DataGenerator(
        labeled_data_file=args.labeled_data_file,
        data_util_file=args.data_util_file,
        threshold=args.threshold,
        dt=args.dt,
        L=args.L,
        tmin=args.tmin,
        tmax=args.tmax,
    )
    training_data, test_data = data_source.get_data(
        ts_nth_element=args.ts_nth_element,
        training_frac=0.7,
    )

    # Base estimator: interval features (mean / std / slope) feeding one tree.
    interval_features = RandomIntervalFeatureExtractor(
        n_intervals='sqrt',
        features=[np.mean, np.std, time_series_slope],
    )
    time_series_tree = Pipeline([
        ('extract', interval_features),
        ('clf', DecisionTreeClassifier()),
    ])

    # Anything other than 'entropy' falls back to 'gini'.
    split_criterion = 'entropy' if args.criterion == 'entropy' else 'gini'
    tsf = TimeSeriesForestClassifier(
        estimator=time_series_tree,
        n_estimators=args.n_estimators,
        criterion=split_criterion,
        bootstrap=True,
        oob_score=True,
        random_state=1,
        # n_jobs=4,
        verbose=1,  # NOTE(review): assumed active (only n_jobs commented out) - confirm
    )

    # Column 0 holds the labels; the remaining columns hold the series values.
    x = detabularize(pd.DataFrame(training_data[:, 1:]))

    try:
        with parallel_backend('threading', n_jobs=args.n_jobs):
            tsf = tsf.fit(x, training_data[:, 0])

        model_path = '{save_file_name}.pickle'.format(save_file_name=args.save_file_name)
        with open(model_path, 'wb') as model_file:
            pickle.dump(tsf, model_file, protocol=pickle.HIGHEST_PROTOCOL)
    except Exception as ex:
        print(ex)
def test_TimeSeriesForest_predictions(n_estimators, n_intervals):
    """Check an explicit interval-feature pipeline against the default forest.

    A forest built from a ``RandomIntervalFeatureExtractor`` + decision-tree
    pipeline must produce exactly the same class probabilities as a forest
    created with default settings and the same seed. ``n_intervals`` is
    accepted but not used in the body.
    """
    seed = 1234
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)

    # Explicitly composed base estimator.
    interval_extractor = RandomIntervalFeatureExtractor(
        random_state=seed,
        features=[np.mean, np.std, time_series_slope],
    )
    tree_pipeline = Pipeline(
        [("transform", interval_extractor), ("clf", DecisionTreeClassifier())]
    )
    composed_forest = TimeSeriesForestClassifier(
        estimator=tree_pipeline, random_state=seed, n_estimators=n_estimators
    )
    composed_forest.fit(X_train, y_train)
    composed_proba = composed_forest.predict_proba(X_test)

    # default, semi-modular implementation using
    # RandomIntervalFeatureExtractor internally
    default_forest = TimeSeriesForestClassifier(
        random_state=seed, n_estimators=n_estimators
    )
    default_forest.fit(X_train, y_train)
    default_proba = default_forest.predict_proba(X_test)

    np.testing.assert_array_equal(composed_proba, default_proba)
def tsf_classifier(X_train, X_test, y_train, y_test):
    """Score a concatenate-then-TimeSeriesForest pipeline on the test split.

    The pipeline is fitted on ``(X_train, y_train)`` and the value of its
    ``score`` method on ``(X_test, y_test)`` is returned.
    """
    concatenator = ColumnConcatenator()
    forest = TimeSeriesForestClassifier(n_estimators=100)
    pipeline = Pipeline([('concatenate', concatenator), ('classify', forest)])

    pipeline.fit(X_train, y_train)
    test_score = pipeline.score(X_test, y_test)
    return test_score
def rise_benchmarking():
    """Run the RISE benchmark on every dataset in ``benchmark_datasets``.

    For each dataset two experiments are run through ``exp.run_experiment``:
    the built-in ``RandomIntervalSpectralForest`` and an equivalent forest
    composed from a segment / ACF + power-spectrum / tabularise / tree pipeline.
    Existing results are overwritten.
    """

    def _row_feature(name, func):
        # Pair a step name with `func` wrapped in FunctionTransformer + RowTransformer.
        return (name, RowTransformer(FunctionTransformer(func=func, validate=False)))

    for position, dataset in enumerate(benchmark_datasets):
        print(str(position) + " problem = " + dataset)

        # Arguments shared by both experiment runs for this dataset.
        shared_kwargs = dict(
            overwrite=True,
            problem_path=data_dir,
            results_path=results_dir,
            dataset=dataset,
            train_file=False,
        )

        builtin_rise = fb.RandomIntervalSpectralForest(n_estimators=100)
        exp.run_experiment(cls_name="PythonRISE", classifier=builtin_rise,
                           **shared_kwargs)

        segmenter = RandomIntervalSegmenter(n_intervals=1, min_length=5)
        spectral_features = FeatureUnion([
            _row_feature('acf', acf_coefs),
            _row_feature('ps', powerspectrum),
        ])
        base_estimator = Pipeline([
            ('segment', segmenter),
            ('transform', spectral_features),
            ('tabularise', Tabularizer()),
            ('clf', DecisionTreeClassifier()),
        ])
        composite_rise = TimeSeriesForestClassifier(estimator=base_estimator,
                                                    n_estimators=100)
        exp.run_experiment(cls_name="PythonRISEComposite",
                           classifier=composite_rise, **shared_kwargs)
def _rf_scorer(X_train, X_test, y_train, y_test):
    """Fit a ColumnConcatenator + TimeSeriesForest pipeline and return its test score."""
    concatenate_step = ('concatenate', ColumnConcatenator())
    classify_step = ('classify', TimeSeriesForestClassifier(n_estimators=100))

    scorer = Pipeline([concatenate_step, classify_step])
    scorer.fit(X_train, y_train)
    return scorer.score(X_test, y_test)
def tsf_benchmarking():
    """Run the time-series-forest benchmark on every dataset in ``benchmark_datasets``.

    For each dataset two experiments are run through ``exp.run_experiment``:
    the built-in ``ib.TimeSeriesForest`` and a forest composed from a
    segment / mean + std + slope / tree pipeline. Existing results are kept
    (``overwrite=False``).
    """

    def _row_feature(name, func):
        # Pair a step name with `func` wrapped in FunctionTransformer + row transformer.
        wrapped = FunctionTransformer(func=func, validate=False)
        return (name, make_row_transformer(wrapped))

    for position, dataset in enumerate(benchmark_datasets):
        print(str(position) + " problem = " + dataset)

        # Arguments shared by both experiment runs for this dataset.
        shared_kwargs = dict(
            overwrite=False,
            problem_path=data_dir,
            results_path=results_dir,
            dataset=dataset,
            train_file=False,
        )

        builtin_tsf = ib.TimeSeriesForest(n_estimators=100)
        exp.run_experiment(
            cls_name="PythonTSF", classifier=builtin_tsf, **shared_kwargs
        )

        segmenter = RandomIntervalSegmenter(n_intervals="sqrt")
        summary_features = FeatureUnion(
            [
                _row_feature("mean", np.mean),
                _row_feature("std", np.std),
                _row_feature("slope", time_series_slope),
            ]
        )
        base_estimator = Pipeline(
            [
                ("segment", segmenter),
                ("transform", summary_features),
                ("clf", DecisionTreeClassifier()),
            ]
        )
        composite_tsf = TimeSeriesForestClassifier(
            estimator=base_estimator, n_estimators=100
        )
        exp.run_experiment(
            cls_name="PythonTSFComposite", classifier=composite_tsf, **shared_kwargs
        )
def test_predict_proba():
    """Check the shape and normalisation of ``predict_proba`` output.

    Uses the module-level fixtures ``X``, ``y`` and ``n_classes``. Verifies
    that probabilities have one row per instance and one column per class,
    that every row sums to one, and that single-row input is handled by both
    ``predict_proba`` and ``predict``.
    """
    clf = TimeSeriesForestClassifier(n_estimators=2)
    clf.fit(X, y)
    proba = clf.predict_proba(X)

    assert proba.shape == (X.shape[0], n_classes)
    # Row sums come from floating-point arithmetic, so compare with a
    # tolerance instead of demanding bit-exact equality with 1.0.
    np.testing.assert_allclose(np.sum(proba, axis=1), np.ones(X.shape[0]))

    # test single row input
    single_row = X.iloc[[0], :]
    y_proba = clf.predict_proba(single_row)
    assert y_proba.shape == (1, n_classes)

    y_pred = clf.predict(single_row)
    assert y_pred.shape == (1,)
def fit(self, luck_average_windows, assessment_windows, until=None, max_horizon=9 * 6):
    """Fit one column-ensemble classifier per assessment window.

    Sets ``self.horizon`` to ``max_horizon`` and walks ``assessment_windows``
    in order, stopping at the first window larger than the horizon. For each
    window a temporary ts file is prepared and loaded, and a
    ``ColumnEnsembleClassifier`` holding one ``TimeSeriesForestClassifier``
    per entry of ``luck_average_windows`` is fitted and appended to
    ``self.classifiers``.

    If ``until`` is given but lies outside ``[0, len(self.data_points))``,
    an error is logged and the method returns without fitting anything.
    """
    logger("MODEL-FIT").debug(
        "max_horizon: {} / avg windows: {} / assmnt windows: {} / until: {} / total_data_size: {}".format(
            max_horizon,
            str(luck_average_windows),
            str(assessment_windows),
            until,
            len(self.data_points),
        )
    )

    until_out_of_range = until is not None and (
        until < 0 or until >= len(self.data_points)
    )
    if until_out_of_range:
        logger("MODEL-FIT").error(
            "Parameter until is too large for the given data points: {}".format(until)
        )
        return

    self.horizon = max_horizon

    for window_index, window in enumerate(assessment_windows):
        if window > self.horizon:
            break

        # prepare data frame for sktime package
        end_index = len(self.data_points) if until is None else until
        temporary_data_fit_file = self.prepare_ts_file(
            0, end_index, self.case_observation_size, window_index, window
        )

        # parse data frames from the temporary fit data file
        X, y = load_from_tsfile_to_dataframe(
            temporary_data_fit_file, replace_missing_vals_with="-100"
        )

        # which label is the first one?
        true_index = 1 if y[0] == "false" else 0
        new_class_weights = self.create_class_weight_dict(true_index=true_index)

        # One forest per luck-average window, each bound to column i of X.
        estimators = [
            (
                "TSF{}".format(i),
                TimeSeriesForestClassifier(
                    n_estimators=int(self.no_estimators),
                    n_jobs=16,
                    max_depth=self.max_depth,
                    class_weight=new_class_weights,
                    criterion=self.criterion,
                    min_samples_split=self.min_samples_split,
                    min_samples_leaf=self.min_samples_leaf,
                    oob_score=self.oob_score,
                    bootstrap=self.bootstrap,
                ),
                [i],
            )
            for i in range(len(luck_average_windows))
        ]

        ensemble = ColumnEnsembleClassifier(estimators=estimators)
        ensemble.fit(X, y)
        # print(str(ensemble.classes_))
        self.classifiers.append(ensemble)
def main(args):
    """Train, evaluate and persist a time-series forest strategy.

    Reads the "rawdata" input dataset from the module-level ``run`` object,
    prepares it, splits it into train/test, fits a ``TSCStrategy`` wrapping a
    ``TimeSeriesForestClassifier``, logs the split sizes, hyper-parameter and
    accuracy to ``run``, and dumps the fitted strategy under ``outputs/``.
    """
    # Load and wrangle data
    raw_data_df = run.input_datasets["rawdata"].to_pandas_dataframe()
    processed_data_df = prepare_dataframe(
        raw_data_df,
        time_series_length=args.timeserieslength,
        threshold=args.threshold,
    )

    # Split data: sample the training rows, the rest becomes the test set
    train = processed_data_df.sample(frac=args.train_data_split, random_state=42)
    test = processed_data_df.drop(train.index)

    # Example for logging
    split_log_entries = (
        ("data_split_fraction", args.train_data_split,
         "Fraction of samples used for training"),
        ("train_samples", train.shape[0], "Number of samples used for training"),
        ("test_samples", test.shape[0], "Number of samples used for testing"),
    )
    for entry_name, entry_value, entry_description in split_log_entries:
        run.log(entry_name, entry_value, entry_description)

    # Train
    task = TSCTask(target="label", metadata=train)
    forest = TimeSeriesForestClassifier(n_estimators=args.n_estimators)
    strategy = TSCStrategy(forest)
    strategy.fit(task, train)
    run.log("n_estimators", args.n_estimators,
            "Number of tree estimators used in the model")

    # Metrics
    y_pred = strategy.predict(test)
    y_test = test[task.target]
    accuracy = accuracy_score(y_test, y_pred)
    run.log("Accuracy", f"{accuracy:1.3f}", "Accuracy of model")

    # Persist model
    output_dir = "outputs"
    os.makedirs(output_dir, exist_ok=True)
    dump(strategy, os.path.join(output_dir, args.model_filename))
def test_stat():
    """Check ranking and sign-test output of the evaluator on gunpoint.

    Two single-estimator strategies (time-series forest "tsf" and proximity
    forest "pf") are run through the orchestrator on the gunpoint training
    data; the resulting accuracy ranks and sign-test entries are compared
    against fixed expected values.
    """
    gunpoint = load_gunpoint(split="train")
    dataset = RAMDataset(dataset=gunpoint, name="gunpoint")
    task = TSCTask(target="class_val")

    strategy_fc = TSCStrategy(
        TimeSeriesForestClassifier(n_estimators=1, random_state=1), name="tsf"
    )
    strategy_pf = TSCStrategy(
        ProximityForest(n_estimators=1, random_state=1), name="pf"
    )

    # result backend
    results = RAMResults()
    orchestrator = Orchestrator(
        datasets=[dataset],
        tasks=[task],
        strategies=[strategy_pf, strategy_fc],
        cv=SingleSplit(random_state=1),
        results=results,
    )
    orchestrator.fit_predict(save_fitted_strategies=False)

    analyse = Evaluator(results)
    accuracy_metric = PairwiseMetric(func=accuracy_score, name="accuracy")
    analyse.evaluate(metric=accuracy_metric)

    ranks = analyse.rank(ascending=True)
    # Expected ranks: pf -> 1, tsf -> 2
    observed_ranks = [
        ranks.loc[ranks.strategy == strategy_name, "accuracy_mean_rank"].item()
        for strategy_name in ("pf", "tsf")
    ]
    expected_ranks = [1, 2]

    _, sign_test_df = analyse.sign_test()
    observed_signs = [
        [sign_test_df[strategy_name][0], sign_test_df[strategy_name][1]]
        for strategy_name in ("pf", "tsf")
    ]
    expected_signs = [[1, 1], [1, 1]]

    np.testing.assert_equal(
        [observed_ranks, observed_signs], [expected_ranks, expected_signs]
    )
def tsf_benchmarking():
    """Benchmark built-in and composite time-series forests on each dataset.

    Iterates over ``benchmark_datasets`` and, for each one, runs
    ``exp.run_experiment`` twice: once with ``ib.TimeSeriesForest`` and once
    with a ``TimeSeriesForestClassifier`` whose base estimator is a
    segment / mean + std + slope / decision-tree pipeline. Existing results
    are kept (``overwrite=False``).
    """

    def _row_feature(name, func):
        # Pair a step name with `func` wrapped in FunctionTransformer + RowTransformer.
        return (name, RowTransformer(FunctionTransformer(func=func, validate=False)))

    for position, dataset in enumerate(benchmark_datasets):
        print(str(position) + " problem = " + dataset)

        # Arguments shared by both experiment runs for this dataset.
        shared_kwargs = dict(
            overwrite=False,
            problem_path=data_dir,
            results_path=results_dir,
            dataset=dataset,
            train_file=False,
        )

        builtin_tsf = ib.TimeSeriesForest(n_estimators=100)
        exp.run_experiment(cls_name="PythonTSF", classifier=builtin_tsf,
                           **shared_kwargs)

        segmenter = RandomIntervalSegmenter(n_intervals='sqrt')
        summary_features = FeatureUnion([
            _row_feature('mean', np.mean),
            _row_feature('std', np.std),
            _row_feature('slope', time_series_slope),
        ])
        base_estimator = Pipeline([
            ('segment', segmenter),
            ('transform', summary_features),
            ('clf', DecisionTreeClassifier()),
        ])
        composite_tsf = TimeSeriesForestClassifier(estimator=base_estimator,
                                                   n_estimators=100)
        exp.run_experiment(cls_name="PythonTSFComposite",
                           classifier=composite_tsf, **shared_kwargs)
def main():
    """Compare several sklearn / sktime classifiers on the Italy power demand data.

    Loads the train and test splits, prints basic information about them, then
    fits and reports the accuracy of, in order:

    1. a RandomForest on tabularized data,
    2. the same forest behind a ``Tabularizer`` pipeline step,
    3. a RandomForest on ``TSFreshFeatureExtractor`` features,
    4. a single decision tree on random-interval mean/std/slope features,
    5. sktime's ``TimeSeriesForestClassifier``.
    """
    #1. Loading and splitting the dataset
    X_train, y_train = load_italy_power_demand(split='train', return_X_y=True)
    X_test, y_test = load_italy_power_demand(split='test', return_X_y=True)
    print('Shape of X, y train and test dataset', X_train.shape, y_train.shape,
          X_test.shape, y_test.shape, '\n')
    print('X_train:', X_train.head(), '\n')

    # DataFrame.info() writes its report itself and returns None, so it must
    # not be passed to print() (that would emit a stray "None").
    print('\nX_train info')
    X_train.info()
    print()

    labels, counts = np.unique(y_train, return_counts=True)
    print(
        '\nThere are', labels,
        'labels in this dataset, one corresponds to winter and the other to summer. The counter of each one is',
        counts, '\n')

    #2. Creating a Model, Fit and Predict Sklearn Classifier
    #Sktime Tabularizing the data
    X_train_tab = tabularize(X_train)
    X_test_tab = tabularize(X_test)
    print('\n X_train tabularized\n', X_train_tab.head(), '\n')

    #2.1 SKlearn RandomForest Classifier
    classifier = RandomForestClassifier(n_estimators=100)
    classifier.fit(X_train_tab, y_train)
    y_pred = classifier.predict(X_test_tab)
    print('Accuracy sklearn RandomForestClassifier',
          round(accuracy_score(y_test, y_pred), 4), '\n')

    #2.2 Same SKlearn as above but using make_pipeline w/ Sktime Tabularizer
    classifier = make_pipeline(Tabularizer(),
                               RandomForestClassifier(n_estimators=100),
                               verbose=True)
    classifier.fit(X_train, y_train)
    print(
        'Accuracy sklearn RandomForestClassifier using sklearn make_pipeline in which the first step is to sktime Tabularize()',
        round(classifier.score(X_test, y_test), 4), '\n')

    #3 Sklearn using make_pipeline w/ Sktime TSFreshFeatureExtractor
    classifier = make_pipeline(TSFreshFeatureExtractor(show_warnings=False),
                               RandomForestClassifier(n_estimators=100))
    classifier.fit(X_train, y_train)
    print(
        'Accuracy sklearn RandomForestClassifier using sklearn make_pipeline in which the first step is to sktime TSFreshFeatureExtractor that automatically extracts and filters several key statistical features from the nested X_train time series',
        round(classifier.score(X_test, y_test), 4), '\n')

    #4. Using Time series algorithms and classifiers from sklearn/sktime
    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals='sqrt')),  #Sktime
        (
            'transform',
            FeatureUnion([  #Sklearn
                ('mean',
                 RowTransformer(
                     FunctionTransformer(func=np.mean,
                                         validate=False))),  #sktime
                ('std',
                 RowTransformer(
                     FunctionTransformer(func=np.std,
                                         validate=False))),  #sktime
                ('slope',
                 RowTransformer(
                     FunctionTransformer(func=time_series_slope,
                                         validate=False)))  #sktime
            ])),
        ('clf', DecisionTreeClassifier())  #From Sklearn
    ]
    time_series_tree = Pipeline(steps, verbose=True)  #sklearn
    time_series_tree.fit(X_train, y_train)
    print(
        'Accuracy sklearn DecisionTreeClassifier using sklearn Pipeline() as well as segmentation and transformation techniques from sktime and sklearn',
        round(time_series_tree.score(X_test, y_test), 4))

    #5. Using Time series Sktime
    tsf = TimeSeriesForestClassifier(n_estimators=100, verbose=True)
    tsf.fit(X_train, y_train)
    print('Accuracy sktime TimeSeriesForestClassifier',
          round(tsf.score(X_test, y_test), 4))
# data -> our function -> (X_nested, y) X = generate_long_table(ts) X.head() X_nested = from_long_to_nested(X) X_nested.head() y = np.array(['a']) # , 'b', 'a', 'b', 'a', 'b', 'a', 'b']) print(X_nested) X_train, X_test, y_train, y_test = train_test_split(X_nested, y) print(X.head()) classifier = ColumnEnsembleClassifier(estimators=[ ("TSF1", TimeSeriesForestClassifier(n_estimators=100), [1]), ("TSF2", TimeSeriesForestClassifier(n_estimators=100), [2]), ]) classifier.fit(X_train, y_train) # Use the test portion of data for prediction so we can understand how accurate our model was learned y_pred = classifier.predict(X_test) # Use the native `accuracy_score` method to calculate the accuracy based on the test outcomes and the predicted outcomes print("Accuracy score is: " + str(accuracy_score(y_test, y_pred))) def generate_example_long_table(num_cases=50, series_len=20, num_dims=2): rows_per_case = series_len * num_dims total_rows = num_cases * series_len * num_dims case_ids = np.empty(total_rows, dtype=np.int)
# Convert the train/test inputs to single-column frames and rebuild them from
# the 'combine' column so each frame holds exactly one column of cell values.
X_train_timedata = X_train_timedata.to_frame()
X_test_timedata = X_test_timedata.to_frame()
ts_train = pd.Series(X_train_timedata['combine'].values, index=X_train_timedata.index)
X_ts_train = ts_train.to_frame()
ts_test = pd.Series(X_test_timedata['combine'].values, index=X_test_timedata.index)
X_ts_test = ts_test.to_frame()

# Wrap every cell of column 0 in its own pd.Series
# (presumably the nested layout the sktime classifiers expect - TODO confirm).
for row_num in range(0, X_ts_train.shape[0]):
    series1 = pd.Series(X_ts_train.iat[row_num, 0])
    X_ts_train.iat[row_num, 0] = series1
for row_num in range(0, X_ts_test.shape[0]):
    series2 = pd.Series(X_ts_test.iat[row_num, 0])
    X_ts_test.iat[row_num, 0] = series2

## =======================Column ensembling================================
# A single time-series forest (5 estimators) bound to column 0.
clf = ColumnEnsembleClassifier(estimators=[
    ("TSF0", TimeSeriesForestClassifier(n_estimators=5), [0]),
])

# "Efficiency" is the wall-clock fit time in seconds; "Accuracy" is the
# classifier's score on the test frame.
start_time = time.time()
clf.fit(X_ts_train, y_train)
Efficiency = time.time() - start_time
Accuracy = clf.score(X_ts_test, y_test)
print("Efficiency is:\n", Efficiency)
print("Accuracy is :\n", Accuracy)
signal_names = ["chan_%d" % x for x in range(num_channels)] return signal_names, X, y def testlime(signal_names, clf, x, y): class_names=[y] num_slices=20 num_features=10 explainer = lime_ts.LimeTimeSeriesExplainer(class_names=class_names, signal_names=signal_names) labelid = 0 exp = explainer.explain_instance(x, clf.predict_proba, num_features=num_features, num_samples=100, num_slices=num_slices, labels=[labelid], replacement_method='total_mean') exp.as_pyplot_figure(labelid) plt.show() if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) signal_names, X, y = genDataset(80, 4, 30) steps = [ ("concatenate", ColumnConcatenator()), ("classify", TimeSeriesForestClassifier(n_estimators=100)), ] clf = Pipeline(steps) clf.fit(X,y) testlime(signal_names, clf, X[0], y[0])
models = { "features": make_pipeline(TruncationTransformer(lower=MAX_LENGTH), TSFreshFeatureExtractor(default_fc_parameters="efficient", show_warnings=False, n_jobs=-1), RandomForestClassifier(n_jobs=-1, random_state=1), verbose=True), "interval": make_pipeline(TruncationTransformer(lower=15000), TimeSeriesForestClassifier( estimator=time_series_tree, n_estimators=100, criterion="entropy", bootstrap=True, oob_score=True, random_state=1, n_jobs=-1, ), verbose=True), "shapelet": make_pipeline(TruncationTransformer(lower=1000), ContractedShapeletTransform( time_contract_in_mins=10, num_candidates_to_sample_per_case=10, verbose=2, random_state=1), RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=1),
random_state=j) # set CV _, counts = np.unique(y_train, return_counts=True) n_splits = np.minimum(counts.min(), INNER_N_SPLITS) n_repeats = np.maximum(1, INNER_N_SPLITS // n_splits) # cv = StratifiedKFold(n_splits=n_splits, shuffle=True, # random_state=RANDOM_STATE) inner_cv = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=RANDOM_STATE) print(f'Dataset: {i + 1}/{n_datasets} {dataset.name} - n_splits: ' f'{j + 1}/{OUTER_CV_N_SPLITS}') # set estimator estimator = TimeSeriesForestClassifier(BASE_ESTIMATOR, n_jobs=-1) gscv = GridSearchCV(estimator, param_grid, scoring='neg_log_loss', cv=inner_cv, refit=True, iid=False, error_score='raise', verbose=True) # tune when enough samples for all classes are available start = time.time() gscv.fit(X_train, y_train) results[0] = time.time() - start # predict
# Convert the test set to sktime's dataset format.
X_test = tslearn.utils.to_sktime_dataset(X_test)

# Stack the validation labels under the training labels, then flatten both
# label arrays into 1-D Series.
# NOTE(review): X_train is presumably extended with the validation samples
# before this point so it lines up with y_train - confirm.
y_train = np.vstack([y_train, y_val])
y_train = pd.Series(y_train.reshape(-1))
y_test = pd.Series(y_test.reshape(-1))

# Time series random forest for every column (only the first two are used)
for i, col in enumerate(col_names[:2]):
    print(col)

    # Choose one feature: keep column i as a single-column frame
    X_train_step = X_train.iloc[:, [i]]
    X_test_step = X_test.iloc[:, [i]]

    # Time series forest clf with default settings
    classifier = TimeSeriesForestClassifier()
    classifier.fit(X_train_step, y_train)
    y_pred = classifier.predict(X_test_step)

    # Metrics
    print(f'accuracy_test: {accuracy_score(y_test, y_pred)}')
    print(f"recall_test: {recall_score(y_test, y_pred)}")
    print(f"precisoin_test: {precision_score(y_test, y_pred)}")
    print(f"f1_test: {f1_score(y_test, y_pred)}")

# clf2 = pickle.loads(s)
# clf2.predict(X_test[0:1])
def main(model, input_training_raster, train_feature, input_test_raster,
         test_feature, input_test_csv, result_path, n_channels, n_jobs,
         model_path, raster_to_classify, patch_size, output_raster,
         train_ratio, n_estimators, max_depth, max_num_of_samples_per_class):
    """Train, evaluate and optionally apply a land-cover classifier.

    Steps:
      1. Create ``<result_path>/<model>`` for all outputs.
      2. Build the training set, and the test set from (in priority order) a
         dedicated test raster + feature, a CSV file, or the held-out part of
         the training feature.
      3. Re-index class labels to ``0..n-1`` based on the training labels.
      4. Normalise features with the 2nd/98th percentiles of the training
         data and build the classifier selected by ``model``.
      5. Fit, save the model, write evaluation scores to ``trainlog.csv`` and
         the normalisation bounds to ``min_Max.txt``.
      6. If ``raster_to_classify`` is set, run inference on that raster.

    Args:
        model: One of ``"RF"``, ``"SVM"`` or ``"RF_TS"``.
        model_path: Where to store the trained model; when falsy,
            ``Best_model.pkl`` inside the result directory is used.
        (Remaining arguments are forwarded to the project helpers and
        estimators unchanged.)

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    # Fail fast: previously an unknown model only surfaced as a NameError on
    # `clf` after directories were created and all data had been loaded.
    if model not in ("RF", "SVM", "RF_TS"):
        raise ValueError(
            f"Unsupported model {model!r}; expected one of 'RF', 'SVM', 'RF_TS'"
        )

    # -- Creating output path if does not exist (parent + per-model folder)
    result_path = os.path.join(result_path, model)
    os.makedirs(result_path, exist_ok=True)
    print("Model: ", model)

    # Generating train/test datasets
    train_list, test_list, _ = split_train_feature(train_feature, train_ratio)
    train_data = generate_training_data(input_training_raster, train_feature,
                                        train_list,
                                        max_num_of_samples_per_class)
    # Column 0 holds the label, the remaining columns the features.
    X_train, y_train = train_data[:, 1:], train_data[:, 0]

    if input_test_raster and test_feature:
        _, test_list, _ = split_train_feature(test_feature, train_ratio=0)
        test_data = generate_training_data(input_test_raster, test_feature,
                                           test_list)
        X_test, y_test = test_data[:, 1:], test_data[:, 0]
    elif input_test_csv:
        df = pd.read_csv(input_test_csv, sep=',', header=None)
        test_data = np.asarray(df.values)
        # NOTE(review): features start at column 2 here (column 1 is skipped),
        # unlike the raster branches - confirm against the CSV layout.
        X_test, y_test = test_data[:, 2:], test_data[:, 0]
    else:
        test_data = generate_training_data(input_training_raster,
                                           train_feature, test_list,
                                           max_num_of_samples_per_class)
        X_test, y_test = test_data[:, 1:], test_data[:, 0]

    # Fitting the classifier into the Training set
    n_classes_test = len(np.unique(y_test))
    n_classes_train = len(np.unique(y_train))
    if n_classes_test != n_classes_train:
        print("WARNING: different number of classes in train and test")

    # Torch, numpy, whatever, all index from 0, if we did not assign landcover
    # classes with [0, 1, 2, 3, ...], it may cause problem, things get easier
    # by reindex classes
    lc_ids_old = np.unique(y_train)
    lc_ids_old.sort()
    lc_ids_new = np.arange(n_classes_train)

    # Positions are collected for every class BEFORE any label is rewritten,
    # so a freshly assigned id can never be mistaken for an old one.
    train_positions = [np.where(y_train == lc_id)[0] for lc_id in lc_ids_old]
    for positions, lc_id_new in zip(train_positions, lc_ids_new):
        y_train[positions] = lc_id_new

    test_positions = [np.where(y_test == lc_id)[0] for lc_id in lc_ids_old]
    for positions, lc_id_new in zip(test_positions, lc_ids_new):
        y_test[positions] = lc_id_new

    # Old -> new class id mapping, passed on to inference.
    relation = np.vstack((lc_ids_old, lc_ids_new))

    if model in ["RF", "SVM"]:
        is_ts = False

        # ---- Normalizing the data per band,
        min_per = np.percentile(X_train, 2, axis=(0))
        max_per = np.percentile(X_train, 100 - 2, axis=(0))
        X_train = (X_train - min_per) / (max_per - min_per)
        X_test = (X_test - min_per) / (max_per - min_per)

        if model == "RF":
            clf = RandomForestClassifier(n_estimators=n_estimators,
                                         max_depth=max_depth,
                                         criterion='entropy',
                                         random_state=None,
                                         verbose=0,
                                         n_jobs=n_jobs)
        else:  # model == "SVM"
            clf = OneVsRestClassifier(
                BaggingClassifier(SVC(kernel='linear', cache_size=200),
                                  max_samples=1.0,
                                  n_estimators=n_estimators,
                                  verbose=0,
                                  n_jobs=n_jobs))
    else:  # model == "RF_TS"
        from sktime.classification.compose import TimeSeriesForestClassifier
        from sktime.transformations.panel.compose import ColumnConcatenator
        is_ts = True

        # Reshape flat feature vectors to (samples, steps, channels).
        X_train = X_train.reshape(X_train.shape[0],
                                  int(X_train.shape[1] / n_channels),
                                  n_channels)
        X_test = X_test.reshape(X_test.shape[0],
                                int(X_test.shape[1] / n_channels),
                                n_channels)

        # ---- Normalizing the data per band,
        min_per = np.percentile(X_train, 2, axis=(0, 1))
        max_per = np.percentile(X_train, 100 - 2, axis=(0, 1))
        X_train = (X_train - min_per) / (max_per - min_per)
        X_test = (X_test - min_per) / (max_per - min_per)

        steps = [
            ("concatenate", ColumnConcatenator()),
            ("classify",
             TimeSeriesForestClassifier(n_estimators=n_estimators,
                                        max_depth=max_depth,
                                        n_jobs=n_jobs)),
        ]
        clf = Pipeline(steps)

    # Train classifier
    clf.fit(X_train, y_train)

    # Save trained classifier
    # NOTE(review): the dump is assumed to run whether or not model_path was
    # supplied (only the default path is conditional) - confirm.
    if not model_path:
        model_path = os.path.join(result_path, 'Best_model.pkl')
    joblib.dump(clf, model_path)

    # Evaluation (the recorded "time" covers prediction + scoring, in minutes)
    start = time.time()
    y_pred = clf.predict(X_test)
    class_labels = [f'class {i}' for i in np.unique(y_test)]
    scores = metrics(y_test, y_pred, class_labels)
    scores["time"] = (time.time() - start) / 60

    log_df = pd.DataFrame({k: [v] for k, v in scores.items()})
    log_df.to_csv(os.path.join(result_path, "trainlog.csv"))
    print(
        scores["report"]
    )  # In report, precision means User_accuracy, recall means Producer_accuracy
    print(scores["confusion_matrix"])

    # ---- Save min_max
    minMaxVal_file = os.path.join(result_path, 'min_Max.txt')
    save_minMaxVal(minMaxVal_file, min_per, max_per)

    # Inference on raster
    if raster_to_classify:
        classify_image(raster_to_classify,
                       model_path,
                       output_raster,
                       n_channels,
                       patch_size=patch_size,
                       minmax=[min_per, max_per],
                       is_ts=is_ts,
                       relation=relation)
import pytest
from sktime.benchmarking.strategies import TSCStrategy
from sktime.benchmarking.tasks import TSCTask
from sktime.datasets import load_gunpoint
from sktime.datasets import load_italy_power_demand
from sktime.classification.compose import TimeSeriesForestClassifier

# Small forest shared by all parametrised test cases.
classifier = TimeSeriesForestClassifier(n_estimators=2)

DATASET_LOADERS = (load_gunpoint, load_italy_power_demand)


# Test output of time-series classification strategies
@pytest.mark.parametrize("dataset", DATASET_LOADERS)
def test_TSCStrategy(dataset):
    """Predictions of a fitted TSCStrategy must match the target column's shape."""
    train_split = dataset(split='train')
    test_split = dataset(split='test')

    strategy = TSCStrategy(classifier)
    task = TSCTask(target='class_val')
    strategy.fit(task, train_split)

    predictions = strategy.predict(test_split)
    expected_shape = test_split[task.target].shape
    assert predictions.shape == expected_shape