def test_classifier_on_basic_motions(self, estimator_class):
    """Compare predict_proba on basic motions with the stored reference values.

    The expected probabilities are looked up in ``basic_motions_proba`` under
    the class name of ``estimator_class``. If no entry is registered, the
    test returns early without asserting anything.
    """
    name = estimator_class.__name__

    # guard clause: no reference output registered, so there is nothing to check
    if name not in basic_motions_proba.keys():
        return None
    reference = basic_motions_proba[name]

    # work on a clone of the "results_comparison" test instance
    template = estimator_class.create_test_instance(
        parameter_set="results_comparison"
    )
    estimator = clone(template)

    # pin the seed whenever the estimator exposes a random_state parameter
    if "random_state" in estimator.get_params().keys():
        estimator.set_params(random_state=0)

    # basic motions data; the test labels are not needed
    X_train, y_train = load_basic_motions(split="train")
    X_test, _ = load_basic_motions(split="test")

    # seeded draw of 10 positions without replacement; the same positions
    # select the training cases and the test cases
    subset = np.random.RandomState(4).choice(len(y_train), 10, replace=False)

    estimator.fit(X_train.iloc[subset], y_train[subset])
    predicted = estimator.predict_proba(X_test.iloc[subset])

    # agreement is required to two decimals
    _assert_array_almost_equal(predicted, reference, decimal=2)
def test_kmeans():
    """Check TimeSeriesKMeans on basic motions against stored reference results."""
    X_train, y_train = load_basic_motions(split="train")
    X_test, y_test = load_basic_motions(split="test")

    settings = {
        "averaging_method": "mean",
        "random_state": 1,
        "n_init": 2,
        "n_clusters": 4,
        "init_algorithm": "kmeans++",
        "metric": "dtw",
    }
    clusterer = TimeSeriesKMeans(**settings)

    # fit on the training split, then score both splits with the rand index
    train_labels = clusterer.fit_predict(X_train)
    train_rand = metrics.rand_score(y_train, train_labels)

    test_labels = clusterer.predict(X_test)
    test_rand = metrics.rand_score(y_test, test_labels)
    proba = clusterer.predict_proba(X_test)

    # compare predictions, scores and fitted attributes with the references
    assert np.array_equal(test_labels, expected_results["mean"])
    assert test_rand == expected_score["mean"]
    assert train_rand == expected_train_result["mean"]
    assert clusterer.n_iter_ == expected_iters["mean"]
    assert np.array_equal(clusterer.labels_, expected_labels["mean"])
    assert isinstance(clusterer.cluster_centers_, np.ndarray)
    assert proba.shape == (40, 4)

    # every probability row must contain exactly one entry equal to 1.0
    ones_per_row = [np.count_nonzero(row == 1.0) for row in proba]
    assert all(count == 1 for count in ones_per_row)
def test_col_ens_on_basic_motions():
    """Check ColumnEnsembleClassifier probabilities on basic motions data.

    Three classifiers are each assigned their own columns. The ensemble is
    fitted on the full training split, and predict_proba on a 10-case
    subsample of the test split is compared with
    ``col_ens_basic_motions_probas`` to two decimals.
    """
    X_train, y_train = load_basic_motions(split="train")
    X_test, _ = load_basic_motions(split="test")

    # (name, estimator, columns) triples handed to the ensemble
    members = [
        (
            "FreshPrince",
            FreshPRINCE(
                random_state=0,
                default_fc_parameters="minimal",
                n_estimators=10,
            ),
            [0, 1, 2],
        ),
        (
            "TDE",
            TemporalDictionaryEnsemble(
                n_parameter_samples=10,
                max_ensemble_size=5,
                randomly_selected_params=5,
                random_state=0,
            ),
            [3, 4],
        ),
        (
            "DrCIF",
            DrCIF(n_estimators=10, random_state=0, save_transformed_data=True),
            [5],
        ),
    ]

    ensemble = ColumnEnsembleClassifier(estimators=members)
    ensemble.fit(X_train, y_train)

    # seeded draw of 10 test positions without replacement
    subset = np.random.RandomState(4).choice(len(y_train), 10, replace=False)
    predicted = ensemble.predict_proba(X_test.iloc[subset])

    testing.assert_array_almost_equal(
        predicted, col_ens_basic_motions_probas, decimal=2
    )
def test_heterogenous_pipeline_column_ensmbler():
    """Fit and score a ColumnEnsembleClassifier that mixes classifier types.

    Column 0 goes through an interval-feature pipeline ending in a decision
    tree; columns 1-4 use KNN or BOSS ensemble classifiers. The score is
    computed but not asserted, so this is a smoke test.
    """
    X_train, y_train = load_basic_motions("TRAIN", return_X_y=True)
    X_test, y_test = load_basic_motions("TEST", return_X_y=True)

    n_intervals = 3

    # per-interval mean and standard deviation features
    mean_features = RowwiseTransformer(
        FunctionTransformer(func=np.mean, validate=False)
    )
    std_features = RowwiseTransformer(
        FunctionTransformer(func=np.std, validate=False)
    )
    feature_union = FeatureUnion([('mean', mean_features), ('std', std_features)])

    interval_tree = Pipeline(
        [
            ('segment', RandomIntervalSegmenter(n_intervals=n_intervals)),
            ('transform', feature_union),
            ('clf', DecisionTreeClassifier()),
        ],
        random_state=1,
    )

    # one classifier per column; the listed column indices are 0, 4, 1, 2, 3
    members = [
        ("RandomIntervalTree", interval_tree, [0]),
        ("KNN4", KNNTSC(n_neighbors=1), [4]),
        ("BOSSEnsemble1 ", BOSSEnsemble(ensemble_size=3), [1]),
        ("KNN2", KNNTSC(n_neighbors=1), [2]),
        ("BOSSEnsemble3", BOSSEnsemble(ensemble_size=3), [3]),
    ]
    ensemble = ColumnEnsembleClassifier(members)

    ensemble.fit(X_train, y_train)
    ensemble.score(X_test, y_test)
def test_kmedoids():
    """Check TimeSeriesKMedoids on basic motions against stored reference results."""
    X_train, y_train = load_basic_motions(split="train")
    X_test, y_test = load_basic_motions(split="test")

    settings = {
        "random_state": 1,
        "n_init": 2,
        "max_iter": 5,
        "init_algorithm": "kmeans++",
        "metric": "euclidean",
    }
    clusterer = TimeSeriesKMedoids(**settings)

    # fit on the training split, then score both splits with the rand index
    train_labels = clusterer.fit_predict(X_train)
    train_rand = metrics.rand_score(y_train, train_labels)

    test_labels = clusterer.predict(X_test)
    test_rand = metrics.rand_score(y_test, test_labels)
    proba = clusterer.predict_proba(X_test)

    # compare predictions, scores and fitted attributes with the references
    assert np.array_equal(test_labels, expected_results["medoids"])
    assert test_rand == expected_score["medoids"]
    assert train_rand == train_expected_score["medoids"]
    assert np.isclose(clusterer.inertia_, expected_inertia["medoids"])
    assert clusterer.n_iter_ == expected_iters["medoids"]
    assert np.array_equal(clusterer.labels_, expected_labels["medoids"])
    assert isinstance(clusterer.cluster_centers_, np.ndarray)
    assert proba.shape == (40, 8)

    # every probability row must contain exactly one entry equal to 1.0
    ones_per_row = [np.count_nonzero(row == 1.0) for row in proba]
    assert all(count == 1 for count in ones_per_row)
def test_hivecote_v2_on_basic_motions():
    """Test of HIVECOTEV2 on basic motions data.

    A HIVECOTEV2 with reduced component settings is fitted on a seeded
    15-case subsample of the training split. predict_proba is then evaluated
    on the first 10 of those positions in the test split and compared with
    the stored HIVE-COTE v2 reference probabilities.
    """
    # load basic motions data (test labels are not needed)
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, _ = load_basic_motions(split="test", return_X_y=True)
    indices = np.random.RandomState(4).choice(len(y_train), 15, replace=False)

    # train HIVE-COTE v2
    hc2 = HIVECOTEV2(
        random_state=0,
        stc_params={
            "estimator": RotationForest(n_estimators=3),
            "n_shapelet_samples": 500,
            "max_shapelets": 20,
            "batch_size": 100,
        },
        drcif_params={"n_estimators": 10},
        arsenal_params={"num_kernels": 100, "n_estimators": 5},
        tde_params={
            "n_parameter_samples": 25,
            "max_ensemble_size": 5,
            "randomly_selected_params": 10,
        },
    )
    hc2.fit(X_train.iloc[indices], y_train[indices])

    # predict on the first 10 sampled positions only
    probas = hc2.predict_proba(X_test.iloc[indices[:10]])

    # compare against the HIVE-COTE v2 reference array, not the reference of
    # a different estimator
    # NOTE(review): assumes hivecote_v2_basic_motions_probas is defined at
    # module level alongside the other reference arrays - confirm.
    testing.assert_array_equal(probas, hivecote_v2_basic_motions_probas)
def test_col_ens_on_basic_motions():
    """Check ColumnEnsembleClassifier probabilities on basic motions data.

    A TDE on columns 3-4 and a DrCIF on column 5 are fitted on the full
    training split. predict_proba on a 10-case subsample of the test split
    is compared with ``col_ens_basic_motions_probas`` to two decimals.
    """
    X_train, y_train = load_basic_motions(split="train")
    X_test, _ = load_basic_motions(split="test")

    # (name, estimator, columns) triples handed to the ensemble
    members = [
        (
            "TDE",
            TemporalDictionaryEnsemble(
                n_parameter_samples=10,
                max_ensemble_size=5,
                randomly_selected_params=5,
                random_state=0,
            ),
            [3, 4],
        ),
        ("DrCIF", DrCIF(n_estimators=10, random_state=0), [5]),
    ]

    ensemble = ColumnEnsembleClassifier(estimators=members)
    ensemble.fit(X_train, y_train)

    # seeded draw of 10 test positions without replacement
    subset = np.random.RandomState(4).choice(len(y_train), 10, replace=False)
    predicted = ensemble.predict_proba(X_test.iloc[subset])

    testing.assert_array_almost_equal(
        predicted, col_ens_basic_motions_probas, decimal=2
    )
def _reproduce_classification_basic_motions(estimator):
    """Fit ``estimator`` on a basic motions subsample and return its probabilities.

    Ten positions are drawn without replacement from a ``RandomState(4)``;
    the same positions select the training cases and the test cases.

    Returns
    -------
    The output of ``estimator.predict_proba`` on the selected test cases.
    """
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, _ = load_basic_motions(split="test", return_X_y=True)

    rng = np.random.RandomState(4)
    subset = rng.choice(len(y_train), 10, replace=False)

    X_fit = X_train.iloc[subset]
    y_fit = y_train[subset]
    estimator.fit(X_fit, y_fit)

    return estimator.predict_proba(X_test.iloc[subset])
def test_homogeneous_column_ensembler():
    """Check that a homogeneous 1-NN column ensemble scores 1.0 on basic motions.

    A HomogeneousColumnEnsembleClassifier wrapping a 1-nearest-neighbour
    classifier is fitted on the training split and must score exactly 1.0
    on the test split.
    """
    X_train, y_train = load_basic_motions("TRAIN", return_X_y=True)
    X_test, y_test = load_basic_motions("TEST", return_X_y=True)

    cts = HomogeneousColumnEnsembleClassifier(KNNTSC(n_neighbors=1))
    cts.fit(X_train, y_train)

    # the comparison has to be asserted: as a bare expression statement it is
    # evaluated and discarded, so the test could never fail on the score
    assert cts.score(X_test, y_test) == 1.0
def test_homogeneous_pipeline_column_ensmbler():
    """Fit and score a ColumnEnsembleClassifier with one 1-NN per column.

    The score is computed but not asserted, so this is a smoke test.
    """
    X_train, y_train = load_basic_motions("TRAIN", return_X_y=True)
    X_test, y_test = load_basic_motions("TEST", return_X_y=True)

    # one (name, estimator, [column]) triple for every column of X_train
    n_columns = X_train.shape[1]
    members = []
    for column in range(n_columns):
        members.append(("KNN%d " % column, KNNTSC(n_neighbors=1), [column]))

    ensemble = ColumnEnsembleClassifier(members)
    ensemble.fit(X_train, y_train)
    ensemble.score(X_test, y_test)
def test_RowTransformer_pipeline():
    """Check RowTransformer inside an sklearn pipeline against a hand-written equivalent.

    Two pipelines are fitted on basic motions. The first extracts features
    with plain function transformers; the second replaces the row-mean
    function with ``RowTransformer`` wrapping ``np.mean``. Their predictions
    on the test split must be identical.
    """
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, _ = load_basic_motions(split="test", return_X_y=True)

    def row_mean(X):
        # mean of each cell's series, column by column
        frame = pd.DataFrame(X) if isinstance(X, pd.Series) else X
        per_column = [pd.Series(col.apply(np.mean)) for _, col in frame.items()]
        return pd.concat(per_column, axis=1)

    def row_first(X):
        # first column of the 2d-array form of each nested column
        frame = pd.DataFrame(X) if isinstance(X, pd.Series) else X
        per_column = [
            pd.Series(from_nested_to_2d_array(col).iloc[:, 0])
            for _, col in frame.items()
        ]
        return pd.concat(per_column, axis=1)

    def fit_and_predict(mean_step):
        # Columns are given as lists; otherwise pandas Series are selected
        # and passed on to the transformers.
        extractor = ColumnTransformer(
            [
                ("mean", mean_step, ["dim_0"]),
                (
                    "first",
                    FunctionTransformer(func=row_first, validate=False),
                    ["dim_1"],
                ),
            ]
        )
        forest = RandomForestClassifier(n_estimators=2, random_state=1)
        model = Pipeline(steps=[("extract", extractor), ("classify", forest)])
        model.fit(X_train, y_train)
        return model.predict(X_test)

    # using pure sklearn
    expected = fit_and_predict(FunctionTransformer(func=row_mean, validate=False))

    # using sktime with sklearn pipeline
    actual = fit_and_predict(
        RowTransformer(FunctionTransformer(func=np.mean, validate=False))
    )

    np.testing.assert_array_equal(expected, actual)
def test_rocket_on_basic_motions():
    """Compare ROCKETClassifier probabilities on basic motions with stored values.

    Cases are selected with a seeded permutation of the positions 0-19, used
    for both the training and the test split. The comparison is exact.
    """
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, _ = load_basic_motions(split="test", return_X_y=True)

    order = np.random.RandomState(0).permutation(20)
    X_fit, y_fit = X_train.iloc[order], y_train[order]

    classifier = ROCKETClassifier(num_kernels=1000, random_state=0)
    classifier.fit(X_fit, y_fit)

    predicted = classifier.predict_proba(X_test.iloc[order])
    testing.assert_array_equal(predicted, rocket_basic_motions_probas)
def test_drcif_on_basic_motions():
    """Compare DrCIF probabilities on basic motions with stored values.

    Cases are selected with a seeded permutation of the positions 0-19, used
    for both the training and the test split. The comparison is exact.
    """
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, _ = load_basic_motions(split="test", return_X_y=True)

    order = np.random.RandomState(0).permutation(20)
    X_fit, y_fit = X_train.iloc[order], y_train[order]

    classifier = DrCIF(n_estimators=20, random_state=0)
    classifier.fit(X_fit, y_fit)

    predicted = classifier.predict_proba(X_test.iloc[order])
    testing.assert_array_equal(predicted, drcif_basic_motions_probas)
def test_cif_on_basic_motions():
    """Compare CanonicalIntervalForest probabilities on basic motions with stored values.

    Cases are selected with a seeded permutation of the positions 0-19, used
    for both the training and the test split. The comparison is exact.
    """
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, _ = load_basic_motions(split="test", return_X_y=True)

    order = np.random.RandomState(0).permutation(20)
    X_fit, y_fit = X_train.iloc[order], y_train[order]

    classifier = CanonicalIntervalForest(n_estimators=100, random_state=0)
    classifier.fit(X_fit, y_fit)

    predicted = classifier.predict_proba(X_test.iloc[order])
    testing.assert_array_equal(predicted, cif_basic_motions_probas)
def test_arsenal_on_basic_motions():
    """Compare Arsenal probabilities on basic motions with stored values.

    Cases are selected with a seeded permutation of the positions 0-19, used
    for both the training and the test split. The comparison is exact.
    """
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, _ = load_basic_motions(split="test", return_X_y=True)

    order = np.random.RandomState(0).permutation(20)
    X_fit, y_fit = X_train.iloc[order], y_train[order]

    classifier = Arsenal(num_kernels=1000, n_estimators=10, random_state=0)
    classifier.fit(X_fit, y_fit)

    predicted = classifier.predict_proba(X_test.iloc[order])
    testing.assert_array_equal(predicted, arsenal_basic_motions_probas)
def test_catch22_forest_classifier_on_basic_motions():
    """Compare Catch22ForestClassifier probabilities on basic motions with stored values.

    Cases are selected with a seeded permutation of the positions 0-19, used
    for both the training and the test split. The comparison is exact.
    """
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, _ = load_basic_motions(split="test", return_X_y=True)

    order = np.random.RandomState(0).permutation(20)
    X_fit, y_fit = X_train.iloc[order], y_train[order]

    classifier = Catch22ForestClassifier(random_state=0)
    classifier.fit(X_fit, y_fit)

    predicted = classifier.predict_proba(X_test.iloc[order])
    testing.assert_array_equal(
        predicted, catch22_forest_classifier_basic_motions_probas
    )
def test_arsenal_on_basic_motions():
    """Compare Arsenal probabilities on a basic motions subsample with stored values.

    Ten positions are drawn without replacement from a ``RandomState(4)``
    and used for both the training and the test split. The comparison is
    exact.
    """
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, _ = load_basic_motions(split="test", return_X_y=True)

    rng = np.random.RandomState(4)
    subset = rng.choice(len(y_train), 10, replace=False)

    classifier = Arsenal(num_kernels=500, n_estimators=5, random_state=0)
    classifier.fit(X_train.iloc[subset], y_train[subset])

    predicted = classifier.predict_proba(X_test.iloc[subset])
    testing.assert_array_equal(predicted, arsenal_basic_motions_probas)
def test_cif_on_basic_motions():
    """Compare CanonicalIntervalForest probabilities on a basic motions subsample.

    Ten positions are drawn without replacement from a ``RandomState(4)``
    and used for both the training and the test split. The result must
    equal ``cif_basic_motions_probas`` exactly.
    """
    X_train, y_train = load_basic_motions(split="train")
    X_test, _ = load_basic_motions(split="test")

    rng = np.random.RandomState(4)
    subset = rng.choice(len(y_train), 10, replace=False)

    classifier = CanonicalIntervalForest(n_estimators=10, random_state=0)
    classifier.fit(X_train.iloc[subset], y_train[subset])

    predicted = classifier.predict_proba(X_test.iloc[subset])
    testing.assert_array_equal(predicted, cif_basic_motions_probas)
def test_drcif_on_basic_motions():
    """Compare DrCIF probabilities on a basic motions subsample with stored values.

    Ten positions are drawn without replacement from a ``RandomState(4)``
    and used for both the training and the test split. The comparison is
    exact.
    """
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, _ = load_basic_motions(split="test", return_X_y=True)

    rng = np.random.RandomState(4)
    subset = rng.choice(len(y_train), 10, replace=False)

    classifier = DrCIF(n_estimators=10, random_state=0)
    classifier.fit(X_train.iloc[subset], y_train[subset])

    predicted = classifier.predict_proba(X_test.iloc[subset])
    testing.assert_array_equal(predicted, drcif_basic_motions_probas)
def test_muse_on_basic_motions():
    """Check that MUSE reaches a score of at least 0.99 on basic motions.

    Cases are selected with a seeded permutation of the positions 0-19, used
    for both the training and the test split.
    """
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, y_test = load_basic_motions(split="test", return_X_y=True)

    order = np.random.RandomState(0).permutation(20)

    # WEASEL+MUSE on multivariate data
    classifier = MUSE(
        random_state=1379,
        window_inc=4,
        use_first_order_differences=False,
    )
    classifier.fit(X_train.iloc[order], y_train[order])

    accuracy = classifier.score(X_test.iloc[order], y_test[order])
    assert accuracy >= 0.99
def test_catch22_classifier_on_basic_motions():
    """Compare Catch22Classifier probabilities on basic motions with stored values.

    Cases are selected with a seeded permutation of the positions 0-19, used
    for both the training and the test split. The comparison is exact.
    """
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, _ = load_basic_motions(split="test", return_X_y=True)

    order = np.random.RandomState(0).permutation(20)
    X_fit, y_fit = X_train.iloc[order], y_train[order]

    # catch22 features fed into a 20-tree random forest
    classifier = Catch22Classifier(
        random_state=0,
        estimator=RandomForestClassifier(n_estimators=20),
    )
    classifier.fit(X_fit, y_fit)

    predicted = classifier.predict_proba(X_test.iloc[order])
    testing.assert_array_equal(predicted, catch22_classifier_basic_motions_probas)
def test_resizing():
    """Check that TSInterpolator resizes every series to the requested length.

    Steps:
    1) verify all series in the first row start with equal lengths;
    2) cut the series and verify the lengths now differ;
    3) resize with ``TSInterpolator`` to a fixed target length;
    4) verify every series in the first row has the target length.
    """
    X, _ = load_basic_motions(split="train", return_X_y=True)

    # 1) Lengths of all series in the first row (one per column).
    # Iterate over the row's values directly; integer keys on the row Series
    # would be label lookups unless pandas falls back to positions.
    ts_lens_before = [len(ts) for ts in X.iloc[0]]
    assert all(length == ts_lens_before[0] for length in ts_lens_before)

    # 2) Cut each series in each cell of X so the lengths differ. The return
    # value is not used; X itself is expected to be modified in place.
    cut_X_ts(X)
    ts_lens_after_cut = [len(ts) for ts in X.iloc[0]]
    assert not all(length == ts_lens_after_cut[0] for length in ts_lens_after_cut)

    # 3) Build the transformer with the target length and apply it.
    target_len = 50
    Xt = TSInterpolator(target_len).fit_transform(X)

    # 4) Every resulting series must have exactly the target length.
    ts_lens_after_resize = [len(ts) for ts in Xt.iloc[0]]
    assert all(length == target_len for length in ts_lens_after_resize)
def test_multivariate_correctness():
    """Check distance values between the first two BasicMotions cases.

    The euclidean distance is checked once. Every other distance is checked
    for the three parameter settings stored in ``distance_parameters``,
    against ``basic_motions_distances`` to 4 decimals.
    """
    trainX, _ = load_basic_motions(return_type="numpy3D")
    first, second = trainX[0], trainX[1]

    assert_almost_equal(
        euclidean_distance(first, second),
        basic_motions_distances["euclidean"],
        4,
    )

    for j in range(3):
        # (reference key, distance function, keyword arguments) per check;
        # lcss and edr take their stored epsilon divided by 50
        checks = [
            ("dtw", dtw_distance, {"window": distance_parameters["dtw"][j]}),
            ("wdtw", wdtw_distance, {"g": distance_parameters["wdtw"][j]}),
            (
                "lcss",
                lcss_distance,
                {"epsilon": distance_parameters["lcss"][j] / 50.0},
            ),
            ("erp", erp_distance, {"window": distance_parameters["erp"][j]}),
            (
                "edr",
                edr_distance,
                {"epsilon": distance_parameters["edr"][j] / 50.0},
            ),
            ("ddtw", ddtw_distance, {"window": distance_parameters["ddtw"][j]}),
            ("wddtw", wddtw_distance, {"g": distance_parameters["wddtw"][j]}),
            ("twe", twe_distance, {"window": distance_parameters["twe"][j]}),
        ]
        for key, distance_fn, kwargs in checks:
            value = distance_fn(first, second, **kwargs)
            assert_almost_equal(value, basic_motions_distances[key][j], 4)
def _reproduce_transform_basic_motions(estimator):
    """Fit ``estimator`` on a basic motions subsample and return its transform.

    Five training positions are drawn without replacement from a
    ``RandomState(4)``. The estimator is fitted on those cases and then
    transforms the same cases.

    Returns
    -------
    The transform output with NaN, +inf and -inf all replaced by 0.
    """
    X_train, y_train = load_basic_motions(split="train")

    rng = np.random.RandomState(4)
    subset = rng.choice(len(y_train), 5, replace=False)
    X_subset = X_train.iloc[subset]

    estimator.fit(X_subset, y_train[subset])
    transformed = estimator.transform(X_subset)

    # copy=False lets numpy replace the values in place where it can
    return np.nan_to_num(transformed, copy=False, nan=0, posinf=0, neginf=0)
def test_rocket_on_basic_motions():
    """Compare RocketClassifier probabilities on a basic motions subsample.

    Ten positions are drawn without replacement from a ``RandomState(4)``
    and used for both the training and the test split. The result must
    agree with ``rocket_basic_motions_probas`` to two decimals.
    """
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, _ = load_basic_motions(split="test", return_X_y=True)

    rng = np.random.RandomState(4)
    subset = rng.choice(len(y_train), 10, replace=False)

    classifier = RocketClassifier(num_kernels=500, random_state=0)
    classifier.fit(X_train.iloc[subset], y_train[subset])

    predicted = classifier.predict_proba(X_test.iloc[subset])
    testing.assert_array_almost_equal(
        predicted, rocket_basic_motions_probas, decimal=2
    )
def test_signature_classifier_on_basic_motions():
    """Compare SignatureClassifier probabilities on a basic motions subsample.

    Ten positions are drawn without replacement from a ``RandomState(4)``
    and used for both the training and the test split. The result must
    agree with ``signature_classifier_basic_motions_probas`` to two decimals.
    """
    X_train, y_train = load_basic_motions(split="train")
    X_test, _ = load_basic_motions(split="test")

    rng = np.random.RandomState(4)
    subset = rng.choice(len(y_train), 10, replace=False)

    # signature features fed into a 10-tree random forest
    forest = RandomForestClassifier(n_estimators=10)
    classifier = SignatureClassifier(random_state=0, estimator=forest)
    classifier.fit(X_train.iloc[subset], y_train[subset])

    predicted = classifier.predict_proba(X_test.iloc[subset])
    testing.assert_array_almost_equal(
        predicted, signature_classifier_basic_motions_probas, decimal=2
    )
def test_basic_multivariate(network=None):
    """Smoke-test a network on basic motions: load data, fit, print the score.

    Parameters
    ----------
    network : classifier with ``fit`` and ``score``, optional
        The network to exercise. When omitted, a
        ``CNNClassifier(nb_epochs=SMALL_NB_EPOCHS)`` is created for this call.
    """
    # Build the default lazily. A default written in the signature is
    # evaluated once at definition time, so the same instance would be
    # fitted and shared by every call that relies on it.
    if network is None:
        network = CNNClassifier(nb_epochs=SMALL_NB_EPOCHS)

    print("Start test_multivariate()")
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, y_test = load_basic_motions(split="test", return_X_y=True)

    network.fit(X_train, y_train)

    # the score is only reported, not asserted
    print(network.score(X_test, y_test))
    print("End test_multivariate()")
def test_RowwiseTransformer_pipeline():
    """Check RowwiseTransformer inside an sklearn pipeline against a manual equivalent.

    A pipeline using plain function transformers and one using
    ``RowwiseTransformer`` wrapping ``np.mean`` are fitted on basic motions.
    Their predictions on the test split must be identical.
    """
    X_train, y_train = load_basic_motions("TRAIN", return_X_y=True)
    X_test, _ = load_basic_motions("TEST", return_X_y=True)

    def rowwise_mean(X):
        # mean of each cell's series, column by column
        frame = pd.DataFrame(X) if isinstance(X, pd.Series) else X
        means = [pd.Series(col.apply(np.mean)) for _, col in frame.items()]
        return pd.concat(means, axis=1)

    def rowwise_first(X):
        # first column of the tabularised form of each nested column
        frame = pd.DataFrame(X) if isinstance(X, pd.Series) else X
        firsts = [pd.Series(tabularise(col).iloc[:, 0]) for _, col in frame.items()]
        return pd.concat(firsts, axis=1)

    # --- using pure sklearn ---
    # Columns are given as lists; otherwise pandas Series are selected and
    # passed on to the transformers.
    mean_step = FunctionTransformer(func=rowwise_mean, validate=False)
    first_step = FunctionTransformer(func=rowwise_first, validate=False)
    sklearn_extractor = ColumnTransformer(
        [('mean', mean_step, ['dim_0']), ('first', first_step, ['dim_1'])]
    )
    sklearn_forest = RandomForestClassifier(n_estimators=2, random_state=1)
    sklearn_model = Pipeline(
        steps=[('extract', sklearn_extractor), ('classify', sklearn_forest)]
    )
    sklearn_model.fit(X_train, y_train)
    expected = sklearn_model.predict(X_test)

    # --- using sktime with sklearn pipeline ---
    rowwise_step = RowwiseTransformer(
        FunctionTransformer(func=np.mean, validate=False)
    )
    first_step = FunctionTransformer(func=rowwise_first, validate=False)
    sktime_extractor = ColumnTransformer(
        [('mean', rowwise_step, ['dim_0']), ('first', first_step, ['dim_1'])]
    )
    sktime_forest = RandomForestClassifier(n_estimators=2, random_state=1)
    sktime_model = Pipeline(
        steps=[('extract', sktime_extractor), ('classify', sktime_forest)]
    )
    sktime_model.fit(X_train, y_train)
    actual = sktime_model.predict(X_test)

    np.testing.assert_array_equal(expected, actual)
def test_catch22_classifier_on_basic_motions():
    """Compare Catch22Classifier probabilities on a basic motions subsample.

    Ten positions are drawn without replacement from a ``RandomState(4)``
    and used for both the training and the test split. The result must
    agree with ``catch22_classifier_basic_motions_probas`` to two decimals.
    """
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, _ = load_basic_motions(split="test", return_X_y=True)

    rng = np.random.RandomState(4)
    subset = rng.choice(len(y_train), 10, replace=False)

    # catch22 features fed into a 10-tree random forest
    forest = RandomForestClassifier(n_estimators=10)
    classifier = Catch22Classifier(random_state=0, estimator=forest)
    classifier.fit(X_train.iloc[subset], y_train[subset])

    predicted = classifier.predict_proba(X_test.iloc[subset])
    testing.assert_array_almost_equal(
        predicted, catch22_classifier_basic_motions_probas, decimal=2
    )
def test_tsfresh_classifier_on_basic_motions():
    """Compare TSFreshClassifier probabilities on basic motions with stored values.

    Cases are selected with a seeded permutation of the positions 0-19, used
    for both the training and the test split. The comparison is exact.
    """
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, _ = load_basic_motions(split="test", return_X_y=True)

    order = np.random.RandomState(0).permutation(20)
    X_fit, y_fit = X_train.iloc[order], y_train[order]

    # "minimal" tsfresh feature set fed into a 20-tree random forest
    classifier = TSFreshClassifier(
        random_state=0,
        default_fc_parameters="minimal",
        estimator=RandomForestClassifier(n_estimators=20),
    )
    classifier.fit(X_fit, y_fit)

    predicted = classifier.predict_proba(X_test.iloc[order])
    testing.assert_array_equal(predicted, tsfresh_classifier_basic_motions_probas)