def test_different_pipelines():
    """Segmenter + FeatureUnion pipeline must match RandomIntervalFeatureExtractor.

    Both routes are seeded identically, so the extracted features and the
    drawn intervals must agree exactly.
    """
    seed = 1233
    X_train, _ = make_classification_problem()

    feature_union = FeatureUnion([
        ('mean', RowTransformer(FunctionTransformer(func=np.mean, validate=False))),
        ('std', RowTransformer(FunctionTransformer(func=np.std, validate=False))),
        ('slope', RowTransformer(FunctionTransformer(func=time_series_slope, validate=False))),
    ])
    pipe = Pipeline([
        ('segment', RandomIntervalSegmenter(n_intervals='sqrt', random_state=seed)),
        ('transform', feature_union),
    ])
    via_pipeline = pipe.fit_transform(X_train)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals='sqrt',
        features=[np.mean, np.std, time_series_slope],
        random_state=seed,
    )
    via_extractor = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(via_pipeline, via_extractor)
    # Same seed: both must have drawn identical intervals.
    np.testing.assert_array_equal(pipe.steps[0][1].intervals_, extractor.intervals_)
def test_from_nested_to_2d_array(n_instances, n_columns, n_timepoints):
    """Flattening a nested frame yields the expected 2D shape and keeps the index."""
    nested, _ = make_classification_problem(n_instances, n_columns, n_timepoints)
    flattened = from_nested_to_2d_array(nested)
    expected_shape = (n_instances, n_columns * n_timepoints)
    assert flattened.shape == expected_shape
    assert flattened.index.equals(nested.index)
def test_check_X_enforce_univariate():
    """Multivariate input must be rejected when univariate data is enforced."""
    X, y = make_classification_problem(n_columns=2)
    expected_msg = r"univariate"
    with pytest.raises(ValueError, match=expected_msg):
        check_X(X, enforce_univariate=True)
    with pytest.raises(ValueError, match=expected_msg):
        check_X_y(X, y, enforce_univariate=True)
def test_check_X_enforce_min_columns():
    """Input with fewer columns than required must raise a ValueError."""
    X, y = make_classification_problem(n_columns=2)
    expected_msg = r"columns"
    with pytest.raises(ValueError, match=expected_msg):
        check_X(X, enforce_min_columns=3)
    with pytest.raises(ValueError, match=expected_msg):
        check_X_y(X, y, enforce_min_columns=3)
def test_from_nested_to_3d_numpy(n_instances, n_columns, n_timepoints):
    """Nested frame converts to a 3D numpy array of the right shape and values."""
    nested, _ = make_classification_problem(n_instances, n_columns, n_timepoints)
    converted = from_nested_to_3d_numpy(nested)

    # Type and dimensions.
    assert isinstance(converted, np.ndarray)
    assert converted.shape == (n_instances, n_columns, n_timepoints)

    # Spot-check one series: values must be carried over unchanged.
    np.testing.assert_array_equal(nested.iloc[1, 0], converted[1, 0, :])
def test_check_enforce_min_instances():
    """Too few instances must raise a ValueError in all three check functions."""
    X, y = make_classification_problem(n_instances=3)
    expected_msg = r"instance"
    with pytest.raises(ValueError, match=expected_msg):
        check_X(X, enforce_min_instances=4)
    with pytest.raises(ValueError, match=expected_msg):
        check_X_y(X, y, enforce_min_instances=4)
    with pytest.raises(ValueError, match=expected_msg):
        check_y(y, enforce_min_instances=4)
def test_different_implementations():
    """Chained segmenter + row transform must equal the direct feature extractor."""
    seed = 1233
    X_train, _ = make_classification_problem()

    # Route 1: segment first, then apply np.mean row-wise.
    segmenter = RandomIntervalSegmenter(n_intervals='sqrt', random_state=seed)
    mean_rows = RowTransformer(FunctionTransformer(func=np.mean, validate=False))
    chained = mean_rows.fit_transform(segmenter.fit_transform(X_train))

    # Route 2: single extractor doing both steps with the same seed.
    extractor = RandomIntervalFeatureExtractor(
        n_intervals='sqrt', features=[np.mean], random_state=seed
    )
    direct = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(chained, direct)
def test_different_pipelines():
    """Segmenter + FeatureUnion pipeline must match RandomIntervalFeatureExtractor.

    Both routes share the same random seed, so extracted features and drawn
    intervals must agree exactly.
    """
    seed = 1233
    X_train, _ = make_classification_problem()

    def _row_transformer(func):
        # Wrap a plain numpy reducer as a series-to-primitives row transformer.
        return SeriesToPrimitivesRowTransformer(
            FunctionTransformer(func=func, validate=False),
            check_transformer=False,
        )

    pipe = Pipeline([
        ("segment", RandomIntervalSegmenter(n_intervals=1, random_state=seed)),
        (
            "transform",
            FeatureUnion([
                ("mean", _row_transformer(np.mean)),
                ("std", _row_transformer(np.std)),
                ("slope", _row_transformer(time_series_slope)),
            ]),
        ),
    ])
    via_pipeline = pipe.fit_transform(X_train)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals=1,
        features=[np.mean, np.std, time_series_slope],
        random_state=seed,
    )
    via_extractor = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(via_pipeline, via_extractor)
    # Same seed: both must have drawn identical intervals.
    np.testing.assert_array_equal(pipe.steps[0][1].intervals_, extractor.intervals_)
def test_results(n_instances, n_timepoints, n_intervals):
    """Extracted mean/std columns must equal features computed directly on X."""
    X, _ = make_classification_problem(
        n_instances=n_instances, n_timepoints=n_timepoints, return_numpy=True
    )
    extractor = RandomIntervalFeatureExtractor(
        n_intervals=n_intervals, features=[np.mean, np.std]
    )
    Xt = extractor.fit_transform(X)
    # Drop duplicate columns that arise when the same interval is drawn twice.
    Xt = Xt.loc[:, ~Xt.columns.duplicated()]

    # Each drawn interval's feature columns must match a direct computation.
    for start, end in extractor.intervals_:
        segment = X[:, 0, start:end]
        np.testing.assert_array_equal(
            Xt.loc[:, f"{start}_{end}_mean"].to_numpy().ravel(),
            np.mean(segment, axis=-1),
        )
        np.testing.assert_array_equal(
            Xt.loc[:, f"{start}_{end}_std"].to_numpy().ravel(),
            np.std(segment, axis=-1),
        )
def test_make_classification_problem(n_instances, n_columns, n_timepoints,
                                     n_classes, return_numpy):
    """Generated problem must have the requested dimensions and class count."""
    X, y = make_classification_problem(
        n_instances=n_instances,
        n_classes=n_classes,
        n_columns=n_columns,
        n_timepoints=n_timepoints,
        return_numpy=return_numpy,
    )
    # Dimensions of the generated data.
    _check_X_y(X, y, n_instances, n_columns, n_timepoints, check_numpy=return_numpy)
    # Number of distinct classes in the labels.
    assert len(np.unique(y)) == n_classes
def test_bad_features(bad_features):
    """Invalid ``features`` arguments must raise a ValueError on fit.

    Parameters
    ----------
    bad_features : object
        A parametrized invalid value for the ``features`` argument.
    """
    X, y = make_classification_problem()
    with pytest.raises(ValueError):
        # Fix: the parametrized bad value is a *features* input, so it must be
        # passed to ``features`` — the original passed it to ``n_intervals``,
        # which exercised the wrong validation path.
        RandomIntervalFeatureExtractor(features=bad_features).fit(X)
# -*- coding: utf-8 -*- import numpy as np from sklearn.model_selection import train_test_split from sklearn.pipeline import FeatureUnion from sklearn.pipeline import Pipeline from sklearn.preprocessing import FunctionTransformer from sklearn.tree import DecisionTreeClassifier from sktime.datasets import load_gunpoint from sktime.transformers.panel.compose import ( SeriesToPrimitivesRowTransformer, ) from sktime.transformers.panel.segment import RandomIntervalSegmenter from sktime.utils._testing import make_classification_problem # load data X, y = make_classification_problem() X_train, X_test, y_train, y_test = train_test_split(X, y) mean_transformer = SeriesToPrimitivesRowTransformer(FunctionTransformer( func=np.mean, validate=False), check_transformer=False) std_transformer = SeriesToPrimitivesRowTransformer(FunctionTransformer( func=np.std, validate=False), check_transformer=False) def test_FeatureUnion_pipeline(): # pipeline with segmentation plus multiple feature extraction steps = [ ("segment", RandomIntervalSegmenter(n_intervals=1)),