def test_different_implementations():
    """Check RandomIntervalFeatureExtractor against equivalent compositions.

    Two comparisons are made on the gunpoint training data, all using the
    same random seed:

    1. A RandomIntervalSegmenter followed by a row-wise mean must equal the
       feature extractor configured with ``features=[np.mean]``.
    2. A TSPipeline of segmenter + TSFeatureUnion(mean, std) must equal the
       feature extractor configured with ``features=[np.mean, np.std]`` once
       the columns of both outputs are put in sorted order.
    """
    random_seed = 1233
    X_train, _ = load_gunpoint(return_X_y=True)

    def _rowwise(func):
        # Row-wise application of a plain function to each series.
        return RowwiseTransformer(FunctionTransformer(func=func, validate=False))

    # Compare with chained transformations.
    segmenter = RandomIntervalSegmenter(n_intervals='sqrt',
                                        random_state=random_seed)
    rowwise_mean = _rowwise(np.mean)
    segments = segmenter.fit_transform(X_train)
    chained = rowwise_mean.fit_transform(segments)

    extractor = RandomIntervalFeatureExtractor(n_intervals='sqrt',
                                               features=[np.mean],
                                               random_state=random_seed)
    extracted = extractor.fit_transform(X_train)
    np.testing.assert_array_equal(chained, extracted)

    # Compare with transformer pipeline using TSFeatureUnion.
    union = TSFeatureUnion([
        ('mean', _rowwise(np.mean)),
        ('std', _rowwise(np.std)),
    ])
    pipe = TSPipeline(
        [
            ('segment', RandomIntervalSegmenter(n_intervals='sqrt',
                                                check_input=False)),
            ('transform', union),
        ],
        random_state=random_seed,
    )
    piped = pipe.fit_transform(X_train)

    # The union output holds the mean columns first, then the std columns.
    n_ints = piped.shape[1] // 2

    # Rename columns for comparing re-ordered arrays.
    mean_names = piped.columns[:n_ints] + '_mean'
    std_names = piped.columns[n_ints:n_ints * 2] + '_std'
    piped.columns = [*mean_names, *std_names]
    piped = piped.reindex(np.sort(piped.columns), axis=1)

    extractor = RandomIntervalFeatureExtractor(n_intervals='sqrt',
                                               features=[np.mean, np.std],
                                               random_state=random_seed)
    direct = extractor.fit_transform(X_train)
    direct = direct.reindex(np.sort(direct.columns), axis=1)
    np.testing.assert_array_equal(piped, direct)
def test_rowwise_transformer_function_transformer_series_to_primitives():
    """Row-wise ``np.mean`` keeps the frame shape and yields float cells."""
    X, y = load_gunpoint(return_X_y=True)

    mean_transformer = FunctionTransformer(func=np.mean, validate=False)
    rowwise = RowwiseTransformer(mean_transformer)
    transformed = rowwise.fit_transform(X, y)

    assert transformed.shape == X.shape

    # check series-to-primitive transforms
    first_cell = transformed.iloc[0, 0]
    assert isinstance(first_cell, float)
def test_rowwise_transformer_transform_inverse_transform():
    """Round-trip a row-wise StandardScaler through inverse_transform.

    The inverse-transformed frame must have the input's shape, hold
    series/array cells, and match the input to 5 decimals once tabularised.
    """
    X, _ = load_gunpoint(return_X_y=True)

    rowwise_scaler = RowwiseTransformer(StandardScaler())
    scaled = rowwise_scaler.fit_transform(X)
    restored = rowwise_scaler.inverse_transform(scaled)

    assert restored.shape == X.shape

    # check series-to-series transforms
    series_like = (pd.Series, np.ndarray)
    assert isinstance(restored.iloc[0, 0], series_like)

    expected = tabularise(X).values
    actual = tabularise(restored).values
    np.testing.assert_array_almost_equal(expected, actual, decimal=5)
def test_rowwise_transformer_sklearn_transfomer():
    """Row-wise StandardScaler standardises each series.

    Builds a frame from 100 normal draws (mean ``mu``, standard deviation
    ``sd``), applies a StandardScaler row-wise and checks that the output
    keeps the input shape, holds series/array cells, and that the first cell
    has mean ~0 and standard deviation ~1.
    """
    mu = 10
    sd = 5
    # Use ``sd`` for the scale so the declared parameter actually drives the
    # generated data (it was previously declared but a literal was passed).
    values = np.random.normal(loc=mu, scale=sd, size=(100,))
    X = generate_df_from_array(values, n_rows=10, n_cols=1)

    t = StandardScaler(with_mean=True, with_std=True)
    r = RowwiseTransformer(t)

    Xt = r.fit_transform(X)
    assert Xt.shape == X.shape

    # check series-to-series transform
    assert isinstance(Xt.iloc[0, 0], (pd.Series, np.ndarray))

    # check standardisation
    np.testing.assert_almost_equal(Xt.iloc[0, 0].mean(), 0)
    np.testing.assert_almost_equal(Xt.iloc[0, 0].std(), 1, decimal=2)
def test_rowwise_transformer_function_transformer_series_to_series():
    """Row-wise series-to-series function keeps shape and series cells.

    A power-spectrum function is wrapped in a FunctionTransformer and
    applied row-wise; the output frame must have the input's shape and hold
    series/array cells.
    """
    X, y = load_gunpoint(return_X_y=True)

    # series-to-series transform function
    def powerspectrum(x):
        spectrum = np.fft.fft(x)
        re, im = spectrum.real, spectrum.imag
        power = re * re + im * im
        # Keep only the first half of the spectrum.
        half = power.shape[0] // 2
        return power[:half]

    spectrum_transformer = FunctionTransformer(func=powerspectrum,
                                               validate=False)
    rowwise = RowwiseTransformer(spectrum_transformer)
    transformed = rowwise.fit_transform(X, y)

    assert transformed.shape == X.shape

    # check series-to-series transforms
    series_like = (pd.Series, np.ndarray)
    assert isinstance(transformed.iloc[0, 0], series_like)
def test_different_implementations():
    """Chained segmenter + row-wise mean equals the feature extractor.

    With a shared random seed, RandomIntervalSegmenter followed by a
    row-wise ``np.mean`` must produce the same output as
    RandomIntervalFeatureExtractor configured with ``features=[np.mean]``.
    """
    # NOTE(review): a function with this same name is defined earlier in
    # this file. If both live in one module, this later definition replaces
    # the earlier one and only this test is collected - confirm and rename
    # one of them if so.
    random_seed = 1233
    X_train, _ = load_gunpoint(return_X_y=True)

    # Compare with chained transformations.
    segmenter = RandomIntervalSegmenter(n_intervals='sqrt',
                                        random_state=random_seed)
    rowwise_mean = RowwiseTransformer(
        FunctionTransformer(func=np.mean, validate=False))
    segments = segmenter.fit_transform(X_train)
    chained = rowwise_mean.fit_transform(segments)

    extractor = RandomIntervalFeatureExtractor(n_intervals='sqrt',
                                               features=[np.mean],
                                               random_state=random_seed)
    extracted = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(chained, extracted)