def test_output_of_transformer():
    """Compare SlopeTransformer output with expected slopes for two series.

    The same transformer instance (``num_intervals=2``) is refitted on the
    second series before transforming it.
    """
    scenarios = [
        (
            np.array([4, 6, 10, 12, 8, 6, 5, 5]),
            [(5 + math.sqrt(41)) / 4, (1 + math.sqrt(101)) / -10],
        ),
        (
            np.array([-5, 2.5, 1, 3, 10, -1.5, 6, 12, -3, 0.2]),
            [
                (104.8 + math.sqrt(14704.04)) / 61,
                (143.752 + math.sqrt(20790.0775)) / -11.2,
            ],
        ),
    ]

    transformer = SlopeTransformer(num_intervals=2)
    for series, expected_slopes in scenarios:
        X = generate_df_from_array(series, n_rows=1, n_cols=1)
        transformer = transformer.fit(X)
        actual = transformer.transform(X)

        expected = convert_list_to_dataframe([expected_slopes])
        expected.columns = X.columns
        assert check_if_dataframes_are_equal(actual, expected)
def test_output_dimensions():
    """Check the shape of PAA output for univariate and multivariate input.

    Each transformed cell must hold ``num_intervals`` values, while the number
    of rows and columns of the frame must match the input.
    """
    n_instances = 10
    n_intervals = 5

    # One column is the univariate case, five columns the multivariate case.
    for n_dims in (1, 5):
        X = generate_df_from_array(np.ones(12), n_rows=n_instances,
                                   n_cols=n_dims)
        transformed = PAA(num_intervals=n_intervals).fit(X).transform(X)

        # Dimensions of the generated dataframe.
        series_length = transformed.iloc[0, 0].shape[0]
        out_rows, out_cols = transformed.shape

        assert series_length == n_intervals
        assert out_rows == n_instances
        assert out_cols == n_dims
def test_tabularize():
    """Check the shape and index of ``tabularize`` output.

    Covers a single nested column and a frame whose columns hold series of
    different lengths, both as array output and as the default output.
    """
    n_obs_X, n_cols_X = 20, 3
    X = generate_df_from_array(np.random.normal(size=n_obs_X),
                               n_rows=10, n_cols=n_cols_X)

    # Single series (one column) input.
    first_column = X.iloc[:, 0]
    as_array = tabularize(first_column, return_array=True)
    assert as_array.shape[0] == X.shape[0]
    assert as_array.shape[1] == n_obs_X

    as_frame = tabularize(first_column)
    assert as_frame.index.equals(X.index)

    # Dataframe input where the columns contain series of unequal length.
    n_obs_Y, n_cols_Y = 13, 2
    Y = generate_df_from_array(np.random.normal(size=n_obs_Y),
                               n_rows=10, n_cols=n_cols_Y)
    X = pd.concat([X, Y], axis=1)

    expected_width = (n_cols_X * n_obs_X) + (n_cols_Y * n_obs_Y)
    as_array = tabularize(X, return_array=True)
    assert as_array.shape[0] == X.shape[0]
    assert as_array.shape[1] == expected_width

    as_frame = tabularize(X)
    assert as_frame.index.equals(X.index)
def test_output_of_transformer():
    """Compare SlidingWindowSegmenter output with expected windows.

    The series 1..6 is segmented with window lengths 1, 5 and 10; the expected
    frames below repeat the first/last value where a window overhangs the
    ends of the series.
    """
    X = generate_df_from_array(np.array([1, 2, 3, 4, 5, 6]),
                               n_rows=1, n_cols=1)

    expected_by_window = [
        (1, [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]),
        (5, [[1.0, 1.0, 1.0, 2.0, 3.0],
             [1.0, 1.0, 2.0, 3.0, 4.0],
             [1.0, 2.0, 3.0, 4.0, 5.0],
             [2.0, 3.0, 4.0, 5.0, 6.0],
             [3.0, 4.0, 5.0, 6.0, 6.0],
             [4.0, 5.0, 6.0, 6.0, 6.0]]),
        (10, [[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0],
              [1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
              [1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0],
              [1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0, 6.0],
              [1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0, 6.0, 6.0],
              [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0]]),
    ]

    for window_length, windows in expected_by_window:
        segmenter = SlidingWindowSegmenter(window_length=window_length).fit(X)
        actual = segmenter.transform(X)
        expected = convert_list_to_dataframe(windows)
        assert check_if_dataframes_are_equal(actual, expected)
def test_no_levels_does_no_change():
    """A DWTTransformer with ``num_levels=0`` must return its input as is."""
    series = np.array([1, 2, 3, 4, 5, 56])
    X = generate_df_from_array(series, n_rows=1, n_cols=1)

    transformer = DWTTransformer(num_levels=0).fit(X)
    transformed = transformer.transform(X)

    assert check_if_dataframes_are_equal(transformed, X)
def test_bad_num_bins(bad_num_bins):
    """Invalid ``num_bins`` must be rejected by HOG1DTransformer.

    Non-int values are expected to raise TypeError, int values ValueError.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)

    # Pick the expected exception from the type of the bad argument.
    expected_error = ValueError if isinstance(bad_num_bins, int) else TypeError

    with pytest.raises(expected_error):
        HOG1DTransformer(num_bins=bad_num_bins).fit(X).transform(X)
def test_bad_input_args(bad_num_intervals):
    """Invalid ``num_intervals`` must be rejected by PAA.

    Non-int values are expected to raise TypeError, int values ValueError.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)

    # Pick the expected exception from the type of the bad argument.
    if isinstance(bad_num_intervals, int):
        expected_error = ValueError
    else:
        expected_error = TypeError

    with pytest.raises(expected_error):
        PAA(num_intervals=bad_num_intervals).fit(X).transform(X)
def test_bad_input_args(bad_components):
    """Invalid ``n_components`` must be rejected when fitting PCATransformer.

    String values are expected to raise TypeError, anything else ValueError.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)

    # Pick the expected exception from the type of the bad argument.
    expected_error = TypeError if isinstance(bad_components, str) \
        else ValueError

    with pytest.raises(expected_error):
        PCATransformer(n_components=bad_components).fit(X)
def test_shape_descriptor_function(bad_sdf):
    """Invalid ``shape_descriptor_function`` must be rejected by ShapeDTW.fit.

    Non-string values are expected to raise TypeError, strings ValueError.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)
    y = np.zeros(10)

    # Pick the expected exception from the type of the bad argument.
    expected_error = ValueError if isinstance(bad_sdf, str) else TypeError

    with pytest.raises(expected_error):
        ShapeDTW(shape_descriptor_function=bad_sdf).fit(X, y)
def test_output_of_transformer():
    """Compare PAA output (3 intervals) with expected values for 1..10."""
    series = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    X = generate_df_from_array(series, n_rows=1, n_cols=1)

    actual = PAA(num_intervals=3).fit(X).transform(X)

    expected = convert_list_to_dataframe([[2.2, 5.5, 8.8]])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
def test_paa_performs_correcly_along_each_dim():
    """Compare PAA output with the same expected values in both columns."""
    series = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    X = generate_df_from_array(series, n_rows=1, n_cols=2)

    actual = PAA(num_intervals=3).fit(X).transform(X)

    # Both columns hold the same series, so both expect the same averages.
    expected = convert_list_to_dataframe([[2.2, 5.5, 8.8],
                                          [2.2, 5.5, 8.8]])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
def test_subsequence_length(bad_subsequence_length):
    """Invalid ``subsequence_length`` must be rejected by ShapeDTW.fit.

    Non-int values are expected to raise TypeError, int values ValueError.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)
    y = np.zeros(10)

    # Pick the expected exception from the type of the bad argument.
    if isinstance(bad_subsequence_length, int):
        expected_error = ValueError
    else:
        expected_error = TypeError

    with pytest.raises(expected_error):
        ShapeDTW(subsequence_length=bad_subsequence_length).fit(X, y)
def test_bad_input_args(bad_window_length):
    """Invalid ``window_length`` must be rejected by SlidingWindowSegmenter.

    Non-int values are expected to raise TypeError, int values ValueError.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)

    # Pick the expected exception from the type of the bad argument.
    expected_error = (
        ValueError if isinstance(bad_window_length, int) else TypeError
    )

    with pytest.raises(expected_error):
        segmenter = SlidingWindowSegmenter(window_length=bad_window_length)
        segmenter.fit(X).transform(X)
def test_bad_scaling_factor(bad_scaling_factor):
    """Check how HOG1DTransformer reacts to unusual ``scaling_factor`` values.

    Numeric values are simply run through fit/transform; anything that is not
    a ``numbers.Number`` is expected to raise TypeError.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)

    if isinstance(bad_scaling_factor, numbers.Number):
        # Numeric input: only check that fit/transform completes.
        hog = HOG1DTransformer(scaling_factor=bad_scaling_factor)
        hog.fit(X).transform(X)
        return

    with pytest.raises(TypeError):
        hog = HOG1DTransformer(scaling_factor=bad_scaling_factor)
        hog.fit(X).transform(X)
def test_shape_descriptor_functions(bad_sdfs):
    """Check ``shape_descriptor_functions`` handling for the compound case.

    A collection of exactly two entries is fitted without expecting an error;
    any other length is expected to raise ValueError.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)
    y = np.zeros(10)

    if len(bad_sdfs) == 2:
        # Two descriptors: only check that fitting completes.
        ShapeDTW(shape_descriptor_function="compound",
                 shape_descriptor_functions=bad_sdfs).fit(X, y)
        return

    with pytest.raises(ValueError):
        ShapeDTW(shape_descriptor_function="compound",
                 shape_descriptor_functions=bad_sdfs).fit(X, y)
def test_output_format_dim(n_instances, n_timepoints, n_intervals, features):
    """Check type, row count and values of extracted interval features.

    The input is all ones and the test expects every extracted value to be
    one as well.
    """
    X = generate_df_from_array(np.ones(n_timepoints),
                               n_rows=n_instances, n_cols=1)
    expected_rows = X.shape[0]

    extractor = RandomIntervalFeatureExtractor(n_intervals=n_intervals,
                                               features=features)
    Xt = extractor.fit_transform(X)

    assert isinstance(Xt, pd.DataFrame)
    assert Xt.shape[0] == expected_rows
    assert np.array_equal(Xt.values, np.ones(Xt.shape))
def test_hog1d_performs_correcly_along_each_dim():
    """Compare default HOG1D output with the same histogram in both columns."""
    series = np.array([4, 6, 10, 12, 8, 6, 5, 5])
    X = generate_df_from_array(series, n_rows=1, n_cols=2)

    actual = HOG1DTransformer().fit(X).transform(X)

    # Both columns hold the same series, so the expected row is repeated.
    expected = convert_list_to_dataframe([
        [0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0],
        [0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0],
    ])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
def test_slope_performs_correcly_along_each_dim():
    """Compare SlopeTransformer output with the same slopes in both columns."""
    series = np.array([4, 6, 10, 12, 8, 6, 5, 5])
    X = generate_df_from_array(series, n_rows=1, n_cols=2)

    actual = SlopeTransformer(num_intervals=2).fit(X).transform(X)

    first_slope = (5 + math.sqrt(41)) / 4
    second_slope = (1 + math.sqrt(101)) / -10
    # Both columns hold the same series, so the expected row is repeated.
    expected = convert_list_to_dataframe([
        [first_slope, second_slope],
        [first_slope, second_slope],
    ])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
def test_output_of_transformer():
    """Compare default HOG1DTransformer output with expected histograms.

    The same transformer instance is refitted on the second series before
    transforming it.
    """
    scenarios = [
        (
            np.array([4, 6, 10, 12, 8, 6, 5, 5]),
            [0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0],
        ),
        (
            np.array([-5, 2.5, 1, 3, 10, -1.5, 6, 12, -3, 0.2]),
            [0, 0, 0, 0, 4, 1, 0, 0, 0, 0, 2, 0, 2, 1, 0, 0],
        ),
    ]

    hog = HOG1DTransformer()
    for series, histogram in scenarios:
        X = generate_df_from_array(series, n_rows=1, n_cols=1)
        hog = hog.fit(X)
        actual = hog.transform(X)

        expected = convert_list_to_dataframe([histogram])
        expected.columns = X.columns
        assert check_if_dataframes_are_equal(actual, expected)
def test_row_transformer_sklearn_transfomer():
    """Wrap a StandardScaler in RowTransformer and check the first cell.

    The output must keep the input shape, hold a series/array per cell, and
    the first cell must have mean ~0 and standard deviation ~1.
    """
    mu = 10
    samples = np.random.normal(loc=mu, scale=5, size=(100,))
    X = generate_df_from_array(samples, n_rows=10, n_cols=1)

    scaler = StandardScaler(with_mean=True, with_std=True)
    Xt = RowTransformer(scaler).fit_transform(X)

    assert Xt.shape == X.shape

    first_cell = Xt.iloc[0, 0]
    # Series-to-series transform: each cell is still a series/array.
    assert isinstance(first_cell, (pd.Series, np.ndarray))
    # Standardisation: zero mean, unit standard deviation (to 2 decimals).
    np.testing.assert_almost_equal(first_cell.mean(), 0)
    np.testing.assert_almost_equal(first_cell.std(), 1, decimal=2)
def test_output_of_transformer():
    """Compare DWTTransformer output (2 levels) with expected coefficients.

    Two series are checked with the same transformer instance, which is
    refitted before the second transform. The second series has 9 points and
    the expected output has 8 coefficients.

    The second scenario previously built its expected frame but never compared
    it with the result, so it could not fail; the comparison is now asserted.
    """
    X = generate_df_from_array(np.array([4, 6, 10, 12, 8, 6, 5, 5]),
                               n_rows=1, n_cols=1)
    d = DWTTransformer(num_levels=2).fit(X)
    res = d.transform(X)
    orig = convert_list_to_dataframe(
        [[16, 12, -6, 2, -math.sqrt(2), -math.sqrt(2), math.sqrt(2), 0]])
    orig.columns = X.columns
    assert check_if_dataframes_are_equal(res, orig)

    X = generate_df_from_array(
        np.array([-5, 2.5, 1, 3, 10, -1.5, 6, 12, -3]), n_rows=1, n_cols=1)
    d = d.fit(X)
    res = d.transform(X)
    # The last four values are written in exact form rather than as rounded
    # decimals (-5.303301, -1.414214, 8.131728, -4.242641), so the comparison
    # does not depend on the helper's tolerance covering the rounding error.
    orig = convert_list_to_dataframe([[
        0.75, 13.25, -3.25, -4.75,
        -7.5 / math.sqrt(2), -math.sqrt(2),
        11.5 / math.sqrt(2), -6 / math.sqrt(2),
    ]])
    orig.columns = X.columns
    assert check_if_dataframes_are_equal(res, orig)
def test_random_state():
    """RandomIntervalSegmenter must be reproducible for a fixed random_state.

    For each ``n_intervals`` setting, repeated fit_transform calls on fresh
    segmenters with the same seed must yield identical tabularized output.
    """
    X = generate_df_from_array(np.random.normal(size=10))
    seed = 1234

    def _segment(setting):
        # Fresh segmenter each time, always seeded identically.
        segmenter = RandomIntervalSegmenter(n_intervals=setting,
                                            random_state=seed)
        return segmenter.fit_transform(X)

    for n_intervals in [0.5, 10, 'sqrt', 'random', 'log']:
        reference = _segment(n_intervals)
        for _ in range(N_ITER):
            repeated = _segment(n_intervals)
            np.testing.assert_array_equal(
                tabularize(reference).values,
                tabularize(repeated).values,
            )
def test_output_dimensions(num_bins, corr_series_length):
    """Check the shape of HOG1DTransformer output for a given ``num_bins``.

    Each transformed cell must hold ``corr_series_length`` values and the
    frame must keep 10 rows and 1 column.
    """
    X = generate_df_from_array(np.ones(13), n_rows=10, n_cols=1)

    transformed = HOG1DTransformer(num_bins=num_bins).fit(X).transform(X)

    # Dimensions of the generated dataframe.
    series_length = transformed.iloc[0, 0].shape[0]
    out_rows, out_cols = transformed.shape

    assert series_length == corr_series_length
    assert out_rows == 10
    assert out_cols == 1
def test_output_dimensions(time_series_length, window_length):
    """Check the shape of SlidingWindowSegmenter output.

    Each cell must hold ``window_length`` values, the frame must keep 10 rows
    and have one column per time point of the input series.
    """
    X = generate_df_from_array(np.ones(time_series_length),
                               n_rows=10, n_cols=1)

    segmenter = SlidingWindowSegmenter(window_length=window_length).fit(X)
    transformed = segmenter.transform(X)

    # Dimensions of the generated dataframe.
    series_length = transformed.iloc[0, 0].shape[0]
    out_rows, out_cols = transformed.shape

    assert series_length == window_length
    assert out_rows == 10
    assert out_cols == time_series_length
def test_dwt_performs_correcly_along_each_dim():
    """Compare DWTTransformer output (3 levels) in both columns."""
    series = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    X = generate_df_from_array(series, n_rows=1, n_cols=2)

    actual = DWTTransformer(num_levels=3).fit(X).transform(X)

    def _expected_row():
        # Built afresh per column so the two rows are distinct list objects.
        half_root = -math.sqrt(2) / 2
        return [
            9 * math.sqrt(2), -4 * math.sqrt(2), -2, -2,
            half_root, half_root, half_root, half_root, half_root,
        ]

    # Both columns hold the same series, so the expected row is repeated.
    expected = convert_list_to_dataframe([_expected_row(), _expected_row()])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
def test_results(n_instances, n_timepoints, n_intervals):
    """Check extracted interval features against directly computed values.

    Every row of the input holds the same random series ``x``. For each
    fitted interval, the mean, standard deviation and slope columns of the
    output must equal the value computed directly on ``x[start:end]``.
    """
    x = np.random.normal(size=n_timepoints)
    X = generate_df_from_array(x, n_rows=n_instances, n_cols=1)
    t = RandomIntervalFeatureExtractor(
        n_intervals=n_intervals,
        features=[np.mean, np.std, time_series_slope])
    Xt = t.fit_transform(X)

    # Check results
    for start, end in t.intervals_:
        segment = x[start:end]
        expected_mean = np.mean(segment)
        expected_std = np.std(segment)
        expected_slope = time_series_slope(segment)

        # `DataFrame.filter(like=...)` is a plain substring match, not a glob.
        # The pattern used to start with "*", which matched no column and made
        # the mean assertion pass vacuously on an empty array.
        actual_means = Xt.filter(like=f'_{start}_{end}_mean').values
        actual_stds = Xt.filter(like=f'_{start}_{end}_std').values
        actual_slopes = Xt.filter(
            like=f'_{start}_{end}_time_series_slope').values

        assert np.all(actual_means == expected_mean)
        assert np.all(actual_stds == expected_std)
        assert np.all(actual_slopes == expected_slope)
def test_output_format_dim(n_timepoints, n_instances, n_intervals):
    """Check type, row count and column count of segmenter output.

    The expected number of columns depends on ``n_intervals``: a float is a
    fraction of the series length, an int is taken as is, and 'sqrt' / 'log'
    apply that function to the series length (at least 1 in the computed
    cases). No column count is checked for 'random' or any other setting.
    """
    X = generate_df_from_array(np.ones(n_timepoints),
                               n_rows=n_instances, n_cols=1)
    segmenter = RandomIntervalSegmenter(n_intervals=n_intervals)
    Xt = segmenter.fit_transform(X)

    # Number of rows and output type.
    assert isinstance(Xt, pd.DataFrame)
    assert Xt.shape[0] == X.shape[0]

    # Number of generated intervals/columns.
    if n_intervals == 'random':
        return

    setting_type = type(n_intervals)
    if np.issubdtype(setting_type, np.floating):
        expected_cols = np.maximum(1, int(n_timepoints * n_intervals))
    elif np.issubdtype(setting_type, np.integer):
        expected_cols = n_intervals
    elif n_intervals == 'sqrt':
        expected_cols = np.maximum(1, int(np.sqrt(n_timepoints)))
    elif n_intervals == 'log':
        expected_cols = np.maximum(1, int(np.log(n_timepoints)))
    else:
        # Unrecognised setting: nothing further to check.
        return

    assert Xt.shape[1] == expected_cols
def test_early_trans_fail():
    """Calling transform on an unfitted PCATransformer must fail.

    The expected exception is NotFittedError.
    """
    unfitted = PCATransformer(n_components=1)
    X = generate_df_from_array(np.ones(10), n_rows=1, n_cols=1)

    with pytest.raises(NotFittedError):
        unfitted.transform(X)
def test_metric_params():
    """Check how ShapeDTW maps ``metric_params`` onto its transformers.

    Descriptor names and parameter keys are passed in mixed upper/lower case
    throughout. Each section checks the default parameter values and the
    values obtained from custom ``metric_params`` for one shape descriptor.
    The final check expects a ValueError for the key "num_intervals" used
    with the "paa" descriptor.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)
    y = np.zeros(10)

    def _check_hog1d(estimator, first_name, n_intervals, n_bins, scale):
        # One lookup per attribute, with the same descriptor spellings.
        hog_intervals = estimator._get_transformer(first_name).num_intervals
        assert hog_intervals == n_intervals
        assert estimator._get_transformer("hOG1d").num_bins == n_bins
        assert estimator._get_transformer("hog1d").scaling_factor == scale

    # Raw shape descriptor.
    shp = ShapeDTW()
    assert shp._get_transformer("rAw") is None

    # PAA shape descriptor.
    shp = ShapeDTW(metric_params={"num_intERvals_paa": 3})
    assert shp._get_transformer("pAA").num_intervals == 3
    shp = ShapeDTW()
    assert shp._get_transformer("pAA").num_intervals == 8
    assert isinstance(shp._get_transformer("paa"), PAA)

    # DWT shape descriptor (default checked on the estimator from above).
    assert shp._get_transformer("dWt").num_levels == 3
    shp = ShapeDTW(metric_params={"num_LEvEls_dwt": 5})
    assert shp._get_transformer("Dwt").num_levels == 5
    assert isinstance(shp._get_transformer("dwt"), DWTTransformer)

    # Slope shape descriptor.
    shp = ShapeDTW()
    assert shp._get_transformer("sLoPe").num_intervals == 8
    shp = ShapeDTW(metric_params={"num_inTErvals_slope": 2})
    assert shp._get_transformer("slope").num_intervals == 2
    assert isinstance(shp._get_transformer("slope"), SlopeTransformer)

    # Derivative shape descriptor.
    shp = ShapeDTW()
    assert isinstance(shp._get_transformer("derivative"),
                      DerivativeSlopeTransformer)

    # HOG1D shape descriptor: defaults first.
    _check_hog1d(shp, "hOG1d", 2, 8, 0.1)

    # HOG1D with one, two and then all three custom parameters.
    # Each entry: (metric_params, num_intervals, num_bins, scaling_factor).
    hog1d_cases = [
        ({"NUM_intervals_hog1d": 5}, 5, 8, 0.1),
        ({"nUM_BinS_hog1d": 63}, 2, 63, 0.1),
        ({"scaling_factor_hog1d": 0.5}, 2, 8, 0.5),
        ({"NUM_intervals_hog1d": 5, "nUM_BinS_hog1d": 63}, 5, 63, 0.1),
        ({"NUM_bins_hog1d": 63, "scaling_factor_hog1d": 0.5}, 2, 63, 0.5),
        ({"scaling_factor_hog1d": 0.5, "nUM_intervals_hog1d": 5},
         5, 8, 0.5),
        ({"scaling_factor_hog1d": 0.5, "nUM_intervals_hog1d": 5,
          "num_bins_hog1d": 63}, 5, 63, 0.5),
    ]
    for params, n_intervals, n_bins, scale in hog1d_cases:
        shp = ShapeDTW(metric_params=params)
        _check_hog1d(shp, "hoG1d", n_intervals, n_bins, scale)

    shp = ShapeDTW()
    assert isinstance(shp._get_transformer("hog1d"), HOG1DTransformer)

    # Compound shape descriptor (mix upper and lower cases).
    shp = ShapeDTW(shape_descriptor_function="compound",
                   shape_descriptor_functions=["raw", "derivative"],
                   metric_params={"weighting_FACtor": 20})
    shp.fit(X, y)
    refitted = shp.fit(X, y)
    assert refitted.weighting_factor == 20

    with pytest.raises(ValueError):
        ShapeDTW(shape_descriptor_function="paa",
                 metric_params={"num_intervals": 8}).fit(X, y)
def test_fails_if_multivariate():
    """SlidingWindowSegmenter must raise ValueError on a 5-column input."""
    X = generate_df_from_array(np.ones(5), n_rows=10, n_cols=5)

    with pytest.raises(ValueError):
        segmenter = SlidingWindowSegmenter()
        segmenter.fit(X).transform(X)