def test_output_dimensions():
    """Check the shape of the PAA output for univariate and multivariate data.

    With 10 instances and ``num_intervals=5`` the transformed frame must keep
    one row per instance and one column per input column, and the first cell
    must hold a series of length 5.
    """
    # 1 column is the univariate case, 5 columns the multivariate one.
    for n_dims in (1, 5):
        X = _make_nested_from_array(np.ones(12), n_instances=10, n_columns=n_dims)
        transformed = PAA(num_intervals=5).fit(X).transform(X)

        # Dimensions of the generated dataframe.
        cell_length = transformed.iloc[0, 0].shape[0]
        n_rows, n_cols = transformed.shape

        assert cell_length == 5
        assert n_rows == 10
        assert n_cols == n_dims
def test_output_of_transformer():
    """Check DWTTransformer output against hand-computed coefficients.

    Two univariate, single-instance inputs are transformed with
    ``num_levels=2`` and the result is compared with the expected coefficient
    vector for each input.
    """
    X = _make_nested_from_array(
        np.array([4, 6, 10, 12, 8, 6, 5, 5]), n_instances=1, n_columns=1
    )
    d = DWTTransformer(num_levels=2).fit(X)
    res = d.transform(X)
    orig = convert_list_to_dataframe(
        [[16, 12, -6, 2, -math.sqrt(2), -math.sqrt(2), math.sqrt(2), 0]]
    )
    orig.columns = X.columns
    assert check_if_dataframes_are_equal(res, orig)

    # Odd-length input (9 points); the expected vector holds 8 coefficients.
    # NOTE(review): presumably the trailing sample is dropped by the
    # transformer -- confirm against DWTTransformer.
    X = _make_nested_from_array(
        np.array([-5, 2.5, 1, 3, 10, -1.5, 6, 12, -3]), n_instances=1, n_columns=1
    )
    d = d.fit(X)
    res = d.transform(X)
    orig = convert_list_to_dataframe(
        [
            [
                0.75000,
                13.25000,
                -3.25000,
                -4.75000,
                -5.303301,
                -1.414214,
                8.131728,
                -4.242641,
            ]
        ]
    )
    # The expected frame was previously built but never compared, so this
    # second case verified nothing; check it the same way as the first case.
    orig.columns = X.columns
    assert check_if_dataframes_are_equal(res, orig)
def test_output_of_transformer():
    """Check SlopeTransformer output against hand-computed slopes.

    Two univariate, single-instance series are transformed with
    ``num_intervals=2``; the same transformer object is refitted for the
    second series.
    """
    cases = (
        (
            [4, 6, 10, 12, 8, 6, 5, 5],
            [(5 + math.sqrt(41)) / 4, (1 + math.sqrt(101)) / -10],
        ),
        (
            [-5, 2.5, 1, 3, 10, -1.5, 6, 12, -3, 0.2],
            [
                (104.8 + math.sqrt(14704.04)) / 61,
                (143.752 + math.sqrt(20790.0775)) / -11.2,
            ],
        ),
    )

    slope = SlopeTransformer(num_intervals=2)
    for series, expected_slopes in cases:
        X = _make_nested_from_array(np.array(series), n_instances=1, n_columns=1)
        slope = slope.fit(X)
        actual = slope.transform(X)

        expected = convert_list_to_dataframe([expected_slopes])
        expected.columns = X.columns
        assert check_if_dataframes_are_equal(actual, expected)
def test_dwt_performs_correcly_along_each_dim():
    """Check that DWTTransformer yields the same coefficients per column.

    The same series is placed in two columns, so both rows of the expected
    frame carry the same coefficient vector.
    """
    X = _make_nested_from_array(
        np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), n_instances=1, n_columns=2
    )
    actual = DWTTransformer(num_levels=3).fit(X).transform(X)

    root_two = math.sqrt(2)
    coefficients = [9 * root_two, -4 * root_two, -2, -2] + [-root_two / 2] * 5

    # One independent copy of the coefficient vector per column.
    expected = convert_list_to_dataframe([list(coefficients) for _ in range(2)])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
def test_output_of_transformer():
    """Check SlidingWindowSegmenter output for window lengths 1, 5 and 10.

    The expected windows for the series 1..6 repeat the first and last values
    where a window extends past either end of the series.
    """
    X = _make_nested_from_array(
        np.array([1, 2, 3, 4, 5, 6]), n_instances=1, n_columns=1
    )

    expected_by_window = (
        (1, [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]),
        (
            5,
            [
                [1.0, 1.0, 1.0, 2.0, 3.0],
                [1.0, 1.0, 2.0, 3.0, 4.0],
                [1.0, 2.0, 3.0, 4.0, 5.0],
                [2.0, 3.0, 4.0, 5.0, 6.0],
                [3.0, 4.0, 5.0, 6.0, 6.0],
                [4.0, 5.0, 6.0, 6.0, 6.0],
            ],
        ),
        (
            10,
            [
                [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0],
                [1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                [1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0],
                [1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0, 6.0],
                [1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0, 6.0, 6.0],
                [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0],
            ],
        ),
    )

    for window, rows in expected_by_window:
        segmenter = SlidingWindowSegmenter(window_length=window).fit(X)
        actual = segmenter.transform(X)
        expected = convert_list_to_dataframe(rows)
        assert check_if_dataframes_are_equal(actual, expected)
def test_no_levels_does_no_change():
    """Check that DWTTransformer with ``num_levels=0`` returns its input."""
    series = np.array([1, 2, 3, 4, 5, 56])
    X = _make_nested_from_array(series, n_instances=1, n_columns=1)

    transformer = DWTTransformer(num_levels=0).fit(X)
    unchanged = transformer.transform(X)

    assert check_if_dataframes_are_equal(unchanged, X)
def test_bad_input_args(bad_window_length):
    """Check that SlidingWindowSegmenter rejects an invalid window length.

    A value that is not an ``int`` must raise ``TypeError``; an ``int`` value
    must raise ``ValueError``.
    """
    X = _make_nested_from_array(np.ones(10), n_instances=10, n_columns=1)

    expected_error = ValueError if isinstance(bad_window_length, int) else TypeError
    with pytest.raises(expected_error):
        SlidingWindowSegmenter(window_length=bad_window_length).fit(X).transform(X)
def test_bad_scaling_factor(bad_scaling_factor):
    """Check how HOG1DTransformer handles the given scaling factor.

    A non-numeric value must raise ``TypeError``; any numeric value must fit
    and transform without raising.
    """
    X = _make_nested_from_array(np.ones(10), n_instances=10, n_columns=1)

    if isinstance(bad_scaling_factor, numbers.Number):
        # Numeric scaling factors are accepted: this must simply not raise.
        HOG1DTransformer(scaling_factor=bad_scaling_factor).fit(X).transform(X)
        return

    with pytest.raises(TypeError):
        HOG1DTransformer(scaling_factor=bad_scaling_factor).fit(X).transform(X)
def test_bad_input_args(bad_components):
    """Check that PCATransformer.fit rejects an invalid ``n_components``.

    A string value must raise ``TypeError``; any other value must raise
    ``ValueError``.
    """
    X = _make_nested_from_array(np.ones(10), n_instances=10, n_columns=1)

    expected_error = TypeError if isinstance(bad_components, str) else ValueError
    with pytest.raises(expected_error):
        PCATransformer(n_components=bad_components).fit(X)
def test_bad_input_args(bad_num_intervals):
    """Check that PAA rejects an invalid ``num_intervals``.

    A value that is not an ``int`` must raise ``TypeError``; an ``int`` value
    must raise ``ValueError``.
    """
    X = _make_nested_from_array(np.ones(10), n_instances=10, n_columns=1)

    expected_error = ValueError if isinstance(bad_num_intervals, int) else TypeError
    with pytest.raises(expected_error):
        PAA(num_intervals=bad_num_intervals).fit(X).transform(X)
def test_bad_num_bins(bad_num_bins):
    """Check that HOG1DTransformer rejects an invalid ``num_bins``.

    A value that is not an ``int`` must raise ``TypeError``; an ``int`` value
    must raise ``ValueError``.
    """
    X = _make_nested_from_array(np.ones(10), n_instances=10, n_columns=1)

    expected_error = ValueError if isinstance(bad_num_bins, int) else TypeError
    with pytest.raises(expected_error):
        HOG1DTransformer(num_bins=bad_num_bins).fit(X).transform(X)
def test_subsequence_length(bad_subsequence_length):
    """Check that ShapeDTW.fit rejects an invalid ``subsequence_length``.

    A value that is not an ``int`` must raise ``TypeError``; an ``int`` value
    must raise ``ValueError``.
    """
    X = _make_nested_from_array(np.ones(10), n_instances=10, n_columns=1)
    y = np.zeros(10)

    if isinstance(bad_subsequence_length, int):
        expected_error = ValueError
    else:
        expected_error = TypeError

    with pytest.raises(expected_error):
        ShapeDTW(subsequence_length=bad_subsequence_length).fit(X, y)
def test_paa_performs_correcly_along_each_dim():
    """Check that PAA yields the same result for each of two equal columns."""
    series = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    X = _make_nested_from_array(series, n_instances=1, n_columns=2)

    actual = PAA(num_intervals=3).fit(X).transform(X)

    # Both columns hold the same series, so both expected rows are equal.
    expected = convert_list_to_dataframe([[2.2, 5.5, 8.8], [2.2, 5.5, 8.8]])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
def test_output_of_transformer():
    """Check the PAA output for the series 1..10 with three intervals."""
    series = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    X = _make_nested_from_array(series, n_instances=1, n_columns=1)

    actual = PAA(num_intervals=3).fit(X).transform(X)

    expected = convert_list_to_dataframe([[2.2, 5.5, 8.8]])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
def test_shape_descriptor_function(bad_sdf):
    """Check that ShapeDTW.fit rejects an invalid ``shape_descriptor_function``.

    A value that is not a ``str`` must raise ``TypeError``; a ``str`` value
    must raise ``ValueError``.
    """
    X = _make_nested_from_array(np.ones(10), n_instances=10, n_columns=1)
    y = np.zeros(10)

    expected_error = ValueError if isinstance(bad_sdf, str) else TypeError
    with pytest.raises(expected_error):
        ShapeDTW(shape_descriptor_function=bad_sdf).fit(X, y)
def test_output_of_transformer():
    """Check HOG1DTransformer output with default parameters on two series.

    The same transformer object is refitted for the second series.
    """
    cases = (
        (
            [4, 6, 10, 12, 8, 6, 5, 5],
            [0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0],
        ),
        (
            [-5, 2.5, 1, 3, 10, -1.5, 6, 12, -3, 0.2],
            [0, 0, 0, 0, 4, 1, 0, 0, 0, 0, 2, 0, 2, 1, 0, 0],
        ),
    )

    hog = HOG1DTransformer()
    for series, histogram in cases:
        X = _make_nested_from_array(np.array(series), n_instances=1, n_columns=1)
        hog = hog.fit(X)
        actual = hog.transform(X)

        expected = convert_list_to_dataframe([histogram])
        expected.columns = X.columns
        assert check_if_dataframes_are_equal(actual, expected)
def test_output_format_dim(n_instances, n_timepoints, n_intervals, features):
    """Check the output type, row count and values of the feature extractor.

    The input consists of all-ones series, and every extracted value is
    expected to equal one.
    """
    X = _make_nested_from_array(
        np.ones(n_timepoints), n_instances=n_instances, n_columns=1
    )
    expected_rows = X.shape[0]

    extractor = RandomIntervalFeatureExtractor(
        n_intervals=n_intervals, features=features
    )
    Xt = extractor.fit_transform(X)

    assert isinstance(Xt, pd.DataFrame)
    assert Xt.shape[0] == expected_rows
    assert np.array_equal(Xt.values, np.ones(Xt.shape))
def test_shape_descriptor_functions(bad_sdfs):
    """Check the compound descriptor's handling of ``shape_descriptor_functions``.

    Unless exactly two functions are given, fitting must raise ``ValueError``;
    with exactly two, fitting must not raise.
    """
    X = _make_nested_from_array(np.ones(10), n_instances=10, n_columns=1)
    y = np.zeros(10)

    if len(bad_sdfs) == 2:
        # Exactly two descriptor functions: this must simply not raise.
        ShapeDTW(
            shape_descriptor_function="compound",
            shape_descriptor_functions=bad_sdfs,
        ).fit(X, y)
        return

    with pytest.raises(ValueError):
        ShapeDTW(
            shape_descriptor_function="compound",
            shape_descriptor_functions=bad_sdfs,
        ).fit(X, y)
def test_slope_performs_correcly_along_each_dim():
    """Check that SlopeTransformer yields the same slopes for each column.

    The same series is placed in two columns, so both rows of the expected
    frame carry the same pair of slopes.
    """
    X = _make_nested_from_array(
        np.array([4, 6, 10, 12, 8, 6, 5, 5]), n_instances=1, n_columns=2
    )
    actual = SlopeTransformer(num_intervals=2).fit(X).transform(X)

    first_slope = (5 + math.sqrt(41)) / 4
    second_slope = (1 + math.sqrt(101)) / -10
    expected = convert_list_to_dataframe(
        [[first_slope, second_slope], [first_slope, second_slope]]
    )
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
def test_hog1d_performs_correcly_along_each_dim():
    """Check that HOG1DTransformer yields the same histogram for each column."""
    X = _make_nested_from_array(
        np.array([4, 6, 10, 12, 8, 6, 5, 5]), n_instances=1, n_columns=2
    )
    actual = HOG1DTransformer().fit(X).transform(X)

    histogram = [0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0]
    # One independent copy of the histogram per column.
    expected = convert_list_to_dataframe([list(histogram) for _ in range(2)])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
def test_output_dimensions(num_bins, corr_series_length):
    """Check the shape of the HOG1DTransformer output for a given ``num_bins``.

    The first cell must have length ``corr_series_length`` and the frame must
    keep 10 rows and 1 column.
    """
    X = _make_nested_from_array(np.ones(13), n_instances=10, n_columns=1)
    transformed = HOG1DTransformer(num_bins=num_bins).fit(X).transform(X)

    # Dimensions of the generated dataframe.
    cell_length = transformed.iloc[0, 0].shape[0]
    n_rows, n_cols = transformed.shape

    assert cell_length == corr_series_length
    assert n_rows == 10
    assert n_cols == 1
def test_row_transformer_sklearn_transfomer():
    """Check SeriesToSeriesRowTransformer wrapping a StandardScaler.

    The output must keep the input shape, hold a series or array in each
    cell, and have a first cell with mean close to 0 and standard deviation
    close to 1.
    """
    loc = 10
    samples = np.random.normal(loc=loc, scale=5, size=(100,))
    X = _make_nested_from_array(samples, n_instances=10, n_columns=1)

    scaler = StandardScaler(with_mean=True, with_std=True)
    row_transformer = SeriesToSeriesRowTransformer(scaler, check_transformer=False)
    Xt = row_transformer.fit_transform(X)

    assert Xt.shape == X.shape

    first_cell = Xt.iloc[0, 0]
    # Series-to-series transform: each cell is still a series/array.
    assert isinstance(first_cell, (pd.Series, np.ndarray))
    # Standardisation.
    np.testing.assert_almost_equal(first_cell.mean(), 0)
    np.testing.assert_almost_equal(first_cell.std(), 1, decimal=2)
def test_output_dimensions(time_series_length, window_length):
    """Check the shape of the SlidingWindowSegmenter output.

    The output must have one column per time point, 10 rows, and a first cell
    of length ``window_length``.
    """
    X = _make_nested_from_array(
        np.ones(time_series_length), n_instances=10, n_columns=1
    )
    segmented = SlidingWindowSegmenter(window_length=window_length).fit(X).transform(X)

    # Dimensions of the generated dataframe.
    cell_length = segmented.iloc[0, 0].shape[0]
    n_rows, n_cols = segmented.shape

    assert cell_length == window_length
    assert n_rows == 10
    assert n_cols == time_series_length
def test_output_format_dim(n_timepoints, n_instances, n_intervals):
    """Check the output type, row count and column count of the segmenter.

    The number of columns is only checked when ``n_intervals`` is a float, an
    integer, ``"sqrt"`` or ``"log"``; for ``"random"`` (or anything else) no
    column count is asserted.
    """
    X = _make_nested_from_array(
        np.ones(n_timepoints), n_instances=n_instances, n_columns=1
    )
    Xt = RandomIntervalSegmenter(n_intervals=n_intervals).fit_transform(X)

    # Number of rows and output type.
    assert isinstance(Xt, pd.DataFrame)
    assert Xt.shape[0] == X.shape[0]

    # Number of generated intervals/columns.
    if n_intervals == "random":
        return

    interval_type = type(n_intervals)
    if np.issubdtype(interval_type, np.floating):
        expected_cols = np.maximum(1, int(n_timepoints * n_intervals))
    elif np.issubdtype(interval_type, np.integer):
        expected_cols = n_intervals
    elif n_intervals == "sqrt":
        expected_cols = np.maximum(1, int(np.sqrt(n_timepoints)))
    elif n_intervals == "log":
        expected_cols = np.maximum(1, int(np.log(n_timepoints)))
    else:
        return

    assert Xt.shape[1] == expected_cols
def test_fails_if_multivariate():
    """Check that SlidingWindowSegmenter raises ValueError on 5-column input."""
    multivariate = _make_nested_from_array(np.ones(5), n_instances=10, n_columns=5)

    with pytest.raises(ValueError):
        SlidingWindowSegmenter().fit(multivariate).transform(multivariate)
def test_early_trans_fail():
    """Check that PCATransformer.transform raises NotFittedError before fit."""
    X = _make_nested_from_array(np.ones(10), n_instances=1, n_columns=1)
    unfitted = PCATransformer(n_components=1)

    with pytest.raises(NotFittedError):
        unfitted.transform(X)
def test_metric_params():
    """Check how ShapeDTW maps ``metric_params`` onto its shape descriptors.

    Descriptor names and parameter keys are given in mixed case throughout.
    For each descriptor the test checks the default parameter values, the
    values overridden through ``metric_params`` and the transformer type.
    """
    X = _make_nested_from_array(np.ones(10), n_instances=10, n_columns=1)
    y = np.zeros(10)

    # Raw shape descriptor.
    shp = ShapeDTW()
    assert shp._get_transformer("rAw") is None

    # PAA shape descriptor.
    shp = ShapeDTW(metric_params={"num_intERvals_paa": 3})
    assert shp._get_transformer("pAA").num_intervals == 3
    shp = ShapeDTW()
    assert shp._get_transformer("pAA").num_intervals == 8
    assert isinstance(shp._get_transformer("paa"), PAA)

    # DWT shape descriptor.
    assert shp._get_transformer("dWt").num_levels == 3
    shp = ShapeDTW(metric_params={"num_LEvEls_dwt": 5})
    assert shp._get_transformer("Dwt").num_levels == 5
    assert isinstance(shp._get_transformer("dwt"), DWTTransformer)

    # Slope shape descriptor.
    shp = ShapeDTW()
    assert shp._get_transformer("sLoPe").num_intervals == 8
    shp = ShapeDTW(metric_params={"num_inTErvals_slope": 2})
    assert shp._get_transformer("slope").num_intervals == 2
    assert isinstance(shp._get_transformer("slope"), SlopeTransformer)

    # Derivative shape descriptor.
    shp = ShapeDTW()
    assert isinstance(shp._get_transformer("derivative"), DerivativeSlopeTransformer)

    # HOG1D shape descriptor with default parameters.
    assert shp._get_transformer("hOG1d").num_intervals == 2
    assert shp._get_transformer("hOG1d").num_bins == 8
    assert shp._get_transformer("hog1d").scaling_factor == 0.1

    # HOG1D with custom parameters:
    # (metric_params, expected num_intervals, num_bins, scaling_factor).
    hog1d_cases = (
        # Only one custom parameter.
        ({"NUM_intervals_hog1d": 5}, 5, 8, 0.1),
        ({"nUM_BinS_hog1d": 63}, 2, 63, 0.1),
        ({"scaling_factor_hog1d": 0.5}, 2, 8, 0.5),
        # Two custom parameters.
        ({"NUM_intervals_hog1d": 5, "nUM_BinS_hog1d": 63}, 5, 63, 0.1),
        ({"NUM_bins_hog1d": 63, "scaling_factor_hog1d": 0.5}, 2, 63, 0.5),
        ({"scaling_factor_hog1d": 0.5, "nUM_intervals_hog1d": 5}, 5, 8, 0.5),
        # All three custom parameters.
        (
            {
                "scaling_factor_hog1d": 0.5,
                "nUM_intervals_hog1d": 5,
                "num_bins_hog1d": 63,
            },
            5,
            63,
            0.5,
        ),
    )
    for params, n_intervals, n_bins, scale in hog1d_cases:
        shp = ShapeDTW(metric_params=params)
        assert shp._get_transformer("hoG1d").num_intervals == n_intervals
        assert shp._get_transformer("hOG1d").num_bins == n_bins
        assert shp._get_transformer("hog1d").scaling_factor == scale

    shp = ShapeDTW()
    assert isinstance(shp._get_transformer("hog1d"), HOG1DTransformer)

    # Compound shape descriptor (mix upper and lower cases).
    shp = ShapeDTW(
        shape_descriptor_function="compound",
        shape_descriptor_functions=["raw", "derivative"],
        metric_params={"weighting_FACtor": 20},
    )
    shp.fit(X, y)
    refitted = shp.fit(X, y)
    assert refitted.weighting_factor == 20

    # A parameter key without a descriptor suffix must be rejected on fit.
    with pytest.raises(ValueError):
        ShapeDTW(
            shape_descriptor_function="paa", metric_params={"num_intervals": 8}
        ).fit(X, y)
def test_bad_input_args(bad_interval):
    """Check that RandomIntervalSegmenter.fit raises ValueError.

    The segmenter is built with ``n_intervals=bad_interval`` and fitted on a
    two-column input.
    """
    X = _make_nested_from_array(np.ones(10), n_instances=10, n_columns=2)
    segmenter = RandomIntervalSegmenter(n_intervals=bad_interval)

    with pytest.raises(ValueError):
        segmenter.fit(X)