示例#1
0
def test_output_of_transformer():
    """Check SlopeTransformer output for two fixed univariate series.

    Each series is wrapped in a one-row, one-column frame, transformed with
    ``num_intervals=2`` and compared with the expected two values.
    """
    first_series = np.array([4, 6, 10, 12, 8, 6, 5, 5])
    X = generate_df_from_array(first_series, n_rows=1, n_cols=1)

    transformer = SlopeTransformer(num_intervals=2).fit(X)
    actual = transformer.transform(X)

    expected_values = [(5 + math.sqrt(41)) / 4,
                       (1 + math.sqrt(101)) / -10]
    expected = convert_list_to_dataframe([expected_values])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)

    # Second scenario: the same transformer instance is refitted.
    second_series = np.array([-5, 2.5, 1, 3, 10, -1.5, 6, 12, -3, 0.2])
    X = generate_df_from_array(second_series, n_rows=1, n_cols=1)
    transformer = transformer.fit(X)
    actual = transformer.transform(X)

    expected_values = [(104.8 + math.sqrt(14704.04)) / 61,
                       (143.752 + math.sqrt(20790.0775)) / -11.2]
    expected = convert_list_to_dataframe([expected_values])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
示例#2
0
def test_output_dimensions():
    """Check the shape of PAA output for univariate and multivariate input.

    For ten instances and ``num_intervals=5`` the output must keep the row
    and column counts of the input, and each cell must hold five values.
    """
    n_instances = 10
    n_intervals = 5

    # One column is the univariate case, five columns the multivariate one.
    for n_dims in (1, 5):
        X = generate_df_from_array(np.ones(12),
                                   n_rows=n_instances,
                                   n_cols=n_dims)

        transformed = PAA(num_intervals=n_intervals).fit(X).transform(X)

        # Length of the series stored in the first cell of the output.
        series_length = transformed.iloc[0, 0].shape[0]

        assert series_length == n_intervals
        assert transformed.shape[0] == n_instances
        assert transformed.shape[1] == n_dims
示例#3
0
def test_tabularize():
    """Check tabularize output shape and index for series and frame input."""
    len_first, dims_first = 20, 3
    nested = generate_df_from_array(np.random.normal(size=len_first),
                                    n_rows=10,
                                    n_cols=dims_first)

    # A single nested column, returned as an array.
    flat_array = tabularize(nested.iloc[:, 0], return_array=True)
    assert flat_array.shape[0] == nested.shape[0]
    assert flat_array.shape[1] == len_first

    # A single nested column, returned with the default output type.
    flat_frame = tabularize(nested.iloc[:, 0])
    assert flat_frame.index.equals(nested.index)

    # Append columns whose series have a different length.
    len_second, dims_second = 13, 2
    extra = generate_df_from_array(np.random.normal(size=len_second),
                                   n_rows=10,
                                   n_cols=dims_second)
    nested = pd.concat([nested, extra], axis=1)
    total_width = (dims_first * len_first) + (dims_second * len_second)

    flat_array = tabularize(nested, return_array=True)
    assert flat_array.shape[0] == nested.shape[0]
    assert flat_array.shape[1] == total_width

    flat_frame = tabularize(nested)
    assert flat_frame.index.equals(nested.index)
示例#4
0
def test_output_of_transformer():
    """Check SlidingWindowSegmenter output for window lengths 1, 5 and 10.

    The input is the single series 1..6; every case fits a fresh segmenter
    and compares its output with the expected windows listed below.
    """
    X = generate_df_from_array(np.array([1, 2, 3, 4, 5, 6]),
                               n_rows=1,
                               n_cols=1)

    # (window_length, expected windows) pairs.
    cases = [
        (1, [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]),
        (5, [[1.0, 1.0, 1.0, 2.0, 3.0],
             [1.0, 1.0, 2.0, 3.0, 4.0],
             [1.0, 2.0, 3.0, 4.0, 5.0],
             [2.0, 3.0, 4.0, 5.0, 6.0],
             [3.0, 4.0, 5.0, 6.0, 6.0],
             [4.0, 5.0, 6.0, 6.0, 6.0]]),
        (10, [[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0],
              [1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
              [1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0],
              [1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0, 6.0],
              [1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0, 6.0, 6.0],
              [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0]]),
    ]

    for window_length, expected_windows in cases:
        segmenter = SlidingWindowSegmenter(window_length=window_length).fit(X)
        actual = segmenter.transform(X)
        expected = convert_list_to_dataframe(expected_windows)
        assert check_if_dataframes_are_equal(actual, expected)
示例#5
0
def test_no_levels_does_no_change():
    """Check that DWTTransformer with ``num_levels=0`` returns its input."""
    series = np.array([1, 2, 3, 4, 5, 56])
    X = generate_df_from_array(series, n_rows=1, n_cols=1)

    transformed = DWTTransformer(num_levels=0).fit(X).transform(X)

    assert check_if_dataframes_are_equal(transformed, X)
示例#6
0
def test_bad_num_bins(bad_num_bins):
    """Check that HOG1DTransformer rejects an invalid ``num_bins``.

    A non-integer value must raise ``TypeError``; an integer value supplied
    here must raise ``ValueError``.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)

    if isinstance(bad_num_bins, int):
        expected_error = ValueError
    else:
        expected_error = TypeError

    with pytest.raises(expected_error):
        HOG1DTransformer(num_bins=bad_num_bins).fit(X).transform(X)
示例#7
0
def test_bad_input_args(bad_num_intervals):
    """Check that PAA rejects an invalid ``num_intervals``.

    A non-integer value must raise ``TypeError``; an integer value supplied
    here must raise ``ValueError``.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)

    if isinstance(bad_num_intervals, int):
        expected_error = ValueError
    else:
        expected_error = TypeError

    with pytest.raises(expected_error):
        PAA(num_intervals=bad_num_intervals).fit(X).transform(X)
def test_bad_input_args(bad_components):
    """Check that PCATransformer rejects an invalid ``n_components`` on fit.

    A string value must raise ``TypeError``; any other value supplied here
    must raise ``ValueError``.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)

    expected_error = TypeError if isinstance(bad_components, str) \
        else ValueError

    with pytest.raises(expected_error):
        PCATransformer(n_components=bad_components).fit(X)
示例#9
0
def test_shape_descriptor_function(bad_sdf):
    """Check that ShapeDTW rejects an invalid ``shape_descriptor_function``.

    A non-string value must raise ``TypeError`` on fit; a string value
    supplied here must raise ``ValueError``.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)
    y = np.zeros(10)

    if isinstance(bad_sdf, str):
        expected_error = ValueError
    else:
        expected_error = TypeError

    with pytest.raises(expected_error):
        ShapeDTW(shape_descriptor_function=bad_sdf).fit(X, y)
示例#10
0
def test_output_of_transformer():
    """Check PAA output for the series 1..10 split into three intervals."""
    series = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    X = generate_df_from_array(series, n_rows=1, n_cols=1)

    actual = PAA(num_intervals=3).fit(X).transform(X)

    expected = convert_list_to_dataframe([[2.2, 5.5, 8.8]])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
示例#11
0
def test_paa_performs_correcly_along_each_dim():
    """Check PAA output when the same series is present in two columns."""
    series = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    X = generate_df_from_array(series, n_rows=1, n_cols=2)

    actual = PAA(num_intervals=3).fit(X).transform(X)

    # Both columns hold the same series, so both expect the same values.
    per_column = [2.2, 5.5, 8.8]
    expected = convert_list_to_dataframe([list(per_column),
                                          list(per_column)])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
示例#12
0
def test_subsequence_length(bad_subsequence_length):
    """Check that ShapeDTW rejects an invalid ``subsequence_length``.

    A non-integer value must raise ``TypeError`` on fit; an integer value
    supplied here must raise ``ValueError``.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)
    y = np.zeros(10)

    if isinstance(bad_subsequence_length, int):
        expected_error = ValueError
    else:
        expected_error = TypeError

    with pytest.raises(expected_error):
        ShapeDTW(subsequence_length=bad_subsequence_length).fit(X, y)
示例#13
0
def test_bad_input_args(bad_window_length):
    """Check that SlidingWindowSegmenter rejects an invalid window length.

    A non-integer ``window_length`` must raise ``TypeError``; an integer
    value supplied here must raise ``ValueError``.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)

    if isinstance(bad_window_length, int):
        expected_error = ValueError
    else:
        expected_error = TypeError

    # Construction, fit and transform all happen inside the raises block.
    with pytest.raises(expected_error):
        segmenter = SlidingWindowSegmenter(window_length=bad_window_length)
        segmenter.fit(X).transform(X)
示例#14
0
def test_bad_scaling_factor(bad_scaling_factor):
    """Check how HOG1DTransformer handles the supplied ``scaling_factor``.

    A non-numeric value must raise ``TypeError``; a numeric value must run
    through fit and transform without raising.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)

    def _fit_and_transform():
        transformer = HOG1DTransformer(scaling_factor=bad_scaling_factor)
        transformer.fit(X).transform(X)

    if isinstance(bad_scaling_factor, numbers.Number):
        # Any number is accepted, so this call is expected to succeed.
        _fit_and_transform()
        return

    with pytest.raises(TypeError):
        _fit_and_transform()
示例#15
0
def test_shape_descriptor_functions(bad_sdfs):
    """Check the compound descriptor list length accepted by ShapeDTW.

    With ``shape_descriptor_function="compound"`` a list that does not have
    exactly two entries must raise ``ValueError`` on fit; a two-entry list
    must fit without raising.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)
    y = np.zeros(10)

    def _fit_compound():
        ShapeDTW(shape_descriptor_function="compound",
                 shape_descriptor_functions=bad_sdfs).fit(X, y)

    if len(bad_sdfs) == 2:
        _fit_compound()
    else:
        with pytest.raises(ValueError):
            _fit_compound()
def test_output_format_dim(n_instances, n_timepoints, n_intervals, features):
    """Check type, row count and values of extracted interval features.

    The input is a single column of all-ones series; the test asserts that
    the output is a DataFrame with one row per instance and that every
    output value equals one.
    """
    X = generate_df_from_array(np.ones(n_timepoints),
                               n_rows=n_instances,
                               n_cols=1)
    expected_rows = X.shape[0]

    extractor = RandomIntervalFeatureExtractor(n_intervals=n_intervals,
                                               features=features)
    Xt = extractor.fit_transform(X)

    assert isinstance(Xt, pd.DataFrame)
    assert Xt.shape[0] == expected_rows
    assert np.array_equal(Xt.values, np.ones(Xt.shape))
示例#17
0
def test_hog1d_performs_correcly_along_each_dim():
    """Check HOG1DTransformer output when two columns hold the same series."""
    series = np.array([4, 6, 10, 12, 8, 6, 5, 5])
    X = generate_df_from_array(series, n_rows=1, n_cols=2)

    actual = HOG1DTransformer().fit(X).transform(X)

    # Both columns hold the same series, so both expect the same values.
    per_column = [0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0]
    expected = convert_list_to_dataframe([list(per_column),
                                          list(per_column)])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
def test_slope_performs_correcly_along_each_dim():
    """Check SlopeTransformer output when two columns hold the same series."""
    series = np.array([4, 6, 10, 12, 8, 6, 5, 5])
    X = generate_df_from_array(series, n_rows=1, n_cols=2)

    actual = SlopeTransformer(num_intervals=2).fit(X).transform(X)

    # Both columns hold the same series, so both expect the same values.
    first_value = (5 + math.sqrt(41)) / 4
    second_value = (1 + math.sqrt(101)) / -10
    expected = convert_list_to_dataframe([[first_value, second_value],
                                          [first_value, second_value]])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
示例#19
0
def test_output_of_transformer():
    """Check HOG1DTransformer output for two fixed univariate series.

    A default-configured transformer is fitted on the first series and then
    refitted on the second; each output is compared with the expected
    sixteen values.
    """
    first_series = np.array([4, 6, 10, 12, 8, 6, 5, 5])
    X = generate_df_from_array(first_series, n_rows=1, n_cols=1)

    transformer = HOG1DTransformer().fit(X)
    actual = transformer.transform(X)
    expected = convert_list_to_dataframe(
        [[0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0]])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)

    # Second scenario: the same transformer instance is refitted.
    second_series = np.array([-5, 2.5, 1, 3, 10, -1.5, 6, 12, -3, 0.2])
    X = generate_df_from_array(second_series, n_rows=1, n_cols=1)
    transformer = transformer.fit(X)
    actual = transformer.transform(X)
    expected = convert_list_to_dataframe(
        [[0, 0, 0, 0, 4, 1, 0, 0, 0, 0, 2, 0, 2, 1, 0, 0]])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
示例#20
0
def test_row_transformer_sklearn_transfomer():
    """Check RowTransformer wrapping a StandardScaler.

    The output must keep the input shape, hold a series or array in its
    first cell, and that cell must have mean close to 0 and standard
    deviation close to 1.
    """
    loc = 10
    samples = np.random.normal(loc=loc, scale=5, size=(100,))
    X = generate_df_from_array(samples, n_rows=10, n_cols=1)

    scaler = StandardScaler(with_mean=True, with_std=True)
    Xt = RowTransformer(scaler).fit_transform(X)

    assert Xt.shape == X.shape

    first_cell = Xt.iloc[0, 0]
    # The transform must map a series to a series (or array).
    assert isinstance(first_cell, (pd.Series, np.ndarray))
    # The first cell must be standardised.
    np.testing.assert_almost_equal(first_cell.mean(), 0)
    np.testing.assert_almost_equal(first_cell.std(), 1, decimal=2)
示例#21
0
def test_output_of_transformer():
    """Check DWTTransformer output for two fixed univariate series.

    A transformer with ``num_levels=2`` is fitted on an eight-value series
    and then refitted on a nine-value series. In both cases the transformed
    frame is compared with the expected eight coefficients.
    """

    X = generate_df_from_array(np.array([4, 6, 10, 12, 8, 6, 5, 5]),
                               n_rows=1,
                               n_cols=1)

    d = DWTTransformer(num_levels=2).fit(X)
    res = d.transform(X)
    orig = convert_list_to_dataframe(
        [[16, 12, -6, 2, -math.sqrt(2), -math.sqrt(2),
          math.sqrt(2), 0]])
    orig.columns = X.columns
    assert check_if_dataframes_are_equal(res, orig)

    X = generate_df_from_array(np.array([-5, 2.5, 1, 3, 10, -1.5, 6, 12, -3]),
                               n_rows=1,
                               n_cols=1)
    d = d.fit(X)
    res = d.transform(X)
    orig = convert_list_to_dataframe([[
        0.75000, 13.25000, -3.25000, -4.75000, -5.303301, -1.414214, 8.131728,
        -4.242641
    ]])
    # Without this comparison the second scenario would verify nothing: the
    # expected frame would be built and then discarded.
    # NOTE(review): the expected values are rounded to six decimals, so this
    # relies on check_if_dataframes_are_equal comparing with a tolerance --
    # confirm against the helper.
    orig.columns = X.columns
    assert check_if_dataframes_are_equal(res, orig)
示例#22
0
def test_random_state():
    """Check that a fixed ``random_state`` makes segmentation repeatable.

    For each ``n_intervals`` setting, a reference output is produced once
    and then compared with the output of ``N_ITER`` freshly constructed
    segmenters that use the same seed.
    """
    X = generate_df_from_array(np.random.normal(size=10))
    seed = 1234

    def _segment(n_intervals):
        # Build a new segmenter every time so no fitted state is shared.
        segmenter = RandomIntervalSegmenter(n_intervals=n_intervals,
                                            random_state=seed)
        return segmenter.fit_transform(X)

    for n_intervals in [0.5, 10, 'sqrt', 'random', 'log']:
        reference = _segment(n_intervals)
        for _ in range(N_ITER):
            repeated = _segment(n_intervals)
            np.testing.assert_array_equal(
                tabularize(reference).values,
                tabularize(repeated).values)
示例#23
0
def test_output_dimensions(num_bins, corr_series_length):
    """Check the shape of HOG1DTransformer output for a given ``num_bins``.

    The output must keep ten rows and one column, and the series in its
    first cell must have ``corr_series_length`` values.
    """
    X = generate_df_from_array(np.ones(13), n_rows=10, n_cols=1)

    transformed = HOG1DTransformer(num_bins=num_bins).fit(X).transform(X)

    # Length of the series stored in the first cell of the output.
    series_length = transformed.iloc[0, 0].shape[0]

    assert series_length == corr_series_length
    assert transformed.shape[0] == 10
    assert transformed.shape[1] == 1
示例#24
0
def test_output_dimensions(time_series_length, window_length):
    """Check the shape of SlidingWindowSegmenter output.

    The output must keep ten rows, have one column per time point of the
    input series, and hold ``window_length`` values in its first cell.
    """
    n_instances = 10
    X = generate_df_from_array(np.ones(time_series_length),
                               n_rows=n_instances,
                               n_cols=1)

    segmenter = SlidingWindowSegmenter(window_length=window_length)
    transformed = segmenter.fit(X).transform(X)

    # Length of the series stored in the first cell of the output.
    series_length = transformed.iloc[0, 0].shape[0]

    assert series_length == window_length
    assert transformed.shape[0] == n_instances
    assert transformed.shape[1] == time_series_length
示例#25
0
def test_dwt_performs_correcly_along_each_dim():
    """Check DWTTransformer output when two columns hold the same series."""
    series = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    X = generate_df_from_array(series, n_rows=1, n_cols=2)

    actual = DWTTransformer(num_levels=3).fit(X).transform(X)

    def _expected_coefficients():
        # Four leading values followed by five identical trailing values.
        leading = [9 * math.sqrt(2), -4 * math.sqrt(2), -2, -2]
        trailing = [-math.sqrt(2) / 2 for _ in range(5)]
        return leading + trailing

    # Both columns hold the same series, so both expect the same values.
    expected = convert_list_to_dataframe([_expected_coefficients(),
                                          _expected_coefficients()])
    expected.columns = X.columns
    assert check_if_dataframes_are_equal(actual, expected)
def test_results(n_instances, n_timepoints, n_intervals):
    """Check extracted interval features against directly computed values.

    Every instance holds the same random series ``x``. For each fitted
    interval the mean, standard deviation and slope are recomputed from
    ``x`` and compared with the matching output columns.
    """
    x = np.random.normal(size=n_timepoints)
    X = generate_df_from_array(x, n_rows=n_instances, n_cols=1)
    t = RandomIntervalFeatureExtractor(
        n_intervals=n_intervals, features=[np.mean, np.std, time_series_slope])
    Xt = t.fit_transform(X)
    # Check results
    intervals = t.intervals_
    for start, end in intervals:
        expected_mean = np.mean(x[start:end])
        expected_std = np.std(x[start:end])
        expected_slope = time_series_slope(x[start:end])

        # ``DataFrame.filter(like=...)`` is a literal substring match, not a
        # glob. A leading ``*`` would match no column, leave the selection
        # empty and make the ``np.all`` check below pass vacuously.
        actual_means = Xt.filter(like=f'_{start}_{end}_mean').values
        actual_stds = Xt.filter(like=f'_{start}_{end}_std').values
        actual_slopes = Xt.filter(
            like=f'_{start}_{end}_time_series_slope').values

        assert np.all(actual_means == expected_mean)
        assert np.all(actual_stds == expected_std)
        assert np.all(actual_slopes == expected_slope)
示例#27
0
def test_output_format_dim(n_timepoints, n_instances, n_intervals):
    """Check type, row count and column count of segmenter output.

    The output must be a DataFrame with one row per instance. Unless
    ``n_intervals`` is ``'random'``, the number of columns is compared with
    the count implied by ``n_intervals`` (a fraction, an integer, ``'sqrt'``
    or ``'log'``); any other value is not checked further.
    """
    X = generate_df_from_array(np.ones(n_timepoints),
                               n_rows=n_instances,
                               n_cols=1)

    Xt = RandomIntervalSegmenter(n_intervals=n_intervals).fit_transform(X)

    # Output type and number of rows.
    assert isinstance(Xt, pd.DataFrame)
    assert Xt.shape[0] == X.shape[0]

    # Number of generated intervals/columns.
    if n_intervals == 'random':
        return

    kind = type(n_intervals)
    if np.issubdtype(kind, np.floating):
        expected_cols = np.maximum(1, int(n_timepoints * n_intervals))
    elif np.issubdtype(kind, np.integer):
        expected_cols = n_intervals
    elif n_intervals == 'sqrt':
        expected_cols = np.maximum(1, int(np.sqrt(n_timepoints)))
    elif n_intervals == 'log':
        expected_cols = np.maximum(1, int(np.log(n_timepoints)))
    else:
        return

    assert Xt.shape[1] == expected_cols
示例#28
0
def test_early_trans_fail():
    """Check that transforming with an unfitted PCATransformer fails."""
    data = generate_df_from_array(np.ones(10), n_rows=1, n_cols=1)
    unfitted = PCATransformer(n_components=1)

    # ``fit`` is deliberately never called before ``transform``.
    with pytest.raises(NotFittedError):
        unfitted.transform(data)
示例#29
0
def test_metric_params():
    """Check how ShapeDTW maps ``metric_params`` onto its transformers.

    For each shape descriptor the transformer returned by
    ``_get_transformer`` is inspected, both with default settings and with
    custom ``metric_params``. Parameter keys and descriptor names are
    written in mixed case throughout, so the assertions also exercise
    lookups that ignore case. An unknown parameter key must make ``fit``
    raise ``ValueError``.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)
    y = np.zeros(10)

    # Raw descriptor: no transformer at all.
    dtw = ShapeDTW()
    assert dtw._get_transformer("rAw") is None

    # PAA descriptor: custom setting first, then the default.
    dtw = ShapeDTW(metric_params={"num_intERvals_paa": 3})
    assert dtw._get_transformer("pAA").num_intervals == 3
    dtw = ShapeDTW()
    assert dtw._get_transformer("pAA").num_intervals == 8
    assert isinstance(dtw._get_transformer("paa"), PAA)

    # DWT descriptor: the default is read from the instance created above.
    assert dtw._get_transformer("dWt").num_levels == 3
    dtw = ShapeDTW(metric_params={"num_LEvEls_dwt": 5})
    assert dtw._get_transformer("Dwt").num_levels == 5
    assert isinstance(dtw._get_transformer("dwt"), DWTTransformer)

    # Slope descriptor.
    dtw = ShapeDTW()
    assert dtw._get_transformer("sLoPe").num_intervals == 8
    dtw = ShapeDTW(metric_params={"num_inTErvals_slope": 2})
    assert dtw._get_transformer("slope").num_intervals == 2
    assert isinstance(dtw._get_transformer("slope"), SlopeTransformer)

    # Derivative descriptor.
    dtw = ShapeDTW()
    assert isinstance(dtw._get_transformer("derivative"),
                      DerivativeSlopeTransformer)

    # HOG1D descriptor with default settings (same instance as above).
    assert dtw._get_transformer("hOG1d").num_intervals == 2
    assert dtw._get_transformer("hOG1d").num_bins == 8
    assert dtw._get_transformer("hog1d").scaling_factor == 0.1

    # HOG1D descriptor with one, two and all three custom parameters.
    # Each entry: (metric_params, (num_intervals, num_bins, scaling_factor)).
    hog1d_cases = [
        ({"NUM_intervals_hog1d": 5}, (5, 8, 0.1)),
        ({"nUM_BinS_hog1d": 63}, (2, 63, 0.1)),
        ({"scaling_factor_hog1d": 0.5}, (2, 8, 0.5)),
        ({"NUM_intervals_hog1d": 5,
          "nUM_BinS_hog1d": 63}, (5, 63, 0.1)),
        ({"NUM_bins_hog1d": 63,
          "scaling_factor_hog1d": 0.5}, (2, 63, 0.5)),
        ({"scaling_factor_hog1d": 0.5,
          "nUM_intervals_hog1d": 5}, (5, 8, 0.5)),
        ({"scaling_factor_hog1d": 0.5,
          "nUM_intervals_hog1d": 5,
          "num_bins_hog1d": 63}, (5, 63, 0.5)),
    ]
    for params, (n_intervals, n_bins, scaling) in hog1d_cases:
        dtw = ShapeDTW(metric_params=params)
        assert dtw._get_transformer("hoG1d").num_intervals == n_intervals
        assert dtw._get_transformer("hOG1d").num_bins == n_bins
        assert dtw._get_transformer("hog1d").scaling_factor == scaling

    dtw = ShapeDTW()
    assert isinstance(dtw._get_transformer("hog1d"), HOG1DTransformer)

    # Compound descriptor (mixed upper and lower case in the key).
    dtw = ShapeDTW(shape_descriptor_function="compound",
                   shape_descriptor_functions=["raw", "derivative"],
                   metric_params={"weighting_FACtor": 20})
    dtw.fit(X, y)
    refitted = dtw.fit(X, y)
    assert refitted.weighting_factor == 20

    # A key that names no known parameter must be rejected on fit.
    with pytest.raises(ValueError):
        ShapeDTW(shape_descriptor_function="paa",
                 metric_params={"num_intervals": 8}).fit(X, y)
示例#30
0
def test_fails_if_multivariate():
    """Check that SlidingWindowSegmenter rejects a five-column input."""
    multivariate = generate_df_from_array(np.ones(5), n_rows=10, n_cols=5)

    # Construction, fit and transform all happen inside the raises block.
    with pytest.raises(ValueError):
        segmenter = SlidingWindowSegmenter()
        segmenter.fit(multivariate).transform(multivariate)