def test_tabularize():
    """Check that tabularize flattens nested columns and keeps the index."""
    n_obs_X = 20
    n_cols_X = 3
    X = generate_df_from_array(np.random.normal(size=n_obs_X),
                               n_rows=10,
                               n_cols=n_cols_X)

    # Single-column (series) input: one output column per observation.
    Xt = tabularize(X.iloc[:, 0], return_array=True)
    assert Xt.shape[0] == X.shape[0]
    assert Xt.shape[1] == n_obs_X

    # Without return_array, the original row index must be preserved.
    Xt = tabularize(X.iloc[:, 0])
    assert Xt.index.equals(X.index)

    # DataFrame input whose columns hold series of different lengths.
    n_obs_Y = 13
    n_cols_Y = 2
    Y = generate_df_from_array(np.random.normal(size=n_obs_Y),
                               n_rows=10,
                               n_cols=n_cols_Y)
    X = pd.concat([X, Y], axis=1)

    # Each column contributes (its series length) output columns.
    Xt = tabularize(X, return_array=True)
    assert Xt.shape[0] == X.shape[0]
    assert Xt.shape[1] == n_cols_X * n_obs_X + n_cols_Y * n_obs_Y

    Xt = tabularize(X)
    assert Xt.index.equals(X.index)
def test_output_format_dim():
    """Check output type and dimensions for every n_intervals setting."""
    n_intervals_options = [0.1, 0.5, 1.0, 1, 3, 10, 'sqrt', 'random', 'log']
    for n_cols in [1, 3]:
        for n_rows in [1, 3]:
            for n_obs in [2, 100]:
                for n_intervals in n_intervals_options:
                    X = generate_df_from_array(np.ones(n_obs),
                                               n_rows=n_rows,
                                               n_cols=n_cols)

                    trans = RandomIntervalSegmenter(n_intervals=n_intervals)
                    Xt = trans.fit_transform(X)

                    # Output is a DataFrame with one row per instance.
                    assert isinstance(Xt, pd.DataFrame)
                    assert Xt.shape[0] == X.shape[0]

                    # Except for 'random', the number of generated
                    # intervals (columns) is deterministic.
                    if n_intervals == 'random':
                        continue
                    if np.issubdtype(type(n_intervals), np.floating):
                        expected = max(1, int(n_obs * n_intervals)) * n_cols
                        assert Xt.shape[1] == expected
                    elif np.issubdtype(type(n_intervals), np.integer):
                        assert Xt.shape[1] == n_intervals * n_cols
                    elif n_intervals == 'sqrt':
                        expected = max(1, int(np.sqrt(n_obs))) * n_cols
                        assert Xt.shape[1] == expected
                    elif n_intervals == 'log':
                        expected = max(1, int(np.log(n_obs))) * n_cols
                        assert Xt.shape[1] == expected
def test_output_format_dim():
    """Check output format and dimensions across a grid of input sizes.

    Bug fix: the column loop used ``range(1, 3, 10)``, which yields only
    ``[1]`` because the step exceeds the span — so only a single column
    count was ever exercised. Use the explicit list ``[1, 3, 10]``,
    consistent with the sibling row/observation loops.
    """
    for n_cols in [1, 3, 10]:
        for n_rows in [1, 3, 10]:
            for n_obs in [2, 3, 10]:
                X = generate_df_from_array(np.ones(n_obs),
                                           n_rows=n_rows,
                                           n_cols=n_cols)
                _test_output_format_dim(X)
# Exemplo n.º 4 (scraper artifact — kept as comment so the file parses)
def test_bad_input_args(bad_components):
    """Invalid n_components values must raise the appropriate error at fit.

    A string argument is rejected with a TypeError; any other bad value
    (e.g. an out-of-range number) with a ValueError.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=1)

    expected_error = TypeError if isinstance(bad_components, str) else ValueError
    with pytest.raises(expected_error):
        PCATransformer(n_components=bad_components).fit(X)
def test_random_state():
    """A fixed random_state must make the feature extractor reproducible."""
    X = generate_df_from_array(np.random.normal(size=20))
    seed = 1234

    reference = RandomIntervalFeatureExtractor(
        n_intervals='random', random_state=seed).fit_transform(X)
    # Re-fitting with the same seed must reproduce the exact same output.
    for _ in range(N_ITER):
        repeat = RandomIntervalFeatureExtractor(
            n_intervals='random', random_state=seed).fit_transform(X)
        assert reference.equals(repeat)
def test_output_format_dim(n_instances, len_series, n_intervals, features):
    """Check the output is a DataFrame of the expected height and content.

    Bug fix: the original unpacked ``n_rows, n_cols = X.shape`` but never
    used ``n_cols`` — the dead local is removed and the row count is read
    directly from ``X.shape``.

    The input is all-ones, so for the parametrized features the output is
    expected to be an all-ones matrix (presumably the features preserve a
    constant-one input — confirm against the parametrization).
    """
    X = generate_df_from_array(np.ones(len_series),
                               n_rows=n_instances,
                               n_cols=1)
    trans = RandomIntervalFeatureExtractor(n_intervals=n_intervals,
                                           features=features)
    Xt = trans.fit_transform(X)
    assert isinstance(Xt, pd.DataFrame)
    # One output row per input instance.
    assert Xt.shape[0] == X.shape[0]
    assert np.array_equal(Xt.values, np.ones(Xt.shape))
def test_random_state():
    """A fixed random_state must make the segmenter reproducible."""
    X = generate_df_from_array(np.random.normal(size=10))
    seed = 1234

    for n_intervals in [0.5, 10, 'sqrt', 'random', 'log']:
        first = RandomIntervalSegmenter(
            n_intervals=n_intervals, random_state=seed).fit_transform(X)
        # Every re-fit with the same seed must yield identical values.
        for _ in range(N_ITER):
            repeat = RandomIntervalSegmenter(
                n_intervals=n_intervals, random_state=seed).fit_transform(X)
            np.testing.assert_array_equal(tabularize(first).values,
                                          tabularize(repeat).values)
# Exemplo n.º 8 (scraper artifact — kept as comment so the file parses)
def test_rowwise_transformer_sklearn_transfomer():
    """RowwiseTransformer should apply an sklearn transformer row by row.

    Bug fix: the data generation hard-coded ``scale=5`` while the ``sd``
    variable holding the same value went unused; pass ``scale=sd`` so the
    distribution parameters live in one place. Behaviour is unchanged
    because ``sd == 5``.
    """
    mu = 10
    sd = 5
    X = generate_df_from_array(
        np.random.normal(loc=mu, scale=sd, size=(100,)), n_rows=10, n_cols=1)
    t = StandardScaler(with_mean=True, with_std=True)
    r = RowwiseTransformer(t)

    Xt = r.fit_transform(X)
    assert Xt.shape == X.shape
    # Series-to-series transform: each cell still holds a whole series.
    assert isinstance(Xt.iloc[0, 0], (pd.Series, np.ndarray))
    # Each row is standardised: zero mean, unit standard deviation.
    np.testing.assert_almost_equal(Xt.iloc[0, 0].mean(), 0)
    np.testing.assert_almost_equal(Xt.iloc[0, 0].std(), 1, decimal=2)
def test_results(n_instances, len_series, n_intervals):
    """Extracted features must equal the same functions applied directly."""
    values = np.random.normal(size=len_series)
    X = generate_df_from_array(values, n_rows=n_instances, n_cols=1)
    trans = RandomIntervalFeatureExtractor(
        n_intervals=n_intervals, features=[np.mean, np.std, time_series_slope])
    Xt = trans.fit_transform(X)

    # Every row holds the same series, so each feature column must be
    # constant and equal to the feature computed on the raw interval.
    for start, end in trans.intervals_:
        segment = values[start:end]
        expected_by_feature = {
            'mean': np.mean(segment),
            'std': np.std(segment),
            'time_series_slope': time_series_slope(segment),
        }
        for feature_name, expected in expected_by_feature.items():
            column = Xt.filter(like=f'_{start}_{end}_{feature_name}')
            assert np.all(column == expected)
def test_output_format_dim(len_series, n_instances, n_intervals):
    """Check output type, row count and column count of the segmenter."""
    X = generate_df_from_array(np.ones(len_series), n_rows=n_instances,
                               n_cols=1)

    Xt = RandomIntervalSegmenter(n_intervals=n_intervals).fit_transform(X)

    # Output is a DataFrame with one row per input instance.
    assert isinstance(Xt, pd.DataFrame)
    assert Xt.shape[0] == X.shape[0]

    # Except for 'random', the number of intervals is deterministic.
    if n_intervals == 'random':
        return
    if np.issubdtype(type(n_intervals), np.floating):
        assert Xt.shape[1] == max(1, int(len_series * n_intervals))
    elif np.issubdtype(type(n_intervals), np.integer):
        assert Xt.shape[1] == n_intervals
    elif n_intervals == 'sqrt':
        assert Xt.shape[1] == max(1, int(np.sqrt(len_series)))
    elif n_intervals == 'log':
        assert Xt.shape[1] == max(1, int(np.log(len_series)))
# Exemplo n.º 11 (scraper artifact — kept as comment so the file parses)
from sktime.datasets import load_gunpoint
from sktime.pipeline import FeatureUnion, Pipeline
from sklearn.tree import DecisionTreeClassifier
from sktime.transformers.series_to_series import RandomIntervalSegmenter
from sktime.transformers.series_to_tabular import RandomIntervalFeatureExtractor
from sklearn.preprocessing import FunctionTransformer
from sktime.utils.time_series import time_series_slope

# Number of repeated fits used by the reproducibility tests below.
N_ITER = 10

# Shared fixture dimensions: n instances, d columns, m observations per
# series, drawn from n_classes labels.
n = 20
d = 1
m = 20
n_classes = 2

# Shared test data: random nested DataFrame X and integer class labels y
# in {1, ..., n_classes}.
X = generate_df_from_array(np.random.normal(size=m), n_rows=n, n_cols=d)
y = pd.Series(np.random.choice(np.arange(n_classes) + 1, size=n))


# Check if random state always gives same results
def test_random_state():
    """A fixed random_state must make classifier predictions reproducible."""
    seed = 1234
    clf = TimeSeriesForestClassifier(n_estimators=2, random_state=seed)
    clf.fit(X, y)
    reference = clf.predict_proba(X)
    # Re-fitting with the same seed must reproduce the probabilities.
    for _ in range(N_ITER):
        repeat = TimeSeriesForestClassifier(n_estimators=2,
                                            random_state=seed)
        repeat.fit(X, y)
        np.testing.assert_array_equal(reference, repeat.predict_proba(X))
def test_bad_input_args(bad_interval):
    """An invalid n_intervals argument must raise a ValueError at fit."""
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=2)
    segmenter = RandomIntervalSegmenter(n_intervals=bad_interval)
    with pytest.raises(ValueError):
        segmenter.fit(X)
# Exemplo n.º 13 (scraper artifact — kept as comment so the file parses)
def test_early_trans_fail():
    """Calling transform before fit must raise NotFittedError."""
    X = generate_df_from_array(np.ones(10), n_rows=1, n_cols=1)
    unfitted_pca = PCATransformer(n_components=1)
    with pytest.raises(NotFittedError):
        unfitted_pca.transform(X)
def test_bad_input_args():
    """Every invalid n_intervals value must raise a ValueError at fit.

    Bug fix: the original list contained both ``0`` and ``-0``, which are
    the same integer — the redundant duplicate is removed so each entry
    exercises a distinct invalid value.
    """
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=2)
    bad_n_intervals = [0, 'str', 1.2, -1.2, -1]
    for bad_value in bad_n_intervals:
        with pytest.raises(ValueError):
            RandomIntervalSegmenter(n_intervals=bad_value).fit(X)
from sktime.datasets import load_gunpoint
from sktime.pipeline import FeatureUnion, Pipeline
from sklearn.tree import DecisionTreeClassifier
from sktime.transformers.segment import RandomIntervalSegmenter
from sktime.transformers.summarise import RandomIntervalFeatureExtractor
from sklearn.preprocessing import FunctionTransformer
from sktime.utils.time_series import time_series_slope
import pytest

# Shared fixture dimensions for the tests below.
n_instances = 20
n_columns = 1
len_series = 20
n_classes = 2

# Shared test data: random nested DataFrame X and integer class labels y
# in {1, ..., n_classes}.
X = generate_df_from_array(np.random.normal(size=len_series),
                           n_rows=n_instances,
                           n_cols=n_columns)
y = pd.Series(np.random.choice(np.arange(n_classes) + 1, size=n_instances))


# Check if random state always gives same results
def test_random_state():
    N_ITER = 10

    random_state = 1234
    clf = TimeSeriesForestClassifier(n_estimators=2, random_state=random_state)
    clf.fit(X, y)
    first_pred = clf.predict_proba(X)
    for _ in range(N_ITER):
        clf = TimeSeriesForestClassifier(n_estimators=2,
                                         random_state=random_state)