def test_random_state():
    """Check that a fixed ``random_state`` gives repeatable segmenter output.

    For each ``n_intervals`` setting exercised here, one reference transform
    is computed and then compared, in tabular form, against the output of
    ``N_ITER`` newly constructed transformers that use the same seed.
    """
    X = generate_df_from_array(np.random.normal(size=10))
    seed = 1234

    def segment(n_intervals):
        # A brand-new transformer per call, so nothing carries over between
        # runs except the shared seed.
        segmenter = RandomIntervalSegmenter(n_intervals=n_intervals,
                                            random_state=seed)
        return segmenter.fit_transform(X)

    for n_intervals in (0.5, 10, 'sqrt', 'random', 'log'):
        reference = segment(n_intervals)
        for _ in range(N_ITER):
            candidate = segment(n_intervals)
            np.testing.assert_array_equal(
                tabularize(reference).values,
                tabularize(candidate).values,
            )
def test_different_implementations():
    """Compare the feature extractor with an explicit two-step pipeline.

    The output of ``RandomIntervalSegmenter`` followed by ``np.mean`` applied
    through ``RowwiseTransformer`` is asserted to equal the output of
    ``RandomIntervalFeatureExtractor`` configured with ``features=[np.mean]``,
    the same ``n_intervals`` and the same seed.
    """
    seed = 1233
    X_train, _ = load_gunpoint(return_X_y=True)

    # Route 1: segment, then apply np.mean via the chained transformers.
    segmenter = RandomIntervalSegmenter(n_intervals='sqrt', random_state=seed)
    mean_func = FunctionTransformer(func=np.mean, validate=False)
    rowwise_mean = RowwiseTransformer(mean_func)
    segments = segmenter.fit_transform(X_train)
    chained = rowwise_mean.fit_transform(segments)

    # Route 2: the single combined transformer.
    extractor = RandomIntervalFeatureExtractor(
        n_intervals='sqrt',
        features=[np.mean],
        random_state=seed,
    )
    direct = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(chained, direct)
def test_output_format_dim(len_series, n_instances, n_intervals):
    """Check the type and shape of the segmenter's output.

    Parameters
    ----------
    len_series
        Length of the all-ones array used to build the input.
    n_instances
        Number of rows requested from ``generate_df_from_array``.
    n_intervals
        Interval specification forwarded to ``RandomIntervalSegmenter``.

    NOTE(review): the arguments are presumably supplied by a pytest
    parametrization that is not visible here -- confirm.
    """
    X = generate_df_from_array(np.ones(len_series), n_rows=n_instances, n_cols=1)
    Xt = RandomIntervalSegmenter(n_intervals=n_intervals).fit_transform(X)

    # Output must be a DataFrame with one row per input row.
    assert isinstance(Xt, pd.DataFrame)
    n_rows, n_cols = Xt.shape
    assert n_rows == X.shape[0]

    # With 'random' the number of columns is not checked.
    if n_intervals == 'random':
        return

    # Work out how many interval columns the given specification should yield.
    spec_type = type(n_intervals)
    if np.issubdtype(spec_type, np.floating):
        expected_cols = np.maximum(1, int(len_series * n_intervals))
    elif np.issubdtype(spec_type, np.integer):
        expected_cols = n_intervals
    elif n_intervals == 'sqrt':
        expected_cols = np.maximum(1, int(np.sqrt(len_series)))
    elif n_intervals == 'log':
        expected_cols = np.maximum(1, int(np.log(len_series)))
    else:
        # Any other specification: no column-count expectation is asserted.
        return

    assert n_cols == expected_cols