示例#1
0
 def distance(instance_a, instance_b, **params):
     """Return the distance between two instances.

     Both instances are converted with ``tabularise(..., return_array=True)``
     and transposed before being handed to ``distance_measure``, which is not
     defined in this block (presumably supplied by the enclosing scope).

     Parameters
     ----------
     instance_a, instance_b : objects accepted by ``tabularise``
     **params : forwarded unchanged to ``distance_measure``

     Returns
     -------
     Whatever ``distance_measure`` returns for the two prepared arrays.
     """
     # TODO: consider using a specific dimension rather than the whole instance.
     prepared_a, prepared_b = (
         np.transpose(tabularise(instance, return_array=True))
         for instance in (instance_a, instance_b)
     )
     return distance_measure(prepared_a, prepared_b, **params)
示例#2
0
def test_rowwise_transformer_transform_inverse_transform():
    """Round-trip a row-wise ``StandardScaler`` and check the data is restored.

    Fits and applies ``RowwiseTransformer(StandardScaler())`` on the data
    returned by ``load_gunpoint``, inverts the transform, and verifies that
    the shape, the cell type and the tabularised values match the input.
    """
    X, _ = load_gunpoint(return_X_y=True)

    transformer = RowwiseTransformer(StandardScaler())
    transformed = transformer.fit_transform(X)
    restored = transformer.inverse_transform(transformed)

    assert restored.shape == X.shape

    # Series-to-series transforms must leave a series/array in every cell.
    first_cell = restored.iloc[0, 0]
    assert isinstance(first_cell, (pd.Series, np.ndarray))

    np.testing.assert_array_almost_equal(
        tabularise(X).values,
        tabularise(restored).values,
        decimal=5,
    )
def test_zero_sp_identity(model):
    """Check that ``Deseasonaliser`` with ``sp=1`` leaves the data unchanged.

    Data is generated with a seasonal periodicity of 1; both the forward
    transform and the inverse transform are expected to reproduce the input
    values (identity behaviour).

    Parameters
    ----------
    model : value forwarded to both the data generator and ``Deseasonaliser``
        (presumably supplied by a test parametrisation outside this block).
    """
    sp = 1
    X = generate_seasonal_time_series_data_with_trend(
        n_samples=10,
        n_obs=100,
        order=1,
        sp=sp,
        model=model,
    )

    deseasonaliser = Deseasonaliser(sp=sp, model=model)

    # Forward transform must return the input values unchanged.
    transformed = deseasonaliser.fit_transform(X)
    np.testing.assert_array_almost_equal(
        tabularise(transformed).values, tabularise(X).values
    )

    # Inverse transform on the same data must also restore the original values.
    restored = deseasonaliser.inverse_transform(transformed)
    np.testing.assert_array_almost_equal(
        tabularise(restored).values, tabularise(X).values
    )
示例#4
0
 def rowwise_first(X):
     """Build a frame holding the first tabularised column of every column of X.

     A ``pd.Series`` input is first wrapped in a one-column ``pd.DataFrame``.
     Each column is passed through ``tabularise`` and only its first resulting
     column is kept; the kept columns are concatenated side by side.

     Parameters
     ----------
     X : pandas.Series or pandas.DataFrame

     Returns
     -------
     pandas.DataFrame
         One column per column of ``X``.
     """
     frame = pd.DataFrame(X) if isinstance(X, pd.Series) else X

     first_columns = []
     for _, column in frame.items():
         tabular = tabularise(column)
         first_columns.append(pd.Series(tabular.iloc[:, 0]))

     return pd.concat(first_columns, axis=1)
def test_transform_inverse_transform_equivalence(n_samples, order, sp, model):
    """Check deseasonalising and its inverse against synthetically generated data.

    The test generates seasonal data with trend, splits it along the time axis
    into a fitting part ``A`` and a later part ``B``, and then verifies that:

    * ``fit_transform`` on ``A`` preserves shapes and approximately matches data
      generated by the same process without seasonality;
    * ``inverse_transform`` on the transformed ``A`` restores ``A``;
    * ``inverse_transform`` on new, non-seasonal data with the time index of
      ``B`` approximately reproduces ``B``.

    Parameters
    ----------
    n_samples, order, sp, model : values forwarded to the data generators and
        ``Deseasonaliser`` (presumably supplied by a test parametrisation
        outside this block).
    """
    # generate data
    n_obs = 100
    X = generate_seasonal_time_series_data_with_trend(n_samples=n_samples, n_obs=n_obs, order=order, sp=sp, model=model)

    # split data for testing: first three quarters for fitting, rest held out
    cutoff = n_obs - (n_obs // 4)
    a_times = np.arange(n_obs)[:cutoff]
    b_times = np.arange(n_obs)[cutoff:]

    A = select_times(X, a_times)
    B = select_times(X, b_times)

    # test successful deseasonalising when true seasonal periodicity is given
    tran = Deseasonaliser(sp=sp, model=model)
    At = tran.fit_transform(A)
    assert At.shape == A.shape
    # compare against the untransformed input; comparing At with itself would
    # be a tautology that can never fail
    assert tabularise(At).shape == tabularise(A).shape

    # compare deseasonalised data with data generated by same process only without seasonality
    expected = generate_time_series_data_with_trend(n_samples=n_samples, n_obs=cutoff, order=order)

    # adjust testing criteria for complexity of data/model
    if model == 'multiplicative' and (order > 0):
        np.testing.assert_allclose(tabularise(At).values, tabularise(expected).values, rtol=0.06)
    else:
        np.testing.assert_array_almost_equal(tabularise(At).values, tabularise(expected).values, decimal=1)

    # test that inverse transform on same data restores original data
    Ait = tran.inverse_transform(At)
    assert Ait.shape == A.shape
    assert tabularise(Ait).shape == tabularise(A).shape
    np.testing.assert_array_almost_equal(Ait.iloc[0, 0].values, A.iloc[0, 0].values)

    # test correct inverse transform on new data with a different time index
    # e.g. necessary for inverse transforms after predicting/forecasting
    C = generate_time_series_data_with_trend(n_samples=n_samples, n_obs=n_obs, order=order)
    C = select_times(C, b_times)
    Cit = tran.inverse_transform(C)
    if model == 'multiplicative' and (order > 0):
        np.testing.assert_allclose(B.iloc[0, 0].values, Cit.iloc[0, 0].values, rtol=0.15)
    else:
        np.testing.assert_array_almost_equal(B.iloc[0, 0].values, Cit.iloc[0, 0].values, decimal=1)
示例#6
0
    def _prepare_X(X):
        """Helper function to transform nested pandas DataFrame X into 2d numpy array as required by `statsmodels`
        estimators.

        Parameters
        ----------
        X : pandas.DataFrame, shape=[1, n_variables]
            Nested dataframe with series of shape [n_obs,] in cells

        Returns
        -------
        Xt : ndarray, shape=[n_obs, n_variables]
        """
        if X is None:
            return X

        if X.shape[1] > 1:
            Xl = X.iloc[0, :].tolist()
            Xt = np.column_stack(Xl)
        else:
            Xt = tabularise(X).values.T

        return Xt
示例#7
0
def _univariate_nested_df_to_array(X):
    """Return ``tabularise(X, return_array=True)``.

    Thin wrapper; ``X`` is presumably a univariate nested DataFrame, as the
    function name suggests — confirm against callers.
    """
    as_array = tabularise(X, return_array=True)
    return as_array
示例#8
0
 def transform(self, X, y=None):
     """Optionally unpack a DataFrame ``X`` into an array via ``tabularise``.

     ``X`` is converted with ``tabularise(X, return_array=True)`` only when
     ``self.unpack_train`` is truthy and ``X`` is a ``pd.DataFrame``;
     otherwise it is returned untouched. ``y`` is accepted but not used.
     """
     # Unpacking disabled: hand the input straight back.
     if not self.unpack_train:
         return X

     # Only DataFrames are unpacked; any other input passes through.
     if not isinstance(X, pd.DataFrame):
         return X

     return tabularise(X, return_array=True)