def distance(instance_a, instance_b, **params):
    """Compute the distance between two instances.

    Both instances are converted with ``tabularise(..., return_array=True)``,
    transposed with ``np.transpose`` and handed to ``distance_measure``.

    Parameters
    ----------
    instance_a, instance_b
        The two instances to compare, in whatever form ``tabularise`` accepts.
    **params
        Extra keyword arguments forwarded unchanged to ``distance_measure``.

    Returns
    -------
    The value returned by ``distance_measure``.
    """
    # NOTE(review): `distance_measure` is not defined in this block; presumably
    # it is bound in an enclosing scope -- confirm against the surrounding code.
    # TODO: use a specific dimension rather than the whole thing?
    tabular_a = tabularise(instance_a, return_array=True)
    tabular_b = tabularise(instance_b, return_array=True)
    return distance_measure(
        np.transpose(tabular_a), np.transpose(tabular_b), **params
    )
def test_rowwise_transformer_transform_inverse_transform():
    """Round-trip data through a row-wise ``StandardScaler``.

    Fits and applies ``RowwiseTransformer(StandardScaler())`` to the data
    returned by ``load_gunpoint``, inverts the transform, and checks that the
    result has the original shape, holds series/arrays in its cells and
    matches the original values to 5 decimals.
    """
    X, _ = load_gunpoint(return_X_y=True)
    transformer = RowwiseTransformer(StandardScaler())

    transformed = transformer.fit_transform(X)
    restored = transformer.inverse_transform(transformed)

    assert restored.shape == X.shape

    # check series-to-series transforms
    first_cell = restored.iloc[0, 0]
    assert isinstance(first_cell, (pd.Series, np.ndarray))

    np.testing.assert_array_almost_equal(
        tabularise(X).values,
        tabularise(restored).values,
        decimal=5,
    )
def test_zero_sp_identity(model):
    """Check that ``Deseasonaliser`` leaves the data unchanged when ``sp=1``.

    Data are generated with a seasonal periodicity of 1, and both the forward
    and the inverse transform are expected to return the input unchanged.

    Parameters
    ----------
    model
        Passed both to the data generator and to ``Deseasonaliser``.
    """
    sp = 1
    X = generate_seasonal_time_series_data_with_trend(
        n_samples=10, n_obs=100, order=1, sp=sp, model=model
    )
    original_values = tabularise(X).values

    deseasonaliser = Deseasonaliser(sp=sp, model=model)

    # forward transform should behave like an identity transformer
    Xt = deseasonaliser.fit_transform(X)
    np.testing.assert_array_almost_equal(tabularise(Xt).values, original_values)

    # test that inverse transform on same data restores original data
    Xit = deseasonaliser.inverse_transform(Xt)
    np.testing.assert_array_almost_equal(tabularise(Xit).values, tabularise(X).values)
def rowwise_first(X):
    """Collect the first tabularised column of every column of ``X``.

    Parameters
    ----------
    X : pandas.Series or pandas.DataFrame
        A Series is first wrapped into a single-column DataFrame.

    Returns
    -------
    pandas.DataFrame
        One column per column of ``X``; each is column 0 of
        ``tabularise(column)``, concatenated side by side.
    """
    frame = pd.DataFrame(X) if isinstance(X, pd.Series) else X
    first_columns = [
        pd.Series(tabularise(column).iloc[:, 0])
        for _name, column in frame.items()
    ]
    return pd.concat(first_columns, axis=1)
def test_transform_inverse_transform_equivalence(n_samples, order, sp, model):
    """Check ``Deseasonaliser`` transform / inverse-transform consistency.

    Seasonal data with trend are generated and split in time into a fitting
    part ``A`` (first three quarters) and a hold-out part ``B`` (last
    quarter). The test verifies that

    * deseasonalising ``A`` preserves shapes and matches data generated by
      the same process without seasonality,
    * inverse-transforming the deseasonalised ``A`` restores ``A``,
    * inverse-transforming non-seasonal data on ``B``'s time index
      reproduces ``B``.

    Parameters
    ----------
    n_samples, order, sp, model
        Forwarded to the data generators; ``sp`` and ``model`` also
        configure the ``Deseasonaliser``.
    """
    # generate data
    n_obs = 100
    X = generate_seasonal_time_series_data_with_trend(
        n_samples=n_samples, n_obs=n_obs, order=order, sp=sp, model=model
    )

    # split data for testing
    cutoff = n_obs - (n_obs // 4)
    a_times = np.arange(n_obs)[:cutoff]
    b_times = np.arange(n_obs)[cutoff:]
    A = select_times(X, a_times)
    B = select_times(X, b_times)

    # adjust testing criteria for complexity of data/model: the
    # multiplicative model with a trend is compared with relative tolerance
    use_relative_tolerance = model == 'multiplicative' and (order > 0)

    # test successful deseasonalising when true seasonal periodicity is given
    tran = Deseasonaliser(sp=sp, model=model)
    At = tran.fit_transform(A)
    assert At.shape == A.shape
    # Compare against the input A; the previous check compared
    # tabularise(At).shape with itself and could never fail.
    assert tabularise(At).shape == tabularise(A).shape

    # compare deseasonalised data with data generated by same process only
    # without seasonality
    expected = generate_time_series_data_with_trend(
        n_samples=n_samples, n_obs=cutoff, order=order
    )
    if use_relative_tolerance:
        np.testing.assert_allclose(
            tabularise(At).values, tabularise(expected).values, rtol=0.06
        )
    else:
        np.testing.assert_array_almost_equal(
            tabularise(At).values, tabularise(expected).values, decimal=1
        )

    # test that inverse transform on same data restores original data
    Ait = tran.inverse_transform(At)
    assert Ait.shape == A.shape
    assert tabularise(Ait).shape == tabularise(A).shape
    np.testing.assert_array_almost_equal(Ait.iloc[0, 0].values, A.iloc[0, 0].values)

    # test correct inverse transform on new data with a different time index
    # e.g. necessary for inverse transforms after predicting/forecasting
    C = generate_time_series_data_with_trend(
        n_samples=n_samples, n_obs=n_obs, order=order
    )
    C = select_times(C, b_times)
    Cit = tran.inverse_transform(C)
    if use_relative_tolerance:
        np.testing.assert_allclose(
            B.iloc[0, 0].values, Cit.iloc[0, 0].values, rtol=0.15
        )
    else:
        np.testing.assert_array_almost_equal(
            B.iloc[0, 0].values, Cit.iloc[0, 0].values, decimal=1
        )
def _prepare_X(X):
    """Convert a nested pandas DataFrame into the 2d numpy array expected by
    `statsmodels` estimators.

    Parameters
    ----------
    X : pandas.DataFrame, shape=[1, n_variables], or None
        Nested dataframe with series of shape [n_obs,] in cells.

    Returns
    -------
    Xt : ndarray, shape=[n_obs, n_variables], or None
        ``None`` is passed through unchanged.
    """
    if X is None:
        return None

    n_variables = X.shape[1]
    if n_variables <= 1:
        # single variable: tabularise and transpose
        return tabularise(X).values.T

    # several variables: stack the cells of the first row as columns
    first_row_cells = X.iloc[0, :].tolist()
    return np.column_stack(first_row_cells)
def _univariate_nested_df_to_array(X):
    """Convert ``X`` to an array via ``tabularise(X, return_array=True)``.

    Parameters
    ----------
    X
        Input handed to ``tabularise``; presumably a univariate nested
        DataFrame, judging by the function name -- confirm against callers.

    Returns
    -------
    The array returned by ``tabularise``.
    """
    as_array = tabularise(X, return_array=True)
    return as_array
def transform(self, X, y=None):
    """Optionally unpack a DataFrame into an array.

    Parameters
    ----------
    X
        Data to transform.
    y
        Unused by this method.

    Returns
    -------
    ``tabularise(X, return_array=True)`` when ``self.unpack_train`` is truthy
    and ``X`` is a ``pandas.DataFrame``; otherwise ``X`` itself, unchanged.
    """
    needs_unpacking = self.unpack_train and isinstance(X, pd.DataFrame)
    if not needs_unpacking:
        return X
    return tabularise(X, return_array=True)