def standardize_pre_and_post_data(
    pre_data: pd.DataFrame,
    post_data: pd.DataFrame
) -> Tuple[pd.DataFrame, pd.DataFrame, Tuple[float, float]]:
    """
    Applies standardization in pre and post data, based on mean and standard deviation
    of `pre_data` (as it's used for training the causal impact model).

    Args
    ----
      pre_data: pd.DataFrame
          data selected to be the pre-intervention dataset of causal impact.
      post_data: pd.DataFrame
          data that is normalized with the statistics computed from `pre_data`
          (its own mean and standard deviation are not used).

    Returns
    -------
      Tuple[pd.DataFrame, pd.DataFrame, Tuple[float, float]]
          `pre_data` and `post_data` normalized along with the mean and standard
          deviation used for response variable `y` only. These are read from the
          first position of the statistics returned by `standardize`.
    """
    normed_pre_data, (mu, sig) = standardize(pre_data)
    # `post_data` is scaled with the statistics of `pre_data`, not its own.
    normed_post_data = (post_data - mu) / sig
    # Explicit positional access: `mu[0]` is a label lookup first, and its
    # positional fallback for non-integer labels is deprecated in pandas.
    # NOTE(review): assumes `standardize` returns `mu`/`sig` as pandas Series
    # indexed by column - confirm against its definition.
    mu_sig = (mu.iloc[0], sig.iloc[0])
    return (normed_pre_data, normed_post_data, mu_sig)
def test_standardize_raises_single_input():
    """`standardize` must raise `ValueError` for a frame holding a single value."""
    single_value_frame = pd.DataFrame([1])
    with pytest.raises(ValueError):
        standardize(single_value_frame)
def test_standardize_w_various_distinct_inputs():
    """
    Round-trip check: `unstandardize` applied to the output of `standardize`
    must give back the original frame.

    Covers three single-column float inputs: one with a repeated value, one
    containing a NaN, and one with larger, evenly spaced values.
    """
    raw_inputs = [[1, 2, 1], [1, np.nan, 3], [10, 20, 30]]
    frames = [pd.DataFrame(values, dtype="float") for values in raw_inputs]
    for frame in frames:
        result, (mu, sig) = standardize(frame)
        restored = unstandardize(result, (mu, sig))
        # `pd.testing` is the public home of the assertion helpers; the old
        # `pd.util.testing` path is deprecated and absent in recent pandas.
        pd.testing.assert_frame_equal(restored, frame)