def test_standardize_w_various_distinct_inputs():
    """Round-trip check: ``unstandardize(standardize(x))`` must return ``x``.

    Three single-column float frames are exercised: a plain column, a column
    containing a NaN, and a column with larger values.
    """
    raw_columns = [[1, 2, 1], [1, np.nan, 3], [10, 20, 30]]
    frames = [pd.DataFrame(column, dtype="float") for column in raw_columns]

    for frame in frames:
        result, (mu, sig) = standardize(frame)
        # ``pd.util.testing`` was deprecated in pandas 1.0 and removed in 2.0;
        # ``pd.testing`` is the supported public home of this assertion.
        pd.testing.assert_frame_equal(unstandardize(result, (mu, sig)), frame)
def compile_posterior_inferences(model, data_post, alpha=0.05,
                                 orig_std_params=identity):
    """Compile posterior inferences for the post-intervention period.

    NOTE(review): this body reads ``res``, ``point_pred`` and ``series``
    without ever assigning them. Unless those names exist at module level
    elsewhere, the call fails with ``NameError`` on its first statement.
    Their intended sources cannot be determined from this function alone, so
    they are left untouched here -- confirm and wire them up before use.

    Args:
        model: object exposing ``data.orig_endog``.
        data_post: post-period data; its length sets the number of forecast
            steps and it is passed as ``exog`` to the forecast call.
        alpha: significance level forwarded to the forecast, the confidence
            interval and the summary table.
        orig_std_params: forwarded to ``unstandardize``.

    Returns:
        dict with the keys ``"series"``, ``"summary"`` and ``"report"``.
    """
    # Compute point predictions of counterfactual (in standardized space).
    # The caller-supplied ``alpha`` is honoured instead of a literal 0.05.
    # NOTE(review): ``res`` is not defined in this function; this first
    # ``predict_mean`` is also overwritten below before being read.
    predict_mean = res.forecast(steps=len(data_post), exog=data_post,
                                alpha=alpha)

    # Undo standardization (if any)
    # NOTE(review): ``point_pred`` is not defined in this function.
    predict_res = unstandardize(point_pred, orig_std_params)
    # NOTE(review): ``y`` is currently unused -- presumably meant to feed
    # ``series``; confirm.
    y = unstandardize(model.data.orig_endog, orig_std_params)

    # Compile summary statistics (in original space)
    predict_mean = predict_res.predicted_mean
    predict_ci = predict_res.conf_int(alpha=alpha)  # unused so far
    summary = compile_summary_table(data_post, predict_mean, alpha)
    report = interpret_summary_table(summary)

    # NOTE(review): ``series`` is not defined in this function.
    inferences = {"series": series, "summary": summary, "report": report}
    # Hand the compiled result back to the caller (it was previously built
    # and then discarded).
    return inferences
def _unstardardize(self, data):
    """Bring ``data`` back to its original scale if it was standardized.

    ``self.mu_sig`` (from ``main.BaseCausal``) holds the values used for
    normalization: the mean first, the standard deviation second. When it is
    ``None`` no standardization was applied and ``data`` is handed back
    untouched.

    Args
    ----
      data: numpy.array
          Input vector to unstandardize.

    Returns
    -------
      numpy.array: ``data`` itself if ``self.mu_sig`` is None; otherwise the
          result of ``unstandardize(data, self.mu_sig)``.
    """
    if self.mu_sig is not None:
        return unstandardize(data, self.mu_sig)
    return data
def compile_posterior_inferences(results, data, df_pre, df_post,
                                 post_period_response, alpha,
                                 orig_std_params, estimation, log):
    """Build the per-observation inference table for pre and post periods.

    Args:
        results: fitted results object exposing ``get_prediction``,
            ``get_forecast`` and ``model.nobs``.
        data: DataFrame whose first column is the response; its index becomes
            the index of the returned table.
        df_pre: pre-intervention frame; only its index is used here.
        df_post: post-intervention frame whose columns after the first are
            passed as ``exog`` to the forecast, or ``None``.
        post_period_response: read only when ``df_post`` is ``None``; its
            length determines where the post period starts.
        alpha: significance level forwarded to the confidence intervals.
        orig_std_params: forwarded to ``unstandardize``.
        estimation: fitting method; only ``"MLE"`` is implemented.
        log: logger-like object; only ``debug`` is called on it.

    Returns:
        dict with the single key ``"series"`` holding a DataFrame with the
        response, predictions, effects and their cumulative counterparts.

    Raises:
        NotImplementedError: if ``estimation`` is not ``"MLE"``.
    """
    if estimation != "MLE":
        raise NotImplementedError()

    # Compute point predictions of counterfactual (in standardized space)
    if df_post is not None:
        predict = results.get_prediction()
        forecast = results.get_forecast(steps=len(df_post),
                                        exog=df_post.iloc[:, 1:],
                                        alpha=alpha)
        post_index = df_post.index
    else:
        pre_len = results.model.nobs - len(post_period_response)
        predict = results.get_prediction(end=pre_len - 1)
        forecast = results.get_prediction(start=pre_len)
        # There is no post-period frame to take an index from, so label the
        # post rows positionally, right after the pre-period observations.
        # (Reading ``df_post.index`` here used to raise AttributeError.)
        post_index = pd.RangeIndex(
            start=pre_len, stop=pre_len + len(post_period_response))

    # Compile summary statistics (in original space)
    pre_pred = unstandardize(predict.predicted_mean, orig_std_params)
    pre_pred.index = df_pre.index
    post_pred = unstandardize(forecast.predicted_mean, orig_std_params)
    post_pred.index = post_index
    point_pred = pd.concat([pre_pred, post_pred])

    pre_ci = unstandardize(predict.conf_int(alpha=alpha), orig_std_params)
    pre_ci.index = df_pre.index
    post_ci = unstandardize(forecast.conf_int(alpha=alpha), orig_std_params)
    post_ci.index = post_index
    ci = pd.concat([pre_ci, post_ci])

    log.debug('{0}'.format(list(ci.columns.values.tolist())))
    point_pred_upper = ci["upper y"].to_frame()
    log.debug('{0}'.format(point_pred_upper))
    point_pred_lower = ci["lower y"].to_frame()

    response_index = data.index
    response = pd.DataFrame(data.iloc[:, 0])

    cum_response = np.cumsum(response)
    cum_pred = np.cumsum(point_pred)
    cum_pred_upper = np.cumsum(point_pred_upper)
    cum_pred_lower = np.cumsum(point_pred_lower)

    predictions = pd.concat([point_pred, point_pred_upper, point_pred_lower,
                             cum_pred, cum_pred_lower, cum_pred_upper],
                            axis=1)
    series = pd.concat([response, cum_response], axis=1).join(predictions,
                                                              lsuffix='l')
    series.columns = ["response", "cum_response", "point_pred",
                      "point_pred_upper", "point_pred_lower", "cum_pred",
                      "cum_pred_lower", "cum_pred_upper"]

    point_effect = (series["response"] - series["point_pred"]).to_frame()
    point_effect_upper = (
        series["response"] - series["point_pred_upper"]).to_frame()
    point_effect_lower = (
        series["response"] - series["point_pred_lower"]).to_frame()

    pre_first, pre_last = df_pre.index[0], df_pre.index[-1]

    def _cumulative_post_effect(effect):
        # Work on a copy so the point effect keeps its pre-period values;
        # only the cumulative version starts counting at the intervention.
        zeroed = effect.copy()
        zeroed.loc[pre_first:pre_last] = 0
        return np.cumsum(zeroed)

    cum_effect = _cumulative_post_effect(point_effect)
    cum_effect_upper = _cumulative_post_effect(point_effect_upper)
    cum_effect_lower = _cumulative_post_effect(point_effect_lower)

    # NOTE(review): the point-effect frames are concatenated as
    # (upper, lower) but labelled (lower, upper) below, whereas the
    # cumulative frames are concatenated in the same order as their labels.
    # Kept exactly as before because the intended direction is unclear --
    # confirm which bound each column should carry.
    series = pd.concat([series, point_effect, point_effect_upper,
                        point_effect_lower, cum_effect, cum_effect_lower,
                        cum_effect_upper], axis=1)

    # Create DataFrame of results
    series.columns = ["response", "cum_response", "point_pred",
                      "point_pred_upper", "point_pred_lower", "cum_pred",
                      "cum_pred_lower", "cum_pred_upper", "point_effect",
                      "point_effect_lower", "point_effect_upper",
                      "cum_effect", "cum_effect_lower", "cum_effect_upper"]
    series.index = response_index

    return {"series": series}
def compile_posterior_inferences(results, data, df_pre, df_post,
                                 post_period_response, alpha,
                                 orig_std_params, estimation):
    """Build the per-observation inference table for pre and post periods.

    Args:
        results: fitted results object exposing ``get_prediction``,
            ``get_forecast`` and ``model.nobs``.
        data: DataFrame whose first column is the response; its index becomes
            the index of the returned table.
        df_pre: pre-intervention frame; only its index is used here.
        df_post: post-intervention frame whose columns after the first are
            passed as ``exog`` to the forecast, or ``None``.
        post_period_response: read only when ``df_post`` is ``None``; its
            length determines where the post period starts.
        alpha: significance level forwarded to the confidence intervals.
        orig_std_params: forwarded to ``unstandardize``.
        estimation: fitting method; only ``"MLE"`` is implemented.

    Returns:
        dict with the single key ``"series"`` holding a DataFrame with the
        response, predictions, effects and their cumulative counterparts.

    Raises:
        NotImplementedError: if ``estimation`` is not ``"MLE"``.
    """
    if estimation != "MLE":
        raise NotImplementedError()

    # Compute point predictions of counterfactual (in standardized space)
    if df_post is None:
        pre_len = results.model.nobs - len(post_period_response)
        predict = results.get_prediction(end=pre_len - 1)
        forecast = results.get_prediction(start=pre_len)
        # Without a post-period frame there is no index to borrow, so the
        # post rows are labelled positionally after the pre-period ones.
        # (Reading ``df_post.index`` in this case used to raise
        # AttributeError.)
        post_index = pd.RangeIndex(
            start=pre_len, stop=pre_len + len(post_period_response))
    else:
        predict = results.get_prediction()
        forecast = results.get_forecast(steps=len(df_post),
                                        exog=df_post.iloc[:, 1:],
                                        alpha=alpha)
        post_index = df_post.index

    # Compile summary statistics (in original space)
    pre_pred = unstandardize(predict.predicted_mean, orig_std_params)
    pre_pred.index = df_pre.index
    post_pred = unstandardize(forecast.predicted_mean, orig_std_params)
    post_pred.index = post_index
    point_pred = pd.concat([pre_pred, post_pred])

    pre_ci = unstandardize(predict.conf_int(alpha=alpha), orig_std_params)
    pre_ci.index = df_pre.index
    post_ci = unstandardize(forecast.conf_int(alpha=alpha), orig_std_params)
    post_ci.index = post_index
    ci = pd.concat([pre_ci, post_ci])

    point_pred_upper = ci["upper y"].to_frame()
    point_pred_lower = ci["lower y"].to_frame()

    response_index = data.index
    response = pd.DataFrame(data.iloc[:, 0])

    cum_response = np.cumsum(response)
    cum_pred = np.cumsum(point_pred)
    cum_pred_upper = np.cumsum(point_pred_upper)
    cum_pred_lower = np.cumsum(point_pred_lower)

    predictions = pd.concat([point_pred, point_pred_upper, point_pred_lower,
                             cum_pred, cum_pred_lower, cum_pred_upper],
                            axis=1)
    series = pd.concat([response, cum_response], axis=1).join(predictions,
                                                              lsuffix='l')
    series.columns = ["response", "cum_response", "point_pred",
                      "point_pred_upper", "point_pred_lower", "cum_pred",
                      "cum_pred_lower", "cum_pred_upper"]

    point_effect = (series["response"] - series["point_pred"]).to_frame()
    point_effect_upper = (
        series["response"] - series["point_pred_upper"]).to_frame()
    point_effect_lower = (
        series["response"] - series["point_pred_lower"]).to_frame()

    pre_first, pre_last = df_pre.index[0], df_pre.index[-1]

    def _cumulative_post_effect(effect):
        # Copy first so the point effect keeps its pre-period values; only
        # the cumulative version starts counting at the intervention.
        zeroed = effect.copy()
        zeroed.loc[pre_first:pre_last] = 0
        return np.cumsum(zeroed)

    cum_effect = _cumulative_post_effect(point_effect)
    cum_effect_upper = _cumulative_post_effect(point_effect_upper)
    cum_effect_lower = _cumulative_post_effect(point_effect_lower)

    # NOTE(review): the point-effect frames are concatenated as
    # (upper, lower) but labelled (lower, upper) below, whereas the
    # cumulative frames are concatenated in the same order as their labels.
    # Kept exactly as before because the intended direction is unclear --
    # confirm which bound each column should carry.
    series = pd.concat([series, point_effect, point_effect_upper,
                        point_effect_lower, cum_effect, cum_effect_lower,
                        cum_effect_upper], axis=1)

    # Create DataFrame of results
    series.columns = ["response", "cum_response", "point_pred",
                      "point_pred_upper", "point_pred_lower", "cum_pred",
                      "cum_pred_lower", "cum_pred_upper", "point_effect",
                      "point_effect_lower", "point_effect_upper",
                      "cum_effect", "cum_effect_lower", "cum_effect_upper"]
    series.index = response_index

    return {"series": series}
def compile_posterior_inferences(results, df_pre, df_post,
                                 post_period_response, alpha,
                                 orig_std_params, estimation):
    """Build the per-observation inference table for pre and post periods.

    Args:
        results: fitted results object exposing ``get_prediction``,
            ``get_forecast`` and ``model.nobs``.
        df_pre: pre-intervention data. When ``df_post`` is given, its first
            column is taken as the response and its index labels the pre rows.
        df_post: post-intervention frame (first column is the response, the
            remaining columns are passed as ``exog`` to the forecast), or
            ``None``.
        post_period_response: used only when ``df_post`` is ``None``; it is
            concatenated after ``df_pre`` to form the response.
        alpha: significance level forwarded to the confidence intervals.
        orig_std_params: forwarded to ``unstandardize``.
        estimation: fitting method; only ``"MLE"`` is implemented.

    Returns:
        dict with the single key ``"series"`` holding a DataFrame with the
        response, predictions, effects and their cumulative counterparts.

    Raises:
        NotImplementedError: if ``estimation`` is not ``"MLE"``.
    """
    if estimation != "MLE":
        raise NotImplementedError()

    # Compute point predictions of counterfactual (in standardized space)
    if df_post is not None:
        predict = results.get_prediction()
        forecast = results.get_forecast(steps=len(df_post),
                                        exog=df_post.iloc[:, 1:],
                                        alpha=alpha)
    else:
        pre_len = results.model.nobs - len(post_period_response)
        predict = results.get_prediction(end=pre_len - 1)
        forecast = results.get_prediction(start=pre_len)

    # Compile summary statistics (in original space)
    pre_pred = unstandardize(predict.predicted_mean, orig_std_params)
    post_pred = unstandardize(forecast.predicted_mean, orig_std_params)
    point_pred = pd.concat([pre_pred, post_pred], ignore_index=True)

    pre_ci = unstandardize(predict.conf_int(alpha=alpha), orig_std_params)
    post_ci = unstandardize(forecast.conf_int(alpha=alpha), orig_std_params)
    ci = pd.concat([pre_ci, post_ci], ignore_index=True)

    point_pred_upper = ci["upper y"].to_frame()
    point_pred_lower = ci["lower y"].to_frame()

    if df_post is not None:
        response = np.concatenate([df_pre.iloc[:, 0], df_post.iloc[:, 0]])
        response_index = np.concatenate([df_pre.index, df_post.index])
    else:
        response = np.concatenate([df_pre, post_period_response])
        response_index = pd.RangeIndex(0, len(response))

    response = unstandardize(response, orig_std_params)
    response = pd.DataFrame(response)

    cum_response = np.cumsum(response)
    cum_pred = np.cumsum(point_pred)
    cum_pred_upper = np.cumsum(point_pred_upper)
    cum_pred_lower = np.cumsum(point_pred_lower)

    observed = response.iloc[:, 0]
    point_effect = (observed - point_pred.iloc[:, 0]).to_frame()
    point_effect_upper = (observed - point_pred_upper.iloc[:, 0]).to_frame()
    point_effect_lower = (observed - point_pred_lower.iloc[:, 0]).to_frame()

    n_pre = len(pre_pred)

    def _cumulative_post_effect(effect):
        # Zero the pre-period rows on a *copy*. Assigning through a plain
        # alias would also wipe the pre-period values of the point effect
        # that is reported alongside the cumulative one.
        zeroed = effect.copy()
        zeroed.iloc[:n_pre] = 0
        return np.cumsum(zeroed)

    cum_effect = _cumulative_post_effect(point_effect)
    cum_effect_upper = _cumulative_post_effect(point_effect_upper)
    cum_effect_lower = _cumulative_post_effect(point_effect_lower)

    # Create DataFrame of results
    series = pd.concat([
        response, cum_response, point_pred, point_pred_upper,
        point_pred_lower, cum_pred, cum_pred_lower, cum_pred_upper,
        point_effect, point_effect_lower, point_effect_upper, cum_effect,
        cum_effect_lower, cum_effect_upper
    ], axis=1)
    series.columns = [
        "response", "cum_response", "point_pred", "point_pred_upper",
        "point_pred_lower", "cum_pred", "cum_pred_lower", "cum_pred_upper",
        "point_effect", "point_effect_lower", "point_effect_upper",
        "cum_effect", "cum_effect_lower", "cum_effect_upper"
    ]
    series.index = response_index

    return {"series": series}
def compile_posterior_inferences(results, data, df_pre, df_post,
                                 post_period_response, alpha,
                                 orig_std_params, estimation='MLE'):
    """Assemble predictions and effects for the pre and post periods.

    Args:
      results: trained UnobservedComponents model from the statsmodels
          package.
      data: pd.DataFrame with pre and post-intervention data (y and X); the
          first column is read as the response.
      df_pre: pd.DataFrame with the pre-intervention data.
      df_post: pd.DataFrame with the post-intervention data, or None when a
          customized model was trained.
      post_period_response: pd.DataFrame used in place of ``df_post`` when
          the latter is None. Note that in that case its index is replaced
          in place by a range starting right after the pre period.
      alpha: float significance level for the confidence intervals.
      orig_std_params: tuple of floats (mean, standard deviation) that was
          used to standardize the data.
      estimation: str selecting the fitting method; "MLE" by default and the
          only one implemented.

    Returns:
      dict with the key "series" mapping to the DataFrame of results.

    Raises:
      NotImplementedError: for any ``estimation`` other than "MLE".
    """
    if estimation != "MLE":
        raise NotImplementedError()

    # Predictions are still in standardized space at this point.
    if df_post is None:
        pre_len = results.model.nobs - len(post_period_response)
        predict = results.get_prediction(end=pre_len - 1)
        forecast = results.get_prediction(start=pre_len)
        df_post = post_period_response
        df_post.index = pd.RangeIndex(start=pre_len,
                                      stop=pre_len + len(df_post), step=1)
    else:
        predict = results.get_prediction()
        forecast = results.get_forecast(steps=len(df_post),
                                        exog=df_post.iloc[:, 1:],
                                        alpha=alpha)

    pre_labels = df_pre.index
    post_labels = df_post.index

    # Back to the original scale, stitched over the whole time span.
    pre_pred = unstandardize(predict.predicted_mean, orig_std_params)
    pre_pred.index = pre_labels
    post_pred = unstandardize(forecast.predicted_mean, orig_std_params)
    post_pred.index = post_labels
    point_pred = pd.concat([pre_pred, post_pred])

    pre_ci = unstandardize(predict.conf_int(alpha=alpha), orig_std_params)
    pre_ci.index = pre_labels
    post_ci = unstandardize(forecast.conf_int(alpha=alpha), orig_std_params)
    post_ci.index = post_labels
    bounds = pd.concat([pre_ci, post_ci])

    # First interval column is taken as the lower bound, second as the upper.
    point_pred_lower = bounds.iloc[:, 0].to_frame()
    point_pred_upper = bounds.iloc[:, 1].to_frame()

    response_index = data.index
    response = pd.DataFrame(data.iloc[:, 0])

    cum_response = np.cumsum(response)
    cum_pred = np.cumsum(point_pred)
    cum_pred_lower = np.cumsum(point_pred_lower)
    cum_pred_upper = np.cumsum(point_pred_upper)

    prediction_frames = [
        point_pred, point_pred_lower, point_pred_upper,
        cum_pred, cum_pred_lower, cum_pred_upper,
    ]
    observed = pd.concat([response, cum_response], axis=1)
    table = observed.join(pd.concat(prediction_frames, axis=1), lsuffix='l')
    table.columns = [
        "response", "cum_response",
        "point_pred", "point_pred_lower", "point_pred_upper",
        "cum_pred", "cum_pred_lower", "cum_pred_upper",
    ]

    point_effect = (table["response"] - table["point_pred"]).to_frame()
    point_effect_lower = (
        table["response"] - table["point_pred_lower"]).to_frame()
    point_effect_upper = (
        table["response"] - table["point_pred_upper"]).to_frame()

    first_pre, last_pre = pre_labels[0], pre_labels[-1]

    def _accumulate_after_pre(effect):
        # Cumulative effects only count from the intervention onwards, so
        # the pre-period rows of a copy are zeroed before summing.
        blanked = effect.copy()
        blanked.loc[first_pre:last_pre] = 0
        return np.cumsum(blanked)

    cum_effect = _accumulate_after_pre(point_effect)
    cum_effect_lower = _accumulate_after_pre(point_effect_lower)
    cum_effect_upper = _accumulate_after_pre(point_effect_upper)

    table = pd.concat([
        table,
        point_effect, point_effect_lower, point_effect_upper,
        cum_effect, cum_effect_lower, cum_effect_upper,
    ], axis=1)

    # Final column labels and index of the results frame.
    table.columns = [
        "response", "cum_response",
        "point_pred", "point_pred_lower", "point_pred_upper",
        "cum_pred", "cum_pred_lower", "cum_pred_upper",
        "point_effect", "point_effect_lower", "point_effect_upper",
        "cum_effect", "cum_effect_lower", "cum_effect_upper",
    ]
    table.index = response_index

    return {"series": table}