def _run_with_data(self, data, pre_period, post_period, model_args, alpha,
                   estimation):
    """Fit the model on `data` and compile posterior inferences.

    Args:
        data: pd.DataFrame with the response in the first column and at
            least one exogenous covariate in the remaining columns.
        pre_period: [start, end] labels delimiting the pre-intervention data.
        post_period: [start, end] labels delimiting the post-intervention
            data.
        model_args: dict of modeling options; "standardize_data" and
            "niter" are read here.
        estimation: estimation method forwarded to `model_fit`.
        alpha: tail-probability used for posterior credible intervals.

    Raises:
        ValueError: if `data` has a single column (no exogenous variables).
    """
    # Zoom in on data in modeling range; the model requires at least one
    # covariate next to the response.
    if data.shape[1] == 1:  # no exogenous values provided
        raise ValueError("data contains no exogenous variables")
    data_modeling = data.copy()
    df_pre = data_modeling.loc[pre_period[0]:pre_period[1], :]
    df_post = data_modeling.loc[post_period[0]:post_period[1], :]

    # Standardize all variables. (0, 1) acts as identity standardization
    # parameters, so later un-standardization is a no-op when disabled.
    orig_std_params = (0, 1)
    if model_args["standardize_data"]:
        sd_results = standardize_all_variables(data_modeling, pre_period,
                                               post_period)
        df_pre = sd_results["data_pre"]
        df_post = sd_results["data_post"]
        orig_std_params = sd_results["orig_std_params"]

    # Construct model and perform inference.
    model = construct_model(df_pre, model_args)
    trained_model = model_fit(model, estimation, model_args["niter"])
    # FIX: previously the unfitted model was stored in self.model and then
    # immediately overwritten by the fitted one; keep only the fitted model.
    self.model = trained_model
    inferences = compile_posterior_inferences(trained_model, data, df_pre,
                                              df_post, None, alpha,
                                              orig_std_params, estimation)

    # "append" to 'CausalImpact' object
    self.inferences = inferences["series"]
def _run_with_data(self, data, pre_period, post_period, model_args, alpha,
                   estimation):
    """Fit a model over the given periods and compile posterior inferences.

    Args:
        data: pd.DataFrame with the response in the first column and at
            least one exogenous covariate in the remaining columns.
        pre_period: [start, end] labels for the pre-intervention data.
            NOTE(review): element 0 may be mutated in place below.
        post_period: [start, end] labels for the post-intervention data.
        model_args: dict of modeling options; "standardize_data" and
            "niter" are read here.
        alpha: tail-probability used for posterior credible intervals.
        estimation: estimation method forwarded to `model_fit`.

    Raises:
        ValueError: if `data` has a single column (no exogenous variables).
    """
    # Zoom in on data in modeling range
    if data.shape[1] == 1:  # no exogenous values provided
        raise ValueError("data contains no exogenous variables")
    # Positions where column 1 (the first covariate) is null.
    # NOTE(review): despite the variable names below, `pd.isnull` selects
    # NULL entries, not non-null ones — confirm whether `pd.notnull` was
    # intended here.
    non_null = pd.isnull(data.iloc[:, 1]).to_numpy().nonzero()
    first_non_null = non_null[0]
    if first_non_null.size > 0:
        # NOTE(review): this mutates the caller's `pre_period` list in place.
        pre_period[0] = max(pre_period[0], data.index[first_non_null[0]])
    data_modeling = data.copy()
    df_pre = data_modeling.loc[pre_period[0]:pre_period[1], :]
    df_post = data_modeling.loc[post_period[0]:post_period[1], :]
    # Standardize all variables; (0, 1) acts as identity standardization
    # parameters when standardization is disabled.
    orig_std_params = (0, 1)
    if model_args["standardize_data"]:
        sd_results = standardize_all_variables(data_modeling, pre_period,
                                               post_period)
        df_pre = sd_results["data_pre"]
        df_post = sd_results["data_post"]
        orig_std_params = sd_results["orig_std_params"]
    # Construct model and perform inference
    ucm_model = construct_model(self, df_pre, model_args)
    res = model_fit(self, ucm_model, estimation, model_args["niter"])
    inferences = compile_posterior_inferences(res, data, df_pre, df_post,
                                              None, alpha, orig_std_params,
                                              estimation)
    # "append" to 'CausalImpact' object
    self.inferences = inferences["series"]
    # NOTE(review): the unfitted ucm model (not the fit result `res`) is
    # stored here — confirm this is intentional.
    self.model = ucm_model
def _run_with_data(self, data, pre_period, post_period, model_args, alpha):
    """Fit a UCM on `data`, compile posterior inferences, and attach the
    results (series, summary, report, model, params) to this object.

    Args:
        data: pd.DataFrame with the response and covariates; positional
            (integer) pre/post period boundaries are used here.
        pre_period: [start, end] positional bounds of the pre-period.
            NOTE: element 0 may be mutated in place below.
        post_period: [start, end] positional bounds of the post-period.
        model_args: dict of modeling options; "standardize_data" is read.
        alpha: tail-probability used for posterior credible intervals.

    Raises:
        ValueError: if the merged inference series length mismatches `data`.
    """
    # Zoom in on data in modeling range.
    # FIX: Series.nonzero() was removed in modern pandas; go through numpy.
    # NOTE(review): `pd.isnull` selects NULL positions here despite the
    # original variable name "first_non_null" — confirm intent.
    null_positions = pd.isnull(data.iloc[:, 1]).to_numpy().nonzero()[0]
    if len(null_positions) > 0:
        pre_period[0] = max(pre_period[0], data.index[null_positions[0]])
    data_modeling = data.iloc[pre_period[0]:post_period[1], :]

    # Standardize all variables?
    orig_std_params = np.identity
    if model_args["standardize_data"]:
        sd_results = standardize_all_variables(data_modeling)
        data_modeling = sd_results["data"]
        orig_std_params = sd_results["orig_std_params"]

    # Set observed response in post-period to NA
    data_modeling.iloc[post_period[0]:, 1] = np.nan

    # Construct model and perform inference
    ucm_model = construct_model(data_modeling, model_args)

    # Compile posterior inferences
    if ucm_model is not None:
        data_post = data.iloc[post_period[0]:post_period[1], :]
        inferences = compile_posterior_inferences(ucm_model, data_post,
                                                  alpha, orig_std_params)
    else:
        inferences = compile_na_inferences(data.iloc[:, 1])

    # Extend <series> to cover original range
    # (padding with NA as necessary)
    empty = pd.DataFrame(index=data.index)
    inferences["series"] = pd.merge(inferences["series"], empty,
                                    left_index=True, right_index=True,
                                    how="outer")
    if len(inferences["series"]) != len(data):
        raise ValueError("""inferences['series'] must have the same number
                         of rows as 'data'""")

    # Replace <y.model> by full original response.
    # FIX: `data[:, 0]` raises TypeError on a DataFrame; positional access
    # requires .iloc.
    inferences["series"].iloc[:, 0] = data.iloc[:, 0]

    # Assign response-variable names.
    # FIX: assigning to `.name` of an `.iloc` slice was a no-op (the slice
    # is a temporary); rename the actual columns instead.
    column_names = list(inferences["series"].columns)
    column_names[0] = "response"
    column_names[1] = "cum.response"
    inferences["series"].columns = column_names

    # Return 'CausalImpact' object
    params = {"pre_period": pre_period, "post_period": post_period,
              "model_args": model_args, "alpha": alpha}
    self.inferences = inferences["series"]
    self.summary = inferences["summary"]
    self.report = inferences["report"]
    # FIX: `model` was an undefined name (NameError at runtime); store the
    # constructed ucm model.
    self.model = ucm_model
    self.params = params
def _run_with_ucm(self, ucm_model, post_period_response, alpha, model_args,
                  estimation):
    """Runs an impact analysis on top of a ucm model.

    Args:
        ucm_model: Model as returned by UnobservedComponents(), in which
            the data during the post-period was set to NA
        post_period_response: observed data during the post-intervention
            period
        alpha: tail-probabilities of posterior intervals
        model_args: dict of modeling options; "niter" is read here.
        estimation: estimation method forwarded to `model_fit`.
    """
    # The pre-period endog is everything except the trailing observations
    # covered by the post-period response (which were NA'd before fitting).
    df_pre = ucm_model.data.orig_endog[:-len(post_period_response)]
    df_pre = pd.DataFrame(df_pre)
    post_period_response = pd.DataFrame(post_period_response)
    data = pd.DataFrame(
        np.concatenate([df_pre.values, post_period_response.values]))

    # No standardization was applied in this code path, so (0, 1) acts as
    # identity un-standardization parameters.
    orig_std_params = (0, 1)

    fitted_model = model_fit(ucm_model, estimation, model_args["niter"])

    # Compile posterior inferences
    inferences = compile_posterior_inferences(fitted_model, data, df_pre,
                                              None, post_period_response,
                                              alpha, orig_std_params,
                                              estimation)

    # Guess <pre_period> and <post_period> from the observation vector.
    # These will be needed for plotting period boundaries in plot().
    # FIX: dropped the unused `pre_len` alias and the dead commented-out
    # infer_period_indices_from_data block.
    obs_inter = fitted_model.model.nobs - len(post_period_response)
    self.params["pre_period"] = [0, obs_inter - 1]
    self.params["post_period"] = [obs_inter, -1]

    self.data = pd.concat([df_pre, post_period_response])
    self.inferences = inferences["series"]
    self.model = fitted_model
def _process_posterior_inferences(self) -> None:
    """Run `inferrer` to process data forecasts and predictions.

    Results feeds the summary table as well as the plotting
    functionalities.
    """
    # Model on the normalized pre-data when available, otherwise on the
    # raw pre-data; TFP expects float32 inputs.
    if self.normed_pre_data is None:
        source_data = self.pre_data
    else:
        source_data = self.normed_pre_data
    source_data = source_data.astype(np.float32)
    self.observed_time_series = pd.DataFrame(source_data.iloc[:, 0])

    forecast_steps = len(self.post_data)
    self.one_step_dist = cimodel.build_one_step_dist(
        self.model, self.observed_time_series, self.model_samples)
    self.posterior_dist = cimodel.build_posterior_dist(
        self.model, self.observed_time_series, self.model_samples,
        forecast_steps)
    self.inferences = inferrer.compile_posterior_inferences(
        self.pre_data, self.post_data, self.one_step_dist,
        self.posterior_dist, self.mu_sig, self.alpha,
        self.model_args['niter'])
def _run_with_ucm(self, ucm_model, post_period_response, alpha):
    """Runs an impact analysis on top of a ucm model.

    Args:
        ucm_model: Model as returned by UnobservedComponents(), in which
            the data during the post-period was set to NA
        post_period_response: observed data during the post-intervention
            period
        alpha: tail-probabilities of posterior intervals

    Raises:
        ValueError: if the period boundaries cannot be inferred from the
            model's observation vector.
    """
    # Guess <pre_period> and <post_period> from the observation vector
    # These will be needed for plotting period boundaries in plot().
    y = ucm_model["original_series"]
    try:
        indices = infer_period_indices_from_data(y)
    except ValueError:
        raise ValueError("ucm_model must have been fitted on data where "
                         "the values in the post-intervention period "
                         "have been set to NA")
    # FIX: `pre_period`/`post_period` were undefined names (NameError);
    # they come from the inferred indices. TODO(review): confirm
    # infer_period_indices_from_data returns (pre_period, post_period)
    # in this order.
    pre_period, post_period = indices

    # Compile posterior inferences
    inferences = compile_posterior_inferences(ucm_model=ucm_model,
                                              y_post=post_period_response,
                                              alpha=alpha)

    # Assign response-variable names
    # N.B. The modeling period comprises everything found in ucm, so the
    # actual observed data is equal to the data in the modeling period
    inferences["series"].columns = ["response", "cum_response"]

    # Return 'CausalImpact' object
    # FIX: `model_args` is not available in this code path; record None
    # instead of raising NameError.
    params = {"pre_period": pre_period, "post_period": post_period,
              "model_args": None, "alpha": alpha}
    self.inferences = inferences["series"]
    self.summary = inferences["summary"]
    self.report = inferences["report"]
    # FIX: `model` was an undefined name; store the ucm model analyzed here.
    self.model = ucm_model
    self.params = params
def test_compile_posterior_inferences():
    """End-to-end check of `inferrer.compile_posterior_inferences`.

    Builds small stub one-step and posterior distributions with known
    mean/stddev, runs the compiler, and asserts each output column
    (predictions, cumulative sums, effects, and their credible bounds)
    against expectations computed independently here.
    """
    data = pd.DataFrame(np.arange(10))
    pre_data = data.iloc[:3]
    post_data = data.iloc[7:]
    one_step_mean = 3
    one_step_stddev = 1.5
    posterior_mean = 7.5
    posterior_stddev = 1.5
    alpha = 0.05
    mu = 1
    sig = 2
    mu_sig = (mu, sig)
    niter = 10

    # Stub for the one-step-ahead predictive distribution over the
    # pre-period: constant mean and stddev.
    class OneStepDist:
        def mean(self):
            return np.ones((len(pre_data), 1)) * one_step_mean

        def stddev(self):
            return np.ones((len(pre_data), 1)) * one_step_stddev

    # Stub for the posterior forecast distribution over the post-period:
    # deterministic samples plus constant mean and stddev.
    class PosteriorDist:
        def sample(self, niter):
            tmp = tf.convert_to_tensor(
                np.tile(np.arange(start=7.1, stop=10.1, step=1),
                        (niter, 1)) +
                np.arange(niter).reshape(-1, 1),
                dtype=np.float32)
            tmp = tmp[..., tf.newaxis]
            return tmp

        def mean(self):
            return np.ones((len(post_data), 1)) * posterior_mean

        def stddev(self):
            return np.ones((len(post_data), 1)) * posterior_stddev

    one_step_dist = OneStepDist()
    posterior_dist = PosteriorDist()
    inferences = inferrer.compile_posterior_inferences(pre_data,
                                                       post_data,
                                                       one_step_dist,
                                                       posterior_dist,
                                                       mu_sig,
                                                       alpha=alpha,
                                                       niter=niter)
    # Union of the pre-period and post-period indices.
    expected_index = np.array([0, 1, 2, 7, 8, 9])
    # test complete_preds_means
    expec_complete_preds_means = pd.DataFrame(data=np.array(
        [7, 7, 7, 16, 16, 16]),
        index=expected_index,
        dtype=np.float64,
        columns=['complete_preds_means'])
    pd.testing.assert_series_equal(
        expec_complete_preds_means['complete_preds_means'],
        inferences['complete_preds_means'])
    # test complete_preds_lower
    pre_preds_lower = (np.array([1, 1, 1]) * one_step_mean -
                       get_z_score(1 - alpha / 2) * one_step_stddev) * sig + mu
    # Outlier masking mirrors the production code's treatment of extreme
    # pre-period bounds.
    pre_preds_lower[
        np.abs(pre_preds_lower) > np.quantile(pre_preds_lower, 0.5) +
        3 * np.std(pre_preds_lower)] = np.nan
    post_preds_lower = (
        np.array([1, 1, 1]) * posterior_mean -
        get_z_score(1 - alpha / 2) * posterior_stddev) * sig + mu
    expec_complete_preds_lower = np.concatenate(
        [pre_preds_lower, post_preds_lower])
    expec_complete_preds_lower = pd.DataFrame(
        data=expec_complete_preds_lower,
        index=expected_index,
        dtype=np.float64,
        columns=['complete_preds_lower'])
    pd.testing.assert_series_equal(
        expec_complete_preds_lower['complete_preds_lower'],
        inferences['complete_preds_lower'])
    # test complete_preds_upper
    pre_preds_upper = (np.array([1, 1, 1]) * one_step_mean +
                       get_z_score(1 - alpha / 2) * one_step_stddev) * sig + mu
    pre_preds_upper[
        np.abs(pre_preds_upper) > np.quantile(pre_preds_upper, 0.5) +
        3 * np.std(pre_preds_upper)] = np.nan
    post_preds_upper = (
        np.array([1, 1, 1]) * posterior_mean +
        get_z_score(1 - alpha / 2) * posterior_stddev) * sig + mu
    expec_complete_preds_upper = np.concatenate(
        [pre_preds_upper, post_preds_upper])
    expec_complete_preds_upper = pd.DataFrame(
        data=expec_complete_preds_upper,
        index=expected_index,
        dtype=np.float64,
        columns=['complete_preds_upper'])
    pd.testing.assert_series_equal(
        expec_complete_preds_upper['complete_preds_upper'],
        inferences['complete_preds_upper'])
    # test post_preds_means
    expec_post_preds_means = pd.DataFrame(
        data=np.array([np.nan] * 3 +
                      [posterior_mean * sig + mu] * len(pre_data)),
        index=expected_index,
        dtype=np.float64,
        columns=['post_preds_means'])
    pd.testing.assert_series_equal(expec_post_preds_means['post_preds_means'],
                                   inferences['post_preds_means'])
    # test post_preds_lower
    post_preds_lower = (
        np.array([np.nan] * 3 + [1, 1, 1]) * posterior_mean -
        get_z_score(1 - alpha / 2) * posterior_stddev) * sig + mu
    expec_post_preds_lower = pd.DataFrame(data=post_preds_lower,
                                          index=expected_index,
                                          dtype=np.float64,
                                          columns=['post_preds_lower'])
    pd.testing.assert_series_equal(expec_post_preds_lower['post_preds_lower'],
                                   inferences['post_preds_lower'])
    # test post_preds_upper
    post_preds_upper = (
        np.array([np.nan] * 3 + [1, 1, 1]) * posterior_mean +
        get_z_score(1 - alpha / 2) * posterior_stddev) * sig + mu
    expec_post_preds_upper = pd.DataFrame(data=post_preds_upper,
                                          index=expected_index,
                                          dtype=np.float64,
                                          columns=['post_preds_upper'])
    pd.testing.assert_series_equal(expec_post_preds_upper['post_preds_upper'],
                                   inferences['post_preds_upper'])
    # test post_cum_Y
    # Cumulative observed response: NaN through the pre-period except a 0
    # anchor on its final point, then the running sum of post_data.
    post_cum_y = np.concatenate([[np.nan] * (len(pre_data) - 1) + [0],
                                 np.cumsum(post_data.iloc[:, 0])])
    expec_post_cum_y = pd.DataFrame(data=post_cum_y,
                                    index=expected_index,
                                    dtype=np.float64,
                                    columns=['post_cum_y'])
    pd.testing.assert_series_equal(expec_post_cum_y['post_cum_y'],
                                   inferences['post_cum_y'])
    # test post_cum_preds_means
    expec_post_cum_preds_means = np.cumsum(expec_post_preds_means)
    expec_post_cum_preds_means.rename(
        columns={'post_preds_means': 'post_cum_preds_means'}, inplace=True)
    expec_post_cum_preds_means['post_cum_preds_means'][len(pre_data) - 1] = 0
    pd.testing.assert_series_equal(
        expec_post_cum_preds_means['post_cum_preds_means'],
        inferences['post_cum_preds_means'])
    # test post_cum_preds_lower
    # Lower/upper bounds of the cumulative prediction come from the alpha/2
    # percentiles over the cumulated, un-standardized posterior samples.
    post_cum_preds_lower, post_cum_preds_upper = np.percentile(np.cumsum(
        maybe_unstandardize(np.squeeze(posterior_dist.sample(niter)),
                            mu_sig),
        axis=1), [100 * alpha / 2, 100 - 100 * alpha / 2], axis=0)
    post_cum_preds_lower = np.concatenate(
        [np.array([np.nan] * (len(pre_data) - 1) + [0]),
         post_cum_preds_lower])
    expec_post_cum_preds_lower = pd.DataFrame(
        data=post_cum_preds_lower,
        index=expected_index,
        dtype=np.float64,
        columns=['post_cum_preds_lower'])
    pd.testing.assert_series_equal(
        expec_post_cum_preds_lower['post_cum_preds_lower'],
        inferences['post_cum_preds_lower'])
    # test post_cum_preds_upper
    post_cum_preds_upper = np.concatenate(
        [np.array([np.nan] * (len(pre_data) - 1) + [0]),
         post_cum_preds_upper])
    expec_post_cum_preds_upper = pd.DataFrame(
        data=post_cum_preds_upper,
        index=expected_index,
        dtype=np.float64,
        columns=['post_cum_preds_upper'])
    pd.testing.assert_series_equal(
        expec_post_cum_preds_upper['post_cum_preds_upper'],
        inferences['post_cum_preds_upper'])
    # test point_effects_means
    net_data = pd.concat([pre_data, post_data])
    expec_point_effects_means = net_data.iloc[:, 0] - inferences[
        'complete_preds_means']
    expec_point_effects_means = pd.DataFrame(
        data=expec_point_effects_means,
        index=expected_index,
        dtype=np.float64,
        columns=['point_effects_means'])
    pd.testing.assert_series_equal(
        expec_point_effects_means['point_effects_means'],
        inferences['point_effects_means'])
    # test point_effects_lower
    # Effect bounds invert the prediction bounds: effect lower uses the
    # prediction upper bound and vice versa.
    expec_point_effects_lower = net_data.iloc[:, 0] - inferences[
        'complete_preds_upper']
    expec_point_effects_lower = pd.DataFrame(
        data=expec_point_effects_lower,
        index=expected_index,
        dtype=np.float64,
        columns=['point_effects_lower'])
    pd.testing.assert_series_equal(
        expec_point_effects_lower['point_effects_lower'],
        inferences['point_effects_lower'])
    # test point_effects_upper
    expec_point_effects_upper = net_data.iloc[:, 0] - inferences[
        'complete_preds_lower']
    expec_point_effects_upper = pd.DataFrame(
        data=expec_point_effects_upper,
        index=expected_index,
        dtype=np.float64,
        columns=['point_effects_upper'])
    pd.testing.assert_series_equal(
        expec_point_effects_upper['point_effects_upper'],
        inferences['point_effects_upper'])
    # test post_cum_effects_means
    post_effects_means = post_data.iloc[:, 0] - inferences['post_preds_means']
    post_effects_means.iloc[len(pre_data) - 1] = 0
    expec_post_cum_effects_means = np.cumsum(post_effects_means)
    expec_post_cum_effects_means = pd.DataFrame(
        data=expec_post_cum_effects_means,
        index=expected_index,
        dtype=np.float64,
        columns=['post_cum_effects_means'])
    pd.testing.assert_series_equal(
        expec_post_cum_effects_means['post_cum_effects_means'],
        inferences['post_cum_effects_means'])
    # test post_cum_effects_lower
    post_cum_effects_lower, post_cum_effects_upper = np.percentile(np.cumsum(
        post_data.iloc[:, 0].values -
        maybe_unstandardize(np.squeeze(posterior_dist.sample(niter)),
                            mu_sig),
        axis=1), [100 * alpha / 2, 100 - 100 * alpha / 2], axis=0)
    post_cum_effects_lower = np.concatenate([
        np.array([np.nan] * (len(pre_data) - 1) + [0]),
        post_cum_effects_lower
    ])
    expec_post_cum_effects_lower = pd.DataFrame(
        data=post_cum_effects_lower,
        index=expected_index,
        dtype=np.float64,
        columns=['post_cum_effects_lower'])
    pd.testing.assert_series_equal(
        expec_post_cum_effects_lower['post_cum_effects_lower'],
        inferences['post_cum_effects_lower'])
    # test post_cum_effects_upper
    post_cum_effects_upper = np.concatenate([
        np.array([np.nan] * (len(pre_data) - 1) + [0]),
        post_cum_effects_upper
    ])
    expec_post_cum_effects_upper = pd.DataFrame(
        data=post_cum_effects_upper,
        index=expected_index,
        dtype=np.float64,
        columns=['post_cum_effects_upper'])
    pd.testing.assert_series_equal(
        expec_post_cum_effects_upper['post_cum_effects_upper'],
        inferences['post_cum_effects_upper'])