Пример #1
0
    def _run_with_data(self, data, pre_period, post_period, model_args, alpha,
                       estimation):
        """Fit the model on the pre-period and compile posterior inferences.

        Results are attached to the instance (`self.model`,
        `self.inferences`); nothing is returned.
        """
        # A single column means there is only the response -- no covariates.
        if data.shape[1] == 1:
            raise ValueError("data contains no exogenous variables")
        working = data.copy()

        pre_frame = working.loc[pre_period[0]:pre_period[1], :]
        post_frame = working.loc[post_period[0]:post_period[1], :]

        # Optionally standardize every variable before modeling.
        std_params = (0, 1)
        if model_args["standardize_data"]:
            standardized = standardize_all_variables(working, pre_period,
                                                     post_period)
            pre_frame = standardized["data_pre"]
            post_frame = standardized["data_post"]
            std_params = standardized["orig_std_params"]

        # Build the model; keep a handle to it even before fitting so the
        # instance always exposes the most recent model object.
        self.model = construct_model(pre_frame, model_args)

        fitted = model_fit(self.model, estimation, model_args["niter"])
        self.model = fitted

        results = compile_posterior_inferences(fitted, data, pre_frame,
                                               post_frame, None, alpha,
                                               std_params, estimation)

        # "Append" the inference series to this 'CausalImpact' object.
        self.inferences = results["series"]
Пример #2
0
    def _run_with_data(self, data, pre_period, post_period, model_args, alpha,
                       estimation):
        """Fit a ucm model on the pre-period data and compile inferences.

        May move `pre_period[0]` forward (mutating the caller's list) to
        skip leading rows whose column 1 is missing.
        """
        # A lone column means there are no exogenous variables to regress on.
        if data.shape[1] == 1:
            raise ValueError("data contains no exogenous variables")
        # Positions where column 1 is missing; push the pre-period start
        # past the first of them if any exist.
        missing_idx = pd.isnull(data.iloc[:, 1]).to_numpy().nonzero()[0]
        if missing_idx.size > 0:
            pre_period[0] = max(pre_period[0], data.index[missing_idx[0]])
        working = data.copy()
        pre_frame = working.loc[pre_period[0]:pre_period[1], :]
        post_frame = working.loc[post_period[0]:post_period[1], :]

        # Optionally standardize all variables.
        std_params = (0, 1)
        if model_args["standardize_data"]:
            standardized = standardize_all_variables(working, pre_period,
                                                     post_period)
            pre_frame = standardized["data_pre"]
            post_frame = standardized["data_post"]
            std_params = standardized["orig_std_params"]

        # Construct the model and run the fit.
        ucm_model = construct_model(self, pre_frame, model_args)
        res = model_fit(self, ucm_model, estimation, model_args["niter"])

        results = compile_posterior_inferences(res, data, pre_frame,
                                               post_frame, None, alpha,
                                               std_params, estimation)

        # "Append" results to this 'CausalImpact' object.
        self.inferences = results["series"]
        self.model = ucm_model
Пример #3
0
    def _run_with_data(self, data, pre_period, post_period, model_args, alpha):
        """Fit the model on `data` and compile posterior inferences.

        Args:
          data: pd.DataFrame holding the response and covariates.
                NOTE(review): column 1 is checked for missing values and
                NaN-masked below, while column 0 is treated as the response
                when rebuilding the output series -- confirm the intended
                column layout against the callers.
          pre_period: [start, end] bounds of the pre-intervention period;
                      mutated in place if leading values are missing.
          post_period: [start, end] bounds of the post-intervention period.
          model_args: dict of modeling options ("standardize_data", ...).
          alpha: tail-probability for posterior intervals.

        Raises:
          ValueError: if the compiled series does not cover every row of
                      `data`.
        """
        # Zoom in on data in modeling range.  (.nonzero() on a Series was
        # removed in pandas 1.0 -- operate on the underlying numpy array.)
        null_positions = pd.isnull(data.iloc[:, 1]).to_numpy().nonzero()[0]
        if len(null_positions) > 0:
            pre_period[0] = max(pre_period[0], data.index[null_positions[0]])
        # Copy so the NaN-masking below cannot mutate the caller's frame.
        data_modeling = data.iloc[pre_period[0]:post_period[1], :].copy()

        # Standardize all variables?
        orig_std_params = np.identity
        if model_args["standardize_data"]:
            sd_results = standardize_all_variables(data_modeling)
            data_modeling = sd_results["data"]
            orig_std_params = sd_results["orig_std_params"]

        # Set observed response in post-period to NA
        data_modeling.iloc[post_period[0]:, 1] = np.nan

        # Construct model and perform inference
        ucm_model = construct_model(data_modeling, model_args)

        # Compile posterior inferences
        if ucm_model is not None:
            data_post = data.iloc[post_period[0]:post_period[1], :]
            inferences = compile_posterior_inferences(ucm_model, data_post,
                                                      alpha, orig_std_params)
        else:
            inferences = compile_na_inferences(data.iloc[:, 1])

        # Extend <series> to cover original range
        # (padding with NA as necessary)
        empty = pd.DataFrame(index=data.index)
        inferences["series"] = pd.merge(inferences["series"], empty,
                                        left_index=True, right_index=True,
                                        how="outer")
        if len(inferences["series"]) != len(data):
            raise ValueError("""inferences['series'] must have the same number
                             of rows as 'data'""")

        # Replace <y.model> by full original response.  The previous
        # `data[:, 0]` raises on a DataFrame -- positional access needs .iloc.
        inferences["series"].iloc[:, 0] = data.iloc[:, 0].values

        # Assign response-variable names.  Assigning to `.iloc[:, i].name`
        # only renames a temporary copy, so rename the columns directly.
        renamed = list(inferences["series"].columns)
        renamed[0] = "response"
        renamed[1] = "cum.response"
        inferences["series"].columns = renamed

        # Return 'CausalImpact' object
        params = {"pre_period": pre_period, "post_period": post_period,
                  "model_args": model_args, "alpha": alpha}

        self.inferences = inferences["series"]
        self.summary = inferences["summary"]
        self.report = inferences["report"]
        # `model` was undefined here (NameError); the constructed ucm model
        # is what this method should expose.
        self.model = ucm_model
        self.params = params
Пример #4
0
    def _run_with_ucm(self, ucm_model, post_period_response, alpha, model_args,
                      estimation):
        """ Runs an impact analysis on top of a ucm model.

           Args:
             ucm_model: Model as returned by UnobservedComponents(),
                        in which the data during the post-period was set to NA
             post_period_response: observed data during the post-intervention
                                   period
             alpha: tail-probabilities of posterior intervals
             model_args: dict of modeling options; only "niter" is read here
             estimation: estimation method forwarded to model_fit()"""
        # Pre-period data is everything in the model's endogenous series
        # before the post-period observations.
        df_pre = ucm_model.data.orig_endog[:-len(post_period_response)]
        df_pre = pd.DataFrame(df_pre)

        post_period_response = pd.DataFrame(post_period_response)

        data = pd.DataFrame(
            np.concatenate([df_pre.values, post_period_response.values]))

        # Identity standardization: data is not standardized in this path.
        orig_std_params = (0, 1)

        fitted_model = model_fit(ucm_model, estimation, model_args["niter"])

        # Compile posterior inferences
        inferences = compile_posterior_inferences(fitted_model, data, df_pre,
                                                  None, post_period_response,
                                                  alpha, orig_std_params,
                                                  estimation)

        # Index of the first post-period observation.
        obs_inter = fitted_model.model.nobs - len(post_period_response)

        # NOTE(review): assumes self.params already exists on the instance.
        self.params["pre_period"] = [0, obs_inter - 1]
        self.params["post_period"] = [obs_inter, -1]
        self.data = pd.concat([df_pre, post_period_response])
        self.inferences = inferences["series"]
        self.model = fitted_model
Пример #5
0
    def _process_posterior_inferences(self) -> None:
        """
        Run `inferrer` to process data forecasts and predictions. Results feed
        the summary table as well as the plotting functionalities.
        """
        # Work on the normalized pre-period data when available, otherwise
        # fall back to the raw pre-period data.
        if self.normed_pre_data is None:
            base = self.pre_data
        else:
            base = self.normed_pre_data
        base = base.astype(np.float32)
        # Only the response (first) column feeds the time-series model.
        self.observed_time_series = pd.DataFrame(base.iloc[:, 0])

        forecast_steps = len(self.post_data)
        self.one_step_dist = cimodel.build_one_step_dist(
            self.model, self.observed_time_series, self.model_samples)
        self.posterior_dist = cimodel.build_posterior_dist(
            self.model, self.observed_time_series, self.model_samples,
            forecast_steps)
        self.inferences = inferrer.compile_posterior_inferences(
            self.pre_data, self.post_data, self.one_step_dist,
            self.posterior_dist, self.mu_sig, self.alpha,
            self.model_args['niter'])
Пример #6
0
    def _run_with_ucm(self, ucm_model, post_period_response, alpha):
        """ Runs an impact analysis on top of a ucm model.

           Args:
             ucm_model: Model as returned by UnobservedComponents(),
                        in which the data during the post-period was set to NA
             post_period_response: observed data during the post-intervention
                                   period
             alpha: tail-probabilities of posterior intervals"""
        # Guess <pre_period> and <post_period> from the observation vector
        # These will be needed for plotting period boundaries in plot().
        y = ucm_model["original_series"]
        try:
            # Previously the result was bound to an unused `indices` while
            # `pre_period`/`post_period` below were undefined (NameError).
            # NOTE(review): assumes the helper returns a (pre, post) pair --
            # confirm against infer_period_indices_from_data().
            pre_period, post_period = infer_period_indices_from_data(y)
        except ValueError:
            raise ValueError("ucm_model must have been fitted on data where \
                             the values in the post-intervention period have \
                             been set to NA")

        # Compile posterior inferences
        inferences = compile_posterior_inferences(ucm_model=ucm_model,
                                                  y_post=post_period_response,
                                                  alpha=alpha)

        # Assign response-variable names
        # N.B. The modeling period comprises everything found in ucm, so the
        # actual observed data is equal to the data in the modeling period
        inferences["series"].columns = ["response", "cum_response"]

        # Return 'CausalImpact' object
        # `model_args` is not available in this code path, so record None.
        params = {"pre_period": pre_period, "post_period": post_period,
                  "model_args": None, "alpha": alpha}

        self.inferences = inferences["series"]
        self.summary = inferences["summary"]
        self.report = inferences["report"]
        # `model` was undefined here (NameError); expose the ucm model.
        self.model = ucm_model
        self.params = params
Пример #7
0
def test_compile_posterior_inferences():
    """End-to-end check of inferrer.compile_posterior_inferences.

    Feeds stub one-step and posterior distributions with constant
    mean/stddev, then re-derives every output column of the inferences
    frame by hand and asserts equality column by column.
    """
    # 3 pre-period points (0..2) and 3 post-period points (7..9).
    data = pd.DataFrame(np.arange(10))
    pre_data = data.iloc[:3]
    post_data = data.iloc[7:]
    one_step_mean = 3
    one_step_stddev = 1.5
    posterior_mean = 7.5
    posterior_stddev = 1.5
    alpha = 0.05
    # (mu, sig) used by maybe_unstandardize: x * sig + mu.
    mu = 1
    sig = 2
    mu_sig = (mu, sig)
    niter = 10

    # Stub of the one-step-ahead predictive distribution over the pre-period.
    class OneStepDist:
        def mean(self):
            return np.ones((len(pre_data), 1)) * one_step_mean

        def stddev(self):
            return np.ones((len(pre_data), 1)) * one_step_stddev

    # Stub of the posterior predictive distribution over the post-period.
    class PosteriorDist:
        def sample(self, niter):
            # niter deterministic "draws": row i is [7.1, 8.1, 9.1] + i.
            tmp = tf.convert_to_tensor(
                np.tile(np.arange(start=7.1, stop=10.1, step=1),
                        (niter, 1)) + np.arange(niter).reshape(-1, 1),
                dtype=np.float32)
            tmp = tmp[..., tf.newaxis]
            return tmp

        def mean(self):
            return np.ones((len(post_data), 1)) * posterior_mean

        def stddev(self):
            return np.ones((len(post_data), 1)) * posterior_stddev

    one_step_dist = OneStepDist()
    posterior_dist = PosteriorDist()
    inferences = inferrer.compile_posterior_inferences(pre_data,
                                                       post_data,
                                                       one_step_dist,
                                                       posterior_dist,
                                                       mu_sig,
                                                       alpha=alpha,
                                                       niter=niter)

    # Output index is pre-period labels followed by post-period labels.
    expected_index = np.array([0, 1, 2, 7, 8, 9])
    # test complete_preds_means
    # Pre: 3 * 2 + 1 = 7; post: 7.5 * 2 + 1 = 16 (unstandardized).
    expec_complete_preds_means = pd.DataFrame(data=np.array(
        [7, 7, 7, 16, 16, 16]),
                                              index=expected_index,
                                              dtype=np.float64,
                                              columns=['complete_preds_means'])
    pd.testing.assert_series_equal(
        expec_complete_preds_means['complete_preds_means'],
        inferences['complete_preds_means'])
    # test complete_preds_lower
    pre_preds_lower = (np.array([1, 1, 1]) * one_step_mean -
                       get_z_score(1 - alpha / 2) * one_step_stddev) * sig + mu
    # Mirror the inferrer's outlier masking: values beyond median + 3*std
    # of the pre-period band are replaced with NaN.
    pre_preds_lower[
        np.abs(pre_preds_lower) > np.quantile(pre_preds_lower, 0.5) +
        3 * np.std(pre_preds_lower)] = np.nan
    post_preds_lower = (
        np.array([1, 1, 1]) * posterior_mean -
        get_z_score(1 - alpha / 2) * posterior_stddev) * sig + mu
    expec_complete_preds_lower = np.concatenate(
        [pre_preds_lower, post_preds_lower])
    expec_complete_preds_lower = pd.DataFrame(data=expec_complete_preds_lower,
                                              index=expected_index,
                                              dtype=np.float64,
                                              columns=['complete_preds_lower'])
    pd.testing.assert_series_equal(
        expec_complete_preds_lower['complete_preds_lower'],
        inferences['complete_preds_lower'])
    # test complete_preds_upper
    pre_preds_upper = (np.array([1, 1, 1]) * one_step_mean +
                       get_z_score(1 - alpha / 2) * one_step_stddev) * sig + mu
    # Same outlier masking as for the lower band above.
    pre_preds_upper[
        np.abs(pre_preds_upper) > np.quantile(pre_preds_upper, 0.5) +
        3 * np.std(pre_preds_upper)] = np.nan
    post_preds_upper = (
        np.array([1, 1, 1]) * posterior_mean +
        get_z_score(1 - alpha / 2) * posterior_stddev) * sig + mu
    expec_complete_preds_upper = np.concatenate(
        [pre_preds_upper, post_preds_upper])
    expec_complete_preds_upper = pd.DataFrame(data=expec_complete_preds_upper,
                                              index=expected_index,
                                              dtype=np.float64,
                                              columns=['complete_preds_upper'])
    pd.testing.assert_series_equal(
        expec_complete_preds_upper['complete_preds_upper'],
        inferences['complete_preds_upper'])
    # test post_preds_means
    # NOTE(review): uses len(pre_data) for the post-period count; both
    # happen to be 3 here -- presumably len(post_data) was intended.
    expec_post_preds_means = pd.DataFrame(
        data=np.array([np.nan] * 3 +
                      [posterior_mean * sig + mu] * len(pre_data)),
        index=expected_index,
        dtype=np.float64,
        columns=['post_preds_means'])
    pd.testing.assert_series_equal(expec_post_preds_means['post_preds_means'],
                                   inferences['post_preds_means'])
    # test post_preds_lower
    post_preds_lower = (
        np.array([np.nan] * 3 + [1, 1, 1]) * posterior_mean -
        get_z_score(1 - alpha / 2) * posterior_stddev) * sig + mu
    expec_post_preds_lower = pd.DataFrame(data=post_preds_lower,
                                          index=expected_index,
                                          dtype=np.float64,
                                          columns=['post_preds_lower'])
    pd.testing.assert_series_equal(expec_post_preds_lower['post_preds_lower'],
                                   inferences['post_preds_lower'])
    # test post_preds_upper
    post_preds_upper = (
        np.array([np.nan] * 3 + [1, 1, 1]) * posterior_mean +
        get_z_score(1 - alpha / 2) * posterior_stddev) * sig + mu
    expec_post_preds_upper = pd.DataFrame(data=post_preds_upper,
                                          index=expected_index,
                                          dtype=np.float64,
                                          columns=['post_preds_upper'])
    pd.testing.assert_series_equal(expec_post_preds_upper['post_preds_upper'],
                                   inferences['post_preds_upper'])
    # test post_cum_Y
    # Cumulative observed response: NaN through the pre-period except a 0
    # anchor at its last point, then the running sum of post-period data.
    post_cum_y = np.concatenate([[np.nan] * (len(pre_data) - 1) + [0],
                                 np.cumsum(post_data.iloc[:, 0])])
    expec_post_cum_y = pd.DataFrame(data=post_cum_y,
                                    index=expected_index,
                                    dtype=np.float64,
                                    columns=['post_cum_y'])
    pd.testing.assert_series_equal(expec_post_cum_y['post_cum_y'],
                                   inferences['post_cum_y'])
    # test post_cum_preds_means
    expec_post_cum_preds_means = np.cumsum(expec_post_preds_means)
    expec_post_cum_preds_means.rename(
        columns={'post_preds_means': 'post_cum_preds_means'}, inplace=True)
    # Zero anchor at the last pre-period point, matching post_cum_y.
    expec_post_cum_preds_means['post_cum_preds_means'][len(pre_data) - 1] = 0
    pd.testing.assert_series_equal(
        expec_post_cum_preds_means['post_cum_preds_means'],
        inferences['post_cum_preds_means'])
    # test post_cum_preds_lower
    # Credible band of the cumulative prediction, taken across the niter
    # unstandardized posterior sample paths.
    post_cum_preds_lower, post_cum_preds_upper = np.percentile(np.cumsum(
        maybe_unstandardize(np.squeeze(posterior_dist.sample(niter)), mu_sig),
        axis=1), [100 * alpha / 2, 100 - 100 * alpha / 2],
                                                               axis=0)
    post_cum_preds_lower = np.concatenate(
        [np.array([np.nan] * (len(pre_data) - 1) + [0]), post_cum_preds_lower])
    expec_post_cum_preds_lower = pd.DataFrame(data=post_cum_preds_lower,
                                              index=expected_index,
                                              dtype=np.float64,
                                              columns=['post_cum_preds_lower'])
    pd.testing.assert_series_equal(
        expec_post_cum_preds_lower['post_cum_preds_lower'],
        inferences['post_cum_preds_lower'])
    # test post_cum_preds_upper
    post_cum_preds_upper = np.concatenate(
        [np.array([np.nan] * (len(pre_data) - 1) + [0]), post_cum_preds_upper])
    expec_post_cum_preds_upper = pd.DataFrame(data=post_cum_preds_upper,
                                              index=expected_index,
                                              dtype=np.float64,
                                              columns=['post_cum_preds_upper'])
    pd.testing.assert_series_equal(
        expec_post_cum_preds_upper['post_cum_preds_upper'],
        inferences['post_cum_preds_upper'])
    # test point_effects_means
    # Point effect = observed value minus complete prediction.
    net_data = pd.concat([pre_data, post_data])
    expec_point_effects_means = net_data.iloc[:, 0] - inferences[
        'complete_preds_means']
    expec_point_effects_means = pd.DataFrame(data=expec_point_effects_means,
                                             index=expected_index,
                                             dtype=np.float64,
                                             columns=['point_effects_means'])
    pd.testing.assert_series_equal(
        expec_point_effects_means['point_effects_means'],
        inferences['point_effects_means'])
    # test point_effects_lower
    # Effect bounds swap bands: lower effect uses the upper prediction.
    expec_point_effects_lower = net_data.iloc[:, 0] - inferences[
        'complete_preds_upper']
    expec_point_effects_lower = pd.DataFrame(data=expec_point_effects_lower,
                                             index=expected_index,
                                             dtype=np.float64,
                                             columns=['point_effects_lower'])
    pd.testing.assert_series_equal(
        expec_point_effects_lower['point_effects_lower'],
        inferences['point_effects_lower'])
    # test point_effects_upper
    expec_point_effects_upper = net_data.iloc[:, 0] - inferences[
        'complete_preds_lower']
    expec_point_effects_upper = pd.DataFrame(data=expec_point_effects_upper,
                                             index=expected_index,
                                             dtype=np.float64,
                                             columns=['point_effects_upper'])
    pd.testing.assert_series_equal(
        expec_point_effects_upper['point_effects_upper'],
        inferences['point_effects_upper'])
    # test post_cum_effects_means
    post_effects_means = post_data.iloc[:, 0] - inferences['post_preds_means']
    # Zero anchor at the last pre-period point before cumulating.
    post_effects_means.iloc[len(pre_data) - 1] = 0
    expec_post_cum_effects_means = np.cumsum(post_effects_means)
    expec_post_cum_effects_means = pd.DataFrame(
        data=expec_post_cum_effects_means,
        index=expected_index,
        dtype=np.float64,
        columns=['post_cum_effects_means'])
    pd.testing.assert_series_equal(
        expec_post_cum_effects_means['post_cum_effects_means'],
        inferences['post_cum_effects_means'])
    # test post_cum_effects_lower
    # Credible band of the cumulative effect across sample paths.
    post_cum_effects_lower, post_cum_effects_upper = np.percentile(np.cumsum(
        post_data.iloc[:, 0].values -
        maybe_unstandardize(np.squeeze(posterior_dist.sample(niter)), mu_sig),
        axis=1), [100 * alpha / 2, 100 - 100 * alpha / 2],
                                                                   axis=0)
    post_cum_effects_lower = np.concatenate([
        np.array([np.nan] * (len(pre_data) - 1) + [0]), post_cum_effects_lower
    ])
    expec_post_cum_effects_lower = pd.DataFrame(
        data=post_cum_effects_lower,
        index=expected_index,
        dtype=np.float64,
        columns=['post_cum_effects_lower'])
    pd.testing.assert_series_equal(
        expec_post_cum_effects_lower['post_cum_effects_lower'],
        inferences['post_cum_effects_lower'])
    # test post_cum_effects_upper
    post_cum_effects_upper = np.concatenate([
        np.array([np.nan] * (len(pre_data) - 1) + [0]), post_cum_effects_upper
    ])
    expec_post_cum_effects_upper = pd.DataFrame(
        data=post_cum_effects_upper,
        index=expected_index,
        dtype=np.float64,
        columns=['post_cum_effects_upper'])
    pd.testing.assert_series_equal(
        expec_post_cum_effects_upper['post_cum_effects_upper'],
        inferences['post_cum_effects_upper'])