Exemplo n.º 1
0
def test_mle_reg():
    """Check that both regression modes recover the known coefficient.

    ``exog`` is built as exactly twice the unperturbed ``endog``, so the
    regression coefficient should be 0.5. With ``mle_regression=False`` the
    coefficient is read from the last filtered regression coefficient;
    with ``mle_regression=True`` it is read from the fitted parameters.
    """
    endog = np.arange(100)*1.0
    exog = endog*2
    # Make the fit not-quite-perfect
    endog[::2] += 0.01
    endog[1::2] -= 0.01

    # Warnings emitted while fitting are recorded (not shown) and discarded
    with warnings.catch_warnings(record=True):
        mod1 = UnobservedComponents(endog, irregular=True, exog=exog,
                                    mle_regression=False)
        res1 = mod1.fit(disp=-1)

        mod2 = UnobservedComponents(endog, irregular=True, exog=exog,
                                    mle_regression=True)
        res2 = mod2.fit(disp=-1)

    assert_allclose(res1.regression_coefficients.filtered[0, -1], 0.5,
                    atol=1e-5)
    assert_allclose(res2.params[1], 0.5, atol=1e-5)
Exemplo n.º 2
0
def test_mle_reg():
    """Check that both regression modes recover the known coefficient.

    ``exog`` is built as exactly twice the unperturbed ``endog``, so the
    regression coefficient should be 0.5. With ``mle_regression=False`` the
    coefficient is read from the last filtered regression coefficient;
    with ``mle_regression=True`` it is read from the fitted parameters.
    """
    endog = np.arange(100)*1.0
    exog = endog*2
    # Make the fit not-quite-perfect
    endog[::2] += 0.01
    endog[1::2] -= 0.01

    # Warnings emitted while fitting are recorded (not shown) and discarded
    with warnings.catch_warnings(record=True):
        mod1 = UnobservedComponents(endog, irregular=True, exog=exog,
                                    mle_regression=False)
        res1 = mod1.fit(disp=-1)

        mod2 = UnobservedComponents(endog, irregular=True, exog=exog,
                                    mle_regression=True)
        res2 = mod2.fit(disp=-1)

    assert_allclose(res1.regression_coefficients.filtered[0, -1], 0.5,
                    atol=1e-5)
    assert_allclose(res2.params[1], 0.5, atol=1e-5)
Exemplo n.º 3
0
def test_mle_reg(use_exact_diffuse):
    """Check both regression modes, with or without exact diffuse init.

    ``exog`` is built as exactly twice the unperturbed ``endog``, so the
    regression coefficient should be 0.5 in both models. The test also
    checks how many observations each result reports as diffuse (exact
    diffuse initialization) or as burned from the log-likelihood
    (otherwise).

    :param use_exact_diffuse: forwarded to ``UnobservedComponents`` and used
        to select which set of initialization assertions applies.
    """
    endog = np.arange(100) * 1.0
    exog = endog * 2
    # Make the fit not-quite-perfect
    endog[::2] += 0.01
    endog[1::2] -= 0.01

    # Warnings emitted while fitting are recorded (not shown) and discarded
    with warnings.catch_warnings(record=True):
        mod1 = UnobservedComponents(endog,
                                    irregular=True,
                                    exog=exog,
                                    mle_regression=False,
                                    use_exact_diffuse=use_exact_diffuse)
        res1 = mod1.fit(disp=-1)

        mod2 = UnobservedComponents(endog,
                                    irregular=True,
                                    exog=exog,
                                    mle_regression=True,
                                    use_exact_diffuse=use_exact_diffuse)
        res2 = mod2.fit(disp=-1)

    assert_allclose(res1.regression_coefficients.filtered[0, -1],
                    0.5,
                    atol=1e-5)
    assert_allclose(res2.params[1], 0.5, atol=1e-5)

    # When the regression component is part of the state vector with exact
    # diffuse initialization, we have two diffuse observations
    if use_exact_diffuse:
        assert_equal(res1.nobs_diffuse, 2)
        assert_equal(res2.nobs_diffuse, 0)
    else:
        assert_equal(res1.loglikelihood_burn, 1)
        assert_equal(res2.loglikelihood_burn, 0)
Exemplo n.º 4
0
class CausalImpact:
    """
    Causal inference through counterfactual predictions using a Bayesian structural time-series model.

    The model is a statsmodels ``UnobservedComponents`` model fitted on the observations that precede the
    intervention; its forecast over the remaining observations is plotted against the actual values.
    """

    def __init__(self, data, inter_date, model_args=None):
        """Main constructor.

        :param pandas.DataFrame data: input data. Must contain at least 2 columns, one being named 'y'.
            See the README for more details.
        :param object inter_date: date of intervention. Must be of same type of the data index elements.
            This should usually be int or datetime.date
        :param {str: object} model_args: parameters of the model; missing keys are taken from DEFAULT_ARGS
            > max_iter: forwarded as ``maxiter`` to the fit of the statsmodels model
            > n_seasons: number of seasons in the seasonal component of the BSTS model
        :raises ValueError: if ``inter_date`` is not found in the index of ``data``
        """
        self.data = None            # Input data, with a reset index
        self.data_index = None      # Data initial index
        self.data_inter = None      # Data intervention date, relative to the reset index
        self.model = None           # statsmodels BSTS model
        self.fit = None             # statsmodels BSTS fitted model
        self.model_args = None      # BSTS model arguments
        # Checking input arguments
        self._check_input(data, inter_date)
        self._check_model_args(model_args)

    def run(self):
        """Fit the BSTS model to the data preceding the intervention.
        """
        # .loc slicing is label-based and inclusive, hence the "- 1" to stop right before the intervention
        train = self.data.loc[:self.data_inter - 1]
        self.model = UnobservedComponents(
            train[self._obs_col()].values,
            exog=train[self._reg_cols()].values,
            level='local linear trend',
            seasonal=self.model_args['n_seasons'],
        )
        self.fit = self.model.fit(
            maxiter=self.model_args['max_iter'],
        )

    def _check_input(self, data, inter_date):
        """Check input data and store it, along with the position of the intervention.

        :param pandas.DataFrame data: input data. Must contain at least 2 columns, one being named 'y'.
            See the README for more details.
        :param object inter_date: date of intervention. Must be of same type of the data index elements.
            This should usually be int or datetime.date
        :raises ValueError: if ``inter_date`` is not found in the index of ``data``
        """
        self.data_index = data.index
        self.data = data.reset_index(drop=True)
        try:
            self.data_inter = self.data_index.tolist().index(inter_date)
        except ValueError as err:
            raise ValueError('Input intervention date could not be found in data index.') from err

    def _check_model_args(self, model_args):
        """Store the model arguments, filling missing ones with the values of DEFAULT_ARGS.

        The caller's dict is copied rather than modified in place.

        :param model_args: user-provided arguments, or None to use the defaults only
        :type model_args: {str: object} or None
        """
        merged = dict(DEFAULT_ARGS)
        if model_args is not None:
            merged.update(model_args)
        self.model_args = merged

    def _obs_col(self):
        """Get name of column to be modeled in input data.

        :return: column name
        :rtype: str
        """
        return 'y'

    def _reg_cols(self):
        """Get names of columns used in the regression component of the model.

        :return: the column names
        :rtype: pandas.indexes.base.Index
        """
        return self.data.columns.difference([self._obs_col()])

    @staticmethod
    def _conf_int_bounds(pred):
        """Extract the lower and upper confidence bounds of a statsmodels prediction.

        Handles both return types of ``conf_int()``: a 2-column numpy array, or a dataframe with
        "lower y" and "upper y" columns.

        :param pred: object exposing a ``conf_int()`` method
        :return: (lower, upper), as two freshly allocated float arrays that are safe to modify
        :rtype: (numpy.ndarray, numpy.ndarray)
        """
        conf_int = pred.conf_int()
        if isinstance(conf_int, np.ndarray):
            lower, upper = conf_int[:, 0], conf_int[:, 1]
        else:
            lower, upper = conf_int['lower y'].values, conf_int['upper y'].values
        return np.array(lower, dtype=float), np.array(upper, dtype=float)

    def plot_components(self):
        """Plot the estimated components of the model.
        """
        self.fit.plot_components(figsize=(15, 9), legend_loc='lower right')
        plt.show()

    def plot(self):
        """Produce final impact plots: observation vs prediction, pointwise difference, cumulative impact.
        """
        n_seasons = self.model_args['n_seasons']
        min_t = 2 if n_seasons is None else n_seasons + 1
        obs_col = self._obs_col()
        reg_cols = self._reg_cols()
        shade = dict(facecolor='gray', interpolate=True, alpha=0.25)

        # Data model before date of intervention - allows to evaluate quality of fit
        pred = self.fit.get_prediction()
        # Work on copies so that masking the first observations does not alter the fitted results
        pre_model = np.array(pred.predicted_mean, dtype=float)
        pre_lower, pre_upper = self._conf_int_bounds(pred)
        for values in (pre_model, pre_lower, pre_upper):
            values[:min_t] = np.nan
        # Best prediction of y without any intervention
        post_pred = self.fit.get_forecast(
            steps=self.data.shape[0] - self.data_inter,
            exog=self.data.loc[self.data_inter:, reg_cols]
        )
        post_model = post_pred.predicted_mean
        post_lower, post_upper = self._conf_int_bounds(post_pred)

        model = np.concatenate([pre_model, post_model])
        pre_index = self.data.loc[:self.data_inter - 1].index
        post_index = self.data.loc[self.data_inter:].index
        pre_obs = self.data.loc[:self.data_inter - 1, obs_col]
        post_obs = self.data.loc[self.data_inter:, obs_col]
        zeros = np.zeros(self.data.shape[0])

        plt.figure(figsize=(15, 12))

        # Observation and regression components
        ax1 = plt.subplot(3, 1, 1)
        for col in reg_cols:
            plt.plot(self.data[col], label=col)
        plt.plot(model, 'r--', linewidth=2, label='model')
        plt.plot(self.data[obs_col], 'k', linewidth=2, label=obs_col)
        plt.axvline(self.data_inter, c='k', linestyle='--')
        plt.fill_between(pre_index, pre_lower, pre_upper, **shade)
        plt.fill_between(post_index, post_lower, post_upper, **shade)
        plt.setp(ax1.get_xticklabels(), visible=False)
        plt.legend(loc='upper left')
        plt.title('Observation vs prediction')

        # Pointwise difference
        ax2 = plt.subplot(312, sharex=ax1)
        plt.plot(self.data[obs_col] - model, 'r--', linewidth=2)
        plt.plot(self.data.index, zeros, 'g-', linewidth=2)
        plt.axvline(self.data_inter, c='k', linestyle='--')
        plt.fill_between(pre_index, pre_obs - pre_lower, pre_obs - pre_upper, **shade)
        plt.fill_between(post_index, post_obs - post_lower, post_obs - post_upper, **shade)
        plt.setp(ax2.get_xticklabels(), visible=False)
        plt.title('Difference')

        # Cumulative impact
        ax3 = plt.subplot(313, sharex=ax1)
        plt.plot(post_index, (post_obs - post_model).cumsum(), 'r--', linewidth=2)
        plt.plot(self.data.index, zeros, 'g-', linewidth=2)
        plt.axvline(self.data_inter, c='k', linestyle='--')
        plt.fill_between(
            post_index,
            (post_obs - post_lower).cumsum(),
            (post_obs - post_upper).cumsum(),
            **shade
        )
        plt.axis([self.data.index[0], self.data.index[-1], None, None])
        ax3.set_xticklabels(self.data_index)
        plt.title('Cumulative Impact')
        plt.xlabel('$T$')
        plt.show()

        print('Note: the first {} observations are not shown, due to approximate diffuse initialization'.format(min_t))
        
        def summary_forecast(self)
Exemplo n.º 5
0
class CausalImpact:
    """
    Causal inference through counterfactual predictions using a Bayesian structural time-series model.

    The model is a statsmodels ``UnobservedComponents`` model fitted on the observations that precede the
    intervention. Predictions, differences to the actuals and cumulative impact are stored as extra columns
    of the ``result`` dataframe.
    """

    def __init__(self, data, inter_date, model_args=None):
        """Main constructor.

        :param pandas.DataFrame data: input data. Must contain at least 2 columns, one being named 'y'.
            See the README for more details.
        :param object inter_date: date of intervention. Must be of same type of the data index elements.
            This should usually be int or datetime.date
        :param {str: object} model_args: parameters of the model; missing keys are taken from DEFAULT_ARGS
            > max_iter: forwarded as ``maxiter`` to the fit of the statsmodels model
            > n_seasons: number of seasons in the seasonal component of the BSTS model
        :raises ValueError: if ``inter_date`` is not found in the index of ``data``, or if there are fewer
            observations before the intervention than seasons in the model
        """
        # Publicly exposed attributes
        self.data = None            # Input data, with a reset index
        self.data_index = None      # Data initial index
        self.data_inter = None      # Data intervention date, relative to the reset index
        self.model_args = None      # BSTS model arguments
        self.result = None          # Input data (original index kept as a column) plus the estimate columns
        # Private attributes for modeling purposes only
        self._model = None          # statsmodels BSTS model
        self._fit = None            # statsmodels BSTS fitted model
        # Checking input arguments
        self._check_input(data, inter_date)
        self._check_model_args(data, model_args)

    def _check_input(self, data, inter_date):
        """Check input data and store it, along with the position of the intervention.

        :param pandas.DataFrame data: input data. Must contain at least 2 columns, one being named 'y'.
            See the README for more details.
        :param object inter_date: date of intervention. Must be of same type of the data index elements.
            This should usually be int or datetime.date
        :raises ValueError: if ``inter_date`` is not found in the index of ``data``
        """
        self.data_index = data.index
        self.data = data.reset_index(drop=True)
        try:
            self.data_inter = self.data_index.tolist().index(inter_date)
        except ValueError as err:
            raise ValueError('Input intervention date could not be found in data index.') from err
        self.result = data.reset_index(drop=False)

    def _check_model_args(self, data, model_args):
        """Store the model arguments, filling missing ones with the values of DEFAULT_ARGS.

        The caller's dict is copied rather than modified in place. Must be called after ``_check_input``,
        as it relies on ``self.data_inter``.

        :param pandas.DataFrame data: input data (currently unused, kept for interface compatibility)
        :param model_args: user-provided arguments, or None to use the defaults only
        :type model_args: {str: object} or None
        :raises ValueError: if there are fewer observations before the intervention than seasons
        """
        merged = dict(DEFAULT_ARGS)
        if model_args is not None:
            merged.update(model_args)

        # n_seasons may be None (no seasonal component), in which case there is nothing to compare against
        n_seasons = merged['n_seasons']
        if n_seasons is not None and self.data_inter < n_seasons:
            raise ValueError('Training data contains fewer samples than number of seasons in BSTS model.')

        self.model_args = merged

    def run(self, return_df=False):
        """Fit the BSTS model to the data preceding the intervention, then compute all estimates.

        :param bool return_df: whether to return the result dataframe
        :return: ``self.result`` if ``return_df`` is True, else None
        :rtype: pandas.DataFrame or None
        """
        # .loc slicing is label-based and inclusive, hence the "- 1" to stop right before the intervention
        train = self.data.loc[:self.data_inter - 1]
        self._model = UnobservedComponents(
            train[self._obs_col()].values,
            exog=train[self._reg_cols()].values,
            level='local linear trend',
            seasonal=self.model_args['n_seasons'],
        )
        self._fit = self._model.fit(
            maxiter=self.model_args['max_iter'],
        )
        self._get_estimates()
        self._get_difference_estimates()
        self._get_cumulative_estimates()

        if return_df:
            return self.result

    def _get_estimates(self):
        """Extracting model estimate (before and after intervention) as well as its confidence interval.

        Adds the ``pred``, ``pred_conf_int_lower`` and ``pred_conf_int_upper`` columns to ``self.result``.
        """
        lpred = self._fit.get_prediction()   # Left: model before date of intervention (allows to evaluate fit quality)
        rpred = self._fit.get_forecast(      # Right: best prediction of y without any intervention
            steps=self.data.shape[0] - self.data_inter,
            exog=self.data.loc[self.data_inter:, self._reg_cols()]
        )
        # Model prediction
        self.result = self.result.assign(pred=np.concatenate([lpred.predicted_mean, rpred.predicted_mean]))

        # Confidence interval
        lower_conf_ints = []
        upper_conf_ints = []
        for pred in [lpred, rpred]:
            conf_int = pred.conf_int()
            if isinstance(conf_int, np.ndarray):    # As of 0.9.0, statsmodels returns a np.ndarray here
                lower_conf_ints.append(conf_int[:, 0])
                upper_conf_ints.append(conf_int[:, 1])
            else:                                   # instead of a dataframe with "lower y" and "upper y" columns
                lower_conf_ints.append(conf_int.loc[:, 'lower y'].values)
                upper_conf_ints.append(conf_int.loc[:, 'upper y'].values)

        self.result = self.result.assign(
            pred_conf_int_lower=np.concatenate(lower_conf_ints),
            pred_conf_int_upper=np.concatenate(upper_conf_ints),
        )

    def _get_difference_estimates(self):
        """Extracting the difference between the actuals and the model prediction, as well as the related
        confidence interval.

        Adds the ``pred_diff``, ``pred_diff_conf_int_lower`` and ``pred_diff_conf_int_upper`` columns to
        ``self.result``. Requires the columns added by ``_get_estimates``.
        """
        actuals = self.data[self._obs_col()]
        # Difference between actuals and model
        self.result = self.result.assign(pred_diff=actuals.values - self.result['pred'])
        # Confidence interval of the difference: subtracting the upper bound gives the lower difference
        self.result = self.result.assign(
            pred_diff_conf_int_lower=actuals - self.result['pred_conf_int_upper'],
            pred_diff_conf_int_upper=actuals - self.result['pred_conf_int_lower'],
        )

    def _get_cumulative_estimates(self):
        """Extracting estimate of the cumulative impact of the intervention, and its confidence interval.

        Adds the ``cum_impact``, ``cum_impact_conf_int_lower`` and ``cum_impact_conf_int_upper`` columns to
        ``self.result``; all three are 0 before the intervention. Requires the columns added by
        ``_get_estimates``.
        """
        start = self.data_inter

        # Cumulative sum of modeled impact
        # Columns are created as floats (0.0) so that the float values assigned below fit their dtype
        self.result = self.result.assign(cum_impact=0.0)
        impact = self.data[self._obs_col()] - self.result['pred']
        self.result.loc[start:, 'cum_impact'] = impact.loc[start:].cumsum()

        # Confidence interval of the cumulative sum: per-observation radii are combined in quadrature
        radius = self.result['pred'] - self.result['pred_conf_int_lower']
        radius_cumsum = np.sqrt((radius.loc[start:] ** 2).cumsum())
        self.result = self.result.assign(cum_impact_conf_int_lower=0.0, cum_impact_conf_int_upper=0.0)
        self.result.loc[start:, 'cum_impact_conf_int_lower'] = \
            self.result['cum_impact'].loc[start:] - radius_cumsum
        self.result.loc[start:, 'cum_impact_conf_int_upper'] = \
            self.result['cum_impact'].loc[start:] + radius_cumsum

    def _obs_col(self):
        """Get name of column to be modeled in input data.

        :return: column name
        :rtype: str
        """
        return 'y'

    def _reg_cols(self):
        """Get names of columns used in the regression component of the model.

        :return: the column names
        :rtype: pandas.indexes.base.Index
        """
        return self.data.columns.difference([self._obs_col()])

    def plot_components(self):
        """Plot the estimated components of the model.
        """
        self._fit.plot_components(figsize=(15, 9), legend_loc='lower right')
        plt.show()

    def plot(self):
        """Produce final impact plots.
        Note: the first few observations are not shown due to approximate diffuse initialization.
        """
        n_seasons = self.model_args['n_seasons']
        min_t = 2 if n_seasons is None else n_seasons + 1
        shade = dict(facecolor='gray', interpolate=True, alpha=0.25)
        zeros = np.zeros(self.data.shape[0])

        plt.figure(figsize=(15, 12))

        # Observation and regression components
        ax1 = plt.subplot(3, 1, 1)
        for col in self._reg_cols():
            plt.plot(self.data[col], label=col)
        plt.plot(self.result['pred'].iloc[min_t:], 'r--', linewidth=2, label='model')
        plt.plot(self.data[self._obs_col()], 'k', linewidth=2, label=self._obs_col())
        plt.axvline(self.data_inter, c='k', linestyle='--')
        plt.fill_between(
            self.data.index[min_t:],
            self.result['pred_conf_int_lower'].iloc[min_t:],
            self.result['pred_conf_int_upper'].iloc[min_t:],
            **shade
        )
        plt.setp(ax1.get_xticklabels(), visible=False)
        plt.legend(loc='upper left')
        plt.title('Observation vs prediction')

        # Pointwise difference
        ax2 = plt.subplot(312, sharex=ax1)
        plt.plot(self.result['pred_diff'].iloc[min_t:], 'r--', linewidth=2)
        plt.plot(self.data.index, zeros, 'g-', linewidth=2)
        plt.axvline(self.data_inter, c='k', linestyle='--')
        plt.fill_between(
            self.data.index[min_t:],
            self.result['pred_diff_conf_int_lower'].iloc[min_t:],
            self.result['pred_diff_conf_int_upper'].iloc[min_t:],
            **shade
        )
        plt.setp(ax2.get_xticklabels(), visible=False)
        plt.title('Difference')

        # Cumulative impact
        ax3 = plt.subplot(313, sharex=ax1)
        plt.plot(self.data.index, self.result['cum_impact'], 'r--', linewidth=2)
        plt.plot(self.data.index, zeros, 'g-', linewidth=2)
        plt.axvline(self.data_inter, c='k', linestyle='--')
        plt.fill_between(
            self.data.index,
            self.result['cum_impact_conf_int_lower'],
            self.result['cum_impact_conf_int_upper'],
            **shade
        )
        plt.axis([self.data.index[0], self.data.index[-1], None, None])
        ax3.set_xticklabels(self.data_index, rotation=45)
        plt.locator_params(axis='x', nbins=min(12, self.data.shape[0]))
        plt.title('Cumulative Impact')
        plt.xlabel('$T$')
        plt.show()
Exemplo n.º 6
0
def run_ucm(name):
    """Run the unobserved-components checks against one reference result set.

    ``name`` is looked up as an attribute of ``results_structural``. The
    resulting mapping is read for ``'models'`` (an iterable of keyword-argument
    dicts), ``'kwargs'`` (extra keyword arguments applied to every model),
    ``'params'``, ``'llf'`` and, optionally, ``'rtol'`` / ``'atol'``.

    For every model the function checks the parameter transform round trip,
    the cycle frequency bounds, the log-likelihood at the reference
    parameters, and that an MLE fit does not end up noticeably below the
    reference log-likelihood.
    """
    true = getattr(results_structural, name)

    for model in true['models']:
        kwargs = model.copy()
        kwargs.update(true['kwargs'])

        # Make a copy of the data
        values = dta.copy()

        freq = kwargs.pop('freq', None)
        if freq is not None:
            values.index = pd.date_range(start='1959-01-01',
                                         periods=len(dta), freq=freq)

        # Test pandas exog
        if 'exog' in kwargs:
            # Default value here is pd.Series object
            exog = np.log(values['realgdp'])

            # Also allow a check with a 1-dim numpy array
            if kwargs['exog'] == 'numpy':
                exog = exog.values.squeeze()

            kwargs['exog'] = exog

        # Create the model
        mod = UnobservedComponents(values['unemp'], **kwargs)

        # Smoke test for starting parameters, untransform, transform
        # Also test that transform and untransform are inverses
        mod.start_params
        roundtrip = mod.transform_params(
            mod.untransform_params(mod.start_params))
        assert_allclose(mod.start_params, roundtrip)

        # Fit the model at the true parameters
        res_true = mod.filter(true['params'])

        # Check that the cycle bounds were computed correctly
        # (only the first character of the frequency string is compared)
        freqstr = freq[0] if freq is not None else values.index.freqstr[0]
        if 'cycle_period_bounds' in kwargs:
            cycle_period_bounds = kwargs['cycle_period_bounds']
        elif freqstr == 'A':
            cycle_period_bounds = (1.5, 12)
        elif freqstr == 'Q':
            cycle_period_bounds = (1.5*4, 12*4)
        elif freqstr == 'M':
            cycle_period_bounds = (1.5*12, 12*12)
        else:
            # If we have no information on data frequency, require the
            # cycle frequency to be between 0 and pi
            cycle_period_bounds = (2, np.inf)

        # Test that the cycle frequency bound is correct
        assert_equal(mod.cycle_frequency_bound,
                     (2*np.pi / cycle_period_bounds[1],
                      2*np.pi / cycle_period_bounds[0]))

        # Test that the likelihood is correct
        rtol = true.get('rtol', 1e-7)
        atol = true.get('atol', 0)
        assert_allclose(res_true.llf, true['llf'], rtol=rtol, atol=atol)

        # Smoke test for plot_components
        if have_matplotlib:
            fig = res_true.plot_components()
            plt.close(fig)

        # Now fit the model via MLE; warnings emitted while fitting are
        # recorded (not shown) and discarded
        with warnings.catch_warnings(record=True):
            res = mod.fit(disp=-1)
            # If we found a higher likelihood, no problem; otherwise check
            # that we're very close to that found by R
            if res.llf <= true['llf']:
                assert_allclose(res.llf, true['llf'], rtol=1e-4)

            # Smoke test for summary
            res.summary()
Exemplo n.º 7
0
def run_ucm(name):
    """Run the unobserved-components checks against one reference result set.

    ``name`` is looked up as an attribute of ``results_structural``. The
    resulting mapping is read for ``'models'`` (an iterable of keyword-argument
    dicts), ``'kwargs'`` (extra keyword arguments applied to every model),
    ``'params'``, ``'llf'`` and, optionally, ``'rtol'`` / ``'atol'``.

    For every model the function checks the parameter transform round trip,
    the cycle frequency bounds, the log-likelihood at the reference
    parameters, and that an MLE fit does not end up noticeably below the
    reference log-likelihood. Plotting is smoke-tested only when matplotlib
    can be imported.
    """
    true = getattr(results_structural, name)

    for model in true['models']:
        kwargs = model.copy()
        kwargs.update(true['kwargs'])

        # Make a copy of the data
        values = dta.copy()

        freq = kwargs.pop('freq', None)
        if freq is not None:
            values.index = pd.date_range(start='1959-01-01', periods=len(dta),
                                         freq=freq)

        # Test pandas exog
        if 'exog' in kwargs:
            # Default value here is pd.Series object
            exog = np.log(values['realgdp'])

            # Also allow a check with a 1-dim numpy array
            if kwargs['exog'] == 'numpy':
                exog = exog.values.squeeze()

            kwargs['exog'] = exog

        # Create the model
        mod = UnobservedComponents(values['unemp'], **kwargs)

        # Smoke test for starting parameters, untransform, transform
        # Also test that transform and untransform are inverses
        mod.start_params
        roundtrip = mod.transform_params(
            mod.untransform_params(mod.start_params))
        assert_allclose(mod.start_params, roundtrip)

        # Fit the model at the true parameters
        res_true = mod.filter(true['params'])

        # Check that the cycle bounds were computed correctly
        # (only the first character of the frequency string is compared)
        freqstr = freq[0] if freq is not None else values.index.freqstr[0]
        if 'cycle_period_bounds' in kwargs:
            cycle_period_bounds = kwargs['cycle_period_bounds']
        elif freqstr == 'A':
            cycle_period_bounds = (1.5, 12)
        elif freqstr == 'Q':
            cycle_period_bounds = (1.5*4, 12*4)
        elif freqstr == 'M':
            cycle_period_bounds = (1.5*12, 12*12)
        else:
            # If we have no information on data frequency, require the
            # cycle frequency to be between 0 and pi
            cycle_period_bounds = (2, np.inf)

        # Test that the cycle frequency bound is correct
        assert_equal(mod.cycle_frequency_bound,
                     (2*np.pi / cycle_period_bounds[1],
                      2*np.pi / cycle_period_bounds[0]))

        # Test that the likelihood is correct
        rtol = true.get('rtol', 1e-7)
        atol = true.get('atol', 0)
        assert_allclose(res_true.llf, true['llf'], rtol=rtol, atol=atol)

        # Optional smoke test for plot_components
        try:
            import matplotlib.pyplot as plt
            try:
                from pandas.plotting import register_matplotlib_converters
                register_matplotlib_converters()
            except ImportError:
                pass
            fig = plt.figure()
            try:
                res_true.plot_components(fig=fig)
            finally:
                # Release the figure so that they do not pile up across
                # the models of the loop
                plt.close(fig)
        except ImportError:
            pass

        # Now fit the model via MLE; warnings emitted while fitting are
        # recorded (not shown) and discarded
        with warnings.catch_warnings(record=True):
            res = mod.fit(disp=-1)
            # If we found a higher likelihood, no problem; otherwise check
            # that we're very close to that found by R
            if res.llf <= true['llf']:
                assert_allclose(res.llf, true['llf'], rtol=1e-4)

            # Smoke test for summary
            res.summary()
Exemplo n.º 8
0
def test_custom_model_fit(rand_data, pre_int_period, post_int_period,
                          monkeypatch):
    """Check the keyword arguments passed to ``fit`` of a user-supplied model.

    ``fit`` of every custom model is replaced by a shared mock and the
    posterior-inference step of ``CausalImpact`` is patched out. Each scenario
    then asserts the last call received by the mock: the expected ``bounds``
    for the model specification, the expected ``disp``, and ``prior_level_sd``
    whenever it was given explicitly.
    """
    fit_mock = mock.Mock()
    monkeypatch.setattr(
        'causalimpact.main.CausalImpact._process_posterior_inferences',
        mock.Mock())

    free = (None, None)

    def level_bounds(sd):
        # Bounds expected for the level parameter, given its prior sd
        return (sd / 1.2, sd * 1.2)

    def with_mocked_fit(model):
        # Route the model's fit through the shared mock
        model.fit = fit_mock
        return model

    def check(model, bounds, disp, **impact_kwargs):
        # Run CausalImpact on the custom model, then verify the last fit call
        CausalImpact(rand_data, pre_int_period, post_int_period, model=model,
                     **impact_kwargs)
        expected = dict(bounds=bounds, disp=disp, nseasons=[],
                        standardize=True)
        if 'prior_level_sd' in impact_kwargs:
            expected['prior_level_sd'] = impact_kwargs['prior_level_sd']
        fit_mock.assert_called_with(**expected)

    pre_data = rand_data.loc[pre_int_period[0]:pre_int_period[1], :]

    # Local level, all regressors
    model = with_mocked_fit(
        UnobservedComponents(endog=pre_data.iloc[:, 0],
                             level='llevel',
                             exog=pre_data.iloc[:, 1:]))
    default_bounds = [free, level_bounds(0.01), free, free]
    check(model, default_bounds, False)
    check(model, default_bounds, True, disp=True)
    check(model, default_bounds, True, disp=True, prior_level_sd=0.01)
    check(model, [free, free, free, free], True,
          disp=True, prior_level_sd=None)

    # Local level with a frequency-domain seasonal component
    model = with_mocked_fit(
        UnobservedComponents(endog=pre_data.iloc[:, 0],
                             level='llevel',
                             exog=pre_data.iloc[:, 1:],
                             freq_seasonal=[{'period': 3}]))
    check(model, [free, level_bounds(0.001), free, free, free], True,
          disp=True, prior_level_sd=0.001)

    # Explicit level/trend/seasonal flags, single regressor
    model = with_mocked_fit(
        UnobservedComponents(endog=pre_data.iloc[:, 0],
                             level=True,
                             exog=pre_data.iloc[:, 1],
                             trend=True,
                             seasonal=3,
                             stochastic_level=True))
    check(model, [level_bounds(0.001), free, free], True,
          disp=True, prior_level_sd=0.001)

    new_pre_data = rand_data.loc[pre_int_period[0]:pre_int_period[1],
                                 ['y', 'x1']]

    # Local level, 'x1' as the only regressor
    model = with_mocked_fit(
        UnobservedComponents(endog=new_pre_data.iloc[:, 0],
                             level='llevel',
                             exog=new_pre_data.iloc[:, 1:]))
    check(model, [free, level_bounds(0.01), free], False, disp=False)

    # 'dtrend' specification
    model = with_mocked_fit(
        UnobservedComponents(endog=new_pre_data.iloc[:, 0],
                             level='dtrend',
                             exog=new_pre_data.iloc[:, 1:]))
    check(model, [free, free], False, disp=False)

    # 'lltrend' specification
    model = with_mocked_fit(
        UnobservedComponents(endog=new_pre_data.iloc[:, 0],
                             level='lltrend',
                             exog=new_pre_data.iloc[:, 1:]))
    check(model, [free, level_bounds(0.01), free, free], False, disp=False)
Exemplo n.º 9
0
def test_default_model_fit(rand_data, pre_int_period, post_int_period,
                           monkeypatch):
    """Check the keyword arguments CausalImpact hands to the default model's fit.

    ``_get_default_model`` is patched to return a model whose ``fit`` is a
    mock, so each scenario only inspects the last ``fit`` call: the bounds
    list, ``disp``, ``prior_level_sd`` (when given), ``nseasons`` and
    ``standardize``.
    """
    default_model_attr = 'causalimpact.main.CausalImpact._get_default_model'
    free = (None, None)  # an unconstrained parameter
    fit_mock = mock.Mock()
    pre_data = rand_data.loc[pre_int_period[0]:pre_int_period[1], :]

    def install_as_default(ucm):
        # Replace fit with the shared mock and make CausalImpact build `ucm`.
        ucm.fit = fit_mock
        monkeypatch.setattr(default_model_attr, mock.Mock(return_value=ucm))

    # --- local level model with regressors -------------------------------
    install_as_default(
        UnobservedComponents(endog=pre_data.iloc[:, 0],
                             level='llevel',
                             exog=pre_data.iloc[:, 1:]))
    # Posterior processing is irrelevant here, so it is stubbed out.
    monkeypatch.setattr(
        'causalimpact.main.CausalImpact._process_posterior_inferences',
        mock.Mock())

    CausalImpact(rand_data, pre_int_period, post_int_period)
    fit_mock.assert_called_with(
        bounds=[free, (0.01 / 1.2, 0.012), free, free],
        disp=False,
        nseasons=[],
        standardize=True)

    CausalImpact(rand_data, pre_int_period, post_int_period, disp=True)
    fit_mock.assert_called_with(
        bounds=[free, (0.01 / 1.2, 0.012), free, free],
        disp=True,
        nseasons=[],
        standardize=True)

    CausalImpact(rand_data,
                 pre_int_period,
                 post_int_period,
                 disp=True,
                 prior_level_sd=0.1)
    fit_mock.assert_called_with(
        bounds=[free, (0.1 / 1.2, 0.1 * 1.2), free, free],
        disp=True,
        prior_level_sd=0.1,
        nseasons=[],
        standardize=True)

    CausalImpact(rand_data,
                 pre_int_period,
                 post_int_period,
                 disp=True,
                 prior_level_sd=None)
    fit_mock.assert_called_with(
        bounds=[free, free, free, free],
        disp=True,
        prior_level_sd=None,
        nseasons=[],
        standardize=True)

    # --- same model plus one frequency-domain seasonal component ---------
    install_as_default(
        UnobservedComponents(endog=pre_data.iloc[:, 0],
                             level='llevel',
                             exog=pre_data.iloc[:, 1:],
                             freq_seasonal=[{'period': 3}]))

    CausalImpact(rand_data,
                 pre_int_period,
                 post_int_period,
                 disp=True,
                 prior_level_sd=0.001,
                 nseasons=[{'period': 3}])
    fit_mock.assert_called_with(
        bounds=[free, (0.001 / 1.2, 0.001 * 1.2), free, free, free],
        disp=True,
        prior_level_sd=0.001,
        nseasons=[{'period': 3}],
        standardize=True)

    # --- local level model without any regressor -------------------------
    install_as_default(
        UnobservedComponents(endog=pre_data.iloc[:, 0], level='llevel'))

    new_data = pd.DataFrame(np.random.randn(200, 1), columns=['y'])
    CausalImpact(new_data, pre_int_period, post_int_period, disp=False)
    fit_mock.assert_called_with(
        bounds=[free, (0.01 / 1.2, 0.01 * 1.2)],
        disp=False,
        nseasons=[],
        standardize=True)
Exemplo n.º 10
0
def run_ucm(name, use_exact_diffuse=False):
    """Run the reference checks for one entry of ``results_structural``.

    For every model specification stored under ``results_structural.<name>``
    this builds an ``UnobservedComponents`` model on the ``unemp`` series,
    then:

    * smoke-tests ``start_params`` and checks that ``transform_params`` and
      ``untransform_params`` round-trip,
    * checks the computed cycle frequency bound,
    * compares the log-likelihood at the reference parameters with the
      stored reference value,
    * smoke-tests ``plot_components`` when matplotlib is importable,
    * fits by MLE and, unless a higher likelihood was found, checks the
      fitted log-likelihood against the reference value.

    :param str name: attribute of ``results_structural`` holding the
        reference dictionary (keys used here: ``models``, ``kwargs``,
        ``params``, ``llf`` and optionally ``rtol``, ``atol``, ``maxiter``,
        ``start_params``).
    :param bool use_exact_diffuse: forwarded to the model; also selects how
        the log-likelihood is adjusted before comparison.
    """
    true = getattr(results_structural, name)

    for model in true['models']:
        kwargs = model.copy()
        kwargs.update(true['kwargs'])
        kwargs['use_exact_diffuse'] = use_exact_diffuse

        # Make a copy of the data
        values = dta.copy()

        freq = kwargs.pop('freq', None)
        if freq is not None:
            values.index = pd.date_range(start='1959-01-01',
                                         periods=len(dta),
                                         freq=freq)

        # Test pandas exog
        if 'exog' in kwargs:
            # Default value here is pd.Series object
            exog = np.log(values['realgdp'])

            # Also allow a check with a 1-dim numpy array
            if kwargs['exog'] == 'numpy':
                exog = exog.values.squeeze()

            kwargs['exog'] = exog

        # Create the model
        mod = UnobservedComponents(values['unemp'], **kwargs)

        # Smoke test for starting parameters, untransform, transform
        # Also test that transform and untransform are inverses
        mod.start_params  # attribute access only: must not raise
        roundtrip = mod.transform_params(
            mod.untransform_params(mod.start_params))
        assert_allclose(mod.start_params, roundtrip)

        # Fit the model at the true parameters
        res_true = mod.filter(true['params'])

        # Check that the cycle bounds were computed correctly
        freqstr = freq[0] if freq is not None else values.index.freqstr[0]
        if 'cycle_period_bounds' in kwargs:
            cycle_period_bounds = kwargs['cycle_period_bounds']
        elif freqstr == 'A':
            cycle_period_bounds = (1.5, 12)
        elif freqstr == 'Q':
            cycle_period_bounds = (1.5 * 4, 12 * 4)
        elif freqstr == 'M':
            cycle_period_bounds = (1.5 * 12, 12 * 12)
        else:
            # If we have no information on data frequency, require the
            # cycle frequency to be between 0 and pi
            cycle_period_bounds = (2, np.inf)

        # Test that the cycle frequency bound is correct
        assert_equal(mod.cycle_frequency_bound,
                     (2 * np.pi / cycle_period_bounds[1],
                      2 * np.pi / cycle_period_bounds[0]))

        # Test that the likelihood is correct
        rtol = true.get('rtol', 1e-7)
        atol = true.get('atol', 0)

        if use_exact_diffuse:
            # If we are using exact diffuse initialization, then we need to
            # adjust for the fact that KFAS does not include the constant in
            # the likelihood function for the diffuse periods
            # (see note to test_exact_diffuse_filtering.py for details).
            res_llf = (res_true.llf_obs.sum() +
                       res_true.nobs_diffuse * 0.5 * np.log(2 * np.pi))
        else:
            # If we are using approximate diffuse initialization, then we need
            # to ignore the first period, and this will agree with KFAS (since
            # it does not include the constant in the likelihood function for
            # diffuse periods).
            res_llf = res_true.llf_obs[res_true.loglikelihood_burn:].sum()

        assert_allclose(res_llf, true['llf'], rtol=rtol, atol=atol)

        # Optional smoke test for plot_components (skipped without matplotlib)
        try:
            import matplotlib.pyplot as plt
            try:
                from pandas.plotting import register_matplotlib_converters
                register_matplotlib_converters()
            except ImportError:
                pass
            fig = plt.figure()
            try:
                res_true.plot_components(fig=fig)
            finally:
                # pyplot keeps every figure alive until it is closed; without
                # this, one figure per model would accumulate across the loop.
                plt.close(fig)
        except ImportError:
            pass

        # Now fit the model via MLE
        with warnings.catch_warnings(record=True):
            fit_kwargs = {}
            if 'maxiter' in true:
                fit_kwargs['maxiter'] = true['maxiter']
            res = mod.fit(start_params=true.get('start_params', None),
                          disp=-1,
                          **fit_kwargs)
            # If we found a higher likelihood, no problem; otherwise check
            # that we're very close to that found by R

            # See note above about these computation
            if use_exact_diffuse:
                res_llf = (res.llf_obs.sum() +
                           res.nobs_diffuse * 0.5 * np.log(2 * np.pi))
            else:
                res_llf = res.llf_obs[res_true.loglikelihood_burn:].sum()

            if res_llf <= true['llf']:
                assert_allclose(res_llf, true['llf'], rtol=1e-4)

            # Smoke test for summary
            res.summary()
Exemplo n.º 11
0
def test_compile_posterior_inferences_w_data(data):
    """Compare every series produced by ``compile_posterior`` with a manual rebuild.

    A local level model with regressors is fitted on rows 0..70 of ``data``;
    the expected response, predictions, confidence bands, point effects and
    cumulative effects are then recomputed from the fitted model and checked
    against ``inferences['series']``.
    """
    pre_period = [0, 70]
    post_period = [71, 100]
    alpha = 0.05
    orig_std_params = (0., 1.)
    post_period_response = None

    df_pre = data.loc[pre_period[0]:pre_period[1], :]
    df_post = data.loc[post_period[0]:post_period[1], :]
    n_pre = len(df_pre)

    trained_model = UnobservedComponents(endog=df_pre.iloc[:, 0].values,
                                         level='llevel',
                                         exog=df_pre.iloc[:, 1:].values).fit()

    inferences = compile_posterior(trained_model, data, df_pre, df_post,
                                   post_period_response, alpha,
                                   orig_std_params)
    series = inferences['series']

    def post_only_cumsum(effect, name):
        # Zero over the pre-period, running total of `effect` afterwards.
        stacked = np.concatenate(
            (np.zeros(n_pre), np.cumsum(effect.iloc[n_pre:])))
        return pd.Series(stacked, name=name)

    # Observed response and its running total.
    expected_response = pd.Series(data.iloc[:, 0], name='response')
    assert_series_equal(expected_response, series['response'])

    expected_cumsum = pd.Series(np.cumsum(expected_response),
                                name='cum_response')
    assert_series_equal(expected_cumsum, series['cum_response'])

    # In-sample prediction followed by the post-period forecast.
    predictor = trained_model.get_prediction()
    forecaster = trained_model.get_forecast(
        steps=len(df_post),
        exog=df_post.iloc[:, 1].values.reshape(-1, 1),
        alpha=alpha)

    point_pred = np.concatenate(
        [predictor.predicted_mean, forecaster.predicted_mean])
    expected_point_pred = pd.Series(point_pred, name='point_pred')
    assert_series_equal(expected_point_pred, series['point_pred'])

    # Confidence bands, re-indexed to match the input frames.
    pre_ci = pd.DataFrame(predictor.conf_int(alpha=alpha))
    pre_ci.index = df_pre.index
    post_ci = pd.DataFrame(forecaster.conf_int(alpha=alpha))
    post_ci.index = df_post.index
    ci = pd.concat([pre_ci, post_ci])

    expected_pred_upper = ci.iloc[:, 1].rename('point_pred_upper')
    expected_pred_lower = ci.iloc[:, 0].rename('point_pred_lower')
    assert_series_equal(expected_pred_upper, series['point_pred_upper'])
    assert_series_equal(expected_pred_lower, series['point_pred_lower'])

    # Cumulative predictions.
    assert_series_equal(pd.Series(np.cumsum(point_pred), name='cum_pred'),
                        series['cum_pred'])
    assert_series_equal(
        pd.Series(np.cumsum(expected_pred_lower), name='cum_pred_lower'),
        series['cum_pred_lower'])
    assert_series_equal(
        pd.Series(np.cumsum(expected_pred_upper), name='cum_pred_upper'),
        series['cum_pred_upper'])

    # Point effects: response minus each prediction series.
    expected_point_effect = pd.Series(expected_response - expected_point_pred,
                                      name='point_effect')
    assert_series_equal(expected_point_effect, series['point_effect'])

    expected_point_effect_lower = pd.Series(
        expected_response - expected_pred_lower, name='point_effect_lower')
    assert_series_equal(expected_point_effect_lower,
                        series['point_effect_lower'])

    expected_point_effect_upper = pd.Series(
        expected_response - expected_pred_upper, name='point_effect_upper')
    assert_series_equal(expected_point_effect_upper,
                        series['point_effect_upper'])

    # Cumulative effects only accumulate over the post-period.
    assert_series_equal(post_only_cumsum(expected_point_effect, 'cum_effect'),
                        series['cum_effect'])
    assert_series_equal(
        post_only_cumsum(expected_point_effect_lower, 'cum_effect_lower'),
        series['cum_effect_lower'])
    assert_series_equal(
        post_only_cumsum(expected_point_effect_upper, 'cum_effect_upper'),
        series['cum_effect_upper'])
Exemplo n.º 12
0
class CausalImpact:
    """
    Causal inference through counterfactual predictions using a Bayesian structural time-series model.

    The model is fitted on the observations that precede the intervention and then used to forecast the
    modeled column over the remaining observations. Typical usage: build the object, call :meth:`run`,
    then read ``result`` or call :meth:`plot`.
    """
    def __init__(self, data, inter_date, n_seasons=7):
        """Main constructor.

        :param pandas.DataFrame data: input data. Must contain at least 2 columns, one being named 'y'.
            See the README for more details.
        :param object inter_date: date of intervention. Must be of same type of the data index elements.
            This should usually be int of datetime.date
        :param n_seasons: number of seasons in the seasonal component of the BSTS model, or ``None`` to
            skip the seasonal validation and fit without a seasonal period
        :type n_seasons: int or None

        :raises ValueError: if ``inter_date`` is not in the data index, if ``n_seasons`` is below 2, or if
            fewer than ``n_seasons`` observations precede the intervention
        """
        # Constructor arguments
        self.data = data.reset_index(
            drop=True)  # Input data, with a reset index
        self.inter_date = inter_date  # Date of intervention as passed in input
        self.n_seasons = n_seasons  # Number of seasons in the seasonal component of the BSTS model
        # DataFrame holding the results of the BSTS model predictions.
        self.result = None
        # Private attributes for modeling purposes only
        self._input_index = data.index  # Input data index
        self._inter_index = None  # Data intervention date, relative to the reset index
        self._model = None  # statsmodels BSTS model
        self._fit = None  # statsmodels BSTS fitted model
        # Checking input arguments
        self._check_input()
        self._check_model_args()

    def _check_input(self):
        """Locate the intervention date in the input index and initialise ``result``.

        :raises ValueError: if the intervention date is not part of the input index
        """
        try:
            self._inter_index = self._input_index.tolist().index(
                self.inter_date)
        except ValueError:
            raise ValueError(
                'Input intervention date could not be found in data index.')
        self.result = self.data.copy()

    def _check_model_args(self):
        """Check if input arguments are compatible with the data.

        :raises ValueError: if ``n_seasons`` is below 2 or exceeds the number of pre-intervention samples
        """
        if self.n_seasons is None:
            # No seasonal period to validate (plot() already treats None as "no seasonality").
            return
        if self.n_seasons < 2:
            raise ValueError(
                'Seasonal component must have a seasonal period of at least 2.'
            )
        if self._inter_index < self.n_seasons:
            # _inter_index equals the number of training (pre-intervention) samples.
            raise ValueError(
                'Training data contains fewer samples than number of seasons in BSTS model.'
            )

    def run(self, max_iter=1000, return_df=False):
        """Fit the BSTS model to the data.

        :param int max_iter: max number of iterations in UnobservedComponents.fit (maximum likelihood estimator)
        :param bool return_df: set to `True` if you want this method to return the dataframe of model results

        :return: None or pandas.DataFrame of results
        """
        # .loc slicing is label based and inclusive, hence the "- 1" to stop right before the intervention.
        self._model = UnobservedComponents(
            self.data.loc[:self._inter_index - 1,
                          self._obs_col()].values,
            exog=self.data.loc[:self._inter_index - 1,
                               self._reg_cols()].values,
            level='local linear trend',
            seasonal=self.n_seasons,
        )
        self._fit = self._model.fit(maxiter=max_iter)
        self._get_estimates()
        self._get_difference_estimates()
        self._get_cumulative_estimates()

        if return_df:
            return self.result
        return None

    def _get_estimates(self):
        """Extracting model estimate (before and after intervention) as well as 95% confidence interval.

        Adds the columns ``pred``, ``pred_conf_int_lower`` and ``pred_conf_int_upper`` to ``result``.
        """
        # Left: model before date of intervention (allows to evaluate fit quality)
        lpred = self._fit.get_prediction()
        # Right: best prediction of y without any intervention
        rpred = self._fit.get_forecast(
            steps=self.data.shape[0] - self._inter_index,
            exog=self.data.loc[self._inter_index:,
                               self._reg_cols()])
        # Model prediction
        self.result = self.result.assign(
            pred=np.concatenate([lpred.predicted_mean, rpred.predicted_mean]))

        # 95% confidence interval
        lower_conf_ints = []
        upper_conf_ints = []
        for pred in [lpred, rpred]:
            conf_int = pred.conf_int()
            if isinstance(conf_int, np.ndarray):
                # As of 0.9.0, statsmodels returns a np.ndarray here
                lower_conf_ints.append(conf_int[:, 0])
                upper_conf_ints.append(conf_int[:, 1])
            else:
                # instead of a dataframe with "lower y" and "upper y" columns
                lower_conf_ints.append(conf_int.loc[:, 'lower y'].values)
                upper_conf_ints.append(conf_int.loc[:, 'upper y'].values)

        self.result = self.result.assign(
            pred_conf_int_lower=np.concatenate(lower_conf_ints),
            pred_conf_int_upper=np.concatenate(upper_conf_ints))

    def _get_difference_estimates(self):
        """Extracting the difference between the model prediction and the actuals, as well as the related 95%
        confidence interval.

        Adds the columns ``pred_diff``, ``pred_diff_conf_int_lower`` and ``pred_diff_conf_int_upper``.
        """
        observed = self.data[self._obs_col()]
        # Difference between actuals and model
        self.result = self.result.assign(
            pred_diff=observed.values - self.result['pred'])
        # Confidence interval of the difference: subtracting the upper bound gives the lower one and
        # vice versa.
        self.result = self.result.assign(
            pred_diff_conf_int_lower=observed -
            self.result['pred_conf_int_upper'],
            pred_diff_conf_int_upper=observed -
            self.result['pred_conf_int_lower'])

    def _get_cumulative_estimates(self):
        """Extracting estimate of the cumulative impact of the intervention, and its 95% confidence interval.

        Adds the columns ``cum_impact``, ``cum_impact_conf_int_lower`` and ``cum_impact_conf_int_upper``;
        all three are zero before the intervention.
        """
        post = slice(self._inter_index, None)  # label slice covering the post-intervention rows

        # Cumulative sum of modeled impact. The columns are created as floats so that writing the
        # float cumulative sums below does not rely on implicit int -> float upcasting.
        self.result = self.result.assign(cum_impact=0.0)
        self.result.loc[post, 'cum_impact'] = (
            self.data[self._obs_col()] -
            self.result['pred']).loc[post].cumsum()

        # Confidence interval of the cumulative sum
        radius_cumsum = np.sqrt(
            ((self.result['pred'] - self.result['pred_conf_int_lower']
              ).loc[post]**2).cumsum())
        self.result = self.result.assign(cum_impact_conf_int_lower=0.0,
                                         cum_impact_conf_int_upper=0.0)
        self.result.loc[post, 'cum_impact_conf_int_lower'] = \
            self.result['cum_impact'].loc[post] - radius_cumsum
        self.result.loc[post, 'cum_impact_conf_int_upper'] = \
            self.result['cum_impact'].loc[post] + radius_cumsum

    def _obs_col(self):
        """Get name of column to be modeled in input data.

        :return: column name
        :rtype: str
        """
        return 'y'

    def _reg_cols(self):
        """Get names of columns used in the regression component of the model.

        :return: the column names
        :rtype: pandas.indexes.base.Index
        """
        return self.data.columns.difference([self._obs_col()])

    def plot_components(self):
        """Plot the estimated components of the model.
        """
        self._fit.plot_components(figsize=(15, 9), legend_loc='lower right')
        plt.show()

    def plot(self, split=False):
        """Produce final impact plots.
        Note: the first few observations are not shown due to approximate diffuse initialization.

        :param bool split: set to `True` if you want to split plot of input data into multiple charts. Default: `False`.
        """
        from matplotlib.ticker import FuncFormatter

        min_t = 2 if self.n_seasons is None else self.n_seasons + 1

        n_plots = 3 + split * len(self._reg_cols())
        grid = gs.GridSpec(n_plots, 1)
        plt.figure(figsize=(15, 4 * n_plots))

        # Observation and regression components
        ax1 = plt.subplot(grid[0, :])
        # Regression components
        for i, col in enumerate(self._reg_cols()):
            plt.plot(self.data[col], label=col)
            if split:  # Creating new subplot if charts should be split
                plt.axvline(self._inter_index, c='k', linestyle='--')
                plt.title(col)
                ax = plt.subplot(grid[i + 1, :], sharex=ax1)
                plt.setp(ax.get_xticklabels(), visible=False)
        # Model and confidence intervals
        plt.plot(self.result['pred'].iloc[min_t:],
                 'r--',
                 linewidth=2,
                 label='model')
        plt.plot(self.data[self._obs_col()],
                 'k',
                 linewidth=2,
                 label=self._obs_col())
        plt.axvline(self._inter_index, c='k', linestyle='--')
        plt.fill_between(
            self.data.index[min_t:],
            self.result['pred_conf_int_lower'].iloc[min_t:],
            self.result['pred_conf_int_upper'].iloc[min_t:],
            facecolor='gray',
            interpolate=True,
            alpha=0.25,
        )
        plt.setp(ax1.get_xticklabels(), visible=False)
        plt.legend(loc='upper left')
        plt.title('Observation vs prediction')

        # Pointwise difference
        ax2 = plt.subplot(grid[-2, :], sharex=ax1)
        plt.plot(self.result['pred_diff'].iloc[min_t:], 'r--', linewidth=2)
        plt.plot(self.data.index,
                 np.zeros(self.data.shape[0]),
                 'g-',
                 linewidth=2)
        plt.axvline(self._inter_index, c='k', linestyle='--')
        plt.fill_between(
            self.data.index[min_t:],
            self.result['pred_diff_conf_int_lower'].iloc[min_t:],
            self.result['pred_diff_conf_int_upper'].iloc[min_t:],
            facecolor='gray',
            interpolate=True,
            alpha=0.25,
        )
        plt.setp(ax2.get_xticklabels(), visible=False)
        plt.title('Difference')

        # Cumulative impact
        ax3 = plt.subplot(grid[-1, :], sharex=ax1)
        plt.plot(self.data.index,
                 self.result['cum_impact'],
                 'r--',
                 linewidth=2)
        plt.plot(self.data.index,
                 np.zeros(self.data.shape[0]),
                 'g-',
                 linewidth=2)
        plt.axvline(self._inter_index, c='k', linestyle='--')
        plt.fill_between(
            self.data.index,
            self.result['cum_impact_conf_int_lower'],
            self.result['cum_impact_conf_int_upper'],
            facecolor='gray',
            interpolate=True,
            alpha=0.25,
        )
        plt.axis([self.data.index[0], self.data.index[-1], None, None])

        # The x axis carries positions in the reset index; label each tick with the input index
        # value at that position. Assigning the whole input index as a fixed label list would pair
        # labels with ticks by order, not by position.
        input_labels = self._input_index

        def _label_for(position, _tick_number):
            nearest = int(round(position))
            on_sample = abs(position - nearest) < 1e-9
            if on_sample and 0 <= nearest < len(input_labels):
                return str(input_labels[nearest])
            return ''  # tick outside the data or between two samples

        ax3.xaxis.set_major_formatter(FuncFormatter(_label_for))
        ax3.tick_params(axis='x', labelrotation=45)
        plt.locator_params(axis='x',
                           nbins=min(12, self.data.shape[0]),
                           integer=True)
        plt.title('Cumulative Impact')
        plt.xlabel('$T$')
        plt.show()