示例#1
0
 def test_pre_period_starts_after_beginning_of_data(self, data):
     """Run with a pre-period that begins at row 3 rather than row 0.

     The ``response`` column must reproduce ``data.y`` unchanged, and every
     inference column from position 2 onward must be null on the rows that
     come before the start of the pre-period.
     """
     first_pre_row = 3
     pre_period = [first_pre_row, 100]
     post_period = [101, 199]

     impact = CausalImpact(data, pre_period, post_period)
     impact.run()

     inferences = impact.inferences
     observed = inferences.response.values
     np.testing.assert_array_equal(observed, data.y.values)

     rows_before_pre_period = inferences.iloc[:first_pre_row, 2:]
     assert np.all(pd.isnull(rows_before_pre_period))
示例#2
0
    def test_missing_values_in_pre_period_y(self, pre_period, post_period):
        """Check how NaNs in the pre-period response show up in the inferences.

        Rows 95-99 of ``y`` are set to NaN before the analysis runs.
        Afterwards:

        * the ``response`` column(s) must be NaN on those rows;
        * every column that is neither ``response`` nor ``point_effect*``
          must contain no NaN on those rows;
        * rows before 95 and rows from 101 onward must contain no NaN at all.
        """
        data = pd.DataFrame(np.random.randn(200, 3), columns=["y", "x1", "x2"])
        data.iloc[95:100, 0] = np.nan

        impact = CausalImpact(data, pre_period, post_period)
        impact.run()

        inferences = impact.inferences
        columns = inferences.columns

        # Positional indices are needed because the checks below use ``iloc``.
        predicted_cols = [
            columns.get_loc(col) for col in columns
            if "response" not in col and "point_effect" not in col
        ]
        response_cols = [
            columns.get_loc(col) for col in columns if "response" in col
        ]

        assert np.all(np.isnan(inferences.iloc[95:100, response_cols]))
        assert not np.any(np.isnan(inferences.iloc[95:100, predicted_cols]))
        assert not np.any(np.isnan(inferences.iloc[:95, :]))
        # NOTE(review): row 100 is covered by neither range check; the
        # original bounds are kept as-is -- confirm whether 100 was intended.
        assert not np.any(np.isnan(inferences.iloc[101:, :]))
示例#3
0
 def test_post_period_finishes_before_end_of_data(self, data, pre_period):
     """Run with a post-period that stops at row 197.

     The ``response`` column must still equal ``data.y``, and the last two
     rows must be null in every inference column from position 2 onward.
     """
     impact = CausalImpact(data, pre_period, [101, 197])
     impact.run()

     inferences = impact.inferences
     observed = inferences.response.values
     np.testing.assert_array_equal(observed, data.y.values)

     trailing_rows = inferences.iloc[-2:, 2:]
     assert np.all(pd.isnull(trailing_rows))
示例#4
0
 def test_late_start_early_finish_and_gap_between_periods(self, data):
     """Run with pre-period [3, 80] and post-period [120, 197].

     Rows labelled up to 2, from 81 to 119 and from 198 onward lie outside
     both periods; on those rows every inference column from position 2
     onward must be null.
     """
     impact = CausalImpact(data, [3, 80], [120, 197])
     impact.run()

     inferences = impact.inferences
     model_columns = inferences.columns[2:]
     # Label-based slices, evaluated in the same order as the three
     # separate assertions they replace.
     uncovered_rows = (slice(None, 2), slice(81, 119), slice(198, None))
     for rows in uncovered_rows:
         assert np.all(pd.isnull(inferences.loc[rows, model_columns]))
示例#5
0
    def test_input_w_time_column(self):
        """Check ``_format_input`` on data whose first column is ``time``.

        The expected result uses the frame indexed by ``time`` and the
        period bounds converted with ``pd.to_datetime``; all remaining
        entries are taken from the instance's own ``params`` or are ``None``.
        """
        frame = pd.DataFrame(np.random.randn(100, 2), columns=['x1', 'x2'])
        frame['time'] = pd.date_range(start='2018-01-01', periods=100)
        frame = frame[['time', 'x1', 'x2']]
        pre_labels = ['2018-01-01', '2018-02-10']
        post_labels = ['2018-02-11', '2018-4-10']

        causal_impact = CausalImpact(frame, pre_labels, post_labels, {})
        params = causal_impact.params

        expected = {
            "data": frame.set_index('time'),
            "pre_period": list(map(pd.to_datetime, pre_labels)),
            "post_period": list(map(pd.to_datetime, post_labels)),
            "model_args": params['model_args'],
            "ucm_model": None,
            "post_period_response": None,
            "alpha": params['alpha'],
        }
        result = causal_impact._format_input(
            params["data"],
            params["pre_period"],
            params["post_period"],
            params["model_args"],
            None,
            None,
            params["alpha"],
        )

        # The frame and the model arguments are compared on their own; the
        # rest of the two mappings is compared in one go.
        assert_frame_equal(result["data"], expected["data"])
        assert result["model_args"] == expected["model_args"]

        compared_separately = {"model_args", "data"}
        result_other = {
            key: result[key]
            for key in result if key not in compared_separately
        }
        expected_other = {
            key: expected[key]
            for key in expected if key not in compared_separately
        }
        assert result_other == expected_other
示例#6
0
    def test_input_w_just_2_points_raises_exception(self):
        """``_format_input`` must raise ``ValueError`` for a two-row frame."""
        two_rows = pd.DataFrame(np.random.randn(2, 2), columns=['x1', 'x2'])
        causal_impact = CausalImpact(two_rows, [0, 0], [1, 1], {})
        params = causal_impact.params
        expected_message = 'data must have at least 3 time points'

        with pytest.raises(ValueError) as excinfo:
            causal_impact._format_input(
                params["data"],
                params["pre_period"],
                params["post_period"],
                params["model_args"],
                None,
                None,
                params["alpha"],
            )
        assert str(excinfo.value) == expected_message
示例#7
0
    def test_unlabelled_pandas_series(
            self, expected_columns, pre_period, post_period):
        """Run on the raw values of an unlabelled frame with the "MLE" option.

        The inference columns produced by the run must match
        ``expected_columns``.
        """
        raw_values = pd.DataFrame(np.random.randn(200, 3)).values
        settings = {"niter": 123, 'standardize_data': False}

        causal_impact = CausalImpact(
            raw_values, pre_period, post_period, settings,
            None, None, 0.05, "MLE")
        causal_impact.run()

        assert list(causal_impact.inferences.columns) == expected_columns
示例#8
0
    def test_float_index_pre_period_contains_int(self):
        """Check ``_format_input`` on float-indexed data with int periods.

        The expected values are the instance's own ``params`` entries, so
        the call is expected to hand them back unchanged.
        """
        frame = pd.DataFrame(
            np.random.randn(200, 3), columns=['y', 'x1', 'x2'])
        frame = frame.set_index(np.arange(200, dtype=float))
        causal_impact = CausalImpact(frame, [0, 3], [3, 4], {})
        params = causal_impact.params

        expected = {
            "data": params['data'],
            "pre_period": params['pre_period'],
            "post_period": params['post_period'],
            "model_args": params['model_args'],
            "ucm_model": None,
            "post_period_response": None,
            "alpha": params['alpha'],
        }
        result = causal_impact._format_input(
            params["data"],
            params["pre_period"],
            params["post_period"],
            params["model_args"],
            None,
            None,
            params["alpha"],
        )

        assert_frame_equal(result["data"], expected["data"])
        assert result["model_args"] == expected["model_args"]

        # Everything not already compared above is compared as one mapping.
        compared_separately = {"model_args", "data"}
        result_other = {
            key: result[key]
            for key in result if key not in compared_separately
        }
        expected_other = {
            key: expected[key]
            for key in expected if key not in compared_separately
        }
        assert result_other == expected_other
示例#9
0
    def test_summary(self, inference_input):
        """Compare the file written by ``summary(path=...)`` with a CSV dump.

        The expected table is built as a DataFrame, written with ``to_csv``
        into a temporary directory and read back as text; the file produced
        by ``causal.summary`` must contain exactly the same text.

        Both files are read through context managers and the temporary
        directory is removed afterwards, so the test leaves no open handles
        or stray files behind.
        """
        import shutil  # local import: only needed for temp-dir cleanup

        causal = CausalImpact()
        causal.params = {
            'alpha': 0.05,
            'post_period': [2, 4]
        }
        causal.inferences = pd.DataFrame(inference_input)

        expected = pd.DataFrame(
            [
                [3, 7],
                [3, 7],
                [[3, 3], [7, 7]],
                [' ', ' '],
                [0, 0],
                [[0, 0], [0, 0]],
                [' ', ' '],
                ['-2.8%', '-2.8%'],
                [['0.0%', '-11.1%'], ['0.0%', '-11.1%']],
                [' ', ' '],
                ['0.0%', ' '],
                ['100.0%', ' '],
            ],
            columns=['Average', 'Cumulative'],
            index=[
                'Actual',
                'Predicted',
                '95% CI',
                ' ',
                'Absolute Effect',
                '95% CI',
                ' ',
                'Relative Effect',
                '95% CI',
                " ",
                "P-value",
                "Prob. of Causal Effect"
            ]
        )

        tmpdir = mkdtemp()
        try:
            result_file = os.path.join(tmpdir, 'tmp_test_summary')
            expected_file = os.path.join(tmpdir, 'tmp_expected')

            expected.to_csv(expected_file)
            with open(expected_file) as file_obj:
                expected_str = file_obj.read()

            causal.summary(path=result_file)

            with open(result_file) as file_obj:
                result = file_obj.read()
        finally:
            # mkdtemp() never cleans up after itself.
            shutil.rmtree(tmpdir, ignore_errors=True)

        assert result == expected_str
示例#10
0
    def test_input_covariates_w_nan_value_raises(self):
        """``_format_input`` must raise ``ValueError`` on a NaN in ``x1``."""
        rows = [
            [1, 1, 2],
            [1, 2, 3],
            [1, 3, 4],
            [1, np.nan, 5],
            [1, 6, 7],
        ]
        frame = pd.DataFrame(np.array(rows), columns=['y', 'x1', 'x2'])
        causal_impact = CausalImpact(frame, [0, 3], [3, 4], {})
        params = causal_impact.params
        expected_message = 'covariates must not contain null values'

        with pytest.raises(ValueError) as excinfo:
            causal_impact._format_input(
                params["data"],
                params["pre_period"],
                params["post_period"],
                params["model_args"],
                None,
                None,
                params["alpha"],
            )
        assert str(excinfo.value) == expected_message
示例#11
0
    def test_summary_wrong_argument_raises(self, inference_input):
        """``summary`` must raise ``ValueError`` for an unknown ``output``."""
        causal = CausalImpact()
        causal.params = {'alpha': 0.05, 'post_period': [2, 4]}
        causal.inferences = pd.DataFrame(inference_input)

        with pytest.raises(ValueError):
            causal.summary(output='wrong_argument')
示例#12
0
    def test_summary_w_report_output(
            self,
            monkeypatch,
            inference_input,
            summary_report_filename):
        """Check the text that ``summary(output='report')`` sends to dedent.

        ``textwrap.dedent`` is replaced by a mock that collapses whitespace
        in each message and appends it to a temporary file. After the call,
        the accumulated file content must equal the whitespace-normalised
        content of ``summary_report_filename``.

        Files are read through context managers and the temporary directory
        is removed afterwards, so nothing is leaked by the test.
        """
        import shutil  # local import: only needed for temp-dir cleanup

        causal = CausalImpact()
        causal.params = {
            'alpha': 0.05,
            'post_period': [2, 4]
        }
        causal.inferences = pd.DataFrame(inference_input)

        with open(summary_report_filename) as file_obj:
            expected = re.sub(r'\s+', ' ', file_obj.read()).strip()

        tmpdir = mkdtemp()
        tmp_file = os.path.join(tmpdir, 'summary_test')

        def dedent_side_effect(msg):
            # Record each message in normalised form; append mode keeps the
            # messages in call order.
            msg = re.sub(r'\s+', ' ', msg).strip()
            with open(tmp_file, 'a') as file_obj:
                file_obj.write(msg)
            return msg

        dedent_mock = mock.Mock(side_effect=dedent_side_effect)
        monkeypatch.setattr('textwrap.dedent', dedent_mock)

        try:
            causal.summary(output='report')
            with open(tmp_file, 'r') as file_obj:
                result_str = file_obj.read()
        finally:
            # mkdtemp() never cleans up after itself.
            shutil.rmtree(tmpdir, ignore_errors=True)

        assert result_str == expected
示例#13
0
    def test_plot(self, monkeypatch):
        """Exercise ``CausalImpact.plot`` against a mocked plotting library.

        ``causalimpact.analysis.get_matplotlib`` is patched to return a
        ``Mock`` and ``numpy.zeros`` is patched to return ``[0, 0]``. The
        instance is given hand-made ``params``, ``inferences`` and ``data``
        stand-ins that return fixed sentinel values, and the test then
        asserts that those sentinels were passed to ``plot`` and
        ``fill_between`` on the mock.
        """
        causal = CausalImpact()

        params = {
            'alpha': 0.05,
            'post_period': [2, 4],
            'pre_period': [0, 1]
        }

        # Sentinel strings standing in for the inference columns; the
        # assertions below look for exactly these values.
        inferences_mock = {
            'point_pred': 'points predicted',
            'response': 'y obs',
            'point_pred_lower': 'lower predictions',
            'point_pred_upper': 'upper predictions'
        }

        # Stand-in for the inferences frame: any ``iloc[...]`` lookup,
        # whatever the key, yields the same dict-like object.
        class Inferences(object):
            @property
            def iloc(self):
                class Iloc(object):
                    # Declared without an explicit ``self``; the instance
                    # and the key both arrive in ``*args`` and are ignored.
                    def __getitem__(*args, **kwargs):
                        # Dict of sentinel columns that also exposes an
                        # ``index`` and the effect series as attributes.
                        class EnhancedDict(dict):
                            @property
                            def index(self):
                                return [0, 1]

                            @property
                            def point_effect(self):
                                return 'lift'

                            @property
                            def point_effect_lower(self):
                                return 'point effect lower'

                            @property
                            def point_effect_upper(self):
                                return 'point effect upper'

                            @property
                            def cum_effect(self):
                                return 'cum effect'

                            @property
                            def cum_effect_upper(self):
                                return 'cum effect upper'

                            @property
                            def cum_effect_lower(self):
                                return 'cum effect lower'
                        return EnhancedDict(inferences_mock)
                return Iloc()

        # Stand-in for the data frame; only ``index`` and ``shape`` exist.
        class Data(object):
            @property
            def index(self):
                return 'index'

            @property
            def shape(self):
                return [(1, 2)]

        plot_mock = mock.Mock()
        # NOTE(review): fill_mock and show_mock are created but never used
        # anywhere in this test.
        fill_mock = mock.Mock()
        show_mock = mock.Mock()
        # Whatever size is requested, the patched numpy.zeros returns
        # [0, 0]; the 'g-' assertions below expect that value.
        np_zeros_mock = mock.Mock()
        np_zeros_mock.side_effect = lambda x: [0, 0]

        # Whatever get_matplotlib() is asked for, it hands back plot_mock.
        get_lib_mock = mock.Mock(return_value=plot_mock)
        monkeypatch.setattr(
            'causalimpact.analysis.get_matplotlib',
            get_lib_mock
        )

        monkeypatch.setattr('numpy.zeros', np_zeros_mock)

        causal.params = params
        causal.inferences = Inferences()
        causal.data = Data()

        # These two calls are only required to run without raising; no
        # assertion is tied to them specifically.
        causal.plot(panels=['original', 'pointwise', 'cumulative'])
        causal.plot(panels=['pointwise', 'cumulative'])

        causal.plot(panels=['original'])

        # assert_any_call matches any earlier call on the mock, so these
        # checks can also be satisfied by the plot() calls made above.
        plot_mock.plot.assert_any_call(
            'y obs',
            'k',
            label='endog',
            linewidth=2
        )
        plot_mock.plot.assert_any_call(
            'points predicted',
            'r--',
            label='model',
            linewidth=2
        )

        plot_mock.fill_between.assert_any_call(
            [0, 1],
            'lower predictions',
            'upper predictions',
            facecolor='gray',
            interpolate=True,
            alpha=0.25
        )

        causal.plot(panels=['pointwise'])

        plot_mock.plot.assert_any_call('lift', 'r--', linewidth=2)
        plot_mock.plot.assert_any_call('index', [0, 0], 'g-', linewidth=2)

        causal.plot(panels=['cumulative'])

        plot_mock.plot.assert_any_call(
            [0, 1],
            'cum effect',
            'r--',
            linewidth=2
        )
        plot_mock.plot.assert_any_call('index', [0, 0], 'g-', linewidth=2)
示例#14
0
    def test_other_formats(self, expected_columns, pre_period, post_period):
        """Run the analysis on three input containers in turn.

        A labelled DataFrame, a NumPy array and a list of lists must each
        produce inference columns equal to ``expected_columns``.
        """
        model_args = {"niter": 100, "standardize_data": True}

        # Builders are called lazily, one per iteration, so random numbers
        # are drawn at the same points as when each dataset was created
        # immediately before its own run.
        builders = (
            # labelled dataframe
            lambda: pd.DataFrame(
                np.random.randn(200, 3), columns=["a", "b", "c"]),
            # numpy array
            lambda: np.random.randn(200, 3),
            # list of lists
            lambda: np.random.randn(200, 2).tolist(),
        )

        for build in builders:
            impact = CausalImpact(
                build(), pre_period, post_period, model_args)
            impact.run()
            assert list(impact.inferences.columns) == expected_columns
示例#15
0
 def test_pre_period_lower_than_data_index_min(self, data):
     """A pre-period start of -1 must be stored as 0 after the run."""
     impact = CausalImpact(data, [-1, 100], [101, 199])
     impact.run()

     stored_pre_period = impact.params['pre_period']
     assert stored_pre_period == [0, 100]
示例#16
0
 def test_post_period_bigger_than_data_index_max(self, data):
     """A post-period end of 300 must be stored as 199 after the run."""
     impact = CausalImpact(data, [0, 100], [101, 300])
     impact.run()

     stored_post_period = impact.params['post_period']
     assert stored_post_period == [101, 199]
示例#17
0
def causal_impact(data, pre_period, post_period):
    """Return a ``CausalImpact`` for the given inputs with ``niter`` = 123."""
    return CausalImpact(data, pre_period, post_period, {"niter": 123})
示例#18
0
    def test_pre_period_in_conflict_w_post_period(self):
        """``_format_input`` must reject inconsistent period bounds.

        Two scenarios are checked against a 20-row frame with pre-period
        ``[0, 10]``: a post-period that starts before the pre-period ends,
        and a post-period whose end precedes its start. Each must raise
        ``ValueError`` with the exact message listed next to it.
        """
        data = pd.DataFrame(np.random.randn(20, 2), columns=['x1', 'x2'])

        # NOTE(review): this test used to run the [11, 9] scenario twice
        # with identical inputs and message; the duplicate is folded away.
        # Possibly a pre_period ordering case was intended -- confirm.
        scenarios = [
            (
                [9, 20],
                'post period must start at least 1 observation after the '
                'end of the pre_period',
            ),
            (
                [11, 9],
                'post_period[1] must not be earlier than post_period[0]',
            ),
        ]

        for post_period, expected_message in scenarios:
            causal_impact = CausalImpact(data, [0, 10], post_period, {})
            params = causal_impact.params

            with pytest.raises(ValueError) as excinfo:
                causal_impact._format_input(
                    params["data"],
                    params["pre_period"],
                    params["post_period"],
                    params["model_args"],
                    None,
                    None,
                    params["alpha"],
                )
            assert str(excinfo.value) == expected_message
示例#19
0
 def test_gap_between_pre_and_post_periods(self, data, pre_period):
     """Run with a post-period that starts at 120.

     Rows labelled 101 through 119 must be null in every inference column
     from position 2 onward.
     """
     impact = CausalImpact(data, pre_period, [120, 199])
     impact.run()

     inferences = impact.inferences
     gap_rows = inferences.loc[101:119, inferences.columns[2:]]
     assert np.all(pd.isnull(gap_rows))
示例#20
0
def impact_ucm(ucm_model):
    """Return a ``CausalImpact`` built from a model and 100 random responses.

    The post-period response is drawn with ``np.random.randn(100)``.
    """
    simulated_response = np.random.randn(100)
    impact = CausalImpact(
        ucm_model=ucm_model,
        post_period_response=simulated_response,
    )
    return impact
示例#21
0
 def test_missing_input(self):
     """Building and running ``CausalImpact`` with no inputs is expected
     to raise ``SyntaxError``."""
     with pytest.raises(SyntaxError):
         CausalImpact().run()
示例#22
0
 def test_missing_pre_period_data(self, data, pre_period, post_period):
     """NaNs in rows 3-4 of the first column must not drop any rows.

     The inferences produced by the run must have as many rows as the
     input frame.
     """
     with_gaps = data.copy()
     with_gaps.iloc[3:5, 0] = np.nan

     impact = CausalImpact(with_gaps, pre_period, post_period)
     impact.run()

     expected_rows = len(with_gaps)
     assert len(impact.inferences) == expected_rows
示例#23
0
 def test_frame_w_no_exog(self, pre_period, post_period):
     """``run`` must raise ``ValueError`` for a one-dimensional input."""
     univariate = np.random.randn(200)
     impact = CausalImpact(univariate, pre_period, post_period, {})
     expected_message = 'data contains no exogenous variables'

     with pytest.raises(ValueError) as excinfo:
         impact.run()
     assert str(excinfo.value) == expected_message