Example No. 1
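# `nseasons` given as a list of dicts adds one frequency-seasonal component per
# entry; the first call shows 'harmonics' apparently defaulting to period // 2
# (4 -> 2, 3 -> 1) when omitted, while the second sets it explicitly.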
def test_causal_cto_w_seasons(date_rand_data, pre_str_period, post_str_period):
    ci = CausalImpact(date_rand_data,
                      pre_str_period,
                      post_str_period,
                      nseasons=[{
                          'period': 4
                      }, {
                          'period': 3
                      }])
    assert ci.model.freq_seasonal_periods == [4, 3]
    assert ci.model.freq_seasonal_harmonics == [2, 1]

    ci = CausalImpact(date_rand_data,
                      pre_str_period,
                      post_str_period,
                      nseasons=[{
                          'period': 4,
                          'harmonics': 1
                      }, {
                          'period': 3,
                          'harmonics': 1
                      }])
    assert ci.model.freq_seasonal_periods == [4, 3]
    assert ci.model.freq_seasonal_harmonics == [1, 1]
    assert ci.inferences.index.dtype == date_rand_data.index.dtype
Example No. 2
    def test_other_formats(self):
        # Test other data formats
        pre_period = [1, 100]
        post_period = [101, 200]
        model_args = {"niter": 100}

        # labelled dataframe
        data = pd.DataFrame(np.random.randn(200, 3), columns=["a", "b", "c"])
        impact = CausalImpact(data, pre_period, post_period, model_args)
        impact.run()
        actual_columns = impact.inferences.columns
        assert actual_columns[0] == "response"

        # numpy array
        data = np.random.randn(200, 3)
        impact = CausalImpact(data, pre_period, post_period, model_args)
        impact.run()
        actual_columns = impact.inferences.columns
        assert actual_columns[0] == "response"

        # list of lists
        data = [[n, n + 2] for n in range(200)]
        impact = CausalImpact(data, pre_period, post_period, model_args)
        impact.run()
        actual_columns = impact.inferences.columns
        assert actual_columns[0] == "response"
Example No. 3
def test_custom_model_input_validation(rand_data, pre_int_period,
                                       post_int_period):
    with pytest.raises(ValueError) as excinfo:
        CausalImpact(rand_data, pre_int_period, post_int_period, model='test')
    assert str(
        excinfo.value) == 'Input model must be of type UnobservedComponents.'

    ucm = UnobservedComponents(rand_data.iloc[:101, 0],
                               level='llevel',
                               exog=rand_data.iloc[:101, 1:])
    ucm.level = False
    with pytest.raises(ValueError) as excinfo:
        CausalImpact(rand_data, pre_int_period, post_int_period, model=ucm)
    assert str(excinfo.value) == 'Model must have level attribute set.'

    ucm = UnobservedComponents(rand_data.iloc[:101, 0],
                               level='llevel',
                               exog=rand_data.iloc[:101, 1:])
    ucm.exog = None
    with pytest.raises(ValueError) as excinfo:
        CausalImpact(rand_data, pre_int_period, post_int_period, model=ucm)
    assert str(excinfo.value) == 'Model must have exog attribute set.'

    ucm = UnobservedComponents(rand_data.iloc[:101, 0],
                               level='llevel',
                               exog=rand_data.iloc[:101, 1:])
    ucm.data = None
    with pytest.raises(ValueError) as excinfo:
        CausalImpact(rand_data, pre_int_period, post_int_period, model=ucm)
    assert str(excinfo.value) == 'Model must have data attribute set.'
Example No. 4
def test_invalid_alpha_raises(rand_data, pre_int_period, post_int_period):
    with pytest.raises(ValueError) as excinfo:
        CausalImpact(rand_data, pre_int_period, post_int_period, alpha=1)
    assert str(excinfo.value) == 'alpha must be of type float.'

    with pytest.raises(ValueError) as excinfo:
        CausalImpact(rand_data, pre_int_period, post_int_period, alpha=2.)
    assert str(excinfo.value) == (
        'alpha must range between 0 (zero) and 1 (one) inclusive.')
Example No. 5
def test_causal_cto_raises_on_None_input(rand_data, pre_int_period, post_int_period):
    with pytest.raises(ValueError) as excinfo:
        CausalImpact(None, pre_int_period, post_int_period)
    assert str(excinfo.value) == 'data input cannot be empty'

    with pytest.raises(ValueError) as excinfo:
        CausalImpact(rand_data, None, post_int_period)
    assert str(excinfo.value) == 'pre_period input cannot be empty'

    with pytest.raises(ValueError) as excinfo:
        CausalImpact(rand_data, pre_int_period, None)
    assert str(excinfo.value) == 'post_period input cannot be empty'
Example No. 6
def test_invalid_data_input_raises():
    with pytest.raises(ValueError) as excinfo:
        CausalImpact('test', [0, 5], [5, 10])
    assert str(excinfo.value) == 'Could not transform input data to pandas DataFrame.'

    data = [1, 2, 3, 4, 5, 6, 2 + 1j]
    with pytest.raises(ValueError) as excinfo:
        CausalImpact(data, [0, 3], [3, 6])
    assert str(excinfo.value) == 'Input data must contain only numeric values.'

    data = np.random.randn(10, 2)
    data[0, 1] = np.nan
    with pytest.raises(ValueError) as excinfo:
        CausalImpact(data, [0, 3], [3, 6])
    assert str(excinfo.value) == 'Input data cannot have NAN values.'
Example No. 7
def test_kwargs_validation(rand_data, pre_int_period, post_int_period):
    with pytest.raises(ValueError) as excinfo:
        ci = CausalImpact(rand_data,
                          pre_int_period,
                          post_int_period,
                          standardize='yes')
    assert str(excinfo.value) == 'Standardize argument must be of type bool.'
Example No. 8
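# Exercises the constructor defaults: alpha=0.05, standardized pre/post data,
# the default model_args, a tfp.sts.Sum model whose regression design matrix is
# built from the standardized covariates, and a p-value strictly between 0 and 1.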
def test_default_causal_cto(rand_data, pre_int_period, post_int_period):
    ci = CausalImpact(rand_data, pre_int_period, post_int_period)
    assert_frame_equal(ci.data, rand_data)
    assert ci.pre_period == pre_int_period
    assert ci.post_period == post_int_period
    pre_data = rand_data.loc[pre_int_period[0]:pre_int_period[1], :]
    assert_frame_equal(ci.pre_data, pre_data)
    post_data = rand_data.loc[post_int_period[0]:post_int_period[1], :]
    assert_frame_equal(ci.post_data, post_data)
    assert ci.alpha == 0.05
    normed_pre_data, (mu, sig) = standardize(pre_data)
    assert_frame_equal(ci.normed_pre_data, normed_pre_data)
    normed_post_data = (post_data - mu) / sig
    assert_frame_equal(ci.normed_post_data, normed_post_data)
    assert ci.mu_sig == (mu[0], sig[0])
    assert ci.model_args == {
        'fit_method': 'hmc',
        'niter': 1000,
        'prior_level_sd': 0.01,
        'season_duration': 1,
        'nseasons': 1,
        'standardize': True
    }
    assert isinstance(ci.model, tfp.sts.Sum)
    design_matrix = ci.model.components[1].design_matrix.to_dense()
    assert_array_equal(
        design_matrix,
        pd.concat([normed_pre_data,
                   normed_post_data]).astype(np.float32).iloc[:, 1:])
    assert ci.inferences is not None
    assert ci.inferences.index.dtype == rand_data.index.dtype
    assert ci.summary_data is not None
    assert ci.p_value > 0 and ci.p_value < 1
    assert ci.model_args['niter'] == 1000
    assert ci.model_samples is not None
Example No. 9
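# Patches model fitting and the summarizer module, then checks that summary()
# forwards summary_data, p_value, alpha, the output format and digits, and that
# a non-integer `digits` raises ValueError.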
def test_summarizer(monkeypatch, rand_data, pre_int_period, post_int_period):
    summarizer_mock = mock.Mock()
    fit_mock = mock.Mock()
    process_mock = mock.Mock()
    summarize_mock = mock.Mock()
    monkeypatch.setattr('causalimpact.main.CausalImpact._fit_model', fit_mock)
    monkeypatch.setattr('causalimpact.main.CausalImpact._summarize_inferences',
                        summarize_mock)
    monkeypatch.setattr(
        'causalimpact.main.CausalImpact._process_posterior_inferences',
        process_mock)
    monkeypatch.setattr('causalimpact.main.summarizer', summarizer_mock)
    ci = CausalImpact(rand_data,
                      pre_int_period,
                      post_int_period,
                      model_args={'fit_method': 'vi'})
    ci.summary_data = 'summary_data'
    ci.p_value = 0.5
    ci.alpha = 0.05
    ci.summary()
    summarizer_mock.summary.assert_called_with('summary_data', 0.5, 0.05,
                                               'summary', 2)

    with pytest.raises(ValueError) as excinfo:
        ci.summary(digits='1')
    assert str(
        excinfo.value) == ('Input value for digits must be integer. Received '
                           '"<class \'str\'>" instead.')
Example No. 10
def test_plotter(monkeypatch, rand_data, pre_int_period, post_int_period):
    plotter_mock = mock.Mock()
    fit_mock = mock.Mock()
    process_mock = mock.Mock()
    summarize_mock = mock.Mock()
    monkeypatch.setattr('causalimpact.main.CausalImpact._fit_model', fit_mock)
    monkeypatch.setattr('causalimpact.main.CausalImpact._summarize_inferences',
                        summarize_mock)
    monkeypatch.setattr(
        'causalimpact.main.CausalImpact._process_posterior_inferences',
        process_mock)
    monkeypatch.setattr('causalimpact.main.plotter', plotter_mock)
    ci = CausalImpact(rand_data,
                      pre_int_period,
                      post_int_period,
                      model_args={'fit_method': 'vi'})
    ci.inferences = 'inferences'
    ci.pre_data = 'pre_data'
    ci.post_data = 'post_data'
    ci.plot()
    plotter_mock.plot.assert_called_with(
        'inferences',
        'pre_data',
        'post_data',
        panels=['original', 'pointwise', 'cumulative'],
        figsize=(10, 7))
Example No. 11
def get_results_from_statsmodels(
        processed_data: pd.DataFrame,
        experiment_config: ExperimentConfig) -> ExperimentOutput:
    """
    Get results from the CausalImpact implementation that uses statsmodels
    as a backend
    """
    if PYTHON_IMPLEMENTATION_USED != "pycausalimpact":
        raise EnvironmentError(
            f"tfcausalimpact not currently used. Implementations: {PYTHON_IMPLEMENTATION_USED}"
        )

    ci = CausalImpact(
        processed_data,
        experiment_config.training_period,
        experiment_config.evaluation_period,
    )

    experiment_output = ExperimentOutput(
        **{
            "results_summary": ci.summary_data,
            "trained_model": ci,
            "experiment_name": experiment_config.experiment_name,
        })

    return experiment_output
Example No. 12
    def test_gap(self):
        post_period = [120, 200]
        impact = CausalImpact(data, pre_period, post_period, model_args)
        impact.run()
        assert np.all(
            pd.isnull(impact.inferences.loc[101:119,
                                            impact.inferences.columns[2:]]))
Example No. 13
def test_causal_cto_with_no_standardization(rand_data, pre_int_period, post_int_period):
    ci = CausalImpact(rand_data, pre_int_period, post_int_period, model_args=dict(
        standardize=False, fit_method='vi'))
    assert ci.normed_pre_data is None
    assert ci.normed_post_data is None
    assert ci.mu_sig is None
    assert ci.p_value > 0 and ci.p_value < 1
Example No. 14
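# Builds the CausalImpact object by hand (no fitting), patches textwrap.dedent
# to capture the rendered report, and compares the whitespace-normalized text
# with the expected fixture file.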
    def test_summary_w_report_output(self, monkeypatch, inference_input,
                                     summary_report_filename):
        inferences_df = pd.DataFrame(inference_input)
        causal = CausalImpact()

        params = {'alpha': 0.05, 'post_period': [2, 4]}

        causal.params = params
        causal.inferences = inferences_df

        dedent_mock = mock.Mock()

        expected = open(summary_report_filename).read()
        expected = re.sub(r'\s+', ' ', expected)
        expected = expected.strip()

        tmpdir = mkdtemp()
        tmp_file = os.path.join(tmpdir, 'summary_test')

        def dedent_side_effect(msg):
            with open(tmp_file, 'a') as file_obj:
                msg = re.sub(r'\s+', ' ', msg)
                msg = msg.strip()
                file_obj.write(msg)
            return msg

        dedent_mock.side_effect = dedent_side_effect
        monkeypatch.setattr('textwrap.dedent', dedent_mock)

        causal.summary(output='report')
        result_str = open(tmp_file, 'r').read()
        assert result_str == expected
Example No. 15
    def test_missing_values_in_pre_period_y(self, pre_period, post_period):
        """Test that all columns in the result series except those associated
        with point predictions have missing values at the time points the
        result time series has missing values."""
        data = pd.DataFrame(np.random.randn(200, 3), columns=["y", "x1", "x2"])
        data.iloc[95:100, 0] = np.nan

        impact = CausalImpact(data, pre_period, post_period)
        impact.run()

        predicted_cols = [
            impact.inferences.columns.get_loc(col)
            for col in impact.inferences.columns
            if ("response" not in col and "point_effect" not in col)
        ]

        effect_cols = [
            impact.inferences.columns.get_loc(col)
            for col in impact.inferences.columns if "point_effect" in col
        ]

        response_cols = [
            impact.inferences.columns.get_loc(col)
            for col in impact.inferences.columns if "response" in col
        ]

        assert np.all(np.isnan(impact.inferences.iloc[95:100, response_cols]))
        assert (np.any(np.isnan(
            impact.inferences.iloc[95:100, predicted_cols])) == False)
        assert np.any(np.isnan(impact.inferences.iloc[:95, :])) == False
        assert np.any(np.isnan(impact.inferences.iloc[101:, :])) == False
Example No. 16
    def test_gap_between_pre_and_post_periods(self, data, pre_period):
        post_period = [120, 199]
        impact = CausalImpact(data, pre_period, post_period)
        impact.run()
        assert np.all(
            pd.isnull(impact.inferences.loc[101:119,
                                            impact.inferences.columns[2:]]))
Example No. 17
    def test_post_period_finishes_before_end_of_data(self, data, pre_period):
        post_period = [101, 197]
        impact = CausalImpact(data, pre_period, post_period)
        impact.run()
        np.testing.assert_array_equal(impact.inferences.response.values,
                                      data.y.values)
        assert np.all(pd.isnull(impact.inferences.iloc[-2:, 2:]))
Example No. 18
    def test_pre_period_starts_after_beginning_of_data(self, data):
        pre_period = [3, 100]
        impact = CausalImpact(data, pre_period, [101, 199])
        impact.run()
        np.testing.assert_array_equal(impact.inferences.response.values,
                                      data.y.values)
        assert np.all(pd.isnull(impact.inferences.iloc[0:pre_period[0], 2:]))
Example No. 19
def get_results_from_tensorflow(
        processed_data: pd.DataFrame,
        experiment_config: ExperimentConfig) -> ExperimentOutput:

    if PYTHON_IMPLEMENTATION_USED != "tfcausalimpact":
        raise EnvironmentError(
            f"tfcausalimpact not currently used. Implementations: {PYTHON_IMPLEMENTATION_USED}"
        )

    causal_model_config = experiment_config.causal_model_config
    alpha, model, model_args = process_causal_impact_config(
        causal_model_config)
    print("things passed to the causal model: ")
    print(alpha, model, model_args)
    ci = CausalImpact(
        processed_data,
        experiment_config.training_period,
        experiment_config.evaluation_period,
        alpha=alpha,
        model=model,
        model_args=model_args,
    )

    experiment_output = ExperimentOutput(
        **{
            "results_summary": ci.summary_data,
            "trained_model": ci,
            "experiment_name": experiment_config.experiment_name,
        })

    return experiment_output
Example No. 20
def test_default_causal_inferences(fix_path):
    np.random.seed(1)
    data = pd.read_csv(os.path.join(fix_path, 'google_data.csv'))
    del data['t']

    pre_period = [0, 60]
    post_period = [61, 90]

    ci = CausalImpact(data, pre_period, post_period)
    assert int(ci.summary_data['average']['actual']) == 156
    assert int(ci.summary_data['average']['predicted']) == 129
    assert int(ci.summary_data['average']['predicted_lower']) == 102
    assert int(ci.summary_data['average']['predicted_upper']) == 156
    assert int(ci.summary_data['average']['abs_effect']) == 26
    assert round(ci.summary_data['average']['abs_effect_lower'], 1) == -0.2
    assert int(ci.summary_data['average']['abs_effect_upper']) == 53
    assert round(ci.summary_data['average']['rel_effect'], 1) == 0.2
    assert round(ci.summary_data['average']['rel_effect_lower'], 1) == 0.0
    assert round(ci.summary_data['average']['rel_effect_upper'], 1) == 0.4

    assert int(ci.summary_data['cumulative']['actual']) == 4687
    assert int(ci.summary_data['cumulative']['predicted']) == 3883
    assert int(ci.summary_data['cumulative']['predicted_lower']) == 3085
    assert int(ci.summary_data['cumulative']['predicted_upper']) == 4693
    assert int(ci.summary_data['cumulative']['abs_effect']) == 803
    assert round(ci.summary_data['cumulative']['abs_effect_lower'], 1) == -6.8
    assert int(ci.summary_data['cumulative']['abs_effect_upper']) == 1601
    assert round(ci.summary_data['cumulative']['rel_effect'], 1) == 0.2
    assert round(ci.summary_data['cumulative']['rel_effect_lower'], 1) == 0.0
    assert round(ci.summary_data['cumulative']['rel_effect_upper'], 1) == 0.4

    assert round(ci.p_value, 1) == 0.0
Example No. 21
def test_default_causal_inferences_w_date(fix_path):
    np.random.seed(1)
    data = pd.read_csv(os.path.join(fix_path, 'google_data.csv'))
    data['date'] = pd.to_datetime(data['t'])
    data.index = data['date']
    del data['t']
    del data['date']

    pre_period = ['2016-02-20 22:41:20', '2016-02-20 22:51:20']
    post_period = ['2016-02-20 22:51:30', '2016-02-20 22:56:20']

    ci = CausalImpact(data, pre_period, post_period)
    assert int(ci.summary_data['average']['actual']) == 156
    assert int(ci.summary_data['average']['predicted']) == 129
    assert int(ci.summary_data['average']['predicted_lower']) == 102
    assert int(ci.summary_data['average']['predicted_upper']) == 156
    assert int(ci.summary_data['average']['abs_effect']) == 26
    assert round(ci.summary_data['average']['abs_effect_lower'], 1) == -0.2
    assert int(ci.summary_data['average']['abs_effect_upper']) == 53
    assert round(ci.summary_data['average']['rel_effect'], 1) == 0.2
    assert round(ci.summary_data['average']['rel_effect_lower'], 1) == 0.0
    assert round(ci.summary_data['average']['rel_effect_upper'], 1) == 0.4

    assert int(ci.summary_data['cumulative']['actual']) == 4687
    assert int(ci.summary_data['cumulative']['predicted']) == 3883
    assert int(ci.summary_data['cumulative']['predicted_lower']) == 3085
    assert int(ci.summary_data['cumulative']['predicted_upper']) == 4693
    assert int(ci.summary_data['cumulative']['abs_effect']) == 803
    assert round(ci.summary_data['cumulative']['abs_effect_lower'], 1) == -6.8
    assert int(ci.summary_data['cumulative']['abs_effect_upper']) == 1601
    assert round(ci.summary_data['cumulative']['rel_effect'], 1) == 0.2
    assert round(ci.summary_data['cumulative']['rel_effect_lower'], 1) == 0.0
    assert round(ci.summary_data['cumulative']['rel_effect_upper'], 1) == 0.4

    assert round(ci.p_value, 1) == 0.0
Example No. 22
def test_string_index_with_no_date_formatted(rand_data, pre_int_period,
                                             post_int_period):
    rand_data.set_index(rand_data.index.map(str), inplace=True)
    pre_period = ['0', '60']
    post_period = ['61', '90']

    _ = CausalImpact(rand_data, pre_period, post_period)
Example No. 23
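# Patches the plotting backend and verifies the matplotlib calls issued when
# only the 'cumulative' panel is drawn: the dashed cumulative-effect line, the
# shaded credible interval, and the vertical marker at the start of the post-period.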
def test_plot_cumulative_panel(rand_data, pre_int_period, post_int_period, monkeypatch):
    ci = CausalImpact(rand_data, pre_int_period, post_int_period)
    ax_mock = mock.Mock()
    plotter_mock = mock.Mock()
    plotter_mock.subplot.return_value = ax_mock
    plot_mock = mock.Mock(return_value=plotter_mock)
    monkeypatch.setattr(plot.Plot, '_get_plotter', plot_mock)

    ci.plot(panels=['cumulative'])
    plot_mock.assert_called_once()
    plotter_mock.figure.assert_called_with(figsize=(15, 12))
    plotter_mock.subplot.assert_any_call(1, 1, 1, sharex=ax_mock)
    ax_args = ax_mock.plot.call_args

    inferences = ci.inferences.iloc[1:, :]

    assert_array_equal(inferences['post_cum_effects'], ax_args[0][0])
    assert ax_args[0][1] == 'b--'
    assert ax_args[1] == {'label': 'Cumulative Effect'}

    ax_mock.axvline.assert_called_with(ci.post_period[0] - 1, c='k', linestyle='--')

    ax_args = ax_mock.fill_between.call_args_list[0]
    assert_array_equal(ax_args[0][0], inferences['post_cum_effects'].index)
    assert_array_equal(ax_args[0][1], inferences['post_cum_effects_lower'])
    assert_array_equal(ax_args[0][2], inferences['post_cum_effects_upper'])
    assert ax_args[1] == {'facecolor': 'blue', 'interpolate': True, 'alpha': 0.25}

    ax_mock.axhline.assert_called_with(y=0, color='k', linestyle='--')

    ax_mock.grid.assert_called_with(True, linestyle='--')
    ax_mock.legend.assert_called()

    plotter_mock.show.assert_called_once()
Example No. 24
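# Reads a space-separated CSV indexed by the 'Date' column and builds the
# pre/post periods by mixing raw index values with pd.Timestamp bounds.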
def test_default_causal_inferences_w_str_date(fix_path):
    np.random.seed(1)
    data = pd.read_csv(os.path.join(fix_path, 'volks_data.csv'),
                       header=0,
                       sep=' ',
                       index_col='Date')

    pre_period = [np.min(data.index.values), pd.Timestamp('2015-09-13')]
    post_period = [pd.Timestamp('2015-09-20'), np.max(data.index.values)]

    ci = CausalImpact(data, pre_period, post_period)
    assert int(ci.summary_data['average']['actual']) == 126
    assert int(ci.summary_data['average']['predicted']) == 171
    assert int(ci.summary_data['average']['predicted_lower']) == 165
    assert int(ci.summary_data['average']['predicted_upper']) == 177
    assert int(ci.summary_data['average']['abs_effect']) == -44
    assert round(ci.summary_data['average']['abs_effect_lower'], 1) == -50.4
    assert int(ci.summary_data['average']['abs_effect_upper']) == -39
    assert round(ci.summary_data['average']['rel_effect'], 1) == -0.3
    assert round(ci.summary_data['average']['rel_effect_lower'], 2) == -0.29
    assert round(ci.summary_data['average']['rel_effect_upper'], 2) == -0.23

    assert int(ci.summary_data['cumulative']['actual']) == 10026
    assert int(ci.summary_data['cumulative']['predicted']) == 13574
    assert int(ci.summary_data['cumulative']['predicted_lower']) == 13113
    assert int(ci.summary_data['cumulative']['predicted_upper']) == 14004
    assert int(ci.summary_data['cumulative']['abs_effect']) == -3548
    assert int(ci.summary_data['cumulative']['abs_effect_lower']) == -3977
    assert int(ci.summary_data['cumulative']['abs_effect_upper']) == -3087
    assert round(ci.summary_data['cumulative']['rel_effect'], 1) == -0.3
    assert round(ci.summary_data['cumulative']['rel_effect_lower'], 2) == -0.29
    assert round(ci.summary_data['cumulative']['rel_effect_upper'], 2) == -0.23

    assert round(ci.p_value, 1) == 0.0
    assert ci.inferences.index.dtype == data.index.dtype
Example No. 25
    def test_early_post_period(self):
        post_period = [101, 197]
        impact = CausalImpact(data, pre_period, post_period, model_args)
        impact.run()
        # assert_numpy_array_equal raises on mismatch and returns None,
        # so it must not be wrapped in a bare `assert`.
        pd.util.testing.assert_numpy_array_equal(
            impact.inferences.response.values, data.y.values)
        assert np.all(pd.isnull(impact.inferences.iloc[-2:, 2:]))
Example No. 26
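# Reconstructs the SparseLinearRegression weights from the posterior samples
# (following the TFP code linked below) and asserts that the posterior mean
# weight of one covariate is shrunk close to zero.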
def test_default_model_sparse_linear_regression_arma_data():
    data = pd.read_csv('tests/fixtures/arma_sparse_reg.csv')
    data.iloc[70:, 0] += 5

    pre_period = [0, 69]
    post_period = [70, 99]

    ci = CausalImpact(data, pre_period, post_period)
    samples = ci.model_samples

    # Weights are computed as per original TFP source code:
    # https://github.com/tensorflow/probability/blob/v0.12.1/tensorflow_probability/python/sts/regression.py#L489-L494 # noqa: E501
    global_scale = (
        samples['SparseLinearRegression/_global_scale_noncentered'] *
        tf.sqrt(samples['SparseLinearRegression/_global_scale_variance']) * 0.1
    )
    local_scales = (
        samples['SparseLinearRegression/_local_scales_noncentered'] *
        tf.sqrt(samples['SparseLinearRegression/_local_scale_variances'])
    )
    weights = (
        samples['SparseLinearRegression/_weights_noncentered'] * local_scales *
        global_scale[..., tf.newaxis]
    )
    assert tf.abs(tf.reduce_mean(weights, axis=0).numpy()[1]) < 0.05
Example No. 27
def test_default_causal_inferences(fix_path):
    np.random.seed(1)
    data = pd.read_csv(os.path.join(fix_path, 'google_data.csv'))
    del data['t']

    pre_period = [0, 60]
    post_period = [61, 90]

    ci = CausalImpact(data, pre_period, post_period)
    assert int(ci.summary_data['average']['actual']) == 156
    assert int(ci.summary_data['average']['predicted']) == 129
    assert int(ci.summary_data['average']['predicted_lower']) == 124
    assert int(ci.summary_data['average']['predicted_upper']) == 134
    assert int(ci.summary_data['average']['abs_effect']) == 27
    assert round(ci.summary_data['average']['abs_effect_lower'], 1) == 21.6
    assert int(ci.summary_data['average']['abs_effect_upper']) == 31
    assert round(ci.summary_data['average']['rel_effect'], 1) == 0.2
    assert round(ci.summary_data['average']['rel_effect_lower'], 2) == 0.17
    assert round(ci.summary_data['average']['rel_effect_upper'], 2) == 0.25

    assert int(ci.summary_data['cumulative']['actual']) == 4687
    assert int(ci.summary_data['cumulative']['predicted']) == 3876
    assert int(ci.summary_data['cumulative']['predicted_lower']) == 3729
    assert int(ci.summary_data['cumulative']['predicted_upper']) == 4040
    assert int(ci.summary_data['cumulative']['abs_effect']) == 810
    assert int(ci.summary_data['cumulative']['abs_effect_lower']) == 646
    assert int(ci.summary_data['cumulative']['abs_effect_upper']) == 957
    assert round(ci.summary_data['cumulative']['rel_effect'], 1) == 0.2
    assert round(ci.summary_data['cumulative']['rel_effect_lower'], 2) == 0.17
    assert round(ci.summary_data['cumulative']['rel_effect_upper'], 2) == 0.25

    assert round(ci.p_value, 1) == 0.0
    assert ci.inferences.index.dtype == data.index.dtype
Example No. 28
def test_default_causal_cto_no_covariates(rand_data, pre_int_period,
                                          post_int_period):
    rand_data = pd.DataFrame(rand_data.iloc[:, 0])
    ci = CausalImpact(rand_data, pre_int_period, post_int_period)
    assert_frame_equal(ci.data, rand_data)
    assert ci.pre_period == pre_int_period
    assert ci.post_period == post_int_period
    pre_data = rand_data.loc[pre_int_period[0]:pre_int_period[1], :]
    assert_frame_equal(ci.pre_data, pre_data)
    post_data = rand_data.loc[post_int_period[0]:post_int_period[1], :]
    assert_frame_equal(ci.post_data, post_data)
    assert ci.alpha == 0.05
    normed_pre_data, (mu, sig) = standardize(pre_data)
    assert_frame_equal(ci.normed_pre_data, normed_pre_data)
    normed_post_data = (post_data - mu) / sig
    assert_frame_equal(ci.normed_post_data, normed_post_data)
    assert ci.mu_sig == (mu[0], sig[0])
    assert ci.model_args == {
        'fit_method': 'hmc',
        'niter': 1000,
        'prior_level_sd': 0.01,
        'season_duration': 1,
        'nseasons': 1,
        'standardize': True
    }
    assert isinstance(ci.model, tfp.sts.LocalLevel)
    assert ci.inferences is not None
    assert ci.inferences.index.dtype == rand_data.index.dtype
    assert ci.summary_data is not None
    assert ci.p_value > 0 and ci.p_value < 1
    assert ci.model_args['niter'] == 1000
    assert ci.model_samples is not None
Example No. 29
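# Loads response-time points exported from Glowroot, turns them into a
# timestamp-indexed DataFrame, and runs CausalImpact over millisecond-epoch
# pre/post boundaries with a custom prior_level_sd.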
def main():
    with open("/path/to/glowroot/data.json", 'rt') as file:
        glowroot_data = json.load(file)

        x = list()
        y = list()
        for point in glowroot_data["dataSeries"][0]["data"]:
            x.append(point[0])
            y.append(point[1])

        data_frame = pd.DataFrame({
            "timestamp": pd.to_datetime(x, unit="ms"),
            "y": y
        })
        data_frame = data_frame.set_index("timestamp")
        logging.info(data_frame)
        pre_period = [
            pd.to_datetime(1573661277259, unit="ms"),
            pd.to_datetime(1573661647328, unit="ms")
        ]
        post_period = [
            pd.to_datetime(1573661652328, unit="ms"),
            pd.to_datetime(1573661932369, unit="ms")
        ]

        causal_impact = CausalImpact(data_frame,
                                     pre_period,
                                     post_period,
                                     prior_level_sd=0.1)
        logging.info(causal_impact.summary())
        causal_impact.plot()
Example No. 30
    def test_late_pre_period(self):
        pre_period = [3, 100]
        impact = CausalImpact(data, pre_period, post_period, model_args)
        impact.run()
        # assert_numpy_array_equal raises on mismatch and returns None,
        # so it must not be wrapped in a bare `assert`.
        pd.util.testing.assert_numpy_array_equal(
            impact.inferences.response.values, data.y.values)
        assert np.all(pd.isnull(impact.inferences.iloc[0:pre_period[0], 2:]))