def test_gap(self):
    """Rows labelled 101-119 must be null from the third inference column on.

    ``data``, ``pre_period`` and ``model_args`` are taken from the enclosing
    scope; only the post-period is overridden so that it starts at 120.
    NOTE(review): 101-119 is presumably the gap between the two periods --
    ``pre_period`` is not visible here, confirm it ends at 100.
    """
    delayed_post_period = [120, 200]
    impact = CausalImpact(data, pre_period, delayed_post_period, model_args)
    impact.run()

    gap_rows = impact.inferences.loc[101:119, impact.inferences.columns[2:]]
    assert np.all(pd.isnull(gap_rows))
def test_pre_period_starts_after_beginning_of_data(self, data):
    """Fit with a pre-period starting at 3 and check the rows before it.

    The ``response`` column must equal ``data.y`` over the full length, and
    the first three rows (positions 0-2) must be null in every column from
    position 2 onward.
    """
    pre_start, pre_end = 3, 100
    impact = CausalImpact(data, [pre_start, pre_end], [101, 199])
    impact.run()

    np.testing.assert_array_equal(
        impact.inferences.response.values, data.y.values
    )
    leading_rows = impact.inferences.iloc[:pre_start, 2:]
    assert np.all(pd.isnull(leading_rows))
def test_late_pre_period(self):
    """Fit with a pre-period starting at 3 and check the rows before it.

    ``data``, ``post_period`` and ``model_args`` come from the enclosing
    scope. The ``response`` column must equal ``data.y``, and the first
    ``pre_period[0]`` rows must be null in every column from position 2 on.
    """
    pre_period = [3, 100]
    impact = CausalImpact(data, pre_period, post_period, model_args)
    impact.run()

    # The comparison helper raises on mismatch by itself, so it must not be
    # wrapped in ``assert`` (its return value is not a pass/fail signal).
    # ``np.testing`` replaces the removed ``pd.util.testing`` module.
    np.testing.assert_array_equal(
        impact.inferences.response.values, data.y.values
    )
    assert np.all(pd.isnull(impact.inferences.iloc[0:pre_period[0], 2:]))
def test_gap_between_pre_and_post_periods(self, data, pre_period):
    """Rows labelled 101-119 must be null from the third inference column on.

    The post-period is set to start at 120.
    NOTE(review): 101-119 is presumably the gap left after the ``pre_period``
    fixture -- its value is not visible here, confirm it ends at 100.
    """
    impact = CausalImpact(data, pre_period, [120, 199])
    impact.run()

    inferences = impact.inferences
    non_response_columns = inferences.columns[2:]
    gap_rows = inferences.loc[101:119, non_response_columns]
    assert np.all(pd.isnull(gap_rows))
def test_early_post_period(self):
    """Fit with a post-period ending at 197 and check the rows after it.

    ``data``, ``pre_period`` and ``model_args`` come from the enclosing
    scope. The ``response`` column must equal ``data.y``, and the last two
    rows must be null in every column from position 2 on.
    """
    post_period = [101, 197]
    impact = CausalImpact(data, pre_period, post_period, model_args)
    impact.run()

    # The comparison helper raises on mismatch by itself, so it must not be
    # wrapped in ``assert`` (its return value is not a pass/fail signal).
    # ``np.testing`` replaces the removed ``pd.util.testing`` module.
    np.testing.assert_array_equal(
        impact.inferences.response.values, data.y.values
    )
    assert np.all(pd.isnull(impact.inferences.iloc[-2:, 2:]))
def test_missing_values_in_pre_period_y(self, pre_period, post_period):
    """Check how NaNs in the pre-period response show up in the inferences.

    Rows 95-99 of ``y`` are set to NaN before fitting. Afterwards:

    * every column whose name contains ``"response"`` is NaN on rows 95-99;
    * columns whose name contains neither ``"response"`` nor
      ``"point_effect"`` hold no NaN on rows 95-99;
    * rows before 95 and rows from 101 onward hold no NaN in any column.
    """
    data = pd.DataFrame(np.random.randn(200, 3), columns=["y", "x1", "x2"])
    data.iloc[95:100, 0] = np.nan

    impact = CausalImpact(data, pre_period, post_period)
    impact.run()

    inferences = impact.inferences
    columns = inferences.columns
    # Positional indices are collected because the checks below use ``iloc``.
    predicted_cols = [
        columns.get_loc(col)
        for col in columns
        if "response" not in col and "point_effect" not in col
    ]
    response_cols = [
        columns.get_loc(col) for col in columns if "response" in col
    ]

    assert np.all(np.isnan(inferences.iloc[95:100, response_cols]))
    assert not np.any(np.isnan(inferences.iloc[95:100, predicted_cols]))
    assert not np.any(np.isnan(inferences.iloc[:95, :]))
    assert not np.any(np.isnan(inferences.iloc[101:, :]))
def test_post_period_finishes_before_end_of_data(self, data, pre_period):
    """Fit with a post-period ending at 197 and check the rows after it.

    The ``response`` column must equal ``data.y`` over the full length, and
    the last two rows must be null in every column from position 2 onward.
    """
    impact = CausalImpact(data, pre_period, [101, 197])
    impact.run()

    np.testing.assert_array_equal(
        impact.inferences.response.values, data.y.values
    )
    trailing_rows = impact.inferences.iloc[-2:, 2:]
    assert np.all(pd.isnull(trailing_rows))
def test_unlabelled_pandas_series(self, expected_columns, pre_period,
                                  post_period):
    """Fit on the raw ``.values`` array of an unlabelled 200x3 frame.

    After running, the inference column names must equal the
    ``expected_columns`` fixture.
    """
    alpha = 0.05
    model_args = {"niter": 123, 'standardize_data': False}
    unlabelled = pd.DataFrame(np.random.randn(200, 3))

    causal_impact = CausalImpact(
        unlabelled.values,
        pre_period,
        post_period,
        model_args,
        None,
        None,
        alpha,
        "MLE",
    )
    causal_impact.run()

    assert list(causal_impact.inferences.columns) == expected_columns
def test_late_early_and_gap(self):
    """Rows outside pre-period [3, 80] and post-period [120, 197] are null.

    ``data`` and ``model_args`` come from the enclosing scope. Three label
    ranges are checked from the third inference column on: up to 2, 81-119,
    and 198 onward.
    """
    impact = CausalImpact(data, [3, 80], [120, 197], model_args)
    impact.run()

    uncovered_ranges = (slice(None, 2), slice(81, 119), slice(198, None))
    for label_range in uncovered_ranges:
        rows = impact.inferences.loc[
            label_range, impact.inferences.columns[2:]
        ]
        assert np.all(pd.isnull(rows))
def test_late_start_early_finish_and_gap_between_periods(self, data):
    """Rows outside pre-period [3, 80] and post-period [120, 197] are null.

    Three label ranges are checked from the third inference column on:
    labels up to 2, labels 81-119, and labels from 198 onward.
    """
    pre_period, post_period = [3, 80], [120, 197]
    impact = CausalImpact(data, pre_period, post_period)
    impact.run()

    before_pre = impact.inferences.loc[:2, impact.inferences.columns[2:]]
    assert np.all(pd.isnull(before_pre))

    between = impact.inferences.loc[81:119, impact.inferences.columns[2:]]
    assert np.all(pd.isnull(between))

    after_post = impact.inferences.loc[198:, impact.inferences.columns[2:]]
    assert np.all(pd.isnull(after_post))
def test_other_formats(self):
    """Fit on several input container types and check the first column.

    For a labelled DataFrame, a NumPy array and a list of lists, the first
    inference column must be named ``"response"``.
    """
    pre_period = [1, 100]
    post_period = [101, 200]
    model_args = {"niter": 100}

    # Each input is built lazily, right before it is fitted. The NumPy case
    # used to be copy-pasted twice; it is exercised once here.
    input_builders = (
        ("labelled dataframe", lambda: pd.DataFrame(
            np.random.randn(200, 3), columns=["a", "b", "c"])),
        ("numpy array", lambda: np.random.randn(200, 3)),
        ("list of lists", lambda: [[n, n + 2] for n in range(200)]),
    )

    for label, build in input_builders:
        impact = CausalImpact(build(), pre_period, post_period, model_args)
        impact.run()
        actual_columns = impact.inferences.columns
        # The label makes a failure identify the offending input format.
        assert actual_columns[0] == "response", label
def test_other_formats(self, expected_columns, pre_period, post_period):
    """Fit on several input container types and compare the column names.

    For a labelled DataFrame, a NumPy array and a list of lists, the
    inference column names must equal the ``expected_columns`` fixture.
    """
    model_args = {"niter": 100, "standardize_data": True}

    # Builders are invoked one at a time, immediately before each fit, so
    # random draws and model runs interleave in the same order as a
    # straight-line version of this test.
    input_builders = (
        # labelled dataframe
        lambda: pd.DataFrame(np.random.randn(200, 3),
                             columns=["a", "b", "c"]),
        # numpy array
        lambda: np.random.randn(200, 3),
        # list of lists
        lambda: np.random.randn(200, 2).tolist(),
    )

    for build in input_builders:
        impact = CausalImpact(build(), pre_period, post_period, model_args)
        impact.run()
        assert list(impact.inferences.columns) == expected_columns
def test_missing_pre_period_data(self):
    """NaNs in rows 3-4 of the first column must not change the row count.

    ``data``, ``pre_period``, ``post_period`` and ``model_args`` come from
    the enclosing scope. The inferences must have as many rows as the input.
    """
    # Work on a copy: writing NaNs into the shared ``data`` object in place
    # would leak into every other test that reads it afterwards.
    model_data = data.copy()
    model_data.iloc[3:5, 0] = np.nan

    impact = CausalImpact(model_data, pre_period, post_period, model_args)
    impact.run()

    assert len(impact.inferences) == len(model_data)
def test_frame_no_exog(self):
    """Running on a 1-D array of 200 samples must raise ``ValueError``.

    ``pre_period``, ``post_period`` and ``model_args`` come from the
    enclosing scope.
    """
    response_only = np.random.randn(200)
    impact = CausalImpact(response_only, pre_period, post_period, model_args)

    with pytest.raises(ValueError):
        impact.run()
def test_missing_input(self):
    """Constructing and running with no arguments must raise ``SyntaxError``.

    Both the constructor call and ``run`` stay inside the ``raises`` block,
    so the test passes whichever of the two raises.
    """
    with pytest.raises(SyntaxError):
        empty_impact = CausalImpact()
        empty_impact.run()
def test_post_period_bigger_than_data_index_max(self, data):
    """A post-period end of 300 must be stored as 199 after running.

    After ``run``, ``impact.params['post_period']`` must be ``[101, 199]``.
    """
    requested_pre = [0, 100]
    requested_post = [101, 300]

    impact = CausalImpact(data, requested_pre, requested_post)
    impact.run()

    stored_post = impact.params['post_period']
    assert stored_post == [101, 199]
def test_pre_period_lower_than_data_index_min(self, data):
    """A pre-period start of -1 must be stored as 0 after running.

    After ``run``, ``impact.params['pre_period']`` must be ``[0, 100]``.
    """
    requested_pre = [-1, 100]
    requested_post = [101, 199]

    impact = CausalImpact(data, requested_pre, requested_post)
    impact.run()

    stored_pre = impact.params['pre_period']
    assert stored_pre == [0, 100]
# Scratch analysis fragment (not a test). In order, it:
#   * left-joins each of wiki_views_t2..wiki_views_t7 with wiki_views_c on
#     'Date' into inputs2..inputs7 (inputs1 is used below but is not defined
#     in this span);
#   * builds pre_period / post_period from date strings via pd.to_datetime
#     and runs CausalImpact on inputs1;
#   * then holds interactive leftovers: version look-ups, a bare
#     "CausalImpact." and a pd.DataFrame literal that is not closed here.
# NOTE(review): "impact <- CausalImpact(inputs1, pre.period, post.period)" and
# "plot(impact)" look like R syntax pasted into Python -- confirm and remove.
# NOTE(review): the statements are collapsed onto one line and the fragment is
# truncated, so it is not runnable as shown.
inputs2 = pd.merge(wiki_views_t2,wiki_views_c,on='Date',how='left') inputs3 = pd.merge(wiki_views_t3,wiki_views_c,on='Date',how='left') inputs4 = pd.merge(wiki_views_t4,wiki_views_c,on='Date',how='left') inputs5 = pd.merge(wiki_views_t5,wiki_views_c,on='Date',how='left') inputs6 = pd.merge(wiki_views_t6,wiki_views_c,on='Date',how='left') inputs7 = pd.merge(wiki_views_t7,wiki_views_c,on='Date',how='left') #### 4 CONDUCT CAUSAL IMPACT MEASUREMENT TO IDENTIFY SIGNIFICANT LIFT #draft is april 26 - april 28 pre_period = [pd.to_datetime(date) for date in ["2018-02-05", "2018-04-19"]] post_period = [pd.to_datetime(date) for date in ["2018-04-26", "2018-05-05"]] impact = CausalImpact(inputs1, pre_period, post_period) impact.run() impact <- CausalImpact(inputs1, pre.period, post.period) plot(impact) inputs1['Date'] #error message "upper y" pd.__version__ statsmodels.__version__ import statsmodels.api as sm sm.version.version CausalImpact. df = pd.DataFrame( {'y': [150, 200, 225, 150, 175], 'x1': [150, 249, 150, 125, 325], 'x2': [275, 125, 249, 275, 250]
def test_frame_w_no_exog(self, pre_period, post_period):
    """Running on a 1-D array of 200 samples must raise ``ValueError``.

    The exception message must be exactly
    ``'data contains no exogenous variables'``.
    """
    response_only = np.random.randn(200)
    impact = CausalImpact(response_only, pre_period, post_period, {})

    with pytest.raises(ValueError) as excinfo:
        impact.run()

    message = str(excinfo.value)
    assert message == 'data contains no exogenous variables'
def test_missing_pre_period_data(self, data, pre_period, post_period):
    """NaNs in rows 3-4 of the first column must not change the row count.

    The ``data`` fixture is copied before the NaNs are written, and the
    inferences must have as many rows as that modified copy.
    """
    with_gaps = data.copy()
    with_gaps.iloc[3:5, 0] = np.nan

    impact = CausalImpact(with_gaps, pre_period, post_period)
    impact.run()

    expected_rows = len(with_gaps)
    assert len(impact.inferences) == expected_rows