def test_drlearner(self):
    """Exercise changing DR-learner settings after fitting and refitting.

    For both ``LinearDRLearner`` and ``SparseLinearDRLearner`` the test:

    * fits with cached values and checks ``model_regression`` is ``'auto'``;
    * replaces the nuisance models and fits again;
    * expects ``ValueError`` when assigning ``multitask_model_final`` or
      ``model_final``;
    * changes ``min_propensity``, ``mc_iters`` and ``featurizer`` and calls
      ``refit_final``;
    * disables ``fit_cate_intercept`` and checks the intercept accessor
      before and after ``refit_final``;
    * fits with ``cache_values=False`` and expects ``refit_final`` to fail
      with ``AssertionError``.
    """
    y, T, X, W = self._get_data()

    estimators = [
        LinearDRLearner(random_state=123),
        SparseLinearDRLearner(random_state=123),
    ]
    for est in estimators:
        est.fit(y, T, X=X, W=W, cache_values=True)
        np.testing.assert_equal(est.model_regression, 'auto')

        # Swap in explicit nuisance models and run a full fit again.
        est.model_regression = LinearRegression()
        est.model_propensity = LogisticRegression(random_state=123)
        est.fit(y, T, X=X, W=W, cache_values=True)
        assert isinstance(est.model_regression, LinearRegression)

        # Both of these assignments are expected to be rejected.
        with pytest.raises(ValueError):
            est.multitask_model_final = True
        with pytest.raises(ValueError):
            est.model_final = LinearRegression()

        # Change final-stage related settings, then refit only the final stage.
        est.min_propensity = .1
        est.mc_iters = 2
        est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
        est.refit_final()
        assert isinstance(est.featurizer_, PolynomialFeatures)
        np.testing.assert_equal(est.mc_iters, 2)

        # Turning the intercept off must not alter the fitted intercept
        # until the final stage is refit.
        intcpt = est.intercept_(T=1)
        est.fit_cate_intercept = False
        np.testing.assert_equal(est.intercept_(T=1), intcpt)
        est.refit_final()
        # Query ``intercept_`` (the accessor used above) rather than a
        # misspelled ``intercept``: a non-existent attribute would raise
        # AttributeError unconditionally and make this check vacuous.
        # NOTE(review): assumes ``intercept_`` raises AttributeError once the
        # model is refit without an intercept -- confirm against the estimator.
        with pytest.raises(AttributeError):
            est.intercept_(T=1)

        # Without cached values, refitting the final stage is expected to fail.
        est.fit(y, T, X=X, W=W, cache_values=False)
        with pytest.raises(AssertionError):
            est.refit_final()
def test_can_summarize(self):
    """Smoke test: ``summary`` runs on a fitted ``LinearDML`` and ``LinearDRLearner``."""
    dml = LinearDML(model_t=LinearRegression(), model_y=LinearRegression())
    fitted_dml = dml.fit(TestInference.Y, TestInference.T,
                         TestInference.X, TestInference.W)
    fitted_dml.summary()

    dr = LinearDRLearner(
        model_regression=LinearRegression(),
        model_propensity=LogisticRegression(),
        fit_cate_intercept=False,
    )
    # The DR learner is fitted on the thresholded treatment ``T > 0``.
    fitted_dr = dr.fit(
        TestInference.Y,
        TestInference.T > 0,
        TestInference.X,
        TestInference.W,
        inference=BootstrapInference(5),
    )
    fitted_dr.summary(1)
def test_store_dataframe_name(self):
    """Check that pandas names given to the dowhy wrapper's ``fit`` are stored.

    The outcome and treatment are passed as named ``Series`` and the
    features/controls as ``DataFrame`` objects with named columns; the
    fitted object's ``_common_causes``, ``_effect_modifiers``,
    ``_treatment`` and ``_outcome`` are then compared against those names.
    """
    # The fifth value returned by ``_get_data`` is not needed here.
    Y, T, X, W, _ = self._get_data()

    outcome_name = "outcome"
    treatment_name = "treatment"
    feature_names = ["feature"]
    control_names = ["control1", "control2", "control3", "control4"]

    Y = pd.Series(Y, name=outcome_name)
    T = pd.Series(T, name=treatment_name)
    X = pd.DataFrame(X, columns=feature_names)
    W = pd.DataFrame(W, columns=control_names)

    est = LinearDRLearner().dowhy.fit(Y, T, X, W)

    expected = (
        (est._common_causes, feature_names + control_names),
        (est._effect_modifiers, feature_names),
        (est._treatment, [treatment_name]),
        (est._outcome, [outcome_name]),
    )
    for stored, names in expected:
        np.testing.assert_array_equal(stored, names)
def test_dr_random_state(self):
    """Run the shared random-state check on each DR-learner variant.

    Every estimator is built with ``random_state=123`` and handed to
    ``TestRandomState._test_random_state`` with the generated data.
    """
    Y, T, X, W, X_test = self._make_data(500, 2)

    forest_final = DRLearner(
        model_final=RandomForestRegressor(max_depth=3,
                                          n_estimators=10,
                                          min_samples_leaf=100,
                                          bootstrap=True,
                                          random_state=123),
        cv=2,
        random_state=123,
    )
    linear = LinearDRLearner(random_state=123)
    sparse_linear = SparseLinearDRLearner(cv=2, random_state=123)
    forest = ForestDRLearner(
        model_regression=RandomForestRegressor(n_estimators=10,
                                               max_depth=4,
                                               random_state=123),
        model_propensity=RandomForestClassifier(n_estimators=10,
                                                max_depth=4,
                                                random_state=123),
        cv=2,
        random_state=123,
    )

    for est in (forest_final, linear, sparse_linear, forest):
        TestRandomState._test_random_state(est, X_test, Y, T, X=X, W=W)
def test_mean_pred_stderr(self):
    """Check where ``mean_pred_stderr`` is populated for linear final stages.

    For a ``LinearDML`` and a ``LinearDRLearner`` (both with a degree-2
    polynomial featurizer) the population summaries of
    ``const_marginal_effect_inference`` and ``effect_inference`` must carry
    a ``mean_pred_stderr``, while the coefficient inference must not.
    """
    Y = TestInference.Y
    T = TestInference.T
    X = TestInference.X
    W = TestInference.W

    dml = LinearDML(
        model_t=LinearRegression(),
        model_y=LinearRegression(),
        featurizer=PolynomialFeatures(degree=2, include_bias=False),
    )
    dr = LinearDRLearner(
        model_regression=LinearRegression(),
        model_propensity=LogisticRegression(),
        featurizer=PolynomialFeatures(degree=2, include_bias=False),
    )

    for est in (dml, dr):
        est.fit(Y, T, X=X, W=W)

        cme_summary = est.const_marginal_effect_inference(X).population_summary()
        assert cme_summary.mean_pred_stderr is not None

        # mean_pred_stderr is only available when T1 is a constant or a
        # list of constants
        effect_summary = est.effect_inference(X).population_summary()
        assert effect_summary.mean_pred_stderr is not None

        # The DR learner's coefficient inference is requested per treatment.
        coef_inference = (
            est.coef__inference(T=1)
            if est.__class__.__name__ == "LinearDRLearner"
            else est.coef__inference()
        )
        assert coef_inference.mean_pred_stderr is None
def test_summary_discrete(self):
    """Tests the inference results summary for discrete treatment estimators.

    For each inference option the row labels of the coefficient table (and,
    in the first case, the intercept table) returned by ``summary(T=1)``
    are compared against the expected feature names, with and without a
    polynomial featurizer, with and without user-supplied
    ``feature_names``, and with the estimator wrapped in
    ``self._NoFeatNamesEst``.
    """

    def make_poly():
        # Fresh featurizer instance for every use, as each case needs its own.
        return PolynomialFeatures(degree=2, include_bias=False)

    def fit_learner(featurizer, inference, wrap=False):
        # Build a LinearDRLearner (optionally wrapped) and fit it on the shared data.
        learner = LinearDRLearner(model_regression=LinearRegression(),
                                  model_propensity=LogisticRegression(),
                                  featurizer=featurizer)
        if wrap:
            learner = self._NoFeatNamesEst(learner)
        learner.fit(TestInference.Y,
                    TestInference.T,
                    TestInference.X,
                    TestInference.W,
                    inference=inference)
        return learner

    def row_labels(summary, table_index=0):
        # First column of the chosen summary table, skipping the header row.
        return np.asarray(summary.tables[table_index].data)[1:, 0]

    # Test inference results when `cate_feature_names` does not exist
    for inference in [BootstrapInference(n_bootstrap_samples=5), 'auto']:
        # Polynomial featurizer, default input names.
        summary = fit_learner(make_poly(), inference).summary(T=1)
        input_names = get_input_columns(TestInference.X)
        expected = make_poly().fit(TestInference.X).get_feature_names(input_names)
        np.testing.assert_array_equal(row_labels(summary), expected)
        np.testing.assert_array_equal(row_labels(summary, 1), ['cate_intercept'])

        # Polynomial featurizer, user-supplied input names.
        learner = fit_learner(make_poly(), inference)
        custom = ['Q' + str(i) for i in range(TestInference.d_x)]
        summary = learner.summary(T=1, feature_names=custom)
        expected = make_poly().fit(TestInference.X).get_feature_names(input_features=custom)
        np.testing.assert_array_equal(row_labels(summary), expected)

        # No featurizer: once plain, once wrapped in ``_NoFeatNamesEst``.
        for wrap in (False, True):
            # Default names.
            summary = fit_learner(None, inference, wrap=wrap).summary(T=1)
            np.testing.assert_array_equal(
                row_labels(summary),
                ['X' + str(i) for i in range(TestInference.d_x)])

            # User-supplied names.
            learner = fit_learner(None, inference, wrap=wrap)
            custom = ['Q' + str(i) for i in range(TestInference.d_x)]
            summary = learner.summary(T=1, feature_names=custom)
            np.testing.assert_array_equal(row_labels(summary), custom)