def test_can_use_sample_weights(self):
    """ TODO Almost identical to DML test, so consider merging

    Test that we can pass sample weights to an estimator.
    """
    dml = LinearDRLearner(model_regression=LinearRegression(),
                          model_propensity=LogisticRegression(C=1000, solver='lbfgs', multi_class='auto'),
                          featurizer=FunctionTransformer(validate=True))
    dml.fit(np.array([1, 2, 1, 2]), np.array([1, 2, 1, 2]),
            W=np.ones((4, 1)), sample_weight=np.ones((4, )))
    self.assertAlmostEqual(dml.intercept_(T=2), 1)
def test_can_use_vectors(self):
    """ TODO Almost identical to DML test, so consider merging

    Test that we can pass vectors for T and Y (not only 2-dimensional arrays).
    """
    dml = LinearDRLearner(model_regression=LinearRegression(),
                          model_propensity=LogisticRegression(C=1000, solver='lbfgs', multi_class='auto'),
                          fit_cate_intercept=False,
                          featurizer=FunctionTransformer(validate=True))
    dml.fit(np.array([1, 2, 1, 2]), np.array([1, 2, 1, 2]), X=np.ones((4, 1)))
    self.assertAlmostEqual(dml.coef_(T=2).reshape(())[()], 1)
def test_can_use_statsmodel_inference(self):
    """ TODO Almost identical to DML test, so consider merging

    Test that we can use statsmodels to generate confidence intervals
    """
    dml = LinearDRLearner(model_regression=LinearRegression(),
                          model_propensity=LogisticRegression(C=1000, solver='lbfgs', multi_class='auto'))
    dml.fit(np.array([2, 3, 1, 3, 2, 1, 1, 1]), np.array([3, 2, 1, 2, 3, 1, 1, 1]),
            np.ones((8, 1)), inference='statsmodels')

    interval = dml.effect_interval(np.ones((9, 1)),
                                   T0=np.array([1, 1, 1, 1, 1, 1, 1, 1, 1]),
                                   T1=np.array([2, 2, 3, 2, 2, 3, 2, 2, 3]),
                                   alpha=0.05)
    point = dml.effect(np.ones((9, 1)),
                       T0=np.array([1, 1, 1, 1, 1, 1, 1, 1, 1]),
                       T1=np.array([2, 2, 3, 2, 2, 3, 2, 2, 3]))
    assert len(interval) == 2
    lo, hi = interval
    assert lo.shape == hi.shape == point.shape
    assert (lo <= point).all()
    assert (point <= hi).all()
    assert (lo < hi).any()  # for at least some of the examples, the CI should have nonzero width

    interval = dml.const_marginal_effect_interval(np.ones((9, 1)), alpha=0.05)
    point = dml.const_marginal_effect(np.ones((9, 1)))
    assert len(interval) == 2
    lo, hi = interval
    assert lo.shape == hi.shape == point.shape
    assert (lo <= point).all()
    assert (point <= hi).all()
    assert (lo < hi).any()  # for at least some of the examples, the CI should have nonzero width

    interval = dml.coef__interval(T=2, alpha=0.05)
    point = dml.coef_(T=2)
    assert len(interval) == 2
    lo, hi = interval
    assert lo.shape == hi.shape == point.shape
    assert (lo <= point).all()
    assert (point <= hi).all()
    assert (lo < hi).any()  # for at least some of the examples, the CI should have nonzero width
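# A possible helper (hypothetical; not part of the original suite) capturing the interval
# checks repeated three times in test_can_use_statsmodel_inference above: the interval must
# bracket the point estimate elementwise and have nonzero width for at least some rows.
def _check_interval_brackets_point(interval, point):
    assert len(interval) == 2
    lo, hi = interval
    assert lo.shape == hi.shape == point.shape
    assert (lo <= point).all()
    assert (point <= hi).all()
    assert (lo < hi).any()  # at least some of the CIs should have nonzero width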
def test_drlearner(self):
    y, T, X, W = self._get_data()

    for est in [LinearDRLearner(random_state=123),
                SparseLinearDRLearner(random_state=123)]:
        est.fit(y, T, X=X, W=W, cache_values=True)
        np.testing.assert_equal(est.model_regression, 'auto')
        est.model_regression = LinearRegression()
        est.model_propensity = LogisticRegression()
        est.fit(y, T, X=X, W=W, cache_values=True)
        assert isinstance(est.model_regression, LinearRegression)
        with pytest.raises(ValueError):
            est.multitask_model_final = True
        with pytest.raises(ValueError):
            est.model_final = LinearRegression()
        est.min_propensity = .1
        est.mc_iters = 2
        est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
        est.refit_final()
        assert isinstance(est.featurizer_, PolynomialFeatures)
        np.testing.assert_equal(est.mc_iters, 2)
        intcpt = est.intercept_(T=1)
        est.fit_cate_intercept = False
        # changing the setting alone leaves the already-fit model (and its intercept) untouched
        np.testing.assert_equal(est.intercept_(T=1), intcpt)
        est.refit_final()
        with pytest.raises(AttributeError):
            est.intercept_(T=1)
        # refitting the final model requires cached values
        est.fit(y, T, X=X, W=W, cache_values=False)
        with pytest.raises(AssertionError):
            est.refit_final()
def test_discrete_treatments(self):
    """ TODO Almost identical to DML test, so consider merging

    Test that we can use discrete treatments
    """
    dml = LinearDRLearner(model_regression=LinearRegression(),
                          model_propensity=LogisticRegression(C=1000, solver='lbfgs', multi_class='auto'),
                          featurizer=FunctionTransformer(validate=True))
    # create a simple artificial setup where effect of moving from treatment
    #     1 -> 2 is 2,
    #     1 -> 3 is 1, and
    #     2 -> 3 is -1 (necessarily, by composing the previous two effects)
    # Using an uneven number of examples from different classes,
    # and having the treatments in non-lexicographic order,
    # should rule out some basic issues.
    dml.fit(np.array([2, 3, 1, 3, 2, 1, 1, 1]), np.array([3, 2, 1, 2, 3, 1, 1, 1]), np.ones((8, 1)))
    np.testing.assert_almost_equal(dml.effect(np.ones((9, 1)),
                                              T0=np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]),
                                              T1=np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])),
                                   [0, 2, 1, -2, 0, -1, -1, 1, 0])
    dml.score(np.array([2, 3, 1, 3, 2, 1, 1, 1]), np.array([3, 2, 1, 2, 3, 1, 1, 1]), np.ones((8, 1)))
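# A minimal sketch (not part of the test above, assumes `np` is imported as elsewhere in this
# suite) checking the arithmetic behind the expected effect vector [0, 2, 1, -2, 0, -1, -1, 1, 0]:
# with the per-treatment effects theta = {1: 0, 2: 2, 3: 1} implied by the comment in
# test_discrete_treatments, effect(T0 -> T1) = theta[T1] - theta[T0].
theta = {1: 0, 2: 2, 3: 1}
t0s = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3])
t1s = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])
expected = np.array([theta[t1] - theta[t0] for t0, t1 in zip(t0s, t1s)])
assert expected.tolist() == [0, 2, 1, -2, 0, -1, -1, 1, 0]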
def test_can_summarize(self):
    LinearDML(model_t=LinearRegression(), model_y=LinearRegression()).fit(
        TestInference.Y, TestInference.T, TestInference.X, TestInference.W).summary()

    LinearDRLearner(model_regression=LinearRegression(),
                    model_propensity=LogisticRegression(),
                    fit_cate_intercept=False).fit(
        TestInference.Y, TestInference.T > 0, TestInference.X, TestInference.W,
        inference=BootstrapInference(5)).summary(1)
def test_dr_random_state(self):
    Y, T, X, W, X_test = self._make_data(500, 2)
    for est in [DRLearner(model_final=RandomForestRegressor(max_depth=3, n_estimators=10,
                                                            min_samples_leaf=100, bootstrap=True,
                                                            random_state=123),
                          cv=2, random_state=123),
                LinearDRLearner(random_state=123),
                SparseLinearDRLearner(cv=2, random_state=123),
                ForestDRLearner(model_regression=RandomForestRegressor(n_estimators=10, max_depth=4,
                                                                       random_state=123),
                                model_propensity=RandomForestClassifier(n_estimators=10, max_depth=4,
                                                                        random_state=123),
                                cv=2, random_state=123)]:
        TestRandomState._test_random_state(est, X_test, Y, T, X=X, W=W)
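# For context, a minimal sketch of what a random-state check like the helper above might do
# (hypothetical; the real TestRandomState._test_random_state is defined elsewhere in the suite):
# fit the same estimator twice on identical data with a fixed random_state and require the
# effect estimates to match exactly.
def _test_random_state_sketch(est, X_test, Y, T, **fit_kwargs):
    est.fit(Y, T, **fit_kwargs)
    te1 = est.effect(X_test)
    est.fit(Y, T, **fit_kwargs)
    te2 = est.effect(X_test)
    np.testing.assert_array_equal(te1, te2)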
def test_cate_api(self):
    """Test that we correctly implement the CATE API."""
    n = 20

    def make_random(is_discrete, d):
        if d is None:
            return None
        sz = (n, d) if d > 0 else (n,)
        if is_discrete:
            while True:
                arr = np.random.choice(['a', 'b', 'c'], size=sz)
                # ensure that we've got at least two of every element
                _, counts = np.unique(arr, return_counts=True)
                if len(counts) == 3 and counts.min() > 1:
                    return arr
        else:
            return np.random.normal(size=sz)

    for d_y in [0, 1]:
        is_discrete = True
        for d_t in [0, 1]:
            for d_x in [2, None]:
                for d_w in [2, None]:
                    W, X, Y, T = [make_random(is_discrete, d)
                                  for is_discrete, d in [(False, d_w),
                                                         (False, d_x),
                                                         (False, d_y),
                                                         (is_discrete, d_t)]]

                    if (X is None) and (W is None):
                        continue

                    d_t_final = 2 if is_discrete else d_t

                    effect_shape = (n,) + ((d_y,) if d_y > 0 else ())
                    effect_summaryframe_shape = (n * (d_y if d_y > 0 else 1), 6)
                    marginal_effect_shape = ((n,) +
                                             ((d_y,) if d_y > 0 else ()) +
                                             ((d_t_final,) if d_t_final > 0 else ()))
                    marginal_effect_summaryframe_shape = (n * (d_y if d_y > 0 else 1),
                                                          6 * (d_t_final if d_t_final > 0 else 1))

                    # since T isn't passed to const_marginal_effect, defaults to one row if X is None
                    const_marginal_effect_shape = ((n if d_x else 1,) +
                                                   ((d_y,) if d_y > 0 else ()) +
                                                   ((d_t_final,) if d_t_final > 0 else ()))
                    const_marginal_effect_summaryframe_shape = (
                        (n if d_x else 1) * (d_y if d_y > 0 else 1),
                        6 * (d_t_final if d_t_final > 0 else 1))

                    for est in [LinearDRLearner(model_propensity=LogisticRegression(C=1000, solver='lbfgs',
                                                                                    multi_class='auto')),
                                DRLearner(model_propensity=LogisticRegression(multi_class='auto'),
                                          model_regression=LinearRegression(),
                                          model_final=StatsModelsLinearRegression(),
                                          multitask_model_final=True)]:

                        # TODO: add stratification to bootstrap so that we can use it
                        # even with discrete treatments
                        infs = [None]
                        if isinstance(est, LinearDRLearner):
                            infs.append('statsmodels')

                        for inf in infs:
                            with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
                                              is_discrete=is_discrete, est=est, inf=inf):
                                est.fit(Y, T, X, W, inference=inf)
                                # make sure we can call the marginal_effect and effect methods
                                const_marg_eff = est.const_marginal_effect(X)
                                marg_eff = est.marginal_effect(T, X)
                                self.assertEqual(shape(marg_eff), marginal_effect_shape)
                                self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)

                                np.testing.assert_array_equal(
                                    marg_eff if d_x else marg_eff[0:1], const_marg_eff)

                                T0 = np.full_like(T, 'a')
                                eff = est.effect(X, T0=T0, T1=T)
                                self.assertEqual(shape(eff), effect_shape)

                                if inf is not None:
                                    const_marg_eff_int = est.const_marginal_effect_interval(X)
                                    marg_eff_int = est.marginal_effect_interval(T, X)
                                    const_marg_effect_inf = est.const_marginal_effect_inference(X)
                                    T1 = np.full_like(T, 'b')
                                    effect_inf = est.effect_inference(X, T0=T0, T1=T1)
                                    marg_effect_inf = est.marginal_effect_inference(T, X)
                                    self.assertEqual(shape(marg_eff_int),
                                                     (2,) + marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_eff_int),
                                                     (2,) + const_marginal_effect_shape)
                                    self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)),
                                                     (2,) + effect_shape)

                                    # test const marginal inference
                                    self.assertEqual(shape(const_marg_effect_inf.summary_frame()),
                                                     const_marginal_effect_summaryframe_shape)
                                    self.assertEqual(shape(const_marg_effect_inf.point_estimate),
                                                     const_marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_effect_inf.stderr),
                                                     const_marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_effect_inf.var),
                                                     const_marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_effect_inf.pvalue()),
                                                     const_marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_effect_inf.zstat()),
                                                     const_marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_effect_inf.conf_int()),
                                                     (2,) + const_marginal_effect_shape)
                                    np.testing.assert_array_almost_equal(
                                        const_marg_effect_inf.conf_int()[0],
                                        const_marg_eff_int[0], decimal=5)
                                    const_marg_effect_inf.population_summary()._repr_html_()

                                    # test effect inference
                                    self.assertEqual(shape(effect_inf.summary_frame()),
                                                     effect_summaryframe_shape)
                                    self.assertEqual(shape(effect_inf.point_estimate),
                                                     effect_shape)
                                    self.assertEqual(shape(effect_inf.stderr),
                                                     effect_shape)
                                    self.assertEqual(shape(effect_inf.var),
                                                     effect_shape)
                                    self.assertEqual(shape(effect_inf.pvalue()),
                                                     effect_shape)
                                    self.assertEqual(shape(effect_inf.zstat()),
                                                     effect_shape)
                                    self.assertEqual(shape(effect_inf.conf_int()),
                                                     (2,) + effect_shape)
                                    np.testing.assert_array_almost_equal(
                                        effect_inf.conf_int()[0],
                                        est.effect_interval(X, T0=T0, T1=T1)[0], decimal=5)
                                    effect_inf.population_summary()._repr_html_()

                                    # test marginal effect inference
                                    self.assertEqual(shape(marg_effect_inf.summary_frame()),
                                                     marginal_effect_summaryframe_shape)
                                    self.assertEqual(shape(marg_effect_inf.point_estimate),
                                                     marginal_effect_shape)
                                    self.assertEqual(shape(marg_effect_inf.stderr),
                                                     marginal_effect_shape)
                                    self.assertEqual(shape(marg_effect_inf.var),
                                                     marginal_effect_shape)
                                    self.assertEqual(shape(marg_effect_inf.pvalue()),
                                                     marginal_effect_shape)
                                    self.assertEqual(shape(marg_effect_inf.zstat()),
                                                     marginal_effect_shape)
                                    self.assertEqual(shape(marg_effect_inf.conf_int()),
                                                     (2,) + marginal_effect_shape)
                                    np.testing.assert_array_almost_equal(
                                        marg_effect_inf.conf_int()[0],
                                        marg_eff_int[0], decimal=5)
                                    marg_effect_inf.population_summary()._repr_html_()

                                est.score(Y, T, X, W)

                                # make sure we can call effect with implied scalar treatments,
                                # no matter the dimensions of T, and also that we warn when
                                # there are multiple treatments
                                if d_t > 1:
                                    cm = self.assertWarns(Warning)
                                else:
                                    # ExitStack can be used as a "do nothing" ContextManager
                                    cm = ExitStack()
                                with cm:
                                    effect_shape2 = (n if d_x else 1,) + ((d_y,) if d_y > 0 else ())
                                    eff = est.effect(X, T0='a', T1='b')
                                    self.assertEqual(shape(eff), effect_shape2)
def test_can_custom_splitter(self):
    """ TODO Almost identical to DML test, so consider merging
    """
    # test that we can fit with a KFold instance
    dml = LinearDRLearner(model_regression=LinearRegression(),
                          model_propensity=LogisticRegression(C=1000, solver='lbfgs', multi_class='auto'),
                          n_splits=KFold(n_splits=3))
    dml.fit(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)))
    dml.score(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)))

    # test that we can fit with a train/test iterable
    dml = LinearDRLearner(model_regression=LinearRegression(),
                          model_propensity=LogisticRegression(C=1000, solver='lbfgs', multi_class='auto'),
                          n_splits=[([0, 1, 2], [3, 4, 5])])
    dml.fit(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)))
    dml.score(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]), np.ones((6, 1)))
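# Note (assumption about library versions): newer econml releases rename the `n_splits`
# argument used above to `cv`; other tests in this section (test_drlearner,
# test_dr_random_state) already use the `cv` spelling. Under that newer API the first
# estimator above would be constructed as, for example:
#
#     LinearDRLearner(model_regression=LinearRegression(),
#                     model_propensity=LogisticRegression(C=1000, solver='lbfgs', multi_class='auto'),
#                     cv=KFold(n_splits=3))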
def test_summary_discrete(self):
    """Tests the inference results summary for discrete treatment estimators."""
    # Test inference results when `cate_feature_names` does not exist
    for inference in [BootstrapInference(n_bootstrap_samples=5), 'statsmodels']:
        cate_est = LinearDRLearner(model_regression=LinearRegression(),
                                   model_propensity=LogisticRegression(),
                                   featurizer=PolynomialFeatures(degree=2, include_bias=False))
        cate_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                     inference=inference)
        summary_results = cate_est.summary(T=1)
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
            TestInference.X).get_feature_names()
        np.testing.assert_array_equal(coef_rows, fnames)
        intercept_rows = np.asarray(summary_results.tables[1].data)[1:, 0]
        np.testing.assert_array_equal(intercept_rows, ['intercept'])

        cate_est = LinearDRLearner(model_regression=LinearRegression(),
                                   model_propensity=LogisticRegression(),
                                   featurizer=PolynomialFeatures(degree=2, include_bias=False))
        cate_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                     inference=inference)
        fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
        summary_results = cate_est.summary(T=1, feat_name=fnames)
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
            TestInference.X).get_feature_names(input_features=fnames)
        np.testing.assert_array_equal(coef_rows, fnames)

        cate_est = LinearDRLearner(model_regression=LinearRegression(),
                                   model_propensity=LogisticRegression(),
                                   featurizer=None)
        cate_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                     inference=inference)
        summary_results = cate_est.summary(T=1)
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        np.testing.assert_array_equal(coef_rows,
                                      ['X' + str(i) for i in range(TestInference.d_x)])

        cate_est = LinearDRLearner(model_regression=LinearRegression(),
                                   model_propensity=LogisticRegression(),
                                   featurizer=None)
        cate_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                     inference=inference)
        fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
        summary_results = cate_est.summary(T=1, feat_name=fnames)
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        np.testing.assert_array_equal(coef_rows, fnames)

        cate_est = LinearDRLearner(model_regression=LinearRegression(),
                                   model_propensity=LogisticRegression(),
                                   featurizer=None)
        wrapped_est = self._NoFeatNamesEst(cate_est)
        wrapped_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                        inference=inference)
        summary_results = wrapped_est.summary(T=1)
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        np.testing.assert_array_equal(coef_rows,
                                      ['X' + str(i) for i in range(TestInference.d_x)])

        cate_est = LinearDRLearner(model_regression=LinearRegression(),
                                   model_propensity=LogisticRegression(),
                                   featurizer=None)
        wrapped_est = self._NoFeatNamesEst(cate_est)
        wrapped_est.fit(TestInference.Y, TestInference.T, TestInference.X, TestInference.W,
                        inference=inference)
        fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
        summary_results = wrapped_est.summary(T=1, feat_name=fnames)
        coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
        np.testing.assert_array_equal(coef_rows, fnames)
def test_linear_drlearner_all_attributes(self):
    from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, RandomForestRegressor
    from sklearn.linear_model import LinearRegression, LogisticRegression
    from econml.utilities import StatsModelsLinearRegression
    import scipy.special

    np.random.seed(123)
    controls = np.random.uniform(-1, 1, size=(5000, 3))
    T = np.random.binomial(2, scipy.special.expit(controls[:, 0]))
    sigma = 0.01
    y = (1 + .5 * controls[:, 0]) * T + controls[:, 0] + np.random.normal(0, sigma, size=(5000,))
    for X in [None, controls]:
        for W in [None, controls]:
            for sample_weight, sample_var in [(None, None),
                                              (np.ones(T.shape[0]), np.zeros(T.shape[0]))]:
                for featurizer in [None, PolynomialFeatures(degree=2, include_bias=False)]:
                    for models in [(GradientBoostingClassifier(), GradientBoostingRegressor()),
                                   (LogisticRegression(solver='lbfgs', multi_class='auto'),
                                    LinearRegression())]:
                        for inference in ['statsmodels',
                                          StatsModelsInferenceDiscrete(cov_type='nonrobust')]:
                            with self.subTest(X=X, W=W, sample_weight=sample_weight,
                                              sample_var=sample_var, featurizer=featurizer,
                                              models=models, inference=inference):
                                est = LinearDRLearner(model_propensity=models[0],
                                                      model_regression=models[1],
                                                      featurizer=featurizer)
                                if (X is None) and (W is None):
                                    with pytest.raises(AttributeError) as e_info:
                                        est.fit(y, T, X=X, W=W,
                                                sample_weight=sample_weight,
                                                sample_var=sample_var)
                                    continue
                                est.fit(y, T, X=X, W=W,
                                        sample_weight=sample_weight, sample_var=sample_var,
                                        inference=inference)

                                if X is not None:
                                    lower, upper = est.effect_interval(X[:3], T0=0, T1=1)
                                    point = est.effect(X[:3], T0=0, T1=1)
                                    truth = 1 + .5 * X[:3, 0]
                                    TestDRLearner._check_with_interval(truth, point, lower, upper)
                                    lower, upper = est.const_marginal_effect_interval(X[:3])
                                    point = est.const_marginal_effect(X[:3])
                                    truth = np.hstack([1 + .5 * X[:3, [0]],
                                                       2 * (1 + .5 * X[:3, [0]])])
                                    TestDRLearner._check_with_interval(truth, point, lower, upper)
                                else:
                                    lower, upper = est.effect_interval(T0=0, T1=1)
                                    point = est.effect(T0=0, T1=1)
                                    truth = np.array([1])
                                    TestDRLearner._check_with_interval(truth, point, lower, upper)
                                    lower, upper = est.const_marginal_effect_interval()
                                    point = est.const_marginal_effect()
                                    truth = np.array([[1, 2]])
                                    TestDRLearner._check_with_interval(truth, point, lower, upper)

                                for t in [1, 2]:
                                    if X is not None:
                                        lower, upper = est.marginal_effect_interval(t, X[:3])
                                        point = est.marginal_effect(t, X[:3])
                                        truth = np.hstack([1 + .5 * X[:3, [0]],
                                                           2 * (1 + .5 * X[:3, [0]])])
                                        TestDRLearner._check_with_interval(truth, point, lower, upper)
                                    else:
                                        lower, upper = est.marginal_effect_interval(t)
                                        point = est.marginal_effect(t)
                                        truth = np.array([[1, 2]])
                                        TestDRLearner._check_with_interval(truth, point, lower, upper)

                                assert isinstance(est.score_, float)
                                assert isinstance(est.score(y, T, X=X, W=W), float)

                                if X is not None:
                                    feat_names = ['A', 'B', 'C']
                                else:
                                    feat_names = []
                                out_feat_names = feat_names
                                if X is not None:
                                    if (featurizer is not None):
                                        out_feat_names = featurizer.fit(X).get_feature_names(feat_names)
                                        np.testing.assert_array_equal(est.featurizer.n_input_features_, 3)
                                    np.testing.assert_array_equal(est.cate_feature_names(feat_names),
                                                                  out_feat_names)

                                if isinstance(models[0], GradientBoostingClassifier):
                                    np.testing.assert_array_equal(
                                        np.array([mdl.feature_importances_
                                                  for mdl in est.models_regression]).shape,
                                        [2, 2 + len(feat_names) + (W.shape[1] if W is not None else 0)])
                                    np.testing.assert_array_equal(
                                        np.array([mdl.feature_importances_
                                                  for mdl in est.models_propensity]).shape,
                                        [2, len(feat_names) + (W.shape[1] if W is not None else 0)])
                                else:
                                    np.testing.assert_array_equal(
                                        np.array([mdl.coef_ for mdl in est.models_regression]).shape,
                                        [2, 2 + len(feat_names) + (W.shape[1] if W is not None else 0)])
                                    np.testing.assert_array_equal(
                                        np.array([mdl.coef_ for mdl in est.models_propensity]).shape,
                                        [2, 3, len(feat_names) + (W.shape[1] if W is not None else 0)])

                                if X is not None:
                                    for t in [1, 2]:
                                        true_coef = np.zeros(len(out_feat_names))
                                        true_coef[0] = .5 * t
                                        lower, upper = est.model_cate(T=t).coef__interval()
                                        point = est.model_cate(T=t).coef_
                                        truth = true_coef
                                        TestDRLearner._check_with_interval(truth, point, lower, upper)
                                        lower, upper = est.coef__interval(t)
                                        point = est.coef_(t)
                                        truth = true_coef
                                        TestDRLearner._check_with_interval(truth, point, lower, upper)
                                for t in [1, 2]:
                                    lower, upper = est.model_cate(T=t).intercept__interval()
                                    point = est.model_cate(T=t).intercept_
                                    truth = t
                                    TestDRLearner._check_with_interval(truth, point, lower, upper)
                                    lower, upper = est.intercept__interval(t)
                                    point = est.intercept_(t)
                                    truth = t
                                    TestDRLearner._check_with_interval(truth, point, lower, upper)
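# Sketch (not part of the test): why the `truth` values in test_linear_drlearner_all_attributes
# follow from its data-generating process
#     y = (1 + .5 * x0) * T + x0 + noise,   T in {0, 1, 2}.
# The effect of treatment level t versus the baseline t = 0 is (1 + .5 * x0) * t, so
# const_marginal_effect stacks the t = 1 and t = 2 columns, and at x0 = 0 the per-treatment
# intercepts are 1 and 2 (matching `truth = t` in the intercept checks).
x0 = np.array([[0.0], [0.4], [-0.2]])                     # illustrative feature values only
cme_truth = np.hstack([1 + .5 * x0, 2 * (1 + .5 * x0)])   # columns: effect of t=1, effect of t=2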
def test_drlearners(self):
    X = TestPandasIntegration.df[TestPandasIntegration.features]
    W = TestPandasIntegration.df[TestPandasIntegration.controls]
    Y = TestPandasIntegration.df[TestPandasIntegration.outcome]
    T = TestPandasIntegration.df[TestPandasIntegration.bin_treat]
    # Test LinearDRLearner
    est = LinearDRLearner(model_propensity=GradientBoostingClassifier(),
                          model_regression=GradientBoostingRegressor())
    est.fit(Y, T, X=X, W=W, inference='statsmodels')
    treatment_effects = est.effect(X)
    lb, ub = est.effect_interval(X, alpha=0.05)
    self._check_input_names(est.summary(T=1))
    self._check_popsum_names(est.effect_inference(X).population_summary())
    # Test SparseLinearDRLearner
    est = SparseLinearDRLearner(model_propensity=GradientBoostingClassifier(),
                                model_regression=GradientBoostingRegressor())
    est.fit(Y, T, X=X, W=W, inference='debiasedlasso')
    treatment_effects = est.effect(X)
    lb, ub = est.effect_interval(X, alpha=0.05)
    self._check_input_names(est.summary(T=1))
    self._check_popsum_names(est.effect_inference(X).population_summary())
    # Test ForestDRLearner
    est = ForestDRLearner(model_propensity=GradientBoostingClassifier(),
                          model_regression=GradientBoostingRegressor())
    est.fit(Y, T, X=X, W=W, inference='blb')
    treatment_effects = est.effect(X)
    lb, ub = est.effect_interval(X, alpha=0.05)
    self._check_popsum_names(est.effect_inference(X).population_summary())