def test_can_use_sample_weights(self):
    """Check that ``fit`` accepts a ``sample_weight`` argument.

    A ``LinearDRLearner`` is fitted on four observations with unit sample
    weights, after which the intercept reported for treatment level 2 is
    expected to be (almost) equal to 1.
    """
    # TODO: almost identical to the DML test, so consider merging the two.
    propensity_model = LogisticRegression(C=1000, solver='lbfgs', multi_class='auto')
    est = LinearDRLearner(
        model_regression=LinearRegression(),
        model_propensity=propensity_model,
        featurizer=FunctionTransformer(validate=True),
    )

    outcomes = np.array([1, 2, 1, 2])
    treatments = np.array([1, 2, 1, 2])
    est.fit(
        outcomes,
        treatments,
        W=np.ones((4, 1)),
        sample_weight=np.ones((4, )),
    )

    self.assertAlmostEqual(est.intercept_(T=2), 1)
def test_linear_drlearner_all_attributes(self):
    """Exercise the public attributes of a fitted ``LinearDRLearner``.

    Synthetic data is drawn with a three-level treatment ``T`` in {0, 1, 2}
    and outcome ``y = (1 + 0.5 * x0) * T + x0 + noise``, so the effect of
    level ``t`` relative to level 0 is ``t * (1 + 0.5 * x0)``.  Because the
    controls are uniform on [-1, 1], the effect averaged over ``x0`` is
    simply ``t``.

    For every combination of ``X``/``W`` being present, sample weights,
    featurizer, nuisance models and inference method, the test checks:

    * fitting with neither ``X`` nor ``W`` raises ``AttributeError``;
    * ``effect``, ``const_marginal_effect`` and ``marginal_effect`` together
      with their ``*_interval`` counterparts agree with the truth;
    * ``score_`` and ``score(...)`` are floats;
    * ``cate_feature_names`` matches the featurizer's output names;
    * the fitted nuisance models expose arrays of the expected shape;
    * ``coef_`` / ``intercept_`` (and intervals) agree with the truth, both
      via ``model_cate(T=t)`` and via the estimator's own accessors.
    """
    from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
    from sklearn.linear_model import LinearRegression, LogisticRegression
    import scipy.special

    np.random.seed(123)
    n_samples = 5000
    controls = np.random.uniform(-1, 1, size=(n_samples, 3))
    T = np.random.binomial(2, scipy.special.expit(controls[:, 0]))
    sigma = 0.01
    y = ((1 + .5 * controls[:, 0]) * T + controls[:, 0]
         + np.random.normal(0, sigma, size=(n_samples, )))

    def configurations():
        """Yield every (X, W, weights, variances, featurizer, models, inference) combination."""
        # Plain nested loops are used on purpose: each inner list literal is
        # re-evaluated on every outer iteration, so featurizer, model and
        # inference objects are rebuilt rather than shared across all cases.
        for X in [None, controls]:
            for W in [None, controls]:
                for sample_weight, sample_var in [
                        (None, None),
                        (np.ones(T.shape[0]), np.zeros(T.shape[0]))]:
                    for featurizer in [
                            None,
                            PolynomialFeatures(degree=2, include_bias=False)]:
                        for models in [
                                (GradientBoostingClassifier(),
                                 GradientBoostingRegressor()),
                                (LogisticRegression(solver='lbfgs', multi_class='auto'),
                                 LinearRegression())]:
                            for inference in [
                                    'statsmodels',
                                    StatsModelsInferenceDiscrete(cov_type='nonrobust')]:
                                yield (X, W, sample_weight, sample_var,
                                       featurizer, models, inference)

    def check(interval, point, truth):
        """Validate a point estimate and its (lower, upper) interval against ``truth``."""
        lower, upper = interval
        TestDRLearner._check_with_interval(truth, point, lower, upper)

    for (X, W, sample_weight, sample_var,
         featurizer, models, inference) in configurations():
        with self.subTest(X=X, W=W, sample_weight=sample_weight,
                          sample_var=sample_var, featurizer=featurizer,
                          models=models, inference=inference):
            est = LinearDRLearner(model_propensity=models[0],
                                  model_regression=models[1],
                                  featurizer=featurizer)

            # With neither features nor controls, fitting is expected to fail.
            if (X is None) and (W is None):
                with pytest.raises(AttributeError):
                    est.fit(y, T, X=X, W=W,
                            sample_weight=sample_weight, sample_var=sample_var)
                continue

            est.fit(y, T, X=X, W=W,
                    sample_weight=sample_weight, sample_var=sample_var,
                    inference=inference)

            # --- effects -------------------------------------------------
            if X is not None:
                X_test = X[:3]
                # One column per non-baseline treatment level (t = 1, 2).
                per_level_truth = np.hstack([
                    1 + .5 * X_test[:, [0]],
                    2 * (1 + .5 * X_test[:, [0]]),
                ])
                check(est.effect_interval(X_test, T0=0, T1=1),
                      est.effect(X_test, T0=0, T1=1),
                      1 + .5 * X_test[:, 0])
                check(est.const_marginal_effect_interval(X_test),
                      est.const_marginal_effect(X_test),
                      per_level_truth)
            else:
                per_level_truth = np.array([[1, 2]])
                check(est.effect_interval(T0=0, T1=1),
                      est.effect(T0=0, T1=1),
                      np.array([1]))
                check(est.const_marginal_effect_interval(),
                      est.const_marginal_effect(),
                      per_level_truth)

            for t in [1, 2]:
                if X is not None:
                    check(est.marginal_effect_interval(t, X_test),
                          est.marginal_effect(t, X_test),
                          per_level_truth)
                else:
                    check(est.marginal_effect_interval(t),
                          est.marginal_effect(t),
                          per_level_truth)

            # --- scores --------------------------------------------------
            self.assertIsInstance(est.score_, float)
            self.assertIsInstance(est.score(y, T, X=X, W=W), float)

            # --- feature names -------------------------------------------
            feat_names = ['A', 'B', 'C'] if X is not None else []
            out_feat_names = feat_names
            if X is not None:
                if featurizer is not None:
                    # NOTE(review): `get_feature_names` / `n_input_features_`
                    # appear to be gone from recent scikit-learn releases;
                    # confirm against the pinned version before upgrading.
                    out_feat_names = featurizer.fit(X).get_feature_names(feat_names)
                    np.testing.assert_array_equal(
                        est.featurizer.n_input_features_, 3)
                np.testing.assert_array_equal(
                    est.cate_feature_names(feat_names), out_feat_names)

            # --- shapes of the fitted nuisance models --------------------
            n_nuisance_inputs = (len(feat_names)
                                 + (W.shape[1] if W is not None else 0))
            # Leading 2: one fitted model per entry of `models_*`
            # (presumably one per cross-fitting fold -- TODO confirm).
            # The regression model sees 2 extra columns (presumably the
            # one-hot encoded treatment -- TODO confirm).
            regression_shape = [2, 2 + n_nuisance_inputs]
            if isinstance(models[0], GradientBoostingClassifier):
                fitted_attr = 'feature_importances_'
                propensity_shape = [2, n_nuisance_inputs]
            else:
                fitted_attr = 'coef_'
                # Extra axis of length 3 for the linear propensity model
                # (presumably one row per treatment level -- TODO confirm).
                propensity_shape = [2, 3, n_nuisance_inputs]
            np.testing.assert_array_equal(
                np.array([getattr(mdl, fitted_attr)
                          for mdl in est.models_regression]).shape,
                regression_shape)
            np.testing.assert_array_equal(
                np.array([getattr(mdl, fitted_attr)
                          for mdl in est.models_propensity]).shape,
                propensity_shape)

            # --- final-model coefficients --------------------------------
            if X is not None:
                for t in [1, 2]:
                    # Only the first feature (x0) drives heterogeneity.
                    true_coef = np.zeros(len(out_feat_names))
                    true_coef[0] = .5 * t
                    check(est.model_cate(T=t).coef__interval(),
                          est.model_cate(T=t).coef_,
                          true_coef)
                    check(est.coef__interval(t), est.coef_(t), true_coef)

            # --- final-model intercepts ----------------------------------
            for t in [1, 2]:
                check(est.model_cate(T=t).intercept__interval(),
                      est.model_cate(T=t).intercept_,
                      t)
                check(est.intercept__interval(t), est.intercept_(t), t)