def test_orthoiv(self):
    """Run LinearIntentToTreatDRIV end to end on the shared class-level fixture.

    Fits with 'statsmodels' inference, requests point effects and a 95%
    interval, then hands the summary objects to the name-checking helpers.
    """
    # Every input is a column selection from the same class-level `df`.
    frame = TestPandasIntegration.df
    X = frame[TestPandasIntegration.features]
    Y = frame[TestPandasIntegration.outcome]
    T = frame[TestPandasIntegration.bin_treat]
    Z = frame[TestPandasIntegration.instrument]

    # Test LinearIntentToTreatDRIV
    estimator = LinearIntentToTreatDRIV(
        model_y_xw=GradientBoostingRegressor(),
        model_t_xwz=GradientBoostingClassifier(),
        flexible_model_effect=GradientBoostingRegressor(),
    )
    estimator.fit(Y, T, Z=Z, X=X, inference='statsmodels')

    # The values are not inspected; the calls only need to succeed, and the
    # interval must unpack into exactly two bounds.
    estimator.effect(X)
    _lower, _upper = estimator.effect_interval(X, alpha=0.05)

    # Check input names propagate
    self._check_input_names(estimator.summary())
    population_summary = estimator.effect_inference(X).population_summary()
    self._check_popsum_names(population_summary)
def test_stratify_orthoiv(self):
    """Check that fitting works when folds are stratified by (treatment, instrument)."""
    # Each of the four (T, Z) combinations occurs exactly twice in these
    # eight rows, which is the minimum needed to place one of each in both
    # folds when cv=2.
    Y = [1, 2, 3, 4, 5, 6, 7, 8]
    T = [1, 0, 1, 1, 0, 0, 1, 0]
    Z = [1, 0, 0, 1, 0, 1, 0, 1]
    X = np.array([1, 1, 2, 2, 1, 2, 1, 2]).reshape(-1, 1)

    estimator = LinearIntentToTreatDRIV(
        model_Y_X=LinearRegression(),
        model_T_XZ=LogisticRegression(),
        flexible_model_effect=LinearRegression(),
        cv=2,
    )
    bootstrap = BootstrapInference(n_bootstrap_samples=20, n_jobs=-1, verbose=3)

    estimator.fit(Y, T, Z=Z, X=X, inference=bootstrap)

    # Only needs to run without raising; the interval itself is not checked.
    estimator.const_marginal_effect_interval(X)
def test_access_to_internal_models(self):
    """
    Test the accessors for the fitted nuisance models, the final model and
    the featurizer, once with a PolynomialFeatures featurizer and once with
    featurizer=None.
    """

    def assert_fitted_model_types(fitted):
        # Expectations that are identical for both configurations.
        assert isinstance(fitted.model_final_, StatsModelsLinearRegression)
        assert all(isinstance(mdl, LinearRegression)
                   for mdl in fitted.models_Y_X)
        assert all(isinstance(mdl, LogisticRegression)
                   for mdl in fitted.models_T_XZ)

    Y = np.array([1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2])
    T = np.array([1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2])
    Z = np.array([1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2])
    X = np.array([1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]).reshape(-1, 1)

    # --- Configuration 1: explicit polynomial featurizer -----------------
    with_featurizer = LinearIntentToTreatDRIV(
        model_Y_X=LinearRegression(),
        model_T_XZ=LogisticRegression(C=1000),
        flexible_model_effect=WeightedLasso(),
        featurizer=PolynomialFeatures(degree=2, include_bias=False),
    )
    with_featurizer.fit(Y, T, Z=Z, X=X)

    assert isinstance(with_featurizer.original_featurizer, PolynomialFeatures)
    assert isinstance(with_featurizer.featurizer_, Pipeline)
    assert_fitted_model_types(with_featurizer)
    # Degree-2 expansion without bias: the feature and its square.
    np.testing.assert_array_equal(
        with_featurizer.cate_feature_names(['A']), ['A', 'A^2'])
    np.testing.assert_array_equal(
        with_featurizer.cate_feature_names(), ['x0', 'x0^2'])

    # --- Configuration 2: no featurizer ----------------------------------
    without_featurizer = LinearIntentToTreatDRIV(
        model_Y_X=LinearRegression(),
        model_T_XZ=LogisticRegression(C=1000),
        flexible_model_effect=WeightedLasso(),
        featurizer=None,
    )
    without_featurizer.fit(Y, T, Z=Z, X=X)

    assert without_featurizer.original_featurizer is None
    assert isinstance(without_featurizer.featurizer_, FunctionTransformer)
    assert_fitted_model_types(without_featurizer)
    np.testing.assert_array_equal(
        without_featurizer.cate_feature_names(['A']), ['A'])
def test_can_use_statsmodel_inference(self):
    """Test that confidence intervals are available for effects, coefficients and intercept."""

    def assert_interval_brackets(interval, point, compare_shapes=True):
        # An interval is a (lower, upper) pair that must enclose the point
        # estimate everywhere.
        assert len(interval) == 2
        lo, hi = interval
        if compare_shapes:
            assert lo.shape == hi.shape == point.shape
        assert np.all(lo <= point)
        assert np.all(point <= hi)
        # for at least some of the examples, the CI should have nonzero width
        assert np.any(lo < hi)

    estimator = LinearIntentToTreatDRIV(
        model_Y_X=LinearRegression(),
        model_T_XZ=LogisticRegression(C=1000),
        flexible_model_effect=WeightedLasso(),
    )
    estimator.fit(
        np.array([1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2]),
        np.array([1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2]),
        Z=np.array([1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]),
        X=np.array([1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]).reshape(-1, 1),
    )

    # Nine identical query rows, paired with varying baseline/target treatments.
    X_query = np.ones((9, 1))
    T0 = np.array([1, 1, 1, 2, 2, 2, 1, 1, 1])
    T1 = np.array([1, 2, 1, 1, 2, 2, 2, 2, 1])

    # Arguments are evaluated left to right, so each interval is computed
    # before its matching point estimate.
    assert_interval_brackets(
        estimator.effect_interval(X_query, T0=T0, T1=T1, alpha=0.05),
        estimator.effect(X_query, T0=T0, T1=T1),
    )
    assert_interval_brackets(
        estimator.const_marginal_effect_interval(X_query, alpha=0.05),
        estimator.const_marginal_effect(X_query),
    )
    assert_interval_brackets(
        estimator.coef__interval(alpha=0.05),
        estimator.coef_,
    )
    # The intercept check deliberately skips the shape comparison.
    assert_interval_brackets(
        estimator.intercept__interval(alpha=0.05),
        estimator.intercept_,
        compare_shapes=False,
    )