示例#1
0
 def test_dml(self):
     """Smoke-test the DML estimator family on pandas inputs.

     Fits LinearDML, SparseLinearDML and ForestDML on DataFrame/Series
     inputs — first with a single treatment and outcome, then with
     multiple treatments and outcomes — and checks that effects and
     effect intervals compute, and that the input column names propagate
     into ``summary()`` and ``population_summary()`` output.
     """
     #################################
     #  Single treatment and outcome #
     #################################
     X = TestPandasIntegration.df[TestPandasIntegration.features]
     W = TestPandasIntegration.df[TestPandasIntegration.controls]
     Y = TestPandasIntegration.df[TestPandasIntegration.outcome]
     T = TestPandasIntegration.df[TestPandasIntegration.cont_treat]
     # Test LinearDML
     est = LinearDML(model_y=LassoCV(), model_t=LassoCV())
     est.fit(Y, T, X=X, W=W, inference='statsmodels')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     self._check_input_names(
         est.summary())  # Check that names propagate as expected
     # Test re-fit: renamed columns should propagate after a second fit
     X1 = X.rename(columns={c: "{}_1".format(c) for c in X.columns})
     est.fit(Y, T, X=X1, W=W, inference='statsmodels')
     self._check_input_names(est.summary(), feat_comp=X1.columns)
     # Test SparseLinearDML
     est = SparseLinearDML(model_y=LassoCV(), model_t=LassoCV())
     est.fit(Y, T, X=X, W=W, inference='debiasedlasso')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     self._check_input_names(
         est.summary())  # Check that names propagate as expected
     # ForestDML: only effect/interval smoke test, no summary name check
     est = ForestDML(model_y=GradientBoostingRegressor(),
                     model_t=GradientBoostingRegressor())
     est.fit(Y, T, X=X, W=W, inference='blb')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     ####################################
     #  Multiple treatments and outcomes #
     ####################################
     Y = TestPandasIntegration.df[TestPandasIntegration.outcome_multi]
     T = TestPandasIntegration.df[TestPandasIntegration.cont_treat_multi]
     # Test LinearDML
     est = LinearDML(model_y=MultiTaskLasso(), model_t=MultiTaskLasso())
     est.fit(Y, T, X=X, W=W, inference='statsmodels')
     self._check_input_names(est.summary(), True,
                             True)  # Check that names propagate as expected
     self._check_popsum_names(
         est.effect_inference(X).population_summary(), True)
     est.fit(Y, T, X=X, W=W,
             inference='bootstrap')  # Check bootstrap as well
     self._check_input_names(est.summary(), True, True)
     self._check_popsum_names(
         est.effect_inference(X).population_summary(), True)
     # Test SparseLinearDML
     est = SparseLinearDML(model_y=MultiTaskLasso(),
                           model_t=MultiTaskLasso())
     est.fit(Y, T, X=X, W=W, inference='debiasedlasso')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     self._check_input_names(est.summary(), True,
                             True)  # Check that names propagate as expected
     self._check_popsum_names(
         est.effect_inference(X).population_summary(), True)
示例#2
0
    def test_all_kinds(self):
        """Check interval/inference agreement for every bootstrap kind.

        For each ``bootstrap_type`` ('percentile', 'pivot', 'normal'),
        fits a LinearDML with bootstrap inference and verifies that each
        ``*_interval`` method agrees with the ``conf_int`` of the
        corresponding ``*_inference`` result — both without and with
        heterogeneity features X.
        """
        def check_consistency(interval, inference):
            # Interval bounds and the inference point estimate must share
            # a shape, and the interval must match conf_int exactly.
            lower, upper = interval
            assert lower.shape == upper.shape == inference.point_estimate.shape
            ci_lower, ci_upper = inference.conf_int()  # call once, reuse both bounds
            assert np.allclose(lower, ci_lower)
            assert np.allclose(upper, ci_upper)

        T = [1, 0, 1, 2, 0, 2] * 5
        Y = [1, 2, 3, 4, 5, 6] * 5
        X = np.array([1, 1, 2, 2, 1, 2] * 5).reshape(-1, 1)
        est = LinearDML(n_splits=2)
        for kind in ['percentile', 'pivot', 'normal']:
            with self.subTest(kind=kind):
                inference = BootstrapInference(n_bootstrap_samples=5,
                                               bootstrap_type=kind)
                # Fit without X: constant marginal effect only.
                est.fit(Y, T, inference=inference)
                check_consistency(est.const_marginal_effect_interval(),
                                  est.const_marginal_effect_inference())

                # Refit with X and check every interval/inference pair.
                est.fit(Y, T, X=X, inference=inference)
                check_consistency(est.const_marginal_effect_interval(X),
                                  est.const_marginal_effect_inference(X))
                check_consistency(est.coef__interval(),
                                  est.coef__inference())
                check_consistency(est.effect_interval(X),
                                  est.effect_inference(X))
示例#3
0
    def test_refit_final_inference(self):
        """Test that we can perform inference during refit_final"""
        # degree-1 PolynomialFeatures without bias acts as a pass-through
        # featurizer, exercising the featurizer code path cheaply
        est = LinearDML(linear_first_stages=False, featurizer=PolynomialFeatures(1, include_bias=False))

        # Synthetic data: 3 discrete features in {0..4}, continuous outcome,
        # 2 discrete treatments in {0..2}, 2 continuous controls.
        # NOTE: draws are order-dependent (shared RNG stream, no seed).
        X = np.random.choice(np.arange(5), size=(500, 3))
        y = np.random.normal(size=(500,))
        T = np.random.choice(np.arange(3), size=(500, 2))
        W = np.random.normal(size=(500, 2))

        # cache_values=True keeps first-stage nuisances so refit_final can run
        est.fit(y, T, X=X, W=W, cache_values=True, inference='statsmodels')

        assert isinstance(est.effect_inference(X), NormalInferenceResults)

        # BootstrapInference is rejected by refit_final — presumably because
        # bootstrapping requires re-fitting the first stages (confirm in docs)
        with pytest.raises(ValueError):
            est.refit_final(inference=BootstrapInference(2))
示例#4
0
 def test_dml(self):
     """Smoke-test the DML estimator family on pandas inputs.

     Fits LinearDML (with several featurizers), SparseLinearDML and
     ForestDML on DataFrame/Series inputs — single and multiple
     treatments/outcomes — and checks that effects and intervals compute
     and that input column names propagate into ``summary()`` and
     ``population_summary()`` output.
     """
     #################################
     #  Single treatment and outcome #
     #################################
     X = TestPandasIntegration.df[TestPandasIntegration.features]
     W = TestPandasIntegration.df[TestPandasIntegration.controls]
     Y = TestPandasIntegration.df[TestPandasIntegration.outcome]
     T = TestPandasIntegration.df[TestPandasIntegration.cont_treat]
     # Test LinearDML
     est = LinearDML(model_y=LassoCV(), model_t=LassoCV())
     est.fit(Y, T, X=X, W=W, inference='statsmodels')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     self._check_input_names(
         est.summary())  # Check that names propagate as expected
     # |--> Test featurizers
     # Polynomial featurizer: summary should show the expanded feature names
     est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
     est.fit(Y, T, X=X, W=W, inference='statsmodels')
     self._check_input_names(
         est.summary(),
         feat_comp=est.original_featurizer.get_feature_names(X.columns))
     # FunctionTransformer: names fall back to the generic "feat(X)i" form
     est.featurizer = FunctionTransformer()
     est.fit(Y, T, X=X, W=W, inference='statsmodels')
     self._check_input_names(
         est.summary(),
         feat_comp=[
             f"feat(X){i}" for i in range(TestPandasIntegration.n_features)
         ])
     est.featurizer = ColumnTransformer([('passthrough', 'passthrough', [0])
                                         ])
     est.fit(Y, T, X=X, W=W, inference='statsmodels')
     # ColumnTransformer doesn't propagate column names
     self._check_input_names(est.summary(), feat_comp=["x0"])
     # |--> Test re-fit: renamed columns should propagate after a second fit
     est.featurizer = None
     X1 = X.rename(columns={c: "{}_1".format(c) for c in X.columns})
     est.fit(Y, T, X=X1, W=W, inference='statsmodels')
     self._check_input_names(est.summary(), feat_comp=X1.columns)
     # Test SparseLinearDML
     est = SparseLinearDML(model_y=LassoCV(), model_t=LassoCV())
     est.fit(Y, T, X=X, W=W, inference='debiasedlasso')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     self._check_input_names(
         est.summary())  # Check that names propagate as expected
     # Test ForestDML: effect/interval smoke test only, no summary name check
     est = ForestDML(model_y=GradientBoostingRegressor(),
                     model_t=GradientBoostingRegressor())
     est.fit(Y, T, X=X, W=W, inference='blb')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     ####################################
     #  Multiple treatments and outcomes #
     ####################################
     Y = TestPandasIntegration.df[TestPandasIntegration.outcome_multi]
     T = TestPandasIntegration.df[TestPandasIntegration.cont_treat_multi]
     # Test LinearDML
     est = LinearDML(model_y=MultiTaskLasso(), model_t=MultiTaskLasso())
     est.fit(Y, T, X=X, W=W, inference='statsmodels')
     self._check_input_names(est.summary(), True,
                             True)  # Check that names propagate as expected
     self._check_popsum_names(
         est.effect_inference(X).population_summary(), True)
     est.fit(Y, T, X=X, W=W,
             inference='bootstrap')  # Check bootstrap as well
     self._check_input_names(est.summary(), True, True)
     self._check_popsum_names(
         est.effect_inference(X).population_summary(), True)
     # Test SparseLinearDML
     est = SparseLinearDML(model_y=MultiTaskLasso(),
                           model_t=MultiTaskLasso())
     est.fit(Y, T, X=X, W=W, inference='debiasedlasso')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     self._check_input_names(est.summary(), True,
                             True)  # Check that names propagate as expected
     self._check_popsum_names(
         est.effect_inference(X).population_summary(), True)