Example #1
 def test_can_use_sample_weights(self):
     """
     TODO Almost identical to DML test, so consider merging
     Test that we can pass sample weights to an estimator.
     """
     dml = LinearDRLearner(model_regression=LinearRegression(),
                           model_propensity=LogisticRegression(C=1000, solver='lbfgs', multi_class='auto'),
                           featurizer=FunctionTransformer(validate=True))
     dml.fit(np.array([1, 2, 1, 2]), np.array([1, 2, 1, 2]), W=np.ones((4, 1)),
             sample_weight=np.ones((4, )))
     self.assertAlmostEqual(dml.intercept_(T=2), 1)
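
For orientation, here is a minimal end-to-end sketch of the LinearDRLearner workflow these tests exercise (the synthetic data and hyperparameters are illustrative, not part of the test suite; in newer econml releases the import path is econml.dr):

    import numpy as np
    from sklearn.linear_model import LinearRegression, LogisticRegression
    from econml.drlearner import LinearDRLearner

    # Illustrative synthetic data: binary treatment whose effect grows with x0
    rng = np.random.RandomState(0)
    X = rng.normal(size=(500, 2))
    T = rng.binomial(1, 0.5, size=(500,))
    Y = (1 + 0.5 * X[:, 0]) * T + rng.normal(scale=0.1, size=(500,))

    est = LinearDRLearner(model_regression=LinearRegression(),
                          model_propensity=LogisticRegression())
    est.fit(Y, T, X=X, inference='statsmodels')
    point = est.effect(X, T0=0, T1=1)            # CATE point estimates
    lo, hi = est.effect_interval(X, alpha=0.05)  # 95% confidence intervals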
Example #2
 def test_can_use_vectors(self):
     """
     TODO Almost identical to DML test, so consider merging
     Test that we can pass vectors for T and Y (not only 2-dimensional arrays).
     """
     dml = LinearDRLearner(model_regression=LinearRegression(),
                           model_propensity=LogisticRegression(C=1000, solver='lbfgs', multi_class='auto'),
                           fit_cate_intercept=False,
                           featurizer=FunctionTransformer(validate=True))
     dml.fit(np.array([1, 2, 1, 2]), np.array([1, 2, 1, 2]), X=np.ones((4, 1)))
     self.assertAlmostEqual(dml.coef_(T=2).reshape(())[()], 1)
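
The trailing .reshape(())[()] converts a single-element array of any shape into a plain scalar for the comparison; a standalone illustration of the idiom:

    import numpy as np

    arr = np.array([[1.0]])        # shape (1, 1), a single element
    scalar = arr.reshape(())[()]   # reshape to 0-d, then extract the scalar
    assert scalar == 1.0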
Example #3
    def test_can_use_statsmodel_inference(self):
        """
        TODO Almost identical to DML test, so consider merging
        Test that we can use statsmodels to generate confidence intervals
        """
        dml = LinearDRLearner(model_regression=LinearRegression(),
                              model_propensity=LogisticRegression(
                                  C=1000, solver='lbfgs', multi_class='auto'))
        dml.fit(np.array([2, 3, 1, 3, 2, 1, 1, 1]),
                np.array([3, 2, 1, 2, 3, 1, 1, 1]),
                np.ones((8, 1)),
                inference='statsmodels')
        interval = dml.effect_interval(np.ones((9, 1)),
                                       T0=np.array([1, 1, 1, 1, 1, 1, 1, 1, 1]),
                                       T1=np.array([2, 2, 3, 2, 2, 3, 2, 2, 3]),
                                       alpha=0.05)
        point = dml.effect(np.ones((9, 1)),
                           T0=np.array([1, 1, 1, 1, 1, 1, 1, 1, 1]),
                           T1=np.array([2, 2, 3, 2, 2, 3, 2, 2, 3]))
        assert len(interval) == 2
        lo, hi = interval
        assert lo.shape == hi.shape == point.shape
        assert (lo <= point).all()
        assert (point <= hi).all()
        # for at least some of the examples, the CI should have nonzero width
        assert (lo < hi).any()

        interval = dml.const_marginal_effect_interval(np.ones((9, 1)),
                                                      alpha=0.05)
        point = dml.const_marginal_effect(np.ones((9, 1)))
        assert len(interval) == 2
        lo, hi = interval
        assert lo.shape == hi.shape == point.shape
        assert (lo <= point).all()
        assert (point <= hi).all()
        # for at least some of the examples, the CI should have nonzero width
        assert (lo < hi).any()

        interval = dml.coef__interval(T=2, alpha=0.05)
        point = dml.coef_(T=2)
        assert len(interval) == 2
        lo, hi = interval
        assert lo.shape == hi.shape == point.shape
        assert (lo <= point).all()
        assert (point <= hi).all()
        # for at least some of the examples, the CI should have nonzero width
        assert (lo < hi).any()
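
With inference='statsmodels', every point-estimate method used above has an *_interval counterpart; the pairing also covers the intercept, which this test does not touch. A self-contained sketch using the same toy data (an illustrative addition, not part of the original test):

    import numpy as np
    from sklearn.linear_model import LinearRegression, LogisticRegression
    from econml.drlearner import LinearDRLearner

    dml = LinearDRLearner(model_regression=LinearRegression(),
                          model_propensity=LogisticRegression(C=1000, solver='lbfgs',
                                                              multi_class='auto'))
    dml.fit(np.array([2, 3, 1, 3, 2, 1, 1, 1]),
            np.array([3, 2, 1, 2, 3, 1, 1, 1]),
            np.ones((8, 1)),
            inference='statsmodels')
    # The interval brackets the corresponding point estimate by construction
    lo, hi = dml.intercept__interval(T=2, alpha=0.05)
    assert lo <= dml.intercept_(T=2) <= hi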
Example #4
    def test_drlearner(self):
        y, T, X, W = self._get_data()

        for est in [LinearDRLearner(random_state=123),
                    SparseLinearDRLearner(random_state=123)]:
            est.fit(y, T, X=X, W=W, cache_values=True)
            np.testing.assert_equal(est.model_regression, 'auto')
            est.model_regression = LinearRegression()
            est.model_propensity = LogisticRegression()
            est.fit(y, T, X=X, W=W, cache_values=True)
            assert isinstance(est.model_regression, LinearRegression)
            with pytest.raises(ValueError):
                est.multitask_model_final = True
            with pytest.raises(ValueError):
                est.model_final = LinearRegression()
            est.min_propensity = .1
            est.mc_iters = 2
            est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
            est.refit_final()
            assert isinstance(est.featurizer_, PolynomialFeatures)
            np.testing.assert_equal(est.mc_iters, 2)
            intcpt = est.intercept_(T=1)
            est.fit_cate_intercept = False
            np.testing.assert_equal(est.intercept_(T=1), intcpt)
            est.refit_final()
            with pytest.raises(AttributeError):
                est.intercept_(T=1)
            est.fit(y, T, X=X, W=W, cache_values=False)
            with pytest.raises(AssertionError):
                est.refit_final()
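
The cache_values/refit_final pattern tested here lets you change final-stage settings without re-running the cross-fitted nuisance estimation; a hedged sketch of the intended workflow (the synthetic data is illustrative):

    import numpy as np
    from sklearn.preprocessing import PolynomialFeatures
    from econml.drlearner import LinearDRLearner

    rng = np.random.RandomState(123)
    X = rng.normal(size=(200, 2))
    T = rng.binomial(1, 0.5, size=(200,))
    Y = T * X[:, 0] + rng.normal(size=(200,))

    est = LinearDRLearner(random_state=123)
    est.fit(Y, T, X=X, cache_values=True)   # nuisance estimates are cached
    est.featurizer = PolynomialFeatures(degree=2, include_bias=False)
    est.refit_final()                       # refits only the final CATE model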
Example #5
 def test_discrete_treatments(self):
     """
     TODO Almost identical to DML test, so consider merging
     Test that we can use discrete treatments
     """
     dml = LinearDRLearner(model_regression=LinearRegression(),
                           model_propensity=LogisticRegression(
                               C=1000, solver='lbfgs', multi_class='auto'),
                           featurizer=FunctionTransformer(validate=True))
     # create a simple artificial setup where effect of moving from treatment
     #     1 -> 2 is 2,
     #     1 -> 3 is 1, and
     #     2 -> 3 is -1 (necessarily, by composing the previous two effects)
     # Using an uneven number of examples from different classes
     # and having the treatments in non-lexicographic order
     # should rule out some basic issues.
     dml.fit(np.array([2, 3, 1, 3, 2, 1, 1, 1]),
             np.array([3, 2, 1, 2, 3, 1, 1, 1]),
             np.ones((8, 1)))
     np.testing.assert_almost_equal(dml.effect(np.ones((9, 1)),
                                               T0=np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]),
                                               T1=np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])),
                                    [0, 2, 1, -2, 0, -1, -1, 1, 0])
     dml.score(np.array([2, 3, 1, 3, 2, 1, 1, 1]),
               np.array([3, 2, 1, 2, 3, 1, 1, 1]),
               np.ones((8, 1)))
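
To make the composition arithmetic explicit: effect(2 -> 3) = effect(1 -> 3) - effect(1 -> 2) = 1 - 2 = -1, and reversing a contrast flips its sign. A small self-contained check (the offsets dict is purely illustrative, not econml API) that reproduces the expected-effect vector:

    # Offsets relative to treatment 1, implied by effect(1 -> 2) = 2 and effect(1 -> 3) = 1
    v = {1: 0, 2: 2, 3: 1}
    pairs = [(1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3), (3, 1), (3, 2), (3, 3)]
    assert [v[t1] - v[t0] for t0, t1 in pairs] == [0, 2, 1, -2, 0, -1, -1, 1, 0]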
Example #6
    def test_can_summarize(self):
        LinearDMLCateEstimator().fit(TestInference.Y,
                                     TestInference.T,
                                     TestInference.X,
                                     TestInference.W,
                                     inference='statsmodels').summary()

        LinearDRLearner(fit_cate_intercept=False).fit(
            TestInference.Y,
            TestInference.T > 0,
            TestInference.X,
            TestInference.W,
            inference=BootstrapInference(5)).summary(1)
Example #7
    def test_can_summarize(self):
        LinearDML(model_t=LinearRegression(),
                  model_y=LinearRegression()).fit(TestInference.Y,
                                                  TestInference.T,
                                                  TestInference.X,
                                                  TestInference.W).summary()

        LinearDRLearner(model_regression=LinearRegression(),
                        model_propensity=LogisticRegression(),
                        fit_cate_intercept=False).fit(
                            TestInference.Y,
                            TestInference.T > 0,
                            TestInference.X,
                            TestInference.W,
                            inference=BootstrapInference(5)).summary(1)
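
BootstrapInference(5) keeps these tests fast but uses far too few replicates for real work; a sketch of a more realistic configuration (parameter names as in econml's BootstrapInference):

    from econml.inference import BootstrapInference

    # ~100 resampled refits is a more typical choice outside of tests;
    # n_jobs=-1 runs the bootstrap refits in parallel.
    inf = BootstrapInference(n_bootstrap_samples=100, n_jobs=-1)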
Example #8
 def test_dr_random_state(self):
     Y, T, X, W, X_test = self._make_data(500, 2)
     for est in [
             DRLearner(model_final=RandomForestRegressor(
                 max_depth=3,
                 n_estimators=10,
                 min_samples_leaf=100,
                 bootstrap=True,
                 random_state=123),
                       cv=2,
                       random_state=123),
             LinearDRLearner(random_state=123),
             SparseLinearDRLearner(cv=2, random_state=123),
             ForestDRLearner(
                 model_regression=RandomForestRegressor(n_estimators=10,
                                                        max_depth=4,
                                                        random_state=123),
                 model_propensity=RandomForestClassifier(n_estimators=10,
                                                         max_depth=4,
                                                         random_state=123),
                 cv=2,
                 random_state=123)
     ]:
         TestRandomState._test_random_state(est, X_test, Y, T, X=X, W=W)
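
The shared _test_random_state helper is not shown here; presumably it refits each estimator and checks reproducibility. The property under test can be stated directly (a sketch of the idea, not the helper's actual body):

    import numpy as np

    def fits_are_deterministic(est, Y, T, X, X_test):
        """Fitting twice with the same random_state should give identical effects."""
        eff1 = est.fit(Y, T, X=X).effect(X_test)
        eff2 = est.fit(Y, T, X=X).effect(X_test)
        return np.array_equal(eff1, eff2)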
Example #9
    def test_cate_api(self):
        """Test that we correctly implement the CATE API."""
        n = 20

        def make_random(is_discrete, d):
            if d is None:
                return None
            sz = (n, d) if d > 0 else (n,)
            if is_discrete:
                while True:
                    arr = np.random.choice(['a', 'b', 'c'], size=sz)
                    # ensure that we've got at least two of every element
                    _, counts = np.unique(arr, return_counts=True)
                    if len(counts) == 3 and counts.min() > 1:
                        return arr
            else:
                return np.random.normal(size=sz)

        for d_y in [0, 1]:
            is_discrete = True
            for d_t in [0, 1]:
                for d_x in [2, None]:
                    for d_w in [2, None]:
                        W, X, Y, T = [make_random(is_discrete, d)
                                      for is_discrete, d in [(False, d_w),
                                                             (False, d_x),
                                                             (False, d_y),
                                                             (is_discrete, d_t)]]

                        if (X is None) and (W is None):
                            continue
                        d_t_final = 2 if is_discrete else d_t

                        effect_shape = (n,) + ((d_y,) if d_y > 0 else ())
                        effect_summaryframe_shape = (
                            n * (d_y if d_y > 0 else 1), 6)
                        marginal_effect_shape = ((n,) +
                                                 ((d_y,) if d_y > 0 else ()) +
                                                 ((d_t_final,) if d_t_final > 0 else ()))
                        marginal_effect_summaryframe_shape = (n * (d_y if d_y > 0 else 1),
                                                              6 * (d_t_final if d_t_final > 0 else 1))

                        # since T isn't passed to const_marginal_effect, defaults to one row if X is None
                        const_marginal_effect_shape = ((n if d_x else 1,) +
                                                       ((d_y,) if d_y > 0 else ()) +
                                                       ((d_t_final,) if d_t_final > 0 else ()))
                        const_marginal_effect_summaryframe_shape = (
                            (n if d_x else 1) * (d_y if d_y > 0 else 1),
                            6 * (d_t_final if d_t_final > 0 else 1))

                        for est in [LinearDRLearner(model_propensity=LogisticRegression(C=1000, solver='lbfgs',
                                                                                        multi_class='auto')),
                                    DRLearner(model_propensity=LogisticRegression(multi_class='auto'),
                                              model_regression=LinearRegression(),
                                              model_final=StatsModelsLinearRegression(),
                                              multitask_model_final=True)]:

                            # TODO: add stratification to bootstrap so that we can use it even with discrete treatments
                            infs = [None]
                            if isinstance(est, LinearDRLearner):
                                infs.append('statsmodels')

                            for inf in infs:
                                with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
                                                  is_discrete=is_discrete, est=est, inf=inf):
                                    est.fit(Y, T, X, W, inference=inf)
                                    # make sure we can call the marginal_effect and effect methods
                                    const_marg_eff = est.const_marginal_effect(X)
                                    marg_eff = est.marginal_effect(T, X)
                                    self.assertEqual(shape(marg_eff), marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)

                                    np.testing.assert_array_equal(marg_eff if d_x else marg_eff[0:1],
                                                                  const_marg_eff)

                                    T0 = np.full_like(T, 'a')
                                    eff = est.effect(X, T0=T0, T1=T)
                                    self.assertEqual(shape(eff), effect_shape)
                                    if inf is not None:
                                        const_marg_eff_int = est.const_marginal_effect_interval(X)
                                        marg_eff_int = est.marginal_effect_interval(T, X)
                                        const_marg_effect_inf = est.const_marginal_effect_inference(X)
                                        T1 = np.full_like(T, 'b')
                                        effect_inf = est.effect_inference(X, T0=T0, T1=T1)
                                        marg_effect_inf = est.marginal_effect_inference(T, X)
                                        self.assertEqual(shape(marg_eff_int),
                                                         (2,) + marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_eff_int),
                                                         (2,) + const_marginal_effect_shape)
                                        self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)),
                                                         (2,) + effect_shape)

                                        # test const marginal inference
                                        self.assertEqual(shape(const_marg_effect_inf.summary_frame()),
                                                         const_marginal_effect_summaryframe_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.point_estimate),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.stderr),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.var),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.pvalue()),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.zstat()),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.conf_int()),
                                                         (2,) + const_marginal_effect_shape)
                                        np.testing.assert_array_almost_equal(const_marg_effect_inf.conf_int()[0],
                                                                             const_marg_eff_int[0], decimal=5)
                                        const_marg_effect_inf.population_summary()._repr_html_()

                                        # test effect inference
                                        self.assertEqual(shape(effect_inf.summary_frame()),
                                                         effect_summaryframe_shape)
                                        self.assertEqual(shape(effect_inf.point_estimate),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.stderr),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.var),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.pvalue()),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.zstat()),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.conf_int()),
                                                         (2,) + effect_shape)
                                        np.testing.assert_array_almost_equal(effect_inf.conf_int()[0],
                                                                             est.effect_interval(X, T0=T0, T1=T1)[0],
                                                                             decimal=5)
                                        effect_inf.population_summary()._repr_html_()

                                        # test marginal effect inference
                                        self.assertEqual(shape(marg_effect_inf.summary_frame()),
                                                         marginal_effect_summaryframe_shape)
                                        self.assertEqual(shape(marg_effect_inf.point_estimate),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.stderr),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.var),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.pvalue()),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.zstat()),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.conf_int()),
                                                         (2,) + marginal_effect_shape)
                                        np.testing.assert_array_almost_equal(marg_effect_inf.conf_int()[0],
                                                                             marg_eff_int[0], decimal=5)
                                        marg_effect_inf.population_summary()._repr_html_()

                                    est.score(Y, T, X, W)

                                    # make sure we can call effect with implied scalar treatments, no matter the
                                    # dimensions of T, and also that we warn when there are multiple treatments
                                    if d_t > 1:
                                        cm = self.assertWarns(Warning)
                                    else:
                                        cm = ExitStack()  # ExitStack can be used as a "do nothing" ContextManager
                                    with cm:
                                        effect_shape2 = (n if d_x else 1,) + ((d_y,) if d_y > 0 else ())
                                        eff = est.effect(X, T0='a', T1='b')
                                        self.assertEqual(shape(eff), effect_shape2)
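
A compact restatement of the shape bookkeeping above: every *_interval result prepends an axis of length 2 (lower, upper) to the corresponding point-estimate shape, which is exactly what the (2,) + ... assertions check. A tiny concrete instance:

    import numpy as np

    point = np.zeros((20, 2))                      # e.g. n=20 rows, d_t_final=2 treatment levels
    interval = np.stack([point - 1., point + 1.])  # (lower, upper) bounds
    assert interval.shape == (2,) + point.shape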
Example #10
    def test_can_custom_splitter(self):
        """
        TODO Almost identical to DML test, so consider merging
        """
        # test that we can fit with a KFold instance
        dml = LinearDRLearner(model_regression=LinearRegression(),
                              model_propensity=LogisticRegression(
                                  C=1000, solver='lbfgs', multi_class='auto'),
                              n_splits=KFold(n_splits=3))
        dml.fit(np.array([1, 2, 3, 1, 2, 3]),
                np.array([1, 2, 3, 1, 2, 3]),
                np.ones((6, 1)))
        dml.score(np.array([1, 2, 3, 1, 2, 3]),
                  np.array([1, 2, 3, 1, 2, 3]),
                  np.ones((6, 1)))

        # test that we can fit with a train/test iterable
        dml = LinearDRLearner(model_regression=LinearRegression(),
                              model_propensity=LogisticRegression(
                                  C=1000, solver='lbfgs', multi_class='auto'),
                              n_splits=[([0, 1, 2], [3, 4, 5])])
        dml.fit(np.array([1, 2, 3, 1, 2, 3]),
                np.array([1, 2, 3, 1, 2, 3]),
                np.ones((6, 1)))
        dml.score(np.array([1, 2, 3, 1, 2, 3]),
                  np.array([1, 2, 3, 1, 2, 3]),
                  np.ones((6, 1)))
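
Anything with a scikit-learn splitter interface (or an explicit list of train/test index pairs, as above) can be plugged in; for discrete treatments a stratified splitter may be the safer choice. A construction-only sketch (n_splits is the parameter name in the econml release these tests target; newer releases rename it to cv):

    from sklearn.linear_model import LinearRegression, LogisticRegression
    from sklearn.model_selection import StratifiedKFold
    from econml.drlearner import LinearDRLearner

    # StratifiedKFold keeps treatment-arm proportions balanced across folds
    dml = LinearDRLearner(model_regression=LinearRegression(),
                          model_propensity=LogisticRegression(),
                          n_splits=StratifiedKFold(n_splits=3))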
Example #11
    def test_summary_discrete(self):
        """Tests the inference results summary for discrete treatment estimators."""
        # Test inference results when `cate_feature_names` does not exist

        for inference in [
                BootstrapInference(n_bootstrap_samples=5), 'statsmodels'
        ]:
            cate_est = LinearDRLearner(model_regression=LinearRegression(),
                                       model_propensity=LogisticRegression(),
                                       featurizer=PolynomialFeatures(
                                           degree=2, include_bias=False))
            cate_est.fit(TestInference.Y,
                         TestInference.T,
                         TestInference.X,
                         TestInference.W,
                         inference=inference)
            summary_results = cate_est.summary(T=1)
            coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
            fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
                TestInference.X).get_feature_names()
            np.testing.assert_array_equal(coef_rows, fnames)
            intercept_rows = np.asarray(summary_results.tables[1].data)[1:, 0]
            np.testing.assert_array_equal(intercept_rows, ['intercept'])

            cate_est = LinearDRLearner(model_regression=LinearRegression(),
                                       model_propensity=LogisticRegression(),
                                       featurizer=PolynomialFeatures(
                                           degree=2, include_bias=False))
            cate_est.fit(TestInference.Y,
                         TestInference.T,
                         TestInference.X,
                         TestInference.W,
                         inference=inference)
            fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
            summary_results = cate_est.summary(T=1, feat_name=fnames)
            coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
            fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
                TestInference.X).get_feature_names(input_features=fnames)
            np.testing.assert_array_equal(coef_rows, fnames)
            cate_est = LinearDRLearner(model_regression=LinearRegression(),
                                       model_propensity=LogisticRegression(),
                                       featurizer=None)
            cate_est.fit(TestInference.Y,
                         TestInference.T,
                         TestInference.X,
                         TestInference.W,
                         inference=inference)
            summary_results = cate_est.summary(T=1)
            coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
            np.testing.assert_array_equal(
                coef_rows, ['X' + str(i) for i in range(TestInference.d_x)])

            cate_est = LinearDRLearner(model_regression=LinearRegression(),
                                       model_propensity=LogisticRegression(),
                                       featurizer=None)
            cate_est.fit(TestInference.Y,
                         TestInference.T,
                         TestInference.X,
                         TestInference.W,
                         inference=inference)
            fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
            summary_results = cate_est.summary(T=1, feat_name=fnames)
            coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
            np.testing.assert_array_equal(coef_rows, fnames)

            cate_est = LinearDRLearner(model_regression=LinearRegression(),
                                       model_propensity=LogisticRegression(),
                                       featurizer=None)
            wrapped_est = self._NoFeatNamesEst(cate_est)
            wrapped_est.fit(TestInference.Y,
                            TestInference.T,
                            TestInference.X,
                            TestInference.W,
                            inference=inference)
            summary_results = wrapped_est.summary(T=1)
            coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
            np.testing.assert_array_equal(
                coef_rows, ['X' + str(i) for i in range(TestInference.d_x)])

            cate_est = LinearDRLearner(model_regression=LinearRegression(),
                                       model_propensity=LogisticRegression(),
                                       featurizer=None)
            wrapped_est = self._NoFeatNamesEst(cate_est)
            wrapped_est.fit(TestInference.Y,
                            TestInference.T,
                            TestInference.X,
                            TestInference.W,
                            inference=inference)
            fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
            summary_results = wrapped_est.summary(T=1, feat_name=fnames)
            coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
            np.testing.assert_array_equal(coef_rows, fnames)
Example #12
    def test_linear_drlearner_all_attributes(self):
        from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, RandomForestRegressor
        from sklearn.linear_model import LinearRegression, LogisticRegression
        from econml.utilities import StatsModelsLinearRegression
        import scipy.special
        np.random.seed(123)
        controls = np.random.uniform(-1, 1, size=(5000, 3))
        T = np.random.binomial(2, scipy.special.expit(controls[:, 0]))
        sigma = 0.01
        y = (1 + .5 * controls[:, 0]) * T + controls[:, 0] + np.random.normal(
            0, sigma, size=(5000, ))
        for X in [None, controls]:
            for W in [None, controls]:
                for sample_weight, sample_var in [(None, None),
                                                  (np.ones(T.shape[0]),
                                                   np.zeros(T.shape[0]))]:
                    for featurizer in [
                            None,
                            PolynomialFeatures(degree=2, include_bias=False)
                    ]:
                        for models in [(GradientBoostingClassifier(),
                                        GradientBoostingRegressor()),
                                       (LogisticRegression(solver='lbfgs',
                                                           multi_class='auto'),
                                        LinearRegression())]:
                            for inference in [
                                    'statsmodels',
                                    StatsModelsInferenceDiscrete(
                                        cov_type='nonrobust')
                            ]:
                                with self.subTest(X=X,
                                                  W=W,
                                                  sample_weight=sample_weight,
                                                  sample_var=sample_var,
                                                  featurizer=featurizer,
                                                  models=models,
                                                  inference=inference):
                                    est = LinearDRLearner(
                                        model_propensity=models[0],
                                        model_regression=models[1],
                                        featurizer=featurizer)
                                    if (X is None) and (W is None):
                                        with pytest.raises(AttributeError) as e_info:
                                            est.fit(y, T, X=X, W=W,
                                                    sample_weight=sample_weight,
                                                    sample_var=sample_var)
                                        continue
                                    est.fit(y,
                                            T,
                                            X=X,
                                            W=W,
                                            sample_weight=sample_weight,
                                            sample_var=sample_var,
                                            inference=inference)
                                    if X is not None:
                                        lower, upper = est.effect_interval(X[:3], T0=0, T1=1)
                                        point = est.effect(X[:3], T0=0, T1=1)
                                        truth = 1 + .5 * X[:3, 0]
                                        TestDRLearner._check_with_interval(truth, point, lower, upper)
                                        lower, upper = est.const_marginal_effect_interval(X[:3])
                                        point = est.const_marginal_effect(X[:3])
                                        truth = np.hstack([1 + .5 * X[:3, [0]],
                                                           2 * (1 + .5 * X[:3, [0]])])
                                        TestDRLearner._check_with_interval(truth, point, lower, upper)
                                    else:
                                        lower, upper = est.effect_interval(T0=0, T1=1)
                                        point = est.effect(T0=0, T1=1)
                                        truth = np.array([1])
                                        TestDRLearner._check_with_interval(truth, point, lower, upper)
                                        lower, upper = est.const_marginal_effect_interval()
                                        point = est.const_marginal_effect()
                                        truth = np.array([[1, 2]])
                                        TestDRLearner._check_with_interval(truth, point, lower, upper)

                                    for t in [1, 2]:
                                        if X is not None:
                                            lower, upper = est.marginal_effect_interval(t, X[:3])
                                            point = est.marginal_effect(t, X[:3])
                                            truth = np.hstack([1 + .5 * X[:3, [0]],
                                                               2 * (1 + .5 * X[:3, [0]])])
                                            TestDRLearner._check_with_interval(truth, point, lower, upper)
                                        else:
                                            lower, upper = est.marginal_effect_interval(t)
                                            point = est.marginal_effect(t)
                                            truth = np.array([[1, 2]])
                                            TestDRLearner._check_with_interval(truth, point, lower, upper)
                                    assert isinstance(est.score_, float)
                                    assert isinstance(est.score(y, T, X=X, W=W), float)

                                    if X is not None:
                                        feat_names = ['A', 'B', 'C']
                                    else:
                                        feat_names = []
                                    out_feat_names = feat_names
                                    if X is not None:
                                        if featurizer is not None:
                                            out_feat_names = featurizer.fit(X).get_feature_names(feat_names)
                                            np.testing.assert_array_equal(est.featurizer.n_input_features_, 3)
                                        np.testing.assert_array_equal(est.cate_feature_names(feat_names),
                                                                      out_feat_names)

                                    if isinstance(models[0], GradientBoostingClassifier):
                                        np.testing.assert_array_equal(
                                            np.array([mdl.feature_importances_
                                                      for mdl in est.models_regression]).shape,
                                            [2, 2 + len(feat_names) + (W.shape[1] if W is not None else 0)])
                                        np.testing.assert_array_equal(
                                            np.array([mdl.feature_importances_
                                                      for mdl in est.models_propensity]).shape,
                                            [2, len(feat_names) + (W.shape[1] if W is not None else 0)])
                                    else:
                                        np.testing.assert_array_equal(
                                            np.array([mdl.coef_ for mdl in est.models_regression]).shape,
                                            [2, 2 + len(feat_names) + (W.shape[1] if W is not None else 0)])
                                        np.testing.assert_array_equal(
                                            np.array([mdl.coef_ for mdl in est.models_propensity]).shape,
                                            [2, 3, len(feat_names) + (W.shape[1] if W is not None else 0)])

                                    if X is not None:
                                        for t in [1, 2]:
                                            true_coef = np.zeros(len(out_feat_names))
                                            true_coef[0] = .5 * t
                                            lower, upper = est.model_cate(T=t).coef__interval()
                                            point = est.model_cate(T=t).coef_
                                            truth = true_coef
                                            TestDRLearner._check_with_interval(truth, point, lower, upper)

                                            lower, upper = est.coef__interval(t)
                                            point = est.coef_(t)
                                            truth = true_coef
                                            TestDRLearner._check_with_interval(truth, point, lower, upper)
                                    for t in [1, 2]:
                                        lower, upper = est.model_cate(T=t).intercept__interval()
                                        point = est.model_cate(T=t).intercept_
                                        truth = t
                                        TestDRLearner._check_with_interval(truth, point, lower, upper)

                                        lower, upper = est.intercept__interval(t)
                                        point = est.intercept_(t)
                                        truth = t
                                        TestDRLearner._check_with_interval(truth, point, lower, upper)
Example #13
 def test_drlearners(self):
     X = TestPandasIntegration.df[TestPandasIntegration.features]
     W = TestPandasIntegration.df[TestPandasIntegration.controls]
     Y = TestPandasIntegration.df[TestPandasIntegration.outcome]
     T = TestPandasIntegration.df[TestPandasIntegration.bin_treat]
     # Test LinearDRLearner
     est = LinearDRLearner(model_propensity=GradientBoostingClassifier(),
                           model_regression=GradientBoostingRegressor())
     est.fit(Y, T, X=X, W=W, inference='statsmodels')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     self._check_input_names(est.summary(T=1))
     self._check_popsum_names(est.effect_inference(X).population_summary())
     # Test SparseLinearDRLearner
     est = SparseLinearDRLearner(
         model_propensity=GradientBoostingClassifier(),
         model_regression=GradientBoostingRegressor())
     est.fit(Y, T, X=X, W=W, inference='debiasedlasso')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     self._check_input_names(est.summary(T=1))
     self._check_popsum_names(est.effect_inference(X).population_summary())
     # Test ForestDRLearner
     est = ForestDRLearner(model_propensity=GradientBoostingClassifier(),
                           model_regression=GradientBoostingRegressor())
     est.fit(Y, T, X=X, W=W, inference='blb')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     self._check_popsum_names(est.effect_inference(X).population_summary())
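
Passing pandas objects lets econml pick up column names for summaries, which is what _check_input_names and _check_popsum_names verify. A minimal self-contained sketch of the pattern (the DataFrame below is illustrative):

    import numpy as np
    import pandas as pd
    from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
    from econml.drlearner import LinearDRLearner

    rng = np.random.RandomState(0)
    df = pd.DataFrame({'x0': rng.normal(size=200), 'w0': rng.normal(size=200)})
    df['t'] = rng.binomial(1, 0.5, size=200)
    df['y'] = df['t'] * (1 + df['x0']) + rng.normal(size=200)

    est = LinearDRLearner(model_propensity=GradientBoostingClassifier(),
                          model_regression=GradientBoostingRegressor())
    est.fit(df['y'], df['t'], X=df[['x0']], W=df[['w0']], inference='statsmodels')
    print(est.summary(T=1))  # coefficient rows should be labeled 'x0' rather than 'X0'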