示例#1
0
 def test_predict_proba(self):
     """Check that probabilistic predictions sum to one within each treatment.

     Fits a `Standardization` model around a logistic regression (large
     ``C``) with ``predict_proba=True``, runs the generic output checks in
     `verify_individual_multiclass_output`, and then asserts that, for every
     treatment value, the predicted outcome columns add up to 1 on each row.
     """
     self.estimator = Standardization(LogisticRegression(C=1e6,
                                                         solver='lbfgs'),
                                      predict_proba=True)
     ind_outcome = self.verify_individual_multiclass_output()
     with self.subTest("Test results are probabilities - sum to 1:"):
         # Pick each treatment's sub-frame explicitly instead of using
         # `DataFrame.groupby(axis="columns")`: the `axis` argument of
         # groupby is deprecated in recent pandas releases.
         treatment_values = ind_outcome.columns.get_level_values("a").unique()
         for treatment_value in treatment_values:
             y_pred = ind_outcome.xs(treatment_value,
                                     axis="columns",
                                     level="a")
             pd.testing.assert_series_equal(
                 pd.Series(1.0, index=y_pred.index),
                 y_pred.sum(axis="columns"))
示例#2
0
 def test_predict(self):
     """Check the output width when class labels (not scores) are predicted.

     With ``predict_proba=False`` the individual-outcome frame is expected to
     hold one column per treatment value, and therefore not one column per
     outcome class.
     """
     learner = LogisticRegression(C=1e6, solver='lbfgs')
     self.estimator = Standardization(learner, predict_proba=False)

     X = self.data_3cls["X"]
     a = self.data_3cls["a"]
     y = self.data_3cls["y"]
     self.estimator.fit(X, a, y)
     ind_outcome = self.estimator.estimate_individual_outcome(X, a)

     with self.subTest("Output size, # predictions:"):
         # One prediction column per treatment value ...
         self.assertEqual(a.nunique(), ind_outcome.shape[1])
         # ... which must differ from the number of outcome classes.
         self.assertNotEqual(y.nunique(), ind_outcome.shape[1])
示例#3
0
 def test_treatment_encoding(self):
     """Check that non-numeric treatment values are encoded before fitting.

     The 0/1 treatment is relabelled to "p"/"q" and a `Standardization` model
     is fitted with ``encode_treatment=True``. The test then verifies that an
     encoder attribute exists, that it learned exactly the two labels, and
     that the learner has one coefficient per covariate plus one per
     treatment category.
     """
     self.estimator = Standardization(LinearRegression(),
                                      encode_treatment=True)
     covariates = self.data_lin["X"]
     a = self.data_lin["a"].replace({0: "p", 1: "q"})
     self.estimator.fit(covariates, a, self.data_lin["y"])

     with self.subTest("Treatment encoder created:"):
         self.assertTrue(hasattr(self.estimator, "treatment_encoder_"))

     with self.subTest("Treatment categories properly encoded"):
         # `categories_` holds one array per encoded column; unpacking it
         # into `set` requires exactly one such array.
         learned_categories = set(
             *self.estimator.treatment_encoder_.categories_)
         self.assertSetEqual({"p", "q"}, learned_categories)

     with self.subTest("Fitted model has the right size"):
         n_coefficients = len(self.estimator.learner.coef_)
         expected_n_coefficients = covariates.shape[1] + a.nunique()
         self.assertEqual(n_coefficients, expected_n_coefficients)
示例#4
0
 def setUpClass(cls):
     """Build the doubly-robust estimator shared by the tests of this class.

     Runs the base-class setup first, then combines an unstabilized IPW
     weight model with a linear-regression `Standardization` outcome model
     into a `DoublyRobustIpFeature` estimator stored on ``cls.estimator``.
     """
     TestDoublyRobustBase.setUpClass()
     # A very large C avoids regularization of the propensity model:
     ipw = IPW(LogisticRegression(C=1e6, solver='lbfgs'),
               use_stabilized=False)
     # `LinearRegression(normalize=...)` was removed in scikit-learn 1.2.
     # Ordinary least squares gives the same fit without it, so the plain
     # constructor is used to stay compatible with old and new releases.
     std = Standardization(LinearRegression())
     cls.estimator = DoublyRobustIpFeature(std, ipw)
示例#5
0
 def fit_and_predict_all_learners(self, data, estimator):
     """Fit the estimator under test and collect three population-outcome estimates.

     Args:
         data (dict): Provides the covariates under ``"X"``, the treatment
             assignment under ``"a"`` and the outcome under ``"y"``.
         estimator: Currently unused (see the review note below).

     Returns:
         tuple: ``(doubly_res, std_res, ipw_res)`` - the population outcomes
             estimated by ``self.estimator``, by a freshly fitted
             linear-regression `Standardization` model, and by the
             estimator's own weight model, respectively.
     """
     X, a, y = data["X"], data["a"], data["y"]
     # NOTE(review): the `estimator` argument is ignored and `self.estimator`
     # is used instead. Confirm with the callers whether the argument should
     # be honoured; behaviour is left unchanged here.
     self.estimator.fit(X, a, y)
     doubly_res = self.estimator.estimate_population_outcome(X, a)
     # `LinearRegression(normalize=...)` was removed in scikit-learn 1.2;
     # ordinary least squares yields the same fit without it.
     std_model = Standardization(LinearRegression()).fit(X, a, y)
     std_res = std_model.estimate_population_outcome(X, a)
     ipw_res = self.estimator.weight_model.estimate_population_outcome(X, a, y)
     return doubly_res, std_res, ipw_res
示例#6
0
class TestStandardization(TestStandardizationCommon):
    """Tests for `Standardization` wrapping an ordinary linear regression.

    Most checks delegate to the ``ensure_*`` helpers inherited from
    `TestStandardizationCommon`; the data (``data_lin``) is presumably
    created by the parent's ``setUpClass``.
    """

    @classmethod
    def setUpClass(cls):
        """Run the parent setup and create the estimator under test."""
        TestStandardizationCommon.setUpClass()
        # `LinearRegression(normalize=...)` was removed in scikit-learn 1.2.
        # Ordinary least squares gives the same fit without it, so the plain
        # constructor keeps the tests runnable on old and new releases.
        cls.estimator = Standardization(LinearRegression())

    def setUp(self):
        """Fit the estimator on the linear dataset before every test."""
        self.estimator.fit(self.data_lin["X"], self.data_lin["a"],
                           self.data_lin["y"])

    def test_is_fitted(self):
        """The wrapped learner exposes ``coef_`` once the estimator is fitted."""
        self.assertTrue(hasattr(self.estimator.learner, "coef_"))

    def test_effect_estimation(self):
        """The first coefficient matches the dataset's ``beta``; then run the shared check."""
        with self.subTest("Check by model coefficient:"):
            self.assertAlmostEqual(self.estimator.learner.coef_[0],
                                   self.data_lin["beta"],
                                   places=5)
        self.ensure_effect_estimation()

    def test_observed_prediction(self):
        """Delegate to the shared observed-prediction check."""
        self.ensure_observed_prediction()

    def test_counterfactual_outcomes(self):
        """Delegate to the shared counterfactual-outcomes check."""
        self.ensure_counterfactual_outcomes()

    def test_treatment_encoding(self):
        """Check that string-valued treatments are encoded before fitting.

        The 0/1 treatment is relabelled to "p"/"q"; with
        ``encode_treatment=True`` the fitted estimator must expose an encoder
        that learned exactly these two labels, and the learner must hold one
        coefficient per covariate plus one per treatment category.
        """
        self.estimator = Standardization(LinearRegression(),
                                         encode_treatment=True)
        a = self.data_lin["a"].replace({0: "p", 1: "q"})
        self.estimator.fit(self.data_lin["X"], a, self.data_lin["y"])
        with self.subTest("Treatment encoder created:"):
            self.assertTrue(hasattr(self.estimator, "treatment_encoder_"))
        with self.subTest("Treatment categories properly encoded"):
            # `categories_` holds one array per encoded column; unpacking it
            # into `set` requires exactly one such array.
            self.assertSetEqual(
                {"p", "q"},
                set(*self.estimator.treatment_encoder_.categories_))
        with self.subTest("Fitted model has the right size"):
            self.assertEqual(len(self.estimator.learner.coef_),
                             self.data_lin["X"].shape[1] + a.nunique())

    def test_pipeline_learner(self):
        """Delegate to the shared pipeline-learner check."""
        self.ensure_pipeline_learner()

    def test_many_models(self):
        """Delegate to the shared many-models check."""
        self.ensure_many_models()
示例#7
0
    def ensure_many_models(self, clip_min=None, clip_max=None):
        """Smoke-test fitting and prediction over many learner combinations.

        For every pairing of a propensity classifier with an outcome
        regressor, an estimator of the same class as ``self.estimator`` is
        built, fitted (with ``refit_weight_model=False``) and asked for
        individual outcomes. Each pairing runs in its own sub-test; a
        pairing passes simply by not raising.

        Args:
            clip_min: Forwarded to `IPW` as ``clip_min``.
            clip_max: Forwarded to `IPW` as ``clip_max``.
        """
        from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
        from sklearn.neural_network import MLPRegressor
        from sklearn.linear_model import ElasticNet, RANSACRegressor, HuberRegressor, PassiveAggressiveRegressor
        from sklearn.neighbors import KNeighborsRegressor
        from sklearn.svm import SVR, LinearSVR

        from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
        from sklearn.neural_network import MLPClassifier
        from sklearn.neighbors import KNeighborsClassifier

        from sklearn.exceptions import ConvergenceWarning

        data = self.create_uninformative_ox_dataset()
        fit_args = (data["X"], data["a"], data["y"])

        # Silence convergence warnings only for the duration of this check;
        # a bare `filterwarnings` call would change the process-wide filter
        # and leak into every test that runs afterwards.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=ConvergenceWarning)

            for propensity_learner in [
                    GradientBoostingClassifier(n_estimators=10),
                    RandomForestClassifier(n_estimators=100),
                    MLPClassifier(hidden_layer_sizes=(5, )),
                    KNeighborsClassifier(n_neighbors=20)
            ]:
                weight_model = IPW(propensity_learner,
                                   clip_min=clip_min,
                                   clip_max=clip_max)
                propensity_learner_name = type(propensity_learner).__name__
                for outcome_learner in [
                        GradientBoostingRegressor(n_estimators=10),
                        RandomForestRegressor(n_estimators=10),
                        MLPRegressor(hidden_layer_sizes=(5, )),
                        ElasticNet(),
                        RANSACRegressor(),
                        HuberRegressor(),
                        PassiveAggressiveRegressor(),
                        KNeighborsRegressor(),
                        SVR(),
                        LinearSVR()
                ]:
                    outcome_learner_name = type(outcome_learner).__name__
                    outcome_model = Standardization(outcome_learner)

                    with self.subTest("Test fit & predict using {} & {}".format(
                            propensity_learner_name, outcome_learner_name)):
                        model = self.estimator.__class__(outcome_model,
                                                         weight_model)
                        model.fit(*fit_args, refit_weight_model=False)
                        # Reaching the end of the sub-test without an
                        # exception is the success criterion.
                        model.estimate_individual_outcome(data["X"], data["a"])
示例#8
0
def test_standardization_matches_causallib(linear_data_pandas):
    """Check that `StandardizationEstimator` reproduces causallib's results.

    A causallib `Standardization` model with a linear regression is fitted
    on the fixture data as a reference. The individual and average treatment
    effects derived from its potential outcomes (treatment 1 minus treatment
    0) must equal those reported by `StandardizationEstimator`.
    """
    w, t, y = linear_data_pandas

    # Reference values computed directly with causallib.
    reference = Standardization(LinearRegression())
    reference.fit(w, t, y)
    potential_outcomes = reference.estimate_individual_outcome(w, t)
    expected_ite = potential_outcomes[1] - potential_outcomes[0]
    population_outcomes = reference.estimate_population_outcome(w, t)
    expected_ate = population_outcomes[1] - population_outcomes[0]

    # The wrapper under test, with its default outcome model.
    estimator = StandardizationEstimator()
    estimator.fit(w, t, y)

    assert expected_ate == estimator.estimate_ate()
    pd.testing.assert_series_equal(expected_ite, estimator.estimate_ite())
示例#9
0
 def __init__(self, outcome_model=None):
     """Wrap a causallib `Standardization` estimator around an outcome model.

     Args:
         outcome_model: Learner handed to `Standardization` as ``learner``.
             When omitted (``None``), a new `LinearRegression` is created
             for this instance.
     """
     # A default written as `outcome_model=LinearRegression()` is evaluated
     # once, at definition time, so every instance would receive the very
     # same learner object. Build a fresh one per instance instead.
     if outcome_model is None:
         outcome_model = LinearRegression()
     super().__init__(causallib_estimator=Standardization(learner=outcome_model))
示例#10
0
    def test_many_models(self):
        """Check fitting with outcome learners that do or do not accept sample weights.

        For every propensity classifier, an estimator of the same class as
        ``self.estimator`` is built around each outcome regressor and fitted
        with ``refit_weight_model=False``. The first group of regressors is
        expected to fit without error; the second group is expected to raise
        ``TypeError`` because the estimator fits the outcome model with
        sample weights, which those learners do not support.
        """
        from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
        from sklearn.neural_network import MLPRegressor
        from sklearn.linear_model import RANSACRegressor, HuberRegressor, PassiveAggressiveRegressor
        from sklearn.neighbors import KNeighborsRegressor
        from sklearn.svm import SVR, LinearSVR

        from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
        from sklearn.neural_network import MLPClassifier
        from sklearn.neighbors import KNeighborsClassifier

        from sklearn.exceptions import ConvergenceWarning

        data = self.create_uninformative_ox_dataset()
        fit_args = (data["X"], data["a"], data["y"])

        # Silence convergence warnings only for the duration of this test;
        # a bare `filterwarnings` call would change the process-wide filter
        # and leak into every test that runs afterwards.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=ConvergenceWarning)

            for propensity_learner in [
                    GradientBoostingClassifier(n_estimators=10),
                    RandomForestClassifier(n_estimators=100),
                    MLPClassifier(hidden_layer_sizes=(5, )),
                    KNeighborsClassifier(n_neighbors=20)
            ]:
                weight_model = IPW(propensity_learner)
                propensity_learner_name = type(propensity_learner).__name__

                # Learners expected to fit successfully.
                for outcome_learner in [
                        GradientBoostingRegressor(n_estimators=10),
                        RandomForestRegressor(n_estimators=10),
                        RANSACRegressor(),
                        HuberRegressor(),
                        SVR(),
                        LinearSVR()
                ]:
                    outcome_learner_name = type(outcome_learner).__name__
                    outcome_model = Standardization(outcome_learner)

                    with self.subTest("Test fit using {} & {}".format(
                            propensity_learner_name, outcome_learner_name)):
                        model = self.estimator.__class__(outcome_model,
                                                         weight_model)
                        # Completing the fit without an exception is the
                        # success criterion.
                        model.fit(*fit_args, refit_weight_model=False)

                # Learners expected to be rejected.
                # NOTE: ElasticNet is deliberately left out of this list. It
                # supports sample weights since scikit-learn 0.23, so whether
                # it raises depends on the installed version.
                for outcome_learner in [
                        MLPRegressor(hidden_layer_sizes=(5, )),
                        PassiveAggressiveRegressor(),
                        KNeighborsRegressor()
                ]:
                    outcome_learner_name = type(outcome_learner).__name__
                    outcome_model = Standardization(outcome_learner)

                    with self.subTest("Test fit using {} & {}".format(
                            propensity_learner_name, outcome_learner_name)):
                        model = self.estimator.__class__(outcome_model,
                                                         weight_model)
                        with self.assertRaises(TypeError):
                            # Joffe forces learning with sample_weights,
                            # not all ML models support that and so calling should fail
                            model.fit(*fit_args, refit_weight_model=False)
示例#11
0
 def test_decision_function(self):
     """Run the multiclass output checks with a one-vs-rest SVC learner.

     The estimator is created with ``predict_proba=True`` around an `SVC`;
     judging by the test name, this is presumably meant to exercise
     predictions coming from the learner's ``decision_function``.
     """
     svc_learner = SVC(decision_function_shape='ovr')
     self.estimator = Standardization(svc_learner, predict_proba=True)
     self.verify_individual_multiclass_output()
示例#12
0
class TestStandardizationClassification(TestStandardizationCommon):
    """Tests for `Standardization` with a three-class outcome and a binary treatment."""

    @classmethod
    def setUpClass(cls):
        """Generate the classification dataset shared by all tests of the class.

        The last generated feature is binarized at its median and used as the
        treatment assignment; the remaining features are the covariates.
        """
        # Three-class outcome, since decision_function might return a vector when n_classes=2, and we wish to check the
        # matrix form of the output behaves as expected:
        X, y = make_classification(n_features=3,
                                   n_informative=2,
                                   n_redundant=0,
                                   n_repeated=0,
                                   n_classes=3,
                                   n_clusters_per_class=1,
                                   flip_y=0.0,
                                   class_sep=10.0,
                                   # Fixed seed so that every run sees the same data:
                                   random_state=0)
        X, a = X[:, :-1], X[:, -1]
        a = (a > np.median(a)).astype(int)
        cls.data_3cls = {
            "X": pd.DataFrame(X),
            "a": pd.Series(a),
            "y": pd.Series(y)
        }

    def verify_individual_multiclass_output(self):
        """Fit ``self.estimator`` and validate the layout of its individual outcomes.

        Checks that the output has one row per sample and a two-level column
        index named ``["a", "y"]`` holding one column per
        (treatment value, outcome value) combination.

        Returns:
            The individual-outcome frame produced by the estimator, for
            further checks by the caller.
        """
        self.estimator.fit(self.data_3cls["X"], self.data_3cls["a"],
                           self.data_3cls["y"])
        ind_outcome = self.estimator.estimate_individual_outcome(
            self.data_3cls["X"], self.data_3cls["a"])

        with self.subTest("Output size, # samples:"):
            self.assertEqual(self.data_3cls["X"].shape[0],
                             ind_outcome.shape[0])
        with self.subTest("Output size, # predictions:"):
            with self.subTest(
                    "Output's multiindex level names are describing treatment and outcome"
            ):
                self.assertEqual(["a", "y"], ind_outcome.columns.names)
            with self.subTest(
                    "Output's number of predictions is the same as number of outcome and treatment values"
            ):
                self.assertEqual(
                    self.data_3cls["a"].nunique() *
                    self.data_3cls["y"].nunique(), ind_outcome.shape[1])
                self.assertEqual(
                    self.data_3cls["a"].nunique(),
                    ind_outcome.columns.get_level_values("a").unique().size)
                self.assertEqual(
                    self.data_3cls["y"].nunique(),
                    ind_outcome.columns.get_level_values("y").unique().size)
        return ind_outcome

    def test_predict_proba(self):
        """Probabilistic predictions must sum to one within each treatment value."""
        self.estimator = Standardization(LogisticRegression(C=1e6,
                                                            solver='lbfgs'),
                                         predict_proba=True)
        ind_outcome = self.verify_individual_multiclass_output()
        with self.subTest("Test results are probabilities - sum to 1:"):
            # Pick each treatment's sub-frame explicitly instead of using
            # `DataFrame.groupby(axis="columns")`: the `axis` argument of
            # groupby is deprecated in recent pandas releases.
            treatment_values = ind_outcome.columns.get_level_values("a").unique()
            for treatment_value in treatment_values:
                y_pred = ind_outcome.xs(treatment_value,
                                        axis="columns",
                                        level="a")
                pd.testing.assert_series_equal(
                    pd.Series(1.0, index=y_pred.index),
                    y_pred.sum(axis="columns"))

    def test_decision_function(self):
        """Run the multiclass output checks with a one-vs-rest SVC learner."""
        self.estimator = Standardization(SVC(decision_function_shape='ovr'),
                                         predict_proba=True)
        self.verify_individual_multiclass_output()

    def test_predict(self):
        """With ``predict_proba=False`` the output has one column per treatment value."""
        self.estimator = Standardization(LogisticRegression(C=1e6,
                                                            solver='lbfgs'),
                                         predict_proba=False)
        self.estimator.fit(self.data_3cls["X"], self.data_3cls["a"],
                           self.data_3cls["y"])
        ind_outcome = self.estimator.estimate_individual_outcome(
            self.data_3cls["X"], self.data_3cls["a"])
        with self.subTest("Output size, # predictions:"):
            self.assertEqual(self.data_3cls["a"].nunique(),
                             ind_outcome.shape[1])
            # ... and hence not one column per outcome class:
            self.assertNotEqual(self.data_3cls["y"].nunique(),
                                ind_outcome.shape[1])
示例#13
0
 def setUpClass(cls):
     """Run the parent setup and create the estimator under test."""
     TestStandardizationCommon.setUpClass()
     # `LinearRegression(normalize=...)` was removed in scikit-learn 1.2.
     # Ordinary least squares gives the same fit without it, so the plain
     # constructor keeps the tests runnable on old and new releases.
     cls.estimator = Standardization(LinearRegression())
示例#14
0
 def init(self, reduced, importance_sampling):
     """Create the TMLE estimator used by the tests and store it on ``self._estimator``.

     Args:
         reduced: Forwarded to `TMLE` as ``reduced``.
         importance_sampling: Forwarded to `TMLE` as ``importance_sampling``.
     """
     outcome_model = Standardization(self.outcome_model_cont)
     weight_model = IPW(self.treatment_model)
     self._estimator = TMLE(
         outcome_model,
         weight_model,
         reduced=reduced,
         importance_sampling=importance_sampling,
     )