def test_ipw_matches_causallib(linear_data_pandas):
    w, t, y = linear_data_pandas

    # Reference estimate computed directly with causallib's IPW.
    causallib_ipw = IPW(learner=LogisticRegression())
    causallib_ipw.fit(w, t)
    potential_outcomes = causallib_ipw.estimate_population_outcome(
        w, t, y, treatment_values=[0, 1])
    causallib_effect = causallib_ipw.estimate_effect(potential_outcomes[1],
                                                     potential_outcomes[0])[0]

    # Our wrapper should reproduce the causallib estimate exactly.
    ipw = IPWEstimator()
    ipw.fit(w, t, y)
    our_effect = ipw.estimate_ate()
    assert our_effect == causallib_effect
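# The `linear_data_pandas` fixture used above is assumed to yield pandas-typed confounders,
# treatment, and outcome. The pytest fixture below is a minimal sketch of what such a fixture
# could look like; the names and data-generating process are illustrative, not the repository's
# actual fixture.
import numpy as np
import pandas as pd
import pytest


@pytest.fixture
def linear_data_pandas():
    """Hypothetical fixture: single confounder, linear outcome, true ATE of 2.0."""
    rng = np.random.default_rng(0)
    n = 500
    w = pd.DataFrame({'w0': rng.normal(size=n)})
    propensity = 1 / (1 + np.exp(-w['w0']))              # treatment depends on the confounder
    t = pd.Series(rng.binomial(1, propensity), name='t')
    y = pd.Series(2.0 * t + 1.5 * w['w0'] + rng.normal(size=n), name='y')
    return w, t, y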
class IPWEstimator(BaseEstimator):
    """ATE estimator based on inverse probability weighting, wrapping causallib's IPW."""

    def __init__(self, prop_score_model=LogisticRegression(), trim_weights=False,
                 trim_eps=None, stabilized=False):
        if trim_weights and trim_eps is None:
            trim_eps = TRIM_EPS
        self.ipw = IPW(learner=prop_score_model, truncate_eps=trim_eps,
                       use_stabilized=stabilized)
        self.w = None
        self.t = None
        self.y = None

    def fit(self, w, t, y):
        w, t, y = to_pandas(w, t, y)
        self.ipw.fit(w, t)
        self.w = w
        self.t = t
        self.y = y

    def estimate_ate(self, t1=1, t0=0, w=None, t=None, y=None):
        w = self.w if w is None else w
        t = self.t if t is None else t
        y = self.y if y is None else y
        if w is None or t is None or y is None:
            raise RuntimeError('Must run .fit(w, t, y) before running .estimate_ate()')
        w, t, y = to_pandas(w, t, y)
        mean_potential_outcomes = self.ipw.estimate_population_outcome(
            w, t, y, treatment_values=[t0, t1])
        # Index by treatment value so non-default t0/t1 are handled correctly.
        ate_estimate = mean_potential_outcomes[t1] - mean_potential_outcomes[t0]
        # Use estimate_effect() instead if effect measures other than the difference are needed:
        # ate_estimate = self.ipw.estimate_effect(mean_potential_outcomes[t1],
        #                                         mean_potential_outcomes[t0])[0]
        return ate_estimate

    def ate_conf_int(self, percentile=.95) -> tuple:
        raise NotImplementedError
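# Illustrative usage of IPWEstimator, assuming the class above is importable and that
# to_pandas() converts numpy inputs. The data is synthetic with a true ATE of 3.0, so the
# estimate should land near that value; this is a sketch, not a test from the repository.
import numpy as np

rng = np.random.default_rng(0)
n = 1000
w = rng.normal(size=(n, 3))                              # confounders
t = rng.binomial(1, 1 / (1 + np.exp(-w[:, 0])))          # treatment depends on first confounder
y = 3.0 * t + w @ np.array([1.0, -0.5, 0.2]) + rng.normal(size=n)

est = IPWEstimator(trim_weights=True)
est.fit(w, t, y)
print(est.estimate_ate())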
import unittest

import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

from causallib.estimation import IPW


class TestIPW(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # Data:
        X, a = make_classification(n_features=1, n_informative=1, n_redundant=0,
                                   n_repeated=0, n_classes=2, n_clusters_per_class=1,
                                   flip_y=0.0, class_sep=10.0)
        cls.data_r_100 = {"X": pd.DataFrame(X), "a": pd.Series(a)}
        X, a = make_classification(n_features=1, n_informative=1, n_redundant=0,
                                   n_repeated=0, n_classes=2, n_clusters_per_class=1,
                                   flip_y=0.2, class_sep=10.0)
        cls.data_r_80 = {"X": pd.DataFrame(X), "a": pd.Series(a)}

        # Data that maps x=0->a=0 and x=1->a=1:
        X = pd.Series([0] * 50 + [1] * 50)
        cls.data_cat_r_100 = {"X": X.to_frame(), "a": X}

        # Data that maps x=0->a=0 and x=1->a=1, but 10% of x=0->a=1 and 10% of x=1->a=0:
        X = pd.Series([0] * 40 + [1] * 10 + [1] * 40 + [0] * 10).to_frame()
        a = pd.Series([0] * 50 + [1] * 50)
        cls.data_cat_r_80 = {"X": X, "a": a}

        # Large C avoids regularization of the model:
        cls.estimator = IPW(LogisticRegression(C=1e6, solver='lbfgs'),
                            clip_min=0.05, clip_max=0.95, use_stabilized=False)

    def setUp(self):
        self.estimator.fit(self.data_r_100["X"], self.data_r_100["a"])

    def test_is_fitted(self):
        self.assertTrue(hasattr(self.estimator.learner, "coef_"))

    def test_weight_matrix_vector_matching(self):
        a = self.data_r_100["a"]
        p_vec = self.estimator.compute_weights(self.data_r_100["X"], a)
        p_mat = self.estimator.compute_weight_matrix(self.data_r_100["X"], a)
        self.assertEqual(p_vec.size, p_mat.shape[0])
        for i in range(a.shape[0]):
            self.assertAlmostEqual(p_mat.loc[i, a[i]], p_vec[i])

    def test_weight_sizes(self):
        a = self.data_r_100["a"]
        with self.subTest("Weight vector size"):
            p = self.estimator.compute_weights(self.data_r_100["X"], a)
            self.assertEqual(len(p.shape), 1)  # Vector has no second axis
            self.assertEqual(p.shape[0], a.shape[0])

        with self.subTest("Weight matrix size"):
            p = self.estimator.compute_weight_matrix(self.data_r_100["X"], a)
            self.assertEqual(len(p.shape), 2)  # Matrix has two dimensions
            self.assertEqual(p.shape[0], a.shape[0])
            self.assertEqual(p.shape[1], np.unique(a).size)

    def ensure_truncation(self, test_weights):
        with self.subTest("Estimator initialization parameters"):
            p = self.estimator.compute_propensity(self.data_r_80["X"], self.data_r_80["a"])
            if test_weights:
                p = self.estimator.compute_weights(self.data_r_80["X"],
                                                   self.data_r_80["a"]).pow(-1)
            self.assertAlmostEqual(p.min(), 0.05)
            self.assertAlmostEqual(p.max(), 1 - 0.05)

        with self.subTest("Overwrite parameters in compute_weights"):
            p = self.estimator.compute_propensity(self.data_r_80["X"], self.data_r_80["a"],
                                                  clip_min=0.1, clip_max=0.9)
            if test_weights:
                p = self.estimator.compute_weights(self.data_r_80["X"], self.data_r_80["a"],
                                                   clip_min=0.1, clip_max=0.9).pow(-1)
            self.assertAlmostEqual(p.min(), 0.1)
            self.assertAlmostEqual(p.max(), 1 - 0.1)

        with self.subTest("Test asymmetric clipping"):
            p = self.estimator.compute_propensity(self.data_r_80["X"], self.data_r_80["a"],
                                                  clip_min=0.2, clip_max=0.9)
            if test_weights:
                p = self.estimator.compute_weights(self.data_r_80["X"], self.data_r_80["a"],
                                                   clip_min=0.2, clip_max=0.9).pow(-1)
            self.assertAlmostEqual(p.min(), 0.2)
            self.assertAlmostEqual(p.max(), 0.9)

        with self.subTest("Test calculation of fraction of clipped observations"):
            probabilities = pd.DataFrame()
            probabilities['col1'] = [0.01, 0.02, 0.03, 0.05, 0.3, 0.6, 0.9, 0.95, 0.99, 0.99]
            probabilities['col2'] = [0.99, 0.98, 0.97, 0.95, 0.7, 0.4, 0.1, 0.05, 0.01, 0.01]
            frac = self.estimator._IPW__count_truncated(probabilities,
                                                        clip_min=0.05, clip_max=0.95)
            self.assertAlmostEqual(frac, 0.5)

        with self.subTest("Test calculation of fraction of clipped observations - no clipping"):
            probabilities = pd.DataFrame()
            probabilities['col1'] = [0.0, 0.0, 0.0, 1.0, 1.0]
            probabilities['col2'] = [1.0, 1.0, 1.0, 0.0, 0.0]
            frac = self.estimator._IPW__count_truncated(probabilities,
                                                        clip_min=0.0, clip_max=1.0)
            self.assertAlmostEqual(frac, 0.0)

    def test_weight_truncation(self):
        self.ensure_truncation(test_weights=True)

    def test_propensity_truncation(self):
        self.ensure_truncation(test_weights=False)

        with self.subTest("Illegal truncation values assertion on compute"):
            with self.assertRaises(AssertionError):
                self.estimator.compute_propensity(self.data_r_80["X"], self.data_r_80["a"],
                                                  clip_min=0.6)
            with self.assertRaises(AssertionError):
                self.estimator.compute_propensity(self.data_r_80["X"], self.data_r_80["a"],
                                                  clip_max=0.4)
            with self.assertRaises(AssertionError):
                self.estimator.compute_propensity(self.data_r_80["X"], self.data_r_80["a"],
                                                  clip_min=0.6, clip_max=0.9)
            with self.assertRaises(AssertionError):
                self.estimator.compute_propensity(self.data_r_80["X"], self.data_r_80["a"],
                                                  clip_min=0.1, clip_max=0.4)

        with self.subTest("Illegal truncation values assertion on initialization"):
            with self.assertRaises(AssertionError):
                IPW(LogisticRegression(), clip_min=0.6)
            with self.assertRaises(AssertionError):
                IPW(LogisticRegression(), clip_max=0.4)
            with self.assertRaises(AssertionError):
                IPW(LogisticRegression(), clip_min=0.1, clip_max=0.4)
            with self.assertRaises(AssertionError):
                IPW(LogisticRegression(), clip_min=0.6, clip_max=0.9)

    def test_weights_sanity_check(self):
        with self.subTest("Linearly separable X should have perfectly predicted propensity score"):
            p = self.estimator.compute_weights(self.data_r_100["X"], self.data_r_100["a"],
                                               clip_min=0.0, clip_max=1.0).pow(-1)
            np.testing.assert_array_almost_equal(p, np.ones_like(p), decimal=3)

        with self.subTest("Train on bijection X|a data and predict on data where q% are flipped"):
            # Train on data that maps x=0->a=0 and x=1->a=1:
            self.estimator.fit(self.data_cat_r_100["X"], self.data_cat_r_100["a"])
            # Predict on a set with mis-mapping: 10% of x=0 have a=1 and 10% of x=1 have a=0:
            p = self.estimator.compute_weights(self.data_cat_r_80["X"], self.data_cat_r_80["a"],
                                               clip_min=0.0, clip_max=1.0).pow(-1)
            # Extract subjects with mismatching X-a values:
            mis_assigned = np.logical_xor(self.data_cat_r_80["X"].iloc[:, 0],
                                          self.data_cat_r_80["a"])
            # Check they have the same rate:
            np.testing.assert_almost_equal(p.mean(), 1.0 - mis_assigned.mean(), decimal=4)

    def test_forcing_probability_learner(self):
        from sklearn.svm import SVC  # Arbitrary model with decision_function instead of predict_proba
        with self.assertRaises(AttributeError):
            IPW(SVC())

    def test_pipeline_learner(self):
        from sklearn.preprocessing import StandardScaler, MinMaxScaler
        from sklearn.pipeline import make_pipeline
        learner = make_pipeline(StandardScaler(), MinMaxScaler(),
                                LogisticRegression(solver='lbfgs'))
        with self.subTest("Test initialization with pipeline learner"):
            self.estimator = IPW(learner)
            self.assertTrue(True)  # Dummy assert for no exception thrown

        with self.subTest("Test fit with pipeline learner"):
            self.estimator.fit(self.data_r_100["X"], self.data_r_100["a"])
            self.assertTrue(True)  # Dummy assert for no exception thrown

        with self.subTest("Test 'predict' with pipeline learner"):
            self.estimator.compute_weights(self.data_r_100["X"], self.data_r_100["a"])
            self.assertTrue(True)  # Dummy assert for no exception thrown
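# For reference, the quantity these tests exercise is the inverse-propensity-weighted (Hajek)
# mean outcome per treatment arm. The helper below is a self-contained sketch of that formula
# using plain numpy/sklearn; it is not causallib's implementation, just the estimand it targets.
import numpy as np
from sklearn.linear_model import LogisticRegression


def ipw_ate_sketch(X, a, y, clip_min=0.05, clip_max=0.95):
    """Hajek-style IPW estimate of E[Y(1)] - E[Y(0)] with clipped propensity scores."""
    propensity = LogisticRegression().fit(X, a).predict_proba(X)[:, 1]
    propensity = np.clip(propensity, clip_min, clip_max)
    # Weight treated units by 1/e(x) and control units by 1/(1 - e(x)):
    weights = np.where(a == 1, 1.0 / propensity, 1.0 / (1.0 - propensity))
    y1 = np.average(y[a == 1], weights=weights[a == 1])   # weighted mean outcome, treated
    y0 = np.average(y[a == 0], weights=weights[a == 0])   # weighted mean outcome, control
    return y1 - y0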