def test_over_cat_limit(self):
    y = pd.Series(np.random.choice([0, 1], size=(500,)))
    X = pd.DataFrame({
        'a': np.random.normal(size=500),
        'b': np.random.normal(size=500),
        'c': np.random.choice([0, 1], size=500),
        'd': np.random.choice(['a', 'b', 'c', 'd'], size=500),
        'e': np.random.choice([7, 8, 9, 10, 11], size=500),
        'f': np.random.choice(['x', 'y'], size=500),
        'g': np.random.choice([0, 1], size=500),
        'h': np.random.choice(['q', 'r', 's'], size=500)
    })
    inds = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
    cats = ['c', 'd', 'e', 'f', 'g', 'h']
    hinds = ['a', 'd']

    ca = CausalAnalysis(inds, cats, hinds, upper_bound_on_cat_expansion=2)
    ca.fit(X, y)
    # columns 'd', 'e', 'h' have too many values
    self.assertEqual([res.feature_name for res in ca._results],
                     ['a', 'b', 'c', 'f', 'g'])

    ca = CausalAnalysis(inds, cats, hinds, upper_bound_on_cat_expansion=3)
    ca.fit(X, y)
    # columns 'd', 'e' have too many values
    self.assertEqual([res.feature_name for res in ca._results],
                     ['a', 'b', 'c', 'f', 'g', 'h'])

    ca.upper_bound_on_cat_expansion = 2
    ca.fit(X, y, warm_start=True)
    # lowering the bound shouldn't affect already-fit columns when warm starting
    self.assertEqual([res.feature_name for res in ca._results],
                     ['a', 'b', 'c', 'f', 'g', 'h'])

    ca.upper_bound_on_cat_expansion = 4
    ca.fit(X, y, warm_start=True)
    # column 'd' is now okay, too
    self.assertEqual([res.feature_name for res in ca._results],
                     ['a', 'b', 'c', 'd', 'f', 'g', 'h'])
def test_invalid_inds(self):
    X = np.zeros((300, 6))
    y = np.random.normal(size=(300,))

    # first column: 10 ones, this is fine
    X[np.random.choice(300, 10, replace=False), 0] = 1
    # second column: 6 categories, plenty of random instances of each;
    # this is fine only if we increase the category limit
    X[:, 1] = np.random.choice(6, 300)
    # third column: nine ones, lots of twos; not enough unless we disable the check
    X[np.random.choice(300, 100, replace=False), 2] = 2
    X[np.random.choice(300, 9, replace=False), 2] = 1
    # fourth column: 5 ones, also not enough, but barely works even with forest heterogeneity
    X[np.random.choice(300, 5, replace=False), 3] = 1
    # fifth column: 2 ones, ensures that we will change the number of folds for linear
    # heterogeneity; forest heterogeneity won't work
    X[np.random.choice(300, 2, replace=False), 4] = 1
    # sixth column: just 1 one, not enough even without the check
    X[np.random.choice(300, 1), 5] = 1

    col_names = ['a', 'b', 'c', 'd', 'e', 'f']
    X = pd.DataFrame(X, columns=col_names)

    for n in ['linear', 'automl']:
        for h in ['linear', 'forest']:
            for warm_start in [True, False]:
                ca = CausalAnalysis(col_names, col_names, col_names, verbose=1,
                                    nuisance_models=n, heterogeneity_model=h)
                ca.fit(X, y)
                # only the first column is okay
                self.assertEqual(ca.trained_feature_indices_, [0])
                self.assertEqual(ca.untrained_feature_indices_,
                                 [(1, 'upper_bound_on_cat_expansion'), (2, 'cat_limit'),
                                  (3, 'cat_limit'), (4, 'cat_limit'), (5, 'cat_limit')])

                # increase the bound on category expansion
                ca.upper_bound_on_cat_expansion = 6
                ca.fit(X, y, warm_start=warm_start)
                # the second column is okay now, too
                self.assertEqual(ca.trained_feature_indices_, [0, 1])
                self.assertEqual(ca.untrained_feature_indices_,
                                 [(2, 'cat_limit'), (3, 'cat_limit'),
                                  (4, 'cat_limit'), (5, 'cat_limit')])

                # skip the category-count checks (reducing folds accordingly)
                ca.skip_cat_limit_checks = True
                ca.fit(X, y, warm_start=warm_start)
                if h == 'linear':
                    # all but the last column are okay
                    self.assertEqual(ca.trained_feature_indices_, [0, 1, 2, 3, 4])
                    self.assertEqual(ca.untrained_feature_indices_, [(5, 'cat_limit')])
                else:
                    # forest heterogeneity can't handle the last two columns
                    self.assertEqual(ca.trained_feature_indices_, [0, 1, 2, 3])
                    self.assertEqual(ca.untrained_feature_indices_,
                                     [(4, 'cat_limit'), (5, 'cat_limit')])
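# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the test suite) of the warm-start workflow
# exercised above: fit, inspect which columns were skipped and why, relax the
# category-expansion limit, then refit with warm_start=True so already-trained
# columns are kept. The import path is an assumption; adjust it to the
# installed econml version.
#
#     import numpy as np
#     import pandas as pd
#     from econml.solutions.causal_analysis import CausalAnalysis
#
#     X = pd.DataFrame({'a': np.random.normal(size=500),
#                       'd': np.random.choice(['a', 'b', 'c', 'd'], size=500)})
#     y = np.random.choice([0, 1], size=500)
#
#     ca = CausalAnalysis(['a', 'd'], ['d'], ['a'], upper_bound_on_cat_expansion=2)
#     ca.fit(X, y)
#     print(ca.untrained_feature_indices_)  # 'd' skipped: too many categories
#
#     ca.upper_bound_on_cat_expansion = 4   # relax the limit
#     ca.fit(X, y, warm_start=True)         # only the previously skipped column is fit
#     print(ca.trained_feature_indices_)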