示例#1
0
    def test_individualized_policy(self):
        """Check the shape of ``individualized_policy`` output per feature.

        A ``CausalAnalysis`` is fitted for every combination of target
        container (``pd.Series`` and a column-shaped ndarray) and
        ``classification`` flag.  For each fit the policy frame is requested
        with default arguments, with ``n_rows``, with a scalar treatment cost
        and with a per-sample, per-treatment cost array; each result is
        checked for its row count and for the four extra columns.
        """
        n_samples = 500
        y_arr = np.random.choice([0, 1], size=(n_samples, ))
        X = pd.DataFrame({
            'a': np.random.normal(size=n_samples),
            'b': np.random.normal(size=n_samples),
            'c': np.random.choice([0, 1], size=n_samples),
            'd': np.random.choice(['a', 'b', 'c'], size=n_samples)
        })
        inds = ['a', 'b', 'c', 'd']
        cats = ['c', 'd']
        hinds = ['a', 'd']

        # new cols for policy, effect, upper and lower bounds
        expected_cols = 4 + X.shape[1]

        def assert_policy_shape(df, expected_rows):
            # Shared shape check so every scenario asserts the same contract.
            self.assertEqual(df.shape[0], expected_rows)
            self.assertEqual(df.shape[1], expected_cols)

        for y in [pd.Series(y_arr), y_arr.reshape(-1, 1)]:
            for classification in [True, False]:
                # subTest labels a failure with the combination that broke.
                with self.subTest(y_type=type(y).__name__,
                                  classification=classification):
                    ca = CausalAnalysis(inds,
                                        cats,
                                        hinds,
                                        heterogeneity_model='linear',
                                        classification=classification)
                    ca.fit(X, y)

                    # all rows included by default
                    assert_policy_shape(
                        ca.individualized_policy(X, 'a'), n_samples)

                    # n_rows limits the number of returned rows
                    assert_policy_shape(
                        ca.individualized_policy(X, 'b', n_rows=5), 5)

                    # verify that we can use a scalar treatment cost
                    assert_policy_shape(
                        ca.individualized_policy(
                            X, 'c', treatment_costs=100),
                        n_samples)

                    # verify that we can specify per-treatment costs for
                    # each sample
                    assert_policy_shape(
                        ca.individualized_policy(
                            X,
                            'd',
                            alpha=0.05,
                            treatment_costs=np.random.normal(
                                size=(n_samples, 2))),
                        n_samples)

                    # Smoke check only: the call must not raise; its return
                    # value is not inspected.
                    ca._individualized_policy_dict(X, 'a')
示例#2
0
 def test_policy_with_index(self):
     """Policy output for a shuffled-index frame contains no nulls."""
     n_rows = 1000

     # Build a frame whose integer index is deliberately out of order.
     shuffled_index = np.arange(n_rows)
     np.random.shuffle(shuffled_index)
     features = np.random.normal(0, 1, size=(n_rows, 2))
     X = pd.DataFrame(features, columns=['A', 'B'], index=shuffled_index)
     y = np.random.normal(0, 1, size=n_rows)

     ca_test = CausalAnalysis(feature_inds=['A'], categorical=[])
     ca_test.fit(X, y)

     # Request the policy for a 50-row slice of the fitted frame.
     subset = X[:50]
     ind_policy = ca_test.individualized_policy(subset, feature_index='A')
     has_nulls = ind_policy.isnull().values.any()
     self.assertFalse(has_nulls)
    def compute(self):
        """Computes the causal insights by running the causal configuration.

        Every entry of ``self._causal_config_list`` that is not already
        flagged as computed is processed: a ``CausalAnalysis`` is fitted on
        the concatenation of the train and test data, global effects are
        computed, and local effects plus per-treatment policies are computed
        on the test data.

        Results are written to the configuration, and ``is_computed`` is set,
        only after all steps for that configuration succeeded.  A
        configuration that fails validation or fitting therefore stays
        pending and is retried by the next call instead of being skipped
        with missing or partial results.

        :raises UserConfigValidationException: if a configuration's
            ``nuisance_model`` is neither ``CausalConstants.AUTOML`` nor
            ``CausalConstants.LINEAR``.
        """
        for config in self._causal_config_list:
            if config.is_computed:
                continue

            if config.nuisance_model not in [CausalConstants.AUTOML,
                                             CausalConstants.LINEAR]:
                message = (f"nuisance_model should be one of "
                           f"['{CausalConstants.AUTOML}', "
                           f"'{CausalConstants.LINEAR}'], "
                           f"got {config.nuisance_model}")
                raise UserConfigValidationException(message)

            is_classification = self._task_type == ModelTask.CLASSIFICATION

            # Concatenate train and test once and derive both the feature
            # frame and the target vector from the same combined frame.
            full_data = pd.concat([self._train, self._test],
                                  ignore_index=True)
            X = full_data.drop([self._target_column], axis=1)
            y = full_data[self._target_column].values.ravel()

            categoricals = self._categorical_features
            if categoricals is None:
                categoricals = []

            analysis = CausalAnalysis(
                config.treatment_features,
                categoricals,
                heterogeneity_inds=config.heterogeneity_features,
                classification=is_classification,
                nuisance_models=config.nuisance_model,
                upper_bound_on_cat_expansion=config.max_cat_expansion,
                skip_cat_limit_checks=config.skip_cat_limit_checks,
                n_jobs=-1)
            analysis.fit(X, y)

            X_test = self._test.drop([self._target_column], axis=1)

            global_effects = analysis.global_causal_effect(
                alpha=config.alpha, keep_all_levels=True)
            local_effects = analysis.local_causal_effect(
                X_test, alpha=config.alpha, keep_all_levels=True)

            policies = []
            for treatment_feature in config.treatment_features:
                local_policies = analysis.individualized_policy(
                    X_test, treatment_feature,
                    treatment_costs=config.treatment_cost,
                    alpha=config.alpha)

                tree = analysis._policy_tree_output(
                    X_test, treatment_feature,
                    treatment_costs=config.treatment_cost,
                    max_depth=config.max_tree_depth,
                    min_samples_leaf=config.min_tree_leaf_samples,
                    alpha=config.alpha)

                policies.append({
                    self.TREATMENT_FEATURE: treatment_feature,
                    self.CONTROL_TREATMENT: tree.control_name,
                    self.LOCAL_POLICIES: local_policies,
                    self.POLICY_GAINS: {
                        self.RECOMMENDED_POLICY_GAINS: tree.policy_value,
                        self.TREATMENT_GAINS: tree.always_treat,
                    },
                    self.POLICY_TREE: tree.tree_dictionary
                })

            # Publish everything together, and mark the configuration as
            # computed last, so a failure above never leaves it flagged as
            # done or half-populated.
            config.causal_analysis = analysis
            config.global_effects = global_effects
            config.local_effects = local_effects
            config.policies = policies
            config.is_computed = True