def test_causal_manager_global_cohort_effects(self, housing_data):
    """Check global cohort effects for a known and an unknown result ID.

    A causal analysis on the 'AveRooms' treatment is computed first. The
    global cohort effects requested for its ID must carry the common causal
    attributes plus every expected statistic on the first effect entry.
    Requesting effects with an unknown ID must raise a ValueError.
    """
    train_df, test_df, target_feature = housing_data
    manager = CausalManager(train_df, test_df, target_feature,
                            ModelTask.REGRESSION, None)
    manager.add(['AveRooms'])
    manager.compute()

    # Happy path: query with the ID of the only computed result.
    result_id = manager.get()[0].id
    features = test_df.drop(target_feature, axis=1)
    causal_data = manager.request_global_cohort_effects(result_id, features)
    self.verify_common_causal_data_attributes(causal_data)
    assert hasattr(causal_data, 'global_effects')

    expected_keys = (
        'point',
        'stderr',
        'zstat',
        'ci_lower',
        'ci_upper',
        'p_value',
    )
    first_effect = causal_data.global_effects[0]
    for key in expected_keys:
        assert key in first_effect

    # Error path: an ID that matches no computed result.
    unknown_id = "incorrect_query_id"
    features = test_df.drop(target_feature, axis=1)
    expected_error = ("Failed to find causal result with ID: "
                      "incorrect_query_id")
    with pytest.raises(ValueError, match=expected_error):
        manager.request_global_cohort_effects(unknown_id, features)
def _initialize_managers(self):
    """Create the analysis managers and register them on this object.

    Builds, in order, the causal, counterfactual, error analysis and
    explainer managers from this object's attributes, stores each one on
    ``self`` and collects all four in ``self._managers``.
    """
    causal = CausalManager(
        self.train, self.test, self.target_column,
        self.task_type, self.categorical_features)
    self._causal_manager = causal

    counterfactual = CounterfactualManager(
        model=self.model,
        train=self.train,
        test=self.test,
        target_column=self.target_column,
        task_type=self.task_type,
        categorical_features=self.categorical_features)
    self._counterfactual_manager = counterfactual

    error_analysis = ErrorAnalysisManager(
        self.model, self.test, self.target_column,
        self._classes, self.categorical_features)
    self._error_analysis_manager = error_analysis

    explainer = ExplainerManager(
        self.model, self.train, self.test,
        self.target_column, self._classes,
        categorical_features=self.categorical_features)
    self._explainer_manager = explainer

    # Same order as construction above.
    self._managers = [causal, counterfactual, error_analysis, explainer]
def test_causal_train_test_categories(self, parks_data):
    """Expect a validation error for a test-only category.

    Two rows with the state 'indiana' are appended to a copy of the test
    frame. Adding 'state' as a treatment must then raise
    UserConfigValidationException naming the missing category.
    """
    train_df, test_df, target_feature = parks_data

    # Work on a copy so the frame received from parks_data is not mutated.
    test_df = test_df.copy()
    extra_rows = (
        ['indiana', 301, 'trees', 78],
        ['indiana', 222, 'trees', 81],
    )
    for row in extra_rows:
        # Append at the next positional label.
        test_df.loc[len(test_df.index)] = row

    manager = CausalManager(train_df, test_df, target_feature,
                            ModelTask.REGRESSION, ['state', 'attraction'])

    # The text is used as a regex, hence the escaped square brackets.
    message = ("Causal analysis requires that every category of "
               "categorical features present in the test data be "
               "also present in the train data. "
               "Categories missing from train data: "
               "{'state': \\['indiana'\\]}")
    with pytest.raises(UserConfigValidationException, match=message):
        manager.add(
            ['state'],
            skip_cat_limit_checks=True,
            upper_bound_on_cat_expansion=50)
def causal_result(parks_data):
    """Return the first causal result computed on the parks data.

    A regression CausalManager is created with 'state' and 'attraction' as
    categorical features, 'attraction' is added as the treatment, the
    analysis is computed and the first stored result is returned.
    """
    train_df, test_df, target_feature = parks_data
    categorical_features = ['state', 'attraction']
    manager = CausalManager(train_df, test_df, target_feature,
                            ModelTask.REGRESSION, categorical_features)
    manager.add(
        ['attraction'],
        skip_cat_limit_checks=True,
        upper_bound_on_cat_expansion=50)
    manager.compute()
    results = manager.get()
    return results[0]
def test_categorical_policy(self, housing_data_categorical):
    """Check that policy trees split on categorical features.

    Only the object-dtype columns and the target are kept, and the
    treatments 'HouseAge_CAT' and 'Population_CAT' are analysed. Every
    policy in the dashboard data must have a non-leaf root that splits on
    one of the categorical columns with an 'eq' right comparison against
    either 'very-old' or 'high'.
    """
    train_df, test_df, target_feature = housing_data_categorical
    categoricals = train_df.select_dtypes(include=[object]).columns

    # Just use categoricals to force categorical policy tree
    new_features = list(categoricals) + [target_feature]
    train_df = train_df[new_features]
    test_df = test_df[new_features]

    # Sample data for easier debug
    test_df = test_df[:20]

    manager = CausalManager(train_df, test_df, target_feature,
                            ModelTask.REGRESSION, categoricals)
    # The value returned by add() was bound to a name and then overwritten
    # without being read, so it is no longer bound.
    manager.add(['HouseAge_CAT', 'Population_CAT'], random_state=42)
    manager.compute()
    result = manager.get()[0]

    dashboard_data = result._get_dashboard_data()
    policies = dashboard_data['policies']
    assert len(policies) > 0

    for policy in policies:
        tree = policy['policy_tree']
        assert not tree['leaf']
        assert tree['feature'] in categoricals
        assert tree['right_comparison'] == 'eq'
        assert tree['comparison_value'] in ('very-old', 'high')
def test_causal_no_categoricals(self, housing_data):
    """Run a causal analysis with no categorical features declared.

    After adding 'AveRooms' as the only treatment and computing, the first
    result must hold exactly one policy and exactly one configured
    treatment feature, namely 'AveRooms'.
    """
    train_df, test_df, target_feature = housing_data
    manager = CausalManager(train_df, test_df, target_feature,
                            ModelTask.REGRESSION, None)
    manager.add(['AveRooms'])
    manager.compute()

    first_result = manager.get()[0]
    assert len(first_result.policies) == 1

    treatments = first_result.config.treatment_features
    assert len(treatments) == 1
    assert treatments[0] == 'AveRooms'
def test_causal_manager_global_cohort_policy(self, housing_data):
    """Check the global cohort policy for a known and an unknown result ID.

    A causal analysis on the 'AveRooms' treatment is computed first. The
    policy requested for the first five test rows must carry the common
    causal attributes, one local policy per row and 'AveRooms' as the
    treatment feature. Requesting a policy with an unknown ID is expected
    to raise a ValueError.
    """
    train_df, test_df, target_feature = housing_data
    manager = CausalManager(train_df, test_df, target_feature,
                            ModelTask.REGRESSION, None)
    manager.add(['AveRooms'])
    manager.compute()

    # Happy path: query with the ID of the only computed result.
    result_id = manager.get()[0].id
    X_test = test_df.head(5).drop(target_feature, axis=1)
    causal_data = manager.request_global_cohort_policy(result_id, X_test)
    self.verify_common_causal_data_attributes(causal_data)
    assert hasattr(causal_data, 'policies')

    first_policy = causal_data.policies[0]
    assert len(first_policy.local_policies) == X_test.shape[0]
    assert first_policy.treatment_feature == "AveRooms"

    # Error path: exercise the policy endpoint itself. The previous version
    # called request_global_cohort_effects here, so the policy lookup
    # failure was never tested.
    # NOTE(review): assumes request_global_cohort_policy raises the same
    # "Failed to find causal result" ValueError - confirm against the
    # manager implementation.
    incorrect_query_id = "incorrect_query_id"
    X_test = test_df.drop(target_feature, axis=1)
    with pytest.raises(ValueError,
                       match="Failed to find causal result with ID: "
                             "incorrect_query_id"):
        manager.request_global_cohort_policy(incorrect_query_id, X_test)
def test_causal_cat_expansion(self, parks_data):
    """Expect a ValueError when 'state' is added without extra options.

    The error message must match "Increase the value 50".
    """
    train_df, test_df, target_feature = parks_data
    categorical_features = ['state', 'attraction']
    manager = CausalManager(train_df, test_df, target_feature,
                            ModelTask.REGRESSION, categorical_features)

    # Both calls sit inside the context, so the error may surface from
    # either of them.
    with pytest.raises(ValueError, match="Increase the value 50"):
        manager.add(['state'])
        manager.compute()
def cost_manager(housing_data):
    """Return a regression CausalManager built on a small housing sample.

    The test frame is cut down to its first seven rows before the manager
    is created; no categorical features are passed.
    """
    train_df, full_test_df, target_feature = housing_data
    small_test_df = full_test_df[:7]
    manager = CausalManager(train_df, small_test_df, target_feature,
                            ModelTask.REGRESSION, None)
    return manager
def test_causal_manager_local_instance_effects(self, housing_data):
    """Check local instance effects and the related input validation.

    A causal analysis on the 'AveRooms' treatment is computed first. The
    test then covers four cases: a valid single-row request, an unknown
    result ID, a NumPy array passed instead of a DataFrame, and a request
    with more than one row.
    """
    train_df, test_df, target_feature = housing_data
    manager = CausalManager(train_df, test_df, target_feature,
                            ModelTask.REGRESSION, None)
    manager.add(['AveRooms'])
    manager.compute()

    # Valid request: one row, existing result ID.
    result_id = manager.get()[0].id
    single_row = test_df.head(1).drop(target_feature, axis=1)
    causal_data = manager.request_local_instance_effects(
        result_id, single_row)
    self.verify_common_causal_data_attributes(causal_data)
    assert hasattr(causal_data, 'local_effects')

    expected_keys = (
        'sample',
        'outcome',
        'feature',
        'feature_value',
        'point',
        'stderr',
        'zstat',
        'ci_lower',
        'ci_upper',
        'p_value',
    )
    first_effect = causal_data.local_effects[0][0]
    for key in expected_keys:
        assert key in first_effect

    # Unknown result ID.
    unknown_id = "incorrect_query_id"
    all_rows = test_df.drop(target_feature, axis=1)
    with pytest.raises(ValueError,
                       match="Failed to find causal result with ID: "
                             "incorrect_query_id"):
        manager.request_local_instance_effects(unknown_id, all_rows)

    # Raw NumPy values instead of a DataFrame.
    result_id = manager.get()[0].id
    raw_values = test_df.head(1).drop(target_feature, axis=1).values
    with pytest.raises(UserConfigValidationException,
                       match='Data is of type <class \'numpy.ndarray\'>'
                             ' but it must be a pandas DataFrame.'):
        manager.request_local_instance_effects(result_id, raw_values)

    # More than one row.
    result_id = manager.get()[0].id
    five_rows = test_df.head(5).drop(target_feature, axis=1)
    with pytest.raises(UserConfigValidationException,
                       match='Only one row of data is allowed for '
                             'local causal effects.'):
        manager.request_local_instance_effects(result_id, five_rows)