def _save(self, path):
    """Persist every error analysis config/report pair under ``path``.

    For each pair a new ``DirectoryManager`` rooted at ``path`` is
    created; the config is written as ``config.json`` inside its config
    directory and the report as ``report.json`` inside its data directory.

    :param path: The directory path to save the ErrorAnalysisManager to.
    :type path: str
    :raises ConfigAndResultMismatchException: If the number of configs
        differs from the number of reports.
    """
    Path(path).mkdir(parents=True, exist_ok=True)

    config_count = len(self._ea_config_list)
    report_count = len(self._ea_report_list)
    if config_count != report_count:
        raise ConfigAndResultMismatchException(
            "The number of error analysis configs {0} doesn't match the "
            "number of results {1}".format(config_count, report_count))

    # Both lists have the same length at this point, so zip pairs each
    # config with the report stored at the same position.
    for config, report in zip(self._ea_config_list, self._ea_report_list):
        manager = DirectoryManager(parent_directory_path=path)

        # save the config
        config_file = manager.create_config_directory() / 'config.json'
        with open(config_file, 'w') as stream:
            json.dump(config, stream, default=config_json_converter)

        # save the report
        report_file = manager.create_data_directory() / 'report.json'
        with open(report_file, 'w') as stream:
            json.dump(report, stream, default=report_json_converter)
def validate_state_directory(path, manager_type):
    """Assert that a saved manager directory has the expected layout.

    ``manager_type`` must be one of the entries of ``path``. Every entry
    below ``path / manager_type`` must be named like a UUID, and the
    presence of its config, data and generators directories is checked
    against what is expected for that manager type. Manager types that
    are not listed in the table below only get the UUID name check.

    :param path: The directory that contains the saved managers.
    :type path: pathlib.Path or another path object supporting ``/``
    :param manager_type: Name of the manager directory to validate.
    :type manager_type: str
    """
    assert manager_type in os.listdir(path)

    # (config exists, data exists, generators exists) per manager type.
    expected_layouts = {
        ManagerNames.EXPLAINER: (False, True, False),
        ManagerNames.COUNTERFACTUAL: (True, True, True),
        ManagerNames.ERROR_ANALYSIS: (True, True, False),
        ManagerNames.CAUSAL: (False, True, False),
    }
    expected = expected_layouts.get(manager_type)

    manager_root = path / manager_type
    for component_name in os.listdir(manager_root):
        # Test if the component directory has UUID structure; UUID()
        # raises ValueError for a badly formed string.
        UUID(component_name, version=4)

        dm = DirectoryManager(manager_root, component_name)
        config_dir = dm.get_config_directory()
        data_dir = dm.get_data_directory()
        generators_dir = dm.get_generators_directory()

        if expected is None:
            continue

        want_config, want_data, want_generators = expected
        assert config_dir.exists() == want_config
        assert data_dir.exists() == want_data
        assert generators_dir.exists() == want_generators
def _load(path, rai_insights):
    """Rebuild a CausalManager from the state saved under ``path``.

    The instance is created through ``__new__`` so ``__init__`` is not
    run; its attributes are filled in directly. One ``CausalResult`` is
    loaded from the data directory of every sub-directory of ``path``.

    :param path: The directory path to load the CausalManager from.
    :type path: str
    :param rai_insights: The loaded parent RAIInsights.
    :type rai_insights: RAIInsights
    :return: The CausalManager manager after loading.
    :rtype: CausalManager
    """
    manager = CausalManager.__new__(CausalManager)
    state = manager.__dict__

    # Rehydrate results
    state['_results'] = [
        CausalResult.load(
            DirectoryManager(
                parent_directory_path=path,
                sub_directory_name=sub_dir).get_data_directory())
        for sub_dir in DirectoryManager.list_sub_directories(path)
    ]

    # Rehydrate model analysis data
    state['_train'] = rai_insights.train
    state['_test'] = rai_insights.test
    state['_target_column'] = rai_insights.target_column
    state['_task_type'] = rai_insights.task_type
    state['_categorical_features'] = rai_insights.categorical_features

    return manager
def _save(self, path):
    """Write the DataBalanceManager state below ``path``.

    The top-level directory is always created. When the manager has been
    added, ``self.list()`` is dumped to ``MANAGER_JSON`` in the config
    directory, the computed measures (if any) to ``MEASURES_JSON`` next
    to it, and ``self._df`` to ``DATA_JSON`` in the data directory.

    :param path: The directory path to save the DataBalanceManager to.
    :type path: str
    """
    Path(path).mkdir(parents=True, exist_ok=True)

    # If manager is not added to RAIInsights, don't save any of its configs
    if not self._is_added:
        return

    manager = DirectoryManager(parent_directory_path=path)
    config_root = manager.create_config_directory()

    with open(config_root / MANAGER_JSON, "w") as stream:
        json.dump(self.list(), stream)

    # if measures have been computed, save the computed measures
    measures = self._data_balance_measures
    if measures:
        with open(config_root / MEASURES_JSON, "w") as stream:
            json.dump(measures, stream)

    data_file = manager.create_data_directory() / DATA_JSON
    self._df.to_json(data_file, orient="split")
def test_counterfactual_manager_save_load(self, tmpdir):
    """Save and reload counterfactuals, then reload without explainers.

    After the first round trip the pickled explainers are deleted from
    disk and the insights are loaded again; both configs must still end
    up with an explainer.
    """
    X_train, X_test, y_train, y_test, feature_names, _ = create_iris_data()

    model = create_lightgbm_classifier(X_train, y_train)
    X_train['target'] = y_train
    X_test['target'] = y_test

    rai_insights = RAIInsights(
        model=model,
        train=X_train,
        test=X_test.iloc[0:10],
        target_column='target',
        task_type='classification')

    varied_feature = feature_names[0]
    for desired_class in (2, 1):
        rai_insights.counterfactual.add(
            total_CFs=10,
            desired_class=desired_class,
            features_to_vary=[varied_feature],
            permitted_range={varied_feature: [2.0, 5.0]})
    rai_insights.counterfactual.compute()

    def _assert_two_results(insights):
        # Two requests were added, and the first result must be present.
        assert len(insights.counterfactual.get()) == 2
        assert insights.counterfactual.get()[0] is not None

    _assert_two_results(rai_insights)

    save_dir = tmpdir.mkdir('save-dir')
    rai_insights.save(save_dir)
    rai_insights_copy = RAIInsights.load(save_dir)

    _assert_two_results(rai_insights_copy)

    # Delete the dice-ml explainer directory so that the dice-ml
    # explainer can be re-trained rather being loaded from the
    # disc
    counterfactual_path = save_dir / "counterfactual"
    for sub_dir in DirectoryManager.list_sub_directories(
            counterfactual_path):
        manager = DirectoryManager(
            parent_directory_path=counterfactual_path,
            sub_directory_name=sub_dir)
        os.remove(manager.get_generators_directory() / "explainer.pkl")

    rai_insights_copy_new = RAIInsights.load(save_dir)
    reloaded_configs = \
        rai_insights_copy_new.counterfactual._counterfactual_config_list
    assert len(reloaded_configs) == 2
    assert reloaded_configs[0].explainer is not None
    assert reloaded_configs[1].explainer is not None
def _load(path, rai_insights):
    """Rebuild an ErrorAnalysisManager from the state saved under ``path``.

    Every sub-directory of ``path`` contributes one config (read from
    ``config.json`` in its config directory) and one report (read from
    ``report.json`` in its data directory). Each report is validated
    against the error analysis schema before it is kept.

    :param path: The directory path to load the ErrorAnalysisManager from.
    :type path: str
    :param rai_insights: The loaded parent RAIInsights.
    :type rai_insights: RAIInsights
    :return: The ErrorAnalysisManager manager after loading.
    :rtype: ErrorAnalysisManager
    """
    # Build the instance with __new__ so that __init__ is skipped,
    # similar to pickle; attributes are assigned directly below.
    manager = ErrorAnalysisManager.__new__(ErrorAnalysisManager)
    state = manager.__dict__

    configs = []
    reports = []
    for sub_dir in DirectoryManager.list_sub_directories(path):
        dir_manager = DirectoryManager(parent_directory_path=path,
                                       sub_directory_name=sub_dir)

        config_file = dir_manager.get_config_directory() / 'config.json'
        with open(config_file, 'r') as stream:
            configs.append(json.load(stream, object_hook=as_error_config))

        report_file = dir_manager.get_data_directory() / 'report.json'
        with open(report_file, 'r') as stream:
            report = json.load(stream, object_hook=as_error_report)
            # Validate the serialized output against schema
            schema = ErrorAnalysisManager._get_error_analysis_schema()
            jsonschema.validate(json.loads(report.to_json()), schema)
            reports.append(report)

    state['_ea_report_list'] = reports
    state['_ea_config_list'] = configs

    categorical = rai_insights.categorical_features
    state['_categorical_features'] = categorical

    # Split the test data into features and ground truth.
    target = rai_insights.target_column
    labels = rai_insights.test[target]
    features = rai_insights.test.drop(columns=[target])
    state['_dataset'] = features
    state['_true_y'] = labels

    column_names = list(features.columns)
    state['_feature_names'] = column_names
    state['_analyzer'] = ModelAnalyzer(rai_insights.model,
                                       features,
                                       labels,
                                       column_names,
                                       categorical)
    return manager
def _load(path, rai_insights):
    """Rebuild an ExplainerManager from the state saved under ``path``.

    Only the first sub-directory of ``path`` is consulted. Its
    ``META_JSON`` file supplies the run/added flags, and a saved
    explanation is loaded when present. With no sub-directory at all the
    manager is restored as not added, not run and without explanation.

    :param path: The directory path to load the ExplainerManager from.
    :type path: str
    :param rai_insights: The loaded parent RAIInsights.
    :type rai_insights: RAIInsights
    :return: The ExplainerManager manager after loading.
    :rtype: ExplainerManager
    """
    # Build the instance with __new__ so that __init__ is skipped,
    # similar to pickle; attributes are assigned directly below.
    manager = ExplainerManager.__new__(ExplainerManager)
    state = manager.__dict__

    sub_dirs = DirectoryManager.list_sub_directories(path)
    if not sub_dirs:
        state['_' + IS_RUN] = False
        state['_' + IS_ADDED] = False
        state[EXPLANATION] = None
    else:
        dir_manager = DirectoryManager(
            parent_directory_path=path,
            sub_directory_name=sub_dirs[0])
        data_dir = dir_manager.get_data_directory()

        with open(data_dir / META_JSON, 'r') as stream:
            meta = json.load(stream)
        state['_' + IS_RUN] = meta[IS_RUN]
        state['_' + IS_ADDED] = meta[IS_ADDED]

        saved_explanation = data_dir / ManagerNames.EXPLAINER
        state[EXPLANATION] = (load_explanation(saved_explanation)
                              if saved_explanation.exists() else None)

    state['_' + MODEL] = rai_insights.model
    state['_' + CLASSES] = rai_insights._classes
    state['_' + CATEGORICAL_FEATURES] = rai_insights.categorical_features

    # Feature frames are the train/test data without the target column.
    target = rai_insights.target_column
    train_features = rai_insights.train.drop(columns=[target])
    test_features = rai_insights.test.drop(columns=[target])
    state[U_INITIALIZATION_EXAMPLES] = train_features
    state[U_EVALUATION_EXAMPLES] = test_features
    state['_' + FEATURES] = list(train_features.columns)

    # reset the surrogate model
    manager._initialize_surrogate_model()

    return manager
def _save(self, path):
    """Write every causal result below ``path``.

    Each result is saved into the data directory of its own, newly
    created ``DirectoryManager`` rooted at ``path``.

    :param path: The directory path to save the CausalManager to.
    :type path: str
    """
    Path(path).mkdir(parents=True, exist_ok=True)

    # Save results to disk
    for causal_result in self._results:
        target_dir = DirectoryManager(
            parent_directory_path=path).create_data_directory()
        causal_result.save(target_dir)
def test_causal_save_and_load(self, housing_data, tmpdir):
    """Round-trip causal results, with and without the pickled models.

    The first load must restore the full result. After the
    ``causal_analysis.pkl`` files are removed, loading must warn and
    restore everything except ``causal_analysis``.
    """
    train_df, test_df, target_feature = housing_data
    save_dir = tmpdir.mkdir('save-dir')

    insights = RAIInsights(None, train_df, test_df, target_feature,
                           ModelTask.REGRESSION)
    insights.causal.add(['AveRooms'])
    insights.compute()

    pre_result = insights.causal.get()[0]

    def _check_first_result(loaded_manager, models_expected):
        # Compare the first reloaded result with the one computed above.
        post_result = loaded_manager.get()[0]
        assert post_result.id == pre_result.id
        if models_expected:
            assert post_result.causal_analysis is not None
        else:
            assert post_result.causal_analysis is None
        assert post_result.global_effects is not None
        assert post_result.local_effects is not None
        assert post_result.policies is not None

    insights.causal._save(save_dir)
    manager = insights.causal._load(save_dir, insights)
    _check_first_result(manager, models_expected=True)

    # Remove the causal analysis models to test the loading of
    # causal models in case there is error in loading of the causal
    # models.
    for sub_dir in DirectoryManager.list_sub_directories(save_dir):
        dm = DirectoryManager(parent_directory_path=save_dir,
                              sub_directory_name=sub_dir)
        os.remove(dm.get_data_directory() / "causal_analysis.pkl")

    model_load_err = ('ERROR-LOADING-EXPLAINER: '
                      'There was an error loading the explainer. '
                      'Some of RAI dashboard features may not work.')
    with pytest.warns(UserWarning, match=model_load_err):
        manager = insights.causal._load(save_dir, insights)
    _check_first_result(manager, models_expected=False)
def _save(self, path):
    """Write every counterfactual config below ``path``.

    For each config a new ``DirectoryManager`` rooted at ``path`` is
    created; the config, its result and its explainer are saved into the
    config, data and generators directories respectively.

    :param path: The directory path to save the CounterfactualManager to.
    :type path: str
    """
    Path(path).mkdir(parents=True, exist_ok=True)

    for config in self._counterfactual_config_list:
        manager = DirectoryManager(parent_directory_path=path)

        config_dir = manager.create_config_directory()
        config.save_config(config_dir)

        data_dir = manager.create_data_directory()
        config.save_result(data_dir)

        generators_dir = manager.create_generators_directory()
        config.save_explainer(generators_dir)
def _save(self, path):
    """Write the ExplainerManager state below ``path``.

    The top-level directory is always created. Nothing else is written
    unless the explainer has been added; in that case the explanation
    (when there is one) and a ``META_JSON`` file holding the run/added
    flags are stored in a newly created data directory.

    :param path: The directory path to save the ExplainerManager to.
    :type path: str
    """
    Path(path).mkdir(parents=True, exist_ok=True)

    if not self._is_added:
        return

    data_dir = DirectoryManager(
        parent_directory_path=path).create_data_directory()

    # save the explanation
    if self._explanation:
        save_explanation(self._explanation,
                         data_dir / ManagerNames.EXPLAINER)

    flags = {IS_RUN: self._is_run, IS_ADDED: self._is_added}
    with open(data_dir / META_JSON, 'w') as stream:
        json.dump(flags, stream)
def test_directory_manager(self, tmpdir, create_parent_directory):
    """Check DirectoryManager with a named and an unnamed sub-directory.

    The parent directory is created up front only when
    ``create_parent_directory`` is truthy; otherwise just its path is
    handed to the DirectoryManager.
    """
    parent_directory = (tmpdir.mkdir('parent_directory')
                        if create_parent_directory
                        else tmpdir / 'parent_directory')

    def _assert_on_disk(manager):
        # Both the parent and the manager's own sub-directory must exist.
        assert manager.parent_directory_path.exists()
        sub_directory = (manager.parent_directory_path /
                         manager.sub_directory_name)
        assert sub_directory.exists()

    def _assert_listed(name, expected_total):
        # The listing is a list of the expected size containing ``name``.
        assert isinstance(
            DirectoryManager.list_sub_directories(parent_directory), list)
        assert len(DirectoryManager.list_sub_directories(
            parent_directory)) == expected_total
        assert name in \
            DirectoryManager.list_sub_directories(parent_directory)

    # Explicitly named sub-directory.
    dm_one = DirectoryManager(parent_directory_path=parent_directory,
                              sub_directory_name='known')
    assert dm_one.parent_directory_path.exists()
    assert dm_one.sub_directory_name == 'known'
    _assert_on_disk(dm_one)
    self._verify_directory_manager_operations(dm_one)
    _assert_listed('known', 1)

    # Sub-directory name chosen by the DirectoryManager itself.
    dm_two = DirectoryManager(parent_directory_path=parent_directory)
    assert dm_two.parent_directory_path.exists()
    assert dm_two.sub_directory_name is not None
    _assert_on_disk(dm_two)
    self._verify_directory_manager_operations(dm_two)
    _assert_listed(dm_two.sub_directory_name, 2)
def _load(path, rai_insights):
    """Rebuild a CounterfactualManager from the state saved under ``path``.

    Every sub-directory of ``path`` yields one counterfactual config
    whose result and explainer are loaded as well. If the explainer
    comes back as ``None`` a warning is issued and a new dice-ml
    explainer is created. A loaded counterfactual object is validated
    against the schema for its own metadata version.

    :param path: The directory path to load the CounterfactualManager
        from.
    :type path: str
    :param rai_insights: The loaded parent RAIInsights.
    :type rai_insights: RAIInsights
    :return: The CounterfactualManager manager after loading.
    :rtype: CounterfactualManager
    """
    # __new__ skips __init__; attributes are assigned directly below.
    manager = CounterfactualManager.__new__(CounterfactualManager)
    state = manager.__dict__

    # Rehydrate model analysis data
    state[CounterfactualManager._MODEL] = rai_insights.model
    state[CounterfactualManager._TRAIN] = rai_insights.train
    state[CounterfactualManager._TEST] = rai_insights.test
    state[CounterfactualManager._TARGET_COLUMN] = \
        rai_insights.target_column
    state[CounterfactualManager._TASK_TYPE] = rai_insights.task_type
    state[CounterfactualManager._CATEGORICAL_FEATURES] = \
        rai_insights.categorical_features

    # The same list object is stored on the instance and filled in place.
    loaded_configs = []
    state[CounterfactualManager._COUNTERFACTUAL_CONFIG_LIST] = \
        loaded_configs

    for sub_dir in DirectoryManager.list_sub_directories(path):
        dir_manager = DirectoryManager(
            parent_directory_path=path,
            sub_directory_name=sub_dir)

        config = CounterfactualConfig.load_config(
            dir_manager.get_config_directory())
        config.load_result(dir_manager.get_data_directory())
        config.load_explainer(dir_manager.get_generators_directory())

        if config.explainer is None:
            warnings.warn(
                'ERROR-LOADING-COUNTERFACTUAL-EXPLAINER: '
                'There was an error loading the '
                'counterfactual explainer model. '
                'Retraining the counterfactual '
                'explainer.')
            config.explainer = manager._create_diceml_explainer(
                config.method,
                config.continuous_features)

        cf_obj = config.counterfactual_obj
        if cf_obj is not None:
            # Validate the serialized output against schema
            schema = CounterfactualManager._get_counterfactual_schema(
                version=cf_obj.metadata['version'])
            jsonschema.validate(json.loads(cf_obj.to_json()), schema)

        loaded_configs.append(config)

    return manager
def test_counterfactual_manager_save_load(self, tmpdir):
    """Save and reload counterfactuals, then reload without explainers.

    The config ids must survive the round trip (in any order). After
    the pickled explainers are deleted, loading must warn and both
    configs must still end up with an explainer.
    """
    X_train, X_test, y_train, y_test, feature_names, _ = create_iris_data()

    model = create_lightgbm_classifier(X_train, y_train)
    X_train['target'] = y_train
    X_test['target'] = y_test

    rai_insights = RAIInsights(model=model,
                               train=X_train,
                               test=X_test.iloc[0:10],
                               target_column='target',
                               task_type='classification')

    varied_feature = feature_names[0]
    for desired_class in (2, 1):
        rai_insights.counterfactual.add(
            total_CFs=10,
            desired_class=desired_class,
            features_to_vary=[varied_feature],
            permitted_range={varied_feature: [2.0, 5.0]})
    rai_insights.counterfactual.compute()

    def _assert_both_results(insights):
        # Two requests were added, and both results must be present.
        assert len(insights.counterfactual.get()) == 2
        assert insights.counterfactual.get()[0] is not None
        assert insights.counterfactual.get()[1] is not None

    configs_before = rai_insights.counterfactual._counterfactual_config_list
    assert len(configs_before) == 2
    _assert_both_results(rai_insights)

    save_dir = tmpdir.mkdir('save-dir')
    rai_insights.save(save_dir)
    rai_insights_copy = RAIInsights.load(save_dir)

    configs_after = \
        rai_insights_copy.counterfactual._counterfactual_config_list
    _assert_both_results(rai_insights_copy)

    # Ids are compared as a set of candidates because the load order is
    # not required to match the save order.
    ids_after = [configs_after[0].id, configs_after[1].id]
    assert configs_before[0].id in ids_after
    assert configs_before[1].id in ids_after

    # Delete the dice-ml explainer directory so that the dice-ml
    # explainer can be re-trained rather being loaded from the
    # disc
    counterfactual_path = save_dir / "counterfactual"
    for sub_dir in DirectoryManager.list_sub_directories(
            counterfactual_path):
        manager = DirectoryManager(
            parent_directory_path=counterfactual_path,
            sub_directory_name=sub_dir)
        os.remove(manager.get_generators_directory() / "explainer.pkl")

    with pytest.warns(UserWarning,
                      match='ERROR-LOADING-COUNTERFACTUAL-EXPLAINER: '
                            'There was an error loading the '
                            'counterfactual explainer model. '
                            'Retraining the counterfactual '
                            'explainer.'):
        rai_insights_copy_new = RAIInsights.load(save_dir)

    reloaded_configs = \
        rai_insights_copy_new.counterfactual._counterfactual_config_list
    assert len(reloaded_configs) == 2
    assert reloaded_configs[0].explainer is not None
    assert reloaded_configs[1].explainer is not None
def _load(path, rai_insights):
    """Rebuild a DataBalanceManager from the state saved under ``path``.

    Defaults are derived from ``rai_insights``. When ``path`` has at
    least one sub-directory, the first one is read: ``MANAGER_JSON``
    overrides the manager settings, and ``DATA_JSON`` and
    ``MEASURES_JSON`` replace the data frame and the measures when those
    files exist.

    :param path: The directory path to load the DataBalanceManager from.
    :type path: str
    :param rai_insights: The loaded parent RAIInsights.
    :type rai_insights: RAIInsights
    :return: The DataBalanceManager after loading.
    :rtype: DataBalanceManager
    """
    # Build the instance with __new__ so that __init__ is skipped,
    # similar to pickle; attributes are assigned directly below.
    manager = DataBalanceManager.__new__(DataBalanceManager)
    state = manager.__dict__

    state["_train"] = rai_insights.train
    state["_test"] = rai_insights.test

    # Values used when nothing was saved for this manager.
    is_added = False
    cols_of_interest = None
    task_type = rai_insights.task_type
    target_column = rai_insights.target_column
    if rai_insights._classes is not None:
        classes = [str(label) for label in rai_insights._classes]
    else:
        classes = []
    df = pd.concat([rai_insights.train, rai_insights.test])
    data_balance_measures = None

    sub_dirs = DirectoryManager.list_sub_directories(path)
    if sub_dirs:
        dir_manager = DirectoryManager(parent_directory_path=path,
                                       sub_directory_name=sub_dirs[0])
        config_root = dir_manager.get_config_directory()

        # Load manager
        with open(config_root / MANAGER_JSON, "r") as stream:
            saved = json.load(stream)
        is_added = saved[Keys.IS_ADDED]
        task_type = saved[Keys.TASK_TYPE]
        cols_of_interest = saved[Keys.COLS_OF_INTEREST]
        target_column = saved[Keys.TARGET_COLUMN]
        classes = saved[Keys.CLASSES]

        # Load from data json
        data_file = dir_manager.get_data_directory() / DATA_JSON
        if data_file.exists():
            df = pd.read_json(data_file, orient="split")

        # Load measures
        measures_file = config_root / MEASURES_JSON
        if measures_file.exists():
            with open(measures_file, "r") as stream:
                data_balance_measures = json.load(stream)

    state["_is_added"] = is_added
    state["_task_type"] = task_type
    state["_cols_of_interest"] = cols_of_interest
    state["_target_column"] = target_column
    state["_classes"] = classes
    state["_df"] = df
    state["_data_balance_measures"] = data_balance_measures

    return manager