def _load(path, rai_insights):
        """Load the CausalManager from the given path.

        :param path: The directory path to load the CausalManager from.
        :type path: str
        :param rai_insights: The loaded parent RAIInsights.
        :type rai_insights: RAIInsights
        :return: The CausalManager manager after loading.
        :rtype: CausalManager
        """
        inst = CausalManager.__new__(CausalManager)

        # Rehydrate results
        all_causal_dirs = DirectoryManager.list_sub_directories(path)
        inst.__dict__['_results'] = []
        for causal_dir in all_causal_dirs:
            dm = DirectoryManager(parent_directory_path=path,
                                  sub_directory_name=causal_dir)
            causal_result = CausalResult.load(dm.get_data_directory())
            inst.__dict__['_results'].append(causal_result)

        # Rehydrate model analysis data
        inst.__dict__['_train'] = rai_insights.train
        inst.__dict__['_test'] = rai_insights.test
        inst.__dict__['_target_column'] = rai_insights.target_column
        inst.__dict__['_task_type'] = rai_insights.task_type
        inst.__dict__['_categorical_features'] = \
            rai_insights.categorical_features

        return inst
    def test_counterfactual_manager_save_load(self, tmpdir):
        X_train, X_test, y_train, y_test, feature_names, _ = \
            create_iris_data()

        model = create_lightgbm_classifier(X_train, y_train)
        X_train['target'] = y_train
        X_test['target'] = y_test

        rai_insights = RAIInsights(
            model=model,
            train=X_train,
            test=X_test.iloc[0:10],
            target_column='target',
            task_type='classification')

        rai_insights.counterfactual.add(
            total_CFs=10, desired_class=2,
            features_to_vary=[feature_names[0]],
            permitted_range={feature_names[0]: [2.0, 5.0]})
        rai_insights.counterfactual.add(
            total_CFs=10, desired_class=1,
            features_to_vary=[feature_names[0]],
            permitted_range={feature_names[0]: [2.0, 5.0]})
        rai_insights.counterfactual.compute()

        assert len(rai_insights.counterfactual.get()) == 2
        cf_obj = rai_insights.counterfactual.get()[0]
        assert cf_obj is not None

        save_dir = tmpdir.mkdir('save-dir')
        rai_insights.save(save_dir)
        rai_insights_copy = RAIInsights.load(save_dir)

        assert len(rai_insights_copy.counterfactual.get()) == 2
        cf_obj = rai_insights_copy.counterfactual.get()[0]
        assert cf_obj is not None

        # Delete the dice-ml explainer directory so that the dice-ml
        # explainer can be re-trained rather being loaded from the
        # disc
        counterfactual_path = save_dir / "counterfactual"
        all_cf_dirs = DirectoryManager.list_sub_directories(
            counterfactual_path)
        for counterfactual_config_dir in all_cf_dirs:
            directory_manager = DirectoryManager(
                parent_directory_path=counterfactual_path,
                sub_directory_name=counterfactual_config_dir)
            explainer_pkl_path = \
                directory_manager.get_generators_directory() / "explainer.pkl"
            os.remove(explainer_pkl_path)

        rai_insights_copy_new = RAIInsights.load(save_dir)
        counterfactual_config_list = \
            rai_insights_copy_new.counterfactual._counterfactual_config_list
        assert len(counterfactual_config_list) == 2
        assert counterfactual_config_list[0].explainer is not None
        assert counterfactual_config_list[1].explainer is not None
示例#3
0
    def _load(path, rai_insights):
        """Load the ErrorAnalysisManager from the given path.

        :param path: The directory path to load the ErrorAnalysisManager from.
        :type path: str
        :param rai_insights: The loaded parent RAIInsights.
        :type rai_insights: RAIInsights
        :return: The ErrorAnalysisManager manager after loading.
        :rtype: ErrorAnalysisManager
        """
        # create the ErrorAnalysisManager without any properties using
        # the __new__ function, similar to pickle
        inst = ErrorAnalysisManager.__new__(ErrorAnalysisManager)

        ea_config_list = []
        ea_report_list = []
        all_ea_dirs = DirectoryManager.list_sub_directories(path)
        for ea_dir in all_ea_dirs:
            directory_manager = DirectoryManager(parent_directory_path=path,
                                                 sub_directory_name=ea_dir)

            config_path = (directory_manager.get_config_directory() /
                           'config.json')
            with open(config_path, 'r') as file:
                ea_config = json.load(file, object_hook=as_error_config)
                ea_config_list.append(ea_config)

            report_path = (directory_manager.get_data_directory() /
                           'report.json')
            with open(report_path, 'r') as file:
                ea_report = json.load(file, object_hook=as_error_report)
                # Validate the serialized output against schema
                schema = ErrorAnalysisManager._get_error_analysis_schema()
                jsonschema.validate(json.loads(ea_report.to_json()), schema)
                ea_report_list.append(ea_report)

        inst.__dict__['_ea_report_list'] = ea_report_list
        inst.__dict__['_ea_config_list'] = ea_config_list

        categorical_features = rai_insights.categorical_features
        inst.__dict__['_categorical_features'] = categorical_features
        target_column = rai_insights.target_column
        true_y = rai_insights.test[target_column]
        dataset = rai_insights.test.drop(columns=[target_column])
        inst.__dict__['_dataset'] = dataset
        inst.__dict__['_true_y'] = true_y
        feature_names = list(dataset.columns)
        inst.__dict__['_feature_names'] = feature_names
        inst.__dict__['_analyzer'] = ModelAnalyzer(rai_insights.model, dataset,
                                                   true_y, feature_names,
                                                   categorical_features)
        return inst
    def _load(path, rai_insights):
        """Load the ExplainerManager from the given path.

        :param path: The directory path to load the ExplainerManager from.
        :type path: str
        :param rai_insights: The loaded parent RAIInsights.
        :type rai_insights: RAIInsights
        :return: The ExplainerManager manager after loading.
        :rtype: ExplainerManager
        """
        # create the ExplainerManager without any properties using the __new__
        # function, similar to pickle
        inst = ExplainerManager.__new__(ExplainerManager)

        all_cf_dirs = DirectoryManager.list_sub_directories(path)
        if len(all_cf_dirs) != 0:
            directory_manager = DirectoryManager(
                parent_directory_path=path,
                sub_directory_name=all_cf_dirs[0])
            data_directory = directory_manager.get_data_directory()

            with open(data_directory / META_JSON, 'r') as meta_file:
                meta = meta_file.read()
            meta = json.loads(meta)
            inst.__dict__['_' + IS_RUN] = meta[IS_RUN]
            inst.__dict__['_' + IS_ADDED] = meta[IS_ADDED]

            inst.__dict__[EXPLANATION] = None
            explanation_path = data_directory / ManagerNames.EXPLAINER
            if explanation_path.exists():
                explanation = load_explanation(explanation_path)
                inst.__dict__[EXPLANATION] = explanation
        else:
            inst.__dict__['_' + IS_RUN] = False
            inst.__dict__['_' + IS_ADDED] = False
            inst.__dict__[EXPLANATION] = None

        inst.__dict__['_' + MODEL] = rai_insights.model
        inst.__dict__['_' + CLASSES] = rai_insights._classes
        inst.__dict__['_' + CATEGORICAL_FEATURES] = \
            rai_insights.categorical_features
        target_column = rai_insights.target_column
        train = rai_insights.train.drop(columns=[target_column])
        test = rai_insights.test.drop(columns=[target_column])
        inst.__dict__[U_INITIALIZATION_EXAMPLES] = train
        inst.__dict__[U_EVALUATION_EXAMPLES] = test
        inst.__dict__['_' + FEATURES] = list(train.columns)

        # reset the surrogate model
        inst._initialize_surrogate_model()

        return inst
示例#5
0
    def test_causal_save_and_load(self, housing_data, tmpdir):
        train_df, test_df, target_feature = housing_data

        save_dir = tmpdir.mkdir('save-dir')

        insights = RAIInsights(None, train_df, test_df, target_feature,
                               ModelTask.REGRESSION)

        insights.causal.add(['AveRooms'])
        insights.compute()

        pre_results = insights.causal.get()
        pre_result = pre_results[0]

        insights.causal._save(save_dir)
        manager = insights.causal._load(save_dir, insights)
        post_results = manager.get()
        post_result = post_results[0]
        assert post_result.id == pre_result.id
        assert post_result.causal_analysis is not None
        assert post_result.global_effects is not None
        assert post_result.local_effects is not None
        assert post_result.policies is not None

        # Remove the causal analysis models to test the loading of
        # causal models in case there is error in loading of the causal
        # models.
        all_causal_dirs = DirectoryManager.list_sub_directories(save_dir)
        for causal_dir in all_causal_dirs:
            dm = DirectoryManager(parent_directory_path=save_dir,
                                  sub_directory_name=causal_dir)
            causal_analysis_pkl_file_path = \
                dm.get_data_directory() / "causal_analysis.pkl"
            os.remove(causal_analysis_pkl_file_path)

        model_load_err = ('ERROR-LOADING-EXPLAINER: '
                          'There was an error loading the explainer. '
                          'Some of RAI dashboard features may not work.')
        with pytest.warns(UserWarning, match=model_load_err):
            manager = insights.causal._load(save_dir, insights)
        post_results = manager.get()
        post_result = post_results[0]
        assert post_result.id == pre_result.id
        assert post_result.causal_analysis is None
        assert post_result.global_effects is not None
        assert post_result.local_effects is not None
        assert post_result.policies is not None
示例#6
0
    def test_directory_manager(self, tmpdir, create_parent_directory):
        if create_parent_directory:
            parent_directory = tmpdir.mkdir('parent_directory')
        else:
            parent_directory = tmpdir / 'parent_directory'
        dm_one = DirectoryManager(parent_directory_path=parent_directory,
                                  sub_directory_name='known')

        assert dm_one.parent_directory_path.exists()
        assert dm_one.sub_directory_name == 'known'
        assert (dm_one.parent_directory_path /
                dm_one.sub_directory_name).exists()

        self._verify_directory_manager_operations(dm_one)

        assert isinstance(
            DirectoryManager.list_sub_directories(parent_directory), list)
        assert len(
            DirectoryManager.list_sub_directories(parent_directory)) == 1
        assert 'known' in\
            DirectoryManager.list_sub_directories(parent_directory)

        dm_two = DirectoryManager(parent_directory_path=parent_directory)

        assert dm_two.parent_directory_path.exists()
        assert dm_two.sub_directory_name is not None
        assert (dm_two.parent_directory_path /
                dm_two.sub_directory_name).exists()

        self._verify_directory_manager_operations(dm_two)

        assert isinstance(
            DirectoryManager.list_sub_directories(parent_directory), list)
        assert len(
            DirectoryManager.list_sub_directories(parent_directory)) == 2
        assert dm_two.sub_directory_name in\
            DirectoryManager.list_sub_directories(parent_directory)
示例#7
0
    def _load(path, rai_insights):
        """Load the CounterfactualManager from the given path.

        :param path: The directory path to load the CounterfactualManager from.
        :type path: str
        :param rai_insights: The loaded parent RAIInsights.
        :type rai_insights: RAIInsights
        :return: The CounterfactualManager manager after loading.
        :rtype: CounterfactualManager
        """
        inst = CounterfactualManager.__new__(CounterfactualManager)

        # Rehydrate model analysis data
        inst.__dict__[CounterfactualManager._MODEL] = rai_insights.model
        inst.__dict__[CounterfactualManager._TRAIN] = rai_insights.train
        inst.__dict__[CounterfactualManager._TEST] = rai_insights.test
        inst.__dict__[CounterfactualManager._TARGET_COLUMN] = \
            rai_insights.target_column
        inst.__dict__[CounterfactualManager._TASK_TYPE] = \
            rai_insights.task_type
        inst.__dict__[CounterfactualManager._CATEGORICAL_FEATURES] = \
            rai_insights.categorical_features

        inst.__dict__[CounterfactualManager._COUNTERFACTUAL_CONFIG_LIST] = []

        # DirectoryManager.ensure_dir_exists(path)
        all_cf_dirs = DirectoryManager.list_sub_directories(path)
        for counterfactual_config_dir in all_cf_dirs:
            directory_manager = DirectoryManager(
                parent_directory_path=path,
                sub_directory_name=counterfactual_config_dir)

            counterfactual_config = CounterfactualConfig.load_config(
                directory_manager.get_config_directory())

            counterfactual_config.load_result(
                directory_manager.get_data_directory())

            counterfactual_config.load_explainer(
                directory_manager.get_generators_directory())

            if counterfactual_config.explainer is None:
                explainer_load_err = (
                    'ERROR-LOADING-COUNTERFACTUAL-EXPLAINER: '
                    'There was an error loading the '
                    'counterfactual explainer model. '
                    'Retraining the counterfactual '
                    'explainer.')
                warnings.warn(explainer_load_err)
                counterfactual_config.explainer = \
                    inst._create_diceml_explainer(
                        counterfactual_config.method,
                        counterfactual_config.continuous_features)

            if counterfactual_config.counterfactual_obj is not None:
                # Validate the serialized output against schema
                schema = CounterfactualManager._get_counterfactual_schema(
                    version=counterfactual_config.counterfactual_obj.
                    metadata['version'])
                jsonschema.validate(
                    json.loads(
                        counterfactual_config.counterfactual_obj.to_json()),
                    schema)

            inst.__dict__[
                CounterfactualManager._COUNTERFACTUAL_CONFIG_LIST].append(
                    counterfactual_config)

        return inst
    def test_counterfactual_manager_save_load(self, tmpdir):
        X_train, X_test, y_train, y_test, feature_names, _ = \
            create_iris_data()

        model = create_lightgbm_classifier(X_train, y_train)
        X_train['target'] = y_train
        X_test['target'] = y_test

        rai_insights = RAIInsights(model=model,
                                   train=X_train,
                                   test=X_test.iloc[0:10],
                                   target_column='target',
                                   task_type='classification')

        rai_insights.counterfactual.add(
            total_CFs=10,
            desired_class=2,
            features_to_vary=[feature_names[0]],
            permitted_range={feature_names[0]: [2.0, 5.0]})
        rai_insights.counterfactual.add(
            total_CFs=10,
            desired_class=1,
            features_to_vary=[feature_names[0]],
            permitted_range={feature_names[0]: [2.0, 5.0]})
        rai_insights.counterfactual.compute()

        counterfactual_config_list_before_save = \
            rai_insights.counterfactual._counterfactual_config_list
        assert len(counterfactual_config_list_before_save) == 2
        assert len(rai_insights.counterfactual.get()) == 2
        cf_obj_1 = rai_insights.counterfactual.get()[0]
        assert cf_obj_1 is not None
        cf_obj_2 = rai_insights.counterfactual.get()[1]
        assert cf_obj_2 is not None

        save_dir = tmpdir.mkdir('save-dir')
        rai_insights.save(save_dir)
        rai_insights_copy = RAIInsights.load(save_dir)

        counterfactual_config_list_after_save = \
            rai_insights_copy.counterfactual._counterfactual_config_list
        assert len(rai_insights_copy.counterfactual.get()) == 2
        cf_obj_1 = rai_insights_copy.counterfactual.get()[0]
        assert cf_obj_1 is not None
        cf_obj_2 = rai_insights_copy.counterfactual.get()[1]
        assert cf_obj_2 is not None

        assert counterfactual_config_list_before_save[0].id in \
            [counterfactual_config_list_after_save[0].id,
             counterfactual_config_list_after_save[1].id]
        assert counterfactual_config_list_before_save[1].id in \
            [counterfactual_config_list_after_save[0].id,
             counterfactual_config_list_after_save[1].id]

        # Delete the dice-ml explainer directory so that the dice-ml
        # explainer can be re-trained rather being loaded from the
        # disc
        counterfactual_path = save_dir / "counterfactual"
        all_cf_dirs = DirectoryManager.list_sub_directories(
            counterfactual_path)
        for counterfactual_config_dir in all_cf_dirs:
            directory_manager = DirectoryManager(
                parent_directory_path=counterfactual_path,
                sub_directory_name=counterfactual_config_dir)
            explainer_pkl_path = \
                directory_manager.get_generators_directory() / "explainer.pkl"
            os.remove(explainer_pkl_path)

        with pytest.warns(UserWarning,
                          match='ERROR-LOADING-COUNTERFACTUAL-EXPLAINER: '
                          'There was an error loading the '
                          'counterfactual explainer model. '
                          'Retraining the counterfactual '
                          'explainer.'):
            rai_insights_copy_new = RAIInsights.load(save_dir)
        counterfactual_config_list = \
            rai_insights_copy_new.counterfactual._counterfactual_config_list
        assert len(counterfactual_config_list) == 2
        assert counterfactual_config_list[0].explainer is not None
        assert counterfactual_config_list[1].explainer is not None
    def _load(path, rai_insights):
        """
        Load the DataBalanceManager from the given path.

        :param path: The directory path to load the DataBalanceManager from.
        :type path: str
        :param rai_insights: The loaded parent RAIInsights.
        :type rai_insights: RAIInsights
        :return: The DataBalanceManager after loading.
        :rtype: DataBalanceManager
        """
        # create the DataBalanceManager without any properties using the
        # __new__ function, similar to pickle
        inst = DataBalanceManager.__new__(DataBalanceManager)

        inst.__dict__["_train"] = rai_insights.train
        inst.__dict__["_test"] = rai_insights.test

        is_added = False
        cols_of_interest = None
        task_type = rai_insights.task_type
        target_column = rai_insights.target_column
        classes = (list(map(str, rai_insights._classes))
                   if rai_insights._classes is not None else [])
        df = pd.concat([rai_insights.train, rai_insights.test])
        data_balance_measures = None

        all_db_dirs = DirectoryManager.list_sub_directories(path)
        if len(all_db_dirs) != 0:
            dir_manager = DirectoryManager(parent_directory_path=path,
                                           sub_directory_name=all_db_dirs[0])
            config_dir = dir_manager.get_config_directory()

            # Load manager
            with open(config_dir / MANAGER_JSON, "r") as f:
                manager_info = json.load(f)
                is_added = manager_info[Keys.IS_ADDED]
                task_type = manager_info[Keys.TASK_TYPE]
                cols_of_interest = manager_info[Keys.COLS_OF_INTEREST]
                target_column = manager_info[Keys.TARGET_COLUMN]
                classes = manager_info[Keys.CLASSES]

            # Load from data json
            data_path = dir_manager.get_data_directory() / DATA_JSON
            if data_path.exists():
                df = pd.read_json(data_path, orient="split")

            # Load measures
            measures_path = config_dir / MEASURES_JSON
            if measures_path.exists():
                with open(measures_path, "r") as f:
                    data_balance_measures = json.load(f)

        inst.__dict__["_is_added"] = is_added
        inst.__dict__["_task_type"] = task_type
        inst.__dict__["_cols_of_interest"] = cols_of_interest
        inst.__dict__["_target_column"] = target_column
        inst.__dict__["_classes"] = classes
        inst.__dict__["_df"] = df
        inst.__dict__["_data_balance_measures"] = data_balance_measures

        return inst