Пример #1
0
def validate_state_directory(path, manager_type):
    """Assert that a saved manager directory has the expected layout.

    Every entry found under ``path / manager_type`` must be named with a
    well-formed UUID string. For the explainer, counterfactual, error
    analysis and causal managers, the presence or absence of the config,
    data and generators directories of each entry is asserted as well;
    any other manager type only gets the name check.

    :param path: Directory holding one sub-directory per manager; it must
        support the ``/`` operator.
    :param manager_type: Name of the manager sub-directory to validate.
    :raises AssertionError: If the manager directory is missing or an
        entry does not have the expected layout.
    :raises ValueError: If an entry name is not a well-formed UUID string.
    """
    assert manager_type in os.listdir(path)

    # Expected existence of (config, data, generators) per manager type.
    layouts = (
        (ManagerNames.EXPLAINER, (False, True, False)),
        (ManagerNames.COUNTERFACTUAL, (True, True, True)),
        (ManagerNames.ERROR_ANALYSIS, (True, True, False)),
        (ManagerNames.CAUSAL, (False, True, False)),
    )
    expected = next(
        (flags for name, flags in layouts if manager_type == name), None)

    manager_root = path / manager_type
    for component_name in os.listdir(manager_root):
        # The UUID constructor rejects names that are not UUID strings.
        UUID(component_name, version=4)

        layout = DirectoryManager(manager_root, component_name)
        config_dir = layout.get_config_directory()
        data_dir = layout.get_data_directory()
        generators_dir = layout.get_generators_directory()

        if expected is None:
            # Manager types without a known layout are not checked further.
            continue
        has_config, has_data, has_generators = expected
        assert config_dir.exists() == has_config
        assert data_dir.exists() == has_data
        assert generators_dir.exists() == has_generators
Пример #2
0
    def _load(path, rai_insights):
        """Rebuild an ErrorAnalysisManager from its saved state on disk.

        For every sub-directory of ``path`` the ``config.json`` file of its
        config directory and the ``report.json`` file of its data directory
        are read. Each report is validated against the error analysis
        schema before it is kept.

        :param path: The directory path to load the ErrorAnalysisManager
            from.
        :type path: str
        :param rai_insights: The loaded parent RAIInsights.
        :type rai_insights: RAIInsights
        :return: The ErrorAnalysisManager manager after loading.
        :rtype: ErrorAnalysisManager
        """
        # Like pickle, build the instance through __new__ so that __init__
        # does not run; the attributes are filled in by hand below.
        manager = ErrorAnalysisManager.__new__(ErrorAnalysisManager)

        configs = []
        reports = []
        for sub_dir in DirectoryManager.list_sub_directories(path):
            dir_manager = DirectoryManager(parent_directory_path=path,
                                           sub_directory_name=sub_dir)

            config_file = dir_manager.get_config_directory() / 'config.json'
            with open(config_file, 'r') as stream:
                config = json.load(stream, object_hook=as_error_config)
            configs.append(config)

            report_file = dir_manager.get_data_directory() / 'report.json'
            with open(report_file, 'r') as stream:
                report = json.load(stream, object_hook=as_error_report)
            # Check the re-serialized report against the schema.
            schema = ErrorAnalysisManager._get_error_analysis_schema()
            jsonschema.validate(json.loads(report.to_json()), schema)
            reports.append(report)

        cat_features = rai_insights.categorical_features
        target = rai_insights.target_column
        labels = rai_insights.test[target]
        features = rai_insights.test.drop(columns=[target])
        columns = list(features.columns)

        manager.__dict__.update({
            '_ea_report_list': reports,
            '_ea_config_list': configs,
            '_categorical_features': cat_features,
            '_dataset': features,
            '_true_y': labels,
            '_feature_names': columns,
            '_analyzer': ModelAnalyzer(rai_insights.model, features, labels,
                                       columns, cat_features),
        })
        return manager
Пример #3
0
    def _load(path, rai_insights):
        """Rebuild a CounterfactualManager from its saved state on disk.

        The model, datasets and task settings are taken from the parent
        ``rai_insights``; one counterfactual config is restored for each
        sub-directory found under ``path``. When the explainer of a config
        is ``None`` after loading, a warning is issued and a new explainer
        is created in its place.

        :param path: The directory path to load the CounterfactualManager
            from.
        :type path: str
        :param rai_insights: The loaded parent RAIInsights.
        :type rai_insights: RAIInsights
        :return: The CounterfactualManager manager after loading.
        :rtype: CounterfactualManager
        """
        manager = CounterfactualManager.__new__(CounterfactualManager)
        state = manager.__dict__
        config_list_key = CounterfactualManager._COUNTERFACTUAL_CONFIG_LIST

        # Restore the data shared with the parent before touching the
        # disk: creating a replacement explainer below is done through the
        # instance itself.
        state.update({
            CounterfactualManager._MODEL: rai_insights.model,
            CounterfactualManager._TRAIN: rai_insights.train,
            CounterfactualManager._TEST: rai_insights.test,
            CounterfactualManager._TARGET_COLUMN:
                rai_insights.target_column,
            CounterfactualManager._TASK_TYPE: rai_insights.task_type,
            CounterfactualManager._CATEGORICAL_FEATURES:
                rai_insights.categorical_features,
            config_list_key: [],
        })

        for sub_dir in DirectoryManager.list_sub_directories(path):
            dir_manager = DirectoryManager(parent_directory_path=path,
                                           sub_directory_name=sub_dir)

            cf_config = CounterfactualConfig.load_config(
                dir_manager.get_config_directory())
            cf_config.load_result(dir_manager.get_data_directory())
            cf_config.load_explainer(dir_manager.get_generators_directory())

            if cf_config.explainer is None:
                warnings.warn(
                    'ERROR-LOADING-COUNTERFACTUAL-EXPLAINER: '
                    'There was an error loading the counterfactual '
                    'explainer model. Retraining the counterfactual '
                    'explainer.')
                cf_config.explainer = manager._create_diceml_explainer(
                    cf_config.method, cf_config.continuous_features)

            cf_obj = cf_config.counterfactual_obj
            if cf_obj is not None:
                # Check the re-serialized result against the schema of the
                # version recorded in its metadata.
                schema = CounterfactualManager._get_counterfactual_schema(
                    version=cf_obj.metadata['version'])
                jsonschema.validate(json.loads(cf_obj.to_json()), schema)

            state[config_list_key].append(cf_config)

        return manager
    def _load(path, rai_insights):
        """
        Rebuild a DataBalanceManager from its saved state on disk.

        The settings start out from the parent ``rai_insights`` and are
        overridden by the files of the first sub-directory found under
        ``path``, when there is one.

        :param path: The directory path to load the DataBalanceManager from.
        :type path: str
        :param rai_insights: The loaded parent RAIInsights.
        :type rai_insights: RAIInsights
        :return: The DataBalanceManager after loading.
        :rtype: DataBalanceManager
        """
        # Like pickle, build the instance through __new__ so that __init__
        # does not run; the attributes are filled in by hand below.
        manager = DataBalanceManager.__new__(DataBalanceManager)
        manager.__dict__["_train"] = rai_insights.train
        manager.__dict__["_test"] = rai_insights.test

        # Values used when nothing has been saved for this manager.
        parent_classes = rai_insights._classes
        restored = {
            "_is_added": False,
            "_task_type": rai_insights.task_type,
            "_cols_of_interest": None,
            "_target_column": rai_insights.target_column,
            "_classes": ([] if parent_classes is None
                         else [str(label) for label in parent_classes]),
            "_df": pd.concat([rai_insights.train, rai_insights.test]),
            "_data_balance_measures": None,
        }

        sub_dirs = DirectoryManager.list_sub_directories(path)
        if len(sub_dirs) > 0:
            # Only the first sub-directory is read.
            layout = DirectoryManager(parent_directory_path=path,
                                      sub_directory_name=sub_dirs[0])
            config_dir = layout.get_config_directory()

            # Manager settings
            with open(config_dir / MANAGER_JSON, "r") as stream:
                saved = json.load(stream)
                restored["_is_added"] = saved[Keys.IS_ADDED]
                restored["_task_type"] = saved[Keys.TASK_TYPE]
                restored["_cols_of_interest"] = saved[Keys.COLS_OF_INTEREST]
                restored["_target_column"] = saved[Keys.TARGET_COLUMN]
                restored["_classes"] = saved[Keys.CLASSES]

            # Saved dataset, replacing the train/test concatenation
            data_file = layout.get_data_directory() / DATA_JSON
            if data_file.exists():
                restored["_df"] = pd.read_json(data_file, orient="split")

            # Saved measures
            measures_file = config_dir / MEASURES_JSON
            if measures_file.exists():
                with open(measures_file, "r") as stream:
                    restored["_data_balance_measures"] = json.load(stream)

        manager.__dict__.update(restored)
        return manager