def feature_importance(self):
        '''
            train pairwise random forests and average the feature importance from all trees
        '''
        matplotlib.pyplot.close()
        self.logger.info("Plotting feature importance........")
        self.scenario.feature_data = self.scenario.feature_data.fillna(
            self.scenario.feature_data.mean())

        pc = PairwiseClassifier(classifier_class=RandomForest)
        config = {}
        config["rf:n_estimators"] = 100
        config["rf:max_features"] = "auto"
        config["rf:criterion"] = "gini"
        config["rf:max_depth"] = None
        config["rf:min_samples_split"] = 2
        config["rf:min_samples_leaf"] = 1
        config["rf:bootstrap"] = True
        pc.fit(scenario=self.scenario, config=config)

        importances = [
            rf.model.feature_importances_ for rf in pc.classifiers
            if np.isnan(rf.model.feature_importances_).sum() == 0
        ]
        median_importance = np.median(importances, axis=0)
        q25 = np.percentile(importances, 0.25, axis=0)
        q75 = np.percentile(importances, 0.75, axis=0)

        feature_names = np.array(self.scenario.feature_data.columns)

        # sort features by average importance and look only at the first 15
        # features
        N_FEAT = min(feature_names.shape[0], 15)
        indices = np.argsort(median_importance)[::-1]
        median_importance = median_importance[indices][:N_FEAT]
        q25 = q25[indices][:N_FEAT]
        q75 = q75[indices][:N_FEAT]
        feature_names = feature_names[indices[:N_FEAT]]

        plt.figure()
        # only the first 10 most important features
        plt.bar(range(N_FEAT),
                median_importance,
                color="r",
                yerr=[q25, q75],
                align="center")

        plt.xlim([-1, N_FEAT])
        plt.xticks(range(N_FEAT), feature_names, rotation=40, ha='right')
        plt.tight_layout()
        out_fn = os.path.join(self.output_dn, "feature_importance.png")
        plt.savefig(out_fn, format="png")

        return out_fn
Exemplo n.º 2
0
    def fit_selector(self, scenario: ASlibScenario, config: Configuration):
        '''
            fits an algorithm selector for a given scenario wrt a given configuration

            Arguments
            ---------
            scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario at hand
            config: Configuration
                parameter configuration
        '''

        if config.get("selector") == "PairwiseClassifier":

            clf_class = None
            if config.get("classifier") == "RandomForest":
                clf_class = RandomForest
            if config.get("classifier") == "XGBoost":
                clf_class = XGBoost

            selector = PairwiseClassifier(classifier_class=clf_class)
            selector.fit(scenario=scenario, config=config)

        if config.get("selector") == "PairwiseRegressor":
            reg_class = None
            if config.get("regressor") == "RandomForestRegressor":
                reg_class = RandomForestRegressor
                
            selector = PairwiseRegression(regressor_class=reg_class)
            selector.fit(scenario=scenario, config=config)

        return selector
Exemplo n.º 3
0
    def get_cs(self, scenario: ASlibScenario):
        '''
            returns the parameter configuration space of AutoFolio
            (based on the automl config space: https://github.com/automl/ConfigSpace)

            Arguments
            ---------
            scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario at hand
        '''

        self.cs = ConfigurationSpace()

        # add feature steps as binary parameters
        for fs in scenario.feature_steps:
            fs_param = CategoricalHyperparameter(name="fgroup_%s" % (
                fs), choices=[True, False], default=fs in scenario.feature_steps_default)
            self.cs.add_hyperparameter(fs_param)

        # preprocessing
        PCAWrapper.add_params(self.cs)
        ImputerWrapper.add_params(self.cs)
        StandardScalerWrapper.add_params(self.cs)

        # Pre-Solving
        if scenario.performance_type[0] == "runtime":
            Aspeed.add_params(
                cs=self.cs, cutoff=scenario.algorithm_cutoff_time)

        # classifiers
        RandomForest.add_params(self.cs)
        XGBoost.add_params(self.cs)
       
        # regressors
        RandomForestRegressor.add_params(self.cs)

        # selectors
        PairwiseClassifier.add_params(self.cs)
        PairwiseRegression.add_params(self.cs)       

        self.logger.debug(self.cs)

        return self.cs
Exemplo n.º 4
0
    def fit_selector(self, scenario: ASlibScenario, config: Configuration):
        '''
            fits an algorithm selector for a given scenario wrt a given configuration

            Arguments
            ---------
            scenario: autofolio.data.aslib_scenario.ASlibScenario
                aslib scenario at hand
            config: Configuration
                parameter configuration
        '''

        if config.get("selector") == "PairwiseClassifier":

            clf_class = None
            if config.get("classifier") == "RandomForest":
                clf_class = RandomForest

            selector = PairwiseClassifier(classifier_class=clf_class)
            selector.fit(scenario=scenario, config=config)

        return selector
Exemplo n.º 5
0
    def get_cs(self, scenario: ASlibScenario):
        '''
            returns the parameter configuration space of AutoFolio
            (based on the automl config space: https://github.com/automl/ConfigSpace)

            Arguments
            ---------
            scenario: autofolio.data.aslib_scenario.ASlibScenario
                aslib scenario at hand
        '''

        self.cs = ConfigurationSpace()

        # add feature steps as binary parameters
        for fs in scenario.feature_steps:
            fs_param = CategoricalHyperparameter(name="fgroup_%s" % (
                fs), choices=[True, False], default=fs in scenario.feature_steps_default)
            self.cs.add_hyperparameter(fs_param)

        # preprocessing
        PCAWrapper.add_params(self.cs)
        ImputerWrapper.add_params(self.cs)
        StandardScalerWrapper.add_params(self.cs)

        # Pre-Solving
        if scenario.performance_type[0] == "runtime":
            Aspeed.add_params(
                cs=self.cs, cutoff=scenario.algorithm_cutoff_time)

        # classifiers
        RandomForest.add_params(self.cs)

        # selectors
        PairwiseClassifier.add_params(self.cs)

        return self.cs
Exemplo n.º 6
0
    def get_cs(self, scenario: ASlibScenario, autofolio_config:dict=None):
        '''
            returns the parameter configuration space of AutoFolio
            (based on the automl config space: https://github.com/automl/ConfigSpace)

            Arguments
            ---------
            scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario at hand

            autofolio_config: dict, or None
                An optional dictionary of configuration options
        '''

        self.cs = ConfigurationSpace()

        # only allow the feature groups specified in the config file
        # by default, though, all of the feature groups are allowed.
        allowed_feature_groups = autofolio_config.get("allowed_feature_groups", 
            scenario.feature_steps)

        if len(allowed_feature_groups) == 0:
            msg = "Please ensure at least one feature group is allowed"
            raise ValueError(msg)


        if len(allowed_feature_groups) == 1: 
            choices = [True] # if we only have one feature group, it has to be active 
        else:
            choices = [True, False]
        default = True

        for fs in allowed_feature_groups:
            
            fs_param = CategoricalHyperparameter(name="fgroup_%s" % (fs),
                choices=choices, default_value=default)
            self.cs.add_hyperparameter(fs_param)

        # preprocessing
        if autofolio_config.get("pca", True):
            PCAWrapper.add_params(self.cs)

        if autofolio_config.get("impute", True):
            ImputerWrapper.add_params(self.cs)

        if autofolio_config.get("scale", True):
            StandardScalerWrapper.add_params(self.cs)

        # Pre-Solving
        if scenario.performance_type[0] == "runtime":
            if autofolio_config.get("presolve", True):
                Aspeed.add_params(
                    cs=self.cs, cutoff=scenario.algorithm_cutoff_time)

        if autofolio_config.get("classifier"):
            # fix parameter
            cls_choices = [autofolio_config["classifier"]]
            cls_def = autofolio_config["classifier"]
        else:
            cls_choices = ["RandomForest","XGBoost"]
            cls_def = "RandomForest"
        classifier = CategoricalHyperparameter(
                "classifier", choices=cls_choices, 
                default_value=cls_def)

        self.cs.add_hyperparameter(classifier)

        RandomForest.add_params(self.cs)
        XGBoost.add_params(self.cs)

        if autofolio_config.get("regressor"):
            # fix parameter
            reg_choices = [autofolio_config["regressor"]]
            reg_def = autofolio_config["regressor"]
        else:
            reg_choices = ["RandomForestRegressor"]
            reg_def = "RandomForestRegressor"

        regressor = CategoricalHyperparameter(
                "regressor", choices=reg_choices, default_value=reg_def)
        self.cs.add_hyperparameter(regressor)
        RandomForestRegressor.add_params(self.cs)

        # selectors
        if autofolio_config.get("selector"):
            # fix parameter
            sel_choices = [autofolio_config["selector"]]
            sel_def = autofolio_config["selector"]
        else:
            sel_choices = ["PairwiseClassifier","PairwiseRegressor"]
            sel_def = "PairwiseClassifier"
            
        selector = CategoricalHyperparameter(
                "selector", choices=sel_choices, default_value=sel_def)
        self.cs.add_hyperparameter(selector)
        PairwiseClassifier.add_params(self.cs)
        PairwiseRegression.add_params(self.cs)  

        self.logger.debug(self.cs)

        return self.cs