def __init__(self,
             perf_fn: str,
             feat_fn: str,
             objective: str = "solution_quality",
             runtime_cutoff: float = None,
             maximize: bool = True,
             cv_fn: str = None,
             seed: int = 12345):
        """Load the ASlib scenario from CSV files and set up AutoFolio.

        perf_fn: path to the performance CSV
        feat_fn: path to the feature CSV
        objective: "solution_quality" or "runtime"
        runtime_cutoff: cutoff for runtime scenarios (None for quality)
        maximize: whether larger performance values are better
        cv_fn: optional CSV with a predefined cross-validation split
        seed: random seed handed to AutoFolio
        """
        scenario = ASlibScenario()
        scenario.read_from_csv(perf_fn=perf_fn,
                               feat_fn=feat_fn,
                               objective=objective,
                               runtime_cutoff=runtime_cutoff,
                               maximize=maximize,
                               cv_fn=cv_fn)
        self.scenario = scenario
        self.seed = seed
        self.af = AutoFolio(random_seed=seed)
        self.logger = logging.getLogger("AF Facade")

        # Filled in by later fit/tune calls; None until a model exists.
        self.feature_pre_pipeline = None
        self.pre_solver = None
        self.selector = None
        self.config = None
        self.cs = None
# ----- Example #2 (code-scraper marker; vote count: 0) -----
    def _save_model(self, out_fn: str, scenario: ASlibScenario,
                    feature_pre_pipeline: list, pre_solver: Aspeed, selector,
                    config: Configuration):
        '''
            persist the fitted AutoFolio pipeline to disk via pickle

            Arguments
            ---------
            out_fn: str
                path of the pickle file to write
            scenario: AslibScenario
                ASlib scenario with all the data
            feature_pre_pipeline: list
                fitted feature-preprocessing steps
            pre_solver: Aspeed
                pre-solving schedule object (may be falsy/None)
            selector: autofolio.selector.*
                fitted selector object
            config: Configuration
                parameter configuration the pipeline was fit with
        '''
        # Strip logger attributes from every component before pickling.
        scenario.logger = None
        selector.logger = None
        for step in feature_pre_pipeline:
            step.logger = None
        if pre_solver:
            pre_solver.logger = None

        payload = [scenario, feature_pre_pipeline, pre_solver, selector, config]
        with open(out_fn, "bw") as handle:
            pickle.dump(payload, handle)
    def transform(self, scenario: ASlibScenario):
        '''
            impute missing feature values of an ASlib scenario in place

            Arguments
            ---------
            scenario: data.aslib_scenario.ASlibScenario
                ASlib Scenario with all data in pandas

            Returns
            -------
            data.aslib_scenario.ASlibScenario
                the same scenario with imputed feature data
        '''
        self.logger.debug("Impute Missing Feature Values")

        raw = np.array(scenario.feature_data.values)
        imputed = self.imputer.transform(raw)
        # Rebuild the frame with the original index/columns around the
        # imputed values.
        scenario.feature_data = pd.DataFrame(
            data=imputed,
            index=scenario.feature_data.index,
            columns=scenario.feature_data.columns)

        return scenario
# ----- Example #4 (code-scraper marker; vote count: 0) -----
    def transform(self, scenario: ASlibScenario):
        '''
            project the scenario's feature data with the fitted PCA, if any

            Arguments
            ---------
            scenario: data.aslib_scenario.ASlibScenario
                ASlib Scenario with all data in pandas

            Returns
            -------
            data.aslib_scenario.ASlibScenario
                scenario with PCA-projected features (unchanged if no PCA)
        '''
        # No-op when PCA was not fitted/enabled.
        if not self.pca:
            return scenario

        self.logger.debug("Applying PCA")
        projected = self.pca.transform(np.array(scenario.feature_data.values))

        # PCA components have no original names; label them f0..f<k-1>.
        new_columns = ["f%d" % (i) for i in range(projected.shape[1])]
        scenario.feature_data = pd.DataFrame(
            data=projected,
            index=scenario.feature_data.index,
            columns=new_columns)

        return scenario
    def transform(self, scenario: ASlibScenario):
        '''
            scale the scenario's feature data with the fitted scaler, if any

            Arguments
            ---------
            scenario: data.aslib_scenario.ASlibScenario
                ASlib Scenario with all data in pandas

            Returns
            -------
            data.aslib_scenario.ASlibScenario
                scenario with scaled features (unchanged if no scaler)
        '''
        # No-op when scaling was not fitted/enabled.
        if not self.scaler:
            return scenario

        self.logger.debug("Applying StandardScaler")
        scaled = self.scaler.transform(np.array(scenario.feature_data.values))

        # Column names and index are preserved; only the values change.
        scenario.feature_data = pd.DataFrame(
            data=scaled,
            index=scenario.feature_data.index,
            columns=scenario.feature_data.columns)

        return scenario
# ----- Example #6 (code-scraper marker; vote count: 0) -----
    def run_cli(self):
        '''
            main method of AutoFolio based on command line interface

            Depending on the parsed arguments this either
            * loads a saved model and predicts on one feature vector,
            * runs an outer cross validation,
            * fits (and optionally saves) a model, or
            * runs an internal cross validation,
            optionally followed by validation on separate test data.
        '''

        cmd_parser = CMDParser()
        args_, self.overwrite_args = cmd_parser.parse()

        self._root_logger.setLevel(args_.verbose)

        if args_.load:
            # prediction mode: restore a saved model and predict a schedule
            # for the single feature vector given on the command line
            pred = self.read_model_and_predict(
                model_fn=args_.load,
                feature_vec=list(map(float, args_.feature_vec.split(" "))))
            print("Selected Schedule [(algorithm, budget)]: %s" % (pred))

        else:

            # read the training scenario, either in ASlib directory format
            # or from a pair of performance/feature CSV files
            scenario = ASlibScenario()
            if args_.scenario:
                scenario.read_scenario(args_.scenario)
            elif args_.performance_csv and args_.feature_csv:
                scenario.read_from_csv(perf_fn=args_.performance_csv,
                                       feat_fn=args_.feature_csv,
                                       objective=args_.objective,
                                       runtime_cutoff=args_.runtime_cutoff,
                                       maximize=args_.maximize,
                                       cv_fn=args_.cv_csv)
            else:
                raise ValueError("Missing inputs to read scenario data.")

            # optional held-out test data for final validation
            test_scenario = None
            if args_.performance_test_csv and args_.feature_test_csv:
                test_scenario = ASlibScenario()
                test_scenario.read_from_csv(
                    perf_fn=args_.performance_test_csv,
                    feat_fn=args_.feature_test_csv,
                    objective=args_.objective,
                    runtime_cutoff=args_.runtime_cutoff,
                    maximize=args_.maximize,
                    cv_fn=None)

            config = {}
            if args_.config is not None:
                self.logger.info("Reading yaml config file")
                # BUGFIX: use a context manager so the file handle is closed,
                # and safe_load instead of yaml.load without a Loader (the
                # latter is deprecated and unsafe on untrusted input). An
                # empty yaml file parses to None, so fall back to {}.
                with open(args_.config) as config_fh:
                    config = yaml.safe_load(config_fh) or {}
            # command-line values only fill options the yaml did not set
            if not config.get("wallclock_limit"):
                config["wallclock_limit"] = args_.wallclock_limit
            if not config.get("runcount_limit"):
                config["runcount_limit"] = args_.runcount_limit
            if not config.get("output-dir"):
                config["output-dir"] = args_.output_dir

            self.cs = self.get_cs(scenario, config)

            if args_.outer_cv:
                self._outer_cv(scenario,
                               config,
                               args_.outer_cv_fold,
                               args_.out_template,
                               smac_seed=args_.smac_seed)
                return 0

            if args_.tune:
                # hyperparameter tuning via SMAC
                config = self.get_tuned_config(
                    scenario,
                    wallclock_limit=args_.wallclock_limit,
                    runcount_limit=args_.runcount_limit,
                    autofolio_config=config,
                    seed=args_.smac_seed)
            else:
                config = self.cs.get_default_configuration()
            self.logger.debug(config)

            if args_.save:
                # fit on the full scenario and persist the trained model
                feature_pre_pipeline, pre_solver, selector = self.fit(
                    scenario=scenario, config=config)
                self._save_model(args_.save, scenario, feature_pre_pipeline,
                                 pre_solver, selector, config)
            else:
                self.run_cv(config=config,
                            scenario=scenario,
                            folds=int(scenario.cv_data.max().max()))

            if test_scenario is not None:
                # validate on the provided test data (fold id is unused when
                # test_scenario is given); the returned stats object was
                # never used here, so the dead assignment was dropped
                self.run_fold(config=config,
                              fold=0,
                              return_fit=False,
                              scenario=scenario,
                              test_scenario=test_scenario)
# ----- Example #7 (code-scraper marker; vote count: 0) -----
    def run_fold(self,
                 config: Configuration,
                 scenario: ASlibScenario,
                 fold: int,
                 test_scenario=None,
                 return_fit: bool = False):
        '''
            fit on the training part of one CV fold (or the whole scenario)
            and validate the resulting pipeline

            Arguments
            ---------
            config: Configuration
                parameter configuration to use for preprocessing
            scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario at hand
            fold: int
                fold id (ignored when test_scenario is given)
            test_scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario with test data for validation;
                derived from <scenario> via get_split() if None
            return_fit: bool
                also return the fitted pipeline components and the schedules

            Returns
            -------
            Stats()

            (pre_pipeline, pre_solver, selector):
                only present if return_fit is True

            schedule: dict of string -> list of (solver, cutoff) pairs
                only present if return_fit is True
        '''
        if test_scenario is None:
            self.logger.info("CV-Iteration: %d" % (fold))
            test_scenario, training_scenario = scenario.get_split(indx=fold)
        else:
            self.logger.info("Validation on test data")
            training_scenario = scenario

        # fit the whole pipeline on the training part only
        feature_pre_pipeline, pre_solver, selector = self.fit(
            scenario=training_scenario, config=config)

        # predict schedules for every test instance
        schedules = self.predict(test_scenario, config, feature_pre_pipeline,
                                 pre_solver, selector)

        # pick the validation routine matching the scenario's objective
        validator = Validator()
        perf_type = scenario.performance_type[0]
        if perf_type == "runtime":
            validate = validator.validate_runtime
        elif perf_type == "solution_quality":
            validate = validator.validate_quality
        else:
            raise ValueError("Unknown: %s" % (scenario.performance_type[0]))

        stats = validate(schedules=schedules,
                         test_scenario=test_scenario,
                         train_scenario=training_scenario)

        if not return_fit:
            return stats
        return stats, (feature_pre_pipeline, pre_solver,
                       selector), schedules
# ----- Example #8 (code-scraper marker; vote count: 0) -----
    def _outer_cv(self,
                  scenario: ASlibScenario,
                  autofolio_config: dict = None,
                  outer_cv_fold: int = None,
                  out_template: str = None,
                  smac_seed: int = 42):
        '''
            Evaluate on a scenario using an "outer" cross-fold validation
            scheme. In particular, this ensures that SMAC does not use the test
            set during hyperparameter optimization.

            Arguments
            ---------
            scenario: ASlibScenario
                ASlib Scenario at hand

            autofolio_config: dict, or None
                An optional dictionary of configuration options

            outer_cv_fold: int, or None
                If given, then only the single outer-cv fold is processed

            out_template: str, or None
                If given, the learned model and the per-instance solver
                selections are written to the specified locations. The string
                is treated as a `string.Template`, so "${fold}" and "${type}"
                are substituted with the fold id and the file type
                ("pkl" for the model, "csv" for the selections).

            smac_seed: int
                random seed for SMAC

            Returns
            -------
            stats: validate.Stats
                Performance over all outer-cv folds

        '''
        import string  # local import: only needed for Template substitution

        # aggregated statistics over all processed outer folds
        outer_stats = None

        # ASlib CV folds are numbered 1..10 by default
        outer_cv_folds = range(1, 11)
        if outer_cv_fold is not None:
            # restrict processing to the single requested fold
            outer_cv_folds = range(outer_cv_fold, outer_cv_fold + 1)

        for cv_fold in outer_cv_folds:

            # split off the outer test set; SMAC never sees it
            # NOTE(review): outer_testing is unused here because run_fold()
            # below re-derives the split from cv_fold itself
            outer_testing, outer_training = scenario.get_split(cv_fold)

            msg = ">>>>> Outer CV fold: {} <<<<<".format(cv_fold)
            self.logger.info(msg)

            # build a fresh inner CV on the training part for tuning
            outer_training.create_cv_splits(n_folds=10)

            # tune hyperparameters with SMAC on the inner CV only
            config = self.get_tuned_config(outer_training,
                                           autofolio_config=autofolio_config,
                                           seed=smac_seed)

            # evaluate the tuned configuration on the held-out outer fold
            stats, fit, schedule = self.run_fold(config,
                                                 scenario,
                                                 cv_fold,
                                                 return_fit=True)

            feature_pre_pipeline, pre_solver, selector = fit

            # merge this fold's stats into the running aggregate
            if outer_stats is None:
                outer_stats = stats
            else:
                outer_stats.merge(stats)

            # save the model, if given an output location
            if out_template is not None:
                out_template_ = string.Template(out_template)
                model_fn = out_template_.substitute(fold=cv_fold, type="pkl")

                msg = "Writing model to: {}".format(model_fn)
                self.logger.info(msg)

                self._save_model(model_fn, scenario, feature_pre_pipeline,
                                 pre_solver, selector, config)

                # convert the schedule (instance -> schedule list) to a frame
                schedule_df = pd.Series(schedule, name="solver")
                schedule_df.index.name = "instance"
                schedule_df = schedule_df.reset_index()

                # just keep the solver name; we don't care about the time

                # x[0] gets the first pair in the schedule list
                # and x[0][0] gets the name of the solver from that pair
                schedule_df['solver'] = schedule_df['solver'].apply(
                    lambda x: x[0][0])

                selections_fn = out_template_.substitute(fold=cv_fold,
                                                         type="csv")

                msg = "Writing solver choices to: {}".format(selections_fn)
                self.logger.info(msg)

                schedule_df.to_csv(selections_fn, index=False)

        self.logger.info(">>>>> Final Stats <<<<<")
        outer_stats.show()
class AFCsvFacade(object):
    """Convenience facade around AutoFolio for CSV-based scenarios.

    Bundles scenario loading, fitting, tuning, cross validation and
    prediction behind a small object-oriented API.
    """

    def __init__(self,
                 perf_fn: str,
                 feat_fn: str,
                 objective: str = "solution_quality",
                 runtime_cutoff: float = None,
                 maximize: bool = True,
                 cv_fn: str = None,
                 seed: int = 12345):
        """Load the scenario from CSV files and initialize AutoFolio."""
        scenario = ASlibScenario()
        scenario.read_from_csv(perf_fn=perf_fn,
                               feat_fn=feat_fn,
                               objective=objective,
                               runtime_cutoff=runtime_cutoff,
                               maximize=maximize,
                               cv_fn=cv_fn)
        self.scenario = scenario
        self.seed = seed
        self.af = AutoFolio(random_seed=seed)
        self.logger = logging.getLogger("AF Facade")

        # populated by fit(); None until a model has been trained
        self.feature_pre_pipeline = None
        self.pre_solver = None
        self.selector = None
        self.config = None
        self.cs = None

    def fit(self, config: Configuration = None, save_fn: str = None):
        """Train AutoFolio on the scenario given at construction time.

        config: parameter values to wrap into a Configuration; the default
            configuration of the space is used when None.
        save_fn: if given, the fitted model is pickled to this path.
        """
        self.logger.info("Fit")
        self.cs = self.af.get_cs(self.scenario, {})
        if config is None:
            config = self.cs.get_default_configuration()
        else:
            config = Configuration(configuration_space=self.cs, values=config)

        fitted = self.af.fit(scenario=self.scenario, config=config)
        self.feature_pre_pipeline, self.pre_solver, self.selector = fitted
        self.config = config

        if save_fn:
            self.af._save_model(save_fn, self.scenario,
                                self.feature_pre_pipeline, self.pre_solver,
                                self.selector, config)
            self.logger.info("AutoFolio model saved to %s" % (save_fn))

    def tune(
        self,
        wallclock_limit: int = 1200,
        runcount_limit: int = np.inf,
    ):
        """Search for a tuned configuration with SMAC and return it."""
        tuned = self.af.get_tuned_config(self.scenario,
                                         wallclock_limit=wallclock_limit,
                                         runcount_limit=runcount_limit,
                                         autofolio_config={},
                                         seed=self.seed)
        self.logger.info("Optimized Configuration: %s" % (tuned))
        return tuned

    def cross_validation(self, config: Configuration):
        """ run a cross validation on given AutoFolio configuration"""
        n_folds = int(self.scenario.cv_data.max().max())
        # run_cv returns a cost-like value; negate it to report a score
        score = -1 * self.af.run_cv(config=config,
                                    scenario=self.scenario,
                                    folds=n_folds)
        self.logger.info("AF's final performance %f" % (score))

        return score

    def predict(self, vec):
        """Predict a solver schedule for one meta-feature vector."""
        # drop the logger handle before deep-copying the scenario
        self.scenario.logger = None
        pseudo = copy.deepcopy(self.scenario)
        # wrap the vector as a one-row feature frame for a pseudo instance
        pseudo.feature_data = pd.DataFrame(np.array([vec]),
                                           index=["pseudo_instance"],
                                           columns=pseudo.features)
        pseudo.instances = ["pseudo_instance"]

        return self.af.predict(scenario=pseudo,
                               config=self.config,
                               feature_pre_pipeline=self.feature_pre_pipeline,
                               pre_solver=self.pre_solver,
                               selector=self.selector)

    @staticmethod
    def load_and_predict(vec: np.ndarray, load_fn: str):
        """ get predicted algorithm for given meta-feature vector"""
        af = AutoFolio(random_seed=42)  # seed is irrelevant for prediction
        pred = af.read_model_and_predict(model_fn=load_fn, feature_vec=vec)
        print("Selected Schedule [(algorithm, budget)]: %s" % (pred))
        # first pair of the schedule, first element of that pair = solver name
        return pred[0][0]