Example #1
    def _save_results(self, rh: RunHistory, output_fn, backup_fn=None):
        """ Helper to save results to file

        Parameters
        ----------
        rh: RunHistory
            runhistory to save
        output_fn: str
            if it ends in '.json': filename to save the runhistory to
            else: directory to save the runhistory to (filename is backup_fn)
        backup_fn: str
            if output_fn does not end in '.json', treat output_fn as a directory
            and append backup_fn as the filename (if output_fn ends in '.json',
            this argument is ignored)
        """
        if output_fn == "":
            self.logger.info(
                "No output specified, validated runhistory not saved.")
            return
        # Check if a folder or a file is specified as output
        if not output_fn.endswith('.json'):
            output_dir = output_fn
            output_fn = os.path.join(output_dir, backup_fn)
            self.logger.debug("Output is \"%s\", changing to \"%s\"!",
                              output_dir, output_fn)
        base = os.path.split(output_fn)[0]
        if not base == "" and not os.path.exists(base):
            self.logger.debug("Folder (\"%s\") doesn't exist, creating.", base)
            os.makedirs(base)
        rh.save_json(output_fn)
        self.logger.info("Saving validation-results in %s", output_fn)
Example #2
def merge_foreign_data(scenario: Scenario, runhistory: RunHistory,
                       in_scenario_list: typing.List[Scenario],
                       in_runhistory_list: typing.List[RunHistory]):
    """Extend <scenario> and <runhistory> with runhistory data from another
    <in_scenario> assuming the same PCS and feature space, but different instances

    Parameters
    ----------
    scenario: Scenario
        original scenario -- feature dictionary will be extended
    runhistory: RunHistory
        original runhistory -- will be extended by further data points
    in_scenario_list: typing.List[Scenario]
        list of input scenarios
    in_runhistory_list: typing.List[RunHistory]
        list of runhistories corresponding to the scenarios in <in_scenario_list>

    Returns
    -------
    scenario: Scenario
    runhistory: RunHistory
    """
    # add further instance features
    for in_scenario in in_scenario_list:
        if scenario.n_features != in_scenario.n_features:
            raise ValueError(
                "Feature Space has to be the same for both scenarios (%d vs %d)."
                % (scenario.n_features, in_scenario.n_features))

        if scenario.cs != in_scenario.cs:
            raise ValueError("PCS of both scenarios have to be identical.")

        if scenario.cutoff != in_scenario.cutoff:
            raise ValueError("Cutoffs of both scenarios have to be identical.")

        scenario.feature_dict.update(in_scenario.feature_dict)

    # extend runhistory
    for rh in in_runhistory_list:
        runhistory.update(rh, origin=DataOrigin.EXTERNAL_DIFFERENT_INSTANCES)

    for date in runhistory.data:
        if scenario.feature_dict.get(date.instance_id) is None:
            raise ValueError(
                "Instance feature for \"%s\" was not found in scenario data." %
                (date.instance_id))

    runhistory.compute_all_costs(instances=scenario.train_insts)

    return scenario, runhistory
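The essential invariant enforced above is that every instance referenced by the merged runhistory must have features in the extended scenario. A toy sketch of that check with plain dictionaries (hypothetical data, no dsmac objects):

feature_dict = {"inst_a": [1.0, 2.0]}             # original scenario features
feature_dict.update({"inst_b": [3.0, 4.0]})       # features merged in from the foreign scenario

for inst in ["inst_a", "inst_b", "inst_c"]:       # instances referenced by the merged runhistory
    if feature_dict.get(inst) is None:
        # the real code raises ValueError here
        print('Instance feature for "%s" was not found in scenario data.' % inst)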
Example #3
def merge_foreign_data_from_file(
        scenario: Scenario,
        runhistory: RunHistory,
        in_scenario_fn_list: typing.List[str],
        in_runhistory_fn_list: typing.List[str],
        cs: ConfigurationSpace,
        aggregate_func: typing.Callable = average_cost):
    """Extend <scenario> and <runhistory> with runhistory data from another
    <in_scenario> assuming the same PCS and feature space, but different instances

    Parameters
    ----------
    scenario: Scenario
        original scenario -- feature dictionary will be extended
    runhistory: RunHistory
        original runhistory -- will be extended by further data points
    in_scenario_fn_list: typing.List[str]
        input scenario file names
    in_runhistory_fn_list: typing.List[str]
        list of filenames of runhistory dumps
    cs: ConfigurationSpace
        parameter configuration space to read runhistory from file
    aggregate_func: typing.Callable
        function to aggregate performance of a configuration across instances

    Returns
    -------
    scenario: Scenario
    runhistory: RunHistory
    """

    if not in_scenario_fn_list:
        raise ValueError(
            "To read warmstart data from previous runhistories, the corresponding scenarios are required. Use option --warmstart_scenario"
        )
    scens = [
        Scenario(scenario=scen_fn, cmd_options={"output_dir": ""})
        for scen_fn in in_scenario_fn_list
    ]
    rhs = []
    for rh_fn in in_runhistory_fn_list:
        rh = RunHistory(aggregate_func, file_system=scenario.file_system)
        rh.load_json(rh_fn, cs)
        rhs.append(rh)

    return merge_foreign_data(scenario,
                              runhistory,
                              in_scenario_list=scens,
                              in_runhistory_list=rhs)
Example #4
def _cost(config: Configuration, run_history: RunHistory,
          instance_seed_pairs=None):
    """Return array of all costs for the given config for further calculations.

    Parameters
    ----------
    config : Configuration
        Configuration to calculate objective for
    run_history : RunHistory
        RunHistory object from which the objective value is computed.
    instance_seed_pairs : list, optional (default=None)
        List of instance-seed pairs (tuples). If None, the run_history is
        queried for all runs of the given configuration.

    Returns
    -------
    Costs: list
        List of all costs
    """
    try:
        id_ = run_history.config_ids[config]
    except KeyError:  # challenger was not running so far
        return []

    if instance_seed_pairs is None:
        instance_seed_pairs = run_history.get_runs_for_config(config)

    costs = []
    for i, r in instance_seed_pairs:
        k = RunKey(id_, i, r)
        costs.append(run_history.data[k].cost)
    return costs
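A self-contained sketch of the lookup pattern used above, with namedtuples standing in for dsmac's RunKey and run-value records (hypothetical data):

from collections import namedtuple

RunKey = namedtuple("RunKey", ["config_id", "instance_id", "seed"])
RunValue = namedtuple("RunValue", ["cost", "time", "status", "additional_info"])

data = {
    RunKey(1, "inst_a", 42): RunValue(0.8, 1.2, "SUCCESS", {}),
    RunKey(1, "inst_b", 42): RunValue(0.6, 0.9, "SUCCESS", {}),
}

config_id = 1
instance_seed_pairs = [("inst_a", 42), ("inst_b", 42)]
costs = [data[RunKey(config_id, i, r)].cost for i, r in instance_seed_pairs]
print(costs)  # [0.8, 0.6]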
Example #5
    def _get_mean_costs(self, incs: typing.List[Configuration],
                        new_rh: RunHistory):
        """
        Compute mean cost per instance

        Parameters
        ----------
        incs : typing.List[Configuration]
            incumbents determined by all parallel SMAC runs
        new_rh : RunHistory
            runhistory to determine mean performance

        Returns
        -------
        means: List[float]
            mean cost of each incumbent across instances
        config_cost_per_inst: Dict(Configuration -> Dict(inst_id(str) -> float))
            per-instance costs for each incumbent

        """
        config_cost_per_inst = {}
        results = []
        for incumbent in incs:
            cost_per_inst = new_rh.get_instance_costs_for_config(
                config=incumbent)
            config_cost_per_inst[incumbent] = cost_per_inst
            results.append(np.mean(list(cost_per_inst.values())))
        return results, config_cost_per_inst
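The aggregation itself is just a mean over per-instance costs; a tiny sketch with made-up numbers for two hypothetical incumbents:

import numpy as np

config_cost_per_inst = {
    "inc_1": {"inst_a": 0.8, "inst_b": 0.6, "inst_c": 1.0},
    "inc_2": {"inst_a": 0.5, "inst_b": 0.9, "inst_c": 0.7},
}
means = [np.mean(list(costs.values())) for costs in config_cost_per_inst.values()]
print(means)  # [0.8, 0.7]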
Example #6
def read(run_history: RunHistory, output_dirs: typing.Union[str,
                                                            typing.List[str]],
         configuration_space: ConfigurationSpace, logger: logging.Logger):
    """Update runhistory with run results from concurrent runs of pSMAC.

    Parameters
    ----------
    run_history : dsmac.runhistory.RunHistory
        RunHistory object to be updated with run information from runhistory
        objects stored in the output directory.
    output_dirs : typing.Union[str,typing.List[str]]
        List of SMAC output directories
        or Linux path expression (str) which will be cast into a list with
        file_system.glob(). This function will search the output directories
        for files matching the runhistory regular expression.
    configuration_space : ConfigSpace.ConfigurationSpace
        A ConfigurationSpace object to check if loaded configurations are valid.
    logger : logging.Logger
    """
    numruns_in_runhistory = len(run_history.data)
    initial_numruns_in_runhistory = numruns_in_runhistory
    file_system = run_history.file_system
    if isinstance(output_dirs, str):
        parsed_output_dirs = file_system.glob(output_dirs)
        if file_system.glob(run_history.file_system.join(output_dirs,
                                                         "run_*")):
            parsed_output_dirs += file_system.glob(
                file_system.join(output_dirs, "run_*"))
    else:
        parsed_output_dirs = output_dirs

    for output_directory in parsed_output_dirs:
        for file_in_output_directory in file_system.listdir(output_directory):
            match = re.match(RUNHISTORY_RE, file_in_output_directory)
            valid_match = re.match(VALIDATEDRUNHISTORY_RE,
                                   file_in_output_directory)
            if match or valid_match:
                last_id = PSMAC_VALUE.dir2id[output_directory]
                runhistory_file = file_system.join(output_directory,
                                                   file_in_output_directory)
                updated_id_set = run_history.update_from_json(
                    runhistory_file,
                    configuration_space,
                    id_set=PSMAC_VALUE.dir2id[output_directory],
                    file_system=run_history.file_system)
                PSMAC_VALUE.dir2id[output_directory] = updated_id_set
                # print(PSMAC_VALUE.dir2id)
                new_numruns_in_runhistory = len(run_history.data)
                difference = new_numruns_in_runhistory - numruns_in_runhistory
                logger.debug('Shared model mode: Loaded %d new runs from %s' %
                             (difference, runhistory_file))
                numruns_in_runhistory = new_numruns_in_runhistory

    difference = numruns_in_runhistory - initial_numruns_in_runhistory
    logger.info(
        'Shared model mode: Finished loading new runs, found %d new runs.' %
        difference)
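A stdlib-only sketch of how the output directories might be scanned for runhistory files (hypothetical directory layout; the real code matches RUNHISTORY_RE and VALIDATEDRUNHISTORY_RE, whose exact patterns are not shown here):

import glob
import os
import re

output_dirs = "psmac3-output_*"                   # Linux path expression, as in the docstring
parsed = glob.glob(output_dirs) + glob.glob(os.path.join(output_dirs, "run_*"))

runhistory_re = re.compile(r"runhistory\.json$")  # assumption: the real pattern may differ
for directory in parsed:
    if not os.path.isdir(directory):
        continue
    for fname in os.listdir(directory):
        if runhistory_re.match(fname):
            print("would load", os.path.join(directory, fname))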
Example #7
def write(run_history: RunHistory, output_directory: str,
          logger: logging.Logger):
    """Write the runhistory to the output directory.

    Overwrites previously outputted runhistories.

    Parameters
    ----------
    run_history : ~dsmac.runhistory.runhistory.RunHistory
        RunHistory object to be saved.

    output_directory : str

    logger : logging.Logger
    """
    file_system = run_history.file_system
    output_filename = file_system.join(output_directory,
                                       RUNHISTORY_FILEPATTERN)

    logging.debug("Saving runhistory to %s" % output_filename)

    run_history.save_json(output_filename, save_external=False)
Example #8
    def restore_state(self, scen, args_):
        """Read in files for state-restoration: runhistory, stats, trajectory.
        """
        # Check for folder and files
        rh_path = os.path.join(args_.restore_state, "runhistory.json")
        stats_path = os.path.join(args_.restore_state, "stats.json")
        traj_path_aclib = os.path.join(args_.restore_state, "traj_aclib2.json")
        traj_path_old = os.path.join(args_.restore_state, "traj_old.csv")
        scen_path = os.path.join(args_.restore_state, "scenario.txt")
        if not os.path.isdir(args_.restore_state):
            raise FileNotFoundError("Could not find folder from which to restore.")
        # Load runhistory and stats
        rh = RunHistory(aggregate_func=None)
        rh.load_json(rh_path, scen.cs)
        self.logger.debug("Restored runhistory from %s", rh_path)
        stats = Stats(scen)
        stats.load(stats_path)
        self.logger.debug("Restored stats from %s", stats_path)
        with open(traj_path_aclib, 'r') as traj_fn:
            traj_list_aclib = traj_fn.readlines()
        with open(traj_path_old, 'r') as traj_fn:
            traj_list_old = traj_fn.readlines()
        return rh, stats, traj_list_aclib, traj_list_old
Example #9
    def _adapt_cutoff(self, challenger: Configuration,
                      incumbent: Configuration,
                      run_history: RunHistory,
                      inc_sum_cost: float):
        """Adaptive capping:
        Compute cutoff based on time so far used for incumbent
        and reduce cutoff for next run of challenger accordingly

        !Only applicable if self.run_obj_time

        !Runs on the incumbent should be a superset of the runs performed for
         the challenger

        Parameters
        ----------
        challenger : Configuration
            Configuration which challenges incumbent
        incumbent : Configuration
            Best configuration so far
        run_history : RunHistory
            Stores all runs we ran so far
        inc_sum_cost: float
            Sum of runtimes of all incumbent runs

        Returns
        -------
        cutoff: int
            Adapted cutoff
        """

        if not self.run_obj_time:
            return self.cutoff

        # cost used by challenger for going over all its runs
        # should be subset of runs of incumbent (not checked for efficiency
        # reasons)
        chall_inst_seeds = run_history.get_runs_for_config(challenger)
        chal_sum_cost = sum_cost(config=challenger,
                                 instance_seed_pairs=chall_inst_seeds,
                                 run_history=run_history)
        cutoff = min(self.cutoff,
                     inc_sum_cost * self.adaptive_capping_slackfactor -
                     chal_sum_cost
                     )
        return cutoff
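A worked example of the capping formula with made-up numbers: with a scenario cutoff of 300 sec, an incumbent that has spent 40 sec on the shared instance-seed pairs, a slack factor of 1.2, and a challenger that has already used 10 sec, the adapted cutoff is min(300, 40 * 1.2 - 10) = 38 sec:

cutoff = 300.0                       # scenario cutoff [sec]
inc_sum_cost = 40.0                  # total runtime of the incumbent on the shared instance-seed pairs
adaptive_capping_slackfactor = 1.2
chal_sum_cost = 10.0                 # runtime the challenger has already used

adapted = min(cutoff, inc_sum_cost * adaptive_capping_slackfactor - chal_sum_cost)
print(adapted)  # 38.0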
Example #10
    def _add_inc_run(self, incumbent: Configuration, run_history: RunHistory):
        """Add new run for incumbent

        *Side effect:* adds runs to <run_history>

        Parameters
        ----------
        incumbent : Configuration
            best configuration so far
        run_history : RunHistory
            stores all runs we ran so far
        """
        inc_runs = run_history.get_runs_for_config(incumbent)

        # Line 3
        # First evaluate incumbent on a new instance
        if len(inc_runs) < self.maxR:
            while True:
                # Line 4
                # find all instances that have the most runs on the inc
                inc_runs = run_history.get_runs_for_config(incumbent)
                inc_inst = [s.instance for s in inc_runs]
                inc_inst = list(Counter(inc_inst).items())
                inc_inst.sort(key=lambda x: x[1], reverse=True)
                try:
                    max_runs = inc_inst[0][1]
                except IndexError:
                    self.logger.debug("No run for incumbent found")
                    max_runs = 0
                inc_inst = set([x[0] for x in inc_inst if x[1] == max_runs])

                available_insts = (self.instances - inc_inst)

                # if all instances were used n times, we can pick an instance
                # from the complete set again
                if not self.deterministic and not available_insts:
                    available_insts = self.instances

                # Line 6 (Line 5 is further down...)
                if self.deterministic:
                    next_seed = 0
                else:
                    next_seed = self.rs.randint(low=0, high=MAXINT, size=1)[0]

                if available_insts:
                    # Line 5 (here for easier code)
                    next_instance = self.rs.choice(list(available_insts))
                    # Line 7
                    self.logger.debug("Add run of incumbent")
                    status, cost, dur, res = self.tae_runner.start(
                        config=incumbent,
                        instance=next_instance,
                        seed=next_seed,
                        cutoff=self.cutoff,
                        instance_specific=self.instance_specifics.get(
                            next_instance, "0"))
                    self._ta_time += dur
                    self._num_run += 1
                else:
                    self.logger.debug("No further instance-seed pairs for "
                                      "incumbent available.")
                    break

                inc_runs = run_history.get_runs_for_config(incumbent)
                # Termination condition; after exactly one run, this checks
                # whether further runs are necessary due to minR
                if len(inc_runs) >= self.minR or len(inc_runs) >= self.maxR:
                    break
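A standalone sketch of the instance-selection step (Lines 4-5 of the pseudocode): count how often each instance has already been used for the incumbent, exclude the most-used ones, and draw the next instance from the remainder (hypothetical data):

from collections import Counter

import numpy as np

instances = {"i1", "i2", "i3"}
inc_run_instances = ["i1", "i1", "i2"]         # instances of previous incumbent runs

counts = Counter(inc_run_instances)
max_runs = max(counts.values()) if counts else 0
most_used = {inst for inst, n in counts.items() if n == max_runs}

available_insts = instances - most_used        # {"i2", "i3"}
rs = np.random.RandomState(1)
print(rs.choice(sorted(available_insts)))      # e.g. "i2"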
Example #11
    def intensify(self,
                  challengers: typing.List[Configuration],
                  incumbent: Configuration,
                  run_history: RunHistory,
                  aggregate_func: typing.Callable,
                  time_bound: float = float(MAXINT),
                  log_traj: bool = True):
        """Running intensification to determine the incumbent configuration.
        *Side effect:* adds runs to run_history

        Implementation of Procedure 2 in Hutter et al. (2011).

        Parameters
        ----------
        challengers : typing.List[Configuration]
            promising configurations
        incumbent : Configuration
            best configuration so far
        run_history : RunHistory
            stores all runs we ran so far
        aggregate_func: typing.Callable
            aggregate error across instances
        time_bound : float, optional (default=2 ** 31 - 1)
            time in [sec] available to perform intensify
        log_traj: bool
            whether to log changes of incumbents in trajectory

        Returns
        -------
        incumbent: Configuration()
            current (maybe new) incumbent configuration
        inc_perf: float
            empirical performance of incumbent configuration
        """
        self.start_time = time.time()
        self._ta_time = 0

        if time_bound < self._min_time:
            raise ValueError("time_bound must be >= %f" % (self._min_time))

        self._num_run = 0
        self._chall_indx = 0

        # Line 1 + 2
        if isinstance(challengers, ChallengerList):
            challengers = challengers.challengers
        L1 = 10
        L2 = 10000
        r = np.random.uniform()
        if 0.05 < r < 0.2 and len(challengers) > L1:
            target = np.random.randint(1, L1)
            challengers[target], challengers[0] = challengers[0], challengers[
                target]
        if r < 0.05 and len(challengers) > L2:
            target = np.random.randint(L1, L2)
            challengers[target], challengers[0] = challengers[0], challengers[
                target]

        for challenger in challengers:
            if not run_history.db.appointment_config(challenger):
                continue
            if challenger == incumbent:
                self.logger.debug(
                    "Challenger was the same as the current incumbent; Skipping challenger"
                )
                continue

            self.logger.debug("Intensify on %s", challenger)
            if hasattr(challenger, 'origin'):
                self.logger.debug("Configuration origin: %s",
                                  challenger.origin)

            try:
                # Lines 3-7
                self._add_inc_run(incumbent=incumbent, run_history=run_history)

                # Lines 8-17
                incumbent = self._race_challenger(
                    challenger=challenger,
                    incumbent=incumbent,
                    run_history=run_history,
                    aggregate_func=aggregate_func,
                    log_traj=log_traj)
                if self.always_race_against and \
                        challenger == incumbent and \
                        self.always_race_against != challenger:
                    self.logger.debug(
                        "Race against constant configuration after incumbent change."
                    )
                    incumbent = self._race_challenger(
                        challenger=self.always_race_against,
                        incumbent=incumbent,
                        run_history=run_history,
                        aggregate_func=aggregate_func,
                        log_traj=log_traj)

            except BudgetExhaustedException:
                # We return incumbent, SMBO stops due to its own budget checks
                inc_perf = run_history.get_cost(incumbent)
                self.logger.debug("Budget exhausted; Return incumbent")
                return incumbent, inc_perf

            tm = time.time()
            if self._chall_indx >= self.min_chall:
                if self._num_run > self.run_limit:
                    self.logger.debug(
                        "Maximum #runs for intensification reached")
                    break
                if not self.use_ta_time_bound and tm - self.start_time - time_bound >= 0:
                    self.logger.debug(
                        "Wallclock time limit for intensification reached ("
                        "used: %f sec, available: %f sec)" %
                        (tm - self.start_time, time_bound))
                    break
                elif self._ta_time - time_bound >= 0:
                    self.logger.debug(
                        "TA time limit for intensification reached ("
                        "used: %f sec, available: %f sec)" %
                        (self._ta_time, time_bound))
                    break

        # output estimated performance of incumbent
        inc_runs = run_history.get_runs_for_config(incumbent)
        inc_perf = aggregate_func(incumbent, run_history, inc_runs)
        self.logger.info(
            "Updated estimated cost of incumbent on %d runs: %.4f" %
            (len(inc_runs), inc_perf))

        self.stats.update_average_configs_per_intensify(
            n_configs=self._chall_indx)

        return incumbent, inc_perf
Example #12
    def __init__(
        self,
        scenario: Scenario,
        tae_runner: Optional[Union[Type[ExecuteTARun], Callable]] = None,
        tae_runner_kwargs: Optional[dict] = None,
        runhistory: Optional[Union[Type[RunHistory], RunHistory]] = None,
        runhistory_kwargs: Optional[dict] = None,
        intensifier: Optional[Type[Intensifier]] = None,
        intensifier_kwargs: Optional[dict] = None,
        acquisition_function: Optional[
            Type[AbstractAcquisitionFunction]] = None,
        acquisition_function_kwargs: Optional[dict] = None,
        integrate_acquisition_function: bool = False,
        acquisition_function_optimizer: Optional[
            Type[AcquisitionFunctionMaximizer]] = None,
        acquisition_function_optimizer_kwargs: Optional[dict] = None,
        model: Optional[Type[AbstractEPM]] = None,
        model_kwargs: Optional[dict] = None,
        runhistory2epm: Optional[Type[AbstractRunHistory2EPM]] = None,
        runhistory2epm_kwargs: Optional[dict] = None,
        initial_design: Optional[Type[InitialDesign]] = None,
        initial_design_kwargs: Optional[dict] = None,
        initial_configurations: Optional[List[Configuration]] = None,
        stats: Optional[Stats] = None,
        restore_incumbent: Optional[Configuration] = None,
        rng: Optional[Union[np.random.RandomState, int]] = None,
        smbo_class: Optional[SMBO] = None,
        run_id: Optional[int] = None,
        random_configuration_chooser: Optional[
            Type[RandomConfigurationChooser]] = None,
        random_configuration_chooser_kwargs: Optional[dict] = None,
    ):
        """
        Constructor

        Parameters
        ----------
        scenario : ~dsmac.scenario.scenario.Scenario
            Scenario object
        tae_runner : ~dsmac.tae.execute_ta_run.ExecuteTARun or callable
            Callable or implementation of
            :class:`~dsmac.tae.execute_ta_run.ExecuteTARun`. In case a
            callable is passed it will be wrapped by
            :class:`~dsmac.tae.execute_func.ExecuteTAFuncDict`.
            If not set, it will be initialized with the
            :class:`~dsmac.tae.execute_ta_run_old.ExecuteTARunOld`.
        tae_runner_kwargs: Optional[dict]
            arguments passed to constructor of '~tae_runner'
        runhistory : RunHistory
            runhistory to store all algorithm runs
        runhistory_kwargs : Optional[dict]
            arguments passed to constructor of runhistory.
            We strongly advise against changing the aggregation function,
            since it will break some code assumptions
        intensifier : Intensifier
            intensification object to issue a racing to decide the current
            incumbent
        intensifier_kwargs: Optional[dict]
            arguments passed to the constructor of '~intensifier'
        acquisition_function : ~dsmac.optimizer.acquisition.AbstractAcquisitionFunction
            Class or object that implements the :class:`~dsmac.optimizer.acquisition.AbstractAcquisitionFunction`.
            Will use :class:`~dsmac.optimizer.acquisition.EI` or :class:`~dsmac.optimizer.acquisition.LogEI` if not set.
            `~acquisition_function_kwargs` is passed to the class constructor.
        acquisition_function_kwargs : Optional[dict]
            dictionary to pass specific arguments to ~acquisition_function
        integrate_acquisition_function : bool, default=False
            Whether to integrate the acquisition function. Works only with models which can sample their
            hyperparameters (i.e. GaussianProcessMCMC).
        acquisition_function_optimizer : ~dsmac.optimizer.ei_optimization.AcquisitionFunctionMaximizer
            Object that implements the :class:`~dsmac.optimizer.ei_optimization.AcquisitionFunctionMaximizer`.
            Will use :class:`dsmac.optimizer.ei_optimization.InterleavedLocalAndRandomSearch` if not set.
        acquisition_function_optimizer_kwargs: Optional[dict]
            Arguments passed to constructor of '~acquisition_function_optimizer'
        model : AbstractEPM
            Model that implements train() and predict(). Will use a
            :class:`~dsmac.epm.rf_with_instances.RandomForestWithInstances` if not set.
        model_kwargs : Optional[dict]
            Arguments passed to constructor of '~model'
        runhistory2epm : ~dsmac.runhistory.runhistory2epm.RunHistory2EPM
            Object that implements the AbstractRunHistory2EPM. If None,
            will use :class:`~dsmac.runhistory.runhistory2epm.RunHistory2EPM4Cost`
            if objective is cost or
            :class:`~dsmac.runhistory.runhistory2epm.RunHistory2EPM4LogCost`
            if objective is runtime.
        runhistory2epm_kwargs: Optional[dict]
            Arguments passed to the constructor of '~runhistory2epm'
        initial_design : InitialDesign
            initial sampling design
        initial_design_kwargs: Optional[dict]
            arguments passed to constructor of '~initial_design'
        initial_configurations : List[Configuration]
            list of initial configurations for initial design --
            cannot be used together with initial_design
        stats : Stats
            optional stats object
        rng : np.random.RandomState
            Random number generator
        restore_incumbent : Configuration
            incumbent used if restoring to previous state
        smbo_class : ~dsmac.optimizer.smbo.SMBO
            Class implementing the SMBO interface which will be used to
            instantiate the optimizer class.
        run_id : int (optional)
            Run ID will be used as subfolder for output_dir. If no ``run_id`` is given, a random ``run_id`` will be
            chosen.
        random_configuration_chooser : ~dsmac.optimizer.random_configuration_chooser.RandomConfigurationChooser
            How often to choose a random configuration during the intensification procedure.
        random_configuration_chooser_kwargs : Optional[dict]
            arguments of constructor for '~random_configuration_chooser'

        """
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        aggregate_func = average_cost

        self.scenario = scenario
        self.output_dir = ""
        if not restore_incumbent:
            # restore_incumbent is used by the CLI interface which provides a method for restoring a SMAC run given an
            # output directory. This is the default path.
            # initial random number generator
            # run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
            # run_id=datetime.now().strftime("%Y%m%d%H%M%S%f")
            run_id = uuid1()
            # self.output_dir = create_output_directory(scenario, run_id)   # fixme run_id
            self.output_dir = scenario.output_dir  # create_output_directory(scenario, run_id)   # fixme run_id

        elif scenario.output_dir is not None:
            run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
            # output-directory is created in CLI when restoring from a
            # folder. calling the function again in the facade results in two
            # folders being created: run_X and run_X.OLD. if we are
            # restoring, the output-folder exists already and we omit creating it,
            # but set the self-output_dir to the dir.
            # necessary because we want to write traj to new output-dir in CLI.
            self.output_dir = scenario.output_dir_for_this_run

        if (scenario.deterministic is True
                and getattr(scenario, 'tuner_timeout', None) is None
                and scenario.run_obj == 'quality'):
            self.logger.info(
                'Optimizing a deterministic scenario for quality without a tuner timeout - will make '
                'SMAC deterministic and only evaluate one configuration per iteration!'
            )
            scenario.intensification_percentage = 1e-10
            scenario.min_chall = 1
        scenario.write()

        # initialize stats object
        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario, file_system=scenario.file_system)

        if self.scenario.run_obj == "runtime" and not self.scenario.transform_y == "LOG":
            self.logger.warning(
                "Runtime as objective automatically activates log(y) transformation"
            )
            self.scenario.transform_y = "LOG"

        # initialize empty runhistory
        runhistory_def_kwargs = {'aggregate_func': aggregate_func}
        if runhistory_kwargs is not None:
            runhistory_def_kwargs.update(runhistory_kwargs)
        if runhistory is None:
            runhistory = RunHistory(**runhistory_def_kwargs,
                                    file_system=scenario.file_system,
                                    db_type=scenario.db_type,
                                    db_args=scenario.db_args,
                                    db_kwargs=scenario.db_kwargs)
        elif inspect.isclass(runhistory):
            runhistory = runhistory(**runhistory_def_kwargs)
        else:
            if runhistory.aggregate_func is None:
                runhistory.aggregate_func = aggregate_func

        rand_conf_chooser_kwargs = {'rng': rng}
        if random_configuration_chooser_kwargs is not None:
            rand_conf_chooser_kwargs.update(
                random_configuration_chooser_kwargs)
        if random_configuration_chooser is None:
            if 'prob' not in rand_conf_chooser_kwargs:
                rand_conf_chooser_kwargs['prob'] = scenario.rand_prob
            random_configuration_chooser = ChooserProb(
                **rand_conf_chooser_kwargs)
        elif inspect.isclass(random_configuration_chooser):
            random_configuration_chooser = random_configuration_chooser(
                **rand_conf_chooser_kwargs)
        elif not isinstance(random_configuration_chooser,
                            RandomConfigurationChooser):
            raise ValueError(
                "random_configuration_chooser has to be"
                " a class or object of RandomConfigurationChooser")

        # reset random number generator in config space to draw different
        # random configurations with each seed given to SMAC
        scenario.cs.seed(rng.randint(MAXINT))

        # initial Trajectory Logger
        traj_logger = TrajLogger(output_dir=self.output_dir,
                                 stats=self.stats,
                                 file_system=scenario.file_system)

        # initial EPM
        types, bounds = get_types(scenario.cs, scenario.feature_array)
        model_def_kwargs = {
            'types': types,
            'bounds': bounds,
            'instance_features': scenario.feature_array,
            'seed': rng.randint(MAXINT),
            'pca_components': scenario.PCA_DIM,
        }
        if model_kwargs is not None:
            model_def_kwargs.update(model_kwargs)
        if model is None:
            for key, value in {
                    'log_y': scenario.transform_y in ["LOG", "LOGS"],
                    'num_trees': scenario.rf_num_trees,
                    'do_bootstrapping': scenario.rf_do_bootstrapping,
                    'ratio_features': scenario.rf_ratio_features,
                    'min_samples_split': scenario.rf_min_samples_split,
                    'min_samples_leaf': scenario.rf_min_samples_leaf,
                    'max_depth': scenario.rf_max_depth,
            }.items():
                if key not in model_def_kwargs:
                    model_def_kwargs[key] = value
            model_def_kwargs['configspace'] = self.scenario.cs
            model = RandomForestWithInstances(**model_def_kwargs)
        elif inspect.isclass(model):
            model_def_kwargs['configspace'] = self.scenario.cs
            model = model(**model_def_kwargs)
        else:
            raise TypeError("Model not recognized: %s" % (type(model)))

        # initial acquisition function
        acq_def_kwargs = {'model': model}
        if acquisition_function_kwargs is not None:
            acq_def_kwargs.update(acquisition_function_kwargs)
        if acquisition_function is None:
            if scenario.transform_y in ["LOG", "LOGS"]:
                acquisition_function = LogEI(**acq_def_kwargs)
            else:
                acquisition_function = EI(**acq_def_kwargs)
        elif inspect.isclass(acquisition_function):
            acquisition_function = acquisition_function(**acq_def_kwargs)
        else:
            raise TypeError(
                "Argument acquisition_function must be None or an object implementing the "
                "AbstractAcquisitionFunction, not %s." %
                type(acquisition_function))
        if integrate_acquisition_function:
            acquisition_function = IntegratedAcquisitionFunction(
                acquisition_function=acquisition_function, **acq_def_kwargs)

        # initialize optimizer on acquisition function
        acq_func_opt_kwargs = {
            'acquisition_function': acquisition_function,
            'config_space': scenario.cs,
            'rng': rng,
        }
        if acquisition_function_optimizer_kwargs is not None:
            acq_func_opt_kwargs.update(acquisition_function_optimizer_kwargs)
        if acquisition_function_optimizer is None:
            for key, value in {
                    'max_steps': scenario.sls_max_steps,
                    'n_steps_plateau_walk': scenario.sls_n_steps_plateau_walk,
            }.items():
                if key not in acq_func_opt_kwargs:
                    acq_func_opt_kwargs[key] = value
            acquisition_function_optimizer = InterleavedLocalAndRandomSearch(
                **acq_func_opt_kwargs)
        elif inspect.isclass(acquisition_function_optimizer):
            acquisition_function_optimizer = acquisition_function_optimizer(
                **acq_func_opt_kwargs)
        else:
            raise TypeError(
                "Argument acquisition_function_optimizer must be None or an object implementing the "
                "AcquisitionFunctionMaximizer, but is '%s'" %
                type(acquisition_function_optimizer))

        # initialize tae_runner
        # First case, if tae_runner is None, the target algorithm is a call
        # string in the scenario file
        tae_def_kwargs = {
            'stats': self.stats,
            'run_obj': scenario.run_obj,
            'runhistory': runhistory,
            'par_factor': scenario.par_factor,
            'cost_for_crash': scenario.cost_for_crash,
            'abort_on_first_run_crash': scenario.abort_on_first_run_crash
        }
        if tae_runner_kwargs is not None:
            tae_def_kwargs.update(tae_runner_kwargs)
        if 'ta' not in tae_def_kwargs:
            tae_def_kwargs['ta'] = scenario.ta
        if tae_runner is None:
            tae_def_kwargs['ta'] = scenario.ta
            tae_runner = ExecuteTARunOld(**tae_def_kwargs)
        elif inspect.isclass(tae_runner):
            tae_runner = tae_runner(**tae_def_kwargs)
        elif callable(tae_runner):
            tae_def_kwargs['ta'] = tae_runner
            tae_runner = ExecuteTAFuncDict(**tae_def_kwargs)
        else:
            raise TypeError(
                "Argument 'tae_runner' is %s, but must be "
                "either None, a callable or an object implementing "
                "ExecuteTaRun. Passing 'None' will result in the "
                "creation of target algorithm runner based on the "
                "call string in the scenario file." % type(tae_runner))

        # Check that overall objective and tae objective are the same
        if tae_runner.run_obj != scenario.run_obj:
            raise ValueError("Objective for the target algorithm runner and "
                             "the scenario must be the same, but are '%s' and "
                             "'%s'" % (tae_runner.run_obj, scenario.run_obj))

        # initialize intensification
        intensifier_def_kwargs = {
            'tae_runner': tae_runner,
            'stats': self.stats,
            'traj_logger': traj_logger,
            'rng': rng,
            'instances': scenario.train_insts,
            'cutoff': scenario.cutoff,
            'deterministic': scenario.deterministic,
            'run_obj_time': scenario.run_obj == "runtime",
            'always_race_against': (scenario.cs.get_default_configuration()
                                    if scenario.always_race_default else None),
            'use_ta_time_bound': scenario.use_ta_time,
            'instance_specifics': scenario.instance_specific,
            'minR': scenario.minR,
            'maxR': scenario.maxR,
            'adaptive_capping_slackfactor': scenario.intens_adaptive_capping_slackfactor,
            'min_chall': scenario.intens_min_chall,
        }
        if hasattr(scenario,
                   'filter_callback') and scenario.filter_callback is not None:
            print('update callback')
            intensifier_def_kwargs.update(
                {'filter_callback': scenario.filter_callback})
        if intensifier_kwargs is not None:
            intensifier_def_kwargs.update(intensifier_kwargs)
        if intensifier is None:
            intensifier = Intensifier(**intensifier_def_kwargs)
        elif inspect.isclass(intensifier):
            intensifier = intensifier(**intensifier_def_kwargs)
        else:
            raise TypeError(
                "Argument intensifier must be None or an object implementing the Intensifier, but is '%s'"
                % type(intensifier))

        # initial design
        if initial_design is not None and initial_configurations is not None:
            initial_design.initial_configurations = initial_configurations
            initial_configurations = None
        init_design_def_kwargs = {
            'tae_runner': tae_runner,
            'scenario': scenario,
            'stats': self.stats,
            'traj_logger': traj_logger,
            'runhistory': runhistory,
            'rng': rng,
            'configs': initial_configurations,
            'intensifier': intensifier,
            'aggregate_func': aggregate_func,
            'n_configs_x_params': 0,
            'max_config_fracs': 0.0,
            'initial_configurations': initial_design.initial_configurations
        }
        if initial_design_kwargs is not None:
            init_design_def_kwargs.update(initial_design_kwargs)
        if initial_configurations is not None:
            initial_design = InitialDesign(**init_design_def_kwargs)
        elif initial_design is None:
            if scenario.initial_incumbent == "DEFAULT":
                init_design_def_kwargs['max_config_fracs'] = 0.0
                initial_design = DefaultConfiguration(**init_design_def_kwargs)
            elif scenario.initial_incumbent == "RANDOM":
                init_design_def_kwargs['max_config_fracs'] = 0.0
                initial_design = RandomConfigurations(**init_design_def_kwargs)
            elif scenario.initial_incumbent == "LHD":
                initial_design = LHDesign(**init_design_def_kwargs)
            elif scenario.initial_incumbent == "FACTORIAL":
                initial_design = FactorialInitialDesign(
                    **init_design_def_kwargs)
            elif scenario.initial_incumbent == "SOBOL":
                initial_design = SobolDesign(**init_design_def_kwargs)
            else:
                raise ValueError("Don't know what kind of initial_incumbent "
                                 "'%s' is" % scenario.initial_incumbent)
        elif inspect.isclass(initial_design):
            initial_design = initial_design(**init_design_def_kwargs)
        else:
            raise TypeError(
                "Argument initial_design must be None or an object implementing the InitialDesign, but is '%s'"
                % type(initial_design))

        # if we log the performance data,
        # the RFRImputator will already get
        # log transform data from the runhistory
        if scenario.transform_y in ["LOG", "LOGS"]:
            cutoff = np.log(np.nanmin([np.inf, np.float_(scenario.cutoff)]))
            threshold = cutoff + np.log(scenario.par_factor)
        else:
            cutoff = np.nanmin([np.inf, np.float_(scenario.cutoff)])
            threshold = cutoff * scenario.par_factor
        num_params = len(scenario.cs.get_hyperparameters())
        imputor = RFRImputator(rng=rng,
                               cutoff=cutoff,
                               threshold=threshold,
                               model=model,
                               change_threshold=0.01,
                               max_iter=2)

        r2e_def_kwargs = {
            'scenario': scenario,
            'num_params': num_params,
            'success_states': [
                StatusType.SUCCESS,
            ],
            'impute_censored_data': True,
            'impute_state': [
                StatusType.CAPPED,
            ],
            'imputor': imputor,
            'scale_perc': 5
        }
        if scenario.run_obj == 'quality':
            r2e_def_kwargs.update({
                'success_states': [StatusType.SUCCESS, StatusType.CRASHED],
                'impute_censored_data': False,
                'impute_state': None,
            })
        if runhistory2epm_kwargs is not None:
            r2e_def_kwargs.update(runhistory2epm_kwargs)
        if runhistory2epm is None:
            if scenario.run_obj == 'runtime':
                runhistory2epm = RunHistory2EPM4LogCost(**r2e_def_kwargs)
            elif scenario.run_obj == 'quality':
                if scenario.transform_y == "NONE":
                    runhistory2epm = RunHistory2EPM4Cost(**r2e_def_kwargs)
                elif scenario.transform_y == "LOG":
                    runhistory2epm = RunHistory2EPM4LogCost(**r2e_def_kwargs)
                elif scenario.transform_y == "LOGS":
                    runhistory2epm = RunHistory2EPM4LogScaledCost(
                        **r2e_def_kwargs)
                elif scenario.transform_y == "INVS":
                    runhistory2epm = RunHistory2EPM4InvScaledCost(
                        **r2e_def_kwargs)
            else:
                raise ValueError('Unknown run objective: %s. Should be either '
                                 'quality or runtime.' % self.scenario.run_obj)
        elif inspect.isclass(runhistory2epm):
            runhistory2epm = runhistory2epm(**r2e_def_kwargs)
        else:
            raise TypeError(
                "Argument runhistory2epm must be None or an object implementing the RunHistory2EPM, but is '%s'"
                % type(runhistory2epm))

        smbo_args = {
            'scenario': scenario,
            'stats': self.stats,
            'initial_design': initial_design,
            'runhistory': runhistory,
            'runhistory2epm': runhistory2epm,
            'intensifier': intensifier,
            'aggregate_func': aggregate_func,
            'num_run': run_id,
            'model': model,
            'acq_optimizer': acquisition_function_optimizer,
            'acquisition_func': acquisition_function,
            'rng': rng,
            'restore_incumbent': restore_incumbent,
            'random_configuration_chooser': random_configuration_chooser
        }

        if smbo_class is None:
            self.solver = SMBO(**smbo_args)
        else:
            self.solver = smbo_class(**smbo_args)
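For orientation, a hypothetical end-to-end use of this facade, assuming dsmac mirrors SMAC3's public API (a Scenario built from a dict, a plain callable as tae_runner, and an optimize() entry point). The import paths, scenario keys, and the optimize() call are assumptions based on SMAC3 and may differ in dsmac:

from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
# assumed import paths, mirroring SMAC3; dsmac's module layout may differ
from dsmac.scenario.scenario import Scenario
from dsmac.facade.smac_ac_facade import SMAC4AC

cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter("x", -5.0, 5.0))

scenario = Scenario({"run_obj": "quality",    # optimize solution quality, not runtime
                     "runcount-limit": 20,    # at most 20 target-algorithm calls
                     "cs": cs,
                     "deterministic": "true"})

def quadratic(cfg):                           # callable TAE: receives a Configuration, returns a cost
    return (cfg["x"] - 1.0) ** 2

smac = SMAC4AC(scenario=scenario, tae_runner=quadratic, rng=42)
incumbent = smac.optimize()                   # assumed entry point, as in SMAC3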
Example #13
    def _get_runs(
        self,
        configs: Union[str, typing.List[Configuration]],
        insts: Union[str, typing.List[str]],
        repetitions: int = 1,
        runhistory: RunHistory = None,
    ) -> typing.Tuple[typing.List[_Run], RunHistory]:
        """
        Generate list of SMAC-TAE runs to be executed. This means
        combinations of configs with all instances on a certain number of seeds.

        side effect: Adds runs that don't need to be reevaluated to self.rh!

        Parameters
        ----------
        configs: str or list<Configuration>
            string or directly a list of Configuration
            str from [def, inc, def+inc, wallclock_time, cpu_time, all]
                time evaluates at cpu- or wallclock-timesteps of:
                [max_time/2^0, max_time/2^1, max_time/2^2, ..., default]
                with max_time being the highest recorded time
        insts: str or list<str>
            what instances to use for validation, either from
            [train, test, train+test] or directly a list of instances
        repetitions: int
            number of seeds per instance/config-pair to be evaluated
        runhistory: RunHistory
            optional, try to reuse this runhistory and save some runs

        Returns
        -------
        runs: list<_Run>
            list with _Runs
            [_Run(config=CONFIG1,inst=INSTANCE1,seed=SEED1,inst_specs=INST_SPECIFICS1),
             _Run(config=CONFIG2,inst=INSTANCE2,seed=SEED2,inst_specs=INST_SPECIFICS2),
             ...]
        """
        # Get relevant configurations and instances
        if isinstance(configs, str):
            configs = self._get_configs(configs)
        if isinstance(insts, str):
            insts = self._get_instances(insts)

        # If no instances are given, fix the instances to one "None" instance
        if not insts:
            insts = [None]
        # If algorithm is deterministic, fix repetitions to 1
        if self.scen.deterministic and repetitions != 1:
            self.logger.warning(
                "Specified %d repetitions, but fixing to 1, "
                "because algorithm is deterministic.", repetitions)
            repetitions = 1

        # Extract relevant information from given runhistory
        inst_seed_config = self._process_runhistory(configs, insts, runhistory)

        # Now create the actual run-list
        runs = []
        # Counter for runs without the need of recalculation
        runs_from_rh = 0
        # If we reuse runs, we want to return them as well
        new_rh = RunHistory(average_cost)

        for i in sorted(insts):
            for rep in range(repetitions):
                # First, find a seed and add all the data we can take from the
                # given runhistory to "our" validation runhistory.
                configs_evaluated = []
                if runhistory and i in inst_seed_config:
                    # Choose seed based on most often evaluated inst-seed-pair
                    seed, configs_evaluated = inst_seed_config[i].pop(0)
                    # Delete inst if all seeds are used
                    if not inst_seed_config[i]:
                        inst_seed_config.pop(i)
                    # Add runs to runhistory
                    for c in configs_evaluated[:]:
                        runkey = RunKey(runhistory.config_ids[c], i, seed)
                        cost, time, status, additional_info = runhistory.data[
                            runkey]
                        if status in [
                                StatusType.CRASHED, StatusType.ABORT,
                                StatusType.CAPPED
                        ]:
                            # Not properly executed target algorithm runs should be repeated
                            configs_evaluated.remove(c)
                            continue
                        new_rh.add(c,
                                   cost,
                                   time,
                                   status,
                                   instance_id=i,
                                   seed=seed,
                                   additional_info=additional_info)
                        runs_from_rh += 1
                else:
                    # If no runhistory or no entries for instance, get new seed
                    seed = self.rng.randint(MAXINT)

                # We now have a seed and add all configs that are not already
                # evaluated on that seed to the runs-list. This way, we
                # guarantee the same inst-seed-pairs for all configs.
                for config in [
                        c for c in configs if c not in configs_evaluated
                ]:
                    # Only use specifics if specific exists, else use string "0"
                    specs = self.scen.instance_specific[
                        i] if i and i in self.scen.instance_specific else "0"
                    runs.append(
                        _Run(config=config,
                             inst=i,
                             seed=seed,
                             inst_specs=specs))

        self.logger.info(
            "Collected %d runs from %d configurations on %d "
            "instances with %d repetitions. Reusing %d runs from "
            "given runhistory.", len(runs), len(configs), len(insts),
            repetitions, runs_from_rh)

        return runs, new_rh
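A toy sketch of the combination logic described above: every configuration is paired with every instance for a fixed number of seed repetitions, so that all configurations share the same instance-seed pairs (hypothetical stand-in for _Run):

from collections import namedtuple

import numpy as np

_Run = namedtuple("_Run", ["config", "inst", "seed", "inst_specs"])

configs = ["cfg_default", "cfg_incumbent"]
insts = ["inst_a", "inst_b"]
repetitions = 2
rng = np.random.RandomState(0)

runs = []
for inst in sorted(insts):
    for _ in range(repetitions):
        seed = rng.randint(2 ** 31 - 1)        # one seed per instance/repetition, shared by all configs
        for config in configs:
            runs.append(_Run(config=config, inst=inst, seed=seed, inst_specs="0"))

print(len(runs))  # 2 instances * 2 repetitions * 2 configs = 8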
Example #14
class Hydra(object):
    """
    Facade to use Hydra default mode

    Attributes
    ----------
    logger
    stats : Stats
        logs information about used resources
    solver : SMBO
        handles the actual algorithm calls
    rh : RunHistory
        stores information about previous runs
    portfolio : list
        List of all incumbents

    """
    def __init__(self,
                 scenario: Scenario,
                 n_iterations: int,
                 val_set: str = 'train',
                 incs_per_round: int = 1,
                 n_optimizers: int = 1,
                 rng: typing.Optional[typing.Union[np.random.RandomState,
                                                   int]] = None,
                 run_id: int = 1,
                 tae: typing.Type[ExecuteTARun] = ExecuteTARunOld,
                 tae_kwargs: typing.Union[dict, None] = None,
                 **kwargs):
        """
        Constructor

        Parameters
        ----------
        scenario : ~dsmac.scenario.scenario.Scenario
            Scenario object
        n_iterations: int,
            number of Hydra iterations
        val_set: str
            Set to validate incumbent(s) on. [train, valX].
            train => whole training set,
            valX => a random X% subset of the training set (train_set * X/100), X in (0, 100)
        incs_per_round: int
            Number of incumbents to keep per round
        n_optimizers: int
            Number of optimizers to run in parallel per round
        rng: int/np.random.RandomState
            The randomState/seed to pass to each dsmac run
        run_id: int
            run_id for this hydra run
        tae: ExecuteTARun
            Target Algorithm Runner (supports old and aclib format as well as AbstractTAFunc)
        tae_kwargs: Optional[dict]
            arguments passed to constructor of '~tae'

        """
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        self.n_iterations = n_iterations
        self.scenario = scenario
        self.run_id, self.rng = get_rng(rng, run_id, self.logger)
        self.kwargs = kwargs
        self.output_dir = None
        self.top_dir = None
        self.solver = None
        self.portfolio = None
        self.rh = RunHistory(average_cost, file_system=scenario.file_system)
        self._tae = tae
        self._tae_kwargs = tae_kwargs
        if incs_per_round <= 0:
            self.logger.warning('Invalid value in %s: %d. Setting to 1',
                                'incs_per_round', incs_per_round)
        self.incs_per_round = max(incs_per_round, 1)
        if n_optimizers <= 0:
            self.logger.warning('Invalid value in %s: %d. Setting to 1',
                                'n_optimizers', n_optimizers)
        self.n_optimizers = max(n_optimizers, 1)
        self.val_set = self._get_validation_set(val_set)
        self.cost_per_inst = {}
        self.optimizer = None
        self.portfolio_cost = None

    def _get_validation_set(self,
                            val_set: str,
                            delete: bool = True) -> typing.List[str]:
        """
        Create small validation set for hydra to determine incumbent performance

        Parameters
        ----------
        val_set: str
            Set to validate incumbent(s) on. [train, valX].
            train => whole training set,
            valX => a random X% subset of the training set (train_set * X/100), X in (0, 100)
        delete: bool
            Flag to delete all validation instances from the training set

        Returns
        -------
        val: typing.List[str]
            List of instance-ids to validate on

        """
        if val_set == 'none':
            return None
        if val_set == 'train':
            return self.scenario.train_insts
        elif val_set[:3] != 'val':
            self.logger.warning(
                'Can not determine validation set size. Using full training-set!'
            )
            return self.scenario.train_insts
        else:
            size = int(val_set[3:]) / 100
            if size <= 0 or size >= 1:
                raise ValueError(
                    'X invalid in valX, should be between 0 and 100')
            insts = np.array(self.scenario.train_insts)
            # ensure at least 3 validation instances so this also works with the small example sets
            size = max(np.floor(insts.shape[0] * size).astype(int), 3)
            ids = np.random.choice(insts.shape[0], size, replace=False)
            val = insts[ids].tolist()
            if delete:
                self.scenario.train_insts = np.delete(insts, ids).tolist()
            return val

    def optimize(self) -> typing.List[Configuration]:
        """
        Optimizes the algorithm provided in scenario (given in constructor)

        Returns
        -------
        portfolio : typing.List[Configuration]
            Portfolio of found configurations

        """
        # Setup output directory
        self.portfolio = []
        portfolio_cost = np.inf
        if self.output_dir is None:
            self.top_dir = "hydra-output_%s" % (
                datetime.datetime.fromtimestamp(
                    time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f'))
            self.scenario.output_dir = os.path.join(
                self.top_dir,
                "psmac3-output_%s" % (datetime.datetime.fromtimestamp(
                    time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f')))
            self.output_dir = create_output_directory(self.scenario,
                                                      run_id=self.run_id,
                                                      logger=self.logger)

        scen = copy.deepcopy(self.scenario)
        scen.output_dir_for_this_run = None
        scen.output_dir = None
        # parent process SMAC only used for validation purposes
        self.solver = SMAC4AC(scenario=scen,
                              tae_runner=self._tae,
                              rng=self.rng,
                              run_id=self.run_id,
                              **self.kwargs)
        for i in range(self.n_iterations):
            self.logger.info("=" * 120)
            self.logger.info("Hydra Iteration: %d", (i + 1))

            if i == 0:
                tae = self._tae
                tae_kwargs = self._tae_kwargs
            else:
                tae = ExecuteTARunHydra
                if self._tae_kwargs:
                    tae_kwargs = self._tae_kwargs
                else:
                    tae_kwargs = {}
                tae_kwargs['cost_oracle'] = self.cost_per_inst
            self.optimizer = PSMAC(
                scenario=self.scenario,
                run_id=self.run_id,
                rng=self.rng,
                tae=tae,
                tae_kwargs=tae_kwargs,
                shared_model=False,
                validate=bool(self.val_set),
                n_optimizers=self.n_optimizers,
                val_set=self.val_set,
                n_incs=self.n_optimizers,  # return all configurations (unvalidated)
                **self.kwargs)
            self.optimizer.output_dir = self.output_dir
            incs = self.optimizer.optimize()
            cost_per_conf_v, val_ids, cost_per_conf_e, est_ids = self.optimizer.get_best_incumbents_ids(
                incs)
            if self.val_set:
                to_keep_ids = val_ids[:self.incs_per_round]
            else:
                to_keep_ids = est_ids[:self.incs_per_round]
            config_cost_per_inst = {}
            incs = incs[to_keep_ids]
            self.logger.info('Kept incumbents')
            for inc in incs:
                self.logger.info(inc)
                config_cost_per_inst[inc] = (cost_per_conf_v[inc]
                                             if self.val_set else cost_per_conf_e[inc])

            cur_portfolio_cost = self._update_portfolio(
                incs, config_cost_per_inst)
            if portfolio_cost <= cur_portfolio_cost:
                self.logger.info(
                    "No further progress (%f) --- terminate hydra",
                    portfolio_cost)
                break
            else:
                portfolio_cost = cur_portfolio_cost
                self.logger.info("Current pertfolio cost: %f", portfolio_cost)

            self.scenario.output_dir = os.path.join(
                self.top_dir,
                "psmac3-output_%s" % (datetime.datetime.fromtimestamp(
                    time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f')))
            self.output_dir = create_output_directory(self.scenario,
                                                      run_id=self.run_id,
                                                      logger=self.logger)
        read(self.rh,
             os.path.join(self.top_dir, 'psmac3*', 'run_' + str(MAXINT)),
             self.scenario.cs, self.logger)
        self.rh.save_json(fn=os.path.join(
            self.top_dir, 'all_validated_runs_runhistory.json'),
                          save_external=True)
        with open(os.path.join(self.top_dir, 'portfolio.pkl'), 'wb') as fh:
            pickle.dump(self.portfolio, fh)
        self.logger.info("~" * 120)
        self.logger.info('Resulting Portfolio:')
        for configuration in self.portfolio:
            self.logger.info(str(configuration))
        self.logger.info("~" * 120)

        return self.portfolio

    def _update_portfolio(self, incs: np.ndarray,
                          config_cost_per_inst: typing.Dict) -> float:
        """
        Validates all configurations (in incs) and determines which ones to add to the portfolio

        Parameters
        ----------
        incs: np.ndarray
            List of Configurations
        config_cost_per_inst: typing.Dict
            Maps each kept configuration to its per-instance cost

        Returns
        -------
        cur_cost: float
            The current cost of the portfolio

        """
        if self.val_set:  # we have validated data
            for kept in incs:
                if kept not in self.portfolio:
                    self.portfolio.append(kept)
                    cost_per_inst = config_cost_per_inst[kept]
                    if self.cost_per_inst:
                        if len(self.cost_per_inst) != len(cost_per_inst):
                            raise ValueError(
                                'Num validated Instances mismatch!')
                        else:
                            for key in cost_per_inst:
                                self.cost_per_inst[key] = min(
                                    self.cost_per_inst[key],
                                    cost_per_inst[key])
                    else:
                        self.cost_per_inst = cost_per_inst
            cur_cost = np.mean(list(self.cost_per_inst.values()))  # type: float
        else:  # No validated data. Set the mean to the approximated mean
            # may contain NaNs, since not every instance was evaluated; use nanmean to approximate
            means = []
            for kept in incs:
                means.append(
                    np.nanmean(
                        list(
                            self.optimizer.rh.get_instance_costs_for_config(
                                kept).values())))
                self.portfolio.append(kept)
            if self.portfolio_cost:
                # weighted running mean over the enlarged portfolio: weight the old
                # portfolio cost by the old size and add the new configurations' means
                new_mean = self.portfolio_cost * (
                    len(self.portfolio) - len(incs)) / len(self.portfolio)
                new_mean += np.nansum(means) / len(self.portfolio)
            else:
                new_mean = np.mean(means)
            self.cost_per_inst = defaultdict(lambda: new_mean)
            cur_cost = new_mean

        self.portfolio_cost = cur_cost
        return cur_cost
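
The validated branch of _update_portfolio above performs an oracle-style update (the same idea behind the cost_oracle passed to ExecuteTARunHydra): for every instance the portfolio keeps the best (minimum) cost any of its members achieved, and the portfolio cost is the mean over those per-instance minima. Below is a minimal standalone sketch of that idea; the helper name and the toy costs are made up and are not part of the dsmac API.

import numpy as np

def update_portfolio_cost(portfolio_cost_per_inst, new_config_cost_per_inst):
    # Merge one configuration's per-instance costs into the portfolio's oracle costs.
    if not portfolio_cost_per_inst:
        merged = dict(new_config_cost_per_inst)
    else:
        if set(portfolio_cost_per_inst) != set(new_config_cost_per_inst):
            raise ValueError('Num validated Instances mismatch!')
        merged = {inst: min(cost, new_config_cost_per_inst[inst])
                  for inst, cost in portfolio_cost_per_inst.items()}
    return merged, float(np.mean(list(merged.values())))

portfolio_costs = {}
for config_costs in ({'i1': 3.0, 'i2': 8.0}, {'i1': 5.0, 'i2': 2.0}):
    portfolio_costs, cur_cost = update_portfolio_cost(portfolio_costs, config_costs)
print(cur_cost)  # 2.5: every instance is served by its best portfolio member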
Example #15
0
    def __init__(self,
                 scenario: Scenario,
                 n_iterations: int,
                 val_set: str = 'train',
                 incs_per_round: int = 1,
                 n_optimizers: int = 1,
                 rng: typing.Optional[typing.Union[np.random.RandomState,
                                                   int]] = None,
                 run_id: int = 1,
                 tae: typing.Type[ExecuteTARun] = ExecuteTARunOld,
                 tae_kwargs: typing.Union[dict, None] = None,
                 **kwargs):
        """
        Constructor

        Parameters
        ----------
        scenario : ~dsmac.scenario.scenario.Scenario
            Scenario object
        n_iterations: int
            number of Hydra iterations
        val_set: str
            Set to validate incumbent(s) on. [train, valX].
            train => whole training set,
            valX => random sample of X% of the training set, where X in (0, 100)
        incs_per_round: int
            Number of incumbents to keep per round
        n_optimizers: int
            Number of optimizers to run in parallel per round
        rng: int/np.random.RandomState
            The randomState/seed to pass to each dsmac run
        run_id: int
            run_id for this hydra run
        tae: ExecuteTARun
            Target Algorithm Runner (supports old and aclib format as well as AbstractTAFunc)
        tae_kwargs: Optional[dict]
            arguments passed to constructor of '~tae'

        """
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        self.n_iterations = n_iterations
        self.scenario = scenario
        self.run_id, self.rng = get_rng(rng, run_id, self.logger)
        self.kwargs = kwargs
        self.output_dir = None
        self.top_dir = None
        self.solver = None
        self.portfolio = None
        self.rh = RunHistory(average_cost, file_system=scenario.file_system)
        self._tae = tae
        self._tae_kwargs = tae_kwargs
        if incs_per_round <= 0:
            self.logger.warning('Invalid value in %s: %d. Setting to 1',
                                'incs_per_round', incs_per_round)
        self.incs_per_round = max(incs_per_round, 1)
        if n_optimizers <= 0:
            self.logger.warning('Invalid value in %s: %d. Setting to 1',
                                'n_optimizers', n_optimizers)
        self.n_optimizers = max(n_optimizers, 1)
        self.val_set = self._get_validation_set(val_set)
        self.cost_per_inst = {}
        self.optimizer = None
        self.portfolio_cost = None
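
The val_set argument follows the 'valX' convention parsed by _get_validation_set: 'val20' means a random 20% of the training instances (never fewer than 3), which are then removed from the training set. A standalone sketch under those assumptions; the helper name is hypothetical and not part of dsmac.

import numpy as np

def split_validation_set(train_insts, val_set='val20', rng=None):
    # 'train' keeps everything; 'valX' samples X% of the instances (at least 3).
    if rng is None:
        rng = np.random.RandomState(1)
    if val_set == 'train':
        return list(train_insts), list(train_insts)
    fraction = int(val_set[3:]) / 100            # e.g. 'val20' -> 0.2
    insts = np.array(train_insts)
    size = max(int(np.floor(insts.shape[0] * fraction)), 3)
    ids = rng.choice(insts.shape[0], size, replace=False)
    val = insts[ids].tolist()
    remaining = np.delete(insts, ids).tolist()   # validation instances leave the training set
    return remaining, val

train, val = split_validation_set(['inst_%d' % i for i in range(20)], 'val20')
print(len(train), len(val))  # 16 4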
Example #16
0
    def __init__(
            self,
            scenario: Scenario,
            # TODO: once we drop python3.4 add type hint
            # typing.Union[ExecuteTARun, callable]
            tae_runner=None,
            runhistory: RunHistory = None,
            intensifier: Intensifier = None,
            acquisition_function: AbstractAcquisitionFunction = None,
            model: AbstractEPM = None,
            runhistory2epm: AbstractRunHistory2EPM = None,
            initial_design: InitialDesign = None,
            initial_configurations: typing.List[Configuration] = None,
            stats: Stats = None,
            rng: np.random.RandomState = None,
            run_id: int = 1):
        """Constructor"""
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        aggregate_func = average_cost
        self.runhistory = None
        self.trajectory = None

        # initialize stats object
        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario, file_system=scenario.file_system)

        self.output_dir = create_output_directory(scenario, run_id)
        scenario.write()

        # initialize empty runhistory
        if runhistory is None:
            runhistory = RunHistory(aggregate_func=aggregate_func,
                                    file_system=scenario.file_system)
        # inject aggr_func if necessary
        if runhistory.aggregate_func is None:
            runhistory.aggregate_func = aggregate_func

        # initial random number generator
        num_run, rng = self._get_rng(rng=rng)

        # reset random number generator in config space to draw different
        # random configurations with each seed given to SMAC
        scenario.cs.seed(rng.randint(MAXINT))

        # initial Trajectory Logger
        traj_logger = TrajLogger(output_dir=self.output_dir,
                                 stats=self.stats,
                                 file_system=scenario.file_system)

        # initial EPM
        types, bounds = get_types(scenario.cs, scenario.feature_array)
        if model is None:
            model = RandomForestWithInstances(
                configspace=scenario.cs,
                types=types,
                bounds=bounds,
                instance_features=scenario.feature_array,
                seed=rng.randint(MAXINT),
                pca_components=scenario.PCA_DIM,
                num_trees=scenario.rf_num_trees,
                do_bootstrapping=scenario.rf_do_bootstrapping,
                ratio_features=scenario.rf_ratio_features,
                min_samples_split=scenario.rf_min_samples_split,
                min_samples_leaf=scenario.rf_min_samples_leaf,
                max_depth=scenario.rf_max_depth,
            )
        # initial acquisition function
        if acquisition_function is None:
            if scenario.run_obj == "runtime":
                acquisition_function = LogEI(model=model)
            else:
                acquisition_function = EI(model=model)
        # inject model if necessary
        if acquisition_function.model is None:
            acquisition_function.model = model

        # initialize optimizer on acquisition function
        local_search = LocalSearch(
            acquisition_function,
            scenario.cs,
            max_steps=scenario.sls_max_steps,
            n_steps_plateau_walk=scenario.sls_n_steps_plateau_walk)

        # initialize tae_runner
        # First case, if tae_runner is None, the target algorithm is a call
        # string in the scenario file
        if tae_runner is None:
            tae_runner = ExecuteTARunOld(
                ta=scenario.ta,
                stats=self.stats,
                run_obj=scenario.run_obj,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash)
        # Second case, the tae_runner is a function to be optimized
        elif callable(tae_runner):
            tae_runner = ExecuteTAFuncDict(
                ta=tae_runner,
                stats=self.stats,
                run_obj=scenario.run_obj,
                memory_limit=scenario.memory_limit,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash)
        # Third case, if it is an ExecuteTaRun we can simply use the
        # instance. Otherwise, the next check raises an exception
        elif not isinstance(tae_runner, ExecuteTARun):
            raise TypeError("Argument 'tae_runner' is %s, but must be "
                            "either a callable or an instance of "
                            "ExecuteTaRun. Passing 'None' will result in the "
                            "creation of target algorithm runner based on the "
                            "call string in the scenario file." %
                            type(tae_runner))

        # Check that overall objective and tae objective are the same
        if tae_runner.run_obj != scenario.run_obj:
            raise ValueError("Objective for the target algorithm runner and "
                             "the scenario must be the same, but are '%s' and "
                             "'%s'" % (tae_runner.run_obj, scenario.run_obj))

        # inject stats if necessary
        if tae_runner.stats is None:
            tae_runner.stats = self.stats
        # inject runhistory if necessary
        if tae_runner.runhistory is None:
            tae_runner.runhistory = runhistory
        # inject cost_for_crash
        if tae_runner.crash_cost != scenario.cost_for_crash:
            tae_runner.crash_cost = scenario.cost_for_crash

        # initialize intensification
        if intensifier is None:
            intensifier = Intensifier(
                tae_runner=tae_runner,
                stats=self.stats,
                traj_logger=traj_logger,
                rng=rng,
                instances=scenario.train_insts,
                cutoff=scenario.cutoff,
                deterministic=scenario.deterministic,
                run_obj_time=scenario.run_obj == "runtime",
                always_race_against=scenario.cs.get_default_configuration()
                if scenario.always_race_default else None,
                instance_specifics=scenario.instance_specific,
                minR=scenario.minR,
                maxR=scenario.maxR,
                adaptive_capping_slackfactor=scenario.intens_adaptive_capping_slackfactor,
                min_chall=scenario.intens_min_chall,
                distributer=scenario.distributer)
        # inject deps if necessary
        if intensifier.tae_runner is None:
            intensifier.tae_runner = tae_runner
        if intensifier.stats is None:
            intensifier.stats = self.stats
        if intensifier.traj_logger is None:
            intensifier.traj_logger = traj_logger

        # initial design
        if initial_design is not None and initial_configurations is not None:
            raise ValueError(
                "Either use initial_design or initial_configurations; but not both"
            )

        if initial_configurations is not None:
            initial_design = InitialDesign(tae_runner=tae_runner,
                                           scenario=scenario,
                                           stats=self.stats,
                                           traj_logger=traj_logger,
                                           runhistory=runhistory,
                                           rng=rng,
                                           configs=initial_configurations,
                                           intensifier=intensifier,
                                           aggregate_func=aggregate_func)
        elif initial_design is None:
            if scenario.initial_incumbent == "DEFAULT":
                initial_design = DefaultConfiguration(
                    tae_runner=tae_runner,
                    scenario=scenario,
                    stats=self.stats,
                    traj_logger=traj_logger,
                    runhistory=runhistory,
                    rng=rng,
                    intensifier=intensifier,
                    aggregate_func=aggregate_func,
                    max_config_fracs=0.0)
            elif scenario.initial_incumbent == "RANDOM":
                initial_design = RandomConfigurations(
                    tae_runner=tae_runner,
                    scenario=scenario,
                    stats=self.stats,
                    traj_logger=traj_logger,
                    runhistory=runhistory,
                    rng=rng,
                    intensifier=intensifier,
                    aggregate_func=aggregate_func,
                    max_config_fracs=0.0)
            else:
                raise ValueError("Don't know what kind of initial_incumbent "
                                 "'%s' is" % scenario.initial_incumbent)
        # inject deps if necessary
        if initial_design.tae_runner is None:
            initial_design.tae_runner = tae_runner
        if initial_design.scenario is None:
            initial_design.scenario = scenario
        if initial_design.stats is None:
            initial_design.stats = self.stats
        if initial_design.traj_logger is None:
            initial_design.traj_logger = traj_logger

        # initial conversion of runhistory into EPM data
        if runhistory2epm is None:

            num_params = len(scenario.cs.get_hyperparameters())
            if scenario.run_obj == "runtime":

                # if we log the performance data,
                # the RFRImputator will already get
                # log transform data from the runhistory
                cutoff = np.log(scenario.cutoff)
                threshold = np.log(scenario.cutoff * scenario.par_factor)

                imputor = RFRImputator(rng=rng,
                                       cutoff=cutoff,
                                       threshold=threshold,
                                       model=model,
                                       change_threshold=0.01,
                                       max_iter=2)

                runhistory2epm = RunHistory2EPM4LogCost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[
                        StatusType.SUCCESS,
                    ],
                    impute_censored_data=True,
                    impute_state=[
                        StatusType.CAPPED,
                    ],
                    imputor=imputor)

            elif scenario.run_obj == 'quality':
                runhistory2epm = RunHistory2EPM4Cost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[
                        StatusType.SUCCESS,
                    ],
                    impute_censored_data=False,
                    impute_state=None)

            else:
                raise ValueError('Unknown run objective: %s. Should be either '
                                 'quality or runtime.' % scenario.run_obj)

        # inject scenario if necessary:
        if runhistory2epm.scenario is None:
            runhistory2epm.scenario = scenario

        self.solver = EPILS_Solver(scenario=scenario,
                                   stats=self.stats,
                                   initial_design=initial_design,
                                   runhistory=runhistory,
                                   runhistory2epm=runhistory2epm,
                                   intensifier=intensifier,
                                   aggregate_func=aggregate_func,
                                   num_run=num_run,
                                   model=model,
                                   acq_optimizer=local_search,
                                   acquisition_func=acquisition_function,
                                   rng=rng)
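
For a runtime objective the constructor above log-transforms both bounds handed to the RFRImputator, since the runhistory already delivers log-transformed performance data: the cutoff bounds censored (capped) runs and the threshold is the PAR-penalized cost assigned to timeouts. A small worked example with assumed scenario values:

import numpy as np

cutoff_s = 300.0    # assumed scenario.cutoff: per-run wallclock limit in seconds
par_factor = 10     # assumed scenario.par_factor: PAR-10 penalization of timeouts
cutoff = np.log(cutoff_s)                     # log-space censoring bound for capped runs
threshold = np.log(cutoff_s * par_factor)     # log-space PAR-10 cost for timed-out runs
print(round(cutoff, 2), round(threshold, 2))  # 5.7 8.01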
Example #17
0
    def main_cli(self, commandline_arguments: typing.Optional[typing.List[str]] = None):
        """Main function of SMAC for CLI interface"""
        self.logger.info("SMAC call: %s" % (" ".join(sys.argv)))

        cmd_reader = CMDReader()
        kwargs = {}
        if commandline_arguments:
            kwargs['commandline_arguments'] = commandline_arguments
        main_args_, smac_args_, scen_args_ = cmd_reader.read_cmd(**kwargs)

        root_logger = logging.getLogger()
        root_logger.setLevel(main_args_.verbose_level)
        logger_handler = logging.StreamHandler(stream=sys.stdout)
        if root_logger.level >= logging.INFO:
            formatter = logging.Formatter(
                "%(levelname)s:\t%(message)s")
        else:
            formatter = logging.Formatter(
                "%(asctime)s:%(levelname)s:%(name)s:%(message)s",
                "%Y-%m-%d %H:%M:%S")
        logger_handler.setFormatter(formatter)
        root_logger.addHandler(logger_handler)
        # remove default handler
        if len(root_logger.handlers) > 1:
            root_logger.removeHandler(root_logger.handlers[0])

        # Create defaults
        rh = None
        initial_configs = None
        stats = None
        incumbent = None

        # Create scenario-object
        scenario = {}
        scenario.update(vars(smac_args_))
        scenario.update(vars(scen_args_))
        scen = Scenario(scenario=scenario)

        # Restore state
        if main_args_.restore_state:
            root_logger.debug("Restoring state from %s...", main_args_.restore_state)
            rh, stats, traj_list_aclib, traj_list_old = self.restore_state(scen, main_args_)

            scen.output_dir_for_this_run = create_output_directory(
                scen, main_args_.seed, root_logger,
            )
            scen.write()
            incumbent = self.restore_state_after_output_dir(scen, stats,
                                                            traj_list_aclib, traj_list_old)

        if main_args_.warmstart_runhistory:
            aggregate_func = average_cost
            rh = RunHistory(aggregate_func=aggregate_func)

            scen, rh = merge_foreign_data_from_file(
                scenario=scen,
                runhistory=rh,
                in_scenario_fn_list=main_args_.warmstart_scenario,
                in_runhistory_fn_list=main_args_.warmstart_runhistory,
                cs=scen.cs,
                aggregate_func=aggregate_func)

        if main_args_.warmstart_incumbent:
            initial_configs = [scen.cs.get_default_configuration()]
            for traj_fn in main_args_.warmstart_incumbent:
                trajectory = TrajLogger.read_traj_aclib_format(
                    fn=traj_fn, cs=scen.cs)
                initial_configs.append(trajectory[-1]["incumbent"])

        if main_args_.mode == "SMAC4AC":
            optimizer = SMAC4AC(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs,
                stats=stats,
                restore_incumbent=incumbent,
                run_id=main_args_.seed)
        elif main_args_.mode == "SMAC4HPO":
            optimizer = SMAC4HPO(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs,
                stats=stats,
                restore_incumbent=incumbent,
                run_id=main_args_.seed)
        elif main_args_.mode == "SMAC4BO":
            optimizer = SMAC4BO(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs,
                stats=stats,
                restore_incumbent=incumbent,
                run_id=main_args_.seed)
        elif main_args_.mode == "ROAR":
            optimizer = ROAR(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs,
                run_id=main_args_.seed)
        elif main_args_.mode == "EPILS":
            optimizer = EPILS(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs,
                run_id=main_args_.seed)
        elif main_args_.mode == "Hydra":
            optimizer = Hydra(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs,
                stats=stats,
                restore_incumbent=incumbent,
                run_id=main_args_.seed,
                random_configuration_chooser=main_args_.random_configuration_chooser,
                n_iterations=main_args_.hydra_iterations,
                val_set=main_args_.hydra_validation,
                incs_per_round=main_args_.hydra_incumbents_per_round,
                n_optimizers=main_args_.hydra_n_optimizers)
        elif main_args_.mode == "PSMAC":
            optimizer = PSMAC(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                run_id=main_args_.seed,
                shared_model=smac_args_.shared_model,
                validate=main_args_.psmac_validate,
                n_optimizers=main_args_.hydra_n_optimizers,
                n_incs=main_args_.hydra_incumbents_per_round,
            )
        try:
            optimizer.optimize()
        except (TAEAbortException, FirstRunCrashedException) as err:
            self.logger.error(err)
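
The logging setup in main_cli installs a single stdout handler whose format depends on the verbosity level and then drops the previously installed default handler. A minimal standalone sketch of the same pattern (the function name is made up):

import logging
import sys

def setup_cli_logging(verbose_level=logging.INFO):
    # Compact format at INFO and above, verbose format (timestamp + logger name) below.
    root = logging.getLogger()
    root.setLevel(verbose_level)
    handler = logging.StreamHandler(stream=sys.stdout)
    if root.level >= logging.INFO:
        formatter = logging.Formatter("%(levelname)s:\t%(message)s")
    else:
        formatter = logging.Formatter(
            "%(asctime)s:%(levelname)s:%(name)s:%(message)s", "%Y-%m-%d %H:%M:%S")
    handler.setFormatter(formatter)
    root.addHandler(handler)
    # drop previously installed handlers so messages are not emitted twice
    for old in root.handlers[:-1]:
        root.removeHandler(old)

setup_cli_logging(logging.DEBUG)
logging.getLogger("example").debug("verbose formatting active")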
Example #18
0
    def _race_challenger(self,
                         challenger: Configuration,
                         incumbent: Configuration,
                         run_history: RunHistory,
                         aggregate_func: typing.Callable,
                         log_traj: bool = True):
        """Aggressively race challenger against incumbent

        Parameters
        ----------
        challenger : Configuration
            Configuration which challenges incumbent
        incumbent : Configuration
            Best configuration so far
        run_history : RunHistory
            Stores all runs we ran so far
        aggregate_func: typing.Callable
            Aggregate performance across instances
        log_traj: bool
            Whether to log changes of incumbents in trajectory

        Returns
        -------
        new_incumbent: Configuration
            Either challenger or incumbent
        """
        # at least one run of challenger
        # to increase chall_indx counter
        first_run = False

        # Line 8
        N = max(1, self.minR)

        inc_inst_seeds = set(run_history.get_runs_for_config(incumbent))
        # Line 9
        while True:
            chall_inst_seeds = set(run_history.get_runs_for_config(challenger))

            # Line 10
            missing_runs = list(inc_inst_seeds - chall_inst_seeds)

            # Line 11
            self.rs.shuffle(missing_runs)
            to_run = missing_runs[:min(N, len(missing_runs))]
            # Line 13 (Line 12 comes below...)
            missing_runs = missing_runs[min(N, len(missing_runs)):]

            # for adaptive capping:
            # computed here once per round for efficiency
            inst_seed_pairs = list(inc_inst_seeds - set(missing_runs))
            # cost used by incumbent for going over all runs in inst_seed_pairs
            inc_sum_cost = sum_cost(config=incumbent,
                                    instance_seed_pairs=inst_seed_pairs,
                                    run_history=run_history)

            if len(to_run) == 0:
                self.logger.debug("No further runs for challenger available")

            # Line 12
            # Run challenger on all <config,seed> to run
            for instance, seed in to_run:

                cutoff = self._adapt_cutoff(challenger=challenger,
                                            incumbent=incumbent,
                                            run_history=run_history,
                                            inc_sum_cost=inc_sum_cost)
                if cutoff is not None and cutoff <= 0:
                    # no time to validate challenger
                    self.logger.debug("Stop challenger itensification due "
                                      "to adaptive capping.")
                    # challenger performance is worse than incumbent
                    return incumbent

                if not first_run:
                    first_run = True
                    self._chall_indx += 1

                self.logger.debug("Add run of challenger")
                try:
                    status, cost, dur, res = self.tae_runner.start(
                        config=challenger,
                        instance=instance,
                        seed=seed,
                        cutoff=cutoff,
                        instance_specific=self.instance_specifics.get(
                            instance, "0"),
                        capped=(self.cutoff is not None)
                        and (cutoff < self.cutoff))
                    self._num_run += 1
                    self._ta_time += dur
                except CappedRunException:
                    return incumbent

            new_incumbent = self._compare_configs(
                incumbent=incumbent,
                challenger=challenger,
                run_history=run_history,
                aggregate_func=aggregate_func,
                log_traj=log_traj)
            if new_incumbent == incumbent:
                break
            elif new_incumbent == challenger:
                incumbent = challenger
                break
            else:  # Line 17
                # challenger is not worse, continue
                N = 2 * N

        return incumbent
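
The racing loop above schedules at most N of the incumbent's missing <instance, seed> runs for the challenger per round and doubles N whenever the comparison stays undecided. A standalone sketch of that doubling schedule, assuming every comparison remains undecided until all missing runs have been scheduled:

def race_schedule(n_missing_runs, min_r=1):
    # Batch sizes of challenger runs per comparison round.
    n, batches = max(1, min_r), []
    while n_missing_runs > 0:
        batch = min(n, n_missing_runs)
        batches.append(batch)
        n_missing_runs -= batch
        n *= 2          # Line 17: undecided -> double N
    return batches

print(race_schedule(20))  # [1, 2, 4, 8, 5]: geometric ramp-up of challenger runs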
Example #19
0
from dsmac.runhistory.runhistory import RunHistoryDB
from dsmac.runhistory.utils import get_id_of_config
from dsmac.tae.execute_ta_run import StatusType

if __name__ == '__main__':
    from ConfigSpace import ConfigurationSpace
    import joblib
    from dsmac.optimizer.objective import average_cost
    from dsmac.runhistory.runhistory import RunHistory

    runhistory = RunHistory(average_cost, db_args="test.db")
    cs: ConfigurationSpace = joblib.load("/home/tqc/PycharmProjects/auto-pipeline/test/php.bz2")
    runhistory.load_json(
        "/home/tqc/PycharmProjects/auto-pipeline/test/test_runhistory/default_dataset_name/smac_output/runhistory.json",
        cs)
    all_configs = runhistory.get_all_configs()
    config = all_configs[0]
    config_id = get_id_of_config(config)
    cost = runhistory.get_cost(config)
    db = RunHistoryDB(cs, runhistory, "test.db")
    db.delete_all()
    ans = db.appointment_config(config)
    print(ans)
    db.insert_runhistory(config, cost, 0.1, StatusType.SUCCESS)
    db2 = RunHistoryDB(cs, runhistory, "test.db")
    db2.insert_runhistory(all_configs[1], runhistory.get_cost(all_configs[1]), 0.1, StatusType.SUCCESS)
    db.fetch_new_runhistory()
Example #20
0
    def _compare_configs(self,
                         incumbent: Configuration,
                         challenger: Configuration,
                         run_history: RunHistory,
                         aggregate_func: typing.Callable,
                         log_traj: bool = True):
        """
        Compare two configurations w.r.t. the runhistory and return the one
        which performs better (or None if the decision is not safe)

        Decision strategy to return x as being better than y:
            1. x has at least as many runs as y
            2. x performs better than y on the intersection of runs on x and y

        Implicit assumption:
            Challenger was evaluated on the same instance-seed pairs as
            incumbent

        Parameters
        ----------
        incumbent: Configuration
            Current incumbent
        challenger: Configuration
            Challenger configuration
        run_history: RunHistory
            Stores all runs we ran so far
        aggregate_func: typing.Callable
            Aggregate performance across instances
        log_traj: bool
            Whether to log changes of incumbents in trajectory

        Returns
        -------
        None or better of the two configurations x,y
        """

        inc_runs = run_history.get_runs_for_config(incumbent)
        chall_runs = run_history.get_runs_for_config(challenger)
        to_compare_runs = set(inc_runs).intersection(chall_runs)

        # performance on challenger runs
        chal_perf = aggregate_func(challenger, run_history, to_compare_runs)
        inc_perf = aggregate_func(incumbent, run_history, to_compare_runs)

        # Line 15
        if chal_perf > inc_perf and len(chall_runs) >= self.minR:
            # Incumbent beats challenger
            self.logger.debug("Incumbent (%.4f) is better than challenger "
                              "(%.4f) on %d runs." %
                              (inc_perf, chal_perf, len(chall_runs)))
            return incumbent

        # Line 16
        if not set(inc_runs) - set(chall_runs):

            # no plateau walks
            if chal_perf >= inc_perf:
                self.logger.debug(
                    "Incumbent (%.4f) is at least as good as the "
                    "challenger (%.4f) on %d runs." %
                    (inc_perf, chal_perf, len(chall_runs)))
                return incumbent

            # Challenger is better than incumbent
            # and has at least the same runs as inc
            # -> change incumbent
            n_samples = len(chall_runs)
            self.logger.info(
                "Challenger (%.4f) is better than incumbent (%.4f)"
                " on %d runs." % (chal_perf, inc_perf, n_samples))
            # Show changes in the configuration
            params = sorted([(param, incumbent[param], challenger[param])
                             for param in challenger.keys()])
            self.logger.info("Changes in incumbent:")
            for param in params:
                if param[1] != param[2]:
                    self.logger.info("  %s : %r -> %r" % (param))
                else:
                    self.logger.debug("  %s remains unchanged: %r" %
                                      (param[0], param[1]))

            if log_traj:
                self.stats.inc_changed += 1
                self.traj_logger.add_entry(train_perf=chal_perf,
                                           incumbent_id=self.stats.inc_changed,
                                           incumbent=challenger)
            return challenger

        # undecided
        return None
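
The decision rule in _compare_configs boils down to: reject the challenger early if it is worse after at least minR runs; accept or reject it once it has caught up with all of the incumbent's runs; otherwise stay undecided so the race continues. A simplified standalone sketch that uses plain performance values and run counts in place of RunHistory and instance-seed sets (so "caught up" is approximated by a count comparison):

def compare_configs(inc_perf, chal_perf, n_inc_runs, n_chall_runs, min_r=2):
    # Returns 'incumbent', 'challenger', or None, following the two decision lines above.
    if chal_perf > inc_perf and n_chall_runs >= min_r:
        return 'incumbent'                       # Line 15: challenger is clearly worse
    if n_chall_runs >= n_inc_runs:               # challenger caught up with the incumbent
        return 'incumbent' if chal_perf >= inc_perf else 'challenger'  # Line 16
    return None                                  # undecided: keep racing with doubled N

print(compare_configs(1.0, 0.8, n_inc_runs=4, n_chall_runs=2))  # None
print(compare_configs(1.0, 0.8, n_inc_runs=4, n_chall_runs=4))  # challenger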
Example #21
0
    def __init__(self,
                 scenario: Scenario,
                 rng: typing.Optional[typing.Union[np.random.RandomState,
                                                   int]] = None,
                 run_id: int = 1,
                 tae: typing.Type[ExecuteTARun] = ExecuteTARunOld,
                 tae_kwargs: typing.Union[dict, None] = None,
                 shared_model: bool = True,
                 validate: bool = True,
                 n_optimizers: int = 2,
                 val_set: typing.Union[typing.List[str], None] = None,
                 n_incs: int = 1,
                 **kwargs):
        """
        Constructor

        Parameters
        ----------
        scenario : ~dsmac.scenario.scenario.Scenario
            Scenario object
        n_optimizers: int
            Number of optimizers to run in parallel per round
        rng: int/np.random.RandomState
            The randomState/seed to pass to each dsmac run
        run_id: int
            run_id for this hydra run
        tae: ExecuteTARun
            Target Algorithm Runner (supports old and aclib format as well as AbstractTAFunc)
        tae_kwargs: Optional[dict]
            arguments passed to constructor of '~tae'
        shared_model: bool
            Flag to indicate whether information is shared between SMAC runs or not
        validate: bool / None
            Flag to indicate whether to validate the found configurations or to use the SMAC estimates
            None => neither and return the full portfolio
        n_incs: int
            Number of incumbents to return (clamped to the range [1, n_optimizers])
        val_set: typing.List[str]
            List of instance-ids to validate on

        """
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        self.scenario = scenario
        self.run_id, self.rng = get_rng(rng, run_id, logger=self.logger)
        self.kwargs = kwargs
        self.output_dir = None
        self.rh = RunHistory(average_cost, file_system=scenario.file_system)
        self._tae = tae
        self._tae_kwargs = tae_kwargs
        if n_optimizers <= 1:
            self.logger.warning('Invalid value in %s: %d. Setting to 2',
                                'n_optimizers', n_optimizers)
        self.n_optimizers = max(n_optimizers, 2)
        self.validate = validate
        self.shared_model = shared_model
        self.n_incs = min(max(1, n_incs), self.n_optimizers)
        if val_set is None:
            self.val_set = scenario.train_insts
        else:
            self.val_set = val_set
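
Like Hydra, the PSMAC constructor sanitizes its parallelism settings by warning and clamping rather than raising: n_optimizers is forced to at least 2 and n_incs is kept between 1 and n_optimizers. A standalone sketch of that behaviour (the helper name is hypothetical):

import logging

def clamp_psmac_settings(n_optimizers, n_incs, logger=logging.getLogger(__name__)):
    # Warn about invalid values and pull them into a sane range instead of raising.
    if n_optimizers <= 1:
        logger.warning('Invalid value in %s: %d. Setting to 2', 'n_optimizers', n_optimizers)
    n_optimizers = max(n_optimizers, 2)
    n_incs = min(max(1, n_incs), n_optimizers)   # keep 1 <= n_incs <= n_optimizers
    return n_optimizers, n_incs

print(clamp_psmac_settings(0, 10))  # (2, 2)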