Example #1
    def _save_results(self, rh: RunHistory, output_fn, backup_fn=None):
        """ Helper to save results to file

        Parameters
        ----------
        rh: RunHistory
            runhistory to save
        output_fn: str
            if it ends in '.json': filename to save the runhistory to
            else: directory to save the runhistory to (filename is backup_fn)
        backup_fn: str
            if output_fn does not end in '.json', treat output_fn as a directory
            and append backup_fn as the filename (if output_fn ends in '.json',
            this argument is ignored)
        """
        if output_fn == "":
            self.logger.info(
                "No output specified, validated runhistory not saved.")
            return
        # Check if a folder or a file is specified as output
        if not output_fn.endswith('.json'):
            output_dir = output_fn
            output_fn = os.path.join(output_dir, backup_fn)
            self.logger.debug("Output is \"%s\", changing to \"%s\"!",
                              output_dir, output_fn)
        base = os.path.split(output_fn)[0]
        if not base == "" and not os.path.exists(base):
            self.logger.debug("Folder (\"%s\") doesn't exist, creating.", base)
            os.makedirs(base)
        rh.save_json(output_fn)
        self.logger.info("Saving validation-results in %s", output_fn)
Example #2
def merge_foreign_data(scenario: Scenario, runhistory: RunHistory,
                       in_scenario_list: typing.List[Scenario],
                       in_runhistory_list: typing.List[RunHistory]):
    """Extend <scenario> and <runhistory> with runhistory data from another
    <in_scenario> assuming the same PCS and feature space, but different instances

    Parameters
    ----------
    scenario: Scenario
        original scenario -- feature dictionary will be extended
    runhistory: RunHistory
        original runhistory -- will be extended by further data points
    in_scenario_list: typing.List[Scenario]
        list of input scenarios
    in_runhistory_list: typing.List[RunHistory]
        list of runhistories corresponding to the scenarios in <in_scenario_list>

    Returns
    -------
    scenario: Scenario
    runhistory: RunHistory
    """
    # add further instance features
    for in_scenario in in_scenario_list:
        if scenario.n_features != in_scenario.n_features:
            raise ValueError(
                "Feature Space has to be the same for both scenarios (%d vs %d)."
                % (scenario.n_features, in_scenario.n_features))

        if scenario.cs != in_scenario.cs:
            raise ValueError("PCS of both scenarios have to be identical.")

        if scenario.cutoff != in_scenario.cutoff:
            raise ValueError("Cutoffs of both scenarios have to be identical.")

        scenario.feature_dict.update(in_scenario.feature_dict)

    # extend runhistory
    for rh in in_runhistory_list:
        runhistory.update(rh, origin=DataOrigin.EXTERNAL_DIFFERENT_INSTANCES)

    for date in runhistory.data:
        if scenario.feature_dict.get(date.instance_id) is None:
            raise ValueError(
                "Instance feature for \"%s\" was not found in scenario data." %
                (date.instance_id))

    runhistory.compute_all_costs(instances=scenario.train_insts)

    return scenario, runhistory
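The essential invariant enforced above is that every instance referenced by the merged runhistory must have features in the extended scenario. A toy sketch of that check with plain dictionaries (hypothetical data, no dsmac objects):

feature_dict = {"inst_a": [1.0, 2.0]}             # original scenario features
feature_dict.update({"inst_b": [3.0, 4.0]})       # features merged in from the foreign scenario

for inst in ["inst_a", "inst_b", "inst_c"]:       # instances referenced by the merged runhistory
    if feature_dict.get(inst) is None:
        # the real code raises ValueError here
        print('Instance feature for "%s" was not found in scenario data.' % inst)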
Example #3
def merge_foreign_data_from_file(
        scenario: Scenario,
        runhistory: RunHistory,
        in_scenario_fn_list: typing.List[str],
        in_runhistory_fn_list: typing.List[str],
        cs: ConfigurationSpace,
        aggregate_func: typing.Callable = average_cost):
    """Extend <scenario> and <runhistory> with runhistory data from another
    <in_scenario> assuming the same PCS and feature space, but different instances

    Parameters
    ----------
    scenario: Scenario
        original scenario -- feature dictionary will be extended
    runhistory: RunHistory
        original runhistory -- will be extended by further data points
    in_scenario_fn_list: typing.List[str]
        input scenario file names
    in_runhistory_fn_list: typing.List[str]
        list of filenames of runhistory dumps
    cs: ConfigurationSpace
        parameter configuration space to read runhistory from file
    aggregate_func: typing.Callable
        function to aggregate performance of a configuration across instances

    Returns
    -------
    scenario: Scenario
    runhistory: RunHistory
    """

    if not in_scenario_fn_list:
        raise ValueError(
            "To read warmstart data from previous runhistories, the corresponding scenarios are required. Use option --warmstart_scenario"
        )
    scens = [
        Scenario(scenario=scen_fn, cmd_options={"output_dir": ""})
        for scen_fn in in_scenario_fn_list
    ]
    rhs = []
    for rh_fn in in_runhistory_fn_list:
        rh = RunHistory(aggregate_func, file_system=scenario.file_system)
        rh.load_json(rh_fn, cs)
        rhs.append(rh)

    return merge_foreign_data(scenario,
                              runhistory,
                              in_scenario_list=scens,
                              in_runhistory_list=rhs)
Example #4
def _cost(config: Configuration, run_history: RunHistory,
          instance_seed_pairs=None):
    """Return array of all costs for the given config for further calculations.

    Parameters
    ----------
    config : Configuration
        Configuration to calculate objective for
    run_history : RunHistory
        RunHistory object from which the objective value is computed.
    instance_seed_pairs : list, optional (default=None)
        List of instance-seed pairs (tuples). If None, the run_history is
        queried for all runs of the given configuration.

    Returns
    -------
    Costs: list
        List of all costs
    """
    try:
        id_ = run_history.config_ids[config]
    except KeyError:  # challenger was not running so far
        return []

    if instance_seed_pairs is None:
        instance_seed_pairs = run_history.get_runs_for_config(config)

    costs = []
    for i, r in instance_seed_pairs:
        k = RunKey(id_, i, r)
        costs.append(run_history.data[k].cost)
    return costs
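A self-contained sketch of the lookup pattern used above, with namedtuples standing in for dsmac's RunKey and run-value records (hypothetical data):

from collections import namedtuple

RunKey = namedtuple("RunKey", ["config_id", "instance_id", "seed"])
RunValue = namedtuple("RunValue", ["cost", "time", "status", "additional_info"])

data = {
    RunKey(1, "inst_a", 42): RunValue(0.8, 1.2, "SUCCESS", {}),
    RunKey(1, "inst_b", 42): RunValue(0.6, 0.9, "SUCCESS", {}),
}

config_id = 1
instance_seed_pairs = [("inst_a", 42), ("inst_b", 42)]
costs = [data[RunKey(config_id, i, r)].cost for i, r in instance_seed_pairs]
print(costs)  # [0.8, 0.6]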
Example #5
    def _get_mean_costs(self, incs: typing.List[Configuration],
                        new_rh: RunHistory):
        """
        Compute mean cost per instance

        Parameters
        ----------
        incs : typing.List[Configuration]
            incumbents determined by all parallel SMAC runs
        new_rh : RunHistory
            runhistory to determine mean performance

        Returns
        -------
        means: List[float]
            mean cost of each incumbent across instances
        config_cost_per_inst: Dict(Configuration -> Dict(inst_id(str) -> float))
            per-instance costs for each incumbent

        """
        config_cost_per_inst = {}
        results = []
        for incumbent in incs:
            cost_per_inst = new_rh.get_instance_costs_for_config(
                config=incumbent)
            config_cost_per_inst[incumbent] = cost_per_inst
            results.append(np.mean(list(cost_per_inst.values())))
        return results, config_cost_per_inst
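The aggregation itself is just a mean over per-instance costs; a tiny sketch with made-up numbers for two hypothetical incumbents:

import numpy as np

config_cost_per_inst = {
    "inc_1": {"inst_a": 0.8, "inst_b": 0.6, "inst_c": 1.0},
    "inc_2": {"inst_a": 0.5, "inst_b": 0.9, "inst_c": 0.7},
}
means = [np.mean(list(costs.values())) for costs in config_cost_per_inst.values()]
print(means)  # [0.8, 0.7]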
Example #6
def read(run_history: RunHistory, output_dirs: typing.Union[str,
                                                            typing.List[str]],
         configuration_space: ConfigurationSpace, logger: logging.Logger):
    """Update runhistory with run results from concurrent runs of pSMAC.

    Parameters
    ----------
    run_history : dsmac.runhistory.RunHistory
        RunHistory object to be updated with run information from runhistory
        objects stored in the output directory.
    output_dirs : typing.Union[str,typing.List[str]]
        List of SMAC output directories
        or Linux path expression (str) which will be cast into a list with
        file_system.glob(). This function will search the output directories
        for files matching the runhistory regular expression.
    configuration_space : ConfigSpace.ConfigurationSpace
        A ConfigurationSpace object to check if loaded configurations are valid.
    logger : logging.Logger
    """
    numruns_in_runhistory = len(run_history.data)
    initial_numruns_in_runhistory = numruns_in_runhistory
    file_system = run_history.file_system
    if isinstance(output_dirs, str):
        parsed_output_dirs = file_system.glob(output_dirs)
        if file_system.glob(run_history.file_system.join(output_dirs,
                                                         "run_*")):
            parsed_output_dirs += file_system.glob(
                file_system.join(output_dirs, "run_*"))
    else:
        parsed_output_dirs = output_dirs

    for output_directory in parsed_output_dirs:
        for file_in_output_directory in file_system.listdir(output_directory):
            match = re.match(RUNHISTORY_RE, file_in_output_directory)
            valid_match = re.match(VALIDATEDRUNHISTORY_RE,
                                   file_in_output_directory)
            if match or valid_match:
                last_id = PSMAC_VALUE.dir2id[output_directory]
                runhistory_file = file_system.join(output_directory,
                                                   file_in_output_directory)
                updated_id_set = run_history.update_from_json(
                    runhistory_file,
                    configuration_space,
                    id_set=PSMAC_VALUE.dir2id[output_directory],
                    file_system=run_history.file_system)
                PSMAC_VALUE.dir2id[output_directory] = updated_id_set
                # print(PSMAC_VALUE.dir2id)
                new_numruns_in_runhistory = len(run_history.data)
                difference = new_numruns_in_runhistory - numruns_in_runhistory
                logger.debug('Shared model mode: Loaded %d new runs from %s' %
                             (difference, runhistory_file))
                numruns_in_runhistory = new_numruns_in_runhistory

    difference = numruns_in_runhistory - initial_numruns_in_runhistory
    logger.info(
        'Shared model mode: Finished loading new runs, found %d new runs.' %
        difference)
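A stdlib-only sketch of how the output directories might be scanned for runhistory files (hypothetical directory layout; the real code matches RUNHISTORY_RE and VALIDATEDRUNHISTORY_RE, whose exact patterns are not shown here):

import glob
import os
import re

output_dirs = "psmac3-output_*"                   # Linux path expression, as in the docstring
parsed = glob.glob(output_dirs) + glob.glob(os.path.join(output_dirs, "run_*"))

runhistory_re = re.compile(r"runhistory\.json$")  # assumption: the real pattern may differ
for directory in parsed:
    if not os.path.isdir(directory):
        continue
    for fname in os.listdir(directory):
        if runhistory_re.match(fname):
            print("would load", os.path.join(directory, fname))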
Example #7
def write(run_history: RunHistory, output_directory: str,
          logger: logging.Logger):
    """Write the runhistory to the output directory.

    Overwrites previously outputted runhistories.

    Parameters
    ----------
    run_history : ~dsmac.runhistory.runhistory.RunHistory
        RunHistory object to be saved.

    output_directory : str

    logger : logging.Logger
    """
    file_system = run_history.file_system
    output_filename = file_system.join(output_directory,
                                       RUNHISTORY_FILEPATTERN)

    logging.debug("Saving runhistory to %s" % output_filename)

    run_history.save_json(output_filename, save_external=False)
Example #8
    def restore_state(self, scen, args_):
        """Read in files for state-restoration: runhistory, stats, trajectory.
        """
        # Check for folder and files
        rh_path = os.path.join(args_.restore_state, "runhistory.json")
        stats_path = os.path.join(args_.restore_state, "stats.json")
        traj_path_aclib = os.path.join(args_.restore_state, "traj_aclib2.json")
        traj_path_old = os.path.join(args_.restore_state, "traj_old.csv")
        scen_path = os.path.join(args_.restore_state, "scenario.txt")
        if not os.path.isdir(args_.restore_state):
            raise FileNotFoundError("Could not find folder from which to restore.")
        # Load runhistory and stats
        rh = RunHistory(aggregate_func=None)
        rh.load_json(rh_path, scen.cs)
        self.logger.debug("Restored runhistory from %s", rh_path)
        stats = Stats(scen)
        stats.load(stats_path)
        self.logger.debug("Restored stats from %s", stats_path)
        with open(traj_path_aclib, 'r') as traj_fn:
            traj_list_aclib = traj_fn.readlines()
        with open(traj_path_old, 'r') as traj_fn:
            traj_list_old = traj_fn.readlines()
        return rh, stats, traj_list_aclib, traj_list_old
Example #9
    def _adapt_cutoff(self, challenger: Configuration,
                      incumbent: Configuration,
                      run_history: RunHistory,
                      inc_sum_cost: float):
        """Adaptive capping:
        Compute cutoff based on time so far used for incumbent
        and reduce cutoff for next run of challenger accordingly

        !Only applicable if self.run_obj_time

        !Runs on the incumbent should be a superset of the runs performed for
         the challenger

        Parameters
        ----------
        challenger : Configuration
            Configuration which challenges incumbent
        incumbent : Configuration
            Best configuration so far
        run_history : RunHistory
            Stores all runs we ran so far
        inc_sum_cost: float
            Sum of runtimes of all incumbent runs

        Returns
        -------
        cutoff: int
            Adapted cutoff
        """

        if not self.run_obj_time:
            return self.cutoff

        # cost used by challenger for going over all its runs
        # should be subset of runs of incumbent (not checked for efficiency
        # reasons)
        chall_inst_seeds = run_history.get_runs_for_config(challenger)
        chal_sum_cost = sum_cost(config=challenger,
                                 instance_seed_pairs=chall_inst_seeds,
                                 run_history=run_history)
        cutoff = min(self.cutoff,
                     inc_sum_cost * self.adaptive_capping_slackfactor -
                     chal_sum_cost
                     )
        return cutoff
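A worked example of the capping formula with made-up numbers: with a scenario cutoff of 300 sec, an incumbent that has spent 40 sec on the shared instance-seed pairs, a slack factor of 1.2, and a challenger that has already used 10 sec, the adapted cutoff is min(300, 40 * 1.2 - 10) = 38 sec:

cutoff = 300.0                       # scenario cutoff [sec]
inc_sum_cost = 40.0                  # total runtime of the incumbent on the shared instance-seed pairs
adaptive_capping_slackfactor = 1.2
chal_sum_cost = 10.0                 # runtime the challenger has already used

adapted = min(cutoff, inc_sum_cost * adaptive_capping_slackfactor - chal_sum_cost)
print(adapted)  # 38.0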
Example #10
    def _add_inc_run(self, incumbent: Configuration, run_history: RunHistory):
        """Add new run for incumbent

        *Side effect:* adds runs to <run_history>

        Parameters
        ----------
        incumbent : Configuration
            best configuration so far
        run_history : RunHistory
            stores all runs we ran so far
        """
        inc_runs = run_history.get_runs_for_config(incumbent)

        # Line 3
        # First evaluate incumbent on a new instance
        if len(inc_runs) < self.maxR:
            while True:
                # Line 4
                # find all instances that have the most runs on the inc
                inc_runs = run_history.get_runs_for_config(incumbent)
                inc_inst = [s.instance for s in inc_runs]
                inc_inst = list(Counter(inc_inst).items())
                inc_inst.sort(key=lambda x: x[1], reverse=True)
                try:
                    max_runs = inc_inst[0][1]
                except IndexError:
                    self.logger.debug("No run for incumbent found")
                    max_runs = 0
                inc_inst = set([x[0] for x in inc_inst if x[1] == max_runs])

                available_insts = (self.instances - inc_inst)

                # if all instances were used n times, we can pick an instance
                # from the complete set again
                if not self.deterministic and not available_insts:
                    available_insts = self.instances

                # Line 6 (Line 5 is further down...)
                if self.deterministic:
                    next_seed = 0
                else:
                    next_seed = self.rs.randint(low=0, high=MAXINT, size=1)[0]

                if available_insts:
                    # Line 5 (here for easier code)
                    next_instance = self.rs.choice(list(available_insts))
                    # Line 7
                    self.logger.debug("Add run of incumbent")
                    status, cost, dur, res = self.tae_runner.start(
                        config=incumbent,
                        instance=next_instance,
                        seed=next_seed,
                        cutoff=self.cutoff,
                        instance_specific=self.instance_specifics.get(
                            next_instance, "0"))
                    self._ta_time += dur
                    self._num_run += 1
                else:
                    self.logger.debug("No further instance-seed pairs for "
                                      "incumbent available.")
                    break

                inc_runs = run_history.get_runs_for_config(incumbent)
                # Termination condition; after exactly one run, this checks
                # whether further runs are necessary due to minR
                if len(inc_runs) >= self.minR or len(inc_runs) >= self.maxR:
                    break
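A standalone sketch of the instance-selection step (Lines 4-5 of the pseudocode): count how often each instance has already been used for the incumbent, exclude the most-used ones, and draw the next instance from the remainder (hypothetical data):

from collections import Counter

import numpy as np

instances = {"i1", "i2", "i3"}
inc_run_instances = ["i1", "i1", "i2"]         # instances of previous incumbent runs

counts = Counter(inc_run_instances)
max_runs = max(counts.values()) if counts else 0
most_used = {inst for inst, n in counts.items() if n == max_runs}

available_insts = instances - most_used        # {"i2", "i3"}
rs = np.random.RandomState(1)
print(rs.choice(sorted(available_insts)))      # e.g. "i2"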
Example #11
    def intensify(self,
                  challengers: typing.List[Configuration],
                  incumbent: Configuration,
                  run_history: RunHistory,
                  aggregate_func: typing.Callable,
                  time_bound: float = float(MAXINT),
                  log_traj: bool = True):
        """Running intensification to determine the incumbent configuration.
        *Side effect:* adds runs to run_history

        Implementation of Procedure 2 in Hutter et al. (2011).

        Parameters
        ----------
        challengers : typing.List[Configuration]
            promising configurations
        incumbent : Configuration
            best configuration so far
        run_history : RunHistory
            stores all runs we ran so far
        aggregate_func: typing.Callable
            aggregate error across instances
        time_bound : float, optional (default=2 ** 31 - 1)
            time in [sec] available to perform intensify
        log_traj: bool
            whether to log changes of incumbents in trajectory

        Returns
        -------
        incumbent: Configuration()
            current (maybe new) incumbent configuration
        inc_perf: float
            empirical performance of incumbent configuration
        """
        self.start_time = time.time()
        self._ta_time = 0

        if time_bound < self._min_time:
            raise ValueError("time_bound must be >= %f" % (self._min_time))

        self._num_run = 0
        self._chall_indx = 0

        # Line 1 + 2
        if isinstance(challengers, ChallengerList):
            challengers = challengers.challengers
        L1 = 10
        L2 = 10000
        r = np.random.uniform()
        if 0.05 < r < 0.2 and len(challengers) > L1:
            target = np.random.randint(1, L1)
            challengers[target], challengers[0] = challengers[0], challengers[
                target]
        if r < 0.05 and len(challengers) > L2:
            target = np.random.randint(L1, L2)
            challengers[target], challengers[0] = challengers[0], challengers[
                target]

        for challenger in challengers:
            if not run_history.db.appointment_config(challenger):
                continue
            if challenger == incumbent:
                self.logger.debug(
                    "Challenger was the same as the current incumbent; Skipping challenger"
                )
                continue

            self.logger.debug("Intensify on %s", challenger)
            if hasattr(challenger, 'origin'):
                self.logger.debug("Configuration origin: %s",
                                  challenger.origin)

            try:
                # Lines 3-7
                self._add_inc_run(incumbent=incumbent, run_history=run_history)

                # Lines 8-17
                incumbent = self._race_challenger(
                    challenger=challenger,
                    incumbent=incumbent,
                    run_history=run_history,
                    aggregate_func=aggregate_func,
                    log_traj=log_traj)
                if self.always_race_against and \
                        challenger == incumbent and \
                        self.always_race_against != challenger:
                    self.logger.debug(
                        "Race against constant configuration after incumbent change."
                    )
                    incumbent = self._race_challenger(
                        challenger=self.always_race_against,
                        incumbent=incumbent,
                        run_history=run_history,
                        aggregate_func=aggregate_func,
                        log_traj=log_traj)

            except BudgetExhaustedException:
                # We return incumbent, SMBO stops due to its own budget checks
                inc_perf = run_history.get_cost(incumbent)
                self.logger.debug("Budget exhausted; Return incumbent")
                return incumbent, inc_perf

            tm = time.time()
            if self._chall_indx >= self.min_chall:
                if self._num_run > self.run_limit:
                    self.logger.debug(
                        "Maximum #runs for intensification reached")
                    break
                if not self.use_ta_time_bound and tm - self.start_time - time_bound >= 0:
                    self.logger.debug(
                        "Wallclock time limit for intensification reached ("
                        "used: %f sec, available: %f sec)" %
                        (tm - self.start_time, time_bound))
                    break
                elif self._ta_time - time_bound >= 0:
                    self.logger.debug(
                        "TA time limit for intensification reached ("
                        "used: %f sec, available: %f sec)" %
                        (self._ta_time, time_bound))
                    break

        # output estimated performance of incumbent
        inc_runs = run_history.get_runs_for_config(incumbent)
        inc_perf = aggregate_func(incumbent, run_history, inc_runs)
        self.logger.info(
            "Updated estimated cost of incumbent on %d runs: %.4f" %
            (len(inc_runs), inc_perf))

        self.stats.update_average_configs_per_intensify(
            n_configs=self._chall_indx)

        return incumbent, inc_perf
Example #12
    def __init__(
        self,
        scenario: Scenario,
        tae_runner: Optional[Union[Type[ExecuteTARun], Callable]] = None,
        tae_runner_kwargs: Optional[dict] = None,
        runhistory: Optional[Union[Type[RunHistory], RunHistory]] = None,
        runhistory_kwargs: Optional[dict] = None,
        intensifier: Optional[Type[Intensifier]] = None,
        intensifier_kwargs: Optional[dict] = None,
        acquisition_function: Optional[
            Type[AbstractAcquisitionFunction]] = None,
        acquisition_function_kwargs: Optional[dict] = None,
        integrate_acquisition_function: bool = False,
        acquisition_function_optimizer: Optional[
            Type[AcquisitionFunctionMaximizer]] = None,
        acquisition_function_optimizer_kwargs: Optional[dict] = None,
        model: Optional[Type[AbstractEPM]] = None,
        model_kwargs: Optional[dict] = None,
        runhistory2epm: Optional[Type[AbstractRunHistory2EPM]] = None,
        runhistory2epm_kwargs: Optional[dict] = None,
        initial_design: Optional[Type[InitialDesign]] = None,
        initial_design_kwargs: Optional[dict] = None,
        initial_configurations: Optional[List[Configuration]] = None,
        stats: Optional[Stats] = None,
        restore_incumbent: Optional[Configuration] = None,
        rng: Optional[Union[np.random.RandomState, int]] = None,
        smbo_class: Optional[SMBO] = None,
        run_id: Optional[int] = None,
        random_configuration_chooser: Optional[
            Type[RandomConfigurationChooser]] = None,
        random_configuration_chooser_kwargs: Optional[dict] = None,
    ):
        """
        Constructor

        Parameters
        ----------
        scenario : ~dsmac.scenario.scenario.Scenario
            Scenario object
        tae_runner : ~dsmac.tae.execute_ta_run.ExecuteTARun or callable
            Callable or implementation of
            :class:`~dsmac.tae.execute_ta_run.ExecuteTARun`. In case a
            callable is passed it will be wrapped by
            :class:`~dsmac.tae.execute_func.ExecuteTAFuncDict`.
            If not set, it will be initialized with the
            :class:`~dsmac.tae.execute_ta_run_old.ExecuteTARunOld`.
        tae_runner_kwargs: Optional[dict]
            arguments passed to constructor of '~tae_runner'
        runhistory : RunHistory
            runhistory to store all algorithm runs
        runhistory_kwargs : Optional[dict]
            arguments passed to constructor of runhistory.
            We strongly advise against changing the aggregation function,
            since it will break some code assumptions
        intensifier : Intensifier
            intensification object to issue a racing to decide the current
            incumbent
        intensifier_kwargs: Optional[dict]
            arguments passed to the constructor of '~intensifier'
        acquisition_function : ~dsmac.optimizer.acquisition.AbstractAcquisitionFunction
            Class or object that implements the :class:`~dsmac.optimizer.acquisition.AbstractAcquisitionFunction`.
            Will use :class:`~dsmac.optimizer.acquisition.EI` or :class:`~dsmac.optimizer.acquisition.LogEI` if not set.
            `~acquisition_function_kwargs` is passed to the class constructor.
        acquisition_function_kwargs : Optional[dict]
            dictionary to pass specific arguments to ~acquisition_function
        integrate_acquisition_function : bool, default=False
            Whether to integrate the acquisition function. Works only with models which can sample their
            hyperparameters (i.e. GaussianProcessMCMC).
        acquisition_function_optimizer : ~dsmac.optimizer.ei_optimization.AcquisitionFunctionMaximizer
            Object that implements the :class:`~dsmac.optimizer.ei_optimization.AcquisitionFunctionMaximizer`.
            Will use :class:`dsmac.optimizer.ei_optimization.InterleavedLocalAndRandomSearch` if not set.
        acquisition_function_optimizer_kwargs: Optional[dict]
            Arguments passed to constructor of '~acquisition_function_optimizer'
        model : AbstractEPM
            Model that implements train() and predict(). Will use a
            :class:`~dsmac.epm.rf_with_instances.RandomForestWithInstances` if not set.
        model_kwargs : Optional[dict]
            Arguments passed to constructor of '~model'
        runhistory2epm : ~dsmac.runhistory.runhistory2epm.RunHistory2EPM
            Object that implements the AbstractRunHistory2EPM. If None,
            will use :class:`~dsmac.runhistory.runhistory2epm.RunHistory2EPM4Cost`
            if objective is cost or
            :class:`~dsmac.runhistory.runhistory2epm.RunHistory2EPM4LogCost`
            if objective is runtime.
        runhistory2epm_kwargs: Optional[dict]
            Arguments passed to the constructor of '~runhistory2epm'
        initial_design : InitialDesign
            initial sampling design
        initial_design_kwargs: Optional[dict]
            arguments passed to constructor of '~initial_design'
        initial_configurations : List[Configuration]
            list of initial configurations for initial design --
            cannot be used together with initial_design
        stats : Stats
            optional stats object
        rng : np.random.RandomState
            Random number generator
        restore_incumbent : Configuration
            incumbent used if restoring to previous state
        smbo_class : ~dsmac.optimizer.smbo.SMBO
            Class implementing the SMBO interface which will be used to
            instantiate the optimizer class.
        run_id : int (optional)
            Run ID will be used as subfolder for output_dir. If no ``run_id`` is given, a random ``run_id`` will be
            chosen.
        random_configuration_chooser : ~dsmac.optimizer.random_configuration_chooser.RandomConfigurationChooser
            How often to choose a random configuration during the intensification procedure.
        random_configuration_chooser_kwargs : Optional[dict]
            arguments of constructor for '~random_configuration_chooser'

        """
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        aggregate_func = average_cost

        self.scenario = scenario
        self.output_dir = ""
        if not restore_incumbent:
            # restore_incumbent is used by the CLI interface which provides a method for restoring a SMAC run given an
            # output directory. This is the default path.
            # initial random number generator
            # run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
            # run_id=datetime.now().strftime("%Y%m%d%H%M%S%f")
            run_id = uuid1()
            # self.output_dir = create_output_directory(scenario, run_id)   # fixme run_id
            self.output_dir = scenario.output_dir  # create_output_directory(scenario, run_id)   # fixme run_id

        elif scenario.output_dir is not None:
            run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
            # output-directory is created in CLI when restoring from a
            # folder. calling the function again in the facade results in two
            # folders being created: run_X and run_X.OLD. if we are
            # restoring, the output-folder exists already and we omit creating it,
            # but set the self-output_dir to the dir.
            # necessary because we want to write traj to new output-dir in CLI.
            self.output_dir = scenario.output_dir_for_this_run

        if (scenario.deterministic is True
                and getattr(scenario, 'tuner_timeout', None) is None
                and scenario.run_obj == 'quality'):
            self.logger.info(
                'Optimizing a deterministic scenario for quality without a tuner timeout - will make '
                'SMAC deterministic and only evaluate one configuration per iteration!'
            )
            scenario.intensification_percentage = 1e-10
            scenario.min_chall = 1
        scenario.write()

        # initialize stats object
        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario, file_system=scenario.file_system)

        if self.scenario.run_obj == "runtime" and not self.scenario.transform_y == "LOG":
            self.logger.warning(
                "Runtime as objective automatically activates log(y) transformation"
            )
            self.scenario.transform_y = "LOG"

        # initialize empty runhistory
        runhistory_def_kwargs = {'aggregate_func': aggregate_func}
        if runhistory_kwargs is not None:
            runhistory_def_kwargs.update(runhistory_kwargs)
        if runhistory is None:
            runhistory = RunHistory(**runhistory_def_kwargs,
                                    file_system=scenario.file_system,
                                    db_type=scenario.db_type,
                                    db_args=scenario.db_args,
                                    db_kwargs=scenario.db_kwargs)
        elif inspect.isclass(runhistory):
            runhistory = runhistory(**runhistory_def_kwargs)
        else:
            if runhistory.aggregate_func is None:
                runhistory.aggregate_func = aggregate_func

        rand_conf_chooser_kwargs = {'rng': rng}
        if random_configuration_chooser_kwargs is not None:
            rand_conf_chooser_kwargs.update(
                random_configuration_chooser_kwargs)
        if random_configuration_chooser is None:
            if 'prob' not in rand_conf_chooser_kwargs:
                rand_conf_chooser_kwargs['prob'] = scenario.rand_prob
            random_configuration_chooser = ChooserProb(
                **rand_conf_chooser_kwargs)
        elif inspect.isclass(random_configuration_chooser):
            random_configuration_chooser = random_configuration_chooser(
                **rand_conf_chooser_kwargs)
        elif not isinstance(random_configuration_chooser,
                            RandomConfigurationChooser):
            raise ValueError(
                "random_configuration_chooser has to be"
                " a class or object of RandomConfigurationChooser")

        # reset random number generator in config space to draw different
        # random configurations with each seed given to SMAC
        scenario.cs.seed(rng.randint(MAXINT))

        # initial Trajectory Logger
        traj_logger = TrajLogger(output_dir=self.output_dir,
                                 stats=self.stats,
                                 file_system=scenario.file_system)

        # initial EPM
        types, bounds = get_types(scenario.cs, scenario.feature_array)
        model_def_kwargs = {
            'types': types,
            'bounds': bounds,
            'instance_features': scenario.feature_array,
            'seed': rng.randint(MAXINT),
            'pca_components': scenario.PCA_DIM,
        }
        if model_kwargs is not None:
            model_def_kwargs.update(model_kwargs)
        if model is None:
            for key, value in {
                    'log_y': scenario.transform_y in ["LOG", "LOGS"],
                    'num_trees': scenario.rf_num_trees,
                    'do_bootstrapping': scenario.rf_do_bootstrapping,
                    'ratio_features': scenario.rf_ratio_features,
                    'min_samples_split': scenario.rf_min_samples_split,
                    'min_samples_leaf': scenario.rf_min_samples_leaf,
                    'max_depth': scenario.rf_max_depth,
            }.items():
                if key not in model_def_kwargs:
                    model_def_kwargs[key] = value
            model_def_kwargs['configspace'] = self.scenario.cs
            model = RandomForestWithInstances(**model_def_kwargs)
        elif inspect.isclass(model):
            model_def_kwargs['configspace'] = self.scenario.cs
            model = model(**model_def_kwargs)
        else:
            raise TypeError("Model not recognized: %s" % (type(model)))

        # initial acquisition function
        acq_def_kwargs = {'model': model}
        if acquisition_function_kwargs is not None:
            acq_def_kwargs.update(acquisition_function_kwargs)
        if acquisition_function is None:
            if scenario.transform_y in ["LOG", "LOGS"]:
                acquisition_function = LogEI(**acq_def_kwargs)
            else:
                acquisition_function = EI(**acq_def_kwargs)
        elif inspect.isclass(acquisition_function):
            acquisition_function = acquisition_function(**acq_def_kwargs)
        else:
            raise TypeError(
                "Argument acquisition_function must be None or an object implementing the "
                "AbstractAcquisitionFunction, not %s." %
                type(acquisition_function))
        if integrate_acquisition_function:
            acquisition_function = IntegratedAcquisitionFunction(
                acquisition_function=acquisition_function, **acq_def_kwargs)

        # initialize optimizer on acquisition function
        acq_func_opt_kwargs = {
            'acquisition_function': acquisition_function,
            'config_space': scenario.cs,
            'rng': rng,
        }
        if acquisition_function_optimizer_kwargs is not None:
            acq_func_opt_kwargs.update(acquisition_function_optimizer_kwargs)
        if acquisition_function_optimizer is None:
            for key, value in {
                    'max_steps': scenario.sls_max_steps,
                    'n_steps_plateau_walk': scenario.sls_n_steps_plateau_walk,
            }.items():
                if key not in acq_func_opt_kwargs:
                    acq_func_opt_kwargs[key] = value
            acquisition_function_optimizer = InterleavedLocalAndRandomSearch(
                **acq_func_opt_kwargs)
        elif inspect.isclass(acquisition_function_optimizer):
            acquisition_function_optimizer = acquisition_function_optimizer(
                **acq_func_opt_kwargs)
        else:
            raise TypeError(
                "Argument acquisition_function_optimizer must be None or an object implementing the "
                "AcquisitionFunctionMaximizer, but is '%s'" %
                type(acquisition_function_optimizer))

        # initialize tae_runner
        # First case, if tae_runner is None, the target algorithm is a call
        # string in the scenario file
        tae_def_kwargs = {
            'stats': self.stats,
            'run_obj': scenario.run_obj,
            'runhistory': runhistory,
            'par_factor': scenario.par_factor,
            'cost_for_crash': scenario.cost_for_crash,
            'abort_on_first_run_crash': scenario.abort_on_first_run_crash
        }
        if tae_runner_kwargs is not None:
            tae_def_kwargs.update(tae_runner_kwargs)
        if 'ta' not in tae_def_kwargs:
            tae_def_kwargs['ta'] = scenario.ta
        if tae_runner is None:
            tae_def_kwargs['ta'] = scenario.ta
            tae_runner = ExecuteTARunOld(**tae_def_kwargs)
        elif inspect.isclass(tae_runner):
            tae_runner = tae_runner(**tae_def_kwargs)
        elif callable(tae_runner):
            tae_def_kwargs['ta'] = tae_runner
            tae_runner = ExecuteTAFuncDict(**tae_def_kwargs)
        else:
            raise TypeError(
                "Argument 'tae_runner' is %s, but must be "
                "either None, a callable or an object implementing "
                "ExecuteTaRun. Passing 'None' will result in the "
                "creation of target algorithm runner based on the "
                "call string in the scenario file." % type(tae_runner))

        # Check that overall objective and tae objective are the same
        if tae_runner.run_obj != scenario.run_obj:
            raise ValueError("Objective for the target algorithm runner and "
                             "the scenario must be the same, but are '%s' and "
                             "'%s'" % (tae_runner.run_obj, scenario.run_obj))

        # initialize intensification
        intensifier_def_kwargs = {
            'tae_runner': tae_runner,
            'stats': self.stats,
            'traj_logger': traj_logger,
            'rng': rng,
            'instances': scenario.train_insts,
            'cutoff': scenario.cutoff,
            'deterministic': scenario.deterministic,
            'run_obj_time': scenario.run_obj == "runtime",
            'always_race_against': (scenario.cs.get_default_configuration()
                                    if scenario.always_race_default else None),
            'use_ta_time_bound': scenario.use_ta_time,
            'instance_specifics': scenario.instance_specific,
            'minR': scenario.minR,
            'maxR': scenario.maxR,
            'adaptive_capping_slackfactor': scenario.intens_adaptive_capping_slackfactor,
            'min_chall': scenario.intens_min_chall,
        }
        if hasattr(scenario,
                   'filter_callback') and scenario.filter_callback is not None:
            print('update callback')
            intensifier_def_kwargs.update(
                {'filter_callback': scenario.filter_callback})
        if intensifier_kwargs is not None:
            intensifier_def_kwargs.update(intensifier_kwargs)
        if intensifier is None:
            intensifier = Intensifier(**intensifier_def_kwargs)
        elif inspect.isclass(intensifier):
            intensifier = intensifier(**intensifier_def_kwargs)
        else:
            raise TypeError(
                "Argument intensifier must be None or an object implementing the Intensifier, but is '%s'"
                % type(intensifier))

        # initial design
        if initial_design is not None and initial_configurations is not None:
            initial_design.initial_configurations = initial_configurations
            initial_configurations = None
        init_design_def_kwargs = {
            'tae_runner': tae_runner,
            'scenario': scenario,
            'stats': self.stats,
            'traj_logger': traj_logger,
            'runhistory': runhistory,
            'rng': rng,
            'configs': initial_configurations,
            'intensifier': intensifier,
            'aggregate_func': aggregate_func,
            'n_configs_x_params': 0,
            'max_config_fracs': 0.0,
            'initial_configurations': initial_design.initial_configurations
        }
        if initial_design_kwargs is not None:
            init_design_def_kwargs.update(initial_design_kwargs)
        if initial_configurations is not None:
            initial_design = InitialDesign(**init_design_def_kwargs)
        elif initial_design is None:
            if scenario.initial_incumbent == "DEFAULT":
                init_design_def_kwargs['max_config_fracs'] = 0.0
                initial_design = DefaultConfiguration(**init_design_def_kwargs)
            elif scenario.initial_incumbent == "RANDOM":
                init_design_def_kwargs['max_config_fracs'] = 0.0
                initial_design = RandomConfigurations(**init_design_def_kwargs)
            elif scenario.initial_incumbent == "LHD":
                initial_design = LHDesign(**init_design_def_kwargs)
            elif scenario.initial_incumbent == "FACTORIAL":
                initial_design = FactorialInitialDesign(
                    **init_design_def_kwargs)
            elif scenario.initial_incumbent == "SOBOL":
                initial_design = SobolDesign(**init_design_def_kwargs)
            else:
                raise ValueError("Don't know what kind of initial_incumbent "
                                 "'%s' is" % scenario.initial_incumbent)
        elif inspect.isclass(initial_design):
            initial_design = initial_design(**init_design_def_kwargs)
        else:
            raise TypeError(
                "Argument initial_design must be None or an object implementing the InitialDesign, but is '%s'"
                % type(initial_design))

        # if we log the performance data,
        # the RFRImputator will already get
        # log transform data from the runhistory
        if scenario.transform_y in ["LOG", "LOGS"]:
            cutoff = np.log(np.nanmin([np.inf, np.float_(scenario.cutoff)]))
            threshold = cutoff + np.log(scenario.par_factor)
        else:
            cutoff = np.nanmin([np.inf, np.float_(scenario.cutoff)])
            threshold = cutoff * scenario.par_factor
        num_params = len(scenario.cs.get_hyperparameters())
        imputor = RFRImputator(rng=rng,
                               cutoff=cutoff,
                               threshold=threshold,
                               model=model,
                               change_threshold=0.01,
                               max_iter=2)

        r2e_def_kwargs = {
            'scenario': scenario,
            'num_params': num_params,
            'success_states': [
                StatusType.SUCCESS,
            ],
            'impute_censored_data': True,
            'impute_state': [
                StatusType.CAPPED,
            ],
            'imputor': imputor,
            'scale_perc': 5
        }
        if scenario.run_obj == 'quality':
            r2e_def_kwargs.update({
                'success_states': [StatusType.SUCCESS, StatusType.CRASHED],
                'impute_censored_data': False,
                'impute_state': None,
            })
        if runhistory2epm_kwargs is not None:
            r2e_def_kwargs.update(runhistory2epm_kwargs)
        if runhistory2epm is None:
            if scenario.run_obj == 'runtime':
                runhistory2epm = RunHistory2EPM4LogCost(**r2e_def_kwargs)
            elif scenario.run_obj == 'quality':
                if scenario.transform_y == "NONE":
                    runhistory2epm = RunHistory2EPM4Cost(**r2e_def_kwargs)
                elif scenario.transform_y == "LOG":
                    runhistory2epm = RunHistory2EPM4LogCost(**r2e_def_kwargs)
                elif scenario.transform_y == "LOGS":
                    runhistory2epm = RunHistory2EPM4LogScaledCost(
                        **r2e_def_kwargs)
                elif scenario.transform_y == "INVS":
                    runhistory2epm = RunHistory2EPM4InvScaledCost(
                        **r2e_def_kwargs)
            else:
                raise ValueError('Unknown run objective: %s. Should be either '
                                 'quality or runtime.' % self.scenario.run_obj)
        elif inspect.isclass(runhistory2epm):
            runhistory2epm = runhistory2epm(**r2e_def_kwargs)
        else:
            raise TypeError(
                "Argument runhistory2epm must be None or an object implementing the RunHistory2EPM, but is '%s'"
                % type(runhistory2epm))

        smbo_args = {
            'scenario': scenario,
            'stats': self.stats,
            'initial_design': initial_design,
            'runhistory': runhistory,
            'runhistory2epm': runhistory2epm,
            'intensifier': intensifier,
            'aggregate_func': aggregate_func,
            'num_run': run_id,
            'model': model,
            'acq_optimizer': acquisition_function_optimizer,
            'acquisition_func': acquisition_function,
            'rng': rng,
            'restore_incumbent': restore_incumbent,
            'random_configuration_chooser': random_configuration_chooser
        }

        if smbo_class is None:
            self.solver = SMBO(**smbo_args)
        else:
            self.solver = smbo_class(**smbo_args)
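For orientation, a hypothetical end-to-end use of this facade, assuming dsmac mirrors SMAC3's public API (a Scenario built from a dict, a plain callable as tae_runner, and an optimize() entry point). The import paths, scenario keys, and the optimize() call are assumptions based on SMAC3 and may differ in dsmac:

from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
# assumed import paths, mirroring SMAC3; dsmac's module layout may differ
from dsmac.scenario.scenario import Scenario
from dsmac.facade.smac_ac_facade import SMAC4AC

cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter("x", -5.0, 5.0))

scenario = Scenario({"run_obj": "quality",    # optimize solution quality, not runtime
                     "runcount-limit": 20,    # at most 20 target-algorithm calls
                     "cs": cs,
                     "deterministic": "true"})

def quadratic(cfg):                           # callable TAE: receives a Configuration, returns a cost
    return (cfg["x"] - 1.0) ** 2

smac = SMAC4AC(scenario=scenario, tae_runner=quadratic, rng=42)
incumbent = smac.optimize()                   # assumed entry point, as in SMAC3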
Example #13
    def _get_runs(
        self,
        configs: Union[str, typing.List[Configuration]],
        insts: Union[str, typing.List[str]],
        repetitions: int = 1,
        runhistory: RunHistory = None,
    ) -> typing.Tuple[typing.List[_Run], RunHistory]:
        """
        Generate list of SMAC-TAE runs to be executed. This means
        combinations of configs with all instances on a certain number of seeds.

        side effect: Adds runs that don't need to be reevaluated to self.rh!

        Parameters
        ----------
        configs: str or list<Configuration>
            string or directly a list of Configuration
            str from [def, inc, def+inc, wallclock_time, cpu_time, all]
                time evaluates at cpu- or wallclock-timesteps of:
                [max_time/2^0, max_time/2^1, max_time/2^2, ..., default]
                with max_time being the highest recorded time
        insts: str or list<str>
            what instances to use for validation, either from
            [train, test, train+test] or directly a list of instances
        repetitions: int
            number of seeds per instance/config-pair to be evaluated
        runhistory: RunHistory
            optional, try to reuse this runhistory and save some runs

        Returns
        -------
        runs: list<_Run>
            list with _Runs
            [_Run(config=CONFIG1,inst=INSTANCE1,seed=SEED1,inst_specs=INST_SPECIFICS1),
             _Run(config=CONFIG2,inst=INSTANCE2,seed=SEED2,inst_specs=INST_SPECIFICS2),
             ...]
        """
        # Get relevant configurations and instances
        if isinstance(configs, str):
            configs = self._get_configs(configs)
        if isinstance(insts, str):
            insts = self._get_instances(insts)

        # If no instances are given, fix the instances to one "None" instance
        if not insts:
            insts = [None]
        # If algorithm is deterministic, fix repetitions to 1
        if self.scen.deterministic and repetitions != 1:
            self.logger.warning(
                "Specified %d repetitions, but fixing to 1, "
                "because algorithm is deterministic.", repetitions)
            repetitions = 1

        # Extract relevant information from given runhistory
        inst_seed_config = self._process_runhistory(configs, insts, runhistory)

        # Now create the actual run-list
        runs = []
        # Counter for runs without the need of recalculation
        runs_from_rh = 0
        # If we reuse runs, we want to return them as well
        new_rh = RunHistory(average_cost)

        for i in sorted(insts):
            for rep in range(repetitions):
                # First, find a seed and add all the data we can take from the
                # given runhistory to "our" validation runhistory.
                configs_evaluated = []
                if runhistory and i in inst_seed_config:
                    # Choose seed based on most often evaluated inst-seed-pair
                    seed, configs_evaluated = inst_seed_config[i].pop(0)
                    # Delete inst if all seeds are used
                    if not inst_seed_config[i]:
                        inst_seed_config.pop(i)
                    # Add runs to runhistory
                    for c in configs_evaluated[:]:
                        runkey = RunKey(runhistory.config_ids[c], i, seed)
                        cost, time, status, additional_info = runhistory.data[
                            runkey]
                        if status in [
                                StatusType.CRASHED, StatusType.ABORT,
                                StatusType.CAPPED
                        ]:
                            # Not properly executed target algorithm runs should be repeated
                            configs_evaluated.remove(c)
                            continue
                        new_rh.add(c,
                                   cost,
                                   time,
                                   status,
                                   instance_id=i,
                                   seed=seed,
                                   additional_info=additional_info)
                        runs_from_rh += 1
                else:
                    # If no runhistory or no entries for instance, get new seed
                    seed = self.rng.randint(MAXINT)

                # We now have a seed and add all configs that are not already
                # evaluated on that seed to the runs-list. This way, we
                # guarantee the same inst-seed-pairs for all configs.
                for config in [
                        c for c in configs if c not in configs_evaluated
                ]:
                    # Only use specifics if specific exists, else use string "0"
                    specs = self.scen.instance_specific[
                        i] if i and i in self.scen.instance_specific else "0"
                    runs.append(
                        _Run(config=config,
                             inst=i,
                             seed=seed,
                             inst_specs=specs))

        self.logger.info(
            "Collected %d runs from %d configurations on %d "
            "instances with %d repetitions. Reusing %d runs from "
            "given runhistory.", len(runs), len(configs), len(insts),
            repetitions, runs_from_rh)

        return runs, new_rh
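A toy sketch of the combination logic described above: every configuration is paired with every instance for a fixed number of seed repetitions, so that all configurations share the same instance-seed pairs (hypothetical stand-in for _Run):

from collections import namedtuple

import numpy as np

_Run = namedtuple("_Run", ["config", "inst", "seed", "inst_specs"])

configs = ["cfg_default", "cfg_incumbent"]
insts = ["inst_a", "inst_b"]
repetitions = 2
rng = np.random.RandomState(0)

runs = []
for inst in sorted(insts):
    for _ in range(repetitions):
        seed = rng.randint(2 ** 31 - 1)        # one seed per instance/repetition, shared by all configs
        for config in configs:
            runs.append(_Run(config=config, inst=inst, seed=seed, inst_specs="0"))

print(len(runs))  # 2 instances * 2 repetitions * 2 configs = 8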
Example #14
class Hydra(object):
    """
    Facade to use Hydra default mode

    Attributes
    ----------
    logger
    stats : Stats
        logs information about used resources
    solver : SMBO
        handles the actual algorithm calls
    rh : RunHistory
        stores information about previous runs
    portfolio : list
        List of all incumbents

    """
    def __init__(self,
                 scenario: Scenario,
                 n_iterations: int,
                 val_set: str = 'train',
                 incs_per_round: int = 1,
                 n_optimizers: int = 1,
                 rng: typing.Optional[typing.Union[np.random.RandomState,
                                                   int]] = None,
                 run_id: int = 1,
                 tae: typing.Type[ExecuteTARun] = ExecuteTARunOld,
                 tae_kwargs: typing.Union[dict, None] = None,
                 **kwargs):
        """
        Constructor

        Parameters
        ----------
        scenario : ~dsmac.scenario.scenario.Scenario
            Scenario object
        n_iterations: int,
            number of Hydra iterations
        val_set: str
            Set to validate incumbent(s) on. [train, valX].
            train => whole training set,
            valX => a random X% subset of the training set (train_set * X/100), X in (0, 100)
        incs_per_round: int
            Number of incumbents to keep per round
        n_optimizers: int
            Number of optimizers to run in parallel per round
        rng: int/np.random.RandomState
            The randomState/seed to pass to each dsmac run
        run_id: int
            run_id for this hydra run
        tae: ExecuteTARun
            Target Algorithm Runner (supports old and aclib format as well as AbstractTAFunc)
        tae_kwargs: Optional[dict]
            arguments passed to constructor of '~tae'

        """
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        self.n_iterations = n_iterations
        self.scenario = scenario
        self.run_id, self.rng = get_rng(rng, run_id, self.logger)
        self.kwargs = kwargs
        self.output_dir = None
        self.top_dir = None
        self.solver = None
        self.portfolio = None
        self.rh = RunHistory(average_cost, file_system=scenario.file_system)
        self._tae = tae
        self._tae_kwargs = tae_kwargs
        if incs_per_round <= 0:
            self.logger.warning('Invalid value in %s: %d. Setting to 1',
                                'incs_per_round', incs_per_round)
        self.incs_per_round = max(incs_per_round, 1)
        if n_optimizers <= 0:
            self.logger.warning('Invalid value in %s: %d. Setting to 1',
                                'n_optimizers', n_optimizers)
        self.n_optimizers = max(n_optimizers, 1)
        self.val_set = self._get_validation_set(val_set)
        self.cost_per_inst = {}
        self.optimizer = None
        self.portfolio_cost = None

    def _get_validation_set(self,
                            val_set: str,
                            delete: bool = True) -> typing.List[str]:
        """
        Create small validation set for hydra to determine incumbent performance

        Parameters
        ----------
        val_set: str
            Set to validate incumbent(s) on. [train, valX].
            train => whole training set,
            valX => a random X% subset of the training set (train_set * X/100), X in (0, 100)
        delete: bool
            Flag to delete all validation instances from the training set

        Returns
        -------
        val: typing.List[str]
            List of instance-ids to validate on

        """
        if val_set == 'none':
            return None
        if val_set == 'train':
            return self.scenario.train_insts
        elif val_set[:3] != 'val':
            self.logger.warning(
                'Can not determine validation set size. Using full training-set!'
            )
            return self.scenario.train_insts
        else:
            size = int(val_set[3:]) / 100
            if size <= 0 or size >= 1:
                raise ValueError(
                    'X invalid in valX, should be between 0 and 100')
            insts = np.array(self.scenario.train_insts)
            # ensure at least 3 validation instances so this also works with the small example sets
            size = max(np.floor(insts.shape[0] * size).astype(int), 3)
            ids = np.random.choice(insts.shape[0], size, replace=False)
            val = insts[ids].tolist()
            if delete:
                self.scenario.train_insts = np.delete(insts, ids).tolist()
            return val

    def optimize(self) -> typing.List[Configuration]:
        """
        Optimizes the algorithm provided in scenario (given in constructor)

        Returns
        -------
        portfolio : typing.List[Configuration]
            Portfolio of found configurations

        """
        # Setup output directory
        self.portfolio = []
        portfolio_cost = np.inf
        if self.output_dir is None:
            self.top_dir = "hydra-output_%s" % (
                datetime.datetime.fromtimestamp(
                    time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f'))
            self.scenario.output_dir = os.path.join(
                self.top_dir,
                "psmac3-output_%s" % (datetime.datetime.fromtimestamp(
                    time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f')))
            self.output_dir = create_output_directory(self.scenario,
                                                      run_id=self.run_id,
                                                      logger=self.logger)

        scen = copy.deepcopy(self.scenario)
        scen.output_dir_for_this_run = None
        scen.output_dir = None
        # parent process SMAC only used for validation purposes
        self.solver = SMAC4AC(scenario=scen,
                              tae_runner=self._tae,
                              rng=self.rng,
                              run_id=self.run_id,
                              **self.kwargs)
        for i in range(self.n_iterations):
            self.logger.info("=" * 120)
            self.logger.info("Hydra Iteration: %d", (i + 1))

            if i == 0:
                tae = self._tae
                tae_kwargs = self._tae_kwargs
            else:
                tae = ExecuteTARunHydra
                if self._tae_kwargs:
                    tae_kwargs = self._tae_kwargs
                else:
                    tae_kwargs = {}
                tae_kwargs['cost_oracle'] = self.cost_per_inst
            self.optimizer = PSMAC(
                scenario=self.scenario,
                run_id=self.run_id,
                rng=self.rng,
                tae=tae,
                tae_kwargs=tae_kwargs,
                shared_model=False,
                validate=bool(self.val_set),
                n_optimizers=self.n_optimizers,
                val_set=self.val_set,
                n_incs=self.n_optimizers,  # return all configurations (unvalidated)
                **self.kwargs)
            self.optimizer.output_dir = self.output_dir
            incs = self.optimizer.optimize()
            cost_per_conf_v, val_ids, cost_per_conf_e, est_ids = self.optimizer.get_best_incumbents_ids(
                incs)
            if self.val_set:
                to_keep_ids = val_ids[:self.incs_per_round]
            else:
                to_keep_ids = est_ids[:self.incs_per_round]
            config_cost_per_inst = {}
            incs = incs[to_keep_ids]
            self.logger.info('Kept incumbents')
            for inc in incs:
                self.logger.info(inc)
                config_cost_per_inst[inc] = (cost_per_conf_v[inc]
                                             if self.val_set else cost_per_conf_e[inc])

            cur_portfolio_cost = self._update_portfolio(
                incs, config_cost_per_inst)
            if portfolio_cost <= cur_portfolio_cost:
                self.logger.info(
                    "No further progress (%f) --- terminate hydra",
                    portfolio_cost)
                break
            else:
                portfolio_cost = cur_portfolio_cost
                self.logger.info("Current pertfolio cost: %f", portfolio_cost)

            self.scenario.output_dir = os.path.join(
                self.top_dir,
                "psmac3-output_%s" % (datetime.datetime.fromtimestamp(
                    time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f')))
            self.output_dir = create_output_directory(self.scenario,
                                                      run_id=self.run_id,
                                                      logger=self.logger)
        read(self.rh,
             os.path.join(self.top_dir, 'psmac3*', 'run_' + str(MAXINT)),
             self.scenario.cs, self.logger)
        self.rh.save_json(fn=os.path.join(
            self.top_dir, 'all_validated_runs_runhistory.json'),
                          save_external=True)
        with open(os.path.join(self.top_dir, 'portfolio.pkl'), 'wb') as fh:
            pickle.dump(self.portfolio, fh)
        self.logger.info("~" * 120)
        self.logger.info('Resulting Portfolio:')
        for configuration in self.portfolio:
            self.logger.info(str(configuration))
        self.logger.info("~" * 120)

        return self.portfolio

    def _update_portfolio(self, incs: np.ndarray,
                          config_cost_per_inst: typing.Dict) -> float:
        """
        Validates all configurations (in incs) and determines which ones to add to the portfolio

        Parameters
        ----------
        incs: np.ndarray
            List of Configurations
        config_cost_per_inst: typing.Dict
            Maps each kept configuration to its per-instance cost

        Returns
        -------
        cur_cost: float
            The current cost of the portfolio

        """
        if self.val_set:  # we have validated data
            for kept in incs:
                if kept not in self.portfolio:
                    self.portfolio.append(kept)
                    cost_per_inst = config_cost_per_inst[kept]
                    if self.cost_per_inst:
                        if len(self.cost_per_inst) != len(cost_per_inst):
                            raise ValueError(
                                'Num validated Instances mismatch!')
                        else:
                            for key in cost_per_inst:
                                self.cost_per_inst[key] = min(
                                    self.cost_per_inst[key],
                                    cost_per_inst[key])
                    else:
                        self.cost_per_inst = cost_per_inst
            cur_cost = np.mean(list(self.cost_per_inst.values()))  # type: float
        else:  # No validated data. Set the mean to the approximated mean
            # may contain NaNs, since not every instance was evaluated; use nanmean to approximate
            means = []
            for kept in incs:
                means.append(
                    np.nanmean(
                        list(
                            self.optimizer.rh.get_instance_costs_for_config(
                                kept).values())))
                self.portfolio.append(kept)
            if self.portfolio_cost:
                # weighted running mean over the enlarged portfolio: weight the old
                # portfolio cost by the old size and add the new configurations' means
                new_mean = self.portfolio_cost * (
                    len(self.portfolio) - len(incs)) / len(self.portfolio)
                new_mean += np.nansum(means) / len(self.portfolio)
            else:
                new_mean = np.mean(means)
            self.cost_per_inst = defaultdict(lambda: new_mean)
            cur_cost = new_mean

        self.portfolio_cost = cur_cost
        return cur_cost
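
The validated branch of _update_portfolio above performs an oracle-style update (the same idea behind the cost_oracle passed to ExecuteTARunHydra): for every instance the portfolio keeps the best (minimum) cost any of its members achieved, and the portfolio cost is the mean over those per-instance minima. Below is a minimal standalone sketch of that idea; the helper name and the toy costs are made up and are not part of the dsmac API.

import numpy as np

def update_portfolio_cost(portfolio_cost_per_inst, new_config_cost_per_inst):
    # Merge one configuration's per-instance costs into the portfolio's oracle costs.
    if not portfolio_cost_per_inst:
        merged = dict(new_config_cost_per_inst)
    else:
        if set(portfolio_cost_per_inst) != set(new_config_cost_per_inst):
            raise ValueError('Num validated Instances mismatch!')
        merged = {inst: min(cost, new_config_cost_per_inst[inst])
                  for inst, cost in portfolio_cost_per_inst.items()}
    return merged, float(np.mean(list(merged.values())))

portfolio_costs = {}
for config_costs in ({'i1': 3.0, 'i2': 8.0}, {'i1': 5.0, 'i2': 2.0}):
    portfolio_costs, cur_cost = update_portfolio_cost(portfolio_costs, config_costs)
print(cur_cost)  # 2.5: every instance is served by its best portfolio member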
Example #15
0
    def __init__(self,
                 scenario: Scenario,
                 n_iterations: int,
                 val_set: str = 'train',
                 incs_per_round: int = 1,
                 n_optimizers: int = 1,
                 rng: typing.Optional[typing.Union[np.random.RandomState,
                                                   int]] = None,
                 run_id: int = 1,
                 tae: typing.Type[ExecuteTARun] = ExecuteTARunOld,
                 tae_kwargs: typing.Union[dict, None] = None,
                 **kwargs):
        """
        Constructor

        Parameters
        ----------
        scenario : ~dsmac.scenario.scenario.Scenario
            Scenario object
        n_iterations: int
            number of Hydra iterations
        val_set: str
            Set to validate incumbent(s) on. [train, valX].
            train => whole training set,
            valX => random sample of X% of the training set, where X in (0, 100)
        incs_per_round: int
            Number of incumbents to keep per round
        n_optimizers: int
            Number of optimizers to run in parallel per round
        rng: int/np.random.RandomState
            The randomState/seed to pass to each dsmac run
        run_id: int
            run_id for this hydra run
        tae: ExecuteTARun
            Target Algorithm Runner (supports old and aclib format as well as AbstractTAFunc)
        tae_kwargs: Optional[dict]
            arguments passed to constructor of '~tae'

        """
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        self.n_iterations = n_iterations
        self.scenario = scenario
        self.run_id, self.rng = get_rng(rng, run_id, self.logger)
        self.kwargs = kwargs
        self.output_dir = None
        self.top_dir = None
        self.solver = None
        self.portfolio = None
        self.rh = RunHistory(average_cost, file_system=scenario.file_system)
        self._tae = tae
        self._tae_kwargs = tae_kwargs
        if incs_per_round <= 0:
            self.logger.warning('Invalid value in %s: %d. Setting to 1',
                                'incs_per_round', incs_per_round)
        self.incs_per_round = max(incs_per_round, 1)
        if n_optimizers <= 0:
            self.logger.warning('Invalid value in %s: %d. Setting to 1',
                                'n_optimizers', n_optimizers)
        self.n_optimizers = max(n_optimizers, 1)
        self.val_set = self._get_validation_set(val_set)
        self.cost_per_inst = {}
        self.optimizer = None
        self.portfolio_cost = None
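
The val_set argument follows the 'valX' convention parsed by _get_validation_set: 'val20' means a random 20% of the training instances (never fewer than 3), which are then removed from the training set. A standalone sketch under those assumptions; the helper name is hypothetical and not part of dsmac.

import numpy as np

def split_validation_set(train_insts, val_set='val20', rng=None):
    # 'train' keeps everything; 'valX' samples X% of the instances (at least 3).
    if rng is None:
        rng = np.random.RandomState(1)
    if val_set == 'train':
        return list(train_insts), list(train_insts)
    fraction = int(val_set[3:]) / 100            # e.g. 'val20' -> 0.2
    insts = np.array(train_insts)
    size = max(int(np.floor(insts.shape[0] * fraction)), 3)
    ids = rng.choice(insts.shape[0], size, replace=False)
    val = insts[ids].tolist()
    remaining = np.delete(insts, ids).tolist()   # validation instances leave the training set
    return remaining, val

train, val = split_validation_set(['inst_%d' % i for i in range(20)], 'val20')
print(len(train), len(val))  # 16 4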
Example #16
0
    def __init__(
            self,
            scenario: Scenario,
            # TODO: once we drop python3.4 add type hint
            # typing.Union[ExecuteTARun, callable]
            tae_runner=None,
            runhistory: RunHistory = None,
            intensifier: Intensifier = None,
            acquisition_function: AbstractAcquisitionFunction = None,
            model: AbstractEPM = None,
            runhistory2epm: AbstractRunHistory2EPM = None,
            initial_design: InitialDesign = None,
            initial_configurations: typing.List[Configuration] = None,
            stats: Stats = None,
            rng: np.random.RandomState = None,
            run_id: int = 1):
        """Constructor"""
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        aggregate_func = average_cost
        self.runhistory = None
        self.trajectory = None

        # initialize stats object
        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario, file_system=scenario.file_system)

        self.output_dir = create_output_directory(scenario, run_id)
        scenario.write()

        # initialize empty runhistory
        if runhistory is None:
            runhistory = RunHistory(aggregate_func=aggregate_func,
                                    file_system=scenario.file_system)
        # inject aggr_func if necessary
        if runhistory.aggregate_func is None:
            runhistory.aggregate_func = aggregate_func

        # initial random number generator
        num_run, rng = self._get_rng(rng=rng)

        # reset random number generator in config space to draw different
        # random configurations with each seed given to SMAC
        scenario.cs.seed(rng.randint(MAXINT))

        # initial Trajectory Logger
        traj_logger = TrajLogger(output_dir=self.output_dir,
                                 stats=self.stats,
                                 file_system=scenario.file_system)

        # initial EPM
        types, bounds = get_types(scenario.cs, scenario.feature_array)
        if model is None:
            model = RandomForestWithInstances(
                configspace=scenario.cs,
                types=types,
                bounds=bounds,
                instance_features=scenario.feature_array,
                seed=rng.randint(MAXINT),
                pca_components=scenario.PCA_DIM,
                num_trees=scenario.rf_num_trees,
                do_bootstrapping=scenario.rf_do_bootstrapping,
                ratio_features=scenario.rf_ratio_features,
                min_samples_split=scenario.rf_min_samples_split,
                min_samples_leaf=scenario.rf_min_samples_leaf,
                max_depth=scenario.rf_max_depth,
            )
        # initial acquisition function
        if acquisition_function is None:
            if scenario.run_obj == "runtime":
                acquisition_function = LogEI(model=model)
            else:
                acquisition_function = EI(model=model)
        # inject model if necessary
        if acquisition_function.model is None:
            acquisition_function.model = model

        # initialize optimizer on acquisition function
        local_search = LocalSearch(
            acquisition_function,
            scenario.cs,
            max_steps=scenario.sls_max_steps,
            n_steps_plateau_walk=scenario.sls_n_steps_plateau_walk)

        # initialize tae_runner
        # First case, if tae_runner is None, the target algorithm is a call
        # string in the scenario file
        if tae_runner is None:
            tae_runner = ExecuteTARunOld(
                ta=scenario.ta,
                stats=self.stats,
                run_obj=scenario.run_obj,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash)
        # Second case, the tae_runner is a function to be optimized
        elif callable(tae_runner):
            tae_runner = ExecuteTAFuncDict(
                ta=tae_runner,
                stats=self.stats,
                run_obj=scenario.run_obj,
                memory_limit=scenario.memory_limit,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash)
        # Third case, if it is an ExecuteTaRun we can simply use the
        # instance. Otherwise, the next check raises an exception
        elif not isinstance(tae_runner, ExecuteTARun):
            raise TypeError("Argument 'tae_runner' is %s, but must be "
                            "either a callable or an instance of "
                            "ExecuteTaRun. Passing 'None' will result in the "
                            "creation of target algorithm runner based on the "
                            "call string in the scenario file." %
                            type(tae_runner))

        # Check that overall objective and tae objective are the same
        if tae_runner.run_obj != scenario.run_obj:
            raise ValueError("Objective for the target algorithm runner and "
                             "the scenario must be the same, but are '%s' and "
                             "'%s'" % (tae_runner.run_obj, scenario.run_obj))

        # inject stats if necessary
        if tae_runner.stats is None:
            tae_runner.stats = self.stats
        # inject runhistory if necessary
        if tae_runner.runhistory is None:
            tae_runner.runhistory = runhistory
        # inject cost_for_crash
        if tae_runner.crash_cost != scenario.cost_for_crash:
            tae_runner.crash_cost = scenario.cost_for_crash

        # initialize intensification
        if intensifier is None:
            intensifier = Intensifier(
                tae_runner=tae_runner,
                stats=self.stats,
                traj_logger=traj_logger,
                rng=rng,
                instances=scenario.train_insts,
                cutoff=scenario.cutoff,
                deterministic=scenario.deterministic,
                run_obj_time=scenario.run_obj == "runtime",
                always_race_against=scenario.cs.get_default_configuration()
                if scenario.always_race_default else None,
                instance_specifics=scenario.instance_specific,
                minR=scenario.minR,
                maxR=scenario.maxR,
                adaptive_capping_slackfactor=scenario.intens_adaptive_capping_slackfactor,
                min_chall=scenario.intens_min_chall,
                distributer=scenario.distributer)
        # inject deps if necessary
        if intensifier.tae_runner is None:
            intensifier.tae_runner = tae_runner
        if intensifier.stats is None:
            intensifier.stats = self.stats
        if intensifier.traj_logger is None:
            intensifier.traj_logger = traj_logger

        # initial design
        if initial_design is not None and initial_configurations is not None:
            raise ValueError(
                "Either use initial_design or initial_configurations; but not both"
            )

        if initial_configurations is not None:
            initial_design = InitialDesign(tae_runner=tae_runner,
                                           scenario=scenario,
                                           stats=self.stats,
                                           traj_logger=traj_logger,
                                           runhistory=runhistory,
                                           rng=rng,
                                           configs=initial_configurations,
                                           intensifier=intensifier,
                                           aggregate_func=aggregate_func)
        elif initial_design is None:
            if scenario.initial_incumbent == "DEFAULT":
                initial_design = DefaultConfiguration(
                    tae_runner=tae_runner,
                    scenario=scenario,
                    stats=self.stats,
                    traj_logger=traj_logger,
                    runhistory=runhistory,
                    rng=rng,
                    intensifier=intensifier,
                    aggregate_func=aggregate_func,
                    max_config_fracs=0.0)
            elif scenario.initial_incumbent == "RANDOM":
                initial_design = RandomConfigurations(
                    tae_runner=tae_runner,
                    scenario=scenario,
                    stats=self.stats,
                    traj_logger=traj_logger,
                    runhistory=runhistory,
                    rng=rng,
                    intensifier=intensifier,
                    aggregate_func=aggregate_func,
                    max_config_fracs=0.0)
            else:
                raise ValueError("Don't know what kind of initial_incumbent "
                                 "'%s' is" % scenario.initial_incumbent)
        # inject deps if necessary
        if initial_design.tae_runner is None:
            initial_design.tae_runner = tae_runner
        if initial_design.scenario is None:
            initial_design.scenario = scenario
        if initial_design.stats is None:
            initial_design.stats = self.stats
        if initial_design.traj_logger is None:
            initial_design.traj_logger = traj_logger

        # initial conversion of runhistory into EPM data
        if runhistory2epm is None:

            num_params = len(scenario.cs.get_hyperparameters())
            if scenario.run_obj == "runtime":

                # if we log the performance data,
                # the RFRImputator will already get
                # log transform data from the runhistory
                cutoff = np.log(scenario.cutoff)
                threshold = np.log(scenario.cutoff * scenario.par_factor)

                imputor = RFRImputator(rng=rng,
                                       cutoff=cutoff,
                                       threshold=threshold,
                                       model=model,
                                       change_threshold=0.01,
                                       max_iter=2)

                runhistory2epm = RunHistory2EPM4LogCost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[
                        StatusType.SUCCESS,
                    ],
                    impute_censored_data=True,
                    impute_state=[
                        StatusType.CAPPED,
                    ],
                    imputor=imputor)

            elif scenario.run_obj == 'quality':
                runhistory2epm = RunHistory2EPM4Cost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[
                        StatusType.SUCCESS,
                    ],
                    impute_censored_data=False,
                    impute_state=None)

            else:
                raise ValueError('Unknown run objective: %s. Should be either '
                                 'quality or runtime.' % scenario.run_obj)

        # inject scenario if necessary:
        if runhistory2epm.scenario is None:
            runhistory2epm.scenario = scenario

        self.solver = EPILS_Solver(scenario=scenario,
                                   stats=self.stats,
                                   initial_design=initial_design,
                                   runhistory=runhistory,
                                   runhistory2epm=runhistory2epm,
                                   intensifier=intensifier,
                                   aggregate_func=aggregate_func,
                                   num_run=num_run,
                                   model=model,
                                   acq_optimizer=local_search,
                                   acquisition_func=acquisition_function,
                                   rng=rng)
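
For a runtime objective the constructor above log-transforms both bounds handed to the RFRImputator, since the runhistory already delivers log-transformed performance data: the cutoff bounds censored (capped) runs and the threshold is the PAR-penalized cost assigned to timeouts. A small worked example with assumed scenario values:

import numpy as np

cutoff_s = 300.0    # assumed scenario.cutoff: per-run wallclock limit in seconds
par_factor = 10     # assumed scenario.par_factor: PAR-10 penalization of timeouts
cutoff = np.log(cutoff_s)                     # log-space censoring bound for capped runs
threshold = np.log(cutoff_s * par_factor)     # log-space PAR-10 cost for timed-out runs
print(round(cutoff, 2), round(threshold, 2))  # 5.7 8.01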
Example #17
0
    def main_cli(self, commandline_arguments: typing.Optional[typing.List[str]] = None):
        """Main function of SMAC for CLI interface"""
        self.logger.info("SMAC call: %s" % (" ".join(sys.argv)))

        cmd_reader = CMDReader()
        kwargs = {}
        if commandline_arguments:
            kwargs['commandline_arguments'] = commandline_arguments
        main_args_, smac_args_, scen_args_ = cmd_reader.read_cmd(**kwargs)

        root_logger = logging.getLogger()
        root_logger.setLevel(main_args_.verbose_level)
        logger_handler = logging.StreamHandler(stream=sys.stdout)
        if root_logger.level >= logging.INFO:
            formatter = logging.Formatter(
                "%(levelname)s:\t%(message)s")
        else:
            formatter = logging.Formatter(
                "%(asctime)s:%(levelname)s:%(name)s:%(message)s",
                "%Y-%m-%d %H:%M:%S")
        logger_handler.setFormatter(formatter)
        root_logger.addHandler(logger_handler)
        # remove default handler
        if len(root_logger.handlers) > 1:
            root_logger.removeHandler(root_logger.handlers[0])

        # Create defaults
        rh = None
        initial_configs = None
        stats = None
        incumbent = None

        # Create scenario-object
        scenario = {}
        scenario.update(vars(smac_args_))
        scenario.update(vars(scen_args_))
        scen = Scenario(scenario=scenario)

        # Restore state
        if main_args_.restore_state:
            root_logger.debug("Restoring state from %s...", main_args_.restore_state)
            rh, stats, traj_list_aclib, traj_list_old = self.restore_state(scen, main_args_)

            scen.output_dir_for_this_run = create_output_directory(
                scen, main_args_.seed, root_logger,
            )
            scen.write()
            incumbent = self.restore_state_after_output_dir(scen, stats,
                                                            traj_list_aclib, traj_list_old)

        if main_args_.warmstart_runhistory:
            aggregate_func = average_cost
            rh = RunHistory(aggregate_func=aggregate_func)

            scen, rh = merge_foreign_data_from_file(
                scenario=scen,
                runhistory=rh,
                in_scenario_fn_list=main_args_.warmstart_scenario,
                in_runhistory_fn_list=main_args_.warmstart_runhistory,
                cs=scen.cs,
                aggregate_func=aggregate_func)

        if main_args_.warmstart_incumbent:
            initial_configs = [scen.cs.get_default_configuration()]
            for traj_fn in main_args_.warmstart_incumbent:
                trajectory = TrajLogger.read_traj_aclib_format(
                    fn=traj_fn, cs=scen.cs)
                initial_configs.append(trajectory[-1]["incumbent"])

        if main_args_.mode == "SMAC4AC":
            optimizer = SMAC4AC(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs,
                stats=stats,
                restore_incumbent=incumbent,
                run_id=main_args_.seed)
        elif main_args_.mode == "SMAC4HPO":
            optimizer = SMAC4HPO(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs,
                stats=stats,
                restore_incumbent=incumbent,
                run_id=main_args_.seed)
        elif main_args_.mode == "SMAC4BO":
            optimizer = SMAC4BO(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs,
                stats=stats,
                restore_incumbent=incumbent,
                run_id=main_args_.seed)
        elif main_args_.mode == "ROAR":
            optimizer = ROAR(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs,
                run_id=main_args_.seed)
        elif main_args_.mode == "EPILS":
            optimizer = EPILS(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs,
                run_id=main_args_.seed)
        elif main_args_.mode == "Hydra":
            optimizer = Hydra(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs,
                stats=stats,
                restore_incumbent=incumbent,
                run_id=main_args_.seed,
                random_configuration_chooser=main_args_.random_configuration_chooser,
                n_iterations=main_args_.hydra_iterations,
                val_set=main_args_.hydra_validation,
                incs_per_round=main_args_.hydra_incumbents_per_round,
                n_optimizers=main_args_.hydra_n_optimizers)
        elif main_args_.mode == "PSMAC":
            optimizer = PSMAC(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                run_id=main_args_.seed,
                shared_model=smac_args_.shared_model,
                validate=main_args_.psmac_validate,
                n_optimizers=main_args_.hydra_n_optimizers,
                n_incs=main_args_.hydra_incumbents_per_round,
            )
        try:
            optimizer.optimize()
        except (TAEAbortException, FirstRunCrashedException) as err:
            self.logger.error(err)
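
The logging setup in main_cli installs a single stdout handler whose format depends on the verbosity level and then drops the previously installed default handler. A minimal standalone sketch of the same pattern (the function name is made up):

import logging
import sys

def setup_cli_logging(verbose_level=logging.INFO):
    # Compact format at INFO and above, verbose format (timestamp + logger name) below.
    root = logging.getLogger()
    root.setLevel(verbose_level)
    handler = logging.StreamHandler(stream=sys.stdout)
    if root.level >= logging.INFO:
        formatter = logging.Formatter("%(levelname)s:\t%(message)s")
    else:
        formatter = logging.Formatter(
            "%(asctime)s:%(levelname)s:%(name)s:%(message)s", "%Y-%m-%d %H:%M:%S")
    handler.setFormatter(formatter)
    root.addHandler(handler)
    # drop previously installed handlers so messages are not emitted twice
    for old in root.handlers[:-1]:
        root.removeHandler(old)

setup_cli_logging(logging.DEBUG)
logging.getLogger("example").debug("verbose formatting active")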
Example #18
0
    def _race_challenger(self,
                         challenger: Configuration,
                         incumbent: Configuration,
                         run_history: RunHistory,
                         aggregate_func: typing.Callable,
                         log_traj: bool = True):
        """Aggressively race challenger against incumbent

        Parameters
        ----------
        challenger : Configuration
            Configuration which challenges incumbent
        incumbent : Configuration
            Best configuration so far
        run_history : RunHistory
            Stores all runs we ran so far
        aggregate_func: typing.Callable
            Aggregate performance across instances
        log_traj: bool
            Whether to log changes of incumbents in trajectory

        Returns
        -------
        new_incumbent: Configuration
            Either challenger or incumbent
        """
        # at least one run of challenger
        # to increase chall_indx counter
        first_run = False

        # Line 8
        N = max(1, self.minR)

        inc_inst_seeds = set(run_history.get_runs_for_config(incumbent))
        # Line 9
        while True:
            chall_inst_seeds = set(run_history.get_runs_for_config(challenger))

            # Line 10
            missing_runs = list(inc_inst_seeds - chall_inst_seeds)

            # Line 11
            self.rs.shuffle(missing_runs)
            to_run = missing_runs[:min(N, len(missing_runs))]
            # Line 13 (Line 12 comes below...)
            missing_runs = missing_runs[min(N, len(missing_runs)):]

            # for adaptive capping:
            # computed here once per round for efficiency
            inst_seed_pairs = list(inc_inst_seeds - set(missing_runs))
            # cost used by incumbent for going over all runs in inst_seed_pairs
            inc_sum_cost = sum_cost(config=incumbent,
                                    instance_seed_pairs=inst_seed_pairs,
                                    run_history=run_history)

            if len(to_run) == 0:
                self.logger.debug("No further runs for challenger available")

            # Line 12
            # Run challenger on all <config,seed> to run
            for instance, seed in to_run:

                cutoff = self._adapt_cutoff(challenger=challenger,
                                            incumbent=incumbent,
                                            run_history=run_history,
                                            inc_sum_cost=inc_sum_cost)
                if cutoff is not None and cutoff <= 0:
                    # no time to validate challenger
                    self.logger.debug("Stop challenger itensification due "
                                      "to adaptive capping.")
                    # challenger performance is worse than incumbent
                    return incumbent

                if not first_run:
                    first_run = True
                    self._chall_indx += 1

                self.logger.debug("Add run of challenger")
                try:
                    status, cost, dur, res = self.tae_runner.start(
                        config=challenger,
                        instance=instance,
                        seed=seed,
                        cutoff=cutoff,
                        instance_specific=self.instance_specifics.get(
                            instance, "0"),
                        capped=(self.cutoff is not None)
                        and (cutoff < self.cutoff))
                    self._num_run += 1
                    self._ta_time += dur
                except CappedRunException:
                    return incumbent

            new_incumbent = self._compare_configs(
                incumbent=incumbent,
                challenger=challenger,
                run_history=run_history,
                aggregate_func=aggregate_func,
                log_traj=log_traj)
            if new_incumbent == incumbent:
                break
            elif new_incumbent == challenger:
                incumbent = challenger
                break
            else:  # Line 17
                # challenger is not worse, continue
                N = 2 * N

        return incumbent
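
The racing loop above schedules at most N of the incumbent's missing <instance, seed> runs for the challenger per round and doubles N whenever the comparison stays undecided. A standalone sketch of that doubling schedule, assuming every comparison remains undecided until all missing runs have been scheduled:

def race_schedule(n_missing_runs, min_r=1):
    # Batch sizes of challenger runs per comparison round.
    n, batches = max(1, min_r), []
    while n_missing_runs > 0:
        batch = min(n, n_missing_runs)
        batches.append(batch)
        n_missing_runs -= batch
        n *= 2          # Line 17: undecided -> double N
    return batches

print(race_schedule(20))  # [1, 2, 4, 8, 5]: geometric ramp-up of challenger runs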
Example #19
0
from dsmac.runhistory.runhistory import RunHistoryDB
from dsmac.runhistory.utils import get_id_of_config
from dsmac.tae.execute_ta_run import StatusType

if __name__ == '__main__':
    from ConfigSpace import ConfigurationSpace
    import joblib
    from dsmac.optimizer.objective import average_cost
    from dsmac.runhistory.runhistory import RunHistory

    runhistory = RunHistory(average_cost, db_args="test.db")
    cs: ConfigurationSpace = joblib.load("/home/tqc/PycharmProjects/auto-pipeline/test/php.bz2")
    runhistory.load_json(
        "/home/tqc/PycharmProjects/auto-pipeline/test/test_runhistory/default_dataset_name/smac_output/runhistory.json",
        cs)
    all_configs = runhistory.get_all_configs()
    config = all_configs[0]
    config_id = get_id_of_config(config)
    cost = runhistory.get_cost(config)
    db = RunHistoryDB(cs, runhistory, "test.db")
    db.delete_all()
    ans = db.appointment_config(config)
    print(ans)
    db.insert_runhistory(config, cost, 0.1, StatusType.SUCCESS)
    db2 = RunHistoryDB(cs, runhistory, "test.db")
    db2.insert_runhistory(all_configs[1], runhistory.get_cost(all_configs[1]), 0.1, StatusType.SUCCESS)
    db.fetch_new_runhistory()
Example #20
0
    def _compare_configs(self,
                         incumbent: Configuration,
                         challenger: Configuration,
                         run_history: RunHistory,
                         aggregate_func: typing.Callable,
                         log_traj: bool = True):
        """
        Compare two configurations w.r.t. the runhistory and return the one
        which performs better (or None if the decision is not safe)

        Decision strategy to return x as being better than y:
            1. x has at least as many runs as y
            2. x performs better than y on the intersection of runs on x and y

        Implicit assumption:
            Challenger was evaluated on the same instance-seed pairs as
            incumbent

        Parameters
        ----------
        incumbent: Configuration
            Current incumbent
        challenger: Configuration
            Challenger configuration
        run_history: RunHistory
            Stores all runs we ran so far
        aggregate_func: typing.Callable
            Aggregate performance across instances
        log_traj: bool
            Whether to log changes of incumbents in trajectory

        Returns
        -------
        None or better of the two configurations x,y
        """

        inc_runs = run_history.get_runs_for_config(incumbent)
        chall_runs = run_history.get_runs_for_config(challenger)
        to_compare_runs = set(inc_runs).intersection(chall_runs)

        # performance on challenger runs
        chal_perf = aggregate_func(challenger, run_history, to_compare_runs)
        inc_perf = aggregate_func(incumbent, run_history, to_compare_runs)

        # Line 15
        if chal_perf > inc_perf and len(chall_runs) >= self.minR:
            # Incumbent beats challenger
            self.logger.debug("Incumbent (%.4f) is better than challenger "
                              "(%.4f) on %d runs." %
                              (inc_perf, chal_perf, len(chall_runs)))
            return incumbent

        # Line 16
        if not set(inc_runs) - set(chall_runs):

            # no plateau walks
            if chal_perf >= inc_perf:
                self.logger.debug(
                    "Incumbent (%.4f) is at least as good as the "
                    "challenger (%.4f) on %d runs." %
                    (inc_perf, chal_perf, len(chall_runs)))
                return incumbent

            # Challenger is better than incumbent
            # and has at least the same runs as inc
            # -> change incumbent
            n_samples = len(chall_runs)
            self.logger.info(
                "Challenger (%.4f) is better than incumbent (%.4f)"
                " on %d runs." % (chal_perf, inc_perf, n_samples))
            # Show changes in the configuration
            params = sorted([(param, incumbent[param], challenger[param])
                             for param in challenger.keys()])
            self.logger.info("Changes in incumbent:")
            for param in params:
                if param[1] != param[2]:
                    self.logger.info("  %s : %r -> %r" % (param))
                else:
                    self.logger.debug("  %s remains unchanged: %r" %
                                      (param[0], param[1]))

            if log_traj:
                self.stats.inc_changed += 1
                self.traj_logger.add_entry(train_perf=chal_perf,
                                           incumbent_id=self.stats.inc_changed,
                                           incumbent=challenger)
            return challenger

        # undecided
        return None
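
The decision rule in _compare_configs boils down to: reject the challenger early if it is worse after at least minR runs; accept or reject it once it has caught up with all of the incumbent's runs; otherwise stay undecided so the race continues. A simplified standalone sketch that uses plain performance values and run counts in place of RunHistory and instance-seed sets (so "caught up" is approximated by a count comparison):

def compare_configs(inc_perf, chal_perf, n_inc_runs, n_chall_runs, min_r=2):
    # Returns 'incumbent', 'challenger', or None, following the two decision lines above.
    if chal_perf > inc_perf and n_chall_runs >= min_r:
        return 'incumbent'                       # Line 15: challenger is clearly worse
    if n_chall_runs >= n_inc_runs:               # challenger caught up with the incumbent
        return 'incumbent' if chal_perf >= inc_perf else 'challenger'  # Line 16
    return None                                  # undecided: keep racing with doubled N

print(compare_configs(1.0, 0.8, n_inc_runs=4, n_chall_runs=2))  # None
print(compare_configs(1.0, 0.8, n_inc_runs=4, n_chall_runs=4))  # challenger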
Example #21
0
    def __init__(self,
                 scenario: Scenario,
                 rng: typing.Optional[typing.Union[np.random.RandomState,
                                                   int]] = None,
                 run_id: int = 1,
                 tae: typing.Type[ExecuteTARun] = ExecuteTARunOld,
                 tae_kwargs: typing.Union[dict, None] = None,
                 shared_model: bool = True,
                 validate: bool = True,
                 n_optimizers: int = 2,
                 val_set: typing.Union[typing.List[str], None] = None,
                 n_incs: int = 1,
                 **kwargs):
        """
        Constructor

        Parameters
        ----------
        scenario : ~dsmac.scenario.scenario.Scenario
            Scenario object
        n_optimizers: int
            Number of optimizers to run in parallel per round
        rng: int/np.random.RandomState
            The randomState/seed to pass to each dsmac run
        run_id: int
            run_id for this hydra run
        tae: ExecuteTARun
            Target Algorithm Runner (supports old and aclib format as well as AbstractTAFunc)
        tae_kwargs: Optional[dict]
            arguments passed to constructor of '~tae'
        shared_model: bool
            Flag to indicate whether information is shared between SMAC runs or not
        validate: bool / None
            Flag to indicate whether to validate the found configurations or to use the SMAC estimates
            None => neither and return the full portfolio
        n_incs: int
            Number of incumbents to return (clamped to the range [1, n_optimizers])
        val_set: typing.List[str]
            List of instance-ids to validate on

        """
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        self.scenario = scenario
        self.run_id, self.rng = get_rng(rng, run_id, logger=self.logger)
        self.kwargs = kwargs
        self.output_dir = None
        self.rh = RunHistory(average_cost, file_system=scenario.file_system)
        self._tae = tae
        self._tae_kwargs = tae_kwargs
        if n_optimizers <= 1:
            self.logger.warning('Invalid value in %s: %d. Setting to 2',
                                'n_optimizers', n_optimizers)
        self.n_optimizers = max(n_optimizers, 2)
        self.validate = validate
        self.shared_model = shared_model
        self.n_incs = min(max(1, n_incs), self.n_optimizers)
        if val_set is None:
            self.val_set = scenario.train_insts
        else:
            self.val_set = val_set
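
Like Hydra, the PSMAC constructor sanitizes its parallelism settings by warning and clamping rather than raising: n_optimizers is forced to at least 2 and n_incs is kept between 1 and n_optimizers. A standalone sketch of that behaviour (the helper name is hypothetical):

import logging

def clamp_psmac_settings(n_optimizers, n_incs, logger=logging.getLogger(__name__)):
    # Warn about invalid values and pull them into a sane range instead of raising.
    if n_optimizers <= 1:
        logger.warning('Invalid value in %s: %d. Setting to 2', 'n_optimizers', n_optimizers)
    n_optimizers = max(n_optimizers, 2)
    n_incs = min(max(1, n_incs), n_optimizers)   # keep 1 <= n_incs <= n_optimizers
    return n_optimizers, n_incs

print(clamp_psmac_settings(0, 10))  # (2, 2)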