示例#1
0
def _cost(config: Configuration, run_history: RunHistory,
          instance_seed_pairs=None):
    """Return array of all costs for the given config for further calculations.

    Parameters
    ----------
    config : Configuration
        Configuration to calculate objective for
    run_history : RunHistory
        RunHistory object from which the objective value is computed.
    instance_seed_pairs : list, optional (default=None)
        List of tuples of instance-seeds pairs. If None, the run_history is
        queried for all runs of the given configuration.

    Returns
    -------
    Costs: list
        Array of all costs
    """
    try:
        id_ = run_history.config_ids[config]
    except KeyError:  # challenger was not running so far
        return []

    if instance_seed_pairs is None:
        instance_seed_pairs = run_history.get_runs_for_config(config)

    costs = []
    for i, r in instance_seed_pairs:
        k = RunKey(id_, i, r)
        costs.append(run_history.data[k].cost)
    return costs
示例#2
0
    def add(self,
            config: Configuration,
            cost: float,
            time: float,
            status: StatusType,
            instance_id: str = "",
            seed: int = 0,
            additional_info: dict = None,
            origin: DataOrigin = DataOrigin.INTERNAL):
        """Adds a data of a new target algorithm (TA) run;
        it will update data if the same key values are used
        (config, instance_id, seed)

        Parameters
        ----------
            config : dict (or other type -- depending on config space module)
                Parameter configuration
            cost: float
                Cost of TA run (will be minimized)
            time: float
                Runtime of TA run
            status: str
                Status in {SUCCESS, TIMEOUT, CRASHED, ABORT, MEMOUT}
            instance_id: str
                String representing an instance (default: None)
            seed: int
                Random seed used by TA (default: None)
            additional_info: dict
                Additional run infos (could include further returned
                information from TA or fields such as start time and host_id)
            origin: DataOrigin
                Defines how data will be used.
        """
        if not instance_id:
            instance_id = None
        config_id = self.config_ids.get(config)
        if config_id is None:  # it's a new config
            new_id = get_id_of_config(config)
            self.config_ids[config] = new_id
            config_id = self.config_ids.get(config)
            self.ids_config[new_id] = config

        k = RunKey(config_id, instance_id, seed)
        v = RunValue(cost, time, status, additional_info)

        # Each runkey is supposed to be used only once. Repeated tries to add
        # the same runkey will be ignored silently if not capped.
        if self.overwrite_existing_runs or self.data.get(k) is None:
            self._add(k, v, status, origin)
        elif status != StatusType.CAPPED and self.data[
                k].status == StatusType.CAPPED:
            # overwrite capped runs with uncapped runs
            self._add(k, v, status, origin)
        elif status == StatusType.CAPPED and self.data[
                k].status == StatusType.CAPPED and cost > self.data[k].cost:
            # overwrite if censored with a larger cutoff
            self._add(k, v, status, origin)
示例#3
0
    def get_instance_costs_for_config(self, config: Configuration):
        """
            Returns the average cost per instance (across seeds)
            for a configuration
            Parameters
            ----------
            config : Configuration from ConfigSpace
                Parameter configuration

            Returns
            -------
            cost_per_inst: dict<instance name<str>, cost<float>>
        """
        config_id = self.config_ids.get(config)
        runs_ = self._configid_to_inst_seed.get(config_id, [])
        cost_per_inst = {}
        for inst, seed in runs_:
            cost_per_inst[inst] = cost_per_inst.get(inst, [])
            rkey = RunKey(config_id, inst, seed)
            vkey = self.data[rkey]
            cost_per_inst[inst].append(vkey.cost)
        cost_per_inst = dict([(inst, np.mean(costs)) for inst, costs in cost_per_inst.items()])
        return cost_per_inst
示例#4
0
    def _get_runs(self,
                 configs: Union[str, typing.List[Configuration]],
                 insts: Union[str, typing.List[str]],
                 repetitions: int=1,
                 runhistory: RunHistory=None,
                 ) -> typing.Tuple[typing.List[_Run], RunHistory]:
        """
        Generate list of SMAC-TAE runs to be executed. This means
        combinations of configs with all instances on a certain number of seeds.

        side effect: Adds runs that don't need to be reevaluated to self.rh!

        Parameters
        ----------
        configs: str or list<Configuration>
            string or directly a list of Configuration
            str from [def, inc, def+inc, wallclock_time, cpu_time, all]
                time evaluates at cpu- or wallclock-timesteps of:
                [max_time/2^0, max_time/2^1, max_time/2^3, ..., default]
                with max_time being the highest recorded time
        insts: str or list<str>
            what instances to use for validation, either from
            [train, test, train+test] or directly a list of instances
        repetitions: int
            number of seeds per instance/config-pair to be evaluated
        runhistory: RunHistory
            optional, try to reuse this runhistory and save some runs

        Returns
        -------
        runs: list<_Run>
            list with _Runs
            [_Run(config=CONFIG1,inst=INSTANCE1,seed=SEED1,inst_specs=INST_SPECIFICS1),
             _Run(config=CONFIG2,inst=INSTANCE2,seed=SEED2,inst_specs=INST_SPECIFICS2),
             ...]
        """
        # Get relevant configurations and instances
        if isinstance(configs, str):
            configs = self._get_configs(configs)
        if isinstance(insts, str):
            insts = self._get_instances(insts)

        # If no instances are given, fix the instances to one "None" instance
        if not insts:
            insts = [None]
        # If algorithm is deterministic, fix repetitions to 1
        if self.scen.deterministic and repetitions != 1:
            self.logger.warning("Specified %d repetitions, but fixing to 1, "
                                "because algorithm is deterministic.", repetitions)
            repetitions = 1

        # Extract relevant information from given runhistory
        inst_seed_config = self._process_runhistory(configs, insts, runhistory)

        # Now create the actual run-list
        runs = []
        # Counter for runs without the need of recalculation
        runs_from_rh = 0
        # If we reuse runs, we want to return them as well
        new_rh = RunHistory(average_cost)

        for i in sorted(insts):
            for rep in range(repetitions):
                # First, find a seed and add all the data we can take from the
                # given runhistory to "our" validation runhistory.
                configs_evaluated = []
                if runhistory and i in inst_seed_config:
                    # Choose seed based on most often evaluated inst-seed-pair
                    seed, configs_evaluated = inst_seed_config[i].pop(0)
                    # Delete inst if all seeds are used
                    if not inst_seed_config[i]:
                        inst_seed_config.pop(i)
                    # Add runs to runhistory
                    for c in configs_evaluated[:]:
                        runkey = RunKey(runhistory.config_ids[c], i, seed)
                        cost, time, status, additional_info = runhistory.data[runkey]
                        if status in [StatusType.CRASHED, StatusType.ABORT, StatusType.CAPPED]:
                            # Not properly executed target algorithm runs should be repeated
                            configs_evaluated.remove(c)
                            continue
                        new_rh.add(c, cost, time, status, instance_id=i,
                                   seed=seed, additional_info=additional_info)
                        runs_from_rh += 1
                else:
                    # If no runhistory or no entries for instance, get new seed
                    seed = self.rng.randint(MAXINT)

                # We now have a seed and add all configs that are not already
                # evaluated on that seed to the runs-list. This way, we
                # guarantee the same inst-seed-pairs for all configs.
                for config in [c for c in configs if not c in configs_evaluated]:
                    # Only use specifics if specific exists, else use string "0"
                    specs = self.scen.instance_specific[i] if i and i in self.scen.instance_specific else "0"
                    runs.append(_Run(config=config,
                                    inst=i,
                                    seed=seed,
                                    inst_specs=specs))

        self.logger.info("Collected %d runs from %d configurations on %d "
                         "instances with %d repetitions. Reusing %d runs from "
                         "given runhistory.", len(runs), len(configs),
                         len(insts), repetitions, runs_from_rh)

        return runs, new_rh
示例#5
0
def fmin_smac(func: typing.Callable,
              x0: typing.List[float],
              bounds: typing.List[typing.Iterable[float]],
              maxfun: int = -1,
              rng: typing.Union[np.random.RandomState, int] = None,
              scenario_args: typing.Mapping[str, typing.Any] = None,
              **kwargs):
    """
    Minimize a function func using the SMAC4HPO facade
    (i.e., a modified version of SMAC).
    This function is a convenience wrapper for the SMAC4HPO class.

    Parameters
    ----------
    func : typing.Callable
        Function to minimize.
    x0 : typing.List[float]
        Initial guess/default configuration.
    bounds : typing.List[typing.List[float]]
        ``(min, max)`` pairs for each element in ``x``, defining the bound on
        that parameters.
    maxfun : int, optional
        Maximum number of function evaluations.
    rng : np.random.RandomState, optional
            Random number generator used by SMAC.
    scenario_args: typing.Mapping[str,typing.Any]
        Arguments passed to the scenario
        See dsmac.scenario.scenario.Scenario
    **kwargs:
        Arguments passed to the optimizer class
        See ~dsmac.facade.smac_facade.SMAC

    Returns
    -------
    x : list
        Estimated position of the minimum.
    f : float
        Value of `func` at the minimum.
    s : :class:`smac.facade.smac_hpo_facade.SMAC4HPO`
        SMAC objects which enables the user to get
        e.g., the trajectory and runhistory.

    """
    # create configuration space
    cs = ConfigurationSpace()

    # Adjust zero padding
    tmplt = 'x{0:0' + str(len(str(len(bounds)))) + 'd}'

    for idx, (lower_bound, upper_bound) in enumerate(bounds):
        parameter = UniformFloatHyperparameter(name=tmplt.format(idx + 1),
                                               lower=lower_bound,
                                               upper=upper_bound,
                                               default_value=x0[idx])
        cs.add_hyperparameter(parameter)

    # create scenario
    scenario_dict = {
        "run_obj": "quality",
        "cs": cs,
        "deterministic": "true",
        "initial_incumbent": "DEFAULT",
    }

    if scenario_args is not None:
        scenario_dict.update(scenario_args)

    if maxfun > 0:
        scenario_dict["runcount_limit"] = maxfun
    scenario = Scenario(scenario_dict)

    smac = SMAC4HPO(scenario=scenario,
                    tae_runner=ExecuteTAFuncArray,
                    tae_runner_kwargs={'ta': func},
                    rng=rng,
                    **kwargs)

    smac.logger = logging.getLogger(smac.__module__ + "." +
                                    smac.__class__.__name__)
    incumbent = smac.optimize()
    config_id = smac.solver.runhistory.config_ids[incumbent]
    run_key = RunKey(config_id, None, 0)
    incumbent_performance = smac.solver.runhistory.data[run_key]
    incumbent = np.array(
        [incumbent[tmplt.format(idx + 1)] for idx in range(len(bounds))],
        dtype=np.float)
    return incumbent, incumbent_performance.cost, smac