Exemplo n.º 1
0
    def __init__(self,
                 configspace: ConfigurationSpace,
                 types: np.ndarray,
                 bounds: typing.List[typing.Tuple[float, float]],
                 seed: int,
                 hidden_dims: typing.Optional[typing.List[int]] = None,
                 lr: float = 1e-3,
                 momentum: float = 0.999,
                 weight_decay: float = 1e-4,
                 iterations: int = 10000,
                 batch_size: int = 8,
                 var: bool = True,
                 **kwargs):
        """Constructor of a DNGO-style neural-network surrogate model.

        Parameters
        ----------
        configspace : ConfigurationSpace
            Configuration space to tune for.
        types : np.ndarray
            Per-dimension type encoding forwarded to the base model.
        bounds : typing.List[typing.Tuple[float, float]]
            Bounds of the input dimensions.
        seed : int
            Seed forwarded to the base model.
        hidden_dims : typing.Optional[typing.List[int]]
            Hidden layer widths of the network; defaults to ``[50, 50, 50]``.
        lr : float
            Learning rate.
        momentum : float
            Optimizer momentum.
        weight_decay : float
            Weight decay (L2 regularisation).
        iterations : int
            Number of training iterations.
        batch_size : int
            Mini-batch size.
        var : bool
            Whether the network also models a variance output.
        """
        super().__init__(configspace, types, bounds, seed, **kwargs)
        # Avoid the shared-mutable-default-argument pitfall for the widths.
        if hidden_dims is None:
            hidden_dims = [50, 50, 50]
        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)
        # Debug-level note instead of an unconditional print to stdout.
        self.logger.debug("USE DNGO")
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        # Iteration intervals for loss/error reporting during training.
        self.log_loss = 100
        self.log_error = 1000

        self.var = var
        self.hidden_dims = hidden_dims
        self.lr = lr
        self.momentum = momentum
        self.iterations = iterations
        self.weight_decay = weight_decay
        self.batch_size = batch_size

        self.nn = None
        self.blr = None
Exemplo n.º 2
0
    def __init__(
        self,
        ta: typing.Union[typing.List[str], typing.Callable],
        stats: Stats,
        run_obj: str = "runtime",
        par_factor: int = 1,
        cost_for_crash: float = float(MAXINT),
        abort_on_first_run_crash: bool = True,
    ):
        """Set up the shared state of a target-algorithm runner."""
        # Finished runs are appended here and later drained via
        # process_finished_runs.  A plain list acts as the FIFO because a
        # Queue's internal lock cannot be pickled.
        self.results = []  # type: typing.List[typing.Tuple[RunInfo, RunValue]]

        # Bookkeeping for the target algorithm this runner executes.
        self.ta = ta
        self.stats = stats
        self.run_obj = run_obj
        self.par_factor = par_factor
        self.cost_for_crash = cost_for_crash
        self.abort_on_first_run_crash = abort_on_first_run_crash
        self._supports_memory_limit = False
        self.logger = PickableLoggerAdapter(
            '{}.{}'.format(self.__module__, self.__class__.__name__))

        super().__init__()
Exemplo n.º 3
0
    def __init__(
        self,
        ta: typing.Union[typing.List[str], typing.Callable],
        stats: Stats,
        run_obj: str = "runtime",
        par_factor: int = 1,
        cost_for_crash: float = float(MAXINT),
        abort_on_first_run_crash: bool = True,
    ):
        """
        Attributes
        ----------
        results
        ta
        stats
        run_obj
        par_factor
        cost_for_crash
        abort_on_first_run_crash

        Parameters
        ----------
        ta : typing.Union[typing.List[str], typing.Callable]
            target algorithm
        stats: Stats
             stats object to collect statistics about runtime/additional info
        run_obj: str
            run objective of SMAC
        par_factor: int
            penalization factor
        cost_for_crash : float
            cost that is used in case of crashed runs (including runs
            that returned NaN or inf)
        abort_on_first_run_crash: bool
            if true and first run crashes, raise FirstRunCrashedException
        """

        # The results is a FIFO structure, implemented via a list
        # (because the Queue lock is not pickable). Finished runs are
        # put in this list and collected via process_finished_runs
        self.results = []  # type: typing.List[typing.Tuple[RunInfo, RunValue]]

        # Below state the support for a Runner algorithm that
        # implements a ta
        self.ta = ta
        self.stats = stats
        self.run_obj = run_obj
        self.par_factor = par_factor
        self.cost_for_crash = cost_for_crash
        self.abort_on_first_run_crash = abort_on_first_run_crash
        self.logger = PickableLoggerAdapter(self.__module__ + '.' +
                                            self.__class__.__name__)
        self._supports_memory_limit = False

        super().__init__()
Exemplo n.º 4
0
    def __init__(self,
                 hidden_dims,
                 input_size,
                 feat_type=None,
                 var: bool = True,
                 max_cat: float = np.inf):
        """Feed-forward network with optional embeddings for categorical inputs.

        Parameters
        ----------
        hidden_dims : list of int
            Widths of the hidden layers.
        input_size : int
            Number of input dimensions.
        feat_type : list of int, optional
            Per-dimension encoding; in SMAC 0 marks a numerical dimension and
            a value f > 0 a categorical one with f categories.
        var : bool
            If True the network has two outputs (presumably mean and
            variance), otherwise one.
        max_cat : float
            Upper bound on the embedding size per categorical feature
            (``np.inf`` is a valid value, hence the float annotation).
        """
        super(NeuralNet, self).__init__()
        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)

        self.feat_type = feat_type
        self.input_size = input_size
        self.num_neurons = hidden_dims
        self.activation = nn.Tanh
        self.num_layer = len(hidden_dims)
        self.max_cat = max_cat
        if var:
            self.n_output = 2
        else:
            self.n_output = 1

        # An all-zero feat_type means "all numerical": treat as no feat_type.
        # Guard against None explicitly instead of relying on np.sum(None).
        if self.feat_type is not None and np.sum(self.feat_type) == 0:
            self.feat_type = None

        if self.feat_type is not None:
            self.logger.info("Use cat embedding")
            assert len(self.feat_type) == self.input_size
            emb = nn.ModuleList()
            sz = 0
            for f in self.feat_type:
                if f == 0:
                    # In SMAC 0 encodes a numerical dimension: passed through
                    # unchanged, contributing one input unit.
                    emb.append(None)
                    sz += 1
                else:
                    # Categorical: embed f categories into at most max_cat
                    # dimensions; force int so sz stays an integer even when
                    # max_cat is a float such as np.inf.
                    es = int(min(self.max_cat, int(f)))
                    emb.append(nn.Embedding(int(f), es))
                    sz += es
            num_neurons = [sz] + self.num_neurons
            self.embedding = emb
        else:
            num_neurons = [self.input_size] + self.num_neurons

        self.weights = nn.ModuleList()
        self.acts = nn.ModuleList()

        # Debug-level trace of the layer widths (was an unconditional print).
        self.logger.debug("Layer widths: %s", num_neurons)
        for i in range(self.num_layer):
            self.weights.append(nn.Linear(num_neurons[i], num_neurons[i + 1]))
            self.acts.append(self.activation())

        self.outlayer = nn.Linear(num_neurons[-1], self.n_output)
Exemplo n.º 5
0
    def __init__(
        self,
        ta: Callable,
        stats: Stats,
        multi_objectives: Optional[List[str]] = None,
        run_obj: str = "quality",
        memory_limit: Optional[int] = None,
        par_factor: int = 1,
        cost_for_crash: float = float(MAXINT),
        abort_on_first_run_crash: bool = False,
        use_pynisher: bool = True,
    ):
        """Runner for target algorithms given as python callables.

        Parameters
        ----------
        ta : Callable
            Function (target algorithm) to be optimized.
        stats : Stats
            Stats object to collect statistics about runtime and so on.
        multi_objectives : Optional[List[str]]
            Names of the objectives; defaults to the single objective
            ``["cost"]``.
        run_obj : str
            Run objective of SMAC.
        memory_limit : Optional[int]
            Memory limit (in MB) that will be applied to the target algorithm.
        par_factor : int
            Penalization factor.
        cost_for_crash : float
            Cost that is used in case of crashed runs.
        abort_on_first_run_crash : bool
            If true and the first run crashes, raise FirstRunCrashedException.
        use_pynisher : bool
            Use pynisher to limit resources.

        Raises
        ------
        TypeError
            If ``ta`` is not callable.
        """
        # Avoid a shared mutable default argument for the objective names.
        if multi_objectives is None:
            multi_objectives = ["cost"]

        # Validate before inspecting: inspect.signature() would otherwise
        # raise a less helpful TypeError on a non-callable `ta`.
        if not callable(ta):
            raise TypeError("Argument `ta` must be a callable, but is %s" % type(ta))

        super().__init__(
            ta=ta,
            stats=stats,
            multi_objectives=multi_objectives,
            run_obj=run_obj,
            par_factor=par_factor,
            cost_for_crash=cost_for_crash,
            abort_on_first_run_crash=abort_on_first_run_crash,
        )
        self.ta = ta
        self.stats = stats
        self.multi_objectives = multi_objectives
        self.run_obj = run_obj

        self.par_factor = par_factor
        self.cost_for_crash = cost_for_crash
        self.abort_on_first_run_crash = abort_on_first_run_crash

        # Record which optional keyword arguments the callable accepts so the
        # run() implementation only forwards what it can take.
        signature = inspect.signature(ta).parameters
        self._accepts_seed = "seed" in signature.keys()
        self._accepts_instance = "instance" in signature.keys()
        self._accepts_budget = "budget" in signature.keys()
        self._ta = cast(Callable, ta)

        if memory_limit is not None:
            memory_limit = int(math.ceil(memory_limit))
        self.memory_limit = memory_limit

        self.use_pynisher = use_pynisher

        self.logger = PickableLoggerAdapter(
            self.__module__ + "." + self.__class__.__name__
        )
Exemplo n.º 6
0
    def __init__(
        self,
        configspace: ConfigurationSpace,
        types: typing.List[int],
        bounds: typing.List[typing.Tuple[float, float]],
        seed: int,
        instance_features: typing.Optional[np.ndarray] = None,
        pca_components: typing.Optional[int] = 7,
    ) -> None:
        """Store the model configuration and prepare the optional PCA
        reduction of instance features."""
        self.configspace = configspace
        self.seed = seed
        self.instance_features = instance_features
        self.pca_components = pca_components

        # Number of instance features; 0 when no features are supplied.
        self.n_feats = 0 if instance_features is None else instance_features.shape[1]

        self.n_params = len(self.configspace.get_hyperparameters())

        # PCA/scaler machinery for instance features; the flag stays False
        # here and is flipped elsewhere once PCA should actually be applied.
        self.pca = PCA(n_components=self.pca_components)
        self.scaler = MinMaxScaler()
        self._apply_pca = False

        # Lower bound on the predictive variance.
        self.var_threshold = VERY_SMALL_NUMBER

        self.bounds = bounds
        self.types = types
        # Keep a pristine copy so the type array can be reset at every call
        # to train().
        self._initial_types = copy.deepcopy(types)

        self.logger = PickableLoggerAdapter(
            "{}.{}".format(self.__module__, self.__class__.__name__))
Exemplo n.º 7
0
    def __init__(
        self,
        configspace: ConfigurationSpace,
        types: typing.List[int],
        bounds: typing.List[typing.Tuple[float, float]],
        seed: int,
        instance_features: typing.Optional[np.ndarray] = None,
        pca_components: typing.Optional[int] = 7,
    ) -> None:
        """Constructor

        Parameters
        ----------
        configspace : ConfigurationSpace
            Configuration space to tune for.
        types : List[int]
            Specifies the number of categorical values of an input dimension where
            the i-th entry corresponds to the i-th input dimension. Let's say we
            have 2 dimensions where the first dimension consists of 3 different
            categorical choices and the second dimension is continuous, then we
            have to pass [3, 0]. Note that we count starting from 0.
        bounds : List[Tuple[float, float]]
            bounds of input dimensions: (lower, upper) for continuous dims; (n_cat, np.nan) for categorical dims
        seed : int
            The seed that is passed to the model library.
        instance_features : np.ndarray (I, K)
            Contains the K dimensional instance features
            of the I different instances
        pca_components : float
            Number of components to keep when using PCA to reduce
            dimensionality of instance features. Requires to
            set n_feats (> pca_dims).
        """
        self.configspace = configspace
        self.seed = seed
        self.instance_features = instance_features
        self.pca_components = pca_components

        if instance_features is not None:
            self.n_feats = instance_features.shape[1]
        else:
            self.n_feats = 0

        self.n_params = len(self.configspace.get_hyperparameters())

        self.pca = PCA(n_components=self.pca_components)
        self.scaler = MinMaxScaler()
        self._apply_pca = False

        # Never use a lower variance than this
        self.var_threshold = VERY_SMALL_NUMBER

        self.bounds = bounds
        self.types = types
        # Initial types array which is used to reset the type array at every call to train()
        self._initial_types = copy.deepcopy(types)

        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)
Exemplo n.º 8
0
    def __init__(
        self,
        aggregate_func: typing.Callable,
        overwrite_existing_runs: bool = False,
        file_system=LocalFS()) -> None:
        """Constructor

        Parameters
        ----------
        aggregate_func: callable
            function to aggregate perf across instances
        overwrite_existing_runs: bool
            allows to overwrites old results if pairs of
            algorithm-instance-seed were measured
            multiple times
        file_system: LocalFS
            abstraction used for disk access.
            NOTE(review): the default is evaluated once at definition time,
            so every instance constructed without this argument shares the
            same LocalFS object — confirm this sharing is intended.
        """
        self.file_system = file_system
        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)

        # By having the data in a deterministic order we can do useful tests
        # when we serialize the data and can assume it's still in the same
        # order as it was added.
        self.data = collections.OrderedDict(
        )  # type: typing.Dict[RunKey, RunValue]

        # for fast access, we have also an unordered data structure
        # to get all instance seed pairs of a configuration
        self._configid_to_inst_seed = {}  # type: typing.Dict[int, InstSeedKey]

        # Bidirectional mapping between configurations and integer ids.
        self.config_ids = {}  # type: typing.Dict[Configuration, int]
        self.ids_config = {}  # type: typing.Dict[int, Configuration]
        self._n_id = 0

        # Stores cost for each configuration ID
        self.cost_per_config = {}  # type: typing.Dict[int, float]
        # runs_per_config maps the configuration ID to the number of runs for that configuration
        # and is necessary for computing the moving average
        self.runs_per_config = {}  # type: typing.Dict[int, int]

        # Store whether a datapoint is "external", which means it was read from
        # a JSON file. Can be chosen to not be written to disk
        self.external = {}  # type: typing.Dict[RunKey, DataOrigin]

        self.aggregate_func = aggregate_func
        self.overwrite_existing_runs = overwrite_existing_runs
Exemplo n.º 9
0
    def __init__(self, model: AbstractEPM):
        """Store the surrogate model this component operates on.

        Parameters
        ----------
        model : AbstractEPM
            Models the objective function.
        """
        self.logger = PickableLoggerAdapter(
            "{}.{}".format(self.__module__, self.__class__.__name__))
        self.model = model
Exemplo n.º 10
0
    def __init__(self, model: AbstractEPM):
        """Store the surrogate model this component operates on.

        Parameters
        ----------
        model : AbstractEPM
            Models the objective function.
        """
        self.model = model
        # Attributes that must be refreshed via update() before use.
        self._required_updates = ('model',)  # type: Tuple[str, ...]
        self.logger = PickableLoggerAdapter(
            "{}.{}".format(self.__module__, self.__class__.__name__))
Exemplo n.º 11
0
    def __init__(self,
                 configspace: ConfigurationSpace,
                 types: np.ndarray,
                 bounds: typing.List[typing.Tuple[float, float]],
                 seed: int,
                 hidden_dims: typing.Optional[typing.List[int]] = None,
                 lr: float = 1e-3,
                 momentum: float = 0.999,
                 weight_decay: float = 1e-4,
                 iterations: int = 10000,
                 batch_size: int = 8,
                 number_of_networks: int = 10,
                 var: bool = True,
                 train_with_lognormal_llh: bool = False,
                 compute_mean_in_logspace: bool = True,
                 **kwargs):
        """Constructor of an ensemble-of-neural-networks surrogate model.

        Parameters
        ----------
        configspace : ConfigurationSpace
            Configuration space to tune for.
        types : np.ndarray
            Per-dimension type encoding forwarded to the base model.
        bounds : typing.List[typing.Tuple[float, float]]
            Bounds of the input dimensions.
        seed : int
            Seed forwarded to the base model.
        hidden_dims : typing.Optional[typing.List[int]]
            Hidden layer widths of each network; defaults to ``[50, 50, 50]``.
        lr : float
            Learning rate.
        momentum : float
            Optimizer momentum.
        weight_decay : float
            Weight decay (L2 regularisation).
        iterations : int
            Number of training iterations.
        batch_size : int
            Mini-batch size.
        number_of_networks : int
            Size of the ensemble.
        var : bool
            Whether each network also models a variance output.
        train_with_lognormal_llh : bool
            Train with a lognormal log-likelihood; mutually exclusive with
            ``compute_mean_in_logspace``.
        compute_mean_in_logspace : bool
            Average ensemble predictions in log space.

        Raises
        ------
        ValueError
            If both ``train_with_lognormal_llh`` and
            ``compute_mean_in_logspace`` are requested.
        """
        super().__init__(configspace, types, bounds, seed, **kwargs)

        # Avoid the shared-mutable-default-argument pitfall for the widths.
        if hidden_dims is None:
            hidden_dims = [50, 50, 50]

        # The two options are mutually exclusive; raise instead of assert so
        # the check survives `python -O`.
        if train_with_lognormal_llh and compute_mean_in_logspace:
            raise ValueError(
                "train_with_lognormal_llh and compute_mean_in_logspace are "
                "mutually exclusive")

        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        # Iteration intervals for loss/error reporting during training.
        self.log_loss = 1000
        self.log_error = 5000

        self.var = var
        self.hidden_dims = hidden_dims
        self.lr = lr
        self.momentum = momentum
        self.iterations = iterations
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.number_of_networks = number_of_networks
        self.train_with_lognormal = train_with_lognormal_llh
        self.compute_mean_in_logspace = compute_mean_in_logspace

        self.nns = None
        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)
Exemplo n.º 12
0
    def __init__(
        self,
        overwrite_existing_runs: bool = False,
    ) -> None:
        """Initialise an empty run history."""
        self.logger = PickableLoggerAdapter(
            "{}.{}".format(self.__module__, self.__class__.__name__))

        # Insertion-ordered storage of all runs: the deterministic order
        # lets serialisation tests assume entries come back as added.
        self.data = collections.OrderedDict()  # type: Dict[RunKey, RunValue]

        # Unordered index from configuration id to the instance/seed pairs
        # (and their budgets) it was run on; capped runs are not included.
        self._configid_to_inst_seed_budget = (
            {})  # type: Dict[int, Dict[InstSeedKey, List[float]]]

        # Bidirectional mapping between configurations and integer ids.
        self.config_ids = {}  # type: Dict[Configuration, int]
        self.ids_config = {}  # type: Dict[int, Configuration]
        self._n_id = 0

        # Aggregated cost per configuration id.
        self._cost_per_config = {}  # type: Dict[int, np.ndarray]
        # Minimum cost over all budgets per configuration id.
        self._min_cost_per_config = {}  # type: Dict[int, np.ndarray]
        # Run count per configuration id, needed for the moving average.
        self.num_runs_per_config = {}  # type: Dict[int, int]

        # Marks entries that were read from a JSON file ("external"); such
        # entries may be skipped when writing back to disk.
        self.external = {}  # type: Dict[RunKey, DataOrigin]

        self.overwrite_existing_runs = overwrite_existing_runs
        self.num_obj = -1  # type: int
        self.objective_bounds = []  # type: List[Tuple[float, float]]
Exemplo n.º 13
0
    def __init__(self,
                 seed: int,
                 feat_types: typing.Optional[typing.List[int]] = None,
                 hidden_dims: typing.Optional[typing.List[int]] = None,
                 lr: float = 1e-3,
                 momentum: float = 0.999,
                 weight_decay: float = 1e-4,
                 iterations: int = 10000,
                 batch_size: int = 8,
                 var: bool = True,
                 lognormal_nllh: bool = False,
                 var_bias_init: float = 1,
                 max_cat: float = np.inf,
                 learned_weight_init: bool = False,
                 optimization_algorithm: str = 'sgd',
                 **kwargs):
        """Configure a neural-network trainer.

        Parameters
        ----------
        seed : int
            Random seed.
        feat_types : typing.Optional[typing.List[int]]
            Per-dimension feature types forwarded to the network
            (presumably the SMAC encoding where 0 marks a numerical
            dimension — confirm against callers).
        hidden_dims : typing.Optional[typing.List[int]]
            Hidden layer widths; defaults to ``[50, 50, 50]``.
        lr : float
            Learning rate.
        momentum : float
            Optimizer momentum.
        weight_decay : float
            Weight decay (L2 regularisation).
        iterations : int
            Number of training iterations.
        batch_size : int
            Mini-batch size.
        var : bool
            Whether the network also models a variance output.
        lognormal_nllh : bool
            Train with a lognormal negative log-likelihood; requires ``var``.
        var_bias_init : float
            Initial bias value for the variance output.
        max_cat : float
            Cap on the embedding size per categorical feature
            (``np.inf`` is valid, hence the float annotation).
        learned_weight_init : bool
            Whether to use a learned weight initialization.
        optimization_algorithm : str
            Optimizer identifier (e.g. ``'sgd'``).

        Raises
        ------
        ValueError
            If ``lognormal_nllh`` is requested without ``var``.
        """
        # Avoid the shared-mutable-default-argument pitfall for the widths.
        if hidden_dims is None:
            hidden_dims = [50, 50, 50]

        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)

        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        # Iteration intervals for loss/error reporting during training.
        self.log_loss = 1000
        self.log_error = iterations
        self.seed = seed
        self.var = var
        self.hidden_dims = hidden_dims
        self.lr = lr
        self.momentum = momentum
        self.iterations = iterations
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.lognormal_nllh = lognormal_nllh
        self.var_bias_init = var_bias_init
        self.max_cat = max_cat
        self.learned_weight_init = learned_weight_init
        self.optimization_algorithm = optimization_algorithm

        self.feat_types = feat_types

        if self.lognormal_nllh and not self.var:
            # Was an assert; raise so the check survives `python -O`.
            raise ValueError(
                "Can't train with lognormal nllh if no var is selected")

        self.model = None
Exemplo n.º 14
0
class BaseRunner(ABC):
    """Interface class to handle the execution of SMAC' configurations.

    This interface defines how to interact with the SMBO loop.
    The complexity of running a configuration as well as handling the
    results is abstracted to the SMBO via a BaseRunner.

    From SMBO perspective, launching a configuration follows a
    submit/collect scheme as follows:
    1- A run is launched via submit_run()
    1.1- Submit_run internally calls run_wrapper(), a method that
         contains common processing functions among different runners,
         for example, handling capping and stats checking.
    1.2- A class that implements BaseRunner defines run() which is
         really the algorithm to translate a RunInfo to a RunValue, i.e.
         a configuration to an actual result.
    2- A completed run is collected via get_finished_runs(), which returns
       any finished runs, if any.
    3- This interface also offers the method wait() as a mechanism to make
       sure we have enough data in the next iteration to make a decision. For
       example, the intensifier might not be able to select the next challenger
       until more results are available.


    Attributes
    ----------

    results
    ta
    stats
    run_obj
    par_factor
    cost_for_crash
    abort_on_first_run_crash

    Parameters
    ----------
    ta : typing.Union[typing.List[str], typing.Callable]
        target algorithm
    stats: Stats
         stats object to collect statistics about runtime/additional info
    run_obj: str
        run objective of SMAC
    par_factor: int
        penalization factor
    cost_for_crash : float
        cost that is used in case of crashed runs (including runs
        that returned NaN or inf)
    abort_on_first_run_crash: bool
        if true and first run crashes, raise FirstRunCrashedException
    """
    def __init__(
        self,
        ta: typing.Union[typing.List[str], typing.Callable],
        stats: Stats,
        run_obj: str = "runtime",
        par_factor: int = 1,
        cost_for_crash: float = float(MAXINT),
        abort_on_first_run_crash: bool = True,
    ):

        # The results is a FIFO structure, implemented via a list
        # (because the Queue lock is not pickable). Finished runs are
        # put in this list and collected via process_finished_runs
        self.results = []  # type: typing.List[typing.Tuple[RunInfo, RunValue]]

        # Below state the support for a Runner algorithm that
        # implements a ta
        self.ta = ta
        self.stats = stats
        self.run_obj = run_obj
        self.par_factor = par_factor
        self.cost_for_crash = cost_for_crash
        self.abort_on_first_run_crash = abort_on_first_run_crash
        self.logger = PickableLoggerAdapter(self.__module__ + '.' +
                                            self.__class__.__name__)
        self._supports_memory_limit = False

        super().__init__()

    @abstractmethod
    def submit_run(self, run_info: RunInfo) -> None:
        """This function submits a configuration
        embedded in a RunInfo object, and uses one of the workers
        to produce a result (such result will eventually be available
        on the self.results FIFO).

        This interface method will be called by SMBO, with the expectation
        that a function will be executed by a worker.

        What will be executed is dictated by run_info, and "how" will it be
        executed is decided via the child class that implements a run() method.

        Because config submission can be a serial/parallel endeavor,
        it is expected to be implemented by a child class.

        Parameters
        ----------
        run_info: RunInfo
            An object containing the configuration and the necessary data to run it

        """
        pass

    @abstractmethod
    def run(
        self,
        config: Configuration,
        instance: str,
        cutoff: typing.Optional[float] = None,
        seed: int = 12345,
        budget: typing.Optional[float] = None,
        instance_specific: str = "0",
    ) -> typing.Tuple[StatusType, float, float, typing.Dict]:
        """Runs target algorithm <self.ta> with configuration <config> on
        instance <instance> with instance specifics <specifics> for at most
        <cutoff> seconds and random seed <seed>

        This method exemplifies how to define the run() method

        Parameters
        ----------
            config : Configuration
                dictionary param -> value
            instance : string
                problem instance
            cutoff : float, optional
                Wallclock time limit of the target algorithm. If no value is
                provided no limit will be enforced.
            seed : int
                random seed
            budget : float, optional
                A positive, real-valued number representing an arbitrary limit to the target
                algorithm. Handled by the target algorithm internally
            instance_specific: str
                instance specific information (e.g., domain file or solution)

        Returns
        -------
            status: enum of StatusType (int)
                {SUCCESS, TIMEOUT, CRASHED, ABORT}
            cost: float
                cost/regret/quality (float) (None, if not returned by TA)
            runtime: float
                runtime (None if not returned by TA)
            additional_info: dict
                all further additional run information
        """
        pass

    def run_wrapper(
        self,
        run_info: RunInfo,
    ) -> typing.Tuple[RunInfo, RunValue]:
        """Wrapper around run() to exec and check the execution of a given config file

        This function encapsulates common handling/processing, so that run() implementation
        is simplified.

        Parameters
        ----------
            run_info : RunInfo
                Object that contains enough information to execute a configuration run in
                isolation.

        Returns
        -------
            RunInfo:
                an object containing the configuration launched
            RunValue:
                Contains information about the status/performance of config
        """
        start = time.time()

        # A cutoff is mandatory when optimizing runtime.
        if run_info.cutoff is None and self.run_obj == "runtime":
            if self.logger:
                self.logger.critical(
                    "For scenarios optimizing running time "
                    "(run objective), a cutoff time is required, "
                    "but not given to this call.")
            raise ValueError("For scenarios optimizing running time "
                             "(run objective), a cutoff time is required, "
                             "but not given to this call.")
        cutoff = None
        if run_info.cutoff is not None:
            cutoff = int(math.ceil(run_info.cutoff))

        try:
            status, cost, runtime, additional_info = self.run(
                config=run_info.config,
                instance=run_info.instance,
                cutoff=cutoff,
                seed=run_info.seed,
                budget=run_info.budget,
                instance_specific=run_info.instance_specific)
        except Exception as e:
            # Any exception in run() is converted into a CRASHED run whose
            # traceback is preserved in the additional info.
            status = StatusType.CRASHED
            cost = self.cost_for_crash
            runtime = time.time() - start

            # Add context information to the error message
            exception_traceback = traceback.format_exc()
            error_message = repr(e)
            additional_info = {
                'traceback': exception_traceback,
                'error': error_message
            }

        end = time.time()

        if run_info.budget == 0 and status == StatusType.DONOTADVANCE:
            raise ValueError(
                "Cannot handle DONOTADVANCE state when using intensify or SH/HB on "
                "instances.")

        # Catch NaN or inf.
        if (self.run_obj == 'runtime' and not np.isfinite(runtime)
                or self.run_obj == 'quality' and not np.isfinite(cost)):
            if self.logger:
                self.logger.warning(
                    "Target Algorithm returned NaN or inf as {}. "
                    "Algorithm run is treated as CRASHED, cost "
                    "is set to {} for quality scenarios. "
                    "(Change value through \"cost_for_crash\""
                    "-option.)".format(self.run_obj, self.cost_for_crash))
            status = StatusType.CRASHED

        if self.run_obj == "runtime":
            # The following line pleases mypy - we already check for cutoff not being none above,
            # prior to calling run. However, mypy assumes that the data type of cutoff
            # is still Optional[int]
            assert cutoff is not None
            if runtime > self.par_factor * cutoff:
                self.logger.warning("Returned running time is larger "
                                    "than {0} times the passed cutoff time. "
                                    "Clamping to {0} x cutoff.".format(
                                        self.par_factor))
                runtime = cutoff * self.par_factor
                status = StatusType.TIMEOUT
            if status == StatusType.SUCCESS:
                cost = runtime
            else:
                # Penalize timeouts/crashes with par_factor times the cutoff.
                cost = cutoff * self.par_factor
            if status == StatusType.TIMEOUT and run_info.capped:
                status = StatusType.CAPPED
        else:
            if status == StatusType.CRASHED:
                cost = self.cost_for_crash

        return run_info, RunValue(status=status,
                                  cost=cost,
                                  time=runtime,
                                  additional_info=additional_info,
                                  starttime=start,
                                  endtime=end)

    @abstractmethod
    def get_finished_runs(
            self) -> typing.List[typing.Tuple[RunInfo, RunValue]]:
        """This method returns any finished configuration, and returns a list with
        the results of exercising the configurations. This class keeps populating results
        to self.results until a call to get_finished runs is done. In this case, the
        self.results list is emptied and all RunValues produced by running run() are
        returned.

        Returns
        -------
            List[RunInfo, RunValue]: A list of pairs RunInfo/RunValues
            a submitted configuration
        """
        raise NotImplementedError()

    @abstractmethod
    def wait(self) -> None:
        """SMBO/intensifier might need to wait for runs to finish before making a decision.
        This method waits until 1 run completes
        """
        pass

    @abstractmethod
    def pending_runs(self) -> bool:
        """
        Whether or not there are configs still running. Generally if the runner is serial,
        launching a run instantly returns it's result. On parallel runners, there might
        be pending configurations to complete.
        """
        pass

    @abstractmethod
    def num_workers(self) -> int:
        """
        Return the active number of workers that will execute tae runs.
        """
        pass
Exemplo n.º 15
0
class AbstractTAFunc(SerialRunner):
    """Baseclass to execute target algorithms which are python functions.

    **Note:** Do not use directly

    Parameters
    ----------
    ta : callable
        Function (target algorithm) to be optimized.
    stats: Stats()
         stats object to collect statistics about runtime and so on
    multi_objectives: Optional[List[str]]
        names of the objectives; by default a single objective named "cost"
    run_obj: str
        run objective of SMAC
    memory_limit : int, optional
        Memory limit (in MB) that will be applied to the target algorithm.
    par_factor: int
        penalization factor
    cost_for_crash : float
        cost that is used in case of crashed runs (including runs
        that returned NaN or inf)
    use_pynisher: bool
        use pynisher to limit resources;
        if disabled
          * TA func can use as many resources
          as it wants (time and memory) --- use with caution
          * all runs will be returned as SUCCESS if returned value is not None

    Attributes
    ----------
    memory_limit
    use_pynisher
    """

    def __init__(
        self,
        ta: Callable,
        stats: Stats,
        multi_objectives: Optional[List[str]] = None,
        run_obj: str = "quality",
        memory_limit: Optional[int] = None,
        par_factor: int = 1,
        cost_for_crash: float = float(MAXINT),
        abort_on_first_run_crash: bool = False,
        use_pynisher: bool = True,
    ):
        # Avoid a mutable default argument; None stands for the historical
        # single-objective default ["cost"].
        if multi_objectives is None:
            multi_objectives = ["cost"]

        # Validate `ta` before it is inspected or handed to the parent class:
        # inspect.signature() would otherwise fail first with a less helpful
        # error message.
        if not callable(ta):
            raise TypeError("Argument `ta` must be a callable, but is %s" % type(ta))

        super().__init__(
            ta=ta,
            stats=stats,
            multi_objectives=multi_objectives,
            run_obj=run_obj,
            par_factor=par_factor,
            cost_for_crash=cost_for_crash,
            abort_on_first_run_crash=abort_on_first_run_crash,
        )
        self.ta = ta
        self.stats = stats
        self.multi_objectives = multi_objectives
        self.run_obj = run_obj

        self.par_factor = par_factor
        self.cost_for_crash = cost_for_crash
        self.abort_on_first_run_crash = abort_on_first_run_crash

        # Record which optional keyword arguments the target function accepts,
        # so run() only forwards the ones the function can handle.
        signature = inspect.signature(ta).parameters
        self._accepts_seed = "seed" in signature.keys()
        self._accepts_instance = "instance" in signature.keys()
        self._accepts_budget = "budget" in signature.keys()
        self._ta = cast(Callable, ta)

        if memory_limit is not None:
            # pynisher expects an integer number of megabytes.
            memory_limit = int(math.ceil(memory_limit))
        self.memory_limit = memory_limit

        self.use_pynisher = use_pynisher

        self.logger = PickableLoggerAdapter(
            self.__module__ + "." + self.__class__.__name__
        )

    def run(
        self,
        config: Configuration,
        instance: Optional[str] = None,
        cutoff: Optional[float] = None,
        seed: int = 12345,
        budget: Optional[float] = None,
        instance_specific: str = "0",
    ) -> Tuple[StatusType, float, float, Dict]:
        """Runs target algorithm <self._ta> with configuration <config> for at
        most <cutoff> seconds, allowing it to use at most <memory_limit> RAM.

        Whether the target algorithm is called with the <instance> and
        <seed> depends on the subclass implementing the actual call to
        the target algorithm.

        Parameters
        ----------
            config : Configuration, dictionary (or similar)
                Dictionary param -> value
            instance : str, optional
                Problem instance
            cutoff : float, optional
                Wallclock time limit of the target algorithm. If no value is
                provided no limit will be enforced. It is casted to integer internally.
            seed : int
                Random seed
            budget : float, optional
                A positive, real-valued number representing an arbitrary limit to the target algorithm
                Handled by the target algorithm internally
            instance_specific: str
                Instance specific information (e.g., domain file or solution)
        Returns
        -------
            status: enum of StatusType (int)
                {SUCCESS, TIMEOUT, CRASHED, ABORT}
            cost: np.ndarray
                cost/regret/quality/runtime (float) (None, if not returned by TA)
            runtime: float
                runtime (None if not returned by TA)
            additional_info: dict
                all further additional run information
        """

        # Only pass the keyword arguments the target function declared.
        obj_kwargs = {}  # type: Dict[str, Union[int, str, float, None]]
        if self._accepts_seed:
            obj_kwargs["seed"] = seed
        if self._accepts_instance:
            obj_kwargs["instance"] = instance
        if self._accepts_budget:
            obj_kwargs["budget"] = budget

        # Default cost used whenever the run does not finish successfully.
        cost = self.cost_for_crash  # type: Union[float, List[float]]

        if self.use_pynisher:
            # walltime for pynisher has to be a rounded up integer
            if cutoff is not None:
                cutoff = int(math.ceil(cutoff))
                if cutoff > MAX_CUTOFF:
                    raise ValueError(
                        "%d is outside the legal range of [0, 65535] "
                        "for cutoff (when using pynisher, due to OS limitations)"
                        % cutoff
                    )

            arguments = {
                "logger": self.logger,
                "wall_time_in_s": cutoff,
                "mem_in_mb": self.memory_limit,
            }

            # call ta; any exception escaping pynisher itself is a crash.
            try:
                obj = pynisher.enforce_limits(**arguments)(self._ta)
                rval = self._call_ta(obj, config, obj_kwargs)
            except Exception as e:
                cost = np.asarray(cost).squeeze().tolist()
                exception_traceback = traceback.format_exc()
                error_message = repr(e)
                additional_info = {
                    "traceback": exception_traceback,
                    "error": error_message,
                }

                return StatusType.CRASHED, cost, 0.0, additional_info  # type: ignore

            # The TA may return either a bare result or (result, info-dict).
            if isinstance(rval, tuple):
                result = rval[0]
                additional_run_info = rval[1]
            else:
                result = rval
                additional_run_info = {}

            # Map pynisher's exit status onto a SMAC StatusType.
            if obj.exit_status is pynisher.TimeoutException:
                status = StatusType.TIMEOUT
            elif obj.exit_status is pynisher.MemorylimitException:
                status = StatusType.MEMOUT
            elif obj.exit_status == 0 and result is not None:
                status = StatusType.SUCCESS
                cost = result  # type: ignore # noqa
            else:
                status = StatusType.CRASHED

            runtime = float(obj.wall_clock_time)
        else:
            # Unlimited execution: time it ourselves and treat any exception
            # as a crash (best-effort; see class docstring caveats).
            start_time = time.time()

            # call ta
            try:
                rval = self._call_ta(self._ta, config, obj_kwargs)

                if isinstance(rval, tuple):
                    result = rval[0]
                    additional_run_info = rval[1]
                else:
                    result = rval
                    additional_run_info = {}

                status = StatusType.SUCCESS
                cost = result  # type: ignore
            except Exception as e:
                self.logger.exception(e)
                status = StatusType.CRASHED
                additional_run_info = {}

            runtime = time.time() - start_time

        # Do some sanity checking (for multi objective)
        if len(self.multi_objectives) > 1:
            error = f"Returned costs {cost} does not match the number of objectives {len(self.multi_objectives)}."

            # If dict convert to array
            # Make sure the ordering is correct
            if isinstance(cost, dict):
                ordered_cost = []
                for name in self.multi_objectives:
                    if name not in cost:
                        raise RuntimeError(
                            f"Objective {name} was not found in the returned costs."
                        )

                    ordered_cost.append(cost[name])
                cost = ordered_cost

            if isinstance(cost, list):
                if len(cost) != len(self.multi_objectives):
                    raise RuntimeError(error)

            if isinstance(cost, float):
                raise RuntimeError(error)

        # A missing cost also counts as a crash (e.g. the TA returned None).
        if cost is None or status == StatusType.CRASHED:
            status = StatusType.CRASHED
            cost = self.cost_for_crash

        # Normalize cost to plain python scalars / lists for serialization.
        cost = np.asarray(cost).squeeze().tolist()

        return status, cost, runtime, additional_run_info  # type: ignore

    def _call_ta(
        self,
        obj: Callable,
        config: Configuration,
        obj_kwargs: Dict[str, Union[int, str, float, None]],
    ) -> Union[float, Tuple[float, Dict]]:
        """Subclass hook performing the actual call of the target function."""
        raise NotImplementedError()
Exemplo n.º 16
0
 def __init__(self, model: AbstractEPM):
     """Wrap *model* and declare which attributes updates must refresh.

     Parameters
     ----------
     model : AbstractEPM
         The surrogate model this component operates on.
     """
     self._required_updates = ('model', )  # type: Tuple[str, ...]
     self.model = model
     self.logger = PickableLoggerAdapter(self.__module__ + "." + self.__class__.__name__)
Exemplo n.º 17
0
class EnsembleNN(BaseModel):
    """Ensemble of independently trained neural networks used as an EPM.

    The ensemble's mean prediction is the average of the member networks'
    mean predictions; the predictive variance is the variance across members.
    """

    def __init__(self,
                 configspace: ConfigurationSpace,
                 types: np.ndarray,
                 bounds: typing.List[typing.Tuple[float, float]],
                 seed: int,
                 hidden_dims: typing.Optional[typing.List[int]] = None,
                 lr: float = 1e-3,
                 momentum: float = 0.999,
                 weight_decay: float = 1e-4,
                 iterations: int = 10000,
                 batch_size: int = 8,
                 number_of_networks: int = 10,
                 var: bool = True,
                 train_with_lognormal_llh=False,
                 compute_mean_in_logspace=True,
                 **kwargs):
        super().__init__(configspace, types, bounds, seed, **kwargs)

        # Avoid a mutable default argument; None stands for the historical
        # default architecture of three 50-unit layers.
        if hidden_dims is None:
            hidden_dims = [50, 50, 50]

        # Training with a log-normal likelihood and averaging in log space
        # are mutually exclusive options.
        assert not (train_with_lognormal_llh and compute_mean_in_logspace)

        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        # Iteration intervals at which loss / error are reported.
        self.log_loss = 1000
        self.log_error = 5000

        self.var = var
        self.hidden_dims = hidden_dims
        self.lr = lr
        self.momentum = momentum
        self.iterations = iterations
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.number_of_networks = number_of_networks
        self.train_with_lognormal = train_with_lognormal_llh
        self.compute_mean_in_logspace = compute_mean_in_logspace

        # Populated lazily by _train().
        self.nns = None
        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)

    def _train(self, X: np.ndarray, y: np.ndarray):
        """Train ``number_of_networks`` networks on (X, y), one seed each."""
        self._my = np.mean(y)
        self._sy = np.std(y)

        if not self.train_with_lognormal:
            # Standardize on a copy instead of mutating the caller's array
            # in place (the previous `y -= ...` modified the input argument).
            # NOTE(review): assumes std(y) != 0 — confirm upstream guarantees.
            y = (y - self._my) / self._sy

        self.train_data = (X, y)
        self.nns = []
        self.logger.debug("Start Training %d networks" %
                          self.number_of_networks)
        for i in range(self.number_of_networks):
            nn = SimpleNetworkEmbedding(
                hidden_dims=self.hidden_dims,
                lr=self.lr,
                seed=self.seed + i,  # distinct seed per ensemble member
                momentum=self.momentum,
                weight_decay=self.weight_decay,
                iterations=self.iterations,
                batch_size=self.batch_size,
                var=self.var,
                lognormal_nllh=self.train_with_lognormal)
            nn.train(X, y)
            self.nns.append(nn)

    def _predict_individual(
            self, X: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
        """Return per-network means and variances, each [n_samples, n_networks]."""
        ms = np.zeros([X.shape[0], self.number_of_networks])
        vs = np.zeros([X.shape[0], self.number_of_networks])
        for i_nn, nn in enumerate(self.nns):
            pred = nn.predict(X)
            m = pred[:, 0]
            v = pred[:, 1]

            if not self.train_with_lognormal:
                # Undo the target standardization applied in _train().
                m = m * self._sy + self._my
                v = v * self._sy**2

            ms[:, i_nn] = m
            vs[:, i_nn] = v

        return ms, vs

    def _predict(self, X: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
        """Ensemble prediction: mean and variance across the member means."""
        ms, _ = self._predict_individual(X)
        m = ms.mean(axis=1)
        v = ms.var(axis=1)
        return m, v

    def predict_marginalized_over_instances(self, X: np.ndarray):
        """Predict mean and variance marginalized over all instances.

        Returns the predictive mean and variance marginalised over all
        instances for a set of configurations.

        Note
        ----
        This method overwrites the same method of ~smac.epm.base_epm.AbstractEPM;
        it averages each member network's predictions over all instances and
        then computes statistics across the ensemble, requiring no
        distribution assumption to marginalize the uncertainty estimates.

        Parameters
        ----------
        X : np.ndarray
            [n_samples, n_features (config)]

        Returns
        -------
        means : np.ndarray of shape = [n_samples, 1]
            Predictive mean
        vars : np.ndarray  of shape = [n_samples, 1]
            Predictive variance
        """

        # Without instance features there is nothing to marginalize over.
        if self.instance_features is None or \
                len(self.instance_features) == 0:
            mean_, var = self.predict(X)
            var[var < self.var_threshold] = self.var_threshold
            var[np.isnan(var)] = self.var_threshold
            return mean_, var

        if len(X.shape) != 2:
            raise ValueError('Expected 2d array, got %dd array!' %
                             len(X.shape))
        if X.shape[1] != len(self.bounds):
            raise ValueError('Rows in X should have %d entries but have %d!' %
                             (len(self.bounds), X.shape[1]))

        mean_ = np.zeros(X.shape[0])
        var = np.zeros(X.shape[0])

        for i, x in enumerate(X):
            # 1. Per-network predictions for this config paired with every
            #    instance feature vector, computed in a single batched call.
            nn_input = np.concatenate(
                (np.tile(x, (len(self.instance_features), 1)),
                 self.instance_features),
                axis=1)
            preds_nns, _ = self._predict_individual(nn_input)

            # 2. Average over instances within each network (optionally in
            #    log space, matching how the model was configured).
            pred_per_nn = []
            for nn_id in range(self.number_of_networks):
                if self.compute_mean_in_logspace:
                    pred_per_nn.append(
                        np.log(np.mean(np.exp(preds_nns[:, nn_id]))))
                else:
                    pred_per_nn.append(np.mean(preds_nns[:, nn_id]))

            # 3. Statistics across the ensemble members.
            mean_x = np.mean(pred_per_nn)
            var_x = np.var(pred_per_nn)
            if var_x < self.var_threshold:
                var_x = self.var_threshold

            var[i] = var_x
            mean_[i] = mean_x

        # Return column vectors of shape [n_samples, 1].
        if len(mean_.shape) == 1:
            mean_ = mean_.reshape((-1, 1))
        if len(var.shape) == 1:
            var = var.reshape((-1, 1))

        return mean_, var
Exemplo n.º 18
0
    def __init__(
        self,
        ta: typing.Callable,
        stats: Stats,
        run_obj: str = "quality",
        memory_limit: typing.Optional[int] = None,
        par_factor: int = 1,
        cost_for_crash: float = float(MAXINT),
        abort_on_first_run_crash: bool = False,
        use_pynisher: bool = True,
    ):
        """
        Abstract class for having a function as target algorithm

        Parameters
        ----------
        ta : callable
            Function (target algorithm) to be optimized.
        stats: Stats()
             stats object to collect statistics about runtime and so on
        run_obj: str
            run objective of SMAC
        memory_limit : int, optional
            Memory limit (in MB) that will be applied to the target algorithm.
        par_factor: int
            penalization factor
        cost_for_crash : float
            cost that is used in case of crashed runs (including runs
            that returned NaN or inf)
        use_pynisher: bool
            use pynisher to limit resources;
            if disabled
              * TA func can use as many resources
              as it wants (time and memory) --- use with caution
              * all runs will be returned as SUCCESS if returned value is not None
        """
        # The docstring above used to sit *after* the super().__init__ call,
        # which made it a discarded string expression rather than a docstring.

        # Validate `ta` before it is inspected or handed to the parent class:
        # inspect.signature() would otherwise fail first with a less helpful
        # error message.
        if not callable(ta):
            raise TypeError('Argument `ta` must be a callable, but is %s' %
                            type(ta))

        super().__init__(
            ta=ta,
            stats=stats,
            run_obj=run_obj,
            par_factor=par_factor,
            cost_for_crash=cost_for_crash,
            abort_on_first_run_crash=abort_on_first_run_crash,
        )
        self.ta = ta
        self.stats = stats
        self.run_obj = run_obj

        self.par_factor = par_factor
        self.cost_for_crash = cost_for_crash
        self.abort_on_first_run_crash = abort_on_first_run_crash

        # Record which optional keyword arguments the target function accepts,
        # so the runner only forwards the ones the function can handle.
        signature = inspect.signature(ta).parameters
        self._accepts_seed = 'seed' in signature.keys()
        self._accepts_instance = 'instance' in signature.keys()
        self._accepts_budget = 'budget' in signature.keys()
        self._ta = typing.cast(typing.Callable, ta)

        if memory_limit is not None:
            # pynisher expects an integer number of megabytes.
            memory_limit = int(math.ceil(memory_limit))
        self.memory_limit = memory_limit

        self.use_pynisher = use_pynisher

        self.logger = PickableLoggerAdapter(self.__module__ + '.' +
                                            self.__class__.__name__)
Exemplo n.º 19
0
class OutputWriter(object):
    """Writing scenario to file.

    Serializes a Scenario object (options, configspace, instance lists and
    features) into its output directory in a format the input reader accepts.
    """
    def __init__(self) -> None:
        self.logger = PickableLoggerAdapter(name=self.__module__ + "." +
                                            self.__class__.__name__)

    def write_scenario_file(self, scenario: 'Scenario') -> None:
        """Write scenario to a file (format is compatible with input_reader).
        Will overwrite if file exists. If you have arguments that need special
        parsing when saving, specify so in the _parse_argument-function.
        Creates output-dir if necessary.

        Parameters
        ----------
            scenario: Scenario
                Scenario to be written to file

        Returns
        -------
            None. Returns early (without writing) when the scenario has no
            output directory configured.
        """
        if scenario.output_dir_for_this_run is None or scenario.output_dir_for_this_run == "":
            scenario.logger.info("No output directory for scenario logging "
                                 "specified -- scenario will not be logged.")
            return
        # Create output-dir if necessary
        if not os.path.isdir(scenario.output_dir_for_this_run):
            scenario.logger.debug("Output directory does not exist! Will be "
                                  "created.")
            try:
                os.makedirs(scenario.output_dir_for_this_run)
            except OSError:
                scenario.logger.debug("Could not make output directory.",
                                      exc_info=True)
                raise OSError("Could not make output directory: "
                              "{}.".format(scenario.output_dir_for_this_run))

        # options_dest2name maps scenario._arguments from dest -> name
        options_dest2name = {(scenario._arguments[v]['dest']
                              if scenario._arguments[v]['dest'] else v):
                             v.lstrip('-').replace('-', '_')
                             for v in scenario._arguments}

        # Write all options into "output_dir/scenario.txt"
        path = os.path.join(scenario.output_dir_for_this_run, "scenario.txt")
        scenario.logger.debug("Writing scenario-file to {}.".format(path))
        with open(path, 'w') as fh:
            for key in options_dest2name:
                # NOTE(review): re-keying with the normalized name assumes
                # the dict keys contain no leading dashes/hyphens; otherwise
                # the options_dest2name lookup below would raise — confirm.
                key = key.lstrip('-').replace('-', '_')
                new_value = self._parse_argument(scenario, key,
                                                 getattr(scenario, key))
                if new_value is not None:
                    fh.write("{} = {}\n".format(options_dest2name[key],
                                                new_value))

    def _parse_argument(self, scenario: 'Scenario', key: str,
                        value: typing.Any) -> typing.Any:
        """Some values of the scenario-file need to be changed upon writing,
        such as the 'ta' (target algorithm), due to its callback. Also,
        the configspace, features, train_inst- and test-inst-lists are saved
        to output_dir, if they exist.

        Parameters:
        -----------
            scenario: Scenario
                Scenario-file to be written
            key: string
                Name of the attribute in scenario-file
            value: Any
                Corresponding attribute

        Returns:
        --------
            new value: string
                The altered value, to be written to file

        Sideeffects:
        ------------
          - copies files pcs_fn, train_inst_fn, test_inst_fn and feature_fn to
            output if possible, creates the files from attributes otherwise
        """
        if key in ['pcs_fn', 'train_inst_fn', 'test_inst_fn', 'feature_fn']:
            # Copy if file exists, else write to new file
            if value is not None and os.path.isfile(value):
                try:
                    assert scenario.output_dir_for_this_run is not None  # please mypy
                    new_path = shutil.copy(value,
                                           scenario.output_dir_for_this_run)
                except shutil.SameFileError:
                    new_path = value  # File is already in output_dir
                # For .pcs-file, also save with the same basename as json and use json-path!
                if key == 'pcs_fn' and scenario.cs is not None and value.endswith(
                        '.pcs'):  # type: ignore[attr-defined] # noqa F821
                    file_name = os.path.splitext(os.path.basename(value))[0]
                    assert scenario.output_dir_for_this_run is not None  # please mypy
                    new_path = os.path.join(scenario.output_dir_for_this_run,
                                            file_name + '.json')
                    self.save_configspace(
                        scenario.cs, new_path,
                        'json')  # type: ignore[attr-defined] # noqa F821
                    scenario.logger.debug(
                        "Setting the pcs_fn-attr of written scenario from %s to %s",
                        value, new_path)
            elif key == 'pcs_fn' and scenario.cs is not None:  # type: ignore[attr-defined] # noqa F821
                # No pcs file on disk: serialize the configspace from the
                # scenario object itself (pcs_new is best-effort, json always).
                try:
                    assert scenario.output_dir_for_this_run is not None  # please mypy
                    pcs_path = os.path.join(scenario.output_dir_for_this_run,
                                            'configspace.pcs')
                    self.save_configspace(
                        scenario.cs, pcs_path,
                        'pcs_new')  # type: ignore[attr-defined] # noqa F821
                except TypeError:
                    self.logger.error(
                        "Could not write pcs file to disk."
                        " ConfigSpace not compatible with (new) pcs format.")
                assert scenario.output_dir_for_this_run is not None  # please mypy
                new_path = os.path.join(scenario.output_dir_for_this_run,
                                        'configspace.json')
                self.save_configspace(
                    scenario.cs, new_path,
                    'json')  # type: ignore[attr-defined] # noqa F821
            elif key == 'train_inst_fn' and scenario.train_insts != [None]:
                assert scenario.output_dir_for_this_run is not None  # please mypy
                new_path = os.path.join(scenario.output_dir_for_this_run,
                                        'train_insts.txt')
                self.write_inst_file(scenario.train_insts, new_path)
            elif key == 'test_inst_fn' and scenario.test_insts != [None]:
                assert scenario.output_dir_for_this_run is not None  # please mypy
                new_path = os.path.join(scenario.output_dir_for_this_run,
                                        'test_insts.txt')
                self.write_inst_file(scenario.test_insts, new_path)
            elif key == 'feature_fn' and scenario.feature_dict != {}:
                assert scenario.output_dir_for_this_run is not None  # please mypy
                new_path = os.path.join(scenario.output_dir_for_this_run,
                                        'features.txt')
                self.write_inst_features_file(scenario.n_features,
                                              scenario.feature_dict, new_path)
            else:
                return None
            # New value -> new path
            return new_path
        elif key == 'ta' and value is not None:
            # Reversing the callback on 'ta' (shlex.split)
            return " ".join(value)
        elif key in ['train_insts', 'test_insts', 'cs', 'feature_dict']:
            # No need to log, recreated from files
            return None
        else:
            return value

    def write_inst_file(self, insts: typing.List[str], fn: str) -> None:
        """Writes instance-list to file.

        Parameters
        ----------
            insts: list<string>
                 Instance list to be written
            fn: string
                 Output path
        """
        with open(fn, 'w') as fh:
            fh.write("\n".join(insts))

    def write_inst_features_file(
        self,
        n_features: int,
        feat_dict: typing.Dict[str, typing.Iterable[float]],
        fn: str,
    ) -> None:
        """Writes features to file.

        Parameters
        ----------
            n_features: int
                 Number of features
            feat_dict: dict
                 Features to be written
            fn: string
                 File name of instance feature file
        """
        header = "Instance, " + ", ".join(
            ["feature" + str(i) for i in range(n_features)]) + "\n"
        body = [
            ", ".join([inst] + [str(f) for f in feat_dict[inst]]) + "\n"
            for inst in feat_dict
        ]
        with open(fn, 'w') as fh:
            fh.write(header + "".join(body))

    def save_configspace(self, cs: ConfigurationSpace, fn: str,
                         output_format: str) -> None:
        """Writing ConfigSpace to file.

        Parameters
        ----------
            cs : ConfigurationSpace
                Config-space to be written
            fn : str
                Output-file-path
            output_format : str
                Output format of the configuration space file. Currently,
                ``json`` and ``pcs_new`` are supported.

        Raises
        ------
            ValueError
                If ``output_format`` is not one of the supported formats.
        """
        writers = {'pcs_new': pcs_new.write, 'json': json.write}
        writer = writers.get(output_format)
        if writer:
            with open(fn, 'w') as fh:
                fh.write(writer(cs))
        else:
            raise ValueError(
                "Configuration space output format %s not supported. "
                "Please choose one of %s" %
                (output_format, set(writers.keys())))
Exemplo n.º 20
0
    def __init__(self, file_system=None):
        """Set up the file-system abstraction and a pickable logger.

        Parameters
        ----------
        file_system : optional
            File-system backend to use; defaults to a fresh ``LocalFS()``.
        """
        # Avoid a call in the default argument: ``file_system=LocalFS()``
        # would be evaluated once at definition time and the single instance
        # shared by every object constructed without an explicit argument.
        # NOTE(review): assumes LocalFS() is cheap and stateless — confirm.
        self.file_system = LocalFS() if file_system is None else file_system

        self.logger = PickableLoggerAdapter(name=self.__module__ + "." +
                                            self.__class__.__name__)
Exemplo n.º 21
0
class DNGO(BaseModel):
    """DNGO-style surrogate model: a neural-network feature extractor with a
    Bayesian linear regression output layer fitted on the learned features."""

    def __init__(self,
                 configspace: ConfigurationSpace,
                 types: np.ndarray,
                 bounds: typing.List[typing.Tuple[float, float]],
                 seed: int,
                 hidden_dims: typing.Optional[typing.List[int]] = None,
                 lr: float = 1e-3,
                 momentum: float = 0.999,
                 weight_decay: float = 1e-4,
                 iterations: int = 10000,
                 batch_size: int = 8,
                 var: bool = True,
                 **kwargs):
        super().__init__(configspace, types, bounds, seed, **kwargs)

        # Avoid a mutable default argument; None stands for the historical
        # default architecture of three 50-unit layers.
        if hidden_dims is None:
            hidden_dims = [50, 50, 50]

        # Create the logger first so it can be used right away; this replaces
        # a stray debug print() to stdout.
        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)
        self.logger.debug("USE DNGO")

        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        # Iteration intervals at which loss / error are reported.
        self.log_loss = 100
        self.log_error = 1000

        self.var = var
        self.hidden_dims = hidden_dims
        self.lr = lr
        self.momentum = momentum
        self.iterations = iterations
        self.weight_decay = weight_decay
        self.batch_size = batch_size

        # Built lazily by _train().
        self.nn = None
        self.blr = None

    def _train(self, X: np.ndarray, y: np.ndarray):
        """Fit the network embedding on standardized targets, then optimize
        the Bayesian linear regression layer on top of its features."""
        self.nn = SimpleNetworkEmbedding(
            hidden_dims=self.hidden_dims,
            lr=self.lr,
            seed=self.seed,
            momentum=self.momentum,
            weight_decay=self.weight_decay,
            iterations=self.iterations,
            batch_size=self.batch_size,
            var=self.var,
        )
        self.blr = BayesianLinearRegressionLayer()

        self._my = np.mean(y)
        self._sy = np.std(y)

        # Standardize on a copy instead of mutating the caller's array
        # in place (the previous `y -= ...` modified the input argument).
        # NOTE(review): assumes std(y) != 0 — confirm upstream guarantees.
        y = (y - self._my) / self._sy

        self.nn.train(X, y)
        self.blr.optimize_alpha_beta(self.nn.model, X, y)

    def _predict(self, X: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
        """Predict means and variances for X, undoing target standardization."""
        means_t, vars_t = self.blr.predict(self.nn.model, X)
        means = means_t.data.numpy().flatten()
        variances = vars_t.data.numpy().flatten()

        # Undo the target standardization applied in _train().
        means = np.array(means * self._sy + self._my).reshape([-1, 1])
        variances = np.array(variances * self._sy**2).reshape([-1, 1])

        # If not a single prediction is finite, the model is unusable for
        # this input; fall back to random means with zero variance.
        if not np.isfinite(means).any():
            self.logger.critical(
                "All DNGO predictions are NaN. Fall back to random predictions"
            )
            return np.random.randn(means.shape[0],
                                   means.shape[1]), np.zeros_like(variances)
        else:
            return means, variances
Exemplo n.º 22
0
class NeuralNet(nn.Module):
    """Feed-forward network with optional categorical embeddings and an
    optional variance output.

    The network stacks ``len(hidden_dims)`` linear layers with Tanh
    activations. With ``var=True`` the output layer has two units
    (mean, variance); the variance unit is passed through a softplus in
    :meth:`forward` to keep it positive. With ``var=False`` only a mean
    is produced.

    Parameters
    ----------
    hidden_dims : list of int
        Number of neurons in each hidden layer.
    input_size : int
        Number of input features (before embedding expansion).
    feat_type : list of int, optional
        Per-feature type following the SMAC convention: 0 encodes a
        numerical feature, a value > 0 the number of categories of a
        categorical feature, which is fed through an ``nn.Embedding``.
        An all-zero list is treated like ``None`` (no embeddings).
    var : bool
        Whether to predict a variance in addition to the mean.
    max_cat : int
        Upper bound on the embedding size of a categorical feature.
    """
    def __init__(self,
                 hidden_dims,
                 input_size,
                 feat_type=None,
                 var: bool = True,
                 max_cat: int = np.inf):
        super(NeuralNet, self).__init__()
        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)

        self.feat_type = feat_type
        self.input_size = input_size
        self.num_neurons = hidden_dims
        self.activation = nn.Tanh
        self.num_layer = len(hidden_dims)
        self.max_cat = max_cat
        # Two output units (mean, variance) or a single mean unit.
        self.n_output = 2 if var else 1

        # An all-zero feat_type means "numerical only" -> no embeddings.
        if np.sum(self.feat_type) == 0:
            self.feat_type = None

        if self.feat_type is not None:
            self.logger.info("Use cat embedding")
            assert len(self.feat_type) == self.input_size
            emb = nn.ModuleList()
            sz = int(0)
            for f in self.feat_type:
                if f == 0:
                    # In SMAC 0 encodes a numerical; the None placeholder
                    # keeps embedding indices aligned with feature indices.
                    emb.append(None)
                    sz += 1
                else:
                    # Embedding size equals the cardinality, capped at max_cat.
                    es = min(self.max_cat, int(f))
                    emb.append(nn.Embedding(int(f), es))
                    sz += es
            assert int(sz) == sz
            sz = int(sz)
            num_neurons = [sz] + self.num_neurons
            self.embedding = emb
        else:
            num_neurons = [self.input_size] + self.num_neurons

        self.weights = nn.ModuleList()
        self.acts = nn.ModuleList()

        # Log the layer layout instead of printing to stdout
        # (the print was a leftover debug statement).
        self.logger.debug("Layer sizes: %s", num_neurons)
        for i in range(self.num_layer):
            self.weights.append(nn.Linear(num_neurons[i], num_neurons[i + 1]))
            self.acts.append(self.activation())

        self.outlayer = nn.Linear(num_neurons[-1], self.n_output)

    def initialize_weights(self, var_bias_init: float = 1):
        """Initialize all layers with Xavier-normal weights and zero biases.

        The bias of the variance unit (if present) is set such that the
        softplus applied in :meth:`forward` yields ``var_bias_init``.
        """
        # Xavier normal initialization, slightly modified from
        # "Understanding the difficulty of training deep feedforward
        # neural networks" (Glorot & Bengio).
        for i in range(len(self.weights)):
            torch.nn.init.xavier_normal_(self.weights[i].weight)
            self.weights[i].bias.data.fill_(0)
        torch.nn.init.xavier_normal_(self.outlayer.weight)
        self.outlayer.bias.data[0].fill_(0)
        # Guard: with var=False there is no variance unit to initialize
        # (indexing bias.data[1] would raise an IndexError).
        if self.n_output == 2:
            if var_bias_init == 0:
                # softplus(b) == v requires b = log(exp(v) - 1), which is
                # -inf for v == 0, so fall back to a zero bias.
                self.logger.critical(
                    "Can't properly initialize bias unit, initialize with zero")
                # Bug fix: previously this re-zeroed the mean bias
                # (index 0) instead of the variance bias (index 1).
                self.outlayer.bias.data[1].fill_(0)
            else:
                self.outlayer.bias.data[1].fill_(
                    np.log(np.exp(var_bias_init) - 1))

    def learn_initial_weights(self, X):
        """Learn initial weights such that the mean over the data is on average zero per neuron"""
        output = torch.tensor(X, dtype=torch.float32)
        for i in range(len(self.weights)):
            torch.nn.init.xavier_normal_(self.weights[i].weight,
                                         torch.nn.init.calculate_gain('tanh'))
            self.weights[i].bias.data.fill_(0)
            # Shift the bias so each neuron's pre-activation is zero-mean
            # on the provided data, then propagate through the layer.
            pre_act = self.weights[i].forward(output)
            self.weights[i].bias.data = -pre_act.mean(axis=0)
            output = self.weights[i].forward(output)
            output = self.acts[i](output)
        torch.nn.init.xavier_normal_(self.outlayer.weight,
                                     torch.nn.init.calculate_gain('tanh'))
        self.outlayer.bias.data.fill_(0)
        # Guard: only set the variance bias when a variance unit exists.
        if self.n_output == 2:
            # Start with a small variance (softplus(-5) ~= 6.7e-3).
            # Noise can be tuned here...
            self.outlayer.bias.data[1] = -5

    def forward(self, x):
        """Compute the network output for a batch ``x``.

        Categorical columns (if embeddings are enabled) are replaced by
        their embedding vectors before the dense layers. The optional
        second output column is mapped through a softplus plus a small
        epsilon so it can serve as a variance.
        """
        if self.feat_type is not None:
            cols = []
            for idx, (emb,
                      typ) in enumerate(zip(self.embedding, self.feat_type)):
                if typ == 0:
                    # a numerical feature: pass through unchanged
                    cols.append(x[:, idx].view(-1, 1))
                else:
                    # a categorical feature: look up its embedding
                    cols.append(
                        emb(x[:, idx].long().view(-1, 1)).view(
                            [-1, min(self.max_cat, typ)]))
            out = torch.cat(cols, 1)
        else:
            out = x

        for i in range(self.num_layer):
            out = self.weights[i](out)
            out = self.acts[i](out)
        out = self.outlayer(out)
        if self.n_output == 2:
            # Passing second output through softplus function (see Lakshminarayanan (2017))
            out[:, 1] = torch.log(1 + torch.exp(out[:, 1])) + 10e-6
        return out
Exemplo n.º 23
0
class EnsembleNN(AbstractEPM):
    """EPM consisting of an ensemble of independently trained neural networks.

    Each member is a ``SimpleNetworkEmbedding`` trained with a different
    seed offset. Predictions are aggregated in ``_predict``: the mean over
    the member means is the prediction and the variance across member
    means is the uncertainty estimate.
    """
    def __init__(self,
                 configspace: ConfigurationSpace,
                 types: typing.List[int],
                 bounds: typing.List[typing.Tuple[float, float]],
                 seed: int,
                 hidden_dims: typing.List[int] = [50, 50, 50],
                 lr: float = 1e-3,
                 momentum: float = 0.999,
                 weight_decay: float = 1e-4,
                 iterations: int = 5000,
                 batch_size: int = 16,
                 number_of_networks: int = 5,
                 var: bool = True,
                 train_with_lognormal_llh=False,
                 compute_mean_in_logspace=False,
                 max_cat: int = np.inf,
                 ignore_cens: bool = False,
                 learned_weight_init: bool = False,
                 optimization_algorithm: str = 'sgd',
                 **kwargs):
        super().__init__(configspace, types, bounds, seed, **kwargs)
        #self.types[self.types == 0] = -1
        self.types = [int(f) for f in self.types]
        # Lognormal likelihood and log-space averaging are mutually exclusive.
        assert not (train_with_lognormal_llh and compute_mean_in_logspace)

        # NOTE(review): a non-int seed is assumed to be an indexable
        # sequence here -- confirm which callers pass e.g. an array.
        if type(self.seed) != int:
            self.seed = self.seed[0]

        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        # Logging intervals (in iterations) used during network training.
        self.log_loss = 1000
        self.log_error = 5000

        # Hyperparameters forwarded to every SimpleNetworkEmbedding member.
        self.var = var
        self.hidden_dims = hidden_dims
        self.lr = lr
        self.momentum = momentum
        self.iterations = iterations
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.number_of_networks = number_of_networks
        self.train_with_lognormal = train_with_lognormal_llh
        self.compute_mean_in_logspace = compute_mean_in_logspace
        self.max_cat = max_cat
        self.ignore_cens = ignore_cens
        self.learned_weight_init = learned_weight_init
        self.optimization_algorithm = optimization_algorithm

        # Normalization statistics of the targets (set in _preprocess_y).
        self._my = None
        self._sy = None

        # Quick check, should not take too long
        a = np.random.normal(42, 23, 1000)
        m1, v1 = (np.mean(a), np.var(a))
        a = self._preprocess_y(a)
        m2, v2 = self._postprocess_mv(np.mean(a), np.var(a))
        assert np.abs(m1 - m2) < 1e-3, (m1, m2)
        assert np.abs(v1 - v2) < 1e-3, (v1, v2)
        # Reset so the statistics are recomputed from real training data.
        self._my = None
        self._sy = None

        self.nns = None
        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)

    def _preprocess_y(self, y: np.ndarray, redo: bool = False):
        """Normalize targets to zero mean and unit variance (in place).

        The statistics are computed on the first call (or when ``redo`` is
        True) and cached in ``self._my``/``self._sy``. No scaling is
        applied when training with a lognormal likelihood. Note that the
        caller's array is modified in place by the ``-=``/``/=`` below.
        """
        if self._my is None or redo:
            self._my = np.mean(y)
            self._sy = np.std(y)
            if self._sy == 0:
                # all y's are the same
                self._sy = 1

        if not self.train_with_lognormal:
            y -= self._my
            y /= self._sy

        return y

    def _postprocess_mv(self, m: np.ndarray, v: np.ndarray):
        """Invert the target normalization for a mean/variance pair."""
        # zero mean scaling
        m = m * self._sy + self._my
        v = v * self._sy**2
        return m, v

    def _preprocess_x(self, x: np.ndarray, redo: bool = False):
        """Sanitize features; ``redo`` is accepted for symmetry with
        ``_preprocess_y`` but is currently unused."""
        # Replace nans with 0, should be fine for both cats and conts
        # TODO: Maybe refine this and replace cont with mean
        x = np.nan_to_num(x)
        return x

    def _train(self, X: np.ndarray, Y: np.ndarray, C: np.ndarray = None):
        """Train ``number_of_networks`` member networks on (X, Y).

        The censoring information ``C`` is ignored (this is not a Tobit
        model); each member network gets a different seed offset.
        """
        self.logger.critical("Not using C as this is not a Tobit model")
        Y = self._preprocess_y(Y, redo=True)
        X = self._preprocess_x(X, redo=True)
        self.train_data = (X, Y)
        self.nns = []
        self.logger.debug("Start Training %d networks" %
                          self.number_of_networks)
        for i in range(self.number_of_networks):
            nn = SimpleNetworkEmbedding(
                hidden_dims=self.hidden_dims,
                feat_types=self.types,
                lr=self.lr,
                seed=self.seed + i,
                momentum=self.momentum,
                weight_decay=self.weight_decay,
                iterations=self.iterations,
                batch_size=self.batch_size,
                var=self.var,
                lognormal_nllh=self.train_with_lognormal,
                var_bias_init=np.std(Y),
                max_cat=self.max_cat,
                learned_weight_init=self.learned_weight_init,
                optimization_algorithm=self.optimization_algorithm,
            )
            nn.reset()
            nn.train(X, Y)
            self.nns.append(nn)

    def _predict_individual(
            self, X: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
        """Return per-member means and variances for ``X``.

        Returns
        -------
        ms : np.ndarray of shape [n_samples, number_of_networks]
        vs : np.ndarray of shape [n_samples, number_of_networks]
        """
        X = self._preprocess_x(X, redo=True)
        ms = np.zeros([X.shape[0], self.number_of_networks])
        vs = np.zeros([X.shape[0], self.number_of_networks])
        for i_nn, nn in enumerate(self.nns):
            pred = nn.predict(X)
            m = pred[:, 0]
            v = pred[:, 1]

            if not self.train_with_lognormal:
                # De-normalize only if the targets were scaled in training.
                m, v = self._postprocess_mv(m, v)

            ms[:, i_nn] = m
            vs[:, i_nn] = v

        return ms, vs

    def _predict(self, X: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
        """Ensemble prediction: mean over member means, variance across
        member means (the per-member variances are not used here)."""
        ms, _ = self._predict_individual(X)
        m = ms.mean(axis=1)
        v = ms.var(axis=1)
        return m.reshape((-1, 1)), v.reshape((-1, 1))

    def predict_marginalized_over_instances(self, X: np.ndarray):
        """Predict mean and variance marginalized over all instances.

        Returns the predictive mean and variance marginalised over all
        instances for a set of configurations.

        Note
        ----
        This method overwrites the same method of ~smac.epm.base_epm.AbstractEPM;
        the following method is random forest specific
        and follows the SMAC2 implementation;
        it requires no distribution assumption
        to marginalize the uncertainty estimates

        Parameters
        ----------
        X : np.ndarray
            [n_samples, n_features (config)]

        Returns
        -------
        means : np.ndarray of shape = [n_samples, 1]
            Predictive mean
        vars : np.ndarray  of shape = [n_samples, 1]
            Predictive variance
        """

        if self.instance_features is None or \
                len(self.instance_features) == 0:
            # No instance features: plain prediction with clipped variances.
            mean_, var = self.predict(X)
            var[var < self.var_threshold] = self.var_threshold
            var[np.isnan(var)] = self.var_threshold
            return mean_, var

        if len(X.shape) != 2:
            raise ValueError('Expected 2d array, got %dd array!' %
                             len(X.shape))
        if X.shape[1] != len(self.bounds):
            raise ValueError('Rows in X should have %d entries but have %d!' %
                             (len(self.bounds), X.shape[1]))

        mean_ = np.zeros((X.shape[0], 1))
        var = np.zeros(X.shape[0])

        for i, x in enumerate(X):

            # marginalize over instance
            # 1. Get predictions for all networks

            # Not very efficient
            # preds_nns1 = np.zeros([len(self.instance_features), self.number_of_networks])
            #for i_f, feat in enumerate(self.instance_features):
            #    x_ = np.concatenate([x, feat]).reshape([1, -1])
            #    print(i_f, x_)
            #    m, _ = self._predict_individual(x_)
            #    preds_nns1[i_f, :] = m

            # Pair the configuration with every instance and evaluate all
            # of them in a single batched call.
            input = np.concatenate((np.tile(
                x, (len(self.instance_features), 1)), self.instance_features),
                                   axis=1)
            preds_nns, _ = self._predict_individual(input)

            # 2. Average in each NN for all instances
            pred_per_nn = []
            for nn_id in range(self.number_of_networks):
                if self.compute_mean_in_logspace:
                    pred_per_nn.append(
                        np.log(np.mean(np.exp(preds_nns[:, nn_id]))))
                else:
                    pred_per_nn.append(np.mean(preds_nns[:, nn_id]))

            # 3. compute statistics across trees
            mean_x = np.mean(pred_per_nn)
            var_x = np.var(pred_per_nn)
            if var_x < self.var_threshold:
                var_x = self.var_threshold

            var[i] = var_x
            mean_[i] = mean_x

        if len(mean_.shape) == 1:
            mean_ = mean_.reshape((-1, 1))
        if len(var.shape) == 1:
            var = var.reshape((-1, 1))

        return mean_, var
Exemplo n.º 24
0
    def __init__(self,
                 configspace: ConfigurationSpace,
                 types: typing.List[int],
                 bounds: typing.List[typing.Tuple[float, float]],
                 seed: int,
                 hidden_dims: typing.List[int] = [50, 50, 50],
                 lr: float = 1e-3,
                 momentum: float = 0.999,
                 weight_decay: float = 1e-4,
                 iterations: int = 5000,
                 batch_size: int = 16,
                 number_of_networks: int = 5,
                 var: bool = True,
                 train_with_lognormal_llh=False,
                 compute_mean_in_logspace=False,
                 max_cat: int = np.inf,
                 ignore_cens: bool = False,
                 learned_weight_init: bool = False,
                 optimization_algorithm: str = 'sgd',
                 **kwargs):
        """Set up an ensemble-of-networks EPM and sanity-check the target
        normalization round-trip."""
        super().__init__(configspace, types, bounds, seed, **kwargs)
        #self.types[self.types == 0] = -1
        self.types = [int(f) for f in self.types]
        # Lognormal likelihood and log-space averaging are mutually exclusive.
        assert not (train_with_lognormal_llh and compute_mean_in_logspace)

        # A non-int seed is assumed to be an indexable sequence.
        if type(self.seed) != int:
            self.seed = self.seed[0]

        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')

        # Logging intervals (in iterations) for loss/error reporting.
        self.log_loss = 1000
        self.log_error = 5000

        # Hyperparameters forwarded to each member network.
        self.var = var
        self.hidden_dims = hidden_dims
        self.lr = lr
        self.momentum = momentum
        self.iterations = iterations
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.number_of_networks = number_of_networks
        self.train_with_lognormal = train_with_lognormal_llh
        self.compute_mean_in_logspace = compute_mean_in_logspace
        self.max_cat = max_cat
        self.ignore_cens = ignore_cens
        self.learned_weight_init = learned_weight_init
        self.optimization_algorithm = optimization_algorithm

        # Target normalization statistics (filled by _preprocess_y).
        self._my = None
        self._sy = None

        # Sanity check: pre-/post-processing must round-trip mean/variance.
        probe = np.random.normal(42, 23, 1000)
        mean_before, var_before = (np.mean(probe), np.var(probe))
        probe = self._preprocess_y(probe)
        mean_after, var_after = self._postprocess_mv(np.mean(probe),
                                                     np.var(probe))
        assert np.abs(mean_before - mean_after) < 1e-3, (mean_before,
                                                         mean_after)
        assert np.abs(var_before - var_after) < 1e-3, (var_before, var_after)
        # Reset so the real training data defines the normalization.
        self._my = None
        self._sy = None

        self.nns = None
        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)
Exemplo n.º 25
0
class RunHistory(object):
    """Container for target algorithm run information.

    Most importantly, the runhistory contains an efficient mapping from each evaluated configuration to the
    empirical cost observed on either the full instance set or a subset. The cost is the average over all
    observed costs for one configuration:

    * If using budgets for a single instance, only the cost on the highest observed budget is returned.
    * If using instances as the budget, the average cost over all evaluated instances is returned.
    * Theoretically, the runhistory object can handle instances and budgets at the same time. This is
      neither used nor tested.
    * Capped runs are not included in this cost.

    Note
    ----
    Guaranteed to be picklable.

    Attributes
    ----------
    data : collections.OrderedDict()
        TODO
    config_ids : dict
        Maps config -> id
    ids_config : dict
        Maps id -> config
    num_runs_per_config : dict
        Maps config_id -> number of runs

    Parameters
    ----------
    overwrite_existing_runs : bool (default=False)
        If set to ``True`` and a run of a configuration on an instance-budget-seed-pair already exists,
        it is overwritten.
    """
    def __init__(self, overwrite_existing_runs: bool = False) -> None:
        """Constructor

        Parameters
        ----------
        overwrite_existing_runs: bool
            If True, a later measurement for an already recorded
            algorithm-instance-seed combination replaces the old result.
        """
        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)

        # Deterministic insertion order lets serialization tests assume the
        # data comes back in the same order it was added.
        self.data = collections.OrderedDict()  # type: typing.Dict[RunKey, RunValue]

        # Fast, unordered lookup: config_id -> {InstSeedKey: [budgets]}.
        # Capped runs are not registered here.
        self._configid_to_inst_seed_budget = {}  # type: typing.Dict[int, typing.Dict[InstSeedKey, typing.List[float]]]

        # Bidirectional mapping between configurations and integer ids.
        self.config_ids = {}  # type: typing.Dict[Configuration, int]
        self.ids_config = {}  # type: typing.Dict[int, Configuration]
        self._n_id = 0

        # Cached aggregated cost per configuration ID.
        self._cost_per_config = {}  # type: typing.Dict[int, float]
        # Cached minimum cost across all budgets per configuration ID.
        self._min_cost_per_config = {}  # type: typing.Dict[int, float]
        # Run count per configuration ID, needed for the moving average.
        self.num_runs_per_config = {}  # type: typing.Dict[int, int]

        # Marks entries read from a JSON file ("external") so they can be
        # excluded when writing to disk.
        self.external = {}  # type: typing.Dict[RunKey, DataOrigin]

        self.overwrite_existing_runs = overwrite_existing_runs

    def add(
        self,
        config: Configuration,
        cost: float,
        time: float,
        status: StatusType,
        instance_id: typing.Optional[str] = None,
        seed: typing.Optional[int] = None,
        budget: float = 0.0,
        starttime: float = 0.0,
        endtime: float = 0.0,
        additional_info: typing.Optional[typing.Dict] = None,
        origin: DataOrigin = DataOrigin.INTERNAL,
        force_update: bool = False,
    ) -> None:
        """Adds a data of a new target algorithm (TA) run;
        it will update data if the same key values are used
        (config, instance_id, seed)

        Parameters
        ----------
            config : dict (or other type -- depending on config space module)
                Parameter configuration
            cost: float
                Cost of TA run (will be minimized)
            time: float
                Runtime of TA run
            status: StatusType
                Status in {SUCCESS, TIMEOUT, CRASHED, ABORT, MEMOUT}
            instance_id: str
                String representing an instance (default: None)
            seed: int
                Random seed used by TA (default: None)
            budget: float
                budget (cutoff) used in intensifier to limit TA (default: 0)
            starttime: float
                starting timestamp of TA evaluation
            endtime: float
                ending timestamp of TA evaluation
            additional_info: dict
                Additional run infos (could include further returned
                information from TA or fields such as start time and host_id)
            origin: DataOrigin
                Defines how data will be used.
            force_update: bool (default: False)
                Forces the addition of a config to the history
        """

        if config is None:
            raise TypeError(
                'Configuration to add to the runhistory must not be None')
        elif not isinstance(config, Configuration):
            raise TypeError(
                'Configuration to add to the runhistory is not of type Configuration, but %s'
                % type(config))

        # Get the config id
        config_id_tmp = self.config_ids.get(config)
        if config_id_tmp is None:
            # First occurrence of this configuration: assign the next id
            # and register it in both direction mappings.
            self._n_id += 1
            self.config_ids[config] = self._n_id
            config_id = typing.cast(int, self.config_ids.get(config))
            self.ids_config[self._n_id] = config
        else:
            config_id = typing.cast(int, config_id_tmp)

        # Construct keys and values for the data dictionary
        k = RunKey(config_id, instance_id, seed, budget)
        v = RunValue(cost, time, status, starttime, endtime, additional_info)

        # Each runkey is supposed to be used only once. Repeated tries to add
        # the same runkey will be ignored silently if not capped.
        # NOTE: the order of these branches is behavior-critical -- a capped
        # (censored) run may later be replaced by an uncapped result, or by
        # another capped result observed with a larger cutoff.
        if self.overwrite_existing_runs or force_update or self.data.get(
                k) is None:
            self._add(k, v, status, origin)
        elif status != StatusType.CAPPED and self.data[
                k].status == StatusType.CAPPED:
            # overwrite capped runs with uncapped runs
            self._add(k, v, status, origin)
        elif status == StatusType.CAPPED and self.data[
                k].status == StatusType.CAPPED and cost > self.data[k].cost:
            # overwrite if censored with a larger cutoff
            self._add(k, v, status, origin)

    def _add(self, k: RunKey, v: RunValue, status: StatusType,
             origin: DataOrigin) -> None:
        """Insert one run into ``self.data`` and update the bookkeeping.

        Registers the run in the fast instance-seed-budget index and
        refreshes the cached cost statistics. Capped and still-running
        entries are stored but excluded from the cost bookkeeping.

        Parameters
        ----------
        k : RunKey
            (config_id, instance_id, seed, budget) key of the run.
        v : RunValue
            Result record of the run.
        status : StatusType
            Status of the run.
        origin : DataOrigin
            Where the data point comes from (internal run or external file).
        """
        self.data[k] = v
        self.external[k] = origin

        # Capped data is added above
        # Do not register the cost until the run has completed
        if origin in (DataOrigin.INTERNAL, DataOrigin.EXTERNAL_SAME_INSTANCES) \
                and status not in [StatusType.CAPPED, StatusType.RUNNING]:
            # also add to fast data structure
            is_k = InstSeedKey(k.instance_id, k.seed)
            self._configid_to_inst_seed_budget[
                k.config_id] = self._configid_to_inst_seed_budget.get(
                    k.config_id, {})
            observed = self._configid_to_inst_seed_budget[k.config_id]
            if is_k not in observed.keys():
                # add new inst-seed-key with budget to main dict
                observed[is_k] = [k.budget]
            elif k.budget not in observed[is_k]:
                # append new budget to existing inst-seed-key dict
                # Bug fix: previously the membership test was
                # `k.budget not in is_k`, i.e. against the InstSeedKey tuple
                # itself, so duplicate budgets were appended.
                observed[is_k].append(k.budget)

            # if budget is used, then update cost instead of incremental updates
            if not self.overwrite_existing_runs and k.budget == 0:
                # assumes an average across runs as cost function aggregation, this is used for algorithm configuration
                # (incremental updates are used to save time as getting the cost for > 100 instances is high)
                self.incremental_update_cost(self.ids_config[k.config_id],
                                             v.cost)
            else:
                # this is when budget > 0 (only successive halving and hyperband so far)
                self.update_cost(config=self.ids_config[k.config_id])
                if k.budget > 0:
                    if self.num_runs_per_config[
                            k.config_id] != 1:  # This is updated in update_cost
                        raise ValueError('This should not happen!')

    def update_cost(self, config: Configuration) -> None:
        """Recompute the cached cost statistics of *config* from scratch.

        Stores the average cost over the (max-budget) runs in
        ``self._cost_per_config``, the run count in
        ``self.num_runs_per_config``, and the minimum cost over all budgets
        in ``self._min_cost_per_config``.

        Note
        ----
        Capped runs are ignored.

        Parameters
        ----------
        config: Configuration
            configuration to update cost based on all runs in runhistory
        """
        config_id = self.config_ids[config]

        # dict.fromkeys removes duplicates while keeping the order.
        max_budget_runs = list(
            dict.fromkeys(
                self.get_runs_for_config(config,
                                         only_max_observed_budget=True)))
        self._cost_per_config[config_id] = self.average_cost(
            config, max_budget_runs)
        self.num_runs_per_config[config_id] = len(max_budget_runs)

        all_runs = list(
            dict.fromkeys(
                self.get_runs_for_config(config,
                                         only_max_observed_budget=False)))
        self._min_cost_per_config[config_id] = self.min_cost(config, all_runs)

    def incremental_update_cost(self, config: Configuration,
                                cost: float) -> None:
        """Fold one new observation into the running mean cost of *config*.

        Parameters
        ----------
        config: Configuration
            configuration whose cached cost is updated
        cost: float
            cost of the new run of config
        """
        config_id = self.config_ids[config]
        n_runs = self.num_runs_per_config.get(config_id, 0)
        prev_mean = self._cost_per_config.get(config_id, 0.)
        # Running mean: new_mean = (old_sum + cost) / (n + 1).
        self._cost_per_config[config_id] = ((prev_mean * n_runs) +
                                            cost) / (n_runs + 1)
        self.num_runs_per_config[config_id] = n_runs + 1

    def get_cost(self, config: Configuration) -> float:
        """Returns empirical cost for a configuration.

        See the class docstring for how the costs are computed. The costs are not re-computed, but are read from cache.

        Parameters
        ----------
        config: Configuration

        Returns
        -------
        cost: float
            Cached cost, or NaN for an unknown configuration.
        """
        config_id = self.config_ids.get(config)
        cost = self._cost_per_config.get(
            config_id, np.nan)  # type: ignore[arg-type] # noqa F821
        return cost

    def get_runs_for_config(
            self, config: Configuration,
            only_max_observed_budget: bool) -> typing.List[InstSeedBudgetKey]:
        """Return all runs (instance-seed-budget triples) for a configuration.

        Note
        ----
        Capped runs are not returned by this method.

        Parameters
        ----------
        config : Configuration from ConfigSpace
            Parameter configuration
        only_max_observed_budget : bool
            Report only the maximally observed budget per instance-seed pair
        Returns
        -------
        instance_seed_budget_pairs : list<tuples of instance, seed, budget>
        """
        config_id = self.config_ids.get(config)
        runs = self._configid_to_inst_seed_budget.get(
            config_id, {}).copy()  # type: ignore[arg-type] # noqa F821

        if only_max_observed_budget:
            # Collapse each budget list to its maximum.
            runs = {key: [max(budgets)] for key, budgets in runs.items()}

        # Flatten into one InstSeedBudgetKey per (pair, budget) combination.
        return [
            InstSeedBudgetKey(key.instance, key.seed, budget)
            for key, budgets in runs.items() for budget in budgets
        ]

    def get_all_configs(self) -> typing.List[Configuration]:
        """Return every configuration contained in this RunHistory object

        Returns
        -------
            parameter configurations: list
        """
        return [config for config in self.config_ids]

    def get_all_configs_per_budget(
        self,
        budget_subset: typing.Optional[typing.List] = None,
    ) -> typing.List[Configuration]:
        """
        Return all configs in this RunHistory object that have been run on one of these budgets

        Parameter
        ---------
            budget_subset: list
                Budgets to filter by; ``None`` selects all configurations.

        Returns
        -------
            parameter configurations: list
        """
        if budget_subset is None:
            return self.get_all_configs()
        # One entry per matching run key (duplicates are intentional).
        return [
            self.ids_config[config_id]
            for config_id, _, _, budget in self.data.keys()
            if budget in budget_subset
        ]

    def get_min_cost(self, config: Configuration) -> float:
        """Returns the lowest empirical cost for a configuration, across all runs (budgets)

        See the class docstring for how the costs are computed. The costs are not re-computed, but are read from cache.

        Parameters
        ----------
        config: Configuration

        Returns
        -------
        min_cost: float
            Cached minimum cost, or NaN for an unknown configuration.
        """
        config_id = self.config_ids.get(config)
        min_cost = self._min_cost_per_config.get(
            config_id, np.nan)  # type: ignore[arg-type] # noqa F821
        return min_cost

    def empty(self) -> bool:
        """Check whether or not the RunHistory is empty.

        Returns
        -------
        emptiness: bool
            True if no runs have been added to this RunHistory yet,
            False otherwise.
        """
        return not self.data

    def save_json(self,
                  fn: str = "runhistory.json",
                  save_external: bool = False) -> None:
        """
        saves runhistory on disk

        Parameters
        ----------
        fn : str
            file name
        save_external : bool
            Whether to save external data in the runhistory file.
        """
        # Serialize each run as ([config_id, instance_id, seed, budget], values).
        data = []
        for k, v in self.data.items():
            if not (save_external or self.external[k] == DataOrigin.INTERNAL):
                continue
            key = [
                int(k.config_id),
                str(k.instance_id) if k.instance_id is not None else None,
                int(k.seed),
                float(k.budget) if k.budget is not None else 0
            ]
            data.append((key, list(v)))

        # Only write out configurations that are actually referenced.
        referenced_ids = set(entry[0][0] for entry in data)
        configs = {
            id_: conf.get_dictionary()
            for id_, conf in self.ids_config.items()
            if id_ in referenced_ids
        }
        config_origins = {
            id_: conf.origin
            for id_, conf in self.ids_config.items()
            if (id_ in referenced_ids and conf.origin is not None)
        }

        with open(fn, "w") as fp:
            json.dump(
                {
                    "data": data,
                    "config_origins": config_origins,
                    "configs": configs
                },
                fp,
                cls=EnumEncoder,
                indent=2)

    def load_json(self, fn: str, cs: ConfigurationSpace) -> None:
        """Load a runhistory in json representation from disk.

        Overwrites current runhistory!

        Parameters
        ----------
        fn : str
            file name to load from
        cs : ConfigSpace
            instance of configuration space
        """
        try:
            with open(fn) as fp:
                all_data = json.load(fp, object_hook=StatusType.enum_hook)
        except Exception as e:
            # Deliberately broad: a missing or corrupt file must not abort
            # the run -- log a warning and keep the current history.
            self.logger.warning(
                'Encountered exception %s while reading runhistory from %s. '
                'Not adding any runs!',
                e,
                fn,
            )
            return

        config_origins = all_data.get("config_origins", {})

        # Rebuild the id <-> configuration mappings from the serialized
        # dictionaries (ids are stored as strings in JSON).
        self.ids_config = {
            int(id_): Configuration(cs,
                                    values=values,
                                    origin=config_origins.get(id_, None))
            for id_, values in all_data["configs"].items()
        }

        self.config_ids = {
            config: id_
            for id_, config in self.ids_config.items()
        }

        self._n_id = len(self.config_ids)

        # important to use add method to use all data structure correctly
        for k, v in all_data["data"]:
            self.add(config=self.ids_config[int(k[0])],
                     cost=float(v[0]),
                     time=float(v[1]),
                     status=StatusType(v[2]),
                     instance_id=k[1],
                     seed=int(k[2]),
                     budget=float(k[3]) if len(k) == 4 else 0,
                     starttime=v[3],
                     endtime=v[4],
                     additional_info=v[5])

    def update_from_json(
        self,
        fn: str,
        cs: ConfigurationSpace,
        origin: DataOrigin = DataOrigin.EXTERNAL_SAME_INSTANCES,
    ) -> None:
        """Update the current runhistory by adding new runs from a json file.

        Parameters
        ----------
        fn : str
            File name to load from.
        cs : ConfigSpace
            Instance of configuration space.
        origin : DataOrigin
            What to store as data origin.
        """
        # Parse the file into a scratch runhistory first, then merge it
        # into self so that existing entries are preserved.
        loaded = RunHistory()
        loaded.load_json(fn, cs)
        self.update(runhistory=loaded, origin=origin)

    def update(
        self,
        runhistory: 'RunHistory',
        origin: DataOrigin = DataOrigin.EXTERNAL_SAME_INSTANCES,
    ) -> None:
        """Update the current runhistory by adding new runs from a RunHistory.

        Parameters
        ----------
        runhistory: RunHistory
            Runhistory with additional data to be added to self
        origin: DataOrigin
            If set to ``INTERNAL`` or ``EXTERNAL_FULL`` the data will be
            added to the internal data structure
            self._configid_to_inst_seed_budget and be available through
            :meth:`get_runs_for_config`.
        """
        # Configurations may already be known under a different ID; that is
        # fine because add() assigns an ID to unknown configurations and
        # re-uses the existing ID otherwise.
        for run_key, run_value in runhistory.data.items():
            cfg_id, inst_id, seed_, budget_ = run_key
            cost_, duration, status_, start_t, end_t, extra = run_value
            self.add(config=runhistory.ids_config[cfg_id],
                     cost=cost_,
                     time=duration,
                     status=status_,
                     instance_id=inst_id,
                     starttime=start_t,
                     endtime=end_t,
                     seed=seed_,
                     budget=budget_,
                     additional_info=extra,
                     origin=origin)

    def _cost(
        self,
        config: Configuration,
        instance_seed_budget_keys: typing.Optional[
            typing.Iterable[InstSeedBudgetKey]] = None,
    ) -> typing.List[float]:
        """Return a list of all costs of the given config for further calculations.

        Parameters
        ----------
        config : Configuration
            Configuration to calculate objective for
        instance_seed_budget_keys : list, optional (default=None)
            List of tuples of instance-seeds-budget keys. If None, the
            run_history is queried for all runs of the given configuration.

        Returns
        -------
        Costs: list
            Array of all costs
        """
        # A configuration that was never run has no ID and thus no costs.
        if config not in self.config_ids:
            return []
        config_id = self.config_ids[config]

        if instance_seed_budget_keys is None:
            instance_seed_budget_keys = self.get_runs_for_config(
                config, only_max_observed_budget=True)

        return [
            self.data[RunKey(config_id, inst, seed, budget)].cost
            for inst, seed, budget in instance_seed_budget_keys
        ]

    def average_cost(
        self,
        config: Configuration,
        instance_seed_budget_keys: typing.Optional[
            typing.Iterable[InstSeedBudgetKey]] = None,
    ) -> float:
        """Return the average cost of a configuration.

        This is the mean of costs of all instance-seed pairs.

        Parameters
        ----------
        config : Configuration
            Configuration to calculate objective for
        instance_seed_budget_keys : list, optional (default=None)
            List of tuples of instance-seeds-budget keys. If None, the
            run_history is queried for all runs of the given configuration.

        Returns
        ----------
        Cost: float
            Average cost
        """
        observed = self._cost(config, instance_seed_budget_keys)
        # No recorded runs -> no meaningful average.
        if not observed:
            return np.nan
        return float(np.mean(observed))

    def sum_cost(
        self,
        config: Configuration,
        instance_seed_budget_keys: typing.Optional[
            typing.Iterable[InstSeedBudgetKey]] = None,
    ) -> float:
        """Return the sum of costs of a configuration.

        This is the sum of costs of all instance-seed pairs.

        Parameters
        ----------
        config : Configuration
            Configuration to calculate objective for
        instance_seed_budget_keys : list, optional (default=None)
            List of tuples of instance-seeds-budget keys. If None, the
            run_history is queried for all runs of the given configuration.

        Returns
        ----------
        sum_cost: float
            Sum of costs of config
        """
        all_costs = self._cost(config, instance_seed_budget_keys)
        # np.sum of an empty list is 0.0, so no special-casing is needed.
        return float(np.sum(all_costs))

    def min_cost(
        self,
        config: Configuration,
        instance_seed_budget_keys: typing.Optional[
            typing.Iterable[InstSeedBudgetKey]] = None,
    ) -> float:
        """Return the minimum cost of a configuration.

        This is the minimum cost of all instance-seed pairs.

        Parameters
        ----------
        config : Configuration
            Configuration to calculate objective for
        instance_seed_budget_keys : list, optional (default=None)
            List of tuples of instance-seeds-budget keys. If None, the
            run_history is queried for all runs of the given configuration.

        Returns
        ----------
        min_cost: float
            minimum cost of config
        """
        observed = self._cost(config, instance_seed_budget_keys)
        # No recorded runs -> no meaningful minimum.
        if not observed:
            return np.nan
        return float(np.min(observed))

    def compute_all_costs(self,
                          instances: typing.Optional[typing.List[str]] = None
                          ) -> None:
        """Computes the cost of all configurations from scratch and overwrites
        self.cost_perf_config and self.runs_per_config accordingly;

        Note
        ----
        This method is only used for ``merge_foreign_data`` and should be removed.

        Parameters
        ----------
        instances: typing.List[str]
            list of instances; if given, cost is only computed wrt to this instance set
        """
        self._cost_per_config = {}
        self.num_runs_per_config = {}
        # Bug fix: _min_cost_per_config is filled in the loop below but was
        # never reset here, so a recompute could keep stale minima for
        # configurations that no longer have runs on <instances>.
        self._min_cost_per_config = {}
        for config, config_id in self.config_ids.items():
            # dict.fromkeys removes duplicates while keeping insertion order
            inst_seed_budgets = list(
                dict.fromkeys(
                    self.get_runs_for_config(config,
                                             only_max_observed_budget=True)))
            if instances is not None:
                # Restrict to the requested instance subset.
                inst_seed_budgets = [
                    isb for isb in inst_seed_budgets
                    if isb.instance in instances
                ]

            if inst_seed_budgets:  # can be empty if never saw any runs on <instances>
                self._cost_per_config[config_id] = self.average_cost(
                    config, inst_seed_budgets)
                self._min_cost_per_config[config_id] = self.min_cost(
                    config, inst_seed_budgets)
                self.num_runs_per_config[config_id] = len(inst_seed_budgets)

    def get_instance_costs_for_config(
            self,
            config: Configuration) -> typing.Dict[str, float]:
        """ Returns the average cost per instance (across seeds) for a configuration

        If the runhistory contains budgets, only the highest budget for a configuration is returned.

        Note
        ----
        This is used by the pSMAC facade to determine the incumbent after the evaluation.

        Parameters
        ----------
        config : Configuration from ConfigSpace
            Parameter configuration

        Returns
        -------
        cost_per_inst: dict<instance name<str>, cost<float>>
        """
        runs_ = self.get_runs_for_config(config, only_max_observed_budget=True)
        # Collect every observed cost per instance, then average across seeds.
        cost_per_inst = {}  # type: typing.Dict[str, typing.List[float]]
        for inst, seed, budget in runs_:
            rkey = RunKey(self.config_ids[config], inst, seed, budget)
            cost_per_inst.setdefault(inst, []).append(self.data[rkey].cost)
        # Bug fix: the return annotation previously promised List[float] per
        # instance, but the method returns the *mean* cost per instance.
        return {
            inst: float(np.mean(costs))
            for inst, costs in cost_per_inst.items()
        }
Exemplo n.º 26
0
class RunHistory(object):
    """Container for target algorithm run information.

    **Note:** Guaranteed to be picklable.

    Attributes
    ----------
    data : collections.OrderedDict()
        Maps RunKey (config_id, instance_id, seed) -> RunValue
        (cost, time, status, additional_info).
    config_ids : dict
        Maps config -> id
    ids_config : dict
        Maps id -> config
    cost_per_config : dict
        Maps config_id -> cost
    runs_per_config : dict
        Maps config_id -> number of runs

    aggregate_func
    overwrite_existing_runs
    """
    def __init__(self,
                 aggregate_func: typing.Callable,
                 overwrite_existing_runs: bool = False) -> None:
        """Constructor

        Parameters
        ----------
        aggregate_func: callable
            function to aggregate perf across instances
        overwrite_existing_runs: bool
            allows to overwrites old results if pairs of
            algorithm-instance-seed were measured
            multiple times
        """
        self.logger = PickableLoggerAdapter(self.__module__ + "." +
                                            self.__class__.__name__)

        # By having the data in a deterministic order we can do useful tests
        # when we serialize the data and can assume it's still in the same
        # order as it was added.
        self.data = collections.OrderedDict(
        )  # type: typing.Dict[RunKey, RunValue]

        # for fast access, we have also an unordered data structure
        # to get all instance seed pairs of a configuration
        self._configid_to_inst_seed = {}  # type: typing.Dict[int, InstSeedKey]

        self.config_ids = {}  # type: typing.Dict[Configuration, int]
        self.ids_config = {}  # type: typing.Dict[int, Configuration]
        # Running counter used to hand out fresh configuration IDs in add().
        self._n_id = 0

        # Stores cost for each configuration ID
        self.cost_per_config = {}  # type: typing.Dict[int, float]
        # runs_per_config maps the configuration ID to the number of runs for that configuration
        # and is necessary for computing the moving average
        self.runs_per_config = {}  # type: typing.Dict[int, int]

        # Store whether a datapoint is "external", which means it was read from
        # a JSON file. Can be chosen to not be written to disk
        self.external = {}  # type: typing.Dict[RunKey, DataOrigin]

        self.aggregate_func = aggregate_func
        self.overwrite_existing_runs = overwrite_existing_runs

    def add(self,
            config: Configuration,
            cost: float,
            time: float,
            status: StatusType,
            instance_id: typing.Optional[str] = None,
            seed: typing.Optional[int] = None,
            additional_info: typing.Optional[dict] = None,
            origin: DataOrigin = DataOrigin.INTERNAL):
        """Adds a data of a new target algorithm (TA) run;
        it will update data if the same key values are used
        (config, instance_id, seed)

        Parameters
        ----------
            config : dict (or other type -- depending on config space module)
                Parameter configuration
            cost: float
                Cost of TA run (will be minimized)
            time: float
                Runtime of TA run
            status: str
                Status in {SUCCESS, TIMEOUT, CRASHED, ABORT, MEMOUT}
            instance_id: str
                String representing an instance (default: None)
            seed: int
                Random seed used by TA (default: None)
            additional_info: dict
                Additional run infos (could include further returned
                information from TA or fields such as start time and host_id)
            origin: DataOrigin
                Defines how data will be used.
        """

        config_id = self.config_ids.get(config)
        if config_id is None:
            # Unknown configuration: register it under a fresh ID and
            # re-read the ID from the mapping just written.
            self._n_id += 1
            self.config_ids[config] = self._n_id
            config_id = self.config_ids.get(config)
            self.ids_config[self._n_id] = config

        k = RunKey(config_id, instance_id, seed)
        v = RunValue(cost, time, status, additional_info)

        # Each runkey is supposed to be used only once. Repeated tries to add
        # the same runkey will be ignored silently if not capped.
        if self.overwrite_existing_runs or self.data.get(k) is None:
            self._add(k, v, status, origin)
        elif status != StatusType.CAPPED and self.data[
                k].status == StatusType.CAPPED:
            # overwrite capped runs with uncapped runs
            self._add(k, v, status, origin)
        elif status == StatusType.CAPPED and self.data[
                k].status == StatusType.CAPPED and cost > self.data[k].cost:
            # overwrite if censored with a larger cutoff
            self._add(k, v, status, origin)

    def _add(self, k: RunKey, v: RunValue, status: StatusType,
             origin: DataOrigin):
        """Actual function to add new entry to data structures

        Stores the run value and its origin; for internal or
        same-instance external data that was not capped it also updates
        the fast instance-seed lookup and the aggregated cost statistics.
        """
        self.data[k] = v
        self.external[k] = origin

        if origin in (DataOrigin.INTERNAL, DataOrigin.EXTERNAL_SAME_INSTANCES) \
                and status != StatusType.CAPPED:
            # also add to fast data structure
            is_k = InstSeedKey(k.instance_id, k.seed)
            self._configid_to_inst_seed[
                k.config_id] = self._configid_to_inst_seed.get(
                    k.config_id, [])
            if is_k not in self._configid_to_inst_seed[k.config_id]:
                self._configid_to_inst_seed[k.config_id].append(is_k)

            if not self.overwrite_existing_runs:
                # assumes an average across runs as cost function aggregation
                self.incremental_update_cost(self.ids_config[k.config_id],
                                             v.cost)
            else:
                # runs can be replaced, so recompute the cost from scratch
                self.update_cost(config=self.ids_config[k.config_id])

    def update_cost(self, config: Configuration):
        """Store the performance of a configuration across the instances in
        self.cost_perf_config and also updates self.runs_per_config;
        uses self.aggregate_func

        Parameters
        ----------
        config: Configuration
            configuration to update cost based on all runs in runhistory
        """
        inst_seeds = set(self.get_runs_for_config(config))
        perf = self.aggregate_func(config, self, inst_seeds)
        config_id = self.config_ids[config]
        self.cost_per_config[config_id] = perf
        self.runs_per_config[config_id] = len(inst_seeds)

    def compute_all_costs(self, instances: typing.Optional[typing.List[str]] = None):
        """Computes the cost of all configurations from scratch and overwrites
        self.cost_perf_config and self.runs_per_config accordingly;

        Parameters
        ----------
        instances: typing.List[str]
            list of instances; if given, cost is only computed wrt to this instance set
        """

        self.cost_per_config = {}
        self.runs_per_config = {}
        for config, config_id in self.config_ids.items():
            inst_seeds = set(self.get_runs_for_config(config))
            if instances is not None:
                # restrict aggregation to the given instance subset
                inst_seeds = list(
                    filter(lambda x: x.instance in instances, inst_seeds))

            if inst_seeds:  # can be empty if never saw any runs on <instances>
                perf = self.aggregate_func(config, self, inst_seeds)
                self.cost_per_config[config_id] = perf
                self.runs_per_config[config_id] = len(inst_seeds)

    def incremental_update_cost(self, config: Configuration, cost: float):
        """Incrementally updates the performance of a configuration by using a
        moving average;

        Parameters
        ----------
        config: Configuration
            configuration to update cost based on all runs in runhistory
        cost: float
            cost of new run of config
        """

        config_id = self.config_ids[config]
        n_runs = self.runs_per_config.get(config_id, 0)
        old_cost = self.cost_per_config.get(config_id, 0.)
        # running mean: (old_sum + new_cost) / (n + 1)
        self.cost_per_config[config_id] = (
            (old_cost * n_runs) + cost) / (n_runs + 1)
        self.runs_per_config[config_id] = n_runs + 1

    def get_cost(self, config: Configuration):
        """Returns empirical cost for a configuration; uses self.cost_per_config

        Parameters
        ----------
        config: Configuration

        Returns
        -------
        cost: float
            Computed cost for configuration
        """
        config_id = self.config_ids[config]
        # np.nan signals "no cost recorded yet" for this configuration
        return self.cost_per_config.get(config_id, np.nan)

    def get_runs_for_config(self, config: Configuration):
        """Return all runs (instance seed pairs) for a configuration.

        Parameters
        ----------
        config : Configuration from ConfigSpace
            Parameter configuration

        Returns
        -------
        instance_seed_pairs : list<tuples of instance, seed>
        """
        config_id = self.config_ids.get(config)
        return self._configid_to_inst_seed.get(config_id, [])

    def get_instance_costs_for_config(self, config: Configuration):
        """
            Returns the average cost per instance (across seeds)
            for a configuration
            Parameters
            ----------
            config : Configuration from ConfigSpace
                Parameter configuration

            Returns
            -------
            cost_per_inst: dict<instance name<str>, cost<float>>
        """
        config_id = self.config_ids.get(config)
        runs_ = self._configid_to_inst_seed.get(config_id, [])
        # collect all costs per instance, then average across seeds
        cost_per_inst = {}
        for inst, seed in runs_:
            cost_per_inst[inst] = cost_per_inst.get(inst, [])
            rkey = RunKey(config_id, inst, seed)
            vkey = self.data[rkey]
            cost_per_inst[inst].append(vkey.cost)
        cost_per_inst = dict([(inst, np.mean(costs))
                              for inst, costs in cost_per_inst.items()])
        return cost_per_inst

    def get_all_configs(self):
        """Return all configurations in this RunHistory object

        Returns
        -------
            parameter configurations: list
        """
        return list(self.config_ids.keys())

    def empty(self):
        """Check whether or not the RunHistory is empty.

        Returns
        -------
        emptiness: bool
            True if NO runs have been added to the RunHistory,
            False otherwise
        """
        return len(self.data) == 0

    def save_json(self,
                  fn: str = "runhistory.json",
                  save_external: bool = False):
        """
        saves runhistory on disk

        Parameters
        ----------
        fn : str
            file name
        save_external : bool
            Whether to save external data in the runhistory file.
        """
        # replace colons in the file name, presumably for file-system
        # portability (e.g. Windows) -- TODO confirm intent
        fn = fn.replace(":", "-")
        data = [([
            int(k.config_id),
            str(k.instance_id) if k.instance_id is not None else None,
            int(k.seed)
        ], list(v)) for k, v in self.data.items()
                if save_external or self.external[k] == DataOrigin.INTERNAL]
        # only serialize configurations referenced by the serialized runs
        config_ids_to_serialize = set([entry[0][0] for entry in data])
        configs = {
            id_: conf.get_dictionary()
            for id_, conf in self.ids_config.items()
            if id_ in config_ids_to_serialize
        }
        config_origins = {
            id_: conf.origin
            for id_, conf in self.ids_config.items()
            if (id_ in config_ids_to_serialize and conf.origin is not None)
        }

        with open(fn, "w") as fp:
            json.dump(
                {
                    "data": data,
                    "config_origins": config_origins,
                    "configs": configs
                },
                fp,
                cls=EnumEncoder,
                indent=2)

    def load_json(self, fn: str, cs: ConfigurationSpace):
        """Load a runhistory in json representation from disk.

        Overwrites current runhistory!

        Parameters
        ----------
        fn : str
            file name to load from
        cs : ConfigSpace
            instance of configuration space
        """
        try:
            with open(fn) as fp:
                all_data = json.load(fp, object_hook=StatusType.enum_hook)
        except Exception as e:
            # best effort: a malformed/missing file is logged, not raised
            self.logger.warning(
                'Encountered exception %s while reading runhistory from %s. '
                'Not adding any runs!',
                e,
                fn,
            )
            return

        config_origins = all_data.get("config_origins", {})

        # JSON keys are strings, hence the int() conversion
        self.ids_config = {
            int(id_): Configuration(cs,
                                    values=values,
                                    origin=config_origins.get(id_, None))
            for id_, values in all_data["configs"].items()
        }

        self.config_ids = {
            config: id_
            for id_, config in self.ids_config.items()
        }

        self._n_id = len(self.config_ids)

        # important to use add method to use all data structure correctly
        for k, v in all_data["data"]:
            self.add(config=self.ids_config[int(k[0])],
                     cost=float(v[0]),
                     time=float(v[1]),
                     status=StatusType(v[2]),
                     instance_id=k[1],
                     seed=int(k[2]),
                     additional_info=v[3])

    def update_from_json(
            self,
            fn: str,
            cs: ConfigurationSpace,
            origin: DataOrigin = DataOrigin.EXTERNAL_SAME_INSTANCES):
        """Update the current runhistory by adding new runs from a json file.

        Parameters
        ----------
        fn : str
            File name to load from.
        cs : ConfigSpace
            Instance of configuration space.
        origin : DataOrigin
            What to store as data origin.
        """
        new_runhistory = RunHistory(self.aggregate_func)
        new_runhistory.load_json(fn, cs)
        self.update(runhistory=new_runhistory, origin=origin)

    def update(self,
               runhistory: 'RunHistory',
               origin: DataOrigin = DataOrigin.EXTERNAL_SAME_INSTANCES):
        """Update the current runhistory by adding new runs from a RunHistory.

        Parameters
        ----------
        runhistory: RunHistory
            Runhistory with additional data to be added to self
        origin: DataOrigin
            If set to ``INTERNAL`` or ``EXTERNAL_FULL`` the data will be
            added to the internal data structure self._configid_to_inst_seed
            and be available :meth:`through get_runs_for_config`.
        """

        # Configurations might be already known, but by a different ID. This
        # does not matter here because the add() method handles this
        # correctly by assigning an ID to unknown configurations and re-using
        #  the ID
        for key, value in runhistory.data.items():
            config_id, instance_id, seed = key
            cost, time, status, additional_info = value
            config = runhistory.ids_config[config_id]
            self.add(config=config,
                     cost=cost,
                     time=time,
                     status=status,
                     instance_id=instance_id,
                     seed=seed,
                     additional_info=additional_info,
                     origin=origin)
Exemplo n.º 27
0
 def __init__(self) -> None:
     """Create a logger named after the concrete subclass (module.ClassName)."""
     self.logger = PickableLoggerAdapter(name=self.__module__ + "." +
                                         self.__class__.__name__)
Exemplo n.º 28
0
class AbstractTAFunc(SerialRunner):
    """Baseclass to execute target algorithms which are python functions.

    **Note:** Do not use directly.

    Attributes
    ----------
    memory_limit
    use_pynisher
    """
    def __init__(
        self,
        ta: typing.Callable,
        stats: Stats,
        run_obj: str = "quality",
        memory_limit: typing.Optional[int] = None,
        par_factor: int = 1,
        cost_for_crash: float = float(MAXINT),
        abort_on_first_run_crash: bool = False,
        use_pynisher: bool = True,
    ):
        """
        Abstract class for having a function as target algorithm

        Parameters
        ----------
        ta : callable
            Function (target algorithm) to be optimized.
        stats: Stats()
             stats object to collect statistics about runtime and so on
        run_obj: str
            run objective of SMAC
        memory_limit : int, optional
            Memory limit (in MB) that will be applied to the target algorithm.
        par_factor: int
            penalization factor
        cost_for_crash : float
            cost that is used in case of crashed runs (including runs
            that returned NaN or inf)
        use_pynisher: bool
            use pynisher to limit resources;
            if disabled
              * TA func can use as many resources
              as it wants (time and memory) --- use with caution
              * all runs will be returned as SUCCESS if returned value is not None
        """
        # Validate first: inspect.signature() below would raise a less
        # informative error when handed a non-callable argument.
        if not callable(ta):
            raise TypeError('Argument `ta` must be a callable, but is %s' %
                            type(ta))

        super().__init__(
            ta=ta,
            stats=stats,
            run_obj=run_obj,
            par_factor=par_factor,
            cost_for_crash=cost_for_crash,
            abort_on_first_run_crash=abort_on_first_run_crash,
        )

        # Mirror the base-class state locally (kept for backward compatibility
        # with code that reads these attributes directly off this class).
        self.ta = ta
        self.stats = stats
        self.run_obj = run_obj

        self.par_factor = par_factor
        self.cost_for_crash = cost_for_crash
        self.abort_on_first_run_crash = abort_on_first_run_crash

        # Only pass `seed`/`instance`/`budget` to the target function if its
        # signature actually declares them (see run()).
        signature = inspect.signature(ta).parameters
        self._accepts_seed = 'seed' in signature.keys()
        self._accepts_instance = 'instance' in signature.keys()
        self._accepts_budget = 'budget' in signature.keys()
        self._ta = typing.cast(typing.Callable, ta)

        if memory_limit is not None:
            # pynisher expects an integer number of MB; round up so the limit
            # is never tighter than requested.
            memory_limit = int(math.ceil(memory_limit))
        self.memory_limit = memory_limit

        self.use_pynisher = use_pynisher

        self.logger = PickableLoggerAdapter(self.__module__ + '.' +
                                            self.__class__.__name__)

    def run(
        self,
        config: Configuration,
        instance: typing.Optional[str] = None,
        cutoff: typing.Optional[float] = None,
        seed: int = 12345,
        budget: typing.Optional[float] = None,
        instance_specific: str = "0"
    ) -> typing.Tuple[StatusType, float, float, typing.Dict]:
        """Runs target algorithm <self._ta> with configuration <config> for at
        most <cutoff> seconds, allowing it to use at most <memory_limit> RAM.

        Whether the target algorithm is called with the <instance> and
        <seed> depends on the subclass implementing the actual call to
        the target algorithm.

        Parameters
        ----------
            config : Configuration, dictionary (or similar)
                Dictionary param -> value
            instance : str, optional
                Problem instance
            cutoff : float, optional
                Wallclock time limit of the target algorithm. If no value is
                provided no limit will be enforced. It is casted to integer internally.
            seed : int
                Random seed
            budget : float, optional
                A positive, real-valued number representing an arbitrary limit to the target algorithm
                Handled by the target algorithm internally
            instance_specific: str
                Instance specific information (e.g., domain file or solution)
        Returns
        -------
            status: enum of StatusType (int)
                {SUCCESS, TIMEOUT, CRASHED, ABORT}
            cost: float
                cost/regret/quality/runtime (float) (None, if not returned by TA)
            runtime: float
                runtime (None if not returned by TA)
            additional_info: dict
                all further additional run information
        """

        # Forward only the keyword arguments the target function's signature
        # actually declares (detected once in __init__).
        obj_kwargs = {
        }  # type: typing.Dict[str, typing.Union[int, str, float, None]]
        if self._accepts_seed:
            obj_kwargs['seed'] = seed
        if self._accepts_instance:
            obj_kwargs['instance'] = instance
        if self._accepts_budget:
            obj_kwargs['budget'] = budget

        if self.use_pynisher:
            # walltime for pynisher has to be a rounded up integer
            if cutoff is not None:
                cutoff = int(math.ceil(cutoff))
                if cutoff > MAX_CUTOFF:
                    raise ValueError(
                        "%d is outside the legal range of [0, 65535] "
                        "for cutoff (when using pynisher, due to OS limitations)"
                        % cutoff)

            arguments = {
                'logger': self.logger,
                'wall_time_in_s': cutoff,
                'mem_in_mb': self.memory_limit
            }

            # call ta
            try:
                # Wrap the target function in a pynisher resource-limited
                # subprocess; the wrapped object records exit status and time.
                obj = pynisher.enforce_limits(**arguments)(self._ta)
                rval = self._call_ta(obj, config, obj_kwargs)
            except Exception as e:
                # Failures while setting up / calling the wrapper are reported
                # as CRASHED with zero runtime and the traceback attached.
                exception_traceback = traceback.format_exc()
                error_message = repr(e)
                additional_info = {
                    'traceback': exception_traceback,
                    'error': error_message
                }
                return StatusType.CRASHED, self.cost_for_crash, 0.0, additional_info

            # The TA may return either a bare cost or (cost, info-dict).
            if isinstance(rval, tuple):
                result = rval[0]
                additional_run_info = rval[1]
            else:
                result = rval
                additional_run_info = {}

            # get status, cost, time
            # NOTE(review): timeouts and memouts are charged the crash cost
            # here; for run_obj == "runtime" callers may expect a
            # PAR-penalized cutoff instead — confirm intended semantics.
            if obj.exit_status is pynisher.TimeoutException:
                status = StatusType.TIMEOUT
                cost = self.cost_for_crash
            elif obj.exit_status is pynisher.MemorylimitException:
                status = StatusType.MEMOUT
                cost = self.cost_for_crash
            elif obj.exit_status == 0 and result is not None:
                status = StatusType.SUCCESS
                cost = result
            else:
                status = StatusType.CRASHED
                cost = self.cost_for_crash

            runtime = float(obj.wall_clock_time)
        else:
            # No resource limits: run the TA in-process and time it ourselves.
            start_time = time.time()
            # call ta
            try:
                rval = self._call_ta(self._ta, config, obj_kwargs)
                if isinstance(rval, tuple):
                    result = rval[0]
                    additional_run_info = rval[1]
                else:
                    result = rval
                    additional_run_info = {}
                status = StatusType.SUCCESS
                cost = result
            except Exception as e:
                self.logger.exception(e)
                cost, result = self.cost_for_crash, self.cost_for_crash
                status = StatusType.CRASHED
                additional_run_info = {}

            runtime = time.time() - start_time

        # A "successful" run that did not yield a numeric cost is demoted to
        # CRASHED so downstream aggregation never sees a non-numeric cost.
        if status == StatusType.SUCCESS and not isinstance(
                result, (int, float)):
            status = StatusType.CRASHED
            cost = self.cost_for_crash

        return status, cost, runtime, additional_run_info

    def _call_ta(
        self,
        obj: typing.Callable,
        config: Configuration,
        obj_kwargs: typing.Dict[str, typing.Union[int, str, float, None]],
    ) -> typing.Union[float, typing.Tuple[float, typing.Dict]]:
        """Invoke the (possibly pynisher-wrapped) target function.

        Subclasses decide how ``config`` is translated into call arguments
        (e.g. as a dict or as keyword arguments) and must return either a
        bare cost or a ``(cost, additional_run_info)`` tuple, which ``run``
        then unpacks.
        """
        raise NotImplementedError()