class BaseRunner(ABC):
    """Interface class to handle the execution of SMAC's configurations.

    This interface defines how to interact with the SMBO loop.
    The complexity of running a configuration as well as handling the
    results is abstracted away from the SMBO via a BaseRunner.

    From the SMBO perspective, launching a configuration follows a
    submit/collect scheme:

    1- A run is launched via submit_run().

       1.1- submit_run internally calls run_wrapper(), a method that
            contains common processing functions among different runners,
            for example, handling capping and stats checking.

       1.2- A class that implements BaseRunner defines run(), which is
            really the algorithm that translates a RunInfo to a RunValue,
            i.e. a configuration to an actual result.

    2- A completed run is collected via get_finished_runs(), which
       returns any finished runs, if any.

    3- This interface also offers the method wait() as a mechanism to
       make sure we have enough data in the next iteration to make a
       decision. For example, the intensifier might not be able to select
       the next challenger until more results are available.

    Attributes
    ----------
    results
    ta
    stats
    run_obj
    par_factor
    cost_for_crash
    abort_on_first_run_crash

    Parameters
    ----------
    ta : typing.Union[typing.List[str], typing.Callable]
        target algorithm
    stats : Stats
        stats object to collect statistics about runtime/additional info
    run_obj : str
        run objective of SMAC
    par_factor : int
        penalization factor
    cost_for_crash : float
        cost that is used in case of crashed runs (including runs
        that returned NaN or inf)
    abort_on_first_run_crash : bool
        if true and the first run crashes, raise FirstRunCrashedException
    """

    def __init__(
        self,
        ta: typing.Union[typing.List[str], typing.Callable],
        stats: Stats,
        run_obj: str = "runtime",
        par_factor: int = 1,
        cost_for_crash: float = float(MAXINT),
        abort_on_first_run_crash: bool = True,
    ):
        # The results attribute is a FIFO structure, implemented via a list
        # (because the Queue lock is not pickable). Finished runs are
        # put in this list and collected via process_finished_runs.
        self.results = []  # type: typing.List[typing.Tuple[RunInfo, RunValue]]

        # Below we state the support for a Runner algorithm that
        # implements a ta
        self.ta = ta
        self.stats = stats
        self.run_obj = run_obj
        self.par_factor = par_factor
        self.cost_for_crash = cost_for_crash
        self.abort_on_first_run_crash = abort_on_first_run_crash
        self.logger = PickableLoggerAdapter(
            self.__module__ + '.' + self.__class__.__name__)
        self._supports_memory_limit = False

        super().__init__()

    @abstractmethod
    def submit_run(self, run_info: RunInfo) -> None:
        """This function submits a configuration embedded in a RunInfo
        object, and uses one of the workers to produce a result (the
        result will eventually be available on the self.results FIFO).

        This interface method will be called by SMBO, with the expectation
        that a function will be executed by a worker.

        What will be executed is dictated by run_info, and "how" it will
        be executed is decided by the child class that implements a
        run() method.

        Because config submission can be a serial/parallel endeavor,
        it is expected to be implemented by a child class.

        Parameters
        ----------
        run_info : RunInfo
            An object containing the configuration and the necessary
            data to run it
        """
        pass

    @abstractmethod
    def run(
        self,
        config: Configuration,
        instance: str,
        cutoff: typing.Optional[float] = None,
        seed: int = 12345,
        budget: typing.Optional[float] = None,
        instance_specific: str = "0",
    ) -> typing.Tuple[StatusType, float, float, typing.Dict]:
        """Runs the target algorithm <self.ta> with configuration <config>
        on instance <instance> with instance specifics <instance_specific>
        for at most <cutoff> seconds and random seed <seed>.

        This method exemplifies how to define the run() method.

        Parameters
        ----------
        config : Configuration
            dictionary param -> value
        instance : string
            problem instance
        cutoff : float, optional
            Wallclock time limit of the target algorithm. If no value is
            provided no limit will be enforced.
        seed : int
            random seed
        budget : float, optional
            A positive, real-valued number representing an arbitrary limit
            to the target algorithm. Handled by the target algorithm
            internally.
        instance_specific : str
            instance specific information (e.g., domain file or solution)

        Returns
        -------
        status : enum of StatusType (int)
            {SUCCESS, TIMEOUT, CRASHED, ABORT}
        cost : float
            cost/regret/quality (float) (None, if not returned by TA)
        runtime : float
            runtime (None if not returned by TA)
        additional_info : dict
            all further additional run information
        """
        pass

    def run_wrapper(
        self,
        run_info: RunInfo,
    ) -> typing.Tuple[RunInfo, RunValue]:
        """Wrapper around run() to execute and check the execution of a
        given configuration.

        This function encapsulates common handling/processing, so that
        run() implementations are simplified.

        Parameters
        ----------
        run_info : RunInfo
            Object that contains enough information to execute a
            configuration run in isolation.

        Returns
        -------
        RunInfo :
            An object containing the configuration launched
        RunValue :
            Contains information about the status/performance of config
        """
        start = time.time()

        if run_info.cutoff is None and self.run_obj == "runtime":
            if self.logger:
                self.logger.critical(
                    "For scenarios optimizing running time "
                    "(run objective), a cutoff time is required, "
                    "but not given to this call.")
            raise ValueError("For scenarios optimizing running time "
                             "(run objective), a cutoff time is required, "
                             "but not given to this call.")
        cutoff = None
        if run_info.cutoff is not None:
            cutoff = int(math.ceil(run_info.cutoff))

        try:
            status, cost, runtime, additional_info = self.run(
                config=run_info.config,
                instance=run_info.instance,
                cutoff=cutoff,
                seed=run_info.seed,
                budget=run_info.budget,
                instance_specific=run_info.instance_specific)
        except Exception as e:
            status = StatusType.CRASHED
            cost = self.cost_for_crash
            runtime = time.time() - start

            # Add context information to the error message
            exception_traceback = traceback.format_exc()
            error_message = repr(e)
            additional_info = {
                'traceback': exception_traceback,
                'error': error_message
            }

        end = time.time()

        if run_info.budget == 0 and status == StatusType.DONOTADVANCE:
            raise ValueError(
                "Cannot handle DONOTADVANCE state when using intensify or SH/HB on "
                "instances.")

        # Catch NaN or inf.
        if (self.run_obj == 'runtime' and not np.isfinite(runtime)
                or self.run_obj == 'quality' and not np.isfinite(cost)):
            if self.logger:
                self.logger.warning(
                    "Target Algorithm returned NaN or inf as {}. "
                    "Algorithm run is treated as CRASHED, cost "
                    "is set to {} for quality scenarios. "
                    "(Change value through \"cost_for_crash\""
                    "-option.)".format(self.run_obj, self.cost_for_crash))
            status = StatusType.CRASHED

        if self.run_obj == "runtime":
            # The following line pleases mypy - we already checked above,
            # prior to calling run, that cutoff is not None. However, mypy
            # assumes that the data type of cutoff is still Optional[int].
            assert cutoff is not None
            if runtime > self.par_factor * cutoff:
                self.logger.warning("Returned running time is larger "
                                    "than {0} times the passed cutoff time. "
                                    "Clamping to {0} x cutoff.".format(
                                        self.par_factor))
                runtime = cutoff * self.par_factor
                status = StatusType.TIMEOUT
            if status == StatusType.SUCCESS:
                cost = runtime
            else:
                cost = cutoff * self.par_factor
            if status == StatusType.TIMEOUT and run_info.capped:
                status = StatusType.CAPPED
        else:
            if status == StatusType.CRASHED:
                cost = self.cost_for_crash

        return run_info, RunValue(status=status, cost=cost, time=runtime,
                                  additional_info=additional_info,
                                  starttime=start, endtime=end)

    @abstractmethod
    def get_finished_runs(
            self) -> typing.List[typing.Tuple[RunInfo, RunValue]]:
        """This method returns any finished configuration, as a list with
        the results of exercising the configurations.

        This class keeps populating results in self.results until a call
        to get_finished_runs is made. At that point, the self.results list
        is emptied and all RunValues produced by running run() are returned.

        Returns
        -------
        List[typing.Tuple[RunInfo, RunValue]] :
            A list of RunInfo/RunValue pairs, one per submitted configuration
        """
        raise NotImplementedError()

    @abstractmethod
    def wait(self) -> None:
        """SMBO/intensifier might need to wait for runs to finish before
        making a decision.

        This method waits until at least one run completes.
        """
        pass

    @abstractmethod
    def pending_runs(self) -> bool:
        """Whether or not there are configs still running.

        Generally, if the runner is serial, launching a run instantly
        returns its result. On parallel runners, there might be pending
        configurations to complete.
        """
        pass

    @abstractmethod
    def num_workers(self) -> int:
        """Return the active number of workers that will execute tae runs."""
        pass
class EnsembleNN(AbstractEPM):

    def __init__(self,
                 configspace: ConfigurationSpace,
                 types: typing.List[int],
                 bounds: typing.List[typing.Tuple[float, float]],
                 seed: int,
                 hidden_dims: typing.List[int] = [50, 50, 50],
                 lr: float = 1e-3,
                 momentum: float = 0.999,
                 weight_decay: float = 1e-4,
                 iterations: int = 5000,
                 batch_size: int = 16,
                 number_of_networks: int = 5,
                 var: bool = True,
                 train_with_lognormal_llh: bool = False,
                 compute_mean_in_logspace: bool = False,
                 max_cat: int = np.inf,
                 ignore_cens: bool = False,
                 learned_weight_init: bool = False,
                 optimization_algorithm: str = 'sgd',
                 **kwargs):
        super().__init__(configspace, types, bounds, seed, **kwargs)
        self.types = [int(f) for f in self.types]

        assert not (train_with_lognormal_llh and compute_mean_in_logspace)

        if type(self.seed) != int:
            self.seed = self.seed[0]

        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')

        self.log_loss = 1000
        self.log_error = 5000

        self.var = var
        self.hidden_dims = hidden_dims
        self.lr = lr
        self.momentum = momentum
        self.iterations = iterations
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.number_of_networks = number_of_networks
        self.train_with_lognormal = train_with_lognormal_llh
        self.compute_mean_in_logspace = compute_mean_in_logspace
        self.max_cat = max_cat
        self.ignore_cens = ignore_cens
        self.learned_weight_init = learned_weight_init
        self.optimization_algorithm = optimization_algorithm

        self._my = None
        self._sy = None

        # Quick sanity check that _preprocess_y/_postprocess_mv are inverse
        # operations; should not take too long.
        a = np.random.normal(42, 23, 1000)
        m1, v1 = (np.mean(a), np.var(a))
        a = self._preprocess_y(a)
        m2, v2 = self._postprocess_mv(np.mean(a), np.var(a))
        assert np.abs(m1 - m2) < 1e-3, (m1, m2)
        assert np.abs(v1 - v2) < 1e-3, (v1, v2)
        self._my = None
        self._sy = None

        self.nns = None
        self.logger = PickableLoggerAdapter(self.__module__ + "."
                                            + self.__class__.__name__)

    def _preprocess_y(self, y: np.ndarray, redo: bool = False) -> np.ndarray:
        if self._my is None or redo:
            self._my = np.mean(y)
            self._sy = np.std(y)
            if self._sy == 0:
                # all y's are the same
                self._sy = 1

        if not self.train_with_lognormal:
            y -= self._my
            y /= self._sy

        return y

    def _postprocess_mv(self, m: np.ndarray, v: np.ndarray):
        # Undo the zero-mean/unit-variance scaling of _preprocess_y
        m = m * self._sy + self._my
        v = v * self._sy ** 2
        return m, v

    def _preprocess_x(self, x: np.ndarray, redo: bool = False) -> np.ndarray:
        # Replace NaNs with 0; should be fine for both categorical and
        # continuous features.
        # TODO: Maybe refine this and replace continuous features with the mean
        x = np.nan_to_num(x)
        return x

    def _train(self, X: np.ndarray, Y: np.ndarray, C: np.ndarray = None):
        self.logger.critical("Not using C as this is not a Tobit model")

        Y = self._preprocess_y(Y, redo=True)
        X = self._preprocess_x(X, redo=True)
        self.train_data = (X, Y)

        self.nns = []
        self.logger.debug("Start training %d networks" % self.number_of_networks)
        for i in range(self.number_of_networks):
            nn = SimpleNetworkEmbedding(
                hidden_dims=self.hidden_dims,
                feat_types=self.types,
                lr=self.lr,
                seed=self.seed + i,
                momentum=self.momentum,
                weight_decay=self.weight_decay,
                iterations=self.iterations,
                batch_size=self.batch_size,
                var=self.var,
                lognormal_nllh=self.train_with_lognormal,
                var_bias_init=np.std(Y),
                max_cat=self.max_cat,
                learned_weight_init=self.learned_weight_init,
                optimization_algorithm=self.optimization_algorithm,
            )
            nn.reset()
            nn.train(X, Y)
            self.nns.append(nn)

    def _predict_individual(
            self, X: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
        X = self._preprocess_x(X, redo=True)
        ms = np.zeros([X.shape[0], self.number_of_networks])
        vs = np.zeros([X.shape[0], self.number_of_networks])
        for i_nn, nn in enumerate(self.nns):
            pred = nn.predict(X)
            m = pred[:, 0]
            v = pred[:, 1]

            if not self.train_with_lognormal:
                m, v = self._postprocess_mv(m, v)

            ms[:, i_nn] = m
            vs[:, i_nn] = v

        return ms, vs

    def _predict(self, X: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
        ms, _ = self._predict_individual(X)
        m = ms.mean(axis=1)
        v = ms.var(axis=1)
        return m.reshape((-1, 1)), v.reshape((-1, 1))

    def predict_marginalized_over_instances(self, X: np.ndarray):
        """Predict mean and variance marginalized over all instances.

        Returns the predictive mean and variance marginalised over all
        instances for a set of configurations.

        Note
        ----
        This method overwrites the same method of
        ~smac.epm.base_epm.AbstractEPM; it follows the SMAC2 (random forest)
        implementation and requires no distribution assumption to
        marginalize the uncertainty estimates.

        Parameters
        ----------
        X : np.ndarray
            [n_samples, n_features (config)]

        Returns
        -------
        means : np.ndarray of shape = [n_samples, 1]
            Predictive mean
        vars : np.ndarray of shape = [n_samples, 1]
            Predictive variance
        """
        if self.instance_features is None or \
                len(self.instance_features) == 0:
            mean_, var = self.predict(X)
            var[var < self.var_threshold] = self.var_threshold
            var[np.isnan(var)] = self.var_threshold
            return mean_, var

        if len(X.shape) != 2:
            raise ValueError('Expected 2d array, got %dd array!' %
                             len(X.shape))
        if X.shape[1] != len(self.bounds):
            raise ValueError('Rows in X should have %d entries but have %d!' %
                             (len(self.bounds), X.shape[1]))

        mean_ = np.zeros((X.shape[0], 1))
        var = np.zeros(X.shape[0])
        for i, x in enumerate(X):
            # Marginalize over instances:
            # 1. Get predictions of all networks for all instances
            input = np.concatenate((np.tile(
                x, (len(self.instance_features), 1)), self.instance_features),
                axis=1)
            preds_nns, _ = self._predict_individual(input)

            # 2. Average within each network over all instances
            pred_per_nn = []
            for nn_id in range(self.number_of_networks):
                if self.compute_mean_in_logspace:
                    pred_per_nn.append(
                        np.log(np.mean(np.exp(preds_nns[:, nn_id]))))
                else:
                    pred_per_nn.append(np.mean(preds_nns[:, nn_id]))

            # 3. Compute statistics across networks
            mean_x = np.mean(pred_per_nn)
            var_x = np.var(pred_per_nn)
            if var_x < self.var_threshold:
                var_x = self.var_threshold

            var[i] = var_x
            mean_[i] = mean_x

        if len(mean_.shape) == 1:
            mean_ = mean_.reshape((-1, 1))
        if len(var.shape) == 1:
            var = var.reshape((-1, 1))

        return mean_, var
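# Illustrative sketch (not part of the class): the ensemble reduction rule
# used by EnsembleNN._predict above, written out in plain numpy. Given the
# per-network means `ms` of shape [n_samples, n_networks], the predictive
# mean is the average over networks and the predictive (epistemic) variance
# is the disagreement between the networks; the per-network variance heads
# are not used in this reduction.
def _example_ensemble_reduction(
        ms: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
    m = ms.mean(axis=1)  # average prediction across ensemble members
    v = ms.var(axis=1)   # spread of the member predictions as uncertainty
    return m.reshape((-1, 1)), v.reshape((-1, 1))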
class NeuralNet(nn.Module):

    def __init__(self, hidden_dims, input_size, feat_type=None,
                 var: bool = True, max_cat: int = np.inf):
        super(NeuralNet, self).__init__()
        self.logger = PickableLoggerAdapter(self.__module__ + "."
                                            + self.__class__.__name__)
        self.feat_type = feat_type
        self.input_size = input_size
        self.num_neurons = hidden_dims
        self.activation = nn.Tanh
        self.num_layer = len(hidden_dims)
        self.max_cat = max_cat
        if var:
            self.n_output = 2
        else:
            self.n_output = 1

        if np.sum(self.feat_type) == 0:
            self.feat_type = None

        if self.feat_type is not None:
            self.logger.info("Using categorical embeddings")
            assert len(self.feat_type) == self.input_size
            emb = nn.ModuleList()
            sz = int(0)
            for f in self.feat_type:
                if f == 0:
                    # In SMAC, 0 encodes a numerical feature
                    emb.append(None)
                    sz += 1
                else:
                    es = min(self.max_cat, int(f))
                    emb.append(nn.Embedding(int(f), es))
                    sz += es
            assert int(sz) == sz
            sz = int(sz)
            num_neurons = [sz] + self.num_neurons
            self.embedding = emb
        else:
            num_neurons = [self.input_size] + self.num_neurons

        self.weights = nn.ModuleList()
        self.acts = nn.ModuleList()
        self.logger.debug("Neurons per layer: %s" % str(num_neurons))
        for i in range(self.num_layer):
            self.weights.append(nn.Linear(num_neurons[i], num_neurons[i + 1]))
            self.acts.append(self.activation())
        self.outlayer = nn.Linear(num_neurons[-1], self.n_output)

    def initialize_weights(self, var_bias_init: float = 1):
        # Use Xavier normal initialization, slightly modified from
        # "Understanding the difficulty of ..."
        for i in range(len(self.weights)):
            torch.nn.init.xavier_normal_(self.weights[i].weight)
            self.weights[i].bias.data.fill_(0)
        torch.nn.init.xavier_normal_(self.outlayer.weight)
        # TODO: The second bias should be initialised to np.log(np.exp(x) - 1),
        # s.t. softplus = x
        self.outlayer.bias.data[0].fill_(0)
        if var_bias_init == 0:
            self.logger.critical(
                "Can't properly initialize the variance bias unit, initializing it with zero")
            self.outlayer.bias.data[1].fill_(0)
        else:
            self.outlayer.bias.data[1].fill_(np.log(np.exp(var_bias_init) - 1))

    def learn_initial_weights(self, X):
        """Learn initial weights such that the mean over the data is on
        average zero per neuron."""
        output = torch.tensor(X, dtype=torch.float32)
        for i in range(len(self.weights)):
            torch.nn.init.xavier_normal_(self.weights[i].weight,
                                         torch.nn.init.calculate_gain('tanh'))
            self.weights[i].bias.data.fill_(0)
            output2 = self.weights[i].forward(output)
            mean = output2.mean(axis=0)
            self.weights[i].bias.data = -mean
            output = self.weights[i].forward(output)
            output = self.acts[i](output)
        torch.nn.init.xavier_normal_(self.outlayer.weight,
                                     torch.nn.init.calculate_gain('tanh'))
        self.outlayer.bias.data.fill_(0)
        # Noise can be tuned here...
        self.outlayer.bias.data[1] = -5

    def forward(self, x):
        out = []
        if self.feat_type is not None:
            for idx, (emb, typ) in enumerate(zip(self.embedding,
                                                 self.feat_type)):
                if typ == 0:
                    # a numerical feature
                    out.append(x[:, idx].view(-1, 1))
                else:
                    # a categorical feature
                    out.append(
                        emb(x[:, idx].long().view(-1, 1)).view(
                            [-1, min(self.max_cat, typ)]))
            out = torch.cat(out, 1)
        else:
            out = x
        for i in range(self.num_layer):
            out = self.weights[i](out)
            out = self.acts[i](out)

        out = self.outlayer(out)
        if self.n_output == 2:
            # Pass the second output through a softplus function
            # (see Lakshminarayanan et al., 2017)
            out[:, 1] = torch.log(1 + torch.exp(out[:, 1])) + 10e-6
        return out
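# Illustrative usage sketch (not part of the class): a forward pass through
# the mean/variance network defined above, on purely numerical features.
# Passing feat_type=[0, 0, 0] marks all three inputs as numerical, so no
# categorical embeddings are created. With var=True the network emits two
# outputs per sample; the second is squashed through softplus in forward()
# and can be read as a positive predictive variance. All shapes and values
# below are assumptions made only for this example.
def _example_neural_net_forward() -> None:
    net = NeuralNet(hidden_dims=[50, 50], input_size=3,
                    feat_type=[0, 0, 0], var=True)
    net.initialize_weights(var_bias_init=1.0)
    x = torch.rand(8, 3)   # 8 samples, 3 numerical features in [0, 1]
    out = net(x)           # shape [8, 2]: column 0 = mean, column 1 = variance
    mean, variance = out[:, 0], out[:, 1]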
class DNGO(BaseModel):

    def __init__(self,
                 configspace: ConfigurationSpace,
                 types: np.ndarray,
                 bounds: typing.List[typing.Tuple[float, float]],
                 seed: int,
                 hidden_dims: typing.List[int] = [50, 50, 50],
                 lr: float = 1e-3,
                 momentum: float = 0.999,
                 weight_decay: float = 1e-4,
                 iterations: int = 10000,
                 batch_size: int = 8,
                 var: bool = True,
                 **kwargs):
        super().__init__(configspace, types, bounds, seed, **kwargs)
        self.logger = PickableLoggerAdapter(self.__module__ + "."
                                            + self.__class__.__name__)
        self.logger.info("Using DNGO")

        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')

        self.log_loss = 100
        self.log_error = 1000

        self.var = var
        self.hidden_dims = hidden_dims
        self.lr = lr
        self.momentum = momentum
        self.iterations = iterations
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.nn = None
        self.blr = None

    def _train(self, X: np.ndarray, y: np.ndarray):
        self.nn = SimpleNetworkEmbedding(
            hidden_dims=self.hidden_dims,
            lr=self.lr,
            seed=self.seed,
            momentum=self.momentum,
            weight_decay=self.weight_decay,
            iterations=self.iterations,
            batch_size=self.batch_size,
            var=self.var,
        )
        self.blr = BayesianLinearRegressionLayer()

        # Normalize targets to zero mean and unit variance
        self._my = np.mean(y)
        self._sy = np.std(y)
        y -= self._my
        y /= self._sy

        # Fit the embedding network on the normalized targets, then optimize
        # the Bayesian linear regression hyperparameters on its representation
        self.nn.train(X, y)
        self.blr.optimize_alpha_beta(self.nn.model, X, y)

    def _predict(self, X: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
        means, vars = self.blr.predict(self.nn.model, X)
        means = means.data.numpy().flatten()
        vars = vars.data.numpy().flatten()

        # Undo the target normalization
        means = np.array(means * self._sy + self._my).reshape([-1, 1])
        vars = np.array(vars * self._sy ** 2).reshape([-1, 1])

        if not np.isfinite(means).any():
            self.logger.critical(
                "All DNGO predictions are NaN. Falling back to random predictions")
            return np.random.randn(means.shape[0], means.shape[1]), np.zeros_like(vars)
        else:
            return means, vars
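# Illustrative sketch (not part of the class): the two-stage DNGO recipe that
# _train() and _predict() implement -- first fit the embedding network on
# normalized targets to learn basis functions, then fit and query the
# Bayesian linear regression head on top of that representation. `model` is
# assumed to be an already constructed DNGO instance and X, y, X_test plain
# numpy arrays; in SMAC, the public train()/predict() wrappers of the base
# EPM would normally be called instead of these private methods.
def _example_dngo_train_predict(
        model: DNGO,
        X: np.ndarray,
        y: np.ndarray,
        X_test: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
    # Stages 1 and 2: _train() fits the embedding network and then optimizes
    # the BLR hyperparameters (alpha, beta) on the learned representation.
    # A float copy of y is passed because _train() normalizes y in place.
    model._train(X, np.array(y, dtype=float))
    # _predict() queries the BLR head and undoes the target normalization.
    means, variances = model._predict(X_test)
    return means, variances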