import typing

import numpy as np
import torch

from ConfigSpace import ConfigurationSpace
from smac.utils.logging import PickableLoggerAdapter

# NOTE: the two project-local imports below are assumed paths; adjust them to
# this repository's actual layout.
from base_model import BaseModel
from simple_network_embedding import SimpleNetworkEmbedding


class EnsembleNN(BaseModel):

    def __init__(self,
                 configspace: ConfigurationSpace,
                 types: np.ndarray,
                 bounds: typing.List[typing.Tuple[float, float]],
                 seed: int,
                 hidden_dims: typing.List[int] = [50, 50, 50],
                 lr: float = 1e-3,
                 momentum: float = 0.999,
                 weight_decay: float = 1e-4,
                 iterations: int = 10000,
                 batch_size: int = 8,
                 number_of_networks: int = 10,
                 var: bool = True,
                 train_with_lognormal_llh=False,
                 compute_mean_in_logspace=True,
                 **kwargs):
        super().__init__(configspace, types, bounds, seed, **kwargs)

        # Training with a lognormal likelihood and averaging in log space are
        # mutually exclusive.
        assert not (train_with_lognormal_llh and compute_mean_in_logspace)

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        self.log_loss = 1000
        self.log_error = 5000
        self.var = var
        self.hidden_dims = hidden_dims
        self.lr = lr
        self.momentum = momentum
        self.iterations = iterations
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.number_of_networks = number_of_networks
        self.train_with_lognormal = train_with_lognormal_llh
        self.compute_mean_in_logspace = compute_mean_in_logspace

        self.nns = None
        self.logger = PickableLoggerAdapter(
            self.__module__ + "." + self.__class__.__name__)

    def _train(self, X: np.ndarray, y: np.ndarray):
        self._my = np.mean(y)
        self._sy = np.std(y)
        if self._sy == 0:
            # All targets are identical; avoid division by zero.
            self._sy = 1

        if not self.train_with_lognormal:
            # Standardize targets; avoid mutating the caller's array in place.
            y = (y - self._my) / self._sy

        self.train_data = (X, y)
        self.nns = []
        self.logger.debug("Start Training %d networks" % self.number_of_networks)
        for i in range(self.number_of_networks):
            nn = SimpleNetworkEmbedding(
                hidden_dims=self.hidden_dims,
                lr=self.lr,
                seed=self.seed + i,
                momentum=self.momentum,
                weight_decay=self.weight_decay,
                iterations=self.iterations,
                batch_size=self.batch_size,
                var=self.var,
                lognormal_nllh=self.train_with_lognormal,
            )
            nn.train(X, y)
            self.nns.append(nn)

    def _predict_individual(
            self, X: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
        # Per-network means and variances, shape [n_samples, n_networks].
        ms = np.zeros([X.shape[0], self.number_of_networks])
        vs = np.zeros([X.shape[0], self.number_of_networks])
        for i_nn, nn in enumerate(self.nns):
            pred = nn.predict(X)
            m = pred[:, 0]
            v = pred[:, 1]
            if not self.train_with_lognormal:
                # Undo the target standardization.
                m = m * self._sy + self._my
                v = v * self._sy ** 2
            ms[:, i_nn] = m
            vs[:, i_nn] = v
        return ms, vs

    def _predict(self, X: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
        # The ensemble mean is the average of the member means; the predictive
        # variance is the disagreement between members. The per-network
        # variances are deliberately not added in here.
        ms, _ = self._predict_individual(X)
        m = ms.mean(axis=1)
        v = ms.var(axis=1)
        return m, v

    def predict_marginalized_over_instances(self, X: np.ndarray):
        """Predict mean and variance marginalized over all instances.

        Returns the predictive mean and variance marginalised over all
        instances for a set of configurations.

        Note
        ----
        This method overwrites the same method of
        ~smac.epm.base_epm.AbstractEPM; the following method was adapted from
        the random-forest EPM and follows the SMAC2 implementation; it
        requires no distribution assumption to marginalize the uncertainty
        estimates.

        Parameters
        ----------
        X : np.ndarray
            [n_samples, n_features (config)]

        Returns
        -------
        means : np.ndarray of shape = [n_samples, 1]
            Predictive mean
        vars : np.ndarray of shape = [n_samples, 1]
            Predictive variance
        """
        if self.instance_features is None or \
                len(self.instance_features) == 0:
            mean_, var = self.predict(X)
            var[var < self.var_threshold] = self.var_threshold
            var[np.isnan(var)] = self.var_threshold
            return mean_, var

        if len(X.shape) != 2:
            raise ValueError('Expected 2d array, got %dd array!' % len(X.shape))
        if X.shape[1] != len(self.bounds):
            raise ValueError('Rows in X should have %d entries but have %d!' %
                             (len(self.bounds), X.shape[1]))

        mean_ = np.zeros(X.shape[0])
        var = np.zeros(X.shape[0])
        for i, x in enumerate(X):
            # Marginalize over instances:
            # 1. Get predictions of all networks for all instances in one
            #    batched call (not very efficient, but avoids a Python loop
            #    over instances).
            X_inst = np.concatenate(
                (np.tile(x, (len(self.instance_features), 1)),
                 self.instance_features),
                axis=1)
            preds_nns, _ = self._predict_individual(X_inst)

            # 2. Average within each network across all instances.
            pred_per_nn = []
            for nn_id in range(self.number_of_networks):
                if self.compute_mean_in_logspace:
                    # Predictions live in log space: average in the original
                    # space, then map back.
                    pred_per_nn.append(
                        np.log(np.mean(np.exp(preds_nns[:, nn_id]))))
                else:
                    pred_per_nn.append(np.mean(preds_nns[:, nn_id]))

            # 3. Compute statistics across the ensemble members.
            mean_x = np.mean(pred_per_nn)
            var_x = np.var(pred_per_nn)
            if var_x < self.var_threshold:
                var_x = self.var_threshold

            var[i] = var_x
            mean_[i] = mean_x

        if len(mean_.shape) == 1:
            mean_ = mean_.reshape((-1, 1))
        if len(var.shape) == 1:
            var = var.reshape((-1, 1))

        return mean_, var
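
# A minimal, self-contained sketch (plain numpy; no torch/SMAC required) of
# the aggregation rule used in EnsembleNN._predict above: the ensemble mean is
# the average of the member means and the predictive variance is the
# disagreement between members. The helper name and the fake per-network
# means below are illustrative only and stand in for _predict_individual's
# output.
def _demo_ensemble_aggregation():
    rng = np.random.RandomState(0)
    n_samples, n_networks = 4, 10
    # Fake member means, shape [n_samples, n_networks], as returned by
    # _predict_individual.
    ms = rng.normal(loc=1.0, scale=0.1, size=(n_samples, n_networks))

    m = ms.mean(axis=1)  # ensemble mean per sample
    v = ms.var(axis=1)   # uncertainty = variance across members
    print("mean:", m)
    print("var :", v)
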
class EnsembleNN(AbstractEPM):
    # Second variant of the ensemble, built directly on AbstractEPM and with
    # additional options for categorical features, censored data, weight
    # initialization, and the choice of optimizer.

    def __init__(self,
                 configspace: ConfigurationSpace,
                 types: typing.List[int],
                 bounds: typing.List[typing.Tuple[float, float]],
                 seed: int,
                 hidden_dims: typing.List[int] = [50, 50, 50],
                 lr: float = 1e-3,
                 momentum: float = 0.999,
                 weight_decay: float = 1e-4,
                 iterations: int = 5000,
                 batch_size: int = 16,
                 number_of_networks: int = 5,
                 var: bool = True,
                 train_with_lognormal_llh=False,
                 compute_mean_in_logspace=False,
                 max_cat: int = np.inf,
                 ignore_cens: bool = False,
                 learned_weight_init: bool = False,
                 optimization_algorithm: str = 'sgd',
                 **kwargs):
        super().__init__(configspace, types, bounds, seed, **kwargs)
        self.types = [int(f) for f in self.types]

        assert not (train_with_lognormal_llh and compute_mean_in_logspace)

        if type(self.seed) != int:
            self.seed = self.seed[0]

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        self.log_loss = 1000
        self.log_error = 5000
        self.var = var
        self.hidden_dims = hidden_dims
        self.lr = lr
        self.momentum = momentum
        self.iterations = iterations
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.number_of_networks = number_of_networks
        self.train_with_lognormal = train_with_lognormal_llh
        self.compute_mean_in_logspace = compute_mean_in_logspace
        self.max_cat = max_cat
        self.ignore_cens = ignore_cens
        self.learned_weight_init = learned_weight_init
        self.optimization_algorithm = optimization_algorithm

        self._my = None
        self._sy = None

        # Quick sanity check that _preprocess_y and _postprocess_mv are
        # inverse to each other; should not take long.
        a = np.random.normal(42, 23, 1000)
        m1, v1 = (np.mean(a), np.var(a))
        a = self._preprocess_y(a)
        m2, v2 = self._postprocess_mv(np.mean(a), np.var(a))
        assert np.abs(m1 - m2) < 1e-3, (m1, m2)
        assert np.abs(v1 - v2) < 1e-3, (v1, v2)
        self._my = None
        self._sy = None

        self.nns = None
        self.logger = PickableLoggerAdapter(
            self.__module__ + "." + self.__class__.__name__)

    def _preprocess_y(self, y: np.ndarray, redo=False):
        if self._my is None or redo:
            self._my = np.mean(y)
            self._sy = np.std(y)
            if self._sy == 0:
                # All y's are identical; avoid division by zero.
                self._sy = 1

        if not self.train_with_lognormal:
            y -= self._my
            y /= self._sy

        return y

    def _postprocess_mv(self, m: np.ndarray, v: np.ndarray):
        # Undo the zero-mean/unit-variance target scaling.
        m = m * self._sy + self._my
        v = v * self._sy ** 2
        return m, v

    def _preprocess_x(self, x: np.ndarray, redo: bool = False):
        # Replace NaNs with 0, which is fine for both categorical and
        # continuous features.
        # TODO: Maybe refine this and replace continuous values with the mean.
        x = np.nan_to_num(x)
        return x

    def _train(self, X: np.ndarray, Y: np.ndarray, C: np.ndarray = None):
        self.logger.critical("Not using C as this is not a Tobit model")

        Y = self._preprocess_y(Y, redo=True)
        X = self._preprocess_x(X, redo=True)
        self.train_data = (X, Y)
        self.nns = []
        self.logger.debug("Start Training %d networks" % self.number_of_networks)
        for i in range(self.number_of_networks):
            nn = SimpleNetworkEmbedding(
                hidden_dims=self.hidden_dims,
                feat_types=self.types,
                lr=self.lr,
                seed=self.seed + i,
                momentum=self.momentum,
                weight_decay=self.weight_decay,
                iterations=self.iterations,
                batch_size=self.batch_size,
                var=self.var,
                lognormal_nllh=self.train_with_lognormal,
                var_bias_init=np.std(Y),
                max_cat=self.max_cat,
                learned_weight_init=self.learned_weight_init,
                optimization_algorithm=self.optimization_algorithm,
            )
            nn.reset()
            nn.train(X, Y)
            self.nns.append(nn)

    def _predict_individual(
            self, X: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
        X = self._preprocess_x(X, redo=True)
        # Per-network means and variances, shape [n_samples, n_networks].
        ms = np.zeros([X.shape[0], self.number_of_networks])
        vs = np.zeros([X.shape[0], self.number_of_networks])
        for i_nn, nn in enumerate(self.nns):
            pred = nn.predict(X)
            m = pred[:, 0]
            v = pred[:, 1]
            if not self.train_with_lognormal:
                m, v = self._postprocess_mv(m, v)
            ms[:, i_nn] = m
            vs[:, i_nn] = v
        return ms, vs

    def _predict(self, X: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
        # Ensemble mean and member disagreement, returned as column vectors.
        ms, _ = self._predict_individual(X)
        m = ms.mean(axis=1)
        v = ms.var(axis=1)
        return m.reshape((-1, 1)), v.reshape((-1, 1))

    def predict_marginalized_over_instances(self, X: np.ndarray):
        """Predict mean and variance marginalized over all instances.

        Returns the predictive mean and variance marginalised over all
        instances for a set of configurations.

        Note
        ----
        This method overwrites the same method of
        ~smac.epm.base_epm.AbstractEPM; the following method was adapted from
        the random-forest EPM and follows the SMAC2 implementation; it
        requires no distribution assumption to marginalize the uncertainty
        estimates.

        Parameters
        ----------
        X : np.ndarray
            [n_samples, n_features (config)]

        Returns
        -------
        means : np.ndarray of shape = [n_samples, 1]
            Predictive mean
        vars : np.ndarray of shape = [n_samples, 1]
            Predictive variance
        """
        if self.instance_features is None or \
                len(self.instance_features) == 0:
            mean_, var = self.predict(X)
            var[var < self.var_threshold] = self.var_threshold
            var[np.isnan(var)] = self.var_threshold
            return mean_, var

        if len(X.shape) != 2:
            raise ValueError('Expected 2d array, got %dd array!' % len(X.shape))
        if X.shape[1] != len(self.bounds):
            raise ValueError('Rows in X should have %d entries but have %d!' %
                             (len(self.bounds), X.shape[1]))

        mean_ = np.zeros((X.shape[0], 1))
        var = np.zeros(X.shape[0])
        for i, x in enumerate(X):
            # Marginalize over instances:
            # 1. Get predictions of all networks for all instances in one
            #    batched call (not very efficient, but avoids a Python loop
            #    over instances).
            X_inst = np.concatenate(
                (np.tile(x, (len(self.instance_features), 1)),
                 self.instance_features),
                axis=1)
            preds_nns, _ = self._predict_individual(X_inst)

            # 2. Average within each network across all instances.
            pred_per_nn = []
            for nn_id in range(self.number_of_networks):
                if self.compute_mean_in_logspace:
                    # Predictions live in log space: average in the original
                    # space, then map back.
                    pred_per_nn.append(
                        np.log(np.mean(np.exp(preds_nns[:, nn_id]))))
                else:
                    pred_per_nn.append(np.mean(preds_nns[:, nn_id]))

            # 3. Compute statistics across the ensemble members.
            mean_x = np.mean(pred_per_nn)
            var_x = np.var(pred_per_nn)
            if var_x < self.var_threshold:
                var_x = self.var_threshold

            var[i] = var_x
            mean_[i] = mean_x

        if len(mean_.shape) == 1:
            mean_ = mean_.reshape((-1, 1))
        if len(var.shape) == 1:
            var = var.reshape((-1, 1))

        return mean_, var
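
# Side note (a sketch, not used by the classes above): the log-space average
# np.log(np.mean(np.exp(x))) computed in step 2 of
# predict_marginalized_over_instances can overflow for large log-values. If
# scipy is available as a dependency (an assumption), an equivalent but
# numerically stable form is the log-mean-exp via scipy.special.logsumexp:
def _log_mean_exp(x: np.ndarray) -> float:
    from scipy.special import logsumexp
    # log(mean(exp(x))) == logsumexp(x) - log(n), computed without
    # exponentiating the raw values.
    return float(logsumexp(x) - np.log(len(x)))

# For example, for x = np.array([1000.0, 1001.0]) the naive form overflows to
# inf, while _log_mean_exp(x) returns ~1000.62.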