def _metric(self, data):
    """Measure the change between the last two entries of ``data``.

    Parameters
    ----------
    data : sequence of dict
        Only the final two entries are used. Each is indexed with the keys
        ``"F"``, ``"ideal"`` and ``"nadir"``.

    Returns
    -------
    dict
        ``delta_ideal`` and ``delta_nadir`` (results of ``calc_delta_norm``)
        and ``delta_f`` (result of ``IGD(...).calc(...)``).
    """
    previous, latest = data[-2], data[-1]

    # span between nadir and ideal point of the latest entry; a degenerated
    # (almost zero) span is replaced by one, which disables the normalization
    span = latest["nadir"] - latest["ideal"]
    span[span < 1e-32] = 1

    # movement of the ideal and the nadir point from the previous to the latest entry
    moved_ideal = calc_delta_norm(latest["ideal"], previous["ideal"], span)
    moved_nadir = calc_delta_norm(latest["nadir"], previous["nadir"], span)

    # both objective sets are normalized with the bounds of the latest entry
    ideal, nadir = latest["ideal"], latest["nadir"]
    latest_normalized = normalize(latest["F"], ideal, nadir)
    previous_normalized = normalize(previous["F"], ideal, nadir)

    # IGD from the previous to the latest normalized set
    moved_f = IGD(latest_normalized).calc(previous_normalized)

    return {
        "delta_ideal": moved_ideal,
        "delta_nadir": moved_nadir,
        "delta_f": moved_f,
    }
def _decide(self):
    """Compare both ends of the history window in X and F space.

    Returns
    -------
    bool
        ``False`` as soon as one of the three checks (``x_tol``, ``f_tol_abs``,
        ``f_tol``) is satisfied, ``True`` otherwise.
    """
    # the two ends of the window: first and last history entry
    head, tail = self.history[0], self.history[-1]

    # --- X space ---------------------------------------------------------
    x_head, x_tail = head[0], tail[0]
    if self.xl is not None and self.xu is not None:
        x_head = normalize(x_head, x_min=self.xl, x_max=self.xu)
        x_tail = normalize(x_tail, x_min=self.xl, x_max=self.xu)

    # every solution is compared with its closest counterpart
    closest = vectorized_cdist(x_head, x_tail).argmin(axis=1)
    mean_move = np.sqrt((x_head - x_tail[closest]) ** 2).mean()
    x_converged = mean_move < self.x_tol

    # --- F space ---------------------------------------------------------
    f_head = head[1].min()
    f_tail = tail[1].min()

    # absolute difference of the best objective values
    abs_converged = f_tail - f_head < self.f_tol_abs

    # relative difference with respect to ``self.F_min``
    rel_converged = f_tail / self.F_min - f_head / self.F_min < self.f_tol

    return not (x_converged or abs_converged or rel_converged)
def decomposition_truncation(Q, w=0.5, **kwargs):
    """Truncate ``Q`` to its best entries using a weighted sum of two criteria.

    For every entry ``q`` the values ``q.solution[0]`` and ``q.solution[2]`` are
    normalized and combined as ``w * d + (1 - w) * t``. The entries with the
    smallest combined value are kept, in ascending order.

    Parameters
    ----------
    Q : sequence
        Indexable collection whose items expose ``solution``.
    w : float
        Weight of the first criterion; the second one is weighted by ``1 - w``.
    **kwargs
        ``limit`` - maximum number of entries to keep. A missing or falsy value
        falls back to 10000.

    Returns
    -------
    tuple
        ``(deque, dict)`` - the truncated entries followed by a ``None`` marker
        for the end of a level, and an (empty) data dictionary.
    """
    data = {}

    # the limit is used as a slice bound and therefore has to be an integer;
    # the former float default (1e4) raised a TypeError when slicing
    limit = int(kwargs.get('limit') or 10000)

    d = np.array([q.solution[0] for q in Q])
    t = np.array([q.solution[2] for q in Q])

    d_norm = normalize(d)
    # NOTE(review): 72 is the hard-coded upper bound of the second criterion
    t_norm = normalize(t, x_min=0, x_max=72)

    F = (w * d_norm) + ((1 - w) * t_norm)

    I = np.argsort(F)[:limit]
    truncated = deque(Q[i] for i in I)
    truncated.append(None)  # Mark the end of a level

    return truncated, data
def _metric(self, data):
    """Extend the parent metric with ``delta_f`` computed over a window.

    Parameters
    ----------
    data : sequence of dict
        Entries indexed with ``"F"``, ``"ideal"`` and ``"nadir"``. If
        ``self.sliding_window`` is false the last ``self.metric_window_size``
        entries of ``self.data`` are used instead.

    Returns
    -------
    dict
        The dictionary returned by the parent ``_metric`` with ``"delta_f"``
        replaced by a list of indicator values.

    Raises
    ------
    ValueError
        If ``self.all_to_current`` is set and ``self.perf_indicator`` is
        neither ``"igd"`` nor ``"hv"``.
    """
    ret = super()._metric(data)

    if not self.sliding_window:
        data = self.data[-self.metric_window_size:]

    # get necessary data from the current population
    current = data[-1]
    c_F, c_ideal, c_nadir = current["F"], current["ideal"], current["nadir"]

    # normalize all previous generations with respect to current ideal and nadir
    N = [normalize(e["F"], c_ideal, c_nadir) for e in data]

    if self.all_to_current:
        # every generation is compared with the current one
        c_N = normalize(c_F, c_ideal, c_nadir)

        if self.perf_indicator == "igd":
            delta_f = [IGD(c_N).do(n) for n in N]
        elif self.perf_indicator == "hv":
            hv = Hypervolume(ref_point=np.ones(c_F.shape[1]))
            delta_f = [hv.do(n) for n in N]
        else:
            # previously this fell through and failed later with an
            # UnboundLocalError on ``delta_f``
            raise ValueError("Unknown performance indicator: %s" % self.perf_indicator)
    else:
        # each generation is compared with its successor
        delta_f = [IGD(N[k + 1]).do(N[k]) for k in range(len(N) - 1)]

    ret["delta_f"] = delta_f

    return ret
def _evaluate(self, X, out, *args, algorithm=None, **kwargs):
    """Evaluate via the parent class and fold the constraints into ``out["F"]``.

    After the parent evaluation the raw values are kept as ``out["_F"]`` and
    ``out["_G"]``, ``out["G"]`` is removed and ``out["F"]`` becomes the
    normalized objective plus the penalized, normalized constraint sum.
    ``algorithm`` is accepted but not forwarded to the parent.
    """
    super()._evaluate(X, out, *args, **kwargs)

    # keep the untouched results under underscore-prefixed keys
    out["_F"] = out["F"]
    out["_G"] = out["G"]

    # hard-coded normalization bounds for the objective and the two constraints
    scaled_F = normalize(out["F"], 20000, 60000)
    g_lower = np.array([0.0, 0.0])
    g_upper = np.array([2000.0, 200.0])
    scaled_G = normalize(out["G"], g_lower, g_upper)

    del out["G"]

    # one penalty value per row, added as a column to the objective
    penalty_per_row = (self.penalty * scaled_G).sum(axis=1)
    out["F"] = scaled_F + penalty_per_row[:, None]
def _calc_metric(self):
    """Compute the change metrics between the two latest history snapshots.

    Returns
    -------
    dict
        ``delta_ideal``, ``delta_nadir`` and ``delta_f`` together with their
        ``max_*`` counterparts and ``max_delta_all``. With ``self.renormalize``
        set, ``delta_f`` is a list computed over the whole history.
    """
    current, last = self.history[-1], self.history[-2]

    # range between nadir and ideal point; a degenerated (almost zero) range
    # is set to one which disables the normalization
    norm = current["nadir"] - current["ideal"]
    norm[norm < 1e-32] = 1

    def _max_so_far(key, value):
        # largest value of ``key`` over the stored metrics and the new value
        return max([e[key] for e in self.metrics] + [value])

    # change of the ideal and the nadir point from last to current
    delta_ideal = self._calc_delta_norm(current["ideal"], last["ideal"], norm)
    max_delta_ideal = _max_so_far("delta_ideal", delta_ideal)

    delta_nadir = self._calc_delta_norm(current["nadir"], last["nadir"], norm)
    max_delta_nadir = _max_so_far("delta_nadir", delta_nadir)

    # the current population normalized by its own ideal and nadir point
    c_F, c_ideal, c_nadir = current["F"], current["ideal"], current["nadir"]
    c_N = normalize(c_F, c_ideal, c_nadir)

    if self.renormalize:
        # all stored generations are normalized by the current bounds
        N = [normalize(e["F"], c_ideal, c_nadir) for e in self.history]

        if self.all_to_current:
            delta_f = [IGD(c_N).calc(n) for n in N]
        else:
            delta_f = [IGD(after).calc(before) for before, after in zip(N, N[1:])]

        max_delta_f = np.array(delta_f).max()
    else:
        # only the last generation is compared with the current one
        l_N = normalize(last["F"], c_ideal, c_nadir)
        delta_f = IGD(c_N).calc(l_N)
        max_delta_f = _max_so_far("delta_f", delta_f)

    return {
        "delta_ideal": delta_ideal,
        "max_delta_ideal": max_delta_ideal,
        "delta_nadir": delta_nadir,
        "max_delta_nadir": max_delta_nadir,
        "delta_f": delta_f,
        "max_delta_f": max_delta_f,
        "max_delta_all": max(max_delta_ideal, max_delta_nadir, max_delta_f),
    }
def _do(self, pop, n_survive, algorithm=None, **kwargs):
    """Select survivors by objective value while clearing their neighborhood.

    Repeatedly the individual with the smallest objective value is selected and
    every individual closer than ``self.eps`` (normalized design space) is
    blocked. Once nothing selectable is left the blocking is reset for all
    individuals that have not been selected yet.

    Parameters
    ----------
    pop : population
        Provides ``get("X", "F")``, ``len`` and boolean mask indexing.
    n_survive : int
        Number of survivors; values larger than ``len(pop)`` are clamped.
    algorithm : object
        Provides ``problem.xl`` and ``problem.xu`` for the normalization.

    Returns
    -------
    population
        ``pop`` indexed with the boolean survivor mask.

    Raises
    ------
    ValueError
        If ``F`` has more than one column.
    """
    X, F = pop.get("X", "F")

    # validate before doing the (quadratic) distance computation
    if F.shape[1] != 1:
        raise ValueError(
            "FitnessSurvival can only used for single objective problems!")

    problem = algorithm.problem
    _X = normalize(X, problem.xl, problem.xu)
    D = cdist(_X, _X)

    # asking for more survivors than individuals would never terminate
    n_survive = min(n_survive, len(pop))

    survivors = np.full(len(pop), False)
    _F = np.copy(F)

    while np.sum(survivors) < n_survive:

        s = np.argmin(_F[:, 0])

        if np.isinf(_F[s, 0]):
            # everything is blocked: release all individuals not selected yet
            _F = np.copy(F)
            _F[survivors] = np.inf

            if np.isinf(_F[np.argmin(_F[:, 0]), 0]):
                # even after the reset only infinite values remain, so the loop
                # could not make progress - fill the open slots in sorted order
                n_missing = n_survive - np.sum(survivors)
                remaining = np.argsort(_F[:, 0], kind="stable")
                remaining = remaining[~survivors[remaining]][:n_missing]
                survivors[remaining] = True
                break
        else:
            survivors[s] = True
            # block the neighborhood of the selected individual (itself included)
            _F[D[s, :] < self.eps] = np.inf

    return pop[survivors]
def _potential_optimal(self):
    """Return the potentially optimal individuals of ``self.pop``.

    An individual is a candidate if it is non-dominated with respect to the
    negated maximum half interval length and the penalized, normalized
    objective. If more than ``self.n_max_candidates`` candidates exist,
    ``RankAndCrowdingSurvival`` is applied.
    """
    pop = self.pop

    if len(pop) == 1:
        return pop

    # objective, constraint violation and interval bounds of each individual
    _F, _CV, xl, xu = pop.get("F", "CV", "xl", "xu")
    F = normalize(_F) + self.penalty * _CV

    # half the length of each normalized interval, largest dimension counts
    nxl, nxu = norm_bounds(pop, self.problem)
    half_length = (nxu - nxl) / 2
    val = half_length.max(axis=1)

    # non-dominated with respect to interval size and objective
    obj = np.column_stack([-val, F])
    front = NonDominatedSorting().do(obj, only_non_dominated_front=True)
    candidates, F, xl, xu, val = pop[front], F[front], xl[front], xu[front], val[front]

    # NOTE(review): the survival is applied to ``pop`` and not to ``candidates`` - confirm intent
    if len(candidates) != 1 and len(candidates) > self.n_max_candidates:
        candidates = RankAndCrowdingSurvival().do(
            self.problem, pop, self.n_max_candidates)

    return candidates
def _store(self, algorithm): problem = algorithm.problem X = algorithm.opt.get("X") if X.dtype != np.object: if problem.xl is not None and problem.xu is not None: X = normalize(X, x_min=problem.xl, x_max=problem.xu) return X
def _do(self, F, **kwargs):
    """Normalize ``F`` with the stored ideal and nadir point and return ``None``.

    Missing bounds are estimated by ``normalize``
    (``estimate_bounds_if_none=True``). The normalized values and the bounds
    are unpacked but not used any further in this method.
    """
    normalized = normalize(F,
                           x_min=self.ideal_point,
                           x_max=self.nadir_point,
                           estimate_bounds_if_none=True,
                           return_bounds=True)

    # four values are expected: normalized F, one unused entry and both bounds
    F, _, ideal_point, nadir_point = normalized

    return None
def _decide(self):
    """Check whether the IGD between consecutive history entries still varies.

    Returns
    -------
    bool
        ``True`` if the standard deviation of the ``self.n_last - 1`` IGD
        values is larger than ``self.tol``.
    """
    # every history entry normalized by the stored bounds
    H = [normalize(e, x_min=self.xl, x_max=self.xu) for e in self.history]

    n_pairs = self.n_last - 1
    perf = np.full(n_pairs, np.inf)

    # IGD of each entry with respect to its successor in the history
    for k in range(n_pairs):
        perf[k] = IGD(H[k]).calc(H[k + 1])

    return perf.std() > self.tol
def _evaluate(self, x, out, *args, **kwargs):
    """Compute the two objective values and store them in ``out["F"]``.

    The first 30 columns of ``x`` form the first group; ``self.m - 1`` further
    groups of ``self.n`` columns follow. The objectives are derived from the
    column sums of the groups and optionally normalized.
    """
    # split the variables: one group of 30 followed by m - 1 groups of n
    groups = [x[:, :30]]
    for i in range(self.m - 1):
        start = 30 + i * self.n
        groups.append(x[:, start:start + self.n])

    # number of ones in each group
    u = anp.column_stack([group.sum(axis=1) for group in groups])

    # 2 + u below n, 1 exactly at n and 0 otherwise
    v = (2 + u) * (u < self.n) + 1 * (u == self.n)
    g = v[:, 1:].sum(axis=1)

    f1 = 1 + u[:, 0]
    f2 = g * (1 / f1)

    if self.normalize:
        f1 = normalize(f1, 1, 31)
        f2 = normalize(f2, (self.m - 1) * 1 / 31, (self.m - 1))

    out["F"] = anp.column_stack([f1, f2])
def _calc(self, F):
    """Return the hypervolume of the non-dominated rows of ``F``.

    With ``self.normalize`` set the rows are normalized by the column-wise
    minimum and maximum of ``self.pf`` and measured against a reference point
    of ones; otherwise the column-wise maximum of ``self.pf`` is the
    reference point.
    """
    front = NonDominatedSorting().do(F, only_non_dominated_front=True)
    _F = F[front, :]

    if not self.normalize:
        hv = _HyperVolume(np.max(self.pf, axis=0))
    else:
        hv = _HyperVolume(np.ones(F.shape[1]))
        lower = np.min(self.pf, axis=0)
        upper = np.max(self.pf, axis=0)
        _F = normalize(_F, x_min=lower, x_max=upper)

    return hv.compute(_F)
def calc_normalized_constraints(self, G):
    """Update the stored constraint bounds with ``G`` and return ``G`` normalized.

    ``self.min_constraints`` becomes the element-wise minimum of its previous
    value and all rows of ``G``. ``self.max_constraints`` becomes the
    element-wise minimum of its previous value and the column-wise maximum of
    the non-dominated rows of ``G``. Both start at infinity when unset.
    """
    n_constr = G.shape[1]

    # update the ideal point for constraints
    if self.min_constraints is None:
        self.min_constraints = np.full(n_constr, np.inf)
    stacked_min = np.vstack((self.min_constraints, G))
    self.min_constraints = np.min(stacked_min, axis=0)

    # update the nadir point for constraints
    non_dominated = NonDominatedSorting().do(G,
                                             return_rank=True,
                                             only_non_dominated_front=True)
    if self.max_constraints is None:
        self.max_constraints = np.full(n_constr, np.inf)
    front_max = np.max(G[non_dominated, :], axis=0)
    stacked_max = np.vstack((self.max_constraints, front_max))
    self.max_constraints = np.min(stacked_max, axis=0)

    return normalize(G, self.min_constraints, self.max_constraints)
def _potential_optimal(self):
    """Return the potentially optimal individuals, truncating the archive first.

    If ``self.pop`` holds more than ``self.n_max_archive`` individuals it is
    reduced to the ``int(self.archive_reduct * self.n_max_archive)`` best
    ranked ones and written back to ``self.pop``. The candidates are the
    non-dominated individuals with respect to the negated mean half interval
    length and the penalized, normalized objective.
    """
    pop = self.pop

    if len(pop) == 1:
        return pop

    # objective, constraint violation and interval bounds of each individual
    _F, _CV, xl, xu = pop.get("F", "CV", "xl", "xu")
    F = normalize(_F) + self.penalty * _CV

    # half the length of each normalized interval, averaged over the dimensions
    nxl, nxu = norm_bounds(pop, self.problem)
    length = (nxu - nxl) / 2
    val = length.mean(axis=1)

    obj = np.column_stack([-val, F])

    # an unlimited archive size can cause issues - thus truncate if necessary
    if len(pop) > self.n_max_archive:
        _, rank = NonDominatedSorting().do(obj, return_rank=True)

        n_truncated = int(self.archive_reduct * self.n_max_archive)
        keep = np.argsort(rank)[:n_truncated]

        # all derived arrays are truncated as well to stay aligned with pop
        # NOTE(review): xl and xu are not truncated here - they are only indexed below
        pop, F, obj = pop[keep], F[keep], obj[keep]
        nxl, nxu = nxl[keep], nxu[keep]
        length, val = length[keep], val[keep]
        self.pop = pop

    front = NonDominatedSorting().do(obj, only_non_dominated_front=True)
    candidates, F, xl, xu, val = pop[front], F[front], xl[front], xu[front], val[front]

    # expanding all candidates in each iteration can cause issues - limit their number
    # NOTE(review): the survival is applied to ``pop`` and not to ``candidates`` - confirm intent
    if len(candidates) != 1 and len(candidates) > self.n_max_candidates:
        candidates = RankAndCrowdingSurvival().do(
            self.problem, pop, n_survive=self.n_max_candidates)

    return candidates
def _calc(self, F):
    """Return the hypervolume of the non-dominated rows of ``F``.

    With ``self.normalize`` set the rows are normalized by ``self.ideal_point``
    and ``self.nadir_point`` and the reference point is a vector of ones;
    otherwise ``self.ref_point`` is used on the raw values.
    """
    # only the non-dominated solutions contribute to the hypervolume
    front = NonDominatedSorting().do(F, only_non_dominated_front=True)
    _F = np.copy(F[front, :])

    if not self.normalize:
        hv = _HyperVolume(self.ref_point)
    else:
        # after the normalization the reference point is (1, ..., 1)
        hv = _HyperVolume(np.ones(F.shape[1]))
        _F = normalize(_F, x_min=self.ideal_point, x_max=self.nadir_point)

    return hv.compute(_F)
def _potential_optimal(self):
    """Return the potentially optimal individuals of ``self.pop``.

    The candidates are the non-dominated individuals with respect to the
    negated maximum half interval length and the penalized, normalized
    objective. More than ten candidates are reduced by random sampling; the
    candidate with the smallest objective value is always kept.
    """
    pop = self.pop

    if len(pop) == 1:
        return pop

    _F, _CV = pop.get("F", "CV")
    F = normalize(_F) + self.penalty * _CV

    # the problem is an attribute of the instance; the bare name ``problem``
    # is not defined in this method and raised a NameError
    nxl, nxu = norm_bounds(pop, self.problem)
    length = (nxu - nxl) / 2
    val = length.max(axis=1)

    # (a) non-dominated with respect to interval
    obj = np.column_stack([-val, F])
    I = NonDominatedSorting().do(obj, only_non_dominated_front=True)
    candidates, F = pop[I], F[I]

    if len(candidates) == 1:
        return candidates

    # TODO: The second condition needs to be implemented here. Exact implementation still unclear.
    n_max_candidates = 10

    if len(candidates) > n_max_candidates:
        # NOTE(review): sampling is done with replacement, duplicates are possible - confirm intent
        I = list(np.random.choice(np.arange(len(candidates)), n_max_candidates - 1))

        # the best candidate must never be dropped by the sampling
        k = np.argmin(F[:, 0])
        if k not in I:
            I.append(k)

        candidates = candidates[I]

    return candidates
def _decide(self):
    """Decide from the history window, in CV or in F space.

    If a constraint violation is present in the window the decision is based
    on the change of the minimum CV; otherwise on the movement of the ideal
    point, the nadir point and the IGD+ between consecutive entries.

    Returns
    -------
    bool
        ``True`` if a tracked quantity exceeds ``self.tol`` or if the latest
        entry is feasible while an earlier one in the window was not.
    """
    # the data of the latest generation
    c_F, c_CV = self.history[0]

    # minimum constraint violation of every entry in the window
    CV = np.array([e[1].min() for e in self.history])
    n_pairs = self.n_last - 1

    if CV.max() > 0:
        # some constraints were violated in the window, but the current
        # generation already contains a feasible solution - continue
        if c_CV.min() == 0:
            return True

        # still no feasible solution: look at the normalized change of the CV
        CV = CV / CV.max()
        CV = np.array([CV[k + 1] - CV[k] for k in range(n_pairs)])
        return CV.max() > self.tol

    # all entries are normalized by the bounds of the latest generation
    lower, upper = c_F.min(axis=0), c_F.max(axis=0)
    F = [normalize(e[0], lower, upper) for e in self.history]

    # the metrics to keep track of
    perf = np.full(n_pairs, np.inf)
    ideal, nadir = perf.copy(), perf.copy()

    for k in range(n_pairs):
        current, last = F[k], F[k + 1]
        ideal[k] = (current.min(axis=0) - last.min(axis=0)).max()
        nadir[k] = (current.max(axis=0) - last.max(axis=0)).max()
        perf[k] = IGDPlus(current).calc(last)

    return (ideal.max() > self.tol
            or nadir.max() > self.tol
            or perf.mean() > self.tol)
def predict(res, X):
    """Predict all target values for ``X`` with the fitted surrogates in ``res``.

    Parameters
    ----------
    res : dict
        Provides ``'surrogates'`` (one model per target), the flags
        ``'normalize_X'`` and ``'normalize_Y'`` and, if a flag is set, the
        matching ``'X_min'``/``'X_max'`` or ``'Y_min'``/``'Y_max'`` bounds.
    X : numpy.array
        Points to predict; a one dimensional array is converted to a column.

    Returns
    -------
    numpy.array
        One row per point and one column per surrogate.
    """
    # a one dimensional input is interpreted as a single column
    if X.ndim == 1:
        X = X[:, None]

    models = res['surrogates']
    Y = np.full((X.shape[0], len(models)), np.inf)

    # apply the same input normalization that was used while fitting
    if res['normalize_X']:
        X = normalize(X, res['X_min'], res['X_max'])

    # each target value is predicted by its own model
    for column, model in enumerate(models):
        Y[:, column] = model.predict(X)

    # map the targets back to the original scale if they were normalized
    if res['normalize_Y']:
        Y = denormalize(Y, res['Y_min'], res['Y_max'])

    return Y
def _do(self, pop, n_survive, algorithm=None, **kwargs):
    """Select survivors by the objective value multiplied with a niche count.

    The niche count of an individual sums ``1 - (d / sigma) ** alpha`` over all
    individuals within distance ``sigma`` in the normalized design space.

    Raises
    ------
    ValueError
        If ``F`` has more than one column.
    """
    X, F = pop.get("X", "F")

    if F.shape[1] != 1:
        raise ValueError(
            "FitnessSurvival can only used for single objective problems!")

    # pairwise distances in the normalized design space
    bounds = algorithm.problem
    _X = normalize(X, bounds.xl, bounds.xu)
    D = cdist(_X, _X)

    # contribution of every pair; pairs farther apart than sigma do not count
    sharing = 1 - (np.power(D / self.sigma, self.alpha))
    sharing[D > self.sigma] = 0
    niche_count = np.sum(sharing, axis=1)

    # modified objective value
    shared_F = F[:, 0] * niche_count
    order = np.argsort(shared_F)

    return pop[order[:n_survive]]
def _calc_pareto_front(self, n_pareto_points=100):
    """Return ``n_pareto_points`` points of the Pareto front.

    The first objective is spaced evenly from 1 to 31, the second one is
    ``(self.m - 1)`` divided by the first. The front is normalized if
    ``self.normalize`` is set.
    """
    f1 = 1 + anp.linspace(0, 1, n_pareto_points) * 30
    f2 = (self.m - 1) / f1

    pf = anp.column_stack([f1, f2])

    return normalize(pf) if self.normalize else pf
def norm_bounds(pop, problem):
    """Return the ``"xl"`` and ``"xu"`` values of ``pop`` normalized by the problem bounds.

    Returns
    -------
    tuple
        ``(nxl, nxu)`` - both normalized with ``problem.xl`` and ``problem.xu``.
    """
    nxl, nxu = (normalize(pop.get(key), problem.xl, problem.xu)
                for key in ("xl", "xu"))
    return nxl, nxu
def normalize(self, x):
    """Normalize ``x`` with the bounds stored in ``self._xl`` and ``self._xu``.

    Delegates to the ``normalize`` function that is in scope at call time.
    """
    lower, upper = self._xl, self._xu
    return normalize(x, lower, upper)
def _do(self, pop, n_survive, out=None, **kwargs):
    """Select survivors of a constrained single objective population.

    The constraint violation (sum of all positive constraint values) is
    combined with the objective according to ``self.method``:

    - ``parameter_less``: infeasible individuals get the worst objective
      value plus their constraint violation.
    - ``epsilon_constrained``: as above, applied only if the violation is
      larger than ``params["epsilon"]``.
    - ``penalty``: normalized objective plus ``params["weight"]`` times the
      violation.
    - ``stochastic_ranking``: ordering by the external ``stochastic_ranking``
      function with probability ``params["prob"]``.

    Parameters
    ----------
    pop : population
        Provides ``get("F", "G")``, ``len`` and index based selection.
    n_survive : int
        Number of individuals to select.
    out : object
        Not used.

    Returns
    -------
    population
        The selected individuals.

    Raises
    ------
    ValueError
        If ``F`` has more than one column.
    Exception
        If ``self.method`` is unknown.
    """
    # check if it is a population with a single objective
    F, G = pop.get("F", "G")
    if F.shape[1] != 1:
        raise ValueError("FitnessSurvival can only used for single objective problems!")

    # default parameters if not provided to the algorithm
    DEFAULT_PARAMS = {
        "parameter_less": {},
        "epsilon_constrained": {"epsilon": 1e-2},
        "penalty": {"weight": 0.1},
        # the stochastic ranking branch reads "prob" which had no default and
        # failed with a KeyError; "weight" is kept for backward compatibility
        "stochastic_ranking": {"weight": 0.45, "prob": 0.45},
    }

    # check if the method is known
    if self.method not in DEFAULT_PARAMS:
        raise Exception("Unknown constraint handling method %s" % self.method)

    # set the default parameter if not provided
    for key, value in DEFAULT_PARAMS[self.method].items():
        set_if_none(self.params, key, value)

    # make the lowest possible constraint violation 0 - if not violated in that constraint
    # (``np.float`` was removed from NumPy, the builtin ``float`` is equivalent)
    G = G * (G > 0).astype(float)

    # NOTE: a per-constraint normalization was disabled in the original code
    # (a loop without any effect) - the plain sum is used as constraint violation
    CV = np.sum(G, axis=1)

    if self.method in ("parameter_less", "epsilon_constrained"):
        # a violation up to the threshold is tolerated (zero for parameter_less)
        threshold = 0 if self.method == "parameter_less" else self.params["epsilon"]

        # if infeasible add the constraint violation to worst F value
        _F = np.max(F, axis=0) + CV
        infeasible = CV > threshold
        F[infeasible, 0] = _F[infeasible]

        # do fitness survival as done before with modified f
        return pop[np.argsort(F[:, 0])[:n_survive]]

    elif self.method == "penalty":
        _F = normalize(F)

        # add for each constraint violation a penalty
        _F[:, 0] = _F[:, 0] + self.params["weight"] * CV
        return pop[np.argsort(_F[:, 0])[:n_survive]]

    elif self.method == "stochastic_ranking":
        # first shuffle the population randomly - to be sorted again
        I = np.random.permutation(len(pop))
        pop, F, CV = pop[I], F[I], CV[I]

        from stochastic_ranking import stochastic_ranking

        index = stochastic_ranking(F[:, 0], CV, self.params["prob"])
        return pop[index[:n_survive]]
def fit(X,
        Y,
        methods=[
            'george_gp', 'gpy_gp', 'sklearn_gradient_boosting', 'my_dacefit',
            'torch_nn', 'scipy_rbf', 'sklearn_polyregr', 'sklearn_dacefit'
        ],
        func_error=calc_mse,
        disp=False,
        normalize_X=False,
        normalize_Y=False,
        do_crossvalidation=True,
        n_folds=5,
        crossvalidation_sets=None,
        debug=False):
    """
    This is the public interface which fits a surrogate res that is able to
    predict more than one target value.

    Parameters
    ----------
    X : numpy.array
        Design space which is a two dimensional nxm array, where n is the
        number of samples and m the number of variables. A one dimensional
        array is converted to a single column.
    Y : numpy.array
        The target values that should be predicted by the res. A one
        dimensional array is converted to a single column.
    methods : list of strings or string
        The methods which should be considered during the fitting. The list
        is only read, never modified.
    func_error : function
        The error metric which is used to compare the surrogate goodness.
        error(F, F_hat) where it compares the prediction F_hat of the res
        with the true values F.
    disp : bool
        Print output during the fitting of the surrogate archive with
        information about the error.
    normalize_X : bool
        Normalize the input before fitting; the bounds are stored in res.
    normalize_Y : bool
        Normalize the targets before fitting; the bounds are stored in res.
    do_crossvalidation : bool
        Select the best surrogate for each target through crossvalidation.
        If false, exactly one surrogate configuration must be provided.
    n_folds : int
        Number of folds used if no crossvalidation sets are provided.
    crossvalidation_sets : iterable of (training, test)
        Index sets to be used instead of randomly created ones.
    debug : bool
        If true warnings and exceptions are shown. Otherwise they are
        suppressed.

    Returns
    -------
    res : dict
        The res that is used to predict values. It can be heterogenous which
        means each target value is predicted by a different res type with
        different parameters.

    Raises
    ------
    Exception
        If X and Y do not have the same number of rows, if neither folds nor
        sets are available, if no surrogate could be loaded for the
        crossvalidation, or if no crossvalidation is done and not exactly one
        surrogate is provided.
    """

    # if it is only one dimensional convert it
    if X.ndim == 1:
        X = X[:, None]
    if Y.ndim == 1:
        Y = Y[:, None]

    if X.shape[0] != Y.shape[0]:
        raise Exception("X and Y does not have the same number of rows!")

    if isinstance(methods, str):
        methods = [methods]

    # the object that is returned in the end having all the necessary information for the prediction
    res = {
        'n_samples': X.shape[0],
        'n_var': X.shape[1],
        'n_targets': Y.shape[1],
        'normalize_X': normalize_X,
        'normalize_Y': normalize_Y
    }

    # remove duplicated rows if they occur in the input
    I = unique_rows(X)
    X, Y = X[I, :], Y[I, :]

    # normalize input or target if boolean values set to true
    if normalize_X:
        X, res['X_min'], res['X_max'] = normalize(X, return_bounds=True)
    if normalize_Y:
        Y, res['Y_min'], res['Y_max'] = normalize(Y, return_bounds=True)

    # create a list of all entries that should be run
    surrogates = []
    for entry in methods:
        try:
            method, params = get_method_and_params(entry)
        except Exception as e:
            if debug:
                raise
            warnings.warn(str(e))
            warnings.warn("Not able to load model %s. Will be skipped." % entry)
            continue

        for param in params:
            surrogates.append({
                'name': entry,
                'method': method,
                'param': param,
                'error': None
            })

    # list of crossvalidation results - for each target one entry
    crossvalidation = []

    # if the archive should be evaluated using crossvalidation
    if do_crossvalidation:

        # create the sets - either provided or randomly. The number of rows
        # after removing duplicates is used, otherwise the created indices
        # can exceed the data that is actually indexed below.
        if crossvalidation_sets is None and n_folds is not None:
            crossvalidation_sets = create_crossvalidation_sets(X.shape[0],
                                                               n_folds,
                                                               randomize=True)

        if crossvalidation_sets is None:
            raise Exception(
                "Either specify the number of folds or directly provide the crossvalidation sets!"
            )

        if not surrogates:
            raise Exception("None of the provided methods could be loaded!")

        # the sets are iterated once per target and method, and the result
        # arrays must match their number (not n_folds) if sets were provided
        crossvalidation_sets = list(crossvalidation_sets)
        n_sets = len(crossvalidation_sets)

        for m in range(res['n_targets']):

            # the crossvalidation results - each entry one parameter configuration
            result = []

            # for each method validate
            for entry in surrogates:
                name, method, param = entry['name'], entry['method'], entry['param']

                # folds that fail keep an infinite error and no duration
                error = np.full(n_sets, np.inf)
                duration = np.full(n_sets, np.nan)

                try:
                    # on each validation set
                    for i, (training, test) in enumerate(crossvalidation_sets):
                        impl = method(**param)

                        # silence the models only locally - a global filter
                        # would also hide the warnings emitted by this function
                        with warnings.catch_warnings():
                            if not debug:
                                warnings.simplefilter("ignore")

                            start_time = time.time()
                            impl.fit(X[training, :], Y[training, [m]])
                            duration[i] = time.time() - start_time

                            Y_hat = impl.predict(X[test, :], return_std=False)

                        error[i] = func_error(Y[test, [m]], Y_hat)

                except Exception as e:
                    if debug:
                        print(e)
                    warnings.warn("Error while using fitting: %s %s %s" %
                                  (name, method, param))

                result.append({
                    'name': name,
                    'method': method,
                    'param': param,
                    'error': error,
                    'duration': np.mean(duration)
                })

            # the configuration with the smallest mean error comes first
            result = sorted(result, key=lambda e: np.mean(e['error']))

            if disp:
                __display(result, str(m + 1))

            crossvalidation.append(result)

        res['crossvalidation'] = crossvalidation

    # if no crossvalidation should be done than there is only one res to select
    else:
        if len(surrogates) != 1:
            raise Exception(
                "Please provide exactly one surrogate if no surrogate selection is performed."
            )

        # add dummy entries here - a list per target, because the best entry
        # is selected by position below (a bare dict raised a KeyError)
        for m in range(res['n_targets']):
            crossvalidation.append([surrogates[0]])

    # finally fit the res on all available data
    models = []
    for m in range(res['n_targets']):

        # select the best available res found through crossvalidation
        best = crossvalidation[m][0]
        impl = best['method'](**best['param'])

        with warnings.catch_warnings():
            if not debug:
                warnings.simplefilter("ignore")
            impl.fit(X, Y[:, m])

        models.append(impl)

    res['surrogates'] = models

    return res
def _store(self, algorithm): X = algorithm.opt.get("X") if X.dtype != np.object: return normalize(algorithm.opt.get("X"), x_min=algorithm.problem.xl, x_max=algorithm.problem.xu)