def fit(self, my_lambda, coverage=3, max_iters=50):
    # while(True):
    print('beta = ', self.my_beta)
    print('lambda = ', my_lambda)
    my_args = (self, my_lambda, coverage, False)
    nfeatures = self.train_args['feature'].shape[1]
    w0 = self.randomw0(nfeatures)
    sigma_value = 0.02
    print('sigma0 ', sigma_value)
    es = CMAEvolutionStrategy(w0, sigma0=sigma_value,
                              inopts={'maxiter': max_iters, 'popsize': 40})
    while not es.stop():
        solutions = es.ask()
        fitnesses = [InterestingnessLearner.cost(x, *my_args) for x in solutions]
        es.tell(solutions, fitnesses)
        es.disp()
    self.nmcost(es.result[0], my_lambda, coverage, is_debug=True)
    final_model = self.createClassifier(es.result[0], coverage)
    print(final_model.size(), final_model.meanSupport())
    return final_model
def main(filename, directory):
    initialFilename = os.path.join(directory, "initial.json")
    genDir = os.path.join(directory, "gens")
    finalFilename = os.path.join(directory, "final.json")
    baseCondor = "base.condor"
    with open(baseCondor, "r") as f:
        condorConfig = f.read()
    optMkdir(directory)
    optMkdir(genDir)
    shutil.copyfile(filename, initialFilename)
    with open(filename, "r") as f:
        conf = json.load(f)
    paramNames = []
    paramVals = []
    flattenParams(conf, paramNames, paramVals, "")
    sigma0 = SIGMA0
    opts = Options()
    opts["popsize"] = POP_SIZE
    # opts.printme()
    cma = CMAEvolutionStrategy(paramVals, sigma0, opts)
    while (cma.countiter < NUM_GENS) and not (cma.stop()):
        thisGenDir = os.path.join(genDir, str(cma.countiter))
        optMkdir(thisGenDir)
        xs = cma.ask()
        xs, fits = runEvals(paramNames, xs, cma.countiter, thisGenDir, condorConfig)
        cma.tell(xs, fits)
    res = cma.result()
    paramsToJsonFile(finalFilename, paramNames, res[0])
class EvolutionStrategy:
    # Wrapper for CMAEvolutionStrategy

    def __init__(self, mu, sigma, popsize, weight_decay=0.01):
        self.es = CMAEvolutionStrategy(mu.tolist(), sigma, {'popsize': popsize})
        self.weight_decay = weight_decay
        self.solutions = None

    @property
    def best(self):
        best_sol = self.es.result[0]
        best_fit = -self.es.result[1]
        return best_sol, best_fit

    def _compute_weight_decay(self, model_param_list):
        model_param_grid = np.array(model_param_list)
        return -self.weight_decay * np.mean(model_param_grid * model_param_grid, axis=1)

    def ask(self):
        self.solutions = self.es.ask()
        return self.solutions

    def tell(self, reward_table_result):
        reward_table = -np.array(reward_table_result)
        if self.weight_decay > 0:
            l2_decay = self._compute_weight_decay(self.solutions)
            reward_table += l2_decay
        self.es.tell(self.solutions, reward_table.tolist())
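A minimal usage sketch for the wrapper above, assuming a caller-supplied reward function (the `episode_reward` below is illustrative, not part of the original code); rewards are passed in as-is because `tell` negates them and applies the L2 decay internally so that pycma's minimizer maximizes reward:

import numpy as np

def episode_reward(params):
    # illustrative stand-in for a rollout's total reward (higher is better)
    return -float(np.sum(np.square(params - 1.0)))

es = EvolutionStrategy(mu=np.zeros(16), sigma=0.1, popsize=32)
for generation in range(100):
    candidates = es.ask()                      # popsize candidate parameter vectors
    rewards = [episode_reward(c) for c in candidates]
    es.tell(rewards)                           # wrapper negates rewards for pycma
best_params, best_reward = es.best             # best solution and its (un-negated) reward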
def evaluate(mth, run_i, seed):
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(config):
        y = problem.evaluate_config(config)
        return y

    from cma import CMAEvolutionStrategy
    from litebo.utils.util_funcs import get_types
    from litebo.utils.config_space import Configuration

    types, bounds = get_types(cs)
    assert all(types == 0)

    # Check Constant Hyperparameter
    const_idx = list()
    for i, bound in enumerate(bounds):
        if np.isnan(bound[1]):
            const_idx.append(i)
    hp_num = len(bounds) - len(const_idx)

    es = CMAEvolutionStrategy(hp_num * [0], 0.99, inopts={'bounds': [0, 1], 'seed': seed})

    global_start_time = time.time()
    global_trial_counter = 0
    config_list = []
    perf_list = []
    time_list = []
    eval_num = 0
    while eval_num < max_runs:
        X = es.ask(number=es.popsize)
        _X = X.copy()
        for i in range(len(_X)):
            for index in const_idx:
                _X[i] = np.insert(_X[i], index, 0)  # np.insert returns a copy
        # _X = np.asarray(_X)

        values = []
        for xi in _X:
            # convert array to Configuration
            config = Configuration(cs, vector=xi)
            perf = objective_function(config)
            global_time = time.time() - global_start_time
            global_trial_counter += 1
            values.append(perf)
            print('=== CMAES Trial %d: %s perf=%f global_time=%f'
                  % (global_trial_counter, config, perf, global_time))
            config_list.append(config)
            perf_list.append(perf)
            time_list.append(global_time)
        values = np.reshape(values, (-1,))
        es.tell(X, values)
        eval_num += es.popsize

    print('===== Total evaluation times=%d. Truncate to max_runs=%d.' % (eval_num, max_runs))
    config_list = config_list[:max_runs]
    perf_list = perf_list[:max_runs]
    time_list = time_list[:max_runs]
    return config_list, perf_list, time_list
def evolve_greedy_policies(model_dist: ModelDist, iterations: int = 30, population_size: int = 5):
    """
    Evolves the greedy policy to find the best policies

    :param model_dist: Model distribution
    :param iterations: Number of evolutions
    :param population_size: The population size
    """
    print(f'Evolves the greedy policies for {model_dist.name} model with '
          f'{model_dist.num_tasks} tasks and {model_dist.num_servers} servers')
    eval_tasks, eval_servers = model_dist.generate_oneshot()
    lower_bound = greedy_algorithm(eval_tasks, eval_servers, ValuePriority(),
                                   ProductResources(), SumSpeed()).social_welfare
    print(f'Lower bound is {lower_bound}')
    reset_model(eval_tasks, eval_servers)

    evolution_strategy = CMAEvolutionStrategy(11 * [1], 0.2, {'popsize': population_size})
    for iteration in range(iterations):
        suggestions = evolution_strategy.ask()
        tasks, servers = model_dist.generate_oneshot()

        solutions = []
        for i, suggestion in enumerate(suggestions):
            solutions.append(
                greedy_algorithm(tasks, servers,
                                 TaskPriorityEvoStrategy(i, *suggestion[:5]),
                                 ServerSelectionEvoStrategy(i, *suggestion[5:8]),
                                 ResourceAllocationEvoStrategy(i, *suggestion[8:11])).social_welfare)
            reset_model(tasks, servers)

        evolution_strategy.tell(suggestions, solutions)
        evolution_strategy.disp()

        if iteration % 2 == 0:
            evaluation = greedy_algorithm(eval_tasks, eval_servers,
                                          TaskPriorityEvoStrategy(0, *suggestions[0][:5]),
                                          ServerSelectionEvoStrategy(0, *suggestions[0][5:8]),
                                          ResourceAllocationEvoStrategy(0, *suggestions[0][8:11]))
            print(f'Iter: {iteration} - {evaluation.social_welfare}')

    pprint.pprint(evolution_strategy.result())
def main():
    # param length
    # 10 fights
    # rating_mu, rating_sig, wins, losses, odds
    input_params_len = 10 * 2 * 4

    es = CMAEvolutionStrategy([0] * input_params_len, 0.5)
    while not es.stop():
        solutions = es.ask()
        func_vals = [get_betting_result(x) for x in solutions]
        es.tell(solutions, func_vals)
        es.logger.add()  # write data to disc to be plotted
        es.disp()
    es.result_pretty()
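The example above relies on a domain-specific `get_betting_result` objective that is not shown. For reference, here is a self-contained sketch of the same ask/tell loop on a toy sphere objective (the dimension and sigma are arbitrary illustrative choices, not values from the original code):

import numpy as np
from cma import CMAEvolutionStrategy

def sphere(x):
    # toy objective: sum of squares, minimized at the origin
    return float(np.sum(np.square(x)))

es = CMAEvolutionStrategy([0.5] * 8, 0.3)   # 8-dimensional start point, sigma0 = 0.3
while not es.stop():
    solutions = es.ask()                    # sample a population of candidates
    es.tell(solutions, [sphere(x) for x in solutions])
    es.logger.add()                         # log data for later plotting
    es.disp()
es.result_pretty()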
def maximize(self, runhistory: HistoryContainer, num_points: int,
             **kwargs) -> Iterable[Tuple[float, Configuration]]:
    try:
        from cma import CMAEvolutionStrategy
    except ImportError:
        raise ImportError("Package cma is not installed!")

    types, bounds = get_types(self.config_space)
    assert all(types == 0)

    # Check Constant Hyperparameter
    const_idx = list()
    for i, bound in enumerate(bounds):
        if np.isnan(bound[1]):
            const_idx.append(i)
    hp_num = len(bounds) - len(const_idx)

    es = CMAEvolutionStrategy(hp_num * [0], 0.99, inopts={'bounds': [0, 1]})

    eval_num = 0
    next_configs_by_acq_value = list()
    while eval_num < num_points:
        X = es.ask(number=es.popsize)
        _X = X.copy()
        for i in range(len(_X)):
            for index in const_idx:
                _X[i] = np.insert(_X[i], index, 0)
        _X = np.asarray(_X)
        values = self.acquisition_function._compute(_X)
        values = np.reshape(values, (-1,))
        es.tell(X, values)
        next_configs_by_acq_value.extend([(values[i], _X[i]) for i in range(es.popsize)])
        eval_num += es.popsize

    next_configs_by_acq_value.sort(reverse=True, key=lambda x: x[0])
    next_configs_by_acq_value = [_[1] for _ in next_configs_by_acq_value]
    next_configs_by_acq_value = [
        Configuration(self.config_space, vector=array)
        for array in next_configs_by_acq_value
    ]

    challengers = ChallengerList(next_configs_by_acq_value,
                                 self.config_space,
                                 self.random_chooser)
    self.random_chooser.next_smbo_iteration()
    return challengers
class CMAES(Algorithm):

    def initialize(self, **kwargs):
        super().initialize(**kwargs)

        if self.x0 is None:
            self.x0 = self.domain.l + self.domain.range / 2

        # cma operates on normalized scale
        x0 = self.domain.normalize(self.x0)
        self.cma = CMAEvolutionStrategy(x0=x0, sigma0=self.config.sigma0,
                                        inopts={'bounds': [0, 1]})
        self._X = None
        self._X_i = 0
        self._Y = None

    def _next(self, context=None):
        if self._X is None:
            # get new population
            self._X = self.cma.ask()
            self._Y = np.empty(len(self._X))
            self._X_i = 0
        return self.domain.denormalize(self._X[self._X_i])

    def finalize(self):
        self.cma.result_pretty()

    def best_predicted(self):
        xbest = None
        if self.cma.result.xbest is not None:
            xbest = self.domain.denormalize(self.cma.result.xbest)
        return xbest if xbest is not None else self.x0

    def add_data(self, data):
        self._Y[self._X_i] = data['y']
        self._X_i += 1

        # population complete
        if self._X_i == len(self._X):
            self.cma.tell(self._X, -self._Y)
            self._X = None

        super().add_data(data)
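The class above keeps CMA-ES on a normalized [0, 1] box and maps candidates back to the true domain before evaluation. A stripped-down sketch of that bounds-handling pattern, with illustrative bounds and a toy objective in place of the original domain object:

import numpy as np
from cma import CMAEvolutionStrategy

lower = np.array([-5.0, 0.0, 10.0])   # true lower bounds (illustrative)
upper = np.array([5.0, 2.0, 20.0])    # true upper bounds (illustrative)

def denormalize(u):
    # map a point from the unit cube back to the original box
    return lower + np.asarray(u) * (upper - lower)

es = CMAEvolutionStrategy([0.5] * 3, 0.25, inopts={'bounds': [0, 1]})
while not es.stop():
    candidates = es.ask()
    values = [float(np.sum(denormalize(u) ** 2)) for u in candidates]  # toy objective
    es.tell(candidates, values)
print('best (denormalized):', denormalize(es.result.xbest))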
def update_borders(self):
    for i in range(len(self.clusters)):
        cluster = self.clusters[i]
        len_bounds = np.linalg.norm(cluster.border[1] - cluster.border[0])
        es = CMAEvolutionStrategy(
            cluster.border.ravel().tolist(), len_bounds * 0.1,
            {'bounds': [self.boundary.min_bounds[0], self.boundary.max_bounds[0]]})
        while not es.stop():
            solutions = es.ask()
            # TODO
            es.tell(solutions, [self.evaluate_border(border, i) for border in solutions])
            # es.tell(solutions, [cluster.evaluate_border(border) for border in solutions])
        x_best = es.result()[0]
        # if x_best is not None and cluster.in_global_border(x_best):
        if x_best is not None:
            cluster.border = x_best.reshape(cluster.border.shape)
def run():
    train = 0

    names = [
        # 'bet_pred_a', 'bet_pred_b',
        'bet_odds_a', 'bet_odds_b',
        'bet_wnl_a', 'bet_wnl_b',
        'bet_ts_a', 'bet_ts_b',
        'bet_tmi_a', 'bet_tmi_b',
        'bet_tma_a', 'bet_tma_b',
    ]
    params = [0, 0, 0, 0, 0, 0]
    bounds = [[-np.inf], [np.inf]]
    assert len(params) == len(names)
    # assert len(params) == len(bounds[0])

    if train:
        sigma = 1
        opts = CMAOptions()
        # opts['tolx'] = 1E-2
        opts['bounds'] = bounds
        es = CMAEvolutionStrategy(params, sigma, inopts=opts)
        while not es.stop():
            solutions = es.ask()
            fitness = [main(x, train=1) for x in solutions]
            es.tell(solutions, fitness)
            es.disp()
            print(list(es.result[0]))
            print(list(es.result[5]))
        es.result_pretty()
        print('')
        print('best')
        print(list(es.result[0]))
        print('')
        print('xfavorite: distribution mean in "phenotype" space, to be considered as current best estimate of the optimum')
        print(list(es.result[5]))
    else:
        main(params)
def cma_minimize(acq_function, bounds, return_best_only=True, **kwargs) -> Tuple[torch.Tensor, ...]:
    x0 = 0.5 * np.ones(bounds.shape[-1])
    opts = {'bounds': [0, 1], "popsize": kwargs.get('popsize', 100), "seed": 10, "verbose": -1}
    if "maxiter" in kwargs:
        opts.update(maxiter=kwargs["maxiter"])

    es = CMAEvolutionStrategy(x0=x0, sigma0=kwargs.get('sigma0', 0.5), inopts=opts)

    xs_list, y_list = [], []
    with torch.no_grad():
        while not es.stop():
            xs = es.ask()
            X = torch.tensor(xs, dtype=torch.float64)
            Y = acq_function(X.unsqueeze(-2))
            y = Y.view(-1).double().numpy()
            es.tell(xs, y)
            xs_list.append(xs)
            y_list.append(y)

    if return_best_only:
        cand = torch.tensor([es.best.x])
        cand_val = torch.tensor([es.best.f])
    else:
        cand = torch.tensor(np.concatenate(xs_list, axis=0))
        cand_val = torch.tensor(np.concatenate(y_list, axis=0))
    return cand, cand_val
class CMAES:

    def __init__(self, x0, s0=0.5, opts={}):
        """ wrapper around cma.CMAEvolutionStrategy """
        self.num_parameters = len(x0)
        print('{} params in controller'.format(self.num_parameters))
        self.solver = CMAEvolutionStrategy(x0, s0, opts)

    def __repr__(self):
        return '<pycma wrapper>'

    def ask(self):
        """ sample parameters """
        samples = self.solver.ask()
        return np.array(samples).reshape(-1, self.num_parameters)

    def tell(self, samples, fitness):
        """ update parameters with total episode reward """
        return self.solver.tell(samples, -1 * fitness)

    @property
    def mean(self):
        return self.solver.mean
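A short usage sketch for this wrapper, with a hypothetical `evaluate` standing in for an episode rollout that returns total reward; `tell` expects the raw (to-be-maximized) rewards and negates them itself before handing them to pycma:

import numpy as np

def evaluate(params):
    # hypothetical episode rollout returning total reward (higher is better)
    return -float(np.sum(params ** 2))

solver = CMAES(x0=[0.0] * 10, s0=0.5, opts={'popsize': 16})
for generation in range(50):
    samples = solver.ask()                              # shape (popsize, num_parameters)
    fitness = np.array([evaluate(p) for p in samples])
    solver.tell(samples, fitness)                       # negated internally for pycma's minimizer
print('current distribution mean:', solver.mean)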
def train(self, cost, model, x_data, y_data=None, tolfun=1e-11,
          popsize=None, maxiter=None, use_grad=False):
    """Trains the ``model`` using the custom `cost` function.

    Args:
        cost (theta.costfunctions): the cost function.
        model (theta.model.Model or theta.rtbm.RTBM): the model to be trained.
        x_data (numpy.array): the support data with shape (Nv, Ndata).
        y_data (numpy.array): the target prediction.
        tolfun (float): the maximum tolerance of the cost function fluctuation to stop the minimization.
        popsize (int): the population size.
        maxiter (int): the maximum number of iterations.
        use_grad (bool): if True the gradients for the cost and model are used in the minimization.

    Returns:
        numpy.array: the optimal parameters

    Note:
        The parameters of the model are changed by this algorithm.
    """
    initsol = np.real(model.get_parameters())
    args = {'bounds': model.get_bounds(), 'tolfun': tolfun, 'verb_log': 0}
    sigma = np.max(model.get_bounds()[1]) * 0.1

    if popsize is not None:
        args['popsize'] = popsize
    if maxiter is not None:
        args['maxiter'] = maxiter

    grad = None
    if use_grad:
        grad = worker_gradient

    es = CMAEvolutionStrategy(initsol, sigma, args)

    if self.num_cores > 1:
        with closing(mp.Pool(self.num_cores, initializer=worker_initialize,
                             initargs=(cost, model, x_data, y_data))) as pool:
            while not es.stop():
                f_values, solutions = [], []
                while len(solutions) < es.popsize:
                    x = es.ask(es.popsize - len(solutions), gradf=grad)
                    curr_fit = pool.map_async(worker_compute, x).get()
                    for value, solution in zip(curr_fit, x):
                        if not np.isnan(value):
                            solutions.append(solution)
                            f_values.append(value)
                es.tell(solutions, f_values)
                es.disp()
            pool.terminate()
    else:
        worker_initialize(cost, model, x_data, y_data)
        while not es.stop():
            f_values, solutions = [], []
            while len(solutions) < es.popsize:
                curr_fit = x = np.NaN
                while np.isnan(curr_fit):
                    x = es.ask(1, gradf=grad)[0]
                    curr_fit = worker_compute(x)
                solutions.append(x)
                f_values.append(curr_fit)
            es.tell(solutions, f_values)
            es.disp()

    print(es.result)
    model.set_parameters(es.result[0])
    return es.result[0]
def run():
    train = 0

    names = [
        # 'pred_a', 'pred_b',  # 0.0
        # 'odds_a', 'odds_b',  # 2.1
        # 'bet_wnl_a', 'bet_wnl_b',  # 2.1
        # 'bet_ts_a', 'bet_ts_b',  # 1.5
        # 'bet_tmi_a', 'bet_tmi_b',  # 1.1
        # 'bet_tma_a', 'bet_tma_b',  # 0.9
        # 'bet_drs_a', 'bet_drs_b',  # 0.5
        'bet_sfc_a', 'bet_sfc_b',  # -0.1
        # 'bet_spd_a', 'bet_spd_b',  # 0.8
        # 'bet_set_a', 'bet_set_b',  # 1.0
        # 'bet_gms_a', 'bet_gms_b',  # -0.2
        # 'bet_tie_a', 'bet_tie_b',  # 4.1
        # 'bet_ups_a', 'bet_ups_b',  # -0.2
        # 'bet_age_a', 'bet_age_b',  # -2.5
    ]
    params = [0, 0]
    bounds = [[-np.inf], [np.inf]]
    assert len(params) == len(names)
    assert len(params) == len(bounds)

    if train:
        sigma = 1
        opts = CMAOptions()
        # opts['tolx'] = 1E-2
        opts['bounds'] = bounds
        es = CMAEvolutionStrategy(params, sigma, inopts=opts)
        while not es.stop():
            solutions = es.ask()
            try:
                fitness = [main(x, train) for x in solutions]
            except ValueError as exc:
                print(str(exc))
                continue
            es.tell(solutions, fitness)
            es.disp()
            print(list(es.result[0]))
            print(list(es.result[5]))
        es.result_pretty()
        print(f'finished after {es.result[3]} evaluations and {es.result[4]} iterations')
        print('')
        print('best')
        print(list(es.result[0]))
        print('')
        print('xfavorite: distribution mean in "phenotype" space, to be considered as current best estimate of the optimum')
        print(list(es.result[5]))

        # res = minimize(main, params, (train,), bounds=bounds)
        # print('')
        # print(f'{res.nit} iterations')
        # print(f'Success: {res.success} {res.message}')
        # print(f'Solution: {res.x}')
        # return
    else:
        main(params)
def run_optimization(self):
    """
    Runs optimization job
    :return:
    """
    simulation_name = self.parse_args.input
    population_size = self.parse_args.population_size

    self.optim_param_mgr = OptimizationParameterManager()
    optim_param_mgr = self.optim_param_mgr
    # optim_param_mgr.parse(args.params_file)
    optim_param_mgr.parse(self.parse_args.params_file)

    starting_params = optim_param_mgr.get_starting_points()
    print 'starting_params (mapped to [0,1])=', starting_params
    print 'remapped (true) starting params=', optim_param_mgr.params_from_0_1(starting_params)
    print 'dictionary of remapped parameters labeled by parameter name=', optim_param_mgr.param_from_0_1_dict(starting_params)
    print 'simulation_name=', simulation_name

    self.workload_dict = self.prepare_optimization_run(simulation_name=simulation_name)
    workload_dict = self.workload_dict
    print workload_dict

    std_dev = optim_param_mgr.std_dev
    default_bounds = optim_param_mgr.default_bounds

    optim = CMAEvolutionStrategy(starting_params, std_dev, {'bounds': list(default_bounds)})

    while not optim.stop():  # iterate
        # get candidate solutions
        # param_set_list = optim.ask(number=self.num_workers)
        # param_set_list = optim.ask(number=1)
        param_set_list = optim.ask(number=population_size)

        # set param_set_list for run_task to iterate over
        self.set_param_set_list(param_set_list=param_set_list)

        # #debug
        # return_result_vec = [self.fcn(optim_param_mgr.params_from_0_1(X)) for X in param_set_list]

        # evaluate target function values at the candidate solutions
        return_result_vec = np.array([], dtype=float)
        for param_set in self.param_generator(self.num_workers):
            print 'CURRENT PARAM SET=', param_set
            # distribute param_set to workers - run_task spawns the appropriate number of workers
            # given self.num_workers and the size of the param_set
            partial_return_result_vec = self.run_task(workload_dict, param_set)
            return_result_vec = np.append(return_result_vec, partial_return_result_vec)
            print 'FINISHED PARAM_SET=', param_set

        optim.tell(param_set_list, return_result_vec)  # do all the real "update" work
        optim.disp(20)  # display info every 20th iteration
        optim.logger.add()  # log another "data line"

    optimal_parameters = optim.result()[0]

    print('termination by', optim.stop())
    print('best f-value =', optim.result()[1])

    optimal_parameters_remapped = optim_param_mgr.params_from_0_1(optim.result()[0])
    print('best solution =', optimal_parameters_remapped)
    # print('best solution =', optim_param_mgr.params_from_0_1(optim.result()[0]))

    print optim_param_mgr.params_names

    self.save_optimal_parameters(optimal_parameters)
    self.save_optimal_simulation(optimal_parameters)
optim_noise = CMAEvolutionStrategy(fixnoise, 0.4)  # 0.4) # 0.2
optim_class = CMAEvolutionStrategy(128 * [0.0], 0.2)
scores_all = []
generations = []
for i in tqdm.trange(cmasteps, desc="CMA steps"):
    class_codes = optim_class.ask()
    noise_codes = optim_noise.ask()
    codes_tsr = torch.from_numpy(np.array(class_codes)).float()
    noise_tsr = torch.from_numpy(np.array(noise_codes)).float()
    latent_code = torch.cat((noise_tsr, codes_tsr), dim=1).cuda()  # this initializes the inner loop
    with torch.no_grad():
        imgs = G.visualize(latent_code).cpu()
    scores = scorer.score_tsr(imgs)
    print("step %d dsim %.3f (%.3f) (norm %.2f noise norm %.2f)" % (
        i, scores.mean(), scores.std(), codes_tsr.norm(dim=1).mean(), noise_tsr.norm(dim=1).mean()))
    optim_class.tell(class_codes, -scores)
    optim_noise.tell(noise_codes, -scores)
    scores_all.extend(list(scores))
    generations.extend([i] * len(scores))

scores_all = np.array(scores_all)
generations = np.array(generations)
mtg = ToPILImage()(make_grid(imgs, nrow=6))
mtg.save(join(savedir, "lastgen%s_%05d_score%.1f.jpg" % (methodlab, RND, scores.mean())))
np.savez(join(savedir, "scores%s_%05d.npz" % (methodlab, RND)),
         generations=generations, scores_all=scores_all, codes_fin=latent_code.cpu().numpy())
visualize_trajectory(scores_all, generations, title_str=methodlab).savefig(
    join(savedir, "traj%s_%05d_score%.1f.jpg" % (methodlab, RND, scores.mean())))

#%%
methodlab = "CMA_class"
optim_class = CMAEvolutionStrategy(128 * [0.0], 0.2)
scores_all = []
generations = []
def run():
    train = 0

    names = [
        'bet_multi_param',
        'bet_tma_a', 'bet_tma_b',
        'bet_lati_a', 'bet_lati_b',
        'bet_tiew_a', 'bet_tiew_b',
        # 'bet_upsr_a', 'bet_upsr_b',
        # 'bet_sfcw_a', 'bet_sfcw_b',
        # 'bet_wnll_a', 'bet_wnll_b',
        # 'bet_tier_a', 'bet_tier_b',
        # 'bet_upsl_a', 'bet_upsl_b',
        # 'bet_ts_a', 'bet_ts_b',
        # 'bet_wnlw_a', 'bet_wnlw_b',
        # 'bet_setw_a', 'bet_setw_b',
        # 'bet_setl_a', 'bet_setl_b',
        # 'bet_gms_a', 'bet_gms_b',
        # 'bet_drsl_a', 'bet_drsl_b',
        # 'bet_tmi_a', 'bet_tmi_b',
        # 'bet_wnlr_a', 'bet_wnlr_b',
        # 'bet_upsw_a', 'bet_upsw_b',
        # 'bet_drs_a', 'bet_drs_b',
        # 'bet_setr_a', 'bet_setr_b',
        # 'bet_drsw_a', 'bet_drsw_b',
        # 'bet_tiel_a', 'bet_tiel_b',
        # 'bet_age_a', 'bet_age_b',
        # 'bet_spd_a', 'bet_spd_b',
        # 'bet_sfcr_a', 'bet_sfcr_b',
    ]
    tolx = 10000  # the higher this is, the longer the run
    params = [-14, 0, 0, 0, 0, 0, 0]
    bounds = [[-np.inf], [np.inf]]
    assert len(params) == len(names)
    # assert len(params) == len(bounds)

    if train:
        time_start = time()
        mins = 60 * 4
        sigma = 1
        opts = CMAOptions()
        opts['bounds'] = bounds
        es = CMAEvolutionStrategy(params, sigma, inopts=opts)
        while not es.stop():
            solutions = es.ask()
            try:
                fitness = [main(x, train) for x in solutions]
            except ValueError as exc:
                print(str(exc))
                continue
            es.tell(solutions, fitness)
            es.disp()
            # print(list(es.result[0]))
            print(f'tolx={es.opts["tolx"]:.3f} sol={list(es.result[5])}')
            es.opts['tolx'] = es.result[3] / tolx
            if time() - time_start > 60 * mins:
                print(f'{mins}min limit reached')
                break
        es.result_pretty()
        print(f'finished after {es.result[3]} evaluations and {es.result[4]} iterations')
        # print('')
        # print('best')
        # print(list(es.result[0]))
        print('')
        print('xfavorite: distribution mean in "phenotype" space, to be considered as current best estimate of the optimum')
        print(list(es.result[5]))

        # pmin = -20
        # pmax = 20
        # step = (pmax - pmin) / 10
        # rranges = [
        #     slice(pmin, pmax, step),
        #     slice(pmin, pmax, step),
        # ]
        # res = optimize.brute(main, rranges, (train,), finish=None)
        # print(res)
        # return

        # res = minimize(main, params, (train,), bounds=bounds)
        # print('')
        # print(f'{res.nit} iterations')
        # print(f'Success: {res.success} {res.message}')
        # print(f'Solution: {res.x}')
        # return
    else:
        main(params)
class CMAOptimizationSteppable(SteppableBasePy):

    def __init__(self, _simulator, _frequency=1):
        SteppableBasePy.__init__(self, _simulator, _frequency)
        self.optim = None
        self.sim_length_mcs = self.simulator.getNumSteps() - 1
        self.f_vec = []
        self.X_vec = []
        self.X_vec_check = []
        self.num_fcn_evals = -1

    def minimized_fcn(self, *args, **kwds):
        """
        This function needs to be overloaded in the subclass - it implements the simulation fitness metric
        :return {float}: number describing the "fitness" of the simulation
        """
        return 0.0

    def initial_condition_fcn(self, *args, **kwds):
        """
        This function prepares the initial condition for the simulation.
        Typically it creates the cell field and initializes all cell and field properties
        :param args: first argument is a vector of parameters that are being optimized. The rest are up to the user
        :param kwds: keyword arguments - those are up to the user
        :return: None
        """
        pass

    def init_optimization_strategy(self, *args, **kwds):
        """
        init_optimization_strategy initializes the optimizer object. Its arguments depend on the specific optimizer used.
        In the case of the CMA optimizer the options are described here: https://pypi.python.org/pypi/cma
        :param args: see https://pypi.python.org/pypi/cma
        :param kwds: see https://pypi.python.org/pypi/cma
        :return: None
        """
        self.optim = CMAEvolutionStrategy(*args, **kwds)

    def optimization_step(self, mcs):
        """
        This function implements the housekeeping associated with running the optimization algorithm in a steppable
        :param mcs {int}: current mcs
        :return: None
        """
        if not mcs % self.sim_length_mcs:
            if self.optim.stop():
                self.stopSimulation()
                print('termination by', self.optim.stop())
                print('best f-value =', self.optim.result()[1])
                print('best solution =', self.optim.result()[0])

            if not len(self.X_vec):
                self.X_vec = self.optim.ask()
                if len(self.f_vec):
                    # print 'self.X_vec_check=', self.X_vec_check
                    # print 'self.f_vec=', self.f_vec
                    self.optim.tell(self.X_vec_check, self.f_vec)  # do all the real "update" work
                    self.optim.disp(20)  # display info every 20th iteration
                    self.optim.logger.add()  # log another "data line"
                self.f_vec = []
                self.num_fcn_evals = len(self.X_vec)
                self.X_vec_check = deepcopy(self.X_vec)

            self.X_current = self.X_vec[0]

            if len(self.X_vec_check) != self.num_fcn_evals:
                fcn_target = self.minimized_fcn()
                self.f_vec.append(fcn_target)

            self.X_vec.pop(0)
            self.num_fcn_evals -= 1

            CompuCellSetup.reset_current_step(0)
            self.simulator.setStep(0)
            self.clean_cell_field(reset_inventory=True)
            self.initial_condition_fcn(self.X_current)
if encoder is not None:
    x_mean = encoder.predict(img[np.newaxis, ...])

fitness_func = Fitness(img, decoder)
best_img = None
best_z = None
best_score = -1
for i in range(args.runs):
    print('Runs: %d / %d' % (i + 1, args.runs))
    if encoder is None:
        init = np.random.randn(decoder.input_shape[-1]) * args.std
    else:
        init = x_mean[0]
    es = ES(init, args.sigma)
    for ite in range(args.iterations):
        dnas = np.asarray(es.ask())
        es.tell(dnas, fitness_func(dnas))
        es.disp()
    es.result_pretty()

    z = np.asarray(es.result[0])
    img_reconstruct = decoder.predict(z[np.newaxis, ...])[0]
    mse = np.mean(np.square(img_reconstruct - img))
    print('mse: {:.2f}'.format(mse))
    if mse > best_score:
        best_score = mse
        best_z = z
        best_img = img_reconstruct

output_img = np.round(
    np.concatenate((np.squeeze(img), np.squeeze(img_reconstruct)), axis=1) * 127.5 + 127.5
).astype(np.uint8)
filename, ext = os.path.splitext(args.output)
class ALGO:

    def __init__(self, max_evaluations, n_points, dimension, function_id, **kwargs):
        self.function_id = function_id
        self.n_points = n_points
        self.dimension = dimension
        self.function = CEC2005(dimension)[function_id].objective_function
        self.max_evaluations = max_evaluations * dimension
        self.max_bounds = Boundary(dimension, function_id).max_bounds
        self.min_bounds = Boundary(dimension, function_id).min_bounds
        self.optimal_position = CEC2005(dimension)[function_id].get_optimal_solutions()[0].phenome
        self.optimal_fitness = self.function(self.optimal_position)
        # self.problem = Problem(self.function.objective_function, max_evaluations=max_evaluations)
        self.boundary = Boundary(dimension, function_id)
        self.verbose = kwargs.get('verbose', False)
        self.population = []  # self.init_population(self.n_points, self.dimension)
        self.algo_type = kwargs.get('algo_type', 'CMA')
        self.init_algo()

        self.iteration = 0
        self.should_terminate = False
        self.optimal_solution = self.find_optimal_solution()

        self.stats = OrderedDict([
            ('iteration', []),
            ('FEs', []),
            ('error', []),
            ('best_value', []),
            # ('best_position', [])
        ])

        self.run()
        self.best_solution = min(self.population, key=attrgetter('objective_values'))

    def init_population(self, n_points, dim):
        positions = np.zeros((n_points, dim))
        for d in range(dim):
            positions[:, d] = np.random.uniform(self.boundary.min_bounds[d],
                                                self.boundary.max_bounds[d],
                                                self.n_points)
        population = [Individual(position) for position in positions]
        self.problem.batch_evaluate(population)
        population = sorted(population, key=attrgetter('objective_values'))
        population = population[:len(population) // 2]
        ranks = range(1, len(population) + 1)
        return [Cluster(population, ranks)]

    def init_algo(self):
        init_min_bound = self.boundary.init_min_bounds[0]
        init_max_bound = self.boundary.init_max_bounds[0]
        min_bound = self.boundary.min_bounds[0]
        max_bound = self.boundary.max_bounds[0]

        if self.algo_type == 'CMA':
            init_point = [(init_max_bound + init_min_bound) / 2] * self.dimension
            sigma = (init_max_bound - init_min_bound) * 0.2
            # print 'init_point:', init_point
            # print 'sigma:', sigma
            self.algo = CMAEvolutionStrategy(init_point, sigma,
                                             {'popsize': self.n_points,
                                              'bounds': [min_bound, max_bound]})
        # elif self.algo_type == 'PSO':

    def find_optimal_solution(self):
        dimension = self.dimension
        function_id = self.function_id
        optimal_solutions = CEC2005(dimension)[function_id].get_optimal_solutions()
        test_prob = Problem(CEC2005(dimension)[function_id].objective_function)
        test_prob.batch_evaluate(optimal_solutions)
        return min(optimal_solutions, key=attrgetter('objective_values'))

    def run(self):
        self.iteration = self.iteration + 1

        if self.algo_type == 'CMA':
            positions = self.algo.ask()
            solutions = [Individual(position) for position in positions]
            try:
                self.problem.batch_evaluate(solutions)
            except ResourcesExhausted:
                self.should_terminate = True
                return
            self.algo.tell([p.phenome for p in solutions],
                           [p.objective_values for p in solutions])
            self.population = sorted(solutions, key=attrgetter('objective_values'))

        self.best_solution = min(self.population, key=attrgetter('objective_values'))
        self.update_statistics()

    def found_optimum(self, delta=1e-8):
        if self.best_solution.objective_values - self.optimal_solution.objective_values < delta:
            return True
        return False

    def stop(self):
        if self.algo.stop():
            if self.verbose:
                print('Algorithm stops!')
            self.should_terminate = True
        elif self.problem.remaining_evaluations < 1:
            if self.verbose:
                print('Consumed all evaluations!')
            self.should_terminate = True
        elif self.found_optimum(delta=goal_error):
            if self.verbose:
                print('Found Optimum!')
            self.should_terminate = True
        return self.should_terminate

    def print_status(self):
        error = self.best_solution.objective_values - self.optimal_solution.objective_values
        print('')
        print('    Iteration %d: error = %e' % (self.iteration, error))
        print('     Evaluations: consumed %d, remain %d' %
              (self.problem.consumed_evaluations, self.problem.remaining_evaluations))
        print('    best fitness: %f at %r' %
              (self.best_solution.objective_values, self.best_solution.phenome.tolist()))
        print('optimal solution: %f at %r' %
              (self.optimal_solution.objective_values, self.optimal_solution.phenome))
        print('')

    def update_statistics(self):
        self.stats['iteration'].append(self.iteration)
        self.stats['FEs'].append(self.problem.consumed_evaluations)
        self.stats['error'].append(self.best_solution.objective_values -
                                   self.optimal_solution.objective_values)
        self.stats['best_value'].append(self.best_solution.objective_values)
        scale_h=opt.scale_h, save_dir=s_dir_name, num_samples=opt.num_samples)
    for solution in solutions
]
playable = 0
bc0 = 0
bc1 = 0
end_states = []
for result in results:
    bcs.append([result[1], result[2]])
    objectives.append(result[0])
    playable += result[0]
    bc0 += result[1]
    bc1 += result[2]
    end_states.append(result[3])
es.tell(solutions, objectives)
playable = 100 - playable / float(len(objectives))
bc0 = bc0 / float(len(objectives))
bc1 = bc1 / float(len(objectives))
archive = 0
tb_logging(archive, i, start_time, logdir, playable, bc0, bc1, end_states)
i += 1
class CovarianceMatrixAdaptationEvolutionStrategyAgent:
    """Covariance Matrix Adaptation Evolution Strategy.

    Note:
        The CMA-ES method can hardly learn a successful policy even for simple
        task. It is still maintained here only for consistency with original
        rllab paper.

    Args:
        env_spec (garage.envs.EnvSpec): Environment specification.
        policy_arch (garage.np.policies.Policy): Action policy.
        baseline (garage.np.baselines.Baseline): Baseline for GAE
            (Generalized Advantage Estimation).
        num_candidate_policies (int): Number of policies sampled in one epoch.
        discount_factor (float): Environment reward discount.
        max_rollout_length (int): Maximum length of a single rollout.
        parameters_variance (float): Initial std for param distribution.
    """

    def __init__(
        self,
        env_spec,
        num_candidate_policies: int = 20,
        policy_arch: GDKC = GDKC(CategoricalMLPPolicy, hidden_sizes=(32, 32)),
        baseline_arch: GDKC = GDKC(LinearFeatureBaseline),  # Baseline for GAE (Generalized Advantage Estimation).
        discount_factor: float = 0.99,
        max_rollout_length: int = 500,
        parameters_variance: float = 1.0,
    ):
        self.policy: Module = policy_arch(env_spec=env_spec)
        self.max_path_length = max_rollout_length  # TODO: REMOVE THIS..

        self.sampler_cls = RaySampler

        self._baseline = baseline_arch()
        self._max_rollout_length = max_rollout_length
        self._env_spec = env_spec
        self._discount = discount_factor
        self._parameters_variance = parameters_variance
        self._num_candidate_policies = num_candidate_policies

        self._evolution_strategy: CMAEvolutionStrategy = None
        self._shared_params = None
        self._all_returns = None

    def _resample_shared_parameters(self) -> None:
        """Return sample parameters.

        Returns:
            np.ndarray: A numpy array of parameter values.
        """
        self._shared_params = self._evolution_strategy.ask()

    def build(self):
        """ """
        pass  # TODO:

    def __build__(self, init_mean_parameters: Sequence):
        self._evolution_strategy = CMAEvolutionStrategy(
            init_mean_parameters,
            self._parameters_variance,  # Sigma is shared
            {"popsize": self._num_candidate_policies},  # Population size
        )
        self._resample_shared_parameters()
        self.policy.set_param_values(self._shared_params[0])

    def train(self, runner):
        """Initialize variables and start training.

        Args:
            runner (LocalRunner): LocalRunner is passed to give algorithm
                the access to runner.step_epochs(), which provides services
                such as snapshotting and sampler control.

        Returns:
            float: The average return in last epoch cycle.
        """
        self.__build__(self.policy.get_param_values())
        self._all_returns = []

        # start actual training
        last_return = None
        for _ in runner.step_epochs():
            for _ in range(self._num_candidate_policies):
                runner.step_path = runner.obtain_samples(runner.step_itr)
                last_return = self.train_once(runner.step_itr, runner.step_path)
                runner.step_itr += 1

        return last_return

    def extract_signal(self):
        """ """
        pass

    def train_once(self, iteration_number: int, trajectories: Sequence, *,
                   writer: Writer = MockWriter()):
        """Perform one step of policy optimization given one batch of samples.

        Args:
            iteration_number (int): Iteration number.
            trajectories (list[dict]): A list of collected paths.

        Returns:
            float: The average return in last epoch cycle.

        @param writer:
        @type writer:
        """
        undiscounted_returns = []
        for trajectory in TrajectoryBatch.from_trajectory_list(
                self._env_spec, trajectories).split():  # TODO: EEEEW
            undiscounted_returns.append(sum(trajectory.rewards))

        sample_returns = np.mean(undiscounted_returns)
        self._all_returns.append(sample_returns)

        epoch = iteration_number // self._num_candidate_policies
        i_sample = iteration_number - epoch * self._num_candidate_policies
        writer.scalar("Epoch", epoch)
        writer.scalar("# Sample", i_sample)

        if (iteration_number + 1) % self._num_candidate_policies == 0:
            # When looped all the way around, update shared parameters. WARNING: RACE CONDITIONS!
            sample_returns = max(self._all_returns)
            self.update()

        self.policy.set_param_values(
            self._shared_params[(i_sample + 1) % self._num_candidate_policies])

        return sample_returns

    def update(self) -> None:
        """ """
        self._evolution_strategy.tell(self._shared_params,
                                      -np.array(self._all_returns))  # Report back results
        self.policy.set_param_values(
            self._evolution_strategy.best.get()[0])  # TODO: DOES NOTHING, as is overwritten everywhere
        self._all_returns.clear()  # Clear for next epoch
        self._resample_shared_parameters()