def __init__(self,
             D="DefaultDir",
             H1=64,
             H2=64,
             P=100,
             G=5000,
             S=50000,
             E="TimePilot-ram-v0",
             wd=0.01,  # weight decay, initialized to 0.01
             si=0.5):  # initial sigma for CMA-ES
    # HYPERPARAMETERS
    self.HL1 = H1
    self.HL2 = H2
    self.NPOP = P
    self.MAX_ITER = G
    self.STEPS = S
    self.dir = D

    # CONSTANTS
    self.STATE_SIZE = 128
    self.ACTION_SIZE = self.decisions_env(E)
    self.env = gym.make(E)
    self.env.reset()

    # CMA-ES: parameter count of the two-hidden-layer policy (weights only, no biases)
    NPARAMS = (self.STATE_SIZE * self.HL1) + (self.HL1 * self.HL2) + (self.HL2 * self.ACTION_SIZE)
    cma = CMAES(NPARAMS,
                popsize=self.NPOP,
                weight_decay=wd,
                sigma_init=si)
    self.FINAL = self.Engine(cma)
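NPARAMS above counts only the three weight matrices, with no bias terms. A minimal sketch of how a flat CMA-ES candidate of that length could be reshaped into the policy's weights; the tanh/argmax choices and the action_size=6 default are assumptions for illustration, not taken from the source:

import numpy as np

def unpack_policy(flat, state_size=128, h1=64, h2=64, action_size=6):
    """Split a flat parameter vector into the three bias-free weight
    matrices counted by NPARAMS. action_size=6 is only a placeholder."""
    i = state_size * h1
    j = i + h1 * h2
    W1 = flat[:i].reshape(state_size, h1)
    W2 = flat[i:j].reshape(h1, h2)
    W3 = flat[j:].reshape(h2, action_size)
    return W1, W2, W3

def act(flat, obs, action_size=6):
    """Hypothetical forward pass: the activation choice is an assumption."""
    W1, W2, W3 = unpack_policy(flat, action_size=action_size)
    h = np.tanh(obs @ W1)
    h = np.tanh(h @ W2)
    return int(np.argmax(h @ W3))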
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filebase = 'log/' + gamename + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    game = config.games[gamename]
    model = make_model(game)
    num_params = model.param_count
    print("size of model", num_params)
    if optimizer == 'ses':
        ses = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                   sigma_alpha=0.2, sigma_limit=0.02, elite_ratio=0.1,
                   weight_decay=0.005, popsize=population)
        es = ses
    elif optimizer == 'ga':
        ga = SimpleGA(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                      sigma_limit=0.02, elite_ratio=0.1,
                      weight_decay=0.005, popsize=population)
        es = ga
    elif optimizer == 'cma':
        cma = CMAES(num_params, sigma_init=sigma_init, popsize=population)
        es = cma
    elif optimizer == 'pepg':
        pepg = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                    sigma_alpha=0.20, sigma_limit=0.02, learning_rate=0.01,
                    learning_rate_decay=1.0, learning_rate_limit=0.01,
                    weight_decay=0.005, popsize=population)
        es = pepg
    else:
        oes = OpenES(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                     sigma_limit=0.02, learning_rate=0.01, learning_rate_decay=1.0,
                     learning_rate_limit=0.01, antithetic=antithetic,
                     weight_decay=0.005, popsize=population)
        es = oes
    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
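PRECISION and the packet sizes govern how candidate solutions are serialized into fixed-size int32 buffers for the workers, which explains the (5 + num_params) arithmetic. A sketch of such an encoding, modeled on estool's train.py; the exact five header fields here are an assumption, not taken from this snippet:

import numpy as np

PRECISION = 10000  # fixed-point scale: four decimal digits survive the int32 round-trip

def encode_solution(worker_id, trial_id, seed, train_mode, solution):
    """Pack one (5 + num_params)-int32 record: a 5-field header plus the
    parameter vector scaled by PRECISION. Header layout is hypothetical."""
    header = np.array([worker_id, trial_id, seed, train_mode, len(solution)])
    params = np.round(np.array(solution) * PRECISION)
    return np.concatenate([header, params]).astype(np.int32)

def decode_solution(packet):
    """Invert encode_solution: split off the header, rescale the params."""
    header, params = packet[:5], packet[5:]
    return header, params.astype(np.float64) / PRECISION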
def problem4():
    # each trial uses a data partition of 100000 rows
    for trials in range(2):
        start = 100000 * trials
        end = 100000 * (trials + 1)
        pdis = PDIS(behavior_file="data.csv",
                    start_index=start,
                    end_index=end)
        pdis.calculate_pi_b()
        # for trials in range(10):
        NPARAMS = 4        # a 4-dimensional problem
        NPOPULATION = 75   # population size of 75
        MAX_ITERATION = 6
        cmaes = CMAES(NPARAMS,
                      popsize=NPOPULATION,
                      weight_decay=0.0,
                      sigma_init=0.5)
        history = []
        print("candidate_data: ", start, end,
              "Population: ", NPOPULATION,
              "Max Iteration: ", MAX_ITERATION)
        for j in range(MAX_ITERATION):
            solutions = cmaes.ask()
            fitness_list = np.zeros(cmaes.popsize)
            for i in range(cmaes.popsize):
                fitness_list[i] = pdis.upper_bound(solutions[i])
            cmaes.tell(fitness_list)
            # result(): first element is the best solution, second is the best fitness
            result = cmaes.result()
            history.append(result[1])
            if (j + 1) % 2 == 0:
                print("fitness at iteration", (j + 1), result[1])
        print("local optimum discovered by solver:\n", result[0])
        print("fitness score at this local optimum:", result[1])
        # return history
        pdis.execute_safety_test(result[0], trials)
def main():
    commands = sys.argv
    if ("solve" in commands):
        solver = CMAES(num_params=N_NEURONS,
                       sigma_init=0.50,    # initial standard deviation
                       popsize=1000,       # population size
                       weight_decay=0.01)  # weight decay coefficient
        # solver = SimpleGA(N_NEURONS)
        test_solver(solver)
    elif ("test" in commands):
        init = np.loadtxt(FILE_TEST)
        evaluate(init, True, False)
def initialize_settings(c, r, sigma_init=0.1, sigma_decay=0.9999, init_opt=""):
    global es, model, comm, rank
    comm, rank = c, r
    model = make_model(sys.argv[1])
    num_params = model.param_count
    if len(init_opt) > 0:
        es = pickle.load(open(init_opt, "rb"))
    else:
        if config.OPTIMIZER == "cma":
            cma = CMAES(num_params,
                        sigma_init=sigma_init,
                        popsize=config.POPULATION)
            es = cma
    global PRECISION
    PRECISION = 10000
    global SOLUTION_PACKET_SIZE
    SOLUTION_PACKET_SIZE = (5 + num_params) * config.NUM_WORKER_TRIAL
    global RESULT_PACKET_SIZE
    RESULT_PACKET_SIZE = 4 * config.NUM_WORKER_TRIAL
    return es, model, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
class GA:
    def __init__(self, timelimit, pop_size, device):
        self.pop_size = pop_size
        self.truncation_threshold = int(pop_size / 2)  # pop_size should be divisible by two
        self.P = []

        # unique GA id
        self.init_time = datetime.now().strftime("%Y%m%d_%H%M%S")

        # load configuration params
        with open('config/creature.json') as f:
            config = json.load(f)
        model_fromdisk = config.get('vae.model.fromdisk')
        model_path = config.get('vae.model.path')
        latent_size = config.get('vae.latent.size')
        obs_size = config.get('vae.obs.size')
        num_effectors = config.get('joints.size') + config.get('brushes.size')
        input_size = latent_size + num_effectors
        output_size = num_effectors
        cpg_enabled = config.get('cpg.enabled')
        if cpg_enabled:
            input_size += 1
            output_size += 1

        # load vision module
        from models.vae import VAE
        vae = VAE(latent_size).cuda()
        if model_fromdisk:
            vae.load_state_dict(torch.load(model_path))
            vae.eval()  # inference mode
            print(f'Loaded VAE model {model_path} from disk')

        print(f'Generating initial population of {pop_size} candidates...')

        # initialize population
        from train import GAIndividual
        for _ in range(pop_size):
            self.P.append(
                GAIndividual(self.init_time,
                             input_size,
                             output_size,
                             obs_size,
                             compressor=vae,
                             cpg_enabled=cpg_enabled,
                             device=device,
                             time_limit=timelimit))

        # report controller parameters
        self.num_controller_params = input_size * output_size + output_size
        print(f'Number of controller parameters: {self.num_controller_params}')

    def run(self, max_generations, folder, ga_id='', init_solution_id=''):
        if (ga_id == ''):
            ga_id = self.init_time

        # disk
        results_dir = os.path.join(folder, ga_id)
        if not os.path.exists(results_dir):
            os.makedirs(results_dir)
        fitness_path = os.path.join(results_dir, 'fitness.txt')  # most important fitness results per run (for plotting)
        ind_fitness_path = os.path.join(results_dir, 'ind_fitness.txt')  # more detailed fitness results per individual
        solver_path = os.path.join(results_dir, "solver.pkl")  # contains the current population
        best_solver_path = os.path.join(results_dir, "best_solver.pkl")  # contains the best solver so far
        init_solution_path = os.path.join(os.path.join(folder, init_solution_id), "solver.pkl")  # path to initial solution solver

        current_generation = 0
        P = self.P
        best_f = -sys.maxsize

        # initialize controller instance to be saved
        from models.controller import Controller
        best_controller = Controller(P[0].input_size, P[0].output_size)

        # initialize CMA-ES (start from scratch or load previously saved solver/population)
        resume = False
        if os.path.exists(solver_path):
            resume = True
            self.solver = pickle.load(open(solver_path, 'rb'))
            new_results = self.solver.result()
            best_f = new_results[1]
            if os.path.exists(fitness_path):
                with open(fitness_path, 'r') as f:
                    lines = f.read().splitlines()
                    last_line = lines[-1]
                    current_generation = int(last_line.split('/')[0])
        # start from scratch but with an initial solution param
        elif os.path.exists(init_solution_path):
            tmp_solver = pickle.load(open(init_solution_path, 'rb'))
            self.solver = CMAES(num_params=self.num_controller_params,
                                solution_init=tmp_solver.best_param(),
                                sigma_init=0.1,
                                popsize=self.pop_size)
        # completely start from scratch
        else:
            self.solver = CMAES(num_params=self.num_controller_params,
                                sigma_init=0.1,
                                popsize=self.pop_size)

        if not resume:
            with open(fitness_path, 'a') as file:
                file.write('gen/avg/cur/best\n')
            with open(ind_fitness_path, 'a') as file:
                file.write('gen/id/fitness/coverage/coverageReward/IC/PC/PCt0/PCt1\n')

        while current_generation < max_generations:
            fitness = np.zeros(self.pop_size)
            results_full = np.zeros(self.pop_size)
            print(f'Generation {current_generation}')
            print(f'Evaluating individuals: {len(P)}')

            # ask the ES to give us a set of candidate solutions
            solutions = self.solver.ask()

            # evaluate all candidates
            for i, s in enumerate(P):
                set_controller_weights(s.controller, solutions[i])
                s.run_solution(generation=current_generation, local_id=i)

            # request fitness from simulator
            results_full = Client(ClientType.REQUEST).start()
            fitness = results_full[:, 0]
            for i, s in enumerate(P):
                s.fitness = fitness[i]

            current_f = np.max(fitness)
            average_f = np.mean(fitness)
            print(f'Current best: {current_f}\nCurrent average: {average_f}\nAll-time best: {best_f}')

            # return rewards to ES for param update
            self.solver.tell(fitness)
            max_index = np.argmax(fitness)
            new_results = self.solver.result()

            # process results
            pickle.dump(self.solver, open(solver_path, 'wb'))
            if current_f > best_f:
                set_controller_weights(best_controller, solutions[max_index])
                torch.save(best_controller, os.path.join(results_dir, 'best_controller.pth'))
                # save solver and change level to a random one
                pickle.dump(self.solver, open(best_solver_path, 'wb'))
                best_f = current_f

            for i, s in enumerate(P):
                # fitness/coverage/coverageReward/IC/PC/PCt0/PCt1
                res = results_full[i, :]
                res_str = ('/'.join(['%.6f'] * len(res))) % tuple(res)
                with open(ind_fitness_path, 'a') as file:
                    file.write('%d/%d/%s\n' % (current_generation, i, res_str))

            res_str = '%d/%f/%f/%f' % (current_generation, average_f, current_f, best_f)
            print(f'gen/avg/cur/best : {res_str}')
            with open(fitness_path, 'a') as file:
                file.write(f'{res_str}\n')
            if (i > max_generations):
                break
            gc.collect()
            current_generation += 1

        print('Finished')
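Typical usage of the class above; the argument values and the 'results' folder are placeholders for illustration, not taken from the source:

# hypothetical driver; timelimit/pop_size/device and the folder are placeholders
ga = GA(timelimit=10, pop_size=64, device='cuda')
ga.run(max_generations=500, folder='results')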
NPOPULATION = 80      # population size of 80
MAX_ITERATION = 5000  # run the solver for up to 5000 generations

import numpy as np

fitness_option = 0
original_class = corresponding_index_labels[iterations]
filename = 'log_vgg19/imageName_' + random_filenames[iterations]
filename += '_fitness_' + str(fitness_option)
filename += '_target=' + str(target_class)
filename += '_original=' + str(original_class)
filename += '_npop=' + str(NPOPULATION)
filename += '_iters=' + str(MAX_ITERATION)
filename += 'range_limit'
filename += '_x0=' + str(xpoint)
filename += '_y0=' + str(ypoint)
filename += '_x1=' + str(xpoint2)
filename += '_y1=' + str(ypoint2)
log_file = open(filename, "w")

# defines the CMA-ES solver for each target class
cmaes = CMAES(NPARAMS,
              popsize=NPOPULATION,
              sigma_init=0.5)

# solve an NPARAMS-dimensional problem, sampling NPOPULATION candidates per iteration
iters, scratch, probabilities_all, orig_probs, target_probs = genetic_solver(
    xpoint, xpoint2, ypoint, ypoint2, cmaes, input_to_model,
    original_class, target_class, fitness_option, log_file,
    random_filenames[iterations])
print('Iterations taken: ', iters)
log_file.write('Iterations taken: ' + str(iters))
oes = OpenES(NPARAMS,                  # number of model parameters
             sigma_init=0.5,           # initial standard deviation
             sigma_decay=0.999,        # anneal standard deviation slowly
             learning_rate=0.1,        # learning rate for the mean update
             learning_rate_decay=1.0,  # don't anneal the learning rate
             popsize=NPOPULATION,      # population size
             antithetic=False,         # whether to use antithetic sampling
             weight_decay=0.00,        # weight decay coefficient
             rank_fitness=False,       # use rank rather than raw fitness numbers
             forget_best=False)        # keep the historical best solution

# defines the CMA-ES algorithm solver
cmaes = CMAES(NPARAMS,
              popsize=NPOPULATION,
              weight_decay=0.0,
              sigma_init=0.5)

print(mp.cpu_count())
pool = mp.Pool(mp.cpu_count())

fit_func = evluate_func

# defines a function that uses a solver to optimize fit_func
def test_solver(solver):
    history = []
    j = 0
    seed_width = 20
    while True:
        solutions = solver.ask()
        fitness_list = np.zeros(solver.popsize)
        # print(solutions)
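The snippet above is cut off inside test_solver's loop. A self-contained sketch of the same ask/evaluate/tell pattern, assuming estool's solver interface (ask, tell, result); the fixed iteration cap replaces whatever stopping criterion the original while loop used, which is an assumption:

import numpy as np

def test_solver_sketch(solver, fit_func, max_iteration=100):
    """Hypothetical completion of test_solver: drive an ask/tell loop
    and record the best fitness seen at each generation."""
    history = []
    for j in range(max_iteration):
        solutions = solver.ask()                  # sample a population of candidates
        fitness_list = np.zeros(solver.popsize)
        for i in range(solver.popsize):
            fitness_list[i] = fit_func(solutions[i])
        solver.tell(fitness_list)                 # report fitness back to the solver
        result = solver.result()                  # (best_params, best_fitness, ...)
        history.append(result[1])
    return history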
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999, init_opt=''):
    global population, filebase, controller_filebase, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filebase = './log/' + env_name + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    controller_filebase = './controller/' + env_name + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    model = make_model()
    num_params = model.param_count
    # print("size of model", num_params)
    if len(init_opt) > 0:
        es = pickle.load(open(init_opt, 'rb'))
    else:
        if optimizer == 'ses':
            ses = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                       sigma_alpha=0.2, sigma_limit=0.02, elite_ratio=0.1,
                       weight_decay=0.005, popsize=population)
            es = ses
        elif optimizer == 'ga':
            ga = SimpleGA(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                          sigma_limit=0.02, elite_ratio=0.1,
                          weight_decay=0.005, popsize=population)
            es = ga
        elif optimizer == 'cma':
            cma = CMAES(num_params, sigma_init=sigma_init, popsize=population)
            es = cma
        elif optimizer == 'pepg':
            pepg = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                        sigma_alpha=0.20, sigma_limit=0.02, learning_rate=0.01,
                        learning_rate_decay=1.0, learning_rate_limit=0.01,
                        weight_decay=0.005, popsize=population)
            es = pepg
        else:
            oes = OpenES(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                         sigma_limit=0.02, learning_rate=0.01, learning_rate_decay=1.0,
                         learning_rate_limit=0.01, antithetic=antithetic,
                         weight_decay=0.005, popsize=population)
            es = oes
    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (4 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    global population, filebase, game, controller, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filedir = 'results/{}/{}/log/'.format(exp_name, env_name)
    if not os.path.exists(filedir):
        os.makedirs(filedir)
    filebase = filedir + env_name + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    controller = make_controller(args=config_args)
    num_params = controller.param_count
    print("size of model", num_params)
    if optimizer == 'ses':
        ses = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                   sigma_alpha=0.2, sigma_limit=0.02, elite_ratio=0.1,
                   weight_decay=0.005, popsize=population)
        es = ses
    elif optimizer == 'ga':
        ga = SimpleGA(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                      sigma_limit=0.02, elite_ratio=0.1,
                      weight_decay=0.005, popsize=population)
        es = ga
    elif optimizer == 'cma':
        cma = CMAES(num_params, sigma_init=sigma_init, popsize=population)
        es = cma
    elif optimizer == 'pepg':
        pepg = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                    sigma_alpha=0.20, sigma_limit=0.02, learning_rate=0.01,
                    learning_rate_decay=1.0, learning_rate_limit=0.01,
                    weight_decay=0.005, popsize=population)
        es = pepg
    else:
        oes = OpenES(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                     sigma_limit=0.02, learning_rate=0.01, learning_rate_decay=1.0,
                     learning_rate_limit=0.01, antithetic=antithetic,
                     weight_decay=0.005, popsize=population)
        es = oes
    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
# this is why it is required to split the code into a main script (this) and a function script (functions.py)
import matplotlib.pyplot as plt

if __name__ == '__main__':
    # starting parallel workers
    multiprocessing.set_start_method('spawn', True)
    workers = Pool(functions.num_workers)

    # training phase (TensorFlow graph activation, performing CMA-ES optimisation)
    with functions.sess.as_default():
        functions.sess.run(tf.compat.v1.global_variables_initializer())
        saver = tf.compat.v1.train.Saver()
        cma = CMAES(functions.NPARAMS,
                    sigma_init=functions.sigma,
                    weight_decay=0,
                    popsize=functions.NPOPULATION)
        cma_history = functions.solve(cma, workers)

    # loading learned gait after training
    with open(functions.save_dir, 'rb') as f:
        bestparams = pickle.load(f)
    with open(functions.max_fit_dir, 'rb') as f:
        history = pickle.load(f)

    # testing learned gait
    functions.env[0].reset()
    time.sleep(2)
    fitness = functions.fitness_func(bestparams, True)

    # showing reward/time graph
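functions.solve is defined elsewhere; a minimal sketch of what a pool-parallel CMA-ES loop of that shape could look like, assuming estool's ask/tell interface and a module-level fitness function. The function and parameter names below are assumptions, not the repository's actual functions.py:

import numpy as np

def solve_sketch(solver, workers, fitness_func, max_iteration=100):
    """Hypothetical stand-in for functions.solve: each generation,
    the worker Pool evaluates the whole population in parallel."""
    history = []
    for gen in range(max_iteration):
        solutions = solver.ask()
        # Pool.map farms one candidate out to each worker
        fitness = workers.map(fitness_func, solutions)
        solver.tell(np.array(fitness))
        history.append(solver.result()[1])  # best fitness so far
    return history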
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE, model_name, novelty_search, unique_id, novelty_mode, BC_SIZE, ns_mode
    population = num_worker * num_worker_trial
    os.makedirs(os.path.join(ROOT, 'log'), exist_ok=True)
    filebase = os.path.join(ROOT, 'log', gamename + '.' + optimizer + '.' + model_name + '.' + str(num_episode) + '.' + str(population)) + '.' + unique_id
    if novelty_search:
        filebase = filebase + '.novelty'
    # behavior characterization (BC) size depends on the novelty mode
    if novelty_mode == 'h':
        BC_SIZE = H_SIZE
    elif novelty_mode == 'z':
        BC_SIZE = Z_SIZE
    elif novelty_mode == 'h_concat':
        BC_SIZE = BC_SEQ_LENGTH * H_SIZE
        # NOVELTY_THRESHOLD = 180
    elif novelty_mode == 'z_concat':
        BC_SIZE = BC_SEQ_LENGTH * Z_SIZE
    elif novelty_mode == 'a_concat':
        BC_SIZE = BC_SEQ_LENGTH * A_SIZE
    else:
        BC_SIZE = 9  # dummy BC size, not used because the reward is the distance travelled
    if novelty_mode:
        filebase = filebase + '.' + novelty_mode
    if ns_mode:
        filebase = filebase + '.' + ns_mode
    model = make_model(model_name, load_model=True)
    num_params = model.param_count
    print("size of model", num_params)
    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = (4 + BC_SIZE) * num_worker_trial
    if optimizer == 'ses':
        ses = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                   sigma_alpha=0.2, sigma_limit=0.02, elite_ratio=0.1,
                   weight_decay=0.005, popsize=population)
        es = ses
    elif optimizer == 'ga':
        ga = SimpleGA(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                      sigma_limit=0.02, elite_ratio=0.1,
                      weight_decay=0.005, popsize=population)
        es = ga
    elif optimizer == 'cma':
        cma = CMAES(num_params, sigma_init=sigma_init, popsize=population)
        es = cma
    elif optimizer == 'pepg':
        pepg = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                    sigma_alpha=0.20, sigma_limit=0.02, learning_rate=0.01,
                    learning_rate_decay=1.0, learning_rate_limit=0.01,
                    weight_decay=0.005, popsize=population)
        es = pepg
    else:
        oes = OpenES(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                     sigma_limit=0.02, learning_rate=0.01, learning_rate_decay=1.0,
                     learning_rate_limit=0.01, antithetic=antithetic,
                     weight_decay=0.005, popsize=population)
        es = oes
def testRun():
    config()
    x = np.random.randn(NPARAMS)
    print("The fitness of initial guess", fit_func(x))

    pepg = PEPG(NPARAMS,                  # number of model parameters
                sigma_init=0.5,           # initial standard deviation
                learning_rate=0.1,        # learning rate for the mean update
                learning_rate_decay=1.0,  # don't anneal the learning rate
                popsize=NPOPULATION,      # population size
                average_baseline=False,   # set baseline to average of batch
                weight_decay=0.00,        # weight decay coefficient
                rank_fitness=False,       # use rank rather than raw fitness numbers
                forget_best=False)        # keep the historical best solution
    pepg_history = test_solver(pepg)

    pepgV = PEPGVariant(NPARAMS,          # number of model parameters
                sigma_init=0.5,           # initial standard deviation
                learning_rate=0.1,        # learning rate for the mean update
                learning_rate_decay=1.0,  # don't anneal the learning rate
                popsize=NPOPULATION,      # population size
                average_baseline=False,   # set baseline to average of batch
                weight_decay=0.00,        # weight decay coefficient
                rank_fitness=False,       # use rank rather than raw fitness numbers
                forget_best=False,        # keep the historical best solution
                diversity_best=0.1)       # use the diversity term (for testing)
    print("-----test PEPG variant-----")
    pepgv_history = test_solver(pepgV)

    print("---test PEPG variant with different diversity-----")
    pepgV2 = PEPGVariant(NPARAMS,         # number of model parameters
                sigma_init=0.5,           # initial standard deviation
                learning_rate=0.1,        # learning rate for the mean update
                learning_rate_decay=1.0,  # don't anneal the learning rate
                popsize=NPOPULATION,      # population size
                average_baseline=False,   # set baseline to average of batch
                weight_decay=0.00,        # weight decay coefficient
                rank_fitness=False,       # use rank rather than raw fitness numbers
                forget_best=False,        # keep the historical best solution
                diversity_best=1)         # stronger diversity term (for testing)
    pepgV2_history = test_solver(pepgV2)

    oes = OpenES(NPARAMS,                 # number of model parameters
                 sigma_init=0.5,          # initial standard deviation
                 sigma_decay=0.999,       # anneal standard deviation slowly
                 learning_rate=0.1,       # learning rate for the mean update
                 learning_rate_decay=1.0, # don't anneal the learning rate
                 popsize=NPOPULATION,     # population size
                 antithetic=False,        # whether to use antithetic sampling
                 weight_decay=0.00,       # weight decay coefficient
                 rank_fitness=False,      # use rank rather than raw fitness numbers
                 forget_best=False)
    print("-----test oes--------------")
    oes_history = test_solver(oes)

    cmaes = CMAES(NPARAMS,
                  popsize=NPOPULATION,
                  weight_decay=0.0,
                  sigma_init=0.5)
    cma_history = test_solver(cmaes)

    best_history = [0] * MAX_ITERATION
    plt.figure(figsize=(16, 8), dpi=150)
    optimum_line, = plt.plot(best_history, color="black", linewidth=0.5,
                             linestyle="-.", label='Global Optimum')
    pepgv_line, = plt.plot(pepgv_history, color="red", linewidth=1.0,
                           linestyle="-", label='PEPGV / NES')
    pepg_line, = plt.plot(pepg_history, color="blue", linewidth=1.0,
                          linestyle="-.", label='PEPG / NES')
    oes_line, = plt.plot(oes_history, color="orange", linewidth=1.0,
                         linestyle="-", label='OpenAI-ES')
    cma_line, = plt.plot(cma_history, color="green", linewidth=1.0,
                         linestyle="-", label='CMA-ES')
    plt.legend(handles=[optimum_line, pepgv_line, pepg_line, oes_line, cma_line],
               loc='best')
    plt.xlim(0, 100)
    plt.xlabel('generation')
    plt.ylabel('loss')
    plt.savefig("./results/rose_" + str(NPARAMS) + "d.svg")
        feed_dict = {self.position: jp, self.velocity: jv}
        dpval = self.sess.run(self.dp, feed_dict)
        return dpval[0]

agent = StandActor()
nbParams = agent.getNumberOfParams()
try:
    x0 = pickle.load(open("params" + ENV_NAME + ".p", "rb"))
except:
    print("no saved parameter found - starting from initialization")
    x0 = agent.getParameterValue()

solver = CMAES(x0)
# agent.loadParams( np.zeros( nbParams ) )

def evaluate(sol):
    agent.loadParams(sol)
    ob = env.reset()
    while True:
        action = agent.act(ob)
        ob, reward, done, _ = env.step(action)
        # time.sleep(0.01)
        if done:
            # print("episode done")
            # print(reward)
            gc.collect()
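The evaluate function above is cut off inside the episode loop and never accumulates or returns a score (note also that estool's CMAES takes a parameter count as its first argument, so CMAES(x0) may belong to a different wrapper). A hedged, self-contained completion that returns the episode's total reward; the accumulation and return convention are assumptions:

import gc

def evaluate_sketch(agent, env, sol):
    """Hypothetical completion of evaluate(): load the candidate weights,
    roll out one episode, and return the cumulative reward."""
    agent.loadParams(sol)
    ob = env.reset()
    total_reward = 0.0
    while True:
        action = agent.act(ob)
        ob, reward, done, _ = env.step(action)
        total_reward += reward
        if done:
            gc.collect()
            return total_reward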
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999, weight_decay=0.005):
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filebase = 'log/' + gamename + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    game = config.games[gamename]
    model = make_model(game)
    num_params = model.param_count
    print("size of model", num_params)
    if optimizer == 'ses':
        ses = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                   sigma_alpha=0.2, sigma_limit=0.02, elite_ratio=0.1,
                   weight_decay=weight_decay, popsize=population)
        es = ses
    elif optimizer == 'ga':
        ga = SimpleGA(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                      sigma_limit=0.02, elite_ratio=0.1,
                      weight_decay=weight_decay, popsize=population)
        es = ga
    elif optimizer == 'cma':
        cma = CMAES(num_params, sigma_init=sigma_init, popsize=population,
                    weight_decay=weight_decay)
        es = cma
    elif optimizer == 'pepg':
        pepg = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                    sigma_alpha=0.20, sigma_limit=0.02, learning_rate=0.01,
                    learning_rate_decay=1.0, learning_rate_limit=0.01,
                    weight_decay=weight_decay, popsize=population)
        es = pepg
    elif optimizer == 'oes':
        oes = OpenES(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                     sigma_limit=0.02, learning_rate=0.01, learning_rate_decay=1.0,
                     learning_rate_limit=0.01, antithetic=antithetic,
                     weight_decay=weight_decay, popsize=population)
        es = oes
    # elif optimizer == 'pso':
    #     pso = PSO(num_params, sigma_init=sigma_init,
    #               weight_decay=weight_decay, popsize=population)
    #     es = pso
    elif optimizer == 'global_pso':
        pso = Pyswarms(num_params, sigma_init=sigma_init, weight_decay=weight_decay,
                       popsize=population, communication_topology='global')
        es = pso
    elif optimizer == 'local_pso':
        pso = Pyswarms(num_params, sigma_init=sigma_init, weight_decay=weight_decay,
                       popsize=population, communication_topology='local')
        es = pso
    elif optimizer == 'random_pso':
        pso = Pyswarms(num_params, sigma_init=sigma_init, weight_decay=weight_decay,
                       popsize=population, communication_topology='random')
        es = pso
    else:
        if optimizer in list(sorted(ng.optimizers.registry.keys())):
            ng_optimizer = Nevergrad(optimizer, num_params, sigma_init=sigma_init,
                                     popsize=population, weight_decay=weight_decay)
            es = ng_optimizer
        else:
            raise ValueError('Could not find optimizer!')
    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
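The fallback branch accepts any name registered in nevergrad's optimizer registry. A quick way to inspect the valid names, assuming nevergrad is importable as ng just as the snippet does:

import nevergrad as ng

# print a sample of the optimizer names the final branch above would accept
print(sorted(ng.optimizers.registry.keys())[:10])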