def run_simulation(conf):
    agent_1 = a.make_agent(pid.P1, conf)
    agent_2 = a.make_agent(pid.P2, conf)
    total_games = args.g
    for i in range(total_games):
        simulate(agent_1, agent_2)
    prefix = conf.get_prefix()
    p.plot_simulation_results(agent_1, agent_2, prefix)
    p.plot_scores(agent_1, agent_2, prefix)
    conf.log()
    print('Done')

def exec_second_pass(config):
    start_time = time.time()
    config['result_dir'] = config['result_dir_prefix'] + '/lr-' + str(config['lr'])
    try:
        # We create the agent
        env = gym.make(config['env_name'])
        agent = make_agent(config, env)
        # We train the agent
        agent.train(save_every=-1)

        stats = get_stats(config['result_dir'], ["score"])
        mean_score = np.mean(stats['score'])
        stddev_score = np.std(stats['score'])
        result = {
            'lr': config['lr'],
            'mean_score': mean_score,
            'stddev_score': stddev_score,
        }

        seconds = int(round(time.time() - start_time))
        print("Run with lr: {} | {}".format(config['lr'], time.ctime()))
        print("%d seconds." % seconds)
    except Exception:
        result = {
            'lr': config['lr'],
            'mean_score': 0,
            'stddev_score': 0,
            'error': str(sys.exc_info()[0]),
            'error_message': str(sys.exc_info()[1]),
        }
    return result

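# A minimal sketch (not from the original source) of how exec_second_pass could be
# driven by a learning-rate sweep; the driver name and the lr grid below are
# illustrative assumptions only.
def second_pass_sketch(base_config):
    results = []
    for lr in [1e-4, 1e-3, 1e-2, 1e-1]:  # hypothetical learning-rate grid
        config = dict(base_config)
        config['lr'] = lr
        results.append(exec_second_pass(config))
    # Best learning rate first, mirroring how main() reads summary['results'][0]['lr']
    results.sort(key=lambda r: r['mean_score'], reverse=True)
    return {'results': results}
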
def eval_proc(file_name):
    print(file_name)
    f = open(os.path.join('./log_more', file_name), 'w+')
    types = ['RANDOM', 'RHCP', 'CDQN', 'MCT']
    # for role_id in [2, 3, 1]:
    #     for ta in types:
    #         agent = make_agent(ta, role_id)
    #         for i in range(1):
    #             env = make_env('MCT')
    #             st = StatCounter()
    #             for j in tqdm(range(100)):
    #                 winning_rate = eval_episode(env, agent)
    #                 st.feed(winning_rate)
    #             f.write('%s with role id %d against %s, winning rate: %f\n' % (ta, role_id, 'MCT', st.average))
    for role_id in [2, 3, 1]:
        agent = make_agent('MCT', role_id)
        for i in range(1):
            for te in types:
                env = make_env(te)
                st = StatCounter()
                for j in tqdm(range(100)):
                    winning_rate = eval_episode(env, agent)
                    st.feed(winning_rate)
                f.write('%s with role id %d against %s, winning rate: %f\n'
                        % ('MCT', role_id, te, st.average))
    f.close()

def onlytest():
    # env = make_env('RHCP')
    # env = make_env('RANDOM')
    # env = make_env('MCT')
    # env = make_env('CDQN')
    env = make_env('CDQN')
    agent = make_agent('RANDOM', 1)
    eval_episode(env, agent)

def main(_):
    config = flags.FLAGS.__flags.copy()
    # fixed_params must be a string to be passed in the shell, let's use JSON
    config["fixed_params"] = json.loads(config["fixed_params"])

    if config['fullsearch']:
        print('Hyperparameter search not implemented yet')
    else:
        agent = make_agent(config)

        if config['infer']:
            # Some code for inference ...
            agent.infer()
        elif config['test']:
            agent.test()
        else:
            # Some code for training ...
            agent.train()

def eval_proc(file_name):
    print(file_name)
    # os.path.join needs the file name as a separate argument, otherwise it is
    # simply concatenated to './log'
    f = open(os.path.join('./log', file_name), 'w+')
    # `types` is expected at module level, e.g. ['RANDOM', 'RHCP', 'CDQN', 'MCT']
    for te in types:
        for ta in types:
            for role_id in [2, 3, 1]:
                agent = make_agent(ta, role_id)
                for i in range(1):
                    env = make_env(te)
                    st = StatCounter()
                    with get_tqdm(total=100) as pbar:
                        for j in range(100):
                            winning_rate = eval_episode(env, agent)
                            st.feed(winning_rate)
                            pbar.update()
                    f.write('%s with role id %d against %s, winning rate: %f\n'
                            % (ta, role_id, te, st.average))
    f.close()

def run_params(nb_epochs, params, main_config):
    config = copy.deepcopy(main_config)
    config.update(params)
    config['result_dir'] = (config['result_dir_prefix'] + '/' + config['env_name'] + '/'
                            + config['agent_name'] + '/run-' + str(config['id']).zfill(3))
    config['max_iter'] = int(nb_epochs) * config['games_per_epoch']

    # If we are reusing a configuration, we remove its folder before the next training
    if os.path.exists(config['result_dir']):
        shutil.rmtree(config['result_dir'])

    try:
        # We create the agent
        env = gym.make(config['env_name'])
        agent = make_agent(config, env)
        # We train the agent
        agent.train(save_every=-1)
        agent.save()

        mean_score, stddev_score = get_score_stat(config['result_dir'])
        result = {
            'loss': -mean_score,
            'mean_score': mean_score,
            'stddev_score': stddev_score,
        }
    except Exception:
        result = {
            'loss': 0,
            'mean_score': 0,
            'stddev_score': 0,
            'error': str(sys.exc_info()[0]),
            'error_message': str(sys.exc_info()[1]),
        }

    # If we are training for fewer than 9 epochs, we remove the folder
    if nb_epochs < 9 and os.path.exists(config['result_dir']):
        shutil.rmtree(config['result_dir'])

    return result

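# A minimal sketch (not from the original source) of how the Hyperband driver built in
# main() is expected to call run_params; the budget handling, the survivor fraction and
# the helper name are illustrative assumptions.
def hyperband_round_sketch(main_config, sampled_params, nb_epochs):
    scored = []
    for i, params in enumerate(sampled_params):
        params = dict(params, id=i)  # run_params reads config['id'] to name the run folder
        out = run_params(nb_epochs, params, main_config)
        scored.append((out['loss'], params))
    scored.sort(key=lambda t: t[0])  # lower loss means higher mean score
    # Keep roughly the best third; rerun the survivors with a larger nb_epochs budget
    return [p for _, p in scored[:max(1, len(scored) // 3)]]
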
def test_params(counter, config, params):
    start_time = time.time()
    run_id = str(counter).zfill(4)
    config['result_dir'] = config['result_dir_prefix'] + '/run-' + run_id
    try:
        # We create the env and agent
        env = gym.make(config['env_name'])
        env.seed(config['random_seed'])
        agent = make_agent(config, env)
        # We train the agent
        agent.train(save_every=-1)

        stats = get_stats(config['result_dir'], ["score"])
        print(stats)
        mean_score = np.mean(stats['score'])
        stddev_score = np.std(stats['score'])
        result = {
            'run_id': run_id,
            'params': params,
            'mean_score': mean_score,
            'stddev_score': stddev_score,
        }

        seconds = int(round(time.time() - start_time))
        print("Run: {} | {}, mean_score {}".format(counter, time.ctime(), mean_score))
        print("%d seconds." % seconds)
    except Exception:
        result = {
            'params': params,
            'mean_score': 0,
            'stddev_score': 0,
            'error': str(sys.exc_info()[0]),
            'error_message': str(sys.exc_info()[1]),
        }
    return result

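# A minimal sketch (not from the original source) of a random-search driver around
# test_params; the driver name and the nb_runs default are assumptions, while
# get_agent_class(config).get_random_config is the sampler used elsewhere in main().
def random_search_sketch(config, nb_runs=20):
    get_random_config = get_agent_class(config).get_random_config
    results = []
    for counter in range(nb_runs):
        params = get_random_config()  # sample one hyperparameter set
        run_config = dict(config)
        run_config.update(params)
        results.append(test_params(counter, run_config, params))
    results.sort(key=lambda r: r['mean_score'], reverse=True)
    return {'results': results}
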
def test_mcagent_act(self):
    config = {
        'lr': 1,  # unused
        'agent_name': 'TabularMCAgent',
        'env_name': 'CartPole-v0',
        'random_seed': 0,
        'result_dir': dir + '/results',
        'discount': 1.,
        # 'debug': True
    }
    np.random.seed(0)
    config.update(get_agent_class(config).get_random_config())
    config['discount'] = 1.

    env = gym.make(config['env_name'])
    env.seed(0)
    agent = make_agent(config, env)

    act, state_id = agent.act(env.reset())
    self.assertEqual(act, 1)
    self.assertEqual(state_id, 144)

def exec_first_pass(counter, config, params):
    start_time = time.time()
    config['result_dir'] = config['result_dir_prefix'] + '/run-' + str(counter).zfill(3)
    try:
        # We create the agent
        env = gym.make(config['env_name'])
        agent = make_agent(config, env)
        # We train the agent
        agent.train(save_every=-1)

        mean_score, stddev_score = get_stats(config['result_dir'])
        result = {
            'params': params,
            'mean_score': mean_score,
            'stddev_score': stddev_score,
        }

        seconds = int(round(time.time() - start_time))
        print("Run: {} | {}, mean_score {}".format(counter, time.ctime(), mean_score))
        print("%d seconds." % seconds)
    except Exception:
        result = {
            'params': params,
            'mean_score': 0,
            'stddev_score': 0,
            'error': str(sys.exc_info()[0]),
            'error_message': str(sys.exc_info()[1]),
        }

    if os.path.exists(config['result_dir']):
        shutil.rmtree(config['result_dir'])
    return result

def main(_):
    config = flags.FLAGS.__flags.copy()
    env = gym.make(config['env_name'])
    agent = make_agent(config, env)

    # Fetch the learned Q-values and plot their magnitudes as a 3D bar chart
    qs = agent.sess.run(agent.Qs)

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    x = np.linspace(0, len(qs) - 1, len(qs))
    y = np.linspace(0, len(qs[0]) - 1, len(qs[0]))
    xpos, ypos = np.meshgrid(x, y, indexing='ij')
    xpos = xpos.flatten('F')
    ypos = ypos.flatten('F')

    num_elements = len(qs) * len(qs[0])
    zpos = np.zeros(num_elements)
    dx = np.ones(num_elements)
    dy = np.ones(num_elements)
    dz = np.abs(np.array(qs).flatten('F'))

    ax.bar3d(xpos, ypos, zpos, dx, dy, dz, color='#00ceaa')
    plt.savefig('test.png')

def test_mcagent_learn_from_episode(self):
    config = {
        'lr': 1,  # unused
        'agent_name': 'TabularMCAgent',
        'env_name': 'CartPole-v0',
        'random_seed': 0,
        'result_dir': dir + '/results',
        'discount': 1.,
        # 'debug': True
    }
    np.random.seed(0)
    config.update(get_agent_class(config).get_random_config())
    config['discount'] = 1.

    env = gym.make(config['env_name'])
    env.seed(0)
    agent = make_agent(config, env)

    agent.learn_from_episode(env)
    qs = agent.sess.run(agent.Qs)
    # for i, q in enumerate(qs):
    #     print(i, q)
    self.assertTrue(np.allclose(qs[126], [4.49999952, 1.99999976]))

def main(_):
    config = flags.FLAGS.__flags.copy()
    config["fixed_params"] = json.loads(config["fixed_params"])

    # if os.path.isfile(config['result_dir'] + '/config.json'):
    #     print("Overriding shell configuration with the one found in " + config['result_dir'])
    #     with open(config['result_dir'] + '/config.json', 'r') as f:
    #         config = json.loads(f.read())

    if config['hyperband']:
        print('Starting hyperband search')
        config['result_dir_prefix'] = dir + '/results/hyperband/' + str(int(time.time()))

        get_params = get_agent_class(config).get_random_config
        hb = Hyperband(get_params, run_params)
        results = hb.run(config, skip_last=True, dry_run=config['dry_run'])

        if not os.path.exists(config['result_dir_prefix']):
            os.makedirs(config['result_dir_prefix'])
        with open(config['result_dir_prefix'] + '/hb_results.json', 'w') as f:
            json.dump(results, f)
    elif config['fullsearch']:
        print('*** Starting full search')
        config['result_dir_prefix'] = (dir + '/results/fullsearch/' + str(int(time.time()))
                                       + '-' + config['agent_name'])
        os.makedirs(config['result_dir_prefix'])

        print('*** Starting first pass: full random search')
        summary = fullsearch.first_pass(config)
        with open(config['result_dir_prefix'] + '/fullsearch_results1.json', 'w') as f:
            json.dump(summary, f)

        print('*** Starting second pass: Learning rate search')
        best_agent_config = summary['results'][0]['params']
        summary = fullsearch.second_pass(config, best_agent_config)
        with open(config['result_dir_prefix'] + '/fullsearch_results2.json', 'w') as f:
            json.dump(summary, f)

        print('*** Starting third pass: Hyperband search with best lr')
        best_lr = summary['results'][0]['lr']
        summary = fullsearch.third_pass(config, best_lr)
        with open(config['result_dir_prefix'] + '/fullsearch_results3.json', 'w') as f:
            json.dump(summary, f)
    elif config['randomsearch']:
        print('*** Starting random search')
        config['result_dir_prefix'] = (dir + '/results/randomsearch/' + str(int(time.time()))
                                       + '-' + config['agent_name'])
        os.makedirs(config['result_dir_prefix'])

        summary = randomsearch.search(config)
        with open(config['result_dir_prefix'] + '/fullsearch_results1.json', 'w') as f:
            json.dump(summary, f)
    else:
        env = gym.make(config['env_name'])
        agent = make_agent(config, env)

        if config['play']:
            for i in range(config['play_nb']):
                agent.play(env)
        else:
            agent.train()
            agent.save()

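# Hypothetical command lines for the dispatcher above, assuming this file is the script
# entry point (called main.py here for illustration) and that flags with the same names
# as the config keys it reads (agent_name, env_name, hyperband, randomsearch, play,
# play_nb, ...) are defined elsewhere in the project; the flag definitions themselves
# are not shown in this file.
#
#   python main.py --agent_name=TabularMCAgent --env_name=CartPole-v0                    # plain training
#   python main.py --agent_name=TabularMCAgent --env_name=CartPole-v0 --hyperband=True   # hyperband search
#   python main.py --agent_name=TabularMCAgent --env_name=CartPole-v0 --play=True --play_nb=5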