def search(config):
    """Random hyper-parameter search.

    Samples many configurations for the configured agent, scores each one in
    a worker process, and returns all results ranked by mean score together
    with the best parameter set.
    """
    sample_params = get_agent_class(config).get_random_config
    hp_count = len(sample_params())
    if config['debug']:
        print('*** Number of hyper-parameters: %d' % hp_count)
    # Short runs in debug mode, full-length runs otherwise.
    config['max_iter'] = 5 if config['debug'] else 500

    jobs = []
    pool_size = min(multiprocessing.cpu_count(), config['nb_process'])
    with concurrent.futures.ProcessPoolExecutor(pool_size) as executor:
        # Trial budget scales with the number of hyper-parameters.
        trials = 5 if config['debug'] else 200 * hp_count
        for trial in range(trials):
            params = sample_params(config["fixed_params"])
            config.update(params)
            config['random_seed'] = 1  # every trial runs under the same seed
            jobs.append(executor.submit(
                test_params, trial, copy.deepcopy(config), copy.deepcopy(params)))
        concurrent.futures.wait(jobs)

    ranked = sorted((job.result() for job in jobs),
                    key=lambda result: result['mean_score'], reverse=True)
    return {'best_params': ranked[0]['params'], 'results': ranked}
def main(env_id, agent_id, n_test_episodes, render, debug):
    """Train `agent_id` on `env_id`, then evaluate it.

    Runs `n_test_episodes` test episodes after training and prints the
    average episode reward. When `render` is true, every 5th episode is
    rendered.
    """
    # Fix: removed an unused local `rng = np.random.RandomState(42)` —
    # reproducibility is controlled by the global seeding below.
    if env_id == 'Point-v0':
        # Importing the module registers Point-v0 with gym (side effect).
        from environments import point_env  # noqa: F401
        env = gym.make('Point-v0')
    else:
        env = gym.make('MountainCarContinuous-v0')

    # Seed every source of randomness so runs are reproducible.
    env.seed(42)
    tf.set_random_seed(42)
    np.random.seed(42)

    agent_class = agents.get_agent_class(agent_id)
    agent = agent_class(env, debug)
    agent.train()

    episode_rewards = []
    for i in range(n_test_episodes):
        state = env.reset()
        done = False
        episode_reward = 0
        render_episode = (i % 5 == 0)  # render only a sample of the episodes
        while not done:
            action = agent.get_action(state)
            state, reward, done, _ = env.step(action)
            episode_reward += reward
            if render and render_episode:
                env.render()
        episode_rewards.append(episode_reward)
    print("Average rewards:", np.mean(episode_rewards))
def third_pass(config, best_lr):
    """Third search stage: Hyperband with the learning rate fixed.

    Pins `lr` to the best value found by the second pass and runs a full
    Hyperband search over the remaining hyper-parameters.

    Returns the Hyperband run summary.
    """
    config = copy.deepcopy(config)  # never mutate the caller's config
    config["fixed_params"] = {'lr': best_lr}
    config['result_dir_prefix'] = config['result_dir_prefix'] + '/third-pass'
    config['games_per_epoch'] = 5 if config['debug'] else 100
    # Fix: `True if x else False` was a redundant spelling of bool(x).
    dry_run = bool(config['debug'])
    get_params = get_agent_class(config).get_random_config
    hb = Hyperband(get_params, run_params)
    summary = hb.run(config, skip_last=True, dry_run=dry_run)
    return summary
def test_mcagent_act(self):
    """A freshly built, seeded TabularMCAgent picks a deterministic first action."""
    base_config = {
        'lr': 1,  # unused by this agent
        'agent_name': 'TabularMCAgent',
        'env_name': 'CartPole-v0',
        'random_seed': 0,
        'result_dir': dir + '/results',
        'discount': 1.,
    }
    np.random.seed(0)
    base_config.update(get_agent_class(base_config).get_random_config())
    base_config['discount'] = 1.  # re-pin after random config may override it

    env = gym.make(base_config['env_name'])
    env.seed(0)
    agent = make_agent(base_config, env)

    act, state_id = agent.act(env.reset())
    self.assertEqual(act, 1)
    self.assertEqual(state_id, 144)
class MagicThing(BasicMagicThing):
    """A magic item that must be 'understood' (activated) before use.

    Activation happens the first time a player looks at it: the player faces
    repeated challenges until accumulated craft overcomes the item's
    `understand` difficulty, taking spirit damage on each failed attempt.
    Once activated, the item grants combat bonuses against `opponent_class`.
    """

    # Attack / defense bonus factors applied once the item is activated.
    off = 1
    deff = 1
    opponent_class = get_agent_class()
    # Difficulty of understanding the item; drained by the player's craft.
    understand = 5
    activated = False
    # Optional text shown on successful activation (False means no text).
    activated_description = False
    multiple_choice = False

    def craft_aid_off(self, opponent):
        # and/or chain: yields self.off only when activated AND the opponent
        # matches, otherwise 1 (no bonus). NOTE(review): if a subclass sets
        # off to a falsy value this silently degrades to 1 — confirm intended.
        return self.activated and isinstance(opponent, self.opponent_class) and self.off or 1

    def craft_aid_def(self, opponent):
        # Same pattern as craft_aid_off, for the defense bonus.
        return self.activated and isinstance(opponent, self.opponent_class) and self.deff or 1

    def look_at(self, player, gui):
        # Describe the thing; on first look, run the activation challenge.
        Thing.look_at(self, player, gui)
        if not self.activated:
            gui.prn("C'è qualcosa misterioso qui.")
            understand = self.understand
            while understand > 0:
                outcome = self.challenge(player, gui)
                if not outcome:
                    # A failed challenge costs spirit, scaled with remaining
                    # difficulty but capped by the player's current sp.
                    player_damage = min((understand + 3) // 4, player.sp)
                    gui.prn('Perdi %d punti di spirito.\n' % player_damage)
                    player.sp -= player_damage
                    # NOTE(review): under Python 3 this subtracts a float
                    # (true division) — confirm whether // was intended.
                    player.long_sp -= player_damage / 2
                    return False
                else:
                    # Each success chips away at the difficulty.
                    understand = understand - player.craft
            self.activated = True
            if self.activated_description:
                gui.prn(self.activated_description)
        return True

    def use(self, player, gui):
        # Only an activated item can actually be used.
        if self.activated:
            BasicMagicThing.use(self, player, gui)
def first_pass(config):
    """First search stage: pure random search over the agent's hyper-parameters.

    Each sampled configuration is scored in a worker process; results are
    returned sorted by mean score, best first.
    """
    config = copy.deepcopy(config)  # never mutate the caller's config
    config['result_dir_prefix'] = config['result_dir_prefix'] + '/first-pass'
    if config['debug']:
        print('Removing fixed params')
    config["fixed_params"] = {}
    config['max_iter'] = 5 if config['debug'] else 150
    if config['debug']:
        print('Overriding max_iter params to %d' % config['max_iter'])
    # Fixes: removed an unused `dry_run` local and a duplicate `results = []`
    # initialization; result collection is now a comprehension.
    get_params = get_agent_class(config).get_random_config
    futures = []
    with concurrent.futures.ProcessPoolExecutor(
            min(multiprocessing.cpu_count(), config['nb_process'])) as executor:
        nb_config = 5 if config['debug'] else 1000
        for i in range(nb_config):
            params = get_params(config["fixed_params"])
            config.update(params)
            futures.append(
                executor.submit(exec_first_pass, i, copy.deepcopy(config), params))
        concurrent.futures.wait(futures)
    results = [future.result() for future in futures]
    return {
        'results': sorted(results, key=lambda result: result['mean_score'], reverse=True)
    }
def test_mcagent_learn_from_episode(self):
    """Q-values after learning one episode match a known-good snapshot."""
    cfg = {
        'lr': 1,  # unused by this agent
        'agent_name': 'TabularMCAgent',
        'env_name': 'CartPole-v0',
        'random_seed': 0,
        'result_dir': dir + '/results',
        'discount': 1.,
    }
    np.random.seed(0)
    cfg.update(get_agent_class(cfg).get_random_config())
    cfg['discount'] = 1.  # re-pin after random config may override it

    env = gym.make(cfg['env_name'])
    env.seed(0)
    agent = make_agent(cfg, env)

    agent.learn_from_episode(env)
    qs = agent.sess.run(agent.Qs)
    expected = [4.49999952, 1.99999976]
    self.assertEqual(np.sum(np.isclose(qs[126], expected)) == 2, True)
def main(_):
    """Entry point: dispatch on the parsed flags.

    Routes to one of hyperband search, full (three-pass) search, random
    search, or plain train/play depending on the boolean flags in the config.
    """
    config = flags.FLAGS.__flags.copy()
    config["fixed_params"] = json.loads(config["fixed_params"])
    # NOTE: a disabled feature used to override the shell flags with a saved
    # config.json from config['result_dir'] — intentionally left off.

    def save_json(path, obj):
        # Single writer for every result summary below (was 4x boilerplate).
        with open(path, 'w') as f:
            json.dump(obj, f)

    if config['hyperband']:
        print('Starting hyperband search')
        config['result_dir_prefix'] = dir + '/results/hyperband/' + str(int(time.time()))
        get_params = get_agent_class(config).get_random_config
        hb = Hyperband(get_params, run_params)
        results = hb.run(config, skip_last=True, dry_run=config['dry_run'])
        if not os.path.exists(config['result_dir_prefix']):
            os.makedirs(config['result_dir_prefix'])
        save_json(config['result_dir_prefix'] + '/hb_results.json', results)
    elif config['fullsearch']:
        print('*** Starting full search')
        config['result_dir_prefix'] = (dir + '/results/fullsearch/'
                                       + str(int(time.time())) + '-' + config['agent_name'])
        os.makedirs(config['result_dir_prefix'])

        print('*** Starting first pass: full random search')
        summary = fullsearch.first_pass(config)
        save_json(config['result_dir_prefix'] + '/fullsearch_results1.json', summary)

        print('*** Starting second pass: Learning rate search')
        best_agent_config = summary['results'][0]['params']
        summary = fullsearch.second_pass(config, best_agent_config)
        save_json(config['result_dir_prefix'] + '/fullsearch_results2.json', summary)

        print('*** Starting third pass: Hyperband search with best lr')
        best_lr = summary['results'][0]['lr']
        summary = fullsearch.third_pass(config, best_lr)
        save_json(config['result_dir_prefix'] + '/fullsearch_results3.json', summary)
    elif config['randomsearch']:
        print('*** Starting random search')
        config['result_dir_prefix'] = (dir + '/results/randomsearch/'
                                       + str(int(time.time())) + '-' + config['agent_name'])
        os.makedirs(config['result_dir_prefix'])
        summary = randomsearch.search(config)
        # NOTE(review): the filename says "fullsearch" but this holds the
        # random-search summary — kept as-is for tooling compatibility.
        save_json(config['result_dir_prefix'] + '/fullsearch_results1.json', summary)
    else:
        env = gym.make(config['env_name'])
        agent = make_agent(config, env)
        if config['play']:
            for _i in range(config['play_nb']):
                agent.play(env)
        else:
            agent.train()
            agent.save()
def init_move(self, place):
    """Move this thing to *place* and register it in the map's global thing list."""
    self.move(place)
    # When placed on an agent, the game map hangs off the agent's own location.
    holder = self.place.place if isinstance(place, get_agent_class()) else self.place
    holder.game_map.all_things.append(self)