def get_agent_data(config, trial_num):
    # loads config from agent dir
    # note: relies on `agent_t` being defined at module level (e.g., in the
    # script's __main__ block); it is not a parameter of this function
    agent_dir = join(pardir, get_agent_output_dir(config, agent_t, trial_num))
    config_file = join(agent_dir, 'config.json')
    if not exists(config_file):
        return None, None
    config = EnvironmentConfiguration.load_json(config_file)

    # creates env helper
    helper = create_helper(config)

    # tries to load full analysis
    analyses_dir = get_analysis_output_dir(agent_dir)
    file_name = join(analyses_dir, 'full-analysis.json')
    analysis = None
    if exists(file_name):
        analysis = FullAnalysis.load_json(file_name)
        analysis.set_helper(helper)

    # tries to load all data
    stats_dir = join(agent_dir, 'results')
    if exists(stats_dir):
        helper.load_stats(stats_dir)
    else:
        helper = None

    return analysis, helper
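# Minimal usage sketch for get_agent_data (not part of the original script):
# it shows the three possible outcomes of the (analysis, helper) return pair.
# `DEFAULT_CONFIG` and `agent_t` are assumed to be defined at module level, as
# in the function above; `_print_agent_summary` is a hypothetical helper name.
def _print_agent_summary(trial_num=0):
    analysis, helper = get_agent_data(DEFAULT_CONFIG, trial_num)
    if helper is None:
        # either no config.json was found or no 'results' stats dir exists
        print('No saved config/stats found for trial {}'.format(trial_num))
    elif analysis is None:
        print('Stats loaded, but no full-analysis.json was found')
    else:
        print('Loaded full analysis and stats for trial {}'.format(trial_num))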
def load_agent_config(agent_dir, trial=0):
    agent_dir = agent_dir if agent_dir else \
        get_agent_output_dir(DEFAULT_CONFIG, AgentType.Learning, trial)
    config_file = os.path.join(agent_dir, 'config.json')
    if not os.path.exists(agent_dir) or not os.path.exists(config_file):
        raise ValueError(f'Could not load configuration from: {config_file}.')
    configuration = EnvironmentConfiguration.load_json(config_file)

    # if testing, we want to force a seed different than training (diff. test environments)
    # configuration.seed += 1

    return configuration, agent_dir
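# Hedged usage sketch for load_agent_config (not from the original source): the
# results path is purely illustrative, and the function is expected to raise
# ValueError when no saved agent exists at that location.
def _demo_load_agent_config():
    try:
        config, agent_dir = load_agent_config('results/learning/trial-0')
        print('Loaded config for env: {}'.format(config.gym_env_id))
    except ValueError as err:
        print('No saved agent found: {}'.format(err))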
def run_trial(args):
    # tries to get agent type
    agent_t = args.agent
    if agent_t == AgentType.Testing:
        # tries to load a pre-trained agent configuration file
        config, results_dir = load_agent_config(args.results, args.trial)
    else:
        # tries to load env config from provided file path
        config_file = args.config_file_path
        config = args.default_frogger_config if config_file is None or not exists(config_file) \
            else EnvironmentConfiguration.load_json(config_file)

    # creates env helper
    helper = create_helper(config)

    # checks for provided output dir
    output_dir = args.output if args.output is not None else \
        get_agent_output_dir(config, agent_t, args.trial)
    if not exists(output_dir):
        makedirs(output_dir)

    # saves / copies configs to file
    config.save_json(join(output_dir, 'config.json'))
    helper.save_state_features(join(output_dir, 'state_features.csv'))

    # register environment in Gym according to env config
    env_id = '{}-{}-v0'.format(config.gym_env_id, args.trial)
    helper.register_gym_environment(env_id, False, args.fps, args.show_score_bar)

    # create environment and monitor
    env = gym.make(env_id)
    config.num_episodes = args.num_episodes
    video_callable = video_schedule(config, args.record)
    env = Monitor(env, directory=output_dir, force=True, video_callable=video_callable)

    # adds reference to monitor to allow for gym environments to update video frames
    if video_callable(0):
        env.env.monitor = env

    # initialize seeds (one for the environment, another for the agent)
    env.seed(config.seed + args.trial)
    agent_rng = np.random.RandomState(config.seed + args.trial)

    # creates the agent
    agent, exploration_strategy = create_agent(helper, agent_t, agent_rng)

    # if testing, loads tables from file (some will be filled by the agent during the interaction)
    if agent_t == AgentType.Testing:
        agent.load(results_dir)

    # runs episodes
    behavior_tracker = BehaviorTracker(config.num_episodes)
    recorded_episodes = []
    for e in range(config.num_episodes):

        # checks whether to activate video monitoring
        env.env.monitor = env if video_callable(e) else None

        # reset environment
        old_obs = env.reset()
        old_s = helper.get_state_from_observation(old_obs, 0, False)

        if args.verbose:
            print(f'Episode: {e}')
            # helper.update_stats_episode(e)
        exploration_strategy.update(e)  # update for learning agent

        t = 0
        done = False
        while not done:
            # select action
            a = agent.act(old_s)

            # observe transition
            obs, r, done, _ = env.step(a)
            s = helper.get_state_from_observation(obs, r, done)
            r = helper.get_reward(old_s, a, r, s, done)

            # update agent and stats
            agent.update(old_s, a, r, s)
            behavior_tracker.add_sample(old_s, a)
            helper.update_stats(e, t, old_obs, obs, old_s, a, r, s)
            old_s = s
            old_obs = obs
            t += 1

        # adds to recorded episodes list
        if video_callable(e):
            recorded_episodes.append(e)

        # signals new episode to tracker
        behavior_tracker.new_episode()

    # writes results to files
    agent.save(output_dir)
    behavior_tracker.save(output_dir)
    write_table_csv(recorded_episodes, join(output_dir, 'rec_episodes.csv'))
    helper.save_stats(join(output_dir, 'results'), args.clear_results)

    print('\nResults of trial {} written to:\n\t\'{}\''.format(args.trial, output_dir))
    env.close()
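# Sketch of an argument parser matching the fields this version of run_trial
# reads (args.agent, args.results, args.config_file_path, etc.). This is an
# assumption about the surrounding script, not code from the original source;
# flag names, defaults, and the int-valued AgentType default are all guesses.
import argparse

def _make_parser(default_config):
    parser = argparse.ArgumentParser(description='Runs one RL agent trial.')
    parser.add_argument('-a', '--agent', type=int, default=AgentType.Learning,
                        help='agent type (assumed to be an int-valued enum)')
    parser.add_argument('-r', '--results', type=str, default=None, help='pre-trained agent results dir')
    parser.add_argument('-t', '--trial', type=int, default=0, help='trial number')
    parser.add_argument('-c', '--config_file_path', type=str, default=None, help='env config JSON file')
    parser.add_argument('-o', '--output', type=str, default=None, help='output dir')
    parser.add_argument('-e', '--num_episodes', type=int, default=100, help='number of episodes')
    parser.add_argument('--fps', type=int, default=30, help='frames per second')
    parser.add_argument('--show_score_bar', action='store_true', help='show score bar in env')
    parser.add_argument('--record', action='store_true', help='record episode videos')
    parser.add_argument('--verbose', action='store_true', help='print episode progress')
    parser.add_argument('--clear_results', action='store_true', help='clear previous stats')
    # run_trial also reads args.default_frogger_config; inject it as a default
    parser.set_defaults(default_frogger_config=default_config)
    return parser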
if __name__ == '__main__':
    # tries to get agent type
    agent_t = int(sys.argv[1]) if len(sys.argv) > 1 else DEF_AGENT_TYPE

    # tries to load agent from results dir
    agent_dir = sys.argv[2] if len(sys.argv) > 2 else \
        get_agent_output_dir(DEFAULT_CONFIG, agent_t)
    if not exists(agent_dir):
        raise ValueError('Could not load agent from: {}'.format(agent_dir))
    config_file = join(agent_dir, 'config.json')
    if not exists(config_file):
        raise ValueError('Configuration not found: {}'.format(config_file))
    config = EnvironmentConfiguration.load_json(config_file)
    action_names = config.get_action_names()

    # creates env helper
    helper = create_helper(config, SOUND)
    feats_nbins = helper.get_features_bins()

    # loads the agent's behavior
    behavior_tracker = BehaviorTracker(0)
    behavior_tracker.load(agent_dir)

    # register environment in Gym according to env_config
    env_id = '{}-replay-v0'.format(config.gym_env_id)
    helper.register_gym_environment(env_id, True, FPS, SHOW_SCORE_BAR)

    # creates environment
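# Example invocation of this replay script (hypothetical file name and paths;
# the positional arguments are the numeric agent type and the agent's results
# directory, matching the sys.argv parsing above):
#   $ python replay.py 0 results/learning/trial-0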
def run_trial(args):
    # tries to get agent type
    agent_t = args.agent
    results_dir = ''
    if agent_t == AgentType.Testing:

        # tries to load config from provided results dir path
        results_dir = args.results if args.results is not None else \
            get_agent_output_dir(DEFAULT_CONFIG, AgentType.Learning)
        config_file = join(results_dir, 'config.json')
        if not exists(results_dir) or not exists(config_file):
            raise ValueError('Could not load configuration from: {}.'.format(config_file))
        config = EnvironmentConfiguration.load_json(config_file)

        # if testing, we want to force a seed different than training (diff. test environments)
        config.seed += 1

    else:
        # tries to load env config from provided file path
        config_file = args.config
        config = DEFAULT_CONFIG if config_file is None or not exists(config_file) \
            else EnvironmentConfiguration.load_json(config_file)

    # creates env helper
    helper = create_helper(config)

    # checks for provided output dir
    output_dir = args.output if args.output is not None else \
        get_agent_output_dir(config, agent_t, args.trial)
    if not exists(output_dir):
        makedirs(output_dir)

    # saves / copies configs to file
    config.save_json(join(output_dir, 'config.json'))
    helper.save_state_features(join(output_dir, 'state_features.csv'))

    # register environment in Gym according to env config
    env_id = '{}-{}-v0'.format(config.gym_env_id, args.trial)
    helper.register_gym_environment(env_id, False, FPS, SHOW_SCORE_BAR)

    # create environment and monitor
    env = gym.make(env_id)
    # todo
    config.num_episodes = 100
    video_callable = video_schedule(config, args.record)
    env = Monitor(env, directory=output_dir, force=True, video_callable=video_callable)

    # adds reference to monitor to allow for gym environments to update video frames
    if video_callable(0):
        env.env.monitor = env

    # initialize seeds (one for the environment, another for the agent)
    env.seed(config.seed + args.trial)
    agent_rng = np.random.RandomState(config.seed + args.trial)

    # creates the agent
    agent, exploration_strategy = create_agent(helper, agent_t, agent_rng)

    # if testing, loads tables from file (some will be filled by the agent during the interaction)
    if agent_t == AgentType.Testing:
        agent.load(results_dir)

    # runs episodes
    behavior_tracker = BehaviorTracker(config.num_episodes)
    recorded_episodes = []
    for e in range(config.num_episodes):

        # checks whether to activate video monitoring
        env.env.monitor = env if video_callable(e) else None

        # reset environment
        old_obs = env.reset()
        old_s = helper.get_state_from_observation(old_obs, 0, False)

        if args.verbose:
            helper.update_stats_episode(e)
        exploration_strategy.update(e)

        t = 0
        done = False
        while not done:
            # select action
            a = agent.act(old_s)

            # observe transition
            obs, r, done, _ = env.step(a)
            s = helper.get_state_from_observation(obs, r, done)
            r = helper.get_reward(old_s, a, r, s, done)

            # update agent and stats
            agent.update(old_s, a, r, s)
            behavior_tracker.add_sample(old_s, a)
            helper.update_stats(e, t, old_obs, obs, old_s, a, r, s)
            old_s = s
            old_obs = obs
            t += 1

        # adds to recorded episodes list
        if video_callable(e):
            recorded_episodes.append(e)

        # signals new episode to tracker
        behavior_tracker.new_episode()

    # writes results to files
    agent.save(output_dir)
    behavior_tracker.save(output_dir)
    write_table_csv(recorded_episodes, join(output_dir, 'rec_episodes.csv'))
    helper.save_stats(join(output_dir, 'results'), CLEAR_RESULTS)

    print('\nResults of trial {} written to:\n\t\'{}\''.format(args.trial, output_dir))
    env.close()
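# Both run_trial variants call video_schedule(config, record) to build the
# Monitor's video_callable, but its definition is not shown in this section.
# Below is a minimal sketch consistent with how it is used (a factory returning
# an episode-index predicate); the actual capture policy and the ~10-videos
# interval are assumptions, not the original implementation.
def video_schedule(config, record):
    def _should_record(episode):
        # when recording is disabled, never capture video
        if not record:
            return False
        # otherwise capture the first episode and then every `interval` episodes
        interval = max(1, config.num_episodes // 10)  # assumed: ~10 videos per trial
        return episode % interval == 0
    return _should_record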