def get_params(args):
    """Load the agent and environment parameter dictionaries from disk.

    Args:
        args: Parsed CLI namespace; must provide ``env_params`` (the name of
            the environment-parameter section to load) and ``env_name``
            (the Gym environment ID).

    Returns:
        Tuple ``(agent_params, env_params)`` of parameter dictionaries, with
        ``env_params['useful_region']`` collapsed to the entry matching the
        environment name (or the ``'Default'`` entry when no key matches).
    """
    params_manager = ParamsManager()
    agent_params = params_manager.get_agent_params()
    env_params = params_manager.get_env_params(args.env_params.lower())
    env_params['env_name'] = args.env_name
    # Pick the custom "useful_region" config whose key is a substring of the
    # environment name; the for/else falls back to 'Default' when none match.
    for key, value in env_params['useful_region'].items():
        if key in env_params['env_name']:
            env_params['useful_region'] = value
            break
    else:
        env_params['useful_region'] = env_params['useful_region']['Default']
    return agent_params, env_params
# NOTE(review): this chunk begins mid-method — the statements below are the
# tail of a per-step/per-episode training loop whose enclosing ``def`` (and
# its indentation structure) is outside this view; layout here is a
# best-effort reconstruction. ``args``, ``writer`` and ``params_manager`` are
# defined elsewhere in the file.
obs = next_obs  # carry the observation forward to the next env step
self.global_step_num += 1
if args.render:
    self.env.render()
#print(self.actor_name + ":Episode#:", episode, "step#:", step_num, "\t rew=", reward, end="\r")
# Per-step reward scalar for TensorBoard.
writer.add_scalar(self.actor_name + "/reward", reward, self.global_step_num)
# Per-episode summary: episode reward, running mean, and best reward so far.
print(
    "{}:Episode#:{} \t ep_reward:{} \t mean_ep_rew:{}\t best_ep_reward:{}"
    .format(self.actor_name, episode, ep_reward, np.mean(episode_rewards),
            self.best_reward))
writer.add_scalar(self.actor_name + "/ep_reward", ep_reward,
                  self.global_step_num)


if __name__ == "__main__":
    # Entry point: launch one actor-critic agent process per configured agent.
    agent_params = params_manager.get_agent_params()
    agent_params["model_dir"] = args.model_dir
    env_params = params_manager.get_env_params(
    )  # Used with Atari environments
    env_params["env_name"] = args.env
    mp.set_start_method('spawn')  # Prevents RuntimeError during cuda init
    agent_procs = [
        DeepActorCriticAgent(id, args.env, agent_params, env_params)
        for id in range(agent_params["num_agents"])
    ]
    # Start all agent processes, then block until every one has finished.
    [p.start() for p in agent_procs]
    [p.join() for p in agent_procs]
def load(self, env_name):
    """Restore the Q-network weights and best-reward stats from a checkpoint.

    Args:
        env_name: Gym environment ID; used to build the checkpoint filename
            ``<load_dir>/DQL_<env_name>.ptm``.

    Raises:
        FileNotFoundError: if no checkpoint file exists for this environment.
    """
    file_name = self.params['load_dir'] + "DQL_" + env_name + ".ptm"
    # map_location loads tensors onto CPU storage regardless of the device
    # they were saved from.
    agent_state = torch.load(file_name,
                             map_location=lambda storage, loc: storage)
    self.Q.load_state_dict(agent_state["Q"])
    self.Q.to(device)
    self.best_mean_reward = agent_state["best_mean_reward"]
    self.best_reward = agent_state["best_reward"]
    print("Loaded Q model state from", file_name,
          " which fetched a best mean reward of:", self.best_mean_reward,
          " and an all time best reward of:", self.best_reward)


if __name__ == "__main__":
    env_conf = params_manager.get_env_params()
    env_conf["env_name"] = args.env_name
    # If a custom useful_region configuration for this environment ID is
    # available, use it; if not, use the Default
    custom_region_available = False
    for key, value in env_conf['useful_region'].items():
        if key in args.env_name:
            env_conf['useful_region'] = value
            custom_region_available = True
            break
    if custom_region_available is not True:
        env_conf['useful_region'] = env_conf['useful_region']['Default']
    print("Using env_conf:", env_conf)
    atari_env = False
    # NOTE(review): chunk is cut off here — the body of the ``if`` below
    # continues outside this view (presumably setting ``atari_env = True``
    # when the environment is an Atari game — TODO confirm).
    for game in Atari.get_games_list():
        if game in args.env_name.lower():
def load(self, env_name):
    """Restore the Q-network weights and best-reward stats from a checkpoint.

    Args:
        env_name: Gym environment ID; used to build the checkpoint filename
            ``<load_dir>/DQL_<env_name>.ptn``.
    """
    # NOTE(review): extension ".ptn" differs from the ".ptm" used by the
    # sibling implementation in this file — confirm which one save() writes.
    file_name = self.params['load_dir'] + "DQL_" + env_name + ".ptn"
    # map_location loads tensors onto CPU storage regardless of save device.
    agent_state = torch.load(file_name,
                             map_location=lambda storage, loc: storage)
    # BUG(review): ``load_state.dict`` raises AttributeError at runtime —
    # this should be ``self.Q.load_state_dict(agent_state["Q"])``.
    self.Q.load_state.dict(agent_state["Q"])
    self.Q.to(device)
    self.best_reward_mean = agent_state["best_reward_mean"]
    self.best_reward = agent_state["best_reward"]
    # User-facing Spanish output; message text left unchanged.
    print("El modelo se ha cargado desde: ", file_name,
          ". Que tiene una mejor recompensa media: ", self.best_reward_mean,
          ". Recompensa máxima: ", self.best_reward)


if __name__ == "__main__":
    env_conf = manager.get_env_params()
    env_conf["env_game"] = args.env
    if args.test:
        # In test mode a lost life does not end the episode.
        env_conf["episodic_life"] = False
    reward_type = "LIFE" if env_conf["episodic_life"] else "GAME"
    custom_region_available = False
    for key, value in env_conf["useful_region"].items():
        if key in args.env:
            env_conf["useful_region"] = value
            # BUG(review): typo — this assigns ``custome_region_available``
            # (new variable) instead of ``custom_region_available``, so the
            # flag checked below is never set and the 'Default' fallback
            # always runs even when a custom region was found.
            custome_region_available = True
            break
    # NOTE(review): chunk is cut off here — the body of this ``if`` (the
    # 'Default' fallback assignment) continues outside this view.
    if custom_region_available is not True: