def load(self, env_name):
    """Restore the agent's Q-network and best-reward stats from a checkpoint.

    Loads ``<load_dir>/DQL_<env_name>.ptm`` (saved with ``torch.save``),
    copies the weights into ``self.Q``, moves the network to the module-level
    ``device`` (defined elsewhere in this file — TODO confirm), and restores
    ``self.best_mean_reward`` / ``self.best_reward``.

    :param env_name: name of the environment whose checkpoint to load.
    :raises FileNotFoundError: if the checkpoint file does not exist.
    """
    file_name = self.params['load_dir'] + "DQL_" + env_name + ".ptm"
    # map_location keeps every tensor on CPU at deserialization time;
    # the network is moved to the target device explicitly below.
    # NOTE(review): torch.load is pickle-based — only load trusted checkpoints.
    agent_state = torch.load(file_name, map_location=lambda storage, loc: storage)
    self.Q.load_state_dict(agent_state["Q"])
    self.Q.to(device)
    self.best_mean_reward = agent_state["best_mean_reward"]
    self.best_reward = agent_state["best_reward"]
    print("Cargado del modelo Q desde", file_name,
          "que hasta el momento tiene una mejor recompensa media de: ",
          self.best_mean_reward,
          " y una recompensa máxima de: ", self.best_reward)


if __name__ == "__main__":
    # Build the environment configuration from the params manager, then
    # override pieces of it from the command-line arguments.
    env_conf = manager.get_environment_params()
    env_conf["env_name"] = args.env
    # In test mode, treat a whole game (not a single life) as one episode.
    if args.test:
        env_conf["episodic_life"] = False
    # presumably consumed later in the script for logging — verify downstream use
    reward_type = "LIFE" if env_conf["episodic_life"] else "GAME"
    # Select a per-environment "useful region" (frame crop) if one of the
    # configured keys matches this env's name; otherwise fall back to Default.
    custom_region_available = False
    for key, value in env_conf["useful_region"].items():
        if key in args.env:
            env_conf["useful_region"] = value
            custom_region_available = True
            break
    # Idiomatic truthiness test (was `is not True`); both assignments above
    # are literal booleans, so behavior is unchanged.
    if not custom_region_available:
        env_conf["useful_region"] = env_conf["useful_region"]["Default"]
    print("Configuración a utilizar:", env_conf)
# --- TensorBoard scalar logging (fragment) ---
# NOTE(review): these statements belong inside a method whose `def` line is
# outside this view; `writer`, `reward`, `ep_reward` and `episode_rewards`
# come from that enclosing scope — confirm against the full file.
# Tags are namespaced per actor so multiple actor processes stay separable.
writer.add_scalar(self.actor_name + "/reward", reward, self.global_step_num)
writer.add_scalar(self.actor_name + "/ep_reward", ep_reward, self.global_step_num)
writer.add_scalar(self.actor_name + "/mean_ep_reward", np.mean(episode_rewards), self.global_step_num)
writer.add_scalar(self.actor_name + "/max_ep_reward", self.best_reward, self.global_step_num)


if __name__ == "__main__":
    # Assemble agent/environment configuration from the params manager,
    # overriding output directory, test flag, and env name from CLI args.
    agent_params = manager.get_agent_params()
    agent_params["model_dir"] = args.output_dir
    agent_params["test"] = args.test
    env_params = manager.get_environment_params()
    env_params["env_name"] = args.env
    # Use the "spawn" start method for child processes — presumably required
    # for CUDA/torch compatibility across platforms; confirm project intent.
    mp.set_start_method("spawn")
    # Launch one DeepActorCriticAgent process per configured agent and wait
    # for all of them to finish.
    agent_procs = [
        DeepActorCriticAgent(id, args.env, agent_params, env_params)
        for id in range(agent_params["num_agents"])
    ]
    [p.start() for p in agent_procs]
    [p.join() for p in agent_procs]