# Example #1
# 0
    def load(self, env_name):
        """Restore a saved Q-network checkpoint for *env_name*.

        Loads the network weights, moves the network to the active
        device, and restores the best-mean / best-overall reward
        statistics recorded at save time.
        """
        checkpoint_file = self.params['load_dir'] + "DQL_" + env_name + ".ptm"
        # Deserialize onto CPU regardless of where the checkpoint was
        # saved; the network is moved to the active device just below.
        agent_state = torch.load(checkpoint_file,
                                 map_location=lambda storage, loc: storage)
        self.Q.load_state_dict(agent_state["Q"])
        self.Q.to(device)
        self.best_reward = agent_state["best_reward"]
        self.best_mean_reward = agent_state["best_mean_reward"]
        print("Cargado del modelo Q desde", checkpoint_file,
              "que hasta el momento tiene una mejor recompensa media de: ",
              self.best_mean_reward, " y una recompensa máxima de: ",
              self.best_reward)


if __name__ == "__main__":
    env_conf = manager.get_environment_params()
    env_conf["env_name"] = args.env

    # In test mode a "life" lost does not end an episode, so rewards are
    # reported per full game instead of per life.
    if args.test:
        env_conf["episodic_life"] = False
    reward_type = "LIFE" if env_conf["episodic_life"] else "GAME"

    # Select the environment-specific useful region whose key appears in
    # the requested env name; fall back to "Default" when none matches.
    region_found = False
    for region_key, region in env_conf["useful_region"].items():
        if region_key in args.env:
            env_conf["useful_region"] = region
            region_found = True
            break
    if not region_found:
        env_conf["useful_region"] = env_conf["useful_region"]["Default"]
    print("Configuración a utilizar:", env_conf)
# Example #2
# 0
                # Log this actor's reward statistics, keyed by the global
                # step counter.  NOTE(review): `writer` is presumably a
                # TensorBoard SummaryWriter — confirm against the
                # enclosing method, which is not visible here.
                writer.add_scalar(self.actor_name + "/reward", reward,
                                  self.global_step_num)
                writer.add_scalar(self.actor_name + "/ep_reward", ep_reward,
                                  self.global_step_num)
                writer.add_scalar(self.actor_name + "/mean_ep_reward",
                                  np.mean(episode_rewards),
                                  self.global_step_num)
                # NOTE(review): the tag says "max_ep_reward" but the value
                # logged is self.best_reward — presumably the running best;
                # confirm they are kept in sync.
                writer.add_scalar(self.actor_name + "/max_ep_reward",
                                  self.best_reward, self.global_step_num)


if __name__ == "__main__":

    agent_params = manager.get_agent_params()
    agent_params["model_dir"] = args.output_dir
    agent_params["test"] = args.test

    env_params = manager.get_environment_params()
    env_params["env_name"] = args.env

    # "spawn" gives every worker a fresh interpreter — presumably chosen
    # for CUDA/torch safety under fork.  NOTE(review): set_start_method
    # raises RuntimeError if called twice; confirm this is the only call.
    mp.set_start_method("spawn")

    # One actor-critic agent process per configured worker.  `actor_id`
    # is used instead of `id` to avoid shadowing the builtin.
    agent_procs = [
        DeepActorCriticAgent(actor_id, args.env, agent_params, env_params)
        for actor_id in range(agent_params["num_agents"])
    ]

    # Plain loops rather than side-effect list comprehensions (which
    # build and discard a list of Nones): start every process, then
    # block until each one finishes.
    for proc in agent_procs:
        proc.start()
    for proc in agent_procs:
        proc.join()