예제 #1
0
def get_params(args):
    """Build the agent and environment parameter dictionaries.

    Args:
        args: Parsed command-line namespace. ``args.env_params`` (lower-cased)
            is passed to ``ParamsManager.get_env_params`` and
            ``args.env_name`` is stored under ``env_params['env_name']``.

    Returns:
        A ``(agent_params, env_params)`` tuple. ``env_params['useful_region']``
        is collapsed from the per-environment table to a single entry: the
        first one whose key is a substring of the environment name, or the
        ``'Default'`` entry when no key matches.
    """
    manager = ParamsManager()
    agent_params = manager.get_agent_params()
    env_params = manager.get_env_params(args.env_params.lower())
    env_params['env_name'] = args.env_name

    region_table = env_params['useful_region']
    for name_fragment, region in region_table.items():
        if name_fragment in env_params['env_name']:
            env_params['useful_region'] = region
            break
    else:
        # No key matched the environment name: fall back to the default.
        env_params['useful_region'] = region_table['Default']

    return agent_params, env_params
                obs = next_obs
                self.global_step_num += 1
                if args.render:
                    self.env.render()
                #print(self.actor_name + ":Episode#:", episode, "step#:", step_num, "\t rew=", reward, end="\r")
                writer.add_scalar(self.actor_name + "/reward", reward,
                                  self.global_step_num)
            print(
                "{}:Episode#:{} \t ep_reward:{} \t mean_ep_rew:{}\t best_ep_reward:{}"
                .format(self.actor_name, episode, ep_reward,
                        np.mean(episode_rewards), self.best_reward))
            writer.add_scalar(self.actor_name + "/ep_reward", ep_reward,
                              self.global_step_num)


if __name__ == "__main__":
    # Agent-side configuration, with the model directory taken from the CLI.
    agent_params = params_manager.get_agent_params()
    agent_params["model_dir"] = args.model_dir

    # Environment-side configuration (used with Atari environments).
    env_params = params_manager.get_env_params()
    env_params["env_name"] = args.env

    # 'spawn' prevents a RuntimeError during CUDA initialisation.
    mp.set_start_method('spawn')

    # One agent process per configured agent, each given its own index.
    agent_procs = [
        DeepActorCriticAgent(agent_id, args.env, agent_params, env_params)
        for agent_id in range(agent_params["num_agents"])
    ]
    # Launch every agent first, then wait for all of them to finish.
    for proc in agent_procs:
        proc.start()
    for proc in agent_procs:
        proc.join()
    def load(self, env_name):
        """Restore the Q model and best-reward statistics from a saved file.

        The file path is ``<load_dir>DQL_<env_name>.ptm``, where ``load_dir``
        comes from ``self.params``. The stored "Q" state is loaded into
        ``self.Q``, which is then moved to the module-level ``device``.

        Args:
            env_name: Environment name embedded in the saved file's name.
        """

        def keep_storage(storage, loc):
            # Hand the deserialised storage back unchanged.
            return storage

        file_name = "".join(
            (self.params['load_dir'], "DQL_", env_name, ".ptm"))
        agent_state = torch.load(file_name, map_location=keep_storage)

        self.Q.load_state_dict(agent_state["Q"])
        self.Q.to(device)

        self.best_mean_reward = agent_state["best_mean_reward"]
        self.best_reward = agent_state["best_reward"]

        print("Loaded Q model state from", file_name,
              " which fetched a best mean reward of:", self.best_mean_reward,
              " and an all time best reward of:", self.best_reward)


if __name__ == "__main__":
    # Fetch the environment configuration and record the requested environment.
    env_conf = params_manager.get_env_params()
    env_conf["env_name"] = args.env_name
    # If a custom useful_region configuration is available for this
    # environment ID, use it; otherwise fall back to the 'Default' entry.
    custom_region_available = False
    for key, value in env_conf['useful_region'].items():
        # Keys are matched as substrings of the requested environment name;
        # the first match wins.
        if key in args.env_name:
            env_conf['useful_region'] = value
            custom_region_available = True
            break
    if custom_region_available is not True:
        # Nothing matched: collapse the table to its 'Default' entry.
        env_conf['useful_region'] = env_conf['useful_region']['Default']

    print("Using env_conf:", env_conf)
    atari_env = False
    for game in Atari.get_games_list():
        if game in args.env_name.lower():
예제 #4
0
    def load(self, env_name):
        """Restore the Q model and best-reward statistics from a saved file.

        The file path is ``<load_dir>DQL_<env_name>.ptn``, where ``load_dir``
        comes from ``self.params``. The stored "Q" state is loaded into
        ``self.Q``, which is then moved to the module-level ``device``, and
        ``best_reward_mean`` / ``best_reward`` are restored on the agent.

        Args:
            env_name: Environment name embedded in the saved file's name.
        """
        # NOTE(review): the extension here is ".ptn"; confirm it matches the
        # extension used by the code that writes the file.
        file_name = self.params['load_dir'] + "DQL_" + env_name + ".ptn"
        # The map_location callable returns each storage unchanged.
        agent_state = torch.load(file_name,
                                 map_location=lambda storage, loc: storage)
        # Fixed: the method is load_state_dict(); the previous
        # `load_state.dict(...)` raised AttributeError before any weights
        # were restored (presumably self.Q is a torch.nn.Module).
        self.Q.load_state_dict(agent_state["Q"])
        self.Q.to(device)
        self.best_reward_mean = agent_state["best_reward_mean"]
        self.best_reward = agent_state["best_reward"]
        print("El modelo se ha cargado desde: ", file_name,
              ". Que tiene una mejor recompensa media: ",
              self.best_reward_mean, ". Recompensa máxima: ", self.best_reward)


if __name__ == "__main__":
    # Fetch the environment configuration and record the requested environment.
    env_conf = manager.get_env_params()
    env_conf["env_game"] = args.env

    # In test mode the episodic-life setting is switched off.
    if args.test:
        env_conf["episodic_life"] = False

    # Label for the kind of reward being tracked, derived from the setting above.
    reward_type = "LIFE" if env_conf["episodic_life"] else "GAME"

    # Pick the useful_region entry whose key is a substring of the requested
    # environment name; the first match wins.
    custom_region_available = False
    for key, value in env_conf["useful_region"].items():
        if key in args.env:
            env_conf["useful_region"] = value
            # Fixed: this previously assigned a misspelled name
            # ("custome_region_available"), so the real flag stayed False and
            # the check after the loop always treated the lookup as failed.
            custom_region_available = True
            break

    if custom_region_available is not True: