Example #1
    def test(self, episodes, render=False, log=False, record=False):
        """Evaluate the current model for `episodes` episodes and return the mean per-agent reward."""
        self.model.eval()
        env = self.env
        if record:
            # wrap a fresh env in a Monitor so that every test episode is recorded
            env = Monitor(self.env_fn(),
                          directory=os.path.join(self.path, 'recordings'),
                          force=True,
                          video_callable=lambda episode_id: True)
        with torch.no_grad():
            test_rewards = []
            total_test_steps = 0
            for ep in range(episodes):
                terminal = False
                obs_n = env.reset()
                step = 0
                ep_reward = [0 for _ in range(self.model.n_agents)]
                while not terminal:
                    if render:
                        env.render()

                    # add a batch dimension to the joint observation
                    torch_obs_n = torch.FloatTensor(obs_n).to(
                        self.device).unsqueeze(0)
                    action_n = self._select_action(self.model,
                                                   torch_obs_n,
                                                   explore=False)

                    next_obs_n, reward_n, done_n, info = env.step(action_n)
                    terminal = all(done_n) or step >= self.episode_max_steps

                    obs_n = next_obs_n
                    step += 1
                    for i, r_n in enumerate(reward_n):
                        ep_reward[i] += r_n

                total_test_steps += step
                test_rewards.append(ep_reward)

            # mean reward per agent over all test episodes
            test_rewards = np.array(test_rewards).mean(axis=0)
            if log:
                # log evaluation metrics to TensorBoard
                for i, r_n in enumerate(test_rewards):
                    self.writer.add_scalar('agent_{}/eval_reward'.format(i),
                                           r_n, self._step_iter)
                self.writer.add_scalar('_overall/eval_reward',
                                       sum(test_rewards), self._step_iter)
                self.writer.add_scalar('_overall/test_ep_steps',
                                       total_test_steps / episodes,
                                       self._step_iter)
        if record:
            env.close()

        return test_rewards
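
A minimal usage sketch for the evaluation loop above. The `Trainer` class name, its constructor arguments, and `make_env` are illustrative assumptions; only the `test` signature comes from the snippet itself:

# Hypothetical usage (Trainer / make_env are illustrative, not from the snippet):
if __name__ == '__main__':
    trainer = Trainer(env_fn=make_env, path='runs/exp1')  # assumed constructor
    mean_rewards = trainer.test(episodes=10, log=True, record=True)
    print('Mean per-agent eval reward:', mean_rewards)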
Example #2
    def test(self, episodes, render=False, log=False, record=False):
        """Evaluate the communicating agents for `episodes` episodes and return the mean per-agent reward."""
        self.model.eval()

        env = self.env
        if record:
            env = Monitor(self.env_fn(),
                          directory=os.path.join(self.path, 'recordings'),
                          force=True,
                          video_callable=lambda episode_id: True)
        with torch.no_grad():
            test_rewards = []
            total_test_steps = 0
            for ep in range(episodes):
                terminal = False
                obs_n = env.reset()
                step = 0
                ep_reward = [0 for _ in range(self.model.n_agents)]

                # reset the recurrent hidden state at the start of each episode
                self.model.init_hidden(device=self.device)
                while not terminal:
                    if render:
                        env.render()

                    torch_obs_n = torch.FloatTensor(obs_n).to(
                        self.device).unsqueeze(0)

                    # each agent encodes its own observation into a "thought" vector
                    thoughts = []
                    for agent_i in range(self.model.n_agents):
                        thoughts.append(
                            self.model.agent(agent_i).get_thought(
                                torch_obs_n[:, agent_i]))

                    # communication: each agent averages its thought with its
                    # right neighbour's in a ring, repeated `share_iter` times
                    for i in range(self.share_iter):
                        for agent_i in range(self.model.n_agents):
                            thoughts[agent_i] = (
                                thoughts[agent_i] +
                                thoughts[(agent_i + 1) % len(thoughts)]) / 2
                    thoughts = torch.stack(thoughts)

                    action_n = []
                    for agent_i in range(self.model.n_agents):
                        # assuming every other agent is a neighbour as of now
                        _neighbours = list(range(self.model.n_agents))
                        _neighbours.remove(agent_i)

                        logits = self.model.agent(agent_i)(thoughts[agent_i])
                        prob = F.softmax(logits, dim=1)
                        # greedy action selection at evaluation time
                        action = prob.argmax(1).item()
                        # action = prob.multinomial(num_samples=1).detach().item()

                        if log and step == 0 and ep == 0:
                            # log each agent's policy entropy once per test run
                            log_prob = F.log_softmax(logits, dim=1)
                            entropy = -(log_prob * prob).sum(1)
                            self.writer.add_scalar(
                                'agent_{}/entropy'.format(agent_i),
                                entropy.item(), self._step_iter)

                        action_n.append(action)

                    next_obs_n, reward_n, done_n, info = env.step(action_n)
                    terminal = all(done_n) or step >= self.episode_max_steps

                    obs_n = next_obs_n
                    step += 1
                    for i, r_n in enumerate(reward_n):
                        ep_reward[i] += r_n

                total_test_steps += step
                test_rewards.append(ep_reward)

            test_rewards = np.array(test_rewards).mean(axis=0)

            # log evaluation metrics to TensorBoard
            if log:
                for i, r_n in enumerate(test_rewards):
                    self.writer.add_scalar('agent_{}/eval_reward'.format(i),
                                           r_n, self._step_iter)
                self.writer.add_scalar('_overall/eval_reward',
                                       sum(test_rewards), self._step_iter)
                self.writer.add_scalar('_overall/test_ep_steps',
                                       total_test_steps / episodes,
                                       self._step_iter)

        if record:
            env.close()
        return test_rewards
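
A standalone sketch of the thought-sharing step from Example #2. The sizes and iteration count are arbitrary; it only demonstrates that repeated one-sided neighbour averaging over a ring drives the per-agent vectors toward a common value:

import torch

n_agents, dim, share_iter = 3, 4, 5
thoughts = [torch.randn(1, dim) for _ in range(n_agents)]

for _ in range(share_iter):
    for i in range(n_agents):
        # average each agent's thought with its right neighbour's (ring topology)
        thoughts[i] = (thoughts[i] + thoughts[(i + 1) % n_agents]) / 2

# the spread across agents shrinks with every sharing round
print(torch.stack(thoughts).std(dim=0).max())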
Example #3
# this snippet is truncated; the imports and the ArgumentParser opening below are
# reconstructed assumptions (Monitor is taken from the pre-0.21 gym.wrappers API)
import argparse

import gym
from gym.wrappers import Monitor

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Interactive Agent for ma-gym')
    parser.add_argument('--env',
                        default='Checkers-v0',
                        help='Name of the environment (default: %(default)s)')
    parser.add_argument('--episodes',
                        type=int,
                        default=1,
                        help='episodes (default: %(default)s)')
    args = parser.parse_args()

    print(
        'Enter the actions for all agents together and press enter (e.g. \'11<enter>\' means '
        'action 1 for agent 1 and action 1 for agent 2)')

    env = gym.make('ma_gym:{}'.format(args.env))
    env = Monitor(env, directory='recordings', force=True)
    for ep_i in range(args.episodes):
        done_n = [False for _ in range(env.n_agents)]
        ep_reward = 0

        obs_n = env.reset()
        env.render()
        while not all(done_n):
            action_n = [int(c) for c in input('Action:')]  # one digit per agent
            obs_n, reward_n, done_n, _ = env.step(action_n)
            ep_reward += sum(reward_n)
            env.render()

        print('Episode #{} Reward: {}'.format(ep_i, ep_reward))
    env.close()
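
The digit parsing above crashes on malformed input. A hardened variant as a sketch; the `read_actions` helper is hypothetical and simply re-prompts until the input has exactly one digit per agent:

def read_actions(env):
    """Prompt until the user types exactly one digit per agent (hypothetical helper)."""
    while True:
        raw = input('Action:')
        if len(raw) == env.n_agents and raw.isdigit():
            return [int(c) for c in raw]
        print('Expected {} digits, one per agent'.format(env.n_agents))
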
class Tester:
    def __init__(self, models, env_name="PongDuel-v0", render=True, video=True,
                 step_number=1000, log_after_steps=200, log_on_win=True):
        # model ids to test; if the list contains "all", expand it to every registered model id
        self._models = models
        if "all" in self._models:
            self._models = list(registered_models["all"])
        self._render = render
        self._video = video
        self._step_number = step_number
        self._log_after_steps = log_after_steps
        self._log_on_win = log_on_win
        self._env_name = env_name
        self._env = None

    def log_score(self, step, score, msg=""):
        print("Step: {0:05d} Score: {1}".format(step, score), end="")
        if msg != "":
            print(" [{}]".format(msg))
        else:
            print("")

    def run_tests(self):
        print("Running tests for model IDs: {}".format(self._models))
        print("-" * 10)
        models_score_summary = {}

        for model_id in self._models:
            print("Selected model_id: {}".format(model_id))
            model = AutoLoadModel(model_id)

            score = {"agent_0": {"moves": 0,
                                 "wins": 0},
                     "agent_1": {"moves": 0,
                                 "wins": 0}}
            self._env = gym.make(self._env_name)
            if self._video:
                if isinstance(model_id, list):
                    model_id = "{}_VS_{}".format(model_id[0], model_id[1])
                output_directory = "recordings/{}".format(model_id)
                self._env = Monitor(self._env, directory=output_directory, video_callable=lambda episode_id: True, force=True)
            obs_n = self._env.reset()

            for step in range(self._step_number):

                # render env
                if self._render:
                    self._env.render()

                # select actions
                actions, actions_as_list = model.get_agents_actions(obs_n)

                # update moves counter
                for an in ["agent_0", "agent_1"]:
                    if actions[an] in [1, 2]:
                        score[an]["moves"] += 1

                # execute actions
                obs_n, reward_n, done_n, info = self._env.step(actions_as_list)

                # update score
                if any(reward_n):
                    score["agent_0"]["wins"] += reward_n[0]
                    score["agent_1"]["wins"] += reward_n[1]
                    if self._log_on_win:
                        self.log_score(step, score, "win")
                models_score_summary[model_id] = score
                if step % self._log_after_steps == 0:
                    self.log_score(step, score)

                if all(done_n):
                    break

            self.log_score(step, score, "end")
            print("-" * 10)
            self._env.close()

        # Score summary
        print("Summary:")
        for k, v in models_score_summary.items():
            n_moves = 0
            n_wins = 0
            print("Model{}:".format(k))
            for a, b in v.items():
                print(a, b)
                n_moves += b["moves"]
                n_wins += b["wins"]
            print("Average move count: {}".format(n_moves / 2))
            print("Total move count: {}".format(n_moves))
            print("Total win count: {}".format(n_wins))
            print("")