Example #1
0
 def __init__(
     self,
     config_file: Optional[str] = None,
     config_dir: str = DEFAULT_CONFIG_DIR,
 ) -> None:
     config_env = get_config(config_file=config_file, config_dir=config_dir)
     self._env = Env(config=config_env)
Example #2
0
    def __init__(self, config_paths: Optional[str] = None) -> None:
        r"""..

        :param config_paths: file to be used for creating the environment
        """
        config_env = get_config(config_paths)
        self._env = Env(config=config_env)
Example #3
0
class Benchmark:
    """Benchmark for evaluating agents in environments.


    Args:
        config_file: file to be used for creating the environment.
        config_dir: directory where config_file is located.
    """
    def __init__(
        self,
        config_file: Optional[str] = None,
        config_dir: str = DEFAULT_CONFIG_DIR,
    ) -> None:
        config_env = get_config(config_file=config_file, config_dir=config_dir)
        self._env = Env(config=config_env)

    def evaluate(self,
                 agent: Agent,
                 num_episodes: Optional[int] = None) -> Dict[str, float]:
        """
        Args:
            agent: agent to be evaluated in environment.
            num_episodes: count of number of episodes for which the evaluation
                should be run.

        Returns:
            dict containing metrics tracked by environment.
        """

        if num_episodes is None:
            num_episodes = len(self._env.episodes)
        else:
            assert num_episodes <= len(self._env.episodes), (
                "num_episodes({}) is larger than number of episodes "
                "in environment ({})".format(num_episodes,
                                             len(self._env.episodes)))

        assert num_episodes > 0, "num_episodes should be greater than 0"

        agg_metrics: Dict = defaultdict(float)

        count_episodes = 0
        while count_episodes < num_episodes:
            agent.reset()
            observations = self._env.reset()

            while not self._env.episode_over:
                action = agent.act(observations)
                observations = self._env.step(action)

            metrics = self._env.get_metrics()
            for m, v in metrics.items():
                agg_metrics[m] += v
            count_episodes += 1

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}

        return avg_metrics
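
A minimal usage sketch for the Benchmark above, assuming the class and a valid task config are importable. The agent below is a hypothetical stand-in that only implements the reset()/act() interface the evaluate() loop actually calls; the "STOP" action format and the config file name are illustrative assumptions, not taken from the example.

class StopAgent:
    """Hypothetical agent: satisfies the duck-typed interface used by evaluate()."""

    def reset(self) -> None:
        pass

    def act(self, observations):
        # Always issue a stop; the action format is an assumption about the task.
        return {"action": "STOP"}


if __name__ == "__main__":
    # "benchmark/nav.yaml" is a placeholder config file name.
    benchmark = Benchmark(config_file="benchmark/nav.yaml")
    metrics = benchmark.evaluate(StopAgent(), num_episodes=2)
    for name, value in metrics.items():
        print(f"{name}: {value:.3f}")
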
Example #4
0
class Benchmark:
    r"""Benchmark for evaluating agents in environments.
    """
    def __init__(self, config_paths: Optional[str] = None) -> None:
        r"""..

        :param config_paths: file to be used for creating the environment
        """
        config_env = get_config(config_paths)
        self._env = Env(config=config_env)

    def evaluate(self,
                 agent: Agent,
                 num_episodes: Optional[int] = None) -> Dict[str, float]:
        r"""..

        :param agent: agent to be evaluated in environment.
        :param num_episodes: count of number of episodes for which the
            evaluation should be run.
        :return: dict containing metrics tracked by environment.
        """

        if num_episodes is None:
            num_episodes = 1  # Hijack: force a single-episode evaluation instead of the full set
        else:
            assert num_episodes <= len(self._env.episodes), (
                "num_episodes({}) is larger than number of episodes "
                "in environment ({})".format(num_episodes,
                                             len(self._env.episodes)))

        assert num_episodes > 0, "num_episodes should be greater than 0"

        agg_metrics: Dict = defaultdict(float)

        count_episodes = 0
        while count_episodes < num_episodes:
            agent.reset()
            observations = self._env.reset()

            while not self._env.episode_over:
                action = agent.act(observations)
                observations = self._env.step(action)

            metrics = self._env.get_metrics()
            print(metrics)
            for m, v in metrics.items():
                agg_metrics[m] += v
            count_episodes += 1

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}

        return avg_metrics
Example #5
0
def _make_env_fn(config: Config,
                 dataset: Optional[habitat.Dataset] = None,
                 rank: int = 0) -> Env:
    """Constructor for default habitat `env.Env`.

    :param config: configuration for environment.
    :param dataset: dataset for environment.
    :param rank: rank for setting seed of environment
    :return: `env.Env` / `env.RLEnv` object
    """
    habitat_env = Env(config=config, dataset=dataset)
    habitat_env.seed(config.SEED + rank)
    return habitat_env
def _make_env_fn(config: Config,
                 dataset: Optional[habitat.Dataset] = None,
                 rank: int = 0) -> Env:
    r"""Constructor for default habitat Env.
    Args:
        config: configuration for environment.
        dataset: dataset for environment.
        rank: rank for setting seed of environment
    Returns:
        ``Env``/``RLEnv`` object
    """
    habitat_env = Env(config=config, dataset=dataset)
    habitat_env.seed(config.SEED + rank)
    return habitat_env
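
As a hedged sketch of how a factory like _make_env_fn above is typically consumed, the snippet below fans one config out to several worker environments with distinct seeds; habitat.VectorEnv and its make_env_fn/env_fn_args parameters are assumptions about the surrounding codebase and are not shown in the excerpt.

import habitat


def make_vector_env(config, dataset=None, num_envs=4):
    # One argument tuple per worker; each rank ends up seeded with
    # config.SEED + rank inside _make_env_fn above.
    # habitat.VectorEnv(make_env_fn=..., env_fn_args=...) is assumed to exist.
    env_fn_args = tuple((config, dataset, rank) for rank in range(num_envs))
    return habitat.VectorEnv(make_env_fn=_make_env_fn, env_fn_args=env_fn_args)
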
 def __init__(self, name, config_paths: Optional[str] = None) -> None:
     config_env = get_config()
     self._env = Env(config=config_env.TASK_CONFIG)
     self.losses = []
     self.batch_scores = []
     self.episode_losses = []
     self.episode_batch_scores = []
     self._name = name
     self.action_tokens_idx = [
         'action_left_token', 'action_right_token', 'action_up_token',
         'action_down_token', 'action_teleport_token', 'action_stop_token',
         'action_start_token', 'action_ignore_token'
     ]
     self.action_tokens = self._env._task.get_action_tokens()
     print(self.action_tokens)
Example #8
0
    def __init__(
        self, config_paths: Optional[str] = None, eval_remote=False
    ) -> None:
        r"""..

        :param config_paths: file to be used for creating the environment
        :param eval_remote: boolean indicating whether evaluation should be run remotely or locally
        """
        config_env = get_config(config_paths)
        self._eval_remote = eval_remote

        if self._eval_remote is True:
            self._env = None
        else:
            self._env = Env(config=config_env)
class VLNBenchmark(habitat.Benchmark):
    def __init__(self, name, config_paths: Optional[str] = None) -> None:
        config_env = get_config()
        self._env = Env(config=config_env.TASK_CONFIG)
        self.losses = []
        self.batch_scores = []
        self.episode_losses = []
        self.episode_batch_scores = []
        self._name = name
        self.action_tokens_idx = [
            'action_left_token', 'action_right_token', 'action_up_token',
            'action_down_token', 'action_teleport_token', 'action_stop_token',
            'action_start_token', 'action_ignore_token'
        ]
        self.action_tokens = self._env._task.get_action_tokens()
        print(self.action_tokens)

    def evaluate(self,
                 agent,
                 num_episodes: Optional[int] = None,
                 feedback="argmax",
                 batch_size=4,
                 save=False):
        print("Training is running on device ", torch.cuda.current_device())
        agent.eval()
        count_episodes = 0
        agg_metrics = defaultdict(float)
        steps = 0
        ignore_idx = agent.model_actions.index("<ignore>")
        action_padding_idx = self.action_tokens[
            self.action_tokens_idx[ignore_idx]]
        rollout_observations = []

        while count_episodes < num_episodes:
            # 1 in 10 possibilities to save
            #if steps and steps % 3 == 0:
            if save:
                agent.save_example_to_file()
            agent.reset(steps)
            observations = self._env.reset()
            observations = {
                "rgb": observations["rgb"],
                "adjacentViewpoints": observations["adjacentViewpoints"]
            }
            episode_loss = []
            episode_batch_score = []
            action_sequence = [30528]  # Start token
            while not self._env.episode_over:
                print(action_sequence)
                episode = self._env._current_episode
                # Adding observations to rollout

                # Adding tokens from episode
                sep_token_id = self._env._current_episode.instruction.tokens[
                    -1]
                action_tokens = action_sequence[-10:] + [sep_token_id]
                action_mask = [1] * len(action_tokens)

                tokens = self._env._current_episode.instruction.tokens + \
                    action_tokens
                mask = self._env._current_episode.instruction.mask + \
                    action_mask
                segment = [0] * \
                    len(self._env._current_episode.instruction.tokens) + \
                    [1] * len(action_tokens)

                padding = [0] * (128 - len(tokens))
                tokens += padding
                mask += padding
                segment += padding

                # add padding at the end
                observations["target_tokens"] = []
                observations["path_id"] = episode.episode_id
                observations["tokens"] = tokens
                observations["mask"] = mask
                observations["segment"] = segment

                target_action = agent.act_eval([observations])

                target_action["action_args"].update(
                    {"episode": self._env._current_episode})

                action_idx = agent.model_actions.index(target_action["action"])
                action_token_id = self.action_tokens[
                    self.action_tokens_idx[action_idx]]
                action_sequence.append(action_token_id)
                observations = self._env.step(target_action)  # Step 1
                observations = {
                    "rgb": observations["rgb"],
                    "adjacentViewpoints": observations["adjacentViewpoints"]
                }
                steps += 1
            self._env._current_episode.reset()
            count_episodes += 1

            metrics = self._env.get_metrics()
            for m, v in metrics.items():
                if m != "distance_to_goal":
                    agg_metrics[m] += v
        agent.reset(steps)
        print(count_episodes)
        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}
        #avg_metrics["losses"] = sum(self.losses) / len(self.losses)
        #avg_metrics["batch_score"] = sum(self.batch_scores) / len(self.batch_scores)

        return avg_metrics

    def eval_batch(self,
                   agent,
                   num_episodes: Optional[int] = None,
                   feedback="teacher",
                   checkpoint_iter=1000,
                   batch_size=4,
                   save=False):
        print("Training is running on device ", torch.cuda.current_device())
        agent.eval()
        count_episodes = 0
        agg_metrics = defaultdict(float)
        steps = 0
        ignore_idx = agent.model_actions.index("<ignore>")
        action_padding_idx = self.action_tokens[
            self.action_tokens_idx[ignore_idx]]
        rollout_observations = []
        action_scores = []
        vision_scores_p = []
        vision_scores_r = []
        vision_scores_a = []
        while count_episodes < num_episodes:

            if save and batch_size == 1:
                agent.save_example_to_file()
            agent.reset(steps)
            observations = self._env.reset()
            observations = {
                "rgb": observations["rgb"],
                "adjacentViewpoints": observations["adjacentViewpoints"]
            }
            episode_loss = []
            episode_batch_score = []
            action_sequence = [30528]  #Start token
            while not self._env.episode_over:
                final_goal = self._env._current_episode.goals[-1].image_id
                episode = self._env._current_episode
                shortest_path = self._env._task.get_shortest_path_to_target(
                    episode.scan, episode.curr_viewpoint.image_id, final_goal)

                #print("shortest_path", shortest_path)
                if len(shortest_path) > 1:
                    goal_viewpoint = shortest_path[1]
                else:
                    #print("Shortest Path is not good!!!")
                    goal_viewpoint = final_goal

                # Adding observations to rollout
                target_action, action_args = \
                    agent._teacher_actions(observations, goal_viewpoint)
                action_idx = agent.model_actions.index(target_action)
                action_token_id = self.action_tokens[
                    self.action_tokens_idx[action_idx]]
                observations["golden_action"] = action_idx
                action = {"action": target_action, "action_args": action_args}

                action["action_args"].update(
                    {"episode": self._env._current_episode})
                # Adding tokens from episode
                sep_token_id = self._env._current_episode.instruction.tokens[
                    -1]
                action_tokens = action_sequence[-10:] + [sep_token_id]
                action_mask = [1] * len(action_tokens)

                tokens = self._env._current_episode.instruction.tokens + \
                    action_tokens
                mask = self._env._current_episode.instruction.mask + \
                    action_mask
                segment = [0] * \
                    len(self._env._current_episode.instruction.tokens) + \
                    [1] * len(action_tokens)

                padding = [0] * (128 - len(tokens))
                tokens += padding
                mask += padding
                segment += padding

                # add padding at the end
                observations["path_id"] = episode.episode_id
                observations["target_tokens"] = \
                    self._env._current_episode.instruction.tokens + \
                    action_sequence[-10:] + [action_token_id] + \
                    padding
                observations["actions"] = action_tokens
                observations["tokens"] = tokens
                observations["mask"] = mask
                observations["segment"] = segment

                rollout_observations.append(observations)
                action_sequence.append(action_token_id)
                observations = self._env.step(action)  # Step 1
                observations = {
                    "rgb": observations["rgb"],
                    "adjacentViewpoints": observations["adjacentViewpoints"]
                }
                steps += 1
                if len(rollout_observations) == batch_size:

                    ## Act with batch
                    action_score, (
                        precision, recall,
                        accuracy) = agent.act_eval_batch(rollout_observations)
                    action_scores.append(action_score)
                    vision_scores_p.append(precision)
                    vision_scores_r.append(recall)
                    vision_scores_a.append(accuracy)

                    rollout_observations = []
                    print("Action scores", action_scores[-1])
                    print("Vision Scores precision", vision_scores_p[-1])
                    print("Vision Scores recall", vision_scores_r[-1])
                    print("Vision Scores accuracy", vision_scores_a[-1])

            self._env._current_episode.reset()

            count_episodes += 1

            metrics = self._env.get_metrics()
            for m, v in metrics.items():
                if m != "distance_to_goal":
                    agg_metrics[m] += v

        agent.reset(steps)
        print(count_episodes)
        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}
        avg_metrics["action_scores"] = sum(action_scores) / len(action_scores)
        avg_metrics["vision_scores_p"] = sum(vision_scores_p) / len(
            vision_scores_p)
        avg_metrics["vision_scores_r"] = sum(vision_scores_r) / len(
            vision_scores_r)
        avg_metrics["vision_scores_a"] = sum(vision_scores_a) / len(
            vision_scores_a)

        return avg_metrics

    def train_batch(self,
                    agent,
                    num_episodes: Optional[int] = None,
                    feedback="teacher",
                    checkpoint_iter=1000,
                    batch_size=4):
        print("Training is running on device ", torch.cuda.current_device())
        agent.train()
        count_episodes = 0
        agg_metrics = defaultdict(float)
        steps = 0
        ignore_idx = agent.model_actions.index("<ignore>")
        action_padding_idx = self.action_tokens[
            self.action_tokens_idx[ignore_idx]]
        rollout_observations = []
        while count_episodes < num_episodes:
            if count_episodes and count_episodes % checkpoint_iter == 0:
                save_path = "checkpoints/{}_train_{}.check".format(
                    self._name, count_episodes)
                print(save_path)
                agent.save(save_path)
                print("{} episodes have been processed".format(count_episodes))
            agent.reset(steps)
            observations = self._env.reset()
            observations = {
                "rgb": observations["rgb"],
                "adjacentViewpoints": observations["adjacentViewpoints"]
            }
            episode_loss = []
            episode_batch_score = []
            action_sequence = [30528]  #Start token
            while not self._env.episode_over:
                final_goal = self._env._current_episode.goals[-1].image_id
                episode = self._env._current_episode
                shortest_path = self._env._task.get_shortest_path_to_target(
                    episode.scan, episode.curr_viewpoint.image_id, final_goal)

                #print("shortest_path", shortest_path)
                if len(shortest_path) > 1:
                    goal_viewpoint = shortest_path[1]
                else:
                    #print("Shortest Path is not good!!!")
                    goal_viewpoint = final_goal

                # Adding observations to rollout
                target_action, action_args = \
                    agent._teacher_actions(observations, goal_viewpoint)
                action_idx = agent.model_actions.index(target_action)
                action_token_id = self.action_tokens[
                    self.action_tokens_idx[action_idx]]
                observations["golden_action"] = action_idx
                action = {"action": target_action, "action_args": action_args}

                action["action_args"].update(
                    {"episode": self._env._current_episode})
                # Adding tokens from episode
                sep_token_id = self._env._current_episode.instruction.tokens[
                    -1]
                action_tokens = action_sequence[-10:] + [sep_token_id]
                action_mask = [1] * len(action_tokens)

                tokens = self._env._current_episode.instruction.tokens + \
                    action_tokens
                mask = self._env._current_episode.instruction.mask + \
                    action_mask
                segment = [0] * \
                    len(self._env._current_episode.instruction.tokens) + \
                    [1] * len(action_tokens)

                padding = [0] * (128 - len(tokens))
                tokens += padding
                mask += padding
                segment += padding

                # add padding at the end
                observations["target_tokens"] = \
                    self._env._current_episode.instruction.tokens + \
                    action_sequence[-10:] + [action_token_id] + \
                    padding
                observations["actions"] = action_tokens
                observations["tokens"] = tokens
                observations["mask"] = mask
                observations["segment"] = segment

                rollout_observations.append(observations)
                action_sequence.append(action_token_id)
                observations = self._env.step(action)  # Step 1
                observations = {
                    "rgb": observations["rgb"],
                    "adjacentViewpoints": observations["adjacentViewpoints"]
                }
                steps += 1
                if len(rollout_observations) == batch_size:

                    ## Act with batch
                    loss, batch_score = agent.act_batch(rollout_observations)
                    episode_loss.append(loss)
                    episode_batch_score.append(batch_score)
                    self.losses.append(loss)
                    self.batch_scores.append(batch_score)

                    agent.train_step(steps)
                    rollout_observations = []
                    self.episode_losses.append(
                        sum(episode_loss) / len(episode_loss))
                    self.episode_batch_scores.append(
                        sum(episode_batch_score) / len(episode_batch_score))
                    print("Episode loss", self.episode_losses[-1])
                    print("Episode Batch Score", self.episode_batch_scores[-1])
                    writer.add_scalar('lr/train', agent.optimizer.show_lr(),
                                      count_episodes)
                    writer.add_scalar('episode_Loss/train',
                                      self.episode_losses[-1], count_episodes)
                    writer.add_scalar('episode_batch_scores/train',
                                      self.episode_batch_scores[-1],
                                      count_episodes)

            self._env._current_episode.reset()

            count_episodes += 1

            metrics = self._env.get_metrics()
            for m, v in metrics.items():
                if m != "distance_to_goal":
                    agg_metrics[m] += v

        agent.reset(steps)
        print(count_episodes)
        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}
        avg_metrics["losses"] = sum(self.losses) / len(self.losses)
        avg_metrics["batch_score"] = sum(self.batch_scores) / len(
            self.batch_scores)

        return avg_metrics

    def train(self,
              agent,
              num_episodes: Optional[int] = None,
              feedback="teacher") -> Dict[str, float]:

        print("Training is running on device ", torch.cuda.current_device())
        agent.train()
        count_episodes = 0
        agg_metrics = defaultdict(float)
        steps = 0

        while count_episodes < num_episodes:
            if count_episodes and count_episodes % 1000 == 0:
                agent.save("checkpoints/{}_train_{}.check".format(
                    self._name, count_episodes))
                print("{} episodes have been processed".format(count_episodes))
            agent.reset(steps)
            observations = self._env.reset()
            episode_loss = []
            episode_batch_score = []
            #action_sequence = []
            while not self._env.episode_over:
                final_goal = self._env._current_episode.goals[-1].image_id
                episode = self._env._current_episode
                shortest_path = self._env._task.get_shortest_path_to_target(
                    episode.scan, episode.curr_viewpoint.image_id, final_goal)

                #print("shortest_path", shortest_path)
                if len(shortest_path) > 1:
                    goal_viewpoint = shortest_path[1]
                else:
                    #print("Shortest Path is not good!!!")
                    goal_viewpoint = final_goal

                action, loss, batch_score = agent.act(
                    observations, self._env._current_episode, goal_viewpoint)
                episode_loss.append(loss)
                episode_batch_score.append(batch_score)
                self.losses.append(loss)
                self.batch_scores.append(batch_score)

                action["action_args"].update(
                    {"episode": self._env._current_episode})

                observations = self._env.step(action)  # Step 1
                steps += 1
                agent.train_step(steps)

            self._env._current_episode.reset()

            count_episodes += 1

            self.episode_losses.append(sum(episode_loss) / len(episode_loss))
            self.episode_batch_scores.append(
                sum(episode_batch_score) / len(episode_batch_score))
            print("Episode loss", self.episode_losses[-1])
            print("Episode Batch Score", self.episode_batch_scores[-1])
            writer.add_scalar('episode_Loss/train', self.episode_losses[-1],
                              count_episodes)
            writer.add_scalar('episode_batch_scores/train',
                              self.episode_batch_scores[-1], count_episodes)
            metrics = self._env.get_metrics()
            for m, v in metrics.items():
                if m != "distance_to_goal":
                    agg_metrics[m] += v

        agent.reset(steps)
        print(count_episodes)
        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}
        avg_metrics["losses"] = sum(self.losses) / len(self.losses)
        avg_metrics["batch_score"] = sum(self.batch_scores) / len(
            self.batch_scores)

        return avg_metrics
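
The rollout loops above all build the same BERT-style input: the instruction tokens, the last few action tokens plus a separator, a parallel attention mask, segment ids, and zero-padding up to 128 positions. A standalone sketch of just that packing step, with made-up token ids for illustration:

def pack_inputs(instruction_tokens, instruction_mask, action_sequence, max_len=128):
    # Mirrors the tokens/mask/segment construction in the loops above; values are illustrative.
    sep_token_id = instruction_tokens[-1]  # the excerpt reuses the final instruction token as SEP
    action_tokens = action_sequence[-10:] + [sep_token_id]
    action_mask = [1] * len(action_tokens)

    tokens = instruction_tokens + action_tokens
    mask = instruction_mask + action_mask
    segment = [0] * len(instruction_tokens) + [1] * len(action_tokens)

    padding = [0] * (max_len - len(tokens))
    return tokens + padding, mask + padding, segment + padding


tokens, mask, segment = pack_inputs(
    instruction_tokens=[101, 2175, 2157, 102],  # made-up ids
    instruction_mask=[1, 1, 1, 1],
    action_sequence=[30528],  # start token, as in the excerpt
)
assert len(tokens) == len(mask) == len(segment) == 128
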
Example #10
0
class Benchmark:
    r"""Benchmark for evaluating agents in environments.
    """

    def __init__(
        self, config_paths: Optional[str] = None, eval_remote=False
    ) -> None:
        r"""..

        :param config_paths: file to be used for creating the environment
        :param eval_remote: boolean indicating whether evaluation should be run remotely or locally
        """
        config_env = get_config(config_paths)
        self._eval_remote = eval_remote

        if self._eval_remote is True:
            self._env = None
        else:
            self._env = Env(config=config_env)

    def remote_evaluate(
        self, agent: Agent, num_episodes: Optional[int] = None
    ):
        # The modules imported below are specific to habitat-challenge remote evaluation.
        # These modules are not part of the habitat-api repository.
        import evaluation_pb2
        import evaluation_pb2_grpc
        import evalai_environment_habitat
        import grpc
        import pickle
        import time

        time.sleep(60)

        def pack_for_grpc(entity):
            return pickle.dumps(entity)

        def unpack_for_grpc(entity):
            return pickle.loads(entity)

        def remote_ep_over(stub):
            res_env = unpack_for_grpc(
                stub.episode_over(evaluation_pb2.Package()).SerializedEntity
            )
            return res_env["episode_over"]

        env_address_port = os.environ.get("EVALENV_ADDPORT", "localhost:8085")
        channel = grpc.insecure_channel(env_address_port)
        stub = evaluation_pb2_grpc.EnvironmentStub(channel)

        base_num_episodes = unpack_for_grpc(
            stub.num_episodes(evaluation_pb2.Package()).SerializedEntity
        )
        num_episodes = base_num_episodes["num_episodes"]

        agg_metrics: Dict = defaultdict(float)

        count_episodes = 0

        while count_episodes < num_episodes:
            agent.reset()
            res_env = unpack_for_grpc(
                stub.reset(evaluation_pb2.Package()).SerializedEntity
            )

            while not remote_ep_over(stub):
                obs = res_env["observations"]
                action = agent.act(obs)

                res_env = unpack_for_grpc(
                    stub.act_on_environment(
                        evaluation_pb2.Package(
                            SerializedEntity=pack_for_grpc(action)
                        )
                    ).SerializedEntity
                )

            metrics = unpack_for_grpc(
                stub.get_metrics(
                    evaluation_pb2.Package(
                        SerializedEntity=pack_for_grpc(action)
                    )
                ).SerializedEntity
            )

            for m, v in metrics["metrics"].items():
                agg_metrics[m] += v
            count_episodes += 1

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}

        stub.evalai_update_submission(evaluation_pb2.Package())

        return avg_metrics

    def local_evaluate(self, agent: Agent, num_episodes: Optional[int] = None):
        if num_episodes is None:
            num_episodes = len(self._env.episodes)
        else:
            assert num_episodes <= len(self._env.episodes), (
                "num_episodes({}) is larger than number of episodes "
                "in environment ({})".format(
                    num_episodes, len(self._env.episodes)
                )
            )

        assert num_episodes > 0, "num_episodes should be greater than 0"

        agg_metrics: Dict = defaultdict(float)

        count_episodes = 0
        while count_episodes < num_episodes:
            agent.reset()
            observations = self._env.reset()

            while not self._env.episode_over:
                action = agent.act(observations)
                observations = self._env.step(action)

            metrics = self._env.get_metrics()
            for m, v in metrics.items():
                agg_metrics[m] += v
            count_episodes += 1

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}

        return avg_metrics

    def evaluate(
        self, agent: Agent, num_episodes: Optional[int] = None
    ) -> Dict[str, float]:
        r"""..

        :param agent: agent to be evaluated in environment.
        :param num_episodes: count of number of episodes for which the
            evaluation should be run.
        :return: dict containing metrics tracked by environment.
        """

        if self._eval_remote is True:
            return self.remote_evaluate(agent, num_episodes)
        else:
            return self.local_evaluate(agent, num_episodes)
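
The remote path above simply pickles Python objects in and out of the gRPC Package messages; the round-trip below is a standalone check of that pack_for_grpc/unpack_for_grpc convention (no server is contacted, and the action dict is an arbitrary example payload):

import pickle


def pack_for_grpc(entity):
    return pickle.dumps(entity)


def unpack_for_grpc(entity):
    return pickle.loads(entity)


action = {"action": "MOVE_FORWARD"}  # example payload only
assert unpack_for_grpc(pack_for_grpc(action)) == action
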
Example #11
0
 def __init__(self, config_paths: Optional[str] = None) -> None:
     self.action_history: Dict = defaultdict()
     self.agg_metrics: Dict = defaultdict(float)
     config_env = get_config(config_paths)
     self._env = Env(config=config_env)
 def __init__(self, config_paths: Optional[str] = None) -> None:
     config_env = get_config(config_paths)
     self._env = Env(config=config_env)
Example #13
0
class Benchmark:
    r"""Benchmark for evaluating agents in environments."""
    def __init__(self,
                 config_paths: Optional[str] = None,
                 eval_remote: bool = False) -> None:
        r"""..

        :param config_paths: file to be used for creating the environment
        :param eval_remote: boolean indicating whether evaluation should be run remotely or locally
        """
        config_env = get_config(config_paths)
        self._eval_remote = eval_remote

        if self._eval_remote is True:
            self._env = None
        else:
            self._env = Env(config=config_env)

    def remote_evaluate(self,
                        agent: "Agent",
                        num_episodes: Optional[int] = None):
        # The modules imported below are specific to habitat-challenge remote evaluation.
        # These modules are not part of the habitat-lab repository.
        import pickle
        import time

        import evalai_environment_habitat  # noqa: F401
        import evaluation_pb2
        import evaluation_pb2_grpc
        import grpc

        time.sleep(60)

        def pack_for_grpc(entity):
            return pickle.dumps(entity)

        def unpack_for_grpc(entity):
            return pickle.loads(entity)

        def remote_ep_over(stub):
            res_env = unpack_for_grpc(
                stub.episode_over(evaluation_pb2.Package()).SerializedEntity)
            return res_env["episode_over"]

        env_address_port = os.environ.get("EVALENV_ADDPORT", "localhost:8085")
        channel = grpc.insecure_channel(env_address_port)
        stub = evaluation_pb2_grpc.EnvironmentStub(channel)

        base_num_episodes = unpack_for_grpc(
            stub.num_episodes(evaluation_pb2.Package()).SerializedEntity)
        num_episodes = base_num_episodes["num_episodes"]

        agg_metrics: Dict = defaultdict(float)

        count_episodes = 0

        while count_episodes < num_episodes:
            agent.reset()
            res_env = unpack_for_grpc(
                stub.reset(evaluation_pb2.Package()).SerializedEntity)

            while not remote_ep_over(stub):
                obs = res_env["observations"]
                action = agent.act(obs)

                res_env = unpack_for_grpc(
                    stub.act_on_environment(
                        evaluation_pb2.Package(SerializedEntity=pack_for_grpc(
                            action))).SerializedEntity)

            metrics = unpack_for_grpc(
                stub.get_metrics(
                    evaluation_pb2.Package(SerializedEntity=pack_for_grpc(
                        action))).SerializedEntity)

            for m, v in metrics["metrics"].items():
                agg_metrics[m] += v
            count_episodes += 1

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}

        stub.evalai_update_submission(evaluation_pb2.Package())

        return avg_metrics

    def local_evaluate(self,
                       agent: "Agent",
                       num_episodes: Optional[int] = None,
                       skip_first_n: Optional[int] = 0) -> Dict[str, float]:
        if num_episodes is None:
            num_episodes = len(self._env.episodes)
        else:
            assert num_episodes <= len(self._env.episodes), (
                "num_episodes({}) is larger than number of episodes "
                "in environment ({})".format(num_episodes,
                                             len(self._env.episodes)))

        assert num_episodes > 0, "num_episodes should be greater than 0"

        agg_metrics: Dict = defaultdict(float)
        episode_metrics = dict(scene=[],
                               num_steps=[],
                               time=[],
                               num_collisions=[],
                               xy_error=[],
                               spl=[],
                               softspl=[],
                               success=[])
        last_success = None

        for _ in range(skip_first_n):
            self._env.reset()

        count_episodes = 0
        try:
            while count_episodes < num_episodes:
                agent.reset(last_success=last_success)
                observations = self._env.reset()

                # Skip infeasible episodes. This is easy to detect via SPL:
                # the start-to-goal path is recomputed on reset, and if it is
                # infinite the SPL metric becomes NaN.
                while not np.isfinite(self._env.get_metrics()['spl']):
                    observations = self._env.reset()

                # metrics = self._env.get_metrics()
                action = None
                num_steps = 0
                t = time.time()

                while not self._env.episode_over:
                    action = agent.act(observations)
                    observations = self._env.step(action)
                    num_steps += 1
                    # metrics = self._env.get_metrics()
                episode_time = time.time() - t

                metrics = self._env.get_metrics()
                if isinstance(action, dict) and 'xy_error' in action.keys():
                    # Add all outputs to metrics
                    for key, val in action.items():
                        try:
                            metrics[str(key)] = float(val)
                        except TypeError:
                            pass
                    xy_error = action['xy_error']
                else:
                    xy_error = 999.
                metrics[
                    'small_error'] = 1. if xy_error < 7.2 else 0.  # 0.36 / 0.05
                metrics['episode_length'] = num_steps
                metrics['time'] = episode_time
                if 'softspl' not in metrics.keys():
                    metrics['softspl'] = 0.

                for m, v in metrics.items():
                    if m != 'top_down_map':
                        agg_metrics[m] += v
                count_episodes += 1

                episode_metrics['scene'].append(
                    self._env.current_episode.scene_id)
                episode_metrics['num_steps'].append(num_steps)
                episode_metrics['time'].append(episode_time)
                episode_metrics['xy_error'].append(xy_error)
                episode_metrics['spl'].append(metrics['spl'])
                episode_metrics['softspl'].append(metrics['softspl'])
                episode_metrics['success'].append(metrics['success'])
                last_success = metrics['success']

                print(
                    "%d/%d: Meann success: %f, spl: %f. err: %f This trial success: %f. SPL: %f  SOFT_SPL: %f. err %f"
                    % (count_episodes, num_episodes, agg_metrics['success'] /
                       count_episodes, agg_metrics['spl'] / count_episodes,
                       agg_metrics['xy_error'] / count_episodes,
                       metrics['success'], metrics['spl'], metrics['softspl'],
                       metrics['xy_error']))

            # One more agent reset, so last data can be saved.
            agent.reset()

        except KeyboardInterrupt:
            print("interrupt")

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}
        avg_metrics['num_episodes'] = count_episodes
        avg_metrics['input_args'] = ' '.join([str(x) for x in sys.argv])

        try:
            print(avg_metrics['input_args'])
            print(agent.params.name)
        except Exception:
            pass

        import json
        timestamp_str = time.strftime('%m-%d-%H-%M-%S', time.localtime())
        filename = './temp/evals/eval_{}'.format(timestamp_str)
        with open(filename + '.summary.json', 'w') as file:
            json.dump(avg_metrics, file, indent=4)
        print(filename + '.summary.json')
        with open(filename + '.episodes.json', 'w') as file:
            json.dump(episode_metrics, file, indent=4)
        print(filename + '.episodes.json')

        return avg_metrics

    def evaluate(self,
                 agent: "Agent",
                 num_episodes: Optional[int] = None,
                 skip_first_n: Optional[int] = 0) -> Dict[str, float]:
        r"""..

        :param agent: agent to be evaluated in environment.
        :param num_episodes: count of number of episodes for which the
            evaluation should be run.
        :return: dict containing metrics tracked by environment.
        """

        if self._eval_remote is True:
            return self.remote_evaluate(agent, num_episodes)
        else:
            return self.local_evaluate(agent,
                                       num_episodes,
                                       skip_first_n=skip_first_n)
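
local_evaluate above writes two JSON files per run under ./temp/evals/; the directory and filename pattern come from that code, while the metric keys read back below are only examples of what the environment may have tracked. A small sketch for inspecting the most recent summary afterwards:

import glob
import json

summaries = sorted(glob.glob("./temp/evals/eval_*.summary.json"))
if summaries:
    with open(summaries[-1]) as f:
        summary = json.load(f)
    for key in ("success", "spl", "softspl", "num_episodes"):
        print(key, summary.get(key))
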
Example #14
0
 def __init__(self, config_file: Optional[str] = None) -> None:
     config_env = get_config(config_file=config_file)
     self._env = Env(config=config_env)