from collections import defaultdict
from typing import Dict, Optional

from habitat.config.default import DEFAULT_CONFIG_DIR, get_config
from habitat.core.agent import Agent
from habitat.core.env import Env


class Benchmark:
    """Benchmark for evaluating agents in environments.

    Args:
        config_file: file to be used for creating the environment.
        config_dir: directory where config_file is located.
    """

    def __init__(
        self,
        config_file: Optional[str] = None,
        config_dir: str = DEFAULT_CONFIG_DIR,
    ) -> None:
        config_env = get_config(config_file=config_file, config_dir=config_dir)
        self._env = Env(config=config_env)

    def evaluate(
        self, agent: Agent, num_episodes: Optional[int] = None
    ) -> Dict[str, float]:
        """
        Args:
            agent: agent to be evaluated in environment.
            num_episodes: number of episodes for which the evaluation
                should be run.

        Returns:
            dict containing metrics tracked by environment.
        """
        if num_episodes is None:
            num_episodes = len(self._env.episodes)
        else:
            assert num_episodes <= len(self._env.episodes), (
                "num_episodes({}) is larger than number of episodes "
                "in environment ({})".format(num_episodes,
                                             len(self._env.episodes)))

        assert num_episodes > 0, "num_episodes should be greater than 0"

        agg_metrics: Dict = defaultdict(float)

        count_episodes = 0
        while count_episodes < num_episodes:
            agent.reset()
            observations = self._env.reset()

            while not self._env.episode_over:
                action = agent.act(observations)
                observations = self._env.step(action)

            metrics = self._env.get_metrics()
            for m, v in metrics.items():
                agg_metrics[m] += v
            count_episodes += 1

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}

        return avg_metrics
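# Usage sketch (illustrative, not from the original source): evaluating a
# trivial agent with the Benchmark above. `ForwardOnlyAgent` and the config
# file name are hypothetical stand-ins; any habitat Agent subclass works.
class ForwardOnlyAgent(Agent):
    def reset(self) -> None:
        pass

    def act(self, observations):
        # A real agent would condition on `observations`; this one always
        # walks forward until the episode ends.
        return {"action": "MOVE_FORWARD"}


benchmark = Benchmark(config_file="pointnav/pointnav_habitat_test.yaml")
metrics = benchmark.evaluate(ForwardOnlyAgent(), num_episodes=10)
for name, value in metrics.items():
    print("{}: {:.3f}".format(name, value))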
class Benchmark:
    r"""Benchmark for evaluating agents in environments."""

    def __init__(self, config_paths: Optional[str] = None) -> None:
        r"""..

        :param config_paths: file to be used for creating the environment
        """
        config_env = get_config(config_paths)
        self._env = Env(config=config_env)

    def evaluate(
        self, agent: Agent, num_episodes: Optional[int] = None
    ) -> Dict[str, float]:
        r"""..

        :param agent: agent to be evaluated in environment.
        :param num_episodes: number of episodes for which the evaluation
            should be run.
        :return: dict containing metrics tracked by environment.
        """
        if num_episodes is None:
            num_episodes = len(self._env.episodes)
        else:
            assert num_episodes <= len(self._env.episodes), (
                "num_episodes({}) is larger than number of episodes "
                "in environment ({})".format(num_episodes,
                                             len(self._env.episodes)))

        assert num_episodes > 0, "num_episodes should be greater than 0"

        agg_metrics: Dict = defaultdict(float)

        count_episodes = 0
        while count_episodes < num_episodes:
            agent.reset()
            observations = self._env.reset()

            while not self._env.episode_over:
                action = agent.act(observations)
                observations = self._env.step(action)

            metrics = self._env.get_metrics()
            for m, v in metrics.items():
                agg_metrics[m] += v
            count_episodes += 1

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}

        return avg_metrics
def _make_env_fn(config: Config, dataset: Optional[habitat.Dataset] = None,
                 rank: int = 0) -> Env:
    """Constructor for default habitat `env.Env`.

    :param config: configuration for environment.
    :param dataset: dataset for environment.
    :param rank: rank for setting the seed of the environment.
    :return: `env.Env` / `env.RLEnv` object
    """
    habitat_env = Env(config=config, dataset=dataset)
    habitat_env.seed(config.SEED + rank)
    return habitat_env
def _make_env_fn(config: Config, dataset: Optional[habitat.Dataset] = None,
                 rank: int = 0) -> Env:
    r"""Constructor for default habitat Env.

    Args:
        config: configuration for environment.
        dataset: dataset for environment.
        rank: rank for setting the seed of the environment.

    Returns:
        ``Env``/``RLEnv`` object.
    """
    habitat_env = Env(config=config, dataset=dataset)
    habitat_env.seed(config.SEED + rank)
    return habitat_env
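# Usage sketch (illustrative, not from the original source): `_make_env_fn`
# is typically handed to habitat.VectorEnv, which starts one worker process
# per entry of `env_fn_args` and unpacks each tuple into the function's
# parameters. The task config path below is an assumed example.
NUM_PROCESSES = 2
envs = habitat.VectorEnv(
    make_env_fn=_make_env_fn,
    env_fn_args=tuple(
        (habitat.get_config("configs/tasks/pointnav.yaml"), None, rank)
        for rank in range(NUM_PROCESSES)
    ),
)
observations = envs.reset()  # one observation dict per worker
envs.close()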
# NOTE: this snippet assumes the enclosing module provides `torch`, `habitat`,
# `Env`, `get_config`, `defaultdict`, `Optional`/`Dict`, and a module-level
# tensorboard `writer` (e.g. torch.utils.tensorboard.SummaryWriter) used by
# the train methods below.
class VLNBenchmark(habitat.Benchmark):

    def __init__(self, name, config_paths: Optional[str] = None) -> None:
        config_env = get_config()
        self._env = Env(config=config_env.TASK_CONFIG)
        self.losses = []
        self.batch_scores = []
        self.episode_losses = []
        self.episode_batch_scores = []
        self._name = name
        self.action_tokens_idx = [
            'action_left_token', 'action_right_token', 'action_up_token',
            'action_down_token', 'action_teleport_token', 'action_stop_token',
            'action_start_token', 'action_ignore_token'
        ]
        self.action_tokens = self._env._task.get_action_tokens()
        print(self.action_tokens)

    def evaluate(self, agent, num_episodes: Optional[int] = None,
                 feedback="argmax", batch_size=4, save=False):
        # Autoregressive evaluation: the agent chooses each action itself and
        # the chosen action token is appended to the instruction context.
        print("Evaluation is running on device ",
              torch.cuda.current_device())
        agent.eval()
        count_episodes = 0
        agg_metrics = defaultdict(float)
        steps = 0
        ignore_idx = agent.model_actions.index("<ignore>")
        action_padding_idx = self.action_tokens[
            self.action_tokens_idx[ignore_idx]]
        rollout_observations = []
        while count_episodes < num_episodes:
            if save:
                agent.save_example_to_file()
            agent.reset(steps)
            observations = self._env.reset()
            observations = {
                "rgb": observations["rgb"],
                "adjacentViewpoints": observations["adjacentViewpoints"]
            }
            episode_loss = []
            episode_batch_score = []
            action_sequence = [30528]  # start token id
            while not self._env.episode_over:
                print(action_sequence)
                episode = self._env._current_episode

                # Build the BERT-style input: instruction tokens, then the
                # last 10 action tokens, closed by the separator token.
                sep_token_id = episode.instruction.tokens[-1]
                action_tokens = action_sequence[-10:] + [sep_token_id]
                action_mask = [1] * len(action_tokens)
                tokens = episode.instruction.tokens + action_tokens
                mask = episode.instruction.mask + action_mask
                segment = [0] * len(episode.instruction.tokens) + \
                    [1] * len(action_tokens)
                padding = [0] * (128 - len(tokens))

                tokens += padding
                mask += padding
                segment += padding  # pad everything to length 128

                observations["target_tokens"] = []
                observations["path_id"] = episode.episode_id
                observations["tokens"] = tokens
                observations["mask"] = mask
                observations["segment"] = segment

                target_action = agent.act_eval([observations])
                target_action["action_args"].update({"episode": episode})
                action_idx = agent.model_actions.index(
                    target_action["action"])
                action_token_id = self.action_tokens[
                    self.action_tokens_idx[action_idx]]
                action_sequence.append(action_token_id)

                observations = self._env.step(target_action)
                observations = {
                    "rgb": observations["rgb"],
                    "adjacentViewpoints": observations["adjacentViewpoints"]
                }
                steps += 1

            self._env._current_episode.reset()
            count_episodes += 1
            metrics = self._env.get_metrics()
            for m, v in metrics.items():
                if m != "distance_to_goal":
                    agg_metrics[m] += v
            agent.reset(steps)
            print(count_episodes)

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}
        return avg_metrics

    def eval_batch(self, agent, num_episodes: Optional[int] = None,
                   feedback="teacher", checkpoint_iter=1000, batch_size=4,
                   save=False):
        # Teacher-forced evaluation: actions come from the shortest path and
        # the agent is scored on batches of collected observations.
        print("Evaluation is running on device ",
              torch.cuda.current_device())
        agent.eval()
        count_episodes = 0
        agg_metrics = defaultdict(float)
        steps = 0
        ignore_idx = agent.model_actions.index("<ignore>")
        action_padding_idx = self.action_tokens[
            self.action_tokens_idx[ignore_idx]]
        rollout_observations = []
        action_scores = []
        vision_scores_p = []
        vision_scores_r = []
        vision_scores_a = []
        while count_episodes < num_episodes:
            if save and batch_size == 1:
                agent.save_example_to_file()
            agent.reset(steps)
            observations = self._env.reset()
            observations = {
                "rgb": observations["rgb"],
                "adjacentViewpoints": observations["adjacentViewpoints"]
            }
            episode_loss = []
            episode_batch_score = []
            action_sequence = [30528]  # start token id
            while not self._env.episode_over:
                episode = self._env._current_episode
                final_goal = episode.goals[-1].image_id
                shortest_path = self._env._task.get_shortest_path_to_target(
                    episode.scan, episode.curr_viewpoint.image_id, final_goal)
                if len(shortest_path) > 1:
                    goal_viewpoint = shortest_path[1]
                else:
                    # Shortest path is degenerate; head for the final goal.
                    goal_viewpoint = final_goal

                # Teacher action for the current state.
                target_action, action_args = \
                    agent._teacher_actions(observations, goal_viewpoint)
                action_idx = agent.model_actions.index(target_action)
                action_token_id = self.action_tokens[
                    self.action_tokens_idx[action_idx]]
                observations["golden_action"] = action_idx
                action = {"action": target_action, "action_args": action_args}
                action["action_args"].update({"episode": episode})

                # Build the BERT-style input: instruction tokens, then the
                # last 10 action tokens, closed by the separator token.
                sep_token_id = episode.instruction.tokens[-1]
                action_tokens = action_sequence[-10:] + [sep_token_id]
                action_mask = [1] * len(action_tokens)
                tokens = episode.instruction.tokens + action_tokens
                mask = episode.instruction.mask + action_mask
                segment = [0] * len(episode.instruction.tokens) + \
                    [1] * len(action_tokens)
                padding = [0] * (128 - len(tokens))

                tokens += padding
                mask += padding
                segment += padding  # pad everything to length 128

                observations["path_id"] = episode.episode_id
                observations["target_tokens"] = \
                    episode.instruction.tokens + \
                    action_sequence[-10:] + [action_token_id] + padding
                observations["actions"] = action_tokens
                observations["tokens"] = tokens
                observations["mask"] = mask
                observations["segment"] = segment

                rollout_observations.append(observations)
                action_sequence.append(action_token_id)
                observations = self._env.step(action)
                observations = {
                    "rgb": observations["rgb"],
                    "adjacentViewpoints": observations["adjacentViewpoints"]
                }
                steps += 1

                if len(rollout_observations) == batch_size:
                    # Score the agent on the collected batch.
                    action_score, (precision, recall, accuracy) = \
                        agent.act_eval_batch(rollout_observations)
                    action_scores.append(action_score)
                    vision_scores_p.append(precision)
                    vision_scores_r.append(recall)
                    vision_scores_a.append(accuracy)
                    rollout_observations = []
                    print("Action scores", action_scores[-1])
                    print("Vision Scores precision", vision_scores_p[-1])
                    print("Vision Scores recall", vision_scores_r[-1])
                    print("Vision Scores accuracy", vision_scores_a[-1])

            self._env._current_episode.reset()
            count_episodes += 1
            metrics = self._env.get_metrics()
            for m, v in metrics.items():
                if m != "distance_to_goal":
                    agg_metrics[m] += v
            agent.reset(steps)
            print(count_episodes)

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}
        avg_metrics["action_scores"] = sum(action_scores) / len(action_scores)
        avg_metrics["vision_scores_p"] = sum(vision_scores_p) / len(
            vision_scores_p)
        avg_metrics["vision_scores_r"] = sum(vision_scores_r) / len(
            vision_scores_r)
        avg_metrics["vision_scores_a"] = sum(vision_scores_a) / len(
            vision_scores_a)
        return avg_metrics

    def train_batch(self, agent, num_episodes: Optional[int] = None,
                    feedback="teacher",
                    checkpoint_iter=1000, batch_size=4):
        # Teacher-forced training on batches of collected observations.
        print("Training is running on device ", torch.cuda.current_device())
        agent.train()
        count_episodes = 0
        agg_metrics = defaultdict(float)
        steps = 0
        ignore_idx = agent.model_actions.index("<ignore>")
        action_padding_idx = self.action_tokens[
            self.action_tokens_idx[ignore_idx]]
        rollout_observations = []
        while count_episodes < num_episodes:
            if count_episodes and count_episodes % checkpoint_iter == 0:
                save_path = "checkpoints/{}_train_{}.check".format(
                    self._name, count_episodes)
                print(save_path)
                agent.save(save_path)
                print("{} episodes have been processed".format(
                    count_episodes))
            agent.reset(steps)
            observations = self._env.reset()
            observations = {
                "rgb": observations["rgb"],
                "adjacentViewpoints": observations["adjacentViewpoints"]
            }
            episode_loss = []
            episode_batch_score = []
            action_sequence = [30528]  # start token id
            while not self._env.episode_over:
                episode = self._env._current_episode
                final_goal = episode.goals[-1].image_id
                shortest_path = self._env._task.get_shortest_path_to_target(
                    episode.scan, episode.curr_viewpoint.image_id, final_goal)
                if len(shortest_path) > 1:
                    goal_viewpoint = shortest_path[1]
                else:
                    # Shortest path is degenerate; head for the final goal.
                    goal_viewpoint = final_goal

                # Teacher action for the current state.
                target_action, action_args = \
                    agent._teacher_actions(observations, goal_viewpoint)
                action_idx = agent.model_actions.index(target_action)
                action_token_id = self.action_tokens[
                    self.action_tokens_idx[action_idx]]
                observations["golden_action"] = action_idx
                action = {"action": target_action, "action_args": action_args}
                action["action_args"].update({"episode": episode})

                # Build the BERT-style input: instruction tokens, then the
                # last 10 action tokens, closed by the separator token.
                sep_token_id = episode.instruction.tokens[-1]
                action_tokens = action_sequence[-10:] + [sep_token_id]
                action_mask = [1] * len(action_tokens)
                tokens = episode.instruction.tokens + action_tokens
                mask = episode.instruction.mask + action_mask
                segment = [0] * len(episode.instruction.tokens) + \
                    [1] * len(action_tokens)
                padding = [0] * (128 - len(tokens))

                tokens += padding
                mask += padding
                segment += padding  # pad everything to length 128

                observations["target_tokens"] = \
                    episode.instruction.tokens + \
                    action_sequence[-10:] + [action_token_id] + padding
                observations["actions"] = action_tokens
                observations["tokens"] = tokens
                observations["mask"] = mask
                observations["segment"] = segment

                rollout_observations.append(observations)
                action_sequence.append(action_token_id)
                observations = self._env.step(action)
                observations = {
                    "rgb": observations["rgb"],
                    "adjacentViewpoints": observations["adjacentViewpoints"]
                }
                steps += 1

                if len(rollout_observations) == batch_size:
                    # Train on the collected batch.
                    loss, batch_score = agent.act_batch(rollout_observations)
                    episode_loss.append(loss)
                    episode_batch_score.append(batch_score)
                    self.losses.append(loss)
                    self.batch_scores.append(batch_score)
                    agent.train_step(steps)
                    rollout_observations = []

            if episode_loss:  # a batch may not complete in a short episode
                self.episode_losses.append(
                    sum(episode_loss) / len(episode_loss))
                self.episode_batch_scores.append(
                    sum(episode_batch_score) / len(episode_batch_score))
                print("Episode loss", self.episode_losses[-1])
                print("Episode Batch Score", self.episode_batch_scores[-1])
                writer.add_scalar('lr/train', agent.optimizer.show_lr(),
                                  count_episodes)
                writer.add_scalar('episode_Loss/train',
                                  self.episode_losses[-1], count_episodes)
                writer.add_scalar('episode_batch_scores/train',
                                  self.episode_batch_scores[-1],
                                  count_episodes)

            self._env._current_episode.reset()
            count_episodes += 1
            metrics = self._env.get_metrics()
            for m, v in metrics.items():
                if m != "distance_to_goal":
                    agg_metrics[m] += v
            agent.reset(steps)
            print(count_episodes)

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}
        avg_metrics["losses"] = sum(self.losses) / len(self.losses)
        avg_metrics["batch_score"] = sum(self.batch_scores) / len(
            self.batch_scores)
        return avg_metrics

    def train(self, agent, num_episodes: Optional[int] = None,
              feedback="teacher") -> Dict[str, float]:
        # Teacher-forced training, one step at a time (no batching).
        print("Training is running on device ", torch.cuda.current_device())
        agent.train()
        count_episodes = 0
        agg_metrics = defaultdict(float)
        steps = 0
        while count_episodes < num_episodes:
            if count_episodes and count_episodes % 1000 == 0:
                agent.save("checkpoints/{}_train_{}.check".format(
                    self._name, count_episodes))
                print("{} episodes have been processed".format(
                    count_episodes))
            agent.reset(steps)
            observations = self._env.reset()
            episode_loss = []
            episode_batch_score = []
            while not self._env.episode_over:
                episode = self._env._current_episode
                final_goal = episode.goals[-1].image_id
                shortest_path = self._env._task.get_shortest_path_to_target(
                    episode.scan, episode.curr_viewpoint.image_id, final_goal)
                if len(shortest_path) > 1:
                    goal_viewpoint = shortest_path[1]
                else:
                    # Shortest path is degenerate; head for the final goal.
                    goal_viewpoint = final_goal

                action, loss, batch_score = agent.act(
                    observations, episode, goal_viewpoint)
                episode_loss.append(loss)
                episode_batch_score.append(batch_score)
                self.losses.append(loss)
                self.batch_scores.append(batch_score)
                action["action_args"].update({"episode": episode})
                observations = self._env.step(action)
                steps += 1
                agent.train_step(steps)

            self._env._current_episode.reset()
            count_episodes += 1
            self.episode_losses.append(sum(episode_loss) / len(episode_loss))
            self.episode_batch_scores.append(
                sum(episode_batch_score) / len(episode_batch_score))
            print("Episode loss", self.episode_losses[-1])
            print("Episode Batch Score", self.episode_batch_scores[-1])
            writer.add_scalar('episode_Loss/train', self.episode_losses[-1],
                              count_episodes)
            writer.add_scalar('episode_batch_scores/train',
                              self.episode_batch_scores[-1], count_episodes)
            metrics = self._env.get_metrics()
            for m, v in metrics.items():
                if m != "distance_to_goal":
                    agg_metrics[m] += v
            agent.reset(steps)
            print(count_episodes)

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}
        avg_metrics["losses"] = sum(self.losses) / len(self.losses)
        avg_metrics["batch_score"] = sum(self.batch_scores) / len(
            self.batch_scores)
        return avg_metrics
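# Hypothetical driver for the VLNBenchmark above (sketch only): the agent is
# assumed to expose model_actions, _teacher_actions, act/act_batch/
# act_eval_batch, train_step, and save as used by the methods above; the
# agent class and config path are made up for illustration.
agent = VLNBertAgent()  # hypothetical agent implementation
benchmark = VLNBenchmark("vln_bert", config_paths="configs/tasks/vln_r2r.yaml")
benchmark.train_batch(agent, num_episodes=5000, batch_size=4,
                      checkpoint_iter=1000)
print(benchmark.eval_batch(agent, num_episodes=500, batch_size=4))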
import os
from collections import defaultdict
from typing import Dict, Optional

from habitat.config.default import get_config
from habitat.core.agent import Agent
from habitat.core.env import Env


class Benchmark:
    r"""Benchmark for evaluating agents in environments."""

    def __init__(
        self, config_paths: Optional[str] = None, eval_remote=False
    ) -> None:
        r"""..

        :param config_paths: file to be used for creating the environment
        :param eval_remote: boolean indicating whether evaluation should be
            run remotely or locally
        """
        config_env = get_config(config_paths)
        self._eval_remote = eval_remote

        if self._eval_remote is True:
            self._env = None
        else:
            self._env = Env(config=config_env)

    def remote_evaluate(
        self, agent: Agent, num_episodes: Optional[int] = None
    ):
        # The modules imported below are specific to habitat-challenge
        # remote evaluation. They are not part of the habitat-api repository.
        import pickle
        import time

        import evalai_environment_habitat  # noqa: F401
        import evaluation_pb2
        import evaluation_pb2_grpc
        import grpc

        time.sleep(60)

        def pack_for_grpc(entity):
            return pickle.dumps(entity)

        def unpack_for_grpc(entity):
            return pickle.loads(entity)

        def remote_ep_over(stub):
            res_env = unpack_for_grpc(
                stub.episode_over(evaluation_pb2.Package()).SerializedEntity)
            return res_env["episode_over"]

        env_address_port = os.environ.get("EVALENV_ADDPORT", "localhost:8085")
        channel = grpc.insecure_channel(env_address_port)
        stub = evaluation_pb2_grpc.EnvironmentStub(channel)

        base_num_episodes = unpack_for_grpc(
            stub.num_episodes(evaluation_pb2.Package()).SerializedEntity)
        num_episodes = base_num_episodes["num_episodes"]

        agg_metrics: Dict = defaultdict(float)

        count_episodes = 0
        while count_episodes < num_episodes:
            agent.reset()
            res_env = unpack_for_grpc(
                stub.reset(evaluation_pb2.Package()).SerializedEntity)

            while not remote_ep_over(stub):
                obs = res_env["observations"]
                action = agent.act(obs)

                res_env = unpack_for_grpc(
                    stub.act_on_environment(
                        evaluation_pb2.Package(
                            SerializedEntity=pack_for_grpc(action))
                    ).SerializedEntity)

            metrics = unpack_for_grpc(
                stub.get_metrics(
                    evaluation_pb2.Package(
                        SerializedEntity=pack_for_grpc(action))
                ).SerializedEntity)

            for m, v in metrics["metrics"].items():
                agg_metrics[m] += v
            count_episodes += 1

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}
        stub.evalai_update_submission(evaluation_pb2.Package())

        return avg_metrics

    def local_evaluate(self, agent: Agent, num_episodes: Optional[int] = None):
        if num_episodes is None:
            num_episodes = len(self._env.episodes)
        else:
            assert num_episodes <= len(self._env.episodes), (
                "num_episodes({}) is larger than number of episodes "
                "in environment ({})".format(num_episodes,
                                             len(self._env.episodes)))

        assert num_episodes > 0, "num_episodes should be greater than 0"

        agg_metrics: Dict = defaultdict(float)

        count_episodes = 0
        while count_episodes < num_episodes:
            agent.reset()
            observations = self._env.reset()

            while not self._env.episode_over:
                action = agent.act(observations)
                observations = self._env.step(action)

            metrics = self._env.get_metrics()
            for m, v in metrics.items():
                agg_metrics[m] += v
            count_episodes += 1

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}

        return avg_metrics

    def evaluate(
        self, agent: Agent, num_episodes: Optional[int] = None
    ) -> Dict[str, float]:
        r"""..

        :param agent: agent to be evaluated in environment.
        :param num_episodes: number of episodes for which the evaluation
            should be run.
        :return: dict containing metrics tracked by environment.
        """
        if self._eval_remote is True:
            return self.remote_evaluate(agent, num_episodes)
        else:
            return self.local_evaluate(agent, num_episodes)
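# Usage sketch (illustrative): one entry point serves both evaluation tracks.
# With eval_remote=False the agent steps a local Env built from config_paths;
# with eval_remote=True it talks to the gRPC server at EVALENV_ADDPORT.
# `MyAgent` and the config path are hypothetical stand-ins.
benchmark = Benchmark(config_paths="configs/tasks/pointnav.yaml",
                      eval_remote=False)
metrics = benchmark.evaluate(MyAgent(), num_episodes=100)
print(metrics)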
def __init__(self, config_paths: Optional[str] = None) -> None:
    self.action_history: Dict = defaultdict()
    self.agg_metrics: Dict = defaultdict(float)
    config_env = get_config(config_paths)
    self._env = Env(config=config_env)
def __init__(self, config_paths: Optional[str] = None) -> None:
    config_env = get_config(config_paths)
    self._env = Env(config=config_env)
import json
import os
import sys
import time
from collections import defaultdict
from typing import Dict, Optional

import numpy as np

from habitat.config.default import get_config
from habitat.core.env import Env


class Benchmark:
    r"""Benchmark for evaluating agents in environments."""

    def __init__(self, config_paths: Optional[str] = None,
                 eval_remote: bool = False) -> None:
        r"""..

        :param config_paths: file to be used for creating the environment
        :param eval_remote: boolean indicating whether evaluation should be
            run remotely or locally
        """
        config_env = get_config(config_paths)
        self._eval_remote = eval_remote

        if self._eval_remote is True:
            self._env = None
        else:
            self._env = Env(config=config_env)

    def remote_evaluate(self, agent: "Agent",
                        num_episodes: Optional[int] = None):
        # The modules imported below are specific to habitat-challenge
        # remote evaluation and are not part of the habitat-lab repository.
        import pickle

        import evalai_environment_habitat  # noqa: F401
        import evaluation_pb2
        import evaluation_pb2_grpc
        import grpc

        time.sleep(60)

        def pack_for_grpc(entity):
            return pickle.dumps(entity)

        def unpack_for_grpc(entity):
            return pickle.loads(entity)

        def remote_ep_over(stub):
            res_env = unpack_for_grpc(
                stub.episode_over(evaluation_pb2.Package()).SerializedEntity)
            return res_env["episode_over"]

        env_address_port = os.environ.get("EVALENV_ADDPORT", "localhost:8085")
        channel = grpc.insecure_channel(env_address_port)
        stub = evaluation_pb2_grpc.EnvironmentStub(channel)

        base_num_episodes = unpack_for_grpc(
            stub.num_episodes(evaluation_pb2.Package()).SerializedEntity)
        num_episodes = base_num_episodes["num_episodes"]

        agg_metrics: Dict = defaultdict(float)

        count_episodes = 0
        while count_episodes < num_episodes:
            agent.reset()
            res_env = unpack_for_grpc(
                stub.reset(evaluation_pb2.Package()).SerializedEntity)

            while not remote_ep_over(stub):
                obs = res_env["observations"]
                action = agent.act(obs)

                res_env = unpack_for_grpc(
                    stub.act_on_environment(
                        evaluation_pb2.Package(
                            SerializedEntity=pack_for_grpc(action))
                    ).SerializedEntity)

            metrics = unpack_for_grpc(
                stub.get_metrics(
                    evaluation_pb2.Package(
                        SerializedEntity=pack_for_grpc(action))
                ).SerializedEntity)

            for m, v in metrics["metrics"].items():
                agg_metrics[m] += v
            count_episodes += 1

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}
        stub.evalai_update_submission(evaluation_pb2.Package())

        return avg_metrics

    def local_evaluate(self, agent: "Agent",
                       num_episodes: Optional[int] = None,
                       skip_first_n: int = 0) -> Dict[str, float]:
        if num_episodes is None:
            num_episodes = len(self._env.episodes)
        else:
            assert num_episodes <= len(self._env.episodes), (
                "num_episodes({}) is larger than number of episodes "
                "in environment ({})".format(num_episodes,
                                             len(self._env.episodes)))

        assert num_episodes > 0, "num_episodes should be greater than 0"

        agg_metrics: Dict = defaultdict(float)
        episode_metrics = dict(scene=[], num_steps=[], time=[],
                               num_collisions=[], xy_error=[], spl=[],
                               softspl=[], success=[])
        last_success = None

        for _ in range(skip_first_n):
            self._env.reset()

        count_episodes = 0
        try:
            while count_episodes < num_episodes:
                agent.reset(last_success=last_success)
                observations = self._env.reset()
                # Skip infeasible episodes. This is easy to detect via spl:
                # the start-to-goal geodesic is recomputed on reset, and if
                # it is infinite, spl becomes nan.
                while not np.isfinite(self._env.get_metrics()['spl']):
                    observations = self._env.reset()

                action = None
                num_steps = 0
                t = time.time()
                while not self._env.episode_over:
                    action = agent.act(observations)
                    observations = self._env.step(action)
                    num_steps += 1
                episode_time = time.time() - t

                metrics = self._env.get_metrics()
                if isinstance(action, dict) and 'xy_error' in action.keys():
                    # Copy all numeric outputs of the agent into the metrics.
                    for key, val in action.items():
                        try:
                            metrics[str(key)] = float(val)
                        except TypeError:
                            pass
                    xy_error = action['xy_error']
                else:
                    xy_error = 999.
                metrics['small_error'] = (
                    1. if xy_error < 7.2 else 0.)  # 0.36 / 0.05
                metrics['episode_length'] = num_steps
                metrics['time'] = episode_time
                if 'softspl' not in metrics.keys():
                    metrics['softspl'] = 0.

                for m, v in metrics.items():
                    if m != 'top_down_map':
                        agg_metrics[m] += v
                count_episodes += 1

                episode_metrics['scene'].append(
                    self._env.current_episode.scene_id)
                episode_metrics['num_steps'].append(num_steps)
                episode_metrics['time'].append(episode_time)
                episode_metrics['xy_error'].append(xy_error)
                episode_metrics['spl'].append(metrics['spl'])
                episode_metrics['softspl'].append(metrics['softspl'])
                episode_metrics['success'].append(metrics['success'])
                last_success = metrics['success']
                print(
                    "%d/%d: Mean success: %f, spl: %f, err: %f. "
                    "This trial success: %f, SPL: %f, soft-SPL: %f, err: %f"
                    % (count_episodes, num_episodes,
                       agg_metrics['success'] / count_episodes,
                       agg_metrics['spl'] / count_episodes,
                       agg_metrics['xy_error'] / count_episodes,
                       metrics['success'], metrics['spl'],
                       metrics['softspl'], metrics['xy_error']))

            # One more agent reset, so the last episode's data can be saved.
            agent.reset()
        except KeyboardInterrupt:
            print("interrupt")

        avg_metrics = {k: v / count_episodes for k, v in agg_metrics.items()}
        avg_metrics['num_episodes'] = count_episodes
        avg_metrics['input_args'] = ' '.join(str(x) for x in sys.argv)
        try:
            print(avg_metrics['input_args'])
            print(agent.params.name)
        except Exception:
            pass

        timestamp_str = time.strftime('%m-%d-%H-%M-%S', time.localtime())
        filename = './temp/evals/eval_{}'.format(timestamp_str)
        with open(filename + '.summary.json', 'w') as file:
            json.dump(avg_metrics, file, indent=4)
        print(filename + '.summary.json')
        with open(filename + '.episodes.json', 'w') as file:
            json.dump(episode_metrics, file, indent=4)
        print(filename + '.episodes.json')

        return avg_metrics

    def evaluate(self, agent: "Agent", num_episodes: Optional[int] = None,
                 skip_first_n: int = 0) -> Dict[str, float]:
        r"""..

        :param agent: agent to be evaluated in environment.
        :param num_episodes: number of episodes for which the evaluation
            should be run.
        :param skip_first_n: number of leading episodes to skip before
            evaluation begins.
        :return: dict containing metrics tracked by environment.
        """
        if self._eval_remote is True:
            return self.remote_evaluate(agent, num_episodes)
        else:
            return self.local_evaluate(agent, num_episodes,
                                       skip_first_n=skip_first_n)
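# Usage sketch (illustrative): resuming a long evaluation by skipping episodes
# scored in an earlier run; local_evaluate writes its summary/episode JSON
# files under ./temp/evals/, which must already exist. `MyNavAgent` is a
# hypothetical agent class.
os.makedirs("./temp/evals", exist_ok=True)
benchmark = Benchmark(config_paths="configs/tasks/pointnav.yaml")
metrics = benchmark.evaluate(MyNavAgent(), num_episodes=200, skip_first_n=800)
print(metrics["success"], metrics["spl"])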
def __init__(self, config_file: Optional[str] = None) -> None:
    config_env = get_config(config_file=config_file)
    self._env = Env(config=config_env)