def test_tune_metrics_availability(self):
    log_info = LogInfo()
    for metric in _AVAILABLE_TUNE_METRICS:
        self.assertTrue(metric in log_info.data)
def evaluate(
    experiment_dir,
    seed,
    agent_ids,
    policy_classes,
    checkpoint_dirs,
    scenario_info,
    num_episodes,
    max_episode_steps,
    headless,
    timestep_sec,
    log_dir,
    eval_mode=True,
):
    torch.set_num_threads(1)

    # Create the agent specifications matched with their associated ID.
    agent_specs = {
        agent_id: make(
            locator=policy_classes[agent_id],
            checkpoint_dir=checkpoint_dirs[agent_id],
            experiment_dir=experiment_dir,
            max_episode_steps=max_episode_steps,
            agent_id=agent_id,
        )
        for agent_id in agent_ids
    }

    # Create the environment with the specified agents.
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
        eval_mode=eval_mode,
    )

    # Build each agent from its specification.
    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }

    # A dictionary to hold the evaluation data for each agent.
    summary_log = {agent_id: LogInfo() for agent_id in agent_ids}

    # Define an 'etag' for this experiment's data directory based off the
    # policy_classes values. E.g. values of ["ultra.baselines.dqn:dqn-v0",
    # "ultra.baselines.ppo:ppo-v0"] are transformed into an etag of
    # "dqn-v0:ppo-v0".
    etag = ":".join(
        [policy_class.split(":")[-1] for policy_class in policy_classes.values()]
    )

    for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):
        # Reset the environment and retrieve the initial observations.
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset(mode="Evaluation")

        while not dones["__all__"]:
            # Get and perform the available agents' actions.
            actions = {
                agent_id: agents[agent_id].act(observation, explore=False)
                for agent_id, observation in observations.items()
            }
            observations, rewards, dones, infos = env.step(actions)

            # Record the data from this step.
            episode.record_step(
                agent_ids_to_record=infos.keys(), infos=infos, rewards=rewards
            )

        episode.record_episode()

        # Accumulate this episode's scalar metrics for each agent.
        for agent_id, agent_data in episode.info[episode.active_tag].items():
            for key, value in agent_data.data.items():
                if not isinstance(value, (list, tuple, np.ndarray)):
                    summary_log[agent_id].data[key] += value

    # Normalize by the number of evaluation episodes.
    for agent_id, agent_data in summary_log.items():
        for key, value in agent_data.data.items():
            if not isinstance(value, (list, tuple, np.ndarray)):
                summary_log[agent_id].data[key] /= num_episodes

    env.close()

    return summary_log
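For orientation, a call to the multi-agent evaluate might look like the sketch below. The agent IDs, directories, scenario tuple, and numeric settings are placeholder values (not taken from the source); only the baseline locators come from the comment above.

# A hypothetical invocation; every literal below is a placeholder.
summary = evaluate(
    experiment_dir="logs/experiment-001",
    seed=2,
    agent_ids=["000", "001"],
    policy_classes={
        "000": "ultra.baselines.dqn:dqn-v0",
        "001": "ultra.baselines.ppo:ppo-v0",
    },
    checkpoint_dirs={
        "000": "logs/experiment-001/000/models/0",
        "001": "logs/experiment-001/001/models/0",
    },
    scenario_info=("00", "easy"),  # (task, level) pair; format is an assumption
    num_episodes=200,
    max_episode_steps=1200,
    headless=True,
    timestep_sec=0.1,
    log_dir="logs/evaluation",
)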
def evaluate(
    experiment_dir,
    seed,
    agent_id,
    policy_class,
    itr_count,
    checkpoint_dir,
    scenario_info,
    num_episodes,
    headless,
    timestep_sec,
):
    torch.set_num_threads(1)

    # Create the single agent's specification from its policy class and checkpoint.
    spec = make(
        locator=policy_class,
        checkpoint_dir=checkpoint_dir,
        experiment_dir=experiment_dir,
    )

    # Create the environment with the single specified agent.
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={agent_id: spec},
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
        eval_mode=True,
    )

    agent = spec.build_agent()

    summary_log = LogInfo()
    logs = []

    for episode in episodes(num_episodes):
        observations = env.reset()
        state = observations[agent_id]
        dones, infos = {"__all__": False}, None
        episode.reset(mode="Evaluation")

        while not dones["__all__"]:
            action = agent.act(state, explore=False)
            observations, rewards, dones, infos = env.step({agent_id: action})
            next_state = observations[agent_id]
            state = next_state

            episode.record_step(agent_id=agent_id, infos=infos, rewards=rewards)

        episode.record_episode()
        logs.append(episode.info[episode.active_tag].data)

        # Accumulate this episode's scalar metrics.
        for key, value in episode.info[episode.active_tag].data.items():
            if not isinstance(value, (list, tuple, np.ndarray)):
                summary_log.data[key] += value

    # Normalize by the number of evaluation episodes.
    for key, val in summary_log.data.items():
        if not isinstance(val, (list, tuple, np.ndarray)):
            summary_log.data[key] /= num_episodes

    env.close()

    return summary_log
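The single-agent variant could be invoked as in the short sketch below; the locator, checkpoint path, scenario tuple, and numeric settings are again placeholder values, not taken from the source.

# A hypothetical single-agent invocation; every literal below is a placeholder.
summary = evaluate(
    experiment_dir="logs/experiment-001",
    seed=2,
    agent_id="000",
    policy_class="ultra.baselines.dqn:dqn-v0",
    itr_count=0,
    checkpoint_dir="logs/experiment-001/models/0",
    scenario_info=("00", "easy"),  # (task, level) pair; format is an assumption
    num_episodes=200,
    headless=True,
    timestep_sec=0.1,
)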