def prepare_test_env_agent(headless=True):
    """Build a minimal single-agent SAC setup for evaluation tests.

    Creates a continuous-action SAC agent spec capped at 2 episode steps,
    instantiates the ULTRA environment on the ("00", "eval_test") scenario,
    and builds the agent from the spec.

    Args:
        headless: Run the environment without rendering when True.

    Returns:
        A ``(agent, env, spec)`` tuple.
    """
    step_interval = 0.1
    agent_spec = BaselineAgentSpec(
        action_type=ActionSpaceType.Continuous,
        policy_class=SACPolicy,
        max_episode_steps=2,
    )
    test_env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={AGENT_ID: agent_spec},
        scenario_info=("00", "eval_test"),
        headless=headless,
        timestep_sec=step_interval,
        seed=seed,  # module-level seed; presumably fixed for reproducibility
    )
    return agent_spec.build_agent(), test_env, agent_spec
def run_experiment(scenario_info, num_agents, log_dir, headless=True):
    """Run a short multi-agent SAC training experiment (one episode).

    Builds ``num_agents`` SAC baseline agents, steps them through the ULTRA
    environment while periodically launching evaluation runs, records
    per-step losses/rewards, and waits for outstanding evaluations before
    closing the environment.

    Args:
        scenario_info: (task, level) pair forwarded to the environment.
        num_agents: Number of agents to create; ids are "000", "001", ...
        log_dir: Directory used for episode logs and agent metadata.
        headless: Run the environment without rendering when True.
    """
    # Zero-padded, 3-character agent ids ("000", "001", ...).
    agent_ids = [str(i).zfill(3) for i in range(num_agents)]
    agent_classes = {
        agent_id: "ultra.baselines.sac:sac-v0" for agent_id in agent_ids
    }
    agent_specs = {
        agent_id: BaselineAgentSpec(policy_class=SACPolicy, max_episode_steps=2)
        for agent_id in agent_ids
    }

    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=0.1,
        seed=seed,  # module-level seed; presumably fixed for reproducibility
    )

    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }

    total_step = 0
    # BUG FIX: iterate the locator strings (dict VALUES), not the agent ids
    # (keys). The original iterated `agent_classes` directly, so the tag was
    # built from ids like "000" instead of policy names like "sac-v0".
    etag = ":".join(
        policy_class.split(":")[-1] for policy_class in agent_classes.values()
    )

    evaluation_task_ids = dict()

    for episode in episodes(1, etag=etag, log_dir=log_dir):
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset()
        experiment_dir = episode.experiment_dir

        # Persist agent metadata once per experiment directory so evaluation
        # runs can reconstruct the agents.
        if not os.path.exists(f"{experiment_dir}/agent_metadata.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(
                f"{experiment_dir}/agent_metadata.pkl", "wb"
            ) as metadata_file:
                dill.dump(
                    {
                        "agent_ids": agent_ids,
                        "agent_classes": agent_classes,
                        "agent_specs": agent_specs,
                    },
                    metadata_file,
                    pickle.HIGHEST_PROTOCOL,
                )

        while not dones["__all__"]:
            # Launch evaluation runs at the configured rate and harvest any
            # that have already finished.
            evaluation_check(
                agents=agents,
                agent_ids=agent_ids,
                episode=episode,
                eval_rate=10,
                eval_episodes=1,
                max_episode_steps=2,
                policy_classes=agent_classes,
                scenario_info=scenario_info,
                evaluation_task_ids=evaluation_task_ids,
                timestep_sec=0.1,
                headless=True,
                log_dir=log_dir,
            )
            collect_evaluations(evaluation_task_ids=evaluation_task_ids)

            actions = {
                agent_id: agents[agent_id].act(observation, explore=True)
                for agent_id, observation in observations.items()
            }
            next_observations, rewards, dones, infos = env.step(actions)

            # Train only agents present both before and after the step.
            active_agent_ids = observations.keys() & next_observations.keys()
            loss_outputs = {
                agent_id: agents[agent_id].step(
                    state=observations[agent_id],
                    action=actions[agent_id],
                    reward=rewards[agent_id],
                    next_state=next_observations[agent_id],
                    done=dones[agent_id],
                    info=infos[agent_id],
                )
                for agent_id in active_agent_ids
            }
            episode.record_step(
                agent_ids_to_record=active_agent_ids,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_outputs=loss_outputs,
            )
            total_step += 1
            observations = next_observations

    # Wait on the remaining evaluations to finish.
    while collect_evaluations(evaluation_task_ids):
        time.sleep(0.1)

    env.close()
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. from smarts.zoo.registry import register from .sac.sac.policy import SACPolicy from .ppo.ppo.policy import PPOPolicy from .dqn.dqn.policy import DQNPolicy from .ddpg.ddpg.policy import TD3Policy from .bdqn.bdqn.policy import BehavioralDQNPolicy from smarts.core.controllers import ActionSpaceType from ultra.baselines.agent_spec import BaselineAgentSpec register( locator="sac-v0", entry_point=lambda **kwargs: BaselineAgentSpec(action_type=ActionSpaceType. Continuous, policy_class=SACPolicy, **kwargs), ) register( locator="ppo-v0", entry_point=lambda **kwargs: BaselineAgentSpec(action_type=ActionSpaceType. Continuous, policy_class=PPOPolicy, **kwargs), ) register( locator="ddpg-v0", entry_point=lambda **kwargs: BaselineAgentSpec(action_type=ActionSpaceType. Continuous, policy_class=TD3Policy, **kwargs),
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. from smarts.zoo.registry import register from .sac.sac.policy import SACPolicy from .ppo.ppo.policy import PPOPolicy from .dqn.dqn.policy import DQNPolicy from .td3.td3.policy import TD3Policy from .bdqn.bdqn.policy import BehavioralDQNPolicy from smarts.core.controllers import ActionSpaceType from ultra.baselines.agent_spec import BaselineAgentSpec register( locator="sac-v0", entry_point=lambda **kwargs: BaselineAgentSpec(policy_class=SACPolicy, **kwargs), ) register( locator="ppo-v0", entry_point=lambda **kwargs: BaselineAgentSpec(policy_class=PPOPolicy, **kwargs), ) register( locator="td3-v0", entry_point=lambda **kwargs: BaselineAgentSpec(policy_class=TD3Policy, **kwargs), ) register( locator="dqn-v0", entry_point=lambda **kwargs: BaselineAgentSpec(policy_class=DQNPolicy, **kwargs),