def main(scenarios, sim_name, headless, num_episodes, seed):
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Tracker, max_episode_steps=None),
        agent_builder=TrackingAgent,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={AGENT_ID: agent_spec},
        sim_name=sim_name,
        headless=headless,
        visdom=False,
        fixed_timestep_sec=0.1,
        sumo_headless=True,
        seed=seed,
        # envision_record_data_replay_path="./data_replay",
    )

    for episode in episodes(n=num_episodes):
        agent = agent_spec.build_agent()
        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            agent_obs = observations[AGENT_ID]
            agent_action = agent.act(agent_obs)
            observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})
            episode.record_step(observations, rewards, dones, infos)

    env.close()
def test_graceful_interrupt(monkeypatch):
    """SMARTS should only throw a KeyboardInterrupt exception."""
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner),
        agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
    )
    agent = agent_spec.build_agent()
    env = build_env(agent_spec)

    with pytest.raises(KeyboardInterrupt):
        obs = env.reset()

        # To simulate a user interrupting the sim (e.g. ctrl-c), we just need to
        # hook into some function that SMARTS calls internally (like this one).
        with mock.patch(
            "smarts.core.sensors.Sensors.observe", side_effect=KeyboardInterrupt
        ):
            for episode in range(10):
                obs, _, _, _ = env.step({AGENT_ID: agent.act(obs)})

            assert episode == 0, "SMARTS should have been interrupted, ending early"

    with pytest.raises(SMARTSNotSetupError):
        env.step({AGENT_ID: agent.act(obs)})
def main(scenarios, headless, num_episodes, max_episode_steps=None):
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.LanerWithSpeed, max_episode_steps=max_episode_steps
        ),
        agent_builder=ChaseViaPointsAgent,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={"SingleAgent": agent_spec},
        headless=headless,
        sumo_headless=True,
    )

    # Convert `env.step()` and `env.reset()` from multi-agent interface to
    # single-agent interface.
    env = SingleAgent(env=env)

    for episode in episodes(n=num_episodes):
        agent = agent_spec.build_agent()
        observation = env.reset()
        episode.record_scenario(env.scenario_log)

        done = False
        while not done:
            agent_action = agent.act(observation)
            observation, reward, done, info = env.step(agent_action)
            episode.record_step(observation, reward, done, info)

    env.close()
def test_building_agent_with_tuple_params():
    agent_spec = AgentSpec(
        agent_params=(32, 41),
        agent_builder=lambda x, y: Agent.from_function(lambda _: (x, y)),
    )

    agent = agent_spec.build_agent()
    assert agent.act("dummy observation") == (32, 41)
def train(
    training_scenarios, evaluation_scenarios, sim_name, headless, num_episodes, seed
):
    agent_params = {"input_dims": 4, "hidden_dims": 7, "output_dims": 3}
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Standard, max_episode_steps=5000),
        agent_params=agent_params,
        agent_builder=PyTorchAgent,
        observation_adapter=observation_adapter,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=training_scenarios,
        agent_specs={AGENT_ID: agent_spec},
        sim_name=sim_name,
        headless=headless,
        fixed_timestep_sec=0.1,
        seed=seed,
    )

    steps = 0
    for episode in episodes(n=num_episodes):
        agent = agent_spec.build_agent()
        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            agent_obs = observations[AGENT_ID]
            agent_action = agent.act(agent_obs)
            observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})
            episode.record_step(observations, rewards, dones, infos)
            steps += 1

            if steps % 500 == 0:
                print("Evaluating agent")

                # We construct an evaluation agent based on the saved
                # state of the agent in training.
                model_path = tempfile.mktemp()
                agent.save(model_path)

                eval_agent_spec = agent_spec.replace(
                    agent_params=dict(agent_params, model_path=model_path)
                )

                # Remove the call to ray.wait if you want evaluation to run
                # in parallel with training.
                ray.wait(
                    [
                        evaluate.remote(
                            eval_agent_spec, evaluation_scenarios, headless, seed
                        )
                    ]
                )

    env.close()
def test_building_agent_with_dict_params():
    agent_spec = AgentSpec(
        agent_params={"y": 2, "x": 1},
        agent_builder=lambda x, y: Agent.from_function(lambda _: x / y),
    )

    agent = agent_spec.build_agent()
    assert agent.act("dummy observation") == 1 / 2
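For context, a minimal sketch of the parameter dispatch that the tuple-params and dict-params tests exercise: a tuple is unpacked as positional arguments and a dict as keyword arguments. This is inferred from what the tests assert, not copied from the SMARTS source, and the helper name is purely illustrative.

# Hedged sketch: how AgentSpec plausibly forwards agent_params to agent_builder.
def _build_with_params(agent_builder, agent_params):
    if isinstance(agent_params, (list, tuple)):
        return agent_builder(*agent_params)   # positional unpacking, e.g. (32, 41)
    if isinstance(agent_params, dict):
        return agent_builder(**agent_params)  # keyword unpacking, e.g. x=1, y=2
    return agent_builder() if agent_params is None else agent_builder(agent_params)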
def test_graceful_shutdown():
    """SMARTS should not throw any exceptions when shut down."""
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner),
        agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
    )
    env = build_env(agent_spec)
    agent = agent_spec.build_agent()

    obs = env.reset()
    for _ in range(10):
        obs, _, _, _ = env.step({AGENT_ID: agent.act(obs)})

    env.close()
def entrypoint(
    gains={
        "theta": 3.0,
        "position": 4.0,
        "obstacle": 3.0,
        "u_accel": 0.1,
        "u_yaw_rate": 1.0,
        "terminal": 0.01,
        "impatience": 0.01,
        "speed": 0.01,
        "rate": 1,
    },
    debug=False,
    aggressiveness=0,
    max_episode_steps=None,
):
    from .agent import OpEnAgent

    return AgentSpec(
        interface=AgentInterface(
            action=ActionSpaceType.Trajectory,
            waypoints=True,
            neighborhood_vehicles=True,
            max_episode_steps=max_episode_steps,
        ),
        agent_params={
            "gains": gains,
            "debug": debug,
        },
        agent_builder=OpEnAgent,
    )
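Entrypoints like the one above are typically exposed through the agent zoo so a spec can be built by name. A minimal sketch of that wiring, assuming the smarts.zoo.registry register/make API; the "open-agent-v0" locator is only an example name.

from smarts.zoo.registry import make, register

# Register the entrypoint under a locator (name chosen here for illustration only).
register(locator="open-agent-v0", entry_point=entrypoint)

# Keyword arguments passed to make() are forwarded to the entrypoint.
agent_spec = make("open-agent-v0", max_episode_steps=1000)
agent = agent_spec.build_agent()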
def main(scenario):
    scenario_path = Path(scenario).absolute()
    agent_mission_count = Scenario.discover_agent_missions_count(scenario_path)
    assert agent_mission_count > 0, "agent mission count should be larger than 0"

    agent_ids = [f"AGENT-{i}" for i in range(agent_mission_count)]
    agent_specs = {
        agent_id: AgentSpec(
            interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=None),
            agent_builder=RuleBasedAgent,
        )
        for agent_id in agent_ids
    }
    agents = {aid: agent_spec.build_agent() for aid, agent_spec in agent_specs.items()}

    env = HiWayEnv(scenarios=[scenario_path], agent_specs=agent_specs)

    while True:
        observations = env.reset()
        done = False
        while not done:
            agent_ids = list(observations.keys())
            actions = {aid: agents[aid].act(observations[aid]) for aid in agent_ids}
            observations, _, dones, _ = env.step(actions)
            done = dones["__all__"]
def entrypoint(
    goal_is_nearby_threshold=40,
    lane_end_threshold=51,
    lane_crash_distance_threshold=6,
    lane_crash_ttc_threshold=2,
    intersection_crash_distance_threshold=6,
    intersection_crash_ttc_threshold=5,
    target_speed=15,
    lane_change_speed=12.5,
):
    with pkg_resources.path(rl_agent, "checkpoint") as checkpoint_path:
        return AgentSpec(
            interface=agent_interface,
            observation_adapter=get_observation_adapter(
                goal_is_nearby_threshold=goal_is_nearby_threshold,
                lane_end_threshold=lane_end_threshold,
                lane_crash_distance_threshold=lane_crash_distance_threshold,
                lane_crash_ttc_threshold=lane_crash_ttc_threshold,
                intersection_crash_distance_threshold=intersection_crash_distance_threshold,
                intersection_crash_ttc_threshold=intersection_crash_ttc_threshold,
            ),
            action_adapter=get_action_adapter(
                target_speed=target_speed,
                lane_change_speed=lane_change_speed,
            ),
            agent_builder=lambda: RLAgent(
                load_path=str((checkpoint_path / "checkpoint").absolute()),
                policy_name="default_policy",
                observation_space=OBSERVATION_SPACE,
                action_space=ACTION_SPACE,
            ),
        )
def _make_agent_specs(num_agent):
    agent_specs = {
        "AGENT_" + str(agent_id): AgentSpec(
            interface=AgentInterface(
                rgb=RGB(),
                action=ActionSpaceType.Lane,
            ),
            agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
            observation_adapter=lambda obs: obs.top_down_rgb.data,
            reward_adapter=lambda obs, reward: reward,
            info_adapter=lambda obs, reward, info: info["score"],
        )
        for agent_id in range(num_agent)
    }

    obs_space = gym.spaces.Dict(
        {
            "AGENT_" + str(agent_id): gym.spaces.Box(
                low=0,
                high=255,
                shape=(
                    agent_specs["AGENT_" + str(agent_id)].interface.rgb.width,
                    agent_specs["AGENT_" + str(agent_id)].interface.rgb.height,
                    3,
                ),
                dtype=np.uint8,
            )
            for agent_id in range(num_agent)
        }
    )

    return agent_specs, obs_space
def replay_entrypoint(
    save_directory,
    id,
    wrapped_agent_locator,
    wrapped_agent_params=None,
    read=False,
):
    if wrapped_agent_params is None:
        wrapped_agent_params = {}

    from .replay_agent import ReplayAgent

    internal_spec = make(wrapped_agent_locator, **wrapped_agent_params)

    global social_index
    global replay_save_dir
    global replay_read

    spec = AgentSpec(
        interface=internal_spec.interface,
        agent_params={
            "save_directory": replay_save_dir,
            "id": f"{id}_{social_index}",
            "internal_spec": internal_spec,
            "wrapped_agent_params": wrapped_agent_params,
            "read": replay_read,
        },
        agent_builder=ReplayAgent,
    )
    social_index += 1
    return spec
def env_and_spec(
    action, agent_type, max_episode_steps, scenarios, seed=42, agent_id="Agent-006"
):
    class Policy(Agent):
        def act(self, obs):
            return action

    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(
            agent_type, max_episode_steps=max_episode_steps
        ),
        agent_builder=Policy,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={agent_id: agent_spec},
        headless=True,
        visdom=False,
        fixed_timestep_sec=TIMESTEP_SEC,
        sumo_headless=True,
        seed=seed,
    )

    return (env, agent_spec)
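A hedged example of how this helper might be driven in a test: the scenario path is a placeholder, and the loop simply steps the fixed-action agent under the default "Agent-006" id until the episode ends.

env, agent_spec = env_and_spec(
    action="keep_lane",
    agent_type=AgentType.Laner,
    max_episode_steps=100,
    scenarios=["scenarios/loop"],  # placeholder scenario path
)
agent = agent_spec.build_agent()

observations = env.reset()
dones = {"__all__": False}
while not dones["__all__"]:
    agent_action = agent.act(observations["Agent-006"])
    observations, rewards, dones, infos = env.step({"Agent-006": agent_action})
env.close()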
def run_experiment(log_path, experiment_name, training_iteration=100):
    model_path = Path(__file__).parent / "model"

    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Standard, max_episode_steps=5000),
        policy=RLlibTFSavedModelAgent(
            model_path.absolute(),
            OBSERVATION_SPACE,
        ),
        observation_adapter=observation_adapter,
        reward_adapter=reward_adapter,
        action_adapter=action_adapter,
    )

    rllib_policies = {
        "policy": (
            None,
            OBSERVATION_SPACE,
            ACTION_SPACE,
            {"model": {"custom_model": TrainingModel.NAME}},
        )
    }

    scenario_path = Path(__file__).parent / "../../scenarios/loop"
    scenario_path = str(scenario_path.absolute())

    tune_config = {
        "env": RLlibHiWayEnv,
        "env_config": {
            "scenarios": [scenario_path],
            "seed": 42,
            "headless": True,
            "agent_specs": {"Agent-007": agent_spec},
        },
        "multiagent": {
            "policies": rllib_policies,
            "policy_mapping_fn": lambda _: "policy",
        },
        "log_level": "WARN",
        "num_workers": multiprocessing.cpu_count() - 1,
        "horizon": HORIZON,
    }

    analysis = tune.run(
        "PPO",
        name=experiment_name,
        stop={"training_iteration": training_iteration},
        max_failures=10,
        local_dir=log_path,
        config=tune_config,
    )

    return analysis
def agent_spec():
    return AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.TrajectoryInterpolator, neighborhood_vehicles=True
        ),
        agent_builder=WithTimeTrajectoryAgent,
        agent_params=None,
    )
def klws_entrypoint(speed):
    from .keep_left_with_speed_agent import KeepLeftWithSpeedAgent

    return AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.LanerWithSpeed, max_episode_steps=20000
        ),
        agent_params={"speed": speed * 0.01},
        agent_builder=KeepLeftWithSpeedAgent,
    )
def human_keyboard_entrypoint(*arg, **kwargs):
    from .human_in_the_loop import HumanKeyboardAgent

    spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.StandardWithAbsoluteSteering, max_episode_steps=3000
        ),
        agent_builder=HumanKeyboardAgent,
    )
    return spec
def on_trigger(ctx: Dict[str, Any]):
    # Define agent specs to be assigned.
    agent_spec = AgentSpec(
        interface=AgentInterface(waypoints=True, action=ActionSpaceType.Lane),
        agent_builder=BasicAgent,
    )

    # Select a random sample from candidates.
    k = ctx.get("vehicles_to_replace_randomly", 0)
    if k <= 0:
        logger.warning(
            "default (0) or negative value specified for replacement. Replacing all valid vehicle candidates."
        )
        sample = ctx["vehicle_candidates"]
    else:
        logger.info(
            f"Choosing {k} vehicles randomly from {len(ctx['vehicle_candidates'])} valid vehicle candidates."
        )
        sample = random.sample(ctx["vehicle_candidates"], k)
    assert len(sample) != 0

    for veh_id in sample:
        # Map selected vehicles to agent ids & specs.
        agent_id = f"agent-{veh_id}"
        ctx["agents"][agent_id] = agent_spec.build_agent()

        # Create missions based on current state and traffic history.
        positional, traverse = scenario.create_dynamic_traffic_history_mission(
            veh_id, ctx["elapsed_sim_time"], ctx["positional_radius"]
        )

        # Take control of vehicles immediately.
        try:
            # Try to assign a PositionalGoal at the last recorded timestep.
            smarts.add_agent_and_switch_control(
                veh_id, agent_id, agent_spec.interface, positional
            )
        except PlanningError:
            logger.warning(
                f"Unable to create PositionalGoal for vehicle {veh_id}, falling back to TraverseGoal"
            )
            smarts.add_agent_and_switch_control(
                veh_id, agent_id, agent_spec.interface, traverse
            )
def agent_spec():
    class KeepLaneAgent(Agent):
        def act(self, obs):
            return "keep_lane"

    return AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.Laner, max_episode_steps=MAX_STEPS
        ),
        agent_builder=KeepLaneAgent,
    )
def entrypoint():
    with pkg_resources.path(cross_rl_agent, "models") as model_path:
        return AgentSpec(
            interface=cross_interface,
            observation_adapter=observation_adapter,
            action_adapter=action_adapter,
            agent_builder=lambda: RLAgent(
                load_path=str(model_path) + "/",
                policy_name="Soc_Mt_TD3Network",
            ),
        )
def prepare_test_agent_and_environment(
    required_interface: Dict[str, Any],
    action_adapter: Callable = lambda action: action,
    info_adapter: Callable = lambda observation, reward, info: info,
    observation_adapter: Callable = lambda observation: observation,
    reward_adapter: Callable = lambda _, reward: reward,
    headless: bool = True,
) -> Tuple[Agent, UltraEnv]:
    if "waypoints" not in required_interface:
        required_interface["waypoints"] = Waypoints(lookahead=20)
    if "neighborhood_vehicles" not in required_interface:
        required_interface["neighborhood_vehicles"] = NeighborhoodVehicles(radius=200)
    if "action" not in required_interface:
        required_interface["action"] = ActionSpaceType.Lane

    agent_spec = AgentSpec(
        interface=AgentInterface(**required_interface),
        agent_builder=RandomAgent,
        agent_params={"action_type": required_interface["action"]},
        action_adapter=action_adapter,
        info_adapter=info_adapter,
        observation_adapter=observation_adapter,
        reward_adapter=reward_adapter,
    )
    agent = agent_spec.build_agent()

    environment = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={AGENT_ID: agent_spec},
        scenario_info=("00", "easy"),
        headless=headless,
        timestep_sec=TIMESTEP_SEC,
        seed=SEED,
    )

    return agent, environment
def gen_config(**kwargs):
    scenario_path = Path(kwargs["scenario"]).absolute()
    agent_missions_count = Scenario.discover_agent_missions_count(scenario_path)
    if agent_missions_count == 0:
        agent_ids = ["default_policy"]
    else:
        agent_ids = [f"AGENT-{i}" for i in range(agent_missions_count)]

    config = load_config(kwargs["config_file"], mode=kwargs.get("mode", "training"))
    agents = {agent_id: AgentSpec(**config["agent"]) for agent_id in agent_ids}

    config["env_config"].update(
        {
            "seed": 42,
            "scenarios": [str(scenario_path)],
            "headless": kwargs["headless"],
            "agent_specs": agents,
        }
    )

    obs_space, act_space = config["policy"][1:3]
    tune_config = config["run"]["config"]

    if kwargs["paradigm"] == "centralized":
        config["env_config"].update(
            {
                "obs_space": gym.spaces.Tuple([obs_space] * agent_missions_count),
                "act_space": gym.spaces.Tuple([act_space] * agent_missions_count),
                "groups": {"group": agent_ids},
            }
        )
        tune_config.update(config["policy"][-1])
    else:
        policies = {}
        for k in agents:
            policies[k] = config["policy"][:-1] + (
                {**config["policy"][-1], "agent_id": k},
            )
        tune_config.update(
            {
                "multiagent": {
                    "policies": policies,
                    "policy_mapping_fn": lambda agent_id: agent_id,
                }
            }
        )

    return config
def agent_spec(max_steps_per_episode):
    return AgentSpec(
        interface=AgentInterface(
            drivable_area_grid_map=True,
            ogm=True,
            rgb=True,
            lidar=True,
            waypoints=True,
            max_episode_steps=max_steps_per_episode,
            debug=True,
            neighborhood_vehicles=True,
            action=ActionSpaceType.Lane,
        ),
        agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
    )
def agent_specs():
    def observation_adapter(env_observation):
        return env_observation.top_down_rgb.data

    return {
        "AGENT_" + agent_id: AgentSpec(
            interface=AgentInterface(
                rgb=RGB(),
                action=ActionSpaceType.Lane,
            ),
            agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
            observation_adapter=observation_adapter,
        )
        for agent_id in ["001", "002"]
    }
def _make_agent_specs(topdown_rgb):
    if topdown_rgb == "rgb":
        rgb = RGB()
    elif topdown_rgb == "false":
        rgb = False

    return {
        "AGENT_" + agent_id: AgentSpec(
            interface=AgentInterface(
                rgb=rgb,
                action=ActionSpaceType.Lane,
            ),
            agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
        )
        for agent_id in ["001", "002"]
    }
def agent_specs():
    return {
        "Agent_" + agent_id: AgentSpec(
            interface=AgentInterface(
                rgb=RGB(width=256, height=256, resolution=50 / 256),
                action=ActionSpaceType.Lane,
                max_episode_steps=3,
            ),
            agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
            observation_adapter=lambda obs: obs.top_down_rgb.data,
            reward_adapter=lambda obs, reward: reward,
            info_adapter=lambda obs, reward, info: info["score"],
        )
        for agent_id in ["1", "2"]
    }
def agent_spec():
    return AgentSpec(
        interface=AgentInterface(
            road_waypoints=RoadWaypoints(40),
            neighborhood_vehicles=NeighborhoodVehicles(
                radius=max(MAP_WIDTH * MAP_RESOLUTION, MAP_HEIGHT * MAP_RESOLUTION)
                * 0.5
            ),
            drivable_area_grid_map=DrivableAreaGridMap(
                width=MAP_WIDTH, height=MAP_HEIGHT, resolution=MAP_RESOLUTION
            ),
            ogm=OGM(width=MAP_WIDTH, height=MAP_HEIGHT, resolution=MAP_RESOLUTION),
            rgb=RGB(width=MAP_WIDTH, height=MAP_HEIGHT, resolution=MAP_RESOLUTION),
            action=ActionSpaceType.Lane,
        ),
        agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
    )
def env_and_spec(scenarios, seed, headless=True, max_episode_steps=None):
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.Laner, max_episode_steps=max_episode_steps
        ),
        agent_builder=KeepLaneAgent,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={AGENT_ID: agent_spec},
        sim_name=None,
        headless=headless,
        visdom=False,
        timestep_sec=0.1,
        sumo_headless=True,
        seed=seed,
    )

    return env, agent_spec
def _make_agent_specs(intrfcs):
    base_intrfc = AgentInterface(
        action=ActionSpaceType.Lane,
        accelerometer=False,
        drivable_area_grid_map=False,
        lidar=False,
        neighborhood_vehicles=False,
        ogm=False,
        rgb=False,
        waypoints=False,
    )

    return {
        "AGENT_" + agent_id: AgentSpec(
            interface=dataclasses.replace(base_intrfc, **intrfc),
            agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
        )
        for agent_id, intrfc in zip(["001", "002"], intrfcs)
    }
def agent_spec():
    def observation_adapter(env_observation):
        ego = env_observation.ego_vehicle_state
        waypoint_paths = env_observation.waypoint_paths
        wps = [path[0] for path in waypoint_paths]

        # Distance of the vehicle from the center of its lane.
        closest_wp = min(wps, key=lambda wp: wp.dist_to(ego.position))
        signed_dist_from_center = closest_wp.signed_lateral_error(ego.position)
        lane_hwidth = closest_wp.lane_width * 0.5
        norm_dist_from_center = signed_dist_from_center / lane_hwidth

        return {OBSERVATION_EXPECTED: norm_dist_from_center}

    def reward_adapter(env_obs, env_reward):
        # The reward is currently the delta in distance travelled by this agent.
        # We want to make sure that this is in fact a delta and not the total
        # distance travelled, since this bug has appeared a few times.
        #
        # The way to verify this is by making sure the reward does not grow
        # without bounds.
        assert -3 < env_reward < 3

        # Return a constant reward to test the reward adapter call.
        return REWARD_EXPECTED

    def info_adapter(env_obs, env_reward, env_info):
        env_info[INFO_EXTRA_KEY] = "blah"
        return env_info

    def action_adapter(model_action):
        # Convert the action command to the required lower case.
        return model_action.lower()

    return AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=100),
        agent_builder=lambda: Agent.from_function(lambda _: ACTION_TO_BE_ADAPTED),
        observation_adapter=observation_adapter,
        reward_adapter=reward_adapter,
        action_adapter=action_adapter,
        info_adapter=info_adapter,
    )
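A quick numeric check of the lane-offset normalisation performed in observation_adapter above, using made-up figures (a 0.4 m signed offset in a 3.2 m wide lane):

# Illustrative numbers only, not taken from any scenario.
signed_dist_from_center = 0.4
lane_hwidth = 3.2 * 0.5
norm_dist_from_center = signed_dist_from_center / lane_hwidth
assert abs(norm_dist_from_center - 0.25) < 1e-9  # offset is a quarter of the half-width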