def test_ma_roundabout_init_space():
    """Check the per-agent spaces across a grid of environment configs.

    For every combination of start seed, agent count, lidar ``num_others``
    and crash penalty, a fresh environment is built, the ``agent0``
    observation and action spaces are inspected, and the shared
    before/after-reset space checks are run.
    """
    def _assert_shape_defined(space):
        # Both the shape and its flattened size must be available.
        assert space.shape is not None, space
        assert np.prod(space.shape) is not None, space

    # Same iteration order as four nested loops, outermost variable first.
    grid = (
        (seed, agents, others, penalty)
        for seed in [5000, 6000, 7000]
        for agents in [16, 32]
        for others in [0, 2, 4, 8]
        for penalty in [0, 5]
    )

    env = None
    try:
        for seed, agents, others, penalty in grid:
            env_config = {
                "start_seed": seed,
                "num_agents": agents,
                "vehicle_config": {"lidar": {"num_others": others}},
                "crash_vehicle_penalty": penalty,
            }
            env = MultiAgentRoundaboutEnv(env_config)
            _assert_shape_defined(env.observation_space["agent0"])
            _assert_shape_defined(env.action_space["agent0"])
            _check_spaces_before_reset(env)
            env.reset()
            _check_spaces_after_reset(env)
            env.close()
            print('Finish: ', env_config)
    finally:
        # Close the most recent env even if the iteration failed midway.
        if env is not None:
            env.close()
def test_ma_roundabout_reward_done_alignment():
    """Out-of-road scenario: done flags, rewards and info must agree.

    Agents steer fully to one side (first -1, then 1) until the episode ends.
    Every finished agent must carry the OUT_OF_ROAD flag, and every agent
    that received the -777 reward must be done with that flag set.

    NOTE(review): a later function in this file is defined with this exact
    name, so Python rebinds the name and this version is not the one left in
    the module namespace.
    """
    penalty_reward = -777
    env = MultiAgentRoundaboutEnv(
        dict(horizon=200, num_agents=4, out_of_road_penalty=777, crash_done=False)
    )
    try:
        _check_spaces_before_reset(env)
        first_obs = env.reset()
        _check_spaces_after_reset(env, first_obs)
        assert env.observation_space.contains(first_obs)

        for steering in (-1, 1):
            for _ in range(5000):
                commands = {name: [steering, 1] for name in env.vehicles.keys()}
                _, rewards, dones, infos = _act(env, commands)

                # The reward check on finished agents is disabled in this
                # version; only the termination flag is verified.
                for name, finished in dones.items():
                    if name == "__all__" or not finished:
                        continue
                    assert infos[name][TerminationState.OUT_OF_ROAD]

                for name, reward in rewards.items():
                    if reward != penalty_reward:
                        continue
                    assert dones[name]
                    assert infos[name][TerminationState.OUT_OF_ROAD]

                if dones["__all__"]:
                    env.reset()
                    break
    finally:
        env.close()
def test_ma_roundabout_40_agent_reset_after_respawn():
    """Reset a 40-agent env repeatedly after finishing half of the agents.

    After each reset no two vehicles may be closer than the sum of their
    half widths, and no vehicle may report a vehicle crash.
    """
    overlap_msg = "Vehicles overlap after reset()"

    def check_pos(vehicles):
        # Compare every vehicle with all vehicles that follow it in the list.
        for idx, first in enumerate(vehicles):
            for second in vehicles[idx + 1:]:
                first_pos = first.position
                second_pos = second.position
                gap = norm(first_pos[0] - second_pos[0], first_pos[1] - second_pos[1])
                assert gap > first.WIDTH / 2 + second.WIDTH / 2, overlap_msg
            assert not first.crash_vehicle, overlap_msg

    env = MultiAgentRoundaboutEnv(dict(horizon=50, num_agents=40))
    try:
        _check_spaces_before_reset(env)
        initial_obs = env.reset()
        _check_spaces_after_reset(env, initial_obs)
        assert env.observation_space.contains(initial_obs)

        for _ in range(50):
            env.reset()
            check_pos(list(env.vehicles.values()))
            # Retire the first 20 agents, then advance three steps.
            retiring = list(env.vehicles.keys())[:20]
            for agent_name in retiring:
                env.agent_manager.finish(agent_name)
            for _ in range(3):
                env.step({name: [1, 1] for name in env.vehicles.keys()})
    finally:
        env.close()
def test_infinite_agents():
    """Run with ``num_agents=-1`` and check that the agent population grows.

    Two environments are exercised in turn. Each step asserts that any
    finished agent reports ``episode_length > 1``. The final growth assertion
    uses the counters recorded during the second environment.
    """
    first_config = {
        "map_config": dict(exit_length=20, lane_num=2),
        # "use_render": True,
        "fast": True,
        "num_agents": -1,
        "delay_done": 50,
        "horizon": 50,
        "debug": True,
        "random_traffic": False,
    }
    env = MultiAgentRoundaboutEnv(first_config)
    try:
        env.reset()
        env.seed(100)
        env._DEBUG_RANDOM_SEED = 100
        old_num_of_vehicles = len(env.vehicles)
        max_num = old_num_of_vehicles
        for step_idx in range(1, 1000):
            _, _, dones, infos = env.step({name: [1, 1] for name in env.vehicles})
            print(
                "{} Current active agents: ".format(step_idx), len(env.vehicles), ". Objects: ",
                len(env.agent_manager._object_to_agent)
            )
            max_num = max(len(env.vehicles), max_num)
            for name, agent_info in infos.items():
                if dones[name]:
                    assert agent_info["episode_length"] > 1
            if dones["__all__"]:
                env.reset()
                print("Finish {} steps.".format(step_idx))
    finally:
        env._DEBUG_RANDOM_SEED = None
        env.close()

    second_config = {"num_agents": -1, "delay_done": 0, "horizon": 50, "debug": True}
    env = MultiAgentRoundaboutEnv(second_config)
    try:
        env.reset()
        old_num_of_vehicles = len(env.vehicles)
        max_num = old_num_of_vehicles
        for step_idx in range(1, 300):
            _, _, dones, infos = env.step({name: [0, 1] for name in env.vehicles})
            max_num = max(len(env.vehicles), max_num)
            for name, agent_info in infos.items():
                if dones[name]:
                    assert agent_info["episode_length"] > 1
            if dones["__all__"]:
                env.reset()
                print("Finish {} steps.".format(step_idx))
    finally:
        env.close()

    assert max_num > old_num_of_vehicles
def test_ma_roundabout_horizon_termination():
    """Check that idle agents terminate via MAX_STEP and are then replaced.

    All agents except ``agent0`` and ``agent7`` are set static and given a
    zero action, so when one of them finishes its info must flag MAX_STEP and
    none of OUT_OF_ROAD / CRASH / CRASH_VEHICLE. On the step after an agent
    finished, its name must no longer appear in the step outputs.
    The whole scenario is repeated three times on the same environment.
    """
    # Agents that keep driving; the others are frozen in place.
    special_agents = {"agent0", "agent7"}

    env = MultiAgentRoundaboutEnv({"horizon": 100, "num_agents": 8, "crash_done": False})
    try:
        for _ in range(3):  # This function is really easy to break, repeat multiple times!
            _check_spaces_before_reset(env)
            obs = env.reset()
            _check_spaces_after_reset(env, obs)
            assert env.observation_space.contains(obs)

            # Agents that finished on the previous step.
            should_respawn = set()

            # Steps are counted from 1, so the first-step check is `step == 1`.
            for step in range(1, 10000):
                act = {k: [0, 0] for k in env.vehicles.keys()}
                for v_id in act:
                    if v_id in special_agents:
                        act[v_id] = [1, 1]  # Add some randomness
                    elif v_id in env.vehicles:
                        env.vehicles[v_id].set_static(True)

                obs, r, d, i = _act(env, act)

                if step == 1:
                    assert not any(d.values())

                if should_respawn:
                    for kkk in should_respawn:
                        assert kkk not in obs, "It seems the max_step agents is not respawn!"
                        assert kkk not in r
                        assert kkk not in d
                        assert kkk not in i
                    should_respawn.clear()

                for kkk, ddd in d.items():
                    if ddd and kkk == "__all__":
                        print("Current: ", step)
                        continue
                    if ddd and kkk not in special_agents:
                        assert i[kkk][TerminationState.MAX_STEP]
                        assert not i[kkk][TerminationState.OUT_OF_ROAD]
                        assert not i[kkk][TerminationState.CRASH]
                        assert not i[kkk][TerminationState.CRASH_VEHICLE]
                        should_respawn.add(kkk)

                if d["__all__"]:
                    obs = env.reset()
                    should_respawn.clear()
                    break
    finally:
        env.close()
def test_save_episode(vis=False):
    """Record episodes, dump them to JSON, then replay the last one.

    The environment is created with ``record_episode=True``. Whenever
    ``done["__all__"]`` is set, the episode is dumped via
    ``env.engine.dump_episode()`` and written to ``test_dump_<k>.json``,
    where ``k`` cycles through the five most recent slots. Finally the last
    dumped episode is passed to ``reset()`` and stepped until it ends.

    :param vis: when True, render every step, enable manual control and use
        much longer step budgets.
    """
    setup_logger(True)
    write_json = True
    keep_recent = 5
    slot = 0
    record_steps = 100000 if vis else 600
    replay_steps = 100000 if vis else 2000

    env = MultiAgentRoundaboutEnv(
        {"use_render": vis, "manual_control": vis, "record_episode": True, "horizon": 100}
    )
    try:
        # Recording phase.
        env.reset()
        epi_info = None
        for _ in range(1, record_steps):
            _, _, dones, _ = env.step({name: [0, .2] for name in env.vehicles.keys()})
            if vis:
                env.render()
            if not dones["__all__"]:
                continue
            epi_info = env.engine.dump_episode()
            if write_json:
                with open("test_dump_{}.json".format(slot), "w") as f:
                    json.dump(epi_info, f)
                slot = (slot + 1) % keep_recent
            env.reset()

        # Replay phase: feed the recorded episode back into reset().
        env.reset(epi_info)
        for _ in range(1, replay_steps):
            _, _, dones, _ = env.step({name: [0, 0.1] for name in env.vehicles.keys()})
            if vis:
                env.render()
            if dones["__all__"]:
                break
    finally:
        env.close()
def test_randomize_spawn_place():
    """After a reset, no agent may sit exactly at its pre-step position.

    Each iteration records every vehicle's position, takes one step, resets,
    and compares the new position of each agent with the recorded one.
    """
    env = MultiAgentRoundaboutEnv(dict(num_agents=4, use_render=False, fast=True))
    try:
        env.reset()
        for _ in range(1000):
            full_throttle = {name: [1, 1] for name in env.vehicles.keys()}
            before = {name: vehicle.position for name, vehicle in env.vehicles.items()}
            env.step(full_throttle)
            env.reset()
            for name, vehicle in env.vehicles.items():
                current = vehicle.position
                assert not np.all(current == before[name]), (current, before[name], name)
    finally:
        env.close()
def test_ma_no_reset_error():
    """Step a 40-agent env and check vehicles never overlap before a step.

    It is possible that many agents are populated in the same spawn place, so
    before every step each pair of vehicles must be further apart than the
    sum of their half widths and no vehicle may report a vehicle crash.
    """
    overlap_msg = "Vehicles overlap after reset()"

    def check_pos(vehicles):
        # Compare every vehicle with all vehicles that follow it in the list.
        for idx, first in enumerate(vehicles):
            for second in vehicles[idx + 1:]:
                first_pos = first.position
                second_pos = second.position
                gap = norm(first_pos[0] - second_pos[0], first_pos[1] - second_pos[1])
                assert gap > first.WIDTH / 2 + second.WIDTH / 2, overlap_msg
            assert not first.crash_vehicle, overlap_msg

    env = MultiAgentRoundaboutEnv(dict(horizon=300, num_agents=40, delay_done=0))
    try:
        _check_spaces_before_reset(env)
        initial_obs = env.reset()
        _check_spaces_after_reset(env, initial_obs)
        assert env.observation_space.contains(initial_obs)

        for _ in range(300):
            check_pos(list(env.vehicles.values()))
            _, _, dones, _ = env.step({name: [0, 1] for name in env.vehicles.keys()})
            if dones["__all__"]:
                break
    finally:
        env.close()
def test_ma_roundabout_close_spawn():
    """Check that freshly spawned vehicles are never too close to each other.

    With a fixed debug seed, the env is reset ten times. After each reset the
    agents idle for ten steps (none may finish) and every pair of vehicles
    must satisfy ``distance_greater(..., length=2.2)``.

    The class-level ``_DEBUG_RANDOM_SEED`` is global state shared with other
    tests, so it is restored in a ``finally`` that also covers environment
    construction, seeding and ``close()``.
    """
    def _no_close_spawn(vehicles):
        vehicles = list(vehicles.values())
        for c1, v1 in enumerate(vehicles):
            for v2 in vehicles[c1 + 1:]:
                assert distance_greater(v1.position, v2.position, length=2.2)

    MultiAgentRoundaboutEnv._DEBUG_RANDOM_SEED = 1
    env = None
    try:
        env = MultiAgentRoundaboutEnv({"horizon": 50, "num_agents": 16, "map_config": {"exit_length": 30}})
        env.seed(100)
        _check_spaces_before_reset(env)
        for num_r in range(10):
            env.reset()
            _check_spaces_after_reset(env)
            for _ in range(10):
                o, r, d, i = env.step({k: [0, 0] for k in env.vehicles.keys()})
                assert not any(d.values())
            _no_close_spawn(env.vehicles)
            print('Finish {} resets.'.format(num_r))
    finally:
        try:
            if env is not None:
                env.close()
        finally:
            # Always undo the class-level override, even if close() fails.
            MultiAgentRoundaboutEnv._DEBUG_RANDOM_SEED = None
def test_ma_roundabout_horizon():
    """Check done/reward/cost consistency while agents drive off the road.

    Repeated three times with a fresh environment. At every step:

    * on the first step nobody may be done;
    * whenever someone is done, at most 4 agents exist before and after, and
      any newly appeared agent is present in the observation and done dicts;
    * a reward of -777 implies done, cost 778 and the OUT_OF_ROAD flag;
    * an OUT_OF_ROAD flag or cost 778 implies done, cost 778 and the flag.
    """
    for _ in range(3):  # This function is really easy to break, repeat multiple times!
        env = MultiAgentRoundaboutEnv(
            {
                "horizon": 100,
                "num_agents": 4,
                "vehicle_config": {
                    "lidar": {
                        "num_others": 2
                    }
                },
                "out_of_road_penalty": 777,
                "out_of_road_cost": 778,
                "crash_done": False
            }
        )
        try:
            _check_spaces_before_reset(env)
            obs = env.reset()
            _check_spaces_after_reset(env, obs)
            assert env.observation_space.contains(obs)
            last_keys = set(env.vehicles.keys())
            for step in range(1, 1000):
                act = {k: [1, 1] for k in env.vehicles.keys()}
                o, r, d, i = _act(env, act)
                new_keys = set(env.vehicles.keys())

                # Steps are counted from 1 here; the previous `step == 0`
                # guard could never be true, so the first-step check was
                # silently skipped.
                if step == 1:
                    assert not any(d.values())

                if any(d.values()):
                    assert len(last_keys) <= 4  # num of agents
                    assert len(new_keys) <= 4  # num of agents
                    for k in new_keys.difference(last_keys):
                        assert k in o
                        assert k in d
                    print("Step {}, Done: {}".format(step, d))

                for kkk, rrr in r.items():
                    if rrr == -777:
                        assert d[kkk]
                        assert i[kkk]["cost"] == 778
                        assert i[kkk][TerminationState.OUT_OF_ROAD]

                for kkk, iii in i.items():
                    if iii and (iii[TerminationState.OUT_OF_ROAD] or iii["cost"] == 778):
                        assert d[kkk]
                        assert i[kkk]["cost"] == 778
                        assert i[kkk][TerminationState.OUT_OF_ROAD]

                if d["__all__"]:
                    break
                last_keys = new_keys
        finally:
            env.close()
def test_ma_env_force_reset():
    """Re-initialise one env object with 1, 2 and then 5 agents.

    After each (re)initialisation and reset, the number of vehicles,
    ``num_agents`` and the number of target vehicle configs must all match.
    The environment is closed in a ``finally`` so that a failing assertion
    does not leave it open.

    NOTE(review): a later function in this file is defined with this exact
    name, so Python rebinds the name and this version is not the one left in
    the module namespace.
    """
    def close_and_reset_num_agents(env, num_agents, raw_input_config):
        # Close the env, then re-run __init__ on the same object with a copy
        # of the raw config carrying the new agent count.
        config = copy.deepcopy(raw_input_config)
        env.close()
        config["num_agents"] = num_agents
        env.__init__(config)

    def assert_agent_count(env, expected):
        assert len(env.vehicles) == env.num_agents == len(env.config["target_vehicle_configs"]) == expected

    config = {'num_agents': 1}
    e = MultiAgentRoundaboutEnv(config)
    _raw_input_config = copy.deepcopy(config)
    try:
        e.reset()
        assert_agent_count(e, 1)

        for num_agents in (2, 5):
            close_and_reset_num_agents(e, num_agents=num_agents, raw_input_config=_raw_input_config)
            e.reset()
            assert_agent_count(e, num_agents)
    finally:
        e.close()
def test_ma_roundabout_no_short_episode():
    """Random driving: finished agents must report ``episode_length >= 1``.

    Before each step the observation keys (plus ``"__all__"``) must equal the
    action-space keys united with the keys of the latest done dict. The loop
    stops after 2000 steps or once more than 200 done agents were counted.
    """
    total_steps = 2000
    choices = [[0, 1], [1, 1], [-1, 1]]

    env = MultiAgentRoundaboutEnv(dict(horizon=300, num_agents=40))
    try:
        _check_spaces_before_reset(env)
        o = env.reset()
        _check_spaces_after_reset(env, o)

        began = time.time()
        finished_agents = 0
        d = {"__all__": False}
        for step in range(total_steps):
            act = {name: choices[np.random.choice(len(choices))] for name in env.vehicles.keys()}

            observed = set(o.keys()) | {"__all__"}
            expected = set(env.action_space.spaces.keys()) | set(d.keys())
            assert observed == expected

            o, r, d, i = _act(env, act)
            for name, agent_info in i.items():
                if not d[name]:
                    continue
                assert agent_info["episode_length"] >= 1
                finished_agents += 1

            if d["__all__"]:
                o = env.reset()
                d = {"__all__": False}

            completed = step + 1
            if completed % 100 == 0:
                print(
                    "Finish {}/2000 simulation steps. Time elapse: {:.4f}. Average FPS: {:.4f}".format(
                        completed, time.time() - began, completed / (time.time() - began)
                    )
                )

            if finished_agents > 200:
                break
    finally:
        env.close()
def test_ma_env_force_reset():
    """Change the agent count of a live env via ``close_and_reset_num_agents``.

    Starting with 1 agent, the env is switched to 2 and then 5 agents. After
    each reset the number of vehicles, ``num_agents`` and the number of
    target vehicle configs must all match. The environment is closed in a
    ``finally`` so that a failing assertion does not leave it open.
    """
    def assert_agent_count(env, expected):
        assert len(env.vehicles) == env.num_agents == len(env.config["target_vehicle_configs"]) == expected

    e = MultiAgentRoundaboutEnv({'num_agents': 1})
    try:
        e.reset()
        assert_agent_count(e, 1)

        for num_agents in (2, 5):
            e.close_and_reset_num_agents(num_agents=num_agents)
            e.reset()
            assert_agent_count(e, num_agents)
    finally:
        e.close()
def test_ma_roundabout_reset():
    """Exercise ``reset()`` after a finished episode and after forced successes.

    Phase 1 drives 4 agents with action ``[1, 1]`` until ``d["__all__"]`` is
    set, resets, and checks that the observation space, action space,
    ``env.observations``, the returned observation and the target vehicle
    configs all share the same agent keys.

    Phase 2 uses 32 agents and ``success_reward=777``. Every vehicle is moved
    to the end of its final lane and must then report ``arrive_destination``.
    After one more step, every done agent must carry the SUCCESS flag and a
    reward of 777. The env is reset once ``d["__all__"]`` is set.
    """
    env = MultiAgentRoundaboutEnv({"horizon": 50, "num_agents": 4})
    try:
        _check_spaces_before_reset(env)
        obs = env.reset()
        _check_spaces_after_reset(env, obs)
        assert env.observation_space.contains(obs)
        for step in range(1000):
            act = {k: [1, 1] for k in env.vehicles.keys()}
            o, r, d, i = _act(env, act)
            # Nobody may be done right after the very first step.
            if step == 0:
                assert not any(d.values())
            if d["__all__"]:
                obs = env.reset()
                assert env.observation_space.contains(obs)
                _check_spaces_after_reset(env, obs)
                # All per-agent containers must agree on the set of agent names.
                assert set(env.observation_space.spaces.keys()) == set(env.action_space.spaces.keys()) == \
                       set(env.observations.keys()) == set(obs.keys()) == \
                       set(env.config["target_vehicle_configs"].keys())
                break
    finally:
        env.close()

    # Put vehicles to destination and then reset. This might cause error if agent is assigned destination BEFORE reset.
    env = MultiAgentRoundaboutEnv({"horizon": 100, "num_agents": 32, "success_reward": 777})
    try:
        _check_spaces_before_reset(env)
        success_count = 0
        agent_count = 0
        obs = env.reset()
        _check_spaces_after_reset(env, obs)
        assert env.observation_space.contains(obs)
        for num_reset in range(5):
            for step in range(1000):
                # Two ordinary driving steps before teleporting the vehicles.
                for _ in range(2):
                    act = {k: [1, 1] for k in env.vehicles.keys()}
                    o, r, d, i = _act(env, act)

                # Force vehicle to success!
                for v_id, v in env.vehicles.items():
                    loc = v.routing_localization.final_lane.end
                    v.set_position(loc)
                    pos = v.position
                    np.testing.assert_almost_equal(pos, loc, decimal=3)
                    # new_loc, long/lat, flag1 and flag2 are computed but not
                    # used by any assertion below.
                    new_loc = v.routing_localization.final_lane.end
                    long, lat = v.routing_localization.final_lane.local_coordinates(v.position)
                    flag1 = (
                        v.routing_localization.final_lane.length - 5 < long <
                        v.routing_localization.final_lane.length + 5
                    )
                    flag2 = (
                        v.routing_localization.get_current_lane_width() / 2 >= lat >=
                        (0.5 - v.routing_localization.get_current_lane_num()) *
                        v.routing_localization.get_current_lane_width()
                    )
                    # Debug marker printed just before the assertion fails.
                    if not v.arrive_destination:
                        print('sss')
                    assert v.arrive_destination

                act = {k: [0, 0] for k in env.vehicles.keys()}
                o, r, d, i = _act(env, act)

                for v in env.vehicles.values():
                    assert len(v.routing_localization.checkpoints) > 2

                # Count agents whose info reports success.
                for kkk, iii in i.items():
                    if iii and iii[TerminationState.SUCCESS]:
                        # print("{} success!".format(kkk))
                        success_count += 1

                # Every finished agent must have finished by success.
                for kkk, ddd in d.items():
                    if ddd and kkk != "__all__":
                        assert i[kkk][TerminationState.SUCCESS]
                        agent_count += 1

                # A finished agent receives exactly the configured success reward.
                for kkk, rrr in r.items():
                    if d[kkk]:
                        assert rrr == 777

                if d["__all__"]:
                    print("Finish {} agents. Success {} agents.".format(agent_count, success_count))
                    o = env.reset()
                    assert env.observation_space.contains(o)
                    _check_spaces_after_reset(env, o)
                    break
    finally:
        env.close()
def test_delay_done():
    """Check ``delay_done`` behaviour for finished agents.

    Scenario 1: agent 0 is put in the left lane and agent 1 in the right
    lane, and agent 0 is expected to finish first. Once agent 0 is done it
    must disappear from the step outputs, and agent 1 must register a vehicle
    crash before the episode ends.

    Scenario 2: with five agents, any agent that was reported done must not
    show up in later observations of the same episode.
    """
    spawn_configs = {
        "agent0": {
            "spawn_longitude": 12,
            "spawn_lateral": 0,
            "spawn_lane_index": (">", ">>", 0),
        },
        "agent1": {
            "spawn_longitude": 10,  # locate a little forward
            "spawn_lateral": 0,
            "spawn_lane_index": (">", ">>", 1),
        },
    }
    env = MultiAgentRoundaboutEnv(
        {
            "target_vehicle_configs": spawn_configs,
            "num_agents": 2,
            "traffic_density": 0,
            "delay_done": 100,
            "horizon": 100,
        }
    )
    try:
        agent0_done = False
        agent1_already_hit = False
        env.reset()
        for _ in range(1, 300):
            # Only command the agents that still exist in the environment.
            actions = {name: [1, 1] for name in ("agent0", "agent1") if name in env.vehicles}
            o, r, d, info = env.step(actions)

            if agent0_done:
                assert "agent0" not in o
                assert "agent0" not in info
                assert "agent0" not in d

            if d.get("agent0"):
                agent0_done = True

            if agent0_done and info["agent1"][TerminationState.CRASH_VEHICLE]:
                agent1_already_hit = True
                print("Hit!")

            if d["__all__"]:
                assert agent1_already_hit
                agent0_done = False
                agent1_already_hit = False
                env.reset()
    finally:
        env.close()

    env = MultiAgentRoundaboutEnv(dict(num_agents=5, delay_done=10, horizon=100))
    try:
        env.reset()
        dead = set()
        for _ in range(300):
            o, r, d, i = env.step({name: [1, 1] for name in env.vehicles.keys()})
            for dead_name in dead:
                assert dead_name not in o
            print("{} there!".format(env.vehicles.keys()))
            print("{} dead!".format([name for name, finished in d.items() if finished]))
            dead.update(name for name, finished in d.items() if finished and name != "__all__")
            if d["__all__"]:
                env.reset()
                dead.clear()
    finally:
        env.close()
def test_respawn():
    """Check that finished agents are replaced by freshly numbered agents.

    Two agents steer in opposite directions. When a tracked agent is done, it
    must have gone out of the road with the configured cost and penalty, and
    the test starts tracking the next agent name (``agent2``, ``agent3``,
    ...). Every 100 steps the env is reset and tracking restarts from
    ``agent0`` / ``agent1``. At least one done must have been observed.
    """
    out_of_road_cost = 5555
    out_of_road_penalty = 2222
    initial_pair = {"agent0", "agent1"}

    env = MultiAgentRoundaboutEnv(
        {
            "num_agents": 2,
            "out_of_road_cost": out_of_road_cost,
            "out_of_road_penalty": out_of_road_penalty,
            "delay_done": 0,  # Since we are testing respawn!
            "crash_done": False,
        }
    )

    def _assert_agent_bookkeeping(expected_vehicles):
        # Spaces and configs always list both agents; vehicles only exist
        # once the environment has been reset.
        assert set(env.observations.keys()) == initial_pair
        assert set(env.action_space.spaces.keys()) == initial_pair
        assert set(env.config["target_vehicle_configs"].keys()) == initial_pair
        assert set(env.vehicles.keys()) == expected_vehicles

    try:
        _assert_agent_bookkeeping(set())  # Not initialized yet!

        o = env.reset()
        assert set(o.keys()) == initial_pair
        _assert_agent_bookkeeping(initial_pair)

        # tracked[0] steers with -1, tracked[1] steers with 1.
        tracked = ["agent0", "agent1"]
        next_index = 2
        tracks = []
        done_count = 0
        for tick in range(1, 1000):
            o, r, d, info = env.step({tracked[0]: [-1, 1], tracked[1]: [1, 1]})
            assert set(o.keys()) == set(r.keys()) == set(info.keys())
            assert set(o.keys()).union({"__all__"}) == set(d.keys())
            tracks.append(d)

            for slot, name in enumerate(tracked):
                if not d[name]:
                    continue
                assert info[name][TerminationState.OUT_OF_ROAD]
                assert info[name]["cost"] == out_of_road_cost
                assert r[name] == -out_of_road_penalty
                tracked[slot] = "agent{}".format(next_index)
                next_index += 1
                done_count += 1

            if all(d.values()):
                raise ValueError()

            if tick % 100 == 0:  # Horizon
                tracked = ["agent0", "agent1"]
                next_index = 2
                o = env.reset()
                assert set(o.keys()) == initial_pair
                _assert_agent_bookkeeping(initial_pair)
    finally:
        env.close()

    assert done_count > 0
    print("Finish {} dones.".format(done_count))
def test_ma_roundabout_reward_done_alignment():
    """Check that rewards, done flags and termination info agree.

    Four scenarios are run one after another, each with its own environment:

    1. Out of road: agents steer fully to one side; a done agent must have
       reward -777 and the OUT_OF_ROAD flag, and vice versa.
    2. Crash: ``agent0`` is placed at ``agent1``'s position; once anything is
       done, both agents must be flagged CRASH / CRASH_VEHICLE with reward
       -1.7777.
    3. Crash 2: every vehicle except ``agent0`` is set static while
       ``agent0`` drives; CRASH and CRASH_VEHICLE must imply each other.
    4. Success: ``agent0`` is moved to the end of its final lane and must be
       done with reward 999 and the SUCCESS flag, while ``agent1`` is not.
    """
    # out of road
    env = MultiAgentRoundaboutEnv({"horizon": 200, "num_agents": 4, "out_of_road_penalty": 777, "crash_done": False})
    try:
        _check_spaces_before_reset(env)
        obs = env.reset()
        _check_spaces_after_reset(env, obs)
        assert env.observation_space.contains(obs)
        # Steer fully to one side, then fully to the other.
        for action in [-1, 1]:
            for step in range(5000):
                act = {k: [action, 1] for k in env.vehicles.keys()}
                o, r, d, i = _act(env, act)
                # done => penalty reward and OUT_OF_ROAD flag
                for kkk, ddd in d.items():
                    if ddd and kkk != "__all__":
                        assert r[kkk] == -777
                        assert i[kkk][TerminationState.OUT_OF_ROAD]
                        # print('{} done passed!'.format(kkk))
                # penalty reward => done and OUT_OF_ROAD flag
                for kkk, rrr in r.items():
                    if rrr == -777:
                        assert d[kkk]
                        assert i[kkk][TerminationState.OUT_OF_ROAD]
                        # print('{} reward passed!'.format(kkk))
                if d["__all__"]:
                    env.reset()
                    break
    finally:
        env.close()

    # crash
    env = MultiAgentRoundaboutEnv(
        {
            "horizon": 100,
            "num_agents": 2,
            "crash_vehicle_penalty": 1.7777,
            "crash_done": True,
            "delay_done": 0,
            # "use_render": True,
            # "fast": True,
            "top_down_camera_initial_z": 160
        }
    )
    # Force the seed here so that agent0 and agent1 have the same heading! Otherwise they might be
    # perpendicular to each other and cause one of the vehicles to raise an "out of road" error!
    env._DEBUG_RANDOM_SEED = 1
    try:
        _check_spaces_before_reset(env)
        obs = env.reset()
        _check_spaces_after_reset(env, obs)
        for step in range(5):
            act = {k: [0, 0] for k in env.vehicles.keys()}
            o, r, d, i = _act(env, act)
        # Put agent0 at agent1's position to provoke a collision.
        env.vehicles["agent0"].set_position(env.vehicles["agent1"].position, height=1.2)
        for step in range(5000):
            act = {k: [0, 0] for k in env.vehicles.keys()}
            o, r, d, i = _act(env, act)

            # Keep stepping until at least one done flag is raised.
            if not any(d.values()):
                continue

            assert sum(d.values()) == 2

            for kkk in ['agent0', 'agent1']:
                iii = i[kkk]
                assert iii[TerminationState.CRASH_VEHICLE]
                assert iii[TerminationState.CRASH]
                assert r[kkk] == -1.7777
                # for kkk, ddd in d.items():
                ddd = d[kkk]
                if ddd and kkk != "__all__":
                    assert r[kkk] == -1.7777
                    assert i[kkk][TerminationState.CRASH_VEHICLE]
                    assert i[kkk][TerminationState.CRASH]
                    # print('{} done passed!'.format(kkk))
                # for kkk, rrr in r.items():
                rrr = r[kkk]
                if rrr == -1.7777:
                    assert d[kkk]
                    assert i[kkk][TerminationState.CRASH_VEHICLE]
                    assert i[kkk][TerminationState.CRASH]
                    # print('{} reward passed!'.format(kkk))
            # assert d["__all__"]
            # if d["__all__"]:
            break
    finally:
        env._DEBUG_RANDOM_SEED = None
        env.close()

    # crash with real fixed vehicle

    # crash 2
    env = MultiAgentRoundaboutEnv(
        {
            "map_config": {
                "exit_length": 110,
                "lane_num": 1
            },
            # "use_render": True,
            # "fast": True,
            "horizon": 200,
            "num_agents": 40,
            "crash_vehicle_penalty": 1.7777,
        }
    )
    try:
        _check_spaces_before_reset(env)
        obs = env.reset()
        _check_spaces_after_reset(env, obs)
        for step in range(1):
            act = {k: [0, 0] for k in env.vehicles.keys()}
            o, r, d, i = _act(env, act)

        # Freeze every vehicle except agent0.
        for v_id, v in env.vehicles.items():
            if v_id != "agent0":
                v.set_static(True)

        for step in range(5000):
            act = {k: [0, 1] for k in env.vehicles.keys()}
            o, r, d, i = _act(env, act)
            # CRASH and CRASH_VEHICLE must imply each other; a vehicle crash
            # also implies the crash penalty reward.
            for kkk, iii in i.items():
                if iii[TerminationState.CRASH]:
                    assert iii[TerminationState.CRASH_VEHICLE]
                if iii[TerminationState.CRASH_VEHICLE]:
                    assert iii[TerminationState.CRASH]
                    assert r[kkk] == -1.7777
            # Any agent that is done in this scenario must be out of road.
            for kkk, ddd in d.items():
                if ddd and kkk != "__all__":
                    assert i[kkk][TerminationState.OUT_OF_ROAD]
                    # print('{} done passed!'.format(kkk))
            for kkk, rrr in r.items():
                if rrr == -1.7777:
                    # assert d[kkk]
                    assert i[kkk][TerminationState.CRASH_VEHICLE]
                    assert i[kkk][TerminationState.CRASH]
                    # print('{} reward passed!'.format(kkk))
            if d["agent0"]:
                break
            if d["__all__"]:
                break
    finally:
        env.close()

    # success
    env = MultiAgentRoundaboutEnv(
        {
            "horizon": 100,
            "num_agents": 2,
            "success_reward": 999,
            "out_of_road_penalty": 555,
            "crash_done": True
        }
    )
    try:
        _check_spaces_before_reset(env)
        obs = env.reset()
        _check_spaces_after_reset(env)
        # Move agent0 to the end of its final lane.
        env.vehicles["agent0"].set_position(env.vehicles["agent0"].routing_localization.final_lane.end)
        assert env.observation_space.contains(obs)
        for step in range(5000):
            act = {k: [0, 0] for k in env.vehicles.keys()}
            o, r, d, i = _act(env, act)
            if d["__all__"]:
                break
            kkk = "agent0"
            assert r[kkk] == 999
            assert i[kkk][TerminationState.SUCCESS]
            assert d[kkk]

            kkk = "agent1"
            assert r[kkk] != 999
            assert not i[kkk][TerminationState.SUCCESS]
            assert not d[kkk]
            # Only the first step is inspected.
            break
    finally:
        env.close()