Python MultiAgentRoundaboutEnv.reset示例，pgdrive.envs.marl_envs.marl_inout_roundabout.MultiAgentRoundaboutEnv.reset Python示例

示例#1

0

显示文件

def test_ma_roundabout_init_space():
    """Check the "agent0" spaces and the space helpers over a grid of configs.

    Every combination of start seed, agent count, lidar ``num_others`` and
    crash-vehicle penalty gets its own environment, which is inspected,
    reset once and closed again.
    """
    # Same iteration order as four nested loops: seed outermost, penalty innermost.
    env_configs = [
        dict(
            start_seed=seed,
            num_agents=agent_count,
            vehicle_config=dict(lidar=dict(num_others=lidar_others)),
            crash_vehicle_penalty=penalty
        )
        for seed in [5000, 6000, 7000]
        for agent_count in [16, 32]
        for lidar_others in [0, 2, 4, 8]
        for penalty in [0, 5]
    ]

    env = None
    try:
        for env_config in env_configs:
            env = MultiAgentRoundaboutEnv(env_config)

            # The observation space is checked first, then the action space.
            for space_attr in ("observation_space", "action_space"):
                single_space = getattr(env, space_attr)["agent0"]
                assert single_space.shape is not None, single_space
                assert np.prod(single_space.shape) is not None, single_space

            _check_spaces_before_reset(env)
            env.reset()
            _check_spaces_after_reset(env)
            env.close()
            print('Finish: ', env_config)
    finally:
        # Reached on success and on failure; closes the most recently built env.
        if env is not None:
            env.close()

示例#2

0

显示文件

文件： test_ma_roundabout_env.py 项目： decisionforce/pgdrive

def test_ma_roundabout_reward_done_alignment():
    """Check that done flags, rewards and info agree for out-of-road agents.

    All agents steer fully to one side (first -1, then 1) with full throttle.
    Every finished agent must be flagged OUT_OF_ROAD, and every agent that
    received the -777 reward must be done and flagged OUT_OF_ROAD. The
    reward-equals-penalty check on done agents is disabled in this variant.
    """
    env = MultiAgentRoundaboutEnv(
        {"horizon": 200, "num_agents": 4, "out_of_road_penalty": 777, "crash_done": False}
    )
    try:
        _check_spaces_before_reset(env)
        obs = env.reset()
        _check_spaces_after_reset(env, obs)
        assert env.observation_space.contains(obs)

        for steering in [-1, 1]:
            for _ in range(5000):
                act = {agent_id: [steering, 1] for agent_id in env.vehicles.keys()}
                o, rewards, dones, infos = _act(env, act)

                # done -> info alignment
                for agent_id, finished in dones.items():
                    if not finished or agent_id == "__all__":
                        continue
                    assert infos[agent_id][TerminationState.OUT_OF_ROAD]

                # reward -> done/info alignment
                for agent_id, reward in rewards.items():
                    if reward != -777:
                        continue
                    assert dones[agent_id]
                    assert infos[agent_id][TerminationState.OUT_OF_ROAD]

                if dones["__all__"]:
                    env.reset()
                    break
    finally:
        env.close()

示例#3

0

显示文件

def test_ma_roundabout_40_agent_reset_after_respawn():
    """Reset a 40-agent env repeatedly and check that no vehicles overlap.

    After each reset the positions are checked, the first 20 agents are
    finished through the agent manager, and the env is stepped three times
    before the next reset.
    """
    def check_pos(vehicles):
        # Compare each vehicle with every later one, then check its crash flag.
        for idx, first in enumerate(vehicles):
            for second in vehicles[idx + 1:]:
                first_pos = first.position
                second_pos = second.position
                gap = norm(first_pos[0] - second_pos[0], first_pos[1] - second_pos[1])
                assert gap > first.WIDTH / 2 + second.WIDTH / 2, "Vehicles overlap after reset()"
            assert not first.crash_vehicle, "Vehicles overlap after reset()"

    env = MultiAgentRoundaboutEnv({"horizon": 50, "num_agents": 40})
    try:
        _check_spaces_before_reset(env)
        obs = env.reset()
        _check_spaces_after_reset(env, obs)
        assert env.observation_space.contains(obs)

        for _ in range(50):
            env.reset()
            check_pos(list(env.vehicles.values()))

            first_twenty = list(env.vehicles.keys())[:20]
            for v_id in first_twenty:
                env.agent_manager.finish(v_id)

            # Three steps; the action dict is rebuilt from the current vehicles each time.
            for _ in range(3):
                env.step({agent_id: [1, 1] for agent_id in env.vehicles.keys()})
    finally:
        env.close()

示例#4

0

显示文件

def test_infinite_agents():
    """Run two envs built with ``num_agents=-1`` and check agent bookkeeping.

    In both runs, any agent reported done must have an ``episode_length``
    greater than 1. After the second run, the largest number of simultaneously
    active vehicles must exceed the number present right after reset().
    """
    def _assert_finished_agents_lasted(dones, infos):
        # Every agent flagged done must have lived for more than one step.
        for agent_id, agent_info in infos.items():
            if dones[agent_id]:
                assert agent_info["episode_length"] > 1

    env = MultiAgentRoundaboutEnv(
        {
            "map_config": dict(exit_length=20, lane_num=2),
            "num_agents": -1,
            "delay_done": 50,
            "horizon": 50,
            "debug": True,
            "random_traffic": False
        }
    )
    try:
        o = env.reset()
        env.seed(100)
        env._DEBUG_RANDOM_SEED = 100
        old_num_of_vehicles = len(env.vehicles)
        max_num = old_num_of_vehicles
        for i in range(1, 1000):
            o, r, d, info = env.step({agent_id: [1, 1] for agent_id in env.vehicles})
            print(
                "{} Current active agents: ".format(i), len(env.vehicles), ". Objects: ",
                len(env.agent_manager._object_to_agent)
            )
            max_num = max(len(env.vehicles), max_num)
            _assert_finished_agents_lasted(d, info)
            if d["__all__"]:
                o = env.reset()
                print("Finish {} steps.".format(i))
    finally:
        env._DEBUG_RANDOM_SEED = None
        env.close()

    # Second run: no done delay, and the counters feeding the final assertion.
    env = MultiAgentRoundaboutEnv({"num_agents": -1, "delay_done": 0, "horizon": 50, "debug": True})
    try:
        o = env.reset()
        old_num_of_vehicles = len(env.vehicles)
        max_num = old_num_of_vehicles
        for i in range(1, 300):
            o, r, d, info = env.step({agent_id: [0, 1] for agent_id in env.vehicles})
            max_num = max(len(env.vehicles), max_num)
            _assert_finished_agents_lasted(d, info)
            if d["__all__"]:
                o = env.reset()
                print("Finish {} steps.".format(i))
    finally:
        env.close()
    assert max_num > old_num_of_vehicles

示例#5

0

显示文件

文件： test_ma_roundabout_env.py 项目： decisionforce/pgdrive

def test_ma_roundabout_horizon_termination():
    """Check that static agents terminate with MAX_STEP and then disappear.

    "agent0" and "agent7" drive with action [1, 1]; every other agent is set
    static. A non-special agent that reports done must carry MAX_STEP and
    none of OUT_OF_ROAD / CRASH / CRASH_VEHICLE, and must be absent from
    the outputs of the following step.
    """
    env = MultiAgentRoundaboutEnv({"horizon": 100, "num_agents": 8, "crash_done": False})
    try:
        # This function is really easy to break, repeat multiple times!
        for _ in range(3):
            _check_spaces_before_reset(env)
            obs = env.reset()
            _check_spaces_after_reset(env, obs)
            assert env.observation_space.contains(obs)

            should_respawn = set()
            special_agents = {"agent0", "agent7"}
            for step in range(1, 10000):
                act = {agent_id: [0, 0] for agent_id in env.vehicles.keys()}
                for v_id in act:
                    if v_id in special_agents:
                        act[v_id] = [1, 1]  # Add some randomness
                    elif v_id in env.vehicles:
                        env.vehicles[v_id].set_static(True)

                obs, r, d, i = _act(env, act)
                if step in (0, 1):
                    assert not any(d.values())

                # Agents that finished on the previous step must be gone from every output.
                for gone in should_respawn:
                    assert gone not in obs, "It seems the max_step agents is not respawn!"
                    assert gone not in r
                    assert gone not in d
                    assert gone not in i
                should_respawn.clear()

                for agent_id, finished in d.items():
                    if not finished:
                        continue
                    if agent_id == "__all__":
                        print("Current: ", step)
                        continue
                    if agent_id not in special_agents:
                        agent_info = i[agent_id]
                        assert agent_info[TerminationState.MAX_STEP]
                        assert not agent_info[TerminationState.OUT_OF_ROAD]
                        assert not agent_info[TerminationState.CRASH]
                        assert not agent_info[TerminationState.CRASH_VEHICLE]
                        should_respawn.add(agent_id)

                if d["__all__"]:
                    obs = env.reset()
                    should_respawn.clear()
                    break
    finally:
        env.close()

示例#6

0

显示文件

def test_save_episode(vis=False):
    """Record episodes, dump them to JSON and replay the last one.

    1. ``record_episode=True`` makes the env record each episode.
    2. When ``done["__all__"]`` is true the episode is dumped via
       ``env.engine.dump_episode()``.
    3. Dumps are written to ``test_dump_<k>.json`` with ``k`` cycling over
       the five most recent slots.
    4. Passing the dumped episode data to ``reset()`` replays the episode.

    :param vis: enable rendering and manual control, and use much longer loops.
    """
    setup_logger(True)

    test_dump = True
    dump_recent_episode = 5
    dump_count = 0
    record_steps = 100000 if vis else 600
    replay_steps = 100000 if vis else 2000

    env = MultiAgentRoundaboutEnv(
        dict(use_render=vis, manual_control=vis, record_episode=True, horizon=100)
    )
    try:
        # Test Record
        o = env.reset()
        epi_info = None
        for i in range(1, record_steps):
            record_action = {agent_id: [0, .2] for agent_id in env.vehicles.keys()}
            o, r, d, info = env.step(record_action)
            if vis:
                env.render()
            if not d["__all__"]:
                continue

            epi_info = env.engine.dump_episode()
            # test dump json
            if test_dump:
                dump_path = "test_dump_{}.json".format(dump_count)
                with open(dump_path, "w") as f:
                    json.dump(epi_info, f)
                # Wrap around so that only the most recent slots are kept.
                dump_count = (dump_count + 1) % dump_recent_episode
            env.reset()

        # input episode_info to restore
        o = env.reset(epi_info)
        for i in range(1, replay_steps):
            replay_action = {agent_id: [0, 0.1] for agent_id in env.vehicles.keys()}
            o, r, d, info = env.step(replay_action)
            if vis:
                env.render()
            if d["__all__"]:
                break
    finally:
        env.close()

示例#7

0

显示文件

def test_randomize_spawn_place():
    """Check that reset() never leaves an agent at its pre-step position.

    Each iteration records every vehicle's position, takes one step, resets,
    and asserts that each agent's new position differs from the recorded one.
    """
    last_pos = {}
    env = MultiAgentRoundaboutEnv({"num_agents": 4, "use_render": False, "fast": True})
    try:
        obs = env.reset()
        for _ in range(1000):
            act = {agent_id: [1, 1] for agent_id in env.vehicles.keys()}
            last_pos = {agent_id: vehicle.position for agent_id, vehicle in env.vehicles.items()}
            o, r, d, i = env.step(act)
            obs = env.reset()
            for agent_id, vehicle in env.vehicles.items():
                new_p = vehicle.position
                old_p = last_pos[agent_id]
                assert not np.all(new_p == old_p), (new_p, old_p, agent_id)
    finally:
        env.close()

示例#8

0

显示文件

def test_ma_no_reset_error():
    """Step a 40-agent env and check before every step that no vehicles overlap.

    It is possible that many agents are populated in the same spawn place,
    so the pairwise distance and the crash flag are verified on each step.
    """
    def check_pos(vehicles):
        # Compare each vehicle with every later one, then check its crash flag.
        for idx, first in enumerate(vehicles):
            for second in vehicles[idx + 1:]:
                first_pos = first.position
                second_pos = second.position
                gap = norm(first_pos[0] - second_pos[0], first_pos[1] - second_pos[1])
                assert gap > first.WIDTH / 2 + second.WIDTH / 2, "Vehicles overlap after reset()"
            assert not first.crash_vehicle, "Vehicles overlap after reset()"

    env = MultiAgentRoundaboutEnv({"horizon": 300, "num_agents": 40, "delay_done": 0})
    try:
        _check_spaces_before_reset(env)
        obs = env.reset()
        _check_spaces_after_reset(env, obs)
        assert env.observation_space.contains(obs)

        for _ in range(300):
            check_pos(list(env.vehicles.values()))
            forward = {agent_id: [0, 1] for agent_id in env.vehicles.keys()}
            o, r, d, i = env.step(forward)
            if d["__all__"]:
                break
    finally:
        env.close()

示例#9

0

显示文件

def test_ma_roundabout_close_spawn():
    """Check that vehicles are never spawned too close to each other.

    The env is reset ten times; after each reset it is stepped ten times with
    a zero action (nobody may be done), and then every pair of vehicles must
    satisfy ``distance_greater(..., length=2.2)``.

    The class-level ``_DEBUG_RANDOM_SEED`` override is always restored to
    ``None``, and the env is always closed, even if construction, seeding or
    an assertion fails.
    """
    def _no_close_spawn(vehicles):
        # Pairwise check over every unordered pair of vehicles.
        vehicles = list(vehicles.values())
        for idx, v1 in enumerate(vehicles):
            for v2 in vehicles[idx + 1:]:
                assert distance_greater(v1.position, v2.position, length=2.2)

    MultiAgentRoundaboutEnv._DEBUG_RANDOM_SEED = 1
    try:
        env = MultiAgentRoundaboutEnv({"horizon": 50, "num_agents": 16, "map_config": {"exit_length": 30}})
        try:
            env.seed(100)
            _check_spaces_before_reset(env)
            for num_r in range(10):
                obs = env.reset()
                _check_spaces_after_reset(env)
                for _ in range(10):
                    o, r, d, i = env.step({k: [0, 0] for k in env.vehicles.keys()})
                    assert not any(d.values())
                _no_close_spawn(env.vehicles)
                print('Finish {} resets.'.format(num_r))
        finally:
            env.close()
    finally:
        # The seed override is class-level state; restore it on every path.
        MultiAgentRoundaboutEnv._DEBUG_RANDOM_SEED = None

示例#10

0

显示文件

文件： test_ma_roundabout_env.py 项目： decisionforce/pgdrive

def test_ma_roundabout_horizon():
    """Check done/reward/cost/info consistency while agents run to the horizon.

    Three fresh 4-agent envs are driven with action [1, 1]. The test asserts:

    * nobody is done right after the first step;
    * whenever somebody is done, at most four agents exist before and after
      the step, and every newly appeared agent is present in the observation
      and done dicts;
    * a -777 reward implies done, cost 778 and OUT_OF_ROAD;
    * OUT_OF_ROAD or cost 778 in an agent's info implies done, cost 778 and
      OUT_OF_ROAD.
    """
    # This function is really easy to break, repeat multiple times!
    for _ in range(3):
        env = MultiAgentRoundaboutEnv({
            "horizon": 100,
            "num_agents": 4,
            "vehicle_config": {
                "lidar": {
                    "num_others": 2
                }
            },
            "out_of_road_penalty": 777,
            "out_of_road_cost": 778,
            "crash_done": False
        })
        try:
            _check_spaces_before_reset(env)
            obs = env.reset()
            _check_spaces_after_reset(env, obs)
            assert env.observation_space.contains(obs)
            last_keys = set(env.vehicles.keys())
            for step in range(1, 1000):
                act = {k: [1, 1] for k in env.vehicles.keys()}
                o, r, d, i = _act(env, act)
                new_keys = set(env.vehicles.keys())

                # Steps are numbered from 1, so the first-step check tests for 1.
                if step == 1:
                    assert not any(d.values())

                if any(d.values()):
                    assert len(last_keys) <= 4  # num of agents
                    assert len(new_keys) <= 4  # num of agents
                    for k in new_keys.difference(last_keys):
                        assert k in o
                        assert k in d
                    print("Step {}, Done: {}".format(step, d))

                for kkk, rrr in r.items():
                    if rrr == -777:
                        assert d[kkk]
                        assert i[kkk]["cost"] == 778
                        assert i[kkk][TerminationState.OUT_OF_ROAD]

                for kkk, iii in i.items():
                    if iii and (iii[TerminationState.OUT_OF_ROAD] or iii["cost"] == 778):
                        assert d[kkk]
                        assert i[kkk]["cost"] == 778
                        assert i[kkk][TerminationState.OUT_OF_ROAD]

                if d["__all__"]:
                    break
                last_keys = new_keys
        finally:
            env.close()

示例#11

0

显示文件

文件： test_ma_env_force_reset.py 项目： decisionforce/pgdrive

def test_ma_env_force_reset():
    """Check that an env can be closed and re-initialised with more agents.

    Starting from one agent, the same env object is closed and re-run through
    ``__init__`` with 2 and then 5 agents. After every reset the number of
    vehicles, ``num_agents`` and the number of target vehicle configs must all
    equal the requested agent count. The env is closed even if a check fails.
    """
    def close_and_reset_num_agents(env, num_agents, raw_input_config):
        # Work on a copy so that the pristine input config is never modified.
        config = copy.deepcopy(raw_input_config)
        env.close()
        config["num_agents"] = num_agents
        env.__init__(config)

    def _assert_agent_count(env, expected):
        assert len(env.vehicles) == env.num_agents == len(
            env.config["target_vehicle_configs"]) == expected

    config = {'num_agents': 1}
    e = MultiAgentRoundaboutEnv(config)
    try:
        _raw_input_config = copy.deepcopy(config)
        e.reset()
        _assert_agent_count(e, 1)

        for num_agents in (2, 5):
            close_and_reset_num_agents(e,
                                       num_agents=num_agents,
                                       raw_input_config=_raw_input_config)
            e.reset()
            _assert_agent_count(e, num_agents)
    finally:
        e.close()

示例#12

0

显示文件

文件： test_ma_roundabout_env.py 项目： decisionforce/pgdrive

def test_ma_roundabout_no_short_episode():
    """Drive 40 agents with random actions and check finished-episode lengths.

    Before each step the observation keys (plus "__all__") must equal the
    action-space keys joined with the keys of the last done dict. Every agent
    reported done must have ``episode_length >= 1``. The loop ends after 2000
    steps or once more than 200 done events have been counted.
    """
    env = MultiAgentRoundaboutEnv({"horizon": 300, "num_agents": 40})
    try:
        _check_spaces_before_reset(env)
        o = env.reset()
        _check_spaces_after_reset(env, o)

        actions = [[0, 1], [1, 1], [-1, 1]]
        start = time.time()
        d_count = 0
        d = {"__all__": False}
        for step in range(2000):
            act = {}
            for agent_id in env.vehicles.keys():
                act[agent_id] = actions[np.random.choice(len(actions))]

            o_keys = set(o.keys()) | {"__all__"}
            a_keys = set(env.action_space.spaces.keys()) | set(d.keys())
            assert o_keys == a_keys

            o, r, d, i = _act(env, act)
            for agent_id, agent_info in i.items():
                if not d[agent_id]:
                    continue
                assert agent_info["episode_length"] >= 1
                d_count += 1

            if d["__all__"]:
                o = env.reset()
                d = {"__all__": False}

            finished_steps = step + 1
            if finished_steps % 100 == 0:
                print(
                    "Finish {}/2000 simulation steps. Time elapse: {:.4f}. Average FPS: {:.4f}"
                    .format(finished_steps,
                            time.time() - start,
                            finished_steps / (time.time() - start)))
            if d_count > 200:
                break
    finally:
        env.close()

示例#13

0

显示文件

文件： test_ma_env_force_reset.py 项目： Edwardhk/pgdrive

def test_ma_env_force_reset():
    """Check ``close_and_reset_num_agents`` for 2 and then 5 agents.

    After every reset the number of vehicles, ``num_agents`` and the number of
    target vehicle configs must all equal the requested agent count. The env
    is closed even if one of the checks fails.
    """
    def _assert_agent_count(env, expected):
        assert len(env.vehicles) == env.num_agents == len(
            env.config["target_vehicle_configs"]) == expected

    e = MultiAgentRoundaboutEnv({'num_agents': 1})
    try:
        e.reset()
        _assert_agent_count(e, 1)

        for num_agents in (2, 5):
            e.close_and_reset_num_agents(num_agents=num_agents)
            e.reset()
            _assert_agent_count(e, num_agents)
    finally:
        e.close()

示例#14

0

显示文件

def test_ma_roundabout_reset():
    """Check space/key consistency after reset() in two scenarios.

    1. A 4-agent env is stepped until ``d["__all__"]``; after the following
       reset the observation space, action space, ``env.observations``, the
       returned observation and the target vehicle configs must share the
       same key set.
    2. In a 32-agent env every vehicle is repeatedly teleported to the end of
       its final lane so that it succeeds; each done agent must then report
       SUCCESS and receive the configured success reward of 777.
    """
    env = MultiAgentRoundaboutEnv({"horizon": 50, "num_agents": 4})
    try:
        _check_spaces_before_reset(env)
        obs = env.reset()
        _check_spaces_after_reset(env, obs)
        assert env.observation_space.contains(obs)
        for step in range(1000):
            act = {k: [1, 1] for k in env.vehicles.keys()}
            o, r, d, i = _act(env, act)
            # Nobody may be done right after the very first step.
            if step == 0:
                assert not any(d.values())
            if d["__all__"]:
                obs = env.reset()
                assert env.observation_space.contains(obs)

                _check_spaces_after_reset(env, obs)
                # All five views of the agent population must agree on the keys.
                assert set(env.observation_space.spaces.keys()) == set(env.action_space.spaces.keys()) == \
                       set(env.observations.keys()) == set(obs.keys()) == \
                       set(env.config["target_vehicle_configs"].keys())

                break
    finally:
        env.close()

    # Put vehicles to destination and then reset. This might cause error if agent is assigned destination BEFORE reset.
    env = MultiAgentRoundaboutEnv({"horizon": 100, "num_agents": 32, "success_reward": 777})
    try:
        _check_spaces_before_reset(env)
        success_count = 0
        agent_count = 0
        obs = env.reset()
        _check_spaces_after_reset(env, obs)
        assert env.observation_space.contains(obs)

        for num_reset in range(5):
            for step in range(1000):

                # Two ordinary driving steps before the vehicles are teleported.
                for _ in range(2):
                    act = {k: [1, 1] for k in env.vehicles.keys()}
                    o, r, d, i = _act(env, act)

                # Force vehicle to success!
                for v_id, v in env.vehicles.items():
                    loc = v.routing_localization.final_lane.end
                    v.set_position(loc)
                    pos = v.position
                    np.testing.assert_almost_equal(pos, loc, decimal=3)
                    # NOTE(review): new_loc, long, lat, flag1 and flag2 are computed but never
                    # read afterwards in this test.
                    new_loc = v.routing_localization.final_lane.end
                    long, lat = v.routing_localization.final_lane.local_coordinates(v.position)
                    flag1 = (
                        v.routing_localization.final_lane.length - 5 < long <
                        v.routing_localization.final_lane.length + 5
                    )
                    flag2 = (
                        v.routing_localization.get_current_lane_width() / 2 >= lat >=
                        (0.5 - v.routing_localization.get_current_lane_num()) *
                        v.routing_localization.get_current_lane_width()
                    )
                    # Debug marker printed just before the assertion below fails.
                    if not v.arrive_destination:
                        print('sss')
                    assert v.arrive_destination

                # One zero-action step so that the env reports the teleported vehicles.
                act = {k: [0, 0] for k in env.vehicles.keys()}
                o, r, d, i = _act(env, act)

                for v in env.vehicles.values():
                    assert len(v.routing_localization.checkpoints) > 2

                # Count every agent whose info reports SUCCESS.
                for kkk, iii in i.items():
                    if iii and iii[TerminationState.SUCCESS]:
                        # print("{} success!".format(kkk))
                        success_count += 1

                # Every done agent must have terminated with SUCCESS.
                for kkk, ddd in d.items():
                    if ddd and kkk != "__all__":
                        assert i[kkk][TerminationState.SUCCESS]
                        agent_count += 1

                # Every done agent must have received the success reward.
                for kkk, rrr in r.items():
                    if d[kkk]:
                        assert rrr == 777

                if d["__all__"]:
                    print("Finish {} agents. Success {} agents.".format(agent_count, success_count))
                    o = env.reset()
                    assert env.observation_space.contains(o)
                    _check_spaces_after_reset(env, o)
                    break
    finally:
        env.close()

示例#15

0

显示文件

文件： test_marl_reborn.py 项目： decisionforce/pgdrive

def test_delay_done():
    """Check that done agents vanish from the outputs while ``delay_done`` is set.

    Scenario 1: put agent 0 in the left, agent 1 in the right, and let agent 0
    dead at first. We wish to see agent 1 hits the dead body of agent 0: once
    agent0 is done it must be absent from obs/info/done, and by the time the
    episode ends agent1 must have reported CRASH_VEHICLE.

    Scenario 2: with five agents, an agent that was reported done must not
    show up in later observations of the same episode.
    """
    spawn_configs = {
        "agent0": {
            "spawn_longitude": 12,
            "spawn_lateral": 0,
            "spawn_lane_index": (">", ">>", 0),
        },
        "agent1": {
            "spawn_longitude": 10,  # locate a little forward
            "spawn_lateral": 0,
            "spawn_lane_index": (">", ">>", 1),
        }
    }
    env = MultiAgentRoundaboutEnv({
        "target_vehicle_configs": spawn_configs,
        "num_agents": 2,
        "traffic_density": 0,
        "delay_done": 100,
        "horizon": 100
    })
    try:
        agent0_done = False
        agent1_already_hit = False
        o = env.reset()
        for i in range(1, 300):
            # Only send actions for the agents that still own a vehicle.
            actions = {name: [1, 1] for name in ("agent0", "agent1") if name in env.vehicles}
            o, r, d, info = env.step(actions)

            if agent0_done:
                for output in (o, info, d):
                    assert "agent0" not in output
            if d.get("agent0"):
                agent0_done = True
            if agent0_done and info["agent1"][TerminationState.CRASH_VEHICLE]:
                agent1_already_hit = True
                print("Hit!")

            if d["__all__"]:
                assert agent1_already_hit
                agent0_done = False
                agent1_already_hit = False
                env.reset()
    finally:
        env.close()

    env = MultiAgentRoundaboutEnv({"num_agents": 5, "delay_done": 10, "horizon": 100})
    try:
        env.reset()
        dead = set()
        for _ in range(300):
            o, r, d, i = env.step({agent_id: [1, 1] for agent_id in env.vehicles.keys()})
            for dead_name in dead:
                assert dead_name not in o
            print("{} there!".format(env.vehicles.keys()))
            print("{} dead!".format([name for name, flag in d.items() if flag]))
            dead.update(name for name, flag in d.items() if flag and name != "__all__")
            if d["__all__"]:
                env.reset()
                dead.clear()
    finally:
        env.close()

示例#16

0

显示文件

文件： test_marl_reborn.py 项目： decisionforce/pgdrive

def test_respawn():
    """Check agent naming and out-of-road bookkeeping across respawns.

    Two agents steer hard in opposite directions. Whenever the agent tracked
    in a slot is done it must report OUT_OF_ROAD with the configured cost and
    the negated penalty as reward, and the slot then follows the next id
    "agent<count>". Every 100 steps the env is reset and the ids must be back
    to "agent0"/"agent1". At least one done event must have been seen.
    """
    out_of_road_cost = 5555
    out_of_road_penalty = 2222
    initial_agents = {"agent0", "agent1"}

    env = MultiAgentRoundaboutEnv({
        "num_agents": 2,
        "out_of_road_cost": out_of_road_cost,
        "out_of_road_penalty": out_of_road_penalty,
        "delay_done": 0,  # Since we are testing respawn!
        "crash_done": False,
    })
    try:
        def _assert_registered_agents():
            # Observations, action space and target configs must list both initial agents.
            assert set(env.observations.keys()) == initial_agents
            assert set(env.action_space.spaces.keys()) == initial_agents
            assert set(env.config["target_vehicle_configs"].keys()) == initial_agents

        _assert_registered_agents()
        assert set(env.vehicles.keys()) == set()  # Not initialized yet!

        o = env.reset()

        assert set(o.keys()) == initial_agents
        _assert_registered_agents()
        assert set(env.vehicles.keys()) == initial_agents

        # slots[0] steers with -1, slots[1] steers with 1.
        slots = ["agent0", "agent1"]
        count = 2
        tracks = []
        done_count = 0
        for i in range(1, 1000):
            o, r, d, info = env.step({slots[0]: [-1, 1], slots[1]: [1, 1]})
            assert set(o.keys()) == set(r.keys()) == set(info.keys())
            assert set(o.keys()).union({"__all__"}) == set(d.keys())
            tracks.append(d)

            for slot_index in (0, 1):
                v_id = slots[slot_index]
                if not d[v_id]:
                    continue
                assert info[v_id][TerminationState.OUT_OF_ROAD]
                assert info[v_id]["cost"] == out_of_road_cost
                assert r[v_id] == -out_of_road_penalty
                # The slot now follows the next agent id.
                slots[slot_index] = "agent{}".format(count)
                count += 1
                done_count += 1

            if all(d.values()):
                raise ValueError()

            if i % 100 == 0:  # Horizon
                slots = ["agent0", "agent1"]
                count = 2
                o = env.reset()
                assert set(o.keys()) == initial_agents
                _assert_registered_agents()
                assert set(env.vehicles.keys()) == initial_agents
    finally:
        env.close()
    assert done_count > 0
    print("Finish {} dones.".format(done_count))

示例#17

0

显示文件

def test_ma_roundabout_reward_done_alignment():
    """Check that reward, done flag and termination info agree with each other.

    Four scenarios, each with its own env:

    1. out of road: agents steer fully to one side; done agents must get
       reward -777 and OUT_OF_ROAD, and vice versa.
    2. crash: agent0 is placed on agent1's position; both must be done with
       CRASH / CRASH_VEHICLE and reward -1.7777.
    3. crash 2: 40 agents on a single lane, all but agent0 set static; CRASH and
       CRASH_VEHICLE must imply each other and carry reward -1.7777.
    4. success: agent0 is placed on the end of its final lane and must be
       done with SUCCESS and reward 999 while agent1 is not.
    """
    # out of road
    env = MultiAgentRoundaboutEnv({"horizon": 200, "num_agents": 4, "out_of_road_penalty": 777, "crash_done": False})
    try:
        _check_spaces_before_reset(env)
        obs = env.reset()
        _check_spaces_after_reset(env, obs)
        assert env.observation_space.contains(obs)
        # Full steering to one side, then to the other, always with full throttle.
        for action in [-1, 1]:
            for step in range(5000):
                act = {k: [action, 1] for k in env.vehicles.keys()}
                o, r, d, i = _act(env, act)
                # done -> reward/info alignment
                for kkk, ddd in d.items():
                    if ddd and kkk != "__all__":
                        assert r[kkk] == -777
                        assert i[kkk][TerminationState.OUT_OF_ROAD]
                        # print('{} done passed!'.format(kkk))
                # reward -> done/info alignment
                for kkk, rrr in r.items():
                    if rrr == -777:
                        assert d[kkk]
                        assert i[kkk][TerminationState.OUT_OF_ROAD]
                        # print('{} reward passed!'.format(kkk))
                if d["__all__"]:
                    env.reset()
                    break
    finally:
        env.close()

    # crash
    env = MultiAgentRoundaboutEnv(
        {
            "horizon": 100,
            "num_agents": 2,
            "crash_vehicle_penalty": 1.7777,
            "crash_done": True,
            "delay_done": 0,

            # "use_render": True,
            # "fast": True,
            "top_down_camera_initial_z": 160
        }
    )
    # Force the seed here so that the agent1 and agent2 are in same heading! Otherwise they might be in vertical
    # heading and cause one of the vehicle raise "out of road" error!
    env._DEBUG_RANDOM_SEED = 1
    try:
        _check_spaces_before_reset(env)
        obs = env.reset()
        _check_spaces_after_reset(env, obs)
        # Five zero-action steps before the collision is forced.
        for step in range(5):
            act = {k: [0, 0] for k in env.vehicles.keys()}
            o, r, d, i = _act(env, act)
        # Put agent0 onto agent1's position (at height 1.2) to force a collision.
        env.vehicles["agent0"].set_position(env.vehicles["agent1"].position, height=1.2)
        for step in range(5000):
            act = {k: [0, 0] for k in env.vehicles.keys()}
            o, r, d, i = _act(env, act)

            # Keep stepping until at least one done flag is set.
            if not any(d.values()):
                continue

            # Exactly two done flags must be set in the done dict.
            assert sum(d.values()) == 2

            for kkk in ['agent0', 'agent1']:
                iii = i[kkk]
                assert iii[TerminationState.CRASH_VEHICLE]
                assert iii[TerminationState.CRASH]
                assert r[kkk] == -1.7777
                # for kkk, ddd in d.items():
                ddd = d[kkk]
                if ddd and kkk != "__all__":
                    assert r[kkk] == -1.7777
                    assert i[kkk][TerminationState.CRASH_VEHICLE]
                    assert i[kkk][TerminationState.CRASH]
                    # print('{} done passed!'.format(kkk))
                # for kkk, rrr in r.items():
                rrr = r[kkk]
                if rrr == -1.7777:
                    assert d[kkk]
                    assert i[kkk][TerminationState.CRASH_VEHICLE]
                    assert i[kkk][TerminationState.CRASH]
                    # print('{} reward passed!'.format(kkk))
            # assert d["__all__"]
            # if d["__all__"]:
            break
    finally:
        env._DEBUG_RANDOM_SEED = None
        env.close()

    # crash with real fixed vehicle

    # crash 2
    env = MultiAgentRoundaboutEnv(
        {
            "map_config": {
                "exit_length": 110,
                "lane_num": 1
            },
            # "use_render": True,
            # "fast": True,
            "horizon": 200,
            "num_agents": 40,
            "crash_vehicle_penalty": 1.7777,
        }
    )
    try:
        _check_spaces_before_reset(env)
        obs = env.reset()
        _check_spaces_after_reset(env, obs)
        for step in range(1):
            act = {k: [0, 0] for k in env.vehicles.keys()}
            o, r, d, i = _act(env, act)

        # Every vehicle except agent0 is set static.
        for v_id, v in env.vehicles.items():
            if v_id != "agent0":
                v.set_static(True)

        for step in range(5000):
            act = {k: [0, 1] for k in env.vehicles.keys()}
            o, r, d, i = _act(env, act)
            # CRASH and CRASH_VEHICLE must imply each other; a vehicle crash carries the penalty.
            for kkk, iii in i.items():
                if iii[TerminationState.CRASH]:
                    assert iii[TerminationState.CRASH_VEHICLE]
                if iii[TerminationState.CRASH_VEHICLE]:
                    assert iii[TerminationState.CRASH]
                    assert r[kkk] == -1.7777
            # Any agent that is done in this scenario must be flagged OUT_OF_ROAD.
            for kkk, ddd in d.items():
                if ddd and kkk != "__all__":
                    assert i[kkk][TerminationState.OUT_OF_ROAD]
                    # print('{} done passed!'.format(kkk))
            for kkk, rrr in r.items():
                if rrr == -1.7777:
                    # assert d[kkk]
                    assert i[kkk][TerminationState.CRASH_VEHICLE]
                    assert i[kkk][TerminationState.CRASH]
                    # print('{} reward passed!'.format(kkk))
            if d["agent0"]:
                break
            if d["__all__"]:
                break
    finally:
        env.close()

    # success
    env = MultiAgentRoundaboutEnv(
        {
            "horizon": 100,
            "num_agents": 2,
            "success_reward": 999,
            "out_of_road_penalty": 555,
            "crash_done": True
        }
    )
    try:
        _check_spaces_before_reset(env)
        obs = env.reset()
        # NOTE(review): unlike the scenarios above, obs is not passed to the helper here.
        _check_spaces_after_reset(env)
        # Put agent0 on the end of its final lane so that it succeeds on the next step.
        env.vehicles["agent0"].set_position(env.vehicles["agent0"].routing_localization.final_lane.end)
        assert env.observation_space.contains(obs)
        # Only one iteration ever runs: the body always ends with a break.
        for step in range(5000):
            act = {k: [0, 0] for k in env.vehicles.keys()}
            o, r, d, i = _act(env, act)
            if d["__all__"]:
                break
            kkk = "agent0"
            assert r[kkk] == 999
            assert i[kkk][TerminationState.SUCCESS]
            assert d[kkk]

            kkk = "agent1"
            assert r[kkk] != 999
            assert not i[kkk][TerminationState.SUCCESS]
            assert not d[kkk]
            break
    finally:
        env.close()