def get_many(self, handles: Optional[List[int]] = None) -> Dict[int, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """
    Called whenever an observation has to be computed for the `env` environment, for each agent with handle
    in the `handles` list.
    """
    self.shortest_paths = get_shortest_paths(self.env.distance_map)
    return super().get_many(handles)
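
For orientation before the remaining examples: get_shortest_paths takes a filled DistanceMap and returns a dict mapping each agent handle to a list of Waypoint(position, direction) entries, or to None when the target cannot be reached. A minimal, hedged call sketch, assuming an already-reset RailEnv and the flatland-rl module layout (summarize_paths is just an illustration):

# Minimal usage sketch (assumption: get_shortest_paths lives in
# flatland.envs.rail_env_shortest_paths, as in flatland-rl).
from flatland.envs.rail_env_shortest_paths import get_shortest_paths

def summarize_paths(env):
    # env.distance_map is filled by env.reset(); each value in the returned
    # dict is a list of Waypoints, or None if the agent's target is unreachable.
    paths = get_shortest_paths(env.distance_map)
    for handle, waypoints in paths.items():
        n = "unreachable" if waypoints is None else len(waypoints)
        print("agent {}: {}".format(handle, n))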
Example #2
def test_get_shortest_paths_unreachable():
    rail, rail_map = make_disconnected_simple_rail()

    env = RailEnv(width=rail_map.shape[1],
                  height=rail_map.shape[0],
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  obs_builder_object=GlobalObsForRailEnv())
    env.reset()

    # set the initial position
    agent = env.agents[0]
    agent.position = (3, 1)  # west dead-end
    agent.initial_position = (3, 1)  # west dead-end
    agent.direction = Grid4TransitionsEnum.WEST
    agent.target = (3, 9)  # east dead-end
    agent.moving = True

    env.reset(False, False)

    actual = get_shortest_paths(env.distance_map)
    expected = {0: None}

    assert actual == expected, "actual={},expected={}".format(actual, expected)
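
The test above pins down the unreachable case: the path entry is None, not an empty list. A hedged sketch of how calling code can guard against that (the fallback action is only an illustration):

# Illustrative guard (not from the tests above): fall back to stopping when
# an agent has no shortest path to its target.
path = get_shortest_paths(env.distance_map, agent_handle=0)[0]
if path is None:
    action = RailEnvActions.STOP_MOVING  # target unreachable, stand still
else:
    next_waypoint = path[1] if len(path) > 1 else path[0]  # path[0] is the current cell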
def test_get_shortest_paths_agent_handle():
    #env = load_flatland_environment_from_file('Level_distance_map_shortest_path.pkl', 'env_data.tests')
    env, _ = RailEnvPersister.load_new("Level_distance_map_shortest_path.mpk",
                                       "env_data.tests")
    env.reset()
    actual = get_shortest_paths(env.distance_map, agent_handle=6)

    print(actual, file=sys.stderr)

    expected = {
        6: [
            Waypoint(position=(5, 5), direction=0),
            Waypoint(position=(4, 5), direction=0),
            Waypoint(position=(3, 5), direction=0),
            Waypoint(position=(2, 5), direction=0),
            Waypoint(position=(1, 5), direction=0),
            Waypoint(position=(0, 5), direction=0),
            Waypoint(position=(0, 6), direction=1),
            Waypoint(position=(0, 7), direction=1),
            Waypoint(position=(0, 8), direction=1),
            Waypoint(position=(0, 9), direction=1),
            Waypoint(position=(0, 10), direction=1),
            Waypoint(position=(1, 10), direction=2),
            Waypoint(position=(2, 10), direction=2),
            Waypoint(position=(3, 10), direction=2),
            Waypoint(position=(4, 10), direction=2),
            Waypoint(position=(5, 10), direction=2),
            Waypoint(position=(6, 10), direction=2),
            Waypoint(position=(7, 10), direction=2),
            Waypoint(position=(8, 10), direction=2),
            Waypoint(position=(9, 10), direction=2),
            Waypoint(position=(10, 10), direction=2),
            Waypoint(position=(11, 10), direction=2),
            Waypoint(position=(12, 10), direction=2),
            Waypoint(position=(13, 10), direction=2),
            Waypoint(position=(14, 10), direction=2),
            Waypoint(position=(15, 10), direction=2),
            Waypoint(position=(16, 10), direction=2),
            Waypoint(position=(17, 10), direction=2),
            Waypoint(position=(18, 10), direction=2),
            Waypoint(position=(19, 10), direction=2),
            Waypoint(position=(20, 10), direction=2),
            Waypoint(position=(20, 9), direction=3),
            Waypoint(position=(20, 8), direction=3),
            Waypoint(position=(21, 8), direction=2),
            Waypoint(position=(21, 7), direction=3),
            Waypoint(position=(21, 6), direction=3),
            Waypoint(position=(21, 5), direction=3)
        ]
    }

    for agent_handle in expected:
        assert np.array_equal(actual[agent_handle], expected[agent_handle]), \
            "[{}] actual={},expected={}".format(agent_handle, actual[agent_handle], expected[agent_handle])
Example #4
def test_get_shortest_paths():
    env = load_flatland_environment_from_file('test_002.pkl', 'env_data.tests')
    env.reset()
    actual = get_shortest_paths(env.distance_map)

    expected = {
        0: [
            WayPoint(position=(1, 1), direction=1),
            WayPoint(position=(1, 2), direction=1),
            WayPoint(position=(1, 3), direction=1),
            WayPoint(position=(2, 3), direction=2),
            WayPoint(position=(2, 4), direction=1),
            WayPoint(position=(2, 5), direction=1),
            WayPoint(position=(2, 6), direction=1),
            WayPoint(position=(2, 7), direction=1),
            WayPoint(position=(2, 8), direction=1),
            WayPoint(position=(2, 9), direction=1),
            WayPoint(position=(2, 10), direction=1),
            WayPoint(position=(2, 11), direction=1),
            WayPoint(position=(2, 12), direction=1),
            WayPoint(position=(2, 13), direction=1),
            WayPoint(position=(2, 14), direction=1),
            WayPoint(position=(2, 15), direction=1),
            WayPoint(position=(2, 16), direction=1),
            WayPoint(position=(2, 17), direction=1),
            WayPoint(position=(2, 18), direction=1)
        ],
        1: [
            WayPoint(position=(3, 18), direction=3),
            WayPoint(position=(3, 17), direction=3),
            WayPoint(position=(3, 16), direction=3),
            WayPoint(position=(2, 16), direction=0),
            WayPoint(position=(2, 15), direction=3),
            WayPoint(position=(2, 14), direction=3),
            WayPoint(position=(2, 13), direction=3),
            WayPoint(position=(2, 12), direction=3),
            WayPoint(position=(2, 11), direction=3),
            WayPoint(position=(2, 10), direction=3),
            WayPoint(position=(2, 9), direction=3),
            WayPoint(position=(2, 8), direction=3),
            WayPoint(position=(2, 7), direction=3),
            WayPoint(position=(2, 6), direction=3),
            WayPoint(position=(2, 5), direction=3),
            WayPoint(position=(2, 4), direction=3),
            WayPoint(position=(2, 3), direction=3),
            WayPoint(position=(2, 2), direction=3),
            WayPoint(position=(2, 1), direction=3)
        ]
    }

    for agent_handle in expected:
        assert np.array_equal(actual[agent_handle], expected[agent_handle]), \
            "[{}] actual={},expected={}".format(agent_handle, actual[agent_handle], expected[agent_handle])
def reset(self):
    # Useful for precomputing stuff at the beginning of an episode:
    # precompute rail_obs for the WHOLE env, then compute local rail obs from it
    self.rail_obs = np.zeros((self.env.height, self.env.width, 16))  # Transition map of the whole env
    for i in range(self.env.height):
        for j in range(self.env.width):
            bitlist = [int(digit) for digit in bin(self.env.rail.get_full_transitions(i, j))[2:]]
            bitlist = [0] * (16 - len(bitlist)) + bitlist
            self.rail_obs[i, j] = np.array(bitlist)
    # Global targets - not subtargets
    self.targets_obs = np.zeros((self.view_height, self.view_width, 2))
    distance_map: DistanceMap = self.env.distance_map
    self.shortest_paths = get_shortest_paths(distance_map)  # TODO: should be computed in get_many, since the distance map is not filled yet at reset, but we don't want to recompute it every time
Example #6
def test_get_shortest_paths_max_depth():
    env = load_flatland_environment_from_file('test_002.pkl', 'env_data.tests')
    env.reset()
    actual = get_shortest_paths(env.distance_map, max_depth=2)

    expected = {
        0: [
            Waypoint(position=(1, 1), direction=1),
            Waypoint(position=(1, 2), direction=1)
        ],
        1: [
            Waypoint(position=(3, 18), direction=3),
            Waypoint(position=(3, 17), direction=3),
        ]
    }

    for agent_handle in expected:
        assert np.array_equal(actual[agent_handle], expected[agent_handle]), \
            "[{}] actual={},expected={}".format(agent_handle, actual[agent_handle], expected[agent_handle])
    def _cheat_expert(self, start_pos, orientation, agentID):
        """
        return the next position when expert is standing on a junction
        """
        fake_env = copy.deepcopy(self.env)
        fake_env.agents = [fake_env.agents[agentID]]
        # if fake_env.agents[0].position is not None:
        fake_env.agents[0].position = start_pos
        fake_env.agents[0].direction = orientation
        fake_env.agents[0].handle = 0
        # else:
        fake_env.agents[0].initial_position = start_pos
        fake_env.agents[0].initial_direction = orientation

        distance_map = DistanceMap(env_width=self.env.rail.width,
                                   env_height=self.env.rail.height,
                                   agents=fake_env.agents)
        distance_map.reset(fake_env.agents, self.env.rail)
        path = get_shortest_paths(distance_map, agent_handle=0)
        return path[0]
Example #8
    def get(self, handle: int = None):
        """
        Called whenever get_many in the observation builder is called.
        Requires distance_map to extract the shortest path.
        Does not take into account future positions of other agents!

        If there is no shortest path, the agent just stands still and stops moving.

        Parameters
        ----------
        handle : int, optional
            Handle of the agent for which to compute the observation vector.

        Returns
        -------
        dict
            Returns a dictionary indexed by the agent handle; for each agent it contains a matrix of (max_depth + 1) x 5 elements:
            - time_offset
            - position axis 0
            - position axis 1
            - direction
            - action taken to come here (not implemented yet)
            The prediction at 0 is the current position, direction etc.
        """
        agents = self.env.agents
        if handle is not None:  # note: a bare `if handle` would incorrectly skip agent 0
            agents = [self.env.agents[handle]]
        distance_map: DistanceMap = self.env.distance_map

        shortest_paths = get_shortest_paths(distance_map,
                                            max_depth=self.max_depth)

        prediction_dict = {}
        for agent in agents:

            if agent.status == RailAgentStatus.READY_TO_DEPART:
                agent_virtual_position = agent.initial_position
            elif agent.status == RailAgentStatus.ACTIVE:
                agent_virtual_position = agent.position
            elif agent.status == RailAgentStatus.DONE:
                agent_virtual_position = agent.target
            else:

                prediction = np.zeros(shape=(self.max_depth + 1, 5))
                for i in range(self.max_depth):
                    prediction[i] = [i, None, None, None, None]
                prediction_dict[agent.handle] = prediction
                continue

            agent_virtual_direction = agent.direction
            agent_speed = agent.speed_data["speed"]
            times_per_cell = int(np.reciprocal(agent_speed))
            prediction = np.zeros(shape=(self.max_depth + 1, 5))
            prediction[0] = [
                0, *agent_virtual_position, agent_virtual_direction, 0
            ]

            shortest_path = shortest_paths[agent.handle]

            # if there is a shortest path, remove the initial position
            if shortest_path:
                shortest_path = shortest_path[1:]

            new_direction = agent_virtual_direction
            new_position = agent_virtual_position
            visited = OrderedSet()
            for index in range(1, self.max_depth + 1):
                # if we're at the target, stop moving until max_depth is reached
                if new_position == agent.target or not shortest_path:
                    prediction[index] = [
                        index, *new_position, new_direction,
                        RailEnvActions.STOP_MOVING
                    ]
                    visited.add((*new_position, agent.direction))
                    continue

                if index % times_per_cell == 0:
                    new_position = shortest_path[0].position
                    new_direction = shortest_path[0].direction

                    shortest_path = shortest_path[1:]

                # prediction is ready
                prediction[index] = [index, *new_position, new_direction, 0]
                visited.add((*new_position, new_direction))

            # TODO: very bad side effect, for visualization only: hand the dev_pred_dict back instead of setting it on the env!
            self.env.dev_pred_dict[agent.handle] = visited
            prediction_dict[agent.handle] = prediction

        return prediction_dict
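
As described in the docstring of get() above, each agent's entry in prediction_dict is a (max_depth + 1) x 5 matrix whose rows are [time_offset, row, col, direction, action]; with an agent speed of 0.25 the predicted position only advances every int(1 / 0.25) = 4 time steps. A purely illustrative helper for reading those rows (unpack_prediction is not part of flatland):

# Illustrative only: turn one agent's prediction matrix into dicts.
# Each row follows the layout documented above: [time_offset, row, col, direction, action].
def unpack_prediction(prediction_dict, handle):
    return [
        {"t": int(row[0]), "position": (row[1], row[2]),
         "direction": row[3], "action": row[4]}
        for row in prediction_dict[handle]
    ]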
Example #9
stats = []
shortest_paths_rewards = []

for episode in range(0, EPISODES):
    # Reset the environment
    old_observations, info = environment.reset()
    print(str(old_observations))
    old_observations = reshape_observation(old_observations)

    # Reset the renderer
    if render:
        env_renderer = RenderTool(env, gl="PGL")
        env_renderer.set_new_rail()

    # Shortest-path reward = -(number of intermediate states) = -(number of waypoints - 2), excluding the first and the last waypoint
    shortest_paths_rewards.append(-(len(get_shortest_paths(env.distance_map, max_depth=25, agent_handle=0)[0])-2))

    # Initialize variables
    episode_reward = 0
    terminated = False

    # Episode stats
    action_counter = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}

    for time_step in range(TIMESTEPS):
        print(shortest_paths_rewards)
        if print_stats:
            print("Episode " + str(time_step) + " in episode " + str(episode + 1))

        # Initially False, remains False if no agent updates it
        update_values = False
def test_shortest_path_predictor(rendering=False):
    rail, rail_map = make_simple_rail()
    env = RailEnv(
        width=rail_map.shape[1],
        height=rail_map.shape[0],
        rail_generator=rail_from_grid_transition_map(rail),
        schedule_generator=random_schedule_generator(),
        number_of_agents=1,
        obs_builder_object=TreeObsForRailEnv(
            max_depth=2, predictor=ShortestPathPredictorForRailEnv()),
    )
    env.reset()

    # set the initial position
    agent = env.agents[0]
    agent.initial_position = (5, 6)  # south dead-end
    agent.position = (5, 6)  # south dead-end
    agent.direction = 0  # north
    agent.initial_direction = 0  # north
    agent.target = (3, 9)  # east dead-end
    agent.moving = True
    agent.status = RailAgentStatus.ACTIVE

    env.reset(False, False)

    if rendering:
        renderer = RenderTool(env, gl="PILSVG")
        renderer.render_env(show=True, show_observations=False)
        input("Continue?")

    # compute the observations and predictions
    distance_map = env.distance_map.get()
    assert distance_map[0, agent.initial_position[0], agent.initial_position[1], agent.direction] == 5.0, \
        "found {} instead of {}".format(
            distance_map[0, agent.initial_position[0], agent.initial_position[1], agent.direction], 5.0)

    paths = get_shortest_paths(env.distance_map)[0]
    assert paths == [
        Waypoint((5, 6), 0),
        Waypoint((4, 6), 0),
        Waypoint((3, 6), 0),
        Waypoint((3, 7), 1),
        Waypoint((3, 8), 1),
        Waypoint((3, 9), 1)
    ]

    # extract the data
    predictions = env.obs_builder.predictions
    positions = np.array(
        list(map(lambda prediction: [*prediction[1:3]], predictions[0])))
    directions = np.array(
        list(map(lambda prediction: [prediction[3]], predictions[0])))
    time_offsets = np.array(
        list(map(lambda prediction: [prediction[0]], predictions[0])))

    # test if data meets expectations
    expected_positions = [
        [5, 6],
        [4, 6],
        [3, 6],
        [3, 7],
        [3, 8],
        [3, 9],
        [3, 9],
        [3, 9],
        [3, 9],
        [3, 9],
        [3, 9],
        [3, 9],
        [3, 9],
        [3, 9],
        [3, 9],
        [3, 9],
        [3, 9],
        [3, 9],
        [3, 9],
        [3, 9],
        [3, 9],
    ]
    expected_directions = [
        [Grid4TransitionsEnum.NORTH],  # next is [5,6] heading north
        [Grid4TransitionsEnum.NORTH],  # next is [4,6] heading north
        [Grid4TransitionsEnum.NORTH],  # next is [3,6] heading north
        [Grid4TransitionsEnum.EAST],  # next is [3,7] heading east
        [Grid4TransitionsEnum.EAST],
        [Grid4TransitionsEnum.EAST],
        [Grid4TransitionsEnum.EAST],
        [Grid4TransitionsEnum.EAST],
        [Grid4TransitionsEnum.EAST],
        [Grid4TransitionsEnum.EAST],
        [Grid4TransitionsEnum.EAST],
        [Grid4TransitionsEnum.EAST],
        [Grid4TransitionsEnum.EAST],
        [Grid4TransitionsEnum.EAST],
        [Grid4TransitionsEnum.EAST],
        [Grid4TransitionsEnum.EAST],
        [Grid4TransitionsEnum.EAST],
        [Grid4TransitionsEnum.EAST],
        [Grid4TransitionsEnum.EAST],
        [Grid4TransitionsEnum.EAST],
        [Grid4TransitionsEnum.EAST],
    ]

    expected_time_offsets = np.array([
        [0.],
        [1.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.],
        [8.],
        [9.],
        [10.],
        [11.],
        [12.],
        [13.],
        [14.],
        [15.],
        [16.],
        [17.],
        [18.],
        [19.],
        [20.],
    ])

    assert np.array_equal(time_offsets, expected_time_offsets), \
        "time_offsets {}, expected {}".format(time_offsets, expected_time_offsets)

    assert np.array_equal(positions, expected_positions), \
        "positions {}, expected {}".format(positions, expected_positions)
    assert np.array_equal(directions, expected_directions), \
        "directions {}, expected {}".format(directions, expected_directions)
def test_get_shortest_paths():
    #env = load_flatland_environment_from_file('test_002.mpk', 'env_data.tests')
    env, env_dict = RailEnvPersister.load_new("test_002.mpk", "env_data.tests")

    #print("env len(agents): ", len(env.agents))
    #print(env.distance_map)
    #print("env number_of_agents:", env.number_of_agents)

    #print("env agents:", env.agents)

    #env.distance_map.reset(env.agents, env.rail)

    #actual = get_shortest_paths(env.distance_map)
    #print("shortest paths:", actual)

    #print(env.distance_map)
    #print("Dist map agents:", env.distance_map.agents)

    #print("\nenv reset()")
    env.reset()
    actual = get_shortest_paths(env.distance_map)
    #print("env agents: ", len(env.agents))
    #print("env number_of_agents: ", env.number_of_agents)

    assert len(
        actual) == 2, "get_shortest_paths should return a dict of length 2"

    expected = {
        0: [
            Waypoint(position=(1, 1), direction=1),
            Waypoint(position=(1, 2), direction=1),
            Waypoint(position=(1, 3), direction=1),
            Waypoint(position=(2, 3), direction=2),
            Waypoint(position=(2, 4), direction=1),
            Waypoint(position=(2, 5), direction=1),
            Waypoint(position=(2, 6), direction=1),
            Waypoint(position=(2, 7), direction=1),
            Waypoint(position=(2, 8), direction=1),
            Waypoint(position=(2, 9), direction=1),
            Waypoint(position=(2, 10), direction=1),
            Waypoint(position=(2, 11), direction=1),
            Waypoint(position=(2, 12), direction=1),
            Waypoint(position=(2, 13), direction=1),
            Waypoint(position=(2, 14), direction=1),
            Waypoint(position=(2, 15), direction=1),
            Waypoint(position=(2, 16), direction=1),
            Waypoint(position=(2, 17), direction=1),
            Waypoint(position=(2, 18), direction=1)
        ],
        1: [
            Waypoint(position=(3, 18), direction=3),
            Waypoint(position=(3, 17), direction=3),
            Waypoint(position=(3, 16), direction=3),
            Waypoint(position=(2, 16), direction=0),
            Waypoint(position=(2, 15), direction=3),
            Waypoint(position=(2, 14), direction=3),
            Waypoint(position=(2, 13), direction=3),
            Waypoint(position=(2, 12), direction=3),
            Waypoint(position=(2, 11), direction=3),
            Waypoint(position=(2, 10), direction=3),
            Waypoint(position=(2, 9), direction=3),
            Waypoint(position=(2, 8), direction=3),
            Waypoint(position=(2, 7), direction=3),
            Waypoint(position=(2, 6), direction=3),
            Waypoint(position=(2, 5), direction=3),
            Waypoint(position=(2, 4), direction=3),
            Waypoint(position=(2, 3), direction=3),
            Waypoint(position=(2, 2), direction=3),
            Waypoint(position=(2, 1), direction=3)
        ]
    }

    for agent_handle in expected:
        assert np.array_equal(actual[agent_handle], expected[agent_handle]), \
            "[{}] actual={},expected={}".format(agent_handle, actual[agent_handle], expected[agent_handle])
def shortest_path(distance_map, handle):
    """
    Calculates the naive shortest path of an agent.
    """
    return get_shortest_paths(distance_map, agent_handle=handle)[handle]
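
For completeness, a hypothetical call site for the helper above (env and the handle value are assumptions):

# Hypothetical usage: walk agent 0's naive shortest path, if one exists.
waypoints = shortest_path(env.distance_map, handle=0)
if waypoints is not None:
    for wp in waypoints:
        print(wp.position, wp.direction)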