def get_many(self, handles: Optional[List[int]] = None) -> Dict[int, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """
    Build the observations for every agent whose handle is in `handles`.

    Before delegating to the parent builder, the shortest paths are
    recomputed from the environment's current distance map and cached on
    ``self.shortest_paths``.
    """
    current_distance_map = self.env.distance_map
    self.shortest_paths = get_shortest_paths(current_distance_map)
    observations = super().get_many(handles)
    return observations
def test_get_shortest_paths_unreachable():
    """An agent whose target lies on a disconnected rail segment has no path (``None``)."""
    rail, rail_map = make_disconnected_simple_rail()
    grid_height, grid_width = rail_map.shape[0], rail_map.shape[1]
    env = RailEnv(
        width=grid_width,
        height=grid_height,
        rail_generator=rail_from_grid_transition_map(rail),
        schedule_generator=random_schedule_generator(),
        number_of_agents=1,
        obs_builder_object=GlobalObsForRailEnv(),
    )
    env.reset()

    # Place the single agent on the west dead-end, heading west, and aim it
    # at the east dead-end.
    west_dead_end = (3, 1)
    east_dead_end = (3, 9)
    lone_agent = env.agents[0]
    lone_agent.position = west_dead_end
    lone_agent.initial_position = west_dead_end
    lone_agent.direction = Grid4TransitionsEnum.WEST
    lone_agent.target = east_dead_end
    lone_agent.moving = True

    env.reset(False, False)

    actual = get_shortest_paths(env.distance_map)
    expected = {0: None}

    assert actual == expected, "actual={},expected={}".format(actual, expected)
def test_get_shortest_paths_agent_handle():
    """Requesting a single `agent_handle` yields that agent's full shortest path."""
    env, _ = RailEnvPersister.load_new("Level_distance_map_shortest_path.mpk", "env_data.tests")
    env.reset()

    actual = get_shortest_paths(env.distance_map, agent_handle=6)
    print(actual, file=sys.stderr)

    # (row, column, direction) for every expected step of agent 6.
    agent_6_steps = [
        (5, 5, 0), (4, 5, 0), (3, 5, 0), (2, 5, 0), (1, 5, 0), (0, 5, 0),
        (0, 6, 1), (0, 7, 1), (0, 8, 1), (0, 9, 1), (0, 10, 1),
        (1, 10, 2), (2, 10, 2), (3, 10, 2), (4, 10, 2), (5, 10, 2),
        (6, 10, 2), (7, 10, 2), (8, 10, 2), (9, 10, 2), (10, 10, 2),
        (11, 10, 2), (12, 10, 2), (13, 10, 2), (14, 10, 2), (15, 10, 2),
        (16, 10, 2), (17, 10, 2), (18, 10, 2), (19, 10, 2), (20, 10, 2),
        (20, 9, 3), (20, 8, 3),
        (21, 8, 2),
        (21, 7, 3), (21, 6, 3), (21, 5, 3),
    ]
    expected = {
        6: [Waypoint(position=(row, col), direction=heading) for row, col, heading in agent_6_steps],
    }

    for agent_handle in expected:
        assert np.array_equal(actual[agent_handle], expected[agent_handle]), \
            "[{}] actual={},expected={}".format(agent_handle, actual[agent_handle], expected[agent_handle])
def test_get_shortest_paths():
    """Both agents of the `test_002` level get the expected full shortest path."""
    env = load_flatland_environment_from_file('test_002.pkl', 'env_data.tests')
    env.reset()
    actual = get_shortest_paths(env.distance_map)

    # (row, column, direction) for every expected step, keyed by agent handle.
    steps_by_handle = {
        0: [
            (1, 1, 1), (1, 2, 1), (1, 3, 1),
            (2, 3, 2),
            (2, 4, 1), (2, 5, 1), (2, 6, 1), (2, 7, 1), (2, 8, 1),
            (2, 9, 1), (2, 10, 1), (2, 11, 1), (2, 12, 1), (2, 13, 1),
            (2, 14, 1), (2, 15, 1), (2, 16, 1), (2, 17, 1), (2, 18, 1),
        ],
        1: [
            (3, 18, 3), (3, 17, 3), (3, 16, 3),
            (2, 16, 0),
            (2, 15, 3), (2, 14, 3), (2, 13, 3), (2, 12, 3), (2, 11, 3),
            (2, 10, 3), (2, 9, 3), (2, 8, 3), (2, 7, 3), (2, 6, 3),
            (2, 5, 3), (2, 4, 3), (2, 3, 3), (2, 2, 3), (2, 1, 3),
        ],
    }
    expected = {
        handle: [WayPoint(position=(row, col), direction=heading) for row, col, heading in steps]
        for handle, steps in steps_by_handle.items()
    }

    for agent_handle in expected:
        assert np.array_equal(actual[agent_handle], expected[agent_handle]), \
            "[{}] actual={},expected={}".format(agent_handle, actual[agent_handle], expected[agent_handle])
def reset(self):
    """
    Precompute per-episode data at the beginning of an episode.

    Fills ``self.rail_obs`` with the 16-bit transition encoding of every
    cell of the whole environment (local rail observations are derived from
    it later), allocates ``self.targets_obs`` and caches the shortest paths
    computed from the environment's distance map.
    """
    env_height = self.env.height
    env_width = self.env.width

    # Transition map of the whole env: one 16-element 0/1 vector per cell.
    self.rail_obs = np.zeros((env_height, env_width, 16))
    for row in range(env_height):
        for col in range(env_width):
            transitions = self.env.rail.get_full_transitions(row, col)
            # Binary digits, most significant first, left-padded to 16 bits.
            padded_bits = bin(transitions)[2:].zfill(16)
            self.rail_obs[row, col] = np.array([int(bit) for bit in padded_bits])

    # Global targets - not subtargets
    self.targets_obs = np.zeros((self.view_height, self.view_width, 2))

    # TODO Must be computed in the get_many since at reset it doesn't fill
    # values. but i don't want to compute it everytime
    distance_map: DistanceMap = self.env.distance_map
    self.shortest_paths = get_shortest_paths(distance_map)
def test_get_shortest_paths_max_depth():
    """With `max_depth=2` each agent's path is cut to its first two waypoints."""
    env = load_flatland_environment_from_file('test_002.pkl', 'env_data.tests')
    env.reset()
    actual = get_shortest_paths(env.distance_map, max_depth=2)

    # (row, column, direction) for every expected step, keyed by agent handle.
    steps_by_handle = {
        0: [(1, 1, 1), (1, 2, 1)],
        1: [(3, 18, 3), (3, 17, 3)],
    }
    expected = {
        handle: [Waypoint(position=(row, col), direction=heading) for row, col, heading in steps]
        for handle, steps in steps_by_handle.items()
    }

    for agent_handle in expected:
        assert np.array_equal(actual[agent_handle], expected[agent_handle]), \
            "[{}] actual={},expected={}".format(agent_handle, actual[agent_handle], expected[agent_handle])
def _cheat_expert(self, start_pos, orientation, agentID):
    """
    Return the shortest path for agent `agentID` as if it stood at `start_pos`
    facing `orientation` (used when the expert is standing on a junction).

    Works on a deep copy of the environment that is reduced to that single
    agent, re-labelled as handle 0, so the real environment is not modified.
    """
    fake_env = copy.deepcopy(self.env)
    fake_env.agents = [fake_env.agents[agentID]]
    solo_agent = fake_env.agents[0]

    # Both the current and the initial pose are overwritten, so the result
    # does not depend on whether the agent has already entered the grid.
    solo_agent.position = start_pos
    solo_agent.direction = orientation
    solo_agent.handle = 0
    solo_agent.initial_position = start_pos
    solo_agent.initial_direction = orientation

    distance_map = DistanceMap(
        env_width=self.env.rail.width,
        env_height=self.env.rail.height,
        agents=fake_env.agents,
    )
    distance_map.reset(fake_env.agents, self.env.rail)

    paths_by_handle = get_shortest_paths(distance_map, agent_handle=0)
    return paths_by_handle[0]
def get(self, handle: int = None):
    """
    Called whenever get_many in the observation build is called.
    Requires distance_map to extract the shortest path.
    Does not take into account future positions of other agents!

    If there is no shortest path, the agent just stands still and stops moving.

    Parameters
    ----------
    handle : int, optional
        Handle of the agent for which to compute the observation vector.
        When omitted (``None``), predictions are computed for all agents.

    Returns
    -------
    dict
        Returns a dictionary indexed by the agent handle and for each agent a vector of (max_depth + 1)x5 elements:
        - time_offset
        - position axis 0
        - position axis 1
        - direction
        - action taken to come here (not implemented yet)
        The prediction at 0 is the current position, direction etc.
    """
    agents = self.env.agents
    # Compare against None explicitly: handle 0 is a valid agent handle and
    # must not be mistaken for "no handle given".
    if handle is not None:
        agents = [self.env.agents[handle]]
    distance_map: DistanceMap = self.env.distance_map

    shortest_paths = get_shortest_paths(distance_map, max_depth=self.max_depth)

    prediction_dict = {}
    for agent in agents:
        # The "virtual" position is where the agent counts as being for the
        # purpose of the prediction, depending on its life-cycle status.
        if agent.status == RailAgentStatus.READY_TO_DEPART:
            agent_virtual_position = agent.initial_position
        elif agent.status == RailAgentStatus.ACTIVE:
            agent_virtual_position = agent.position
        elif agent.status == RailAgentStatus.DONE:
            agent_virtual_position = agent.target
        else:
            # Any other status: no position to predict from. Rows hold the
            # time offset and NaN (from None) for the remaining columns.
            # NOTE(review): the fill loop stops at max_depth - 1, so the
            # last row stays all zeros — kept as in the original; confirm
            # whether that is intended.
            prediction = np.zeros(shape=(self.max_depth + 1, 5))
            for i in range(self.max_depth):
                prediction[i] = [i, None, None, None, None]
            prediction_dict[agent.handle] = prediction
            continue

        agent_virtual_direction = agent.direction
        agent_speed = agent.speed_data["speed"]
        # Number of time steps the agent needs to traverse one cell.
        times_per_cell = int(np.reciprocal(agent_speed))
        prediction = np.zeros(shape=(self.max_depth + 1, 5))
        prediction[0] = [0, *agent_virtual_position, agent_virtual_direction, 0]

        shortest_path = shortest_paths[agent.handle]

        # if there is a shortest path, remove the initial position
        if shortest_path:
            shortest_path = shortest_path[1:]

        new_direction = agent_virtual_direction
        new_position = agent_virtual_position
        visited = OrderedSet()
        for index in range(1, self.max_depth + 1):
            # if we're at the target, stop moving until max_depth is reached
            if new_position == agent.target or not shortest_path:
                prediction[index] = [index, *new_position, new_direction, RailEnvActions.STOP_MOVING]
                # NOTE(review): records agent.direction rather than
                # new_direction — kept as in the original; only affects the
                # visualization set below.
                visited.add((*new_position, agent.direction))
                continue

            # Advance to the next waypoint only on steps where the agent
            # finishes a cell; otherwise it stays where it is.
            if index % times_per_cell == 0:
                new_position = shortest_path[0].position
                new_direction = shortest_path[0].direction

                shortest_path = shortest_path[1:]

            # prediction is ready
            prediction[index] = [index, *new_position, new_direction, 0]
            visited.add((*new_position, new_direction))

        # TODO: very bady side effects for visualization only: hand the dev_pred_dict back instead of setting on env!
        self.env.dev_pred_dict[agent.handle] = visited
        prediction_dict[agent.handle] = prediction

    return prediction_dict
# Per-run accumulators: `stats` is initialized here for later use, and
# `shortest_paths_rewards` receives one baseline reward per episode.
stats = []
shortest_paths_rewards = []

for episode in range(0, EPISODES):
    # Reset the environment
    # NOTE(review): the reset goes through `environment`, while the renderer
    # and the shortest-path lookup below use `env` — confirm both names refer
    # to the same environment.
    old_observations, info = environment.reset()
    print(str(old_observations))
    old_observations = reshape_observation(old_observations)

    # Reset the renderer
    if render:
        env_renderer = RenderTool(env, gl="PGL")
        env_renderer.set_new_rail()

    # Shortest path = number of intermediate states = number of states - 2 (excluding the first and the last one)
    # The baseline is stored negated. The path is requested with max_depth=25
    # for agent handle 0 only.
    # NOTE(review): if the returned path for agent 0 can be None (e.g. target
    # unreachable), len() raises TypeError here — confirm against
    # get_shortest_paths.
    shortest_paths_rewards.append(-(len(get_shortest_paths(env.distance_map, max_depth=25, agent_handle=0)[0])-2))

    # Initialize variables
    episode_reward = 0
    terminated = False

    # Episode stats
    # One counter per action id 0..4.
    action_counter = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}

    for time_step in range(TIMESTEPS):
        print(shortest_paths_rewards)
        if print_stats:
            # NOTE(review): the label says "Episode" but the value printed is
            # the time step — presumably meant "Timestep"; confirm.
            print("Episode " + str(time_step) + " in episode " + str(episode + 1))

        # Initially False, remains False if no agent updates it
        update_values = False
def test_shortest_path_predictor(rendering=False):
    """
    The shortest-path predictor follows the shortest path from the south
    dead-end to the east dead-end of the simple rail and then stays on the
    target for the remaining time steps.
    """
    rail, rail_map = make_simple_rail()
    env = RailEnv(
        width=rail_map.shape[1],
        height=rail_map.shape[0],
        rail_generator=rail_from_grid_transition_map(rail),
        schedule_generator=random_schedule_generator(),
        number_of_agents=1,
        obs_builder_object=TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv()),
    )
    env.reset()

    # set the initial position
    agent = env.agents[0]
    agent.initial_position = (5, 6)  # south dead-end
    agent.position = (5, 6)  # south dead-end
    agent.direction = 0  # north
    agent.initial_direction = 0  # north
    agent.target = (3, 9)  # east dead-end
    agent.moving = True
    agent.status = RailAgentStatus.ACTIVE

    env.reset(False, False)

    if rendering:
        renderer = RenderTool(env, gl="PILSVG")
        renderer.render_env(show=True, show_observations=False)
        input("Continue?")

    # compute the observations and predictions
    distance_map = env.distance_map.get()
    assert distance_map[0, agent.initial_position[0], agent.initial_position[1], agent.direction] == 5.0, \
        "found {} instead of {}".format(
            distance_map[agent.handle, agent.initial_position[0], agent.position[1], agent.direction], 5.0)

    paths = get_shortest_paths(env.distance_map)[0]
    assert paths == [
        Waypoint((5, 6), 0),
        Waypoint((4, 6), 0),
        Waypoint((3, 6), 0),
        Waypoint((3, 7), 1),
        Waypoint((3, 8), 1),
        Waypoint((3, 9), 1)
    ]

    # extract the data: each prediction row is
    # (time offset, row, column, direction, action)
    predictions = env.obs_builder.predictions
    agent_rows = predictions[0]
    positions = np.array([[*row[1:3]] for row in agent_rows])
    directions = np.array([[row[3]] for row in agent_rows])
    time_offsets = np.array([[row[0]] for row in agent_rows])

    # test if data meets expectations
    # Five cells along the path, then the agent stays on the target (3, 9)
    # for the remaining 16 time steps.
    expected_positions = [[5, 6], [4, 6], [3, 6], [3, 7], [3, 8]] + [[3, 9] for _ in range(16)]

    # Heading north for the first three entries, east from there on.
    expected_directions = (
        [[Grid4TransitionsEnum.NORTH] for _ in range(3)]
        + [[Grid4TransitionsEnum.EAST] for _ in range(18)]
    )

    # One column holding the offsets 0.0 .. 20.0.
    expected_time_offsets = np.arange(21, dtype=float).reshape(-1, 1)

    assert np.array_equal(time_offsets, expected_time_offsets), \
        "time_offsets {}, expected {}".format(time_offsets, expected_time_offsets)

    assert np.array_equal(positions, expected_positions), \
        "positions {}, expected {}".format(positions, expected_positions)
    assert np.array_equal(directions, expected_directions), \
        "directions {}, expected {}".format(directions, expected_directions)
def test_get_shortest_paths():
    """Both agents of the `test_002` level get the expected full shortest path."""
    env, _ = RailEnvPersister.load_new("test_002.mpk", "env_data.tests")
    env.reset()
    actual = get_shortest_paths(env.distance_map)

    assert len(actual) == 2, "get_shortest_paths should return a dict of length 2"

    # (row, column, direction) for every expected step, keyed by agent handle.
    steps_by_handle = {
        0: [
            (1, 1, 1), (1, 2, 1), (1, 3, 1),
            (2, 3, 2),
            (2, 4, 1), (2, 5, 1), (2, 6, 1), (2, 7, 1), (2, 8, 1),
            (2, 9, 1), (2, 10, 1), (2, 11, 1), (2, 12, 1), (2, 13, 1),
            (2, 14, 1), (2, 15, 1), (2, 16, 1), (2, 17, 1), (2, 18, 1),
        ],
        1: [
            (3, 18, 3), (3, 17, 3), (3, 16, 3),
            (2, 16, 0),
            (2, 15, 3), (2, 14, 3), (2, 13, 3), (2, 12, 3), (2, 11, 3),
            (2, 10, 3), (2, 9, 3), (2, 8, 3), (2, 7, 3), (2, 6, 3),
            (2, 5, 3), (2, 4, 3), (2, 3, 3), (2, 2, 3), (2, 1, 3),
        ],
    }
    expected = {
        handle: [Waypoint(position=(row, col), direction=heading) for row, col, heading in steps]
        for handle, steps in steps_by_handle.items()
    }

    for agent_handle in expected:
        assert np.array_equal(actual[agent_handle], expected[agent_handle]), \
            "[{}] actual={},expected={}".format(agent_handle, actual[agent_handle], expected[agent_handle])
def shortest_path(distance_map, handle):
    """
    Return the naive shortest path of the agent identified by `handle`,
    looked up from the per-agent result of `get_shortest_paths`.
    """
    paths_by_handle = get_shortest_paths(distance_map, agent_handle=handle)
    return paths_by_handle[handle]