Example #1
def render_env(env, fname):
    env_renderer = RenderTool(env, gl="PGL")
    env_renderer.render_env()

    image = env_renderer.get_image()
    pil_image = PIL.Image.fromarray(image)
    pil_image.save(fname)
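A minimal usage sketch for the helper above, assuming flatland-rl 2.x; the grid size, generator settings and output filename are illustrative choices, not taken from the original script.

# Usage sketch (the helper itself assumes `import PIL` and the RenderTool import).
import PIL.Image
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from flatland.utils.rendertools import RenderTool

env = RailEnv(width=25, height=25,
              rail_generator=sparse_rail_generator(max_num_cities=2),
              schedule_generator=sparse_schedule_generator(),
              number_of_agents=1)
env.reset()
render_env(env, "level_0.png")  # writes one PNG snapshot of the rendered grid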
Example #2
def render_env(env):
    env_renderer = RenderTool(env, gl="PGL")
    env_renderer.render_env()

    image = env_renderer.get_image()
    pil_image = PIL.Image.fromarray(image)
    #print("RENDER")
    #pil_image.show()
    images.append(pil_image)
    print(len(images))
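This variant appends each frame to a module-level `images` list instead of saving it. A sketch of how the collected frames could be written out afterwards, mirroring the GIF export used in Example #5 below; the filename and frame duration are arbitrary choices.

images = []                      # module-level list that render_env() appends to

# ... call render_env(env) once per simulation step ...

if images:
    images[0].save("episode.gif", save_all=True, append_images=images[1:],
                   duration=60, loop=0)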
Example #3
    def render(self, mode='human'):
        # TODO: Merge both strategies (Jupyter vs .py)
        # In .py files
        # self.renderer.render_env(show=False, show_observations=False, show_predictions=False)
        # In Jupyter Notebooks
        env_renderer = RenderTool(self.flatland_env, gl="PILSVG")
        env_renderer.render_env()

        image = env_renderer.get_image()
        pil_image = Image.fromarray(image)
        display(pil_image)
        return image
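The method above relies on names imported elsewhere in its module; a plausible set of imports, stated as an assumption rather than taken from the original file (display only works inside a Jupyter/IPython session).

from IPython.display import display               # inline image display in Jupyter
from PIL import Image
from flatland.utils.rendertools import RenderTool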
Example #4
def createEnvSet(nStart, nEnd, sDir, bSmall=True):
    # print("Generate small envs in train-envs-small:")
    print(f"Generate envs (small={bSmall}) in dir {sDir}:")

    # Note: rendered images always go to this fixed directory, regardless of sDir.
    sDirImages = "train-envs-small/images/"
    if not os.path.exists(sDirImages):
        os.makedirs(sDirImages)

    for test_id in range(nStart, nEnd, 1):
        env = create_test_env(RandomTestParams_small, test_id, sDir)

        oRender = RenderTool(env, gl="PILSVG")

        # oRender.envs = envs
        # oRender.set_new_rail()
        oRender.render_env()
        g2img = oRender.get_image()
        imgPIL = Image.fromarray(g2img)
        # imgPIL.show()

        imgPIL.save(sDirImages + "Level_{}.png".format(test_id))
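A hedged call sketch for the function above; create_test_env and RandomTestParams_small come from the surrounding project and are not shown in this snippet.

# Usage sketch (assumes the surrounding project provides create_test_env
# and RandomTestParams_small, which this snippet does not show):
createEnvSet(0, 10, "train-envs-small/", bSmall=True)
# -> renders Level_0.png ... Level_9.png into train-envs-small/images/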
Example #5
def eval_policy(env_params, checkpoint, n_eval_episodes, max_steps,
                action_size, state_size, seed, render, allow_skipping,
                allow_caching):
    # Evaluation is faster on CPU (except if you use a really huge policy)
    parameters = {'use_gpu': False}

    # policy = DDDQNPolicy(state_size, action_size, Namespace(**parameters), evaluation_mode=True)
    # policy.qnetwork_local = torch.load(checkpoint, map_location={'cuda:0': 'cpu'})

    env_params = Namespace(**env_params)

    # Environment parameters
    n_agents = env_params.n_agents
    x_dim = env_params.x_dim
    y_dim = env_params.y_dim
    n_cities = env_params.n_cities
    max_rails_between_cities = env_params.max_rails_between_cities
    max_rails_in_city = env_params.max_rails_in_city

    agents = []
    for agent_id in range(n_agents):
        agent = AttentionAgent(num_in_pol=state_size,
                               num_out_pol=action_size,
                               hidden_dim=256,
                               lr=0.001)

        agent.policy = torch.load(
            os.path.join(checkpoint, f'2300_agent{agent_id}.pth'),
            map_location=torch.device('cpu'))
        agent.policy.eval()

        agents.append(agent)

    # Malfunction and speed profiles
    # TODO pass these parameters properly from main!
    malfunction_parameters = MalfunctionParameters(
        malfunction_rate=1. / 2000,  # Rate of malfunctions
        min_duration=20,  # Minimal duration
        max_duration=50  # Max duration
    )

    # Only fast trains in Round 1
    speed_profiles = {
        1.: 1.0,  # Fast passenger train
        1. / 2.: 0.0,  # Fast freight train
        1. / 3.: 0.0,  # Slow commuter train
        1. / 4.: 0.0  # Slow freight train
    }

    # Observation parameters
    observation_tree_depth = env_params.observation_tree_depth
    observation_radius = env_params.observation_radius
    observation_max_path_depth = env_params.observation_max_path_depth

    # Observation builder
    predictor = ShortestPathPredictorForRailEnv(observation_max_path_depth)
    tree_observation = TreeObsForRailEnv(max_depth=observation_tree_depth,
                                         predictor=predictor)

    # Setup the environment
    env = RailEnv(
        width=x_dim,
        height=y_dim,
        rail_generator=sparse_rail_generator(
            max_num_cities=n_cities,
            grid_mode=False,
            max_rails_between_cities=max_rails_between_cities,
            max_rails_in_city=max_rails_in_city,
        ),
        # rail_generator = complex_rail_generator(
        #     nr_start_goal=10,
        #     nr_extra=10,
        #     min_dist=10,
        #     max_dist=99999,
        #     seed=1
        # ),
        schedule_generator=sparse_schedule_generator(speed_profiles),
        number_of_agents=n_agents,
        malfunction_generator_and_process_data=malfunction_from_params(
            malfunction_parameters),
        obs_builder_object=tree_observation)

    if render:
        # env_renderer = RenderTool(env, gl="PGL")
        env_renderer = RenderTool(
            env,
            # gl="PGL",
            agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS_AND_BOX,
            show_debug=False,
            screen_height=600,  # Adjust these parameters to fit your resolution
            screen_width=800)

    action_dict = dict()
    scores = []
    completions = []
    nb_steps = []
    inference_times = []
    preproc_times = []
    agent_times = []
    step_times = []

    for agent_id in range(n_agents):
        action_dict[agent_id] = 0

    for episode_idx in range(n_eval_episodes):
        images = []
        seed += 1

        inference_timer = Timer()
        preproc_timer = Timer()
        agent_timer = Timer()
        step_timer = Timer()

        step_timer.start()
        obs, info = env.reset(regenerate_rail=True,
                              regenerate_schedule=True,
                              random_seed=seed)
        step_timer.end()

        agent_obs = [None] * env.get_num_agents()
        score = 0.0

        # `done` is otherwise only assigned inside the step loop below; initialise
        # it so the allow_skipping early-exit branch cannot hit an undefined name.
        done = {idx: False for idx in env.get_agent_handles()}
        done['__all__'] = False

        if render:
            env_renderer.set_new_rail()

        final_step = 0
        skipped = 0

        nb_hit = 0
        agent_last_obs = {}
        agent_last_action = {}

        for step in range(max_steps - 1):
            # time.sleep(0.2)
            if allow_skipping and check_if_all_blocked(env):
                # FIXME why -1? bug where all agents are "done" after max_steps!
                skipped = max_steps - step - 1
                final_step = max_steps - 2
                n_unfinished_agents = sum(not done[idx]
                                          for idx in env.get_agent_handles())
                score -= skipped * n_unfinished_agents
                break

            agent_timer.start()
            for agent in env.get_agent_handles():
                agent_model = agents[agent]
                if obs[agent] and info['action_required'][agent]:
                    if agent in agent_last_obs and np.all(
                            agent_last_obs[agent] == obs[agent]):
                        nb_hit += 1
                        action = agent_last_action[agent]

                    else:
                        preproc_timer.start()
                        norm_obs = normalize_observation(
                            obs[agent],
                            tree_depth=observation_tree_depth,
                            observation_radius=observation_radius)
                        preproc_timer.end()

                        inference_timer.start()
                        action = act(agent_model, norm_obs)
                        inference_timer.end()

                    action_dict.update({agent: action})

                    if allow_caching:
                        agent_last_obs[agent] = obs[agent]
                        agent_last_action[agent] = action
            agent_timer.end()

            step_timer.start()
            obs, all_rewards, done, info = env.step(action_dict)
            step_timer.end()

            if render:
                env_renderer.render_env(show=True,
                                        frames=False,
                                        show_observations=False,
                                        show_predictions=False)

                im = env_renderer.get_image()
                im = PIL.Image.fromarray(im)
                images.append(im)

                if step % 100 == 0:
                    print("{}/{}".format(step, max_steps - 1))

            for agent in env.get_agent_handles():
                score += all_rewards[agent]

            final_step = step

            if done['__all__']:
                break

        if render:
            for _ in range(10):
                images.append(images[-1])

            # save video
            images[0].save(
                f'/Users/nikhilvs/repos/nyu/flatland-reinforcement-learning/videos/maac-final/out_{episode_idx}.gif',
                save_all=True,
                append_images=images[1:],
                optimize=False,
                duration=60,
                loop=0)

        normalized_score = score / (max_steps * env.get_num_agents())
        scores.append(normalized_score)

        tasks_finished = sum(done[idx] for idx in env.get_agent_handles())
        completion = tasks_finished / max(1, env.get_num_agents())
        completions.append(completion)

        nb_steps.append(final_step)

        inference_times.append(inference_timer.get())
        preproc_times.append(preproc_timer.get())
        agent_times.append(agent_timer.get())
        step_times.append(step_timer.get())

        skipped_text = ""
        if skipped > 0:
            skipped_text = "\t⚡ Skipped {}".format(skipped)

        hit_text = ""
        if nb_hit > 0:
            hit_text = "\t⚡ Hit {} ({:.1f}%)".format(nb_hit, (100 * nb_hit) /
                                                     (n_agents * final_step))

        print(
            "☑️  Score: {:.3f} \tDone: {:.1f}% \tNb steps: {:.3f} "
            "\t🍭 Seed: {}"
            "\t🚉 Env: {:.3f}s  "
            "\t🤖 Agent: {:.3f}s (per step: {:.3f}s) \t[preproc: {:.3f}s \tinfer: {:.3f}s]"
            "{}{}".format(normalized_score, completion * 100.0, final_step,
                          seed, step_timer.get(), agent_timer.get(),
                          agent_timer.get() / final_step, preproc_timer.get(),
                          inference_timer.get(), skipped_text, hit_text))

    return scores, completions, nb_steps, agent_times, step_times
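eval_policy expects env_params as a plain dict whose keys match the attributes read at the top of the function. A hedged example call; every value below, including the checkpoint directory and state_size, is an illustrative assumption.

env_params = {
    "n_agents": 2,
    "x_dim": 30,
    "y_dim": 30,
    "n_cities": 2,
    "max_rails_between_cities": 2,
    "max_rails_in_city": 3,
    "observation_tree_depth": 2,
    "observation_radius": 10,
    "observation_max_path_depth": 30,
}
# `checkpoint` must point at a directory containing the per-agent
# '2300_agent{i}.pth' files loaded in the function above.
scores, completions, nb_steps, agent_times, step_times = eval_policy(
    env_params, checkpoint="checkpoints/", n_eval_episodes=5, max_steps=200,
    action_size=5, state_size=231, seed=42, render=False,
    allow_skipping=True, allow_caching=True)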
Example #6
class FlatlandEnv(gym.Env):
    def __init__(self,
                 n_cars=3,
                 n_acts=5,
                 min_obs=-1,
                 max_obs=1,
                 n_nodes=2,
                 ob_radius=10,
                 x_dim=36,
                 y_dim=36,
                 feats='all'):

        self.tree_obs = tree_observation.TreeObservation(n_nodes)
        self.n_cars = n_cars
        self.n_nodes = n_nodes
        self.ob_radius = ob_radius
        self.feats = feats

        rail_gen = sparse_rail_generator(max_num_cities=3,
                                         seed=666,
                                         grid_mode=False,
                                         max_rails_between_cities=2,
                                         max_rails_in_city=3)

        # `speed_ration_map` and `stochastic_data` are assumed to be defined at
        # module level in the original source file (they are not shown here).
        self._rail_env = RailEnv(
            width=x_dim,
            height=y_dim,
            rail_generator=rail_gen,
            schedule_generator=sparse_schedule_generator(speed_ration_map),
            number_of_agents=n_cars,
            malfunction_generator_and_process_data=malfunction_from_params(
                stochastic_data),
            obs_builder_object=self.tree_obs)

        self.renderer = RenderTool(self._rail_env, gl="PILSVG")
        self.action_dict = dict()
        self.info = dict()
        self.old_obs = dict()

    def step(self, action):
        # Update the action of each agent
        for agent_id in range(self.n_cars):
            if action[agent_id] is None:
                action[agent_id] = 2
            self.action_dict.update({
                agent_id: action[agent_id] + 1
            })  # FIXME: Hack for ignoring action 0 (model only outputs 4)

        # Take actions, get observations
        next_obs, all_rewards, done, self.info = self._rail_env.step(
            self.action_dict)

        # Normalise observations for each agent
        for agent_id in range(self._rail_env.get_num_agents()):

            # Check if agent is finished
            if not done[agent_id]:
                # Normalise next observation
                next_obs[agent_id] = normalize_observation(
                    tree=next_obs[agent_id],
                    max_depth=self.n_nodes,
                    observation_radius=self.ob_radius,
                    feats=self.feats)

                # Keep track of last observation for trains that finish
                self.old_obs[agent_id] = next_obs[agent_id].copy()
            else:
                # Use last observation if agent finished
                next_obs[agent_id] = self.old_obs[agent_id]

        return next_obs, all_rewards, done, self.info

    def reset(self):
        """
        Reset the state of the environment and returns an initial observation.
        return obs: initial observation of the space
        """
        self.action_dict = dict()
        self.info = dict()
        self.old_obs = dict()

        obs, self.info = self._rail_env.reset(True, True)
        for agent_id in range(self.n_cars):
            if obs[agent_id]:
                obs[agent_id] = normalize_observation(obs[agent_id],
                                                      self.n_nodes,
                                                      self.ob_radius,
                                                      feats=self.feats)
        self.renderer.reset()
        return obs, self.info

    def render(self, mode=None):
        self.renderer.render_env()
        image = self.renderer.get_image()
        # get_image() returns an RGB array; cv2.imshow expects BGR, so convert
        # with cv2.cvtColor if exact colours matter.
        cv2.imshow('Render', image)
        cv2.waitKey(20)
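FlatlandEnv depends on several module-level names this snippet does not define (speed_ration_map, stochastic_data, the tree_observation module and normalize_observation). Assuming those are in place, a minimal interaction sketch with arbitrary actions:

env = FlatlandEnv(n_cars=3, n_nodes=2)
obs, info = env.reset()
actions = {agent_id: 1 for agent_id in range(env.n_cars)}   # arbitrary choices
next_obs, rewards, done, info = env.step(actions)
env.render()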
Example #7
    for idx in range(env.get_num_agents()):
        tmp_agent = env.agents[idx]
        tmp_agent.speed_data["speed"] = 1 / (idx + 1)
    env_renderer.reset()
    # Here you can also further enhance the provided observation by means of normalization
    # See training navigation example in the baseline repository
    images = []
    score = 0
    # Run episode
    for step in range(1000):
        # Choose an action for each agent in the environment
        action_dict = controller.act(observation=obs)

        # Environment step, which returns the observations for all agents, their
        # corresponding rewards and whether they are done
        next_obs, all_rewards, done, _ = env.step(action_dict)
        env_renderer.render_env(show=False, show_observations=False, show_predictions=False)
        images.append(env_renderer.get_image())
        score += sum(all_rewards.values())  # all_rewards is a dict keyed by agent handle

        # Update replay buffer and train agent
        controller.env_reaction(obs, action_dict, all_rewards, next_obs, done)

        obs = next_obs.copy()
        if done['__all__']:
            break
    controller.end_of_round()
    print('Episode Nr. {}\t Score = {}'.format(trial, score))

    log_video(images, trial)
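log_video is referenced but not defined in this fragment. One possible implementation, writing the frames collected from env_renderer.get_image() to a GIF; the filename pattern and frame duration are assumptions.

import PIL.Image

def log_video(images, trial):
    # Convert the raw frame arrays to PIL images and write them out as a GIF.
    frames = [PIL.Image.fromarray(img) for img in images]
    if frames:
        frames[0].save(f"episode_{trial}.gif", save_all=True,
                       append_images=frames[1:], duration=60, loop=0)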
Example #8
from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.malfunction_generators import malfunction_from_file
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import rail_from_file
from flatland.envs.schedule_generators import schedule_from_file
from flatland.utils.rendertools import RenderTool

import PIL

observation_tree_depth = 2
observation_radius = 10
observation_max_path_depth = 30

predictor = ShortestPathPredictorForRailEnv(observation_max_path_depth)
tree_observation = TreeObsForRailEnv(max_depth=observation_tree_depth,
                                     predictor=predictor)

env_file = "D:/Sudhish/FYP/Final-Year-Project-main/Sudhish/envs-100-999/envs/Level_100.pkl"

env = RailEnv(
    width=1,
    height=1,
    rail_generator=rail_from_file(env_file),
    schedule_generator=schedule_from_file(env_file),
    malfunction_generator_and_process_data=malfunction_from_file(env_file),
    obs_builder_object=tree_observation)

obs, info = env.reset(True, True)
env_renderer = RenderTool(env, gl="PILSVG")
env_renderer.render_env()
image = env_renderer.get_image()
pil_image = PIL.Image.fromarray(image)
pil_image.show()
print("Env Loaded")
Example #9
class FlatlandMultiAgentEnv(MultiAgentEnv):
    """
    Wrap a Flatland RailEnv as an RLlib MultiAgentEnv.
    
    width, height, number_of_agents: int
    remove_agents_at_target: bool
    """
    def __init__(self,
                 width,
                 height,
                 rail_generator,
                 number_of_agents,
                 remove_agents_at_target,
                 obs_builder_object,
                 wait_for_all_done,
                 schedule_generator=random_schedule_generator(),
                 name=None):
        super().__init__()

        self.env = RailEnv(
            width=width,
            height=height,
            rail_generator=rail_generator,
            schedule_generator=schedule_generator,
            number_of_agents=number_of_agents,
            obs_builder_object=obs_builder_object,
            remove_agents_at_target=remove_agents_at_target,
        )

        self.wait_for_all_done = wait_for_all_done
        self.env_renderer = None
        self.agents_done = []
        self.frame_step = 0
        self.name = name
        self.number_of_agents = number_of_agents

        # Track when targets are reached. Only used for correct reward propagation
        # when using wait_for_all_done=True
        self.at_target = {i: False for i in range(self.number_of_agents)}

    def _running_agents(self):
        """
        Return IDs of the agents that are not done
        """
        agents = range(len(self.env.agents))
        return (i for i in agents if i not in self.agents_done)

    def _agents_not_at_target(self):
        """
        Return the number of agents that are not yet at their targets,
        clamped to at least 1 so it can safely be used as a divisor.
        Used when wait_for_all_done=True
        """
        return max(1, list(self.at_target.values()).count(False))

    def step(self, action_dict):
        """
        Env step for each agent, like a gym.step() call
        
        The action_dict object is a dict with str or int keys corresponding to agent IDs
        E.g: {'0': ..., '1': ..., ...} or {0: ..., 1: ..., ...}
        
        Return a tuple (obs, rewards, dones, infos) of per-agent dicts keyed by
        agent ID; the dones dict also carries the special "__all__" key.
        """
        obs, rewards, dones, infos = self.env.step(action_dict)
        o, r, d, i = {}, {}, {}, {}

        for agent in self._running_agents():
            o[agent] = obs[agent]
            r[agent] = rewards[agent] / self._agents_not_at_target()
            i[agent] = infos

            if self.wait_for_all_done:
                dones, r, i = self._process_all_done(agent, dones, r, i)

            d[agent] = dones[agent]

        d["__all__"] = dones["__all__"]

        for agent, done in dones.items():
            if agent != "__all__" and done:
                self.agents_done.append(agent)

        self.frame_step += 1

        return o, r, d, i

    def reset(self):
        """
        Return a dict {agent_id: agent_obs, ...}
        """
        self.agents_done = []
        obs, _ = self.env.reset()
        if self.env_renderer:
            self.env_renderer.set_new_rail()
        return obs

    def render(self, **kwargs):
        from flatland.utils.rendertools import RenderTool

        if not self.env_renderer:
            self.env_renderer = RenderTool(self.env, gl="PILSVG")
            self.env_renderer.set_new_rail()
        self.env_renderer.render_env(show=True,
                                     frames=False,
                                     show_observations=False,
                                     **kwargs)
        time.sleep(0.1)
        self.env_renderer.render_env(show=True,
                                     frames=False,
                                     show_observations=False,
                                     **kwargs)
        return self.env_renderer.get_image()

    def _process_all_done(self, agent, dones, r, i):
        # Do not count target reward more than once
        if self.at_target[agent]:
            r[agent] = 0.0

        # If agent is done, and the group is not done, and agent has
        # not previously reached the target
        if dones[agent] and not dones['__all__']:
            self.at_target[agent] = True

        # Ensure each individual agent is only marked 'done' when all are done
        for a in list(dones.keys()):
            dones[a] = dones['__all__']

        return dones, r, i

    @property
    def action_space(self):
        return Discrete(5)

    @property
    def observation_space(self):
        size, pow4 = 0, 1
        for _ in range(self.env.obs_builder.max_depth + 1):
            size += pow4
            pow4 *= 4
        observation_size = size * self.env.obs_builder.observation_dim
        return Box(-np.inf, np.inf, shape=(observation_size, ))
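A construction sketch for the wrapper above; the generator settings, agent count and name are illustrative, and the module is assumed to already import the symbols the class itself uses (gym spaces, numpy, random_schedule_generator, time).

from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.rail_generators import sparse_rail_generator

env = FlatlandMultiAgentEnv(
    width=25,
    height=25,
    rail_generator=sparse_rail_generator(max_num_cities=2),
    number_of_agents=2,
    remove_agents_at_target=True,
    obs_builder_object=TreeObsForRailEnv(max_depth=2),
    wait_for_all_done=False,
    name="flatland_example")

obs = env.reset()
actions = {agent_id: 2 for agent_id in obs}    # 2 = MOVE_FORWARD in Flatland
obs, rewards, dones, infos = env.step(actions)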