Example No. 1
    def __init__(self, env_config, **kwargs):
        super().__init__(env_config, **kwargs)
        assert env_config['generator'] == 'sparse_rail_generator'
        self._env_config = env_config

        self._observation = make_obs(env_config['observation'],
                                     env_config.get('observation_config'))
        self._config = get_generator_config(env_config['generator_config'])

        if env_config.get('number_of_agents', None) is not None:
            self._config['number_of_agents'] = env_config['number_of_agents']

        # Overwrites with env_config seed if it exists
        if env_config.get('seed'):
            self._config['seed'] = env_config.get('seed')

        if not hasattr(env_config, 'worker_index') or (
                env_config.worker_index == 0 and env_config.vector_index == 0):
            print("=" * 50)
            print(self._config)
            print("=" * 50)

        self._env = RobustFlatlandGymEnv(
            rail_env=self._launch(),
            observation_space=self._observation.observation_space(),
            render=env_config.get('render'),
            regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
            regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset'],
            config=env_config,
            allow_noop=True)
        self.last_obs = None
Example No. 2
    def setup_policy_map(self, config: dict):
        obs_space = make_obs(config["env_config"]["observation"],
                             config["env_config"]["observation_config"]).observation_space()
        config["multiagent"] = {
            "policies": {"pol_" + str(i): (None, obs_space, FillingFlatlandGymEnv.action_space, {"agent_id": i})
                         for i in range(config["env_config"]["observation_config"]["max_n_agents"])},
            "policy_mapping_fn": lambda agent_id: "pol_" + str(agent_id)}
Example No. 3
    def __init__(self, config) -> None:
        super().__init__(config)
        self._observations = [
            make_obs(obs_name, config[obs_name]) for obs_name in config.keys()
        ]
        self._builder = CombinedObsForRailEnv(
            [o._builder for o in self._observations])
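Elsewhere in these examples (see Example No. 11) this combined observation is constructed through the observation registry, with one config key per sub-observation:

    obs_builder = make_obs("combined", {"path": None, "simple_meta": None}).builder()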
Example No. 4
def evaluate(config, run):
    start_time = time()
    obs_builder = make_obs(
        config["env_config"]['observation'],
        config["env_config"].get('observation_config')).builder()
    evaluation_number = 0
    total_reward = 0
    all_rewards = []
    sp_agent = ShortestPathAgent()

    while True:
        try:
            observation, info = remote_client.env_create(
                obs_builder_object=obs_builder)

            if not observation:
                break

            steps = 0

            evaluation_number += 1
            episode_start_info(evaluation_number, remote_client=remote_client)

            done = defaultdict(lambda: False)
            while True:
                try:
                    while not done['__all__']:
                        rail_actions = sp_agent.compute_actions(
                            observation, remote_client.env)
                        observation, all_rewards, done, info = remote_client.env_step(
                            rail_actions)
                        steps += 1
                        print('.', end='', flush=True)

                        if (time() - start_time) > TIME_LIMIT:
                            skip(done)
                            break

                    if done['__all__']:
                        total_reward = episode_end_info(
                            all_rewards,
                            total_reward,
                            evaluation_number,
                            steps,
                            remote_client=remote_client)
                        break

                except TimeoutException as err:
                    print(
                        "Timeout! Will skip this episode and go to the next.",
                        err)
                    break
        except TimeoutException as err:
            print(
                "Timeout during planning time. Will skip to next evaluation!",
                err)

    print("Evaluation of all environments complete...")
    print(remote_client.submit())
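The loop above relies on module-level names from the surrounding evaluation script; ShortestPathAgent, skip, episode_start_info and episode_end_info come from the same project and are not reproduced here. A minimal sketch of the remaining setup, assuming the standard Flatland remote-evaluator client (the TIME_LIMIT value is an illustrative placeholder):

    from collections import defaultdict
    from time import time

    from flatland.evaluators.client import FlatlandRemoteClient, TimeoutException

    remote_client = FlatlandRemoteClient()
    TIME_LIMIT = 8 * 60 * 60  # illustrative overall budget in seconds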
Example No. 5
    def __init__(self, env_config, fine_tune_env_path=None, max_steps=None, **kwargs) -> None:
        super().__init__(env_config.get("actions_are_logits", False), max_steps=max_steps)

        assert env_config['generator'] == 'sparse_rail_generator'
        self._env_config = env_config
        self._fine_tune_env_path = fine_tune_env_path

        self._observation = make_obs(env_config['observation'], env_config.get('observation_config'))
        self._config = get_generator_config(env_config['generator_config'])

        if env_config.get('number_of_agents', None) is not None:
            self._config['number_of_agents'] = env_config['number_of_agents']

        # Overwrites with env_config seed if it exists
        if env_config.get('seed'):
            self._config['seed'] = env_config.get('seed')

        if not hasattr(env_config, 'worker_index') or (env_config.worker_index == 0 and env_config.vector_index == 0):
            print("=" * 50)
            pprint(self._config)
            print("=" * 50)

        self._gym_env_class = self._gym_envs[env_config.get("gym_env", "default")]

        self._env = self._gym_env_class(
            rail_env=self._launch(),
            observation_space=self._observation.observation_space(),
            render=env_config.get('render'),
            regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
            regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset'],
            config=env_config,
            allow_noop=env_config.get('allow_noop', True)
        )

        if env_config['observation'] in self._sp_action_needed:
            self._env = ShortestPathActionWrapper(self._env)
        if env_config['observation'] == 'path' or env_config['observation'] == 'nr_conflicts_path':
            self._env = NoStopShortestPathActionWrapper(self._env)
        if env_config.get('priorization', False):
            self._env = PriorizationWrapper(self._env)
        if env_config.get('sparse_priorization', False):
            self._env = SparsePriorizationWrapper(self._env)
        if env_config.get('sparse_reward', False):
            self._env = SparseRewardWrapper(self._env, finished_reward=env_config.get('done_reward', 1),
                                            not_finished_reward=env_config.get('not_finished_reward', -1))
        if env_config.get('global_reward', False):
            self._env = GlobalRewardWrapper(self._env)
        if env_config.get('deadlock_reward', 0) != 0:
            self._env = DeadlockWrapper(self._env, deadlock_reward=env_config['deadlock_reward'])
        if env_config.get('resolve_deadlocks', False):
            deadlock_reward = env_config.get('deadlock_reward', 0)
            self._env = DeadlockResolutionWrapper(self._env, deadlock_reward)
        if env_config.get('skip_no_choice_cells', False):
            self._env = SkipNoChoiceCellsWrapper(self._env, env_config.get('accumulate_skipped_rewards', False),
                                                 discounting=env_config.get('discounting', 1.))
        if env_config.get('available_actions_obs', False):
            self._env = AvailableActionsWrapper(self._env, env_config.get('allow_noop', True))
        if env_config.get('fill_unavailable_actions', False):
            self._env = AvailableActionsWrapper(self._env, env_config.get('allow_noop', True))
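Note that each flag wraps the current self._env, so the if-branches compose wrappers from the inside out. A sketch of the resulting chain under assumed flags:

    # env_config = {'sparse_reward': True, 'skip_no_choice_cells': True}
    # => self._env == SkipNoChoiceCellsWrapper(SparseRewardWrapper(<base gym env>))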
Example No. 6
    def setup_hierarchical_policies(self, config: dict):
        obs_space: gym.spaces.Tuple = make_obs(
            config["env_config"]["observation"],
            config["env_config"]["observation_config"]).observation_space()
        config["multiagent"] = {
            "policies": {"meta": (None, obs_space.spaces[0], gym.spaces.Box(high=1, low=0, shape=(1,)), {}),
                         "agent": (None, obs_space.spaces[1], FillingFlatlandGymEnv.action_space, {})},
            "policy_mapping_fn": lambda agent_id: "meta" if 'meta' in str(agent_id) else "agent"
        }
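The mapping function routes purely by agent-id substring; a quick self-contained check of that behaviour (the id formats are assumptions):

    policy_mapping_fn = lambda agent_id: "meta" if 'meta' in str(agent_id) else "agent"
    assert policy_mapping_fn("meta_0") == "meta"
    assert policy_mapping_fn(3) == "agent"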
Example No. 7
    def __init__(self, config) -> None:
        super().__init__(config)
        self.interval = 10
        if config.get('interval', None) is not None:
            self.interval = config['interval']
            del config['interval']
        self._observations = [
            make_obs(obs_name, config[obs_name]) for obs_name in config.keys()
        ]
        self._builder = CombinedObsForRailEnv([
            o._builder for o in self._observations
        ], interval=self.interval)
Example No. 8
    def setup_grouping(config: dict):
        grouping = {
            "group_1": list(range(config["env_config"]["max_n_agents"])),
        }

        obs_space = Tuple([make_obs(config["env_config"]["observation"],
                                    config["env_config"]["observation_config"]).observation_space()
                           for _ in range(config["env_config"]["max_n_agents"])])

        act_space = Tuple([GlobalFlatlandGymEnv.action_space for _ in range(config["env_config"]["max_n_agents"])])

        register_env(
            "flatland_sparse_grouped",
            lambda config: FlatlandSparse(config).with_agent_groups(
                grouping, obs_space=obs_space, act_space=act_space))
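A sketch of a config that drives this grouping; the observation choice mirrors Example No. 13 and is an assumption here:

    config = {
        "env_config": {
            "max_n_agents": 5,
            "observation": "tree",
            "observation_config": {"max_depth": 2, "shortest_path_max_depth": 30},
        },
    }
    setup_grouping(config)  # illustrative direct call; registers "flatland_sparse_grouped", e.g. for QMIX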
Example No. 9
    def __init__(self, env_config):
        self._observation = make_obs(env_config['observation'],
                                     env_config.get('observation_config'))
        self._config = get_generator_config(env_config['generator_config'])
        self._global_obs = env_config.get('observation_config',
                                          {}).get('global_obs', False)

        # Overwrites with env_config seed if it exists
        if env_config.get('seed'):
            self._config['seed'] = env_config.get('seed')

        if env_config.get('gym_env', None) is not None:
            self._gym_env_class = self._gym_envs[env_config["gym_env"]]
        else:
            self._gym_env_class = (self._gym_envs["default"]
                                   if not self._global_obs
                                   else self._gym_envs["single"])

        self._env = self._gym_env_class(
            rail_env=self._launch(),
            observation_space=self._observation.observation_space(),
            regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
            regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset'],
            config=env_config)
        if env_config['observation'] == 'shortest_path':
            self._env = ShortestPathActionWrapper(self._env)
        if env_config.get('sparse_reward', False):
            self._env = SparseRewardWrapper(
                self._env,
                finished_reward=env_config.get('done_reward', 1),
                not_finished_reward=env_config.get('not_finished_reward', -1))
        if env_config.get('deadlock_reward', 0) != 0:
            self._env = DeadlockWrapper(
                self._env, deadlock_reward=env_config['deadlock_reward'])
        if env_config.get('resolve_deadlocks', False):
            deadlock_reward = env_config.get('deadlock_reward', 0)
            self._env = DeadlockResolutionWrapper(self._env, deadlock_reward)
        if env_config.get('skip_no_choice_cells', False):
            self._env = SkipNoChoiceCellsWrapper(
                self._env, env_config.get('accumulate_skipped_rewards', False))
        if env_config.get('available_actions_obs', False):
            self._env = AvailableActionsWrapper(self._env, allow_noop=False)
Example No. 10
    def __init__(self, env_config) -> None:
        super().__init__()
        self._env_config = env_config
        self._test = env_config.get('test', False)
        self._min_seed = env_config['min_seed']
        self._max_seed = env_config['max_seed']
        assert self._min_seed <= self._max_seed
        self._min_test_seed = env_config.get('min_test_seed', 0)
        self._max_test_seed = env_config.get('max_test_seed', 100)
        assert self._min_test_seed <= self._max_test_seed
        self._next_test_seed = self._min_test_seed
        self._num_resets = 0
        self._observation = make_obs(env_config['observation'],
                                     env_config.get('observation_config'))
        self._env = FlatlandGymEnv(
            rail_env=self._launch(),
            observation_space=self._observation.observation_space(),
            render=env_config.get('render'),
            regenerate_rail_on_reset=env_config['regenerate_rail_on_reset'],
            regenerate_schedule_on_reset=env_config['regenerate_schedule_on_reset'])
        if env_config['observation'] == 'shortest_path':
            self._env = ShortestPathActionWrapper(self._env)
        if env_config.get('sparse_reward', False):
            self._env = SparseRewardWrapper(
                self._env,
                finished_reward=env_config.get('done_reward', 1),
                not_finished_reward=env_config.get('not_finished_reward', -1))
        if env_config.get('deadlock_reward', 0) != 0:
            self._env = DeadlockWrapper(
                self._env, deadlock_reward=env_config['deadlock_reward'])
        if env_config.get('resolve_deadlocks', False):
            deadlock_reward = env_config.get('deadlock_reward', 0)
            self._env = DeadlockResolutionWrapper(self._env, deadlock_reward)
        if env_config.get('skip_no_choice_cells', False):
            self._env = SkipNoChoiceCellsWrapper(
                self._env, env_config.get('accumulate_skipped_rewards', False))
        if env_config.get('available_actions_obs', False):
            self._env = AvailableActionsWrapper(self._env)
Example No. 11
def get_env(config=None, rl=False):
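    # NOTE: `seed` is not defined in this snippet; it is assumed to be a
    # module-level constant in the source file.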
    n_agents = 16
    schedule_generator = sparse_schedule_generator(None)

    rail_generator = sparse_rail_generator(
        seed=seed,
        max_num_cities=3,
        grid_mode=False,
        max_rails_between_cities=2,
        max_rails_in_city=4,
    )

    if rl:
        obs_builder = make_obs("combined", {
            "path": None,
            "simple_meta": None
        }).builder()
    else:
        obs_builder = DummyObs()

    params = MalfunctionParameters(malfunction_rate=1 / 1000,
                                   max_duration=50,
                                   min_duration=20)
    malfunction_generator = ParamMalfunctionGen(params)

    env = RailEnv(
        width=28,
        height=28,
        rail_generator=rail_generator,
        schedule_generator=schedule_generator,
        number_of_agents=n_agents,
        malfunction_generator=malfunction_generator,
        obs_builder_object=obs_builder,
        remove_agents_at_target=True,
        random_seed=seed,
    )

    return env
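A minimal usage sketch, assuming the flatland 2.x API in which RailEnv.reset() returns an (observation, info) pair:

    env = get_env(rl=False)  # DummyObs, no learned policy required
    obs, info = env.reset()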
Example No. 12
def get_env(config=None, rl=False):
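    # NOTE: as in Example No. 11, `seed` is assumed to be a module-level
    # constant in the source file.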
    n_agents = 32
    schedule_generator = sparse_schedule_generator(None)

    rail_generator = sparse_rail_generator(
        seed=seed,
        max_num_cities=4,
        grid_mode=False,
        max_rails_between_cities=2,
        max_rails_in_city=4,
    )

    if rl:
        obs_builder = make_obs(
            config["env_config"]['observation'],
            config["env_config"].get('observation_config')).builder()
    else:
        obs_builder = DummyObs()

    params = MalfunctionParameters(malfunction_rate=1 / 1000,
                                   max_duration=50,
                                   min_duration=20)
    malfunction_generator = ParamMalfunctionGen(params)

    env = RailEnv(
        width=32,
        height=32,
        rail_generator=rail_generator,
        schedule_generator=schedule_generator,
        number_of_agents=n_agents,
        malfunction_generator=malfunction_generator,
        obs_builder_object=obs_builder,
        remove_agents_at_target=True,
        random_seed=seed,
    )

    return env
Example No. 13
# NOTE: assumed imports for this snippet; module paths follow the flatlander package layout.
import argparse

from gym.spaces import Tuple
from ray.tune.registry import register_env

from flatlander.envs.flatland_sparse import FlatlandSparse
from flatlander.envs.observations import make_obs
from flatlander.envs.utils.gym_env import FlatlandGymEnv

parser = argparse.ArgumentParser()
parser.add_argument("--run", type=str, default="QMIX")
parser.add_argument("--num-cpus", type=int, default=0)
parser.add_argument("--as-test", action="store_true")
parser.add_argument("--torch", action="store_true")
parser.add_argument("--stop-timesteps", type=int, default=50000)

if __name__ == "__main__":
    args = parser.parse_args()

    grouping = {
        "group_1": [0, 1, 2, 3, 4],
    }
    obs_space = Tuple([make_obs("tree", {"max_depth": 2, "shortest_path_max_depth": 30}).observation_space()
                       for _ in range(5)])

    act_space = Tuple([FlatlandGymEnv.action_space for _ in range(5)])

    register_env(
        "flatland_sparse_grouped",
        lambda config: FlatlandSparse(config).with_agent_groups(
            grouping, obs_space=obs_space, act_space=act_space))

    config = {
        "rollout_fragment_length": 50,
        "train_batch_size": 1000,
        "exploration_config": {
            "epsilon_timesteps": 5000,
            "final_epsilon": 0.05,