def load_new(cls, filename, load_from_package=None):
        """
        Reconstruct a RailEnv from a serialized environment file.

        The raw env dict is read once via ``cls.load_env_dict`` and the
        full state is restored onto a freshly built env via
        ``cls.set_full_state``.  Returns the ``(env, env_dict)`` pair.
        """
        env_dict = cls.load_env_dict(filename,
                                     load_from_package=load_from_package)

        # TODO: inefficient - each one of these generators loads the complete env file.
        source = dict(load_from_package=load_from_package)
        env = rail_env.RailEnv(
            width=1,
            height=1,
            rail_generator=rail_gen.rail_from_file(filename, **source),
            schedule_generator=sched_gen.schedule_from_file(filename, **source),
            malfunction_generator_and_process_data=mal_gen.malfunction_from_file(
                filename, **source),
            obs_builder_object=DummyObservationBuilder(),
            record_steps=True)

        # Placeholder grid; the real rail is installed by set_full_state below.
        env.rail = GridTransitionMap(1, 1)  # dummy

        cls.set_full_state(env, env_dict)
        return env, env_dict
# ===================== Example #2 (示例#2) =====================
    def handle_env_create(self, command):
        """
        Handle an ENV_CREATE command from the client.

        While test environments remain, advance to the next one: rebuild
        ``self.env`` from its serialized file, reset it, and reply with the
        initial observation, info dict and the fixed random seed.  Once all
        environments have been evaluated, reply with all-False payload
        fields so the client knows to stop.  After the response is sent,
        aggregate progress/score metrics are recomputed and forwarded via
        ``handle_aicrowd_info_event``.
        """
        self.simulation_count += 1
        if self.simulation_count < len(self.env_file_paths):
            """
            There are still test envs left that are yet to be evaluated 
            """
            test_env_file_path = self.env_file_paths[self.simulation_count]
            print("Evaluating : {}".format(test_env_file_path))
            test_env_file_path = os.path.join(self.test_env_folder,
                                              test_env_file_path)
            # Free the previous env before constructing the next one.
            del self.env
            # width/height are placeholders; the actual grid comes from the
            # serialized env file via the *_from_file generators on reset.
            self.env = RailEnv(
                width=1,
                height=1,
                rail_generator=rail_from_file(test_env_file_path),
                schedule_generator=schedule_from_file(test_env_file_path),
                malfunction_generator_and_process_data=malfunction_from_file(
                    test_env_file_path),
                obs_builder_object=DummyObservationBuilder())

            if self.begin_simulation:
                # If begin simulation has already been initialized at least
                # once, record the wall-clock time the previous episode took.
                self.simulation_times.append(time.time() -
                                             self.begin_simulation)
            self.begin_simulation = time.time()

            # Per-episode accumulators; updated during subsequent env steps.
            self.simulation_rewards.append(0)
            self.simulation_rewards_normalized.append(0)
            self.simulation_percentage_complete.append(0)
            self.simulation_steps.append(0)

            self.current_step = 0

            # Fixed seed keeps evaluation reproducible across submissions.
            _observation, _info = self.env.reset(regenerate_rail=True,
                                                 regenerate_schedule=True,
                                                 activate_agents=False,
                                                 random_seed=RANDOM_SEED)

            if self.visualize:
                # Recreate the renderer so it is bound to the new env.
                if self.env_renderer:
                    del self.env_renderer
                self.env_renderer = RenderTool(
                    self.env,
                    gl="PILSVG",
                )

            _command_response = {}
            _command_response[
                'type'] = messages.FLATLAND_RL.ENV_CREATE_RESPONSE
            _command_response['payload'] = {}
            _command_response['payload']['observation'] = _observation
            _command_response['payload'][
                'env_file_path'] = self.env_file_paths[self.simulation_count]
            _command_response['payload']['info'] = _info
            _command_response['payload']['random_seed'] = RANDOM_SEED
        else:
            """
            All test env evaluations are complete
            """
            # observation == False is the sentinel the client uses to stop.
            _command_response = {}
            _command_response[
                'type'] = messages.FLATLAND_RL.ENV_CREATE_RESPONSE
            _command_response['payload'] = {}
            _command_response['payload']['observation'] = False
            _command_response['payload']['env_file_path'] = False
            _command_response['payload']['info'] = False
            _command_response['payload']['random_seed'] = False

        self.send_response(_command_response, command)
        #####################################################################
        # Update evaluation state
        #####################################################################
        progress = np.clip(
            self.simulation_count * 1.0 / len(self.env_file_paths), 0, 1)
        mean_reward = round(np.mean(self.simulation_rewards), 2)
        mean_normalized_reward = round(
            np.mean(self.simulation_rewards_normalized), 2)
        mean_percentage_complete = round(
            np.mean(self.simulation_percentage_complete), 3)
        self.evaluation_state["state"] = "IN_PROGRESS"
        self.evaluation_state["progress"] = progress
        self.evaluation_state["simulation_count"] = self.simulation_count
        self.evaluation_state["score"]["score"] = mean_percentage_complete
        self.evaluation_state["score"]["score_secondary"] = mean_reward
        self.evaluation_state["meta"][
            "normalized_reward"] = mean_normalized_reward
        self.handle_aicrowd_info_event(self.evaluation_state)
# ===================== Example #3 (示例#3) =====================
            """
            while True:
                time.sleep(10)
        return _response['payload']


if __name__ == "__main__":
    remote_client = FlatlandRemoteClient()

    def my_controller(obs, _env):
        _action = {}
        for _idx, _ in enumerate(_env.agents):
            _action[_idx] = np.random.randint(0, 5)
        return _action

    my_observation_builder = DummyObservationBuilder()

    episode = 0
    obs = True
    while obs:
        obs, info = remote_client.env_create(
            obs_builder_object=my_observation_builder)
        if not obs:
            """
            The remote env returns False as the first obs
            when it is done evaluating all the individual episodes
            """
            break
        print("Episode : {}".format(episode))
        episode += 1
    print("Checkpoint not found, using untrained policy! (path: {})".format(checkpoint))

#####################################################################
# Main evaluation loop
#####################################################################
evaluation_number = 0

while True:
    evaluation_number += 1

    # We use a dummy observation and call TreeObsForRailEnv ourselves when needed.
    # This way we decide if we want to calculate the observations or not instead
    # of having them calculated every time we perform an env step.
    time_start = time.time()
    observation, info = remote_client.env_create(
        obs_builder_object=DummyObservationBuilder()
    )
    env_creation_time = time.time() - time_start

    if not observation:
        # If the remote_client returns False on a `env_create` call,
        # then it basically means that your agent has already been
        # evaluated on all the required evaluation environments,
        # and hence it's safe to break out of the main evaluation loop.
        break

    print("Env Path : ", remote_client.current_env_path)
    print("Env Creation Time : ", env_creation_time)

    local_env = remote_client.env
    nb_agents = len(local_env.agents)
# ===================== Example #5 (示例#5) =====================
    def handle_env_create(self, command):
        """
        Handle an ENV_CREATE command from the client.

        Refuses (with an error response and an exception) if the previous
        simulation has not finished.  Otherwise, while test environments
        remain, advance to the next one: rebuild ``self.env`` from its
        serialized file, record timing/metadata for the previous episode,
        reset the env, and reply with the initial observation, info dict
        and the fixed random seed.  Once all environments are done, reply
        with all-False payload fields so the client knows to stop.  After
        the response is sent, aggregate progress/score metrics are
        recomputed and forwarded via ``handle_aicrowd_info_event``.
        """
        if not self.simulation_done:
            # trying to reset a simulation before finishing the previous one
            _command_response = self._error_template(
                "CAN'T CREATE NEW ENV BEFORE PREVIOUS IS DONE")
            self.send_response(_command_response, command)
            raise Exception(_command_response['payload'])

        self.simulation_count += 1
        self.simulation_done = False
        if self.simulation_count < len(self.env_file_paths):
            """
            There are still test envs left that are yet to be evaluated 
            """
            test_env_file_path = self.env_file_paths[self.simulation_count]
            print("Evaluating : {}".format(test_env_file_path))
            test_env_file_path = os.path.join(self.test_env_folder,
                                              test_env_file_path)
            # Free the previous env before constructing the next one.
            del self.env
            # width/height are placeholders; the actual grid comes from the
            # serialized env file via the *_from_file generators on reset.
            self.env = RailEnv(
                width=1,
                height=1,
                rail_generator=rail_from_file(test_env_file_path),
                schedule_generator=schedule_from_file(test_env_file_path),
                malfunction_generator_and_process_data=malfunction_from_file(
                    test_env_file_path),
                obs_builder_object=DummyObservationBuilder())

            if self.begin_simulation:
                # If begin simulation has already been initialized
                # at least once
                # This adds the simulation time for the previous episode
                self.simulation_times.append(time.time() -
                                             self.begin_simulation)
            self.begin_simulation = time.time()

            # Update evaluation metadata for the previous episode
            self.update_evaluation_metadata()

            # Start adding placeholders for the new episode
            self.simulation_env_file_paths.append(
                os.path.relpath(test_env_file_path,
                                self.test_env_folder))  # relative path

            # Per-episode accumulators; updated during subsequent env steps.
            self.simulation_rewards.append(0)
            self.simulation_rewards_normalized.append(0)
            self.simulation_percentage_complete.append(0)
            self.simulation_steps.append(0)

            self.current_step = 0

            # Fixed seed keeps evaluation reproducible across submissions.
            _observation, _info = self.env.reset(regenerate_rail=True,
                                                 regenerate_schedule=True,
                                                 activate_agents=False,
                                                 random_seed=RANDOM_SEED)

            if self.visualize:
                # Only render envs explicitly whitelisted for video
                # generation; otherwise disable the renderer.
                current_env_path = self.env_file_paths[self.simulation_count]
                if current_env_path in self.video_generation_envs:
                    self.env_renderer = RenderTool(
                        self.env,
                        gl="PILSVG",
                    )
                elif self.env_renderer:
                    self.env_renderer = False

            _command_response = {}
            _command_response[
                'type'] = messages.FLATLAND_RL.ENV_CREATE_RESPONSE
            _command_response['payload'] = {}
            _command_response['payload']['observation'] = _observation
            _command_response['payload'][
                'env_file_path'] = self.env_file_paths[self.simulation_count]
            _command_response['payload']['info'] = _info
            _command_response['payload']['random_seed'] = RANDOM_SEED
        else:
            """
            All test env evaluations are complete
            """
            # observation == False is the sentinel the client uses to stop.
            _command_response = {}
            _command_response[
                'type'] = messages.FLATLAND_RL.ENV_CREATE_RESPONSE
            _command_response['payload'] = {}
            _command_response['payload']['observation'] = False
            _command_response['payload']['env_file_path'] = False
            _command_response['payload']['info'] = False
            _command_response['payload']['random_seed'] = False

        self.send_response(_command_response, command)
        #####################################################################
        # Update evaluation state
        #####################################################################
        progress = np.clip(
            self.simulation_count * 1.0 / len(self.env_file_paths), 0, 1)

        mean_reward, mean_normalized_reward, mean_percentage_complete = self.compute_mean_scores(
        )

        self.evaluation_state["state"] = "IN_PROGRESS"
        self.evaluation_state["progress"] = progress
        self.evaluation_state["simulation_count"] = self.simulation_count
        self.evaluation_state["score"]["score"] = mean_percentage_complete
        self.evaluation_state["score"]["score_secondary"] = mean_reward
        self.evaluation_state["meta"][
            "normalized_reward"] = mean_normalized_reward
        self.handle_aicrowd_info_event(self.evaluation_state)