def load_new(cls, filename, load_from_package=None):
    """Restore a serialized RailEnv from *filename*.

    A throwaway 1x1 environment is constructed first (the file-based
    generators supply the real data), after which the complete saved
    state from the file is applied via ``set_full_state``.

    Returns a ``(env, env_dict)`` tuple: the restored environment and
    the raw state dictionary it was loaded from.
    """
    env_dict = cls.load_env_dict(filename, load_from_package=load_from_package)

    # TODO: inefficient - each one of these generators loads the complete env file.
    rail_generator = rail_gen.rail_from_file(
        filename, load_from_package=load_from_package)
    schedule_generator = sched_gen.schedule_from_file(
        filename, load_from_package=load_from_package)
    malfunction_data = mal_gen.malfunction_from_file(
        filename, load_from_package=load_from_package)

    env = rail_env.RailEnv(
        width=1,
        height=1,
        rail_generator=rail_generator,
        schedule_generator=schedule_generator,
        malfunction_generator_and_process_data=malfunction_data,
        obs_builder_object=DummyObservationBuilder(),
        record_steps=True,
    )

    # Dummy 1x1 grid; set_full_state below replaces it with the real rail map.
    env.rail = GridTransitionMap(1, 1)
    cls.set_full_state(env, env_dict)
    return env, env_dict
def handle_env_create(self, command):
    """
    Handles a ENV_CREATE command from the client

    Advances to the next test environment (if one is left), rebuilds the
    env from its serialized file, resets it, and replies with the initial
    observation. When all test envs are exhausted, replies with all-False
    payload fields so the client knows evaluation is over. In both cases
    the aggregate evaluation state is then refreshed and pushed out via
    handle_aicrowd_info_event.

    TODO: Add a high level summary of everything thats happening here.
    """
    self.simulation_count += 1
    if self.simulation_count < len(self.env_file_paths):
        """
        There are still test envs left that are yet to be evaluated
        """
        test_env_file_path = self.env_file_paths[self.simulation_count]
        print("Evaluating : {}".format(test_env_file_path))
        test_env_file_path = os.path.join(self.test_env_folder,
                                          test_env_file_path)
        # Rebuild the env from the serialized file. width/height are
        # dummy values: the file-based generators provide the real
        # dimensions on reset.
        del self.env
        self.env = RailEnv(
            width=1,
            height=1,
            rail_generator=rail_from_file(test_env_file_path),
            schedule_generator=schedule_from_file(test_env_file_path),
            malfunction_generator_and_process_data=malfunction_from_file(
                test_env_file_path),
            obs_builder_object=DummyObservationBuilder())

        if self.begin_simulation:
            # If begin simulation has already been initialized
            # atleast once: record the wall-clock duration of the
            # previous episode before starting the timer for this one.
            self.simulation_times.append(time.time() - self.begin_simulation)
        self.begin_simulation = time.time()

        # Placeholder entries for this episode's running statistics;
        # presumably updated per-step elsewhere (handle_env_step) — the
        # updating code is outside this view.
        self.simulation_rewards.append(0)
        self.simulation_rewards_normalized.append(0)
        self.simulation_percentage_complete.append(0)
        self.simulation_steps.append(0)

        self.current_step = 0

        _observation, _info = self.env.reset(regenerate_rail=True,
                                             regenerate_schedule=True,
                                             activate_agents=False,
                                             random_seed=RANDOM_SEED)

        if self.visualize:
            # Replace any renderer left over from the previous episode.
            if self.env_renderer:
                del self.env_renderer
            self.env_renderer = RenderTool(self.env, gl="PILSVG", )

        _command_response = {}
        _command_response['type'] = messages.FLATLAND_RL.ENV_CREATE_RESPONSE
        _command_response['payload'] = {}
        _command_response['payload']['observation'] = _observation
        _command_response['payload']['env_file_path'] = \
            self.env_file_paths[self.simulation_count]
        _command_response['payload']['info'] = _info
        _command_response['payload']['random_seed'] = RANDOM_SEED
    else:
        """
        All test env evaluations are complete
        """
        # False in every field signals "no more environments" to the client.
        _command_response = {}
        _command_response['type'] = messages.FLATLAND_RL.ENV_CREATE_RESPONSE
        _command_response['payload'] = {}
        _command_response['payload']['observation'] = False
        _command_response['payload']['env_file_path'] = False
        _command_response['payload']['info'] = False
        _command_response['payload']['random_seed'] = False

    self.send_response(_command_response, command)
    #####################################################################
    # Update evaluation state
    #####################################################################
    progress = np.clip(
        self.simulation_count * 1.0 / len(self.env_file_paths),
        0, 1)
    mean_reward = round(np.mean(self.simulation_rewards), 2)
    mean_normalized_reward = round(
        np.mean(self.simulation_rewards_normalized), 2)
    mean_percentage_complete = round(
        np.mean(self.simulation_percentage_complete), 3)
    self.evaluation_state["state"] = "IN_PROGRESS"
    self.evaluation_state["progress"] = progress
    self.evaluation_state["simulation_count"] = self.simulation_count
    # Primary score is completion rate; raw mean reward is secondary.
    self.evaluation_state["score"]["score"] = mean_percentage_complete
    self.evaluation_state["score"]["score_secondary"] = mean_reward
    self.evaluation_state["meta"][
        "normalized_reward"] = mean_normalized_reward
    self.handle_aicrowd_info_event(self.evaluation_state)
""" while True: time.sleep(10) return _response['payload'] if __name__ == "__main__": remote_client = FlatlandRemoteClient() def my_controller(obs, _env): _action = {} for _idx, _ in enumerate(_env.agents): _action[_idx] = np.random.randint(0, 5) return _action my_observation_builder = DummyObservationBuilder() episode = 0 obs = True while obs: obs, info = remote_client.env_create( obs_builder_object=my_observation_builder) if not obs: """ The remote env returns False as the first obs when it is done evaluating all the individual episodes """ break print("Episode : {}".format(episode)) episode += 1
print("Checkpoint not found, using untrained policy! (path: {})".format(checkpoint)) ##################################################################### # Main evaluation loop ##################################################################### evaluation_number = 0 while True: evaluation_number += 1 # We use a dummy observation and call TreeObsForRailEnv ourselves when needed. # This way we decide if we want to calculate the observations or not instead # of having them calculated every time we perform an env step. time_start = time.time() observation, info = remote_client.env_create( obs_builder_object=DummyObservationBuilder() ) env_creation_time = time.time() - time_start if not observation: # If the remote_client returns False on a `env_create` call, # then it basically means that your agent has already been # evaluated on all the required evaluation environments, # and hence it's safe to break out of the main evaluation loop. break print("Env Path : ", remote_client.current_env_path) print("Env Creation Time : ", env_creation_time) local_env = remote_client.env nb_agents = len(local_env.agents)
def handle_env_create(self, command):
    """
    Handles a ENV_CREATE command from the client

    Refuses (with an error response plus an exception) if the previous
    simulation has not been marked done. Otherwise advances to the next
    test environment, rebuilds it from its serialized file, resets it,
    and replies with the initial observation; when all test envs are
    exhausted, replies with all-False payload fields. Finally refreshes
    the aggregate evaluation state and pushes it to AICrowd.

    TODO: Add a high level summary of everything thats happening here.
    """
    if not self.simulation_done:
        # trying to reset a simulation before finishing the previous one
        _command_response = self._error_template(
            "CAN'T CREATE NEW ENV BEFORE PREVIOUS IS DONE")
        self.send_response(_command_response, command)
        raise Exception(_command_response['payload'])

    self.simulation_count += 1
    # Will be flipped back to True elsewhere once this episode finishes
    # (the episode-completion code is outside this view).
    self.simulation_done = False
    if self.simulation_count < len(self.env_file_paths):
        """
        There are still test envs left that are yet to be evaluated
        """
        test_env_file_path = self.env_file_paths[self.simulation_count]
        print("Evaluating : {}".format(test_env_file_path))
        test_env_file_path = os.path.join(self.test_env_folder,
                                          test_env_file_path)
        # Rebuild the env from the serialized file. width/height are
        # dummy values: the file-based generators provide the real
        # dimensions on reset.
        del self.env
        self.env = RailEnv(
            width=1,
            height=1,
            rail_generator=rail_from_file(test_env_file_path),
            schedule_generator=schedule_from_file(test_env_file_path),
            malfunction_generator_and_process_data=malfunction_from_file(
                test_env_file_path),
            obs_builder_object=DummyObservationBuilder())

        if self.begin_simulation:
            # If begin simulation has already been initialized
            # atleast once
            # This adds the simulation time for the previous episode
            self.simulation_times.append(time.time() - self.begin_simulation)
        self.begin_simulation = time.time()

        # Update evaluation metadata for the previous episode
        self.update_evaluation_metadata()

        # Start adding placeholders for the new episode
        self.simulation_env_file_paths.append(
            os.path.relpath(
                test_env_file_path,
                self.test_env_folder
            ))  # relative path
        self.simulation_rewards.append(0)
        self.simulation_rewards_normalized.append(0)
        self.simulation_percentage_complete.append(0)
        self.simulation_steps.append(0)

        self.current_step = 0

        _observation, _info = self.env.reset(regenerate_rail=True,
                                             regenerate_schedule=True,
                                             activate_agents=False,
                                             random_seed=RANDOM_SEED)

        if self.visualize:
            # Only attach a renderer for the envs selected for video
            # generation; otherwise disable any leftover renderer.
            current_env_path = self.env_file_paths[self.simulation_count]
            if current_env_path in self.video_generation_envs:
                self.env_renderer = RenderTool(self.env, gl="PILSVG", )
            elif self.env_renderer:
                self.env_renderer = False

        _command_response = {}
        _command_response['type'] = messages.FLATLAND_RL.ENV_CREATE_RESPONSE
        _command_response['payload'] = {}
        _command_response['payload']['observation'] = _observation
        _command_response['payload']['env_file_path'] = \
            self.env_file_paths[self.simulation_count]
        _command_response['payload']['info'] = _info
        _command_response['payload']['random_seed'] = RANDOM_SEED
    else:
        """
        All test env evaluations are complete
        """
        # False in every field signals "no more environments" to the client.
        _command_response = {}
        _command_response['type'] = messages.FLATLAND_RL.ENV_CREATE_RESPONSE
        _command_response['payload'] = {}
        _command_response['payload']['observation'] = False
        _command_response['payload']['env_file_path'] = False
        _command_response['payload']['info'] = False
        _command_response['payload']['random_seed'] = False

    self.send_response(_command_response, command)
    #####################################################################
    # Update evaluation state
    #####################################################################
    progress = np.clip(
        self.simulation_count * 1.0 / len(self.env_file_paths),
        0, 1)
    mean_reward, mean_normalized_reward, mean_percentage_complete = \
        self.compute_mean_scores()
    self.evaluation_state["state"] = "IN_PROGRESS"
    self.evaluation_state["progress"] = progress
    self.evaluation_state["simulation_count"] = self.simulation_count
    # Primary score is completion rate; raw mean reward is secondary.
    self.evaluation_state["score"]["score"] = mean_percentage_complete
    self.evaluation_state["score"]["score_secondary"] = mean_reward
    self.evaluation_state["meta"][
        "normalized_reward"] = mean_normalized_reward
    self.handle_aicrowd_info_event(self.evaluation_state)