def make_unity_env(self, env_name, float_params=None, time_scale=1,
                   seed=None, worker_id=None, **kwargs):
    """
    Creates a gym environment from a Unity game.

    env_name: str
        the path to the game
    float_params: dict or None
        a dict of argument settings for the Unity environment;
        keys vary by environment
    time_scale: float
        argument to set Unity's time scale; this may have less effect on
        gym-wrapped versions of Unity environments
    seed: int
        the seed for randomness (defaults to the current time when None)
    worker_id: int
        must be a unique worker id for each Unity process on this machine
    """
    if float_params is None:
        float_params = dict()
    if seed is None:
        seed = int(time.time())
    path = os.path.expanduser(env_name)
    channel = EngineConfigurationChannel()
    env_channel = EnvironmentParametersChannel()
    channel.set_configuration_parameters(time_scale=time_scale)
    for k, v in float_params.items():
        if k == "validation" and v >= 1:
            print("Game in validation mode")
        env_channel.set_float_parameter(k, float(v))
    if worker_id is None:
        worker_id = seed % 500 + 1
    env_made = False
    n_loops = 0
    while not env_made and n_loops < 50:
        try:
            env = UnityEnvironment(file_name=path,
                                   side_channels=[channel, env_channel],
                                   worker_id=worker_id,
                                   seed=seed)
            env_made = True
        except Exception:
            print("Error encountered making environment, trying new worker_id")
            worker_id = (worker_id + 1 + int(np.random.random() * 100)) % 500
            try:
                env.close()
            except Exception:
                pass
            n_loops += 1
    env = UnityToGymWrapper(env, allow_multiple_obs=True)
    return env
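A hedged usage sketch for the helper above. The snippet defines a method, but its body never touches `self`, so a placeholder can be passed; the build path is illustrative, and `validation` is the only parameter key named in the snippet.

env = make_unity_env(
    None,  # `self` placeholder; the body does not use it
    "~/games/LocationGame/LocationGame.x86_64",  # hypothetical build path
    float_params={"validation": 0},
    time_scale=1,
    seed=1234,
)
obs = env.reset()
env.close()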
def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
    """
    Helper method to send sampler settings over EnvironmentParametersChannel.
    Calls the constant sampler type set method.
    :param key: environment parameter to be sampled
    :param env_channel: The EnvironmentParametersChannel to communicate
        sampler settings to the environment
    """
    env_channel.set_float_parameter(key, self.value)
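This reads like the `apply` of a constant sampler; a minimal sketch of a surrounding class, assuming only a `value` field (the hypothetical `ConstantSampler` name and the `"gravity"` key are illustrative):

from dataclasses import dataclass

from mlagents_envs.side_channel.environment_parameters_channel import EnvironmentParametersChannel


@dataclass
class ConstantSampler:
    # Hypothetical holder for a fixed parameter value; real sampler
    # settings classes carry more state than this.
    value: float = 0.0

    def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
        env_channel.set_float_parameter(key, self.value)


# Usage sketch: queue a fixed "gravity" parameter on the channel.
# The value is sent with the next environment reset/step.
channel = EnvironmentParametersChannel()
ConstantSampler(value=9.81).apply("gravity", channel)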
class UnityWrapper(object):
    def __init__(self, env_args):
        self.engine_configuration_channel = EngineConfigurationChannel()
        if env_args['train_mode']:
            self.engine_configuration_channel.set_configuration_parameters(
                time_scale=env_args['train_time_scale'])
        else:
            self.engine_configuration_channel.set_configuration_parameters(
                width=env_args['width'],
                height=env_args['height'],
                quality_level=env_args['quality_level'],
                time_scale=env_args['inference_time_scale'],
                target_frame_rate=env_args['target_frame_rate'])
        self.float_properties_channel = EnvironmentParametersChannel()
        if env_args['file_path'] is None:
            self._env = UnityEnvironment(base_port=5004,
                                         seed=env_args['env_seed'],
                                         side_channels=[
                                             self.engine_configuration_channel,
                                             self.float_properties_channel
                                         ])
        else:
            unity_env_dict = load_yaml('/'.join(
                [os.getcwd(), 'rls', 'envs', 'unity_env_dict.yaml']))
            self._env = UnityEnvironment(
                file_name=env_args['file_path'],
                base_port=env_args['port'],
                no_graphics=not env_args['render'],
                seed=env_args['env_seed'],
                side_channels=[
                    self.engine_configuration_channel,
                    self.float_properties_channel
                ],
                additional_args=[
                    '--scene',
                    str(unity_env_dict.get(env_args.get('env_name', 'Roller'), 'None')),
                    '--n_agents',
                    str(env_args.get('env_num', 1))
                ])
        self.reset_config = env_args['reset_config']

    def reset(self, **kwargs):
        reset_config = kwargs.get('reset_config', None) or self.reset_config
        for k, v in reset_config.items():
            self.float_properties_channel.set_float_parameter(k, v)
        self._env.reset()

    def __getattr__(self, name):
        if name.startswith('_'):
            raise AttributeError(
                "attempted to get missing private attribute '{}'".format(name))
        return getattr(self._env, name)
import pytest

from mlagents_envs.exception import UnityCommunicationException
from mlagents_envs.side_channel.environment_parameters_channel import EnvironmentParametersChannel
from mlagents_envs.side_channel.incoming_message import IncomingMessage
from mlagents_envs.side_channel.raw_bytes_channel import RawBytesChannel
from mlagents_envs.side_channel.side_channel_manager import SideChannelManager


def test_environment_parameters():
    sender = EnvironmentParametersChannel()
    # We use a raw bytes channel to interpret the data
    receiver = RawBytesChannel(sender.channel_id)

    sender.set_float_parameter("param-1", 0.1)
    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)
    message = IncomingMessage(receiver.get_and_clear_received_messages()[0])
    key = message.read_string()
    dtype = message.read_int32()
    value = message.read_float32()
    assert key == "param-1"
    assert dtype == EnvironmentParametersChannel.EnvironmentDataTypes.FLOAT
    assert abs(value - 0.1) < 1e-8

    sender.set_float_parameter("param-1", 0.1)
    sender.set_float_parameter("param-2", 0.1)
    sender.set_float_parameter("param-3", 0.1)
    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)
    assert len(receiver.get_and_clear_received_messages()) == 3

    with pytest.raises(UnityCommunicationException):
        # try to send data back to the EnvironmentParametersChannel,
        # which is outgoing-only
        sender.set_float_parameter("param-1", 0.1)
        data = SideChannelManager([sender]).generate_side_channel_messages()
        SideChannelManager([sender]).process_side_channel_message(data)
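The payload the receiver decodes above is just a string key, an int32 type tag, and a float32 value. A minimal round trip through the mlagents_envs message helpers (no Unity process involved) makes that format explicit; this is a sketch, not part of the test suite:

from mlagents_envs.side_channel.environment_parameters_channel import EnvironmentParametersChannel
from mlagents_envs.side_channel.incoming_message import IncomingMessage
from mlagents_envs.side_channel.outgoing_message import OutgoingMessage

msg = OutgoingMessage()
msg.write_string("param-1")
msg.write_int32(EnvironmentParametersChannel.EnvironmentDataTypes.FLOAT)
msg.write_float32(0.1)

decoded = IncomingMessage(bytes(msg.buffer))
assert decoded.read_string() == "param-1"
assert decoded.read_int32() == EnvironmentParametersChannel.EnvironmentDataTypes.FLOAT
assert abs(decoded.read_float32() - 0.1) < 1e-8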
def initialize_all_side_channels(self, initialize_config, engine_config):
    """Initialize all side channels."""
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration_parameters(**engine_config)
    float_properties_channel = EnvironmentParametersChannel()
    float_properties_channel.set_float_parameter('env_copies', self._n_copies)
    for k, v in initialize_config.items():
        float_properties_channel.set_float_parameter(k, v)
    return dict(engine_configuration_channel=engine_configuration_channel,
                float_properties_channel=float_properties_channel)
def initialize_all_side_channels(self, kwargs):
    """Initialize all side channels."""
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration_parameters(
        width=kwargs['width'],
        height=kwargs['height'],
        quality_level=kwargs['quality_level'],
        time_scale=1 if bool(kwargs.get('inference', False)) else kwargs['time_scale'],
        target_frame_rate=kwargs['target_frame_rate'],
        capture_frame_rate=kwargs['capture_frame_rate'])
    float_properties_channel = EnvironmentParametersChannel()
    for k, v in kwargs.get('initialize_config', {}).items():
        float_properties_channel.set_float_parameter(k, v)
    return dict(engine_configuration_channel=engine_configuration_channel,
                float_properties_channel=float_properties_channel)
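A hedged sketch of how the returned channel dict might be wired into an environment; `wrapper`, the build path, and the `obstacle_density` key are hypothetical stand-ins:

from mlagents_envs.environment import UnityEnvironment

channels = wrapper.initialize_all_side_channels(dict(
    width=84, height=84, quality_level=1, time_scale=20,
    target_frame_rate=-1, capture_frame_rate=60,
    initialize_config={"obstacle_density": 0.3},
))
env = UnityEnvironment(file_name="path/to/build",
                       side_channels=list(channels.values()))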
def __init__(self, env_config):
    self.worker_index = 0
    if 'SM_CHANNEL_TRAIN' in os.environ:
        env_name = os.environ['SM_CHANNEL_TRAIN'] + '/' + env_config['env_name']
        os.chmod(env_name, 0o755)
        print("Changed environment binary into executable mode.")
        # Try connecting to the Unity3D game instance.
        while True:
            try:
                channel = EnvironmentParametersChannel()
                unity_env = UnityEnvironment(
                    env_name,
                    no_graphics=True,
                    worker_id=self.worker_index,
                    side_channels=[channel],
                    additional_args=['-logFile', 'unity.log'])
                channel.set_float_parameter("simulation_mode", 1.0)
            except UnityWorkerInUseException:
                self.worker_index += 1
            else:
                break
    else:
        env_name = env_config['env_name']
        while True:
            try:
                unity_env = default_registry[env_name].make(
                    no_graphics=True,
                    worker_id=self.worker_index,
                    additional_args=['-logFile', 'unity.log'])
            except UnityWorkerInUseException:
                self.worker_index += 1
            else:
                break
    self.env = UnityToGymWrapper(unity_env)
    self.action_space = self.env.action_space
    self.observation_space = self.env.observation_space
import sys

import numpy as np
from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.environment_parameters_channel import EnvironmentParametersChannel

# setup environment
if sys.platform == 'win32':
    env_build = "../env/FlyCamera/windows/FlyCamera.exe"
elif sys.platform == 'linux':
    env_build = "../env/FlyCamera/linux/FlyCamera.x86_64"
elif sys.platform == "darwin":
    env_build = "../env/FlyCamera/mac.app"
else:
    raise AttributeError("{} platform is not supported.".format(sys.platform))
channel = EnvironmentParametersChannel()
unity_env = UnityEnvironment(env_build, side_channels=[channel])
channel.set_float_parameter("key_speed", 10.0)
channel.set_float_parameter("cam_sens", 0.25)
env = UnityToGymWrapper(unity_env, uint8_visual=True)

# interface
max_mouse_move = 10  # in pixels; limits mouse "jump" due to slow in-loop processing
mouse_position = np.zeros((2,))

def mouse_move(event):
    global mouse_position
    x, y = event.xdata, event.ydata
    mouse_position = np.array([x, y])

key_wasd = np.array([False] * 4)

def key_press(event):
    # NOTE: cannot handle multiple key presses at the same time
    global key_wasd
    try:
        key = event.key.lower()  # completed per the matching handler in the FreeFall example below
    except AttributeError:
        key = event.key
if False:  # sanity check
    # the current environment locks motion along the x and z axes
    dt = 1. / UNITY_STEP_FREQ
    acc = act * thrust_multiplier / mass
    next_velo_y = velo[1] + (GRAVITY + acc) * dt
    velo_y_err = np.absolute(next_velo_y - obs[1][4])
    # use velocity to compute distance
    dist_curr = dist_curr + obs[1][4] * dt
    dist_err = np.absolute(dist_curr - process_ray(obs[1]))
    print("Velocity error (y-axis) = {}, Distance error = {}".format(velo_y_err, dist_err))

velo = obs[1][3:]
distance_to_ground = process_ray(obs[1])
ax.set_title("Velocity = ({:.2f}, {:.2f}, {:.2f})\n Distance to Ground: {:.8f}".format(
    *velo, distance_to_ground))

# example of resetting the environment
if False:
    i += 1
    if i > 10:
        channel.set_float_parameter("end_episode", 1.0)
        i = 0
    else:
        channel.set_float_parameter("end_episode", 0.0)

# update mpl windows
fig.canvas.draw()
plt.pause(0.05)
class UnitySamplerSequenceLearning(ABC, Sampler):
    warning_done = False

    def __init__(self, dataset, name_dataset_unity, unity_env_params,
                 nSc, nSt, nFc, nFt, k, size_canvas, grayscale,
                 play_mode=False, change_lights=False,
                 place_camera_mode=PlaceCamerasMode.RND,
                 get_labels_mode=GetLabelsMode.RND,
                 train_comparison_type=TrainComparisonType.ALL,
                 batch_provider_args=None, episodes_per_epoch=999999,
                 channel=0):
        if batch_provider_args is None:
            batch_provider_args = no_batch_args
        self.place_camera_mode = place_camera_mode
        self.get_labels_mode = get_labels_mode
        self.episodes_per_epoch = episodes_per_epoch
        self.dataset = dataset
        self.k = k
        self.nSc = nSc
        self.nSt = nSt
        self.nFc = nFc
        self.grayscale = grayscale
        self.nFt = nFt
        self.matrix_values = None
        if play_mode:
            self.scene = None
            channel = 0
        else:
            if os.name == 'nt':
                machine_name = 'win'
                ext = 'exe'
            else:
                machine_name = 'linux'
                ext = 'x86_64'
            self.scene_path = f'./Unity-ML-Agents-Computer-Vision/Builds/Builds_{machine_name}/SequenceLearning/'
            self.scene_folder = f'k{self.k}_nSt{self.nSt}_nSc{self.nSc}_nFt{self.nFt}_nFc{self.nFc}_sc{size_canvas[0]}_g{int(self.grayscale)}'
            self.scene = f'{self.scene_path}/{self.scene_folder}/scene.{ext}'
            if not os.path.exists(self.scene):
                assert False, f"Unity scene {self.scene} generator does not exist, create it in Windows!"

        log.set_log_level('INFO')
        self.additional_arguments = {
            '-dataset': name_dataset_unity,
            '-place_camera_mode': self.place_camera_mode.value,
            '-get_labels_mode': self.get_labels_mode.value,
            '-train_comparison_type': train_comparison_type.value,
            '-change_lights': int(change_lights),
            '-logFile': 'logout.txt',
            '-repeat_same_batch': -1,
            **batch_provider_args
        }
        self.observation_channel = EnvironmentParametersChannel()
        while True:
            try:
                env_params_channel = StringEnvParamsChannel(
                    "621f0a70-4f87-11ea-a6bf-784f4387d1f7")
                debug_channel = StringDebugLogChannel(
                    "8e8d2cbd-ea04-444d-9180-56ed79a2b94e")
                print(f"\n*** Trying to open Unity scene with dataset: {name_dataset_unity}, folder: {self.scene_folder}")
                self.env = UnityEnvironment(
                    file_name=self.scene,
                    seed=batch_provider_args['-seed'],
                    side_channels=[self.observation_channel,
                                   env_params_channel,
                                   debug_channel],
                    no_graphics=False,
                    worker_id=channel,
                    additional_args=list(
                        np.array([[k, v] for k, v in self.additional_arguments.items()]).flatten()),
                    timeout_wait=180 if batch_provider_args['-use_batch_provider'] == 1 else 60)
                break
            except UnityWorkerInUseException:
                channel += 1

        self.env_params = unity_env_params(self.observation_channel)
        self.env.reset()
        self.observation_channel.set_float_parameter("newLevel", float(0))
        self.behaviour_names = list(self.env.behavior_specs.keys())
        self.num_objects = env_params_channel.num_objects
        self.labels = None
        self.camera_positions = None
        self.tot_num_frames_each_iter = self.k * (self.nSt * self.nFt + self.nSc * self.nFc)
        self.tot_num_matching = self.k
        self.num_labels_passed_by_unity = self.k * (2 if self.nSc > 0 else 1)
        self.tot_num_frames_each_comparison = int(
            self.tot_num_frames_each_iter / self.tot_num_matching)
        self.max_length_elements = np.max(
            (self.tot_num_matching, self.tot_num_frames_each_iter))
        self.dummy_labels = np.empty(self.tot_num_frames_each_iter)
        self.images = []

    def __len__(self):
        return self.episodes_per_epoch

    def update_optional_arguments(self):
        pass

    def send_episode_info(self, idx):
        pass

    def __iter__(self):
        # vh1, vh2, vh3 = [], [], []
        for idx in range(self.episodes_per_epoch):
            self.send_episode_info(idx)
            # Remember that the images are passed in alphabetical order (which is
            # C0, C1, T0, T1 ..), whereas the camera positions are passed in a more
            # convenient format: organized by number of matchings (k), and inside
            # that all the Cs, then all the Ts.
            # labels is a list of size k with [[C, T] * k]
            self.env.step()
            self.observation_channel.set_float_parameter("newLevel", float(0))
            DS, TS = self.env.get_steps(self.behaviour_names[0])
            # when the agent receives an action, it sets up a new batch;
            # just give anything as an action, it doesn't matter here
            self.env.set_actions(self.behaviour_names[0], np.array([[1]]))
            if self.nSc > 0:
                labels = DS.obs[-1][0][:self.num_labels_passed_by_unity].astype(int).reshape((-1, 2))
            else:
                labels = torch.tensor(
                    DS.obs[-1][0][:self.num_labels_passed_by_unity]).type(torch.LongTensor)
            camera_positions = DS.obs[-1][0][self.num_labels_passed_by_unity:].reshape(
                (self.tot_num_matching, self.tot_num_frames_each_comparison, 3))
            self.images = [i[0] for i in DS.obs[:-1]]
            # .reshape((self.tot_num_frames_each_iter, self.tot_num_frames_each_comparison, 64, 64, 3))

            ##################################~~~~~~DEBUG~~~~~~###############################
            # _, self.ax = framework_utils.create_sphere()
            # vh1, vh2 = [], []
            # import matplotlib.pyplot as plt
            # plt.show()
            # import copy
            # def unity2python(v):
            #     v = copy.deepcopy(v)
            #     v.T[[1, 2]] = v.T[[2, 1]]
            #     return v
            # for idx, c in enumerate(camera_positions):
            #     if vh1:
            #         # [i.remove() for i in vh1]
            #         # [i.remove() for i in vh2]
            #         vh1 = []
            #         vh2 = []
            #     for i in range(len(camera_positions[0]) - 1):
            #         vh2.append(framework_utils.add_norm_vector(unity2python(c[i + 1]), 'r', ax=self.ax))
            #     vh1.append(framework_utils.add_norm_vector(unity2python(c[0]), 'k', ax=self.ax))
            #     ## ali = framework_utils.align_vectors(c, t)
            #     ## vh3 = framework_utils.add_norm_vector(ali, 'r', ax=self.ax)
            ##################################################################################

            self.labels = labels
            self.camera_positions = camera_positions
            batch = self.images[:]
            self.post_process_labels()
            # yield [[b, l] for b, l in zip(batch, np.hstack((self.labels[:, 0], self.labels[:, 1])))]  <--- if you want to have same-length labels and images
            yield [[b, l] for b, l in zip(batch, labels)] if self.nSc == 0 else batch

    def post_process_labels(self):
        pass
class UnityWrapperProcess:
    def __init__(self,
                 conn: multiprocessing.connection.Connection = None,
                 train_mode=True,
                 file_name=None,
                 worker_id=0,
                 base_port=5005,
                 no_graphics=True,
                 seed=None,
                 scene=None,
                 additional_args=None,
                 n_agents=1):
        """
        Args:
            train_mode: If in train mode, Unity will speed up
            file_name: The executable path. The UnityEnvironment will run in editor if None
            worker_id: Offset from base_port
            base_port: The port used to communicate with Unity. It will be set to 5004
                automatically when running in the editor.
            no_graphics: If Unity runs in no-graphics mode. It must be set to False
                if Unity has a camera sensor.
            seed: Random seed
            scene: The scene name
            n_agents: The agent count
        """
        self.scene = scene
        self.n_agents = n_agents

        seed = seed if seed is not None else np.random.randint(0, 65536)
        additional_args = [] if additional_args is None else additional_args.split(' ')

        self.engine_configuration_channel = EngineConfigurationChannel()
        self.environment_parameters_channel = EnvironmentParametersChannel()
        self.environment_parameters_channel.set_float_parameter('env_copys', float(n_agents))

        if conn:
            try:
                from algorithm import config_helper
                config_helper.set_logger()
            except Exception:
                pass
            self._logger = logging.getLogger(f'UnityWrapper.Process_{os.getpid()}')
        else:
            self._logger = logging.getLogger('UnityWrapper.Process')

        self._env = UnityEnvironment(
            file_name=file_name,
            worker_id=worker_id,
            base_port=base_port if file_name else None,
            no_graphics=no_graphics and train_mode,
            seed=seed,
            additional_args=['--scene', scene] + additional_args,
            side_channels=[self.engine_configuration_channel,
                           self.environment_parameters_channel])

        self.engine_configuration_channel.set_configuration_parameters(
            width=200 if train_mode else 1280,
            height=200 if train_mode else 720,
            quality_level=5,
            time_scale=20 if train_mode else 1)

        self._env.reset()
        self.behavior_name = list(self._env.behavior_specs)[0]

        if conn:
            try:
                while True:
                    cmd, data = conn.recv()
                    if cmd == INIT:
                        conn.send(self.init())
                    elif cmd == RESET:
                        conn.send(self.reset(data))
                    elif cmd == STEP:
                        conn.send(self.step(*data))
                    elif cmd == CLOSE:
                        self.close()
            except Exception:
                self._logger.error(traceback.format_exc())

    def init(self):
        """
        Returns:
            observation shapes: tuple[(o1, ), (o2, ), (o3_1, o3_2, o3_3), ...]
            discrete action size: int, product of all action branches
            continuous action size: int
        """
        behavior_spec = self._env.behavior_specs[self.behavior_name]
        obs_names = [o.name for o in behavior_spec.observation_specs]
        self._logger.info(f'Observation names: {obs_names}')
        obs_shapes = [o.shape for o in behavior_spec.observation_specs]
        self._logger.info(f'Observation shapes: {obs_shapes}')

        self._empty_action = behavior_spec.action_spec.empty_action

        discrete_action_size = 0
        if behavior_spec.action_spec.discrete_size > 0:
            discrete_action_size = 1
            action_product_list = []
            for action, branch_size in enumerate(behavior_spec.action_spec.discrete_branches):
                discrete_action_size *= branch_size
                action_product_list.append(range(branch_size))
                self._logger.info(
                    f"Discrete action branch {action} has {branch_size} different actions")
            self.action_product = np.array(list(itertools.product(*action_product_list)))

        continuous_action_size = behavior_spec.action_spec.continuous_size
        self._logger.info(f'Continuous action size: {continuous_action_size}')

        self.d_action_size = discrete_action_size
        self.c_action_size = continuous_action_size

        for o in behavior_spec.observation_specs:
            if len(o.shape) >= 3:
                self.engine_configuration_channel.set_configuration_parameters(quality_level=5)
                break

        return obs_shapes, discrete_action_size, continuous_action_size

    def reset(self, reset_config=None):
        """
        Returns:
            observations: list[(NAgents, o1), (NAgents, o2), (NAgents, o3_1, o3_2, o3_3)]
        """
        reset_config = {} if reset_config is None else reset_config
        for k, v in reset_config.items():
            self.environment_parameters_channel.set_float_parameter(k, float(v))

        self._env.reset()
        decision_steps, terminal_steps = self._env.get_steps(self.behavior_name)

        return [obs.astype(np.float32) for obs in decision_steps.obs]

    def step(self, d_action, c_action):
        """
        Args:
            d_action: (NAgents, discrete_action_size), one-hot-like action
            c_action: (NAgents, continuous_action_size)

        Returns:
            observations: list[(NAgents, o1), (NAgents, o2), (NAgents, o3_1, o3_2, o3_3)]
            rewards: (NAgents, )
            done: (NAgents, ), bool
            max_step: (NAgents, ), bool
        """
        if self.d_action_size:
            d_action = np.argmax(d_action, axis=1)
            d_action = self.action_product[d_action]

        self._env.set_actions(self.behavior_name,
                              ActionTuple(continuous=c_action, discrete=d_action))
        self._env.step()
        decision_steps, terminal_steps = self._env.get_steps(self.behavior_name)

        tmp_terminal_steps = terminal_steps
        while len(decision_steps) == 0:
            self._env.set_actions(self.behavior_name, self._empty_action(0))
            self._env.step()
            decision_steps, terminal_steps = self._env.get_steps(self.behavior_name)
            tmp_terminal_steps.agent_id = np.concatenate(
                [tmp_terminal_steps.agent_id, terminal_steps.agent_id])
            tmp_terminal_steps.reward = np.concatenate(
                [tmp_terminal_steps.reward, terminal_steps.reward])
            tmp_terminal_steps.interrupted = np.concatenate(
                [tmp_terminal_steps.interrupted, terminal_steps.interrupted])

        reward = decision_steps.reward
        reward[tmp_terminal_steps.agent_id] = tmp_terminal_steps.reward

        done = np.full([len(decision_steps), ], False, dtype=bool)
        done[tmp_terminal_steps.agent_id] = True

        max_step = np.full([len(decision_steps), ], False, dtype=bool)
        max_step[tmp_terminal_steps.agent_id] = tmp_terminal_steps.interrupted

        return ([obs.astype(np.float32) for obs in decision_steps.obs],
                decision_steps.reward.astype(np.float32),
                done,
                max_step)

    def close(self):
        self._env.close()
        self._logger.warning(f'Process {os.getpid()} exits')
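A hedged usage sketch of the process wrapper above when used in-process (no connection object); the build path and scene name are hypothetical:

env = UnityWrapperProcess(conn=None,
                          train_mode=True,
                          file_name="path/to/build",  # hypothetical
                          scene="MainScene",          # hypothetical
                          n_agents=4)
obs_shapes, d_action_size, c_action_size = env.init()
obs_list = env.reset(reset_config={"difficulty": 1})  # illustrative key
env.close()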
import sys

from colorama import Fore
from mlagents_envs.base_env import BaseEnv
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.environment_parameters_channel import EnvironmentParametersChannel


def get_environment(config: Config) -> BaseEnv:
    channel = EnvironmentParametersChannel()
    file_name = None
    if config.RuntimeArgs.run_in_unity:
        file_name = None
        print(Fore.CYAN + "Environment set. Press play within Unity" + Fore.RESET)
    elif sys.platform.startswith("win"):
        file_name = 'Build/GridWorld.exe'
    elif sys.platform.startswith("darwin"):
        file_name = 'Build.app/Contents/MacOS/GridWorld'
    elif sys.platform.startswith("linux"):
        raise Exception("Tell chris to support linux")
    else:
        raise Exception("Unable to find which executable to run for system: " + sys.platform)

    # Load
    env = UnityEnvironment(file_name=file_name, side_channels=[channel])

    # Set the channel environment accordingly
    channel.set_float_parameter("num_targets", config.Game.num_targets)
    channel.set_float_parameter("num_fires", config.Game.num_fires)
    allow_light_source = 1.0 if config.Game.allow_light_source else 0.0
    channel.set_float_parameter("allow_light_source", allow_light_source)
    channel.set_float_parameter("step_reward", config.Game.step_reward)
    channel.set_float_parameter("target_reward", config.Game.target_reward)
    channel.set_float_parameter("fire_reward", config.Game.fire_reward)
    channel.set_float_parameter("max_steps", config.Game.max_steps)
    channel.set_float_parameter("time_scale", config.RuntimeArgs.time_scale)
    return env
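A hedged sketch of a config object that satisfies the attribute accesses above; `SimpleNamespace` stands in for the project's actual `Config` classes, and the values are illustrative:

from types import SimpleNamespace

config = SimpleNamespace(
    RuntimeArgs=SimpleNamespace(run_in_unity=False, time_scale=20.0),
    Game=SimpleNamespace(num_targets=3.0, num_fires=2.0,
                         allow_light_source=True, step_reward=-0.01,
                         target_reward=1.0, fire_reward=-1.0,
                         max_steps=500.0),
)
env = get_environment(config)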
def worker(
    parent_conn: Connection,
    step_queue: Queue,
    pickled_env_factory: str,
    worker_id: int,
    engine_configuration: EngineConfig,
    log_level: int = logging_util.INFO,
) -> None:
    env_factory: Callable[[int, List[SideChannel]], UnityEnvironment] = cloudpickle.loads(
        pickled_env_factory)
    env_parameters = EnvironmentParametersChannel()
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_configuration)
    stats_channel = StatsSideChannel()
    env: BaseEnv = None

    # Set log level. On some platforms, the logger isn't common with the
    # main process, so we need to set it again.
    logging_util.set_log_level(log_level)

    def _send_response(cmd_name: EnvironmentCommand, payload: Any) -> None:
        parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))

    def _generate_all_results() -> AllStepResult:
        all_step_result: AllStepResult = {}
        for brain_name in env.behavior_specs:
            all_step_result[brain_name] = env.get_steps(brain_name)
        return all_step_result

    def external_brains():
        result = {}
        for behavior_name, behavior_specs in env.behavior_specs.items():
            result[behavior_name] = behavior_spec_to_brain_parameters(
                behavior_name, behavior_specs)
        return result

    try:
        env = env_factory(
            worker_id, [env_parameters, engine_configuration_channel, stats_channel])
        while True:
            req: EnvironmentRequest = parent_conn.recv()
            if req.cmd == EnvironmentCommand.STEP:
                all_action_info = req.payload
                for brain_name, action_info in all_action_info.items():
                    if len(action_info.action) != 0:
                        env.set_actions(brain_name, action_info.action)
                env.step()
                all_step_result = _generate_all_results()
                # The timers in this process are independent from all the other processes
                # and the "main" process, so after we send back the root timer we can
                # safely clear them.
                # Note that we could randomly return timers a fraction of the time if we
                # wanted to reduce the data transferred.
                # TODO get gauges from the workers and merge them in the main process too.
                env_stats = stats_channel.get_and_reset_stats()
                step_response = StepResponse(all_step_result, get_timer_root(), env_stats)
                step_queue.put(
                    EnvironmentResponse(EnvironmentCommand.STEP, worker_id, step_response))
                reset_timers()
            elif req.cmd == EnvironmentCommand.EXTERNAL_BRAINS:
                _send_response(EnvironmentCommand.EXTERNAL_BRAINS, external_brains())
            elif req.cmd == EnvironmentCommand.RESET:
                for k, v in req.payload.items():
                    if isinstance(v, float):
                        env_parameters.set_float_parameter(k, v)
                    elif isinstance(v, ParameterRandomizationSettings):
                        v.apply(k, env_parameters)
                env.reset()
                all_step_result = _generate_all_results()
                _send_response(EnvironmentCommand.RESET, all_step_result)
            elif req.cmd == EnvironmentCommand.CLOSE:
                break
    except (
        KeyboardInterrupt,
        UnityCommunicationException,
        UnityTimeOutException,
        UnityEnvironmentException,
        UnityCommunicatorStoppedException,
    ) as ex:
        logger.info(f"UnityEnvironment worker {worker_id}: environment stopping.")
        step_queue.put(EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex))
        _send_response(EnvironmentCommand.ENV_EXITED, ex)
    finally:
        # If this worker has put an item in the step queue that hasn't been processed by
        # the EnvManager, the process will hang until the item is processed. We avoid this
        # behavior by using Queue.cancel_join_thread(). See
        # https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Queue.cancel_join_thread
        # for more info.
        logger.debug(f"UnityEnvironment worker {worker_id} closing.")
        step_queue.cancel_join_thread()
        step_queue.close()
        if env is not None:
            env.close()
        logger.debug(f"UnityEnvironment worker {worker_id} done.")
class ObstacleTowerEnv(gym.Env):
    ALLOWED_VERSIONS = ["4.0?team=0"]
    _REGISTRY_YAML = "https://storage.googleapis.com/obstacle-tower-build/v4.0/obstacle_tower_v4.0.yaml"

    def __init__(
        self,
        environment_filename=None,
        worker_id=0,
        retro=True,
        timeout_wait=30,
        realtime_mode=False,
        config=None,
        greyscale=False,
    ):
        """
        Arguments:
            environment_filename: The file path to the Unity executable. Does not require the extension.
            worker_id: The index of the worker in the case where multiple environments are running.
                Each environment reserves port (5005 + worker_id) for communication with the Unity
                executable.
            retro: Resize visual observation to 84x84 (int8) and flatten the action space.
            timeout_wait: Time for the python interface to wait for the environment to connect.
            realtime_mode: Whether to render the environment window image and run the environment
                in real time.
        """
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()
        if environment_filename is None:
            registry = UnityEnvRegistry()
            registry.register_from_yaml(self._REGISTRY_YAML)
            self._env = registry["ObstacleTower"].make(
                worker_id=worker_id,
                timeout_wait=timeout_wait,
                side_channels=[self.reset_parameters, self.engine_config])
        else:
            self._env = UnityEnvironment(
                environment_filename,
                worker_id,
                timeout_wait=timeout_wait,
                side_channels=[self.reset_parameters, self.engine_config],
            )

        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0)

        self._env.reset()
        behavior_name = list(self._env.behavior_specs)[0]
        split_name = behavior_name.split("-v")
        if len(split_name) == 2 and split_name[0] == "ObstacleTowerAgent":
            self.name, self.version = split_name
        else:
            raise UnityGymException(
                "Attempting to launch non-Obstacle Tower environment")

        if self.version not in self.ALLOWED_VERSIONS:
            raise UnityGymException(
                "Invalid Obstacle Tower version. Your build is v" + self.version +
                " but only the following versions are compatible with this gym: " +
                str(self.ALLOWED_VERSIONS))

        self.visual_obs = None
        self._current_state = None
        self._n_agents = None
        self._flattener = None
        self._greyscale = greyscale

        # Environment reset parameters
        self._seed = None
        self._floor = None

        self.realtime_mode = realtime_mode
        self.game_over = False  # Hidden flag used by Atari environments to determine if the game is over
        self.retro = retro
        self.config = config

        flatten_branched = self.retro
        uint8_visual = self.retro

        # Check behavior configuration
        if len(self._env.behavior_specs) != 1:
            raise UnityGymException(
                "There can only be one agent in this environment "
                "if it is wrapped in a gym.")

        self.behavior_name = behavior_name
        behavior_spec = self._env.behavior_specs[behavior_name]
        if len(behavior_spec.observation_shapes) < 2:
            raise UnityGymException("Environment provides too few observations.")

        self.uint8_visual = uint8_visual

        # Check for number of agents in scene.
        initial_info, terminal_info = self._env.get_steps(behavior_name)
        self._check_agents(len(initial_info))

        # Set observation and action spaces
        if len(behavior_spec.action_shape) == 1:
            self._action_space = spaces.Discrete(behavior_spec.action_shape[0])
        else:
            if flatten_branched:
                self._flattener = ActionFlattener(behavior_spec.action_shape)
                self._action_space = self._flattener.action_space
            else:
                self._action_space = spaces.MultiDiscrete(behavior_spec.action_shape)

        if self._greyscale:
            depth = 1
        else:
            depth = 3
        image_space_max = 1.0
        image_space_dtype = np.float32
        camera_height = behavior_spec.observation_shapes[0][0]
        camera_width = behavior_spec.observation_shapes[0][1]
        if self.retro:
            image_space_max = 255
            image_space_dtype = np.uint8
            camera_height = 84
            camera_width = 84

        image_space = spaces.Box(
            0,
            image_space_max,
            dtype=image_space_dtype,
            shape=(camera_height, camera_width, depth),
        )
        if self.retro:
            self._observation_space = image_space
        else:
            max_float = np.finfo(np.float32).max
            keys_space = spaces.Discrete(5)
            time_remaining_space = spaces.Box(
                low=0.0, high=max_float, shape=(1, ), dtype=np.float32)
            floor_space = spaces.Discrete(9999)
            self._observation_space = spaces.Tuple(
                (image_space, keys_space, time_remaining_space, floor_space))

    def reset(self, config=None):
        """Resets the state of the environment and returns an initial observation.
        In the case of multi-agent environments, this is a list.
        Returns:
            observation (object/list): the initial observation of the space.
        """
        if config is None:
            reset_params = {}
            if self.config is not None:
                reset_params = self.config
        else:
            reset_params = config
        if self._floor is not None:
            reset_params["starting-floor"] = self._floor
        if self._seed is not None:
            reset_params["tower-seed"] = self._seed
        for key, value in reset_params.items():
            self.reset_parameters.set_float_parameter(key, value)
        self.reset_params = None
        self._env.reset()
        info, terminal_info = self._env.get_steps(self.behavior_name)
        n_agents = len(info)
        self._check_agents(n_agents)
        self.game_over = False

        obs, reward, done, info = self._single_step(info, terminal_info)
        return obs

    def step(self, action):
        """Run one timestep of the environment's dynamics. When end of
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.
        Accepts an action and returns a tuple (observation, reward, done, info).
        In the case of multi-agent environments, these are lists.
        Args:
            action (object/list): an action provided by the environment
        Returns:
            observation (object/list): agent's observation of the current environment
            reward (float/list) : amount of reward returned after previous action
            done (boolean/list): whether the episode has ended.
            info (dict): contains auxiliary diagnostic information, including BrainInfo.
        """
        # Use random actions for all other agents in environment.
        if self._flattener is not None:
            # Translate action into list
            action = np.array(self._flattener.lookup_action(action))

        self._env.set_actions(self.behavior_name, action.reshape([1, -1]))
        self._env.step()
        info, terminal_info = self._env.get_steps(self.behavior_name)
        n_agents = len(info)
        self._check_agents(n_agents)
        self._current_state = info

        obs, reward, done, info = self._single_step(info, terminal_info)
        self.game_over = done
        return obs, reward, done, info

    def _single_step(self, info, terminal_info):
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info
        self.visual_obs = self._preprocess_single(use_info.obs[0][0][:, :, :])

        self.visual_obs, keys, time, current_floor = self._prepare_tuple_observation(
            self.visual_obs, use_info.obs[1][0])

        if self.retro:
            self.visual_obs = self._resize_observation(self.visual_obs)
            self.visual_obs = self._add_stats_to_image(self.visual_obs, use_info.obs[1][0])
            default_observation = self.visual_obs
        else:
            default_observation = self.visual_obs, keys, time, current_floor

        if self._greyscale:
            default_observation = self._greyscale_obs(default_observation)

        return (
            default_observation,
            use_info.reward[0],
            done,
            {
                "text_observation": None,
                "brain_info": use_info,
                "total_keys": keys,
                "time_remaining": time,
                "current_floor": current_floor,
            },
        )

    def _greyscale_obs(self, obs):
        new_obs = np.floor(np.expand_dims(np.mean(obs, axis=2), axis=2)).astype(np.uint8)
        return new_obs

    def _preprocess_single(self, single_visual_obs):
        if self.uint8_visual:
            return (255.0 * single_visual_obs).astype(np.uint8)
        else:
            return single_visual_obs

    def render(self, mode="rgb_array"):
        return self.visual_obs

    def close(self):
        """Override _close in your subclass to perform any necessary cleanup.
        Environments will automatically close() themselves when
        garbage collected or when the program exits.
        """
        self._env.close()

    def seed(self, seed=None):
        """Sets a fixed seed for this env's random number generator(s).
        The valid range for seeds is [0, 99999). By default a random seed
        will be chosen.
        """
        if seed is None:
            self._seed = seed
            return

        seed = int(seed)
        if seed < 0 or seed >= 99999:
            logger.warning(
                "Seed outside of valid range [0, 99999). A random seed "
                "within the valid range will be used on next reset.")
        logger.warning("New seed " + str(seed) + " will apply on next reset.")
        self._seed = seed

    def floor(self, floor=None):
        """Sets the starting floor to a fixed floor number on subsequent environment resets."""
        if floor is None:
            self._floor = floor
            return

        floor = int(floor)
        if floor < 0 or floor > 99:
            logger.warning(
                "Starting floor outside of valid range [0, 99]. Floor 0 will be used "
                "on next reset.")
        logger.warning("New starting floor " + str(floor) + " will apply on next reset.")
        self._floor = floor

    @staticmethod
    def _resize_observation(observation):
        """
        Re-sizes visual observation to 84x84
        """
        obs_image = Image.fromarray(observation)
        obs_image = obs_image.resize((84, 84), Image.NEAREST)
        return np.array(obs_image)

    @staticmethod
    def _prepare_tuple_observation(vis_obs, vector_obs):
        """
        Converts separate visual and vector observations into a prepared tuple
        """
        key = vector_obs[0:6]
        time = vector_obs[6]
        floor_number = vector_obs[7]
        key_num = np.argmax(key, axis=0)
        return vis_obs, key_num, time, floor_number

    @staticmethod
    def _add_stats_to_image(vis_obs, vector_obs):
        """
        Displays time left and number of keys on the visual observation
        """
        key = vector_obs[0:6]
        time = vector_obs[6]
        key_num = int(np.argmax(key, axis=0))
        time_num = min(time, 10000) / 10000

        vis_obs[0:10, :, :] = 0
        for i in range(key_num):
            start = int(i * 16.8) + 4
            end = start + 10
            vis_obs[1:5, start:end, 0:2] = 255
        vis_obs[6:10, 0:int(time_num * 84), 1] = 255
        return vis_obs

    def _check_agents(self, n_agents):
        if n_agents > 1:
            raise UnityGymException(
                "The environment was launched as a single-agent environment, "
                "however there is more than one agent in the scene.")
        if self._n_agents is None:
            self._n_agents = n_agents
            logger.info("{} agents within environment.".format(n_agents))
        elif self._n_agents != n_agents:
            raise UnityGymException(
                "The number of agents in the environment has changed since "
                "initialization. This is not supported.")

    @property
    def metadata(self):
        return {"render.modes": ["rgb_array"]}

    @property
    def reward_range(self):
        return -float("inf"), float("inf")

    @property
    def spec(self):
        return None

    @property
    def action_space(self):
        return self._action_space

    @property
    def observation_space(self):
        return self._observation_space

    @property
    def number_agents(self):
        return self._n_agents
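A hedged usage sketch for the gym wrapper above; with no `environment_filename`, the build is fetched through the public registry (a download on first use):

env = ObstacleTowerEnv(retro=True, realtime_mode=False)
env.seed(5)    # fixed tower layout on next reset
env.floor(0)   # starting floor on next reset
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()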
import sys

import numpy as np
from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.environment_parameters_channel import EnvironmentParametersChannel

# setup environment
mass = 1.0
thrust_multiplier = 30.0
action_mode = [0.0, 1.0][0]  # thrust control (0.0) / velocity control (1.0)
if sys.platform == "win32":
    env_build = "../env/FreeFall/windows/FreeFall.exe"
elif sys.platform == "linux":
    env_build = "../env/FreeFall/linux/FreeFall.x86_64"
elif sys.platform == "darwin":
    env_build = "../env/FreeFall/mac.app"
else:
    raise AttributeError("{} platform is not supported.".format(sys.platform))
channel = EnvironmentParametersChannel()
unity_env = UnityEnvironment(env_build, side_channels=[channel])
env = UnityToGymWrapper(unity_env, uint8_visual=True, allow_multiple_obs=True)
channel.set_float_parameter("mass", mass)
channel.set_float_parameter("thrust_multiplier", thrust_multiplier)
channel.set_float_parameter("action_mode", action_mode)
# NOTE: you can also set the agent's starting position through position.x, position.y, position.z

# interface
key_ws = np.array([False] * 2)

def key_press(event):
    # NOTE: cannot handle multiple key presses at the same time
    global key_ws
    try:
        key = event.key.lower()
    except AttributeError:
        key = event.key
class UnityWrapper:
    def __init__(self,
                 train_mode=True,
                 file_name=None,
                 base_port=5005,
                 seed=None,
                 scene=None,
                 n_agents=1):
        seed = seed if seed is not None else np.random.randint(0, 65536)

        self.engine_configuration_channel = EngineConfigurationChannel()
        self.environment_parameters_channel = EnvironmentParametersChannel()

        self._env = UnityEnvironment(
            file_name=file_name,
            base_port=base_port,
            seed=seed,
            args=['--scene', scene, '--n_agents', str(n_agents)],
            side_channels=[self.engine_configuration_channel,
                           self.environment_parameters_channel])

        if train_mode:
            self.engine_configuration_channel.set_configuration_parameters(
                width=200, height=200, quality_level=0, time_scale=100)
        else:
            self.engine_configuration_channel.set_configuration_parameters(
                width=1028, height=720, quality_level=5, time_scale=5,
                target_frame_rate=60)

        self._env.reset()
        self.behavior_name = self._env.get_behavior_names()[0]

    def init(self):
        behavior_spec = self._env.get_behavior_spec(self.behavior_name)
        logger.info(f'Observation shapes: {behavior_spec.observation_shapes}')
        is_discrete = behavior_spec.is_action_discrete()
        logger.info(f'Action size: {behavior_spec.action_size}. Is discrete: {is_discrete}')

        for o in behavior_spec.observation_shapes:
            if len(o) >= 3:
                self.engine_configuration_channel.set_configuration_parameters(quality_level=5)
                break

        return behavior_spec.observation_shapes, behavior_spec.action_size, is_discrete

    def reset(self, reset_config=None):
        reset_config = {} if reset_config is None else reset_config
        for k, v in reset_config.items():
            self.environment_parameters_channel.set_float_parameter(k, float(v))

        self._env.reset()
        decision_steps, terminal_steps = self._env.get_steps(self.behavior_name)
        return [obs.astype(np.float32) for obs in decision_steps.obs]

    def step(self, action):
        self._env.set_actions(self.behavior_name, action)
        self._env.step()
        decision_steps, terminal_steps = self._env.get_steps(self.behavior_name)

        tmp_terminal_steps = terminal_steps
        while len(decision_steps) == 0:
            self._env.set_actions(self.behavior_name, np.empty([0, action.shape[-1]]))
            self._env.step()
            decision_steps, terminal_steps = self._env.get_steps(self.behavior_name)
            tmp_terminal_steps.agent_id = np.concatenate(
                [tmp_terminal_steps.agent_id, terminal_steps.agent_id])
            tmp_terminal_steps.reward = np.concatenate(
                [tmp_terminal_steps.reward, terminal_steps.reward])
            tmp_terminal_steps.max_step = np.concatenate(
                [tmp_terminal_steps.max_step, terminal_steps.max_step])

        reward = decision_steps.reward
        reward[tmp_terminal_steps.agent_id] = tmp_terminal_steps.reward

        done = np.full([len(decision_steps), ], False, dtype=bool)
        done[tmp_terminal_steps.agent_id] = True

        max_step = np.full([len(decision_steps), ], False, dtype=bool)
        max_step[tmp_terminal_steps.agent_id] = tmp_terminal_steps.max_step

        return ([obs.astype(np.float32) for obs in decision_steps.obs],
                decision_steps.reward.astype(np.float32),
                done,
                max_step)

    def close(self):
        self._env.close()
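A hedged usage sketch for the wrapper above (build path, scene name, and reset key are illustrative); the action shape assumes a single agent with a continuous action space of size `action_size`:

import numpy as np

env = UnityWrapper(train_mode=True, file_name="path/to/build", scene="MainScene")
obs_shapes, action_size, is_discrete = env.init()
obs = env.reset(reset_config={"difficulty": 1})
action = np.zeros((1, action_size), dtype=np.float32)  # a no-op continuous action
obs, reward, done, max_step = env.step(action)
env.close()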
class StorageEnvController(ConFormSimUnityEnvController):
    _BASE_PORT = 5004

    def __init__(self, config=DEFAULT_ENV_CONFIG):
        """
        Environment initialization
        :param config: Configuration of the environment.
        """
        # create side channels
        self.env_param_channel = EnvironmentParametersChannel()
        self.engine_channel = EngineConfigurationChannel()
        self.color_pool_channel = IntListPropertiesChannel()
        side_channels = [
            self.env_param_channel,
            self.engine_channel,
            self.color_pool_channel,
        ]
        # flag whether the config has been applied to the environment
        self.is_already_initialized = False
        # create environment with config and side channels
        super().__init__(config, DEFAULT_ENV_CONFIG, side_channels=side_channels)

    def apply_config(self):
        # set FloatProperties
        grid_size_x = self.config.get("grid_size_x")
        if not isinstance(grid_size_x, list) or len(grid_size_x) != 2:
            raise ValueError("The provided grid_size_x parameter is no list of type "
                             "[min, max]. Please correct this.")
        grid_size_y = self.config.get("grid_size_y")
        if not isinstance(grid_size_y, list) or len(grid_size_y) != 2:
            raise ValueError("The provided grid_size_y parameter is no list of type "
                             "[min, max]. Please correct this.")
        vis_obs_size = self.config.get("vis_obs_size")
        if not isinstance(vis_obs_size, list) or len(vis_obs_size) != 2:
            raise ValueError("The provided vis_obs_size parameter is no list of type "
                             "[min, max]. Please correct this.")
        base_size_x = self.config.get("base_size_x")
        if not isinstance(base_size_x, list) or len(base_size_x) != 2:
            raise ValueError("The provided base_size_x parameter is no list of type "
                             "[min, max]. Please correct this.")
        base_size_y = self.config.get("base_size_y")
        if not isinstance(base_size_y, list) or len(base_size_y) != 2:
            raise ValueError("The provided base_size_y parameter is no list of type "
                             "[min, max]. Please correct this.")
        num_per_base_type = self.config.get("num_per_base_type")
        if not isinstance(num_per_base_type, list) or len(num_per_base_type) != 2:
            raise ValueError("The provided num_per_base_type parameter is no list of type "
                             "[min, max]. Please correct this.")
        num_per_item = self.config.get("num_per_item")
        if not isinstance(num_per_item, list) or len(num_per_item) != 2:
            raise ValueError("The provided num_per_item parameter is no list of type "
                             "[min, max]. Please correct this.")
        color_pool = self.config.get("color_pool")
        if not isinstance(color_pool, list):
            raise ValueError("The provided color_pool parameter is not of type list. "
                             "Please correct this.")
        camera_type = self.config.get("camera_type")
        camera_type_f: float = CAMERA_TYPES[camera_type] or 0.0

        # set properties in reset channel
        self.env_param_channel.set_float_parameter("minGridSizeX", grid_size_x[0])
        self.env_param_channel.set_float_parameter("maxGridSizeX", grid_size_x[1])
        self.env_param_channel.set_float_parameter("minGridSizeY", grid_size_y[0])
        self.env_param_channel.set_float_parameter("maxGridSizeY", grid_size_y[1])
        self.env_param_channel.set_float_parameter("cameraType", camera_type_f)
        # area settings
        # check if the number of train areas should be set
        if self.is_already_initialized:
            print("You're trying to change the number of train areas during "
                  "runtime. This is only possible at initialization.")
        else:
            self.env_param_channel.set_float_parameter(
                "numTrainAreas", self.config.get("num_train_areas"))
        self.env_param_channel.set_float_parameter(
            "numBaseTypesToUse", self.config.get("num_base_types"))
        self.env_param_channel.set_float_parameter("numberPerBaseTypeMax", num_per_base_type[1])
        self.env_param_channel.set_float_parameter("numberPerBaseTypeMin", num_per_base_type[0])
        self.env_param_channel.set_float_parameter("baseSizeXMax", base_size_x[1])
        self.env_param_channel.set_float_parameter("baseSizeXMin", base_size_x[0])
        self.env_param_channel.set_float_parameter("baseSizeZMax", base_size_y[1])
        self.env_param_channel.set_float_parameter("baseSizeZMin", base_size_y[0])
        self.env_param_channel.set_float_parameter(
            "baseInCornersOnly", 1 if self.config.get("base_in_corners_only") else 0)
        self.env_param_channel.set_float_parameter(
            "boxesVanish", 1 if self.config.get("boxes_vanish") else 0)
        self.env_param_channel.set_float_parameter(
            "boxesNeedDrop", 1 if self.config.get("boxes_need_drop") else 0)
        self.env_param_channel.set_float_parameter(
            "sparseReward", 1 if self.config.get("sparse_reward_only") else 0)
        # color settings
        self.env_param_channel.set_float_parameter(
            "noBaseFillColor", 1 if self.config.get("no_base_fill_color") else 0)
        self.env_param_channel.set_float_parameter(
            "brighterBases", 1 if self.config.get("brighter_bases") else 0)
        self.env_param_channel.set_float_parameter(
            "full_base_line", 1 if self.config.get("fullBaseLine") else 0)
        # item settings
        self.env_param_channel.set_float_parameter(
            "numItemTypesToUse", self.config.get("num_item_types"))
        self.env_param_channel.set_float_parameter("numberPerItemTypeMax", num_per_item[1])
        self.env_param_channel.set_float_parameter("numberPerItemTypeMin", num_per_item[0])
        # general settings
        self.env_param_channel.set_float_parameter(
            "noDisplay", 1 if self.config.get("no_display") else 0)
        self.env_param_channel.set_float_parameter("visObsWidth", vis_obs_size[0])
        self.env_param_channel.set_float_parameter("visObsHeight", vis_obs_size[1])
        self.env_param_channel.set_float_parameter(
            "useVisual",
            1 if self.config.get("use_visual")
            and not self.config.get("use_object_property_camera") else 0)
        self.env_param_channel.set_float_parameter(
            "useRayPerception", 1 if self.config.get("use_ray_perception") else 0)
        self.env_param_channel.set_float_parameter(
            "useObjectPropertyCamera",
            1 if self.config.get("use_object_property_camera") else 0)
        self.env_param_channel.set_float_parameter("maxSteps", self.config.get("max_steps"))
        self.env_param_channel.set_float_parameter("taskLevel", self.config.get("task_level"))

        # Read engine config
        engine_config = self.config.get("engine_config")
        # Configure the Engine
        engine_config = EngineConfig(
            width=engine_config.get("window_width"),
            height=engine_config.get("window_height"),
            quality_level=engine_config.get("quality_level"),
            time_scale=engine_config.get("sim_speed"),
            target_frame_rate=engine_config.get("target_frame_rate"),
            capture_frame_rate=60)
        self.engine_channel.set_configuration(engine_config)

        # set list properties
        self.color_pool_channel.set_property("colorPool", self.config.get("color_pool"))

        self.is_already_initialized = True
print("Seed:", seed) print("Params:", params) torch.manual_seed(seed) np.random.seed(seed) game_path = os.path.expanduser( "/data4/pdp/grantsrb/loc_games/LocationGame2dLinux_9/LocationGame2dLinux.x86_64" ) channel = EngineConfigurationChannel() env_channel = EnvironmentParametersChannel() env = UnityEnvironment(file_name=game_path, side_channels=[channel, env_channel], seed=seed) channel.set_configuration_parameters(time_scale=1) for k, v in params.items(): env_channel.set_float_parameter(k, v) env = UnityToGymWrapper(env, allow_multiple_obs=True) print("Environment created!") #matplotlib.use("tkagg") obs = env.reset() #plt.imshow(obs[0]) #plt.show() done = False while True: print("stepping") x, z = [float(y.strip()) for y in str(input("action: ")).split(",")] # The obs is a list of length 2 in which the first element is the image and the second is the goal coordinate # Reward in this case is the difference between the action location and the nearest object to the action location obs, rew, done, _ = env.step([x, z]) plt.imsave("sample.png", obs[0])
class MyEnv(gym.Env):
    def __init__(self, worker_id, realtime_mode=False):
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()
        env_path = "C:/myDesktop/source/gridworld_imitation/food_collector_4"
        self._env = UnityEnvironment(
            env_path, worker_id,
            side_channels=[self.reset_parameters, self.engine_config])
        self._env.reset()

        self.behavior_name = list(self._env.behavior_specs)[0]
        behavior_spec = self._env.behavior_specs[self.behavior_name]
        print(behavior_spec)

        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0)
            self.reset_parameters.set_float_parameter("train-mode", 0.0)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0)
            self.reset_parameters.set_float_parameter("train-mode", 1.0)

        self._flattener = ActionFlattener(behavior_spec.action_spec.discrete_branches)

    def reset(self):
        # for key, value in reset_params.items():
        #     self.reset_parameters.set_float_parameter(key, value)
        self._env.reset()
        info, terminal_info = self._env.get_steps(self.behavior_name)
        self.game_over = False

        obs, reward, done, info = self._single_step(info, terminal_info)
        return obs

    def step(self, action):
        # Use random actions for all other agents in environment.
        if self._flattener is not None and type(action) == int:
            # Translate action into list
            action = np.array(self._flattener.lookup_action(action))

        # wrap the flattened discrete action for the ML-Agents API
        action_tuple = ActionTuple()
        action_tuple.add_discrete(np.asarray(action).reshape([1, -1]))
        self._env.set_actions(self.behavior_name, action_tuple)
        self._env.step()
        running_info, terminal_info = self._env.get_steps(self.behavior_name)
        obs, reward, done, info = self._single_step(running_info, terminal_info)
        self.game_over = done
        return obs, reward, done, info

    def _single_step(self, info, terminal_info):
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info

        # observations arrive camera first, then the vector sensor
        output_info = {}
        output_info["visual_obs"] = use_info.obs[0][0]
        # obs = np.concatenate([use_info.obs[1][0], use_info.obs[2][0]])
        return use_info.obs[1][0], use_info.reward[0], done, output_info

    def close(self):
        self._env.close()

    def render(self):
        pass
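A hedged usage sketch; actions are flattened discrete indices, as the `ActionFlattener` in the constructor implies:

env = MyEnv(worker_id=0, realtime_mode=False)
obs = env.reset()
obs, reward, done, info = env.step(0)  # 0 is a flattened discrete action index
env.close()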
class UnityWrapper(Env):
    """This class wraps Unity environments.

    This wrapper has notable constraints:
        - Only one agent (no multi-agent environments)
        - Only one visual observation
        - Only discrete and multi-discrete action spaces (no continuous action space)"""

    def __init__(self, env_path, reset_params, worker_id=1, no_graphics=False,
                 realtime_mode=False, record_trajectory=False):
        """Instantiates the Unity Environment from a specified executable.

        Arguments:
            env_path {string} -- Path to the executable of the environment
            reset_params {dict} -- Reset parameters of the environment such as the seed

        Keyword Arguments:
            worker_id {int} -- Port offset of the environment's instance (default: {1})
            no_graphics {bool} -- Whether to allow the executable to render or not (default: {False})
            realtime_mode {bool} -- Whether to run the environment in real time or as fast as possible (default: {False})
            record_trajectory {bool} -- Whether to record the trajectory of an entire episode.
                This can be used for video recording. (default: {False})
        """
        # Initialize channels
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        # Prepare default reset parameters
        self._default_reset_parameters = {}
        for key, value in reset_params.items():
            self._default_reset_parameters[key] = value
            if key != "start-seed" and key != "num-seeds":
                self.reset_parameters.set_float_parameter(key, value)

        self._realtime_mode = realtime_mode
        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0, width=1280, height=720)
        else:
            self.engine_config.set_configuration_parameters(time_scale=30.0, width=256, height=256)

        # Whether to record the trajectory of an entire episode
        self._record = record_trajectory

        # Launch the environment's executable
        self._env = UnityEnvironment(file_name=env_path, worker_id=worker_id,
                                     no_graphics=no_graphics,
                                     side_channels=[self.reset_parameters, self.engine_config])
        # If the Unity Editor should be used instead of a build:
        # self._env = UnityEnvironment(file_name=None, worker_id=0, no_graphics=no_graphics,
        #                              side_channels=[self.reset_parameters, self.engine_config])

        # Reset the environment
        self._env.reset()
        # Retrieve behavior configuration
        self._behavior_name = list(self._env.behavior_specs)[0]
        self._behavior_spec = self._env.behavior_specs[self._behavior_name]

        # Check whether this Unity environment is supported
        self._verify_environment()

        # Set action space properties
        if self._behavior_spec.action_spec.is_discrete():
            num_action_branches = self._behavior_spec.action_spec.discrete_size
            action_branch_dimensions = self._behavior_spec.action_spec.discrete_branches
            if num_action_branches == 1:
                self._action_space = spaces.Discrete(action_branch_dimensions[0])
            else:
                self._action_space = spaces.MultiDiscrete(action_branch_dimensions)

        # Count visual and vector observations
        self._num_vis_obs, self._num_vec_obs = 0, 0
        self._vec_obs_indices = []
        for index, obs in enumerate(self._behavior_spec.observation_specs):
            if len(obs.shape) > 1:
                self._num_vis_obs = self._num_vis_obs + 1
                self._vis_obs_index = index
            else:
                self._num_vec_obs = self._num_vec_obs + 1
                self._vec_obs_indices.append(index)

        # Set visual observation space property
        if self._num_vis_obs == 1:
            vis_obs_shape = self._behavior_spec.observation_specs[self._vis_obs_index].shape
            self._visual_observation_space = spaces.Box(
                low=0, high=1.0, shape=vis_obs_shape, dtype=np.float32)
        else:
            self._visual_observation_space = None

        # Set vector observation space property
        if self._num_vec_obs > 0:
            # Determine the length of vec obs by summing the length of each distinct one
            vec_obs_length = sum(
                [self._behavior_spec.observation_specs[i].shape[0] for i in self._vec_obs_indices])
            self._vector_observation_space = (vec_obs_length, )
        else:
            self._vector_observation_space = None

        # Videos can only be recorded if the environment provides visual observations
        if self._record and self._visual_observation_space is None:
            raise UnityEnvironmentException(
                "Videos cannot be rendered for a Unity environment that does not provide visual observations.")

    @property
    def unwrapped(self):
        """
        Returns:
            {UnityWrapper} -- Environment in its vanilla (i.e. unwrapped) state
        """
        return self

    @property
    def action_space(self):
        """Returns the shape of the action space of the agent."""
        return self._action_space

    @property
    def action_names(self):
        return None

    @property
    def get_episode_trajectory(self):
        """Returns the trajectory of an entire episode as a dictionary (vis_obs, vec_obs, rewards, actions)."""
        self._trajectory["action_names"] = self.action_names
        return self._trajectory if self._trajectory else None

    @property
    def visual_observation_space(self):
        return self._visual_observation_space

    @property
    def vector_observation_space(self):
        return self._vector_observation_space

    def reset(self, reset_params=None):
        """Resets the environment based on a global or just specified config.

        Keyword Arguments:
            reset_params {dict} -- Reset parameters to configure the environment (default: {None})

        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
        """
        # Track rewards of an entire episode
        self._rewards = []

        # Use initial or new reset parameters
        if reset_params is None:
            reset_params = self._default_reset_parameters

        # Apply reset parameters
        for key, value in reset_params.items():
            # Skip reset parameters that are not used by the Unity environment
            if key != "start-seed" and key != "num-seeds":
                self.reset_parameters.set_float_parameter(key, value)

        # Sample the seed to be used
        if reset_params["start-seed"] > -1:
            seed = randint(reset_params["start-seed"],
                           reset_params["start-seed"] + reset_params["num-seeds"] - 1)
        else:
            # Use unlimited seeds
            seed = -1
        self.reset_parameters.set_float_parameter("seed", seed)

        # Reset and verify the environment
        self._env.reset()
        info, terminal_info = self._env.get_steps(self._behavior_name)
        self._verify_environment()

        # Retrieve initial observations
        vis_obs, vec_obs, _, _ = self._process_agent_info(info, terminal_info)

        # Prepare trajectory recording
        self._trajectory = {
            "vis_obs": [vis_obs * 255], "vec_obs": [vec_obs],
            "rewards": [0.0], "actions": []
        }

        return vis_obs, vec_obs

    def step(self, action):
        """Runs one timestep of the environment's dynamics. Once an episode is done,
        reset() has to be called manually.

        Arguments:
            action {List} -- A list of at least one discrete action to be executed by the agent

        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
            {float} -- (Total) Scalar reward signaled by the environment
            {bool} -- Whether the episode of the environment terminated
            {dict} -- Further episode information (e.g. cumulated reward) retrieved from the
                environment once an episode completed
        """
        # Carry out the agent's action
        action_tuple = ActionTuple()
        action_tuple.add_discrete(np.asarray(action).reshape([1, -1]))
        self._env.set_actions(self._behavior_name, action_tuple)
        self._env.step()
        info, terminal_info = self._env.get_steps(self._behavior_name)

        # Process step results
        vis_obs, vec_obs, reward, done = self._process_agent_info(info, terminal_info)
        self._rewards.append(reward)

        # Record trajectory data
        if self._record:
            self._trajectory["vis_obs"].append(vis_obs * 255)
            self._trajectory["vec_obs"].append(vec_obs)
            self._trajectory["rewards"].append(reward)
            self._trajectory["actions"].append(action)

        # Episode information
        if done:
            info = {"reward": sum(self._rewards), "length": len(self._rewards)}
        else:
            info = None

        return vis_obs, vec_obs, reward, done, info

    def close(self):
        """Shut down the environment."""
        self._env.close()

    def _process_agent_info(self, info, terminal_info):
        """Extracts the observations, rewards, dones, and episode infos.

        Args:
            info {DecisionSteps}: Current state
            terminal_info {TerminalSteps}: Terminal state

        Returns:
            vis_obs {ndarray} -- Visual observation if available, else None
            vec_obs {ndarray} -- Vector observation if available, else None
            reward {float} -- Reward signal from the environment
            done {bool} -- Whether the episode terminated or not
        """
        # Determine if the episode terminated or not
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info

        # Process visual observations
        if self.visual_observation_space is not None:
            vis_obs = use_info.obs[self._vis_obs_index][0]
        else:
            vis_obs = None

        # Process vector observations
        if self.vector_observation_space is not None:
            for i, dim in enumerate(self._vec_obs_indices):
                if i == 0:
                    vec_obs = use_info.obs[dim][0]
                else:
                    vec_obs = np.concatenate((vec_obs, use_info.obs[dim][0]))
        else:
            vec_obs = None

        return vis_obs, vec_obs, use_info.reward[0], done

    def _verify_environment(self):
        # Verify number of agent behavior types
        if len(self._env.behavior_specs) != 1:
            raise UnityEnvironmentException("The Unity environment contains more than one agent type.")
        # Verify number of agents
        decision_steps, _ = self._env.get_steps(self._behavior_name)
        if len(decision_steps) > 1:
            raise UnityEnvironmentException(
                "The Unity environment contains more than one agent, which is not supported.")
        # Verify action space type
        if not self._behavior_spec.action_spec.is_discrete() or self._behavior_spec.action_spec.is_continuous():
            raise UnityEnvironmentException("Continuous action spaces are not supported. "
                                            "Only discrete and MultiDiscrete spaces are supported.")
        # Verify that at least one observation is provided
        num_vis_obs = 0
        num_vec_obs = 0
        for obs_spec in self._behavior_spec.observation_specs:
            if len(obs_spec.shape) == 3:
                num_vis_obs += 1
            elif len(obs_spec.shape) == 1:
                num_vec_obs += 1
        if num_vis_obs == 0 and num_vec_obs == 0:
            raise UnityEnvironmentException("The Unity environment does not contain any observations.")
        # Verify number of visual observations
        if num_vis_obs > 1:
            raise UnityEnvironmentException("The Unity environment contains more than one visual observation.")
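A hedged usage sketch for the recording-capable wrapper above; the build path is illustrative and the reset keys follow the snippet's seed handling:

env = UnityWrapper("path/to/build",
                   reset_params={"start-seed": 0, "num-seeds": 100},
                   worker_id=1, record_trajectory=True)
vis_obs, vec_obs = env.reset()
vis_obs, vec_obs, reward, done, info = env.step([0])  # one discrete action
trajectory = env.get_episode_trajectory
env.close()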
class UnityWrapper(Env):
    """This class wraps Unity environments.

    This wrapper has notable constraints:
        - Only one agent (no multi-agent environments)
        - Only one visual observation
        - Only discrete and multi-discrete action spaces (no continuous action space)"""

    def __init__(self, env_path, worker_id=1, no_graphics=False, realtime_mode=False, config=None):
        """Instantiates the Unity Environment from a specified executable.

        Arguments:
            env_path {string} -- Path to the executable of the environment

        Keyword Arguments:
            worker_id {int} -- Port offset of the environment's instance (default: {1})
            no_graphics {bool} -- Whether to allow the executable to render or not (default: {False})
            realtime_mode {bool} -- Whether to run the environment in real time or as fast as possible (default: {False})
            config {dict} -- Specifies the reset parameters of the environment (default: {None})
        """
        # Disable logging
        logging.disable(logging.INFO)

        # Initialize channels
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()
        self._config = config
        self._realtime_mode = realtime_mode
        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0, width=1280, height=720)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0, width=128, height=128)

        # Launch the environment's executable
        self._env = UnityEnvironment(file_name=env_path, worker_id=worker_id, no_graphics=no_graphics,
                                     side_channels=[self.reset_parameters, self.engine_config])
        # Reset the environment
        self._env.reset()
        # Retrieve behavior configuration
        self._behavior_name = list(self._env.behavior_specs)[0]
        self._behavior_spec = self._env.behavior_specs[self._behavior_name]

        # Set action space properties
        if len(self._behavior_spec.action_shape) == 1:
            self._action_space = spaces.Discrete(self._behavior_spec.action_shape[0])
        else:
            self._action_space = spaces.MultiDiscrete(self._behavior_spec.action_shape)
        self._action_names = ["Not available"]

        # Count visual and vector observations
        self._num_vis_obs, self._num_vec_obs = 0, 0
        self._vec_obs_indices = []
        for index, obs in enumerate(self._behavior_spec.observation_shapes):
            if len(obs) > 1:
                self._num_vis_obs = self._num_vis_obs + 1
                self._vis_obs_index = index
            else:
                self._num_vec_obs = self._num_vec_obs + 1
                self._vec_obs_indices.append(index)

        # Verify the environment
        self._verify_environment()

        # Set visual observation space property
        if self._num_vis_obs == 1:
            height = self._behavior_spec.observation_shapes[self._vis_obs_index][0]
            width = self._behavior_spec.observation_shapes[self._vis_obs_index][1]
            depth = self._behavior_spec.observation_shapes[self._vis_obs_index][2]
            self._visual_observation_space = spaces.Box(
                low=0, high=1.0,
                shape=(height, width, depth),
                dtype=np.float32)
        else:
            self._visual_observation_space = None

        # Set vector observation space property
        if self._num_vec_obs > 0:
            # Determine the length of vec obs by summing the length of each distinct one
            vec_obs_length = sum([self._behavior_spec.observation_shapes[i][0] for i in self._vec_obs_indices])
            self._vector_observation_space = (vec_obs_length,)
        else:
            self._vector_observation_space = None

    @property
    def unwrapped(self):
        """
        Returns:
            {UnityWrapper} -- Environment in its vanilla (i.e. unwrapped) state
        """
        return self

    @property
    def action_space(self):
        """Returns the shape of the action space of the agent."""
        return self._action_space

    @property
    def action_names(self):
        return self._action_names

    @property
    def visual_observation_space(self):
        return self._visual_observation_space

    @property
    def vector_observation_space(self):
        return self._vector_observation_space

    def reset(self, reset_params=None):
        """Resets the environment using either the global or the just specified config.

        Keyword Arguments:
            reset_params {dict} -- Reset parameters to configure the environment (default: {None})

        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
        """
        # Track rewards of an entire episode
        self._rewards = []
        # Process config: use the global config unless a new one is specified
        if reset_params is None:
            reset_params = self._config if self._config is not None else {}
        # Apply reset parameters
        for key, value in reset_params.items():
            self.reset_parameters.set_float_parameter(key, value)
        # Reset and verify the environment
        self._env.reset()
        info, terminal_info = self._env.get_steps(self._behavior_name)
        self._verify_environment(len(info))
        # Retrieve initial observations
        vis_obs, vec_obs, _, _ = self._process_agent_info(info, terminal_info)
        return vis_obs, vec_obs

    def step(self, action):
        """Runs one timestep of the environment's dynamics. Once an episode is done, reset() has to be called manually.

        Arguments:
            action {List} -- A list of at least one discrete action to be executed by the agent

        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
            {float} -- (Total) Scalar reward signaled by the environment
            {bool} -- Whether the episode of the environment terminated
            {dict} -- Further episode information (e.g. cumulated reward) retrieved from the environment once an episode completed
        """
        # Carry out the agent's action
        self._env.set_actions(self._behavior_name, action.reshape([1, -1]))
        self._env.step()
        info, terminal_info = self._env.get_steps(self._behavior_name)
        # Process step results
        vis_obs, vec_obs, reward, done = self._process_agent_info(info, terminal_info)
        self._rewards.append(reward)
        # Episode information
        if done:
            info = {"reward": sum(self._rewards), "length": len(self._rewards)}
        else:
            info = None
        return vis_obs, vec_obs, reward, done, info

    def close(self):
        """Shuts down the environment."""
        self._env.close()

    def _process_agent_info(self, info, terminal_info):
        """Extracts the observations, rewards, dones, and episode infos.

        Args:
            info {DecisionSteps}: Current state
            terminal_info {TerminalSteps}: Terminal state

        Returns:
            vis_obs {ndarray} -- Visual observation if available, else None
            vec_obs {ndarray} -- Vector observation if available, else None
            reward {float} -- Reward signal from the environment
            done {bool} -- Whether the episode terminated or not
        """
        # Determine whether the episode terminated
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info
        # Process visual observations
        if self.visual_observation_space is not None:
            vis_obs = use_info.obs[self._vis_obs_index][0]
        else:
            vis_obs = None
        # Process vector observations by concatenating all distinct ones
        if self.vector_observation_space is not None:
            for i, dim in enumerate(self._vec_obs_indices):
                if i == 0:
                    vec_obs = use_info.obs[dim][0]
                else:
                    vec_obs = np.concatenate((vec_obs, use_info.obs[dim][0]))
        else:
            vec_obs = None
        return vis_obs, vec_obs, use_info.reward[0], done

    def _verify_environment(self, num_agents=None):
        """Checks whether the environment meets the requirements of this wrapper.
        Only one agent and at maximum one visual observation are allowed.
        Only Discrete and MultiDiscrete action spaces are supported.

        Arguments:
            num_agents {int} -- Number of agents (default: {None})
        """
        # Verify the number of agent types
        if len(self._env.behavior_specs) != 1:
            raise UnityEnvironmentException("The Unity environment contains more than one agent type.")
        # Verify the action space type
        if int(self._behavior_spec.action_type.value) == 1:
            raise UnityEnvironmentException("Continuous action spaces are not supported. Only Discrete and MultiDiscrete spaces are supported.")
        # Verify the number of visual observations
        if self._num_vis_obs > 1:
            raise UnityEnvironmentException("The Unity environment contains more than one visual observation.")
        # Verify the agent count
        if num_agents is not None and num_agents > 1:
            raise UnityEnvironmentException("The Unity environment contains more than one agent.")
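# NOTE: Editorial addition, not part of the original sources: a hedged usage
# sketch for the wrapper above. The executable path and the "difficulty"
# reset key are hypothetical placeholders.
import numpy as np

env = UnityWrapper("./envs/MyGame/MyGame.x86_64", worker_id=2,
                   no_graphics=True, realtime_mode=False,
                   config={"difficulty": 1.0})
vis_obs, vec_obs = env.reset()
done = False
while not done:
    # step() reshapes the action to (1, -1), so a flat ndarray is expected
    action = np.asarray(env.action_space.sample()).reshape(-1)
    vis_obs, vec_obs, reward, done, info = env.step(action)
env.close()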
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--run_id", help="The run id")
    parser.add_argument("--config_file", default=None, help="The configuration file.")
    parser.add_argument("--env_location", default=None,
                        help="The location of the environment executable. If not set, connects to the editor (Default: None)")
    parser.add_argument("--exec_type", default="eval", help="The execution type (Default: eval)")
    parser.add_argument("--eval_best", default="false",
                        help="Whether to load the best model or the last saved model (Default: false)")
    parser.add_argument("--device", default="cpu", help="The device to run the model on (Default: cpu)")
    parser.add_argument("--simu_spd", default=1.0, type=float, help="The simulation speed (Default: 1.0)")
    parser.add_argument("--eval_episodes", default=-1.0, type=float,
                        help="The number of episodes when evaluating. If -1 is passed, uses the value from the parameters file. (Default: -1)")
    parser.add_argument("--seed", default=0, type=int,
                        help="The random seed. If 0 is passed, uses the value from the parameters file. (Default: 0)")
    parser.add_argument("--manual_control", default="false",
                        help="Overrides the RL agent and reads input from the gamepad (Default: false)")
    parser.add_argument("--naive_policy", default="false",
                        help="Uses a naive policy that only goes straight (Default: false)")
    parser.add_argument("--visualize_input", default="false",
                        help="Visualize agent image input (Default: false)")
    args = parser.parse_args()

    with open(args.config_file) as file:
        parameters = yaml.load(file, Loader=yaml.FullLoader)

    conf_channel = EngineConfigurationChannel()
    parameter_channel = EnvironmentParametersChannel()
    string_log = StringLogChannel()

    if args.seed != 0:
        # The user set a different seed on the command line
        parameters["random_seed"] = args.seed

    if args.env_location is None:
        unity_env = UnityEnvironment(side_channels=[conf_channel, string_log, parameter_channel])
    else:
        unity_env = UnityEnvironment(args.env_location,
                                     side_channels=[conf_channel, string_log, parameter_channel])

    parameter_channel.set_float_parameter("seed", parameters["random_seed"])
    env_parameters = parameters["simulation"]
    for element in env_parameters:
        parameter_channel.set_float_parameter(element, env_parameters[element])

    if args.exec_type == "train":
        parameter_channel.set_float_parameter("training", 1.0)
    else:
        parameters["time_scale"] = args.simu_spd
        parameter_channel.set_float_parameter("training", 0.0)

    if args.eval_episodes != -1.0:
        parameters["eval_episodes"] = args.eval_episodes

    conf_channel.set_configuration_parameters(time_scale=parameters["time_scale"])
    parameter_channel.set_float_parameter("parameters_set", 1.0)

    env = MultiAgentUnityEnv(unity_env, encoder=None)
    model = None
    simu_info = {}

    print("----- ENV INFO -------")
    print(parameters["random_seed"])
    print(env.state_dim)
    print(env.action_dim)
    print(env.action_magnitude)
    print(env.no_of_agents)
    print(env.visual_obs_indexes)
    print(env.non_visual_obs_index)
    simu_info["state_dimension"] = env.state_dim
    simu_info["action_dimension"] = env.action_dim
    simu_info["action_magnitude"] = env.action_magnitude
    simu_info["no_of_agents"] = env.no_of_agents
    if args.env_location is None:
        simu_info["env_type"] = "Editor"
    else:
        simu_info["env_type"] = args.env_location.split("/")[-1].split(".")[0]
    parameters["simu_info"] = simu_info
    print("------------")

    torch.manual_seed(parameters["random_seed"])
    np.random.seed(parameters["random_seed"])

    rl_algorithm = parameters["rl_algorithm"]
    mem_parameters = parameters["memory"] if "memory" in parameters else None
    if "augmentation" in parameters:
        aug_parameters = parameters["augmentation"]
    else:
        aug_parameters = {}
        aug_parameters["indexes"] = None

    if rl_algorithm["type"] == "DDPG":
        pass  # DDPG is not implemented in this script
    elif rl_algorithm["type"] == "TD3":
        kwargs = {
            "state_dim": env.state_dim,
            "action_dim": env.action_dim,
            "model_name": args.run_id,
            "max_action": env.action_magnitude,
            "net_config_name": parameters["architecture_type"],
            "device": args.device,
            "discount": rl_algorithm["discount"],
            "tau": rl_algorithm["tau"],
            "policy_noise": rl_algorithm["policy_noise"] * env.action_magnitude,
            "expl_noise": rl_algorithm["expl_noise"],
            "noise_clip": rl_algorithm["noise_clip"] * env.action_magnitude,
            "policy_freq": rl_algorithm["policy_freq"],
            "mem_parameters": mem_parameters
        }
        model = TD3(**kwargs)
        simu_info["actor_total_params"] = model.actor_total_params
        simu_info["critic_total_params"] = model.critic_total_params

    if args.exec_type == "train":
        rb_parameters = parameters["replay_buffer"]
        has_curriculum = parameters["base_run_id"] != "None"
        if rb_parameters["location"] != "None":
            rb = ReplayBuffer.load(rb_parameters["location"], device="cpu")
        else:
            if model.actor.memory_capable() and model.critic.memory_capable():
                rb = ReplayBufferM(
                    state_space_dim=env.state_dim,
                    action_dim=env.action_dim,
                    no_of_agents=env.no_of_agents,
                    memory_length=mem_parameters["memory_length"],
                    buffer_capacity=rb_parameters["size"],
                    batch_size=parameters["batch_size"],
                    a_lstm_hidden_dim=model.actor.lstm_hidden_dim,
                    c_lstm_hidden_dim=model.critic.lstm_hidden_dim,
                    device="cpu")
            else:
                rb = ReplayBuffer(env.state_dim, env.action_dim,
                                  rb_parameters["size"], parameters["batch_size"],
                                  device="cpu")

        if has_curriculum:
            model_type_str = "best" if args.eval_best == "true" else "latest"
            print("Transferring learning from a previous model. The %s model will be loaded..." % (model_type_str))
            if args.eval_best == "true":
                model.load("./models", name=parameters["base_run_id"], prefix="")
            else:
                model.load("./models", name=parameters["base_run_id"], prefix="last_exec_")

        # Save model information
        print("Saving training information...")
        model.save_model_info("./models", parameters)
        print("Done!")

        train_model(
            model,
            env,
            rb,
            string_log,
            buffer_size_to_train=rb_parameters["minimum_obs_before_training"],
            eval_freq=parameters["eval_frequency"],
            number_of_eval_episodes=parameters["eval_episodes"],
            max_steps=parameters["max_step_count"],
            save_best=True,
            render=False,
            writer=SummaryWriter("./models/logs/" + args.run_id),
            curriculum=has_curriculum,
            use_memory=(model.actor.memory_capable() and model.critic.memory_capable()),
            step_update_ratio=parameters["step_update_ratio"],
            augmentation_indexes=aug_parameters["indexes"],
            parameters=parameters)
    elif args.exec_type == "eval":
        if args.visualize_input == "true":
            image = np.zeros((256, 256))
            cv2.imshow('Agent image', image)

        rec_arch = False
        if args.manual_control == "true":
            model = HumanOperator("./src/Utils/xbox.yaml", env.action_dim)
        elif args.naive_policy == "true":
            model = NaiveModel()
            (mr, r_std), (mel, mel_std), (suc, suc_std), ev_steps = eval_model(
                model,
                env,
                parameters["eval_episodes"],
                rec_arch=False,
                verbose=True,
                parameters=parameters,
                render=(args.visualize_input == "true"))
        else:
            model_type_str = "best" if args.eval_best == "true" else "latest"
            print("Evaluating model. The %s model will be loaded..." % (model_type_str))
            if args.eval_best == "true":
                model.load("./models", prefix="")
            else:
                model.load("./models", prefix="last_exec_")
            rec_arch = (model.actor.memory_capable() and model.critic.memory_capable())
            (mr, r_std), (mel, mel_std), (suc, suc_std), ev_steps = eval_model(
                model,
                env,
                parameters["eval_episodes"],
                rec_arch=rec_arch,
                render=(args.visualize_input == "true"),
                verbose=True,
                parameters=parameters)

        print("Evaluated the model for %d episodes. Summary:" % (parameters["eval_episodes"]))
        print("\tMean reward %f (± %f)" % (mr, r_std))
        print("\tMean success %.2f%% (± %f%%)" % (suc * 100, suc_std * 100))
        print("\tMean episode length %f (± %f)" % (mel, mel_std))
        print("\tTotal steps %f" % (ev_steps))

        if args.manual_control == "true":
            model.controller.stop()
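# NOTE: Editorial addition, not part of the original script: several flags
# above are parsed as the strings "true"/"false" and compared literally
# (e.g. `args.eval_best == "true"`). A helper like this, a suggested refactor
# under that assumption, would make the boolean parsing more robust.
import argparse

def str2bool(value: str) -> bool:
    """Parse common textual booleans; raise on anything unrecognized."""
    if value.lower() in ("true", "1", "yes"):
        return True
    if value.lower() in ("false", "0", "no"):
        return False
    raise argparse.ArgumentTypeError("Expected a boolean, got %r" % value)

# Hypothetical usage with the parser above:
#   parser.add_argument("--eval_best", default=False, type=str2bool)
#   ...then `if args.eval_best:` instead of `args.eval_best == "true"`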