def test_environment_parameters():
    sender = EnvironmentParametersChannel()
    # We use a raw bytes channel to interpret the data
    receiver = RawBytesChannel(sender.channel_id)

    sender.set_float_parameter("param-1", 0.1)
    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)
    message = IncomingMessage(receiver.get_and_clear_received_messages()[0])
    key = message.read_string()
    dtype = message.read_int32()
    value = message.read_float32()
    assert key == "param-1"
    assert dtype == EnvironmentParametersChannel.EnvironmentDataTypes.FLOAT
    assert abs(value - 0.1) < 1e-8

    sender.set_float_parameter("param-1", 0.1)
    sender.set_float_parameter("param-2", 0.1)
    sender.set_float_parameter("param-3", 0.1)
    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)
    assert len(receiver.get_and_clear_received_messages()) == 3

    with pytest.raises(UnityCommunicationException):
        # the EnvironmentParametersChannel is send-only, so routing an
        # incoming message back to it raises
        sender.set_float_parameter("param-1", 0.1)
        data = SideChannelManager([sender]).generate_side_channel_messages()
        SideChannelManager([sender]).process_side_channel_message(data)
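For reference, a minimal sketch of a receiving channel that decodes the wire format the test inspects (string key, int32 type tag, float32 value). The class name and the `params` dict are illustrative, not part of mlagents; only `SideChannel` and `IncomingMessage` are existing API.

from mlagents_envs.side_channel import SideChannel, IncomingMessage

class FloatParamReceiver(SideChannel):
    """Hypothetical receiver mirroring EnvironmentParametersChannel's encoding."""

    def __init__(self):
        # Reuse the sender's channel id so messages with that id route here.
        super().__init__(EnvironmentParametersChannel().channel_id)
        self.params = {}

    def on_message_received(self, msg: IncomingMessage) -> None:
        key = msg.read_string()
        msg.read_int32()  # data type tag (FLOAT)
        self.params[key] = msg.read_float32()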
def make_unity_env(self,
                   env_name,
                   float_params=dict(),
                   time_scale=1,
                   seed=time.time(),
                   worker_id=None,
                   **kwargs):
    """
    Creates a gym environment from a unity game.

    env_name: str
        the path to the game
    float_params: dict or None
        a dict of argument settings for the unity environment
        keys: varies by environment
    time_scale: float
        argument to set Unity's time scale. This applies less to gym
        wrapped versions of Unity Environments, I believe.. but I'm not sure
    seed: int
        the seed for randomness
    worker_id: int
        must specify a unique worker id for each unity process on this machine
    """
    if float_params is None:
        float_params = dict()
    path = os.path.expanduser(env_name)
    channel = EngineConfigurationChannel()
    env_channel = EnvironmentParametersChannel()
    channel.set_configuration_parameters(time_scale=time_scale)
    for k, v in float_params.items():
        if k == "validation" and v >= 1:
            print("Game in validation mode")
        env_channel.set_float_parameter(k, float(v))
    if worker_id is None:
        worker_id = int(seed) % 500 + 1
    env_made = False
    n_loops = 0
    while not env_made and n_loops < 50:
        try:
            env = UnityEnvironment(file_name=path,
                                   side_channels=[channel, env_channel],
                                   worker_id=worker_id,
                                   seed=int(seed))
            env_made = True
        except Exception:
            s = "Error encountered making environment, "
            s += "trying new worker_id"
            print(s)
            worker_id = (worker_id + 1 + int(np.random.random() * 100)) % 500
            try:
                env.close()
            except Exception:
                pass
            n_loops += 1
    env = UnityToGymWrapper(env, allow_multiple_obs=True)
    return env
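A usage sketch for the factory above; the owner object, build path, and the `validation` key are placeholders for whatever your own build expects:

# Hypothetical owner object and build path; "validation" is just one
# example of a float parameter the game might read.
env = trainer.make_unity_env("~/builds/FoodCollector",
                             float_params={"validation": 1.0},
                             time_scale=20, seed=123, worker_id=7)
obs = env.reset()
env.close()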
def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
    """
    Helper method to send sampler settings over EnvironmentParametersChannel.
    Calls the constant sampler type set method.
    :param key: environment parameter to be sampled
    :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment
    """
    env_channel.set_float_parameter(key, self.value)
def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
    """
    Helper method to send sampler settings over EnvironmentParametersChannel.
    Calls the gaussian sampler type set method.
    :param key: environment parameter to be sampled
    :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment
    """
    env_channel.set_gaussian_sampler_parameters(
        key, self.mean, self.st_dev, self.seed
    )
def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
    """
    Helper method to send sampler settings over EnvironmentParametersChannel.
    Calls the multirangeuniform sampler type set method.
    :param key: environment parameter to be sampled
    :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment
    """
    env_channel.set_multirangeuniform_sampler_parameters(
        key, self.intervals, self.seed
    )
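The same pattern covers the remaining sampler type; a sketch assuming a settings class with `min_value`, `max_value`, and `seed` attributes (the channel method `set_uniform_sampler_parameters` is part of the same mlagents API as the two above):

def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
    """
    Helper method to send sampler settings over EnvironmentParametersChannel.
    Calls the uniform sampler type set method.
    """
    # min_value/max_value/seed are assumed attributes of the settings class.
    env_channel.set_uniform_sampler_parameters(
        key, self.min_value, self.max_value, self.seed
    )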
class UnityWrapper(object):

    def __init__(self, env_args):
        self.engine_configuration_channel = EngineConfigurationChannel()
        if env_args['train_mode']:
            self.engine_configuration_channel.set_configuration_parameters(
                time_scale=env_args['train_time_scale'])
        else:
            self.engine_configuration_channel.set_configuration_parameters(
                width=env_args['width'],
                height=env_args['height'],
                quality_level=env_args['quality_level'],
                time_scale=env_args['inference_time_scale'],
                target_frame_rate=env_args['target_frame_rate'])
        self.float_properties_channel = EnvironmentParametersChannel()
        if env_args['file_path'] is None:
            self._env = UnityEnvironment(base_port=5004,
                                         seed=env_args['env_seed'],
                                         side_channels=[
                                             self.engine_configuration_channel,
                                             self.float_properties_channel
                                         ])
        else:
            unity_env_dict = load_yaml('/'.join(
                [os.getcwd(), 'rls', 'envs', 'unity_env_dict.yaml']))
            self._env = UnityEnvironment(
                file_name=env_args['file_path'],
                base_port=env_args['port'],
                no_graphics=not env_args['render'],
                seed=env_args['env_seed'],
                side_channels=[
                    self.engine_configuration_channel,
                    self.float_properties_channel
                ],
                additional_args=[
                    '--scene',
                    str(unity_env_dict.get(env_args.get('env_name', 'Roller'), 'None')),
                    '--n_agents',
                    str(env_args.get('env_num', 1))
                ])
        self.reset_config = env_args['reset_config']

    def reset(self, **kwargs):
        reset_config = kwargs.get('reset_config', None) or self.reset_config
        for k, v in reset_config.items():
            self.float_properties_channel.set_float_parameter(k, v)
        self._env.reset()

    def __getattr__(self, name):
        if name.startswith('_'):
            raise AttributeError(
                "attempted to get missing private attribute '{}'".format(name))
        return getattr(self._env, name)
def initialize_env(self, config, env_file) -> Environment:
    """
    Initialize the environment.

    Args:
        config: the configuration parameters.
        env_file: the environment file.

    Returns:
        env: Environment
    """
    # [3] Environment configuration
    base_port = int(input("Enter base port: "))
    time_scale = int(config.get("time_scale"))
    width = int(config.get("width"))
    height = int(config.get("height"))
    channel_config = EngineConfigurationChannel()
    channel_param = EnvironmentParametersChannel()
    env = Environment(
        file_name=env_file,
        base_port=base_port,
        side_channels=[channel_config, channel_param],
    )
    channel_config.set_configuration_parameters(time_scale=time_scale,
                                                quality_level=1,
                                                width=width,
                                                height=height)
    env.set_float_parameters(config)
    return env
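`set_float_parameters` here belongs to the custom `Environment` wrapper, not to mlagents itself. A plausible sketch of such a helper, assuming the wrapper keeps a reference to its `EnvironmentParametersChannel` (the `_channel_param` attribute is an assumption) and that only numeric entries should be forwarded:

def set_float_parameters(self, config: dict) -> None:
    """Forward every numeric config entry to the parameters side channel."""
    for key, value in config.items():
        if isinstance(value, (int, float)):
            # _channel_param is assumed to be the EnvironmentParametersChannel
            # the wrapper was constructed with.
            self._channel_param.set_float_parameter(key, float(value))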
def _check_environment_trains(
    env,
    trainer_config,
    reward_processor=default_reward_processor,
    meta_curriculum=None,
    success_threshold=0.9,
    env_manager=None,
):
    # Create controller and begin training.
    with tempfile.TemporaryDirectory() as dir:
        run_id = "id"
        save_freq = 99999
        seed = 1337
        # Clear StatsReporters so we don't write to file
        StatsReporter.writers.clear()
        debug_writer = DebugWriter()
        StatsReporter.add_writer(debug_writer)
        # Make sure threading is turned off for determinism
        trainer_config["threading"] = False
        if env_manager is None:
            env_manager = SimpleEnvManager(env, EnvironmentParametersChannel())
        trainer_factory = TrainerFactory(
            trainer_config=trainer_config,
            summaries_dir=dir,
            run_id=run_id,
            model_path=dir,
            keep_checkpoints=1,
            train_model=True,
            load_model=False,
            seed=seed,
            meta_curriculum=meta_curriculum,
            multi_gpu=False,
        )
        tc = TrainerController(
            trainer_factory=trainer_factory,
            summaries_dir=dir,
            model_path=dir,
            run_id=run_id,
            meta_curriculum=meta_curriculum,
            train=True,
            training_seed=seed,
            sampler_manager=SamplerManager(None),
            resampling_interval=None,
            save_freq=save_freq,
        )
        # Begin training
        tc.start_learning(env_manager)
        if success_threshold is not None:
            # For tests where we are just checking setup and not reward
            processed_rewards = [
                reward_processor(rewards)
                for rewards in env.final_rewards.values()
            ]
            assert all(not math.isnan(reward) for reward in processed_rewards)
            assert all(reward > success_threshold for reward in processed_rewards)
def initialize_all_side_channels(self, kwargs):
    '''Initialize all side channels.'''
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration_parameters(
        width=kwargs['width'],
        height=kwargs['height'],
        quality_level=kwargs['quality_level'],
        time_scale=1 if bool(kwargs.get('inference', False)) else kwargs['time_scale'],
        target_frame_rate=kwargs['target_frame_rate'],
        capture_frame_rate=kwargs['capture_frame_rate'])
    float_properties_channel = EnvironmentParametersChannel()
    for k, v in kwargs.get('initialize_config', {}).items():
        float_properties_channel.set_float_parameter(k, v)
    return dict(engine_configuration_channel=engine_configuration_channel,
                float_properties_channel=float_properties_channel)
def check_environment_trains(
    env,
    trainer_config,
    reward_processor=default_reward_processor,
    env_parameter_manager=None,
    success_threshold=0.9,
    env_manager=None,
    training_seed=None,
):
    if env_parameter_manager is None:
        env_parameter_manager = EnvironmentParameterManager()
    # Create controller and begin training.
    with tempfile.TemporaryDirectory() as dir:
        run_id = "id"
        seed = 1337 if training_seed is None else training_seed
        # Clear StatsReporters so we don't write to file
        StatsReporter.writers.clear()
        debug_writer = DebugWriter()
        StatsReporter.add_writer(debug_writer)
        if env_manager is None:
            env_manager = SimpleEnvManager(env, EnvironmentParametersChannel())
        trainer_factory = TrainerFactory(
            trainer_config=trainer_config,
            output_path=dir,
            train_model=True,
            load_model=False,
            seed=seed,
            param_manager=env_parameter_manager,
            multi_gpu=False,
        )
        tc = TrainerController(
            trainer_factory=trainer_factory,
            output_path=dir,
            run_id=run_id,
            param_manager=env_parameter_manager,
            train=True,
            training_seed=seed,
        )
        # Begin training
        tc.start_learning(env_manager)
        if success_threshold is not None:
            # For tests where we are just checking setup and not reward
            processed_rewards = [
                reward_processor(rewards)
                for rewards in env.final_rewards.values()
            ]
            assert all(not math.isnan(reward) for reward in processed_rewards)
            assert all(reward > success_threshold for reward in processed_rewards)
def __init__(self, env_config):
    self.worker_index = 0
    if 'SM_CHANNEL_TRAIN' in os.environ:
        env_name = os.environ['SM_CHANNEL_TRAIN'] + '/' + env_config['env_name']
        os.chmod(env_name, 0o755)
        print("Changed environment binary into executable mode.")
        # Try connecting to the Unity3D game instance.
        while True:
            try:
                channel = EnvironmentParametersChannel()
                unity_env = UnityEnvironment(
                    env_name,
                    no_graphics=True,
                    worker_id=self.worker_index,
                    side_channels=[channel],
                    additional_args=['-logFile', 'unity.log'])
                channel.set_float_parameter("simulation_mode", 1.0)
            except UnityWorkerInUseException:
                self.worker_index += 1
            else:
                break
    else:
        env_name = env_config['env_name']
        while True:
            try:
                unity_env = default_registry[env_name].make(
                    no_graphics=True,
                    worker_id=self.worker_index,
                    additional_args=['-logFile', 'unity.log'])
            except UnityWorkerInUseException:
                self.worker_index += 1
            else:
                break
    self.env = UnityToGymWrapper(unity_env)
    self.action_space = self.env.action_space
    self.observation_space = self.env.observation_space
def __init__(self, config=DEFAULT_ENV_CONFIG):
    """
    Environment initialization
    :param config: Configuration of the environment.
    """
    # create side channels
    self.env_param_channel = EnvironmentParametersChannel()
    self.engine_channel = EngineConfigurationChannel()
    self.color_pool_channel = IntListPropertiesChannel()
    side_channels = [
        self.env_param_channel,
        self.engine_channel,
        self.color_pool_channel,
    ]
    # flag whether the config has been applied to the environment
    self.is_already_initialized = False
    # create environment with config and side channels
    super().__init__(config, DEFAULT_ENV_CONFIG, side_channels=side_channels)
def __init__(self, train_mode=True, file_name=None, base_port=5005,
             seed=None, scene=None, n_agents=1):
    seed = seed if seed is not None else np.random.randint(0, 65536)
    self.engine_configuration_channel = EngineConfigurationChannel()
    self.environment_parameters_channel = EnvironmentParametersChannel()
    self._env = UnityEnvironment(
        file_name=file_name,
        base_port=base_port,
        seed=seed,
        additional_args=['--scene', scene, '--n_agents', str(n_agents)],
        side_channels=[
            self.engine_configuration_channel,
            self.environment_parameters_channel
        ])
    if train_mode:
        self.engine_configuration_channel.set_configuration_parameters(
            width=200, height=200, quality_level=0, time_scale=100)
    else:
        self.engine_configuration_channel.set_configuration_parameters(
            width=1028, height=720, quality_level=5, time_scale=5,
            target_frame_rate=60)
    self._env.reset()
    self.behavior_name = self._env.get_behavior_names()[0]
def initialize_all_side_channels(self, initialize_config, engine_config):
    """Initialize all side channels."""
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration_parameters(**engine_config)
    float_properties_channel = EnvironmentParametersChannel()
    float_properties_channel.set_float_parameter('env_copies', self._n_copies)
    for k, v in initialize_config.items():
        float_properties_channel.set_float_parameter(k, v)
    return dict(engine_configuration_channel=engine_configuration_channel,
                float_properties_channel=float_properties_channel)
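A usage sketch for the helper above; the wrapper object, config keys, and build path are illustrative, and `side_channels` simply receives the two channel objects the helper returns:

# Illustrative configs; keys must match what the game actually reads.
channels = wrapper.initialize_all_side_channels(
    initialize_config={"difficulty": 1.0},
    engine_config={"time_scale": 20, "width": 84, "height": 84},
)
env = UnityEnvironment(file_name="path/to/build",
                       side_channels=list(channels.values()))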
def make_unity_env(config):
    # setup environment
    if sys.platform == "win32":
        env_build = "../env/FreeFallVer2/windows/FreeFall.exe"
    elif sys.platform == "linux":
        env_build = "../env/FreeFallVer2/linux/FreeFall.x86_64"
    elif sys.platform == "darwin":
        env_build = "../env/FreeFallVer2/mac.app"
    else:
        raise AttributeError("{} platform is not supported.".format(sys.platform))
    channel = EnvironmentParametersChannel()
    unity_env = UnityEnvironment(env_build,
                                 side_channels=[channel],
                                 additional_args=["-batchmode"])
    env = UnityToGymWrapper(unity_env, uint8_visual=True, allow_multiple_obs=True)
    env = DistanceWrapper(env)
    env = MatplotlibWrapper(env)
    assign_config(channel, config)
    return env
import sys

import numpy as np

from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.side_channel.environment_parameters_channel import EnvironmentParametersChannel

# setup environment
if sys.platform == 'win32':
    env_build = "../env/FlyCamera/windows/FlyCamera.exe"
elif sys.platform == 'linux':
    env_build = "../env/FlyCamera/linux/FlyCamera.x86_64"
elif sys.platform == "darwin":
    env_build = "../env/FlyCamera/mac.app"
else:
    raise AttributeError("{} platform is not supported.".format(sys.platform))
channel = EnvironmentParametersChannel()
unity_env = UnityEnvironment(env_build, side_channels=[channel])
channel.set_float_parameter("key_speed", 10.0)
channel.set_float_parameter("cam_sens", 0.25)
env = UnityToGymWrapper(unity_env, uint8_visual=True)

# interface
max_mouse_move = 10  # in pixels; to limit mouse "jump" due to slow in-loop process
mouse_position = np.zeros((2,))

def mouse_move(event):
    global mouse_position
    x, y = event.xdata, event.ydata
    mouse_position = np.array([x, y])

key_wasd = np.array([False] * 4)

def key_press(event):
    # NOTE: cannot handle multiple key press at the same time
class MyEnv(gym.Env):

    def __init__(self, worker_id, realtime_mode=False):
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()
        env_path = "C:/myDesktop/source/gridworld_imitation/food_collector_4"
        self._env = UnityEnvironment(
            env_path, worker_id,
            side_channels=[self.reset_parameters, self.engine_config])
        self._env.reset()
        self.behavior_name = list(self._env.behavior_specs)[0]
        behavior_spec = self._env.behavior_specs[self.behavior_name]
        print(behavior_spec)
        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0)
            self.reset_parameters.set_float_parameter("train-mode", 0.0)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0)
            self.reset_parameters.set_float_parameter("train-mode", 1.0)
        self._flattener = ActionFlattener(behavior_spec.action_spec.discrete_branches)

    def reset(self):
        # for key, value in reset_params.items():
        #     self.reset_parameters.set_float_parameter(key, value)
        self._env.reset()
        info, terminal_info = self._env.get_steps(self.behavior_name)
        self.game_over = False
        obs, reward, done, info = self._single_step(info, terminal_info)
        return obs

    def step(self, action):
        # Use random actions for all other agents in environment.
        if self._flattener is not None and type(action) == int:
            # Translate action into list
            action = np.array(self._flattener.lookup_action(action))
        c_action = Action(action)
        self._env.set_actions(self.behavior_name, c_action)
        self._env.step()
        running_info, terminal_info = self._env.get_steps(self.behavior_name)
        obs, reward, done, info = self._single_step(running_info, terminal_info)
        self.game_over = done
        return obs, reward, done, info

    def _single_step(self, info, terminal_info):
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info
        # Observations arrive in order: camera first, then sensors
        output_info = {}
        output_info["visual_obs"] = use_info.obs[0][0]
        # obs = np.concatenate([use_info.obs[1][0], use_info.obs[2][0]])
        return use_info.obs[1][0], use_info.reward[0], done, output_info

    def close(self):
        self._env.close()

    def render(self):
        pass
class FooCarEnv(gym.Env):
    _channel = EnvironmentParametersChannel()
    PathSpace = {
        'xyz': 0,
        'xy': 2,
        'yz': 2,
        'xz': 2
    }

    def __init__(self, no_graphics: bool = False, seed: int = 1, **config):
        self._config = config
        worker_id = config.get('worker_id', 0)
        self._unity_env = UnityEnvironment(
            file_name=UNITY_ENV_EXE_FILE,
            # file_name=None,  # Unity Editor Mode (debug)
            no_graphics=no_graphics,
            seed=seed,
            side_channels=[self._channel],
            worker_id=worker_id
        )
        for key, value in config.items():
            self._channel.set_float_parameter(key, float(value))
        self._gym_env = UnityToGymWrapper(self._unity_env)

    def step(self, action):
        obs, reward, done, info = self._gym_env.step(action)
        size = self.observation_size
        return obs[:size], reward, done, info

    def reset(self):
        obs = self._gym_env.reset()
        size = self.observation_size
        return obs[:size]

    def render(self, mode="rgb_array"):
        return self._gym_env.render(mode=mode)

    def seed(self, seed=None):
        self._gym_env.seed(seed=seed)  # it will throw a warning

    def close(self):
        self._gym_env.close()

    @property
    def metadata(self):
        return self._gym_env.metadata

    @property
    def reward_range(self) -> Tuple[float, float]:
        return self._gym_env.reward_range

    @property
    def action_space(self):
        return self._gym_env.action_space

    @property
    def observation_space(self):
        config = self._config
        space = self.PathSpace
        path_space = config.get('path_space', space['xz'])
        r = config.get('radius_anchor_circle', 8.0)
        r_e = config.get('radius_epsilon_ratio', 0.7)
        h = config.get('max_anchor_height', 1.0)
        xyz_mode = (path_space == space['xyz'])
        bound = max(r * (1 + r_e), h if xyz_mode else 0)
        shape = (self.observation_size,)
        return gym.spaces.Box(-bound, +bound, dtype=np.float32, shape=shape)

    @property
    def observation_size(self):
        # Reference: readonly variable (Unity)FooCar/CarAgent.ObservationSize
        config = self._config
        space = self.PathSpace
        path_space = config.get('path_space', space['xz'])
        ticker_end = config.get('ticker_end', 5)
        ticker_start = config.get('ticker_start', -3)
        xyz_mode = (path_space == space['xyz'])
        basic_num = 6
        point_dim = 3 if xyz_mode else 2
        return basic_num + 2 * point_dim * (ticker_end - ticker_start + 1)
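A construction sketch for the wrapper above; the keyword keys mirror the defaults read in `observation_space`/`observation_size`, and the values are illustrative:

# Every keyword ends up in self._config and is also sent through the
# parameters side channel as a float.
env = FooCarEnv(no_graphics=True, seed=42,
                path_space=FooCarEnv.PathSpace['xz'],
                radius_anchor_circle=8.0,
                ticker_start=-3, ticker_end=5)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()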
def worker(
    parent_conn: Connection,
    step_queue: Queue,
    pickled_env_factory: str,
    worker_id: int,
    engine_configuration: EngineConfig,
    log_level: int = logging_util.INFO,
) -> None:
    env_factory: Callable[
        [int, List[SideChannel]], UnityEnvironment
    ] = cloudpickle.loads(pickled_env_factory)
    env_parameters = EnvironmentParametersChannel()
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_configuration)
    stats_channel = StatsSideChannel()
    env: BaseEnv = None
    # Set log level. On some platforms, the logger isn't common with the
    # main process, so we need to set it again.
    logging_util.set_log_level(log_level)

    def _send_response(cmd_name: EnvironmentCommand, payload: Any) -> None:
        parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))

    def _generate_all_results() -> AllStepResult:
        all_step_result: AllStepResult = {}
        for brain_name in env.behavior_specs:
            all_step_result[brain_name] = env.get_steps(brain_name)
        return all_step_result

    try:
        env = env_factory(
            worker_id, [env_parameters, engine_configuration_channel, stats_channel]
        )
        while True:
            req: EnvironmentRequest = parent_conn.recv()
            if req.cmd == EnvironmentCommand.STEP:
                all_action_info = req.payload
                for brain_name, action_info in all_action_info.items():
                    if len(action_info.action) != 0:
                        env.set_actions(brain_name, action_info.action)
                env.step()
                all_step_result = _generate_all_results()
                # The timers in this process are independent from all the processes and the "main" process
                # So after we send back the root timer, we can safely clear them.
                # Note that we could randomly return timers a fraction of the time if we wanted to reduce
                # the data transferred.
                # TODO get gauges from the workers and merge them in the main process too.
                env_stats = stats_channel.get_and_reset_stats()
                step_response = StepResponse(
                    all_step_result, get_timer_root(), env_stats
                )
                step_queue.put(
                    EnvironmentResponse(
                        EnvironmentCommand.STEP, worker_id, step_response
                    )
                )
                reset_timers()
            elif req.cmd == EnvironmentCommand.BEHAVIOR_SPECS:
                _send_response(EnvironmentCommand.BEHAVIOR_SPECS, env.behavior_specs)
            elif req.cmd == EnvironmentCommand.ENVIRONMENT_PARAMETERS:
                for k, v in req.payload.items():
                    if isinstance(v, ParameterRandomizationSettings):
                        v.apply(k, env_parameters)
            elif req.cmd == EnvironmentCommand.RESET:
                env.reset()
                all_step_result = _generate_all_results()
                _send_response(EnvironmentCommand.RESET, all_step_result)
            elif req.cmd == EnvironmentCommand.CLOSE:
                break
    except (
        KeyboardInterrupt,
        UnityCommunicationException,
        UnityTimeOutException,
        UnityEnvironmentException,
        UnityCommunicatorStoppedException,
    ) as ex:
        logger.info(f"UnityEnvironment worker {worker_id}: environment stopping.")
        step_queue.put(
            EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
        )
        _send_response(EnvironmentCommand.ENV_EXITED, ex)
    finally:
        # If this worker has put an item in the step queue that hasn't been processed by the EnvManager, the process
        # will hang until the item is processed. We avoid this behavior by using Queue.cancel_join_thread()
        # See https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Queue.cancel_join_thread for
        # more info.
        logger.debug(f"UnityEnvironment worker {worker_id} closing.")
        step_queue.cancel_join_thread()
        step_queue.close()
        if env is not None:
            env.close()
        logger.debug(f"UnityEnvironment worker {worker_id} done.")
def worker(
    parent_conn: Connection,
    step_queue: Queue,
    pickled_env_factory: str,
    worker_id: int,
    run_options: RunOptions,
    log_level: int = logging_util.INFO,
) -> None:
    env_factory: Callable[
        [int, List[SideChannel]], UnityEnvironment
    ] = cloudpickle.loads(restricted_loads(pickled_env_factory))
    env_parameters = EnvironmentParametersChannel()
    engine_config = EngineConfig(
        width=run_options.engine_settings.width,
        height=run_options.engine_settings.height,
        quality_level=run_options.engine_settings.quality_level,
        time_scale=run_options.engine_settings.time_scale,
        target_frame_rate=run_options.engine_settings.target_frame_rate,
        capture_frame_rate=run_options.engine_settings.capture_frame_rate,
    )
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_config)
    stats_channel = StatsSideChannel()
    training_analytics_channel: Optional[TrainingAnalyticsSideChannel] = None
    if worker_id == 0:
        training_analytics_channel = TrainingAnalyticsSideChannel()
    env: UnityEnvironment = None
    # Set log level. On some platforms, the logger isn't common with the
    # main process, so we need to set it again.
    logging_util.set_log_level(log_level)

    def _send_response(cmd_name: EnvironmentCommand, payload: Any) -> None:
        parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))

    def _generate_all_results() -> AllStepResult:
        all_step_result: AllStepResult = {}
        for brain_name in env.behavior_specs:
            all_step_result[brain_name] = env.get_steps(brain_name)
        return all_step_result

    try:
        side_channels = [env_parameters, engine_configuration_channel, stats_channel]
        if training_analytics_channel is not None:
            side_channels.append(training_analytics_channel)

        env = env_factory(worker_id, side_channels)
        if (
            not env.academy_capabilities
            or not env.academy_capabilities.trainingAnalytics
        ):
            # Make sure we don't try to send training analytics if the environment doesn't know how to process
            # them. This wouldn't be catastrophic, but would result in unknown SideChannel UUIDs being used.
            training_analytics_channel = None
        if training_analytics_channel:
            training_analytics_channel.environment_initialized(run_options)

        while True:
            req: EnvironmentRequest = parent_conn.recv()
            if req.cmd == EnvironmentCommand.STEP:
                all_action_info = req.payload
                for brain_name, action_info in all_action_info.items():
                    if len(action_info.agent_ids) > 0:
                        env.set_actions(brain_name, action_info.env_action)
                env.step()
                all_step_result = _generate_all_results()
                # The timers in this process are independent from all the processes and the "main" process
                # So after we send back the root timer, we can safely clear them.
                # Note that we could randomly return timers a fraction of the time if we wanted to reduce
                # the data transferred.
                # TODO get gauges from the workers and merge them in the main process too.
                env_stats = stats_channel.get_and_reset_stats()
                step_response = StepResponse(
                    all_step_result, get_timer_root(), env_stats
                )
                step_queue.put(
                    EnvironmentResponse(
                        EnvironmentCommand.STEP, worker_id, step_response
                    )
                )
                reset_timers()
            elif req.cmd == EnvironmentCommand.BEHAVIOR_SPECS:
                _send_response(EnvironmentCommand.BEHAVIOR_SPECS, env.behavior_specs)
            elif req.cmd == EnvironmentCommand.ENVIRONMENT_PARAMETERS:
                for k, v in req.payload.items():
                    if isinstance(v, ParameterRandomizationSettings):
                        v.apply(k, env_parameters)
            elif req.cmd == EnvironmentCommand.TRAINING_STARTED:
                behavior_name, trainer_config = req.payload
                if training_analytics_channel:
                    training_analytics_channel.training_started(
                        behavior_name, trainer_config
                    )
            elif req.cmd == EnvironmentCommand.RESET:
                env.reset()
                all_step_result = _generate_all_results()
                _send_response(EnvironmentCommand.RESET, all_step_result)
            elif req.cmd == EnvironmentCommand.CLOSE:
                break
    except (
        KeyboardInterrupt,
        UnityCommunicationException,
        UnityTimeOutException,
        UnityEnvironmentException,
        UnityCommunicatorStoppedException,
    ) as ex:
        logger.info(f"UnityEnvironment worker {worker_id}: environment stopping.")
        step_queue.put(
            EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
        )
        _send_response(EnvironmentCommand.ENV_EXITED, ex)
    except Exception as ex:
        logger.exception(
            f"UnityEnvironment worker {worker_id}: environment raised an unexpected exception."
        )
        step_queue.put(
            EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
        )
        _send_response(EnvironmentCommand.ENV_EXITED, ex)
    finally:
        logger.debug(f"UnityEnvironment worker {worker_id} closing.")
        if env is not None:
            env.close()
        logger.debug(f"UnityEnvironment worker {worker_id} done.")
        parent_conn.close()
        step_queue.put(EnvironmentResponse(EnvironmentCommand.CLOSED, worker_id, None))
        step_queue.close()
class UnityWrapper(Env):
    """This class wraps Unity environments.

    This wrapper has notable constraints:
        - Only one agent (no multi-agent environments)
        - Only one visual observation
        - Only discrete and multi-discrete action spaces (no continuous action space)"""

    def __init__(self, env_path, reset_params, worker_id=1, no_graphics=False, realtime_mode=False, record_trajectory=False):
        """Instantiates the Unity Environment from a specified executable.

        Arguments:
            env_path {string} -- Path to the executable of the environment
            reset_params {dict} -- Reset parameters of the environment such as the seed

        Keyword Arguments:
            worker_id {int} -- Port of the environment's instance (default: {1})
            no_graphics {bool} -- Whether to allow the executable to render or not (default: {False})
            realtime_mode {bool} -- Whether to run the environment in real time or as fast as possible (default: {False})
            record_trajectory {bool} -- Whether to record the trajectory of an entire episode. This can be used for video recording. (default: {False})
        """
        # Initialize channels
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        # Prepare default reset parameters
        self._default_reset_parameters = {}
        for key, value in reset_params.items():
            self._default_reset_parameters[key] = value
            if key != "start-seed" and key != "num-seeds":
                self.reset_parameters.set_float_parameter(key, value)

        self._realtime_mode = realtime_mode
        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0, width=1280, height=720)
        else:
            self.engine_config.set_configuration_parameters(time_scale=30.0, width=256, height=256)

        # Whether to record the trajectory of an entire episode
        self._record = record_trajectory

        # Launch the environment's executable
        self._env = UnityEnvironment(file_name=env_path, worker_id=worker_id, no_graphics=no_graphics,
                                     side_channels=[self.reset_parameters, self.engine_config])
        # If the Unity Editor should be used instead of a build
        # self._env = UnityEnvironment(file_name=None, worker_id=0, no_graphics=no_graphics, side_channels=[self.reset_parameters, self.engine_config])

        # Reset the environment
        self._env.reset()
        # Retrieve behavior configuration
        self._behavior_name = list(self._env.behavior_specs)[0]
        self._behavior_spec = self._env.behavior_specs[self._behavior_name]

        # Check whether this Unity environment is supported
        self._verify_environment()

        # Set action space properties
        if self._behavior_spec.action_spec.is_discrete():
            num_action_branches = self._behavior_spec.action_spec.discrete_size
            action_branch_dimensions = self._behavior_spec.action_spec.discrete_branches
            if num_action_branches == 1:
                self._action_space = spaces.Discrete(action_branch_dimensions[0])
            else:
                self._action_space = spaces.MultiDiscrete(action_branch_dimensions)

        # Count visual and vector observations
        self._num_vis_obs, self._num_vec_obs = 0, 0
        self._vec_obs_indices = []
        for index, obs in enumerate(self._behavior_spec.observation_specs):
            if len(obs.shape) > 1:
                self._num_vis_obs = self._num_vis_obs + 1
                self._vis_obs_index = index
            else:
                self._num_vec_obs = self._num_vec_obs + 1
                self._vec_obs_indices.append(index)

        # Set visual observation space property
        if self._num_vis_obs == 1:
            vis_obs_shape = self._behavior_spec.observation_specs[self._vis_obs_index].shape
            self._visual_observation_space = spaces.Box(
                low=0, high=1.0, shape=vis_obs_shape, dtype=np.float32)
        else:
            self._visual_observation_space = None

        # Set vector observation space property
        if self._num_vec_obs > 0:
            # Determine the length of vec obs by summing the length of each distinct one
            vec_obs_length = sum([self._behavior_spec.observation_specs[i].shape[0] for i in self._vec_obs_indices])
            self._vector_observation_space = (vec_obs_length,)
        else:
            self._vector_observation_space = None

        # Videos can only be recorded if the environment provides visual observations
        if self._record and self._visual_observation_space is None:
            raise UnityEnvironmentException("Videos cannot be rendered for a Unity environment that does not provide visual observations.")

    @property
    def unwrapped(self):
        """
        Returns:
            {UnityWrapper} -- Environment in its vanilla (i.e. unwrapped) state
        """
        return self

    @property
    def action_space(self):
        """Returns the shape of the action space of the agent."""
        return self._action_space

    @property
    def action_names(self):
        return None

    @property
    def get_episode_trajectory(self):
        """Returns the trajectory of an entire episode as dictionary (vis_obs, vec_obs, rewards, actions)."""
        self._trajectory["action_names"] = self.action_names
        return self._trajectory if self._trajectory else None

    @property
    def visual_observation_space(self):
        return self._visual_observation_space

    @property
    def vector_observation_space(self):
        return self._vector_observation_space

    def reset(self, reset_params=None):
        """Resets the environment based on a global or just specified config.

        Keyword Arguments:
            reset_params {dict} -- Reset parameters to configure the environment (default: {None})

        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
        """
        # Track rewards of an entire episode
        self._rewards = []

        # Use initial or new reset parameters
        if reset_params is None:
            reset_params = self._default_reset_parameters

        # Apply reset parameters
        for key, value in reset_params.items():
            # Skip reset parameters that are not used by the Unity environment
            if key != "start-seed" and key != "num-seeds":
                self.reset_parameters.set_float_parameter(key, value)

        # Sample the seed to be used
        if reset_params["start-seed"] > -1:
            seed = randint(reset_params["start-seed"], reset_params["start-seed"] + reset_params["num-seeds"] - 1)
        else:
            # Use unlimited seeds
            seed = -1
        self.reset_parameters.set_float_parameter("seed", seed)

        # Reset and verify the environment
        self._env.reset()
        info, terminal_info = self._env.get_steps(self._behavior_name)
        self._verify_environment()

        # Retrieve initial observations
        vis_obs, vec_obs, _, _ = self._process_agent_info(info, terminal_info)

        # Prepare trajectory recording
        self._trajectory = {
            "vis_obs": [vis_obs * 255], "vec_obs": [vec_obs],
            "rewards": [0.0], "actions": []
        }
        return vis_obs, vec_obs

    def step(self, action):
        """Runs one timestep of the environment's dynamics.
        Once an episode is done, reset() has to be called manually.

        Arguments:
            action {List} -- A list of at least one discrete action to be executed by the agent

        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
            {float} -- (Total) Scalar reward signaled by the environment
            {bool} -- Whether the episode of the environment terminated
            {dict} -- Further episode information (e.g. cumulated reward) retrieved from the environment once an episode completed
        """
        # Carry out the agent's action
        action_tuple = ActionTuple()
        action_tuple.add_discrete(np.asarray(action).reshape([1, -1]))
        self._env.set_actions(self._behavior_name, action_tuple)
        self._env.step()
        info, terminal_info = self._env.get_steps(self._behavior_name)

        # Process step results
        vis_obs, vec_obs, reward, done = self._process_agent_info(info, terminal_info)
        self._rewards.append(reward)

        # Record trajectory data
        if self._record:
            self._trajectory["vis_obs"].append(vis_obs * 255)
            self._trajectory["vec_obs"].append(vec_obs)
            self._trajectory["rewards"].append(reward)
            self._trajectory["actions"].append(action)

        # Episode information
        if done:
            info = {"reward": sum(self._rewards), "length": len(self._rewards)}
        else:
            info = None
        return vis_obs, vec_obs, reward, done, info

    def close(self):
        """Shut down the environment."""
        self._env.close()

    def _process_agent_info(self, info, terminal_info):
        """Extracts the observations, rewards, dones, and episode infos.

        Args:
            info {DecisionSteps}: Current state
            terminal_info {TerminalSteps}: Terminal state

        Returns:
            vis_obs {ndarray} -- Visual observation if available, else None
            vec_obs {ndarray} -- Vector observation if available, else None
            reward {float} -- Reward signal from the environment
            done {bool} -- Whether the episode terminated or not
        """
        # Determine if the episode terminated or not
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info

        # Process visual observations
        if self.visual_observation_space is not None:
            vis_obs = use_info.obs[self._vis_obs_index][0]
        else:
            vis_obs = None

        # Process vector observations
        if self.vector_observation_space is not None:
            for i, dim in enumerate(self._vec_obs_indices):
                if i == 0:
                    vec_obs = use_info.obs[dim][0]
                else:
                    vec_obs = np.concatenate((vec_obs, use_info.obs[dim][0]))
        else:
            vec_obs = None

        return vis_obs, vec_obs, use_info.reward[0], done

    def _verify_environment(self):
        # Verify number of agent behavior types
        if len(self._env.behavior_specs) != 1:
            raise UnityEnvironmentException("The unity environment contains more than one agent type.")
        # Verify number of agents
        decision_steps, _ = self._env.get_steps(self._behavior_name)
        if len(decision_steps) > 1:
            raise UnityEnvironmentException("The unity environment contains more than one agent, which is not supported.")
        # Verify action space type
        if not self._behavior_spec.action_spec.is_discrete() or self._behavior_spec.action_spec.is_continuous():
            raise UnityEnvironmentException("Continuous action spaces are not supported. "
                                            "Only discrete and MultiDiscrete spaces are supported.")
        # Verify that at least one observation is provided
        num_vis_obs = 0
        num_vec_obs = 0
        for obs_spec in self._behavior_spec.observation_specs:
            if len(obs_spec.shape) == 3:
                num_vis_obs += 1
            elif len(obs_spec.shape) == 1:
                num_vec_obs += 1
        if num_vis_obs == 0 and num_vec_obs == 0:
            raise UnityEnvironmentException("The unity environment does not contain any observations.")
        # Verify number of visual observations
        if num_vis_obs > 1:
            raise UnityEnvironmentException("The unity environment contains more than one visual observation.")
class UnityWrapperProcess:
    def __init__(self,
                 conn: multiprocessing.connection.Connection = None,
                 train_mode=True,
                 file_name=None,
                 worker_id=0,
                 base_port=5005,
                 no_graphics=True,
                 seed=None,
                 scene=None,
                 additional_args=None,
                 n_agents=1):
        """
        Args:
            train_mode: If in train mode, Unity will run at an accelerated time scale
            file_name: The executable path. The UnityEnvironment will run in editor if None
            worker_id: Offset from base_port
            base_port: The port that communicates with Unity. It will be set to 5004 automatically if in editor.
            no_graphics: If Unity runs in no-graphics mode. It must be set to False if Unity has a camera sensor.
            seed: Random seed
            scene: The scene name
            n_agents: The agents count
        """
        self.scene = scene
        self.n_agents = n_agents

        seed = seed if seed is not None else np.random.randint(0, 65536)
        additional_args = [] if additional_args is None else additional_args.split(' ')

        self.engine_configuration_channel = EngineConfigurationChannel()
        self.environment_parameters_channel = EnvironmentParametersChannel()

        self.environment_parameters_channel.set_float_parameter('env_copys', float(n_agents))

        if conn:
            try:
                from algorithm import config_helper
                config_helper.set_logger()
            except Exception:
                pass
            self._logger = logging.getLogger(f'UnityWrapper.Process_{os.getpid()}')
        else:
            self._logger = logging.getLogger('UnityWrapper.Process')

        self._env = UnityEnvironment(
            file_name=file_name,
            worker_id=worker_id,
            base_port=base_port if file_name else None,
            no_graphics=no_graphics and train_mode,
            seed=seed,
            additional_args=['--scene', scene] + additional_args,
            side_channels=[
                self.engine_configuration_channel,
                self.environment_parameters_channel
            ])

        self.engine_configuration_channel.set_configuration_parameters(
            width=200 if train_mode else 1280,
            height=200 if train_mode else 720,
            quality_level=5,
            time_scale=20 if train_mode else 1)

        self._env.reset()
        self.behavior_name = list(self._env.behavior_specs)[0]

        if conn:
            try:
                while True:
                    cmd, data = conn.recv()
                    if cmd == INIT:
                        conn.send(self.init())
                    elif cmd == RESET:
                        conn.send(self.reset(data))
                    elif cmd == STEP:
                        conn.send(self.step(*data))
                    elif cmd == CLOSE:
                        self.close()
            except Exception:
                self._logger.error(traceback.format_exc())

    def init(self):
        """
        Returns:
            observation shapes: tuple[(o1, ), (o2, ), (o3_1, o3_2, o3_3), ...]
            discrete action size: int, product of all action branch sizes
            continuous action size: int
        """
        behavior_spec = self._env.behavior_specs[self.behavior_name]
        obs_names = [o.name for o in behavior_spec.observation_specs]
        self._logger.info(f'Observation names: {obs_names}')
        obs_shapes = [o.shape for o in behavior_spec.observation_specs]
        self._logger.info(f'Observation shapes: {obs_shapes}')

        self._empty_action = behavior_spec.action_spec.empty_action

        discrete_action_size = 0
        if behavior_spec.action_spec.discrete_size > 0:
            discrete_action_size = 1
            action_product_list = []
            for action, branch_size in enumerate(behavior_spec.action_spec.discrete_branches):
                discrete_action_size *= branch_size
                action_product_list.append(range(branch_size))
                self._logger.info(f"Discrete action branch {action} has {branch_size} different actions")

            self.action_product = np.array(list(itertools.product(*action_product_list)))

        continuous_action_size = behavior_spec.action_spec.continuous_size

        self._logger.info(f'Continuous action size: {continuous_action_size}')

        self.d_action_size = discrete_action_size
        self.c_action_size = continuous_action_size

        for o in behavior_spec.observation_specs:
            if len(o.shape) >= 3:
                self.engine_configuration_channel.set_configuration_parameters(quality_level=5)
                break

        return obs_shapes, discrete_action_size, continuous_action_size

    def reset(self, reset_config=None):
        """
        Returns:
            observations: list[(NAgents, o1), (NAgents, o2), (NAgents, o3_1, o3_2, o3_3)]
        """
        reset_config = {} if reset_config is None else reset_config
        for k, v in reset_config.items():
            self.environment_parameters_channel.set_float_parameter(k, float(v))

        self._env.reset()
        decision_steps, terminal_steps = self._env.get_steps(self.behavior_name)

        return [obs.astype(np.float32) for obs in decision_steps.obs]

    def step(self, d_action, c_action):
        """
        Args:
            d_action: (NAgents, discrete_action_size), one-hot-like action
            c_action: (NAgents, continuous_action_size)

        Returns:
            observations: list[(NAgents, o1), (NAgents, o2), (NAgents, o3_1, o3_2, o3_3)]
            rewards: (NAgents, )
            done: (NAgents, ), bool
            max_step: (NAgents, ), bool
        """
        if self.d_action_size:
            d_action = np.argmax(d_action, axis=1)
            d_action = self.action_product[d_action]

        self._env.set_actions(self.behavior_name,
                              ActionTuple(continuous=c_action, discrete=d_action))
        self._env.step()
        decision_steps, terminal_steps = self._env.get_steps(self.behavior_name)

        tmp_terminal_steps = terminal_steps
        while len(decision_steps) == 0:
            self._env.set_actions(self.behavior_name, self._empty_action(0))
            self._env.step()
            decision_steps, terminal_steps = self._env.get_steps(self.behavior_name)
            tmp_terminal_steps.agent_id = np.concatenate(
                [tmp_terminal_steps.agent_id, terminal_steps.agent_id])
            tmp_terminal_steps.reward = np.concatenate(
                [tmp_terminal_steps.reward, terminal_steps.reward])
            tmp_terminal_steps.interrupted = np.concatenate(
                [tmp_terminal_steps.interrupted, terminal_steps.interrupted])

        reward = decision_steps.reward
        reward[tmp_terminal_steps.agent_id] = tmp_terminal_steps.reward

        done = np.full([len(decision_steps), ], False, dtype=bool)
        done[tmp_terminal_steps.agent_id] = True

        max_step = np.full([len(decision_steps), ], False, dtype=bool)
        max_step[tmp_terminal_steps.agent_id] = tmp_terminal_steps.interrupted

        return ([obs.astype(np.float32) for obs in decision_steps.obs],
                decision_steps.reward.astype(np.float32),
                done,
                max_step)

    def close(self):
        self._env.close()
        self._logger.warning(f'Process {os.getpid()} exits')
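A driver sketch for the pipe-based protocol above, assuming the INIT/RESET/CLOSE constants come from the same module as the class; the build path and scene name are placeholders:

import multiprocessing as mp

parent_conn, child_conn = mp.Pipe()
# The class is used as the process target: its __init__ enters the recv loop.
proc = mp.Process(target=UnityWrapperProcess,
                  kwargs=dict(conn=child_conn,
                              file_name='path/to/build',  # placeholder
                              scene='Roller'))            # placeholder
proc.start()

parent_conn.send((INIT, None))
obs_shapes, d_action_size, c_action_size = parent_conn.recv()

parent_conn.send((RESET, {}))
obs_list = parent_conn.recv()

parent_conn.send((CLOSE, None))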
# from env_1 import Action, Goal, UnityToGoalGymWrapper
from env_2 import Action, Goal, UnityToGoalGymWrapper
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.environment_parameters_channel import EnvironmentParametersChannel

if __name__ == "__main__":
    """demo interaction with the gym interface using a live Unity environment"""
    channel = EnvironmentParametersChannel()  # create a params sidechannel
    # file_name=None -> live interaction = blocking call to Unity
    unity_env = UnityEnvironment(file_name=None, seed=1, side_channels=[channel])
    env = UnityToGoalGymWrapper(unity_env, channel, uint8_visual=True, allow_multiple_obs=True)
    while True:
        obs = env.reset(Goal(3.0, 3.0))
        for i in range(1000):
            obs_dict = env.step(Action(0.01, 0.01))
            gym_observation = obs_dict["observation"]
            observation, reward, done, info = gym_observation
            print(observation)
            print(f"reward = {reward}")
            if done:
k = ".".join([k_prefix, k]) if k_prefix is not None else k if isinstance(v, AttrDict): assign_config(_channel, v, k) else: _channel.set_float_parameter(k, v) # setup environment if sys.platform == "win32": env_build = "../env/FreeFall/windows/FreeFall.exe" elif sys.platform == "linux": env_build = "../env/FreeFall/linux/FreeFall.x86_64" elif sys.platform == "darwin": env_build = "../env/FreeFall/mac.app" else: raise AttributeError("{} platform is not supported.".format(sys.platform)) channel = EnvironmentParametersChannel() unity_env = UnityEnvironment(env_build, side_channels=[channel]) env = UnityToGymWrapper(unity_env, uint8_visual=True, allow_multiple_obs=True) assign_config(channel, config) # interface key_ws = np.array([False] * 2) def key_press(event): # NOTE: cannot handle multiple key press at the same time global key_ws try: key = event.key.lower() except: key = event.key key_ws[0] = True if key in ['w', 'up'] else False key_ws[1] = True if key in ['s', 'down'] else False if key == 'q':
class UnityWrapper(Env):
    """This class wraps Unity environments.

    This wrapper has notable constraints:
        - Only one agent (no multi-agent environments)
        - Only one visual observation
        - Only discrete and multi-discrete action spaces (no continuous action space)"""

    def __init__(self, env_path, worker_id=1, no_graphics=False, realtime_mode=False, config=None):
        """Instantiates the Unity Environment from a specified executable.

        Arguments:
            env_path {string} -- Path to the executable of the environment

        Keyword Arguments:
            worker_id {int} -- Port of the environment's instance (default: {1})
            no_graphics {bool} -- Whether to allow the executable to render or not (default: {False})
            realtime_mode {bool} -- Whether to run the environment in real time or as fast as possible (default: {False})
            config {dict} -- Specifies the reset parameters of the environment (default: {None})
        """
        # Disable logging
        logging.disable(logging.INFO)

        # Initialize channels
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        self._config = config
        self._realtime_mode = realtime_mode
        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0, width=1280, height=720)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0, width=128, height=128)

        # Launch the environment's executable
        self._env = UnityEnvironment(file_name=env_path, worker_id=worker_id, no_graphics=no_graphics,
                                     side_channels=[self.reset_parameters, self.engine_config])
        # Reset the environment
        self._env.reset()
        # Retrieve behavior configuration
        self._behavior_name = list(self._env.behavior_specs)[0]
        self._behavior_spec = self._env.behavior_specs[self._behavior_name]

        # Set action space properties
        if len(self._behavior_spec.action_shape) == 1:
            self._action_space = spaces.Discrete(self._behavior_spec.action_shape[0])
        else:
            self._action_space = spaces.MultiDiscrete(self._behavior_spec.action_shape)
        self._action_names = ["Not available"]

        # Count visual and vector observations
        self._num_vis_obs, self._num_vec_obs = 0, 0
        self._vec_obs_indices = []
        for index, obs in enumerate(self._behavior_spec.observation_shapes):
            if len(obs) > 1:
                self._num_vis_obs = self._num_vis_obs + 1
                self._vis_obs_index = index
            else:
                self._num_vec_obs = self._num_vec_obs + 1
                self._vec_obs_indices.append(index)

        # Verify the environment
        self._verify_environment()

        # Set visual observation space property
        if self._num_vis_obs == 1:
            height = self._behavior_spec.observation_shapes[self._vis_obs_index][0]
            width = self._behavior_spec.observation_shapes[self._vis_obs_index][1]
            depth = self._behavior_spec.observation_shapes[self._vis_obs_index][2]
            self._visual_observation_space = spaces.Box(
                low=0, high=1.0, shape=(height, width, depth), dtype=np.float32)
        else:
            self._visual_observation_space = None

        # Set vector observation space property
        if self._num_vec_obs > 0:
            # Determine the length of vec obs by summing the length of each distinct one
            vec_obs_length = sum([self._behavior_spec.observation_shapes[i][0] for i in self._vec_obs_indices])
            self._vector_observation_space = (vec_obs_length,)
        else:
            self._vector_observation_space = None

    @property
    def unwrapped(self):
        """
        Returns:
            {UnityWrapper} -- Environment in its vanilla (i.e. unwrapped) state
        """
        return self

    @property
    def action_space(self):
        """Returns the shape of the action space of the agent."""
        return self._action_space

    @property
    def action_names(self):
        return self._action_names

    @property
    def visual_observation_space(self):
        return self._visual_observation_space

    @property
    def vector_observation_space(self):
        return self._vector_observation_space

    def reset(self, reset_params=None):
        """Resets the environment based on a global or just specified config.

        Keyword Arguments:
            reset_params {dict} -- Reset parameters to configure the environment (default: {None})

        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
        """
        # Track rewards of an entire episode
        self._rewards = []

        # Process config: Either load global or new config (if specified)
        if reset_params is None:
            reset_params = self._config if self._config is not None else {}

        # Apply reset parameters
        for key, value in reset_params.items():
            self.reset_parameters.set_float_parameter(key, value)

        # Reset and verify the environment
        self._env.reset()
        info, terminal_info = self._env.get_steps(self._behavior_name)
        self._verify_environment(len(info))

        # Retrieve initial observations
        vis_obs, vec_obs, _, _ = self._process_agent_info(info, terminal_info)
        return vis_obs, vec_obs

    def step(self, action):
        """Runs one timestep of the environment's dynamics.
        Once an episode is done, reset() has to be called manually.

        Arguments:
            action {List} -- A list of at least one discrete action to be executed by the agent

        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
            {float} -- (Total) Scalar reward signaled by the environment
            {bool} -- Whether the episode of the environment terminated
            {dict} -- Further episode information (e.g. cumulated reward) retrieved from the environment once an episode completed
        """
        # Carry out the agent's action
        self._env.set_actions(self._behavior_name, action.reshape([1, -1]))
        self._env.step()
        info, terminal_info = self._env.get_steps(self._behavior_name)

        # Process step results
        vis_obs, vec_obs, reward, done = self._process_agent_info(info, terminal_info)
        self._rewards.append(reward)

        # Episode information
        if done:
            info = {"reward": sum(self._rewards), "length": len(self._rewards)}
        else:
            info = None
        return vis_obs, vec_obs, reward, done, info

    def close(self):
        """Shut down the environment."""
        self._env.close()

    def _process_agent_info(self, info, terminal_info):
        """Extracts the observations, rewards, dones, and episode infos.

        Args:
            info {DecisionSteps}: Current state
            terminal_info {TerminalSteps}: Terminal state

        Returns:
            vis_obs {ndarray} -- Visual observation if available, else None
            vec_obs {ndarray} -- Vector observation if available, else None
            reward {float} -- Reward signal from the environment
            done {bool} -- Whether the episode terminated or not
        """
        # Determine if the episode terminated or not
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info

        # Process visual observations
        if self.visual_observation_space is not None:
            vis_obs = use_info.obs[self._vis_obs_index][0]
        else:
            vis_obs = None

        # Process vector observations
        if self.vector_observation_space is not None:
            for i, dim in enumerate(self._vec_obs_indices):
                if i == 0:
                    vec_obs = use_info.obs[dim][0]
                else:
                    vec_obs = np.concatenate((vec_obs, use_info.obs[dim][0]))
        else:
            vec_obs = None

        return vis_obs, vec_obs, use_info.reward[0], done

    def _verify_environment(self, num_agents=None):
        """Checks if the environment meets the requirements of this wrapper.
        Only one agent and at maximum one visual observation is allowed.
        Only Discrete and MultiDiscrete action spaces are supported.

        Arguments:
            num_agents {int} -- Number of agents (default: {None})
        """
        # Verify number of agent types
        if len(self._env.behavior_specs) != 1:
            raise UnityEnvironmentException("The unity environment contains more than one agent type.")
        # Verify action space type
        if int(self._behavior_spec.action_type.value) == 1:
            raise UnityEnvironmentException("Continuous action spaces are not supported. Only discrete and MultiDiscrete spaces are supported.")
        # Verify number of visual observations
        if self._num_vis_obs > 1:
            raise UnityEnvironmentException("The unity environment contains more than one visual observation.")
        # Verify agent count
        if num_agents is not None and num_agents > 1:
            raise UnityEnvironmentException("The unity environment contains more than one agent.")