Example #1
def test_environment_parameters():
    sender = EnvironmentParametersChannel()
    # We use a raw bytes channel to interpret the data
    receiver = RawBytesChannel(sender.channel_id)

    sender.set_float_parameter("param-1", 0.1)
    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)

    message = IncomingMessage(receiver.get_and_clear_received_messages()[0])
    key = message.read_string()
    dtype = message.read_int32()
    value = message.read_float32()
    assert key == "param-1"
    assert dtype == EnvironmentParametersChannel.EnvironmentDataTypes.FLOAT
    assert abs(value - 0.1) < 1e-8

    sender.set_float_parameter("param-1", 0.1)
    sender.set_float_parameter("param-2", 0.1)
    sender.set_float_parameter("param-3", 0.1)

    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)

    assert len(receiver.get_and_clear_received_messages()) == 3

    with pytest.raises(UnityCommunicationException):
        # try to send data to the EnvironmentParametersChannel
        sender.set_float_parameter("param-1", 0.1)
        data = SideChannelManager([sender]).generate_side_channel_messages()
        SideChannelManager([sender]).process_side_channel_message(data)
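The raw decoding above generalizes: a receiver can subclass SideChannel and parse the same layout (string key, int32 dtype marker, float32 value). A minimal sketch, assuming a recent mlagents_envs that re-exports SideChannel and IncomingMessage from mlagents_envs.side_channel; the class and attribute names are hypothetical.

import uuid
from mlagents_envs.side_channel import SideChannel, IncomingMessage

class FloatParameterReader(SideChannel):
    # Decodes messages produced by EnvironmentParametersChannel.set_float_parameter.
    def __init__(self, channel_id: uuid.UUID) -> None:
        # Reuse the sender's channel_id so the manager routes messages here.
        super().__init__(channel_id)
        self.params = {}

    def on_message_received(self, msg: IncomingMessage) -> None:
        key = msg.read_string()
        msg.read_int32()  # dtype marker (FLOAT in the test above)
        self.params[key] = msg.read_float32()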
Example #2
    def __init__(self, worker_id, realtime_mode=False):
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        env_path = "C:/myDesktop/source/gridworld_imitation/food_collector_4"

        self._env = UnityEnvironment(
            env_path,
            worker_id,
            side_channels=[self.reset_parameters, self.engine_config])
        self._env.reset()

        self.behavior_name = list(self._env.behavior_specs)[0]
        behavior_spec = self._env.behavior_specs[self.behavior_name]
        print(behavior_spec)

        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0)
            self.reset_parameters.set_float_parameter("train-mode", 0.0)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0)
            self.reset_parameters.set_float_parameter("train-mode", 1.0)

        self._flattener = ActionFlattener(
            behavior_spec.action_spec.discrete_branches)
Example #3
    def make_unity_env(self,
                       env_name,
                       float_params=dict(),
                       time_scale=1,
                       seed=int(time.time()),
                       worker_id=None,
                       **kwargs):
        """
        creates a gym environment from a unity game

        env_name: str
            the path to the game
        float_params: dict or None
            this should be a dict of argument settings for the unity
            environment
            keys: varies by environment
        time_scale: float
            argument to set Unity's time scale; this may have less effect on
            gym-wrapped Unity environments
        seed: int
            the seed for randomness
        worker_id: int
            must specify a unique worker id for each unity process
            on this machine
        """
        if float_params is None: float_params = dict()
        path = os.path.expanduser(env_name)
        channel = EngineConfigurationChannel()
        env_channel = EnvironmentParametersChannel()
        channel.set_configuration_parameters(time_scale=time_scale)
        for k, v in float_params.items():
            if k == "validation" and v >= 1:
                print("Game in validation mode")
            env_channel.set_float_parameter(k, float(v))
        if worker_id is None: worker_id = seed % 500 + 1
        env_made = False
        n_loops = 0
        while not env_made and n_loops < 50:
            try:
                env = UnityEnvironment(file_name=path,
                                       side_channels=[channel, env_channel],
                                       worker_id=worker_id,
                                       seed=seed)
                env_made = True
            except Exception:
                s = "Error encountered making environment, "
                s += "trying new worker_id"
                print(s)
                worker_id = (worker_id + 1 +
                             int(np.random.random() * 100)) % 500
                try:
                    env.close()
                except Exception:
                    pass
                n_loops += 1
        env = UnityToGymWrapper(env, allow_multiple_obs=True)
        return env
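An illustrative call of the factory above; the caller object, build path and parameter key are hypothetical.

# `agent` is whatever object defines make_unity_env; path and keys are examples.
env = agent.make_unity_env("~/builds/MyGame",
                           float_params={"validation": 0},
                           time_scale=20,
                           seed=0,
                           worker_id=3)
obs = env.reset()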
Example #4
 def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
     """
     Helper method to send sampler settings over EnvironmentParametersChannel
     Calls the constant sampler type set method.
     :param key: environment parameter to be sampled
     :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment
     """
     env_channel.set_float_parameter(key, self.value)
Example #5
 def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
     """
     Helper method to send sampler settings over EnvironmentParametersChannel
     Calls the gaussian sampler type set method.
     :param key: environment parameter to be sampled
     :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment
     """
     env_channel.set_gaussian_sampler_parameters(
         key, self.mean, self.st_dev, self.seed
     )
Example #6
 def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
     """
     Helper method to send sampler settings over EnvironmentParametersChannel
     Calls the multirangeuniform sampler type set method.
     :param key: environment parameter to be sampled
     :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment
     """
     env_channel.set_multirangeuniform_sampler_parameters(
         key, self.intervals, self.seed
     )
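These three apply overloads appear to come from ML-Agents' parameter randomization settings (the constant, gaussian and multirangeuniform samplers). A hedged sketch of how such samplers are typically driven; env_parameter_samplers is a hypothetical mapping from parameter name to sampler settings.

channel = EnvironmentParametersChannel()
for key, sampler in env_parameter_samplers.items():  # hypothetical dict
    # Each sampler dispatches to the matching set_*_parameters call on the channel.
    sampler.apply(key, channel)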
Example #7
class UnityWrapper(object):
    def __init__(self, env_args):
        self.engine_configuration_channel = EngineConfigurationChannel()
        if env_args['train_mode']:
            self.engine_configuration_channel.set_configuration_parameters(
                time_scale=env_args['train_time_scale'])
        else:
            self.engine_configuration_channel.set_configuration_parameters(
                width=env_args['width'],
                height=env_args['height'],
                quality_level=env_args['quality_level'],
                time_scale=env_args['inference_time_scale'],
                target_frame_rate=env_args['target_frame_rate'])
        self.float_properties_channel = EnvironmentParametersChannel()
        if env_args['file_path'] is None:
            self._env = UnityEnvironment(base_port=5004,
                                         seed=env_args['env_seed'],
                                         side_channels=[
                                             self.engine_configuration_channel,
                                             self.float_properties_channel
                                         ])
        else:
            unity_env_dict = load_yaml('/'.join(
                [os.getcwd(), 'rls', 'envs', 'unity_env_dict.yaml']))
            self._env = UnityEnvironment(
                file_name=env_args['file_path'],
                base_port=env_args['port'],
                no_graphics=not env_args['render'],
                seed=env_args['env_seed'],
                side_channels=[
                    self.engine_configuration_channel,
                    self.float_properties_channel
                ],
                additional_args=[
                    '--scene',
                    str(
                        unity_env_dict.get(env_args.get('env_name', 'Roller'),
                                           'None')), '--n_agents',
                    str(env_args.get('env_num', 1))
                ])
        self.reset_config = env_args['reset_config']

    def reset(self, **kwargs):
        reset_config = kwargs.get('reset_config', None) or self.reset_config
        for k, v in reset_config.items():
            self.float_properties_channel.set_float_parameter(k, v)
        self._env.reset()

    def __getattr__(self, name):
        if name.startswith('_'):
            raise AttributeError(
                "attempted to get missing private attribute '{}'".format(name))
        return getattr(self._env, name)
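Illustrative env_args for the wrapper above; the keys are taken from the snippet, the values are made up.

env = UnityWrapper({
    'train_mode': True,
    'train_time_scale': 20,
    'file_path': None,           # None -> connect to a running Unity Editor
    'env_seed': 42,
    'reset_config': {'difficulty': 1.0},  # hypothetical reset parameter
})
env.reset()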
Example #8
    def initialize_env(self, config, env_file) -> Environment:
        """
        Initialize the environment.

        Args:
            config: the configuration parameters.
            env_file: the environment file.
        Returns:
            env: Environment
        """
        # [3] Environment configuration
        base_port = int(input("Enter base port: "))
        time_scale = int(config.get("time_scale"))
        width = int(config.get("width"))
        height = int(config.get("height"))

        channel_config = EngineConfigurationChannel()
        channel_param = EnvironmentParametersChannel()

        env = Environment(
            file_name=env_file,
            base_port=base_port,
            side_channels=[channel_config, channel_param],
        )

        channel_config.set_configuration_parameters(time_scale=time_scale,
                                                    quality_level=1,
                                                    width=width,
                                                    height=height)

        env.set_float_parameters(config)
        return env
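Environment and its set_float_parameters are the author's own wrappers, not part of mlagents_envs. A plausible sketch of the helper, forwarding numeric config entries through the parameters channel; the _param_channel attribute name is hypothetical.

def set_float_parameters(self, config: dict) -> None:
    # Forward every numeric config entry to Unity as a float parameter.
    for key, value in config.items():
        if isinstance(value, (int, float)):
            self._param_channel.set_float_parameter(key, float(value))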
Example #9
def _check_environment_trains(
    env,
    trainer_config,
    reward_processor=default_reward_processor,
    meta_curriculum=None,
    success_threshold=0.9,
    env_manager=None,
):
    # Create controller and begin training.
    with tempfile.TemporaryDirectory() as dir:
        run_id = "id"
        save_freq = 99999
        seed = 1337
        StatsReporter.writers.clear()  # Clear StatsReporters so we don't write to file
        debug_writer = DebugWriter()
        StatsReporter.add_writer(debug_writer)
        # Make sure threading is turned off for determinism
        trainer_config["threading"] = False
        if env_manager is None:
            env_manager = SimpleEnvManager(env, EnvironmentParametersChannel())
        trainer_factory = TrainerFactory(
            trainer_config=trainer_config,
            summaries_dir=dir,
            run_id=run_id,
            model_path=dir,
            keep_checkpoints=1,
            train_model=True,
            load_model=False,
            seed=seed,
            meta_curriculum=meta_curriculum,
            multi_gpu=False,
        )

        tc = TrainerController(
            trainer_factory=trainer_factory,
            summaries_dir=dir,
            model_path=dir,
            run_id=run_id,
            meta_curriculum=meta_curriculum,
            train=True,
            training_seed=seed,
            sampler_manager=SamplerManager(None),
            resampling_interval=None,
            save_freq=save_freq,
        )

        # Begin training
        tc.start_learning(env_manager)
        if success_threshold is not None:  # For tests where we are just checking setup and not reward
            processed_rewards = [
                reward_processor(rewards)
                for rewards in env.final_rewards.values()
            ]
            assert all(not math.isnan(reward) for reward in processed_rewards)
            assert all(reward > success_threshold
                       for reward in processed_rewards)
Example #10
 def initialize_all_side_channels(self, kwargs):
     '''
     Initialize all side channels.
     '''
     engine_configuration_channel = EngineConfigurationChannel()
     engine_configuration_channel.set_configuration_parameters(
         width=kwargs['width'],
         height=kwargs['height'],
         quality_level=kwargs['quality_level'],
         time_scale=1
         if bool(kwargs.get('inference', False)) else kwargs['time_scale'],
         target_frame_rate=kwargs['target_frame_rate'],
         capture_frame_rate=kwargs['capture_frame_rate'])
     float_properties_channel = EnvironmentParametersChannel()
     for k, v in kwargs.get('initialize_config', {}).items():
         float_properties_channel.set_float_parameter(k, v)
     return dict(engine_configuration_channel=engine_configuration_channel,
                 float_properties_channel=float_properties_channel)
Example #11
 def __init__(self, env_args):
     self.engine_configuration_channel = EngineConfigurationChannel()
     if env_args['train_mode']:
         self.engine_configuration_channel.set_configuration_parameters(
             time_scale=env_args['train_time_scale'])
     else:
         self.engine_configuration_channel.set_configuration_parameters(
             width=env_args['width'],
             height=env_args['height'],
             quality_level=env_args['quality_level'],
             time_scale=env_args['inference_time_scale'],
             target_frame_rate=env_args['target_frame_rate'])
     self.float_properties_channel = EnvironmentParametersChannel()
     if env_args['file_path'] is None:
         self._env = UnityEnvironment(base_port=5004,
                                      seed=env_args['env_seed'],
                                      side_channels=[
                                          self.engine_configuration_channel,
                                          self.float_properties_channel
                                      ])
     else:
         unity_env_dict = load_yaml('/'.join(
             [os.getcwd(), 'rls', 'envs', 'unity_env_dict.yaml']))
         self._env = UnityEnvironment(
             file_name=env_args['file_path'],
             base_port=env_args['port'],
             no_graphics=not env_args['render'],
             seed=env_args['env_seed'],
             side_channels=[
                 self.engine_configuration_channel,
                 self.float_properties_channel
             ],
             additional_args=[
                 '--scene',
                 str(
                     unity_env_dict.get(env_args.get('env_name', 'Roller'),
                                        'None')), '--n_agents',
                 str(env_args.get('env_num', 1))
             ])
     self.reset_config = env_args['reset_config']
Example #12
def check_environment_trains(
    env,
    trainer_config,
    reward_processor=default_reward_processor,
    env_parameter_manager=None,
    success_threshold=0.9,
    env_manager=None,
    training_seed=None,
):
    if env_parameter_manager is None:
        env_parameter_manager = EnvironmentParameterManager()
    # Create controller and begin training.
    with tempfile.TemporaryDirectory() as dir:
        run_id = "id"
        seed = 1337 if training_seed is None else training_seed
        StatsReporter.writers.clear()  # Clear StatsReporters so we don't write to file
        debug_writer = DebugWriter()
        StatsReporter.add_writer(debug_writer)
        if env_manager is None:
            env_manager = SimpleEnvManager(env, EnvironmentParametersChannel())
        trainer_factory = TrainerFactory(
            trainer_config=trainer_config,
            output_path=dir,
            train_model=True,
            load_model=False,
            seed=seed,
            param_manager=env_parameter_manager,
            multi_gpu=False,
        )

        tc = TrainerController(
            trainer_factory=trainer_factory,
            output_path=dir,
            run_id=run_id,
            param_manager=env_parameter_manager,
            train=True,
            training_seed=seed,
        )

        # Begin training
        tc.start_learning(env_manager)
        if success_threshold is not None:  # For tests where we are just checking setup and not reward
            processed_rewards = [
                reward_processor(rewards)
                for rewards in env.final_rewards.values()
            ]
            assert all(not math.isnan(reward) for reward in processed_rewards)
            assert all(reward > success_threshold
                       for reward in processed_rewards)
Example #13
    def __init__(self, env_config):
        self.worker_index = 0

        if 'SM_CHANNEL_TRAIN' in os.environ:
            env_name = os.environ['SM_CHANNEL_TRAIN'] + '/' + env_config['env_name']
            os.chmod(env_name, 0o755)
            print("Changed environment binary into executable mode.")
            # Try connecting to the Unity3D game instance.
            while True:
                try:
                    channel = EnvironmentParametersChannel()
                    unity_env = UnityEnvironment(
                                    env_name, 
                                    no_graphics=True, 
                                    worker_id=self.worker_index, 
                                    side_channels=[channel], 
                                    additional_args=['-logFile', 'unity.log'])
                    channel.set_float_parameter("simulation_mode", 1.0)
                except UnityWorkerInUseException:
                    self.worker_index += 1
                else:
                    break
        else:
            env_name = env_config['env_name']
            while True:
                try:
                    unity_env = default_registry[env_name].make(
                        no_graphics=True,
                        worker_id=self.worker_index,
                        additional_args=['-logFile', 'unity.log'])
                except UnityWorkerInUseException:
                    self.worker_index += 1
                else:
                    break
            
        self.env = UnityToGymWrapper(unity_env) 
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
Example #14
    def __init__(self, config=DEFAULT_ENV_CONFIG):
        """
        Environment initialization
        :param config: Configuration of the environment.
        """

        # create side channels
        self.env_param_channel = EnvironmentParametersChannel()
        self.engine_channel = EngineConfigurationChannel()
        self.color_pool_channel = IntListPropertiesChannel()

        side_channels = [
            self.env_param_channel,
            self.engine_channel,
            self.color_pool_channel,
        ]

        # flag whether the config has been applied to the environment
        self.is_already_initialized = False
        # create environment with config and side channels
        super().__init__(config,
                         DEFAULT_ENV_CONFIG,
                         side_channels=side_channels)
Example #15
    def __init__(self,
                 train_mode=True,
                 file_name=None,
                 base_port=5005,
                 seed=None,
                 scene=None,
                 n_agents=1):

        seed = seed if seed is not None else np.random.randint(0, 65536)

        self.engine_configuration_channel = EngineConfigurationChannel()
        self.environment_parameters_channel = EnvironmentParametersChannel()

        self._env = UnityEnvironment(
            file_name=file_name,
            base_port=base_port,
            seed=seed,
            args=['--scene', scene, '--n_agents',
                  str(n_agents)],
            side_channels=[
                self.engine_configuration_channel,
                self.environment_parameters_channel
            ])

        if train_mode:
            self.engine_configuration_channel.set_configuration_parameters(
                width=200, height=200, quality_level=0, time_scale=100)
        else:
            self.engine_configuration_channel.set_configuration_parameters(
                width=1028,
                height=720,
                quality_level=5,
                time_scale=5,
                target_frame_rate=60)

        self._env.reset()
        self.behavior_name = self._env.get_behavior_names()[0]
Example #16
 def initialize_all_side_channels(self, initialize_config, engine_config):
     """
     Initialize all side channels.
     """
     engine_configuration_channel = EngineConfigurationChannel()
     engine_configuration_channel.set_configuration_parameters(
         **engine_config)
     float_properties_channel = EnvironmentParametersChannel()
     float_properties_channel.set_float_parameter('env_copies',
                                                  self._n_copies)
     for k, v in initialize_config.items():
         float_properties_channel.set_float_parameter(k, v)
     return dict(engine_configuration_channel=engine_configuration_channel,
                 float_properties_channel=float_properties_channel)
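A hypothetical call of the helper above; the engine_config keys mirror the keyword arguments of set_configuration_parameters.

channels = self.initialize_all_side_channels(
    initialize_config={'difficulty': 2.0},  # hypothetical parameter
    engine_config={'width': 84, 'height': 84, 'quality_level': 2,
                   'time_scale': 20, 'target_frame_rate': -1})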
Example #17
def make_unity_env(config):
    # setup environment
    if sys.platform == "win32":
        env_build = "../env/FreeFallVer2/windows/FreeFall.exe"
    elif sys.platform == "linux":
        env_build = "../env/FreeFallVer2/linux/FreeFall.x86_64"
    elif sys.platform == "darwin":
        env_build = "../env/FreeFallVer2/mac.app"
    else:
        raise AttributeError("{} platform is not supported.".format(
            sys.platform))
    channel = EnvironmentParametersChannel()
    unity_env = UnityEnvironment(env_build,
                                 side_channels=[channel],
                                 additional_args=["-batchmode"])
    env = UnityToGymWrapper(unity_env,
                            uint8_visual=True,
                            allow_multiple_obs=True)
    env = DistanceWrapper(env)
    env = MatplotlibWrapper(env)
    assign_config(channel, config)

    return env
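assign_config is not shown in this listing; a plausible sketch, assuming config is a flat mapping of parameter names to numbers.

def assign_config(channel, config):
    # Push each scalar setting to Unity through the parameters side channel.
    for key, value in config.items():
        channel.set_float_parameter(key, float(value))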
Example #18
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.side_channel.environment_parameters_channel import EnvironmentParametersChannel


# setup environment
if sys.platform == 'win32':
    env_build = "../env/FlyCamera/windows/FlyCamera.exe"
elif sys.platform == 'linux':
    env_build = "../env/FlyCamera/linux/FlyCamera.x86_64"
elif sys.platform == "darwin":
    env_build = "../env/FlyCamera/mac.app"
else:
    raise AttributeError("{} platform is not supported.".format(sys.platform))
channel = EnvironmentParametersChannel()
unity_env = UnityEnvironment(env_build, side_channels=[channel])
channel.set_float_parameter("key_speed", 10.0)
channel.set_float_parameter("cam_sens", 0.25)
env = UnityToGymWrapper(unity_env, uint8_visual=True)

# interface
max_mouse_move = 10 # in pixel; to limit mouse "jump" due to slow in-loop process
mouse_position = np.zeros((2,))
def mouse_move(event):
    global mouse_position
    x, y = event.xdata, event.ydata
    mouse_position = np.array([x, y])

key_wasd = np.array([False] * 4)
def key_press(event): # NOTE: cannot handle multiple key presses at the same time
Example #19
class MyEnv(gym.Env):
    def __init__(self, worker_id, realtime_mode=False):
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        env_path = "C:/myDesktop/source/gridworld_imitation/food_collector_4"

        self._env = UnityEnvironment(
            env_path,
            worker_id,
            side_channels=[self.reset_parameters, self.engine_config])
        self._env.reset()

        self.behavior_name = list(self._env.behavior_specs)[0]
        behavior_spec = self._env.behavior_specs[self.behavior_name]
        print(behavior_spec)

        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0)
            self.reset_parameters.set_float_parameter("train-mode", 0.0)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0)
            self.reset_parameters.set_float_parameter("train-mode", 1.0)

        self._flattener = ActionFlattener(
            behavior_spec.action_spec.discrete_branches)

    def reset(self):
        # for key, value in reset_params.items():
        #     self.reset_parameters.set_float_parameter(key, value)
        self._env.reset()
        info, terminal_info = self._env.get_steps(self.behavior_name)
        self.game_over = False

        obs, reward, done, info = self._single_step(info, terminal_info)
        return obs

    def step(self, action):
        # Use random actions for all other agents in environment.
        if self._flattener is not None and type(action) == int:
            # Translate action into list
            action = np.array(self._flattener.lookup_action(action))

        c_action = Action(action)

        self._env.set_actions(self.behavior_name, c_action)
        self._env.step()
        running_info, terminal_info = self._env.get_steps(self.behavior_name)
        obs, reward, done, info = self._single_step(running_info,
                                                    terminal_info)
        self.game_over = done

        return obs, reward, done, info

    def _single_step(self, info, terminal_info):
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info

        # Observations arrive in order: camera first, then sensors
        output_info = {}
        output_info["visual_obs"] = use_info.obs[0][0]

        #obs = np.concatenate([use_info.obs[1][0], use_info.obs[2][0]])
        return use_info.obs[1][0], use_info.reward[0], done, output_info

    def close(self):
        self._env.close()

    def render(self):
        pass
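Illustrative use of the wrapper above; the build path is hard-coded in __init__, so only worker_id and realtime_mode are chosen here.

env = MyEnv(worker_id=0, realtime_mode=False)
obs = env.reset()
obs, reward, done, info = env.step(0)  # flattened discrete action index
env.close()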
Example #20
class FooCarEnv(gym.Env):
	_channel = EnvironmentParametersChannel()

	PathSpace = {
		'xyz': 0,
		'xy': 2,
		'yz': 2,
		'xz': 2
	}

	def __init__(self, no_graphics:bool=False, seed:int=1, **config):
		self._config = config
		worker_id = 0
		if 'worker_id' in config:
			worker_id = config['worker_id']
		self._unity_env = UnityEnvironment(
			file_name=UNITY_ENV_EXE_FILE,
			# file_name=None, # Unity Editor Mode (debug)
			no_graphics=no_graphics,
			seed=seed, 
			side_channels=[self._channel],
			worker_id=worker_id
		)
		for key, value in config.items():
			self._channel.set_float_parameter(key, float(value))
		
		self._gym_env = UnityToGymWrapper(self._unity_env)

	def step(self, action):
		obs, reward, done, info = self._gym_env.step(action)
		size = self.observation_size

		return obs[:size], reward, done, info

	def reset(self):
		obs = self._gym_env.reset()
		size = self.observation_size
		return obs[:size]

	def render(self, mode="rgb_array"):
		return self._gym_env.render(mode=mode)
	
	def seed(self, seed=None):
		self._gym_env.seed(seed=seed) # it will throw a warning

	def close(self):
		self._gym_env.close()

	@property
	def metadata(self):
		return self._gym_env.metadata

	@property
	def reward_range(self) -> Tuple[float, float]:
		return self._gym_env.reward_range

	@property
	def action_space(self):
		return self._gym_env.action_space

	@property
	def observation_space(self):
		config = self._config
		space = self.PathSpace

		path_space = config['path_space'] if 'path_space' in config else space['xz']
		r = config['radius_anchor_circle'] if 'radius_anchor_circle' in config else 8.0
		r_e = config['radius_epsilon_ratio'] if 'radius_epsilon_ratio' in config else 0.7
		h = config['max_anchor_height'] if 'max_anchor_height' in config else 1.0

		xyz_mode = (path_space == space['xyz'])
		bound = max(r * (1 + r_e), h if xyz_mode else 0)
		shape = (self.observation_size,)
		
		return gym.spaces.Box(-bound, +bound, dtype=np.float32, shape=shape)
		
	@property
	def observation_size(self):
		# Reference: readonly variable (Unity)FooCar/CarAgent.ObservationSize
		config = self._config
		space = self.PathSpace

		path_space = config['path_space'] if 'path_space' in config else space['xz']
		ticker_end = config['ticker_end'] if 'ticker_end' in config else 5
		ticker_start = config['ticker_start'] if 'ticker_start' in config else -3

		xyz_mode = (path_space == space['xyz'])
		basic_num = 6
		point_dim = 3 if xyz_mode else 2

		return basic_num + 2 * point_dim * (ticker_end - ticker_start + 1)
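A worked example of observation_size with the defaults above (path_space 'xz', so point_dim is 2, and tickers running from -3 to 5):

basic_num = 6
point_dim = 2                                   # 'xz' path space
tickers = 5 - (-3) + 1                          # 9 sampled path points
obs_size = basic_num + 2 * point_dim * tickers  # 6 + 36 = 42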
Example #21
def worker(
    parent_conn: Connection,
    step_queue: Queue,
    pickled_env_factory: str,
    worker_id: int,
    engine_configuration: EngineConfig,
    log_level: int = logging_util.INFO,
) -> None:
    env_factory: Callable[
        [int, List[SideChannel]], UnityEnvironment
    ] = cloudpickle.loads(pickled_env_factory)
    env_parameters = EnvironmentParametersChannel()
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_configuration)
    stats_channel = StatsSideChannel()
    env: BaseEnv = None
    # Set log level. On some platforms, the logger isn't common with the
    # main process, so we need to set it again.
    logging_util.set_log_level(log_level)

    def _send_response(cmd_name: EnvironmentCommand, payload: Any) -> None:
        parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))

    def _generate_all_results() -> AllStepResult:
        all_step_result: AllStepResult = {}
        for brain_name in env.behavior_specs:
            all_step_result[brain_name] = env.get_steps(brain_name)
        return all_step_result

    try:
        env = env_factory(
            worker_id, [env_parameters, engine_configuration_channel, stats_channel]
        )
        while True:
            req: EnvironmentRequest = parent_conn.recv()
            if req.cmd == EnvironmentCommand.STEP:
                all_action_info = req.payload
                for brain_name, action_info in all_action_info.items():
                    if len(action_info.action) != 0:
                        env.set_actions(brain_name, action_info.action)
                env.step()
                all_step_result = _generate_all_results()
                # The timers in this process are independent from all the processes and the "main" process
                # So after we send back the root timer, we can safely clear them.
                # Note that we could randomly return timers a fraction of the time if we wanted to reduce
                # the data transferred.
                # TODO get gauges from the workers and merge them in the main process too.
                env_stats = stats_channel.get_and_reset_stats()
                step_response = StepResponse(
                    all_step_result, get_timer_root(), env_stats
                )
                step_queue.put(
                    EnvironmentResponse(
                        EnvironmentCommand.STEP, worker_id, step_response
                    )
                )
                reset_timers()
            elif req.cmd == EnvironmentCommand.BEHAVIOR_SPECS:
                _send_response(EnvironmentCommand.BEHAVIOR_SPECS, env.behavior_specs)
            elif req.cmd == EnvironmentCommand.ENVIRONMENT_PARAMETERS:
                for k, v in req.payload.items():
                    if isinstance(v, ParameterRandomizationSettings):
                        v.apply(k, env_parameters)
            elif req.cmd == EnvironmentCommand.RESET:
                env.reset()
                all_step_result = _generate_all_results()
                _send_response(EnvironmentCommand.RESET, all_step_result)
            elif req.cmd == EnvironmentCommand.CLOSE:
                break
    except (
        KeyboardInterrupt,
        UnityCommunicationException,
        UnityTimeOutException,
        UnityEnvironmentException,
        UnityCommunicatorStoppedException,
    ) as ex:
        logger.info(f"UnityEnvironment worker {worker_id}: environment stopping.")
        step_queue.put(
            EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
        )
        _send_response(EnvironmentCommand.ENV_EXITED, ex)
    finally:
        # If this worker has put an item in the step queue that hasn't been processed by the EnvManager, the process
        # will hang until the item is processed. We avoid this behavior by using Queue.cancel_join_thread()
        # See https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Queue.cancel_join_thread for
        # more info.
        logger.debug(f"UnityEnvironment worker {worker_id} closing.")
        step_queue.cancel_join_thread()
        step_queue.close()
        if env is not None:
            env.close()
        logger.debug(f"UnityEnvironment worker {worker_id} done.")
Example #22
def worker(
    parent_conn: Connection,
    step_queue: Queue,
    pickled_env_factory: str,
    worker_id: int,
    run_options: RunOptions,
    log_level: int = logging_util.INFO,
) -> None:
    env_factory: Callable[
        [int, List[SideChannel]], UnityEnvironment
    ] = cloudpickle.loads(restricted_loads(pickled_env_factory))
    env_parameters = EnvironmentParametersChannel()

    engine_config = EngineConfig(
        width=run_options.engine_settings.width,
        height=run_options.engine_settings.height,
        quality_level=run_options.engine_settings.quality_level,
        time_scale=run_options.engine_settings.time_scale,
        target_frame_rate=run_options.engine_settings.target_frame_rate,
        capture_frame_rate=run_options.engine_settings.capture_frame_rate,
    )
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_config)

    stats_channel = StatsSideChannel()
    training_analytics_channel: Optional[TrainingAnalyticsSideChannel] = None
    if worker_id == 0:
        training_analytics_channel = TrainingAnalyticsSideChannel()
    env: UnityEnvironment = None
    # Set log level. On some platforms, the logger isn't common with the
    # main process, so we need to set it again.
    logging_util.set_log_level(log_level)

    def _send_response(cmd_name: EnvironmentCommand, payload: Any) -> None:
        parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))

    def _generate_all_results() -> AllStepResult:
        all_step_result: AllStepResult = {}
        for brain_name in env.behavior_specs:
            all_step_result[brain_name] = env.get_steps(brain_name)
        return all_step_result

    try:
        side_channels = [env_parameters, engine_configuration_channel, stats_channel]
        if training_analytics_channel is not None:
            side_channels.append(training_analytics_channel)

        env = env_factory(worker_id, side_channels)
        if (
            not env.academy_capabilities
            or not env.academy_capabilities.trainingAnalytics
        ):
            # Make sure we don't try to send training analytics if the environment doesn't know how to process
            # them. This wouldn't be catastrophic, but would result in unknown SideChannel UUIDs being used.
            training_analytics_channel = None
        if training_analytics_channel:
            training_analytics_channel.environment_initialized(run_options)

        while True:
            req: EnvironmentRequest = parent_conn.recv()
            if req.cmd == EnvironmentCommand.STEP:
                all_action_info = req.payload
                for brain_name, action_info in all_action_info.items():
                    if len(action_info.agent_ids) > 0:
                        env.set_actions(brain_name, action_info.env_action)
                env.step()
                all_step_result = _generate_all_results()
                # The timers in this process are independent from all the processes and the "main" process
                # So after we send back the root timer, we can safely clear them.
                # Note that we could randomly return timers a fraction of the time if we wanted to reduce
                # the data transferred.
                # TODO get gauges from the workers and merge them in the main process too.
                env_stats = stats_channel.get_and_reset_stats()
                step_response = StepResponse(
                    all_step_result, get_timer_root(), env_stats
                )
                step_queue.put(
                    EnvironmentResponse(
                        EnvironmentCommand.STEP, worker_id, step_response
                    )
                )
                reset_timers()
            elif req.cmd == EnvironmentCommand.BEHAVIOR_SPECS:
                _send_response(EnvironmentCommand.BEHAVIOR_SPECS, env.behavior_specs)
            elif req.cmd == EnvironmentCommand.ENVIRONMENT_PARAMETERS:
                for k, v in req.payload.items():
                    if isinstance(v, ParameterRandomizationSettings):
                        v.apply(k, env_parameters)
            elif req.cmd == EnvironmentCommand.TRAINING_STARTED:
                behavior_name, trainer_config = req.payload
                if training_analytics_channel:
                    training_analytics_channel.training_started(
                        behavior_name, trainer_config
                    )
            elif req.cmd == EnvironmentCommand.RESET:
                env.reset()
                all_step_result = _generate_all_results()
                _send_response(EnvironmentCommand.RESET, all_step_result)
            elif req.cmd == EnvironmentCommand.CLOSE:
                break
    except (
        KeyboardInterrupt,
        UnityCommunicationException,
        UnityTimeOutException,
        UnityEnvironmentException,
        UnityCommunicatorStoppedException,
    ) as ex:
        logger.info(f"UnityEnvironment worker {worker_id}: environment stopping.")
        step_queue.put(
            EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
        )
        _send_response(EnvironmentCommand.ENV_EXITED, ex)
    except Exception as ex:
        logger.exception(
            f"UnityEnvironment worker {worker_id}: environment raised an unexpected exception."
        )
        step_queue.put(
            EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
        )
        _send_response(EnvironmentCommand.ENV_EXITED, ex)
    finally:
        logger.debug(f"UnityEnvironment worker {worker_id} closing.")
        if env is not None:
            env.close()
        logger.debug(f"UnityEnvironment worker {worker_id} done.")
        parent_conn.close()
        step_queue.put(EnvironmentResponse(EnvironmentCommand.CLOSED, worker_id, None))
        step_queue.close()
Example #23
    def __init__(self, env_path, worker_id = 1, no_graphics = False, realtime_mode = False, config = None):
        """Instantiates the Unity Environment from a specified executable.
        
        Arguments:
            env_path {string} -- Path to the executable of the environment
        
        Keyword Arguments:
            worker_id {int} -- Port offset of the environment's instance (default: {1})
            no_graphics {bool} -- Whether to allow the executable to render or not (default: {False})
            realtime_mode {bool} -- Whether to run the environment in real time or as fast as possible (default: {False})
            config {dict} -- Specifies the reset parameters of the environment (default: {None})
        """
        # Disable logging
        logging.disable(logging.INFO)

        # Initialize channels
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        self._config = config
        self._realtime_mode = realtime_mode
        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0, width=1280, height=720)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0, width=128, height=128)

        # Launch the environment's executable
        self._env = UnityEnvironment(file_name = env_path, worker_id = worker_id, no_graphics = no_graphics, side_channels=[self.reset_parameters, self.engine_config])
        # Reset the environment
        self._env.reset()
        # Retrieve behavior configuration
        self._behavior_name = list(self._env.behavior_specs)[0]
        self._behavior_spec = self._env.behavior_specs[self._behavior_name]

        # Set action space properties
        if len(self._behavior_spec.action_shape) == 1:
            self._action_space = spaces.Discrete(self._behavior_spec.action_shape[0])
        else:
            self._action_space = spaces.MultiDiscrete(self._behavior_spec.action_shape)
        self._action_names = ["Not available"]
        
        # Count visual and vector observations
        self._num_vis_obs, self._num_vec_obs = 0, 0
        self._vec_obs_indices = []
        for index, obs in enumerate(self._behavior_spec.observation_shapes):
            if len(obs) > 1:
                self._num_vis_obs = self._num_vis_obs + 1
                self._vis_obs_index = index
            else:
                self._num_vec_obs = self._num_vec_obs + 1
                self._vec_obs_indices.append(index)

        # Verify the environment
        self._verify_environment()

        # Set visual observation space property
        if self._num_vis_obs == 1:
            height = self._behavior_spec.observation_shapes[self._vis_obs_index][0]
            width = self._behavior_spec.observation_shapes[self._vis_obs_index][1]
            depth = self._behavior_spec.observation_shapes[self._vis_obs_index][2]
            self._visual_observation_space = spaces.Box(
                low = 0,
                high = 1.0,
                shape = (height, width, depth),
                dtype = np.float32)
        else:
            self._visual_observation_space = None

        # Set vector observation space property
        if self._num_vec_obs > 0:
            # Determine the length of vec obs by summing the length of each distinct one
            vec_obs_length = sum([self._behavior_spec.observation_shapes[i][0] for i in self._vec_obs_indices])
            self._vector_observation_space = (vec_obs_length, )
        else:
            self._vector_observation_space = None
Example #24
class UnityWrapper(Env):
    """This class wraps Unity environments.

    This wrapper has notable constraints:
        - Only one agent (no multi-agent environments).
        - Only one visual observation
        - Only discrete and multi-discrete action spaces (no continuous action space)"""

    def __init__(self, env_path, reset_params, worker_id = 1, no_graphics = False, realtime_mode = False, record_trajectory = False):
        """Instantiates the Unity Environment from a specified executable.
        
        Arguments:
            env_path {string} -- Path to the executable of the environment
            reset_params {dict} -- Reset parameters of the environment such as the seed
        
        Keyword Arguments:
            worker_id {int} -- Port offset of the environment's instance (default: {1})
            no_graphics {bool} -- Whether to allow the executable to render or not (default: {False})
            realtime_mode {bool} -- Whether to run the environment in real time or as fast as possible (default: {False})
            record_trajectory {bool} -- Whether to record the trajectory of an entire episode. This can be used for video recording. (default: {False})
        """
        # Initialize channels
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        # Prepare default reset parameters
        self._default_reset_parameters = {}
        for key, value in reset_params.items():
            self._default_reset_parameters[key] = value
            if key != "start-seed" or key != "num-seeds":
                self.reset_parameters.set_float_parameter(key, value)

        self._realtime_mode = realtime_mode
        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0, width=1280, height=720)
        else:
            self.engine_config.set_configuration_parameters(time_scale=30.0, width=256, height=256)

        # Whether to record the trajectory of an entire episode
        self._record = record_trajectory

        # Launch the environment's executable
        self._env = UnityEnvironment(file_name = env_path, worker_id = worker_id, no_graphics = no_graphics, side_channels=[self.reset_parameters, self.engine_config])
        # If the Unity Editor should be used instead of a build
        # self._env = UnityEnvironment(file_name = None, worker_id = 0, no_graphics = no_graphics, side_channels=[self.reset_parameters, self.engine_config])

        # Reset the environment
        self._env.reset()
        # Retrieve behavior configuration
        self._behavior_name = list(self._env.behavior_specs)[0]
        self._behavior_spec = self._env.behavior_specs[self._behavior_name]

        # Check whether this Unity environment is supported
        self._verify_environment()

        # Set action space properties
        if self._behavior_spec.action_spec.is_discrete():
            num_action_branches = self._behavior_spec.action_spec.discrete_size
            action_branch_dimensions = self._behavior_spec.action_spec.discrete_branches
            if num_action_branches == 1:
                self._action_space = spaces.Discrete(action_branch_dimensions[0])
            else:
                self._action_space = spaces.MultiDiscrete(action_branch_dimensions)

        # Count visual and vector observations
        self._num_vis_obs, self._num_vec_obs = 0, 0
        self._vec_obs_indices = []
        for index, obs in enumerate(self._behavior_spec.observation_specs):
            if len(obs.shape) > 1:
                self._num_vis_obs = self._num_vis_obs + 1
                self._vis_obs_index = index
            else:
                self._num_vec_obs = self._num_vec_obs + 1
                self._vec_obs_indices.append(index)

        # Set visual observation space property
        if self._num_vis_obs == 1:
            vis_obs_shape = self._behavior_spec.observation_specs[self._vis_obs_index].shape

            self._visual_observation_space = spaces.Box(
                low = 0,
                high = 1.0,
                shape = vis_obs_shape,
                dtype = np.float32)
        else:
            self._visual_observation_space = None

        # Set vector observation space property
        if self._num_vec_obs > 0:
            # Determine the length of vec obs by summing the length of each distinct one
            vec_obs_length = sum([self._behavior_spec.observation_specs[i].shape[0] for i in self._vec_obs_indices])
            self._vector_observation_space = (vec_obs_length, )
        else:
            self._vector_observation_space = None

        # Videos can only be recorded if the environment provides visual observations
        if self._record and self._visual_observation_space is None:
            raise UnityEnvironmentException("Videos cannot be rendered for a Unity environment that does not provide visual observations.")

    @property
    def unwrapped(self):
        """        
        Returns:
            {UnityWrapper} -- Environment in its vanilla (i.e. unwrapped) state
        """
        return self
    
    @property
    def action_space(self):
        """Returns the shape of the action space of the agent."""
        return self._action_space

    @property
    def action_names(self):
        return None

    @property
    def get_episode_trajectory(self):
        """Returns the trajectory of an entire episode as dictionary (vis_obs, vec_obs, rewards, actions). 
        """
        self._trajectory["action_names"] = self.action_names
        return self._trajectory if self._trajectory else None

    @property
    def visual_observation_space(self):
        return self._visual_observation_space

    @property
    def vector_observation_space(self):
        return self._vector_observation_space

    def reset(self, reset_params = None):
        """Resets the environment based on a global or just specified config.
        
        Keyword Arguments:
            config {dict} -- Reset parameters to configure the environment (default: {None})
        
        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
        """
        # Track rewards of an entire episode
        self._rewards = []

        # Use initial reset parameters unless new ones were specified
        if reset_params is None:
            reset_params = self._default_reset_parameters

        # Apply reset parameters
        for key, value in reset_params.items():
            # Skip reset parameters that are not used by the Unity environment
            if key != "start-seed" or key != "num-seeds":
                self.reset_parameters.set_float_parameter(key, value)

        # Sample the seed to be used
        if reset_params["start-seed"] > -1:
            seed = randint(reset_params["start-seed"], reset_params["start-seed"] + reset_params["num-seeds"] - 1)
        else:
            # Use unlimited seeds
            seed = -1
        self.reset_parameters.set_float_parameter("seed", seed)

        # Reset and verify the environment
        self._env.reset()
        info, terminal_info = self._env.get_steps(self._behavior_name)
        self._verify_environment()
        
        # Retrieve initial observations
        vis_obs, vec_obs, _, _ = self._process_agent_info(info, terminal_info)

        # Prepare trajectory recording
        self._trajectory = {
            "vis_obs": [vis_obs * 255], "vec_obs": [vec_obs],
            "rewards": [0.0], "actions": []
        }

        return vis_obs, vec_obs

    def step(self, action):
        """Runs one timestep of the environment"s dynamics.
        Once an episode is done, reset() has to be called manually.
                
        Arguments:
            action {List} -- A list of at least one discrete action to be executed by the agent

        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
            {float} -- (Total) Scalar reward signaled by the environment
            {bool} -- Whether the episode of the environment terminated
            {dict} -- Further episode information (e.g. cumulative reward) retrieved from the environment once an episode completed
        """
        # Carry out the agent's action
        action_tuple = ActionTuple()
        action_tuple.add_discrete(np.asarray(action).reshape([1, -1]))
        self._env.set_actions(self._behavior_name, action_tuple)
        self._env.step()
        info, terminal_info = self._env.get_steps(self._behavior_name)

        # Process step results
        vis_obs, vec_obs, reward, done = self._process_agent_info(info, terminal_info)
        self._rewards.append(reward)

        # Record trajectory data
        if self._record:
            self._trajectory["vis_obs"].append(vis_obs * 255)
            self._trajectory["vec_obs"].append(vec_obs)
            self._trajectory["rewards"].append(reward)
            self._trajectory["actions"].append(action)

        # Episode information
        if done:
            info = {"reward": sum(self._rewards),
                    "length": len(self._rewards)}
        else:
            info = None

        return vis_obs, vec_obs, reward, done, info

    def close(self):
        """Shut down the environment."""
        self._env.close()

    def _process_agent_info(self, info, terminal_info):
        """Extracts the observations, rewards, dones, and episode infos.

        Args:
            info {DecisionSteps}: Current state
            terminal_info {TerminalSteps}: Terminal state

        Returns:
            vis_obs {ndarray} -- Visual observation if available, else None
            vec_obs {ndarray} -- Vector observation if available, else None
            reward {float} -- Reward signal from the environment
            done {bool} -- Whether the episode terminated or not
        """
        # Determine if the episode terminated or not
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info

        # Process visual observations
        if self.visual_observation_space is not None:
            vis_obs = use_info.obs[self._vis_obs_index][0]
        else:
            vis_obs = None

        # Process vector observations
        if self.vector_observation_space is not None:
            for i, dim in enumerate(self._vec_obs_indices):
                if i == 0:
                    vec_obs = use_info.obs[dim][0]
                else:
                    vec_obs = np.concatenate((vec_obs, use_info.obs[dim][0]))
        else:
            vec_obs = None

        return vis_obs, vec_obs, use_info.reward[0], done

    def _verify_environment(self):
        # Verify number of agent behavior types
        if len(self._env.behavior_specs) != 1:
            raise UnityEnvironmentException("The unity environment containts more than one agent type.")
        # Verify number of agents
        decision_steps, _ = self._env.get_steps(self._behavior_name)
        if len(decision_steps) > 1:
            raise UnityEnvironmentException("The unity environment contains more than one agent, which is not supported.")
        # Verify action space type
        if not self._behavior_spec.action_spec.is_discrete() or self._behavior_spec.action_spec.is_continuous():
            raise UnityEnvironmentException("Continuous action spaces are not supported. " 
                                            "Only discrete and MultiDiscrete spaces are supported.")
        # Verify that at least one observation is provided
        num_vis_obs = 0
        num_vec_obs = 0
        for obs_spec in self._behavior_spec.observation_specs:
            if len(obs_spec.shape) == 3:
                num_vis_obs += 1
            elif len(obs_spec.shape) == 1:
                num_vec_obs += 1
        if num_vis_obs == 0 and num_vec_obs == 0:
            raise UnityEnvironmentException("The unity environment does not contain any observations.")
        # Verify number of visual observations
        if num_vis_obs > 1:
            raise UnityEnvironmentException("The unity environment contains more than one visual observation.")
Example #25
    def __init__(self,
                 conn: multiprocessing.connection.Connection = None,
                 train_mode=True,
                 file_name=None,
                 worker_id=0,
                 base_port=5005,
                 no_graphics=True,
                 seed=None,
                 scene=None,
                 additional_args=None,
                 n_agents=1):
        """
        Args:
            train_mode: If in train mode, Unity will speed up
            file_name: The executable path. The UnityEnvironment will run in editor if None
            worker_id: Offset from base_port
            base_port: The port that communicate to Unity. It will be set to 5004 automatically if in editor.
            no_graphics: If Unity runs in no graphic mode. It must be set to False if Unity has camera sensor.
            seed: Random seed
            scene: The scene name
            n_agents: The agents count
        """
        self.scene = scene
        self.n_agents = n_agents

        seed = seed if seed is not None else np.random.randint(0, 65536)
        additional_args = [] if additional_args is None else additional_args.split(
            ' ')

        self.engine_configuration_channel = EngineConfigurationChannel()
        self.environment_parameters_channel = EnvironmentParametersChannel()

        self.environment_parameters_channel.set_float_parameter(
            'env_copys', float(n_agents))

        if conn:
            try:
                from algorithm import config_helper
                config_helper.set_logger()
            except Exception:
                pass

            self._logger = logging.getLogger(
                f'UnityWrapper.Process_{os.getpid()}')
        else:
            self._logger = logging.getLogger('UnityWrapper.Process')

        self._env = UnityEnvironment(
            file_name=file_name,
            worker_id=worker_id,
            base_port=base_port if file_name else None,
            no_graphics=no_graphics and train_mode,
            seed=seed,
            additional_args=['--scene', scene] + additional_args,
            side_channels=[
                self.engine_configuration_channel,
                self.environment_parameters_channel
            ])

        self.engine_configuration_channel.set_configuration_parameters(
            width=200 if train_mode else 1280,
            height=200 if train_mode else 720,
            quality_level=5,
            time_scale=20 if train_mode else 1)

        self._env.reset()
        self.bahavior_name = list(self._env.behavior_specs)[0]

        if conn:
            try:
                while True:
                    cmd, data = conn.recv()
                    if cmd == INIT:
                        conn.send(self.init())
                    elif cmd == RESET:
                        conn.send(self.reset(data))
                    elif cmd == STEP:
                        conn.send(self.step(*data))
                    elif cmd == CLOSE:
                        self.close()
            except:
                self._logger.error(traceback.format_exc())
class UnityWrapperProcess:
    def __init__(self,
                 conn: multiprocessing.connection.Connection = None,
                 train_mode=True,
                 file_name=None,
                 worker_id=0,
                 base_port=5005,
                 no_graphics=True,
                 seed=None,
                 scene=None,
                 additional_args=None,
                 n_agents=1):
        """
        Args:
            train_mode: If in train mode, Unity will speed up
            file_name: The executable path. The UnityEnvironment will run in editor if None
            worker_id: Offset from base_port
            base_port: The port that communicate to Unity. It will be set to 5004 automatically if in editor.
            no_graphics: If Unity runs in no graphic mode. It must be set to False if Unity has camera sensor.
            seed: Random seed
            scene: The scene name
            n_agents: The agents count
        """
        self.scene = scene
        self.n_agents = n_agents

        seed = seed if seed is not None else np.random.randint(0, 65536)
        additional_args = [] if additional_args is None else additional_args.split(
            ' ')

        self.engine_configuration_channel = EngineConfigurationChannel()
        self.environment_parameters_channel = EnvironmentParametersChannel()

        self.environment_parameters_channel.set_float_parameter(
            'env_copys', float(n_agents))

        if conn:
            try:
                from algorithm import config_helper
                config_helper.set_logger()
            except Exception:  # optional logger setup; ignore if unavailable
                pass

            self._logger = logging.getLogger(
                f'UnityWrapper.Process_{os.getpid()}')
        else:
            self._logger = logging.getLogger('UnityWrapper.Process')

        self._env = UnityEnvironment(
            file_name=file_name,
            worker_id=worker_id,
            base_port=base_port if file_name else None,
            no_graphics=no_graphics and train_mode,
            seed=seed,
            additional_args=['--scene', scene] + additional_args,
            side_channels=[
                self.engine_configuration_channel,
                self.environment_parameters_channel
            ])

        self.engine_configuration_channel.set_configuration_parameters(
            width=200 if train_mode else 1280,
            height=200 if train_mode else 720,
            quality_level=5,
            time_scale=20 if train_mode else 1)

        self._env.reset()
        self.behavior_name = list(self._env.behavior_specs)[0]

        if conn:
            try:
                while True:
                    cmd, data = conn.recv()
                    if cmd == INIT:
                        conn.send(self.init())
                    elif cmd == RESET:
                        conn.send(self.reset(data))
                    elif cmd == STEP:
                        conn.send(self.step(*data))
                    elif cmd == CLOSE:
                        self.close()
            except Exception:
                self._logger.error(traceback.format_exc())

    def init(self):
        """
        Returns:
            observation shapes: list[(o1, ), (o2, ), (o3_1, o3_2, o3_3), ...]
            discrete action size: int, product of all discrete branch sizes (branches are flattened into a single Discrete space)
            continuous action size: int
        """
        behavior_spec = self._env.behavior_specs[self.behavior_name]
        obs_names = [o.name for o in behavior_spec.observation_specs]
        self._logger.info(f'Observation names: {obs_names}')
        obs_shapes = [o.shape for o in behavior_spec.observation_specs]
        self._logger.info(f'Observation shapes: {obs_shapes}')

        self._empty_action = behavior_spec.action_spec.empty_action

        discrete_action_size = 0
        if behavior_spec.action_spec.discrete_size > 0:
            discrete_action_size = 1
            action_product_list = []
            for action, branch_size in enumerate(
                    behavior_spec.action_spec.discrete_branches):
                discrete_action_size *= branch_size
                action_product_list.append(range(branch_size))
                self._logger.info(
                    f"Discrete action branch {action} has {branch_size} different actions"
                )

            self.action_product = np.array(
                list(itertools.product(*action_product_list)))
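            # For illustration (assumed branch sizes): with discrete branches
            # (2, 3), discrete_action_size becomes 6 and action_product holds
            # [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)], so a flat
            # action index selects one action for every branch at once.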

        continuous_action_size = behavior_spec.action_spec.continuous_size

        self._logger.info(f'Continuous action size: {continuous_action_size}')

        self.d_action_size = discrete_action_size
        self.c_action_size = continuous_action_size

        for o in behavior_spec.observation_specs:
            if len(o.shape) >= 3:
                self.engine_configuration_channel.set_configuration_parameters(
                    quality_level=5)
                break

        return obs_shapes, discrete_action_size, continuous_action_size

    def reset(self, reset_config=None):
        """
        return:
            observations: list[(NAgents, o1), (NAgents, o2), (NAgents, o3_1, o3_2, o3_3)]
        """
        reset_config = {} if reset_config is None else reset_config
        for k, v in reset_config.items():
            self.environment_parameters_channel.set_float_parameter(
                k, float(v))

        self._env.reset()
        decision_steps, terminal_steps = self._env.get_steps(
            self.behavior_name)

        return [obs.astype(np.float32) for obs in decision_steps.obs]

    def step(self, d_action, c_action):
        """
        Args:
            d_action: (NAgents, discrete_action_size), one hot like action
            c_action: (NAgents, continuous_action_size)

        Returns:
            observations: list[(NAgents, o1), (NAgents, o2), (NAgents, o3_1, o3_2, o3_3)]
            rewards: (NAgents, )
            done: (NAgents, ), bool
            max_step: (NAgents, ), bool
        """

        if self.d_action_size:
            d_action = np.argmax(d_action, axis=1)
            d_action = self.action_product[d_action]

        self._env.set_actions(
            self.behavior_name,
            ActionTuple(continuous=c_action, discrete=d_action))
        self._env.step()

        decision_steps, terminal_steps = self._env.get_steps(
            self.behavior_name)

        tmp_terminal_steps = terminal_steps

        # Some agents may terminate while no agent requests a new decision.
        # Keep stepping with empty actions until a decision step arrives,
        # accumulating all terminal steps observed along the way.
        while len(decision_steps) == 0:
            self._env.set_actions(self.behavior_name, self._empty_action(0))
            self._env.step()
            decision_steps, terminal_steps = self._env.get_steps(
                self.behavior_name)
            tmp_terminal_steps.agent_id = np.concatenate(
                [tmp_terminal_steps.agent_id, terminal_steps.agent_id])
            tmp_terminal_steps.reward = np.concatenate(
                [tmp_terminal_steps.reward, terminal_steps.reward])
            tmp_terminal_steps.interrupted = np.concatenate(
                [tmp_terminal_steps.interrupted, terminal_steps.interrupted])

        # Overwrite the rewards of agents that terminated during this step
        reward = decision_steps.reward
        reward[tmp_terminal_steps.agent_id] = tmp_terminal_steps.reward

        done = np.full(len(decision_steps), False, dtype=bool)
        done[tmp_terminal_steps.agent_id] = True

        max_step = np.full(len(decision_steps), False, dtype=bool)
        max_step[tmp_terminal_steps.agent_id] = tmp_terminal_steps.interrupted

        return ([obs.astype(np.float32) for obs in decision_steps.obs],
                reward.astype(np.float32), done, max_step)

    def close(self):
        self._env.close()
        self._logger.warning(f'Process {os.getpid()} exits')
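
Because the constructor above enters a blocking command loop when it receives a connection, a parent process typically drives it over a multiprocessing.Pipe. The following is a minimal sketch of that pattern, assuming the INIT/RESET/STEP/CLOSE constants are simple integer commands defined elsewhere in the project and using a hypothetical build path:

import multiprocessing

# Assumed command constants; the real values are defined in the surrounding project.
INIT, RESET, STEP, CLOSE = 0, 1, 2, 3

if __name__ == '__main__':
    parent_conn, child_conn = multiprocessing.Pipe()
    proc = multiprocessing.Process(
        target=UnityWrapperProcess,
        kwargs=dict(conn=child_conn, train_mode=True,
                    file_name='path/to/build',  # hypothetical path
                    worker_id=1, n_agents=4),
        daemon=True)
    proc.start()

    parent_conn.send((INIT, None))
    obs_shapes, d_action_size, c_action_size = parent_conn.recv()

    parent_conn.send((RESET, {}))
    obs_list = parent_conn.recv()

    parent_conn.send((CLOSE, None))
    parent_conn.close()  # the child's recv() then raises and its loop exits
    proc.join()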
Example #27
0
# from env_1 import Action, Goal, UnityToGoalGymWrapper
from env_2 import Action, Goal, UnityToGoalGymWrapper
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.environment_parameters_channel import EnvironmentParametersChannel

if __name__ == "__main__":
    """
    Demo interaction with the gym interface using a live Unity environment
    """
    channel = EnvironmentParametersChannel()  # create a params sidechannel
    # None -> live interaction = blocking call to unity
    unity_env = UnityEnvironment(file_name=None,
                                 seed=1,
                                 side_channels=[channel])

    env = UnityToGoalGymWrapper(unity_env,
                                channel,
                                uint8_visual=True,
                                allow_multiple_obs=True)

    while True:
        obs = env.reset(Goal(3.0, 3.0))

        for i in range(1000):
            obs_dict = env.step(Action(0.01, 0.01))
            gym_observation = obs_dict["observation"]
            observation, reward, done, info = gym_observation
            print(observation)
            print(f"reward = {reward}")

            if done:
                break
Example #28
0
        k = ".".join([k_prefix, k]) if k_prefix is not None else k
        if isinstance(v, AttrDict):    
            assign_config(_channel, v, k)
        else:
            _channel.set_float_parameter(k, v)

# setup environment
if sys.platform == "win32":
    env_build = "../env/FreeFall/windows/FreeFall.exe"
elif sys.platform == "linux":
    env_build = "../env/FreeFall/linux/FreeFall.x86_64"
elif sys.platform == "darwin":
    env_build = "../env/FreeFall/mac.app"
else:
    raise AttributeError("{} platform is not supported.".format(sys.platform))
channel = EnvironmentParametersChannel()
unity_env = UnityEnvironment(env_build, side_channels=[channel])
env = UnityToGymWrapper(unity_env, uint8_visual=True, allow_multiple_obs=True)
assign_config(channel, config)
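# For illustration (hypothetical keys): a nested AttrDict config such as
#   {"gravity": -9.8, "agent": {"mass": 1.5}}
# is sent as the float parameters "gravity" -> -9.8 and "agent.mass" -> 1.5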

# interface
key_ws = np.array([False] * 2)
def key_press(event): # NOTE: cannot handle multiple key presses at the same time
    global key_ws
    try:
        key = event.key.lower()
    except AttributeError:  # event.key can be None for some key events
        key = event.key
    key_ws[0] = key in ['w', 'up']
    key_ws[1] = key in ['s', 'down']
    if key == 'q':
Example #29
0
    def __init__(self, env_path, reset_params, worker_id = 1, no_graphics = False, realtime_mode = False, record_trajectory = False):
        """Instantiates the Unity Environment from a specified executable.
        
        Arguments:
            env_path {string} -- Path to the executable of the environment
            reset_params {dict} -- Reset parameters of the environment such as the seed
        
        Keyword Arguments:
            worker_id {int} -- Port offset of the environment's instance (default: {1})
            no_graphics {bool} -- Whether to run the executable without rendering (default: {False})
            realtime_mode {bool} -- Whether to run the environment in real time or as fast as possible (default: {False})
            record_trajectory {bool} -- Whether to record the trajectory of an entire episode. This can be used for video recording. (default: {False})
        """
        # Initialize channels
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        # Prepare default reset parameters
        self._default_reset_parameters = {}
        for key, value in reset_params.items():
            self._default_reset_parameters[key] = value
            if key != "start-seed" or key != "num-seeds":
                self.reset_parameters.set_float_parameter(key, value)

        self._realtime_mode = realtime_mode
        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0, width=1280, height=720)
        else:
            self.engine_config.set_configuration_parameters(time_scale=30.0, width=256, height=256)

        # Whether to record the trajectory of an entire episode
        self._record = record_trajectory

        # Launch the environment's executable
        self._env = UnityEnvironment(file_name = env_path, worker_id = worker_id, no_graphics = no_graphics, side_channels=[self.reset_parameters, self.engine_config])
        # If the Unity Editor should be used instead of a build
        # self._env = UnityEnvironment(file_name = None, worker_id = 0, no_graphics = no_graphics, side_channels=[self.reset_parameters, self.engine_config])

        # Reset the environment
        self._env.reset()
        # Retrieve behavior configuration
        self._behavior_name = list(self._env.behavior_specs)[0]
        self._behavior_spec = self._env.behavior_specs[self._behavior_name]

        # Check whether this Unity environment is supported
        self._verify_environment()

        # Set action space properties
        if self._behavior_spec.action_spec.is_discrete():
            num_action_branches = self._behavior_spec.action_spec.discrete_size
            action_branch_dimensions = self._behavior_spec.action_spec.discrete_branches
            if num_action_branches == 1:
                self._action_space = spaces.Discrete(action_branch_dimensions[0])
            else:
                self._action_space = spaces.MultiDiscrete(action_branch_dimensions)

        # Count visual and vector observations
        self._num_vis_obs, self._num_vec_obs = 0, 0
        self._vec_obs_indices = []
        for index, obs_spec in enumerate(self._behavior_spec.observation_specs):
            if len(obs_spec.shape) > 1:
                self._num_vis_obs = self._num_vis_obs + 1
                self._vis_obs_index = index
            else:
                self._num_vec_obs = self._num_vec_obs + 1
                self._vec_obs_indices.append(index)

        # Set visual observation space property
        if self._num_vis_obs == 1:
            vis_obs_shape = self._behavior_spec.observation_specs[self._vis_obs_index].shape

            self._visual_observation_space = spaces.Box(
                low = 0,
                high = 1.0,
                shape = vis_obs_shape,
                dtype = np.float32)
        else:
            self._visual_observation_space = None

        # Set vector observation space property
        if self._num_vec_obs > 0:
            # Determine the length of vec obs by summing the length of each distinct one
            vec_obs_length = sum([self._behavior_spec.observation_specs[i].shape[0] for i in self._vec_obs_indices])
            self._vector_observation_space = (vec_obs_length, )
        else:
            self._vector_observation_space = None

        # Videos can only be recorded if the environment provides visual observations
        if self._record and self._visual_observation_space is None:
            UnityEnvironmentException("Videos cannot be rendered for a Unity environment that does not provide visual observations.")
Example #30
0
class UnityWrapper(Env):
    """This class wraps Unity environments.

    This wrapper has notable constraints:
        - Only one agent (no multi-agent environments).
        - Only one visual observation.
        - Only discrete and multi-discrete action spaces (no continuous action space)."""

    def __init__(self, env_path, worker_id = 1, no_graphics = False, realtime_mode = False, config = None):
        """Instantiates the Unity Environment from a specified executable.
        
        Arguments:
            env_path {string} -- Path to the executable of the environment
        
        Keyword Arguments:
            worker_id {int} -- Port offset of the environment's instance (default: {1})
            no_graphics {bool} -- Whether to run the executable without rendering (default: {False})
            realtime_mode {bool} -- Whether to run the environment in real time or as fast as possible (default: {False})
            config {dict} -- Specifies the reset parameters of the environment (default: {None})
        """
        # Disable logging
        logging.disable(logging.INFO)

        # Initialize channels
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        self._config = config
        self._realtime_mode = realtime_mode
        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0, width=1280, height=720)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0, width=128, height=128)

        # Launch the environment's executable
        self._env = UnityEnvironment(file_name = env_path, worker_id = worker_id, no_graphics = no_graphics, side_channels=[self.reset_parameters, self.engine_config])
        # Reset the environment
        self._env.reset()
        # Retrieve behavior configuration
        self._behavior_name = list(self._env.behavior_specs)[0]
        self._behavior_spec = self._env.behavior_specs[self._behavior_name]

        # Set action space properties
        if len(self._behavior_spec.action_shape) == 1:
            self._action_space = spaces.Discrete(self._behavior_spec.action_shape[0])
        else:
            self._action_space = spaces.MultiDiscrete(self._behavior_spec.action_shape)
        self._action_names = ["Not available"]
        
        # Count visual and vector observations
        self._num_vis_obs, self._num_vec_obs = 0, 0
        self._vec_obs_indices = []
        for index, obs in enumerate(self._behavior_spec.observation_shapes):
            if len(obs) > 1:
                self._num_vis_obs = self._num_vis_obs + 1
                self._vis_obs_index = index
            else:
                self._num_vec_obs = self._num_vec_obs + 1
                self._vec_obs_indices.append(index)

        # Verify the environment
        self._verify_environment()

        # Set visual observation space property
        if self._num_vis_obs == 1:
            height = self._behavior_spec.observation_shapes[self._vis_obs_index][0]
            width = self._behavior_spec.observation_shapes[self._vis_obs_index][1]
            depth = self._behavior_spec.observation_shapes[self._vis_obs_index][2]
            self._visual_observation_space = spaces.Box(
                low = 0,
                high = 1.0,
                shape = (height, width, depth),
                dtype = np.float32)
        else:
            self._visual_observation_space = None

        # Set vector observation space property
        if self._num_vec_obs > 0:
            # Determine the length of vec obs by summing the length of each distinct one
            vec_obs_length = sum([self._behavior_spec.observation_shapes[i][0] for i in self._vec_obs_indices])
            self._vector_observation_space = (vec_obs_length, )
        else:
            self._vector_observation_space = None

    @property
    def unwrapped(self):
        """        
        Returns:
            {UnityWrapper} -- Environment in its vanilla (i.e. unwrapped) state
        """
        return self
    
    @property
    def action_space(self):
        """Returns the shape of the action space of the agent."""
        return self._action_space

    @property
    def action_names(self):
        return self._action_names

    @property
    def visual_observation_space(self):
        return self._visual_observation_space

    @property
    def vector_observation_space(self):
        return self._vector_observation_space

    def reset(self, reset_params = None):
        """Resets the environment based on a global or just specified config.
        
        Keyword Arguments:
            reset_params {dict} -- Reset parameters to configure the environment (default: {None})
        
        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
        """
        # Track rewards of an entire episode
        self._rewards = []

        # Process config: Either load global or new config (if specified)
        if reset_params is None:
            reset_params = self._config if self._config is not None else {}

        # Apply reset parameters
        for key, value in reset_params.items():
            self.reset_parameters.set_float_parameter(key, value)

        # Reset and verify the environment
        self._env.reset()
        info, terminal_info = self._env.get_steps(self._behavior_name)
        self._verify_environment(len(info))
        
        # Retrieve initial observations
        vis_obs, vec_obs, _, _ = self._process_agent_info(info, terminal_info)
        return vis_obs, vec_obs

    def step(self, action):
        """Runs one timestep of the environment"s dynamics.
        Once an episode is done, reset() has to be called manually.
                
        Arguments:
            action {List} -- A list of at least one discrete action to be executed by the agent

        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
            {float} -- (Total) Scalar reward signaled by the environment
            {bool} -- Whether the episode of the environment terminated
            {dict} -- Further episode information (e.g. cumulative reward) retrieved from the environment once an episode completed
        """
        # Carry out the agent's action
        self._env.set_actions(self._behavior_name, action.reshape([1, -1]))
        self._env.step()
        info, terminal_info = self._env.get_steps(self._behavior_name)

        # Process step results
        vis_obs, vec_obs, reward, done = self._process_agent_info(info, terminal_info)
        self._rewards.append(reward)

        # Episode information
        if done:
            info = {"reward": sum(self._rewards),
                    "length": len(self._rewards)}
        else:
            info = None

        return vis_obs, vec_obs, reward, done, info

    def close(self):
        """Shut down the environment."""
        self._env.close()

    def _process_agent_info(self, info, terminal_info):
        """Extracts the observations, rewards, dones, and episode infos.

        Args:
            info {DecisionSteps}: Current state
            terminal_info {TerminalSteps}: Terminal state

        Returns:
            vis_obs {ndarray} -- Visual observation if available, else None
            vec_obs {ndarray} -- Vector observation if available, else None
            reward {float} -- Reward signal from the environment
            done {bool} -- Whether the episode terminated or not
        """
        # Determine if the episode terminated or not
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info

        # Process visual observations
        if self.visual_observation_space is not None:
            vis_obs = use_info.obs[self._vis_obs_index][0]
        else:
            vis_obs = None

        # Process vector observations
        if self.vector_observation_space is not None:
            for i, dim in enumerate(self._vec_obs_indices):
                if i == 0:
                    vec_obs = use_info.obs[dim][0]
                else:
                    vec_obs = np.concatenate((vec_obs, use_info.obs[dim][0]))
        else:
            vec_obs = None

        return vis_obs, vec_obs, use_info.reward[0], done

    def _verify_environment(self, num_agents = None):
        """Checks if the environment meets the requirements of this wrapper.
        Only one agent and at maximum one visual observation is allowed.
        Only Discrete and MultiDiscrete action spaces are supported.

        Arguments:
            num_agents {int} -- Number of agents (default: {None})
        """
        # Verify number of agent types
        if len(self._env.behavior_specs) != 1:
            raise UnityEnvironmentException("The unity environment containts more than one agent type.")
        # Verify action space type (in this ML-Agents version, a value of 1 corresponds to a continuous action type)
        if int(self._behavior_spec.action_type.value) == 1:
            raise UnityEnvironmentException("Continuous action spaces are not supported. Only discrete and MultiDiscrete spaces are supported.")
        # Verify number of visual observations
        if self._num_vis_obs > 1:
            raise UnityEnvironmentException("The unity environment contains more than one visual observation.")
        # Verify agent count
        if num_agents is not None and num_agents > 1:
            raise UnityEnvironmentException("The unity environment contains more than one agent.")