Example #1
    def make_unity_env(self,
                       env_name,
                       float_params=dict(),
                       time_scale=1,
                       seed=None,
                       worker_id=None,
                       **kwargs):
        """
        Creates a gym environment from a Unity game.

        env_name: str
            the path to the game
        float_params: dict or None
            this should be a dict of argument settings for the unity
            environment
            keys: varies by environment
        time_scale: float
            sets Unity's time scale via the EngineConfigurationChannel;
            its effect may be limited for gym-wrapped Unity environments
        seed: int
            the seed for randomness
        worker_id: int
            must specify a unique worker id for each unity process
            on this machine
        """
        if float_params is None: float_params = dict()
        if seed is None: seed = int(time.time())
        path = os.path.expanduser(env_name)
        channel = EngineConfigurationChannel()
        env_channel = EnvironmentParametersChannel()
        channel.set_configuration_parameters(time_scale=time_scale)
        for k, v in float_params.items():
            if k == "validation" and v >= 1:
                print("Game in validation mode")
            env_channel.set_float_parameter(k, float(v))
        if worker_id is None: worker_id = seed % 500 + 1
        env_made = False
        n_loops = 0
        while not env_made and n_loops < 50:
            try:
                env = UnityEnvironment(file_name=path,
                                       side_channels=[channel, env_channel],
                                       worker_id=worker_id,
                                       seed=seed)
                env_made = True
            except:
                s = "Error encountered making environment, "
                s += "trying new worker_id"
                print(s)
                worker_id = (worker_id + 1 +
                             int(np.random.random() * 100)) % 500
                try:
                    env.close()
                except:
                    pass
                n_loops += 1
        env = UnityToGymWrapper(env, allow_multiple_obs=True)
        return env
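A minimal usage sketch for Example #1; the runner object, build path, and float_params contents below are illustrative placeholders rather than part of the original code.

# Hypothetical call site for make_unity_env (all names are placeholders).
env = runner.make_unity_env(
    "~/builds/MyGame/MyGame.x86_64",   # path to a Unity build
    float_params={"validation": 0},    # forwarded via EnvironmentParametersChannel
    time_scale=1,
    seed=1234,
)
obs = env.reset()
env.close()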
Example #2
 def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
     """
     Helper method to send sampler settings over EnvironmentParametersChannel
     Calls the constant sampler type set method.
     :param key: environment parameter to be sampled
     :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment
     """
     env_channel.set_float_parameter(key, self.value)
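In effect, Example #2 forwards a single float through the side channel. The sketch below shows the call site; `sampler` stands for any settings object with a numeric value attribute and is not part of the original code.

from mlagents_envs.side_channel.environment_parameters_channel import EnvironmentParametersChannel

channel = EnvironmentParametersChannel()
sampler.apply("gravity", channel)  # equivalent to channel.set_float_parameter("gravity", sampler.value)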
Example #3
class UnityWrapper(object):
    def __init__(self, env_args):
        self.engine_configuration_channel = EngineConfigurationChannel()
        if env_args['train_mode']:
            self.engine_configuration_channel.set_configuration_parameters(
                time_scale=env_args['train_time_scale'])
        else:
            self.engine_configuration_channel.set_configuration_parameters(
                width=env_args['width'],
                height=env_args['height'],
                quality_level=env_args['quality_level'],
                time_scale=env_args['inference_time_scale'],
                target_frame_rate=env_args['target_frame_rate'])
        self.float_properties_channel = EnvironmentParametersChannel()
        if env_args['file_path'] is None:
            self._env = UnityEnvironment(base_port=5004,
                                         seed=env_args['env_seed'],
                                         side_channels=[
                                             self.engine_configuration_channel,
                                             self.float_properties_channel
                                         ])
        else:
            unity_env_dict = load_yaml('/'.join(
                [os.getcwd(), 'rls', 'envs', 'unity_env_dict.yaml']))
            self._env = UnityEnvironment(
                file_name=env_args['file_path'],
                base_port=env_args['port'],
                no_graphics=not env_args['render'],
                seed=env_args['env_seed'],
                side_channels=[
                    self.engine_configuration_channel,
                    self.float_properties_channel
                ],
                additional_args=[
                    '--scene',
                    str(
                        unity_env_dict.get(env_args.get('env_name', 'Roller'),
                                           'None')), '--n_agents',
                    str(env_args.get('env_num', 1))
                ])
        self.reset_config = env_args['reset_config']

    def reset(self, **kwargs):
        reset_config = kwargs.get('reset_config', None) or self.reset_config
        for k, v in reset_config.items():
            self.float_properties_channel.set_float_parameter(k, v)
        self._env.reset()

    def __getattr__(self, name):
        if name.startswith('_'):
            raise AttributeError(
                "attempted to get missing private attribute '{}'".format(name))
        return getattr(self._env, name)
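Example #3 reads a fixed set of keys from env_args. The dict below is an illustrative sketch of that structure; the values are placeholders.

env_args = {
    'train_mode': True,
    'train_time_scale': 20,
    'inference_time_scale': 1,
    'width': 1028, 'height': 720,
    'quality_level': 5, 'target_frame_rate': 60,
    'file_path': None,   # None -> connect to the Unity editor
    'port': 5005,        # only used when file_path points to a build
    'render': False,
    'env_seed': 42,
    'env_name': 'Roller',
    'env_num': 1,
    'reset_config': {},
}
env = UnityWrapper(env_args)
env.reset()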
Example #4
def test_environment_parameters():
    sender = EnvironmentParametersChannel()
    # We use a raw bytes channel to interpret the data
    receiver = RawBytesChannel(sender.channel_id)

    sender.set_float_parameter("param-1", 0.1)
    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)

    message = IncomingMessage(receiver.get_and_clear_received_messages()[0])
    key = message.read_string()
    dtype = message.read_int32()
    value = message.read_float32()
    assert key == "param-1"
    assert dtype == EnvironmentParametersChannel.EnvironmentDataTypes.FLOAT
    assert abs(value - 0.1) < 1e-8

    sender.set_float_parameter("param-1", 0.1)
    sender.set_float_parameter("param-2", 0.1)
    sender.set_float_parameter("param-3", 0.1)

    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)

    assert len(receiver.get_and_clear_received_messages()) == 3

    with pytest.raises(UnityCommunicationException):
        # processing a message with the send-only EnvironmentParametersChannel should fail
        sender.set_float_parameter("param-1", 0.1)
        data = SideChannelManager([sender]).generate_side_channel_messages()
        SideChannelManager([sender]).process_side_channel_message(data)
Example #5
 def initialize_all_side_channels(self, initialize_config, engine_config):
     """
     Initialize all side channels.
     """
     engine_configuration_channel = EngineConfigurationChannel()
     engine_configuration_channel.set_configuration_parameters(
         **engine_config)
     float_properties_channel = EnvironmentParametersChannel()
     float_properties_channel.set_float_parameter('env_copies',
                                                  self._n_copies)
     for k, v in initialize_config.items():
         float_properties_channel.set_float_parameter(k, v)
     return dict(engine_configuration_channel=engine_configuration_channel,
                 float_properties_channel=float_properties_channel)
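A possible call for Example #5. The two dicts are illustrative; engine_config may hold any keyword accepted by EngineConfigurationChannel.set_configuration_parameters (width, height, quality_level, time_scale, target_frame_rate, capture_frame_rate), and every initialize_config entry becomes a float parameter.

# `wrapper` stands for whatever object defines this method (placeholder).
channels = wrapper.initialize_all_side_channels(
    initialize_config={'max_steps': 500, 'difficulty': 2},
    engine_config={'width': 84, 'height': 84, 'quality_level': 1,
                   'time_scale': 20, 'target_frame_rate': -1})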
Example #6
 def initialize_all_side_channels(self, kwargs):
     '''
     Initialize all side channels.
     '''
     engine_configuration_channel = EngineConfigurationChannel()
     engine_configuration_channel.set_configuration_parameters(
         width=kwargs['width'],
         height=kwargs['height'],
         quality_level=kwargs['quality_level'],
         time_scale=1
         if bool(kwargs.get('inference', False)) else kwargs['time_scale'],
         target_frame_rate=kwargs['target_frame_rate'],
         capture_frame_rate=kwargs['capture_frame_rate'])
     float_properties_channel = EnvironmentParametersChannel()
     for k, v in kwargs.get('initialize_config', {}).items():
         float_properties_channel.set_float_parameter(k, v)
     return dict(engine_configuration_channel=engine_configuration_channel,
                 float_properties_channel=float_properties_channel)
Example #7
    def __init__(self, env_config):
        self.worker_index = 0

        if 'SM_CHANNEL_TRAIN' in os.environ:
            env_name = os.environ['SM_CHANNEL_TRAIN'] +'/'+ env_config['env_name']
            os.chmod(env_name, 0o755)
            print("Changed environment binary into executable mode.")
            # Try connecting to the Unity3D game instance.
            while True:
                try:
                    channel = EnvironmentParametersChannel()
                    unity_env = UnityEnvironment(
                                    env_name, 
                                    no_graphics=True, 
                                    worker_id=self.worker_index, 
                                    side_channels=[channel], 
                                    additional_args=['-logFile', 'unity.log'])
                    channel.set_float_parameter("simulation_mode", 1.0)
                except UnityWorkerInUseException:
                    self.worker_index += 1
                else:
                    break
        else:
            env_name = env_config['env_name']
            while True:
                try:
                    unity_env = default_registry[env_name].make(
                        no_graphics=True,
                        worker_id=self.worker_index,
                        additional_args=['-logFile', 'unity.log'])
                except UnityWorkerInUseException:
                    self.worker_index += 1
                else:
                    break
            
        self.env = UnityToGymWrapper(unity_env) 
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
Example #8
import sys

import numpy as np
from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.environment_parameters_channel import EnvironmentParametersChannel


# setup environment
if sys.platform == 'win32':
    env_build = "../env/FlyCamera/windows/FlyCamera.exe"
elif sys.platform == 'linux':
    env_build = "../env/FlyCamera/linux/FlyCamera.x86_64"
elif sys.platform == "darwin":
    env_build = "../env/FlyCamera/mac.app"
else:
    raise AttributeError("{} platform is not supported.".format(sys.platform))
channel = EnvironmentParametersChannel()
unity_env = UnityEnvironment(env_build, side_channels=[channel])
channel.set_float_parameter("key_speed", 10.0)
channel.set_float_parameter("cam_sens", 0.25)
env = UnityToGymWrapper(unity_env, uint8_visual=True)

# interface
max_mouse_move = 10 # in pixels; limits mouse "jumps" caused by the slow in-loop processing
mouse_position = np.zeros((2,))
def mouse_move(event):
    global mouse_position
    x, y = event.xdata, event.ydata
    mouse_position = np.array([x, y])

key_wasd = np.array([False] * 4)
def key_press(event): # NOTE: cannot handle multiple key press at the same time
    global key_wasd
    try:
Example #9
    if False: # sanity check
        # current environment lock motion of x and z axes
        dt = 1. / UNITY_STEP_FREQ
        acc = act * thrust_multiplier / mass
        next_velo_y = velo[1] + (GRAVITY + acc) * dt
        velo_y_err = np.absolute(next_velo_y - obs[1][4])

        # use velocity to compute distance
        dist_curr = dist_curr + obs[1][4] * dt
        dist_err = np.absolute(dist_curr - process_ray(obs[1]))

        print("Velocity error (y-axis) = {}, Distance error = {}".format(velo_y_err, dist_err))
    
    velo = obs[1][3:]
    distance_to_ground = process_ray(obs[1])
    ax.set_title("Velocity = ({:.2f}, {:.2f}, {:.2f})\n Distance to Ground: {:.8f}".format(\
        *velo, distance_to_ground))

    # example of resetting environment
    if False:
        i += 1
        if i > 10:
            channel.set_float_parameter("end_episode", 1.0)
            i = 0
        else:
            channel.set_float_parameter("end_episode", 0.0)

    # update mpl windows
    fig.canvas.draw()
    plt.pause(0.05)
Example #10
class UnitySamplerSequenceLearning(ABC, Sampler):
    warning_done = False

    def __init__(self,
                 dataset,
                 name_dataset_unity,
                 unity_env_params,
                 nSc,
                 nSt,
                 nFc,
                 nFt,
                 k,
                 size_canvas,
                 grayscale,
                 play_mode=False,
                 change_lights=False,
                 place_camera_mode=PlaceCamerasMode.RND,
                 get_labels_mode=GetLabelsMode.RND,
                 train_comparison_type=TrainComparisonType.ALL,
                 batch_provider_args=None,
                 episodes_per_epoch=999999,
                 channel=0):
        if batch_provider_args is None:
            batch_provider_args = no_batch_args

        self.place_camera_mode = place_camera_mode
        self.get_labels_mode = get_labels_mode
        self.episodes_per_epoch = episodes_per_epoch
        self.dataset = dataset
        self.k = k
        self.nSc = nSc
        self.nSt = nSt
        self.nFc = nFc
        self.grayscale = grayscale
        self.nFt = nFt
        self.matrix_values = None
        if play_mode:
            self.scene = None
            channel = 0
        else:
            if os.name == 'nt':
                machine_name = 'win'
                ext = 'exe'
            else:
                machine_name = 'linux'
                ext = 'x86_64'
            self.scene_path = f'./Unity-ML-Agents-Computer-Vision/Builds/Builds_{machine_name}/SequenceLearning/'
            self.scene_folder = f'k{self.k}_nSt{self.nSt}_nSc{self.nSc}_nFt{self.nFt}_nFc{self.nFc}_sc{size_canvas[0]}_g{int(self.grayscale)}'
            self.scene = f'{self.scene_path}/{self.scene_folder}/scene.{ext}'
            if not os.path.exists(self.scene):
                assert False, f"Unity scene {self.scene} generator does not exist, create it in Windows!"

        log.set_log_level('INFO')

        self.additional_arguments = {
            '-dataset': name_dataset_unity,
            '-place_camera_mode': self.place_camera_mode.value,
            '-get_labels_mode': self.get_labels_mode.value,
            '-train_comparison_type': train_comparison_type.value,
            '-change_lights': int(change_lights),
            '-logFile': 'logout.txt',
            '-repeat_same_batch': -1,
            **batch_provider_args
        }

        self.observation_channel = EnvironmentParametersChannel()

        while True:
            try:
                env_params_channel = StringEnvParamsChannel(
                    "621f0a70-4f87-11ea-a6bf-784f4387d1f7")
                debug_channel = StringDebugLogChannel(
                    "8e8d2cbd-ea04-444d-9180-56ed79a2b94e")
                print(
                    f"\n*** Trying to open Unity scene with dataset: {name_dataset_unity}, folder: {self.scene_folder}"
                )
                self.env = UnityEnvironment(
                    file_name=self.scene,
                    seed=batch_provider_args['-seed'],
                    side_channels=[
                        self.observation_channel, env_params_channel,
                        debug_channel
                    ],
                    no_graphics=False,
                    worker_id=channel,
                    additional_args=list(
                        np.array([
                            [k, v]
                            for k, v in self.additional_arguments.items()
                        ]).flatten()),
                    timeout_wait=180
                    if batch_provider_args['-use_batch_provider'] == 1 else 60)
                break
            except UnityWorkerInUseException as e:
                channel += 1

        self.env_params = unity_env_params(self.observation_channel)

        self.env.reset()
        self.observation_channel.set_float_parameter("newLevel", float(0))

        self.behaviour_names = list(self.env.behavior_specs.keys())
        self.num_objects = env_params_channel.num_objects
        self.labels = None
        self.camera_positions = None

        self.tot_num_frames_each_iter = self.k * (self.nSt * self.nFt +
                                                  self.nSc * self.nFc)
        self.tot_num_matching = self.k
        self.num_labels_passed_by_unity = self.k * (2 if self.nSc > 0 else 1)
        self.tot_num_frames_each_comparison = int(
            self.tot_num_frames_each_iter / self.tot_num_matching)

        self.max_length_elements = np.max(
            (self.tot_num_matching, self.tot_num_frames_each_iter))
        self.dummy_labels = np.empty(self.tot_num_frames_each_iter)
        self.images = []

    def __len__(self):
        return self.episodes_per_epoch

    def update_optional_arguments(self):
        pass

    def send_episode_info(self, idx):
        pass

    def __iter__(self):
        # vh1, vh2, vh3 = [], [], []
        for idx in range(self.episodes_per_epoch):
            self.send_episode_info(idx)
            # remember that the images are passed in alphabetical order (which is C0, C1, T0, T1 ..), whereas the camera positions are passed in a more convenient format:
            # organizes in number of matching (k), and inside that all the Cs, then all the Ts
            # labels is a list of size k with [[C, T] *k]
            self.env.step()
            self.observation_channel.set_float_parameter("newLevel", float(0))

            DS, TS = self.env.get_steps(self.behaviour_names[0])

            # when agent receives an action, it setups a new batch
            self.env.set_actions(
                self.behaviour_names[0], np.array([[1]])
            )  # just give a random thing as an action, it doesn't matter here
            if self.nSc > 0:
                labels = DS.obs[-1][
                    0][:self.num_labels_passed_by_unity].astype(int).reshape(
                        (-1, 2))
            else:
                labels = torch.tensor(
                    DS.obs[-1][0][:self.num_labels_passed_by_unity]).type(
                        torch.LongTensor)
            camera_positions = DS.obs[-1][0][
                self.num_labels_passed_by_unity:].reshape(
                    (self.tot_num_matching,
                     self.tot_num_frames_each_comparison, 3))
            self.images = [
                i[0] for i in DS.obs[:-1]
            ]  # .reshape((self.tot_num_frames_each_iter, self.tot_num_frames_each_comparison, 64, 64, 3))

            #################################~~~~~~DEBUG~~~~~~###############################################
            # _, self.ax = framework_utils.create_sphere()
            # vh1, vh2 = [], []
            # import matplotlib.pyplot as plt
            # plt.show()
            # import copy
            # def unity2python(v):
            #     v = copy.deepcopy(v)
            #     v.T[[1, 2]] = v.T[[2, 1]]
            #     return v
            # for idx, c in enumerate(camera_positions):
            #     if vh1:
            #         # [i.remove() for i in vh1]
            #         # [i.remove() for i in vh2]
            #         vh1 = []
            #         vh2 = []
            #     for i in range(len(camera_positions[0]) - 1):
            #         vh2.append(framework_utils.add_norm_vector(unity2python(c[i + 1]), 'r', ax=self.ax))
            #         vh1.append(framework_utils.add_norm_vector(unity2python(c[0]), 'k', ax=self.ax))

            ## ali = framework_utils.align_vectors(c, t)
            ## vh3 = framework_utils.add_norm_vector(ali, 'r', ax=self.ax )
            #################################################################################################

            self.labels = labels
            self.camera_positions = camera_positions
            batch = self.images[:]
            self.post_process_labels()
            # yield [[b, l] for b, l in zip(batch, np.hstack((self.labels[:, 0], self.labels[:, 1])))] <--- if you want to have same length labels and images.
            yield [[b, l]
                   for b, l in zip(batch, labels)] if self.nSc == 0 else batch

    def post_process_labels(self):
        pass
Example #11
class UnityWrapperProcess:
    def __init__(self,
                 conn: multiprocessing.connection.Connection = None,
                 train_mode=True,
                 file_name=None,
                 worker_id=0,
                 base_port=5005,
                 no_graphics=True,
                 seed=None,
                 scene=None,
                 additional_args=None,
                 n_agents=1):
        """
        Args:
            train_mode: If in train mode, Unity will speed up
            file_name: The executable path. The UnityEnvironment will run in editor if None
            worker_id: Offset from base_port
            base_port: The port used to communicate with Unity. It will be set to 5004 automatically if in editor.
            no_graphics: Whether Unity runs in no-graphics mode. Must be False if the Unity scene uses a camera sensor.
            seed: Random seed
            scene: The scene name
            n_agents: The agents count
        """
        self.scene = scene
        self.n_agents = n_agents

        seed = seed if seed is not None else np.random.randint(0, 65536)
        additional_args = [] if additional_args is None else additional_args.split(
            ' ')

        self.engine_configuration_channel = EngineConfigurationChannel()
        self.environment_parameters_channel = EnvironmentParametersChannel()

        self.environment_parameters_channel.set_float_parameter(
            'env_copys', float(n_agents))

        if conn:
            try:
                from algorithm import config_helper
                config_helper.set_logger()
            except:
                pass

            self._logger = logging.getLogger(
                f'UnityWrapper.Process_{os.getpid()}')
        else:
            self._logger = logging.getLogger('UnityWrapper.Process')

        self._env = UnityEnvironment(
            file_name=file_name,
            worker_id=worker_id,
            base_port=base_port if file_name else None,
            no_graphics=no_graphics and train_mode,
            seed=seed,
            additional_args=['--scene', scene] + additional_args,
            side_channels=[
                self.engine_configuration_channel,
                self.environment_parameters_channel
            ])

        self.engine_configuration_channel.set_configuration_parameters(
            width=200 if train_mode else 1280,
            height=200 if train_mode else 720,
            quality_level=5,
            time_scale=20 if train_mode else 1)

        self._env.reset()
        self.bahavior_name = list(self._env.behavior_specs)[0]

        if conn:
            try:
                while True:
                    cmd, data = conn.recv()
                    if cmd == INIT:
                        conn.send(self.init())
                    elif cmd == RESET:
                        conn.send(self.reset(data))
                    elif cmd == STEP:
                        conn.send(self.step(*data))
                    elif cmd == CLOSE:
                        self.close()
            except:
                self._logger.error(traceback.format_exc())

    def init(self):
        """
        Returns:
            observation shapes: tuple[(o1, ), (o2, ), (o3_1, o3_2, o3_3), ...]
            discrete action size: int, sum of all action branches
            continuous action size: int
        """
        behavior_spec = self._env.behavior_specs[self.bahavior_name]
        obs_names = [o.name for o in behavior_spec.observation_specs]
        self._logger.info(f'Observation names: {obs_names}')
        obs_shapes = [o.shape for o in behavior_spec.observation_specs]
        self._logger.info(f'Observation shapes: {obs_shapes}')

        self._empty_action = behavior_spec.action_spec.empty_action

        discrete_action_size = 0
        if behavior_spec.action_spec.discrete_size > 0:
            discrete_action_size = 1
            action_product_list = []
            for action, branch_size in enumerate(
                    behavior_spec.action_spec.discrete_branches):
                discrete_action_size *= branch_size
                action_product_list.append(range(branch_size))
                self._logger.info(
                    f"Discrete action branch {action} has {branch_size} different actions"
                )

            self.action_product = np.array(
                list(itertools.product(*action_product_list)))

        continuous_action_size = behavior_spec.action_spec.continuous_size

        self._logger.info(f'Continuous action size: {continuous_action_size}')

        self.d_action_size = discrete_action_size
        self.c_action_size = continuous_action_size

        for o in behavior_spec.observation_specs:
            if len(o.shape) >= 3:
                self.engine_configuration_channel.set_configuration_parameters(
                    quality_level=5)
                break

        return obs_shapes, discrete_action_size, continuous_action_size

    def reset(self, reset_config=None):
        """
        return:
            observations: list[(NAgents, o1), (NAgents, o2), (NAgents, o3_1, o3_2, o3_3)]
        """
        reset_config = {} if reset_config is None else reset_config
        for k, v in reset_config.items():
            self.environment_parameters_channel.set_float_parameter(
                k, float(v))

        self._env.reset()
        decision_steps, terminal_steps = self._env.get_steps(
            self.bahavior_name)

        return [obs.astype(np.float32) for obs in decision_steps.obs]

    def step(self, d_action, c_action):
        """
        Args:
            d_action: (NAgents, discrete_action_size), one hot like action
            c_action: (NAgents, continuous_action_size)

        Returns:
            observations: list[(NAgents, o1), (NAgents, o2), (NAgents, o3_1, o3_2, o3_3)]
            rewards: (NAgents, )
            done: (NAgents, ), np.bool
            max_step: (NAgents, ), np.bool
        """

        if self.d_action_size:
            d_action = np.argmax(d_action, axis=1)
            d_action = self.action_product[d_action]

        self._env.set_actions(
            self.bahavior_name,
            ActionTuple(continuous=c_action, discrete=d_action))
        self._env.step()

        decision_steps, terminal_steps = self._env.get_steps(
            self.bahavior_name)

        tmp_terminal_steps = terminal_steps

        while len(decision_steps) == 0:
            self._env.set_actions(self.bahavior_name, self._empty_action(0))
            self._env.step()
            decision_steps, terminal_steps = self._env.get_steps(
                self.bahavior_name)
            tmp_terminal_steps.agent_id = np.concatenate(
                [tmp_terminal_steps.agent_id, terminal_steps.agent_id])
            tmp_terminal_steps.reward = np.concatenate(
                [tmp_terminal_steps.reward, terminal_steps.reward])
            tmp_terminal_steps.interrupted = np.concatenate(
                [tmp_terminal_steps.interrupted, terminal_steps.interrupted])

        reward = decision_steps.reward
        reward[tmp_terminal_steps.agent_id] = tmp_terminal_steps.reward

        done = np.full([len(decision_steps)], False, dtype=bool)  # np.bool is removed in recent NumPy
        done[tmp_terminal_steps.agent_id] = True

        max_step = np.full([len(decision_steps)], False, dtype=bool)
        max_step[tmp_terminal_steps.agent_id] = tmp_terminal_steps.interrupted

        return ([obs.astype(np.float32) for obs in decision_steps.obs],
                decision_steps.reward.astype(np.float32), done, max_step)

    def close(self):
        self._env.close()
        self._logger.warning(f'Process {os.getpid()} exits')
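A sketch of driving Example #11 directly, without the multiprocessing Connection; the build path, scene name, and reset key are placeholders.

wrapper = UnityWrapperProcess(conn=None,
                              train_mode=True,
                              file_name='./builds/env.x86_64',  # placeholder build path
                              scene='MainScene',                # placeholder scene name
                              n_agents=4)
obs_shapes, d_action_size, c_action_size = wrapper.init()
obs = wrapper.reset({'difficulty': 1})  # floats go out via EnvironmentParametersChannel
wrapper.close()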
Example #12
def get_environment(config: Config) -> BaseEnv:
    channel = EnvironmentParametersChannel()
    file_name = None
    if config.RuntimeArgs.run_in_unity:
        file_name = None
        print(Fore.CYAN + "Environment set. Press play within Unity" +
              Fore.RESET)
    elif sys.platform.startswith("win"):
        file_name = 'Build/GridWorld.exe'
    elif sys.platform.startswith("darwin"):
        file_name = 'Build.app/Contents/MacOS/GridWorld'
    elif sys.platform.startswith("linux"):
        raise Exception("Tell chris to support linux")
    else:
        raise Exception("Unable to find which executable to run for system:" +
                        sys.platform)

    # Load
    env = UnityEnvironment(file_name=file_name, side_channels=[channel])

    # Set the channel environment accordingly
    channel.set_float_parameter("num_targets", config.Game.num_targets)
    channel.set_float_parameter("num_fires", config.Game.num_fires)
    allow_light_source = 1.0 if config.Game.allow_light_source else 0.0
    channel.set_float_parameter("allow_light_source", allow_light_source)
    channel.set_float_parameter("step_reward", config.Game.step_reward)
    channel.set_float_parameter("target_reward", config.Game.target_reward)
    channel.set_float_parameter("fire_reward", config.Game.fire_reward)
    channel.set_float_parameter("max_steps", config.Game.max_steps)
    channel.set_float_parameter("time_scale", config.RuntimeArgs.time_scale)

    return env
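Hedged usage of Example #12: how the Config object is constructed is project-specific and not shown here; only the attributes referenced above are assumed to exist.

env = get_environment(config)  # `config` comes from the surrounding project
env.reset()
behavior_name = list(env.behavior_specs)[0]
env.close()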
Example #13
def worker(
    parent_conn: Connection,
    step_queue: Queue,
    pickled_env_factory: str,
    worker_id: int,
    engine_configuration: EngineConfig,
    log_level: int = logging_util.INFO,
) -> None:
    env_factory: Callable[[int, List[SideChannel]],
                          UnityEnvironment] = cloudpickle.loads(
                              pickled_env_factory)
    env_parameters = EnvironmentParametersChannel()
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_configuration)
    stats_channel = StatsSideChannel()
    env: BaseEnv = None
    # Set log level. On some platforms, the logger isn't common with the
    # main process, so we need to set it again.
    logging_util.set_log_level(log_level)

    def _send_response(cmd_name: EnvironmentCommand, payload: Any) -> None:
        parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))

    def _generate_all_results() -> AllStepResult:
        all_step_result: AllStepResult = {}
        for brain_name in env.behavior_specs:
            all_step_result[brain_name] = env.get_steps(brain_name)
        return all_step_result

    def external_brains():
        result = {}
        for behavior_name, behavior_specs in env.behavior_specs.items():
            result[behavior_name] = behavior_spec_to_brain_parameters(
                behavior_name, behavior_specs)
        return result

    try:
        env = env_factory(
            worker_id,
            [env_parameters, engine_configuration_channel, stats_channel])
        while True:
            req: EnvironmentRequest = parent_conn.recv()
            if req.cmd == EnvironmentCommand.STEP:
                all_action_info = req.payload
                for brain_name, action_info in all_action_info.items():
                    if len(action_info.action) != 0:
                        env.set_actions(brain_name, action_info.action)
                env.step()
                all_step_result = _generate_all_results()
                # The timers in this process are independent from all the processes and the "main" process
                # So after we send back the root timer, we can safely clear them.
                # Note that we could randomly return timers a fraction of the time if we wanted to reduce
                # the data transferred.
                # TODO get gauges from the workers and merge them in the main process too.
                env_stats = stats_channel.get_and_reset_stats()
                step_response = StepResponse(all_step_result, get_timer_root(),
                                             env_stats)
                step_queue.put(
                    EnvironmentResponse(EnvironmentCommand.STEP, worker_id,
                                        step_response))
                reset_timers()
            elif req.cmd == EnvironmentCommand.EXTERNAL_BRAINS:
                _send_response(EnvironmentCommand.EXTERNAL_BRAINS,
                               external_brains())
            elif req.cmd == EnvironmentCommand.RESET:
                for k, v in req.payload.items():
                    if isinstance(v, float):
                        env_parameters.set_float_parameter(k, v)
                    elif isinstance(v, ParameterRandomizationSettings):
                        v.apply(k, env_parameters)
                env.reset()
                all_step_result = _generate_all_results()
                _send_response(EnvironmentCommand.RESET, all_step_result)
            elif req.cmd == EnvironmentCommand.CLOSE:
                break
    except (
            KeyboardInterrupt,
            UnityCommunicationException,
            UnityTimeOutException,
            UnityEnvironmentException,
            UnityCommunicatorStoppedException,
    ) as ex:
        logger.info(
            f"UnityEnvironment worker {worker_id}: environment stopping.")
        step_queue.put(
            EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex))
        _send_response(EnvironmentCommand.ENV_EXITED, ex)
    finally:
        # If this worker has put an item in the step queue that hasn't been processed by the EnvManager, the process
        # will hang until the item is processed. We avoid this behavior by using Queue.cancel_join_thread()
        # See https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Queue.cancel_join_thread for
        # more info.
        logger.debug(f"UnityEnvironment worker {worker_id} closing.")
        step_queue.cancel_join_thread()
        step_queue.close()
        if env is not None:
            env.close()
        logger.debug(f"UnityEnvironment worker {worker_id} done.")
Example #14
class ObstacleTowerEnv(gym.Env):
    ALLOWED_VERSIONS = ["4.0?team=0"]
    _REGISTRY_YAML = "https://storage.googleapis.com/obstacle-tower-build/v4.0/obstacle_tower_v4.0.yaml"

    def __init__(
        self,
        environment_filename=None,
        worker_id=0,
        retro=True,
        timeout_wait=30,
        realtime_mode=False,
        config=None,
        greyscale=False,
    ):
        """
        Arguments:
          environment_filename: The file path to the Unity executable.  Does not require the extension.
          docker_training: Whether this is running within a docker environment and should use a virtual 
            frame buffer (xvfb).
          worker_id: The index of the worker in the case where multiple environments are running.  Each 
            environment reserves port (5005 + worker_id) for communication with the Unity executable.
          retro: Resize visual observation to 84x84 (int8) and flattens action space.
          timeout_wait: Time for python interface to wait for environment to connect.
          realtime_mode: Whether to render the environment window image and run environment at realtime.
        """
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        if environment_filename is None:
            registry = UnityEnvRegistry()
            registry.register_from_yaml(self._REGISTRY_YAML)
            self._env = registry["ObstacleTower"].make(
                worker_id=worker_id,
                timeout_wait=timeout_wait,
                side_channels=[self.reset_parameters, self.engine_config])
        else:
            self._env = UnityEnvironment(
                environment_filename,
                worker_id,
                timeout_wait=timeout_wait,
                side_channels=[self.reset_parameters, self.engine_config],
            )

        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0)
        self._env.reset()
        behavior_name = list(self._env.behavior_specs)[0]
        split_name = behavior_name.split("-v")
        if len(split_name) == 2 and split_name[0] == "ObstacleTowerAgent":
            self.name, self.version = split_name
        else:
            raise UnityGymException(
                "Attempting to launch non-Obstacle Tower environment")

        if self.version not in self.ALLOWED_VERSIONS:
            raise UnityGymException(
                "Invalid Obstacle Tower version.  Your build is v" +
                self.version +
                " but only the following versions are compatible with this gym: "
                + str(self.ALLOWED_VERSIONS))

        self.visual_obs = None
        self._current_state = None
        self._n_agents = None
        self._flattener = None
        self._greyscale = greyscale

        # Environment reset parameters
        self._seed = None
        self._floor = None

        self.realtime_mode = realtime_mode
        self.game_over = False  # Hidden flag used by Atari environments to determine if the game is over
        self.retro = retro
        self.config = config

        flatten_branched = self.retro
        uint8_visual = self.retro

        # Check behavior configuration
        if len(self._env.behavior_specs) != 1:
            raise UnityGymException(
                "There can only be one agent in this environment "
                "if it is wrapped in a gym.")
        self.behavior_name = behavior_name
        behavior_spec = self._env.behavior_specs[behavior_name]

        if len(behavior_spec) < 2:
            raise UnityGymException(
                "Environment provides too few observations.")

        self.uint8_visual = uint8_visual

        # Check for number of agents in scene.
        initial_info, terminal_info = self._env.get_steps(behavior_name)
        self._check_agents(len(initial_info))

        # Set observation and action spaces
        if len(behavior_spec.action_shape) == 1:
            self._action_space = spaces.Discrete(behavior_spec.action_shape[0])
        else:
            if flatten_branched:
                self._flattener = ActionFlattener(behavior_spec.action_shape)
                self._action_space = self._flattener.action_space
            else:
                self._action_space = spaces.MultiDiscrete(
                    behavior_spec.action_shape)

        if self._greyscale:
            depth = 1
        else:
            depth = 3
        image_space_max = 1.0
        image_space_dtype = np.float32
        camera_height = behavior_spec.observation_shapes[0][0]
        camera_width = behavior_spec.observation_shapes[0][1]
        if self.retro:
            image_space_max = 255
            image_space_dtype = np.uint8
            camera_height = 84
            camera_width = 84

        image_space = spaces.Box(
            0,
            image_space_max,
            dtype=image_space_dtype,
            shape=(camera_height, camera_width, depth),
        )
        if self.retro:
            self._observation_space = image_space
        else:
            max_float = np.finfo(np.float32).max
            keys_space = spaces.Discrete(5)
            time_remaining_space = spaces.Box(low=0.0,
                                              high=max_float,
                                              shape=(1, ),
                                              dtype=np.float32)
            floor_space = spaces.Discrete(9999)
            self._observation_space = spaces.Tuple(
                (image_space, keys_space, time_remaining_space, floor_space))

    def reset(self, config=None):
        """Resets the state of the environment and returns an initial observation.
        In the case of multi-agent environments, this is a list.
        Returns: observation (object/list): the initial observation of the
            space.
        """
        if config is None:
            reset_params = {}
            if self.config is not None:
                reset_params = self.config
        else:
            reset_params = config
        if self._floor is not None:
            reset_params["starting-floor"] = self._floor
        if self._seed is not None:
            reset_params["tower-seed"] = self._seed

        for key, value in reset_params.items():
            self.reset_parameters.set_float_parameter(key, value)
        self.reset_params = None
        self._env.reset()
        info, terminal_info = self._env.get_steps(self.behavior_name)
        n_agents = len(info)
        self._check_agents(n_agents)
        self.game_over = False

        obs, reward, done, info = self._single_step(info, terminal_info)
        return obs

    def step(self, action):
        """Run one timestep of the environment's dynamics. When end of
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.
        Accepts an action and returns a tuple (observation, reward, done, info).
        In the case of multi-agent environments, these are lists.
        Args:
            action (object/list): an action provided by the environment
        Returns:
            observation (object/list): agent's observation of the current environment
            reward (float/list) : amount of reward returned after previous action
            done (boolean/list): whether the episode has ended.
            info (dict): contains auxiliary diagnostic information, including BrainInfo.
        """

        # Use random actions for all other agents in environment.
        if self._flattener is not None:
            # Translate action into list
            action = np.array(self._flattener.lookup_action(action))

        self._env.set_actions(self.behavior_name, action.reshape([1, -1]))
        self._env.step()
        info, terminal_info = self._env.get_steps(self.behavior_name)
        n_agents = len(info)
        self._check_agents(n_agents)
        self._current_state = info

        obs, reward, done, info = self._single_step(info, terminal_info)
        self.game_over = done

        return obs, reward, done, info

    def _single_step(self, info, terminal_info):
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info
        self.visual_obs = self._preprocess_single(use_info.obs[0][0][:, :, :])

        self.visual_obs, keys, time, current_floor = self._prepare_tuple_observation(
            self.visual_obs, use_info.obs[1][0])

        if self.retro:
            self.visual_obs = self._resize_observation(self.visual_obs)
            self.visual_obs = self._add_stats_to_image(self.visual_obs,
                                                       use_info.obs[1][0])
            default_observation = self.visual_obs
        else:
            default_observation = self.visual_obs, keys, time, current_floor

        if self._greyscale:
            default_observation = self._greyscale_obs(default_observation)

        return (
            default_observation,
            use_info.reward[0],
            done,
            {
                "text_observation": None,
                "brain_info": use_info,
                "total_keys": keys,
                "time_remaining": time,
                "current_floor": current_floor,
            },
        )

    def _greyscale_obs(self, obs):
        new_obs = np.floor(np.expand_dims(np.mean(obs, axis=2),
                                          axis=2)).astype(np.uint8)
        return new_obs

    def _preprocess_single(self, single_visual_obs):
        if self.uint8_visual:
            return (255.0 * single_visual_obs).astype(np.uint8)
        else:
            return single_visual_obs

    def render(self, mode="rgb_array"):
        return self.visual_obs

    def close(self):
        """Override _close in your subclass to perform any necessary cleanup.
        Environments will automatically close() themselves when
        garbage collected or when the program exits.
        """
        self._env.close()

    def seed(self, seed=None):
        """Sets a fixed seed for this env's random number generator(s).
        The valid range for seeds is [0, 99999). By default a random seed
        will be chosen.
        """
        if seed is None:
            self._seed = seed
            return

        seed = int(seed)
        if seed < 0 or seed >= 99999:
            logger.warning(
                "Seed outside of valid range [0, 99999). A random seed "
                "within the valid range will be used on next reset.")
        logger.warning("New seed " + str(seed) + " will apply on next reset.")
        self._seed = seed

    def floor(self, floor=None):
        """Sets the starting floor to a fixed floor number on subsequent environment
        resets."""
        if floor is None:
            self._floor = floor
            return

        floor = int(floor)
        if floor < 0 or floor > 99:
            logger.warning(
                "Starting floor outside of valid range [0, 99]. Floor 0 will "
                "be used on next reset.")
        logger.warning("New starting floor " + str(floor) +
                       " will apply on next reset.")
        self._floor = floor

    @staticmethod
    def _resize_observation(observation):
        """
        Re-sizes visual observation to 84x84
        """
        obs_image = Image.fromarray(observation)
        obs_image = obs_image.resize((84, 84), Image.NEAREST)
        return np.array(obs_image)

    @staticmethod
    def _prepare_tuple_observation(vis_obs, vector_obs):
        """
        Converts separate visual and vector observation into prepared tuple
        """
        key = vector_obs[0:6]
        time = vector_obs[6]
        floor_number = vector_obs[7]
        key_num = np.argmax(key, axis=0)
        return vis_obs, key_num, time, floor_number

    @staticmethod
    def _add_stats_to_image(vis_obs, vector_obs):
        """
        Displays time left and number of keys on visual observation
        """
        key = vector_obs[0:6]
        time = vector_obs[6]
        key_num = int(np.argmax(key, axis=0))
        time_num = min(time, 10000) / 10000

        vis_obs[0:10, :, :] = 0
        for i in range(key_num):
            start = int(i * 16.8) + 4
            end = start + 10
            vis_obs[1:5, start:end, 0:2] = 255
        vis_obs[6:10, 0:int(time_num * 84), 1] = 255
        return vis_obs

    def _check_agents(self, n_agents):
        if n_agents > 1:
            raise UnityGymException(
                "The environment was launched as a single-agent environment, however"
                "there is more than one agent in the scene.")
        if self._n_agents is None:
            self._n_agents = n_agents
            logger.info("{} agents within environment.".format(n_agents))
        elif self._n_agents != n_agents:
            raise UnityGymException(
                "The number of agents in the environment has changed since "
                "initialization. This is not supported.")

    @property
    def metadata(self):
        return {"render.modes": ["rgb_array"]}

    @property
    def reward_range(self):
        return -float("inf"), float("inf")

    @property
    def spec(self):
        return None

    @property
    def action_space(self):
        return self._action_space

    @property
    def observation_space(self):
        return self._observation_space

    @property
    def number_agents(self):
        return self._n_agents
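A short usage sketch for Example #14: with environment_filename=None the registered build is fetched via UnityEnvRegistry, and retro=True gives 84x84 uint8 observations with a flattened action space.

env = ObstacleTowerEnv(environment_filename=None, retro=True, realtime_mode=False)
env.seed(5)   # tower-seed, applied on the next reset
env.floor(3)  # starting-floor, applied on the next reset
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()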
Example #15
# setup environment
mass = 1.0
thrust_multiplier = 30.0
action_mode = [0.0, 1.0][0]  # thrust control (0.0) / velocity control (1.0)
if sys.platform == "win32":
    env_build = "../env/FreeFall/windows/FreeFall.exe"
elif sys.platform == "linux":
    env_build = "../env/FreeFall/linux/FreeFall.x86_64"
elif sys.platform == "darwin":
    env_build = "../env/FreeFall/mac.app"
else:
    raise AttributeError("{} platform is not supported.".format(sys.platform))
channel = EnvironmentParametersChannel()
unity_env = UnityEnvironment(env_build, side_channels=[channel])
env = UnityToGymWrapper(unity_env, uint8_visual=True, allow_multiple_obs=True)
channel.set_float_parameter("mass", mass)
channel.set_float_parameter("thrust_multiplier", thrust_multiplier)
channel.set_float_parameter("action_mode", action_mode)
# NOTE: you can also set agent's starting position through position.x, position.y, position.z

# interface
key_ws = np.array([False] * 2)


def key_press(
        event):  # NOTE: cannot handle multiple key press at the same time
    global key_ws
    try:
        key = event.key.lower()
    except:
        key = event.key
Example #16
class UnityWrapper:
    def __init__(self,
                 train_mode=True,
                 file_name=None,
                 base_port=5005,
                 seed=None,
                 scene=None,
                 n_agents=1):

        seed = seed if seed is not None else np.random.randint(0, 65536)

        self.engine_configuration_channel = EngineConfigurationChannel()
        self.environment_parameters_channel = EnvironmentParametersChannel()

        self._env = UnityEnvironment(
            file_name=file_name,
            base_port=base_port,
            seed=seed,
            args=['--scene', scene, '--n_agents',
                  str(n_agents)],
            side_channels=[
                self.engine_configuration_channel,
                self.environment_parameters_channel
            ])

        if train_mode:
            self.engine_configuration_channel.set_configuration_parameters(
                width=200, height=200, quality_level=0, time_scale=100)
        else:
            self.engine_configuration_channel.set_configuration_parameters(
                width=1028,
                height=720,
                quality_level=5,
                time_scale=5,
                target_frame_rate=60)

        self._env.reset()
        self.bahavior_name = self._env.get_behavior_names()[0]

    def init(self):
        behavior_spec = self._env.get_behavior_spec(self.bahavior_name)
        logger.info(f'Observation shapes: {behavior_spec.observation_shapes}')
        is_discrete = behavior_spec.is_action_discrete()
        logger.info(
            f'Action size: {behavior_spec.action_size}. Is discrete: {is_discrete}'
        )

        for o in behavior_spec.observation_shapes:
            if len(o) >= 3:
                self.engine_configuration_channel.set_configuration_parameters(
                    quality_level=5)
                break

        return behavior_spec.observation_shapes, behavior_spec.action_size, is_discrete

    def reset(self, reset_config=None):
        reset_config = {} if reset_config is None else reset_config
        for k, v in reset_config.items():
            self.environment_parameters_channel.set_float_parameter(
                k, float(v))

        self._env.reset()
        decision_steps, terminal_steps = self._env.get_steps(
            self.bahavior_name)

        return [obs.astype(np.float32) for obs in decision_steps.obs]

    def step(self, action):
        self._env.set_actions(self.bahavior_name, action)
        self._env.step()
        decision_steps, terminal_steps = self._env.get_steps(
            self.bahavior_name)

        tmp_terminal_steps = terminal_steps

        while len(decision_steps) == 0:
            self._env.set_actions(self.bahavior_name,
                                  np.empty([0, action.shape[-1]]))
            self._env.step()
            decision_steps, terminal_steps = self._env.get_steps(
                self.bahavior_name)
            tmp_terminal_steps.agent_id = np.concatenate(
                [tmp_terminal_steps.agent_id, terminal_steps.agent_id])
            tmp_terminal_steps.reward = np.concatenate(
                [tmp_terminal_steps.reward, terminal_steps.reward])
            tmp_terminal_steps.max_step = np.concatenate(
                [tmp_terminal_steps.max_step, terminal_steps.max_step])

        reward = decision_steps.reward
        reward[tmp_terminal_steps.agent_id] = tmp_terminal_steps.reward

        done = np.full([len(decision_steps)], False, dtype=bool)  # np.bool is removed in recent NumPy
        done[tmp_terminal_steps.agent_id] = True

        max_step = np.full([len(decision_steps)], False, dtype=bool)
        max_step[tmp_terminal_steps.agent_id] = tmp_terminal_steps.max_step

        return ([obs.astype(np.float32) for obs in decision_steps.obs],
                decision_steps.reward.astype(np.float32), done, max_step)

    def close(self):
        self._env.close()
Example #17
class StorageEnvController(ConFormSimUnityEnvController):

    _BASE_PORT = 5004

    def __init__(self, config=DEFAULT_ENV_CONFIG):
        """
        Environment initialization
        :param config: Configuration of the environment.
        """

        # create side channels
        self.env_param_channel = EnvironmentParametersChannel()
        self.engine_channel = EngineConfigurationChannel()
        self.color_pool_channel = IntListPropertiesChannel()

        side_channels = [
            self.env_param_channel,
            self.engine_channel,
            self.color_pool_channel,
        ]

        # flag whether the config has been applied to the environment
        self.is_already_initialized = False
        # create environment with config and side channels
        super().__init__(config,
                         DEFAULT_ENV_CONFIG,
                         side_channels=side_channels)

    def apply_config(self):
        # set FloatProperties
        grid_size_x = self.config.get("grid_size_x")
        if not isinstance(grid_size_x, list) or len(grid_size_x) != 2:
            raise ValueError("The provided grid_size_x parameter is not a "
                             "[min, max] list. Please correct this.")
        grid_size_y = self.config.get("grid_size_y")
        if not isinstance(grid_size_y, list) or len(grid_size_y) != 2:
            raise ValueError("The provided grid_size_y parameter is not a "
                             "[min, max] list. Please correct this.")

        vis_obs_size = self.config.get("vis_obs_size")
        if not isinstance(vis_obs_size, list) or len(vis_obs_size) != 2:
            raise ValueError("The provided vis_obs_size parameter is not a "
                             "[min, max] list. Please correct this.")

        base_size_x = self.config.get("base_size_x")
        if not isinstance(base_size_x, list) or len(base_size_x) != 2:
            raise ValueError("The provided base_size_x parameter is not a "
                             "[min, max] list. Please correct this.")
        base_size_y = self.config.get("base_size_y")
        if not isinstance(base_size_y, list) or len(base_size_y) != 2:
            raise ValueError("The provided base_size_y parameter is not a "
                             "[min, max] list. Please correct this.")
        num_per_base_type = self.config.get("num_per_base_type")
        if not isinstance(num_per_base_type,
                          list) or len(num_per_base_type) != 2:
            raise ValueError(
                "The provided num_per_base_type parameter is not a "
                "[min, max] list. Please correct this.")

        num_per_item = self.config.get("num_per_item")
        if not isinstance(num_per_item, list) or len(num_per_item) != 2:
            raise ValueError("The provided num_per_item parameter is not a "
                             "[min, max] list. Please correct this.")

        color_pool = self.config.get("color_pool")
        if not isinstance(color_pool, list):
            raise ValueError("The provided color_pool parameter is not a "
                             "list. Please correct this.")

        camera_type = self.config.get("camera_type")
        camera_type_f: float = CAMERA_TYPES[camera_type] or 0.0

        # set properties in reset channel
        self.env_param_channel.set_float_parameter("minGridSizeX",
                                                   grid_size_x[0])
        self.env_param_channel.set_float_parameter("maxGridSizeX",
                                                   grid_size_x[1])
        self.env_param_channel.set_float_parameter("minGridSizeY",
                                                   grid_size_y[0])
        self.env_param_channel.set_float_parameter("maxGridSizeY",
                                                   grid_size_y[1])
        self.env_param_channel.set_float_parameter("cameraType", camera_type_f)
        # area settings
        # check if num train areas should be set
        if self.is_already_initialized:
            print("You're trying to change the number of "
                  "train areas, during runtime. This is only possible at "
                  "initialization.")
        else:
            self.env_param_channel.set_float_parameter(
                "numTrainAreas", self.config.get("num_train_areas"))

        self.env_param_channel.set_float_parameter(
            "numBaseTypesToUse", self.config.get("num_base_types"))
        self.env_param_channel.set_float_parameter("numberPerBaseTypeMax",
                                                   num_per_base_type[1])
        self.env_param_channel.set_float_parameter("numberPerBaseTypeMin",
                                                   num_per_base_type[0])
        self.env_param_channel.set_float_parameter("baseSizeXMax",
                                                   base_size_x[1])
        self.env_param_channel.set_float_parameter("baseSizeXMin",
                                                   base_size_x[0])
        self.env_param_channel.set_float_parameter("baseSizeZMax",
                                                   base_size_y[1])
        self.env_param_channel.set_float_parameter("baseSizeZMin",
                                                   base_size_y[0])
        self.env_param_channel.set_float_parameter(
            "baseInCornersOnly",
            1 if self.config.get("base_in_corners_only") else 0)
        self.env_param_channel.set_float_parameter(
            "boxesVanish", 1 if self.config.get("boxes_vanish") else 0)
        self.env_param_channel.set_float_parameter(
            "boxesNeedDrop", 1 if self.config.get("boxes_need_drop") else 0)
        self.env_param_channel.set_float_parameter(
            "sparseReward", 1 if self.config.get("sparse_reward_only") else 0)
        # color settings
        self.env_param_channel.set_float_parameter(
            "noBaseFillColor",
            1 if self.config.get("no_base_fill_color") else 0)
        self.env_param_channel.set_float_parameter(
            "brighterBases", 1 if self.config.get("brighter_bases") else 0)
        self.env_param_channel.set_float_parameter(
            "full_base_line", 1 if self.config.get("fullBaseLine") else 0)
        # item settings
        self.env_param_channel.set_float_parameter(
            "numItemTypesToUse", self.config.get("num_item_types"))
        self.env_param_channel.set_float_parameter("numberPerItemTypeMax",
                                                   num_per_item[1])
        self.env_param_channel.set_float_parameter("numberPerItemTypeMin",
                                                   num_per_item[0])
        # general settings
        self.env_param_channel.set_float_parameter(
            "noDisplay", 1 if self.config.get("no_display") else 0)
        self.env_param_channel.set_float_parameter("visObsWidth",
                                                   vis_obs_size[0])
        self.env_param_channel.set_float_parameter("visObsHeight",
                                                   vis_obs_size[1])
        self.env_param_channel.set_float_parameter(
            "useVisual", 1 if self.config.get("use_visual")
            and not self.config.get("use_object_property_camera") else 0)
        self.env_param_channel.set_float_parameter(
            "useRayPerception",
            1 if self.config.get("use_ray_perception") else 0)
        self.env_param_channel.set_float_parameter(
            "useObjectPropertyCamera",
            1 if self.config.get("use_object_property_camera") else 0)
        self.env_param_channel.set_float_parameter(
            "maxSteps", self.config.get("max_steps"))
        self.env_param_channel.set_float_parameter(
            "taskLevel", self.config.get("task_level"))

        # Read engine config
        engine_config_dict = self.config.get("engine_config")
        # Configure the Engine
        engine_config = EngineConfig(
            width=engine_config_dict.get("window_width"),
            height=engine_config_dict.get("window_height"),
            quality_level=engine_config_dict.get("quality_level"),
            time_scale=engine_config_dict.get("sim_speed"),
            target_frame_rate=engine_config_dict.get("target_frame_rate"),
            capture_frame_rate=60)
        self.engine_channel.set_configuration(engine_config)

        # set list properties
        self.color_pool_channel.set_property("colorPool",
                                             self.config.get("color_pool"))
        self.is_already_initialized = True
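
The IntListPropertiesChannel used above is a custom side channel and is not part of mlagents_envs. Below is a minimal sketch of what such a channel could look like, built on the documented SideChannel API; the channel UUID, the set_property signature, and the message layout (key, length, values) are assumptions and must match whatever the Unity side of this environment actually expects.

import uuid
from typing import List

from mlagents_envs.side_channel.side_channel import (
    SideChannel,
    IncomingMessage,
    OutgoingMessage,
)


class IntListPropertiesChannel(SideChannel):
    """Hypothetical side channel that sends named lists of ints to Unity."""

    def __init__(self):
        # Placeholder UUID; it has to match the channel id registered in the
        # Unity project for this environment.
        super().__init__(uuid.UUID("11111111-2222-3333-4444-555555555555"))

    def on_message_received(self, msg: IncomingMessage) -> None:
        # This channel only pushes data to Unity, so incoming messages are ignored.
        pass

    def set_property(self, key: str, value: List[int]) -> None:
        # Assumed layout: key string, then the list length, then each value.
        msg = OutgoingMessage()
        msg.write_string(key)
        msg.write_int32(len(value))
        for item in value:
            msg.write_int32(item)
        self.queue_message_to_send(msg)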
Пример #18
0
print("Seed:", seed)
print("Params:", params)
torch.manual_seed(seed)
np.random.seed(seed)

game_path = os.path.expanduser(
    "/data4/pdp/grantsrb/loc_games/LocationGame2dLinux_9/LocationGame2dLinux.x86_64"
)
channel = EngineConfigurationChannel()
env_channel = EnvironmentParametersChannel()
env = UnityEnvironment(file_name=game_path,
                       side_channels=[channel, env_channel],
                       seed=seed)
channel.set_configuration_parameters(time_scale=1)
for k, v in params.items():
    env_channel.set_float_parameter(k, v)
env = UnityToGymWrapper(env, allow_multiple_obs=True)
print("Environment created!")

#matplotlib.use("tkagg")
obs = env.reset()
#plt.imshow(obs[0])
#plt.show()
done = False
while True:
    print("stepping")
    x, z = [float(y.strip()) for y in str(input("action: ")).split(",")]
    # The obs is a list of length 2: the first element is the image and the second is the goal coordinate.
    # The reward is based on the distance between the action location and the object nearest to it.
    obs, rew, done, _ = env.step([x, z])
    plt.imsave("sample.png", obs[0])
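    # Illustrative sketch: unpack the two observation entries described in the
    # comment above and restart the episode once it terminates.
    img, goal = obs
    print("goal:", goal, "| reward:", rew)
    if done:
        obs = env.reset()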
Пример #19
0
class MyEnv(gym.Env):
    def __init__(self, worker_id, realtime_mode=False):
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        env_path = "C:/myDesktop/source/gridworld_imitation/food_collector_4"

        self._env = UnityEnvironment(
            env_path,
            worker_id,
            side_channels=[self.reset_parameters, self.engine_config])
        self._env.reset()

        self.behavior_name = list(self._env.behavior_specs)[0]
        behavior_spec = self._env.behavior_specs[self.behavior_name]
        print(behavior_spec)

        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0)
            self.reset_parameters.set_float_parameter("train-mode", 0.0)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0)
            self.reset_parameters.set_float_parameter("train-mode", 1.0)

        self._flattener = ActionFlattener(
            behavior_spec.action_spec.discrete_branches)

    def reset(self):
        # for key, value in reset_params.items():
        #     self.reset_parameters.set_float_parameter(key, value)
        self._env.reset()
        info, terminal_info = self._env.get_steps(self.behavior_name)
        self.game_over = False

        obs, reward, done, info = self._single_step(info, terminal_info)
        return obs

    def step(self, action):
        # If a flat discrete index is passed in, translate it into the
        # corresponding multi-discrete action list.
        if self._flattener is not None and isinstance(action, int):
            action = np.array(self._flattener.lookup_action(action))

        # Wrap the discrete action in an ActionTuple before sending it to the environment.
        action_tuple = ActionTuple()
        action_tuple.add_discrete(np.asarray(action).reshape(1, -1))

        self._env.set_actions(self.behavior_name, action_tuple)
        self._env.step()
        running_info, terminal_info = self._env.get_steps(self.behavior_name)
        obs, reward, done, info = self._single_step(running_info,
                                                    terminal_info)
        self.game_over = done

        return obs, reward, done, info

    def _single_step(self, info, terminal_info):
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info

        # Observations come back in order: camera first, then sensor
        output_info = {}
        output_info["visual_obs"] = use_info.obs[0][0]

        #obs = np.concatenate([use_info.obs[1][0], use_info.obs[2][0]])
        return use_info.obs[1][0], use_info.reward[0], done, output_info

    def close(self):
        self._env.close()

    def render(self):
        pass
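
The ActionFlattener used in the constructor above is not defined in this snippet. The sketch below shows one way such a helper could be implemented, assuming its job is to expose a single flat Discrete action space and translate a flat index back into one value per multi-discrete branch; this is an assumption, not necessarily the original implementation.

import itertools

from gym import spaces


class ActionFlattener:
    """Hypothetical helper that flattens multi-discrete action branches
    into a single Discrete action space."""

    def __init__(self, branched_action_space):
        # branched_action_space is e.g. (2, 3): two branches with 2 and 3 actions.
        self._branches = branched_action_space
        # Enumerate every combination of branch values once.
        self.action_lookup = {
            index: list(combo)
            for index, combo in enumerate(
                itertools.product(*(range(n) for n in branched_action_space)))
        }
        self.action_space = spaces.Discrete(len(self.action_lookup))

    def lookup_action(self, action_index):
        # Map a flat Discrete index back to one value per action branch.
        return self.action_lookup[action_index]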
Пример #20
0
class UnityWrapper(Env):
    """This class wraps Unity environments.

    This wrapper has notable constraints:
        - Only one agent (no multi-agent environments).
        - Only one visual observation
        - Only discrete and multi-discrete action spaces (no continuous action space)"""

    def __init__(self, env_path, reset_params, worker_id = 1, no_graphics = False, realtime_mode = False, record_trajectory = False):
        """Instantiates the Unity Environment from a specified executable.
        
        Arguments:
            env_path {string} -- Path to the executable of the environment
            reset_params {dict} -- Reset parameters of the environment such as the seed
        
        Keyword Arguments:
            worker_id {int} -- Port of the environment's instance (default: {1})
            no_graphics {bool} -- Whether to allow the executable to render or not (default: {False})
            realtime_mode {bool} -- Whether to run the environment in real time or as fast as possible (default: {False})
            record_trajectory {bool} -- Whether to record the trajectory of an entire episode. This can be used for video recording. (default: {False})
        """
        # Initialize channels
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        # Prepare default reset parameters
        self._default_reset_parameters = {}
        for key, value in reset_params.items():
            self._default_reset_parameters[key] = value
            if key != "start-seed" or key != "num-seeds":
                self.reset_parameters.set_float_parameter(key, value)

        self._realtime_mode = realtime_mode
        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0, width=1280, height=720)
        else:
            self.engine_config.set_configuration_parameters(time_scale=30.0, width=256, height=256)

        # Whether to record the trajectory of an entire episode
        self._record = record_trajectory

        # Launch the environment's executable
        self._env = UnityEnvironment(file_name = env_path, worker_id = worker_id, no_graphics = no_graphics, side_channels=[self.reset_parameters, self.engine_config])
        # If the Unity Editor should be used instead of a build
        # self._env = UnityEnvironment(file_name = None, worker_id = 0, no_graphics = no_graphics, side_channels=[self.reset_parameters, self.engine_config])

        # Reset the environment
        self._env.reset()
        # Retrieve behavior configuration
        self._behavior_name = list(self._env.behavior_specs)[0]
        self._behavior_spec = self._env.behavior_specs[self._behavior_name]

        # Check whether this Unity environment is supported
        self._verify_environment()

        # Set action space properties
        if self._behavior_spec.action_spec.is_discrete():
            num_action_branches = self._behavior_spec.action_spec.discrete_size
            action_branch_dimensions = self._behavior_spec.action_spec.discrete_branches
            if num_action_branches == 1:
                self._action_space = spaces.Discrete(action_branch_dimensions[0])
            else:
                self._action_space = spaces.MultiDiscrete(action_branch_dimensions)

        # Count visual and vector observations
        self._num_vis_obs, self._num_vec_obs = 0, 0
        self._vec_obs_indices = []
        for index, obs in enumerate(self._behavior_spec.observation_specs):
            # Observation specs with a multi-dimensional shape are visual observations
            if len(obs.shape) > 1:
                self._num_vis_obs = self._num_vis_obs + 1
                self._vis_obs_index = index
            else:
                self._num_vec_obs = self._num_vec_obs + 1
                self._vec_obs_indices.append(index)

        # Set visual observation space property
        if self._num_vis_obs == 1:
            vis_obs_shape = self._behavior_spec.observation_specs[self._vis_obs_index].shape

            self._visual_observation_space = spaces.Box(
                low = 0,
                high = 1.0,
                shape = vis_obs_shape,
                dtype = np.float32)
        else:
            self._visual_observation_space = None

        # Set vector observation space property
        if self._num_vec_obs > 0:
            # Determine the length of vec obs by summing the length of each distinct one
            vec_obs_length = sum([self._behavior_spec.observation_specs[i].shape[0] for i in self._vec_obs_indices])
            self._vector_observation_space = (vec_obs_length, )
        else:
            self._vector_observation_space = None

        # Videos can only be recorded if the environment provides visual observations
        if self._record and self._visual_observation_space is None:
            UnityEnvironmentException("Videos cannot be rendered for a Unity environment that does not provide visual observations.")

    @property
    def unwrapped(self):
        """        
        Returns:
            {UnityWrapper} -- Environment in its vanilla (i.e. unwrapped) state
        """
        return self
    
    @property
    def action_space(self):
        """Returns the shape of the action space of the agent."""
        return self._action_space

    @property
    def action_names(self):
        return None

    @property
    def get_episode_trajectory(self):
        """Returns the trajectory of an entire episode as dictionary (vis_obs, vec_obs, rewards, actions). 
        """
        self._trajectory["action_names"] = self.action_names
        return self._trajectory if self._trajectory else None

    @property
    def visual_observation_space(self):
        return self._visual_observation_space

    @property
    def vector_observation_space(self):
        return self._vector_observation_space

    def reset(self, reset_params = None):
        """Resets the environment based on a global or just specified config.
        
        Keyword Arguments:
            config {dict} -- Reset parameters to configure the environment (default: {None})
        
        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
        """
        # Track rewards of an entire episode
        self._rewards = []

        # Fall back to the default reset parameters if none were specified
        if reset_params is None:
            reset_params = self._default_reset_parameters

        # Apply reset parameters
        for key, value in reset_params.items():
            # Skip reset parameters that are not used by the Unity environment
            if key != "start-seed" or key != "num-seeds":
                self.reset_parameters.set_float_parameter(key, value)

        # Sample the seed to be used
        if reset_params["start-seed"] > -1:
            seed = randint(reset_params["start-seed"], reset_params["start-seed"] + reset_params["num-seeds"] - 1)
        else:
            # Use unlimited seeds
            seed = -1
        self.reset_parameters.set_float_parameter("seed", seed)

        # Reset and verify the environment
        self._env.reset()
        info, terminal_info = self._env.get_steps(self._behavior_name)
        self._verify_environment()
        
        # Retrieve initial observations
        vis_obs, vec_obs, _, _ = self._process_agent_info(info, terminal_info)

        # Prepare trajectory recording
        self._trajectory = {
            "vis_obs": [vis_obs * 255], "vec_obs": [vec_obs],
            "rewards": [0.0], "actions": []
        }

        return vis_obs, vec_obs

    def step(self, action):
        """Runs one timestep of the environment"s dynamics.
        Once an episode is done, reset() has to be called manually.
                
        Arguments:
            action {List} -- A list of at least one discrete action to be executed by the agent

        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
            {float} -- (Total) Scalar reward signaled by the environment
            {bool} -- Whether the episode of the environment terminated
            {dict} -- Further episode information (e.g. cumulative reward) retrieved from the environment once an episode completed
        """
        # Carry out the agent's action
        action_tuple = ActionTuple()
        action_tuple.add_discrete(np.asarray(action).reshape([1, -1]))
        self._env.set_actions(self._behavior_name, action_tuple)
        self._env.step()
        info, terminal_info = self._env.get_steps(self._behavior_name)

        # Process step results
        vis_obs, vec_obs, reward, done = self._process_agent_info(info, terminal_info)
        self._rewards.append(reward)

        # Record trajectory data
        if self._record:
            self._trajectory["vis_obs"].append(vis_obs * 255)
            self._trajectory["vec_obs"].append(vec_obs)
            self._trajectory["rewards"].append(reward)
            self._trajectory["actions"].append(action)

        # Episode information
        if done:
            info = {"reward": sum(self._rewards),
                    "length": len(self._rewards)}
        else:
            info = None

        return vis_obs, vec_obs, reward, done, info

    def close(self):
        """Shut down the environment."""
        self._env.close()

    def _process_agent_info(self, info, terminal_info):
        """Extracts the observations, rewards, dones, and episode infos.

        Args:
            info {DecisionSteps}: Current state
            terminal_info {TerminalSteps}: Terminal state

        Returns:
            vis_obs {ndarray} -- Visual observation if available, else None
            vec_obs {ndarray} -- Vector observation if available, else None
            reward {float} -- Reward signal from the environment
            done {bool} -- Whether the episode terminated or not
        """
        # Determine if the episode terminated or not
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info

        # Process visual observations
        if self.visual_observation_space is not None:
            vis_obs = use_info.obs[self._vis_obs_index][0]
        else:
            vis_obs = None

        # Process vector observations
        if self.vector_observation_space is not None:
            for i, dim in enumerate(self._vec_obs_indices):
                if i == 0:
                    vec_obs = use_info.obs[dim][0]
                else:
                    vec_obs = np.concatenate((vec_obs, use_info.obs[dim][0]))
        else:
            vec_obs = None

        return vis_obs, vec_obs, use_info.reward[0], done

    def _verify_environment(self):
        # Verify number of agent behavior types
        if len(self._env.behavior_specs) != 1:
            raise UnityEnvironmentException("The unity environment containts more than one agent type.")
        # Verify number of agents
        decision_steps, _ = self._env.get_steps(self._behavior_name)
        if len(decision_steps) > 1:
            raise UnityEnvironmentException("The unity environment contains more than one agent, which is not supported.")
        # Verify action space type
        if not self._behavior_spec.action_spec.is_discrete() or self._behavior_spec.action_spec.is_continuous():
            raise UnityEnvironmentException("Continuous action spaces are not supported. " 
                                            "Only discrete and MultiDiscrete spaces are supported.")
        # Verify that at least one observation is provided
        num_vis_obs = 0
        num_vec_obs = 0
        for obs_spec in self._behavior_spec.observation_specs:
            if len(obs_spec.shape) == 3:
                num_vis_obs += 1
            elif len(obs_spec.shape) == 1:
                num_vec_obs += 1
        if num_vis_obs == 0 and num_vec_obs == 0:
            raise UnityEnvironmentException("The unity environment does not contain any observations.")
        # Verify number of visual observations
        if num_vis_obs > 1:
            raise UnityEnvironmentException("The unity environment contains more than one visual observation.")
Пример #21
0
class UnityWrapper(Env):
    """This class wraps Unity environments.

    This wrapper has notable constraints:
        - Only one agent (no multi-agent environments).
        - Only one visual observation
        - Only discrete and multi-discrete action spaces (no continuous action space)"""

    def __init__(self, env_path, worker_id = 1, no_graphics = False, realtime_mode = False, config = None):
        """Instantiates the Unity Environment from a specified executable.
        
        Arguments:
            env_path {string} -- Path to the executable of the environment
        
        Keyword Arguments:
            worker_id {int} -- Port of the environment's instance (default: {1})
            no_graphics {bool} -- Whether to allow the executable to render or not (default: {False})
            realtime_mode {bool} -- Whether to run the environment in real time or as fast as possible (default: {False})
            config {dict} -- Specifies the reset parameters of the environment (default: {None})
        """
        # Disable logging
        logging.disable(logging.INFO)

        # Initialize channels
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        self._config = config
        self._realtime_mode = realtime_mode
        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0, width=1280, height=720)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0, width=128, height=128)

        # Launch the environment's executable
        self._env = UnityEnvironment(file_name = env_path, worker_id = worker_id, no_graphics = no_graphics, side_channels=[self.reset_parameters, self.engine_config])
        # Reset the environment
        self._env.reset()
        # Retrieve behavior configuration
        self._behavior_name = list(self._env.behavior_specs)[0]
        self._behavior_spec = self._env.behavior_specs[self._behavior_name]

        # Set action space properties
        if len(self._behavior_spec.action_shape) == 1:
            self._action_space = spaces.Discrete(self._behavior_spec.action_shape[0])
        else:
            self._action_space = spaces.MultiDiscrete(self._behavior_spec.action_shape)
        self._action_names = ["Not available"]
        
        # Count visual and vector observations
        self._num_vis_obs, self._num_vec_obs = 0, 0
        self._vec_obs_indices = []
        for index, obs in enumerate(self._behavior_spec.observation_shapes):
            if len(obs) > 1:
                self._num_vis_obs = self._num_vis_obs + 1
                self._vis_obs_index = index
            else:
                self._num_vec_obs = self._num_vec_obs + 1
                self._vec_obs_indices.append(index)

        # Verify the environment
        self._verify_environment()

        # Set visual observation space property
        if self._num_vis_obs == 1:
            height = self._behavior_spec.observation_shapes[self._vis_obs_index][0]
            width = self._behavior_spec.observation_shapes[self._vis_obs_index][1]
            depth = self._behavior_spec.observation_shapes[self._vis_obs_index][2]
            self._visual_observation_space = spaces.Box(
                low = 0,
                high = 1.0,
                shape = (height, width, depth),
                dtype = np.float32)
        else:
            self._visual_observation_space = None

        # Set vector observation space property
        if self._num_vec_obs > 0:
            # Determine the length of vec obs by summing the length of each distinct one
            vec_obs_length = sum([self._behavior_spec.observation_shapes[i][0] for i in self._vec_obs_indices])
            self._vector_observation_space = (vec_obs_length, )
        else:
            self._vector_observation_space = None

    @property
    def unwrapped(self):
        """        
        Returns:
            {UnityWrapper} -- Environment in its vanilla (i.e. unwrapped) state
        """
        return self
    
    @property
    def action_space(self):
        """Returns the shape of the action space of the agent."""
        return self._action_space

    @property
    def action_names(self):
        return self._action_names

    @property
    def visual_observation_space(self):
        return self._visual_observation_space

    @property
    def vector_observation_space(self):
        return self._vector_observation_space

    def reset(self, reset_params = None):
        """Resets the environment based on a global or just specified config.
        
        Keyword Arguments:
            config {dict} -- Reset parameters to configure the environment (default: {None})
        
        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
        """
        # Track rewards of an entire episode
        self._rewards = []

        # Use the specified reset parameters, otherwise fall back to the global config
        if reset_params is None:
            reset_params = self._config if self._config is not None else {}

        # Apply reset parameters
        for key, value in reset_params.items():
            self.reset_parameters.set_float_parameter(key, value)

        # Reset and verify the environment
        self._env.reset()
        info, terminal_info = self._env.get_steps(self._behavior_name)
        self._verify_environment(len(info))
        
        # Retrieve initial observations
        vis_obs, vec_obs, _, _ = self._process_agent_info(info, terminal_info)
        return vis_obs, vec_obs

    def step(self, action):
        """Runs one timestep of the environment"s dynamics.
        Once an episode is done, reset() has to be called manually.
                
        Arguments:
            action {List} -- A list of at least one discrete action to be executed by the agent

        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
            {float} -- (Total) Scalar reward signaled by the environment
            {bool} -- Whether the episode of the environment terminated
            {dict} -- Further episode information (e.g. cumulative reward) retrieved from the environment once an episode completed
        """
        # Carry out the agent's action
        self._env.set_actions(self._behavior_name, action.reshape([1, -1]))
        self._env.step()
        info, terminal_info = self._env.get_steps(self._behavior_name)

        # Process step results
        vis_obs, vec_obs, reward, done = self._process_agent_info(info, terminal_info)
        self._rewards.append(reward)

        # Episode information
        if done:
            info = {"reward": sum(self._rewards),
                    "length": len(self._rewards)}
        else:
            info = None

        return vis_obs, vec_obs, reward, done, info

    def close(self):
        """Shut down the environment."""
        self._env.close()

    def _process_agent_info(self, info, terminal_info):
        """Extracts the observations, rewards, dones, and episode infos.

        Args:
            info {DecisionSteps}: Current state
            terminal_info {TerminalSteps}: Terminal state

        Returns:
            vis_obs {ndarray} -- Visual observation if available, else None
            vec_obs {ndarray} -- Vector observation if available, else None
            reward {float} -- Reward signal from the environment
            done {bool} -- Whether the episode terminated or not
        """
        # Determine if the episode terminated or not
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info

        # Process visual observations
        if self.visual_observation_space is not None:
            vis_obs = use_info.obs[self._vis_obs_index][0]
        else:
            vis_obs = None

        # Process vector observations
        if self.vector_observation_space is not None:
            for i, dim in enumerate(self._vec_obs_indices):
                if i == 0:
                    vec_obs = use_info.obs[dim][0]
                else:
                    vec_obs = np.concatenate((vec_obs, use_info.obs[dim][0]))
        else:
            vec_obs = None

        return vis_obs, vec_obs, use_info.reward[0], done

    def _verify_environment(self, num_agents = None):
        """Checks if the environment meets the requirements of this wrapper.
        Only one agent and at maximum one visual observation is allowed.
        Only Discrete and MultiDiscrete action spaces are supported.

        Arguments:
            num_agents {int} -- Number of agents (default: {None})
        """
        # Verify number of agent types
        if len(self._env.behavior_specs) != 1:
            raise UnityEnvironmentException("The unity environment containts more than one agent type.")
        # Verify action space type
        if int(self._behavior_spec.action_type.value) == 1:
            raise UnityEnvironmentException("Continuous action spaces are not supported. Only discrete and MultiDiscrete spaces are supported.")
        # Verify number of visual observations
        if self._num_vis_obs > 1:
            raise UnityEnvironmentException("The unity environment contains more than one visual observation.")
        # Verify agent count
        if num_agents is not None and num_agents > 1:
            raise UnityEnvironmentException("The unity environment contains more than one agent.")
Пример #22
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--run_id", help="The run id")
    parser.add_argument("--config_file",
                        default=None,
                        help="The configuration file.")
    parser.add_argument(
        "--env_location",
        default=None,
        help=
        "The location of the environment executable. If not set connects to the editor (Default: None"
    )
    parser.add_argument("--exec_type",
                        default="eval",
                        help="The execution type (Default: eval)")
    parser.add_argument(
        "--eval_best",
        default="false",
        help=
        "Wether to load the best model or the last saved model (Default: true)"
    )
    parser.add_argument("--device",
                        default="cpu",
                        help="The device to run the model on (Default: cpu)")
    parser.add_argument("--simu_spd",
                        default=1.0,
                        type=float,
                        help="The simulation speed (Default: 1.0)")
    parser.add_argument("--eval_episodes",
                        default=-1.0,
                        type=float,
                        help="The simulation speed (Default: 1.0)")
    parser.add_argument(
        "--seed",
        default=0,
        type=int,
        help=
        "The number of episodes when evaluating. If -1 is passed, uses the value on the parameters file. (Default: -1)"
    )
    parser.add_argument(
        "--manual_control",
        default="false",
        help=
        "Overrides the RL agent and reads input from the gamepad (Default: false)"
    )
    parser.add_argument(
        "--naive_policy",
        default="false",
        help="Uses a naive policy that only goes straight (Default: false)")
    parser.add_argument("--visualize_input",
                        default="false",
                        help="Visualize agent image input (Default: false)")

    args = parser.parse_args()
    with open(args.config_file) as file:
        parameters = yaml.load(file, Loader=yaml.FullLoader)

    conf_channel = EngineConfigurationChannel()
    parameter_channel = EnvironmentParametersChannel()
    string_log = StringLogChannel()
    if (args.seed != 0):
        # This means that the user set a different seed on the command line
        parameters["random_seed"] = args.seed
    # if(args.simu_spd != 1.0):
    #     # This means that the user set a different simulation speed on the command line
    #     parameters["time_scale"] = args.simu_spd

    if (args.env_location is None):
        unity_env = UnityEnvironment(
            side_channels=[conf_channel, string_log, parameter_channel])
    else:
        unity_env = UnityEnvironment(
            args.env_location,
            side_channels=[conf_channel, string_log, parameter_channel])
    parameter_channel.set_float_parameter("seed", parameters["random_seed"])
    env_parameters = parameters["simulation"]
    for element in env_parameters:
        parameter_channel.set_float_parameter(element, env_parameters[element])
    if (args.exec_type == "train"):
        parameter_channel.set_float_parameter("training", 1.0)
    else:
        parameters["time_scale"] = args.simu_spd
        parameter_channel.set_float_parameter("training", 0.0)

    if (args.eval_episodes != -1.0):
        parameters["eval_episodes"] = args.eval_episodes

    conf_channel.set_configuration_parameters(
        time_scale=parameters["time_scale"])
    parameter_channel.set_float_parameter("parameters_set", 1.0)

    env = MultiAgentUnityEnv(unity_env, encoder=None)

    model = None

    simu_info = {}
    print("----- ENV INFO -------")
    print(parameters["random_seed"])
    print(env.state_dim)
    print(env.action_dim)
    print(env.action_magnitude)
    print(env.no_of_agents)
    print(env.visual_obs_indexes)
    print(env.non_visual_obs_index)

    simu_info["state_dimension"] = env.state_dim
    simu_info["action_dimension"] = env.action_dim
    simu_info["action_magnitude"] = env.action_magnitude
    simu_info["no_of_agents"] = env.no_of_agents

    if (args.env_location is None):
        simu_info["env_type"] = "Editor"
    else:
        simu_info["env_type"] = args.env_location.split("/")[-1].split(".")[0]
    parameters["simu_info"] = simu_info
    print("------------")
    # quit()

    # env.seed(seed)
    torch.manual_seed(parameters["random_seed"])
    np.random.seed(parameters["random_seed"])
    rl_algorithm = parameters["rl_algorithm"]
    if "memory" in parameters:
        mem_parameters = parameters["memory"]
    else:
        mem_parameters = None
    if "augmentation" in parameters:
        aug_parameters = parameters["augmentation"]
    else:
        aug_parameters = {}
        aug_parameters["indexes"] = None
    # quit()
    if (rl_algorithm["type"] == "DDPG"):
        pass
        # model = DDPG(
        #     num_states,
        #     num_actions,
        #     model_name=args.model_name,
        #     actor_lr=1e-4,
        #     critic_lr=1e-3,
        #     device=args.device,
        #     net_config=args.net_name
        # )
    elif (rl_algorithm["type"] == "TD3"):
        kwargs = {
            "state_dim": env.state_dim,
            "action_dim": env.action_dim,
            # "model_name": parameters["run_id"],
            "model_name": args.run_id,
            "max_action": env.action_magnitude,
            "net_config_name": parameters["architecture_type"],
            "device": args.device,
            "discount": rl_algorithm["discount"],
            "tau": rl_algorithm["tau"],
            "policy_noise":
            rl_algorithm["policy_noise"] * env.action_magnitude,
            "expl_noise": rl_algorithm["expl_noise"],
            "noise_clip": rl_algorithm["noise_clip"] * env.action_magnitude,
            "policy_freq": rl_algorithm["policy_freq"],
            "mem_parameters": mem_parameters
        }
        model = TD3(**kwargs)
        simu_info["actor_total_params"] = model.actor_total_params
        simu_info["critic_total_params"] = model.critic_total_params
    if (args.exec_type == "train"):
        rb_parameters = parameters["replay_buffer"]
        has_curriculum = parameters["base_run_id"] != "None"
        if (rb_parameters["location"] != "None"):
            rb = ReplayBuffer.load(rb_parameters["location"], device="cpu")
        else:
            if (model.actor.memory_capable()
                    and model.critic.memory_capable()):
                rb = ReplayBufferM(
                    state_space_dim=env.state_dim,
                    action_dim=env.action_dim,
                    no_of_agents=env.no_of_agents,
                    memory_length=mem_parameters["memory_length"],
                    buffer_capacity=rb_parameters["size"],
                    batch_size=parameters["batch_size"],
                    a_lstm_hidden_dim=model.actor.lstm_hidden_dim,
                    c_lstm_hidden_dim=model.critic.lstm_hidden_dim,
                    device="cpu")
            else:
                rb = ReplayBuffer(env.state_dim,
                                  env.action_dim,
                                  rb_parameters["size"],
                                  parameters["batch_size"],
                                  device="cpu")
        if (has_curriculum):
            model_type_str = "best" if args.eval_best == "true" else "latest"
            print(
                "Transfering learning from a previous model. The %s model will be loaded..."
                % (model_type_str))
            if (args.eval_best == "true"):
                model.load("./models",
                           name=parameters["base_run_id"],
                           prefix="")
            else:
                model.load("./models",
                           name=parameters["base_run_id"],
                           prefix="last_exec_")
            # model.load("./models", name=parameters["base_run_id"])
        # quit()
        # Saving model information:
        print("Saving training information...")
        model.save_model_info("./models", parameters)
        print("Done!")
        train_model(
            model,
            env,
            rb,
            string_log,
            buffer_size_to_train=rb_parameters["minimum_obs_before_training"],
            eval_freq=parameters["eval_frequency"],
            number_of_eval_episodes=parameters["eval_episodes"],
            max_steps=parameters["max_step_count"],
            save_best=True,
            render=False,
            # writer=None
            # writer=SummaryWriter("./models/logs/" + parameters["run_id"]),
            writer=SummaryWriter("./models/logs/" + args.run_id),
            # buffer_op = args.buffer_op,
            curriculum=has_curriculum,
            # use_augmentation = (model.actor.augmentation_capable() and model.critic.augmentation_capable()),
            use_memory=(model.actor.memory_capable()
                        and model.critic.memory_capable()),
            step_update_ratio=parameters["step_update_ratio"],
            augmentation_indexes=aug_parameters["indexes"],
            parameters=parameters)
    elif (args.exec_type == "eval"):
        if (args.visualize_input == "true"):
            image = np.zeros((256, 256))
            cv2.imshow('Agent image', image)
            # cv2.moveWindow('Agent image',int(960-368/2),0)
            # cv2.waitKey(0)
        rec_arch = False
        if (args.manual_control == "true"):
            model = HumanOperator("./src/Utils/xbox.yaml", env.action_dim)
        elif (args.naive_policy == "true"):
            model = NaiveModel()
            (mr, r_std), (mel, mel_std), (suc, suc_std), ev_steps = eval_model(
                model,
                env,
                parameters["eval_episodes"],
                rec_arch=False,
                verbose=True,
                parameters=parameters,
                render=(args.visualize_input == "true"))
        else:
            model_type_str = "best" if args.eval_best == "true" else "latest"
            print("Evaluating model. The %s model will be loaded..." %
                  (model_type_str))
            if (args.eval_best == "true"):
                model.load("./models", prefix="")
            else:
                model.load("./models", prefix="last_exec_")
            rec_arch = (model.actor.memory_capable()
                        and model.critic.memory_capable())
        (mr, r_std), (mel, mel_std), (suc, suc_std), ev_steps = eval_model(
            model,
            env,
            parameters["eval_episodes"],
            rec_arch=rec_arch,
            render=(args.visualize_input == "true"),
            verbose=True,
            parameters=parameters)
        print("Evaluated the model for %d episodes. Summary:" %
              (parameters["eval_episodes"]))
        print("\tMean reward %f (± %f)" % (mr, r_std))
        print("\tMean success %.2f%% (± %f%%)" % (suc * 100, suc_std * 100))
        print("\tMean episode length %f (± %f)" % (mel, mel_std))
        print("\tTotal steps %f" % (ev_steps))
        if (args.manual_control == "true"):
            model.controller.stop()
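
The script above expects a YAML configuration file passed via --config_file. Based on the keys read in main(), its structure is roughly the following, shown here as the equivalent Python dict that yaml.load would produce; all concrete values are placeholders, not recommended settings.

# Rough shape of the configuration that main() reads; values are placeholders.
parameters = {
    "random_seed": 42,
    "time_scale": 20.0,
    "eval_episodes": 10,
    "eval_frequency": 5000,
    "max_step_count": 1000000,
    "step_update_ratio": 1,
    "batch_size": 256,
    "architecture_type": "cnn",
    "base_run_id": "None",          # set to a run id to transfer from a previous model
    "rl_algorithm": {
        "type": "TD3",
        "discount": 0.99,
        "tau": 0.005,
        "policy_noise": 0.2,
        "expl_noise": 0.1,
        "noise_clip": 0.5,
        "policy_freq": 2,
    },
    "replay_buffer": {
        "location": "None",
        "size": 1000000,
        "minimum_obs_before_training": 10000,
    },
    "simulation": {},               # float parameters forwarded to the environment
    # Optional sections, only read if present:
    # "memory": {"memory_length": 8},
    # "augmentation": {"indexes": None},
}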