Пример #1
0
 def _parse_side_channel_message(
     side_channels: Dict[int, SideChannel], data: bytes
 ) -> None:
     """
     Split a raw side-channel payload into records and dispatch each one.

     The payload is read as back-to-back records. Each record starts with two
     little-endian 32-bit signed integers (channel type, message length) and
     is followed by that many bytes of message data.

     :param side_channels: Registered channels, keyed by channel type.
     :param data: Raw bytes holding zero or more records.
     :raises UnityEnvironmentException: If a record header cannot be read, or
         if fewer bytes than the header announced are available.
     """
     cursor = 0
     total_bytes = len(data)
     while cursor < total_bytes:
         try:
             channel_type, message_len = struct.unpack_from("<ii", data, cursor)
             # The header is 8 bytes: two 4-byte integers.
             body_start = cursor + 8
             body_end = body_start + message_len
             message_data = data[body_start:body_end]
             cursor = body_end
         except Exception:
             raise UnityEnvironmentException(
                 "There was a problem reading a message in a SideChannel. "
                 "Please make sure the version of MLAgents in Unity is "
                 "compatible with the Python version."
             )
         # Slicing past the end silently truncates, so compare lengths
         # to detect an incomplete message.
         if len(message_data) != message_len:
             raise UnityEnvironmentException(
                 "The message received by the side channel {0} was "
                 "unexpectedly short. Make sure your Unity Environment "
                 "sending side channel data properly.".format(channel_type)
             )
         if channel_type not in side_channels:
             # No handler registered: report it and move on to the next record.
             logger.warning(
                 "Unknown side channel data received. Channel type "
                 ": {0}.".format(channel_type)
             )
             continue
         side_channels[channel_type].on_message_received(message_data)
def load_config(config_path: str) -> Dict[str, Any]:
    """
    Read the config file at ``config_path`` and return its parsed content.

    The file is opened as UTF-8 text, which is the encoding the decoding
    error message asks for, and is handed to ``_load_config`` for parsing.

    :param config_path: Path of the config file to read.
    :return: Whatever ``_load_config`` returns for the opened file.
    :raises UnityEnvironmentException: If the file cannot be opened, or if
        its content cannot be decoded as UTF-8.
    """
    try:
        # Decode explicitly as UTF-8 rather than with the platform's locale
        # encoding, so a valid UTF-8 file loads the same way everywhere.
        with open(config_path, encoding="utf-8") as data_file:
            return _load_config(data_file)
    except IOError as err:
        raise UnityEnvironmentException(
            f"Config file could not be found at {config_path}."
        ) from err
    except UnicodeDecodeError as err:
        raise UnityEnvironmentException(
            f"There was an error decoding Config file from {config_path}. "
            f"Make sure your file is saved using UTF-8"
        ) from err
Пример #3
0
def load_config(trainer_config_path):
    """
    Load the trainer configuration from the YAML file at the given path.

    :param trainer_config_path: Path of the YAML trainer configuration file.
    :return: The object produced by parsing the YAML document.
    :raises UnityEnvironmentException: If the file cannot be opened or its
        content cannot be decoded.
    """
    try:
        with open(trainer_config_path) as data_file:
            # safe_load only builds plain Python objects. yaml.load without
            # an explicit Loader can construct arbitrary objects from the
            # file and is rejected outright by recent PyYAML releases.
            trainer_config = yaml.safe_load(data_file)
            return trainer_config
    except IOError:
        raise UnityEnvironmentException('Parameter file could not be found '
                                        'at {}.'.format(trainer_config_path))
    except UnicodeDecodeError:
        raise UnityEnvironmentException(
            'There was an error decoding '
            'Trainer Config from this path : {}'.format(trainer_config_path))
Пример #4
0
def load_config(trainer_config_path: str) -> Dict[str, Any]:
    """
    Parse the YAML trainer configuration stored at ``trainer_config_path``.

    :param trainer_config_path: Path of the YAML file to read.
    :return: The result of ``yaml.safe_load`` on the file content.
    :raises UnityEnvironmentException: If the file cannot be opened or its
        content cannot be decoded.
    """
    try:
        with open(trainer_config_path) as yaml_stream:
            return yaml.safe_load(yaml_stream)
    except IOError:
        missing_message = (
            "Parameter file could not be found "
            "at {}."
        ).format(trainer_config_path)
        raise UnityEnvironmentException(missing_message)
    except UnicodeDecodeError:
        decoding_message = (
            "There was an error decoding "
            "Trainer Config from this path : {}"
        ).format(trainer_config_path)
        raise UnityEnvironmentException(decoding_message)
Пример #5
0
 def close(self):
     """
     Shut the Unity environment down by delegating to ``self._close()``.

     :raises UnityEnvironmentException: If no environment is currently loaded.
     """
     if not self._loaded:
         raise UnityEnvironmentException("No Unity environment is loaded.")
     self._close()
Пример #6
0
 def _create_model_path(model_path):
     """
     Make sure the directory that will hold the generated model exists.

     Nothing is done when the path already exists; otherwise the directory
     and any missing parents are created.

     :param model_path: Directory in which the model is to be stored.
     :raises UnityEnvironmentException: If checking for or creating the
         directory fails for any reason.
     """
     try:
         already_present = os.path.exists(model_path)
         if already_present:
             return
         os.makedirs(model_path)
     except Exception:
         raise UnityEnvironmentException(
             "The folder {} containing the "
             "generated model could not be "
             "accessed. Please make sure the "
             "permissions are set correctly.".format(model_path))
def _load_config(fp: TextIO) -> Dict[str, Any]:
    """
    Parse a YAML document from an open text stream.

    :param fp: File-like object positioned at the start of the YAML text.
    :return: The result of ``yaml.safe_load`` on the stream.
    :raises UnityEnvironmentException: If the YAML parser reports a
        ``ParserError``; the original error is attached as the cause.
    """
    try:
        parsed = yaml.safe_load(fp)
    except yaml.parser.ParserError as parse_error:
        raise UnityEnvironmentException(
            "Error parsing yaml file. Please check for formatting errors. "
            "A tool such as http://www.yamllint.com/ can be helpful with this."
        ) from parse_error
    return parsed
Пример #8
0
 def initialize_trainers(self, trainer_config: Dict[str, Dict[str, str]]):
     """
     Initialization of the trainers

     For every external brain, the ``default`` section of the configuration
     is copied, the summary path, model path and checkpoint count are filled
     in, and any brain-specific section is merged on top. A trainer matching
     the resulting ``trainer`` entry is then created and stored in
     ``self.trainers``.

     :param trainer_config: The configurations of the trainers
     :raises UnityEnvironmentException: If a brain's ``trainer`` entry is not
         one of ``offline_bc``, ``online_bc`` or ``ppo``.
     """
     trainer_parameters_dict = {}
     for brain_name in self.external_brains:
         trainer_parameters = trainer_config['default'].copy()
         trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
             basedir=self.summaries_dir,
             name=str(self.run_id) + '_' + brain_name)
         trainer_parameters['model_path'] = '{basedir}/{name}'.format(
             basedir=self.model_path, name=brain_name)
         trainer_parameters['keep_checkpoints'] = self.keep_checkpoints
         if brain_name in trainer_config:
             # A non-dict entry names another section to use instead;
             # follow such references until an actual section is reached.
             _brain_key = brain_name
             while not isinstance(trainer_config[_brain_key], dict):
                 _brain_key = trainer_config[_brain_key]
             trainer_parameters.update(trainer_config[_brain_key])
         trainer_parameters_dict[brain_name] = trainer_parameters.copy()
     # Trainers are only built once every brain's parameters are resolved.
     for brain_name in self.external_brains:
         brain = self.external_brains[brain_name]
         parameters = trainer_parameters_dict[brain_name]
         trainer_type = parameters['trainer']
         if trainer_type == 'offline_bc':
             self.trainers[brain_name] = OfflineBCTrainer(
                 brain, parameters, self.train_model,
                 self.load_model, self.seed, self.run_id)
         elif trainer_type == 'online_bc':
             self.trainers[brain_name] = OnlineBCTrainer(
                 brain, parameters, self.train_model,
                 self.load_model, self.seed, self.run_id)
         elif trainer_type == 'ppo':
             # Without a meta curriculum there is no lesson length to use.
             if self.meta_curriculum:
                 min_lesson_length = self.meta_curriculum.brains_to_curriculums[
                     brain_name].min_lesson_length
             else:
                 min_lesson_length = 0
             self.trainers[brain_name] = PPOTrainer(
                 brain, min_lesson_length, parameters, self.train_model,
                 self.load_model, self.seed, self.run_id)
             self.trainer_metrics[brain_name] = self.trainers[
                 brain_name].trainer_metrics
         else:
             raise UnityEnvironmentException('The trainer config contains '
                                             'an unknown trainer type for '
                                             'brain {}'.format(brain_name))
Пример #9
0
 def reset(self) -> None:
     """
     Send a reset request through the communicator and store its reply.

     After a successful exchange the group specs and state are updated from
     the reply, the first-message flag is cleared and pending actions are
     discarded.

     :raises UnityEnvironmentException: If no environment is loaded.
     :raises UnityCommunicationException: If the communicator returns nothing.
     """
     if not self._loaded:
         raise UnityEnvironmentException("No Unity environment is loaded.")
     reset_request = self._generate_reset_input()
     outputs = self.communicator.exchange(reset_request)
     if outputs is None:
         raise UnityCommunicationException("Communicator has stopped.")
     self._update_group_specs(outputs)
     self._update_state(outputs.rl_output)
     self._is_first_message = False
     self._env_actions.clear()
Пример #10
0
def step_result_to_brain_info(
    step_result: BatchedStepResult,
    group_spec: AgentGroupSpec,
    agent_id_prefix: int = None,
) -> BrainInfo:
    """
    Repackage a batched step result as a ``BrainInfo``.

    Observations whose shape has 2 dimensions are concatenated along axis 1
    into a single vector observation; observations whose shape has 4
    dimensions are passed through as visual observations. The action mask
    defaults to all ones and, for discrete actions with a mask present, is
    the complement of the concatenated ``step_result.action_mask``.

    :param step_result: The batched step data to convert.
    :param group_spec: Spec describing the action space of the group.
    :param agent_id_prefix: Optional prefix; when given, agent ids are
        formatted as ``$<prefix>-<id>`` instead of ``str(id)``.
    :return: The ``BrainInfo`` built from the step result.
    :raises UnityEnvironmentException: If an observation's shape has neither
        2 nor 4 dimensions.
    """
    n_agents = step_result.n_agents()

    # Sort observations by the number of dimensions of their shape.
    vector_parts = []
    visual_parts = []
    for observation in step_result.obs:
        rank = len(observation.shape)
        if rank == 2:
            vector_parts.append(observation)
        elif rank == 4:
            visual_parts.append(observation)
        else:
            raise UnityEnvironmentException(
                "Invalid input received from the environment, the observation should "
                "either be a vector of float or a PNG image")

    if vector_parts:
        vec_obs = np.concatenate(vector_parts, axis=1)
    else:
        # No vector observation at all: one zero-width row per agent.
        vec_obs = np.zeros((n_agents, 0), dtype=np.float32)

    mask_width = np.sum(group_spec.action_size)
    discrete = group_spec.is_action_discrete()
    if discrete:
        mask_width = np.sum(group_spec.discrete_action_branches)
    mask = np.ones((n_agents, mask_width), dtype=np.float32)
    if discrete and step_result.action_mask is not None:
        mask = 1 - np.concatenate(step_result.action_mask, axis=1)

    if agent_id_prefix is not None:
        agent_ids = [
            f"${agent_id_prefix}-{ag_id}" for ag_id in step_result.agent_id
        ]
    else:
        agent_ids = [str(ag_id) for ag_id in step_result.agent_id]

    return BrainInfo(
        visual_parts,
        vec_obs,
        list(step_result.reward),
        agent_ids,
        list(step_result.done),
        list(step_result.max_step),
        mask,
    )
    def _initialize_trainers(self, trainer_config, sess):
        """
        Build one trainer per external brain and store them in ``self.trainers``.

        Each brain starts from a copy of the ``default`` config section, gets
        a graph scope and summary path, and is then overridden by its own
        config section when one exists.

        :param trainer_config: Trainer configuration; must contain ``default``
            whenever there is at least one external brain.
        :param sess: Session object handed to every trainer constructor.
        :raises UnityEnvironmentException: If a brain's ``trainer`` entry is
            neither ``imitation`` nor ``ppo``.
        """
        # TODO: This probably doesn't need to be reinitialized.
        self.trainers = {}
        brain_names = self.env.external_brain_names
        several_brains = len(brain_names) > 1

        resolved = {}
        for brain_name in brain_names:
            params = trainer_config['default'].copy()
            if several_brains:
                # Replace every run of non-alphanumeric characters with '-'.
                scope = re.sub('[^0-9a-zA-Z]+', '-', brain_name)
                summary_name = str(self.run_id) + '_' + scope
            else:
                scope = ''
                summary_name = str(self.run_id)
            params['graph_scope'] = scope
            params['summary_path'] = '{basedir}/{name}'.format(
                basedir=self.summaries_dir, name=summary_name)
            if brain_name in trainer_config:
                # A non-dict entry names another section; follow the chain.
                section = trainer_config[brain_name]
                while not isinstance(section, dict):
                    section = trainer_config[section]
                params.update(section)
            resolved[brain_name] = params.copy()

        for brain_name in brain_names:
            params = resolved[brain_name]
            trainer_type = params['trainer']
            if trainer_type == 'imitation':
                self.trainers[brain_name] = BehavioralCloningTrainer(
                    sess, self.env.brains[brain_name], params,
                    self.train_model, self.seed, self.run_id)
            elif trainer_type == 'ppo':
                # Original author's note: the external brain becomes an
                # internal brain here.
                if self.meta_curriculum:
                    lesson_length = self.meta_curriculum.brains_to_curriculums[
                        brain_name].min_lesson_length
                else:
                    lesson_length = 0
                self.trainers[brain_name] = PPOTrainer(
                    sess, self.env.brains[brain_name], lesson_length,
                    params, self.train_model, self.seed, self.run_id)
            else:
                raise UnityEnvironmentException('The trainer config contains '
                                                'an unknown trainer type for '
                                                'brain {}'.format(brain_name))
Пример #12
0
 def step(self) -> None:
     """
     Send the pending actions through the communicator and store the reply.

     The very first call is turned into a ``reset()``. Groups with no action
     set get an empty action sized to their current agent count.

     :raises UnityEnvironmentException: If no environment is loaded.
     :raises UnityCommunicationException: If the communicator returns nothing.
     """
     if self._is_first_message:
         return self.reset()
     if not self._loaded:
         raise UnityEnvironmentException("No Unity environment is loaded.")

     # fill the blanks for missing actions
     for group_name, group_spec in self._env_specs.items():
         if group_name in self._env_actions:
             continue
         if group_name in self._env_state:
             n_agents = self._env_state[group_name].n_agents()
         else:
             n_agents = 0
         self._env_actions[group_name] = group_spec.create_empty_action(n_agents)

     step_input = self._generate_step_input(self._env_actions)
     with hierarchical_timer("communicator.exchange"):
         outputs = self.communicator.exchange(step_input)
     if outputs is None:
         raise UnityCommunicationException("Communicator has stopped.")
     self._update_group_specs(outputs)
     self._update_state(outputs.rl_output)
     self._env_actions.clear()
Пример #13
0
    def executable_launcher(self, file_name, docker_training, no_graphics, args):
        """
        Locate the Unity executable for the current platform and start it.

        The known executable extensions are removed from ``file_name``, then a
        platform-specific list of glob patterns is tried in order (paths
        relative to the current working directory first). The first match is
        launched and the process handle is stored in ``self.proc1``.

        :param file_name: Name or path of the environment binary, with or
            without its platform extension.
        :param docker_training: If true, launch through ``xvfb-run`` in a
            shell; ``no_graphics`` and ``args`` are not used in that mode.
        :param no_graphics: If true, add ``-nographics -batchmode`` to the
            command line (non-docker launch only).
        :param args: Extra command line arguments (non-docker launch only).
        :raises UnityEnvironmentException: If no executable matches, or if
            launching it fails with a ``PermissionError``.
        """
        cwd = os.getcwd()
        # NOTE: these replacements remove the extension text wherever it
        # occurs in the path, not only at the end.
        file_name = (
            file_name.strip()
            .replace(".app", "")
            .replace(".exe", "")
            .replace(".x86_64", "")
            .replace(".x86", "")
        )
        true_filename = os.path.basename(os.path.normpath(file_name))
        logger.debug("The true file name is {}".format(true_filename))

        # Glob patterns to try, in priority order, for the current platform.
        if platform == "linux" or platform == "linux2":
            patterns = [
                os.path.join(cwd, file_name) + ".x86_64",
                os.path.join(cwd, file_name) + ".x86",
                file_name + ".x86_64",
                file_name + ".x86",
            ]
        elif platform == "darwin":
            patterns = [
                os.path.join(
                    cwd, file_name + ".app", "Contents", "MacOS", true_filename
                ),
                os.path.join(file_name + ".app", "Contents", "MacOS", true_filename),
                os.path.join(cwd, file_name + ".app", "Contents", "MacOS", "*"),
                os.path.join(file_name + ".app", "Contents", "MacOS", "*"),
            ]
        elif platform == "win32":
            patterns = [
                os.path.join(cwd, file_name + ".exe"),
                file_name + ".exe",
            ]
        else:
            patterns = []

        launch_string = None
        for pattern in patterns:
            candidates = glob.glob(pattern)
            if candidates:
                launch_string = candidates[0]
                break

        if launch_string is None:
            self._close()
            raise UnityEnvironmentException(
                "Couldn't launch the {0} environment. "
                "Provided filename does not match any environments.".format(
                    true_filename
                )
            )

        logger.debug("This is the launch string {}".format(launch_string))
        # Launch Unity environment
        if not docker_training:
            subprocess_args = [launch_string]
            if no_graphics:
                subprocess_args += ["-nographics", "-batchmode"]
            subprocess_args += ["--port", str(self.port)]
            subprocess_args += args
            try:
                self.proc1 = subprocess.Popen(
                    subprocess_args,
                    # start_new_session=True means that signals to the parent python process
                    # (e.g. SIGINT from keyboard interrupt) will not be sent to the new process on POSIX platforms.
                    # This is generally good since we want the environment to have a chance to shutdown,
                    # but may be undesirable in some cases; if so, we'll add a command-line toggle.
                    # Note that on Windows, the CTRL_C signal will still be sent.
                    start_new_session=True,
                )
            except PermissionError as perm:
                # This is likely due to missing read or execute permissions on file.
                raise UnityEnvironmentException(
                    f"Error when trying to launch environment - make sure "
                    f"permissions are set correctly. For example "
                    f'"chmod -R 755 {launch_string}"'
                ) from perm
        else:
            # Local import: the file-level import block is not visible here.
            import shlex

            # Comments for future maintenance:
            #     xvfb-run is a wrapper around Xvfb, a virtual xserver where all
            #     rendering is done to virtual memory. It automatically creates a
            #     new virtual server automatically picking a server number `auto-servernum`.
            #     The server is passed the arguments using `server-args`, we are telling
            #     Xvfb to create Screen number 0 with width 640, height 480 and depth 24 bits.
            #     Note that 640 X 480 are the default width and height. The main reason for
            #     us to add this is because we'd like to change the depth from the default
            #     of 8 bits to 24.
            #     Unfortunately, this means that we will need to pass the arguments through
            #     a shell which is why we set `shell=True`. Now, this adds its own
            #     complications. E.g SIGINT can bounce off the shell and not get propagated
            #     to the child processes. This is why we add `exec`, so that the shell gets
            #     launched, the arguments are passed to `xvfb-run`. `exec` replaces the shell
            #     we created with `xvfb`.
            #
            # The executable path is quoted because the command goes through
            # a shell: an unquoted path containing spaces or shell
            # metacharacters would be split or interpreted by the shell.
            docker_ls = (
                "exec xvfb-run --auto-servernum"
                " --server-args='-screen 0 640x480x24'"
                " {0} --port {1}"
            ).format(shlex.quote(launch_string), str(self.port))
            self.proc1 = subprocess.Popen(
                docker_ls,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                shell=True,
            )
def initialize_trainers(
    trainer_config: Dict[str, Any],
    external_brains: Dict[str, BrainParameters],
    summaries_dir: str,
    run_id: str,
    model_path: str,
    keep_checkpoints: int,
    train_model: bool,
    load_model: bool,
    seed: int,
    meta_curriculum: MetaCurriculum = None,
    multi_gpu: bool = False,
) -> Dict[str, Trainer]:
    """
    Create one trainer per external brain from the trainer configuration.

    Every brain starts from a copy of the ``default`` config section, gets its
    summary path, model path and checkpoint count, and is then overridden by
    its own config section when one exists. Trainers are only instantiated
    after the parameters of all brains have been resolved.

    :param trainer_config: Original trainer configuration loaded from YAML
    :param external_brains: BrainParameters provided by the Unity environment
    :param summaries_dir: Directory to store trainer summary statistics
    :param run_id: Run ID to associate with this training run
    :param model_path: Path to save the model
    :param keep_checkpoints: How many model checkpoints to keep
    :param train_model: Whether to train the model (vs. run inference)
    :param load_model: Whether to load the model or randomly initialize
    :param seed: The random seed to use
    :param meta_curriculum: Optional meta_curriculum, used to determine a
        reward buffer length for the PPO and SAC trainers
    :param multi_gpu: Whether to use multi-GPU training (passed to PPOTrainer)
    :return: Mapping from brain name to the trainer created for it
    :raises UnityEnvironmentException: If a brain's ``trainer`` entry is not
        one of ``offline_bc``, ``online_bc``, ``ppo`` or ``sac``.
    """
    resolved_parameters = {}
    for brain_name in external_brains:
        parameters = trainer_config["default"].copy()
        summary_name = str(run_id) + "_" + brain_name
        parameters["summary_path"] = "{basedir}/{name}".format(
            basedir=summaries_dir, name=summary_name)
        parameters["model_path"] = "{basedir}/{name}".format(
            basedir=model_path, name=brain_name)
        parameters["keep_checkpoints"] = keep_checkpoints
        if brain_name in trainer_config:
            # A non-dict entry names another section; follow the chain.
            section: Any = trainer_config[brain_name]
            while not isinstance(section, dict):
                section = trainer_config[section]
            parameters.update(section)
        resolved_parameters[brain_name] = parameters.copy()

    trainers = {}
    for brain_name, brain in external_brains.items():
        parameters = resolved_parameters[brain_name]
        trainer_type = parameters["trainer"]
        if trainer_type == "offline_bc":
            trainers[brain_name] = OfflineBCTrainer(
                brain, parameters, train_model, load_model, seed, run_id
            )
        elif trainer_type == "online_bc":
            trainers[brain_name] = OnlineBCTrainer(
                brain, parameters, train_model, load_model, seed, run_id
            )
        elif trainer_type == "ppo" or trainer_type == "sac":
            # Both trainers take a lesson length as their second argument;
            # it falls back to 1 when there is no meta curriculum.
            if meta_curriculum:
                min_lesson_length = meta_curriculum.brains_to_curriculums[
                    brain_name].min_lesson_length
            else:
                min_lesson_length = 1
            if trainer_type == "ppo":
                trainers[brain_name] = PPOTrainer(
                    brain,
                    min_lesson_length,
                    parameters,
                    train_model,
                    load_model,
                    seed,
                    run_id,
                    multi_gpu,
                )
            else:
                trainers[brain_name] = SACTrainer(
                    brain,
                    min_lesson_length,
                    parameters,
                    train_model,
                    load_model,
                    seed,
                    run_id,
                )
        else:
            raise UnityEnvironmentException("The trainer config contains "
                                            "an unknown trainer type for "
                                            "brain {}".format(brain_name))
    return trainers
Пример #15
0
    def __init__(
        self,
        file_name: Optional[str] = None,
        worker_id: int = 0,
        base_port: int = 5005,
        seed: int = 0,
        docker_training: bool = False,
        no_graphics: bool = False,
        timeout_wait: int = 60,
        args: Optional[List[str]] = None,
        side_channels: Optional[List[SideChannel]] = None,
    ):
        """
        Starts a new unity environment and establishes a connection with the environment.
        Notice: Currently communication between Unity and Python takes place over an open socket without authentication.
        Ensure that the network where training takes place is secure.

        :param file_name: Name of Unity environment binary. If None, no binary is launched and the
            communicator waits for a connection from the Unity Editor instead.
        :param worker_id: Number added to base_port to obtain the communication port. Must be 0 when
            file_name is None.
        :param base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
        :param seed: Seed sent to Unity in the initialization message.
        :param docker_training: Informs this class whether the process is being run within a container.
        :param no_graphics: Whether to run the Unity simulator in no-graphics mode
        :param timeout_wait: Time (in seconds) to wait for connection from environment.
        :param args: Additional Unity command line arguments
        :param side_channels: Additional side channels for non-RL communication with Unity
        :raises UnityEnvironmentException: If two side channels share a channel type, if file_name is
            None while worker_id is not 0, or if the API versions of Unity and Python differ.
        :raises UnityTimeOutException: Re-raised, after closing, if sending the initialization
            parameters times out.
        """
        args = args or []
        atexit.register(self._close)
        self.port = base_port + worker_id
        self._buffer_size = 12000
        self._version_ = UnityEnvironment.API_VERSION
        # If true, this means the environment was successfully loaded
        self._loaded = False
        # The process that is started. If None, no process was started
        self.proc1 = None
        self.timeout_wait: int = timeout_wait
        self.communicator = self.get_communicator(worker_id, base_port, timeout_wait)
        self.worker_id = worker_id
        self.side_channels: Dict[int, SideChannel] = {}
        if side_channels is not None:
            for _sc in side_channels:
                if _sc.channel_type in self.side_channels:
                    raise UnityEnvironmentException(
                        "There cannot be two side channels with the same channel type {0}.".format(
                            _sc.channel_type
                        )
                    )
                self.side_channels[_sc.channel_type] = _sc

        # If the environment name is None, a new environment will not be launched
        # and the communicator will directly try to connect to an existing unity environment.
        # If the worker-id is not 0 and the environment name is None, an error is thrown
        if file_name is None and worker_id != 0:
            raise UnityEnvironmentException(
                "If the environment name is None, "
                "the worker-id must be 0 in order to connect with the Editor."
            )
        if file_name is not None:
            self.executable_launcher(file_name, docker_training, no_graphics, args)
        else:
            logger.info(
                f"Listening on port {self.port}. "
                f"Start training by pressing the Play button in the Unity Editor."
            )
        self._loaded = True

        rl_init_parameters_in = UnityRLInitializationInputProto(seed=seed)
        try:
            aca_output = self.send_academy_parameters(rl_init_parameters_in)
            aca_params = aca_output.rl_initialization_output
        except UnityTimeOutException:
            self._close()
            raise
        # TODO : think of a better way to expose the academyParameters
        self._unity_version = aca_params.version
        if self._unity_version != self._version_:
            self._close()
            # The trailing space after the URL keeps it from running into
            # the next sentence fragment.
            raise UnityEnvironmentException(
                f"The API number is not compatible between Unity and python. "
                f"Python API: {self._version_}, Unity API: {self._unity_version}.\n"
                f"Please go to https://github.com/Unity-Technologies/ml-agents/releases/tag/latest_release "
                f"to download the latest version of ML-Agents."
            )
        self._env_state: Dict[str, BatchedStepResult] = {}
        self._env_specs: Dict[str, AgentGroupSpec] = {}
        self._env_actions: Dict[str, np.ndarray] = {}
        self._is_first_message = True
        self._update_group_specs(aca_output)
Пример #16
0
    def initialize_trainers(
        self,
        trainer_config: Dict[str, Any],
        external_brains: Dict[str, BrainParameters],
    ) -> None:
        """
        Create a trainer for each external brain and store it in ``self.trainers``.

        Every brain starts from a copy of the ``default`` config section, gets
        its summary path, model path and checkpoint count, and is then
        overridden by its own config section when one exists.

        :param trainer_config: The configurations of the trainers
        :param external_brains: Brain parameters keyed by brain name
        :raises UnityEnvironmentException: If a brain's ``trainer`` entry is
            not one of ``offline_bc``, ``online_bc`` or ``ppo``.
        """
        path_template = "{basedir}/{name}"
        resolved_parameters = {}
        for brain_name in external_brains:
            parameters = trainer_config["default"].copy()
            parameters["summary_path"] = path_template.format(
                basedir=self.summaries_dir,
                name=str(self.run_id) + "_" + brain_name)
            parameters["model_path"] = path_template.format(
                basedir=self.model_path, name=brain_name)
            parameters["keep_checkpoints"] = self.keep_checkpoints
            if brain_name in trainer_config:
                # A non-dict entry names another section; follow the chain.
                section: Any = trainer_config[brain_name]
                while not isinstance(section, dict):
                    section = trainer_config[section]
                parameters.update(section)
            resolved_parameters[brain_name] = parameters.copy()

        for brain_name, brain in external_brains.items():
            parameters = resolved_parameters[brain_name]
            trainer_type = parameters["trainer"]
            if trainer_type == "offline_bc":
                self.trainers[brain_name] = OfflineBCTrainer(
                    brain=brain,
                    trainer_parameters=parameters,
                    training=self.train_model,
                    load=self.load_model,
                    seed=self.seed,
                    run_id=self.run_id,
                )
            elif trainer_type == "online_bc":
                self.trainers[brain_name] = OnlineBCTrainer(
                    brain=brain,
                    trainer_parameters=parameters,
                    training=self.train_model,
                    load=self.load_model,
                    seed=self.seed,
                    run_id=self.run_id,
                )
            elif trainer_type == "ppo":
                # Find lesson length based on the form of learning
                lesson_length = 1
                if self.meta_curriculum:
                    curriculum = self.meta_curriculum.brains_to_curriculums[
                        brain_name]
                    lesson_length = curriculum.min_lesson_length

                ppo_trainer = PPOTrainer(
                    brain=brain,
                    reward_buff_cap=lesson_length,
                    trainer_parameters=parameters,
                    training=self.train_model,
                    load=self.load_model,
                    seed=self.seed,
                    run_id=self.run_id,
                )
                self.trainers[brain_name] = ppo_trainer
                self.trainer_metrics[brain_name] = self.trainers[
                    brain_name].trainer_metrics
            else:
                raise UnityEnvironmentException("The trainer config contains "
                                                "an unknown trainer type for "
                                                "brain {}".format(brain_name))
 def initialize_trainers(self, trainer_config: Dict[str, Dict[str, str]]):
     """
     Create a trainer for each external brain and store it in ``self.trainers``.

     Every brain starts from a copy of the ``default`` config section, gets
     its summary path, model path and checkpoint count, and is then
     overridden by its own config section when one exists.

     :param trainer_config: The configurations of the trainers
     :raises UnityEnvironmentException: If a brain's ``trainer`` entry is not
         one of ``offline_bc``, ``online_bc`` or ``ppo``.
     """
     resolved_parameters = {}
     for brain_name in self.external_brains:
         parameters = trainer_config["default"].copy()
         summary_name = str(self.run_id) + "_" + brain_name
         parameters["summary_path"] = "{basedir}/{name}".format(
             basedir=self.summaries_dir, name=summary_name
         )
         parameters["model_path"] = "{basedir}/{name}".format(
             basedir=self.model_path, name=brain_name
         )
         parameters["keep_checkpoints"] = self.keep_checkpoints
         if brain_name in trainer_config:
             # A non-dict entry names another section; follow the chain.
             section = trainer_config[brain_name]
             while not isinstance(section, dict):
                 section = trainer_config[section]
             parameters.update(section)
         resolved_parameters[brain_name] = parameters.copy()

     for brain_name in self.external_brains:
         brain = self.external_brains[brain_name]
         parameters = resolved_parameters[brain_name]
         trainer_type = parameters["trainer"]
         if trainer_type == "offline_bc":
             self.trainers[brain_name] = OfflineBCTrainer(
                 brain,
                 parameters,
                 self.train_model,
                 self.load_model,
                 self.seed,
                 self.run_id,
             )
         elif trainer_type == "online_bc":
             self.trainers[brain_name] = OnlineBCTrainer(
                 brain,
                 parameters,
                 self.train_model,
                 self.load_model,
                 self.seed,
                 self.run_id,
             )
         elif trainer_type == "ppo":
             # Without a meta curriculum the lesson length falls back to 0.
             if self.meta_curriculum:
                 curriculum = self.meta_curriculum.brains_to_curriculums[brain_name]
                 min_lesson_length = curriculum.min_lesson_length
             else:
                 min_lesson_length = 0
             self.trainers[brain_name] = PPOTrainer(
                 brain,
                 min_lesson_length,
                 parameters,
                 self.train_model,
                 self.load_model,
                 self.seed,
                 self.run_id,
             )
             self.trainer_metrics[brain_name] = self.trainers[
                 brain_name
             ].trainer_metrics
         else:
             raise UnityEnvironmentException(
                 "The trainer config contains "
                 "an unknown trainer type for "
                 "brain {}".format(brain_name)
             )