def _initialize_trainers(self, trainer_config, sess):
     """
     Resolve the trainer parameters of every external brain and create the
     matching trainer object for each of them.

     For each name in ``self.env.external_brain_names`` the parameters start
     as a copy of ``trainer_config['default']``, receive a ``graph_scope`` and
     a ``summary_path``, and are then overridden by the brain's own section of
     ``trainer_config`` when one exists. ``self.trainers`` is reset and filled
     with one trainer per brain.

     :param trainer_config: Mapping of configuration sections. It must hold a
         'default' section; a brain-name entry is either a dict of overrides
         or the key of another entry to follow.
     :param sess: Passed unchanged to every trainer constructor.
     :raises UnityEnvironmentException: If a brain's 'trainer' value is
         neither "imitation" nor "ppo".
     """
     brain_names = self.env.external_brain_names
     several_brains = len(brain_names) > 1
     params_by_brain = {}
     self.trainers = {}

     # Pass 1: work out the final parameter set of every brain.
     for brain_name in brain_names:
         params = trainer_config['default'].copy()
         if several_brains:
             # Each brain gets its own scope, built from its sanitised name.
             scope = re.sub('[^0-9a-zA-Z]+', '-', brain_name)
             run_name = str(self.run_id) + '_' + scope
         else:
             scope = ''
             run_name = str(self.run_id)
         params['graph_scope'] = scope
         params['summary_path'] = '{basedir}/{name}'.format(
             basedir=self.summaries_dir, name=run_name)

         if brain_name in trainer_config:
             # A non-dict entry names another entry; follow the chain until
             # an actual section (a dict) is reached.
             section_key = brain_name
             while not isinstance(trainer_config[section_key], dict):
                 section_key = trainer_config[section_key]
             params.update(trainer_config[section_key])

         params_by_brain[brain_name] = params.copy()

     # Pass 2: build the trainers only once every parameter set is resolved.
     for brain_name in brain_names:
         brain_params = params_by_brain[brain_name]
         trainer_type = brain_params['trainer']
         if trainer_type == "imitation":
             trainer_class = BehavioralCloningTrainer
         elif trainer_type == "ppo":
             trainer_class = PPOTrainer
         else:
             raise UnityEnvironmentException(
                 "The trainer config contains an unknown trainer type for brain {}"
                 .format(brain_name))
         self.trainers[brain_name] = trainer_class(
             sess, self.env, brain_name, brain_params, self.train_model,
             self.seed)
    def _initialize_trainers(self, trainer_config, sess):
        """
        Resolve the trainer parameters of every external brain, create the
        matching trainer for each, and give every brain its slice of the
        trainable TensorFlow variables.

        For each name in ``self.env.external_brain_names`` the parameters start
        as a copy of ``trainer_config['default']``, receive a ``graph_scope``
        and a ``summary_path``, and are then overridden by the brain's own
        section of ``trainer_config`` when one exists. The results are stored
        in ``self.trainer_parameters_dict``, ``self.trainers`` and
        ``self.brain_vars``; ``self.free_brain_vars`` is set when a non-frozen
        "madqn" brain is present.

        :param trainer_config: Mapping of configuration sections. It must hold
            a 'default' section; a brain-name entry is either a dict of
            overrides or the key of another entry to follow.
        :param sess: Passed unchanged to every trainer constructor.
        :raises UnityEnvironmentException: If a brain's 'trainer' value is not
            one of "imitation", "ppo", "dqn", "madqn", "mappo" or "coma", or
            if a frozen "madqn" brain exists without any non-frozen "madqn"
            brain to take variables from.
        """
        brain_names = self.env.external_brain_names
        brain_count = len(brain_names)
        self.trainer_parameters_dict = {}
        self.trainers = {}

        # Pass 1: work out the final parameter set of every brain.
        for brain_name in brain_names:
            trainer_parameters = trainer_config['default'].copy()
            if brain_count > 1:
                # Each brain gets its own scope, built from its sanitised name.
                graph_scope = re.sub('[^0-9a-zA-Z]+', '-', brain_name)
                run_name = str(self.run_id) + '_' + graph_scope
            else:
                graph_scope = ''
                run_name = str(self.run_id)
            trainer_parameters['graph_scope'] = graph_scope
            trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
                basedir=self.summaries_dir, name=run_name)
            if brain_name in trainer_config:
                # A non-dict entry names another entry; follow the chain until
                # an actual section (a dict) is reached.
                _brain_key = brain_name
                while not isinstance(trainer_config[_brain_key], dict):
                    _brain_key = trainer_config[_brain_key]
                trainer_parameters.update(trainer_config[_brain_key])
            self.trainer_parameters_dict[brain_name] = trainer_parameters.copy()

        # Pass 2: build the trainers only once every parameter set is resolved.
        # The class names are looked up inside the branches so that only the
        # trainer types actually requested need to be importable.
        for brain_name in brain_names:
            trainer_parameters = self.trainer_parameters_dict[brain_name]
            trainer_type = trainer_parameters['trainer']
            if trainer_type == "imitation":
                trainer_class = BehavioralCloningTrainer
            elif trainer_type == "ppo":
                trainer_class = PPOTrainer
            elif trainer_type == "dqn":
                trainer_class = DQNTrainer
            elif trainer_type == "madqn":
                trainer_class = MADQNTrainer
            elif trainer_type == "mappo":
                trainer_class = MAPPOTrainer
            elif trainer_type == "coma":
                trainer_class = COMATrainer
            else:
                raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}"
                                                .format(brain_name))
            self.trainers[brain_name] = trainer_class(sess, self.env, brain_name, trainer_parameters,
                                                      self.train_model, self.seed)

        # Pass 3: split the trainable variables into one contiguous slice per
        # brain, in brain order.
        # NOTE(review): this assumes every brain created the same number of
        # trainable variables, in creation order -- confirm against the
        # trainer implementations.
        all_vars = tf.trainable_variables()
        total_vars = len(all_vars)
        self.brain_vars = {}
        # Guard the division so an environment without external brains does
        # not raise ZeroDivisionError.
        vars_per_brain = total_vars // brain_count if brain_count else 0
        free_brain_vars = None
        for index, brain_name in enumerate(brain_names):
            start = index * vars_per_brain
            # Advance by one share per brain (the previous code multiplied the
            # end index, so a third brain would have received nothing). The
            # last brain also takes any remainder, as it did with two brains.
            if index == brain_count - 1:
                stop = total_vars
            else:
                stop = start + vars_per_brain
            self.brain_vars[brain_name] = all_vars[start:stop]

            trainer_type = self.trainer_parameters_dict[brain_name]['trainer']
            if trainer_type in ("dqn", "madqn"):
                trainer = self.trainers[brain_name]
                trainer.update_target_graph(self.brain_vars[brain_name])
                if trainer_type == "madqn" and not trainer.parameters['frozen']:
                    # Remember the non-frozen brain's variables for pass 4;
                    # a later non-frozen brain overrides an earlier one.
                    free_brain_vars = self.brain_vars[brain_name]
                    self.free_brain_vars = free_brain_vars

        # Pass 4: hand the non-frozen brain's variables to every frozen
        # "madqn" brain. This runs after pass 3 so the non-frozen brain is
        # known regardless of its position in the brain list.
        for brain_name in brain_names:
            if self.trainer_parameters_dict[brain_name]['trainer'] != "madqn":
                continue
            trainer = self.trainers[brain_name]
            if not trainer.parameters['frozen']:
                continue
            if free_brain_vars is None:
                raise UnityEnvironmentException(
                    "Brain {} is a frozen madqn brain but the trainer config contains "
                    "no non-frozen madqn brain to take variables from".format(brain_name))
            trainer.update_frozen_brain_graph(self.brain_vars[brain_name], free_brain_vars)