Example #1
 def advance(self, env: EnvManager) -> int:
     with hierarchical_timer("env_step"):
         new_step_infos = env.step()
     for step_info in new_step_infos:
         for brain_name, trainer in self.trainers.items():
             if step_info.has_actions_for_brain(brain_name):
                 _processor = self.managers[brain_name].processor
                 _processor.add_experiences(
                     step_info.previous_all_brain_info[brain_name],
                     step_info.current_all_brain_info[brain_name],
                     step_info.brain_name_to_action_info[brain_name].outputs,
                 )
     for brain_name, trainer in self.trainers.items():
         if self.train_model and trainer.get_step <= trainer.get_max_steps:
             trainer.increment_step(len(new_step_infos))
             if trainer.is_ready_update():
                 # Perform gradient descent with experience buffer
                 with hierarchical_timer("update_policy"):
                     trainer.update_policy()
                 env.set_policy(brain_name, trainer.policy)
         else:
             # Avoid memory leak during inference
             # Eventually this whole block will take place in advance()
             # But currently this only calls clear_update_buffer() in RLTrainer
             # and nothing in the base class
             trainer.advance()
     return len(new_step_infos)
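
The return value above is simply the number of new step infos gathered in this call. A minimal sketch of how an advance() method like this might be driven from an outer loop follows; run_training_loop, controller, env_manager and max_training_steps are hypothetical names used only for illustration, not part of the original code.

    # Hypothetical driver; the real entry point may differ.
    def run_training_loop(controller, env_manager, max_training_steps=10_000):
        total_steps = 0
        while total_steps < max_training_steps:
            # advance() steps the environment once, routes experiences to the
            # trainers and updates policies when a trainer is ready.
            total_steps += controller.advance(env_manager)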
Example #2
 def _get_and_process_experiences(self, env: EnvManager) -> int:
     with hierarchical_timer("env_step"):
         # Get new policies if found
         for brain_name in self.trainers.keys():
             for name_behavior_id in self.brain_name_to_identifier[
                     brain_name]:
                 try:
                     _policy = self.managers[
                         name_behavior_id].policy_queue.get_nowait()
                     env.set_policy(name_behavior_id, _policy)
                 except AgentManagerQueue.Empty:
                     pass
         # Step the environment
         new_step_infos = env.step()
     # Add to AgentProcessor
     for step_info in new_step_infos:
         for name_behavior_id in step_info.name_behavior_ids:
             if name_behavior_id not in self.managers:
                 self.logger.warning(
                     "Agent manager was not created for behavior id {}.".
                     format(name_behavior_id))
                 continue
             self.managers[name_behavior_id].add_experiences(
                 step_info.previous_all_brain_info[name_behavior_id],
                 step_info.current_all_brain_info[name_behavior_id],
                 step_info.brain_name_to_action_info[name_behavior_id].outputs,
             )
     return len(new_step_infos)
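
Example #2 drains each manager's policy_queue before stepping, so the newest policy a trainer has published reaches the environment. A minimal stand-in for such a queue, assuming it simply wraps queue.Queue (an assumption for illustration, not the original class), could look like the sketch below; the producer side would put an updated policy on the queue after update_policy().

    import queue

    class AgentManagerQueue:
        """Illustrative stand-in only; the real class may differ."""
        Empty = queue.Empty  # raised by get_nowait() when the queue is empty

        def __init__(self):
            self._queue = queue.Queue()

        def put(self, item):
            self._queue.put(item)

        def get_nowait(self):
            return self._queue.get_nowait()

    # Producer side, e.g. right after trainer.update_policy():
    #     self.managers[name_behavior_id].policy_queue.put(new_policy)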
Example #3
 def _get_and_process_experiences(self, env: EnvManager) -> int:
     with hierarchical_timer("env_step"):
         # Get new policies if found
         for brain_name in self.trainers.keys():
             for name_behavior_id in self.brain_name_to_identifier[
                     brain_name]:
                 try:
                     _policy = self.managers[
                         name_behavior_id].policy_queue.get_nowait()
                     env.set_policy(name_behavior_id, _policy)
                 except AgentManagerQueue.Empty:
                     pass
         # Step the environment
         new_step_infos = env.step()
     # Add to AgentProcessor
     num_step_infos = self._process_step_infos(new_step_infos)
     return num_step_infos
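
Example #3 factors the per-step bookkeeping out into self._process_step_infos(). Judging from the inlined version in Example #2, that helper presumably looks roughly like the sketch below; this is a reconstruction for illustration, not the original implementation.

    def _process_step_infos(self, step_infos):
        # Route each step's experiences to the matching AgentManager.
        for step_info in step_infos:
            for name_behavior_id in step_info.name_behavior_ids:
                if name_behavior_id not in self.managers:
                    self.logger.warning(
                        "Agent manager was not created for behavior id {}.".format(
                            name_behavior_id))
                    continue
                self.managers[name_behavior_id].add_experiences(
                    step_info.previous_all_brain_info[name_behavior_id],
                    step_info.current_all_brain_info[name_behavior_id],
                    step_info.brain_name_to_action_info[name_behavior_id].outputs,
                )
        return len(step_infos)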
Example #4
    def advance(self, env: EnvManager) -> int:
        with hierarchical_timer("env_step"):
            new_step_infos = env.step()
        for step_info in new_step_infos:
            for name_behavior_id in step_info.name_behavior_ids:
                if name_behavior_id not in self.managers:
                    self.logger.warning(
                        "Agent manager was not created for behavior id {}.".
                        format(name_behavior_id))
                    continue
                _processor = self.managers[name_behavior_id].processor
                _processor.add_experiences(
                    step_info.previous_all_brain_info[name_behavior_id],
                    step_info.current_all_brain_info[name_behavior_id],
                    step_info.brain_name_to_action_info[name_behavior_id].outputs,
                )

        for brain_name, trainer in self.trainers.items():
            if self.train_model and trainer.get_step <= trainer.get_max_steps:
                n_steps = len(new_step_infos)
                trainer.increment_step(n_steps)
                for name_behavior_id in self.brain_name_to_identifier[
                        brain_name]:
                    trainer.get_policy(name_behavior_id).increment_step(
                        n_steps)
                if trainer.is_ready_update():
                    # Perform gradient descent with experience buffer
                    with hierarchical_timer("update_policy"):
                        trainer.update_policy()
                    for name_behavior_id in self.brain_name_to_identifier[
                            brain_name]:
                        env.set_policy(name_behavior_id,
                                       trainer.get_policy(name_behavior_id))
            else:
                # Avoid memory leak during inference
                # Eventually this whole block will take place in advance()
                # But currently this only calls clear_update_buffer() in RLTrainer
                # and nothing in the base class
                trainer.advance()
        return len(new_step_infos)
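
Example #4 looks up every behavior id belonging to a trainer through self.brain_name_to_identifier[brain_name]. A minimal sketch of how such a mapping could be maintained, assuming behavior ids carry the brain name as a prefix (the "Walker?team=0" naming scheme is an assumption here), is:

    from collections import defaultdict

    # Hypothetical registration helper; the real controller may build this differently.
    brain_name_to_identifier = defaultdict(set)

    def register_behavior(name_behavior_id):
        # "Walker?team=0" -> trainer key "Walker"
        brain_name = name_behavior_id.split("?")[0]
        brain_name_to_identifier[brain_name].add(name_behavior_id)

    register_behavior("Walker?team=0")
    register_behavior("Walker?team=1")
    # brain_name_to_identifier["Walker"] == {"Walker?team=0", "Walker?team=1"}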
Example #5
 def advance(self, env: EnvManager) -> int:
     with hierarchical_timer("env_step"):
         time_start_step = time()
         new_step_infos = env.step()
         delta_time_step = time() - time_start_step
     for step_info in new_step_infos:
         for brain_name, trainer in self.trainers.items():
             if brain_name in self.trainer_metrics:
                 self.trainer_metrics[brain_name].add_delta_step(
                     delta_time_step)
             if step_info.has_actions_for_brain(brain_name):
                 trainer.add_experiences(
                     step_info.previous_all_brain_info[brain_name],
                     step_info.current_all_brain_info[brain_name],
                     step_info.brain_name_to_action_info[brain_name].outputs,
                 )
                 trainer.process_experiences(
                     step_info.previous_all_brain_info[brain_name],
                     step_info.current_all_brain_info[brain_name],
                 )
     for brain_name, trainer in self.trainers.items():
         if brain_name in self.trainer_metrics:
             self.trainer_metrics[brain_name].add_delta_step(
                 delta_time_step)
         if self.train_model and trainer.get_step <= trainer.get_max_steps:
             trainer.increment_step(len(new_step_infos))
             if trainer.is_ready_update():
                 # Perform gradient descent with experience buffer
                 with hierarchical_timer("update_policy"):
                     trainer.update_policy()
                 env.set_policy(brain_name, trainer.policy)
         else:
             # Avoid memory leak during inference
             trainer.clear_update_buffer()
     return len(new_step_infos)
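
Example #5 times env.step() and feeds the elapsed wall-clock time into per-brain trainer metrics. A self-contained sketch of that pattern, with a hypothetical accumulator standing in for the real metrics object, is:

    from time import time

    class StepMetrics:
        """Hypothetical accumulator mirroring add_delta_step() above."""
        def __init__(self):
            self.total_env_step_time = 0.0

        def add_delta_step(self, delta):
            self.total_env_step_time += delta

    metrics = StepMetrics()
    time_start_step = time()
    # ... env.step() would run here ...
    metrics.add_delta_step(time() - time_start_step)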