# Imports assumed (generated protobuf classes; the exact package path
# varies by ml-agents version):
# from mlagents.envs.communicator_objects import (
#     UnityRLInput, UnityInput, AgentActionProto, EnvironmentParametersProto)

def _generate_reset_input(self, training, config) -> UnityRLInput:
    rl_in = UnityRLInput()
    rl_in.is_training = training
    rl_in.environment_parameters.CopyFrom(EnvironmentParametersProto())
    # Copy the reset configuration into the message's float-parameter map.
    for key in config:
        rl_in.environment_parameters.float_parameters[key] = config[key]
    rl_in.command = 1  # 1 == RESET in the command enum (0 == STEP)
    return self.wrap_unity_input(rl_in)
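
A minimal usage sketch (not from the source; the `env` handle, the parameter names, and the `exchange()` round-trip are assumptions) showing a reset request carrying two float parameters:

# Hypothetical usage: `env` is assumed to be a connected UnityEnvironment.
reset_config = {"gravity": -9.81, "lesson": 2.0}
unity_input = env._generate_reset_input(training=True, config=reset_config)
outputs = env.communicator.exchange(unity_input)  # round-trip to Unity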
def _generate_step_input(self, vector_action, memory,
                         text_action) -> UnityRLInput:
    rl_in = UnityRLInput()
    for b in vector_action:
        n_agents = self._n_agents[b]
        if n_agents == 0:
            continue
        # Per-agent slice sizes: actions and memories arrive flattened
        # across all agents of brain `b`.
        _a_s = len(vector_action[b]) // n_agents
        _m_s = len(memory[b]) // n_agents
        for i in range(n_agents):
            action = AgentActionProto(
                vector_actions=vector_action[b][i * _a_s:(i + 1) * _a_s],
                memories=memory[b][i * _m_s:(i + 1) * _m_s],
                text_actions=text_action[b][i])
            rl_in.agent_actions[b].value.extend([action])
    rl_in.command = 0  # 0 == STEP (also the proto default)
    return self.wrap_unity_input(rl_in)
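
The step helper expects each brain's actions flattened across its agents; a hedged example (the brain name, sizes, and values are illustrative, and `env` is assumed as above) with two agents, a 3-float action space, and 2 memory floats per agent:

# Hypothetical inputs: 2 agents -> 6 action floats and 4 memory floats,
# which the loop above slices back into per-agent AgentActionProto messages.
vector_action = {"Ball3DBrain": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]}
memory = {"Ball3DBrain": [0.0, 0.0, 0.0, 0.0]}
text_action = {"Ball3DBrain": ["", ""]}
step_input = env._generate_step_input(vector_action, memory, text_action)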
def _generate_record_input(self) -> UnityRLInput:
    rl_in = UnityRLInput()
    # Send a bare RECORD command; no actions or parameters accompany it.
    rl_in.command = UnityCommand.RECORD
    return self.wrap_unity_input(rl_in)
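
Each helper finishes by calling `wrap_unity_input`; a plausible sketch of that method (an assumption based on how the generated protobufs nest, it is not shown in this source) simply embeds the RL payload in the top-level `UnityInput` message:

def wrap_unity_input(self, rl_input) -> UnityInput:
    # Assumed helper: nest the UnityRLInput inside the outer UnityInput.
    result = UnityInput()
    result.rl_input.CopyFrom(rl_input)
    return result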