def _generate_reset_input(self, training, config) -> UnityRLInput:
    rl_in = UnityRLInput()
    rl_in.is_training = training
    rl_in.environment_parameters.CopyFrom(EnvironmentParametersProto())
    for key in config:
        rl_in.environment_parameters.float_parameters[key] = config[key]
    rl_in.command = 1  # 1 == RESET in the CommandProto enum
    return self.wrap_unity_input(rl_in)
def _generate_step_input(self, vector_action, memory, text_action) -> UnityRLInput:
    rl_in = UnityRLInput()
    for b in vector_action:
        n_agents = self._n_agents[b]
        if n_agents == 0:
            continue
        # Per-agent slice sizes for the flattened action and memory vectors.
        _a_s = len(vector_action[b]) // n_agents
        _m_s = len(memory[b]) // n_agents
        for i in range(n_agents):
            action = AgentActionProto(
                vector_actions=vector_action[b][i * _a_s:(i + 1) * _a_s],
                memories=memory[b][i * _m_s:(i + 1) * _m_s],
                text_actions=text_action[b][i],
            )
            rl_in.agent_actions[b].value.extend([action])
    rl_in.command = 0  # 0 == STEP in the CommandProto enum
    return self.wrap_unity_input(rl_in)
def _generate_record_input(self) -> UnityRLInput:
    rl_in = UnityRLInput()
    rl_in.command = UnityCommand.RECORD
    return self.wrap_unity_input(rl_in)
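# Hedged usage sketch (not part of the original source): it assumes this class
# also holds a Communicator on `self.communicator`, as in ML-Agents'
# UnityEnvironment, whose exchange() sends the wrapped input to the simulator
# and returns its output. The surrounding reset()/step() flow and the variable
# names (train_mode, custom_reset_parameters, vector_action, memory,
# text_action) are illustrative assumptions showing where the generators above
# would typically be consumed:
#
#     outputs = self.communicator.exchange(
#         self._generate_reset_input(train_mode, custom_reset_parameters)
#     )
#     ...
#     outputs = self.communicator.exchange(
#         self._generate_step_input(vector_action, memory, text_action)
#     )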