def test_batched_step_result_from_proto_raises_on_nan():
    n_agents = 10
    shapes = [(3,), (4,)]
    group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 3)
    ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
    with pytest.raises(RuntimeError):
        batched_step_result_from_proto(ap_list, group_spec)
def make_demo_buffer(
    pair_infos: List[AgentInfoActionPairProto],
    group_spec: AgentGroupSpec,
    sequence_length: int,
) -> AgentBuffer:
    # Create and populate buffer using experiences
    demo_raw_buffer = AgentBuffer()
    demo_processed_buffer = AgentBuffer()
    for idx, current_pair_info in enumerate(pair_infos):
        if idx > len(pair_infos) - 2:
            break
        next_pair_info = pair_infos[idx + 1]
        current_step_info = batched_step_result_from_proto(
            [current_pair_info.agent_info], group_spec
        )
        next_step_info = batched_step_result_from_proto(
            [next_pair_info.agent_info], group_spec
        )
        previous_action = (
            np.array(pair_infos[idx].action_info.vector_actions, dtype=np.float32) * 0
        )
        if idx > 0:
            previous_action = np.array(
                pair_infos[idx - 1].action_info.vector_actions, dtype=np.float32
            )
        curr_agent_id = current_step_info.agent_id[0]
        current_agent_step_info = current_step_info.get_agent_step_result(curr_agent_id)
        next_agent_id = next_step_info.agent_id[0]
        next_agent_step_info = next_step_info.get_agent_step_result(next_agent_id)
        demo_raw_buffer["done"].append(next_agent_step_info.done)
        demo_raw_buffer["rewards"].append(next_agent_step_info.reward)
        split_obs = SplitObservations.from_observations(current_agent_step_info.obs)
        for i, obs in enumerate(split_obs.visual_observations):
            demo_raw_buffer["visual_obs%d" % i].append(obs)
        demo_raw_buffer["vector_obs"].append(split_obs.vector_observations)
        demo_raw_buffer["actions"].append(current_pair_info.action_info.vector_actions)
        demo_raw_buffer["prev_action"].append(previous_action)
        if next_step_info.done:
            demo_raw_buffer.resequence_and_append(
                demo_processed_buffer, batch_size=None, training_length=sequence_length
            )
            demo_raw_buffer.reset_agent()
    demo_raw_buffer.resequence_and_append(
        demo_processed_buffer, batch_size=None, training_length=sequence_length
    )
    return demo_processed_buffer
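# Hedged usage sketch (not in the original file): make_demo_buffer is normally
# fed pair_infos parsed from a recorded .demo file. The load_demonstration
# helper and its return signature below are assumptions for illustration.
def demo_to_buffer_sketch(file_path: str, sequence_length: int) -> AgentBuffer:
    # Assumed loader: yields the group spec and the recorded info/action pairs.
    group_spec, pair_infos, _ = load_demonstration(file_path)
    return make_demo_buffer(pair_infos, group_spec, sequence_length)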
def test_action_masking_continuous():
    n_agents = 10
    shapes = [(3,), (4,)]
    group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 10)
    ap_list = generate_list_agent_proto(n_agents, shapes)
    result = batched_step_result_from_proto(ap_list, group_spec)
    masks = result.action_mask
    assert masks is None
def test_action_masking_discrete_1():
    n_agents = 10
    shapes = [(3,), (4,)]
    group_spec = AgentGroupSpec(shapes, ActionType.DISCRETE, (10,))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    result = batched_step_result_from_proto(ap_list, group_spec)
    masks = result.action_mask
    assert isinstance(masks, list)
    assert len(masks) == 1
    assert masks[0].shape == (n_agents, 10)
    assert masks[0][0, 0]
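# Hedged companion sketch (an assumption, not in the original file): with a
# multi-branch discrete action space, action_mask is expected to contain one
# array per branch, each shaped (n_agents, branch_size).
def test_action_masking_discrete_2_sketch():
    n_agents = 10
    shapes = [(3,), (4,)]
    group_spec = AgentGroupSpec(shapes, ActionType.DISCRETE, (7, 3))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    result = batched_step_result_from_proto(ap_list, group_spec)
    masks = result.action_mask
    assert isinstance(masks, list)
    assert len(masks) == 2
    assert masks[0].shape == (n_agents, 7)
    assert masks[1].shape == (n_agents, 3)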
def test_batched_step_result_from_proto():
    n_agents = 10
    shapes = [(3,), (4,)]
    group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 3)
    ap_list = generate_list_agent_proto(n_agents, shapes)
    result = batched_step_result_from_proto(ap_list, group_spec)
    assert list(result.reward) == list(range(n_agents))
    assert list(result.agent_id) == list(range(n_agents))
    for index in range(n_agents):
        assert result.done[index] == (index % 2 == 0)
        assert result.max_step[index] == (index % 2 == 1)
    assert list(result.obs[0].shape) == [n_agents] + list(shapes[0])
    assert list(result.obs[1].shape) == [n_agents] + list(shapes[1])
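# Hedged note, inferred from the assertions above rather than from the helper's
# source: generate_list_agent_proto appears to emit one AgentInfoProto per
# agent, with reward and id set to the agent index, done flagged on even
# indices, and max-step-reached flagged on odd indices.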
def _update_state(self, output: UnityRLOutputProto) -> None:
    """
    Collects experience information from all external brains in the
    environment at the current step.
    """
    for brain_name in self._env_specs.keys():
        if brain_name in output.agentInfos:
            agent_info_list = output.agentInfos[brain_name].value
            self._env_state[brain_name] = batched_step_result_from_proto(
                agent_info_list, self._env_specs[brain_name]
            )
        else:
            self._env_state[brain_name] = BatchedStepResult.empty(
                self._env_specs[brain_name]
            )
    self._parse_side_channel_message(self.side_channels, output.side_channel)