Example #1
def test_batched_step_result_from_proto_raises_on_nan():
    n_agents = 10
    shapes = [(3,), (4,)]
    group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 3)
    ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
    with pytest.raises(RuntimeError):
        batched_step_result_from_proto(ap_list, group_spec)
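All of the snippets below elide their imports. A plausible shared import block, assuming the mlagents_envs / mlagents package layout these ML-Agents tests appear to target (the exact module paths are an assumption, and generate_list_agent_proto is a local test helper rather than a public API):

from typing import List

import numpy as np
import pytest

# Module paths below are assumptions based on the ML-Agents package layout.
from mlagents_envs.base_env import ActionType, AgentGroupSpec, BatchedStepResult
from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
    AgentInfoActionPairProto,
)
from mlagents_envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutputProto
from mlagents_envs.rpc_utils import batched_step_result_from_proto
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.trajectory import SplitObservations
# generate_list_agent_proto is defined alongside these tests, not imported.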
Example #2
def make_demo_buffer(
    pair_infos: List[AgentInfoActionPairProto],
    group_spec: AgentGroupSpec,
    sequence_length: int,
) -> AgentBuffer:
    # Create and populate buffer using experiences
    demo_raw_buffer = AgentBuffer()
    demo_processed_buffer = AgentBuffer()
    for idx, current_pair_info in enumerate(pair_infos):
        if idx > len(pair_infos) - 2:
            break  # the final pair has no "next" step to pair with
        next_pair_info = pair_infos[idx + 1]
        current_step_info = batched_step_result_from_proto(
            [current_pair_info.agent_info], group_spec)
        next_step_info = batched_step_result_from_proto(
            [next_pair_info.agent_info], group_spec)
        # The previous action is zeroed for the first experience and taken
        # from the preceding pair afterwards.
        previous_action = (np.array(pair_infos[idx].action_info.vector_actions,
                                    dtype=np.float32) * 0)
        if idx > 0:
            previous_action = np.array(
                pair_infos[idx - 1].action_info.vector_actions,
                dtype=np.float32)
        curr_agent_id = current_step_info.agent_id[0]
        current_agent_step_info = current_step_info.get_agent_step_result(
            curr_agent_id)
        next_agent_id = next_step_info.agent_id[0]
        next_agent_step_info = next_step_info.get_agent_step_result(
            next_agent_id)

        demo_raw_buffer["done"].append(next_agent_step_info.done)
        demo_raw_buffer["rewards"].append(next_agent_step_info.reward)
        split_obs = SplitObservations.from_observations(
            current_agent_step_info.obs)
        for i, obs in enumerate(split_obs.visual_observations):
            demo_raw_buffer["visual_obs%d" % i].append(obs)
        demo_raw_buffer["vector_obs"].append(split_obs.vector_observations)
        demo_raw_buffer["actions"].append(
            current_pair_info.action_info.vector_actions)
        demo_raw_buffer["prev_action"].append(previous_action)
        if next_step_info.done:
            # An episode ended: cut the raw buffer into training sequences,
            # append them to the processed buffer, then reset.
            demo_raw_buffer.resequence_and_append(
                demo_processed_buffer,
                batch_size=None,
                training_length=sequence_length)
            demo_raw_buffer.reset_agent()
    # Flush any experiences remaining after the final pair.
    demo_raw_buffer.resequence_and_append(demo_processed_buffer,
                                          batch_size=None,
                                          training_length=sequence_length)
    return demo_processed_buffer
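A minimal usage sketch for the function above; the pair_infos here are hypothetical (they would normally be parsed from a recorded .demo file), and num_experiences is assumed from the AgentBuffer API:

# Hypothetical driver: pair_infos / group_spec would come from a demo file.
demo_buffer = make_demo_buffer(pair_infos, group_spec, sequence_length=1)
# One experience per consecutive pair; the final pair only supplies "next" data.
assert demo_buffer.num_experiences == len(pair_infos) - 1  # assumed property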
Example #3
def test_action_masking_continuous():
    n_agents = 10
    shapes = [(3,), (4,)]
    group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 10)
    ap_list = generate_list_agent_proto(n_agents, shapes)
    result = batched_step_result_from_proto(ap_list, group_spec)
    masks = result.action_mask
    assert masks is None
Example #4
def test_action_masking_discrete_1():
    n_agents = 10
    shapes = [(3,), (4,)]
    group_spec = AgentGroupSpec(shapes, ActionType.DISCRETE, (10,))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    result = batched_step_result_from_proto(ap_list, group_spec)
    masks = result.action_mask
    assert isinstance(masks, list)
    assert len(masks) == 1
    assert masks[0].shape == (n_agents, 10)
    assert masks[0][0, 0]
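The single-branch case generalizes to multiple branches. A sketch under the assumption that the helper splits the flat proto mask into one array per branch, mirroring the test above:

def test_action_masking_discrete_2():
    n_agents = 10
    shapes = [(3,), (4,)]
    group_spec = AgentGroupSpec(shapes, ActionType.DISCRETE, (7, 3))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    result = batched_step_result_from_proto(ap_list, group_spec)
    masks = result.action_mask
    assert isinstance(masks, list)
    assert len(masks) == 2  # one mask array per discrete branch
    assert masks[0].shape == (n_agents, 7)
    assert masks[1].shape == (n_agents, 3)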
Example #5
def test_batched_step_result_from_proto():
    n_agents = 10
    shapes = [(3,), (4,)]
    group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 3)
    ap_list = generate_list_agent_proto(n_agents, shapes)
    result = batched_step_result_from_proto(ap_list, group_spec)
    assert list(result.reward) == list(range(n_agents))
    assert list(result.agent_id) == list(range(n_agents))
    for index in range(n_agents):
        assert result.done[index] == (index % 2 == 0)
        assert result.max_step[index] == (index % 2 == 1)
    assert list(result.obs[0].shape) == [n_agents] + list(shapes[0])
    assert list(result.obs[1].shape) == [n_agents] + list(shapes[1])
Example #6
def _update_state(self, output: UnityRLOutputProto) -> None:
    """
    Collects experience information from all external brains in the
    environment at the current step.
    """
    for brain_name in self._env_specs.keys():
        if brain_name in output.agentInfos:
            agent_info_list = output.agentInfos[brain_name].value
            self._env_state[brain_name] = batched_step_result_from_proto(
                agent_info_list, self._env_specs[brain_name])
        else:
            # No agents reported for this brain this step: store an empty result.
            self._env_state[brain_name] = BatchedStepResult.empty(
                self._env_specs[brain_name])
    self._parse_side_channel_message(self.side_channels,
                                     output.side_channel)