def test_process_visual_observation():
    """Round-trip two compressed 128x64x3 observations through the processor.

    The first observation is built with ``generate_compressed_proto_obs`` and
    the second with ``generate_compressed_proto_obs_with_mapping`` using the
    channel mapping ``[0, 1, 2]``. Each is attached to its own
    ``AgentInfoProto``; the processed result must stack both agents along a
    leading axis and match the source arrays to within ``atol=0.01``.
    """
    first_image = np.random.rand(128, 64, 3)
    first_proto = generate_compressed_proto_obs(first_image)

    second_image = np.random.rand(128, 64, 3)
    second_proto = generate_compressed_proto_obs_with_mapping(
        second_image, [0, 1, 2]
    )

    # One AgentInfoProto per agent, each carrying a single observation.
    agent_infos = []
    for proto_obs in (first_proto, second_proto):
        agent_info = AgentInfoProto()
        agent_info.observations.extend([proto_obs])
        agent_infos.append(agent_info)

    batch = _process_maybe_compressed_observation(0, (128, 64, 3), agent_infos)

    assert tuple(batch.shape) == (2, 128, 64, 3)
    assert np.allclose(batch[0], first_image, atol=0.01)
    assert np.allclose(batch[1], second_image, atol=0.01)
def test_process_visual_observation_bad_shape():
    """Expect ``UnityObservationException`` when the requested shape is wrong.

    A 128x64x3 observation is encoded, but ``(128, 42, 3)`` is passed to
    ``_process_visual_observation`` as the expected shape.

    NOTE(review): another function further down this file has the same name;
    if both live in the same module the later definition replaces this one and
    this test is never collected -- confirm and rename one of them.
    """
    image = np.random.rand(128, 64, 3)

    agent_info = AgentInfoProto()
    agent_info.observations.extend([generate_compressed_proto_obs(image)])

    mismatched_shape = (128, 42, 3)
    with pytest.raises(UnityObservationException):
        _process_visual_observation(0, mismatched_shape, [agent_info])
def test_process_visual_observation_grayscale():
    """Check that single-channel output equals the per-pixel channel mean.

    Two observations are produced from random 128x64x3 arrays: one via
    ``generate_compressed_proto_obs(..., grayscale=True)`` and one via
    ``generate_compressed_proto_obs_with_mapping`` with mapping ``[0, 0, 0]``.
    Both are expected to come back with shape ``(128, 64, 1)`` and to match the
    mean over the channel axis of their source array within ``atol=0.01``.
    """
    first_rgb = np.random.rand(128, 64, 3)
    first_proto = generate_compressed_proto_obs(first_rgb, grayscale=True)
    first_expected = first_rgb.mean(axis=2, keepdims=True)

    second_rgb = np.random.rand(128, 64, 3)
    second_proto = generate_compressed_proto_obs_with_mapping(
        second_rgb, [0, 0, 0]
    )
    second_expected = second_rgb.mean(axis=2, keepdims=True)

    # One AgentInfoProto per agent, each carrying a single observation.
    agent_infos = []
    for proto_obs in (first_proto, second_proto):
        agent_info = AgentInfoProto()
        agent_info.observations.extend([proto_obs])
        agent_infos.append(agent_info)

    batch = _process_visual_observation(0, (128, 64, 1), agent_infos)

    assert tuple(batch.shape) == (2, 128, 64, 1)
    for agent_index, expected in enumerate((first_expected, second_expected)):
        assert np.allclose(batch[agent_index], expected, atol=0.01)
def test_process_visual_observation_bad_shape():
    """Expect ``UnityObservationException`` for a spec that mismatches the data.

    A 128x64x3 observation is encoded, while the observation spec handed to
    ``_process_maybe_compressed_observation`` is built for ``(128, 42, 3)``.

    NOTE(review): an earlier function in this file has the same name; if both
    live in the same module this definition shadows the earlier one -- confirm
    and rename one of them.
    """
    image = np.random.rand(128, 64, 3)

    agent_info = AgentInfoProto()
    agent_info.observations.extend([generate_compressed_proto_obs(image)])

    # Only the first (and only) spec produced for the mismatched shape is used.
    mismatched_specs = create_observation_specs_with_shapes([(128, 42, 3)])
    mismatched_spec = mismatched_specs[0]

    with pytest.raises(UnityObservationException):
        _process_maybe_compressed_observation(0, mismatched_spec, [agent_info])
def generate_list_agent_proto(
    n_agent: int,
    shape: List[Tuple[int]],
    infinite_rewards: bool = False,
    nan_observations: bool = False,
) -> List[AgentInfoProto]:
    """Build ``n_agent`` synthetic ``AgentInfoProto`` messages.

    Args:
        n_agent: Number of protos to create; agent ids run from 0 upward.
        shape: One shape per observation attached to every agent.
        infinite_rewards: When true every reward is ``inf``; otherwise the
            reward equals the agent index.
        nan_observations: When true observation data is filled with NaN;
            otherwise with ``0.1``.

    Returns:
        The generated protos in agent-index order. Even-indexed agents are
        marked done, every fourth agent (starting at 0) is marked as having
        reached max step, and each agent gets a ten-entry action mask
        alternating True/False.
    """
    fill_value = float("nan") if nan_observations else 0.1

    agent_protos = []
    for agent_index in range(n_agent):
        agent_proto = AgentInfoProto()
        agent_proto.reward = float("inf") if infinite_rewards else agent_index
        agent_proto.done = agent_index % 2 == 0
        agent_proto.max_step_reached = agent_index % 4 == 0
        agent_proto.id = agent_index
        agent_proto.action_mask.extend([True, False] * 5)

        observations = []
        for obs_shape in shape:
            obs_proto = ObservationProto()
            obs_proto.shape.extend(list(obs_shape))
            obs_proto.compression_type = NONE
            # Flat payload: one fill value per element of the observation.
            obs_proto.float_data.data.extend([fill_value] * np.prod(obs_shape))
            observations.append(obs_proto)

        agent_proto.observations.extend(observations)
        agent_protos.append(agent_proto)
    return agent_protos
def test_process_visual_observation_padded_channels():
    """Check that channels mapped to ``-1`` are dropped from the output.

    A random 128x64x12 array is encoded with a mapping in which source
    channels 4, 5, 10 and 11 are marked ``-1``. The processed observation is
    expected to have 8 channels equal to source channels 0-3 and 6-9 within
    ``atol=0.01``.
    """
    padded_image = np.random.rand(128, 64, 12)
    channel_mapping = [0, 1, 2, 3, -1, -1, 4, 5, 6, 7, -1, -1]
    proto_obs = generate_compressed_proto_obs_with_mapping(
        padded_image, channel_mapping
    )

    kept_channels = [0, 1, 2, 3, 6, 7, 8, 9]
    expected = np.take(padded_image, kept_channels, axis=2)

    agent_info = AgentInfoProto()
    agent_info.observations.extend([proto_obs])

    batch = _process_visual_observation(0, (128, 64, 8), [agent_info])

    assert tuple(batch.shape) == (1, 128, 64, 8)
    assert np.allclose(batch[0], expected, atol=0.01)
def proto_from_batched_step_result(
    batched_step_result: BatchedStepResult
) -> List[AgentInfoProto]:
    """Convert a ``BatchedStepResult`` into one ``AgentInfoProto`` per agent.

    For every agent id the matching row is read from the reward, done,
    max-step, action-mask and observation batches. Observations whose shape
    has three dimensions are passed to ``generate_uncompressed_proto_obs``;
    all others are stored as flat float data with ``compression_type=NONE``.

    Args:
        batched_step_result: The batched result to convert.

    Returns:
        Protos in the order of ``batched_step_result.agent_id``.
    """
    protos: List[AgentInfoProto] = []
    for agent_id in batched_step_result.agent_id:
        row = batched_step_result.agent_id_to_index[agent_id]

        # The mask stays None when the batch has no action mask; otherwise the
        # agent's row of every branch is appended onto one flat sequence.
        mask = None
        if batched_step_result.action_mask is not None:
            mask = []  # type: ignore
            for branch_mask in batched_step_result.action_mask:
                mask = np.concatenate((mask, branch_mask[row, :]), axis=0)

        obs_protos: List[ObservationProto] = []
        for obs_batch in batched_step_result.obs:
            agent_obs = obs_batch[row]
            if len(agent_obs.shape) == 3:
                obs_proto = generate_uncompressed_proto_obs(agent_obs)
            else:
                obs_proto = ObservationProto(
                    float_data=ObservationProto.FloatData(data=agent_obs),
                    shape=[len(agent_obs)],
                    compression_type=NONE,
                )
            obs_protos.append(obs_proto)

        protos.append(
            AgentInfoProto(
                reward=batched_step_result.reward[row],
                done=batched_step_result.done[row],
                id=agent_id,
                max_step_reached=batched_step_result.max_step[row],
                action_mask=mask,
                observations=obs_protos,
            )
        )
    return protos
def _observations_to_protos(obs_by_type, agent_index: int) -> List[ObservationProto]:
    """Turn one agent's row of every observation batch into ObservationProtos.

    Observations whose shape has three dimensions are handed to
    ``generate_uncompressed_proto_obs``; every other observation is stored as
    flat float data with ``compression_type=NONE``.
    """
    protos: List[ObservationProto] = []
    for obs_batch in obs_by_type:
        observation = obs_batch[agent_index]
        if len(observation.shape) == 3:
            protos.append(generate_uncompressed_proto_obs(observation))
        else:
            protos.append(
                ObservationProto(
                    float_data=ObservationProto.FloatData(data=observation),
                    shape=[len(observation)],
                    compression_type=NONE,
                )
            )
    return protos


def proto_from_steps(
    decision_steps: DecisionSteps, terminal_steps: TerminalSteps
) -> List[AgentInfoProto]:
    """Convert decision and terminal steps into a list of ``AgentInfoProto``.

    Decision-step agents are emitted first with ``done=False`` and
    ``max_step_reached=False``; their action mask, when present, is the
    concatenation of the agent's row from every branch, converted to a list of
    Python bools. Terminal-step agents follow with ``done=True``,
    ``max_step_reached`` taken from ``terminal_steps.interrupted`` and no
    action mask.

    Args:
        decision_steps: Steps for agents that are still requesting decisions.
        terminal_steps: Steps for agents whose episode has ended.

    Returns:
        One proto per agent id, decision steps first, then terminal steps.
    """
    agent_info_protos: List[AgentInfoProto] = []

    # Take care of the DecisionSteps first
    for agent_id in decision_steps.agent_id:
        agent_id_index = decision_steps.agent_id_to_index[agent_id]

        agent_mask: Any = None
        if decision_steps.action_mask is not None:
            branch_rows = [
                branch[agent_id_index, :] for branch in decision_steps.action_mask
            ]
            # An empty (but non-None) mask list yields an empty mask instead
            # of failing on ``list.astype``. The builtin ``bool`` is used as
            # the dtype because the ``np.bool`` alias is absent from NumPy
            # 1.24-1.26.
            agent_mask = (
                np.concatenate(branch_rows, axis=0).astype(bool).tolist()
                if branch_rows
                else []
            )

        agent_info_protos.append(
            AgentInfoProto(
                reward=decision_steps.reward[agent_id_index],
                done=False,
                id=agent_id,
                max_step_reached=False,
                action_mask=agent_mask,
                observations=_observations_to_protos(
                    decision_steps.obs, agent_id_index
                ),
            )
        )

    # Take care of the TerminalSteps second
    for agent_id in terminal_steps.agent_id:
        agent_id_index = terminal_steps.agent_id_to_index[agent_id]

        agent_info_protos.append(
            AgentInfoProto(
                reward=terminal_steps.reward[agent_id_index],
                done=True,
                id=agent_id,
                # Cast to a plain bool before handing the value to the proto.
                max_step_reached=bool(terminal_steps.interrupted[agent_id_index]),
                action_mask=None,
                observations=_observations_to_protos(
                    terminal_steps.obs, agent_id_index
                ),
            )
        )

    return agent_info_protos