def batched_step_result_from_proto(
    agent_info_list: Collection[
        AgentInfoProto
    ],  # pylint: disable=unsubscriptable-object
    group_spec: AgentGroupSpec,
) -> BatchedStepResult:
    """Build a ``BatchedStepResult`` from the per-agent protos of one group.

    :param agent_info_list: One ``AgentInfoProto`` per agent in the batch.
    :param group_spec: Spec of the agent group; supplies the observation
        shapes and, for discrete action spaces, the action branch sizes.
    :return: A ``BatchedStepResult`` holding the observations, rewards, done
        flags, max-step flags, agent ids and (for discrete action spaces) the
        per-branch action masks. The action mask is ``None`` when the action
        space is not discrete or when no agent provides a mask.
    """
    obs_list: List[np.ndarray] = []
    for obs_index, obs_shape in enumerate(group_spec.observation_shapes):
        # Rank-3 shapes are treated as visual observations, anything else as
        # a vector observation.
        is_visual = len(obs_shape) == 3
        if is_visual:
            obs_shape = cast(Tuple[int, int, int], obs_shape)
            obs_list.append(
                _process_visual_observation(obs_index, obs_shape, agent_info_list)
            )
        else:
            obs_list.append(
                _process_vector_observation(obs_index, obs_shape, agent_info_list)
            )

    rewards = np.array(
        [agent_info.reward for agent_info in agent_info_list], dtype=np.float32
    )

    # A single dot product is non-finite as soon as any reward is NaN or Inf,
    # which avoids scanning the array twice in the common (all finite) case.
    d = np.dot(rewards, rewards)
    has_nan = np.isnan(d)
    has_inf = not np.isfinite(d)
    # If we have any NaN or Infs, use np.nan_to_num to replace these with
    # finite values.
    if has_nan or has_inf:
        rewards = np.nan_to_num(rewards)
    if has_nan:
        logger.warning("An agent had a NaN reward in the environment")

    # np.bool_ instead of the removed np.bool alias (gone since NumPy 1.24).
    done = np.array(
        [agent_info.done for agent_info in agent_info_list], dtype=np.bool_
    )
    max_step = np.array(
        [agent_info.max_step_reached for agent_info in agent_info_list],
        dtype=np.bool_,
    )
    agent_id = np.array(
        [agent_info.id for agent_info in agent_info_list], dtype=np.int32
    )

    action_mask = None
    if group_spec.is_action_discrete():
        # Generator of plain booleans: the previous form wrapped each test in
        # a one-element list, which is always truthy.
        if any(
            agent_info.action_mask is not None for agent_info in agent_info_list
        ):
            n_agents = len(agent_info_list)
            a_size = np.sum(group_spec.discrete_action_branches)
            # Start from an all-False mask; agents without a mask, or with a
            # mask of unexpected length, keep the all-False row.
            action_mask = np.zeros((n_agents, a_size), dtype=np.bool_)
            for agent_index, agent_info in enumerate(agent_info_list):
                agent_mask = agent_info.action_mask
                if agent_mask is not None and len(agent_mask) == a_size:
                    action_mask[agent_index, :] = [bool(flag) for flag in agent_mask]
            # Split the flat mask into one array per action branch.
            indices = _generate_split_indices(group_spec.discrete_action_branches)
            action_mask = np.split(action_mask, indices, axis=1)

    return BatchedStepResult(
        obs_list, rewards, done, max_step, agent_id, action_mask
    )
def batched_step_result_from_proto(
    agent_info_list: Collection[
        AgentInfoProto
    ],  # pylint: disable=unsubscriptable-object
    envStat: EnvironmentStatisticsProto,
    group_spec: AgentGroupSpec,
) -> BatchedStepResult:
    """Build a ``BatchedStepResult`` from agent protos and environment stats.

    :param agent_info_list: One ``AgentInfoProto`` per agent in the batch.
    :param envStat: Environment statistics proto; its ``double_stat`` and
        ``string_stat`` maps are copied into plain dicts.
    :param group_spec: Spec of the agent group; supplies the observation
        shapes and, for discrete action spaces, the action branch sizes.
    :return: A ``BatchedStepResult`` holding the observations, rewards, done
        flags, max-step flags, agent ids, action masks (``None`` when the
        action space is not discrete or no agent provides a mask) and the two
        statistics dicts.
    """
    obs_list: List[np.ndarray] = []
    for obs_index, obs_shape in enumerate(group_spec.observation_shapes):
        # Rank-3 shapes are treated as visual observations, anything else as
        # a vector observation.
        is_visual = len(obs_shape) == 3
        if is_visual:
            obs_shape = cast(Tuple[int, int, int], obs_shape)
            obs_list.append(
                _process_visual_observation(obs_index, obs_shape, agent_info_list)
            )
        else:
            obs_list.append(
                _process_vector_observation(obs_index, obs_shape, agent_info_list)
            )

    rewards = np.array(
        [agent_info.reward for agent_info in agent_info_list], dtype=np.float32
    )
    # NaN/Inf handling for rewards is delegated to this helper.
    _raise_on_nan_and_inf(rewards, "rewards")

    # np.bool_ instead of the removed np.bool alias (gone since NumPy 1.24).
    done = np.array(
        [agent_info.done for agent_info in agent_info_list], dtype=np.bool_
    )
    max_step = np.array(
        [agent_info.max_step_reached for agent_info in agent_info_list],
        dtype=np.bool_,
    )
    agent_id = np.array(
        [agent_info.id for agent_info in agent_info_list], dtype=np.int32
    )

    action_mask = None
    if group_spec.is_action_discrete():
        # Generator of plain booleans: the previous form wrapped each test in
        # a one-element list, which is always truthy.
        if any(
            agent_info.action_mask is not None for agent_info in agent_info_list
        ):
            n_agents = len(agent_info_list)
            a_size = np.sum(group_spec.discrete_action_branches)
            # Start from an all-False mask; agents without a mask, or with a
            # mask of unexpected length, keep the all-False row.
            action_mask = np.zeros((n_agents, a_size), dtype=np.bool_)
            for agent_index, agent_info in enumerate(agent_info_list):
                agent_mask = agent_info.action_mask
                if agent_mask is not None and len(agent_mask) == a_size:
                    action_mask[agent_index, :] = [bool(flag) for flag in agent_mask]
            # Split the flat mask into one array per action branch.
            indices = _generate_split_indices(group_spec.discrete_action_branches)
            action_mask = np.split(action_mask, indices, axis=1)

    # convert protobuf maps to dicts
    double_stat = {key: envStat.double_stat[key] for key in envStat.double_stat}
    string_stat = {key: envStat.string_stat[key] for key in envStat.string_stat}

    return BatchedStepResult(
        obs_list,
        rewards,
        done,
        max_step,
        agent_id,
        action_mask,
        double_stat,
        string_stat,
    )
def step_result_to_brain_info(
    step_result: BatchedStepResult,
    group_spec: AgentGroupSpec,
    agent_id_prefix: int = None,
) -> BrainInfo:
    """Convert a ``BatchedStepResult`` into a ``BrainInfo``.

    :param step_result: Batched step result to convert.
    :param group_spec: Spec of the agent group; used to size the default
        action mask.
    :param agent_id_prefix: When given, it is prepended to every agent id in
        the resulting string ids; when ``None`` the ids are the plain string
        form of ``step_result.agent_id``.
    :return: A ``BrainInfo`` built from the visual observations, the
        concatenated vector observations, rewards, agent ids, done flags,
        max-step flags and a float32 action mask.
    :raises UnityEnvironmentException: If an observation is neither rank 2
        (vector) nor rank 4 (visual).
    """
    n_agents = step_result.n_agents()

    # Sort observation indices by rank: 2 -> vector batch, 4 -> visual batch.
    vis_obs_indices = []
    vec_obs_indices = []
    for index, observation in enumerate(step_result.obs):
        if len(observation.shape) == 2:
            vec_obs_indices.append(index)
        elif len(observation.shape) == 4:
            vis_obs_indices.append(index)
        else:
            raise UnityEnvironmentException(
                "Invalid input received from the environment, the observation should "
                "either be a vector of float or a PNG image"
            )

    if not vec_obs_indices:
        # No vector observations: keep the batch dimension, zero features.
        vec_obs = np.zeros((n_agents, 0), dtype=np.float32)
    else:
        vec_obs = np.concatenate(
            [step_result.obs[i] for i in vec_obs_indices], axis=1
        )
    vis_obs = [step_result.obs[i] for i in vis_obs_indices]

    if group_spec.is_action_discrete():
        mask = np.ones(
            (n_agents, np.sum(group_spec.discrete_action_branches)),
            dtype=np.float32,
        )
        if step_result.action_mask is not None:
            # Invert the step-result mask; cast so every code path returns a
            # float32 mask (the bare subtraction yields an integer array).
            mask = (1 - np.concatenate(step_result.action_mask, axis=1)).astype(
                np.float32
            )
    else:
        mask = np.ones(
            (n_agents, np.sum(group_spec.action_size)), dtype=np.float32
        )

    if agent_id_prefix is None:
        agent_ids = [str(ag_id) for ag_id in list(step_result.agent_id)]
    else:
        # NOTE(review): the leading "$" is emitted literally in the id (this
        # is not JS template syntax). Kept as-is because the ids are runtime
        # data - confirm whether it is intentional.
        agent_ids = [
            f"${agent_id_prefix}-{ag_id}" for ag_id in step_result.agent_id
        ]

    return BrainInfo(
        vis_obs,
        vec_obs,
        list(step_result.reward),
        agent_ids,
        list(step_result.done),
        list(step_result.max_step),
        mask,
    )
def group_spec_to_brain_parameters(
    name: str, group_spec: AgentGroupSpec
) -> BrainParameters:
    """Derive ``BrainParameters`` for ``name`` from an ``AgentGroupSpec``.

    Rank-1 observation shapes are summed into a single vector observation
    size; rank-3 shapes each become a ``CameraResolution``. The action space
    type passed to ``BrainParameters`` is 0 for discrete groups and 1
    otherwise.

    :param name: Name forwarded to ``BrainParameters``.
    :param group_spec: Group spec describing observations and actions.
    :return: The assembled ``BrainParameters``.
    """
    # Total length of all rank-1 (vector) observations.
    vector_total = np.sum(
        [dims[0] for dims in group_spec.observation_shapes if len(dims) == 1]
    )

    # One camera resolution per rank-3 (visual) observation.
    camera_resolutions = [
        CameraResolution(dims[0], dims[1], dims[2])
        for dims in group_spec.observation_shapes
        if len(dims) == 3
    ]

    action_sizes: List[int]
    if group_spec.is_action_discrete():
        action_sizes = list(group_spec.discrete_action_branches)
        action_space_type = 0
    else:
        action_sizes = [group_spec.action_size]
        action_space_type = 1

    return BrainParameters(
        name,
        int(vector_total),
        camera_resolutions,
        action_sizes,
        [],
        action_space_type,
    )