Example #1
def make_demo_buffer(
    pair_infos: List[AgentInfoActionPairProto],
    brain_params: BrainParameters,
    sequence_length: int,
) -> AgentBuffer:
    # Create and populate buffer using experiences
    demo_raw_buffer = AgentBuffer()
    demo_processed_buffer = AgentBuffer()
    for idx, current_pair_info in enumerate(pair_infos):
        # Stop one entry early: each step also needs the *next* experience.
        if idx > len(pair_infos) - 2:
            break
        next_pair_info = pair_infos[idx + 1]
        current_brain_info = BrainInfo.from_agent_proto(
            0, [current_pair_info.agent_info], brain_params
        )
        next_brain_info = BrainInfo.from_agent_proto(
            0, [next_pair_info.agent_info], brain_params
        )
        # The previous action is all zeros for the first step of a trajectory;
        # otherwise it is the action taken at the preceding step.
        previous_action = (
            np.array(pair_infos[idx].action_info.vector_actions, dtype=np.float32) * 0
        )
        if idx > 0:
            previous_action = np.array(
                pair_infos[idx - 1].action_info.vector_actions, dtype=np.float32
            )
        demo_raw_buffer["done"].append(next_brain_info.local_done[0])
        demo_raw_buffer["rewards"].append(next_brain_info.rewards[0])
        for i in range(brain_params.number_visual_observations):
            demo_raw_buffer["visual_obs%d" % i].append(
                current_brain_info.visual_observations[i][0]
            )
        if brain_params.vector_observation_space_size > 0:
            demo_raw_buffer["vector_obs"].append(
                current_brain_info.vector_observations[0]
            )
        demo_raw_buffer["actions"].append(current_pair_info.action_info.vector_actions)
        demo_raw_buffer["prev_action"].append(previous_action)
        if next_brain_info.local_done[0]:
            # End of episode: flush the raw buffer into fixed-length sequences.
            demo_raw_buffer.resequence_and_append(
                demo_processed_buffer, batch_size=None, training_length=sequence_length
            )
            demo_raw_buffer.reset_agent()
    demo_raw_buffer.resequence_and_append(
        demo_processed_buffer, batch_size=None, training_length=sequence_length
    )
    return demo_processed_buffer
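A minimal usage sketch (assuming the demonstration pairs and brain parameters were loaded elsewhere; the load_demonstration helper, its return order, and the .demo path below are illustrative assumptions, not part of the example above):

# Hypothetical loading step: helper name, return order, and file path are assumptions.
brain_params, pair_infos, _ = load_demonstration("expert.demo")

# Turn the recorded experiences into fixed-length training sequences.
demo_buffer = make_demo_buffer(pair_infos, brain_params, sequence_length=64)
print(len(demo_buffer["actions"]))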
Example #2
def test_take_action_returns_nones_on_missing_values():
    test_seed = 3
    policy = TFPolicy(test_seed, basic_mock_brain(), basic_params())
    policy.evaluate = MagicMock(return_value={})
    policy.save_memories = MagicMock()
    brain_info_with_agents = BrainInfo([], [], [],
                                       agents=["an-agent-id"],
                                       local_done=[False])
    result = policy.get_action(brain_info_with_agents)
    assert result == ActionInfo(None, None, {})
Example #3
def test_take_action_returns_action_info_when_available():
    test_seed = 3
    policy = TFPolicy(test_seed, basic_mock_brain(), basic_params())
    policy_eval_out = {
        "action": np.array([1.0], dtype=np.float32),
        "memory_out": np.array([[2.5]], dtype=np.float32),
        "value": np.array([1.1], dtype=np.float32),
    }
    policy.evaluate = MagicMock(return_value=policy_eval_out)
    brain_info_with_agents = BrainInfo([], [], [],
                                       agents=["an-agent-id"],
                                       local_done=[False])
    result = policy.get_action(brain_info_with_agents)
    expected = ActionInfo(policy_eval_out["action"], policy_eval_out["value"],
                          policy_eval_out)
    assert result == expected
Example #4
def step_result_to_brain_info(
    step_result: BatchedStepResult,
    group_spec: AgentGroupSpec,
    agent_id_prefix: int = None,
) -> BrainInfo:
    n_agents = step_result.n_agents()
    vis_obs_indices = []
    vec_obs_indices = []
    for index, observation in enumerate(step_result.obs):
        # Rank-2 arrays (n_agents, obs_size) are vector observations;
        # rank-4 arrays (n_agents, height, width, channels) are visual observations.
        if len(observation.shape) == 2:
            vec_obs_indices.append(index)
        elif len(observation.shape) == 4:
            vis_obs_indices.append(index)
        else:
            raise UnityEnvironmentException(
                "Invalid input received from the environment, the observation should "
                "either be a vector of float or a PNG image")
    if len(vec_obs_indices) == 0:
        vec_obs = np.zeros((n_agents, 0), dtype=np.float32)
    else:
        vec_obs = np.concatenate([step_result.obs[i] for i in vec_obs_indices],
                                 axis=1)
    vis_obs = [step_result.obs[i] for i in vis_obs_indices]
    mask = np.ones((n_agents, np.sum(group_spec.action_size)),
                   dtype=np.float32)
    if group_spec.is_action_discrete():
        mask = np.ones((n_agents, np.sum(group_spec.discrete_action_branches)),
                       dtype=np.float32)
        if step_result.action_mask is not None:
            # BatchedStepResult marks forbidden actions with 1, while BrainInfo
            # expects 1 for allowed actions, so the mask is inverted here.
            mask = 1 - np.concatenate(step_result.action_mask, axis=1)
    if agent_id_prefix is None:
        agent_ids = [str(ag_id) for ag_id in list(step_result.agent_id)]
    else:
        agent_ids = [
            f"${agent_id_prefix}-{ag_id}" for ag_id in step_result.agent_id
        ]
    return BrainInfo(
        vis_obs,
        vec_obs,
        list(step_result.reward),
        agent_ids,
        list(step_result.done),
        list(step_result.max_step),
        mask,
    )
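The rank check above relies on a fixed shape convention. A small self-contained numpy sketch of that convention (the sizes below are made up for illustration):

import numpy as np

n_agents = 2
# Rank-2 array: vector observation of shape (n_agents, obs_size).
vector_obs = np.zeros((n_agents, 8), dtype=np.float32)
# Rank-4 array: visual observation of shape (n_agents, height, width, channels).
visual_obs = np.zeros((n_agents, 84, 84, 3), dtype=np.float32)

for obs in (vector_obs, visual_obs):
    kind = "vector" if len(obs.shape) == 2 else "visual"
    print(kind, obs.shape)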
Example #5
def construct_curr_info(self, next_info: BrainInfo) -> BrainInfo:
    """
    Constructs a BrainInfo which contains the most recent previous experiences for all agents
    which correspond to the agents in a provided next_info.
    :param next_info: A t+1 BrainInfo.
    :return: curr_info: Reconstructed BrainInfo to match agents of next_info.
    """
    visual_observations: List[List[Any]] = [
        [] for _ in next_info.visual_observations
    ]  # TODO add types to brain.py methods
    vector_observations = []
    rewards = []
    local_dones = []
    max_reacheds = []
    agents = []
    action_masks = []
    for agent_id in next_info.agents:
        agent_brain_info = self.processing_buffer[agent_id].last_brain_info
        if agent_brain_info is None:
            agent_brain_info = next_info
        agent_index = agent_brain_info.agents.index(agent_id)
        for i in range(len(next_info.visual_observations)):
            visual_observations[i].append(
                agent_brain_info.visual_observations[i][agent_index])
        vector_observations.append(
            agent_brain_info.vector_observations[agent_index])
        rewards.append(agent_brain_info.rewards[agent_index])
        local_dones.append(agent_brain_info.local_done[agent_index])
        max_reacheds.append(agent_brain_info.max_reached[agent_index])
        agents.append(agent_brain_info.agents[agent_index])
        action_masks.append(agent_brain_info.action_masks[agent_index])
    curr_info = BrainInfo(
        visual_observations,
        vector_observations,
        rewards,
        agents,
        local_dones,
        max_reacheds,
        action_masks,
    )
    return curr_info
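The per-agent lookup above reduces to a "use the last stored info, otherwise fall back to next_info" pattern. A simplified standalone sketch with plain dicts (the agent ids and values are purely illustrative, not the processing buffer's real structure):

# last_infos holds the most recent stored info per agent; agents without an
# entry fall back to the value that just arrived in next_infos.
last_infos = {"agent-0": {"reward": 1.0}}
next_infos = {"agent-0": {"reward": 2.0}, "agent-1": {"reward": 0.5}}

curr_infos = {
    agent_id: last_infos.get(agent_id, next_info)
    for agent_id, next_info in next_infos.items()
}
print(curr_infos)  # agent-0 keeps its stored info, agent-1 falls back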
Example #6
def test_take_action_returns_empty_with_no_agents():
    test_seed = 3
    policy = TFPolicy(test_seed, basic_mock_brain(), basic_params())
    no_agent_brain_info = BrainInfo([], [], [], agents=[])
    result = policy.get_action(no_agent_brain_info)
    assert result == ActionInfo([], [], None)