def test_take_action_returns_empty_with_no_agents():
    """A policy queried with a zero-agent DecisionSteps yields ActionInfo.empty()."""
    seed = 3
    spec = basic_behavior_spec()
    policy = FakePolicy(seed, spec, TrainerSettings(), "output")
    empty_step = DecisionSteps.empty(spec)
    assert policy.get_action(empty_step) == ActionInfo.empty()
def test_empty_decision_steps():
    """DecisionSteps.empty produces one zero-agent array per observation shape."""
    spec = BehaviorSpec(
        observation_shapes=[(3, 2), (5,)],
        action_spec=ActionSpec.create_continuous(3),
    )
    steps = DecisionSteps.empty(spec)
    # Comparing the full shape list checks both the count and each shape.
    assert [obs.shape for obs in steps.obs] == [(0, 3, 2), (0, 5)]
def test_take_action_returns_empty_with_no_agents():
    """get_action on an empty DecisionSteps returns the empty ActionInfo."""
    seed = 3
    policy = FakePolicy(seed, basic_mock_brain(), basic_params())
    # The spec only needs to be well-formed; its exact contents are irrelevant here.
    spec = BehaviorSpec([(1,)], "continuous", 1)
    empty_step = DecisionSteps.empty(spec)
    assert policy.get_action(empty_step) == ActionInfo.empty()
def test_empty_decision_steps():
    """An empty DecisionSteps carries one zero-length array per sensor spec."""
    spec = BehaviorSpec(
        sensor_specs=create_sensor_specs_with_shapes([(3, 2), (5,)]),
        action_spec=ActionSpec.create_continuous(3),
    )
    steps = DecisionSteps.empty(spec)
    # One assertion covers both the observation count and each array's shape.
    assert [obs.shape for obs in steps.obs] == [(0, 3, 2), (0, 5)]
def test_empty_decision_steps():
    """DecisionSteps.empty yields zero-agent arrays shaped by observation_shapes."""
    spec = BehaviorSpec(
        observation_shapes=[(3, 2), (5,)],
        action_type=ActionType.CONTINUOUS,
        action_shape=3,
    )
    steps = DecisionSteps.empty(spec)
    # Shape-list comparison verifies count and per-observation shape at once.
    assert [obs.shape for obs in steps.obs] == [(0, 3, 2), (0, 5)]
def create_mock_steps(
    num_agents: int = 1,
    num_vector_observations: int = 0,
    num_vis_observations: int = 0,
    action_shape: Optional[List[int]] = None,
    discrete: bool = False,
    done: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:
    """
    Creates a mock Tuple[DecisionSteps, TerminalSteps] with observations.
    Imitates constant vector/visual observations, rewards, dones, and agents.

    :int num_agents: Number of "agents" to imitate.
    :int num_vector_observations: Length of the vector observation, if any.
    :int num_vis_observations: Number of 84x84x3 visual observations.
    :List action_shape: Branch sizes (discrete) or [size] (continuous); defaults to [2].
    :bool discrete: Whether or not action space is discrete
    :bool done: Whether all the agents in the batch are done
    """
    if action_shape is None:
        action_shape = [2]
    obs_list = []
    for _ in range(num_vis_observations):
        obs_list.append(np.ones((num_agents, 84, 84, 3), dtype=np.float32))
    # NOTE(review): `> 1` silently drops the vector obs when
    # num_vector_observations == 1, even though behavior_spec below still
    # declares it — confirm against callers before tightening to `> 0`.
    if num_vector_observations > 1:
        obs_list.append(
            np.array(num_agents * [num_vector_observations * [1]], dtype=np.float32)
        )
    action_mask = None
    if discrete:
        # One all-False mask per discrete branch: nothing is masked out.
        action_mask = [
            np.array(num_agents * [action_size * [False]])
            for action_size in action_shape
        ]
    reward = np.array(num_agents * [1.0], dtype=np.float32)
    # `np.bool` was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # `bool` produces the identical dtype.
    interrupted = np.array(num_agents * [False], dtype=bool)
    agent_id = np.arange(num_agents, dtype=np.int32)
    behavior_spec = BehaviorSpec(
        [(84, 84, 3)] * num_vis_observations + [(num_vector_observations, 0, 0)],
        ActionType.DISCRETE if discrete else ActionType.CONTINUOUS,
        action_shape if discrete else action_shape[0],
    )
    if done:
        # All agents terminated: decision side is empty, terminal side is populated.
        return (
            DecisionSteps.empty(behavior_spec),
            TerminalSteps(obs_list, reward, interrupted, agent_id),
        )
    else:
        return (
            DecisionSteps(obs_list, reward, agent_id, action_mask),
            TerminalSteps.empty(behavior_spec),
        )
def create_mock_steps(
    num_agents: int,
    observation_shapes: List[Tuple],
    action_shape: Optional[Union[int, Tuple[int]]] = None,
    discrete: bool = False,
    done: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:
    """
    Creates a mock Tuple[DecisionSteps, TerminalSteps] with observations.
    Imitates constant vector/visual observations, rewards, dones, and agents.

    :int num_agents: Number of "agents" to imitate.
    :List observation_shapes: A List of the observation spaces in your steps
    :int action_shape: Branch sizes (discrete tuple) or action size (int); defaults to 2.
    :bool discrete: Whether or not action space is discrete
    :bool done: Whether all the agents in the batch are done
    """
    if action_shape is None:
        action_shape = 2
    obs_list = []
    for _shape in observation_shapes:
        # Constant all-ones observation batched over agents.
        obs_list.append(np.ones((num_agents,) + _shape, dtype=np.float32))
    action_mask = None
    if discrete and isinstance(action_shape, Iterable):
        # One all-False mask per discrete branch: nothing is masked out.
        action_mask = [
            np.array(num_agents * [action_size * [False]])
            for action_size in action_shape  # type: ignore
        ]  # type: ignore
    reward = np.array(num_agents * [1.0], dtype=np.float32)
    # `np.bool` was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # `bool` produces the identical dtype.
    interrupted = np.array(num_agents * [False], dtype=bool)
    agent_id = np.arange(num_agents, dtype=np.int32)
    behavior_spec = BehaviorSpec(
        observation_shapes,
        ActionType.DISCRETE if discrete else ActionType.CONTINUOUS,
        action_shape,
    )
    if done:
        # All agents terminated: decision side is empty, terminal side is populated.
        return (
            DecisionSteps.empty(behavior_spec),
            TerminalSteps(obs_list, reward, interrupted, agent_id),
        )
    else:
        return (
            DecisionSteps(obs_list, reward, agent_id, action_mask),
            TerminalSteps.empty(behavior_spec),
        )
def _update_state(self, output: UnityRLOutputProto) -> None:
    """
    Collects experience information from all external brains in environment at
    current step: brains present in the message get steps decoded from their
    agent-info protos, absent brains get an empty (DecisionSteps, TerminalSteps)
    pair, and any side-channel payload is forwarded afterwards.
    """
    for brain_name, spec in self._env_specs.items():
        # Membership test (not .get/[]) on the protobuf map: indexing a
        # message map would auto-insert an empty entry.
        if brain_name not in output.agentInfos:
            self._env_state[brain_name] = (
                DecisionSteps.empty(spec),
                TerminalSteps.empty(spec),
            )
        else:
            infos = output.agentInfos[brain_name].value
            self._env_state[brain_name] = steps_from_proto(infos, spec)
    self._side_channel_manager.process_side_channel_message(output.side_channel)
def create_mock_steps(
    num_agents: int,
    observation_specs: List[ObservationSpec],
    action_spec: ActionSpec,
    done: bool = False,
    grouped: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:
    """
    Creates a mock Tuple[DecisionSteps, TerminalSteps] with observations.
    Imitates constant vector/visual observations, rewards, dones, and agents.

    :int num_agents: Number of "agents" to imitate.
    :List observation_specs: A List of the observation specs in your steps
    :int action_spec: ActionSpec for the agent
    :bool done: Whether all the agents in the batch are done
    :bool grouped: Whether agents share a (non-zero) group id.
    """
    obs_list = []
    for obs_spec in observation_specs:
        # Constant all-ones observation batched over agents.
        obs_list.append(np.ones((num_agents,) + obs_spec.shape, dtype=np.float32))
    action_mask = None
    if action_spec.is_discrete():
        # One all-False mask per discrete branch: nothing is masked out.
        action_mask = [
            np.array(num_agents * [action_size * [False]])
            for action_size in action_spec.discrete_branches  # type: ignore
        ]  # type: ignore
    reward = np.array(num_agents * [1.0], dtype=np.float32)
    # `np.bool` was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # `bool` produces the identical dtype.
    interrupted = np.array(num_agents * [False], dtype=bool)
    agent_id = np.arange(num_agents, dtype=np.int32)
    _gid = 1 if grouped else 0
    group_id = np.array(num_agents * [_gid], dtype=np.int32)
    group_reward = np.array(num_agents * [0.0], dtype=np.float32)
    behavior_spec = BehaviorSpec(observation_specs, action_spec)
    if done:
        # All agents terminated: decision side is empty, terminal side is populated.
        return (
            DecisionSteps.empty(behavior_spec),
            TerminalSteps(
                obs_list, reward, interrupted, agent_id, group_id, group_reward
            ),
        )
    else:
        return (
            DecisionSteps(
                obs_list, reward, agent_id, action_mask, group_id, group_reward
            ),
            TerminalSteps.empty(behavior_spec),
        )