def _convert_observations(self, original, home_team, player_index):
    """Converts generic environment observations to player specific ones.

    Every key of constants.EXPOSED_OBSERVATIONS except 'active' and
    'sticky_actions' is deep-copied from `original`. The two remaining
    entries are taken from the per-team agent lists at `player_index`.

    Args:
      original: original observations from the environment.
      home_team: is the player on the home team or not.
      player_index: index of the player for which to generate observations.

    Returns:
      Dict with the observations for the requested player. For away-team
      players this is whatever observation_rotation.flip_observation
      returns for the assembled dict.
    """
    separately_handled = ('active', 'sticky_actions')
    # Active and sticky_actions are added below.
    player_view = {
        key: copy.deepcopy(original[key])
        for key in constants.EXPOSED_OBSERVATIONS
        if key not in separately_handled
    }

    if not home_team:
        # Away players are stored under the 'opponent_*' keys and the whole
        # dict is then flipped.
        # Currently we don't support rotating of the 'frame'.
        away_active = original['away_agent_controlled_player'][player_index]
        player_view['opponent_active'] = copy.deepcopy(away_active)
        away_sticky = original['away_agent_sticky_actions'][player_index]
        player_view['opponent_sticky_actions'] = copy.deepcopy(away_sticky)
        player_view = observation_rotation.flip_observation(player_view)
    else:
        home_active = original['home_agent_controlled_player'][player_index]
        player_view['active'] = copy.deepcopy(home_active)
        home_sticky = original['home_agent_sticky_actions'][player_index]
        player_view['sticky_actions'] = copy.deepcopy(home_sticky)
        if 'frame' in original:
            # The frame is passed through by reference, not copied.
            player_view['frame'] = original['frame']

    # Sanity check that (almost) all exposed observations are present.
    # NOTE(review): the second clause tests 'frame' in the result dict, not
    # in the set of missing keys - confirm this is the intended condition.
    missing = constants.EXPOSED_OBSERVATIONS.difference(player_view.keys())
    assert not missing or (len(missing) == 1 and 'frame' in player_view)
    return player_view
def testObservationFlipping(self):
    """Checks that flipping an observation twice gives back the original."""
    cfg = config.Config()
    num_players = 11

    # Entries are listed in a fixed order so that both the key order and the
    # order of np.random calls are deterministic with respect to the code.
    observation = {
        'left_team': np.random.rand(num_players * 2) - 0.5,
        'left_team_roles': np.random.rand(num_players),
        'left_team_direction': np.random.rand(num_players * 2) - 0.5,
        'left_team_tired_factor': np.random.rand(num_players),
        'left_team_yellow_card': np.random.rand(num_players),
        'left_team_active': [3],
        'left_team_designated_player': 3,
        'right_team': np.random.rand(num_players * 2) - 0.5,
        'right_team_roles': np.random.rand(num_players),
        'right_team_direction': np.random.rand(num_players * 2) - 0.5,
        'right_team_tired_factor': np.random.rand(num_players),
        'right_team_yellow_card': np.random.rand(num_players),
        'right_team_active': [0],
        'right_team_designated_player': 0,
        'ball': np.array([1, -1, 0]),
        'ball_direction': np.random.rand(3) - 0.5,
        'ball_rotation': np.random.rand(3) - 0.5,
        'ball_owned_team': 0,
        'ball_owned_player': 7,
        'left_agent_controlled_player': [4],
        'right_agent_controlled_player': [6],
        'game_mode': 123,
        'left_agent_sticky_actions': [
            [np.random.rand(2) for _ in range(10)]
        ],
        'right_agent_sticky_actions': [
            [np.random.rand(2) for _ in range(10)]
        ],
        'score': [3, 5],
        'steps_left': 45,
    }

    # Flipping twice the observation is the identity.
    flipped_once = observation_rotation.flip_observation(observation, cfg)
    flipped_twice = observation_rotation.flip_observation(flipped_once, cfg)

    # Dicts are compared through the string form of their key-sorted items.
    round_trip_repr = str(tuple(sorted(flipped_twice.items())))
    initial_repr = str(tuple(sorted(observation.items())))
    self.assertEqual(round_trip_repr, initial_repr)
def convert_observations_static(
    original,
    player,
    left_player_position,
    right_player_position,
    config,
):
    """Converts generic environment observations to player specific ones.

    One observation dict is produced per controlled slot of `player`:
    first the left-side slots, then the right-side slots. For right-side
    slots of a player for which `player.can_play_right()` is false, the
    data is read from `observation_rotation.flip_observation(original,
    config)` using the 'left' agent keys; otherwise `original` is used.

    Args:
      original: original observations from the environment.
      player: player for which to generate observations.
      left_player_position: index into observation corresponding to the
        left player.
      right_player_position: index into observation corresponding to the
        right player.
      config: Config instance, forwarded to flip_observation.

    Returns:
      List of observation dicts. Each holds a deep copy of every key in
      constants.EXPOSED_OBSERVATIONS other than 'active' and
      'sticky_actions', plus those two entries for the slot ('active' is
      -1 and 'sticky_actions' is [] when the slot index is past the end of
      the agent list). Left-side entries also carry 'frame' when present
      in `original`.
    """
    assert isinstance(config, Config), config

    separately_handled = ('active', 'sticky_actions')
    result = []
    for is_left in (True, False):
        if is_left:
            view = original
            side = 'left'
            offset = left_player_position
            slot_count = player.num_controlled_left_players()
        else:
            if player.can_play_right():
                view = original
            else:
                view = observation_rotation.flip_observation(original, config)
            side = 'right' if player.can_play_right() else 'left'
            offset = right_player_position
            slot_count = player.num_controlled_right_players()

        active_key = side + '_agent_controlled_player'
        sticky_key = side + '_agent_sticky_actions'

        for slot in range(slot_count):
            # Active and sticky_actions are added below.
            entry = {
                name: copy.deepcopy(view[name])
                for name in constants.EXPOSED_OBSERVATIONS
                if name not in separately_handled
            }

            controlled = view[active_key]
            assert len(controlled) == len(view[sticky_key])

            index = offset + slot
            if index < len(controlled):
                entry['active'] = controlled[index]
                entry['sticky_actions'] = np.array(
                    copy.deepcopy(view[sticky_key][index]))
            else:
                # No agent data for this slot: use placeholder values.
                entry['active'] = -1
                entry['sticky_actions'] = []

            # There is no frame for players on the right ATM.
            if is_left and 'frame' in original:
                entry['frame'] = original['frame']
            result.append(entry)
    return result