def build_agent_spaces(self) -> Tuple[Space, Space]:
    """Build the action space and the observation space of an agent.

    The individual actions and observation fields are described in:
    https://github.com/google-research/football/blob/master/gfootball/doc/observation.md

    Returns:
        A ``(action_space, observation_space)`` pair. The action space is
        ``Discrete(19)``; the observation space is a dict space with the
        keys "controlled_players" and "players_raw", the latter being a
        one-element tuple space wrapping the per-player dict space.
    """  # noqa: E501
    action_space = Discrete(19)

    n_players: int = 11

    # The pitch corners sit at [+-1., +-0.42], but players and the ball may
    # leave the pitch, so every positional bound is doubled.
    x_bound = 1. * 2
    y_bound = 0.42 * 2

    # 2D space, shared by every per-player position and direction entry.
    xy_space = Box(
        np.array([-x_bound, -y_bound], dtype=np.float32),
        np.array([x_bound, y_bound], dtype=np.float32))
    # 3D space for the ball: same x/y bounds, z in [0, inf).
    xyz_space = Box(
        np.array([-x_bound, -y_bound, 0], dtype=np.float32),
        np.array([x_bound, y_bound, np.inf], dtype=np.float32))

    # Ball information.
    ball_fields = {
        "ball": xyz_space,
        "ball_direction": Box(-np.inf, np.inf, (3, )),
        "ball_rotation": Box(-np.inf, np.inf, (3, )),
        "ball_owned_team": Discrete(3),
        "ball_owned_player": Discrete(n_players + 1),
    }

    # Left team.
    left_fields = {
        "left_team": TupleSpace([xy_space] * n_players),
        "left_team_direction": TupleSpace([xy_space] * n_players),
        "left_team_tired_factor": Box(0., 1., (n_players, )),
        "left_team_yellow_card": MultiBinary(n_players),
        "left_team_active": MultiBinary(n_players),
        "left_team_roles": MultiDiscrete([10] * n_players),
    }

    # Right team.
    right_fields = {
        "right_team": TupleSpace([xy_space] * n_players),
        "right_team_direction": TupleSpace([xy_space] * n_players),
        "right_team_tired_factor": Box(0., 1., (n_players, )),
        "right_team_yellow_card": MultiBinary(n_players),
        "right_team_active": MultiBinary(n_players),
        "right_team_roles": MultiDiscrete([10] * n_players),
    }

    # Controlled player information.
    control_fields = {
        "active": Discrete(n_players),
        "designated": Discrete(n_players),
        "sticky_actions": MultiBinary(10),
    }

    # Match state.
    match_fields = {
        "score": Box(-np.inf, np.inf, (2, )),
        "steps_left": Box(0, np.inf, (1, )),
        "game_mode": Discrete(7),
    }

    # Merge the groups in the same key order as a single dict literal.
    raw_player_space = DictSpace({
        **ball_fields,
        **left_fields,
        **right_fields,
        **control_fields,
        **match_fields,
    })

    observation_space = DictSpace({
        "controlled_players": Discrete(2),
        "players_raw": TupleSpace([raw_player_space]),
    })
    return action_space, observation_space
def get_policy_configs_for_game(
        game_name: str) -> Tuple[dict, Callable[[AgentID], PolicyID]]:
    """Return the policy specs and the agent-to-policy mapping for a game.

    Args:
        game_name: Name of the Unity3D example game. Either
            "SoccerStrikersVsGoalie" (which uses the two behaviors
            "Goalie" and "Striker") or one of the keys of the space
            tables defined below.

    Returns:
        A ``(policies, policy_mapping_fn)`` pair. ``policies`` maps each
        policy ID to a ``(None, observation_space, action_space, {})``
        tuple. ``policy_mapping_fn(agent_id, episode, **kwargs)`` returns
        the policy ID for an agent.

    Raises:
        KeyError: If ``game_name`` is not a known game. The message lists
            the supported names.
    """

    def unbounded(shape, **box_kwargs):
        # Box spanning (-inf, inf) in every dimension of `shape`.
        return Box(float("-inf"), float("inf"), shape, **box_kwargs)

    # The RLlib server must know about the Spaces that the Client will be
    # using inside Unity3D, up-front.
    obs_spaces = {
        # 3DBall.
        "3DBall": unbounded((8, )),
        # 3DBallHard.
        "3DBallHard": unbounded((45, )),
        # GridFoodCollector.
        "GridFoodCollector": unbounded((40, 40, 6)),
        # Pyramids.
        "Pyramids": TupleSpace([
            unbounded((56, )),
            unbounded((56, )),
            unbounded((56, )),
            unbounded((4, )),
        ]),
        # SoccerStrikersVsGoalie.
        "Goalie": unbounded((738, )),
        "Striker": TupleSpace([
            unbounded((231, )),
            unbounded((63, )),
        ]),
        # Sorter.
        "Sorter": TupleSpace([
            unbounded((20, 23)),
            unbounded((10, )),
            unbounded((8, )),
        ]),
        # Tennis.
        "Tennis": unbounded((27, )),
        # VisualHallway.
        "VisualHallway": unbounded((84, 84, 3)),
        # Walker.
        "Walker": unbounded((212, )),
        # FoodCollector.
        "FoodCollector": TupleSpace([
            unbounded((49, )),
            unbounded((4, )),
        ]),
    }
    action_spaces = {
        # 3DBall.
        "3DBall": unbounded((2, ), dtype=np.float32),
        # 3DBallHard.
        "3DBallHard": unbounded((2, ), dtype=np.float32),
        # GridFoodCollector.
        "GridFoodCollector": MultiDiscrete([3, 3, 3, 2]),
        # Pyramids.
        "Pyramids": MultiDiscrete([5]),
        # SoccerStrikersVsGoalie.
        "Goalie": MultiDiscrete([3, 3, 3]),
        "Striker": MultiDiscrete([3, 3, 3]),
        # Sorter.
        "Sorter": MultiDiscrete([3, 3, 3]),
        # Tennis.
        "Tennis": unbounded((3, )),
        # VisualHallway.
        "VisualHallway": MultiDiscrete([5]),
        # Walker.
        "Walker": unbounded((39, )),
        # FoodCollector.
        "FoodCollector": MultiDiscrete([3, 3, 3, 2]),
    }

    # Policies (Unity: "behaviors") and agent-to-policy mapping fns.
    if game_name == "SoccerStrikersVsGoalie":
        policies = {
            "Goalie": (None, obs_spaces["Goalie"], action_spaces["Goalie"],
                       {}),
            "Striker": (None, obs_spaces["Striker"],
                        action_spaces["Striker"], {}),
        }

        def policy_mapping_fn(agent_id, episode, **kwargs):
            return "Striker" if "Striker" in agent_id else "Goalie"

    else:
        # Fail with an actionable message instead of a bare
        # `KeyError: '<name>'` coming out of the dict lookups below.
        if game_name not in obs_spaces or game_name not in action_spaces:
            supported = sorted(obs_spaces) + ["SoccerStrikersVsGoalie"]
            raise KeyError(
                f"Unsupported game_name {game_name!r}; "
                f"expected one of {supported}")

        policies = {
            game_name: (None, obs_spaces[game_name],
                        action_spaces[game_name], {}),
        }

        def policy_mapping_fn(agent_id, episode, **kwargs):
            return game_name

    return policies, policy_mapping_fn