示例#1
0
    def build_agent_spaces(self) -> Tuple[Space, Space]:
        """Create the action space and the observation space.

        The action space is ``Discrete(19)``. The observation space is a dict
        with a ``controlled_players`` entry and a ``players_raw`` tuple that
        holds one dict of raw per-player observations.

        Reference for the individual actions and observation fields:
        https://github.com/google-research/football/blob/master/gfootball/doc/observation.md

        Returns:
            The ``(action_space, observation_space)`` pair.
        """  # noqa: E501
        action_space = Discrete(19)
        team_size: int = 11

        # The pitch corners sit at [+-1., +-0.42]. Players and the ball can
        # leave the pitch, so the coordinate bounds are doubled.
        x_bound = 1. * 2
        y_bound = 0.42 * 2

        xy_low = np.array([-x_bound, -y_bound], dtype=np.float32)
        xy_high = np.array([x_bound, y_bound], dtype=np.float32)
        xy_space = Box(xy_low, xy_high)

        xyz_low = np.array([-x_bound, -y_bound, 0], dtype=np.float32)
        xyz_high = np.array([x_bound, y_bound, np.inf], dtype=np.float32)
        xyz_space = Box(xyz_low, xyz_high)

        # Ball information.
        ball_spaces = {
            "ball": xyz_space,
            "ball_direction": Box(-np.inf, np.inf, (3, )),
            "ball_rotation": Box(-np.inf, np.inf, (3, )),
            "ball_owned_team": Discrete(3),
            "ball_owned_player": Discrete(team_size + 1),
        }
        # Left team; every per-player position/direction entry reuses the
        # same xy_space object.
        left_spaces = {
            "left_team": TupleSpace([xy_space for _ in range(team_size)]),
            "left_team_direction": TupleSpace(
                [xy_space for _ in range(team_size)]),
            "left_team_tired_factor": Box(0., 1., (team_size, )),
            "left_team_yellow_card": MultiBinary(team_size),
            "left_team_active": MultiBinary(team_size),
            "left_team_roles": MultiDiscrete([10] * team_size),
        }
        # Right team, laid out exactly like the left one.
        right_spaces = {
            "right_team": TupleSpace([xy_space for _ in range(team_size)]),
            "right_team_direction": TupleSpace(
                [xy_space for _ in range(team_size)]),
            "right_team_tired_factor": Box(0., 1., (team_size, )),
            "right_team_yellow_card": MultiBinary(team_size),
            "right_team_active": MultiBinary(team_size),
            "right_team_roles": MultiDiscrete([10] * team_size),
        }
        # Controlled player information.
        controlled_spaces = {
            "active": Discrete(team_size),
            "designated": Discrete(team_size),
            "sticky_actions": MultiBinary(10),
        }
        # Match state.
        match_spaces = {
            "score": Box(-np.inf, np.inf, (2, )),
            "steps_left": Box(0, np.inf, (1, )),
            "game_mode": Discrete(7),
        }

        # Merge the groups, keeping the key order ball -> left -> right ->
        # controlled player -> match state.
        player_raw_space = DictSpace({
            **ball_spaces,
            **left_spaces,
            **right_spaces,
            **controlled_spaces,
            **match_spaces,
        })
        observation_space = DictSpace({
            "controlled_players": Discrete(2),
            "players_raw": TupleSpace([player_raw_space]),
        })
        return action_space, observation_space
示例#2
0
    def get_policy_configs_for_game(
            game_name: str) -> Tuple[dict, Callable[[AgentID], PolicyID]]:
        """Build the policy specs and agent-to-policy mapping for a game.

        Args:
            game_name: Name of the Unity3D game. "SoccerStrikersVsGoalie"
                yields the two policies "Goalie" and "Striker"; any other
                name must be a key of the space tables below and yields one
                policy named after the game.

        Returns:
            A tuple ``(policies, policy_mapping_fn)``. ``policies`` maps each
            policy ID to ``(None, obs_space, action_space, {})``, and
            ``policy_mapping_fn(agent_id, episode, **kwargs)`` returns the
            policy ID to use for an agent.

        Raises:
            KeyError: If ``game_name`` is not "SoccerStrikersVsGoalie" and is
                missing from the space tables.
        """

        def unbounded(shape, **box_kwargs):
            # Box spanning (-inf, +inf) over the given shape.
            return Box(float("-inf"), float("inf"), shape, **box_kwargs)

        # The Spaces used by the Client inside Unity3D have to be known to
        # the RLlib server ahead of time, so they are listed here per game
        # (or, for SoccerStrikersVsGoalie, per behavior).
        obs_spaces = {
            "3DBall": unbounded((8, )),
            "3DBallHard": unbounded((45, )),
            "GridFoodCollector": unbounded((40, 40, 6)),
            "Pyramids": TupleSpace([
                unbounded((56, )),
                unbounded((56, )),
                unbounded((56, )),
                unbounded((4, )),
            ]),
            # SoccerStrikersVsGoalie behaviors.
            "Goalie": unbounded((738, )),
            "Striker": TupleSpace([
                unbounded((231, )),
                unbounded((63, )),
            ]),
            "Sorter": TupleSpace([
                unbounded((20, 23)),
                unbounded((10, )),
                unbounded((8, )),
            ]),
            "Tennis": unbounded((27, )),
            "VisualHallway": unbounded((84, 84, 3)),
            "Walker": unbounded((212, )),
            "FoodCollector": TupleSpace([
                unbounded((49, )),
                unbounded((4, )),
            ]),
        }
        action_spaces = {
            "3DBall": unbounded((2, ), dtype=np.float32),
            "3DBallHard": unbounded((2, ), dtype=np.float32),
            "GridFoodCollector": MultiDiscrete([3, 3, 3, 2]),
            "Pyramids": MultiDiscrete([5]),
            # SoccerStrikersVsGoalie behaviors.
            "Goalie": MultiDiscrete([3, 3, 3]),
            "Striker": MultiDiscrete([3, 3, 3]),
            "Sorter": MultiDiscrete([3, 3, 3]),
            "Tennis": unbounded((3, )),
            "VisualHallway": MultiDiscrete([5]),
            "Walker": unbounded((39, )),
            "FoodCollector": MultiDiscrete([3, 3, 3, 2]),
        }

        # Policies (Unity: "behaviors") and agent-to-policy mapping fns.
        if game_name == "SoccerStrikersVsGoalie":
            # Two behaviors share this game, each with its own policy.
            policies = {
                policy_id: (None, obs_spaces[policy_id],
                            action_spaces[policy_id], {})
                for policy_id in ("Goalie", "Striker")
            }

            def policy_mapping_fn(agent_id, episode, **kwargs):
                # Agents whose ID mentions "Striker" use the Striker policy;
                # all others use the Goalie policy.
                if "Striker" in agent_id:
                    return "Striker"
                return "Goalie"

            return policies, policy_mapping_fn

        # Every other game has a single policy named after the game.
        single_spec = (None, obs_spaces[game_name], action_spaces[game_name],
                       {})
        policies = {game_name: single_spec}

        def policy_mapping_fn(agent_id, episode, **kwargs):
            return game_name

        return policies, policy_mapping_fn