Exemplo n.º 1
0
  def observation_spec(self):
    """Describe the shape of every observation this environment can emit.

    Image-like entries are laid out in y,x (row, column) order, whereas
    actions use x,y order. The mismatch comes from conflicting conventions and
    makes the images easier to print.

    Returns:
      A NamedDict mapping each observation name to its shape tuple. A 0 in a
      shape marks a dimension whose length can vary, e.g. the number of valid
      actions depends on which units are selected.
    """
    # Width shared by every per-unit table built from the UI panels.
    unit_cols = len(UnitLayer)  # pytype: disable=wrong-arg-types

    spec = named_array.NamedDict({
        "action_result": (0,),  # See error.proto: ActionResult.
        "alerts": (0,),  # See sc2api.proto: Alert.
        "available_actions": (0,),
        "build_queue": (0, unit_cols),
        "cargo": (0, unit_cols),
        "cargo_slots_available": (1,),
        "control_groups": (10, 2),
        "game_loop": (1,),
        "last_actions": (0,),
        "multi_select": (0, unit_cols),
        "player": (len(Player),),  # pytype: disable=wrong-arg-types
        "score_cumulative": (
            len(ScoreCumulative),  # pytype: disable=wrong-arg-types
        ),
        "score_by_category": (
            len(ScoreByCategory),  # pytype: disable=wrong-arg-types
            len(ScoreCategories),  # pytype: disable=wrong-arg-types
        ),
        "score_by_vital": (
            len(ScoreByVital),  # pytype: disable=wrong-arg-types
            len(ScoreVitals),  # pytype: disable=wrong-arg-types
        ),
        # Only (n, 7) for n in (0, 1).
        "single_select": (0, unit_cols),
    })

    interface = self._agent_interface_format

    # Feature layers: (layer, y, x).
    feature_dims = interface.feature_dimensions
    if feature_dims:
      screen = feature_dims.screen
      spec["feature_screen"] = (len(SCREEN_FEATURES), screen.y, screen.x)
      minimap = feature_dims.minimap
      spec["feature_minimap"] = (len(MINIMAP_FEATURES), minimap.y, minimap.x)

    # Rendered images: (y, x, 3 colour channels).
    rgb_dims = interface.rgb_dimensions
    if rgb_dims:
      screen = rgb_dims.screen
      spec["rgb_screen"] = (screen.y, screen.x, 3)
      minimap = rgb_dims.minimap
      spec["rgb_minimap"] = (minimap.y, minimap.x, 3)

    # Optional per-unit tables and extras, each gated by its own flag.
    if interface.use_feature_units:
      spec["feature_units"] = (
          0, len(FeatureUnit))  # pytype: disable=wrong-arg-types

    if interface.use_raw_units:
      spec["raw_units"] = (0, len(FeatureUnit))

    if interface.use_unit_counts:
      spec["unit_counts"] = (0, len(UnitCounts))

    if interface.use_camera_position:
      spec["camera_position"] = (2,)

    return spec
Exemplo n.º 2
0
 def test_named_dict(self):
     """Check that NamedDict exposes its entries as both keys and attributes."""
     named = named_array.NamedDict(a=2, b=(1, 2))

     # Item access and attribute access must agree for every initial entry.
     for key in ("a", "b"):
         self.assertEqual(named[key], getattr(named, key))

     # Both access styles must hand back the very same stored object.
     self.assertIs(named["b"], named.b)
     # Distinct entries must stay distinct.
     self.assertNotEqual(named["a"], named.b)

     # An attribute assignment must become visible through item access.
     named.c = 3
     self.assertEqual(named["c"], 3)
Exemplo n.º 3
0
  def observation_spec(self):
    """Describe the shape of every observation this environment can emit.

    Image-like entries are laid out in y,x (row, column) order, whereas
    actions use x,y order. The mismatch comes from conflicting conventions and
    makes the images easier to print.

    Returns:
      A NamedDict mapping each observation name to its shape tuple. A 0 in a
      shape marks a dimension whose length can vary, e.g. the number of valid
      actions depends on which units are selected.
    """
    # Width shared by every per-unit table built from the UI panels.
    unit_cols = len(UnitLayer)  # pytype: disable=wrong-arg-types

    spec = named_array.NamedDict({
        "available_actions": (0,),
        "build_queue": (0, unit_cols),
        "cargo": (0, unit_cols),
        "cargo_slots_available": (1,),
        "control_groups": (10, 2),
        "game_loop": (1,),
        "last_actions": (0,),
        "multi_select": (0, unit_cols),
        "player": (len(Player),),  # pytype: disable=wrong-arg-types
        "score_cumulative": (
            len(ScoreCumulative),  # pytype: disable=wrong-arg-types
        ),
        # Only (n, 7) for n in (0, 1).
        "single_select": (0, unit_cols),
    })

    # Feature layers: (layer, y, x). Each is present only if its size is set.
    screen_px = self._feature_screen_px
    if screen_px:
      spec["feature_screen"] = (len(SCREEN_FEATURES), screen_px.y, screen_px.x)

    minimap_px = self._feature_minimap_px
    if minimap_px:
      spec["feature_minimap"] = (
          len(MINIMAP_FEATURES), minimap_px.y, minimap_px.x)

    # Rendered images: (y, x, 3 colour channels).
    rgb_screen_px = self._rgb_screen_px
    if rgb_screen_px:
      spec["rgb_screen"] = (rgb_screen_px.y, rgb_screen_px.x, 3)

    rgb_minimap_px = self._rgb_minimap_px
    if rgb_minimap_px:
      spec["rgb_minimap"] = (rgb_minimap_px.y, rgb_minimap_px.x, 3)

    if self._feature_units:
      spec["feature_units"] = (
          0, len(FeatureUnit))  # pytype: disable=wrong-arg-types

    return spec
Exemplo n.º 4
0
    def transform_obs(self, obs):
        """Render some SC2 observations into something an agent can handle.

        Args:
          obs: The observation response to convert. This method reads
            `obs.observation` (feature layers, render data, alerts, game loop,
            score, player, UI and raw data), `obs.actions` and
            `obs.action_errors`.

        Returns:
          A `named_array.NamedDict` of numpy arrays keyed by observation name.
          The selection/cargo/build-queue entries are always present (empty
          when nothing applies); the feature layers, rgb images, feature
          units, raw units, unit counts and camera position are only present
          when enabled in `self._agent_interface_format`.
        """
        empty = np.array([], dtype=np.int32).reshape((0, 7))
        out = named_array.NamedDict({  # Fill out some that are sometimes empty.
            "single_select": empty,
            "multi_select": empty,
            "build_queue": empty,
            "cargo": empty,
            "cargo_slots_available": np.array([0], dtype=np.int32),
        })

        def or_zeros(layer, size):
            """Return `layer` as int32, or a (size.y, size.x) zero layer."""
            if layer is not None:
                return layer.astype(np.int32, copy=False)
            else:
                return np.zeros((size.y, size.x), dtype=np.int32)

        aif = self._agent_interface_format

        if aif.feature_dimensions:
            # np.stack needs a real sequence: generator input was deprecated
            # in NumPy 1.16 and is rejected by newer releases, so build lists.
            out["feature_screen"] = named_array.NamedNumpyArray(
                np.stack([
                    or_zeros(f.unpack(obs.observation),
                             aif.feature_dimensions.screen)
                    for f in SCREEN_FEATURES
                ]),
                names=[ScreenFeatures, None, None])
            out["feature_minimap"] = named_array.NamedNumpyArray(
                np.stack([
                    or_zeros(f.unpack(obs.observation),
                             aif.feature_dimensions.minimap)
                    for f in MINIMAP_FEATURES
                ]),
                names=[MinimapFeatures, None, None])

        if aif.rgb_dimensions:
            out["rgb_screen"] = Feature.unpack_rgb_image(
                obs.observation.render_data.map).astype(np.int32)
            out["rgb_minimap"] = Feature.unpack_rgb_image(
                obs.observation.render_data.minimap).astype(np.int32)

        # Function ids of the actions reported in this observation.
        out["last_actions"] = np.array(
            [self.reverse_action(a).function for a in obs.actions],
            dtype=np.int32)

        out["action_result"] = np.array([o.result for o in obs.action_errors],
                                        dtype=np.int32)

        out["alerts"] = np.array(obs.observation.alerts, dtype=np.int32)

        out["game_loop"] = np.array([obs.observation.game_loop],
                                    dtype=np.int32)

        score_details = obs.observation.score.score_details
        out["score_cumulative"] = named_array.NamedNumpyArray(
            [
                obs.observation.score.score,
                score_details.idle_production_time,
                score_details.idle_worker_time,
                score_details.total_value_units,
                score_details.total_value_structures,
                score_details.killed_value_units,
                score_details.killed_value_structures,
                score_details.collected_minerals,
                score_details.collected_vespene,
                score_details.collection_rate_minerals,
                score_details.collection_rate_vespene,
                score_details.spent_minerals,
                score_details.spent_vespene,
            ],
            names=ScoreCumulative,
            dtype=np.int32)

        def get_score_details(key, details, categories):
            """Return one score row: `details.<key>` read per category."""
            row = getattr(details, key.name)
            return [getattr(row, category.name) for category in categories]

        out["score_by_category"] = named_array.NamedNumpyArray(
            [
                get_score_details(key, score_details, ScoreCategories)
                for key in ScoreByCategory
            ],
            names=[ScoreByCategory, ScoreCategories],
            dtype=np.int32)

        out["score_by_vital"] = named_array.NamedNumpyArray(
            [
                get_score_details(key, score_details, ScoreVitals)
                for key in ScoreByVital
            ],
            names=[ScoreByVital, ScoreVitals],
            dtype=np.int32)

        player = obs.observation.player_common
        out["player"] = named_array.NamedNumpyArray(
            [
                player.player_id,
                player.minerals,
                player.vespene,
                player.food_used,
                player.food_cap,
                player.food_army,
                player.food_workers,
                player.idle_worker_count,
                player.army_count,
                player.warp_gate_count,
                player.larva_count,
            ],
            names=Player,
            dtype=np.int32)

        def unit_vec(u):
            """Pack a UI unit entry into the 7-value row used by UI tables."""
            return np.array(
                (
                    u.unit_type,
                    u.player_relative,
                    u.health,
                    u.shields,
                    u.energy,
                    u.transport_slots_taken,
                    int(u.build_progress * 100),  # discretize
                ),
                dtype=np.int32)

        ui = obs.observation.ui_data

        with sw("ui"):
            groups = np.zeros((10, 2), dtype=np.int32)
            for g in ui.groups:
                groups[g.control_group_index, :] = (g.leader_unit_type,
                                                    g.count)
            out["control_groups"] = groups

            # NOTE(review): this relies on the truthiness of `ui.single`; if
            # `ui` is a protobuf message an unset sub-message may still be
            # truthy -- confirm whether a presence check is needed here.
            if ui.single:
                out["single_select"] = named_array.NamedNumpyArray(
                    [unit_vec(ui.single.unit)], [None, UnitLayer])

            if ui.multi and ui.multi.units:
                out["multi_select"] = named_array.NamedNumpyArray(
                    [unit_vec(u) for u in ui.multi.units], [None, UnitLayer])

            if ui.cargo and ui.cargo.passengers:
                # The selected transport is described by the cargo panel's own
                # unit (mirroring the production branch below), not by the
                # single-selection panel.
                out["single_select"] = named_array.NamedNumpyArray(
                    [unit_vec(ui.cargo.unit)], [None, UnitLayer])
                out["cargo"] = named_array.NamedNumpyArray(
                    [unit_vec(u) for u in ui.cargo.passengers],
                    [None, UnitLayer])
                out["cargo_slots_available"] = np.array(
                    [ui.cargo.slots_available], dtype=np.int32)

            if ui.production and ui.production.build_queue:
                out["single_select"] = named_array.NamedNumpyArray(
                    [unit_vec(ui.production.unit)], [None, UnitLayer])
                out["build_queue"] = named_array.NamedNumpyArray(
                    [unit_vec(u) for u in ui.production.build_queue],
                    [None, UnitLayer])

        def full_unit_vec(u, pos_transform, is_raw=False):
            """Pack a raw unit into a full feature row.

            The position and radius are mapped through `pos_transform`; the
            unit tag is only included when `is_raw` is true (0 otherwise).
            """
            screen_pos = pos_transform.fwd_pt(point.Point.build(u.pos))
            screen_radius = pos_transform.fwd_dist(u.radius)
            return np.array(
                (
                    # Match unit_vec order
                    u.unit_type,
                    u.alliance,  # Self = 1, Ally = 2, Neutral = 3, Enemy = 4
                    u.health,
                    u.shield,
                    u.energy,
                    u.cargo_space_taken,
                    int(u.build_progress * 100),  # discretize

                    # Resume API order
                    # Ratios are scaled to 0..255; a zero max yields 0 instead
                    # of dividing by zero.
                    int(u.health / u.health_max *
                        255) if u.health_max > 0 else 0,
                    int(u.shield / u.shield_max *
                        255) if u.shield_max > 0 else 0,
                    int(u.energy / u.energy_max *
                        255) if u.energy_max > 0 else 0,
                    u.display_type,  # Visible = 1, Snapshot = 2, Hidden = 3
                    u.owner,  # 1-15, 16 = neutral
                    screen_pos.x,
                    screen_pos.y,
                    u.facing,
                    screen_radius,
                    # Cloaked = 1, CloakedDetected = 2, NotCloaked = 3
                    u.cloak,
                    u.is_selected,
                    u.is_blip,
                    u.is_powered,
                    u.mineral_contents,
                    u.vespene_contents,

                    # Not populated for enemies or neutral
                    u.cargo_space_max,
                    u.assigned_harvesters,
                    u.ideal_harvesters,
                    u.weapon_cooldown,
                    len(u.orders),
                    u.tag if is_raw else 0),
                dtype=np.int64)

        raw = obs.observation.raw_data

        if aif.use_feature_units:
            with sw("feature_units"):
                # Update the camera location so we can calculate world to screen pos
                self._update_camera(point.Point.build(raw.player.camera))
                feature_units = []
                for u in raw.units:
                    if u.is_on_screen and u.display_type != sc_raw.Hidden:
                        feature_units.append(
                            full_unit_vec(u, self._world_to_feature_screen_px))
                out["feature_units"] = named_array.NamedNumpyArray(
                    feature_units, [None, FeatureUnit], dtype=np.int32)

        if aif.use_raw_units:
            with sw("raw_units"):
                raw_units = [
                    full_unit_vec(u, self._world_to_world_tl, is_raw=True)
                    for u in raw.units
                ]
                # NOTE(review): the rows are built as int64 but requested as
                # int32 here; presumably `u.tag` can exceed 32 bits -- confirm
                # whether the narrowing is acceptable for callers.
                out["raw_units"] = named_array.NamedNumpyArray(
                    raw_units, [None, FeatureUnit], dtype=np.int32)

        if aif.use_unit_counts:
            with sw("unit_counts"):
                # Count own units per unit type, emitted sorted by type.
                unit_counts = collections.defaultdict(int)
                for u in raw.units:
                    if u.alliance == sc_raw.Self:
                        unit_counts[u.unit_type] += 1
                out["unit_counts"] = named_array.NamedNumpyArray(
                    sorted(unit_counts.items()), [None, UnitCounts],
                    dtype=np.int32)

        if aif.use_camera_position:
            camera_position = self._world_to_world_tl.fwd_pt(
                point.Point.build(raw.player.camera))
            out["camera_position"] = np.array(
                (camera_position.x, camera_position.y), dtype=np.int32)

        out["available_actions"] = np.array(
            self.available_actions(obs.observation), dtype=np.int32)

        return out