Python DictSpace示例，gym.spaces.DictSpace Python示例

示例#1

0

显示文件

文件： kaggle_wrapper.py 项目： zivzone/ray

    def build_agent_spaces(self) -> Tuple[Space, Space]:
        """Construct the action and observation spaces

        Description of actions and observations:
        https://github.com/google-research/football/blob/master/gfootball/doc/observation.md
        """  # noqa: E501
        action_space = Discrete(19)
        # The football field's corners are [+-1., +-0.42]. However, the players
        # and balls may get out of the field. Thus we multiply those limits by
        # a factor of 2.
        xlim = 1. * 2
        ylim = 0.42 * 2
        num_players: int = 11
        xy_space = Box(
            np.array([-xlim, -ylim], dtype=np.float32),
            np.array([xlim, ylim], dtype=np.float32))
        xyz_space = Box(
            np.array([-xlim, -ylim, 0], dtype=np.float32),
            np.array([xlim, ylim, np.inf], dtype=np.float32))
        observation_space = DictSpace({
            "controlled_players": Discrete(2),
            "players_raw": TupleSpace([
                DictSpace({
                    # ball information
                    "ball": xyz_space,
                    "ball_direction": Box(-np.inf, np.inf, (3, )),
                    "ball_rotation": Box(-np.inf, np.inf, (3, )),
                    "ball_owned_team": Discrete(3),
                    "ball_owned_player": Discrete(num_players + 1),
                    # left team
                    "left_team": TupleSpace([xy_space] * num_players),
                    "left_team_direction": TupleSpace(
                        [xy_space] * num_players),
                    "left_team_tired_factor": Box(0., 1., (num_players, )),
                    "left_team_yellow_card": MultiBinary(num_players),
                    "left_team_active": MultiBinary(num_players),
                    "left_team_roles": MultiDiscrete([10] * num_players),
                    # right team
                    "right_team": TupleSpace([xy_space] * num_players),
                    "right_team_direction": TupleSpace(
                        [xy_space] * num_players),
                    "right_team_tired_factor": Box(0., 1., (num_players, )),
                    "right_team_yellow_card": MultiBinary(num_players),
                    "right_team_active": MultiBinary(num_players),
                    "right_team_roles": MultiDiscrete([10] * num_players),
                    # controlled player information
                    "active": Discrete(num_players),
                    "designated": Discrete(num_players),
                    "sticky_actions": MultiBinary(10),
                    # match state
                    "score": Box(-np.inf, np.inf, (2, )),
                    "steps_left": Box(0, np.inf, (1, )),
                    "game_mode": Discrete(7)
                })
            ])
        })
        return action_space, observation_space

示例#2

0

显示文件

文件： continuous_simulation.py 项目： blumx116/Chargers

 def reset(self, logging: bool = False):
     self.engine = self.generator.generate(logging=True)
     n_stations: int = self.engine.n_stations
     self.max_cars: int = self.engine.max_cars
     self.max_occ: int = np.max(self.engine.station_info[:, 2])
     self.observation_space = DictSpace({
         'station_idx':
         Box(0, n_stations, (n_stations, 1), dtype=np.int32),
         'station_locations':
         Box(0, np.inf, (n_stations, 2), dtype=np.float32),
         'station_occs':
         Box(0, self.max_occ, (n_stations, 1), dtype=np.int32),
         'station_maxes':
         Box(0, self.max_occ, (n_stations, 1), dtype=np.int32),
         'car_locs':
         Box(0, np.inf, (self.max_cars, 2), dtype=np.float32),
         'car_dest_idx':
         Box(0, n_stations, (self.max_cars, 1), dtype=np.int32),
         'car_dest_loc':
         Box(0, np.inf, (self.max_cars, 2), dtype=np.float32),
         't':
         Box(0, np.inf, (1, 1), dtype=np.int32),
         'query_loc':
         Box(0, np.inf, (1, 2), dtype=np.float32),
         'remaining_queries':
         Box(0, np.inf, (1, 1), dtype=np.int32)
     })
     self.action_space = Discrete(self.engine.n_stations)
     self.reward_range = (-1 * self.engine.max_cars,
                          3 * self.engine.max_cars)
     return self.state()

示例#3

0

显示文件

def convertOrderedDict2Space(odict, has_pixels=False, has_task_params=False):
    '''
        For now just using -inf and inf for the bounds of the Box
    '''
    if len(odict.keys()) == 1:
        # no concatenation
        return convertSpec2Space(list(odict.values())[0])
    else:
        # len keys is more than 1
        _min, _max = -np.Inf, np.Inf
        numdim = 0
        for key in odict:
            if key not in ['pixels', 'obs_task_params']:
                numdim += np.prod(odict[key].shape)
                # cur_min, cur_max = compute_min_max(spec)
                # _min = min(_min, cur_min)
                # _max = max(_max, cur_max)

        dict_thus_far = {
            'obs': spaces.Box(-np.inf, np.inf, shape=(numdim,))
        }
        if has_pixels:
            dict_thus_far.update(
                {'pixels': spaces.Box(low=0, high=1, shape=odict['pixels'].shape)}
            )
        if has_task_params:
            dict_thus_far.update(
                {'obs_task_params': spaces.Box(low=0, high=1, shape=odict['obs_task_params'].shape)}
            )         
        if len(dict_thus_far.keys()) == 1:
            # we just have obs so we will make it a Box
            # concatentation
            # numdim = sum([np.int(np.prod(odict[key].shape)) for key in odict])
            gym_space = spaces.Box(-np.inf, np.inf, shape=(numdim,))
        else:
            gym_space = DictSpace(dict_thus_far)
        
        return gym_space

示例#4

0

显示文件

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.actions: List[int] = []
        self.datasets_site_path = site_data_path(
            "llvm/10.0.0/bitcode_benchmarks")

        # Register the LLVM datasets.
        self.datasets_site_path.mkdir(parents=True, exist_ok=True)
        self.inactive_datasets_site_path.mkdir(parents=True, exist_ok=True)
        for dataset in LLVM_DATASETS:
            self.register_dataset(dataset)

        self.inst2vec = _INST2VEC_ENCODER

        self.observation.spaces["CpuInfo"].space = DictSpace({
            "name":
            Sequence(size_range=(0, None), dtype=str),
            "cores_count":
            Scalar(min=None, max=None, dtype=int),
            "l1i_cache_size":
            Scalar(min=None, max=None, dtype=int),
            "l1i_cache_count":
            Scalar(min=None, max=None, dtype=int),
            "l1d_cache_size":
            Scalar(min=None, max=None, dtype=int),
            "l1d_cache_count":
            Scalar(min=None, max=None, dtype=int),
            "l2_cache_size":
            Scalar(min=None, max=None, dtype=int),
            "l2_cache_count":
            Scalar(min=None, max=None, dtype=int),
            "l3_cache_size":
            Scalar(min=None, max=None, dtype=int),
            "l3_cache_count":
            Scalar(min=None, max=None, dtype=int),
            "l4_cache_size":
            Scalar(min=None, max=None, dtype=int),
            "l4_cache_count":
            Scalar(min=None, max=None, dtype=int),
        })

        self.observation.add_derived_space(
            id="Inst2vecPreprocessedText",
            base_id="Ir",
            space=Sequence(size_range=(0, None), dtype=str),
            cb=lambda base_observation: self.inst2vec.preprocess(
                base_observation),
            default_value="",
        )
        self.observation.add_derived_space(
            id="Inst2vecEmbeddingIndices",
            base_id="Ir",
            space=Sequence(size_range=(0, None), dtype=np.int32),
            cb=lambda base_observation: self.inst2vec.encode(
                self.inst2vec.preprocess(base_observation)),
            default_value=np.array([self.inst2vec.vocab["!UNK"]]),
        )
        self.observation.add_derived_space(
            id="Inst2vec",
            base_id="Ir",
            space=Sequence(size_range=(0, None), dtype=np.ndarray),
            cb=lambda base_observation: self.inst2vec.embed(
                self.inst2vec.encode(self.inst2vec.preprocess(base_observation)
                                     )),
            default_value=np.vstack(
                [self.inst2vec.embeddings[self.inst2vec.vocab["!UNK"]]]),
        )

        self.observation.add_derived_space(
            id="AutophaseDict",
            base_id="Autophase",
            space=DictSpace({
                name: Scalar(min=0, max=None, dtype=int)
                for name in AUTOPHASE_FEATURE_NAMES
            }),
            cb=lambda base_observation: {
                name: val
                for name, val in zip(AUTOPHASE_FEATURE_NAMES, base_observation)
            },
        )

示例#5

0

显示文件

文件： llvm_env.py 项目： LearnCV/CompilerGym

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.inst2vec = Inst2vecEncoder()

        self.register_derived_space(
            base_name="CpuInfo",
            derived_name="CpuInfoDict",
            derived_space=DictSpace({
                "name":
                Sequence(size_range=(0, None), dtype=str),
                "cores_count":
                Scalar(min=None, max=None, dtype=int),
                "l1i_cache_size":
                Scalar(min=None, max=None, dtype=int),
                "l1i_cache_count":
                Scalar(min=None, max=None, dtype=int),
                "l1d_cache_size":
                Scalar(min=None, max=None, dtype=int),
                "l1d_cache_count":
                Scalar(min=None, max=None, dtype=int),
                "l2_cache_size":
                Scalar(min=None, max=None, dtype=int),
                "l2_cache_count":
                Scalar(min=None, max=None, dtype=int),
                "l3_cache_size":
                Scalar(min=None, max=None, dtype=int),
                "l3_cache_count":
                Scalar(min=None, max=None, dtype=int),
                "l4_cache_size":
                Scalar(min=None, max=None, dtype=int),
                "l4_cache_count":
                Scalar(min=None, max=None, dtype=int),
            }),
            cb=lambda base_observation: base_observation,
        )

        self.register_derived_space(
            base_name="Ir",
            derived_name="Inst2vecPreprocessedText",
            derived_space=Sequence(size_range=(0, None), dtype=str),
            cb=lambda base_observation: self.inst2vec.preprocess(
                base_observation),
        )
        self.register_derived_space(
            base_name="Ir",
            derived_name="Inst2vecEmbeddingIndices",
            derived_space=Sequence(size_range=(0, None), dtype=np.int32),
            cb=lambda base_observation: self.inst2vec.encode(
                self.inst2vec.preprocess(base_observation)),
        )
        self.register_derived_space(
            base_name="Ir",
            derived_name="Inst2vec",
            derived_space=Sequence(size_range=(0, None), dtype=np.ndarray),
            cb=lambda base_observation: self.inst2vec.embed(
                self.inst2vec.encode(self.inst2vec.preprocess(base_observation)
                                     )),
        )

        self.register_derived_space(
            base_name="Autophase",
            derived_name="AutophaseDict",
            derived_space=DictSpace({
                name: Scalar(min=0, max=None, dtype=int)
                for name in AUTOPHASE_FEATURE_NAMES
            }),
            cb=lambda base_observation: {
                name: val
                for name, val in zip(AUTOPHASE_FEATURE_NAMES, base_observation)
            },
        )