示例#1
0
 def _make_step_spec(self, obs_spec):
     """Build the per-step spec dict, cache it on the instance, return it.

     Args:
       obs_spec: Observation spec, stored as-is under the 'observation' key.

     Returns:
       The dict assigned to self._step_spec, with the keys 'step_type',
       'reward', 'discount' and 'observation'.
     """
     # Scalar int8 spec bounded by minimum=0 and maximum=2.
     step_type_spec = BoundedArraySpec(
         dtype=np.int8,
         shape=(),
         minimum=0,
         maximum=2,
         name='batched_env_step_type_spec',
     )
     # Reward and discount are both scalar float32 specs.
     reward_spec = ArraySpec(
         dtype=np.float32,
         shape=(),
         name='batched_env_reward_spec',
     )
     discount_spec = ArraySpec(
         dtype=np.float32,
         shape=(),
         name='batched_env_discount_spec',
     )

     self._step_spec = {
         'step_type': step_type_spec,
         'reward': reward_spec,
         'discount': discount_spec,
         'observation': obs_spec,
     }
     return self._step_spec
示例#2
0
 def __init__(self, obs_spec, step_output_spec):
     """Set up an empty trajectory holder and its spec.

     Args:
       obs_spec: Nested structure of observation specs; each leaf is
         passed through expand_spec.
       step_output_spec: Nested structure of step-output specs; each leaf
         is passed through expand_spec.
     """
     self._trajs = None

     # Both dimensions are left as None because the actual shape is not
     # known at construction time.
     # NOTE(review): presumably the two axes are time and batch -- confirm.
     unknown_shape = (None, None)

     traj_spec = {}
     traj_spec['step_type'] = ArraySpec(
         dtype=np.int8,
         shape=unknown_shape,
         name='traj_step_type_spec',
     )
     traj_spec['reward'] = ArraySpec(
         dtype=np.float32,
         shape=unknown_shape,
         name='traj_reward_spec',
     )
     traj_spec['discount'] = ArraySpec(
         dtype=np.float32,
         shape=unknown_shape,
         name='traj_discount_spec',
     )
     # The nested specs are mapped leaf-by-leaf with expand_spec.
     traj_spec['observation'] = nest.map_structure(expand_spec, obs_spec)
     traj_spec['step_output'] = nest.map_structure(expand_spec,
                                                   step_output_spec)
     self._traj_spec = traj_spec
示例#3
0
    def __init__(self, obs_spec, step_output_spec, batch_size, discount_factor,
                 traj_length):
        """Initialise the trajectory bookkeeping for a batch of experiences.

        Args:
          obs_spec: Nested structure of observation specs. It is deep-copied
            before a 'bootstrap_value' entry is added, so the caller's
            object is not modified by this constructor.
          step_output_spec: Nested structure of step-output specs.
          batch_size: Number of chopping trajectories to create.
          discount_factor: Stored on the instance as _discount_factor.
          traj_length: Stored on the instance as _traj_len.
        """
        self._batch_size = batch_size
        self._traj_len = traj_length
        self._discount_factor = discount_factor

        # Shapes are left as None in the spec since they are unknown here.
        unknown_shape = (None, None)
        step_type_spec = ArraySpec(dtype=np.int8,
                                   shape=unknown_shape,
                                   name='traj_step_type_spec')
        reward_spec = ArraySpec(dtype=np.float32,
                                shape=unknown_shape,
                                name='traj_reward_spec')
        discount_spec = ArraySpec(dtype=np.float32,
                                  shape=unknown_shape,
                                  name='traj_discount_spec')
        self._traj_spec = {
            'step_type': step_type_spec,
            'reward': reward_spec,
            'discount': discount_spec,
            'observation': nest.map_structure(expand_spec, obs_spec),
            'step_output': nest.map_structure(expand_spec, step_output_spec),
        }

        # self._trajs[i] = trajectory of the ith experience in the batch.
        self._trajs = None
        # Timesteps that have been backtracked and are ready to be split
        # into chunks to be shipped out.
        # _finished_timesteps[i] = finished timesteps for the ith batch item.
        self._finished_timesteps = None

        # Trajectories used to chop the trajectory into chunks. Their
        # observation spec is a copy of obs_spec extended with a
        # 'bootstrap_value' entry; the same extended spec object is handed
        # to every BaseTrajectory created below.
        chop_obs_spec = copy.deepcopy(obs_spec)
        chop_obs_spec['bootstrap_value'] = ArraySpec(
            dtype=np.float32,
            shape=(None, ),
            name='bootstrap_value_spec',
        )
        chopping_trajs = [
            BaseTrajectory(chop_obs_spec, step_output_spec)
            for _ in range(batch_size)
        ]
        self._chopping_trajs = chopping_trajs
        self._len = 0
示例#4
0
 def mk_spec(path_tuple, np_arr):
     """Return an ArraySpec with np_arr's shape and dtype.

     Args:
       path_tuple: Iterable of strings; joined with '_' and suffixed with
         '_spec' to form the spec name.
       np_arr: Object exposing .shape and .dtype.

     Returns:
       ArraySpec built from the array's shape, dtype and the derived name.
     """
     arr_shape = np_arr.shape
     arr_dtype = np_arr.dtype
     spec_name = '_'.join(path_tuple) + '_spec'
     return ArraySpec(arr_shape, arr_dtype, name=spec_name)
示例#5
0
文件: xor_env.py 项目: aravic/liaison
 def observation_spec(self):
     """Return the observation spec: a dict with one 'features' entry.

     Returns:
       Dict mapping 'features' to a float32 ArraySpec of shape (2,).
     """
     return {
         'features': ArraySpec((2, ), np.float32, name='features_spec'),
     }
示例#6
0
文件: shell.py 项目: aravic/liaison
 def mk_spec(tensor):
     """Return an ArraySpec describing the given tensor.

     Args:
       tensor: Object exposing .dtype.as_numpy_dtype, .shape and .name.

     Returns:
       ArraySpec carrying the tensor's numpy dtype, shape and name.
     """
     numpy_dtype = tensor.dtype.as_numpy_dtype
     tensor_shape = tensor.shape
     tensor_name = tensor.name
     return ArraySpec(dtype=numpy_dtype, shape=tensor_shape, name=tensor_name)