def nest_spec(self, shape=(2, 3), dtype=np.float32):
     return {
         'array_spec_1':
         array_spec.ArraySpec(shape, dtype),
         'bounded_spec_1':
         array_spec.BoundedArraySpec(shape, dtype, -10, 10),
         'dict_spec': {
             'tensor_spec_2':
             array_spec.ArraySpec(shape, dtype),
             'bounded_spec_2':
             array_spec.BoundedArraySpec(shape, dtype, -10, 10)
         },
         'tuple_spec': (
             array_spec.ArraySpec(shape, dtype),
             array_spec.BoundedArraySpec(shape, dtype, -10, 10),
         ),
         'list_spec': [
             array_spec.ArraySpec(shape, dtype),
             (array_spec.ArraySpec(shape, dtype),
              array_spec.BoundedArraySpec(shape, dtype, -10, 10)),
         ],
     }
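
As a point of reference, a nest of specs like the one above is usually consumed leaf-by-leaf; a minimal sketch using array_spec.sample_spec_nest (which also appears in later examples here), assuming tf_agents is installed:

# Minimal sketch: sample_spec_nest draws a random array for every leaf spec
# while preserving the dict/tuple/list structure of the nest.
import numpy as np
from tf_agents.specs import array_spec

rng = np.random.RandomState(0)
nest = {'bounded': array_spec.BoundedArraySpec((2, 3), np.float32, -10, 10)}
sample = array_spec.sample_spec_nest(nest, rng)
assert sample['bounded'].shape == (2, 3)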
Example #2
    def testStepContinuous(self):
        obs_spec = array_spec.BoundedArraySpec((2, 3), np.int32, -10, 10)
        action_spec = array_spec.ArraySpec((2, ), np.float32)
        mock_env = mock.Mock(wraps=random_py_environment.RandomPyEnvironment(
            obs_spec, action_spec))
        one_hot_action_wrapper = wrappers.OneHotActionWrapper(mock_env)
        one_hot_action_wrapper.reset()

        one_hot_action_wrapper.step(np.array([0.5, 0.3]).astype(np.float32))
        self.assertTrue(mock_env.step.called)
        np.testing.assert_array_equal(
            np.array([0.5, 0.3]).astype(np.float32),
            mock_env.step.call_args[0][0])
Example #3
 def test_close_no_hang_after_init(self):
     constructor = functools.partial(
         random_py_environment.RandomPyEnvironment,
         array_spec.ArraySpec((3, 3), np.float32),
         array_spec.BoundedArraySpec([1],
                                     np.float32,
                                     minimum=-1.0,
                                     maximum=1.0),
         episode_end_probability=0,
         min_duration=2,
         max_duration=2)
     env = parallel_py_environment.ProcessPyEnvironment(constructor)
     env.start()
     env.close()
Example #4
  def _create_replay_buffer(self, rb_cls):
    self._stack_count = 4
    self._single_shape = (15, 15, 1)
    shape = (15, 15, self._stack_count)
    observation_spec = array_spec.ArraySpec(shape, np.int32, 'obs')
    time_step_spec = ts.time_step_spec(observation_spec)
    action_spec = policy_step.PolicyStep(array_spec.BoundedArraySpec(
        shape=(), dtype=np.int32, minimum=0, maximum=1, name='action'))
    self._trajectory_spec = trajectory.from_transition(
        time_step_spec, action_spec, time_step_spec)

    self._capacity = 32
    self._replay_buffer = rb_cls(
        data_spec=self._trajectory_spec, capacity=self._capacity)
Example #5
def example_nested_array_spec(dtype):
    return {
        "spec_1":
        array_spec.ArraySpec((2, 3), dtype),
        "bounded_spec_1":
        array_spec.BoundedArraySpec((2, 3), dtype, -10, 10),
        "bounded_array_spec_3":
        array_spec.BoundedArraySpec((2, ), dtype, [-10, -10], [10, 10]),
        "dict_spec": {
            "spec_2": array_spec.ArraySpec((2, 3), dtype),
            "bounded_spec_2": array_spec.BoundedArraySpec((2, 3), dtype, -10,
                                                          10)
        },
        "tuple_spec": (
            array_spec.ArraySpec((2, 3), dtype),
            array_spec.BoundedArraySpec((2, 3), dtype, -10, 10),
        ),
        "list_spec": [
            array_spec.ArraySpec((2, 3), dtype),
            (array_spec.ArraySpec((2, 3), dtype),
             array_spec.BoundedArraySpec((2, 3), dtype, -10, 10)),
        ],
    }
Example #6
  def testNotEqualOtherClass(self):
    spec_1 = array_spec.BoundedArraySpec(
        (1, 2), np.int32, minimum=[0.0, -0.6], maximum=[1.0, 1.0])
    spec_2 = array_spec.ArraySpec((1, 2), np.int32)
    self.assertNotEqual(spec_1, spec_2)
    self.assertNotEqual(spec_2, spec_1)

    spec_2 = None
    self.assertNotEqual(spec_1, spec_2)
    self.assertNotEqual(spec_2, spec_1)

    spec_2 = ()
    self.assertNotEqual(spec_1, spec_2)
    self.assertNotEqual(spec_2, spec_1)
Example #7
    def __init__(self,
                 piece_means: np.ndarray,
                 change_duration_generator: Callable[[], int],
                 batch_size: Optional[int] = 1):
        """Initializes a piecewise stationary Bernoulli Bandit environment.

    Args:
      piece_means: a matrix (list of lists) with shape (num_pieces, num_arms)
        containing floats in [0, 1]. Each list contains the mean rewards for
        the num_arms actions of the num_pieces pieces. The list is wrapped
        around after the last piece.
      change_duration_generator: a generator of the time durations. If this
        yields the values d0, d1, d2, ..., then the reward parameters change at
        steps d0, d0 + d1, d0 + d1 + d2, ..., as following:

        piece_means[0] for 0 <= t < d0
        piece_means[1] for d0 <= t < d0 + d1
        piece_means[2] for d0 + d1 <= t < d0 + d1 + d2
        ...

        Note that the values generated have to be non-negative. The value zero
        means that the corresponding parameters in the piece_means list are
        skipped, i.e. the duration of the piece is zero steps.
        If the generator ends (e.g. if it is obtained with iter(<list>)) and the
        step goes beyond the last piece, a StopIteration exception is raised.
      batch_size: If specified, this is the batch size for observation and
        actions.
    """
        self._batch_size = batch_size
        self._piece_means = np.asarray(piece_means, dtype=np.float32)
        if np.any(self._piece_means > 1.0) or np.any(self._piece_means < 0):
            raise ValueError('All parameters should be floats in [0, 1].')
        self._num_pieces, self._num_actions = self._piece_means.shape
        self._change_duration_generator = change_duration_generator
        self._current_time = -1
        self._current_piece = -1
        self._next_change = 0
        self._increment_time()

        action_spec = array_spec.BoundedArraySpec(shape=(),
                                                  dtype=np.int32,
                                                  minimum=0,
                                                  maximum=self._num_actions - 1,
                                                  name='action')
        observation_spec = array_spec.ArraySpec(shape=(1, ),
                                                dtype=np.int32,
                                                name='observation')
        super(PiecewiseBernoulliPyEnvironment,
              self).__init__(observation_spec, action_spec)
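
A short usage sketch for the constructor above. The import path is an assumption based on the class name, and, following the docstring, an iterator of durations is passed (the Callable annotation notwithstanding):

# Sketch only: a two-armed bandit whose mean rewards flip every 100 steps.
# itertools.cycle never ends, so StopIteration is never raised.
import itertools
from tf_agents.bandits.environments import piecewise_bernoulli_py_environment as pbe  # assumed path

piece_means = [[0.2, 0.8], [0.8, 0.2]]  # (num_pieces=2, num_arms=2)
env = pbe.PiecewiseBernoulliPyEnvironment(piece_means, itertools.cycle([100]))
time_step = env.reset()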
Example #8
    def __init__(self):
        self.duration = 30
        self.size = 10

        # IMPORTANT
        # Needed to be able to compare different environments' results.
        random.seed(0)
        np.random.seed(0)

        # Places and products
        # Place size: uniformly random, up to 2000 visits per day
        self.placeSize = random.random() * 2000

        # Average cost per product: 10
        self.productsCosts = np.random.exponential(size=self.size) * 10
        # Average margin rate: 10%
        self.productsUsualMarginRates = np.random.exponential(
            size=self.size) / 10
        # Products are bought on average once per hundred visitors
        self.productsUsualBuyingRates = np.random.exponential(
            size=self.size) / 100
        self.productsUsualPrices = self.productsCosts / (
            1 - self.productsUsualMarginRates)

        # Price flexibility between 5 and 10
        self.productsPriceFlexibility = np.random.random(
            size=self.size) * 5 + 5

        # Specs
        self.initial_observation = np.zeros((self.size, ), dtype=np.float32)

        # The action is an array of product prices, expressed as multiples of the product cost.
        # To speed up training, this environment doesn't allow selling at a loss.
        self._action_spec = array_spec.BoundedArraySpec(shape=(self.size, ),
                                                        dtype=np.float32,
                                                        minimum=1,
                                                        maximum=100,
                                                        name='action')

        self._observation_spec = array_spec.ArraySpec(shape=(self.size, ),
                                                      dtype=np.float32,
                                                      name='observation')

        self.seeds = list(range(self.duration))

        self._state = 0
        self._episode_ended = False
Example #9
    def __init__(self, env):
        """Initializes a grayscale wrapper."""
        super(GrayscaleWrapper, self).__init__(env)

        # Copy and update the observation spec from the wrapped environment.
        observation_spec = env.observation_spec()
        self._grayscale_observation_spec = copy.copy(observation_spec)
        frame_shape = observation_spec['pixels'].shape
        grayscale_frame_shape = frame_shape[:2] + (1, )
        self._grayscale_observation_spec['pixels'] = array_spec.ArraySpec(
            shape=grayscale_frame_shape,
            dtype=observation_spec['pixels'].dtype,
            name='grayscale_pixels')
Example #10
  def _convert(s):
    if isinstance(s, array_spec.ArraySpec):
      return s

    if hasattr(s, "minimum") and hasattr(s, "maximum"):
      return array_spec.BoundedArraySpec(
          s.shape.as_list(),
          s.dtype.as_numpy_dtype,
          minimum=s.minimum,
          maximum=s.maximum,
          name=s.name)
    else:
      return array_spec.ArraySpec(s.shape.as_list(),
                                  s.dtype.as_numpy_dtype,
                                  s.name)
Example #11
  def test_with_varying_observation_specs(
      self, observation_keys, observation_shapes, observation_dtypes):
    """Vary the observation spec and step the environment."""
    obs_spec = collections.OrderedDict()
    for idx, key in enumerate(observation_keys):
      obs_spec[key] = array_spec.ArraySpec(observation_shapes[idx],
                                           observation_dtypes)
    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)

    env = random_py_environment.RandomPyEnvironment(
        obs_spec, action_spec=action_spec)
    env = wrappers.FlattenObservationsWrapper(env)
    time_step = env.step(
        array_spec.sample_bounded_spec(action_spec, np.random.RandomState()))
    # Check that all observations returned from the environment are packed
    # into one dimension.
    expected_shape = self._get_expected_shape(obs_spec, obs_spec.keys())
    self.assertEqual(time_step.observation.shape, expected_shape)
    self.assertEqual(
        env.observation_spec(),
        array_spec.ArraySpec(
            shape=expected_shape,
            dtype=observation_dtypes,
            name='packed_observations'))
Example #12
    def __init__(self, env):
        """Initializes a wrapper."""
        super(FlattenState, self).__init__(env)
        # Update the observation spec in the environment.
        observation_spec = env.observation_spec()

        dim = 0
        dtype = None
        for v in observation_spec.values():
            dim += v.shape[0]
            dtype = v.dtype

        self._new_observation_spec = array_spec.ArraySpec(shape=(dim, ),
                                                          dtype=dtype,
                                                          name='state')
Example #13
    def testGetOuterArrayShape(self):
        spec = (array_spec.ArraySpec([5, 8], np.float32),
                (array_spec.ArraySpec([1], np.int32),
                 array_spec.ArraySpec([2, 2, 2], np.float32)))

        batch_size = 3
        unstacked_arrays = [
            self.zeros_from_spec(spec) for _ in range(batch_size)
        ]

        outer_dims = nest_utils.get_outer_array_shape(unstacked_arrays[0],
                                                      spec)
        self.assertEqual((), outer_dims)

        stacked_array = nest_utils.stack_nested_arrays(unstacked_arrays)
        outer_dims = nest_utils.get_outer_array_shape(stacked_array, spec)
        self.assertEqual((batch_size, ), outer_dims)

        time_dim = [
            nest_utils.batch_nested_array(arr) for arr in unstacked_arrays
        ]
        batch_time = nest_utils.stack_nested_arrays(time_dim)
        outer_dims = nest_utils.get_outer_array_shape(batch_time, spec)
        self.assertEqual((batch_size, 1), outer_dims)
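
The test above depends on a zeros_from_spec helper that is not shown; a plausible minimal implementation, assuming only tf.nest and the spec attributes used throughout these examples:

# Hypothetical helper (not part of the snippet above): build a zero-filled
# numpy array for each leaf spec, keeping the nest structure intact.
import numpy as np
import tensorflow as tf

def zeros_from_spec(spec):
    return tf.nest.map_structure(
        lambda s: np.zeros(s.shape, dtype=s.dtype), spec)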
Example #14
def to_array_spec(tensor_spec):
    """Converts TensorSpec into ArraySpec."""
    if isinstance(tensor_spec, array_spec.ArraySpec):
        return tensor_spec

    if hasattr(tensor_spec, "minimum") and hasattr(tensor_spec, "maximum"):
        return array_spec.BoundedArraySpec(tensor_spec.shape.as_list(),
                                           tensor_spec.dtype.as_numpy_dtype,
                                           minimum=tensor_spec.minimum,
                                           maximum=tensor_spec.maximum,
                                           name=tensor_spec.name)
    else:
        return array_spec.ArraySpec(tensor_spec.shape.as_list(),
                                    tensor_spec.dtype.as_numpy_dtype,
                                    tensor_spec.name)
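
A hedged round-trip check for to_array_spec, using tensor_spec.from_spec (seen in later examples) to produce the TensorSpec input; equality should hold because shape, dtype, and bounds survive both conversions:

# Sketch: convert an ArraySpec to a TensorSpec and back through the bounded
# branch of to_array_spec above.
import numpy as np
from tf_agents.specs import array_spec, tensor_spec

bounded = array_spec.BoundedArraySpec((2,), np.float32, -1.0, 1.0, name='act')
assert to_array_spec(tensor_spec.from_spec(bounded)) == bounded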
Example #15
  def test_batch_env(self):
    """Test batched version of the environment."""
    obs_spec = collections.OrderedDict({
        'obs1': array_spec.ArraySpec((1,), np.int32),
        'obs2': array_spec.ArraySpec((2,), np.int32),
    })
    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)

    # Generate a random py environment with a batch size.
    batch_size = 4
    env = random_py_environment.RandomPyEnvironment(
        obs_spec, action_spec=action_spec, batch_size=batch_size)
    env = MockGoalReplayEnvWrapper(env)
    random_action = array_spec.sample_bounded_spec(action_spec,
                                                   np.random.RandomState())

    time_step = env.step(random_action)
    self.assertIsInstance(time_step.observation, dict)
    self.assertEqual(time_step.observation.keys(),
                     env.observation_spec().keys())
    time_step = env.reset()
    self.assertIsInstance(time_step.observation, dict)
    self.assertEqual(time_step.observation.keys(),
                     env.observation_spec().keys())
Example #16
    def test_compress_image(self):
        if not common.has_eager_been_enabled():
            self.skipTest("Image compression only supported in TF2.x")

        gin.parse_config_files_and_bindings([], """
    _get_feature_encoder.compress_image=True
    _get_feature_parser.compress_image=True
    """)
        spec = {
            "image": array_spec.ArraySpec((128, 128, 3), np.uint8),
            "mask": array_spec.ArraySpec((128, 128, 1), np.uint8)
        }
        serializer = example_encoding.get_example_serializer(spec)
        decoder = example_encoding.get_example_decoder(spec)

        sample = {
            "image": 128 * np.ones([128, 128, 3], dtype=np.uint8),
            "mask": 128 * np.ones([128, 128, 1], dtype=np.uint8)
        }
        example_proto = serializer(sample)

        recovered = self.evaluate(decoder(example_proto))
        tf.nest.map_structure(np.testing.assert_almost_equal, sample,
                              recovered)
Example #17
    def __init__(self, env, stack_size, actions_in_obs, rewards_in_obs):
        """Initializes a wrapper."""
        super(FrameStack, self).__init__(env)
        self.stack_size = stack_size
        self._frames = collections.deque(maxlen=stack_size)
        self.actions_in_obs = actions_in_obs
        self.rewards_in_obs = rewards_in_obs

        # Copy and update the observation spec from the wrapped environment.
        observation_spec = env.observation_spec()
        self._new_observation_spec = copy.copy(observation_spec)

        # Redefine pixels spec
        frame_shape = observation_spec['pixels'].shape
        stacked_frame_shape = frame_shape[:2] + (frame_shape[2] * stack_size, )
        self._new_observation_spec['pixels'] = array_spec.ArraySpec(
            shape=stacked_frame_shape,
            dtype=observation_spec['pixels'].dtype,
            name='grayscale_pixels')

        # Define action stack spec
        if self.actions_in_obs:
            self._actions = collections.deque(maxlen=stack_size - 1)
            stacked_action_shape = (stack_size - 1, ) + env.action_spec().shape
            self._new_observation_spec['actions'] = array_spec.ArraySpec(
                shape=stacked_action_shape,
                dtype=env.action_spec().dtype,
                name='actions')

        # Define rewards stack spec
        if self.rewards_in_obs:
            self._rewards = collections.deque(maxlen=stack_size)
            self._new_observation_spec['rewards'] = array_spec.ArraySpec(
                shape=(stack_size, ), dtype=np.float32, name='rewards')
Example #18
 def test_close_no_hang_after_step(self):
   constructor = functools.partial(
       random_py_environment.RandomPyEnvironment,
       array_spec.ArraySpec((3, 3), np.float32),
       array_spec.BoundedArraySpec([1], np.float32, minimum=-1.0, maximum=1.0),
       episode_end_probability=0,
       min_duration=5,
       max_duration=5)
   rng = np.random.RandomState()
   env = parallel_py_environment.ProcessPyEnvironment(constructor)
   env.start()
   action_spec = env.action_spec()
   env.reset()
   env.step(array_spec.sample_bounded_spec(action_spec, rng))
   env.step(array_spec.sample_bounded_spec(action_spec, rng))
   env.close()
Example #19
  def __init__(self, goalX=0.0, goalY=0.0):
    x, y = goalX + random.randint(-100, 100), goalY + random.randint(-100, 100)
    self._ship = Ship(x, y)

    self._action_spec = array_spec.BoundedArraySpec(
        shape=(), dtype=np.int32, minimum=0, maximum=3, name='action')
    self._observation_spec = array_spec.ArraySpec(
        shape=(7,), dtype=np.float32, name='observation')
    self._state = self._ship.state()
    self._episode_ended = False

    self._time_elapsed = 0
    self._time_cap = 15  # seconds
    self._time_interval = 1.0 / 5.0  # seconds per frame (5 fps)
    self._physics_interval = 1.0 / 60.0

    self._goalX = goalX
    self._goalY = goalY
    self._terminal_distance = 1200
Example #20
    def testSavedModel(self):
        if not common.has_eager_been_enabled():
            self.skipTest('Only supported in eager.')

        observation_spec = array_spec.ArraySpec([2], np.float32)
        action_spec = array_spec.BoundedArraySpec([1], np.float32, 2, 3)
        time_step_spec = ts.time_step_spec(observation_spec)

        observation_tensor_spec = tensor_spec.from_spec(observation_spec)
        action_tensor_spec = tensor_spec.from_spec(action_spec)
        time_step_tensor_spec = tensor_spec.from_spec(time_step_spec)

        actor_net = actor_network.ActorNetwork(
            observation_tensor_spec,
            action_tensor_spec,
            fc_layer_params=(10, ),
        )

        tf_policy = actor_policy.ActorPolicy(time_step_tensor_spec,
                                             action_tensor_spec,
                                             actor_network=actor_net)

        path = os.path.join(self.get_temp_dir(), 'saved_policy')
        saver = policy_saver.PolicySaver(tf_policy)
        saver.save(path)

        eager_py_policy = py_tf_eager_policy.SavedModelPyTFEagerPolicy(
            path, time_step_spec, action_spec)

        rng = np.random.RandomState()
        sample_time_step = array_spec.sample_spec_nest(time_step_spec, rng)
        batched_sample_time_step = nest_utils.batch_nested_array(
            sample_time_step)

        original_action = tf_policy.action(batched_sample_time_step)
        unbatched_original_action = nest_utils.unbatch_nested_tensors(
            original_action)
        original_action_np = tf.nest.map_structure(lambda t: t.numpy(),
                                                   unbatched_original_action)
        saved_policy_action = eager_py_policy.action(sample_time_step)

        tf.nest.assert_same_structure(saved_policy_action.action, action_spec)

        np.testing.assert_array_almost_equal(original_action_np.action,
                                             saved_policy_action.action)
Example #21
 def setUp(self):
     super(PyTFEagerPolicyTest, self).setUp()
     self._observation_spec = array_spec.ArraySpec([2], np.float32)
     self._action_spec = array_spec.BoundedArraySpec([1], np.float32, 2, 3)
     self._observation_tensor_spec = tensor_spec.from_spec(
         self._observation_spec)
     self._action_tensor_spec = tensor_spec.from_spec(self._action_spec)
     self._time_step_tensor_spec = ts.time_step_spec(
         self._observation_tensor_spec)
     info_spec = {
         'a': array_spec.BoundedArraySpec([1], np.float32, 0, 1),
         'b': array_spec.BoundedArraySpec([1], np.float32, 100, 101)
     }
     self._info_tensor_spec = tensor_spec.from_spec(info_spec)
     # Env will validate action types automatically since we provided the
     # action_spec.
     self._env = random_py_environment.RandomPyEnvironment(
         self._observation_spec, self._action_spec)
Example #22
    def __init__(self, v_n=2, v_k=2, v_seed=2, do_transform=True):
        self._action_spec = array_spec.BoundedArraySpec(shape=(),
                                                        dtype=np.int32,
                                                        minimum=0,
                                                        maximum=v_n - 1,
                                                        name='action')
        self._observation_spec = array_spec.ArraySpec(shape=(v_k, ),
                                                      dtype=np.float32,
                                                      name='observation')
        self._time_step_spec = ts.time_step_spec(self._observation_spec)

        self.env = VectorIncrementEnvironment(n=v_n,
                                              k=v_k,
                                              seed=v_seed,
                                              do_transform=do_transform)
        self._state = self.env.encoded_state()
        self._episode_ended = False
        self._batched = False
Example #23
  def __init__(self, name=None, num_actions=3):
    input_spec = array_spec.ArraySpec([2], np.float32)
    action_spec = array_spec.BoundedArraySpec([1], np.float32, 1, num_actions)

    input_tensor_spec = tensor_spec.from_spec(input_spec)
    action_tensor_spec = tensor_spec.from_spec(action_spec)

    super(HeteroscedasticDummyNet, self).__init__(input_tensor_spec,
                                                  action_tensor_spec)
    self._value_layer = tf.keras.layers.Dense(
        num_actions,
        kernel_initializer=tf.constant_initializer([[1, 1.5, 2], [1, 1.5, 4]]),
        bias_initializer=tf.constant_initializer([[1], [1], [-10]]))

    self._log_variance_layer = tf.keras.layers.Dense(
        num_actions,
        kernel_initializer=tf.constant_initializer([[1, 1.5, 2], [1, 1.5, 4]]),
        bias_initializer=tf.constant_initializer([[1], [1], [-10]]))
Example #24
    def __init__(
        self,
        dictionary_path: str = DICTIONARY_PATH_DEFAULT,
        reward_map: Dict[str, float] = REWARD_DEFAULT,
        life_initial: int = 6,
        seed: int = 42,
    ):

        self._action_spec = array_spec.BoundedArraySpec(shape=(),
                                                        dtype=np.int32,
                                                        minimum=0,
                                                        maximum=25,
                                                        name="letter")
        # self._observation_spec = array_spec.BoundedArraySpec(
        #     shape=(30,), dtype=np.int32, minimum=-1, maximum=26, name="game"
        # )
        self._observation_spec = {
            'observations':
            array_spec.BoundedArraySpec(shape=(1, 30),
                                        dtype=np.float32,
                                        minimum=-1,
                                        maximum=26,
                                        name="game"),
            'legal_moves':
            array_spec.ArraySpec(shape=(26, ), dtype=np.float32),
        }
        self._state = np.empty(30)
        self._state.fill(-1)

        self._episode_ended = False
        self.logger = logging.getLogger(__name__)
        # 26 letters to be proposed
        # self.action_space = spaces.Discrete(26)
        # # 27 letter (26 + '.'+ '_') ex b.nj.ur___________
        # # 30 maximum size of word
        # # 2 state (to be found or not to be found)
        # self.observation_space = spaces.Tuple((
        #     spaces.Discrete(28),
        #     spaces.Discrete(30),
        # ))
        self.life_initial = life_initial
        self.words_set = list(set(self._build_dictionary(dictionary_path)))
        self.reward_map = reward_map
        self.seed(seed)
Example #25
  def __init__(self, env, out_width_height=None):
    super(FlattenImageObservationsWrapper, self).__init__(env)

    self.wh = out_width_height

    obs_spec: array_spec.ArraySpec = self._env.observation_spec()
    if not isinstance(obs_spec, collections.OrderedDict):
      raise ValueError('Unsupported observation_spec %s' % str(obs_spec))

    o_shape = None
    o_dtype = None
    o_name = []
    for _, obs in obs_spec.items():
      if not isinstance(obs, array_spec.ArraySpec):
        raise ValueError('Unsupported observation_spec %s' % str(obs))

      if len(obs.shape) != 3:
        raise ValueError('All observations must be images (got shape %s).' % (
            str(obs.shape)))

      if self.wh:
        # The image size will be normalized.
        cur_shape = self.wh + (obs.shape[2],)
      else:
        cur_shape = obs.shape

      if o_shape is None:
        o_shape = list(obs.shape)
        o_dtype = obs.dtype
      else:
        if tuple(o_shape[0:2]) != cur_shape[0:2]:
          raise ValueError('All images must be the same shape.')
        if o_dtype != obs.dtype:
          raise ValueError('All images must be the same dtype.')
        o_shape[2] += obs.shape[2]
      o_name.append(obs.name)

    self._observation_spec = array_spec.ArraySpec(
        shape=o_shape,
        dtype=o_dtype,
        name='_'.join(o_name) + '_flattened')
Example #26
    def testRandomTFPolicyCompatibility(self):
        if not common.has_eager_been_enabled():
            self.skipTest('Only supported in eager.')

        observation_spec = array_spec.ArraySpec([2], np.float32)
        action_spec = array_spec.BoundedArraySpec([1], np.float32, 2, 3)
        info_spec = {
            'a': array_spec.BoundedArraySpec([1], np.float32, 0, 1),
            'b': array_spec.BoundedArraySpec([1], np.float32, 100, 101)
        }

        observation_tensor_spec = tensor_spec.from_spec(observation_spec)
        action_tensor_spec = tensor_spec.from_spec(action_spec)
        info_tensor_spec = tensor_spec.from_spec(info_spec)
        time_step_tensor_spec = ts.time_step_spec(observation_tensor_spec)

        tf_policy = random_tf_policy.RandomTFPolicy(time_step_tensor_spec,
                                                    action_tensor_spec,
                                                    info_spec=info_tensor_spec)

        py_policy = py_tf_eager_policy.PyTFEagerPolicy(tf_policy)
        env = random_py_environment.RandomPyEnvironment(
            observation_spec, action_spec)
        time_step = env.reset()

        def _check_action_step(action_step):
            self.assertIsInstance(action_step.action, np.ndarray)
            self.assertEqual(action_step.action.shape, (1, ))
            self.assertBetween(action_step.action[0], 2.0, 3.0)

            self.assertIsInstance(action_step.info['a'], np.ndarray)
            self.assertEqual(action_step.info['a'].shape, (1, ))
            self.assertBetween(action_step.info['a'][0], 0.0, 1.0)

            self.assertIsInstance(action_step.info['b'], np.ndarray)
            self.assertEqual(action_step.info['b'].shape, (1, ))
            self.assertBetween(action_step.info['b'][0], 100.0, 101.0)

        for _ in range(100):
            action_step = py_policy.action(time_step)
            _check_action_step(action_step)
            time_step = env.step(action_step.action)
Example #27
    def testGeneratesBatchedActionsWithoutSpecifyingOuterDims(self):
        action_spec = [
            array_spec.BoundedArraySpec((2, 3), np.int32, -10, 10),
            array_spec.BoundedArraySpec((1, 2), np.int32, -10, 10)
        ]
        time_step_spec = time_step.time_step_spec(
            observation_spec=array_spec.ArraySpec((1, ), np.int32))
        policy = random_py_policy.RandomPyPolicy(time_step_spec=time_step_spec,
                                                 action_spec=action_spec)

        action_step = policy.action(
            time_step.restart(np.array([[1], [2], [3]], dtype=np.int32)))
        tf.nest.assert_same_structure(action_spec, action_step.action)
        self.assertEqual((3, 2, 3), action_step.action[0].shape)
        self.assertEqual((3, 1, 2), action_step.action[1].shape)

        self.assertTrue(np.all(action_step.action[0] >= -10))
        self.assertTrue(np.all(action_step.action[0] <= 10))
        self.assertTrue(np.all(action_step.action[1] >= -10))
        self.assertTrue(np.all(action_step.action[1] <= 10))
Example #28
    def _generate_replay_buffer(self, rb_cls):
        stack_count = 4
        shape = (15, 15, stack_count)
        single_shape = (15, 15, 1)
        observation_spec = array_spec.ArraySpec(shape, np.int32, 'obs')
        time_step_spec = ts.time_step_spec(observation_spec)
        action_spec = policy_step.PolicyStep(
            array_spec.BoundedArraySpec(shape=(),
                                        dtype=np.int32,
                                        minimum=0,
                                        maximum=1,
                                        name='action'))
        self._trajectory_spec = trajectory.from_transition(
            time_step_spec, action_spec, time_step_spec)

        self._capacity = 32
        self._replay_buffer = rb_cls(data_spec=self._trajectory_spec,
                                     capacity=self._capacity)

        # Generate N frames whose pixel values equal the frame index.
        # The observations are generated by stacking K of those N frames,
        # introducing redundancy between consecutive observations.
        single_frames = []
        frame_count = 100
        for k in range(frame_count):
            single_frames.append(np.full(single_shape, k, dtype=np.int32))

        # Add stack of frames to the replay buffer.
        time_steps = []
        for k in range(len(single_frames) - stack_count + 1):
            observation = np.concatenate(single_frames[k:k + stack_count],
                                         axis=-1)
            time_steps.append(ts.transition(observation, reward=0.0))

        self._transition_count = len(time_steps) - 1
        dummy_action = policy_step.PolicyStep(np.int32(0))
        for k in range(self._transition_count):
            self._replay_buffer.add_batch(
                nest_utils.batch_nested_array(
                    trajectory.from_transition(time_steps[k], dummy_action,
                                               time_steps[k + 1])))
Example #29
    def _create_replay_buffer(self, capacity=32):
        self._stack_count = 2
        self._single_shape = (1, )
        shape = (1, self._stack_count)
        observation_spec = array_spec.ArraySpec(shape, np.int32, 'obs')
        time_step_spec = ts.time_step_spec(observation_spec)
        action_spec = policy_step.PolicyStep(
            array_spec.BoundedArraySpec(shape=(),
                                        dtype=np.int32,
                                        minimum=0,
                                        maximum=1,
                                        name='action'))
        self._trajectory_spec = trajectory.from_transition(
            time_step_spec, action_spec, time_step_spec)

        self._capacity = capacity
        self._alpha = 0.6
        self._replay_buffer = PyPrioritizedReplayBuffer(
            data_spec=self._trajectory_spec,
            capacity=self._capacity,
            alpha=self._alpha)
Example #30
  def setUp(self):
    super(SavedModelPYTFEagerPolicyTest, self).setUp()
    if not common.has_eager_been_enabled():
      self.skipTest('Only supported in eager.')

    observation_spec = array_spec.ArraySpec([2], np.float32)
    self.action_spec = array_spec.BoundedArraySpec([1], np.float32, 2, 3)
    self.time_step_spec = ts.time_step_spec(observation_spec)

    observation_tensor_spec = tensor_spec.from_spec(observation_spec)
    action_tensor_spec = tensor_spec.from_spec(self.action_spec)
    time_step_tensor_spec = tensor_spec.from_spec(self.time_step_spec)

    actor_net = actor_network.ActorNetwork(
        observation_tensor_spec,
        action_tensor_spec,
        fc_layer_params=(10,),
    )

    self.tf_policy = actor_policy.ActorPolicy(
        time_step_tensor_spec, action_tensor_spec, actor_network=actor_net)