def nest_spec(self, shape=(2, 3), dtype=np.float32):
    """Return a nested structure of array specs sharing one shape and dtype.

    The structure mixes a dict, a tuple and a list so that nest-handling code
    is exercised on every container kind. Every leaf is a freshly constructed
    spec object.

    Args:
        shape: Shape given to every leaf spec.
        dtype: Dtype given to every leaf spec.

    Returns:
        A dict whose leaves are `ArraySpec` / `BoundedArraySpec` instances;
        bounded leaves use the range [-10, 10].
    """

    def unbounded():
        return array_spec.ArraySpec(shape, dtype)

    def bounded():
        return array_spec.BoundedArraySpec(shape, dtype, -10, 10)

    return {
        'array_spec_1': unbounded(),
        'bounded_spec_1': bounded(),
        'dict_spec': {
            'tensor_spec_2': unbounded(),
            'bounded_spec_2': bounded(),
        },
        'tuple_spec': (unbounded(), bounded()),
        'list_spec': [
            unbounded(),
            (unbounded(), bounded()),
        ],
    }
def testStepContinuous(self):
    """A float action given to the wrapper reaches the wrapped env unchanged."""
    observation_spec = array_spec.BoundedArraySpec((2, 3), np.int32, -10, 10)
    continuous_action_spec = array_spec.ArraySpec((2, ), np.float32)
    wrapped_env = random_py_environment.RandomPyEnvironment(
        observation_spec, continuous_action_spec)
    # Wrap the real environment in a Mock so calls to `step` are recorded.
    mock_env = mock.Mock(wraps=wrapped_env)

    wrapper = wrappers.OneHotActionWrapper(mock_env)
    wrapper.reset()
    wrapper.step(np.array([0.5, 0.3]).astype(np.float32))

    self.assertTrue(mock_env.step.called)
    forwarded_action = mock_env.step.call_args[0][0]
    np.testing.assert_array_equal(
        np.array([0.5, 0.3]).astype(np.float32), forwarded_action)
def test_close_no_hang_after_init(self):
    """Start a process-backed environment and close it without stepping."""
    observation_spec = array_spec.ArraySpec((3, 3), np.float32)
    action_spec = array_spec.BoundedArraySpec(
        [1], np.float32, minimum=-1.0, maximum=1.0)
    # The constructor is deferred; it is handed to ProcessPyEnvironment
    # rather than being called here.
    make_env = functools.partial(
        random_py_environment.RandomPyEnvironment,
        observation_spec,
        action_spec,
        episode_end_probability=0,
        min_duration=2,
        max_duration=2)
    process_env = parallel_py_environment.ProcessPyEnvironment(make_env)
    process_env.start()
    process_env.close()
def _create_replay_buffer(self, rb_cls):
    """Create an empty replay buffer of class `rb_cls` for stacked frames.

    Stores the stack count, single-frame shape, trajectory spec, capacity and
    the buffer itself on `self`.

    Args:
        rb_cls: Replay buffer class; called with `data_spec` and `capacity`
            keyword arguments.
    """
    self._stack_count = 4
    self._single_shape = (15, 15, 1)
    stacked_shape = (15, 15, self._stack_count)

    step_spec = ts.time_step_spec(
        array_spec.ArraySpec(stacked_shape, np.int32, 'obs'))
    policy_step_spec = policy_step.PolicyStep(
        array_spec.BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=1, name='action'))
    self._trajectory_spec = trajectory.from_transition(
        step_spec, policy_step_spec, step_spec)

    self._capacity = 32
    self._replay_buffer = rb_cls(
        data_spec=self._trajectory_spec, capacity=self._capacity)
def example_nested_array_spec(dtype):
    """Return a nested dict/tuple/list of array specs that all use `dtype`.

    Most leaves have shape (2, 3); bounded leaves use scalar bounds
    [-10, 10]. "bounded_array_spec_3" has shape (2,) and per-element bounds.

    Args:
        dtype: Dtype applied to every leaf spec.

    Returns:
        A dict whose leaves are `ArraySpec` / `BoundedArraySpec` instances.
    """
    matrix_shape = (2, 3)

    def unbounded():
        return array_spec.ArraySpec(matrix_shape, dtype)

    def bounded():
        return array_spec.BoundedArraySpec(matrix_shape, dtype, -10, 10)

    return {
        "spec_1": unbounded(),
        "bounded_spec_1": bounded(),
        "bounded_array_spec_3": array_spec.BoundedArraySpec(
            (2, ), dtype, [-10, -10], [10, 10]),
        "dict_spec": {
            "spec_2": unbounded(),
            "bounded_spec_2": bounded(),
        },
        "tuple_spec": (unbounded(), bounded()),
        "list_spec": [
            unbounded(),
            (unbounded(), bounded()),
        ],
    }
def testNotEqualOtherClass(self):
    """A BoundedArraySpec is unequal to a plain ArraySpec, None and ()."""
    bounded = array_spec.BoundedArraySpec(
        (1, 2), np.int32, minimum=[0.0, -0.6], maximum=[1.0, 1.0])
    others = (array_spec.ArraySpec((1, 2), np.int32), None, ())
    for other in others:
        # Check both operand orders so each side's comparison is exercised.
        self.assertNotEqual(bounded, other)
        self.assertNotEqual(other, bounded)
def __init__(self,
             piece_means: np.ndarray,
             change_duration_generator: Callable[[], int],
             batch_size: Optional[int] = 1):
    """Initializes a piecewise stationary Bernoulli Bandit environment.

    Args:
      piece_means: a matrix (list of lists) with shape
        (num_pieces, num_arms) containing floats in [0, 1]. Row i holds the
        mean rewards of the num_arms actions during piece i. The list is
        wrapped around after the last piece.
      change_duration_generator: a generator of the time durations. If this
        yields the values d0, d1, d2, ..., then the reward parameters change
        at steps d0, d0 + d1, d0 + d1 + d2, ..., as follows:

          piece_means[0] for 0 <= t < d0
          piece_means[1] for d0 <= t < d0 + d1
          piece_means[2] for d0 + d1 <= t < d0 + d1 + d2
          ...

        Note that the values generated have to be non-negative. The value
        zero means that the corresponding parameters in the piece_means list
        are skipped, i.e. the duration of the piece is zero steps. If the
        generator ends (e.g. if it is obtained with iter(<list>)) and the
        step goes beyond the last piece, a StopIteration exception is raised.
      batch_size: If specified, this is the batch size for observation and
        actions.

    Raises:
      ValueError: if any entry of `piece_means` lies outside [0, 1].
    """
    self._batch_size = batch_size

    means = np.asarray(piece_means, dtype=np.float32)
    self._piece_means = means
    if np.any(means > 1.0) or np.any(means < 0):
        raise ValueError('All parameters should be floats in [0, 1].')
    self._num_pieces, self._num_actions = means.shape

    self._change_duration_generator = change_duration_generator
    # The time/piece counters start one step "before" the beginning; the
    # call to `_increment_time` below then moves them forward.
    self._current_time = -1
    self._current_piece = -1
    self._next_change = 0
    self._increment_time()

    observation_spec = array_spec.ArraySpec(
        shape=(1, ), dtype=np.int32, name='observation')
    action_spec = array_spec.BoundedArraySpec(
        shape=(),
        dtype=np.int32,
        minimum=0,
        maximum=self._num_actions - 1,
        name='action')
    super(PiecewiseBernoulliPyEnvironment, self).__init__(
        observation_spec, action_spec)
def __init__(self):
    """Set up a seeded pricing environment with `size` products.

    Product costs, margins, buying rates and price flexibility are drawn
    from the global `random` / `np.random` generators right after both are
    seeded with 0, so every instance gets the same parameters.
    """
    self.duration = 30
    self.size = 10

    # IMPORTANT
    # Fixed seeds are needed to be able to compare the results of
    # different environments.
    random.seed(0)
    np.random.seed(0)

    # Places and products.
    # Average size of places: 2000 visits per day.
    self.placeSize = random.random() * 2000
    # Average cost per product: 10.
    self.productsCosts = np.random.exponential(size=self.size) * 10
    # Average margin rate: 10%.
    self.productsUsualMarginRates = (
        np.random.exponential(size=self.size) / 10)
    # Products are on average bought once per hundred visitors.
    self.productsUsualBuyingRates = (
        np.random.exponential(size=self.size) / 100)
    usual_margin_complement = 1 - self.productsUsualMarginRates
    self.productsUsualPrices = self.productsCosts / usual_margin_complement
    # Price flexibility between 5 and 10.
    self.productsPriceFlexibility = (
        np.random.random(size=self.size) * 5 + 5)

    # Specs.
    vector_shape = (self.size, )
    self.initial_observation = np.zeros(vector_shape, dtype=np.float32)
    # The action is an array of all the product prices, expressed as a
    # multiple of the product cost. The minimum of 1 means this environment
    # does not allow selling at a loss, so that training is faster.
    self._action_spec = array_spec.BoundedArraySpec(
        shape=vector_shape,
        dtype=np.float32,
        minimum=1,
        maximum=100,
        name='action')
    self._observation_spec = array_spec.ArraySpec(
        shape=vector_shape, dtype=np.float32, name='observation')

    # One seed per step of the episode: 0, 1, ..., duration - 1.
    self.seeds = list(range(self.duration))

    self._state = 0
    self._episode_ended = False
def __init__(self, env):
    """Initializes a grayscale wrapper.

    Builds a copy of the wrapped environment's observation spec in which the
    'pixels' entry keeps its first two dimensions and dtype but has a single
    channel.

    Args:
        env: Environment to wrap; its observation spec must be a mapping
            with a 'pixels' entry.
    """
    super(GrayscaleWrapper, self).__init__(env)

    source_spec = env.observation_spec()
    # Shallow copy: only the 'pixels' entry is replaced below.
    grayscale_spec = copy.copy(source_spec)

    pixels_spec = source_spec['pixels']
    height_and_width = pixels_spec.shape[:2]
    grayscale_spec['pixels'] = array_spec.ArraySpec(
        shape=height_and_width + (1, ),
        dtype=pixels_spec.dtype,
        name='grayscale_pixels')
    self._grayscale_observation_spec = grayscale_spec
def _convert(s):
    """Convert a single spec to an `ArraySpec` or `BoundedArraySpec`.

    An existing `ArraySpec` is returned as is. Any other spec is converted
    from its `shape`, `dtype` and `name`; it becomes bounded when it exposes
    both `minimum` and `maximum`.
    """
    if isinstance(s, array_spec.ArraySpec):
        return s

    if not (hasattr(s, "minimum") and hasattr(s, "maximum")):
        return array_spec.ArraySpec(
            s.shape.as_list(), s.dtype.as_numpy_dtype, s.name)

    return array_spec.BoundedArraySpec(
        s.shape.as_list(),
        s.dtype.as_numpy_dtype,
        minimum=s.minimum,
        maximum=s.maximum,
        name=s.name)
def test_with_varying_observation_specs(
        self, observation_keys, observation_shapes, observation_dtypes):
    """Vary the observation spec and step the environment."""
    nested_obs_spec = collections.OrderedDict()
    for position, name in enumerate(observation_keys):
        nested_obs_spec[name] = array_spec.ArraySpec(
            observation_shapes[position], observation_dtypes)
    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)

    flat_env = wrappers.FlattenObservationsWrapper(
        random_py_environment.RandomPyEnvironment(
            nested_obs_spec, action_spec=action_spec))
    sampled_action = array_spec.sample_bounded_spec(
        action_spec, np.random.RandomState())
    time_step = flat_env.step(sampled_action)

    # Check that all observations returned from the environment are packed
    # into one dimension.
    expected_shape = self._get_expected_shape(
        nested_obs_spec, nested_obs_spec.keys())
    self.assertEqual(time_step.observation.shape, expected_shape)

    expected_spec = array_spec.ArraySpec(
        shape=expected_shape,
        dtype=observation_dtypes,
        name='packed_observations')
    self.assertEqual(flat_env.observation_spec(), expected_spec)
def __init__(self, env):
    """Initializes a wrapper.

    Builds a 1-D observation spec named 'state' whose length is the sum of
    the leading dimensions of every entry in the wrapped environment's
    observation spec. The dtype is taken from the last entry (None when the
    spec has no entries).

    Args:
        env: Environment to wrap; its observation spec must expose
            `.values()`.
    """
    super(FlattenState, self).__init__(env)

    component_specs = list(env.observation_spec().values())
    total_dim = sum(component.shape[0] for component in component_specs)
    last_dtype = component_specs[-1].dtype if component_specs else None

    self._new_observation_spec = array_spec.ArraySpec(
        shape=(total_dim, ), dtype=last_dtype, name='state')
def testGetOuterArrayShape(self):
    """Outer shape is (), (batch,) and (batch, 1) for the three layouts."""
    nested_spec = (
        array_spec.ArraySpec([5, 8], np.float32),
        (
            array_spec.ArraySpec([1], np.int32),
            array_spec.ArraySpec([2, 2, 2], np.float32),
        ),
    )
    batch_size = 3
    samples = [self.zeros_from_spec(nested_spec) for _ in range(batch_size)]

    # A single unbatched sample has no outer dimensions.
    self.assertEqual(
        (), nest_utils.get_outer_array_shape(samples[0], nested_spec))

    # Stacking the samples adds one outer (batch) dimension.
    batched = nest_utils.stack_nested_arrays(samples)
    self.assertEqual(
        (batch_size, ),
        nest_utils.get_outer_array_shape(batched, nested_spec))

    # Batching each sample first and then stacking gives two outer
    # dimensions.
    with_time_dim = [
        nest_utils.batch_nested_array(sample) for sample in samples
    ]
    batch_and_time = nest_utils.stack_nested_arrays(with_time_dim)
    self.assertEqual(
        (batch_size, 1),
        nest_utils.get_outer_array_shape(batch_and_time, nested_spec))
def to_array_spec(tensor_spec):
    """Converts TensorSpec into ArraySpec.

    Args:
        tensor_spec: Either an `ArraySpec` (returned unchanged) or a spec
            exposing `shape.as_list()`, `dtype.as_numpy_dtype` and `name`.

    Returns:
        A `BoundedArraySpec` when `tensor_spec` has both `minimum` and
        `maximum` attributes, otherwise an `ArraySpec`.
    """
    if isinstance(tensor_spec, array_spec.ArraySpec):
        return tensor_spec

    is_bounded = (hasattr(tensor_spec, "minimum")
                  and hasattr(tensor_spec, "maximum"))
    if is_bounded:
        converted = array_spec.BoundedArraySpec(
            tensor_spec.shape.as_list(),
            tensor_spec.dtype.as_numpy_dtype,
            minimum=tensor_spec.minimum,
            maximum=tensor_spec.maximum,
            name=tensor_spec.name)
    else:
        converted = array_spec.ArraySpec(
            tensor_spec.shape.as_list(),
            tensor_spec.dtype.as_numpy_dtype,
            tensor_spec.name)
    return converted
def test_batch_env(self):
    """Test batched version of the environment."""
    obs_spec = collections.OrderedDict({
        'obs1': array_spec.ArraySpec((1,), np.int32),
        'obs2': array_spec.ArraySpec((2,), np.int32),
    })
    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)

    # Generate a random py environment with a batch size.
    batch_size = 4
    batched_env = random_py_environment.RandomPyEnvironment(
        obs_spec, action_spec=action_spec, batch_size=batch_size)
    env = MockGoalReplayEnvWrapper(batched_env)

    def check_observation(step):
        # The observation must be a dict with the same keys as the spec.
        self.assertIsInstance(step.observation, dict)
        self.assertEqual(step.observation.keys(),
                         env.observation_spec().keys())

    random_action = array_spec.sample_bounded_spec(
        action_spec, np.random.RandomState())
    check_observation(env.step(random_action))
    check_observation(env.reset())
def test_compress_image(self):
    """Round-trip uint8 image features through the compressing serializer.

    Enables `compress_image` on the feature encoder and parser via gin,
    serializes a sample with 'image' and 'mask' entries, decodes it again
    and checks that the recovered arrays match the input. Skipped when eager
    execution has not been enabled.
    """
    if not common.has_eager_been_enabled():
        self.skipTest("Image compression only supported in TF2.x")

    # Pass each binding as its own string so parsing does not depend on
    # newlines embedded in one multi-line literal.
    gin.parse_config_files_and_bindings([], [
        "_get_feature_encoder.compress_image=True",
        "_get_feature_parser.compress_image=True",
    ])

    spec = {
        "image": array_spec.ArraySpec((128, 128, 3), np.uint8),
        "mask": array_spec.ArraySpec((128, 128, 1), np.uint8)
    }
    serializer = example_encoding.get_example_serializer(spec)
    decoder = example_encoding.get_example_decoder(spec)

    sample = {
        "image": 128 * np.ones([128, 128, 3], dtype=np.uint8),
        "mask": 128 * np.ones([128, 128, 1], dtype=np.uint8)
    }
    example_proto = serializer(sample)
    recovered = self.evaluate(decoder(example_proto))
    tf.nest.map_structure(np.testing.assert_almost_equal, sample, recovered)
def __init__(self, env, stack_size, actions_in_obs, rewards_in_obs):
    """Initializes a wrapper.

    Builds a new observation spec in which 'pixels' has its channel count
    multiplied by `stack_size`, and optionally adds 'actions' and 'rewards'
    entries.

    Args:
        env: Environment to wrap; its observation spec must be a mapping
            with a 'pixels' entry whose shape has at least three dimensions.
        stack_size: Number of frames kept in the stack.
        actions_in_obs: If truthy, add an 'actions' entry holding
            `stack_size - 1` actions.
        rewards_in_obs: If truthy, add a 'rewards' entry holding
            `stack_size` float32 rewards.
    """
    super(FrameStack, self).__init__(env)
    self.stack_size = stack_size
    self._frames = collections.deque(maxlen=stack_size)
    self.actions_in_obs = actions_in_obs
    self.rewards_in_obs = rewards_in_obs

    # Start from a shallow copy of the wrapped spec and override entries.
    base_spec = env.observation_spec()
    stacked_spec = copy.copy(base_spec)
    self._new_observation_spec = stacked_spec

    # Redefine the pixels spec: same height/width, stacked channels.
    pixels_spec = base_spec['pixels']
    height_width = pixels_spec.shape[:2]
    stacked_channels = pixels_spec.shape[2] * stack_size
    stacked_spec['pixels'] = array_spec.ArraySpec(
        shape=height_width + (stacked_channels, ),
        dtype=pixels_spec.dtype,
        name='grayscale_pixels')

    # Define the action stack spec.
    if self.actions_in_obs:
        action_count = stack_size - 1
        self._actions = collections.deque(maxlen=action_count)
        action_spec = env.action_spec()
        stacked_spec['actions'] = array_spec.ArraySpec(
            shape=(action_count, ) + action_spec.shape,
            dtype=action_spec.dtype,
            name='actions')

    # Define the rewards stack spec.
    if self.rewards_in_obs:
        self._rewards = collections.deque(maxlen=stack_size)
        stacked_spec['rewards'] = array_spec.ArraySpec(
            shape=(stack_size, ), dtype=np.float32, name='rewards')
def test_close_no_hang_after_step(self):
    """Start a process-backed environment, step it twice, then close it."""
    make_env = functools.partial(
        random_py_environment.RandomPyEnvironment,
        array_spec.ArraySpec((3, 3), np.float32),
        array_spec.BoundedArraySpec(
            [1], np.float32, minimum=-1.0, maximum=1.0),
        episode_end_probability=0,
        min_duration=5,
        max_duration=5)
    rng = np.random.RandomState()

    process_env = parallel_py_environment.ProcessPyEnvironment(make_env)
    process_env.start()
    action_spec = process_env.action_spec()
    process_env.reset()
    for _ in range(2):
        process_env.step(array_spec.sample_bounded_spec(action_spec, rng))
    process_env.close()
def __init__(self, goalX=0.0, goalY=0.0):
    """Create the environment with a ship placed randomly around the goal.

    The ship's start position is the goal offset by an independent random
    integer in [-100, 100] on each axis.

    Args:
        goalX: X coordinate of the goal.
        goalY: Y coordinate of the goal.
    """
    # Draw the x offset first, then the y offset, from the global `random`
    # generator.
    start_x = goalX + random.randint(-100, 100)
    start_y = goalY + random.randint(-100, 100)
    self._ship = Ship(start_x, start_y)

    self._action_spec = array_spec.BoundedArraySpec(
        shape=(), dtype=np.int32, minimum=0, maximum=3, name='action')
    self._observation_spec = array_spec.ArraySpec(
        shape=(7,), dtype=np.float32, name='observation')

    self._state = self._ship.state()
    self._episode_ended = False

    self._time_elapsed = 0
    self._time_cap = 15  # seconds
    self._time_interval = 1.0 / 5.0  # fps
    self._physics_interval = 1.0 / 60.0

    self._goalX = goalX
    self._goalY = goalY
    self._terminal_distance = 1200
def testSavedModel(self):
    """A saved-then-reloaded policy yields the same action as the original.

    Skipped when eager execution has not been enabled.
    """
    if not common.has_eager_been_enabled():
        self.skipTest('Only supported in eager.')

    # Array specs and their tensor-spec counterparts.
    observation_spec = array_spec.ArraySpec([2], np.float32)
    action_spec = array_spec.BoundedArraySpec([1], np.float32, 2, 3)
    time_step_spec = ts.time_step_spec(observation_spec)

    observation_tensor_spec = tensor_spec.from_spec(observation_spec)
    action_tensor_spec = tensor_spec.from_spec(action_spec)
    time_step_tensor_spec = tensor_spec.from_spec(time_step_spec)

    actor_net = actor_network.ActorNetwork(
        observation_tensor_spec,
        action_tensor_spec,
        fc_layer_params=(10, ),
    )
    tf_policy = actor_policy.ActorPolicy(
        time_step_tensor_spec, action_tensor_spec, actor_network=actor_net)

    # Save the policy and load it back as an eager py policy.
    saved_path = os.path.join(self.get_temp_dir(), 'saved_policy')
    policy_saver.PolicySaver(tf_policy).save(saved_path)
    eager_py_policy = py_tf_eager_policy.SavedModelPyTFEagerPolicy(
        saved_path, time_step_spec, action_spec)

    rng = np.random.RandomState()
    sample_time_step = array_spec.sample_spec_nest(time_step_spec, rng)

    # The TF policy takes a batched time step; unbatch its output and
    # convert it to numpy before comparing.
    batched_time_step = nest_utils.batch_nested_array(sample_time_step)
    original_action = nest_utils.unbatch_nested_tensors(
        tf_policy.action(batched_time_step))
    original_action_np = tf.nest.map_structure(
        lambda tensor: tensor.numpy(), original_action)

    saved_policy_action = eager_py_policy.action(sample_time_step)

    tf.nest.assert_same_structure(saved_policy_action.action, action_spec)
    np.testing.assert_array_almost_equal(
        original_action_np.action, saved_policy_action.action)
def setUp(self):
    """Create the specs, tensor specs and random environment for the tests."""
    super(PyTFEagerPolicyTest, self).setUp()

    self._observation_spec = array_spec.ArraySpec([2], np.float32)
    self._action_spec = array_spec.BoundedArraySpec([1], np.float32, 2, 3)

    self._observation_tensor_spec = tensor_spec.from_spec(
        self._observation_spec)
    self._action_tensor_spec = tensor_spec.from_spec(self._action_spec)
    self._time_step_tensor_spec = ts.time_step_spec(
        self._observation_tensor_spec)

    bounded_info_spec = {
        'a': array_spec.BoundedArraySpec([1], np.float32, 0, 1),
        'b': array_spec.BoundedArraySpec([1], np.float32, 100, 101)
    }
    self._info_tensor_spec = tensor_spec.from_spec(bounded_info_spec)

    # Env will validate action types automatically since we provided the
    # action_spec.
    self._env = random_py_environment.RandomPyEnvironment(
        self._observation_spec, self._action_spec)
def __init__(self, v_n=2, v_k=2, v_seed=2, do_transform=True):
    """Wrap a `VectorIncrementEnvironment` and declare its specs.

    Args:
        v_n: Passed to the inner environment as `n`; actions are integers in
            [0, v_n - 1].
        v_k: Passed to the inner environment as `k`; observations are
            float32 vectors of length `v_k`.
        v_seed: Passed to the inner environment as `seed`.
        do_transform: Passed through to the inner environment.
    """
    self._observation_spec = array_spec.ArraySpec(
        shape=(v_k, ), dtype=np.float32, name='observation')
    self._action_spec = array_spec.BoundedArraySpec(
        shape=(), dtype=np.int32, minimum=0, maximum=v_n - 1, name='action')
    self._time_step_spec = ts.time_step_spec(self._observation_spec)

    self.env = VectorIncrementEnvironment(
        n=v_n, k=v_k, seed=v_seed, do_transform=do_transform)
    self._state = self.env.encoded_state()

    self._episode_ended = False
    self._batched = False
def __init__(self, name=None, num_actions=3):
    """Build a dummy network with fixed-weight value and log-variance layers.

    Args:
        name: Accepted but not used by this constructor.
        num_actions: Number of units in each Dense layer and the upper bound
            of the action spec. NOTE(review): the constant initializers
            below are written for 3 actions -- confirm before using another
            value.
    """
    observation_tensor_spec = tensor_spec.from_spec(
        array_spec.ArraySpec([2], np.float32))
    action_tensor_spec = tensor_spec.from_spec(
        array_spec.BoundedArraySpec([1], np.float32, 1, num_actions))
    super(HeteroscedasticDummyNet, self).__init__(
        observation_tensor_spec, action_tensor_spec)

    def fixed_weight_dense():
        # Each layer gets its own initializer objects with the same
        # constants.
        return tf.keras.layers.Dense(
            num_actions,
            kernel_initializer=tf.constant_initializer(
                [[1, 1.5, 2], [1, 1.5, 4]]),
            bias_initializer=tf.constant_initializer([[1], [1], [-10]]))

    self._value_layer = fixed_weight_dense()
    self._log_variance_layer = fixed_weight_dense()
def __init__(
    self,
    dictionary_path: str = DICTIONARY_PATH_DEFAULT,
    reward_map: Dict[str, float] = REWARD_DEFAULT,
    life_initial: int = 6,
    seed: int = 42,
):
    """Create a hangman-style environment.

    Args:
        dictionary_path: Passed to `self._build_dictionary` to load the
            candidate words.
        reward_map: Stored as `self.reward_map`. The default is a shared
            module-level object, so it should not be mutated in place.
        life_initial: Stored as `self.life_initial`.
        seed: Passed to `self.seed`.
    """
    # One action per letter: 26 letters can be proposed.
    self._action_spec = array_spec.BoundedArraySpec(
        shape=(), dtype=np.int32, minimum=0, maximum=25, name="letter")
    # The game observation has 30 slots (the state below is also 30 long)
    # with values in [-1, 26]; 'legal_moves' has one entry per letter.
    self._observation_spec = {
        'observations': array_spec.BoundedArraySpec(
            shape=(1, 30),
            dtype=np.float32,
            minimum=-1,
            maximum=26,
            name="game"),
        'legal_moves': array_spec.ArraySpec(shape=(26, ), dtype=np.float32),
    }

    # The state starts with every slot set to -1.
    self._state = np.full(30, -1.0)
    self._episode_ended = False
    self.logger = logging.getLogger(__name__)

    self.life_initial = life_initial
    # De-duplicate the dictionary, then sort it. The iteration order of a
    # set of strings changes between interpreter runs (hash randomization),
    # so an unsorted list would make seeded runs differ.
    self.words_set = sorted(set(self._build_dictionary(dictionary_path)))
    self.reward_map = reward_map
    self.seed(seed)
def __init__(self, env, out_width_height=None):
    """Build a single image spec from an ordered dict of image specs.

    All observations are concatenated along the channel (last) axis. When
    `out_width_height` is given, the first two dimensions of the resulting
    spec are `out_width_height` instead of the source image sizes.

    Args:
        env: Environment to wrap. Its observation spec must be a
            `collections.OrderedDict` of 3-D `ArraySpec`s sharing one dtype.
        out_width_height: Optional pair giving the first two dimensions of
            the output spec. When falsy, all images must already share
            their first two dimensions.

    Raises:
        ValueError: If the observation spec is not an OrderedDict, is empty,
            contains a non-`ArraySpec` or non-3-D entry, or the images
            differ in size or dtype.
    """
    super(FlattenImageObservationsWrapper, self).__init__(env)
    self.wh = out_width_height
    obs_spec = self._env.observation_spec()
    if not isinstance(obs_spec, collections.OrderedDict):
        raise ValueError('Unsupported observation_spec %s' % str(obs_spec))

    # Coerce to a tuple so a list-valued size can be concatenated with the
    # channel count below.
    target_wh = tuple(self.wh) if self.wh else None

    o_shape = None
    o_dtype = None
    o_name = []
    for key, obs in obs_spec.items():
        if not isinstance(obs, array_spec.ArraySpec):
            raise ValueError('Unsupported observation_spec %s' % str(obs))
        if len(obs.shape) != 3:
            raise ValueError(
                'All observations must be images (got shape %s).' %
                (str(obs.shape)))

        if target_wh:
            # The image size will be normalized.
            cur_shape = target_wh + (obs.shape[2],)
        else:
            cur_shape = tuple(obs.shape)

        if o_shape is None:
            # Seed the accumulated shape from the (possibly normalized)
            # shape, not the raw one, so the output spec and the size check
            # for later images both use the normalized size.
            o_shape = list(cur_shape)
            o_dtype = obs.dtype
        else:
            if tuple(o_shape[0:2]) != cur_shape[0:2]:
                raise ValueError('All images must be the same shape.')
            if o_dtype != obs.dtype:
                raise ValueError('All images must be the same dtype.')
            o_shape[2] += cur_shape[2]

        # Unnamed specs fall back to their dict key so the join below
        # cannot fail on None.
        o_name.append(obs.name if obs.name is not None else str(key))

    if o_shape is None:
        # No entries at all: there is nothing to flatten.
        raise ValueError('Unsupported observation_spec %s' % str(obs_spec))

    self._observation_spec = array_spec.ArraySpec(
        shape=o_shape, dtype=o_dtype, name='_'.join(o_name) + '_flattened')
def testRandomTFPolicyCompatibility(self):
    """A wrapped RandomTFPolicy returns in-range numpy actions and info.

    Skipped when eager execution has not been enabled.
    """
    if not common.has_eager_been_enabled():
        self.skipTest('Only supported in eager.')

    observation_spec = array_spec.ArraySpec([2], np.float32)
    action_spec = array_spec.BoundedArraySpec([1], np.float32, 2, 3)
    info_spec = {
        'a': array_spec.BoundedArraySpec([1], np.float32, 0, 1),
        'b': array_spec.BoundedArraySpec([1], np.float32, 100, 101)
    }

    observation_tensor_spec = tensor_spec.from_spec(observation_spec)
    action_tensor_spec = tensor_spec.from_spec(action_spec)
    info_tensor_spec = tensor_spec.from_spec(info_spec)
    time_step_tensor_spec = ts.time_step_spec(observation_tensor_spec)

    tf_policy = random_tf_policy.RandomTFPolicy(
        time_step_tensor_spec, action_tensor_spec, info_spec=info_tensor_spec)
    py_policy = py_tf_eager_policy.PyTFEagerPolicy(tf_policy)
    env = random_py_environment.RandomPyEnvironment(
        observation_spec, action_spec)
    time_step = env.reset()

    def _check_bounded_array(value, low, high):
        # Each output must be a 1-element numpy array within its bounds.
        self.assertIsInstance(value, np.ndarray)
        self.assertEqual(value.shape, (1, ))
        self.assertBetween(value[0], low, high)

    def _check_action_step(action_step):
        _check_bounded_array(action_step.action, 2.0, 3.0)
        _check_bounded_array(action_step.info['a'], 0.0, 1.0)
        _check_bounded_array(action_step.info['b'], 100.0, 101.0)

    for _ in range(100):
        action_step = py_policy.action(time_step)
        _check_action_step(action_step)
        time_step = env.step(action_step.action)
def testGeneratesBatchedActionsWithoutSpecifyingOuterDims(self):
    """Actions get the batch size of the time step and stay in bounds."""
    action_spec = [
        array_spec.BoundedArraySpec((2, 3), np.int32, -10, 10),
        array_spec.BoundedArraySpec((1, 2), np.int32, -10, 10)
    ]
    time_step_spec = time_step.time_step_spec(
        observation_spec=array_spec.ArraySpec((1, ), np.int32))
    policy = random_py_policy.RandomPyPolicy(
        time_step_spec=time_step_spec, action_spec=action_spec)

    # The observation has a leading batch dimension of 3.
    batched_observation = np.array([[1], [2], [3]], dtype=np.int32)
    action_step = policy.action(time_step.restart(batched_observation))
    tf.nest.assert_same_structure(action_spec, action_step.action)

    first_action, second_action = action_step.action[0], action_step.action[1]
    self.assertEqual((3, 2, 3), first_action.shape)
    self.assertEqual((3, 1, 2), second_action.shape)

    for sampled in (first_action, second_action):
        self.assertTrue(np.all(sampled >= -10))
        self.assertTrue(np.all(sampled <= 10))
def _generate_replay_buffer(self, rb_cls):
    """Create a replay buffer of class `rb_cls` and fill it with transitions.

    Observations are stacks of 4 consecutive single-channel frames, so
    neighbouring observations share frames. Stores the trajectory spec,
    capacity, buffer and transition count on `self`.

    Args:
        rb_cls: Replay buffer class; called with `data_spec` and `capacity`
            keyword arguments.
    """
    stack_count = 4
    single_shape = (15, 15, 1)
    stacked_shape = (15, 15, stack_count)

    step_spec = ts.time_step_spec(
        array_spec.ArraySpec(stacked_shape, np.int32, 'obs'))
    policy_step_spec = policy_step.PolicyStep(
        array_spec.BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=1, name='action'))
    self._trajectory_spec = trajectory.from_transition(
        step_spec, policy_step_spec, step_spec)

    self._capacity = 32
    self._replay_buffer = rb_cls(
        data_spec=self._trajectory_spec, capacity=self._capacity)

    # Generate N frames: the value of the pixels is the frame index.
    # The observations will be generated by stacking K frames out of those
    # N, generating some redundancies between the observations.
    frame_count = 100
    single_frames = [
        np.full(single_shape, index, dtype=np.int32)
        for index in range(frame_count)
    ]

    # Build one time step per window of `stack_count` consecutive frames.
    window_count = len(single_frames) - stack_count + 1
    time_steps = [
        ts.transition(
            np.concatenate(single_frames[start:start + stack_count], axis=-1),
            reward=0.0)
        for start in range(window_count)
    ]

    # Add the transitions between consecutive time steps to the buffer.
    self._transition_count = len(time_steps) - 1
    dummy_action = policy_step.PolicyStep(np.int32(0))
    for index in range(self._transition_count):
        transition = trajectory.from_transition(
            time_steps[index], dummy_action, time_steps[index + 1])
        self._replay_buffer.add_batch(
            nest_utils.batch_nested_array(transition))
def _create_replay_buffer(self, capacity=32):
    """Create an empty prioritized replay buffer for 2-frame observations.

    Stores the stack count, single-frame shape, trajectory spec, capacity,
    alpha and the buffer itself on `self`.

    Args:
        capacity: Capacity passed to `PyPrioritizedReplayBuffer`.
    """
    self._stack_count = 2
    self._single_shape = (1, )
    stacked_shape = (1, self._stack_count)

    step_spec = ts.time_step_spec(
        array_spec.ArraySpec(stacked_shape, np.int32, 'obs'))
    policy_step_spec = policy_step.PolicyStep(
        array_spec.BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=1, name='action'))
    self._trajectory_spec = trajectory.from_transition(
        step_spec, policy_step_spec, step_spec)

    self._capacity = capacity
    self._alpha = 0.6
    self._replay_buffer = PyPrioritizedReplayBuffer(
        data_spec=self._trajectory_spec,
        capacity=self._capacity,
        alpha=self._alpha)
def setUp(self):
    """Create the specs and actor policy shared by the tests.

    Skips the test when eager execution has not been enabled.
    """
    super(SavedModelPYTFEagerPolicyTest, self).setUp()
    if not common.has_eager_been_enabled():
        self.skipTest('Only supported in eager.')

    observation_spec = array_spec.ArraySpec([2], np.float32)
    self.action_spec = array_spec.BoundedArraySpec([1], np.float32, 2, 3)
    self.time_step_spec = ts.time_step_spec(observation_spec)

    observation_tensor_spec = tensor_spec.from_spec(observation_spec)
    action_tensor_spec = tensor_spec.from_spec(self.action_spec)
    time_step_tensor_spec = tensor_spec.from_spec(self.time_step_spec)

    network = actor_network.ActorNetwork(
        observation_tensor_spec,
        action_tensor_spec,
        fc_layer_params=(10,),
    )
    self.tf_policy = actor_policy.ActorPolicy(
        time_step_tensor_spec, action_tensor_spec, actor_network=network)