def parse_encoded_spec_from_file(input_path):
  """Returns the tensor data spec stored at a path.

  Args:
    input_path: The path to the TFRecord file which contains the spec.

  Returns:
    `TensorSpec` nested structure parsed from the TFRecord file.

  Raises:
    IOError: File at input path does not exist.
  """
  if not tf.io.gfile.exists(input_path):
    raise IOError('Could not find spec file at %s.' % input_path)
  dataset = tf.data.TFRecordDataset(input_path, buffer_size=1)
  dataset_iterator = eager_utils.dataset_iterator(dataset)
  signature_proto_string = eager_utils.get_next(dataset_iterator)
  if tf.executing_eagerly():
    signature_proto = struct_pb2.StructuredValue.FromString(
        signature_proto_string.numpy())
  else:
    # In non-eager mode a session must be run in order to get the value.
    # Note: `tf.Session` does not exist in the TF2 API surface, so the
    # compat.v1 alias is used, matching the compat.v1 idioms elsewhere here.
    with tf.compat.v1.Session() as sess:
      signature_proto_string_value = sess.run(signature_proto_string)
      signature_proto = struct_pb2.StructuredValue.FromString(
          signature_proto_string_value)
  return tensor_spec.from_proto(signature_proto)
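# Usage sketch (not from the library source): reading a spec back from disk.
# The file path below is hypothetical; in TF-Agents the spec file is
# typically written alongside the TFRecord data file.
spec_path = '/tmp/collect_data.tfrecord.spec'
spec = parse_encoded_spec_from_file(spec_path)
# `spec` is a nested structure of `TensorSpec`s describing each serialized
# example in the corresponding TFRecord file.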
def testIteration(self):
  data = np.arange(100)
  ds = tf.data.Dataset.from_tensor_slices(data)
  itr = eager_utils.dataset_iterator(ds)
  for d in data:
    self.assertEqual(np.array([d]),
                     self.evaluate(eager_utils.get_next(itr)))
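# Sketch (outside the test harness, same contract the test above exercises):
# the `dataset_iterator` / `get_next` pair abstracts over eager and graph
# mode. In eager mode `get_next` returns a concrete tensor; in graph mode it
# returns an op that must be evaluated in a session, as in
# `parse_encoded_spec_from_file` above.
ds = tf.data.Dataset.range(3)
itr = eager_utils.dataset_iterator(ds)
next_element = eager_utils.get_next(itr)
if tf.executing_eagerly():
  print(next_element.numpy())  # -> 0
else:
  with tf.compat.v1.Session() as sess:
    print(sess.run(next_element))  # -> 0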
def test_with_dynamic_step_driver(self):
  env = driver_test_utils.PyEnvironmentMock()
  tf_env = tf_py_environment.TFPyEnvironment(env)
  policy = driver_test_utils.TFPolicyMock(tf_env.time_step_spec(),
                                          tf_env.action_spec())
  trajectory_spec = trajectory.from_transition(tf_env.time_step_spec(),
                                               policy.policy_step_spec,
                                               tf_env.time_step_spec())

  tfrecord_observer = example_encoding_dataset.TFRecordObserver(
      self.dataset_path, trajectory_spec)

  driver = dynamic_step_driver.DynamicStepDriver(
      tf_env,
      policy,
      observers=[common.function(tfrecord_observer)],
      num_steps=10)

  self.evaluate(tf.compat.v1.global_variables_initializer())
  time_step = self.evaluate(tf_env.reset())
  initial_policy_state = policy.get_initial_state(batch_size=1)
  self.evaluate(
      common.function(driver.run)(time_step, initial_policy_state))
  tfrecord_observer.flush()
  tfrecord_observer.close()

  dataset = example_encoding_dataset.load_tfrecord_dataset(
      [self.dataset_path], buffer_size=2, as_trajectories=True)
  iterator = eager_utils.dataset_iterator(dataset)
  sample = self.evaluate(eager_utils.get_next(iterator))
  self.assertIsInstance(sample, trajectory.Trajectory)
def test_with_py_driver(self):
  env = driver_test_utils.PyEnvironmentMock()
  policy = driver_test_utils.PyPolicyMock(env.time_step_spec(),
                                          env.action_spec())
  trajectory_spec = trajectory.from_transition(env.time_step_spec(),
                                               policy.policy_step_spec,
                                               env.time_step_spec())
  trajectory_spec = tensor_spec.from_spec(trajectory_spec)

  tfrecord_observer = example_encoding_dataset.TFRecordObserver(
      self.dataset_path, trajectory_spec, py_mode=True)

  driver = py_driver.PyDriver(env, policy, [tfrecord_observer],
                              max_steps=10)
  time_step = env.reset()
  driver.run(time_step)
  tfrecord_observer.flush()
  tfrecord_observer.close()

  dataset = example_encoding_dataset.load_tfrecord_dataset(
      [self.dataset_path], buffer_size=2, as_trajectories=True)
  iterator = eager_utils.dataset_iterator(dataset)
  sample = self.evaluate(eager_utils.get_next(iterator))
  self.assertIsInstance(sample, trajectory.Trajectory)
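# Sketch (hypothetical file name and batch size): consuming trajectories
# recorded by either driver above. With `as_trajectories=True`,
# `load_tfrecord_dataset` yields `trajectory.Trajectory` tuples, which can
# be batched like any other `tf.data.Dataset` (iteration shown in eager
# mode).
dataset = example_encoding_dataset.load_tfrecord_dataset(
    ['/tmp/collect_data.tfrecord'], as_trajectories=True)
for traj in dataset.batch(2).take(1):
  print(traj.step_type, traj.action)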
def __init__(self, dataset, reward_distribution, batch_size):
  """Initialize `ClassificationBanditEnvironment`.

  Args:
    dataset: a `tf.data.Dataset` consisting of two `Tensor`s, [inputs,
      labels] where inputs can be of any shape, while labels are integer
      class labels. The label tensor can be of any rank as long as it has
      1 element.
    reward_distribution: a `tfd.Distribution` with event_shape
      `[num_classes, num_actions]`. Entry `[i, j]` is the reward for
      taking action `j` for an instance of class `i`.
    batch_size: if `dataset` is batched, this is the size of the batches.

  Raises:
    ValueError: if `reward_distribution` does not have an event shape
      with rank 2.
  """
  # Computing `action_spec`.
  event_shape = reward_distribution.event_shape
  if len(event_shape) != 2:
    raise ValueError(
        'reward_distribution must have event shape of rank 2; '
        'got event shape {}'.format(event_shape))
  _, num_actions = event_shape
  action_spec = tensor_spec.BoundedTensorSpec(shape=(),
                                              dtype=tf.int32,
                                              minimum=0,
                                              maximum=num_actions - 1,
                                              name='action')
  output_shapes = tf.compat.v1.data.get_output_shapes(dataset)

  # Computing `time_step_spec`.
  if len(output_shapes) != 2:
    raise ValueError(
        'Dataset must have exactly two outputs; got {}'.format(
            len(output_shapes)))
  context_shape = output_shapes[0]
  context_dtype, lbl_dtype = tf.compat.v1.data.get_output_types(dataset)
  observation_spec = tensor_spec.TensorSpec(shape=context_shape,
                                            dtype=context_dtype)
  time_step_spec = time_step.time_step_spec(observation_spec)

  super(ClassificationBanditEnvironment,
        self).__init__(action_spec=action_spec,
                       time_step_spec=time_step_spec,
                       batch_size=batch_size)

  self._data_iterator = eager_utils.dataset_iterator(
      dataset.batch(batch_size, drop_remainder=True))
  self._current_label = tf.compat.v2.Variable(
      tf.zeros(batch_size, dtype=lbl_dtype))
  self._previous_label = tf.compat.v2.Variable(
      tf.zeros(batch_size, dtype=lbl_dtype))
  self._reward_distribution = reward_distribution

  reward_means = self._reward_distribution.mean()
  self._optimal_action_table = tf.argmax(
      reward_means, axis=1, output_type=self._action_spec.dtype)
  self._optimal_reward_table = tf.reduce_max(reward_means, axis=1)
def __init__(self,
             dataset: tf.data.Dataset,
             reward_distribution: types.Distribution,
             batch_size: types.Int,
             label_dtype_cast: Optional[tf.DType] = None,
             shuffle_buffer_size: Optional[types.Int] = None,
             repeat_dataset: Optional[bool] = True,
             prefetch_size: Optional[types.Int] = None,
             seed: Optional[types.Int] = None):
  """Initialize `ClassificationBanditEnvironment`.

  Args:
    dataset: a `tf.data.Dataset` consisting of two `Tensor`s, [inputs,
      labels] where inputs can be of any shape, while labels are integer
      class labels. The label tensor can be of any rank as long as it has
      1 element.
    reward_distribution: a `tfd.Distribution` with event_shape
      `[num_classes, num_actions]`. Entry `[i, j]` is the reward for
      taking action `j` for an instance of class `i`.
    batch_size: if `dataset` is batched, this is the size of the batches.
    label_dtype_cast: if not None, casts dataset labels to this dtype.
    shuffle_buffer_size: If None, do not shuffle. Otherwise, a shuffle
      buffer of the specified size is used in the environment's `dataset`.
    repeat_dataset: if True, repeats the `dataset`, avoiding
      `OutOfRangeError: End of sequence` errors when the environment is
      stepped past the end of the `dataset`.
    prefetch_size: If None, do not prefetch. Otherwise, a prefetch buffer
      of the specified size is used in the environment's `dataset`.
    seed: Used to make results deterministic.

  Raises:
    ValueError: if `reward_distribution` does not have an event shape
      with rank 2.
  """
  # Computing `action_spec`.
  event_shape = reward_distribution.event_shape
  if len(event_shape) != 2:
    raise ValueError(
        'reward_distribution must have event shape of rank 2; '
        'got event shape {}'.format(event_shape))
  _, num_actions = event_shape
  action_spec = tensor_spec.BoundedTensorSpec(shape=(),
                                              dtype=tf.int32,
                                              minimum=0,
                                              maximum=num_actions - 1,
                                              name='action')
  output_shapes = tf.compat.v1.data.get_output_shapes(dataset)

  # Computing `time_step_spec`.
  if len(output_shapes) != 2:
    raise ValueError(
        'Dataset must have exactly two outputs; got {}'.format(
            len(output_shapes)))
  context_shape = output_shapes[0]
  context_dtype, lbl_dtype = tf.compat.v1.data.get_output_types(dataset)
  if label_dtype_cast:
    lbl_dtype = label_dtype_cast
  observation_spec = tensor_spec.TensorSpec(shape=context_shape,
                                            dtype=context_dtype)
  time_step_spec = time_step.time_step_spec(observation_spec)

  super(ClassificationBanditEnvironment,
        self).__init__(action_spec=action_spec,
                       time_step_spec=time_step_spec,
                       batch_size=batch_size)

  if shuffle_buffer_size:
    dataset = dataset.shuffle(buffer_size=shuffle_buffer_size,
                              seed=seed,
                              reshuffle_each_iteration=True)
  if repeat_dataset:
    dataset = dataset.repeat()
  dataset = dataset.batch(batch_size, drop_remainder=True)
  if prefetch_size:
    dataset = dataset.prefetch(prefetch_size)
  self._data_iterator = eager_utils.dataset_iterator(dataset)
  self._current_label = tf.compat.v2.Variable(
      tf.zeros(batch_size, dtype=lbl_dtype))
  self._previous_label = tf.compat.v2.Variable(
      tf.zeros(batch_size, dtype=lbl_dtype))
  self._reward_distribution = reward_distribution
  self._label_dtype = lbl_dtype

  reward_means = self._reward_distribution.mean()
  self._optimal_action_table = tf.argmax(
      reward_means, axis=1, output_type=self._action_spec.dtype)
  self._optimal_reward_table = tf.reduce_max(reward_means, axis=1)
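# Sketch (all values hypothetical): constructing the environment with a toy
# two-class dataset and a deterministic reward table. `tfd.Independent`
# lifts the [num_classes, num_actions] batch dims into the event shape,
# satisfying the rank-2 event-shape check in the constructor above.
import tensorflow_probability as tfp
tfd = tfp.distributions

contexts = tf.random.uniform([8, 4])
labels = tf.constant([0, 1, 0, 1, 0, 1, 0, 1], dtype=tf.int32)
dataset = tf.data.Dataset.from_tensor_slices((contexts, labels))
reward_distribution = tfd.Independent(
    tfd.Deterministic(loc=[[1.0, 0.0],    # rewards for class 0
                           [0.0, 1.0]]),  # rewards for class 1
    reinterpreted_batch_ndims=2)
env = ClassificationBanditEnvironment(
    dataset, reward_distribution, batch_size=4)
first_time_step = env.reset()  # observation shape: [4, 4]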