Example #1
def parse_example_dataset(features, num_parallel_calls=1):
  """A transformation that parses `Example` protos into a `dict` of tensors.

  Parses a number of serialized `Example` protos given in `serialized`. We refer
  to `serialized` as a batch with `batch_size` many entries of individual
  `Example` protos.

  This op parses serialized examples into a dictionary mapping keys to `Tensor`
  and `SparseTensor` objects. `features` is a dict from keys to `VarLenFeature`,
  `SparseFeature`, and `FixedLenFeature` objects. Each `VarLenFeature`
  and `SparseFeature` is mapped to a `SparseTensor`, and each
  `FixedLenFeature` is mapped to a `Tensor`. See `tf.io.parse_example` for more
  details about feature dictionaries.

  Args:
    features: A `dict` mapping feature keys to `FixedLenFeature`,
      `VarLenFeature`, and `SparseFeature` values.
    num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`,
      representing the number of parsing processes to call in parallel.

  Returns:
    A dataset transformation function, which can be passed to
    `tf.data.Dataset.apply`.

  Raises:
    ValueError: if the `features` argument is `None`.
  """
  return parsing_ops.parse_example_dataset(features, num_parallel_calls)
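
For context, a minimal usage sketch (the file path and feature spec are illustrative; it assumes the public `tf.data.experimental.parse_example_dataset` alias for this transformation):

```python
# Illustrative only: batch serialized tf.Example strings, then parse them
# into a dict of Tensor/SparseTensor values.
import tensorflow as tf

feature_spec = {
    "age": tf.io.FixedLenFeature([], tf.int64, default_value=-1),
    "kws": tf.io.VarLenFeature(tf.string),
}
dataset = tf.data.TFRecordDataset(["/path/to/examples.tfrecord"])
dataset = dataset.batch(32)
dataset = dataset.apply(
    tf.data.experimental.parse_example_dataset(
        feature_spec, num_parallel_calls=4))
# Each element is now {"age": Tensor, "kws": SparseTensor}.
```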
Example #2
  def testDeterminism(self, local_determinism, global_determinism):
    num_elements = 1000
    batches = []
    for i in range(num_elements):
      example_i = example(features=features({
          "a": int64_feature([i]),
      }))
      batches.append([example_i.SerializeToString()])

    test_features = {"a": parsing_ops.FixedLenFeature((), dtype=dtypes.int64)}
    dataset = dataset_ops.Dataset.from_tensor_slices(batches)
    dataset = dataset.apply(
        contrib_parsing_ops.parse_example_dataset(
            test_features,
            num_parallel_calls=10,
            deterministic=local_determinism))

    opts = dataset_ops.Options()
    opts.experimental_deterministic = global_determinism
    dataset = dataset.with_options(opts)

    expected = list(range(num_elements))
    actual = [elem["a"][0] for elem in self.getDatasetOutput(dataset)]

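    # Op-level determinism (local) takes precedence: the global
    # `experimental_deterministic` option only applies when the op-level
    # `deterministic` argument was left as None.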
    require_order = local_determinism or (local_determinism is None and
                                          global_determinism)
    if require_order:
      self.assertAllEqual(expected, actual)
    else:
      self.assertCountEqual(expected, actual)
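
The test above relies on proto-construction helpers (`example`, `features`, `int64_feature`) that are not shown here; a plausible sketch of them, modeled on how they typically appear in TensorFlow's parsing tests:

```python
# Plausible definitions of the helpers the test assumes; not part of the
# original snippet.
from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2


def int64_feature(values):
  return feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=values))


def features(d):
  return feature_pb2.Features(feature=d)


def example(features):
  return example_pb2.Example(features=features)
```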
Example #3
def parse_example_dataset(features, num_parallel_calls=1):
    """A transformation that parses `Example` protos into a `dict` of tensors.

    Parses a number of serialized `Example` protos given in `serialized`. We
    refer to `serialized` as a batch with `batch_size` many entries of
    individual `Example` protos.

    This op parses serialized examples into a dictionary mapping keys to
    `Tensor` and `SparseTensor` objects. `features` is a dict from keys to
    `VarLenFeature`, `SparseFeature`, and `FixedLenFeature` objects. Each
    `VarLenFeature` and `SparseFeature` is mapped to a `SparseTensor`, and each
    `FixedLenFeature` is mapped to a `Tensor`. See `tf.io.parse_example` for
    more details about feature dictionaries.

    Args:
      features: A `dict` mapping feature keys to `FixedLenFeature`,
        `VarLenFeature`, and `SparseFeature` values.
      num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`,
        representing the number of parsing processes to call in parallel.

    Returns:
      A dataset transformation function, which can be passed to
      `tf.data.Dataset.apply`.

    Raises:
      ValueError: if the `features` argument is `None`.
    """
    return parsing_ops.parse_example_dataset(features, num_parallel_calls)
Example #4
    def _test(self,
              input_tensor,
              feature_val,
              expected_values=None,
              expected_err=None,
              create_iterator_twice=False):

        if expected_err:
            with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                     expected_err[1]):
                dataset = dataset_ops.Dataset.from_tensors(input_tensor).apply(
                    contrib_parsing_ops.parse_example_dataset(feature_val))
                get_next = self.getNext(dataset)
                self.evaluate(get_next())
            return
        else:
            # Returns dict w/ Tensors and SparseTensors.
            # Check values.
            dataset = dataset_ops.Dataset.from_tensors(input_tensor).apply(
                contrib_parsing_ops.parse_example_dataset(feature_val))
            get_next = self.getNext(dataset)
            result = self.evaluate(get_next())
            self._compare_output_to_expected(result, expected_values)
            with self.assertRaises(errors_impl.OutOfRangeError):
                self.evaluate(get_next())
            with self.assertRaises(errors_impl.OutOfRangeError):
                self.evaluate(get_next())
            if create_iterator_twice:
                get_next = self.getNext(dataset)
                result = self.evaluate(get_next())
                self._compare_output_to_expected(result, expected_values)
                with self.assertRaises(errors_impl.OutOfRangeError):
                    self.evaluate(get_next())
        # Check shapes; if serialized is a Tensor we need its size to
        # properly check.
        batch_size = (self.evaluate(input_tensor).size if isinstance(
            input_tensor, ops.Tensor) else np.asarray(input_tensor).size)
        for k, f in feature_val.items():
            if isinstance(f,
                          parsing_ops.FixedLenFeature) and f.shape is not None:
                self.assertEqual(
                    dataset_ops.get_legacy_output_shapes(dataset)[k].as_list()
                    [0], batch_size)
            elif isinstance(f, parsing_ops.VarLenFeature):
                self.assertEqual(
                    dataset_ops.get_legacy_output_shapes(dataset)[k].as_list()
                    [1], None)
Example #5
  def _test(self,
            input_tensor,
            feature_val,
            expected_values=None,
            expected_err=None,
            create_iterator_twice=False):

    if expected_err:
      with self.assertRaisesWithPredicateMatch(expected_err[0],
                                               expected_err[1]):
        dataset = dataset_ops.Dataset.from_tensors(input_tensor).apply(
            contrib_parsing_ops.parse_example_dataset(feature_val))
        get_next = self.getNext(dataset)
        self.evaluate(get_next())
      return
    else:
      # Returns dict w/ Tensors and SparseTensors.
      # Check values.
      dataset = dataset_ops.Dataset.from_tensors(input_tensor).apply(
          contrib_parsing_ops.parse_example_dataset(feature_val))
      get_next = self.getNext(dataset)
      result = self.evaluate(get_next())
      self._compare_output_to_expected(result, expected_values)
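      # A fully consumed iterator should keep raising OutOfRangeError on
      # every subsequent call, so check it twice.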
      with self.assertRaises(errors_impl.OutOfRangeError):
        self.evaluate(get_next())
      with self.assertRaises(errors_impl.OutOfRangeError):
        self.evaluate(get_next())
      if create_iterator_twice:
        get_next = self.getNext(dataset)
        result = self.evaluate(get_next())
        self._compare_output_to_expected(result, expected_values)
        with self.assertRaises(errors_impl.OutOfRangeError):
          self.evaluate(get_next())
    # Check shapes; if serialized is a Tensor we need its size to
    # properly check.
    batch_size = (
        self.evaluate(input_tensor).size if isinstance(input_tensor, ops.Tensor)
        else np.asarray(input_tensor).size)
    for k, f in feature_val.items():
      if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
        self.assertEqual(
            dataset_ops.get_legacy_output_shapes(dataset)[k].as_list()[0],
            batch_size)
      elif isinstance(f, parsing_ops.VarLenFeature):
        self.assertEqual(
            dataset_ops.get_legacy_output_shapes(dataset)[k].as_list()[1], None)
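
For context, a hypothetical invocation of this harness (the feature name and values are invented, and the proto helpers sketched under Example #2 are assumed):

```python
# Hypothetical call; "a" and its values are made up for illustration.
self._test(
    ops.convert_to_tensor([
        example(features=features({
            "a": int64_feature([1]),
        })).SerializeToString()
    ]),
    {"a": parsing_ops.FixedLenFeature((), dtype=dtypes.int64)},
    expected_values={"a": np.array([1])},
    create_iterator_twice=True)
```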
Example #6
    def _test(self,
              input_tensor,
              feature_val,
              expected_values=None,
              expected_err=None):

        with self.cached_session() as sess:
            if expected_err:
                with self.assertRaisesWithPredicateMatch(
                        expected_err[0], expected_err[1]):
                    dataset = dataset_ops.Dataset.from_tensors(
                        input_tensor).apply(
                            contrib_parsing_ops.parse_example_dataset(
                                feature_val))
                    get_next = dataset.make_one_shot_iterator().get_next()
                    sess.run(get_next)
                return
            else:
                # Returns dict w/ Tensors and SparseTensors.
                # Check values.
                dataset = dataset_ops.Dataset.from_tensors(input_tensor).apply(
                    contrib_parsing_ops.parse_example_dataset(feature_val))
                get_next = dataset.make_one_shot_iterator().get_next()
                result = sess.run(get_next)
                flattened = nest.flatten(result)
                print("result", result, "expected_values", expected_values)
                _compare_output_to_expected(self, result, expected_values,
                                            flattened)

            # Check shapes; if serialized is a Tensor we need its size to
            # properly check.
            batch_size = (input_tensor.eval().size if isinstance(
                input_tensor, ops.Tensor) else np.asarray(input_tensor).size)
            for k, f in feature_val.items():
                print("output_shapes as list ",
                      tuple(dataset.output_shapes[k].as_list()))
                if isinstance(
                        f,
                        parsing_ops.FixedLenFeature) and f.shape is not None:
                    self.assertEqual(dataset.output_shapes[k].as_list()[0],
                                     batch_size)
                elif isinstance(f, parsing_ops.VarLenFeature):
                    self.assertEqual(dataset.output_shapes[k].as_list()[1],
                                     None)
Example #7
  def _test(self,
            input_tensor,
            feature_val,
            expected_values=None,
            expected_err=None):

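    # Graph-mode variant of the harness: drive a one-shot iterator with an
    # explicit Session instead of self.evaluate.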
    with self.cached_session() as sess:
      if expected_err:
        with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                 expected_err[1]):
          dataset = dataset_ops.Dataset.from_tensors(input_tensor).apply(
              contrib_parsing_ops.parse_example_dataset(feature_val))
          get_next = dataset.make_one_shot_iterator().get_next()
          sess.run(get_next)
        return
      else:
        # Returns dict w/ Tensors and SparseTensors.
        # Check values.
        dataset = dataset_ops.Dataset.from_tensors(input_tensor).apply(
            contrib_parsing_ops.parse_example_dataset(feature_val))
        get_next = dataset.make_one_shot_iterator().get_next()
        result = sess.run(get_next)
        flattened = nest.flatten(result)
        print("result", result, "expected_values", expected_values)
        _compare_output_to_expected(self, result, expected_values, flattened)

      # Check shapes; if serialized is a Tensor we need its size to
      # properly check.
      batch_size = (
          input_tensor.eval().size if isinstance(input_tensor, ops.Tensor) else
          np.asarray(input_tensor).size)
      for k, f in feature_val.items():
        print("output_shapes as list ",
              tuple(dataset.output_shapes[k].as_list()))
        if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
          self.assertEqual(dataset.output_shapes[k].as_list()[0], batch_size)
        elif isinstance(f, parsing_ops.VarLenFeature):
          self.assertEqual(dataset.output_shapes[k].as_list()[1], None)
Example #8
  def _test(self,
            input_tensor,
            feature_val,
            expected_values=None,
            expected_err=None):

    with self.cached_session() as sess:
      if expected_err:
        with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                 expected_err[1]):
          dataset = dataset_ops.Dataset.from_tensors(input_tensor).apply(
              contrib_parsing_ops.parse_example_dataset(feature_val))
          get_next = dataset.make_one_shot_iterator().get_next()
          sess.run(get_next)
        return
      else:
        # Returns dict w/ Tensors and SparseTensors.
        # Check values.
        dataset = dataset_ops.Dataset.from_tensors(input_tensor).apply(
            contrib_parsing_ops.parse_example_dataset(feature_val))
        get_next = dataset.make_one_shot_iterator().get_next()
        sess.run(get_next)
Example #9
def make_batched_features_dataset_v2(file_pattern,
                                     batch_size,
                                     features,
                                     reader=core_readers.TFRecordDataset,
                                     label_key=None,
                                     reader_args=None,
                                     num_epochs=None,
                                     shuffle=True,
                                     shuffle_buffer_size=10000,
                                     shuffle_seed=None,
                                     prefetch_buffer_size=optimization.AUTOTUNE,
                                     reader_num_threads=1,
                                     parser_num_threads=2,
                                     sloppy_ordering=False,
                                     drop_final_batch=False):
  """Returns a `Dataset` of feature dictionaries from `Example` protos.

  If the `label_key` argument is provided, returns a `Dataset` of tuples
  comprising a feature dictionary and a label.

  Example:

  ```
  serialized_examples = [
    features {
      feature { key: "age" value { int64_list { value: [ 0 ] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
      feature { key: "kws" value { bytes_list { value: [ "code", "art" ] } } }
    },
    features {
      feature { key: "age" value { int64_list { value: [] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
      feature { key: "kws" value { bytes_list { value: [ "sports" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  features: {
    "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
    "gender": FixedLenFeature([], dtype=tf.string),
    "kws": VarLenFeature(dtype=tf.string),
  }
  ```

  And the expected output is:

  ```python
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
    "kws": SparseTensor(
      indices=[[0, 0], [0, 1], [1, 0]],
      values=["code", "art", "sports"]
      dense_shape=[2, 2]),
  }
  ```

  Args:
    file_pattern: List of files or patterns of file paths containing
      `Example` records. See `tf.io.gfile.glob` for pattern rules.
    batch_size: An int representing the number of records to combine
      in a single batch.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values. See `tf.io.parse_example`.
    reader: A function or class that can be
      called with a `filenames` tensor and (optional) `reader_args` and returns
      a `Dataset` of `Example` tensors. Defaults to `tf.data.TFRecordDataset`.
    label_key: (Optional) A string corresponding to the key under which labels
      are stored in the `tf.Example`s. If provided, it must be one of the
      `features` keys; otherwise a `ValueError` is raised.
    reader_args: Additional arguments to pass to the reader class.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever. Defaults to `None`.
    shuffle: A boolean, indicates whether the input should be shuffled. Defaults
      to `True`.
    shuffle_buffer_size: Buffer size of the ShuffleDataset. A large capacity
      ensures better shuffling but would increase memory usage and startup time.
    shuffle_seed: Randomization seed to use for shuffling.
    prefetch_buffer_size: Number of feature batches to prefetch in order to
      improve performance. Recommended value is the number of batches consumed
      per training step. Defaults to auto-tune.
    reader_num_threads: Number of threads used to read `Example` records. If >1,
      the results will be interleaved.
    parser_num_threads: Number of threads to use for parsing `Example` tensors
      into a dictionary of `Feature` tensors.
    sloppy_ordering: If `True`, reading performance will be improved at
      the cost of non-deterministic ordering. If `False`, the order of elements
      produced is deterministic prior to shuffling (elements are still
      randomized if `shuffle=True`; note that if the seed is set, the order
      of elements after shuffling is deterministic). Defaults to `False`.
    drop_final_batch: If `True`, and the batch size does not evenly divide the
      input dataset size, the final smaller batch will be dropped. Defaults to
      `False`.

  Returns:
    A dataset of `dict` elements (or a tuple of `dict` elements and label).
    Each `dict` maps feature keys to `Tensor` or `SparseTensor` objects.

  Raises:
    TypeError: If `reader` is a `tf.compat.v1.ReaderBase` subclass.
    ValueError: If `label_key` is not one of the `features` keys.
  """
  # Create dataset of all matching filenames
  filenames = _get_file_names(file_pattern, False)
  dataset = dataset_ops.Dataset.from_tensor_slices(filenames)
  if shuffle:
    dataset = dataset.shuffle(len(filenames), shuffle_seed)

  if isinstance(reader, type) and issubclass(reader, io_ops.ReaderBase):
    raise TypeError("The `reader` argument must return a `Dataset` object. "
                    "`tf.ReaderBase` subclasses are not supported. For "
                    "example, pass `tf.data.TFRecordDataset` instead of "
                    "`tf.TFRecordReader`.")

  # Read `Example` records from files as tensor objects.
  if reader_args is None:
    reader_args = []

  # Read files sequentially (if reader_num_threads=1) or in parallel
  dataset = dataset.apply(
      interleave_ops.parallel_interleave(
          lambda filename: reader(filename, *reader_args),
          cycle_length=reader_num_threads,
          sloppy=sloppy_ordering))

  # Extract values if the `Example` tensors are stored as key-value tuples.
  if dataset_ops.get_legacy_output_types(dataset) == (
      dtypes.string, dtypes.string):
    dataset = dataset_ops.MapDataset(
        dataset, lambda _, v: v, use_inter_op_parallelism=False)

  # Apply dataset repeat and shuffle transformations.
  dataset = _maybe_shuffle_and_repeat(
      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)

  # NOTE(mrry): We set `drop_remainder=True` when `num_epochs is None` to
  # improve the shape inference, because it makes the batch dimension static.
  # It is safe to do this because in that case we are repeating the input
  # indefinitely, and all batches will be full-sized.
  dataset = dataset.batch(
      batch_size, drop_remainder=drop_final_batch or num_epochs is None)

  # Parse `Example` tensors to a dictionary of `Feature` tensors.
  dataset = dataset.apply(
      parsing_ops.parse_example_dataset(
          features, num_parallel_calls=parser_num_threads))

  if label_key:
    if label_key not in features:
      raise ValueError(
          "The `label_key` provided (%r) must be one of the `features` keys." %
          label_key)
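    # `dict.pop` removes the label from the feature dict and returns it,
    # producing (features, label) tuples.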
    dataset = dataset.map(lambda x: (x, x.pop(label_key)))

  dataset = dataset.prefetch(prefetch_buffer_size)
  return dataset
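
A minimal usage sketch for the function above (the file pattern is illustrative; the feature spec mirrors the docstring example):

```python
# Illustrative path; yields batches as dicts of Tensor/SparseTensor values.
dataset = make_batched_features_dataset_v2(
    file_pattern="/data/examples-*.tfrecord",
    batch_size=32,
    features={
        "age": parsing_ops.FixedLenFeature([], dtypes.int64, default_value=-1),
        "gender": parsing_ops.FixedLenFeature([], dtypes.string),
        "kws": parsing_ops.VarLenFeature(dtypes.string),
    })
```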
Example #10
def make_batched_features_dataset(file_pattern,
                                  batch_size,
                                  features,
                                  reader=core_readers.TFRecordDataset,
                                  label_key=None,
                                  reader_args=None,
                                  num_epochs=None,
                                  shuffle=True,
                                  shuffle_buffer_size=10000,
                                  shuffle_seed=None,
                                  prefetch_buffer_size=optimization.AUTOTUNE,
                                  reader_num_threads=1,
                                  parser_num_threads=2,
                                  sloppy_ordering=False,
                                  drop_final_batch=False):
  """Returns a `Dataset` of feature dictionaries from `Example` protos.

  If the `label_key` argument is provided, returns a `Dataset` of tuples
  comprising a feature dictionary and a label.

  Example:

  ```
  serialized_examples = [
    features {
      feature { key: "age" value { int64_list { value: [ 0 ] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
      feature { key: "kws" value { bytes_list { value: [ "code", "art" ] } } }
    },
    features {
      feature { key: "age" value { int64_list { value: [] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
      feature { key: "kws" value { bytes_list { value: [ "sports" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  features: {
    "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
    "gender": FixedLenFeature([], dtype=tf.string),
    "kws": VarLenFeature(dtype=tf.string),
  }
  ```

  And the expected output is:

  ```python
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
    "kws": SparseTensor(
      indices=[[0, 0], [0, 1], [1, 0]],
      values=["code", "art", "sports"]
      dense_shape=[2, 2]),
  }
  ```

  Args:
    file_pattern: List of files or patterns of file paths containing
      `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int representing the number of records to combine
      in a single batch.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values. See `tf.parse_example`.
    reader: A function or class that can be
      called with a `filenames` tensor and (optional) `reader_args` and returns
      a `Dataset` of `Example` tensors. Defaults to `tf.data.TFRecordDataset`.
    label_key: (Optional) A string corresponding to the key under which labels
      are stored in the `tf.Example`s. If provided, it must be one of the
      `features` keys; otherwise a `ValueError` is raised.
    reader_args: Additional arguments to pass to the reader class.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever. Defaults to `None`.
    shuffle: A boolean, indicates whether the input should be shuffled. Defaults
      to `True`.
    shuffle_buffer_size: Buffer size of the ShuffleDataset. A large capacity
      ensures better shuffling but would increase memory usage and startup time.
    shuffle_seed: Randomization seed to use for shuffling.
    prefetch_buffer_size: Number of feature batches to prefetch in order to
      improve performance. Recommended value is the number of batches consumed
      per training step. Defaults to auto-tune.
    reader_num_threads: Number of threads used to read `Example` records. If >1,
      the results will be interleaved.
    parser_num_threads: Number of threads to use for parsing `Example` tensors
      into a dictionary of `Feature` tensors.
    sloppy_ordering: If `True`, reading performance will be improved at
      the cost of non-deterministic ordering. If `False`, the order of elements
      produced is deterministic prior to shuffling (elements are still
      randomized if `shuffle=True`; note that if the seed is set, the order
      of elements after shuffling is deterministic). Defaults to `False`.
    drop_final_batch: If `True`, and the batch size does not evenly divide the
      input dataset size, the final smaller batch will be dropped. Defaults to
      `False`.

  Returns:
    A dataset of `dict` elements (or a tuple of `dict` elements and label).
    Each `dict` maps feature keys to `Tensor` or `SparseTensor` objects.

  Raises:
    ValueError: If `label_key` is not one of the `features` keys.
  """
  # Create dataset of all matching filenames
  filenames = _get_file_names(file_pattern, False)
  dataset = dataset_ops.Dataset.from_tensor_slices(filenames)
  if shuffle:
    dataset = dataset.shuffle(len(filenames), shuffle_seed)

  # Read `Example` records from files as tensor objects.
  if reader_args is None:
    reader_args = []

  # Read files sequentially (if reader_num_threads=1) or in parallel
  dataset = dataset.apply(
      interleave_ops.parallel_interleave(
          lambda filename: reader(filename, *reader_args),
          cycle_length=reader_num_threads,
          sloppy=sloppy_ordering))

  # Extract values if the `Example` tensors are stored as key-value tuples.
  if dataset.output_types == (dtypes.string, dtypes.string):
    dataset = dataset_ops.MapDataset(
        dataset, lambda _, v: v, use_inter_op_parallelism=False)

  # Apply dataset repeat and shuffle transformations.
  dataset = _maybe_shuffle_and_repeat(
      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)

  # NOTE(mrry): We set `drop_remainder=True` when `num_epochs is None` to
  # improve the shape inference, because it makes the batch dimension static.
  # It is safe to do this because in that case we are repeating the input
  # indefinitely, and all batches will be full-sized.
  dataset = dataset.batch(
      batch_size, drop_remainder=drop_final_batch or num_epochs is None)

  # Parse `Example` tensors to a dictionary of `Feature` tensors.
  dataset = dataset.apply(
      parsing_ops.parse_example_dataset(
          features, num_parallel_calls=parser_num_threads))

  if label_key:
    if label_key not in features:
      raise ValueError(
          "The `label_key` provided (%r) must be one of the `features` keys." %
          label_key)
    dataset = dataset.map(lambda x: (x, x.pop(label_key)))

  dataset = dataset.prefetch(prefetch_buffer_size)
  return dataset
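
The v1 wrapper is used the same way; one difference worth showing is `label_key`, which turns each element into a (features, label) tuple. A hedged sketch with invented feature names:

```python
# Sketch with invented names: elements become (feature_dict, label) tuples.
dataset = make_batched_features_dataset(
    file_pattern="/data/train-*.tfrecord",
    batch_size=64,
    features={
        "x": parsing_ops.FixedLenFeature([], dtypes.float32),
        "y": parsing_ops.FixedLenFeature([], dtypes.int64),
    },
    label_key="y")
```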
Example #11
def make_batched_features_dataset_multi_task(
    file_pattern,
    batch_size,
    features,
    reader=core_readers.TFRecordDataset,
    label_key=None,
    weight_key=None,
    reader_args=None,
    num_epochs=None,
    shuffle=True,
    shuffle_buffer_size=10000,
    shuffle_seed=None,
    prefetch_buffer_size=optimization.AUTOTUNE,
    reader_num_threads=32,
    parser_num_threads=32,
    sloppy_ordering=True,
    drop_final_batch=False):

    """Returns a `Dataset` of feature dictionaries from `Example` protos.
    Returns:
    A dataset of `dict` elements, (or a tuple of `dict` elements and label).
    Each `dict` maps feature keys to `Tensor` or `SparseTensor` objects.
    """
    if shuffle_seed is None:
        shuffle_seed = int(time.time())

    filenames = list(gfile.Glob(file_pattern))
    dataset = dataset_ops.Dataset.from_tensor_slices(filenames)
    if shuffle:
        dataset = dataset.shuffle(len(filenames), shuffle_seed)

    # Read `Example` records from files as tensor objects.
    if reader_args is None:
        reader_args = []

    # Read files sequentially (if reader_num_threads=1) or in parallel
    dataset = dataset.apply(
        interleave_ops.parallel_interleave(
            lambda filename: reader(filename, *reader_args),
            cycle_length=reader_num_threads,
            block_length=200,
            sloppy=sloppy_ordering))

    # Extract values if the `Example` tensors are stored as key-value tuples.
    if dataset_ops.get_legacy_output_types(dataset) == (dtypes.string,
                                                        dtypes.string):
        dataset = dataset_ops.MapDataset(
            dataset, lambda _, v: v, use_inter_op_parallelism=True)

    # Apply dataset repeat and shuffle transformations.
    dataset = dataset.apply(
        shuffle_ops.shuffle_and_repeat(shuffle_buffer_size, num_epochs,
                                       shuffle_seed))

    dataset = dataset.batch(
        batch_size, drop_remainder=drop_final_batch or num_epochs is None)

    # Parse `Example` tensors to a dictionary of `Feature` tensors.
    dataset = dataset.apply(
        parsing_ops.parse_example_dataset(
            features, num_parallel_calls=parser_num_threads))

    if label_key:
        if label_key not in features:
            raise ValueError(
                "The `label_key` provided (%r) must be one of the `features` "
                "keys." % label_key)
        if weight_key:
            assert weight_key in features
            assert label_key != weight_key
            # Replicate the label five times (one copy per task head) and
            # split out the per-example weight as a third element.
            dataset = dataset.map(
                lambda x: (x, tuple([x.pop(label_key)] * 5),
                           x.pop(weight_key)))
        else:
            dataset = dataset.map(
                lambda x: (x, tuple([x.pop(label_key)] * 5)))

    dataset = dataset.prefetch(prefetch_buffer_size)
    return dataset
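
A hedged usage sketch for the multi-task variant (invented names): with `label_key` and `weight_key` set, each element is a (features, labels, weight) triple where the label is replicated five times, once per task head:

```python
# Sketch with invented names: elements are (features, (label,) * 5, weight).
dataset = make_batched_features_dataset_multi_task(
    file_pattern="/data/train-*.tfrecord",
    batch_size=128,
    features={
        "x": parsing_ops.FixedLenFeature([], dtypes.float32),
        "label": parsing_ops.FixedLenFeature([], dtypes.int64),
        "weight": parsing_ops.FixedLenFeature([], dtypes.float32),
    },
    label_key="label",
    weight_key="weight")
```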
Example #12
        get_next = dataset.make_one_shot_iterator().get_next()
        sess.run(get_next)
      return
    else:
      # Returns dict w/ Tensors and SparseTensors.
      # Check values.
      dataset = dataset_ops.Dataset.from_tensors(input_tensor).apply(
          contrib_parsing_ops.parse_example_dataset(feature_val))
      get_next = self.getNext(dataset)
      result = self.evaluate(get_next())
      self._compare_output_to_expected(result, expected_values)
      with self.assertRaises(errors_impl.OutOfRangeError):
        self.evaluate(get_next())
      with self.assertRaises(errors_impl.OutOfRangeError):
        self.evaluate(get_next())
      if create_iterator_twice:
        get_next = self.getNext(dataset)
        result = self.evaluate(get_next())
        self._compare_output_to_expected(result, expected_values)
        with self.assertRaises(errors_impl.OutOfRangeError):
          self.evaluate(get_next())
    # Check shapes; if serialized is a Tensor we need its size to
    # properly check.