Example #1
    def build_dataset(self, multiplier=15.0, tensor_slice_len=2, batch_size=2):
        components = (
            np.arange(tensor_slice_len),
            np.array([[1, 2, 3]]) * np.arange(tensor_slice_len)[:, np.newaxis],
            np.array(multiplier) * np.arange(tensor_slice_len))

        return dataset_ops.Dataset.from_tensor_slices(components).batch(
            batch_size).apply(batching.unbatch())
  def benchmarkNativeUnbatch(self):
    batch_sizes = [1, 2, 5, 10, 20, 50]
    elems_per_trial = 10000
    with ops.Graph().as_default():
      dataset = dataset_ops.Dataset.from_tensors("element").repeat(None)
      batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[])
      dataset = dataset.batch(batch_size_placeholder)
      dataset = dataset.apply(batching.unbatch())
      dataset = dataset.skip(elems_per_trial)
      iterator = dataset.make_initializable_iterator()
      next_element = iterator.get_next()

      with session.Session() as sess:
        for batch_size in batch_sizes:
          deltas = []
          for _ in range(5):
            sess.run(
                iterator.initializer,
                feed_dict={batch_size_placeholder: batch_size})
            start = time.time()
            sess.run(next_element.op)
            end = time.time()
            deltas.append((end - start) / elems_per_trial)

          median_wall_time = np.median(deltas)
          print("Unbatch (native) batch size: %d Median wall time per element:"
                " %f microseconds" % (batch_size, median_wall_time * 1e6))
          self.report_benchmark(
              iters=10000,
              wall_time=median_wall_time,
              name="benchmark_unbatch_dataset_native_batch_size_%d" %
              batch_size)
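For reference, `batching.unbatch()` is the inverse of `batch()`: it splits every batch back into its individual elements in order. A minimal round-trip sketch, assuming the TF 1.x contrib-era import paths these snippets rely on:

from tensorflow.contrib.data.python.ops import batching
from tensorflow.python.client import session
from tensorflow.python.data.ops import dataset_ops

# Batch into groups of 4, then unbatch back into single elements.
ds = dataset_ops.Dataset.range(10).batch(4).apply(batching.unbatch())
next_element = ds.make_one_shot_iterator().get_next()
with session.Session() as sess:
  for expected in range(10):
    assert sess.run(next_element) == expected  # order and values preserved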
Example #3
  def testUnbatchDynamicShapeMismatch(self):
    ph1 = array_ops.placeholder(dtypes.int32, shape=[None])
    ph2 = array_ops.placeholder(dtypes.int32, shape=None)
    data = dataset_ops.Dataset.from_tensors((ph1, ph2))
    data = data.apply(batching.unbatch())
    iterator = data.make_initializable_iterator()
    next_element = iterator.get_next()

    with self.test_session() as sess:
      # Mismatch in the 0th dimension.
      sess.run(
          iterator.initializer,
          feed_dict={
              ph1: np.arange(7).astype(np.int32),
              ph2: np.arange(8).astype(np.int32)
          })
      with self.assertRaises(errors.InvalidArgumentError):
        print(sess.run(next_element))

      # No 0th dimension (i.e. scalar value) for one component.
      sess.run(
          iterator.initializer,
          feed_dict={
              ph1: np.arange(7).astype(np.int32),
              ph2: 7
          })
      with self.assertRaises(errors.InvalidArgumentError):
        print(sess.run(next_element))
  def testUnbatchDynamicShapeMismatch(self):
    ph1 = array_ops.placeholder(dtypes.int32, shape=[None])
    ph2 = array_ops.placeholder(dtypes.int32, shape=None)
    data = dataset_ops.Dataset.from_tensors((ph1, ph2))
    data = data.apply(batching.unbatch())
    iterator = data.make_initializable_iterator()
    next_element = iterator.get_next()

    with self.cached_session() as sess:
      # Mismatch in the 0th dimension.
      sess.run(
          iterator.initializer,
          feed_dict={
              ph1: np.arange(7).astype(np.int32),
              ph2: np.arange(8).astype(np.int32)
          })
      with self.assertRaises(errors.InvalidArgumentError):
        sess.run(next_element)

      # No 0th dimension (i.e. scalar value) for one component.
      sess.run(
          iterator.initializer,
          feed_dict={
              ph1: np.arange(7).astype(np.int32),
              ph2: 7
          })
      with self.assertRaises(errors.InvalidArgumentError):
        sess.run(next_element)
Example #5
  def benchmarkNativeUnbatch(self):
    batch_sizes = [1, 2, 5, 10, 20, 50]
    elems_per_trial = 10000
    with ops.Graph().as_default():
      dataset = dataset_ops.Dataset.from_tensors("element").repeat(None)
      batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[])
      dataset = dataset.batch(batch_size_placeholder)
      dataset = dataset.apply(batching.unbatch())
      dataset = dataset.skip(elems_per_trial)
      iterator = dataset.make_initializable_iterator()
      next_element = iterator.get_next()

      with session.Session() as sess:
        for batch_size in batch_sizes:
          deltas = []
          for _ in range(5):
            sess.run(
                iterator.initializer,
                feed_dict={batch_size_placeholder: batch_size})
            start = time.time()
            sess.run(next_element.op)
            end = time.time()
            deltas.append((end - start) / elems_per_trial)

          median_wall_time = np.median(deltas)
          print("Unbatch (native) batch size: %d Median wall time per element:"
                " %f microseconds" % (batch_size, median_wall_time * 1e6))
          self.report_benchmark(
              iters=10000,
              wall_time=median_wall_time,
              name="benchmark_unbatch_dataset_native_batch_size_%d" %
              batch_size)
  def build_dataset(self, multiplier=15.0, tensor_slice_len=2, batch_size=2):
    components = (
        np.arange(tensor_slice_len),
        np.array([[1, 2, 3]]) * np.arange(tensor_slice_len)[:, np.newaxis],
        np.array(multiplier) * np.arange(tensor_slice_len))

    return dataset_ops.Dataset.from_tensor_slices(components).batch(
        batch_size).apply(batching.unbatch())
Example #7
  def _apply_fn(dataset):
    """Function from `Dataset` to `Dataset` that applies the transformation."""
    dist_estimation_batch_size = 32
    target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist")
    class_values_ds = dataset.map(class_func)
    if initial_dist is not None:
      initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist")
      acceptance_dist = _calculate_acceptance_probs(initial_dist_t,
                                                    target_dist_t)
      initial_dist_ds = dataset_ops.Dataset.from_tensors(
          initial_dist_t).repeat()
      acceptance_dist_ds = dataset_ops.Dataset.from_tensors(
          acceptance_dist).repeat()
    else:
      num_classes = (target_dist_t.shape[0].value or
                     array_ops.shape(target_dist_t)[0])
      smoothing_constant = 10
      initial_examples_per_class_seen = array_ops.fill(
          [num_classes], np.int64(smoothing_constant))

      def update_estimate_and_tile(num_examples_per_class_seen, c):
        updated_examples_per_class_seen, dist = _estimate_data_distribution(
            c, num_examples_per_class_seen)
        tiled_dist = array_ops.tile(
            array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1])
        return updated_examples_per_class_seen, tiled_dist

      initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size)
                         .apply(scan_ops.scan(initial_examples_per_class_seen,
                                              update_estimate_and_tile))
                         .apply(batching.unbatch()))
      acceptance_dist_ds = initial_dist_ds.map(
          lambda initial: _calculate_acceptance_probs(initial, target_dist_t))

    def maybe_warn_on_large_rejection(accept_dist, initial_dist):
      proportion_rejected = math_ops.reduce_sum(
          (1 - accept_dist) * initial_dist)
      return control_flow_ops.cond(
          math_ops.less(proportion_rejected, .5),
          lambda: accept_dist,
          lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
              accept_dist, [proportion_rejected, initial_dist, accept_dist],
              message="Proportion of examples rejected by sampler is high: ",
              summarize=100,
              first_n=10))

    acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds,
                                                   initial_dist_ds))
                          .map(maybe_warn_on_large_rejection))

    def _gather_and_copy(class_val, acceptance_prob, data):
      return (class_val, array_ops.gather(acceptance_prob, class_val), data)
    current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip(
        (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy)
    filtered_ds = (
        current_probabilities_and_class_and_data_ds
        .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p))
    return filtered_ds.map(lambda class_value, _, data: (class_value, data))
  def _apply_fn(dataset):
    """Function from `Dataset` to `Dataset` that applies the transformation."""
    dist_estimation_batch_size = 32
    target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist")
    class_values_ds = dataset.map(class_func)
    if initial_dist is not None:
      initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist")
      acceptance_dist = _calculate_acceptance_probs(initial_dist_t,
                                                    target_dist_t)
      initial_dist_ds = dataset_ops.Dataset.from_tensors(
          initial_dist_t).repeat()
      acceptance_dist_ds = dataset_ops.Dataset.from_tensors(
          acceptance_dist).repeat()
    else:
      num_classes = (target_dist_t.shape[0].value or
                     array_ops.shape(target_dist_t)[0])
      smoothing_constant = 10
      initial_examples_per_class_seen = array_ops.fill(
          [num_classes], np.int64(smoothing_constant))

      def update_estimate_and_tile(num_examples_per_class_seen, c):
        updated_examples_per_class_seen, dist = _estimate_data_distribution(
            c, num_examples_per_class_seen)
        tiled_dist = array_ops.tile(
            array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1])
        return updated_examples_per_class_seen, tiled_dist

      initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size)
                         .apply(scan_ops.scan(initial_examples_per_class_seen,
                                              update_estimate_and_tile))
                         .apply(batching.unbatch()))
      acceptance_dist_ds = initial_dist_ds.map(
          lambda initial: _calculate_acceptance_probs(initial, target_dist_t))

    def maybe_warn_on_large_rejection(accept_dist, initial_dist):
      proportion_rejected = math_ops.reduce_sum(
          (1 - accept_dist) * initial_dist)
      return control_flow_ops.cond(
          math_ops.less(proportion_rejected, .5),
          lambda: accept_dist,
          lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
              accept_dist, [proportion_rejected, initial_dist, accept_dist],
              message="Proportion of examples rejected by sampler is high: ",
              summarize=100,
              first_n=10))

    acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds,
                                                   initial_dist_ds))
                          .map(maybe_warn_on_large_rejection))

    current_probabilities_ds = dataset_ops.Dataset.zip(
        (acceptance_dist_ds, class_values_ds)).map(array_ops.gather)
    filtered_ds = (
        dataset_ops.Dataset.zip((class_values_ds, current_probabilities_ds,
                                 dataset))
        .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p))
    return filtered_ds.map(lambda class_value, _, data: (class_value, data))
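The two `_apply_fn` variants above implement rejection resampling: each element's class gets an acceptance probability derived from the target distribution and an estimated (or supplied) initial distribution, and the element is kept only when a uniform random draw falls below that probability. A usage sketch, assuming a hypothetical `rejection_resample(class_func, target_dist, initial_dist=None, seed=None)` wrapper that closes over these arguments and returns `_apply_fn`:

import numpy as np
from tensorflow.python.data.ops import dataset_ops

# Hypothetical usage: rebalance a skewed label stream toward a 50/50 class mix.
# Here each element is its own class label.
labels = np.array([0, 0, 0, 0, 1, 1], dtype=np.int32)
dataset = dataset_ops.Dataset.from_tensor_slices(labels).repeat()
resampled = dataset.apply(
    rejection_resample(class_func=lambda label: label,
                       target_dist=[0.5, 0.5],
                       seed=42))
# Each output element is a (class_value, original_element) pair whose long-run
# class frequencies approach `target_dist`.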
  def testUnbatchEmpty(self):
    data = dataset_ops.Dataset.from_tensors(
        (constant_op.constant([]), constant_op.constant([], shape=[0, 4]),
         constant_op.constant([], shape=[0, 4, 0])))
    data = data.apply(batching.unbatch())
    iterator = data.make_one_shot_iterator()
    next_element = iterator.get_next()

    with self.cached_session() as sess:
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_element)
Example #10
  def testUnbatchDatasetWithSparseTensor(self):
    st = sparse_tensor.SparseTensorValue(
        indices=[[i, i] for i in range(10)],
        values=list(range(10)),
        dense_shape=[10, 10])
    data = dataset_ops.Dataset.from_tensors(st)
    data = data.apply(batching.unbatch())
    data = data.batch(5)
    data = data.apply(batching.unbatch())
    iterator = data.make_one_shot_iterator()
    next_element = iterator.get_next()

    with self.test_session() as sess:
      for i in range(10):
        st_row = sess.run(next_element)
        self.assertEqual([i], st_row.indices)
        self.assertEqual([i], st_row.values)
        self.assertEqual([10], st_row.dense_shape)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_element)
  def testUnbatchDatasetWithSparseTensor(self):
    st = sparse_tensor.SparseTensorValue(
        indices=[[i, i] for i in range(10)],
        values=list(range(10)),
        dense_shape=[10, 10])
    data = dataset_ops.Dataset.from_tensors(st)
    data = data.apply(batching.unbatch())
    data = data.batch(5)
    data = data.apply(batching.unbatch())
    iterator = data.make_one_shot_iterator()
    next_element = iterator.get_next()

    with self.cached_session() as sess:
      for i in range(10):
        st_row = sess.run(next_element)
        self.assertEqual([i], st_row.indices)
        self.assertEqual([i], st_row.values)
        self.assertEqual([10], st_row.dense_shape)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_element)
Example #12
  def testUnbatchEmpty(self):
    data = dataset_ops.Dataset.from_tensors(
        (constant_op.constant([]), constant_op.constant([], shape=[0, 4]),
         constant_op.constant([], shape=[0, 4, 0])))
    data = data.apply(batching.unbatch())
    iterator = data.make_one_shot_iterator()
    next_element = iterator.get_next()

    with self.test_session() as sess:
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_element)
Example #13
  def testUnbatchSingleElementTupleDataset(self):
    data = tuple([(math_ops.range(10),) for _ in range(3)])
    data = dataset_ops.Dataset.from_tensor_slices(data)
    expected_types = ((dtypes.int32,),) * 3
    data = data.batch(2)
    self.assertEqual(expected_types, data.output_types)
    data = data.apply(batching.unbatch())
    self.assertEqual(expected_types, data.output_types)

    iterator = data.make_one_shot_iterator()
    op = iterator.get_next()

    with self.test_session() as sess:
      for i in range(10):
        self.assertEqual(((i,),) * 3, sess.run(op))

      with self.assertRaises(errors.OutOfRangeError):
        sess.run(op)
  def testUnbatchSingleElementTupleDataset(self):
    data = tuple([(math_ops.range(10),) for _ in range(3)])
    data = dataset_ops.Dataset.from_tensor_slices(data)
    expected_types = ((dtypes.int32,),) * 3
    data = data.batch(2)
    self.assertEqual(expected_types, data.output_types)
    data = data.apply(batching.unbatch())
    self.assertEqual(expected_types, data.output_types)

    iterator = data.make_one_shot_iterator()
    op = iterator.get_next()

    with self.cached_session() as sess:
      for i in range(10):
        self.assertEqual(((i,),) * 3, sess.run(op))

      with self.assertRaises(errors.OutOfRangeError):
        sess.run(op)
  def testUnbatchDatasetWithStrings(self):
    data = tuple([math_ops.range(10) for _ in range(3)])
    data = dataset_ops.Dataset.from_tensor_slices(data)
    data = data.map(lambda x, y, z: (x, string_ops.as_string(y), z))
    expected_types = (dtypes.int32, dtypes.string, dtypes.int32)
    data = data.batch(2)
    self.assertEqual(expected_types, data.output_types)
    data = data.apply(batching.unbatch())
    self.assertEqual(expected_types, data.output_types)

    iterator = data.make_one_shot_iterator()
    op = iterator.get_next()

    with self.cached_session() as sess:
      for i in range(10):
        self.assertEqual((i, compat.as_bytes(str(i)), i), sess.run(op))

      with self.assertRaises(errors.OutOfRangeError):
        sess.run(op)
Example #16
  def testUnbatchDatasetWithStrings(self):
    data = tuple([math_ops.range(10) for _ in range(3)])
    data = dataset_ops.Dataset.from_tensor_slices(data)
    data = data.map(lambda x, y, z: (x, string_ops.as_string(y), z))
    expected_types = (dtypes.int32, dtypes.string, dtypes.int32)
    data = data.batch(2)
    self.assertEqual(expected_types, data.output_types)
    data = data.apply(batching.unbatch())
    self.assertEqual(expected_types, data.output_types)

    iterator = data.make_one_shot_iterator()
    op = iterator.get_next()

    with self.test_session() as sess:
      for i in range(10):
        self.assertEqual((i, compat.as_bytes(str(i)), i), sess.run(op))

      with self.assertRaises(errors.OutOfRangeError):
        sess.run(op)
Example #17
  def testUnbatchMultiElementTupleDataset(self):
    data = tuple([(math_ops.range(10 * i, 10 * i + 10),
                   array_ops.fill([10], "hi")) for i in range(3)])
    data = dataset_ops.Dataset.from_tensor_slices(data)
    expected_types = ((dtypes.int32, dtypes.string),) * 3
    data = data.batch(2)
    self.assertAllEqual(expected_types, data.output_types)
    data = data.apply(batching.unbatch())
    self.assertAllEqual(expected_types, data.output_types)

    iterator = data.make_one_shot_iterator()
    op = iterator.get_next()

    with self.test_session() as sess:
      for i in range(10):
        self.assertEqual(((i, b"hi"), (10 + i, b"hi"), (20 + i, b"hi")),
                         sess.run(op))

      with self.assertRaises(errors.OutOfRangeError):
        sess.run(op)
  def testUnbatchMultiElementTupleDataset(self):
    data = tuple([(math_ops.range(10 * i, 10 * i + 10),
                   array_ops.fill([10], "hi")) for i in range(3)])
    data = dataset_ops.Dataset.from_tensor_slices(data)
    expected_types = ((dtypes.int32, dtypes.string),) * 3
    data = data.batch(2)
    self.assertAllEqual(expected_types, data.output_types)
    data = data.apply(batching.unbatch())
    self.assertAllEqual(expected_types, data.output_types)

    iterator = data.make_one_shot_iterator()
    op = iterator.get_next()

    with self.cached_session() as sess:
      for i in range(10):
        self.assertEqual(((i, b"hi"), (10 + i, b"hi"), (20 + i, b"hi")),
                         sess.run(op))

      with self.assertRaises(errors.OutOfRangeError):
        sess.run(op)
Example #19
def _estimate_initial_dist_ds(
    target_dist_t, class_values_ds, dist_estimation_batch_size=32,
    smoothing_constant=10):
  num_classes = (target_dist_t.shape[0].value or
                 array_ops.shape(target_dist_t)[0])
  initial_examples_per_class_seen = array_ops.fill(
      [num_classes], np.int64(smoothing_constant))

  def update_estimate_and_tile(num_examples_per_class_seen, c):
    updated_examples_per_class_seen, dist = _estimate_data_distribution(
        c, num_examples_per_class_seen)
    tiled_dist = array_ops.tile(
        array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1])
    return updated_examples_per_class_seen, tiled_dist

  initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size)
                     .apply(scan_ops.scan(initial_examples_per_class_seen,
                                          update_estimate_and_tile))
                     .apply(batching.unbatch()))

  return initial_dist_ds
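The helper above scans over fixed-size batches, tiles the running class-distribution estimate to the batch size, and then unbatches so that each element is paired with the estimate built from every batch seen so far. A stripped-down sketch of that scan-then-unbatch pattern, with a hypothetical running mean standing in for `_estimate_data_distribution` and the same contrib-era modules assumed:

from tensorflow.contrib.data.python.ops import batching
from tensorflow.contrib.data.python.ops import scan_ops
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops


def _running_mean_and_tile(state, batch):
  # state = (count, total); fold the batch in, then emit one copy of the
  # running mean per element of the batch so it survives unbatching.
  count, total = state
  count += math_ops.cast(array_ops.size(batch), dtypes.float32)
  total += math_ops.reduce_sum(math_ops.cast(batch, dtypes.float32))
  tiled_mean = array_ops.fill([4], total / count)  # 4 == batch size used below
  return (count, total), tiled_mean


per_element_mean_ds = (dataset_ops.Dataset.range(20).batch(4)
                       .apply(scan_ops.scan((0.0, 0.0), _running_mean_and_tile))
                       .apply(batching.unbatch()))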
Example #20
def StreamingFilesDataset(files,
                          filetype=None,
                          file_reader_job=None,
                          worker_job=None,
                          num_epochs=None,
                          filename_shuffle_buffer_size=None,
                          num_parallel_reads=None,
                          batch_transfer_size=None,
                          sloppy=None):
    """StreamingFilesDataset constructs a dataset to stream from workers (GCE VM).

  Because Cloud TPUs are allocated over the network, a Cloud TPU cannot read
  files local to your GCE VM. In order to train using files stored on your local
  VM (e.g. on local SSD for extreme performance), use the StreamingFilesDataset
  helper to generate a dataset to feed your Cloud TPU with files from your GCE
  VM.

  The resulting dataset may return an OutOfRangeError if there are no files
  found as a result of the fileglob expansion.

  Note: StreamingFilesDataset assumes that the session is using a
  TPUClusterResolver and therefore has a worker and a coordinator job. File
  loading will be done on the coordinator job.

  Args:
    files: A string glob to match files, or a `tf.data.Dataset` generating file
      names.
    filetype: A string (one of 'tfrecord', or 'textline') or a single-argument
      TensorFlow function that when given a filename returns a dataset.
    file_reader_job: An optional string that corresponds to the job that should
      perform the file reads.
    worker_job: An optional string that corresponds to the job that should
      process the tensors (i.e. your GPU or TPU worker).
    num_epochs: The number of epochs through the training set that should be
      generated. By default, it will repeat infinitely.
    filename_shuffle_buffer_size: An optional integer whose value controls the
      shuffling of the file names. If you would like to read from the files in
      the same order, set to 0 or False.
    num_parallel_reads: An optional integer controlling the number of files to
      read from concurrently. (Set to 1 for no parallelism.)
    batch_transfer_size: An optional integer controlling the batching used to
      amortize the remote function invocation overhead. Set to a very large
      number to increase throughput. Set to a very small number to reduce memory
      consumption. Set to False to skip batching.
    sloppy: (Optional.) If `True`, read input data as fast as possible, without
      maintaining a deterministic order. Defaults to `False`.
  Returns:
    A `tf.data.Dataset` with an infinite stream of elements generated by a
    parallel interleaving of the set of files matched (or generated) by `files`
    with an element type matching that of the dataset specified by `filetype`.

  Raises:
    ValueError: if any argument is not of the expected type.
  """
    if filetype is None:
        filetype = 'tfrecord'

    if isinstance(filetype, str):
        if filetype not in _FILETYPE_MAP:
            raise ValueError('Unexpected filetype: %s' % filetype)
        reader_fn = _FILETYPE_MAP[filetype]
    elif callable(filetype):
        reader_fn = filetype
    else:
        raise ValueError('filetype should be a string or a callable')

    file_reader_job = file_reader_job or 'coordinator'

    worker_job = worker_job or 'worker'

    if filename_shuffle_buffer_size is None:
        filename_shuffle_buffer_size = 4096

    num_parallel_reads = num_parallel_reads or 8

    if batch_transfer_size is None:
        batch_transfer_size = 1024

    if sloppy is None:
        sloppy = False

    with ops.device('/job:%s' % file_reader_job):
        if isinstance(files, str):
            source_dataset = dataset_ops.Dataset.list_files(files)
        elif isinstance(files, dataset_ops.Dataset):
            source_dataset = files
        else:
            raise ValueError('files was not a string or a dataset: %s' % files)

        if filename_shuffle_buffer_size:
            source_dataset = source_dataset.shuffle(
                buffer_size=filename_shuffle_buffer_size)

        # NOTE: We perform the `repeat` on the source dataset, because the output
        # dataset does not currently have enough information to recreate an iterator
        # over the source dataset when it reaches the end.
        source_dataset = source_dataset.repeat(num_epochs)

        source_dataset = source_dataset.apply(
            interleave_ops.parallel_interleave(reader_fn,
                                               cycle_length=num_parallel_reads,
                                               sloppy=sloppy))

        if batch_transfer_size:
            # Note: we can safely call batch_and_drop_remainder because we have an
            # infinite stream of TFRecords.
            source_dataset = source_dataset.apply(
                batching.batch_and_drop_remainder(batch_transfer_size))

        source_dataset = source_dataset.prefetch(1)

        source_iterator = source_dataset.make_one_shot_iterator()
        source_handle = source_iterator.string_handle()

    @function.Defun(dtypes.string)
    def LoadingFunc(h):
        remote_iterator = iterator_ops.Iterator.from_string_handle(
            h, source_dataset.output_types, source_dataset.output_shapes)
        return remote_iterator.get_next()

    def MapFn(unused_input):
        return functional_ops.remote_call(
            args=[source_handle],
            Tout=[dtypes.string],
            f=LoadingFunc,
            target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job)

    with ops.device('/job:%s' % worker_job):
        # TODO(saeta,mrry): Switch to using _GeneratorDataset.

        # identity = lambda x: x
        # dummy = constant_op.constant(0)
        # output_dataset = dataset_ops._GeneratorDataset(dummy, identity, MapFn,
        #                                                identity)

        output_dataset = dataset_ops.Dataset.range(2).repeat().map(MapFn)
        output_dataset = output_dataset.prefetch(1)

        if batch_transfer_size:
            # Undo the batching used during the transfer.
            output_dataset = output_dataset.apply(
                batching.unbatch()).prefetch(1)

    return output_dataset
  def testUnbatchStaticShapeMismatch(self):
    data = dataset_ops.Dataset.from_tensors((np.arange(7), np.arange(8),
                                             np.arange(9)))
    with self.assertRaises(ValueError):
      data.apply(batching.unbatch())
Example #22
def StreamingFilesDataset(files,
                          filetype=None,
                          file_reader_job=None,
                          worker_job=None,
                          num_epochs=None,
                          filename_shuffle_buffer_size=None,
                          num_parallel_reads=None,
                          batch_transfer_size=None,
                          sloppy=None):
  """StreamingFilesDataset constructs a dataset to stream from workers (GCE VM).

  Because Cloud TPUs are allocated over the network, a Cloud TPU cannot read
  files local to your GCE VM. In order to train using files stored on your local
  VM (e.g. on local SSD for extreme performance), use the StreamingFilesDataset
  helper to generate a dataset to feed your Cloud TPU with files from your GCE
  VM.

  The resulting dataset may return an OutOfRangeError if there are no files
  found as a result of the fileglob expansion.

  Note: StreamingFilesDataset assumes that the session is using a
  TPUClusterResolver and therefore has a worker and a coordinator job. File
  loading will be done on the coordinator job.

  Args:
    files: A string glob to match files, or a `tf.data.Dataset` generating file
      names.
    filetype: A string (one of 'tfrecord', or 'textline') or a single-argument
      TensorFlow function that when given a filename returns a dataset.
    file_reader_job: An optional string that corresponds to the job that should
      perform the file reads.
    worker_job: An optional string that corresponds to the job that should
      process the tensors (i.e. your GPU or TPU worker).
    num_epochs: The number of epochs through the training set that should be
      generated. By default, it will repeat infinitely.
    filename_shuffle_buffer_size: An optional integer whose value controls the
      shuffling of the file names. If you would like to read from the files in
      the same order, set to 0 or False.
    num_parallel_reads: An optional integer controlling the number of files to
      read from concurrently. (Set to 1 for no parallelism.)
    batch_transfer_size: An optional integer controlling the batching used to
      amortize the remote function invocation overhead. Set to a very large
      number to increase throughput. Set to a very small number to reduce memory
      consumption. Set to False to skip batching.
    sloppy: (Optional.) If `False`, read input data while maintaining a
      deterministic order, which may have a significant performance impact.
      Defaults to `True`.
  Returns:
    A `tf.data.Dataset` with an infinite stream of elements generated by a
    parallel interleaving of the set of files matched (or generated) by `files`
    with an element type matching that of the dataset specified by `filetype`.

  Raises:
    ValueError: if any argument is not of the expected type.
  """
  if filetype is None:
    filetype = 'tfrecord'

  if isinstance(filetype, str):
    if filetype not in _FILETYPE_MAP:
      raise ValueError('Unexpected filetype: %s' % filetype)
    reader_fn = _FILETYPE_MAP[filetype]
  elif callable(filetype):
    reader_fn = filetype
  else:
    raise ValueError('filetype should be a string or a callable')

  file_reader_job = file_reader_job or 'coordinator'

  worker_job = worker_job or 'worker'

  if filename_shuffle_buffer_size is None:
    filename_shuffle_buffer_size = 4096

  num_parallel_reads = num_parallel_reads or 8

  if batch_transfer_size is None:
    batch_transfer_size = 256

  if sloppy is None:
    sloppy = True

  with ops.device('/job:%s' % file_reader_job):
    if isinstance(files, str):
      source_dataset = dataset_ops.Dataset.list_files(files)
    elif isinstance(files, dataset_ops.Dataset):
      source_dataset = files
    else:
      raise ValueError('files was not a string or a dataset: %s' % files)

    if filename_shuffle_buffer_size:
      source_dataset = source_dataset.shuffle(
          buffer_size=filename_shuffle_buffer_size)

    # NOTE: We perform the `repeat` on the source dataset, because the output
    # dataset does not currently have enough information to recreate an iterator
    # over the source dataset when it reaches the end.
    source_dataset = source_dataset.repeat(num_epochs)

    source_dataset = source_dataset.apply(
        interleave_ops.parallel_interleave(
            reader_fn, cycle_length=num_parallel_reads, sloppy=sloppy))

    if batch_transfer_size:
      source_dataset = source_dataset.batch(batch_transfer_size)

    source_dataset = source_dataset.prefetch(1)

    source_iterator = source_dataset.make_one_shot_iterator()
    source_handle = source_iterator.string_handle()

  @function.Defun(dtypes.string)
  def LoadingFunc(h):
    remote_iterator = iterator_ops.Iterator.from_string_handle(
        h, source_dataset.output_types, source_dataset.output_shapes)
    return remote_iterator.get_next()

  def MapFn(unused_input):
    return functional_ops.remote_call(
        args=[source_handle],
        Tout=[dtypes.string],
        f=LoadingFunc,
        target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job)[0]

  with ops.device('/job:%s' % worker_job):
    output_dataset = dataset_ops.Dataset.range(2).repeat().map(
        MapFn, num_parallel_calls=4 if sloppy else None)
    output_dataset = output_dataset.prefetch(1)

    if batch_transfer_size:
      # Undo the batching used during the transfer.
      output_dataset = output_dataset.apply(batching.unbatch()).prefetch(1)

  return output_dataset
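As the docstring above describes, the returned dataset is built on the file-reader (coordinator) job and consumed on the worker job. A hypothetical call site, with a placeholder file glob and the default job names, assuming a TPUClusterResolver-backed session as the docstring requires:

# The glob below is a placeholder path on the coordinator VM's local disk.
streamed = StreamingFilesDataset('/mnt/disks/ssd0/train-*.tfrecord',
                                 filetype='tfrecord',
                                 file_reader_job='coordinator',
                                 worker_job='worker')
serialized_records = streamed.make_one_shot_iterator().get_next()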
Example #23
  def testUnbatchStaticShapeMismatch(self):
    data = dataset_ops.Dataset.from_tensors((np.arange(7), np.arange(8),
                                             np.arange(9)))
    with self.assertRaises(ValueError):
      data.apply(batching.unbatch())
Example #24
  def unbatch(self):
    """Deprecated: Use `Dataset.apply(tf.contrib.data.unbatch())`."""

    return self.apply(batching.unbatch())
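The docstring's recommended replacement, written out against the public API, assuming a TF 1.x build where `tf.contrib.data` is available:

import tensorflow as tf

batched = tf.data.Dataset.range(8).batch(2)
unbatched = batched.apply(tf.contrib.data.unbatch())  # yields 0..7 one at a time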
Example #25
    def unbatch(self):
        """Deprecated: Use `Dataset.apply(tf.contrib.data.unbatch())`."""

        return self.apply(batching.unbatch())
Example #26
    def _apply_fn(dataset):
        """Function from `Dataset` to `Dataset` that applies the transformation."""
        dist_estimation_batch_size = 32
        target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist")
        class_values_ds = dataset.map(class_func)
        if initial_dist is not None:
            initial_dist_t = ops.convert_to_tensor(initial_dist,
                                                   name="initial_dist")
            acceptance_dist = _calculate_acceptance_probs(
                initial_dist_t, target_dist_t)
            initial_dist_ds = dataset_ops.Dataset.from_tensors(
                initial_dist_t).repeat()
            acceptance_dist_ds = dataset_ops.Dataset.from_tensors(
                acceptance_dist).repeat()
        else:
            num_classes = (target_dist_t.shape[0].value
                           or array_ops.shape(target_dist_t)[0])
            smoothing_constant = 10
            # Disable device functions and colocation constraints so that the variable
            # will be placed with the eventual DT_VARIANT dataset tensor.
            with ops.colocate_with(None, ignore_existing=True):
                num_examples_per_class_seen = resource_variable_ops.ResourceVariable(
                    initial_value=array_ops.fill([num_classes],
                                                 np.int64(smoothing_constant)),
                    trainable=False,
                    collections=[ops.GraphKeys.LOCAL_VARIABLES],
                    name="local_class_count",
                    dtype=dtypes.int64)

            def update_estimate_and_tile(c):
                return array_ops.tile(
                    array_ops.expand_dims(
                        _estimate_data_distribution(
                            c, num_examples_per_class_seen), 0),
                    [dist_estimation_batch_size, 1])

            initial_dist_ds = (
                class_values_ds.batch(dist_estimation_batch_size).map(
                    update_estimate_and_tile).apply(batching.unbatch()))
            acceptance_dist_ds = initial_dist_ds.map(
                lambda initial: _calculate_acceptance_probs(
                    initial, target_dist_t))

        def maybe_warn_on_large_rejection(accept_dist, initial_dist):
            proportion_rejected = math_ops.reduce_sum(
                (1 - accept_dist) * initial_dist)
            return control_flow_ops.cond(
                math_ops.less(proportion_rejected, .5),
                lambda: accept_dist,
                lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
                    accept_dist,
                    [proportion_rejected, initial_dist, accept_dist],
                    message=
                    "Proportion of examples rejected by sampler is high: ",
                    summarize=100,
                    first_n=10))

        acceptance_dist_ds = (dataset_ops.Dataset.zip(
            (acceptance_dist_ds,
             initial_dist_ds)).map(maybe_warn_on_large_rejection))

        current_probabilities_ds = dataset_ops.Dataset.zip(
            (acceptance_dist_ds, class_values_ds)).map(array_ops.gather)
        filtered_ds = (dataset_ops.Dataset.zip(
            (class_values_ds, current_probabilities_ds,
             dataset)).filter(lambda _1, p, _2: random_ops.random_uniform(
                 [], seed=seed) < p))
        return filtered_ds.map(lambda class_value, _, data:
                               (class_value, data))