def test_metrics_correctness_with_iterator(self):
    layers = [
        keras.layers.Dense(8, activation='relu', input_dim=4,
                           kernel_initializer='ones'),
        keras.layers.Dense(1, activation='sigmoid', kernel_initializer='ones')
    ]

    model = testing_utils.get_model_from_layers(layers, (4,))

    model.compile(
        loss='binary_crossentropy',
        metrics=['accuracy', metrics_module.BinaryAccuracy()],
        optimizer='rmsprop',
        run_eagerly=testing_utils.should_run_eagerly())

    np.random.seed(123)
    x = np.random.randint(10, size=(100, 4)).astype(np.float32)
    y = np.random.randint(2, size=(100, 1)).astype(np.float32)
    dataset = dataset_ops.Dataset.from_tensor_slices((x, y))
    dataset = dataset.batch(10)
    iterator = dataset_ops.make_one_shot_iterator(dataset)
    outs = model.evaluate(iterator, steps=10)
    self.assertEqual(np.around(outs[1], decimals=1), 0.5)
    self.assertEqual(np.around(outs[2], decimals=1), 0.5)

    y = np.zeros((100, 1), dtype=np.float32)
    dataset = dataset_ops.Dataset.from_tensor_slices((x, y))
    dataset = dataset.repeat(100)
    dataset = dataset.batch(10)
    iterator = dataset_ops.make_one_shot_iterator(dataset)
    outs = model.evaluate(iterator, steps=10)
    self.assertEqual(outs[1], 0.)
    self.assertEqual(outs[2], 0.)
  def _compare(self, input_dataset, map_fn, batch_size, input_size, str_id):
    num_elems = int(np.sum([np.prod(x) for x in input_size]))
    name_template = "{}_batch_size_{}_input_element_size_{}_{}"

    unoptimized_dataset = input_dataset.map(map_fn).batch(batch_size)

    options = dataset_ops.Options()
    options.experimental_optimization.apply_default_optimizations = False
    unoptimized_dataset = unoptimized_dataset.with_options(options)
    unoptimized_next = dataset_ops.make_one_shot_iterator(
        unoptimized_dataset).get_next()

    options = dataset_ops.Options()
    options.experimental_optimization.map_vectorization = True
    optimized_dataset = unoptimized_dataset.with_options(options)
    optimized_next = dataset_ops.make_one_shot_iterator(
        optimized_dataset).get_next()

    unoptimized_time = self._run(
        unoptimized_next,
        name=name_template.format(str_id, batch_size, num_elems, "unoptimized"))
    optimized_time = self._run(
        optimized_next,
        name=name_template.format(str_id, batch_size, num_elems, "optimized"))

    print("Batch size: {}\n"
          "Input element size: {}\n"
          "Transformation: {}\n"
          "Speedup: {}\n".format(batch_size, input_size, str_id,
                                 (unoptimized_time / optimized_time)))
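  # Hypothetical invocation sketch (not from the original benchmark file): a
  # minimal benchmark method showing how `_compare` above might be called,
  # assuming only that the enclosing class provides the `_run` helper already
  # used inside `_compare`. The map function, batch size, and input size here
  # are made up for illustration.
  def benchmarkIdentitySketch(self):
    input_size = [(4,)]
    input_dataset = dataset_ops.Dataset.from_tensors(
        np.random.rand(4).astype(np.float32)).repeat()
    self._compare(
        input_dataset,
        map_fn=lambda x: x,
        batch_size=128,
        input_size=input_size,
        str_id="identity")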
  def testMapNamedtuple(self, count=10):
    # construct dataset of tuples
    labels = dataset_ops.Dataset.range(count)
    images = labels.map(lambda l: -l)
    dataset_tuple = dataset_ops.Dataset.zip((labels, images))

    # convert dataset of tuples to dataset of namedtuples
    example = namedtuple("Example", ["label", "image"])
    dataset_namedtuple = dataset_tuple.map(example)

    def preprocess_tuple(label, image):
      image = 2 * image
      return label, image

    def preprocess_namedtuple(example):
      return example._replace(image=2 * example.image)

    # preprocess both datasets
    dataset_tuple = dataset_tuple.map(preprocess_tuple)
    dataset_namedtuple = dataset_namedtuple.map(preprocess_namedtuple)

    next_tuple = dataset_ops.make_one_shot_iterator(dataset_tuple).get_next()
    next_namedtuple = dataset_ops.make_one_shot_iterator(
        dataset_namedtuple).get_next()

    # make sure both datasets contain the same data
    with self.cached_session() as sess:
      for i in range(count):
        tuple_, namedtuple_ = sess.run([next_tuple, next_namedtuple])
        self.assertEqual(tuple_, namedtuple_)
        self.assertEqual(tuple_, (i, -2 * i))

      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_namedtuple)
  def test_model_fit_and_validation_with_missing_arg_errors(self):
    model = testing_utils.get_small_mlp(10, 4, 3)
    model.compile(optimizer=rmsprop.RMSprop(learning_rate=0.001),
                  loss='mse',
                  run_eagerly=True)

    x = array_ops.zeros(shape=(10, 3))
    y = array_ops.zeros(shape=(10, 4))
    dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).repeat(10).batch(5)
    iterator = dataset_ops.make_one_shot_iterator(dataset)
    validation_dataset = dataset_ops.Dataset.from_tensor_slices(
        (x, y)).repeat().batch(5)  # Infinite dataset.
    validation_iterator = dataset_ops.make_one_shot_iterator(validation_dataset)

    with self.assertRaisesRegexp(
        ValueError, r'specify .* `steps_per_epoch`'):
      model.fit(iterator, epochs=1, verbose=0)
    if not context.executing_eagerly():
      # In eager execution, `array_ops.zeros` returns value tensors
      # which can be used for validation without a `validation_steps` argument.
      with self.assertRaisesRegexp(
          ValueError, r'provide either `batch_size` or `validation_steps`'):
        model.fit(iterator, steps_per_epoch=2, epochs=1, verbose=0,
                  validation_data=(x, y))
    # Step argument is required for infinite datasets.
    with self.assertRaisesRegexp(ValueError,
                                 'specify the `validation_steps` argument.'):
      model.fit(iterator, steps_per_epoch=2, epochs=1, verbose=0,
                validation_data=validation_dataset)
    with self.assertRaisesRegexp(ValueError,
                                 'specify the `validation_steps` argument.'):
      model.fit(iterator, steps_per_epoch=2, epochs=1, verbose=0,
                validation_data=validation_iterator)
  def testSaveRestoreMultipleIterator(self):
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    dataset = dataset_ops.Dataset.from_tensor_slices(
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
    dataset = dataset.map(math_ops.square).batch(2)
    iterator_1 = iter(dataset) if context.executing_eagerly(
    ) else dataset_ops.make_one_shot_iterator(dataset)
    get_next_1 = iterator_1.get_next if context.executing_eagerly(
    ) else functools.partial(self.evaluate, iterator_1.get_next())
    iterator_2 = iter(dataset) if context.executing_eagerly(
    ) else dataset_ops.make_one_shot_iterator(dataset)
    get_next_2 = iterator_2.get_next if context.executing_eagerly(
    ) else functools.partial(self.evaluate, iterator_2.get_next())
    dataset_2 = dataset_ops.Dataset.range(10)
    iterator_3 = iter(dataset_2) if context.executing_eagerly(
    ) else dataset_ops.make_one_shot_iterator(dataset_2)
    get_next_3 = iterator_3.get_next if context.executing_eagerly(
    ) else functools.partial(self.evaluate, iterator_3.get_next())
    checkpoint = trackable_utils.Checkpoint(
        iterator_1=iterator_1, iterator_2=iterator_2, iterator_3=iterator_3)
    self.assertAllEqual([1, 4], get_next_1())
    self.assertAllEqual(0, get_next_3())
    self.assertAllEqual(1, get_next_3())
    self.assertAllEqual(2, get_next_3())
    save_path = checkpoint.save(checkpoint_prefix)
    self.assertAllEqual([1, 4], get_next_2())
    self.assertAllEqual([9, 16], get_next_2())
    self.assertAllEqual(3, get_next_3())
    checkpoint.restore(save_path).run_restore_ops()
    self.assertAllEqual([9, 16], get_next_1())
    self.assertAllEqual([1, 4], get_next_2())
    self.assertAllEqual(3, get_next_3())
  def testCapturingStateInOneShotRaisesException(self):
    var = variables.Variable(37.0, name="myvar")
    dataset = (
        dataset_ops.Dataset.from_tensor_slices([0.0, 1.0, 2.0])
        .map(lambda x: x + var))
    with self.assertRaisesRegexp(
        ValueError, r"`Dataset.make_one_shot_iterator\(\)` does not support "
        "datasets that capture stateful objects.+myvar"):
      dataset_ops.make_one_shot_iterator(dataset)
  def testIteratorStringHandle(self):
    dataset_3 = dataset_ops.Dataset.from_tensor_slices([1, 2, 3])
    dataset_4 = dataset_ops.Dataset.from_tensor_slices([10, 20, 30, 40])

    iterator_3 = dataset_ops.make_one_shot_iterator(dataset_3)
    iterator_4 = dataset_ops.make_one_shot_iterator(dataset_4)

    handle_placeholder = array_ops.placeholder(dtypes.string, shape=[])
    feedable_iterator = iterator_ops.Iterator.from_string_handle(
        handle_placeholder, dataset_ops.get_legacy_output_types(dataset_3),
        dataset_ops.get_legacy_output_shapes(dataset_3))
    next_element = feedable_iterator.get_next()

    self.assertTrue(dataset_ops.get_structure(dataset_3).is_compatible_with(
        dataset_ops.get_structure(feedable_iterator)))
    self.assertTrue(dataset_ops.get_structure(dataset_4).is_compatible_with(
        dataset_ops.get_structure(feedable_iterator)))

    with self.cached_session() as sess:
      iterator_3_handle = sess.run(iterator_3.string_handle())
      iterator_4_handle = sess.run(iterator_4.string_handle())

      self.assertEqual(10,
                       sess.run(
                           next_element,
                           feed_dict={handle_placeholder: iterator_4_handle}))
      self.assertEqual(1,
                       sess.run(
                           next_element,
                           feed_dict={handle_placeholder: iterator_3_handle}))
      self.assertEqual(20,
                       sess.run(
                           next_element,
                           feed_dict={handle_placeholder: iterator_4_handle}))
      self.assertEqual(2,
                       sess.run(
                           next_element,
                           feed_dict={handle_placeholder: iterator_3_handle}))
      self.assertEqual(30,
                       sess.run(
                           next_element,
                           feed_dict={handle_placeholder: iterator_4_handle}))
      self.assertEqual(3,
                       sess.run(
                           next_element,
                           feed_dict={handle_placeholder: iterator_3_handle}))
      self.assertEqual(40,
                       sess.run(
                           next_element,
                           feed_dict={handle_placeholder: iterator_4_handle}))
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(
            next_element, feed_dict={handle_placeholder: iterator_3_handle})
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(
            next_element, feed_dict={handle_placeholder: iterator_4_handle})
  def testCopyToSameDevice(self):
    host_dataset = dataset_ops.Dataset.range(10)
    device_dataset = host_dataset.apply(
        prefetching_ops.copy_to_device("/cpu:0"))

    with ops.device("/cpu:0"):
      iterator = dataset_ops.make_one_shot_iterator(device_dataset)
      next_element = iterator.get_next()

    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
    self.assertEqual(host_dataset.output_types, iterator.output_types)
    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
    self.assertEqual(host_dataset.output_classes, iterator.output_classes)

    self.assertEqual(dtypes.int64, next_element.dtype)
    self.assertEqual([], next_element.shape)

    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
    with self.test_session(config=worker_config):
      for i in range(10):
        self.assertEqual(i, self.evaluate(next_element))
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element)
  def testOneShotIteratorInitializerFails(self):
    # Define a dataset whose initialization will always fail.
    dataset = dataset_ops.Dataset.from_tensors(
        array_ops.check_numerics(
            constant_op.constant(1.0) / constant_op.constant(0.0), "oops"))
    iterator = dataset_ops.make_one_shot_iterator(dataset)
    next_element = iterator.get_next()

    with self.cached_session() as sess:
      with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"):
        sess.run(next_element)

      # Test that subsequent attempts to use the iterator also fail.
      with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"):
        sess.run(next_element)

    with self.cached_session() as sess:

      def consumer_thread():
        with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"):
          sess.run(next_element)

      num_threads = 8
      threads = [
          self.checkedThread(consumer_thread) for _ in range(num_threads)
      ]
      for t in threads:
        t.start()
      for t in threads:
        t.join()
  def testMapAndBatchOutOfRangeError(self, threshold, numa_aware):

    def raising_py_fn(i):
      if i == threshold:
        raise StopIteration()
      elif i > threshold:
        raise RuntimeError("Alternate error; you shouldn't see me! (i: %s)" % i)
      else:
        return i

    dataset = dataset_ops.Dataset.range(100).apply(
        batching.map_and_batch(
            lambda x: script_ops.py_func(raising_py_fn, [x], dtypes.int64),
            batch_size=10))
    if numa_aware:
      options = dataset_ops.Options()
      options.experimental_numa_aware = True
      dataset = dataset.with_options(options)
    iterator = dataset_ops.make_one_shot_iterator(dataset)
    get_next = iterator.get_next()

    with self.cached_session() as sess:
      for i in range(threshold // 10):
        self.assertAllEqual([i * 10 + j for j in range(10)],
                            self.evaluate(get_next))
      if threshold % 10 != 0:
        self.assertAllEqual(
            [threshold // 10 * 10 + j for j in range(threshold % 10)],
            self.evaluate(get_next))
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(get_next)
  def testPadToBoundaryNoExtraneousPadding(self):

    boundaries = [3, 7, 11]
    batch_sizes = [2, 2, 2, 2]
    lengths = range(1, 11)

    def element_gen():
      for length in lengths:
        yield ([1] * length,)

    element_len = lambda element: array_ops.shape(element)[0]
    dataset = dataset_ops.Dataset.from_generator(
        element_gen, (dtypes.int64,), ([None],)).apply(
            grouping.bucket_by_sequence_length(
                element_len, boundaries, batch_sizes,
                pad_to_bucket_boundary=True))
    batch, = dataset_ops.make_one_shot_iterator(dataset).get_next()

    with self.cached_session() as sess:
      batches = []
      for _ in range(5):
        batches.append(self.evaluate(batch))
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(batch)

    self.assertAllEqual(batches[0], [[1, 0],
                                     [1, 1]])
    self.assertAllEqual(batches[1], [[1, 1, 1, 0, 0, 0],
                                     [1, 1, 1, 1, 0, 0]])
    self.assertAllEqual(batches[2], [[1, 1, 1, 1, 1, 0],
                                     [1, 1, 1, 1, 1, 1]])
    self.assertAllEqual(batches[3], [[1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
                                     [1, 1, 1, 1, 1, 1, 1, 1, 0, 0]])
    self.assertAllEqual(batches[4], [[1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
                                     [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
  def testPrefetchToSameDevice(self):
    host_dataset = dataset_ops.Dataset.range(10)
    device_dataset = host_dataset.apply(
        prefetching_ops.prefetch_to_device(
            "/job:localhost/replica:0/task:0/device:CPU:0"))

    with ops.device("/cpu:1"):
      iterator = dataset_ops.make_one_shot_iterator(device_dataset)
      next_element = iterator.get_next()

    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
    self.assertEqual(host_dataset.output_types, iterator.output_types)
    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
    self.assertEqual(host_dataset.output_classes, iterator.output_classes)

    self.assertEqual(dtypes.int64, next_element.dtype)
    self.assertEqual([], next_element.shape)

    with self.cached_session():
      for i in range(10):
        self.assertEqual(i, self.evaluate(next_element))
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element)
  def testFromGenerator(self):
    test_cases = [{
        'tensor': 0,
        'shape': tensor_shape.TensorShape([])
    }, {
        'tensor': np.array([1, 2, 3]),
        'shape': tensor_shape.TensorShape([3])
    }, {
        'tensor': np.array([[1, 2, 3]]),
        'shape': tensor_shape.TensorShape([1, 3])
    }]

    for test_case in test_cases:

      def make_generator(tensor):

        def generator():
          yield tensor

        return generator

      with ops.Graph().as_default() as g:
        dataset = dataset_ops.Dataset.from_generator(
            make_generator(test_case['tensor']),
            dtypes.int64,
            output_shapes=test_case['shape'])
        iterator = dataset_ops.make_one_shot_iterator(dataset)
        get_next = iterator.get_next()
        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
        train_op.append(get_next)
        mg = meta_graph.create_meta_graph_def(graph=g)
        grappler_item = item.Item(mg)
        op_properties = grappler_item.GetOpProperties()
        self.assertEqual(test_case['shape'],
                         op_properties['IteratorGetNext'][0].shape)
  def testMap(self):
    test_cases = [{
        'tensor': 0,
        'shape': tensor_shape.TensorShape([])
    }, {
        'tensor': np.array([1, 2, 3]),
        'shape': tensor_shape.TensorShape([3])
    }, {
        'tensor': np.array([[1, 2, 3]]),
        'shape': tensor_shape.TensorShape([3, 1])
    }, {
        'tensor': np.array([[[1, 2, 3], [4, 5, 6]]]),
        'shape': tensor_shape.TensorShape([3, 2, 1])
    }]

    for test_case in test_cases:
      with ops.Graph().as_default() as g:
        dataset = dataset_ops.Dataset.from_tensors(test_case['tensor'])
        dataset = dataset.map(array_ops.transpose)
        iterator = dataset_ops.make_one_shot_iterator(dataset)
        get_next = iterator.get_next()
        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
        train_op.append(get_next)
        mg = meta_graph.create_meta_graph_def(graph=g)
        grappler_item = item.Item(mg)
        op_properties = grappler_item.GetOpProperties()
        self.assertEqual(test_case['shape'],
                         op_properties['IteratorGetNext'][0].shape)
    def _test_bucket_by_padding(no_padding):
      dataset = build_dataset(sparse=no_padding)
      dataset = dataset.apply(
          grouping.bucket_by_sequence_length(
              _element_length_fn,
              boundaries,
              batch_sizes,
              no_padding=no_padding))
      batch, = dataset_ops.make_one_shot_iterator(dataset).get_next()

      with self.cached_session() as sess:
        batches = []
        for _ in range(4):
          batches.append(self.evaluate(batch))
        with self.assertRaises(errors.OutOfRangeError):
          self.evaluate(batch)
      batch_sizes_val = []
      lengths_val = []
      for batch in batches:
        shape = batch.dense_shape if no_padding else batch.shape
        batch_size = shape[0]
        length = shape[1]
        batch_sizes_val.append(batch_size)
        lengths_val.append(length)
        sum_check = batch.values.sum() if no_padding else batch.sum()
        self.assertEqual(sum_check, batch_size * length - 1)
      self.assertEqual(sum(batch_sizes_val), sum(batch_sizes))
      self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val))
      self.assertEqual(sorted(lengths), sorted(lengths_val))
  def testOneShotIteratorCaptureByValue(self):
    components = (np.arange(7),
                  np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
                  np.array(37.0) * np.arange(7))
    tensor_components = tuple([ops.convert_to_tensor(c) for c in components])

    def _map_fn(x, y, z):
      return math_ops.square(x), math_ops.square(y), math_ops.square(z)

    iterator = dataset_ops.make_one_shot_iterator(
        dataset_ops.Dataset.from_tensor_slices(tensor_components)
        .map(_map_fn).repeat(14))
    get_next = iterator.get_next()

    self.assertEqual([c.shape[1:] for c in components],
                     [t.shape for t in get_next])

    with self.cached_session() as sess:
      for _ in range(14):
        for i in range(7):
          result = sess.run(get_next)
          for component, result_component in zip(components, result):
            self.assertAllEqual(component[i]**2, result_component)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)
  def testMapAndBatchImplicitDispose(self, numa_aware):
    # Tests whether a map and batch dataset will be cleaned up correctly when
    # the pipeline does not run it until exhaustion.
    # The pipeline is TensorSliceDataset -> RepeatDataset(1000) ->
    # MapAndBatchDataset(f=square_3, batch_size=100).
    components = (np.arange(1000),
                  np.array([[1, 2, 3]]) * np.arange(1000)[:, np.newaxis],
                  np.array(37.0) * np.arange(1000))

    def _map_fn(x, y, z):
      return math_ops.square(x), math_ops.square(y), math_ops.square(z)

    dataset = dataset_ops.Dataset.from_tensor_slices(components).repeat(
        1000).apply(batching.map_and_batch(_map_fn, batch_size=100))
    dataset = dataset.prefetch(5)
    if numa_aware:
      options = dataset_ops.Options()
      options.experimental_numa_aware = True
      dataset = dataset.with_options(options)
    iterator = dataset_ops.make_one_shot_iterator(dataset)
    get_next = iterator.get_next()

    with self.cached_session() as sess:
      for _ in range(3):
        self.evaluate(get_next)
  def testMapAndBatchPartialBatch(self, drop_remainder, numa_aware):
    dataset = (
        dataset_ops.Dataset.range(10).apply(
            batching.map_and_batch(
                lambda x: array_ops.reshape(x * x, [1]),
                batch_size=4,
                drop_remainder=drop_remainder)))

    if numa_aware:
      options = dataset_ops.Options()
      options.experimental_numa_aware = True
      dataset = dataset.with_options(options)
    iterator = dataset_ops.make_one_shot_iterator(dataset)

    if drop_remainder:
      self.assertEqual([4, 1], iterator.output_shapes.as_list())
    else:
      self.assertEqual([None, 1], iterator.output_shapes.as_list())
    next_element = iterator.get_next()
    with self.cached_session() as sess:
      self.assertAllEqual([[0], [1], [4], [9]], self.evaluate(next_element))
      self.assertAllEqual([[16], [25], [36], [49]], self.evaluate(next_element))
      if not drop_remainder:
        self.assertAllEqual([[64], [81]], self.evaluate(next_element))
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element)
  def testCopySparseTensorsToDeviceWithPrefetch(self):

    def make_tensor(i):
      return sparse_tensor.SparseTensorValue(
          indices=[[0, 0]], values=(i * [1]), dense_shape=[2, 2])

    host_dataset = dataset_ops.Dataset.range(10).map(make_tensor)

    device_dataset = host_dataset.apply(
        prefetching_ops.copy_to_device("/cpu:1")).prefetch(1)

    with ops.device("/cpu:1"):
      iterator = dataset_ops.make_one_shot_iterator(device_dataset)
      next_element = iterator.get_next()

    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
    self.assertEqual(host_dataset.output_types, iterator.output_types)
    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
    self.assertEqual(host_dataset.output_classes, iterator.output_classes)

    self.assertEqual(dtypes.int64, next_element.dtype)

    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
    with self.test_session(config=worker_config):
      for i in range(10):
        actual = self.evaluate(next_element)
        self.assertAllEqual([i], actual.values)
        self.assertAllEqual([[0, 0]], actual.indices)
        self.assertAllEqual([2, 2], actual.dense_shape)
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element)
  def testSkipEagerMultipleIterators(self, reshuffle, initializable):
    with ops.Graph().as_default() as g:
      dataset = dataset_ops.Dataset.range(100).shuffle(
          10, reshuffle_each_iteration=reshuffle).repeat(3)

      if initializable:
        iterators = [dataset_ops.make_initializable_iterator(dataset)
                     for _ in range(2)]
      else:
        iterators = [dataset_ops.make_one_shot_iterator(dataset)
                     for _ in range(2)]

      results = []
      with self.session(graph=g) as sess:
        for iterator in iterators:
          if initializable:
            sess.run(iterator.initializer)
          next_element = iterator.get_next()
          run_results = []
          for _ in range(300):
            run_results.append(sess.run(next_element))
          with self.assertRaises(errors.OutOfRangeError):
            sess.run(next_element)

          results.append(run_results)

        self.assertNotEqual(results[0], results[1])
  def getNext(self, dataset, requires_initialization=False):
    """Returns a callable that returns the next element of the dataset.

    Example use:
    ```python
    # In both graph and eager modes
    dataset = ...
    get_next = self.getNext(dataset)
    result = self.evaluate(get_next())
    ```

    Args:
      dataset: A dataset whose elements will be returned.
      requires_initialization: Indicates that when the test is executed in graph
        mode, it should use an initializable iterator to iterate through the
        dataset (e.g. when it contains stateful nodes). Defaults to False.
    Returns:
      A callable that returns the next element of `dataset`.
    """
    if context.executing_eagerly():
      iterator = dataset.__iter__()
      return iterator._next_internal  # pylint: disable=protected-access
    else:
      if requires_initialization:
        iterator = dataset_ops.make_initializable_iterator(dataset)
        self.evaluate(iterator.initializer)
      else:
        iterator = dataset_ops.make_one_shot_iterator(dataset)
      get_next = iterator.get_next()
      return lambda: get_next
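  # Hypothetical usage sketch (not from the original file): a minimal test
  # method built on `getNext` above, showing that the same assertions run
  # unchanged in both graph and eager modes.
  def testGetNextUsageSketch(self):
    dataset = dataset_ops.Dataset.range(3)
    get_next = self.getNext(dataset)
    for i in range(3):
      self.assertEqual(i, self.evaluate(get_next()))
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(get_next())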
  def testMapAndBatchParallelGetNextDropRemainder(self, numa_aware):
    dataset = dataset_ops.Dataset.range(49999).apply(
        batching.map_and_batch(
            lambda x: x, batch_size=100, drop_remainder=True))

    if numa_aware:
      options = dataset_ops.Options()
      options.experimental_numa_aware = True
      dataset = dataset.with_options(options)

    if context.executing_eagerly():
      iterator = iter(dataset)
      get_next = iterator._next_internal  # pylint: disable=protected-access
    else:
      iterator = dataset_ops.make_one_shot_iterator(dataset)
      get_next = iterator.get_next

    elements = []
    for _ in range(100):
      elements.append(get_next)

    for i in range(4):
      got = self.evaluate([element() for element in elements])
      got.sort(key=lambda x: x[0])
      expected = []
      for j in range(100):
        expected.append(range(i * 10000 + j * 100, i * 10000 + (j + 1) * 100))
      self.assertAllEqual(got, expected)
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate([element() for element in elements])
  def _read_test(self, batch_size, num_epochs, file_index=None,
                 num_parallel_reads=1, drop_final_batch=False, parser_fn=False):
    if file_index is None:
      file_pattern = self.test_filenames
    else:
      file_pattern = self.test_filenames[file_index]

    if parser_fn:
      fn = lambda x: string_ops.substr(x, 1, 999)
    else:
      fn = None

    with ops.Graph().as_default() as g:
      with self.session(graph=g) as sess:
        outputs = dataset_ops.make_one_shot_iterator(
            readers.make_tf_record_dataset(
                file_pattern=file_pattern,
                num_epochs=num_epochs,
                batch_size=batch_size,
                parser_fn=fn,
                num_parallel_reads=num_parallel_reads,
                drop_final_batch=drop_final_batch,
                shuffle=False)).get_next()
        self._verify_records(
            sess, outputs, batch_size, file_index, num_epochs=num_epochs,
            interleave_cycle_length=num_parallel_reads,
            drop_final_batch=drop_final_batch, use_parser_fn=parser_fn)
        with self.assertRaises(errors.OutOfRangeError):
          self.evaluate(outputs)
  def benchmarkMap(self):
    k = 1024 * 1024
    dataset = dataset_ops.Dataset.from_tensors((np.random.rand(1, 4 * k),
                                                np.random.rand(4 * k,
                                                               1))).repeat()
    dataset = dataset.map(
        math_ops.matmul, num_parallel_calls=optimization.AUTOTUNE)
    iterator = dataset_ops.make_one_shot_iterator(dataset)
    get_next = iterator.get_next()

    deltas = []
    with session.Session() as sess:
      for _ in range(5):
        sess.run(get_next.op)
      for _ in range(1000):
        start = time.time()
        sess.run(get_next.op)
        end = time.time()
        deltas.append(end - start)

    print("%f (median), %f (mean), %f (stddev), %f (min), %f (max)\n" %
          (np.median(deltas), np.mean(deltas), np.std(deltas), np.min(deltas),
           np.max(deltas)))
    self.report_benchmark(
        iters=1000, wall_time=np.median(deltas), name="map_autotune")
  def _benchmarkMapAndBatch(self, numa_aware):
    batch_size = 16
    k = 1024 * 1024
    dataset = dataset_ops.Dataset.from_tensors((np.random.rand(1, 4 * k),
                                                np.random.rand(4 * k,
                                                               1))).repeat()
    dataset = dataset.apply(
        batching.map_and_batch(
            math_ops.matmul,
            num_parallel_calls=optimization.AUTOTUNE,
            batch_size=batch_size))
    options = dataset_ops.Options()
    options.experimental_numa_aware = numa_aware
    dataset = dataset.with_options(options)
    iterator = dataset_ops.make_one_shot_iterator(dataset)
    get_next = iterator.get_next()

    deltas = []
    with session.Session() as sess:
      for _ in range(5):
        sess.run(get_next.op)
      for _ in range(100):
        start = time.time()
        sess.run(get_next.op)
        end = time.time()
        deltas.append(end - start)

    print("%f (median), %f (mean), %f (stddev), %f (min), %f (max)\n" %
          (np.median(deltas), np.mean(deltas), np.std(deltas), np.min(deltas),
           np.max(deltas)))

    self.report_benchmark(
        iters=100,
        wall_time=np.median(deltas),
        name=("numa_" if numa_aware else "") + "map_and_batch_autotune")
  def testMapAndBatchControlFlow(self, numa_aware):

    def map_fn(x):
      previous_cond_v2_value = control_flow_ops.ENABLE_COND_V2
      control_flow_ops.ENABLE_COND_V2 = True
      return_value = control_flow_ops.cond(x < 50, lambda: x + 1, lambda: x * x)
      control_flow_ops.ENABLE_COND_V2 = previous_cond_v2_value
      return return_value

    dataset = dataset_ops.Dataset.range(100).apply(
        batching.map_and_batch(map_fn, batch_size=10))
    if numa_aware:
      options = dataset_ops.Options()
      options.experimental_numa_aware = True
      dataset = dataset.with_options(options)
    iterator = dataset_ops.make_one_shot_iterator(dataset)
    get_next = iterator.get_next()
    with self.cached_session() as sess:
      for i in range(10):
        print("Case %d" % i)
        if i < 5:
          self.assertAllEqual([i * 10 + j + 1 for j in range(10)],
                              self.evaluate(get_next))
        else:
          self.assertAllEqual(
              [((i * 10) + j) * ((i * 10) + j) for j in range(10)],
              self.evaluate(get_next))
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(get_next)
  def _benchmarkFilters(self, chain_length, optimize_dataset):
    with ops.Graph().as_default():
      dataset = dataset_ops.Dataset.from_tensors(5).repeat(None)
      for _ in range(chain_length):
        dataset = dataset.filter(lambda x: math_ops.greater_equal(x - 5, 0))
      if optimize_dataset:
        dataset = dataset.apply(optimization.optimize(["filter_fusion"]))

      iterator = dataset_ops.make_one_shot_iterator(dataset)
      next_element = iterator.get_next()

      with session.Session() as sess:
        for _ in range(10):
          sess.run(next_element.op)
        deltas = []
        for _ in range(100):
          start = time.time()
          for _ in range(100):
            sess.run(next_element.op)
          end = time.time()
          deltas.append(end - start)

        median_wall_time = np.median(deltas) / 100
        opt_mark = "opt" if optimize_dataset else "no-opt"
        print("Filter dataset {} chain length: {} Median wall time: {}".format(
            opt_mark, chain_length, median_wall_time))
        self.report_benchmark(
            iters=1000,
            wall_time=median_wall_time,
            name="benchmark_filter_dataset_chain_latency_{}_{}".format(
                opt_mark, chain_length))
  def test_sequential_deferred_build_with_dataset_iterators(self):
    num_hidden = 5
    input_dim = 3
    num_classes = 2
    num_samples = 50
    steps_per_epoch = 10

    model = testing_utils.get_small_sequential_mlp(num_hidden, num_classes)
    model.compile(
        loss='mse',
        optimizer='rmsprop',
        metrics=[keras.metrics.CategoricalAccuracy()],
        run_eagerly=testing_utils.should_run_eagerly())
    self.assertEqual(len(model.layers), 2)
    self.assertEqual(len(model.weights), 0)
    self.assertFalse(model.built)

    x = array_ops.ones((num_samples, input_dim))
    y = array_ops.zeros((num_samples, num_classes))
    dataset = dataset_ops.Dataset.from_tensor_slices((x, y))
    dataset = dataset.repeat(100)
    dataset = dataset.batch(10)
    iterator = dataset_ops.make_one_shot_iterator(dataset)

    model.fit(iterator, epochs=1, steps_per_epoch=steps_per_epoch)
    self.assertTrue(model.built)
    self.assertEqual(len(model.weights), 2 * 2)
    self.assertFalse(model._is_graph_network)
  def testIteratorStringHandleReuseTensorObject(self):
    dataset = dataset_ops.Dataset.from_tensor_slices([1, 2, 3])
    one_shot_iterator = dataset_ops.make_one_shot_iterator(dataset)
    initializable_iterator = dataset_ops.make_initializable_iterator(dataset)
    structure_iterator = iterator_ops.Iterator.from_structure(
        dataset.output_types)

    created_ops = len(ops.get_default_graph().get_operations())

    self.assertIs(one_shot_iterator.string_handle(),
                  one_shot_iterator.string_handle())
    self.assertIs(initializable_iterator.string_handle(),
                  initializable_iterator.string_handle())
    self.assertIs(structure_iterator.string_handle(),
                  structure_iterator.string_handle())

    # Assert that getting the (default) string handle creates no ops.
    self.assertEqual(created_ops, len(ops.get_default_graph().get_operations()))

    # Specifying an explicit name will create a new op.
    handle_with_name = one_shot_iterator.string_handle(name="foo")
    self.assertEqual("foo", handle_with_name.op.name)
    self.assertIsNot(one_shot_iterator.string_handle(), handle_with_name)

    handle_with_same_name = one_shot_iterator.string_handle(name="foo")
    self.assertEqual("foo_1", handle_with_same_name.op.name)
    self.assertIsNot(handle_with_name, handle_with_same_name)
  def _benchmarkMapAndFilterFusion(self, chain_length, optimize_dataset):
    with ops.Graph().as_default():
      dataset = dataset_ops.Dataset.from_tensors(0).repeat(None)
      for _ in range(chain_length):
        dataset = dataset.map(lambda x: x + 5).filter(
            lambda x: math_ops.greater_equal(x - 5, 0))
      if optimize_dataset:
        options = dataset_ops.Options()
        options.experimental_map_and_filter_fusion = True
        dataset = dataset.with_options(options)
      iterator = dataset_ops.make_one_shot_iterator(dataset)
      next_element = iterator.get_next()

      with session.Session() as sess:
        for _ in range(10):
          sess.run(next_element.op)
        deltas = []
        for _ in range(100):
          start = time.time()
          for _ in range(100):
            sess.run(next_element.op)
          end = time.time()
          deltas.append(end - start)

        median_wall_time = np.median(deltas) / 100
        opt_mark = "opt" if optimize_dataset else "noopt"
        print("Map and filter dataset {} chain length: {} Median wall time: {}"
              .format(opt_mark, chain_length, median_wall_time))
        self.report_benchmark(
            iters=100,
            wall_time=median_wall_time,
            name="map_and_filter_fusion_{}_chain_length_{}".format(
                opt_mark, chain_length))
    def testCopyToDeviceInt32(self):
        host_dataset = dataset_ops.Dataset.from_tensors([0, 1, 2, 3])
        device_dataset = host_dataset.apply(
            prefetching_ops.copy_to_device("/cpu:1"))

        with ops.device("/cpu:1"):
            iterator = dataset_ops.make_one_shot_iterator(device_dataset)
            next_element = iterator.get_next()

        self.assertTrue(
            dataset_ops.get_structure(host_dataset).is_compatible_with(
                dataset_ops.get_structure(device_dataset)))
        self.assertTrue(
            dataset_ops.get_structure(host_dataset).is_compatible_with(
                dataset_ops.get_structure(iterator)))

        self.assertEqual(dtypes.int32, next_element.dtype)
        self.assertEqual((4, ), next_element.shape)

        worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
        with self.test_session(config=worker_config):
            self.assertAllEqual([0, 1, 2, 3], self.evaluate(next_element))
            with self.assertRaises(errors.OutOfRangeError):
                self.evaluate(next_element)
    def testOneShotIterator(self):
        components = (np.arange(7),
                      np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
                      np.array(37.0) * np.arange(7))

        def _map_fn(x, y, z):
            return math_ops.square(x), math_ops.square(y), math_ops.square(z)

        iterator = dataset_ops.make_one_shot_iterator(
            dataset_ops.Dataset.from_tensor_slices(components).map(
                _map_fn).repeat(14))
        get_next = iterator.get_next()

        self.assertEqual([c.shape[1:] for c in components],
                         [t.shape for t in get_next])

        with self.cached_session() as sess:
            for _ in range(14):
                for i in range(7):
                    result = sess.run(get_next)
                    for component, result_component in zip(components, result):
                        self.assertAllEqual(component[i]**2, result_component)
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(get_next)
    def testFromTensors(self):
        test_cases = [{
            'tensor': 0,
            'shape': tensor_shape.TensorShape([])
        }, {
            'tensor': np.array([1, 2, 3]),
            'shape': tensor_shape.TensorShape([3])
        }, {
            'tensor': np.array([[1, 2, 3]]),
            'shape': tensor_shape.TensorShape([1, 3])
        }]

        for test_case in test_cases:
            with ops.Graph().as_default() as g:
                dataset = dataset_ops.Dataset.from_tensors(test_case['tensor'])
                iterator = dataset_ops.make_one_shot_iterator(dataset)
                get_next = iterator.get_next()
                train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
                train_op.append(get_next)
                mg = meta_graph.create_meta_graph_def(graph=g)
                grappler_item = item.Item(mg)
                op_properties = grappler_item.GetOpProperties()
                self.assertEqual(test_case['shape'],
                                 op_properties['IteratorGetNext'][0].shape)
    def testSeedZero(self):
        """Test for same behavior when the seed is a Python or Tensor zero."""
        iterator = dataset_ops.make_one_shot_iterator(
            dataset_ops.Dataset.range(10).shuffle(10, seed=0))
        get_next = iterator.get_next()

        elems = []
        with self.cached_session() as sess:
            for _ in range(10):
                elems.append(sess.run(get_next))
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(get_next)

        seed_placeholder = array_ops.placeholder(dtypes.int64, shape=[])
        iterator = dataset_ops.make_initializable_iterator(
            dataset_ops.Dataset.range(10).shuffle(10, seed=seed_placeholder))
        get_next = iterator.get_next()

        with self.cached_session() as sess:
            sess.run(iterator.initializer, feed_dict={seed_placeholder: 0})
            for elem in elems:
                self.assertEqual(elem, sess.run(get_next))
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(get_next)
    def testPrefetchSparseTensorsToDevice(self):
        def make_tensor(i):
            return sparse_tensor.SparseTensorValue(indices=[[0, 0]],
                                                   values=(i * [1]),
                                                   dense_shape=[2, 2])

        host_dataset = dataset_ops.Dataset.range(10).map(make_tensor)

        device_dataset = host_dataset.apply(
            prefetching_ops.prefetch_to_device("/cpu:1"))

        with ops.device("/cpu:1"):
            iterator = dataset_ops.make_one_shot_iterator(device_dataset)
            next_element = iterator.get_next()

        self.assertEqual(host_dataset.output_types,
                         device_dataset.output_types)
        self.assertEqual(host_dataset.output_types, iterator.output_types)
        self.assertEqual(host_dataset.output_shapes,
                         device_dataset.output_shapes)
        self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
        self.assertEqual(host_dataset.output_classes,
                         device_dataset.output_classes)
        self.assertEqual(host_dataset.output_classes, iterator.output_classes)

        self.assertEqual(dtypes.int64, next_element.dtype)

        worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
        with self.test_session(config=worker_config):
            for i in range(10):
                actual = self.evaluate(next_element)
                self.assertAllEqual([i], actual.values)
                self.assertAllEqual([[0, 0]], actual.indices)
                self.assertAllEqual([2, 2], actual.dense_shape)
            with self.assertRaises(errors.OutOfRangeError):
                self.evaluate(next_element)
    def testNestedTensorIteratorStructure(self, expected_element_structure,
                                          expected_output_classes,
                                          expected_output_types,
                                          expected_output_shapes):
        def tf_value_fn():
            return {
                "a": constant_op.constant(37.0),
                "b":
                (constant_op.constant(["Foo"]), constant_op.constant("Bar"))
            }

        tf_value = tf_value_fn()
        iterator = dataset_ops.make_one_shot_iterator(
            dataset_ops.Dataset.from_tensors(tf_value))

        self.assertTrue(
            structure.are_compatible(dataset_ops.get_structure(iterator),
                                     expected_element_structure))
        self.assertEqual(expected_output_classes,
                         dataset_ops.get_legacy_output_classes(iterator))
        self.assertEqual(expected_output_types,
                         dataset_ops.get_legacy_output_types(iterator))
        self.assertEqual(expected_output_shapes,
                         dataset_ops.get_legacy_output_shapes(iterator))
    def test_sequence_example_into_input_layer(self):
        examples = [_make_sequence_example().SerializeToString()] * 100
        ctx_cols, seq_cols = self._build_feature_columns()

        def _parse_example(example):
            ctx, seq = parsing_ops.parse_single_sequence_example(
                example,
                context_features=fc.make_parse_example_spec_v2(ctx_cols),
                sequence_features=fc.make_parse_example_spec_v2(seq_cols))
            ctx.update(seq)
            return ctx

        ds = dataset_ops.Dataset.from_tensor_slices(examples)
        ds = ds.map(_parse_example)
        ds = ds.batch(20)

        # Test on a single batch
        features = dataset_ops.make_one_shot_iterator(ds).get_next()

        # Tile the context features across the sequence features
        sequence_input_layer = ksfc.SequenceFeatures(seq_cols)
        seq_layer, _ = sequence_input_layer(features)
        input_layer = dense_features.DenseFeatures(ctx_cols)
        ctx_layer = input_layer(features)
        input_layer = sfc.concatenate_context_input(ctx_layer, seq_layer)

        rnn_layer = recurrent.RNN(recurrent.SimpleRNNCell(10))
        output = rnn_layer(input_layer)

        with self.cached_session() as sess:
            sess.run(variables.global_variables_initializer())
            features_r = sess.run(features)
            self.assertAllEqual(features_r['int_list'].dense_shape, [20, 3, 6])

            output_r = sess.run(output)
            self.assertAllEqual(output_r.shape, [20, 10])
    def testPrefetchToSameDevice(self):
        host_dataset = dataset_ops.Dataset.range(10)
        device_dataset = host_dataset.apply(
            prefetching_ops.prefetch_to_device(
                "/job:localhost/replica:0/task:0/device:CPU:0"))

        with ops.device("/cpu:1"):
            iterator = dataset_ops.make_one_shot_iterator(device_dataset)
            next_element = iterator.get_next()

        self.assertTrue(
            structure.are_compatible(
                dataset_ops.get_structure(host_dataset),
                dataset_ops.get_structure(device_dataset)))

        self.assertEqual(dtypes.int64, next_element.dtype)
        self.assertEqual([], next_element.shape)

        worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
        with self.test_session(config=worker_config):
            for i in range(10):
                self.assertEqual(i, self.evaluate(next_element))
            with self.assertRaises(errors.OutOfRangeError):
                self.evaluate(next_element)
  def testMapAndBatchParallelGetNext(self):
    dataset = dataset_ops.Dataset.range(50000).apply(
        batching.map_and_batch(lambda x: x, batch_size=100))

    if context.executing_eagerly():
      iterator = iter(dataset)
      get_next = iterator._next_internal  # pylint: disable=protected-access
    else:
      iterator = dataset_ops.make_one_shot_iterator(dataset)
      get_next = iterator.get_next

    elements = []
    for _ in range(100):
      elements.append(get_next)

    for i in range(5):
      got = self.evaluate([element() for element in elements])
      got.sort(key=lambda x: x[0])
      expected = []
      for j in range(100):
        expected.append(range(i * 10000 + j * 100, i * 10000 + (j + 1) * 100))
      self.assertAllEqual(got, expected)
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate([element() for element in elements])
def convert_to_generator_like(data,
                              batch_size=None,
                              steps_per_epoch=None,
                              epochs=1,
                              shuffle=False):
    """Make a generator out of NumPy or EagerTensor inputs.

  Arguments:
    data: Either a generator or `keras.utils.data_utils.Sequence` object or
      `Dataset`, `Iterator`, or a {1,2,3}-tuple of NumPy arrays or EagerTensors.
      If a tuple, the elements represent `(x, y, sample_weights)` and may be
      `None` or `[None]`.
    batch_size: Used when creating a generator out of tuples of NumPy arrays or
      EagerTensors.
    steps_per_epoch: Steps of the generator to run each epoch. If `None` the
      number of steps will be read from the data (for
      `keras.utils.data_utils.Sequence` types).
    epochs: Total number of epochs to run.
    shuffle: Whether the data should be shuffled.

  Returns:
    - A `(generator_like, steps_per_epoch)` tuple, where `generator_like` is a
      generator, `keras.utils.data_utils.Sequence`, or `Iterator`.

  Raises:
    - ValueError: If `batch_size` is not provided for NumPy or EagerTensor
      inputs.
  """
    if isinstance(data, tuple):
        # Scrub `Nones` that might have been passed for `targets`, `sample_weights`.
        data = tuple(ele for ele in data
                     if not all(e is None for e in nest.flatten(ele)))

    if data_utils.is_generator_or_sequence(data) or isinstance(
            data, iterator_ops.OwnedIterator):
        if isinstance(data, data_utils.Sequence):
            if steps_per_epoch is None:
                steps_per_epoch = len(data)
        return data, steps_per_epoch
    if isinstance(data, dataset_ops.DatasetV2):
        return dataset_ops.make_one_shot_iterator(data), steps_per_epoch

    # Create generator from NumPy or EagerTensor Input.
    num_samples = int(nest.flatten(data)[0].shape[0])
    if batch_size is None:
        raise ValueError(
            'When passing input data as arrays, do not specify '
            '`steps_per_epoch`/`steps` argument. Please use `batch_size` instead.'
        )
    steps_per_epoch = int(math.ceil(num_samples / batch_size))

    def _gen(data):
        """Makes a generator out of a structure of NumPy/EagerTensors."""
        index_array = np.arange(num_samples)
        for _ in range(epochs):
            if shuffle:
                np.random.shuffle(index_array)
            batches = generic_utils.make_batches(num_samples, batch_size)
            for (batch_start, batch_end) in batches:
                batch_ids = index_array[batch_start:batch_end]
                flat_batch_data = training_utils.slice_arrays(
                    nest.flatten(data), batch_ids, contiguous=(not shuffle))
                yield nest.pack_sequence_as(data, flat_batch_data)

    return _gen(data), steps_per_epoch
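# Minimal usage sketch (hypothetical helper, not part of the original module),
# assuming NumPy inputs and the imports already used above. It exercises the
# NumPy/EagerTensor branch of `convert_to_generator_like`.
def _example_convert_numpy_inputs():
    """Converts an `(x, y)` tuple of arrays into a batch generator."""
    x = np.random.rand(100, 4)
    y = np.random.rand(100, 1)
    generator, steps_per_epoch = convert_to_generator_like(
        (x, y), batch_size=10, epochs=2, shuffle=True)
    batch_x, batch_y = next(generator)  # batch_x: (10, 4), batch_y: (10, 1)
    return generator, steps_per_epoch  # steps_per_epoch == ceil(100 / 10) == 10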
def model_iteration(model,
                    data,
                    steps_per_epoch=None,
                    epochs=1,
                    verbose=1,
                    callbacks=None,
                    validation_data=None,
                    validation_steps=None,
                    validation_freq=1,
                    class_weight=None,
                    max_queue_size=10,
                    workers=1,
                    use_multiprocessing=False,
                    shuffle=False,
                    initial_epoch=0,
                    mode=ModeKeys.TRAIN,
                    batch_size=None,
                    steps_name='steps',
                    **kwargs):
    """Loop function for arrays of data with modes TRAIN/TEST/PREDICT.

  Arguments:
      model: Keras Model instance.
      data: Either a tuple of NumPy/Tensor inputs (i.e. `(x,)` or `(x, y)` or
        `(x, y, sample_weights)`) or a generator or
        `keras.utils.data_utils.Sequence` object or Eager Iterator or Dataset.
      steps_per_epoch: Total number of steps (batches of samples) before
        declaring one epoch finished and starting the next epoch. Ignored with
        the default value of `None`.
      epochs: Number of times to iterate over the data.
      verbose: 0, 1, or 2. Verbosity mode.
        0 = silent, 1 = progress bar, 2 = one line per epoch.
        Note that the progress bar is not particularly useful when
        logged to a file, so verbose=2 is recommended when not running
        interactively (e.g., in a production environment).
      callbacks: List of callbacks to be called during training.
      validation_data: Either a tuple of NumPy/Tensor inputs (i.e. `(x,)` or
        `(x, y)` or `(x, y, sample_weights)`) or a generator or
        `keras.utils.data_utils.Sequence` object or Eager Iterator or Dataset.
      validation_steps: Total number of steps (batches of samples) before
        declaring validation finished.
      validation_freq: Only relevant if validation data is provided. Integer or
        `collections.abc.Container` instance (e.g. list, tuple, etc.). If an
        integer, specifies how many training epochs to run before a new
        validation run is performed, e.g. `validation_freq=2` runs
        validation every 2 epochs. If a Container, specifies the epochs on
        which to run validation, e.g. `validation_freq=[1, 2, 10]` runs
        validation at the end of the 1st, 2nd, and 10th epochs.
      class_weight: Dictionary mapping class indices to a weight for the class.
      max_queue_size: Integer. Maximum size for the generator queue. If
        unspecified, `max_queue_size` will default to 10.
      workers: Integer. Maximum number of processes to spin up when using
        process-based threading. If unspecified, `workers` will default to 1. If
        0, will execute the generator on the main thread.
      use_multiprocessing: Boolean. If `True`, use process-based threading. If
        unspecified, `use_multiprocessing` will default to `False`. Note that
        because this implementation relies on multiprocessing, you should not
        pass non-picklable arguments to the generator as they can't be passed
        easily to child processes.
      shuffle: Boolean. Whether to shuffle the order of the batches at the
        beginning of each epoch. Only used with instances of `Sequence`
        (`keras.utils.Sequence`). Has no effect when `steps_per_epoch` is not
        `None`.
      initial_epoch: Epoch at which to start training (useful for resuming a
        previous training run).
      mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT.
      batch_size: Integer batch size or None if unknown. Will only be used if
        `data` is in NumPy/Tensor format.
      steps_name: The string name of the steps argument, either `steps`,
        `validation_steps`, or `steps_per_epoch`. Only used for error message
        formatting.
      **kwargs: Additional arguments for backwards compatibility. `steps` is
        accepted as an alias for `steps_per_epoch`.

  Returns:
      - In TRAIN mode: `History` object.
      - In TEST mode: Evaluation metrics.
      - In PREDICT mode: Outputs of the Model called on inputs.

  Raises:
      ValueError: in case of invalid arguments.
  """
    if 'steps' in kwargs:
        steps_per_epoch = kwargs['steps']

    # Determine the number of steps per epoch and whether we should reset the
    # dataset at the end of each epoch.
    reset_dataset_after_each_epoch = False
    original_dataset = None
    is_dataset = isinstance(data,
                            (dataset_ops.DatasetV2, dataset_ops.DatasetV1))
    if is_dataset:
        original_dataset = data
        if steps_per_epoch is None:
            reset_dataset_after_each_epoch = True
            steps_per_epoch = training_utils.infer_steps_for_dataset(
                model,
                data,
                steps_per_epoch,
                epochs=epochs,
                steps_name=steps_name)

    # Convert to a format that supports `next(generator)`.
    generator, steps_per_epoch = convert_to_generator_like(
        data,
        steps_per_epoch=steps_per_epoch,
        batch_size=batch_size,
        epochs=epochs - initial_epoch,
        shuffle=shuffle)

    do_validation = validation_data is not None
    is_sequence = isinstance(generator, data_utils.Sequence)
    _validate_arguments(is_sequence, is_dataset, use_multiprocessing, workers,
                        steps_per_epoch, validation_data, validation_steps,
                        mode, kwargs)

    batch_function = _make_execution_function(model,
                                              mode,
                                              class_weight=class_weight)

    # Create the queue for the generator.
    enqueuer = None
    if not is_dataset:
        generator, enqueuer = _make_enqueued_generator(
            generator,
            workers=workers,
            use_multiprocessing=use_multiprocessing,
            max_queue_size=max_queue_size,
            shuffle=shuffle)

    num_samples_or_steps, use_steps = _get_num_samples_or_steps(
        data, steps_per_epoch)

    count_mode = 'steps' if use_steps else 'samples'
    callbacks = cbks.configure_callbacks(callbacks,
                                         model,
                                         do_validation=do_validation,
                                         epochs=epochs,
                                         steps_per_epoch=steps_per_epoch,
                                         batch_size=batch_size,
                                         samples=num_samples_or_steps,
                                         count_mode=count_mode,
                                         verbose=verbose,
                                         mode=mode)

    if mode == ModeKeys.PREDICT:
        aggregator = training_utils.OutputsAggregator(True,
                                                      steps=steps_per_epoch)
    else:
        aggregator = training_utils.MetricsAggregator(True,
                                                      steps=steps_per_epoch)

    should_set_learning_phase = (
        context.executing_eagerly() and model.run_eagerly)
    if should_set_learning_phase:
        learning_phase_scope = backend.eager_learning_phase_scope(
            1 if mode == ModeKeys.TRAIN else 0)
        learning_phase_scope.__enter__()

    callbacks.model.stop_training = False
    callbacks._call_begin_hook(mode)

    initial_epoch = model._maybe_load_initial_epoch_from_ckpt(
        initial_epoch, mode)

    for epoch in range(initial_epoch, epochs):
        if callbacks.model.stop_training:
            break

        # Setup work for each epoch.
        model.reset_metrics()
        epoch_logs = {}
        if mode == ModeKeys.TRAIN:
            callbacks.on_epoch_begin(epoch, epoch_logs)

        if steps_per_epoch is None:
            # Loop over dataset until `OutOfRangeError` is raised.
            target_steps = np.inf
        else:
            # Loop over dataset for the specified number of steps.
            target_steps = steps_per_epoch

        step = 0
        while step < target_steps:
            batch_data = _get_next_batch(generator)
            if batch_data is None:
                if is_dataset:
                    # The dataset passed by the user ran out of batches.
                    # Now we know the cardinality of the dataset.
                    # If steps_per_epoch was specified, then running out of data is
                    # unexpected, so we stop training and inform the user.
                    if steps_per_epoch:
                        callbacks.model.stop_training = True
                        logging.warning(
                            'Your dataset ran out of data; interrupting training. '
                            'Make sure that your dataset can generate at least '
                            '`%s * epochs` batches (in this case, %d batches). '
                            'You may need to use the repeat() function when '
                            'building your dataset.' %
                            (steps_name, steps_per_epoch * epochs))
                    elif step > 0:
                        steps_per_epoch = step
                        aggregator.steps = steps_per_epoch
                else:
                    # We ran out of batches while the user passed an iterator (legacy).
                    callbacks.model.stop_training = True
                    logging.warning(
                        'Your dataset iterator ran out of data; '
                        'interrupting training. Make sure that your iterator '
                        'can generate at least `%s * epochs` '
                        'batches (in this case, %d batches). You may need to '
                        'use the repeat() function when building your '
                        'dataset.' % (steps_name, steps_per_epoch * epochs))
                break

            # `batch_size` used for validation data if validation
            # data is NumPy/EagerTensors.
            batch_size = int(nest.flatten(batch_data)[0].shape[0])

            # Callbacks batch begin.
            batch_logs = {'batch': step, 'size': batch_size}
            callbacks._call_batch_hook(mode, 'begin', step, batch_logs)

            is_deferred = not model._is_compiled
            batch_outs = batch_function(*batch_data)
            if not isinstance(batch_outs, list):
                batch_outs = [batch_outs]

            if step == 0:
                aggregator.create(batch_outs)

                if is_deferred:
                    # Set callback params here, on the first iteration, because
                    # in the deferred build scenario the model is only compiled
                    # after the first batch has been run.
                    cbks.set_callback_parameters(
                        callbacks,
                        model,
                        do_validation=do_validation,
                        batch_size=batch_size,
                        epochs=epochs,
                        steps_per_epoch=steps_per_epoch,
                        samples=num_samples_or_steps,
                        verbose=verbose,
                        mode=mode)

            # Aggregate results.
            aggregator.aggregate(batch_outs)

            # Callbacks batch end.
            batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode)
            callbacks._call_batch_hook(mode, 'end', step, batch_logs)
            step += 1

            if callbacks.model.stop_training:
                break

        aggregator.finalize()
        results = aggregator.results
        epoch_logs = cbks.make_logs(model, epoch_logs, results, mode)
        if len(results) == 1:
            results = results[0]

        # Run the test loop every epoch during training.
        if (do_validation and training_utils.should_run_validation(
                validation_freq, epoch) and not callbacks.model.stop_training):
            val_results = model_iteration(
                model,
                validation_data,
                steps_per_epoch=validation_steps,
                batch_size=batch_size,
                class_weight=class_weight,
                workers=workers,
                use_multiprocessing=use_multiprocessing,
                max_queue_size=max_queue_size,
                callbacks=callbacks,
                verbose=verbose,
                mode=ModeKeys.TEST,
                steps_name='validation_steps')

            if not isinstance(val_results, list):
                val_results = [val_results]
            epoch_logs = cbks.make_logs(model,
                                        epoch_logs,
                                        val_results,
                                        mode,
                                        prefix='val_')

        if mode == ModeKeys.TRAIN:
            # Epochs only apply to `fit`.
            callbacks.on_epoch_end(epoch, epoch_logs)

        # Recreate dataset iterator for the next epoch.
        if reset_dataset_after_each_epoch and epoch < epochs - 1:
            generator = dataset_ops.make_one_shot_iterator(original_dataset)

    model._successful_loop_finish = True
    callbacks._call_end_hook(mode)

    if enqueuer is not None:
        enqueuer.stop()

    if should_set_learning_phase:
        learning_phase_scope.__exit__(None, None, None)

    if mode == ModeKeys.TRAIN:
        return model.history
    return results
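The out-of-data warning above is normally avoided by repeating the input dataset so it can supply at least `steps_per_epoch * epochs` batches. A minimal standalone sketch using the public `tf.keras` / `tf.data` API rather than the internal helper above (the model and data here are illustrative only):

import numpy as np
import tensorflow as tf

x = np.random.rand(100, 4).astype(np.float32)
y = np.random.randint(2, size=(100, 1)).astype(np.float32)

# repeat() lets the dataset produce steps_per_epoch * epochs batches.
dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(10).repeat()

model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, activation='relu', input_shape=(4,)),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy')
model.fit(dataset, steps_per_epoch=10, epochs=3)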
Example #42
 def fn():
     dataset = dataset_ops.Dataset.range(5 * required_gpus)
     it = dataset_ops.make_one_shot_iterator(dataset)
     return it.get_next
Example #43
 def fn():
     dataset = dataset_ops.Dataset.range(20)
     it = dataset_ops.make_one_shot_iterator(dataset)
     return it.get_next
Example #44
        def benchmark(label, series):
            """Runs benchmark the given series."""
            def make_dataset(element_size, num_calls, batch_size):  # pylint: disable=missing-docstring
                k = 1024 * 1024
                x = constant_op.constant(np.random.rand(element_size, 4 * k))
                y = constant_op.constant(np.random.rand(4 * k, 1))
                dataset = dataset_ops.Dataset.range(1000000000000).map(
                    lambda _: (x, y))
                dataset = dataset.map(
                    math_ops.matmul,
                    num_parallel_calls=num_calls).batch(batch_size=batch_size)
                options = dataset_ops.Options()
                options.experimental_optimization.apply_default_optimizations = False
                return dataset.with_options(options)

            for num_calls, inter_op, element_size, batch_size in series:
                num_iters = 1024 // (
                    (element_size * batch_size) // min(num_calls, inter_op))
                # By default the chained map().batch() calls will not be fused.
                chained_dataset = make_dataset(element_size, num_calls,
                                               batch_size)
                chained_iterator = dataset_ops.make_one_shot_iterator(
                    chained_dataset)
                chained_get_next = chained_iterator.get_next()
                chained_deltas = []
                with session.Session(config=config_pb2.ConfigProto(
                        inter_op_parallelism_threads=inter_op,
                        use_per_session_threads=True)) as sess:
                    for _ in range(5):
                        sess.run(chained_get_next.op)
                    for _ in range(num_iters):
                        start = time.time()
                        sess.run(chained_get_next.op)
                        end = time.time()
                        chained_deltas.append(end - start)

                self.report_benchmark(iters=num_iters,
                                      wall_time=np.median(chained_deltas),
                                      name=name("chained", label, num_calls,
                                                inter_op, element_size,
                                                batch_size))

                # Apply an option to the default dataset that will fuse map().batch().
                options = dataset_ops.Options()
                options.experimental_optimization.map_and_batch_fusion = True
                fused_dataset = chained_dataset.with_options(options)
                fused_iterator = dataset_ops.make_one_shot_iterator(
                    fused_dataset)
                fused_get_next = fused_iterator.get_next()
                fused_deltas = []
                with session.Session(config=config_pb2.ConfigProto(
                        inter_op_parallelism_threads=inter_op,
                        use_per_session_threads=True)) as sess:

                    for _ in range(5):
                        sess.run(fused_get_next.op)
                    for _ in range(num_iters):
                        start = time.time()
                        sess.run(fused_get_next.op)
                        end = time.time()
                        fused_deltas.append(end - start)

                self.report_benchmark(iters=num_iters,
                                      wall_time=np.median(fused_deltas),
                                      name=name("fused", label, num_calls,
                                                inter_op, element_size,
                                                batch_size))

            print()
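Outside of this benchmark harness, the same fusion can be requested on any pipeline through the public `tf.data.Options` API. A small sketch assuming eager execution (the exact option name may vary across TF versions):

import tensorflow as tf

dataset = tf.data.Dataset.range(100).map(lambda x: x * 2).batch(10)

# Ask the tf.data runtime to fuse the map() and batch() stages.
options = tf.data.Options()
options.experimental_optimization.map_and_batch_fusion = True
fused = dataset.with_options(options)

for batch in fused.take(2):
    print(batch.numpy())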
 def _get_dataset_iterator(self, dataset):
     """Gets an iterator from a tf.data.Dataset."""
     return dataset_ops.make_one_shot_iterator(dataset).get_next
 def _input_fn(params):
     dataset = self._make_input_fn(mode=_TRAIN, repeat=True)(params)
     return dataset_ops.make_one_shot_iterator(dataset).get_next()
    def benchmark(label, series):
      """Runs benchmark the given series."""

      print("%s:" % label)

      def make_base_dataset(element_size):
        k = 1024 * 1024
        x = constant_op.constant(np.random.rand(element_size, 4 * k))
        y = constant_op.constant(np.random.rand(4 * k, 1))
        return dataset_ops.Dataset.range(1000000000000).map(lambda _: (x, y))

      for num_calls, inter_op, element_size, batch_size in series:

        num_iters = 1024 // (
            (element_size * batch_size) // min(num_calls, inter_op))
        dataset = make_base_dataset(element_size)
        chained_dataset = dataset.map(
            math_ops.matmul,
            num_parallel_calls=num_calls).batch(batch_size=batch_size)
        chained_iterator = dataset_ops.make_one_shot_iterator(chained_dataset)
        chained_get_next = chained_iterator.get_next()

        chained_deltas = []
        with session.Session(
            config=config_pb2.ConfigProto(
                inter_op_parallelism_threads=inter_op,
                use_per_session_threads=True)) as sess:
          for _ in range(5):
            sess.run(chained_get_next.op)
          for _ in range(num_iters):
            start = time.time()
            sess.run(chained_get_next.op)
            end = time.time()
            chained_deltas.append(end - start)

        fused_dataset = dataset.apply(
            batching.map_and_batch(
                math_ops.matmul,
                num_parallel_calls=num_calls,
                batch_size=batch_size))
        fused_iterator = dataset_ops.make_one_shot_iterator(fused_dataset)
        fused_get_next = fused_iterator.get_next()

        fused_deltas = []
        with session.Session(
            config=config_pb2.ConfigProto(
                inter_op_parallelism_threads=inter_op,
                use_per_session_threads=True)) as sess:

          for _ in range(5):
            sess.run(fused_get_next.op)
          for _ in range(num_iters):
            start = time.time()
            sess.run(fused_get_next.op)
            end = time.time()
            fused_deltas.append(end - start)

        print(
            "batch size: %d, num parallel calls: %d, inter-op parallelism: %d, "
            "element size: %d, num iters: %d\nchained wall time: %f (median), "
            "%f (mean), %f (stddev), %f (min), %f (max)\n  fused wall time: "
            "%f (median), %f (mean), %f (stddev), %f (min), %f (max)\n    "
            "chained/fused:    %.2fx (median),    %.2fx (mean)" %
            (batch_size, num_calls, inter_op, element_size, num_iters,
             np.median(chained_deltas), np.mean(chained_deltas),
             np.std(chained_deltas), np.min(chained_deltas),
             np.max(chained_deltas), np.median(fused_deltas),
             np.mean(fused_deltas), np.std(fused_deltas), np.min(fused_deltas),
             np.max(fused_deltas),
             np.median(chained_deltas) / np.median(fused_deltas),
             np.mean(chained_deltas) / np.mean(fused_deltas)))

        self.report_benchmark(
            iters=num_iters,
            wall_time=np.median(chained_deltas),
            name=name("chained", label, num_calls, inter_op, element_size,
                      batch_size))

        self.report_benchmark(
            iters=num_iters,
            wall_time=np.median(fused_deltas),
            name=name("fused", label, num_calls, inter_op, element_size,
                      batch_size))

      print()
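The fused path in this benchmark goes through the internal `batching.map_and_batch`; its public (now deprecated) counterpart is `tf.data.experimental.map_and_batch`. A brief sketch of applying it directly, assuming eager execution:

import tensorflow as tf

dataset = tf.data.Dataset.range(1000)
# Equivalent to map(...).batch(...) but executed as a single fused stage.
fused = dataset.apply(
    tf.data.experimental.map_and_batch(
        lambda x: x * 2, batch_size=100, num_parallel_calls=4))

for batch in fused.take(1):
    print(batch.shape)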
Example #48
    def testRead(self):
        for batch_size in [1, 2]:
            for num_epochs in [1, 10]:
                with ops.Graph().as_default() as g:
                    with self.session(graph=g) as sess:
                        # Basic test: read from file 0.
                        self.outputs = dataset_ops.make_one_shot_iterator(
                            self.make_batch_feature(
                                filenames=self.test_filenames[0],
                                label_key="label",
                                num_epochs=num_epochs,
                                batch_size=batch_size)).get_next()
                        self.verify_records(sess,
                                            batch_size,
                                            0,
                                            num_epochs=num_epochs,
                                            label_key_provided=True)
                        with self.assertRaises(errors.OutOfRangeError):
                            self._next_actual_batch(sess,
                                                    label_key_provided=True)

                with ops.Graph().as_default() as g:
                    with self.session(graph=g) as sess:
                        # Basic test: read from file 1.
                        self.outputs = dataset_ops.make_one_shot_iterator(
                            self.make_batch_feature(
                                filenames=self.test_filenames[1],
                                label_key="label",
                                num_epochs=num_epochs,
                                batch_size=batch_size)).get_next()
                        self.verify_records(sess,
                                            batch_size,
                                            1,
                                            num_epochs=num_epochs,
                                            label_key_provided=True)
                        with self.assertRaises(errors.OutOfRangeError):
                            self._next_actual_batch(sess,
                                                    label_key_provided=True)

                with ops.Graph().as_default() as g:
                    with self.session(graph=g) as sess:
                        # Basic test: read from both files.
                        self.outputs = dataset_ops.make_one_shot_iterator(
                            self.make_batch_feature(
                                filenames=self.test_filenames,
                                label_key="label",
                                num_epochs=num_epochs,
                                batch_size=batch_size)).get_next()
                        self.verify_records(sess,
                                            batch_size,
                                            num_epochs=num_epochs,
                                            label_key_provided=True)
                        with self.assertRaises(errors.OutOfRangeError):
                            self._next_actual_batch(sess,
                                                    label_key_provided=True)

                with ops.Graph().as_default() as g:
                    with self.session(graph=g) as sess:
                        # Basic test: read from both files.
                        self.outputs = dataset_ops.make_one_shot_iterator(
                            self.make_batch_feature(
                                filenames=self.test_filenames,
                                num_epochs=num_epochs,
                                batch_size=batch_size)).get_next()
                        self.verify_records(sess,
                                            batch_size,
                                            num_epochs=num_epochs)
                        with self.assertRaises(errors.OutOfRangeError):
                            self._next_actual_batch(sess)
def dummy_input_fn(batch_size, repeat=True):
    dataset = dummy_input_fn_with_dataset(batch_size, repeat)
    iterator = dataset_ops.make_one_shot_iterator(dataset)
    return iterator.get_next()
    def __init__(self,
                 dataset,
                 devices,
                 max_buffer_size=1,
                 prefetch_buffer_size=1,
                 source_device="/cpu:0"):
        """Constructs a MultiDeviceIterator.

    Args:
      dataset: The input dataset to be iterated over.
      devices: The list of devices to fetch data to.
      max_buffer_size: Maximum size of the host side per device buffer to keep.
      prefetch_buffer_size: If > 0, a buffer of this size is set up on each
        device to prefetch into.
      source_device: The host device to place the `dataset` on.
    """
        self._dataset = dataset._apply_options()  # pylint: disable=protected-access
        self._devices = devices
        self._source_device = source_device
        self._source_device_tensor = ops.convert_to_tensor(source_device)

        # Create the MultiDeviceIterator.
        with ops.device(self._source_device):
            # TODO(b/121378567): Get rid of this shared_name hack.
            shared_name = ""
            if context.executing_eagerly():
                shared_name = context.shared_name()
            self._multi_device_iterator_resource = (
                gen_dataset_ops.multi_device_iterator(
                    devices=self._devices,
                    shared_name=shared_name,
                    container="",
                    **dataset_ops.flat_structure(dataset)))
            if context.executing_eagerly():
                # Delete the resource when this object is deleted
                self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
                    handle=self._multi_device_iterator_resource,
                    handle_device=self._source_device)

            # The incarnation ID is used to ensure consistency between the per-device
            # iterators and the multi-device iterator.
            self._incarnation_id = gen_dataset_ops.multi_device_iterator_init(
                self._dataset._variant_tensor,  # pylint: disable=protected-access
                self._multi_device_iterator_resource,
                max_buffer_size=max_buffer_size)

        # TODO(rohanj): Explore the possibility of the MultiDeviceIterator to
        # initialize the device side of the pipeline. This would allow the
        # MultiDeviceIterator to choose, for example, to move some transformations
        # into the device side from its input. It might be useful in rewriting.
        # Create the per device iterators.
        self._device_iterators = []
        for i, device in enumerate(self._devices):
            with ops.device(device):
                ds = _PerDeviceGenerator(i,
                                         self._multi_device_iterator_resource,
                                         self._incarnation_id,
                                         self._source_device_tensor,
                                         dataset._element_structure)  # pylint: disable=protected-access
                if prefetch_buffer_size > 0:
                    ds = ds.prefetch(prefetch_buffer_size)
                # TODO(jsimsa): Enable auto-tuning and optimizations when supported for
                # non-CPU devices.
                options = dataset_ops.Options()
                options.experimental_autotune = False
                options.experimental_optimization.apply_default_optimizations = False
                ds = ds.with_options(options)
                if context.executing_eagerly():
                    self._device_iterators.append(
                        dataset_ops.make_one_shot_iterator(ds))
                else:
                    self._device_iterators.append(
                        dataset_ops.make_initializable_iterator(ds))

        if not context.executing_eagerly():
            device_iterator_initializers = [
                iterator.initializer for iterator in self._device_iterators
            ]
            self._initializer = control_flow_ops.group(
                *device_iterator_initializers)
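For context, the class sketched above is normally constructed from a dataset plus a device list and then queried once per step. A rough usage sketch; the device names are hypothetical and must actually exist on the host, and eager execution is assumed so no initializer is needed:

from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.data.ops import multi_device_iterator_ops

dataset = dataset_ops.Dataset.range(8).batch(2)
mdi = multi_device_iterator_ops.MultiDeviceIterator(
    dataset, devices=["/cpu:0", "/gpu:0"])  # hypothetical device list
for _ in range(2):
    elem_on_cpu, elem_on_gpu = mdi.get_next()  # one element per device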
Example #51
 def _make_iterator(self, inputs, batches):
     return dataset_ops.make_one_shot_iterator(
         self._make_dataset(inputs, batches))
Example #52
    def testIteratorStringHandleFuture(self):
        with forward_compat.forward_compatibility_horizon(2018, 8, 4):
            dataset_3 = dataset_ops.Dataset.from_tensor_slices([1, 2, 3])
            dataset_4 = dataset_ops.Dataset.from_tensor_slices(
                [10, 20, 30, 40])

            iterator_3 = dataset_ops.make_one_shot_iterator(dataset_3)
            iterator_4 = dataset_ops.make_one_shot_iterator(dataset_4)

            handle_placeholder = array_ops.placeholder(dtypes.string, shape=[])
            feedable_iterator = iterator_ops.Iterator.from_string_handle(
                handle_placeholder,
                dataset_ops.get_legacy_output_types(dataset_3),
                dataset_ops.get_legacy_output_shapes(dataset_3))
            next_element = feedable_iterator.get_next()

            self.assertTrue(
                structure.are_compatible(
                    dataset_ops.get_structure(dataset_3),
                    dataset_ops.get_structure(feedable_iterator)))

            with self.cached_session() as sess:
                iterator_3_handle = sess.run(iterator_3.string_handle())
                iterator_4_handle = sess.run(iterator_4.string_handle())

                self.assertEqual(
                    10,
                    sess.run(next_element,
                             feed_dict={handle_placeholder:
                                        iterator_4_handle}))
                self.assertEqual(
                    1,
                    sess.run(next_element,
                             feed_dict={handle_placeholder:
                                        iterator_3_handle}))
                self.assertEqual(
                    20,
                    sess.run(next_element,
                             feed_dict={handle_placeholder:
                                        iterator_4_handle}))
                self.assertEqual(
                    2,
                    sess.run(next_element,
                             feed_dict={handle_placeholder:
                                        iterator_3_handle}))
                self.assertEqual(
                    30,
                    sess.run(next_element,
                             feed_dict={handle_placeholder:
                                        iterator_4_handle}))
                self.assertEqual(
                    3,
                    sess.run(next_element,
                             feed_dict={handle_placeholder:
                                        iterator_3_handle}))
                self.assertEqual(
                    40,
                    sess.run(next_element,
                             feed_dict={handle_placeholder:
                                        iterator_4_handle}))
                with self.assertRaises(errors.OutOfRangeError):
                    sess.run(next_element,
                             feed_dict={handle_placeholder: iterator_3_handle})
                with self.assertRaises(errors.OutOfRangeError):
                    sess.run(next_element,
                             feed_dict={handle_placeholder: iterator_4_handle})
Example #53
 def testIteratorGetNextName(self):
     with ops.Graph().as_default():
         iterator = dataset_ops.make_one_shot_iterator(
             dataset_ops.Dataset.from_tensors(37.0))
         next_element = iterator.get_next(name="overridden_name")
         self.assertEqual("overridden_name", next_element.op.name)
 def input_fn():
   dataset = dataset_ops.Dataset.from_tensor_slices({'x': x, 'y': y})
   iterator = dataset_ops.make_one_shot_iterator(dataset)
   features = iterator.get_next()
   labels = features.pop('y')
   return features, labels
 def _get_dataset_iterator(self, dataset):
     """Gets an iterator from a tf.data.Dataset."""
     iterator = dataset_ops.make_one_shot_iterator(dataset)
     session = K.get_session()
     next_element = iterator.get_next()
     return lambda: session.run(next_element)
Example #56
    def __init__(self,
                 dataset,
                 devices,
                 max_buffer_size=1,
                 prefetch_buffer_size=1,
                 source_device="/cpu:0"):
        """Constructs a MultiDeviceIterator.

    Args:
      dataset: The input dataset to be iterated over.
      devices: The list of devices to fetch data to.
      max_buffer_size: Maximum size of the host side per device buffer to keep.
      prefetch_buffer_size: If > 0, a buffer of this size is set up on each
        device to prefetch into.
      source_device: The host device to place the `dataset` on. To prevent
        deadlocks, if `prefetch_buffer_size` is greater than `max_buffer_size`,
        `max_buffer_size` is raised to `prefetch_buffer_size`.
    """
        options = dataset_ops.Options()
        options.experimental_distribute.num_devices = len(devices)
        dataset = dataset.with_options(options)
        self._dataset = dataset._apply_options()  # pylint: disable=protected-access
        self._experimental_slack = dataset.options().experimental_slack
        self._devices = devices
        self._source_device = source_device
        self._source_device_tensor = ops.convert_to_tensor(source_device)
        self._max_buffer_size = max_buffer_size
        self._prefetch_buffer_size = prefetch_buffer_size

        if self._prefetch_buffer_size > self._max_buffer_size:
            self._max_buffer_size = self._prefetch_buffer_size

        # Create the MultiDeviceIterator.
        with ops.device(self._source_device):
            # TODO(b/121378567): Get rid of this shared_name hack.
            shared_name = ""
            if context.executing_eagerly():
                shared_name = context.shared_name()
            self._multi_device_iterator_resource = (
                gen_dataset_ops.multi_device_iterator(
                    devices=self._devices,
                    shared_name=shared_name,
                    container="",
                    **self._dataset._flat_structure))  # pylint: disable=protected-access
            if context.executing_eagerly():
                # Delete the resource when this object is deleted
                self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
                    handle=self._multi_device_iterator_resource,
                    handle_device=self._source_device)

            # The incarnation ID is used to ensure consistency between the per-device
            # iterators and the multi-device iterator.
            self._incarnation_id = gen_dataset_ops.multi_device_iterator_init(
                self._dataset._variant_tensor,  # pylint: disable=protected-access
                self._multi_device_iterator_resource,
                max_buffer_size=self._max_buffer_size)

        self._prototype_device_datasets = []
        for i, device in enumerate(self._devices):
            with ops.device(device):
                ds = _PerDeviceGenerator(i,
                                         self._multi_device_iterator_resource,
                                         self._incarnation_id,
                                         self._source_device_tensor,
                                         self._dataset.element_spec)
                self._prototype_device_datasets.append(ds)

        # TODO(rohanj): Explore the possibility of the MultiDeviceIterator to
        # initialize the device side of the pipeline. This would allow the
        # MultiDeviceIterator to choose, for example, to move some transformations
        # into the device side from its input. It might be useful in rewriting.
        # Create the per device iterators.
        self._device_iterators = []
        for i, device in enumerate(self._devices):
            with ops.device(device):
                ds = _create_device_dataset(self._prototype_device_datasets[i],
                                            self._incarnation_id,
                                            self._prefetch_buffer_size,
                                            self._experimental_slack)
                if context.executing_eagerly():
                    self._device_iterators.append(
                        dataset_ops.make_one_shot_iterator(ds))
                else:
                    self._device_iterators.append(
                        dataset_ops.make_initializable_iterator(ds))

        if not context.executing_eagerly():
            device_iterator_initializers = [
                iterator.initializer for iterator in self._device_iterators
            ]
            self._initializer = control_flow_ops.group(
                *device_iterator_initializers)
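Unlike the earlier variant, this constructor consults `dataset.options().experimental_slack`; that option is turned on on the input pipeline itself. A short sketch using the public API (the setting only has an effect when the terminal transformation is a prefetch):

import tensorflow as tf

dataset = tf.data.Dataset.range(1000).batch(32).prefetch(1)

options = tf.data.Options()
options.experimental_slack = True  # allow the terminal prefetch to introduce slack
dataset = dataset.with_options(options)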
    def test_training_and_eval_methods_on_iterators_single_io(self, model):
        if model == 'functional':
            model = testing_utils.get_small_functional_mlp(1, 4, input_dim=3)
        elif model == 'subclass':
            model = testing_utils.get_small_sequential_mlp(1, 4)
        optimizer = RMSPropOptimizer(learning_rate=0.001)
        loss = 'mse'
        metrics = ['mae', metrics_module.CategoricalAccuracy()]
        model.compile(optimizer, loss, metrics=metrics)

        inputs = np.zeros((10, 3), np.float32)
        targets = np.zeros((10, 4), np.float32)
        dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
        dataset = dataset.repeat(100)
        dataset = dataset.batch(10)
        iterator = dataset_ops.make_one_shot_iterator(dataset)

        model.fit(iterator, epochs=1, steps_per_epoch=2, verbose=1)
        model.evaluate(iterator, steps=2, verbose=1)
        model.predict(iterator, steps=2)

        # Test with validation data
        model.fit(iterator,
                  epochs=1,
                  steps_per_epoch=2,
                  verbose=0,
                  validation_data=iterator,
                  validation_steps=2)
        # Test with validation split
        with self.assertRaisesRegexp(
                ValueError, '`validation_split` argument is not supported '
                'when input `x` is a dataset or a dataset iterator'):
            model.fit(iterator,
                      epochs=1,
                      steps_per_epoch=2,
                      verbose=0,
                      validation_split=0.5,
                      validation_steps=2)

        # Test with sample weight.
        sample_weight = np.random.random((10, ))
        with self.assertRaisesRegexp(
                ValueError, '`sample_weight` argument is not supported '
                'when input `x` is a dataset or a dataset iterator'):
            model.fit(iterator,
                      epochs=1,
                      steps_per_epoch=2,
                      verbose=0,
                      sample_weight=sample_weight)

        # Test invalid usage
        with self.assertRaisesRegexp(ValueError,
                                     'you should not specify a target'):
            model.fit(iterator,
                      iterator,
                      epochs=1,
                      steps_per_epoch=2,
                      verbose=0)

        with self.assertRaisesRegexp(
                ValueError,
                'you should specify the `steps_per_epoch` argument'):
            model.fit(iterator, epochs=1, verbose=0)
        with self.assertRaisesRegexp(
                ValueError, 'you should specify the `steps` argument'):
            model.evaluate(iterator, verbose=0)
        with self.assertRaisesRegexp(
                ValueError, 'you should specify the `steps` argument'):
            model.predict(iterator, verbose=0)
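With newer tf.keras the explicit one-shot iterator in this test is unnecessary: `fit`, `evaluate`, and `predict` accept the dataset directly. A small sketch under that assumption:

import numpy as np
import tensorflow as tf

inputs = np.zeros((10, 3), np.float32)
targets = np.zeros((10, 4), np.float32)
dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)).repeat().batch(10)

model = tf.keras.Sequential([
    tf.keras.layers.Dense(4, activation='relu', input_shape=(3,)),
    tf.keras.layers.Dense(4, activation='softmax'),
])
model.compile('rmsprop', 'mse', metrics=['mae'])
model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
model.evaluate(dataset, steps=2, verbose=1)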
Example #58
def _get_example_iter(inputs):
  dataset = dataset_ops.Dataset.from_tensor_slices(inputs)
  return dataset_ops.make_one_shot_iterator(dataset)
Example #59
def StreamingFilesDataset(files,
                          filetype=None,
                          file_reader_job=None,
                          worker_job=None,
                          num_epochs=None,
                          filename_shuffle_buffer_size=None,
                          num_parallel_reads=None,
                          batch_transfer_size=None,
                          sloppy=None):
  """StreamingFilesDataset constructs a dataset to stream from workers (GCE VM).

  Because Cloud TPUs are allocated over the network, a Cloud TPU cannot read
  files local to your GCE VM. In order to train using files stored on your local
  VM (e.g. on local SSD for extreme performance), use the StreamingFilesDataset
  helper to generate a dataset to feed your Cloud TPU with files from your GCE
  VM.

  The resulting dataset may return an OutOfRangeError if there are no files
  found as a result of the fileglob expansion.

  Note: StreamingFilesDataset assumes that the session is using a
  TPUClusterResolver and therefore has a worker and a coordinator job. File
  loading will be done on the coordinator job.

  Args:
    files: A string glob to match files, or a `tf.data.Dataset` generating file
      names.
    filetype: A string (one of 'tfrecord', or 'textline') or a single-argument
      TensorFlow function that when given a filename returns a dataset.
    file_reader_job: An optional string that corresponds to the job that should
      perform the file reads.
    worker_job: An optional string that corresponds to the job that should
      process the tensors (i.e. your GPU or TPU worker).
    num_epochs: The number of epochs through the training set that should be
      generated. By default, it will repeat infinitely.
    filename_shuffle_buffer_size: An optional integer whose value controls the
      shuffling of the file names. If you would like to read from the files in
      the same order, set to 0 or False.
    num_parallel_reads: An optional integer controlling the number of files to
      read from concurrently. (Set to 1 for no parallelism.)
    batch_transfer_size: An optional integer controlling the batching used to
      amortize the remote function invocation overhead. Set to a very large
      number to increase throughput. Set to a very small number to reduce memory
      consumption. Set to False to skip batching.
    sloppy: (Optional.) If `False`, read input data while maintaining a
      deterministic order. (This may have significant performance impacts.)
      Defaults to `True`.
  Returns:
    A `tf.data.Dataset` with an infinite stream of elements generated by a
    parallel interleaving of the set of files matched (or generated) by `files`,
    whose element type matches the output of the dataset specified by
    `filetype`.

  Raises:
    ValueError: if any argument is not of the expected type.
  """
  if filetype is None:
    filetype = 'tfrecord'

  if isinstance(filetype, str):
    if filetype not in _FILETYPE_MAP:
      raise ValueError('Unexpected filetype: %s' % filetype)
    reader_fn = _FILETYPE_MAP[filetype]
  elif callable(filetype):
    reader_fn = filetype
  else:
    raise ValueError('filetype should be a string or a callable')

  file_reader_job = file_reader_job or 'coordinator'

  worker_job = worker_job or 'worker'

  if filename_shuffle_buffer_size is None:
    filename_shuffle_buffer_size = 4096

  num_parallel_reads = num_parallel_reads or 8

  if batch_transfer_size is None:
    batch_transfer_size = 256

  if sloppy is None:
    sloppy = True

  if file_reader_job == 'coordinator':
    file_reader_device = '/job:coordinator/task:0'
  else:
    file_reader_device = '/job:%s' % file_reader_job

  with ops.device(file_reader_device):
    if isinstance(files, str):
      source_dataset = dataset_ops.Dataset.list_files(files)
    elif isinstance(files, dataset_ops.DatasetV2):
      source_dataset = files
    else:
      raise ValueError('files was not a string or a dataset: %s' % files)

    if filename_shuffle_buffer_size:
      source_dataset = source_dataset.shuffle(
          buffer_size=filename_shuffle_buffer_size)

    source_dataset = source_dataset.apply(
        interleave_ops.parallel_interleave(
            reader_fn, cycle_length=num_parallel_reads, sloppy=sloppy))

    source_dataset = source_dataset.repeat(num_epochs)

    if batch_transfer_size:
      source_dataset = source_dataset.batch(batch_transfer_size)

    source_dataset = source_dataset.prefetch(1)

    source_iterator = dataset_ops.make_one_shot_iterator(source_dataset)
    source_handle = source_iterator.string_handle()

  @function.Defun(dtypes.string)
  def LoadingFunc(h):
    remote_iterator = iterator_ops.Iterator.from_string_handle(
        h, dataset_ops.get_legacy_output_types(source_dataset),
        dataset_ops.get_legacy_output_shapes(source_dataset))
    return remote_iterator.get_next()

  def MapFn(unused_input):
    source_dataset_output_types = dataset_ops.get_legacy_output_types(
        source_dataset)
    if isinstance(source_dataset_output_types, dtypes.DType):
      output_types = [source_dataset_output_types]
    elif isinstance(source_dataset_output_types, (list, tuple)):
      output_types = source_dataset_output_types
    else:
      raise ValueError('source dataset has invalid output types')
    remote_calls = functional_ops.remote_call(
        args=[source_handle],
        Tout=output_types,
        f=LoadingFunc,
        target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job)
    if len(remote_calls) == 1:
      return remote_calls[0]
    else:
      return remote_calls

  with ops.device('/job:%s' % worker_job):
    output_dataset = dataset_ops.Dataset.range(2).repeat().map(
        MapFn, num_parallel_calls=4 if sloppy else None)
    output_dataset = output_dataset.prefetch(1)

    if batch_transfer_size:
      # Undo the batching used during the transfer.
      output_dataset = output_dataset.apply(batching.unbatch()).prefetch(1)

  return output_dataset
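A rough sketch of calling this helper; the file glob is hypothetical, and per the docstring the enclosing session is assumed to be backed by a TPUClusterResolver with 'coordinator' and 'worker' jobs:

# Stream TFRecord files that live on the coordinator VM's local disk.
streaming_dataset = StreamingFilesDataset(
    files='/mnt/local-ssd/train-*.tfrecord',  # hypothetical path
    filetype='tfrecord')
iterator = dataset_ops.make_one_shot_iterator(streaming_dataset)
features = iterator.get_next()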
def read_batch_features(file_pattern,
                        batch_size,
                        features,
                        reader=core_readers.TFRecordDataset,
                        reader_args=None,
                        randomize_input=True,
                        num_epochs=None,
                        capacity=10000):
  """Reads batches of Examples.

  Example:

  ```
  serialized_examples = [
    features {
      feature { key: "age" value { int64_list { value: [ 0 ] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
      feature { key: "kws" value { bytes_list { value: [ "code", "art" ] } } }
    },
    features {
      feature { key: "age" value { int64_list { value: [] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
      feature { key: "kws" value { bytes_list { value: [ "sports" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  features: {
    "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
    "gender": FixedLenFeature([], dtype=tf.string),
    "kws": VarLenFeature(dtype=tf.string),
  }
  ```

  And the expected output is:

  ```python
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
    "kws": SparseTensor(
      indices=[[0, 0], [0, 1], [1, 0]],
      values=["code", "art", "sports"]
      dense_shape=[2, 2]),
  }
  ```

  Args:
    file_pattern: List of files or patterns of file paths containing
      `Example` records. See `tf.io.gfile.glob` for pattern rules.
    batch_size: An int representing the number of records to combine
      in a single batch.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values. See `tf.io.parse_example`.
    reader: A function or class that can be
      called with a `filenames` tensor and (optional) `reader_args` and returns
      a `Dataset` of `Example` tensors. Defaults to `tf.data.TFRecordDataset`.
    reader_args: Additional arguments to pass to the reader class.
    randomize_input: Whether the input should be randomized.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever.
    capacity: Buffer size of the ShuffleDataset. A large capacity ensures better
      shuffling but would increase memory usage and startup time.
  Returns:
    A dict from keys in features to `Tensor` or `SparseTensor` objects.
  """
  dataset = readers.make_batched_features_dataset(
      file_pattern,
      batch_size,
      features,
      reader=reader,
      reader_args=reader_args,
      shuffle=randomize_input,
      num_epochs=num_epochs,
      shuffle_buffer_size=capacity)
  iterator = dataset_ops.make_one_shot_iterator(dataset)
  outputs = iterator.get_next()
  return outputs
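A short usage sketch for the function above; the file pattern is hypothetical, and graph mode is assumed, in keeping with the rest of this module:

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()  # run in graph mode, matching the surrounding code

features_spec = {
    "age": tf.io.FixedLenFeature([], dtype=tf.int64, default_value=-1),
    "gender": tf.io.FixedLenFeature([], dtype=tf.string),
    "kws": tf.io.VarLenFeature(dtype=tf.string),
}
# Hypothetical glob of TFRecord files containing serialized tf.Example protos.
batch = read_batch_features("/tmp/examples-*.tfrecord",
                            batch_size=2,
                            features=features_spec,
                            num_epochs=1)
with tf.Session() as sess:
    first_batch = sess.run(batch)  # dict of dense and sparse batch values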