def testTextLineDataset(self):
    """Check TextLineDataset reads lines in order, with repeat and batch.

    Two test files of five lines each are created via
    ``self._createFiles(2, 5, crlf=True)`` (presumably with CRLF line
    endings -- confirm against the helper). The file list, epoch count and
    batch size are fed through placeholders so a single graph can be
    re-initialized for every scenario below:

    * each file on its own,
    * both files in sequence,
    * both files repeated for 10 epochs,
    * both files repeated for 10 epochs and batched by 5.

    Every scenario ends by asserting that the iterator is exhausted.
    """
    test_filenames = self._createFiles(2, 5, crlf=True)
    filenames = array_ops.placeholder(dtypes.string, shape=[None])
    num_epochs = array_ops.placeholder(dtypes.int64, shape=[])
    batch_size = array_ops.placeholder(dtypes.int64, shape=[])

    repeat_dataset = dataset_ops.TextLineDataset(filenames).repeat(num_epochs)
    batch_dataset = repeat_dataset.batch(batch_size)

    # The iterator is built from the element types only, so the same
    # iterator (and the same get_next op) can be initialized from either
    # the unbatched or the batched dataset.
    iterator = dataset_ops.Iterator.from_structure(batch_dataset.output_types)
    init_op = iterator.make_initializer(repeat_dataset)
    init_batch_op = iterator.make_initializer(batch_dataset)
    get_next = iterator.get_next()

    with self.test_session() as sess:
        # Basic test: read from file 0.
        sess.run(init_op, feed_dict={filenames: [test_filenames[0]],
                                     num_epochs: 1})
        for i in range(5):
            self.assertEqual(self._lineText(0, i), sess.run(get_next))
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(get_next)

        # Basic test: read from file 1.
        sess.run(init_op, feed_dict={filenames: [test_filenames[1]],
                                     num_epochs: 1})
        for i in range(5):
            self.assertEqual(self._lineText(1, i), sess.run(get_next))
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(get_next)

        # Basic test: read from both files.
        sess.run(init_op, feed_dict={filenames: test_filenames,
                                     num_epochs: 1})
        for j in range(2):
            for i in range(5):
                self.assertEqual(self._lineText(j, i), sess.run(get_next))
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(get_next)

        # Test repeated iteration through both files.
        sess.run(init_op, feed_dict={filenames: test_filenames,
                                     num_epochs: 10})
        for _ in range(10):
            for j in range(2):
                for i in range(5):
                    self.assertEqual(self._lineText(j, i),
                                     sess.run(get_next))
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(get_next)

        # Test batched and repeated iteration through both files.
        # With 5 lines per file and batch_size 5, each batch holds exactly
        # one file's lines, so every epoch yields two batches.
        sess.run(init_batch_op, feed_dict={filenames: test_filenames,
                                           num_epochs: 10,
                                           batch_size: 5})
        for _ in range(10):
            self.assertAllEqual([self._lineText(0, i) for i in range(5)],
                                sess.run(get_next))
            self.assertAllEqual([self._lineText(1, i) for i in range(5)],
                                sess.run(get_next))
        # All 20 batches are consumed; like the unbatched scenarios, verify
        # that no extra batch is produced.
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(get_next)
# Example #2
# 0
    def testTextLineDatasetBuffering(self):
        """Check TextLineDataset reads lines in order with buffer_size=10.

        Two test files of five lines each are created via
        ``self._createFiles(2, 5, crlf=True)`` and read once through a
        one-shot iterator; every line must come back in file order, after
        which the iterator must report exhaustion.
        """
        test_filenames = self._createFiles(2, 5, crlf=True)

        dataset = dataset_ops.TextLineDataset(test_filenames, buffer_size=10)
        iterator = dataset.make_one_shot_iterator()
        # Build the get_next op once: each call to iterator.get_next() adds
        # a new op to the graph, so calling it per step would grow the graph.
        get_next = iterator.get_next()

        with self.test_session() as sess:
            for j in range(2):
                for i in range(5):
                    self.assertEqual(self._lineText(j, i), sess.run(get_next))
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(get_next)