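# ---------------------------------------------------------------------------
# The snippets below are test methods scraped from forks of
# tf.contrib.training's bucket_ops test suite (TF 1.x queue-runner API,
# removed in TF 2.x).  They assume a fixture roughly like the sketch here;
# this sketch is a reconstruction, not part of the scraped examples, and
# details may differ from any particular fork.
# ---------------------------------------------------------------------------
import numpy as np

from tensorflow.contrib.training.python.training import bucket_ops
from tensorflow.contrib.training.python.training import python_input
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import data_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import parsing_ops
from tensorflow.python.platform import test
from tensorflow.python.training import coordinator
from tensorflow.python.training import queue_runner_impl


class BucketTest(test.TestCase):

  def setUp(self):
    ops.reset_default_graph()

    # Feeds for the three dense inputs (and one sparse input) used below.
    self.scalar_int_feed = array_ops.placeholder(dtypes.int32, ())
    self.unk_int64_feed = array_ops.placeholder(dtypes.int64, (None,))
    self.vec3_str_feed = array_ops.placeholder(dtypes.string, (3,))
    self.sparse_c = sparse_tensor.SparseTensor(
        indices=[[0]], values=[1.0], dense_shape=[1])

    # Large capacity so the main thread can enqueue all inputs up front
    # without blocking.
    input_queue = data_flow_ops.PaddingFIFOQueue(
        5000,
        dtypes=[dtypes.int32, dtypes.int64, dtypes.string],
        shapes=[(), (None,), (3,)])
    self._input_enqueue_op = input_queue.enqueue(
        (self.scalar_int_feed, self.unk_int64_feed, self.vec3_str_feed))
    self.scalar_int, self.unk_int64, self.vec3_str = input_queue.dequeue()
    self._coord = coordinator.Coordinator()

  def enqueue_inputs(self, sess, feed_dict):
    sess.run(self._input_enqueue_op, feed_dict=feed_dict)

  def start_queue_runners(self, sess):
    self._threads = queue_runner_impl.start_queue_runners(
        sess=sess, coord=self._coord)
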
  def testSingleBucket(self):
    bucketed_dynamic = bucket_ops.bucket(
        tensors=[self.scalar_int, self.unk_int64, self.vec3_str, self.sparse_c],
        which_bucket=constant_op.constant(0),
        num_buckets=2,
        batch_size=32,
        num_threads=10,
        dynamic_pad=True)
    # Check shape inference on bucketing outputs
    self.assertAllEqual(
        [[32], [32, None], [32, 3], [None, None]],
        [out.get_shape().as_list() for out in bucketed_dynamic[1]])
    with self.cached_session() as sess:
      for v in range(32):
        self.enqueue_inputs(sess, {
            self.scalar_int_feed: v,
            self.unk_int64_feed: v * [v],
            self.vec3_str_feed: 3 * [str(v)]
        })
      self.start_queue_runners(sess)

      # Get a single minibatch
      bucketed_values = sess.run(bucketed_dynamic)

      # (which_bucket, bucket_tensors).
      self.assertEqual(2, len(bucketed_values))

      # Count number of bucket_tensors.
      self.assertEqual(4, len(bucketed_values[1]))

      # Ensure bucket 0 was used for all minibatch entries.
      self.assertAllEqual(0, bucketed_values[0])

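      # dynamic_pad zero-pads each variable-length row to the longest row in
      # the minibatch: row i should hold i copies of i, padded to width 31.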
      expected_scalar_int = np.arange(32)
      expected_unk_int64 = np.zeros((32, 31)).astype(np.int64)
      for i in range(32):
        expected_unk_int64[i, :i] = i
      expected_vec3_str = np.vstack(3 * [np.arange(32).astype(bytes)]).T

      # Must re-sort the output because num_threads > 1 leads to
      # sometimes-inconsistent insertion order.
      resort = np.argsort(bucketed_values[1][0])
      self.assertAllEqual(expected_scalar_int, bucketed_values[1][0][resort])
      self.assertAllEqual(expected_unk_int64, bucketed_values[1][1][resort])
      self.assertAllEqual(expected_vec3_str, bucketed_values[1][2][resort])
  def testBatchSizePerBucket(self):
    which_bucket = control_flow_ops.cond(self.scalar_int < 5,
                                         lambda: constant_op.constant(0),
                                         lambda: constant_op.constant(1))
    batch_sizes = [5, 10]
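    # Passing a list as batch_size gives each bucket its own batch size:
    # bucket 0 fills minibatches of 5, bucket 1 minibatches of 10.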
    bucketed_dynamic = bucket_ops.bucket(
        tensors=[self.scalar_int, self.unk_int64, self.vec3_str, self.sparse_c],
        which_bucket=which_bucket,
        num_buckets=2,
        batch_size=batch_sizes,
        num_threads=1,
        dynamic_pad=True)
    # Check shape inference on bucketing outputs
    self.assertAllEqual(
        [[None], [None, None], [None, 3], [None, None]],
        [out.get_shape().as_list() for out in bucketed_dynamic[1]])
    with self.cached_session() as sess:
      for v in range(15):
        self.enqueue_inputs(sess, {
            self.scalar_int_feed: v,
            self.unk_int64_feed: v * [v],
            self.vec3_str_feed: 3 * [str(v)]
        })
      self.start_queue_runners(sess)

      # Get two minibatches (one with small values, one with large).
      bucketed_values_0 = sess.run(bucketed_dynamic)
      bucketed_values_1 = sess.run(bucketed_dynamic)

      # Figure out which output has the small values (bucket 0).
      if bucketed_values_0[0] == 0:
        bucketed_values_small, bucketed_values_large = (bucketed_values_0,
                                                        bucketed_values_1)
      else:
        bucketed_values_small, bucketed_values_large = (bucketed_values_1,
                                                        bucketed_values_0)

      # Ensure the small batch came from bucket 0 and the large from bucket 1.
      self.assertAllEqual(0, bucketed_values_small[0])
      self.assertAllEqual(1, bucketed_values_large[0])

      # Check that the batch sizes differ per bucket
      self.assertEqual(5, len(bucketed_values_small[1][0]))
      self.assertEqual(10, len(bucketed_values_large[1][0]))
  def testEvenOddBucketsFilterOutAllOdd(self):
    which_bucket = (self.scalar_int % 2)
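    # keep_input is evaluated per example: entries where it is False are
    # dropped before bucketing, so the odd bucket never fills a minibatch.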
    keep_input = math_ops.equal(which_bucket, 0)
    bucketed_dynamic = bucket_ops.bucket(
        tensors=[self.scalar_int, self.unk_int64, self.vec3_str],
        which_bucket=which_bucket,
        num_buckets=2,
        batch_size=32,
        num_threads=10,
        keep_input=keep_input,
        dynamic_pad=True)
    # Check shape inference on bucketing outputs
    self.assertAllEqual(
        [[32], [32, None], [32, 3]],
        [out.get_shape().as_list() for out in bucketed_dynamic[1]])
    with self.cached_session() as sess:
      for v in range(128):
        self.enqueue_inputs(sess, {
            self.scalar_int_feed: v,
            self.unk_int64_feed: v * [v],
            self.vec3_str_feed: 3 * [str(v)]
        })
      self.start_queue_runners(sess)

      # Get two minibatches ([0, 2, ...] and [64, 66, ...])
      bucketed_values_even0 = sess.run(bucketed_dynamic)
      bucketed_values_even1 = sess.run(bucketed_dynamic)

      # Ensure that bucket 1 was completely filtered out
      self.assertAllEqual(0, bucketed_values_even0[0])
      self.assertAllEqual(0, bucketed_values_even1[0])

      # Merge their output for sorting and comparison
      bucketed_values_all_elem0 = np.concatenate((bucketed_values_even0[1][0],
                                                  bucketed_values_even1[1][0]))

      self.assertAllEqual(
          np.arange(0, 128, 2), sorted(bucketed_values_all_elem0))

  def testGeneratorWorksWithManyBatchingThreads(self):
    def simple_generator():
      for i in range(5000):
        yield {"value": i, "ignored": 3}

    simple_features = {
        "value": parsing_ops.FixedLenFeature(shape=[], dtype=dtypes.int32)
    }
    tensors = python_input.python_input(simple_generator, simple_features)
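    # python_input wraps the generator as a dict of tensors, keeping only the
    # keys declared in simple_features (the "ignored" key is dropped).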

    # Request batches of size 20 at a time, the final batch may be smaller.
    _, batched_tensors = bucket_ops.bucket(
        tensors, which_bucket=tensors["value"] % 5,
        batch_size=20, num_buckets=5, num_threads=7, capacity=17,
        allow_smaller_final_batch=True)

    self.assertEqual(["value"], batched_tensors.keys())
    self.assertEqual(dtypes.int32, batched_tensors["value"].dtype)
    self.assertEqual([None], batched_tensors["value"].shape.as_list())

    with self.cached_session() as sess:
      # The generator emits 5000 items spread across 5 buckets (value % 5).
      # Each run returns a batch of up to 20 items from whichever bucket is
      # ready; allow_smaller_final_batch=True lets any partial final batches
      # drain once the generator is exhausted instead of hanging.
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
      results = []
      while True:
        try:
          r = sess.run(batched_tensors)
          results.extend(r["value"].tolist())
        except errors.OutOfRangeError:
          break
      coord.request_stop()
      for thread in threads:
        thread.join()
    self.assertEqual(sorted(results), list(range(5000)))
  def testFailOnWrongBucketCapacities(self):
    with self.assertRaisesRegexp(ValueError, r"must have exactly num_buckets"):
      bucket_ops.bucket(  # 2 buckets and 3 capacities raises ValueError.
          tensors=[self.scalar_int, self.unk_int64, self.vec3_str],
          which_bucket=constant_op.constant(0), num_buckets=2,
          batch_size=32, bucket_capacities=[3, 4, 5])
  def testEvenOddBuckets(self):
    which_bucket = (self.scalar_int % 2)
    bucketed_dynamic = bucket_ops.bucket(
        tensors=[self.scalar_int, self.unk_int64, self.vec3_str, self.sparse_c],
        which_bucket=which_bucket,
        num_buckets=2,
        batch_size=32,
        num_threads=10,
        dynamic_pad=True)
    # Check shape inference on bucketing outputs
    self.assertAllEqual(
        [[32], [32, None], [32, 3], [None, None]],
        [out.get_shape().as_list() for out in bucketed_dynamic[1]])
    with self.cached_session() as sess:
      for v in range(64):
        self.enqueue_inputs(sess, {
            self.scalar_int_feed: v,
            self.unk_int64_feed: v * [v],
            self.vec3_str_feed: 3 * [str(v)]
        })
      self.start_queue_runners(sess)

      # Get two minibatches (one containing even values, one containing odds)
      bucketed_values_0 = sess.run(bucketed_dynamic)
      bucketed_values_1 = sess.run(bucketed_dynamic)

      # (which_bucket, bucket_tensors).
      self.assertEqual(2, len(bucketed_values_0))
      self.assertEqual(2, len(bucketed_values_1))

      # Count number of bucket_tensors.
      self.assertEqual(4, len(bucketed_values_0[1]))
      self.assertEqual(4, len(bucketed_values_1[1]))

      # Figure out which output has the even values (there's
      # randomness due to the multithreaded nature of bucketing)
      if bucketed_values_0[0] % 2 == 1:
        bucketed_values_even, bucketed_values_odd = (bucketed_values_1,
                                                     bucketed_values_0)
      else:
        bucketed_values_even, bucketed_values_odd = (bucketed_values_0,
                                                     bucketed_values_1)

      # Ensure the even batch came from bucket 0 and the odd batch from bucket 1.
      self.assertAllEqual(0, bucketed_values_even[0])
      self.assertAllEqual(1, bucketed_values_odd[0])

      # Test the first bucket output: the evens, starting at 0
      expected_scalar_int = np.arange(0, 32 * 2, 2)
      expected_unk_int64 = np.zeros((32, 31 * 2)).astype(np.int64)
      for i in range(0, 32):
        expected_unk_int64[i, :2 * i] = 2 * i
      expected_vec3_str = np.vstack(3 *
                                    [np.arange(0, 32 * 2, 2).astype(bytes)]).T

      # Must re-sort the output because num_threads > 1 leads to
      # sometimes-inconsistent insertion order.
      resort = np.argsort(bucketed_values_even[1][0])
      self.assertAllEqual(expected_scalar_int,
                          bucketed_values_even[1][0][resort])
      self.assertAllEqual(expected_unk_int64,
                          bucketed_values_even[1][1][resort])
      self.assertAllEqual(expected_vec3_str, bucketed_values_even[1][2][resort])

      # Test the second bucket output: the odds, starting at 1
      expected_scalar_int = np.arange(1, 32 * 2 + 1, 2)
      expected_unk_int64 = np.zeros((32, 31 * 2 + 1)).astype(np.int64)
      for i in range(0, 32):
        expected_unk_int64[i, :2 * i + 1] = 2 * i + 1
      expected_vec3_str = np.vstack(
          3 * [np.arange(1, 32 * 2 + 1, 2).astype(bytes)]).T

      # Must re-sort the output because num_threads > 1 leads to
      # sometimes-inconsistent insertion order.
      resort = np.argsort(bucketed_values_odd[1][0])
      self.assertAllEqual(expected_scalar_int,
                          bucketed_values_odd[1][0][resort])
      self.assertAllEqual(expected_unk_int64, bucketed_values_odd[1][1][resort])
      self.assertAllEqual(expected_vec3_str, bucketed_values_odd[1][2][resort])
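

# -----------------------------------------------------------------------------
# A hedged usage sketch (not part of the scraped tests): how bucket() is
# typically wired outside a test harness.  `length` and `tokens` are
# hypothetical tensors that must come from an upstream input pipeline (e.g.
# the PaddingFIFOQueue fixture above); bucket()'s queue-runner threads cannot
# feed bare placeholders.  TF 1.x only.
# -----------------------------------------------------------------------------
def _bucket_usage_sketch(length, tokens, num_buckets=4):
  # Route each example to a bucket by its length, clamping to the last bucket.
  which_bucket = math_ops.minimum(length // 10, num_buckets - 1)

  # bucket() returns (which_bucket, batched_tensors).  dynamic_pad=True pads
  # each variable-length tensor to the longest element in its minibatch;
  # allow_smaller_final_batch=True lets partial batches drain at end-of-input.
  _, (batched_length, batched_tokens) = bucket_ops.bucket(
      tensors=[length, tokens],
      which_bucket=which_bucket,
      batch_size=32,
      num_buckets=num_buckets,
      num_threads=4,
      dynamic_pad=True,
      allow_smaller_final_batch=True)
  return batched_length, batched_tokens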