def testFailOnWrongBucketCapacities(self):
  """bucket() must reject a bucket_capacities list whose length != num_buckets."""
  # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias.
  with self.assertRaisesRegex(ValueError, r"must have exactly num_buckets"):
    bucket_ops.bucket(
        # 2 buckets and 3 capacities raises ValueError.
        tensors=[self.scalar_int, self.unk_int64, self.vec3_str],
        which_bucket=constant_op.constant(0),
        num_buckets=2,
        batch_size=32,
        bucket_capacities=[3, 4, 5])
def testSingleBucket(self):
  """All inputs are routed to bucket 0; checks static shapes and batch contents."""
  bucketed_dynamic = bucket_ops.bucket(
      tensors=[self.scalar_int, self.unk_int64, self.vec3_str, self.sparse_c],
      which_bucket=constant_op.constant(0),
      num_buckets=2,
      batch_size=32,
      num_threads=10,
      dynamic_pad=True)
  # Check shape inference on bucketing outputs
  self.assertAllEqual(
      [[32], [32, None], [32, 3], [None, None]],
      [out.get_shape().as_list() for out in bucketed_dynamic[1]])
  # cached_session matches the other tests in this file; test_session is
  # deprecated.
  with self.cached_session() as sess:
    for v in range(32):
      self.enqueue_inputs(sess, {
          self.scalar_int_feed: v,
          self.unk_int64_feed: v * [v],
          self.vec3_str_feed: 3 * [str(v)]
      })
    self.start_queue_runners(sess)
    # Get a single minibatch
    bucketed_values = sess.run(bucketed_dynamic)
    # (which_bucket, bucket_tensors).
    self.assertEqual(2, len(bucketed_values))
    # Count number of bucket_tensors.
    self.assertEqual(4, len(bucketed_values[1]))
    # Ensure bucket 0 was used for all minibatch entries.
    self.assertAllEqual(0, bucketed_values[0])
    expected_scalar_int = np.arange(32)
    expected_unk_int64 = np.zeros((32, 31)).astype(np.int64)
    for i in range(32):
      expected_unk_int64[i, :i] = i
    expected_vec3_str = np.vstack(3 * [np.arange(32).astype(bytes)]).T
    # Must resort the output because num_threads > 1 leads to
    # sometimes-inconsistent insertion order.
    resort = np.argsort(bucketed_values[1][0])
    self.assertAllEqual(expected_scalar_int, bucketed_values[1][0][resort])
    self.assertAllEqual(expected_unk_int64, bucketed_values[1][1][resort])
    self.assertAllEqual(expected_vec3_str, bucketed_values[1][2][resort])
def testBatchSizePerBucket(self):
  """Each bucket can use its own batch size: 5 for bucket 0, 10 for bucket 1."""
  which_bucket = control_flow_ops.cond(self.scalar_int < 5,
                                       lambda: constant_op.constant(0),
                                       lambda: constant_op.constant(1))
  batch_sizes = [5, 10]
  bucketed_dynamic = bucket_ops.bucket(
      tensors=[self.scalar_int, self.unk_int64, self.vec3_str, self.sparse_c],
      which_bucket=which_bucket,
      num_buckets=2,
      batch_size=batch_sizes,
      num_threads=1,
      dynamic_pad=True)
  # Check shape inference on bucketing outputs: with per-bucket batch sizes
  # the leading dimension cannot be statically inferred.
  self.assertAllEqual(
      [[None], [None, None], [None, 3], [None, None]],
      [out.get_shape().as_list() for out in bucketed_dynamic[1]])
  # cached_session matches the other tests in this file; test_session is
  # deprecated.
  with self.cached_session() as sess:
    for v in range(15):
      self.enqueue_inputs(sess, {
          self.scalar_int_feed: v,
          self.unk_int64_feed: v * [v],
          self.vec3_str_feed: 3 * [str(v)]
      })
    self.start_queue_runners(sess)
    # Get two minibatches (one with small values, one with large).
    bucketed_values_0 = sess.run(bucketed_dynamic)
    bucketed_values_1 = sess.run(bucketed_dynamic)
    # Figure out which output came from bucket 0 (the small values).
    # bucketed_values_0[0] is a bucket *index*, so compare against 0; the
    # previous "< 5" test was trivially true for either bucket index.
    if bucketed_values_0[0] == 0:
      bucketed_values_small, bucketed_values_large = (bucketed_values_0,
                                                      bucketed_values_1)
    else:
      bucketed_values_small, bucketed_values_large = (bucketed_values_1,
                                                      bucketed_values_0)
    # Ensure bucket 0 was used for all minibatch entries.
    self.assertAllEqual(0, bucketed_values_small[0])
    self.assertAllEqual(1, bucketed_values_large[0])
    # Check that the batch sizes differ per bucket
    self.assertEqual(5, len(bucketed_values_small[1][0]))
    self.assertEqual(10, len(bucketed_values_large[1][0]))
def testSingleBucket(self):
  """Everything lands in bucket 0; verify shapes and the batched values."""
  bucketed = bucket_ops.bucket(
      tensors=[self.scalar_int, self.unk_int64, self.vec3_str, self.sparse_c],
      which_bucket=constant_op.constant(0),
      num_buckets=2,
      batch_size=32,
      num_threads=10,
      dynamic_pad=True)
  # Static shape inference on the batched outputs.
  inferred = [t.get_shape().as_list() for t in bucketed[1]]
  self.assertAllEqual([[32], [32, None], [32, 3], [None, None]], inferred)
  with self.cached_session() as sess:
    for v in range(32):
      feed = {
          self.scalar_int_feed: v,
          self.unk_int64_feed: v * [v],
          self.vec3_str_feed: 3 * [str(v)],
      }
      self.enqueue_inputs(sess, feed)
    self.start_queue_runners(sess)
    # Pull one minibatch.
    result = sess.run(bucketed)
    # result is the pair (which_bucket, bucket_tensors).
    self.assertEqual(2, len(result))
    # Four bucketed tensors come back.
    self.assertEqual(4, len(result[1]))
    # Every entry was placed in bucket 0.
    self.assertAllEqual(0, result[0])
    want_scalar = np.arange(32)
    want_unk = np.zeros((32, 31), dtype=np.int64)
    for i in range(32):
      want_unk[i, :i] = i
    want_vec3 = np.vstack(3 * [np.arange(32).astype(bytes)]).T
    # num_threads > 1 makes insertion order nondeterministic, so sort by the
    # scalar column before comparing.
    order = np.argsort(result[1][0])
    self.assertAllEqual(want_scalar, result[1][0][order])
    self.assertAllEqual(want_unk, result[1][1][order])
    self.assertAllEqual(want_vec3, result[1][2][order])
def testBatchSizePerBucket(self):
  """Each bucket can use its own batch size: 5 for bucket 0, 10 for bucket 1."""
  which_bucket = control_flow_ops.cond(self.scalar_int < 5,
                                       lambda: constant_op.constant(0),
                                       lambda: constant_op.constant(1))
  batch_sizes = [5, 10]
  bucketed_dynamic = bucket_ops.bucket(
      tensors=[self.scalar_int, self.unk_int64, self.vec3_str, self.sparse_c],
      which_bucket=which_bucket,
      num_buckets=2,
      batch_size=batch_sizes,
      num_threads=1,
      dynamic_pad=True)
  # Check shape inference on bucketing outputs: with per-bucket batch sizes
  # the leading dimension cannot be statically inferred.
  self.assertAllEqual(
      [[None], [None, None], [None, 3], [None, None]],
      [out.get_shape().as_list() for out in bucketed_dynamic[1]])
  with self.cached_session() as sess:
    for v in range(15):
      self.enqueue_inputs(sess, {
          self.scalar_int_feed: v,
          self.unk_int64_feed: v * [v],
          self.vec3_str_feed: 3 * [str(v)]
      })
    self.start_queue_runners(sess)
    # Get two minibatches (one with small values, one with large).
    bucketed_values_0 = sess.run(bucketed_dynamic)
    bucketed_values_1 = sess.run(bucketed_dynamic)
    # Figure out which output came from bucket 0 (the small values).
    # bucketed_values_0[0] is a bucket *index*, so compare against 0; the
    # previous "< 5" test was trivially true for either bucket index.
    if bucketed_values_0[0] == 0:
      bucketed_values_small, bucketed_values_large = (bucketed_values_0,
                                                      bucketed_values_1)
    else:
      bucketed_values_small, bucketed_values_large = (bucketed_values_1,
                                                      bucketed_values_0)
    # Ensure bucket 0 was used for all minibatch entries.
    self.assertAllEqual(0, bucketed_values_small[0])
    self.assertAllEqual(1, bucketed_values_large[0])
    # Check that the batch sizes differ per bucket
    self.assertEqual(5, len(bucketed_values_small[1][0]))
    self.assertEqual(10, len(bucketed_values_large[1][0]))
def testGeneratorWorksWithManyBatchingThreads(self):
  """Batches generator output across 7 threads without losing or duplicating items."""

  def simple_generator():
    for i in range(5000):
      yield {"value": i, "ignored": 3}

  simple_features = {
      "value": parsing_ops.FixedLenFeature(shape=[], dtype=dtypes.int32)
  }
  tensors = python_input.python_input(simple_generator, simple_features)
  # Request batches of size 20 at a time, the final batch may be smaller.
  _, batched_tensors = bucket_ops.bucket(
      tensors,
      which_bucket=tensors["value"] % 5,
      batch_size=20,
      num_buckets=5,
      num_threads=7,
      capacity=17,
      allow_smaller_final_batch=True)
  # dict.keys() returns a view in Python 3, which never equals a list;
  # materialize it before comparing.
  self.assertEqual(["value"], list(batched_tensors.keys()))
  self.assertEqual(dtypes.int32, batched_tensors["value"].dtype)
  self.assertEqual([None], batched_tensors["value"].shape.as_list())
  # cached_session matches the other tests in this file; test_session is
  # deprecated.
  with self.cached_session() as sess:
    # The generator emits 5000 items, dequeued in batches of up to 20 until
    # OutOfRangeError.  allow_smaller_final_batch=True ensures any remainder
    # batch is still emitted, so every value must come back exactly once.
    coord = coordinator.Coordinator()
    threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
    results = []
    while True:
      try:
        r = sess.run(batched_tensors)
        results.extend(r["value"].tolist())
      except errors.OutOfRangeError:
        break
    coord.request_stop()
    for thread in threads:
      thread.join()
    self.assertEqual(sorted(results), list(range(5000)))
def testEvenOddBucketsFilterOutAllOdd(self):
  """With keep_input filtering odds, only bucket 0 (evens) produces batches."""
  which_bucket = (self.scalar_int % 2)
  keep_input = math_ops.equal(which_bucket, 0)
  bucketed_dynamic = bucket_ops.bucket(
      tensors=[self.scalar_int, self.unk_int64, self.vec3_str],
      which_bucket=which_bucket,
      num_buckets=2,
      batch_size=32,
      num_threads=10,
      keep_input=keep_input,
      dynamic_pad=True)
  # Check shape inference on bucketing outputs
  self.assertAllEqual(
      [[32], [32, None], [32, 3]],
      [out.get_shape().as_list() for out in bucketed_dynamic[1]])
  # cached_session matches the other tests in this file; test_session is
  # deprecated.
  with self.cached_session() as sess:
    for v in range(128):
      self.enqueue_inputs(sess, {
          self.scalar_int_feed: v,
          self.unk_int64_feed: v * [v],
          self.vec3_str_feed: 3 * [str(v)]
      })
    self.start_queue_runners(sess)
    # Get two minibatches ([0, 2, ...] and [64, 66, ...])
    bucketed_values_even0 = sess.run(bucketed_dynamic)
    bucketed_values_even1 = sess.run(bucketed_dynamic)
    # Ensure that bucket 1 was completely filtered out
    self.assertAllEqual(0, bucketed_values_even0[0])
    self.assertAllEqual(0, bucketed_values_even1[0])
    # Merge their output for sorting and comparison
    bucketed_values_all_elem0 = np.concatenate((bucketed_values_even0[1][0],
                                                bucketed_values_even1[1][0]))
    self.assertAllEqual(
        np.arange(0, 128, 2), sorted(bucketed_values_all_elem0))
def testEvenOddBucketsFilterOutAllOdd(self):
  """keep_input drops every odd value, so only bucket 0 ever emits batches."""
  which_bucket = (self.scalar_int % 2)
  keep_input = math_ops.equal(which_bucket, 0)
  bucketed = bucket_ops.bucket(
      tensors=[self.scalar_int, self.unk_int64, self.vec3_str],
      which_bucket=which_bucket,
      num_buckets=2,
      batch_size=32,
      num_threads=10,
      keep_input=keep_input,
      dynamic_pad=True)
  # Static shape inference on the batched outputs.
  inferred = [t.get_shape().as_list() for t in bucketed[1]]
  self.assertAllEqual([[32], [32, None], [32, 3]], inferred)
  with self.cached_session() as sess:
    for v in range(128):
      feed = {
          self.scalar_int_feed: v,
          self.unk_int64_feed: v * [v],
          self.vec3_str_feed: 3 * [str(v)],
      }
      self.enqueue_inputs(sess, feed)
    self.start_queue_runners(sess)
    # Two minibatches of evens ([0, 2, ...] then [64, 66, ...]).
    even_batch0 = sess.run(bucketed)
    even_batch1 = sess.run(bucketed)
    # Bucket 1 (odds) was filtered out entirely.
    self.assertAllEqual(0, even_batch0[0])
    self.assertAllEqual(0, even_batch1[0])
    # Merge both batches, then sort for comparison against all evens < 128.
    merged = np.concatenate((even_batch0[1][0], even_batch1[1][0]))
    self.assertAllEqual(np.arange(0, 128, 2), sorted(merged))
def testGeneratorWorksWithManyBatchingThreads(self):
  """Batches generator output across 7 threads without losing or duplicating items."""

  def simple_generator():
    for i in range(5000):
      yield {"value": i, "ignored": 3}

  simple_features = {
      "value": parsing_ops.FixedLenFeature(shape=[], dtype=dtypes.int32)
  }
  tensors = python_input.python_input(simple_generator, simple_features)
  # Request batches of size 20 at a time, the final batch may be smaller.
  _, batched_tensors = bucket_ops.bucket(
      tensors,
      which_bucket=tensors["value"] % 5,
      batch_size=20,
      num_buckets=5,
      num_threads=7,
      capacity=17,
      allow_smaller_final_batch=True)
  # dict.keys() returns a view in Python 3, which never equals a list;
  # materialize it before comparing.
  self.assertEqual(["value"], list(batched_tensors.keys()))
  self.assertEqual(dtypes.int32, batched_tensors["value"].dtype)
  self.assertEqual([None], batched_tensors["value"].shape.as_list())
  # cached_session matches the other tests in this file; test_session is
  # deprecated.
  with self.cached_session() as sess:
    # The generator emits 5000 items, dequeued in batches of up to 20 until
    # OutOfRangeError.  allow_smaller_final_batch=True ensures any remainder
    # batch is still emitted, so every value must come back exactly once.
    coord = coordinator.Coordinator()
    threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
    results = []
    while True:
      try:
        r = sess.run(batched_tensors)
        results.extend(r["value"].tolist())
      except errors.OutOfRangeError:
        break
    coord.request_stop()
    for thread in threads:
      thread.join()
    self.assertEqual(sorted(results), list(range(5000)))
def testEvenOddBuckets(self):
  """Routes evens to bucket 0 and odds to bucket 1; verifies both minibatches."""
  which_bucket = (self.scalar_int % 2)
  bucketed = bucket_ops.bucket(
      tensors=[self.scalar_int, self.unk_int64, self.vec3_str, self.sparse_c],
      which_bucket=which_bucket,
      num_buckets=2,
      batch_size=32,
      num_threads=10,
      dynamic_pad=True)
  # Static shape inference on the batched outputs.
  self.assertAllEqual(
      [[32], [32, None], [32, 3], [None, None]],
      [t.get_shape().as_list() for t in bucketed[1]])
  with self.cached_session() as sess:
    for v in range(64):
      feed = {
          self.scalar_int_feed: v,
          self.unk_int64_feed: v * [v],
          self.vec3_str_feed: 3 * [str(v)],
      }
      self.enqueue_inputs(sess, feed)
    self.start_queue_runners(sess)
    # Two minibatches: one of evens and one of odds.
    run0 = sess.run(bucketed)
    run1 = sess.run(bucketed)
    # Each result is the pair (which_bucket, bucket_tensors).
    self.assertEqual(2, len(run0))
    self.assertEqual(2, len(run1))
    # Four bucketed tensors come back in each.
    self.assertEqual(4, len(run0[1]))
    self.assertEqual(4, len(run1[1]))
    # Multithreaded bucketing makes the even/odd emission order random, so
    # inspect the bucket index to tell the two batches apart.
    if run0[0] % 2 == 1:
      evens, odds = run1, run0
    else:
      evens, odds = run0, run1
    # Ensure the expected bucket produced each minibatch.
    self.assertAllEqual(0, evens[0])
    self.assertAllEqual(1, odds[0])

    def check_batch(batch, start):
      # Expected contents for the sequence start, start + 2, ..., start + 62.
      seq = np.arange(start, 64 + start, 2)
      want_unk = np.zeros((32, 62 + start), dtype=np.int64)
      for i in range(32):
        want_unk[i, :2 * i + start] = 2 * i + start
      want_vec3 = np.vstack(3 * [seq.astype(bytes)]).T
      # Sort by the scalar column: num_threads > 1 leads to
      # sometimes-inconsistent insertion order.
      order = np.argsort(batch[1][0])
      self.assertAllEqual(seq, batch[1][0][order])
      self.assertAllEqual(want_unk, batch[1][1][order])
      self.assertAllEqual(want_vec3, batch[1][2][order])

    # First the even bucket (values starting at 0), then the odd one.
    check_batch(evens, 0)
    check_batch(odds, 1)