def batch_join(tensor_list_list, batch_size, capacity=32, enqueue_many=False,
               shapes=None, name=None):
  """Builds batches of examples from several lists of tensors via a queue.

  Each list of tensors is enqueued from its own thread.

  The implementation uses a queue; a `QueueRunner` for that queue is added to
  the current `Graph`'s `QUEUE_RUNNER` collection.

  `len(tensor_list_list)` threads will be started, with thread `i` enqueuing
  the tensors from `tensor_list_list[i]`. `tensor_list_list[i1][j]` must match
  `tensor_list_list[i2][j]` in type and shape, except in the first dimension
  if `enqueue_many` is true.

  If `enqueue_many` is `False`, each `tensor_list_list[i]` is assumed to
  represent a single example. An input tensor `x` will be output as a tensor
  with shape `[batch_size] + x.shape`.

  If `enqueue_many` is `True`, `tensor_list_list[i]` is assumed to represent a
  batch of examples, where the first dimension is indexed by example, and all
  members of `tensor_list_list[i]` should have the same size in the first
  dimension. The slices of any input tensor `x` are treated as examples, and
  the output tensors will have shape `[batch_size] + x.shape[1:]`.

  The `capacity` argument controls how long the prefetching is allowed to
  grow the queues.

  Args:
    tensor_list_list: A list of tuples of tensors to enqueue.
    batch_size: An integer. The new batch size pulled from the queue.
    capacity: An integer. The maximum number of elements in the queue.
    enqueue_many: If `False`, each `tensor_list_list[i]` is a single example;
      if `True`, it is a batch of examples indexed by the first dimension.
    shapes: (Optional) The shapes for each example. Defaults to the
      inferred shapes for `tensor_list_list[i]`.
    name: (Optional) A name for the operations.

  Returns:
    A list of tensors with the same number and types as
    `tensor_list_list[i]`.
  """
  with ops.op_scope(_flatten(tensor_list_list), name, "batch_join") as scope:
    validated = _validate_join(tensor_list_list)
    component_dtypes = _dtypes(validated)
    component_shapes = _shapes(validated, shapes, enqueue_many)
    # TODO(josh11b,mrry): Switch to BatchQueue once it is written.
    batch_queue = data_flow_ops.FIFOQueue(
        capacity=capacity, dtypes=component_dtypes, shapes=component_shapes)
    _enqueue_join(batch_queue, validated, enqueue_many)
    # Report how full the queue is as a fraction of its capacity.
    summary_tag = "queue/%s/fraction_of_%d_full" % (batch_queue.name, capacity)
    fraction_full = (
        math_ops.cast(batch_queue.size(), types.float32) * (1. / capacity))
    summary_ops.scalar_summary(summary_tag, fraction_full)
    return batch_queue.dequeue_many(batch_size, name=scope)
def batch(tensor_list, batch_size, num_threads=1, capacity=32,
          enqueue_many=False, shapes=None, name=None):
  """Builds batches from the tensors in `tensor_list` using a queue.

  A `QueueRunner` for the underlying queue is added to the current `Graph`'s
  `QUEUE_RUNNER` collection.

  If `enqueue_many` is `False`, `tensor_list` is assumed to represent a
  single example. An input tensor with shape `[x, y, z]` will be output
  as a tensor with shape `[batch_size, x, y, z]`.

  If `enqueue_many` is `True`, `tensor_list` is assumed to represent a
  batch of examples, where the first dimension is indexed by example,
  and all members of `tensor_list` should have the same size in the
  first dimension. If an input tensor has shape `[*, x, y, z]`, the
  output will have shape `[batch_size, x, y, z]`.

  The `capacity` argument controls how long the prefetching is allowed to
  grow the queues.

  The returned operation is a dequeue operation and will throw
  `tf.errors.OutOfRangeError` if the input queue is exhausted. If this
  operation is feeding another input queue, its queue runner will catch
  this exception, however, if this operation is used in your main thread
  you are responsible for catching this yourself.

  *N.B.:* You must ensure that either (i) the `shapes` argument is
  passed, or (ii) all of the tensors in `tensor_list` must have
  fully-defined shapes. `ValueError` will be raised if neither of
  these conditions holds.

  Args:
    tensor_list: The list of tensors to enqueue.
    batch_size: The new batch size pulled from the queue.
    num_threads: The number of threads enqueuing `tensor_list`.
    capacity: An integer. The maximum number of elements in the queue.
    enqueue_many: If `False`, `tensor_list` is a single example; if `True`,
      it is a batch of examples indexed by the first dimension.
    shapes: (Optional) The shapes for each example. Defaults to the
      inferred shapes for `tensor_list`.
    name: (Optional) A name for the operations.

  Returns:
    A list of tensors with the same number and types as `tensor_list`.

  Raises:
    ValueError: If the `shapes` are not specified, and cannot be
      inferred from the elements of `tensor_list`.
  """
  with ops.op_scope(tensor_list, name, "batch") as scope:
    validated = _validate(tensor_list)
    component_dtypes = _dtypes([validated])
    component_shapes = _shapes([validated], shapes, enqueue_many)
    # TODO(josh11b,mrry): Switch to BatchQueue once it is written.
    batch_queue = data_flow_ops.FIFOQueue(
        capacity=capacity, dtypes=component_dtypes, shapes=component_shapes)
    _enqueue(batch_queue, validated, num_threads, enqueue_many)
    # Report how full the queue is as a fraction of its capacity.
    summary_tag = "queue/%s/fraction_of_%d_full" % (batch_queue.name, capacity)
    fraction_full = (
        math_ops.cast(batch_queue.size(), dtypes.float32) * (1. / capacity))
    summary_ops.scalar_summary(summary_tag, fraction_full)
    return batch_queue.dequeue_many(batch_size, name=scope)
def batch(tensor_list, batch_size, num_threads=1, capacity=32,
          enqueue_many=False, shapes=None, name=None):
  """Groups the tensors in `tensor_list` into batches of `batch_size`.

  Batching is done with a queue. A `QueueRunner` for that queue is added to
  the current `Graph`'s `QUEUE_RUNNER` collection.

  If `enqueue_many` is `False`, `tensor_list` is assumed to represent a
  single example. An input tensor with shape `[x, y, z]` will be output
  as a tensor with shape `[batch_size, x, y, z]`.

  If `enqueue_many` is `True`, `tensor_list` is assumed to represent a
  batch of examples, where the first dimension is indexed by example,
  and all members of `tensor_list` should have the same size in the
  first dimension. If an input tensor has shape `[*, x, y, z]`, the
  output will have shape `[batch_size, x, y, z]`.

  The `capacity` argument controls how long the prefetching is allowed to
  grow the queues.

  The returned operation is a dequeue operation and will throw
  `tf.errors.OutOfRangeError` if the input queue is exhausted. If this
  operation is feeding another input queue, its queue runner will catch
  this exception, however, if this operation is used in your main thread
  you are responsible for catching this yourself.

  *N.B.:* You must ensure that either (i) the `shapes` argument is
  passed, or (ii) all of the tensors in `tensor_list` must have
  fully-defined shapes. `ValueError` will be raised if neither of
  these conditions holds.

  Args:
    tensor_list: The list of tensors to enqueue.
    batch_size: The new batch size pulled from the queue.
    num_threads: The number of threads enqueuing `tensor_list`.
    capacity: An integer. The maximum number of elements in the queue.
    enqueue_many: If `False`, `tensor_list` is a single example; if `True`,
      it is a batch of examples indexed by the first dimension.
    shapes: (Optional) The shapes for each example. Defaults to the
      inferred shapes for `tensor_list`.
    name: (Optional) A name for the operations.

  Returns:
    A list of tensors with the same number and types as `tensor_list`.

  Raises:
    ValueError: If the `shapes` are not specified, and cannot be
      inferred from the elements of `tensor_list`.
  """
  with ops.op_scope(tensor_list, name, "batch") as scope:
    examples = _validate(tensor_list)
    example_dtypes = _dtypes([examples])
    example_shapes = _shapes([examples], shapes, enqueue_many)
    # TODO(josh11b,mrry): Switch to BatchQueue once it is written.
    fifo = data_flow_ops.FIFOQueue(
        capacity=capacity, dtypes=example_dtypes, shapes=example_shapes)
    _enqueue(fifo, examples, num_threads, enqueue_many)
    # Summarize the queue size scaled into [0, 1] by its capacity.
    tag = "queue/%s/fraction_of_%d_full" % (fifo.name, capacity)
    fill_level = math_ops.cast(fifo.size(), dtypes.float32) * (1. / capacity)
    summary_ops.scalar_summary(tag, fill_level)
    return fifo.dequeue_many(batch_size, name=scope)
def batch_join(tensor_list_list, batch_size, capacity=32, enqueue_many=False,
               shapes=None, name=None):
  """Fills a queue from several lists of tensors to create batches.

  This version enqueues a different list of tensors in different threads.

  Implemented using a queue -- a QueueRunner for the queue is added to the
  current Graph's QUEUE_RUNNER collection.

  Args:
    tensor_list_list: A list of tuples of tensors to enqueue.
      len(tensor_list_list) threads will be started, with the i-th
      thread enqueuing the tensors from tensor_list_list[i].
      tensor_list_list[i1][j] must match tensor_list_list[i2][j] in type and
      shape (except in the first dimension if enqueue_many is true).
    batch_size: The new batch size pulled from the queue.
    capacity: Maximum number of elements in the queue, controls how far
      ahead the prefetching is allowed to get and memory usage.
    enqueue_many: If False, each tensor_list_list[i] is assumed to
      represent a single example. If True, tensor_list_list[i] is
      assumed to represent a batch of examples, where the first
      dimension is indexed by example, and all members of
      tensor_list_list[i] should have the same size in the first
      dimension.
    shapes: Optional. The shapes for each example. Defaults to the
      inferred shapes for tensor_list_list[i] (which must match, after
      leaving off the first dimension if enqueue_many is True).
    name: A name for the operations (optional).

  Returns:
    A list of tensors with the same number and types as
    tensor_list_list[i]. If enqueue_many is false, then an input
    tensor with shape `[x, y, z]` will be output as a tensor with
    shape `[batch_size, x, y, z]`. If enqueue_many is True, and an
    input tensor has shape `[*, x, y, z]`, the output will have
    shape `[batch_size, x, y, z]`.
  """
  with ops.op_scope(_flatten(tensor_list_list), name, "batch_join") as scope:
    joined = _validate_join(tensor_list_list)
    joined_dtypes = _dtypes(joined)
    joined_shapes = _shapes(joined, shapes, enqueue_many)
    # TODO(josh11b,mrry): Switch to BatchQueue once it is written.
    fifo = data_flow_ops.FIFOQueue(
        capacity=capacity, dtypes=joined_dtypes, shapes=joined_shapes)
    _enqueue_join(fifo, joined, enqueue_many)
    # Summarize the queue size scaled by its capacity.
    tag = "queue/%s/fraction_of_%d_full" % (fifo.name, capacity)
    fill_level = math_ops.cast(fifo.size(), types.float32) * (1. / capacity)
    summary_ops.scalar_summary(tag, fill_level)
    return fifo.dequeue_many(batch_size, name=scope)
def batch_join(tensor_list_list, batch_size, capacity=32, enqueue_many=False,
               shapes=None, name=None):
  """Creates batches of examples by filling a queue from lists of tensors.

  Enqueues a different list of tensors in different threads.
  Implemented using a queue -- a `QueueRunner` for the queue
  is added to the current `Graph`'s `QUEUE_RUNNER` collection.

  `len(tensor_list_list)` threads will be started, with thread `i` enqueuing
  the tensors from `tensor_list_list[i]`. `tensor_list_list[i1][j]` must match
  `tensor_list_list[i2][j]` in type and shape, except in the first dimension
  if `enqueue_many` is true.

  If `enqueue_many` is `False`, each `tensor_list_list[i]` is assumed to
  represent a single example. An input tensor `x` will be output as a tensor
  with shape `[batch_size] + x.shape`.

  If `enqueue_many` is `True`, `tensor_list_list[i]` is assumed to represent a
  batch of examples, where the first dimension is indexed by example, and all
  members of `tensor_list_list[i]` should have the same size in the first
  dimension. The slices of any input tensor `x` are treated as examples, and
  the output tensors will have shape `[batch_size] + x.shape[1:]`.

  The `capacity` argument controls how long the prefetching is allowed to
  grow the queues.

  Args:
    tensor_list_list: A list of tuples of tensors to enqueue.
    batch_size: An integer. The new batch size pulled from the queue.
    capacity: An integer. The maximum number of elements in the queue.
    enqueue_many: If `False`, each `tensor_list_list[i]` is a single example;
      if `True`, it is a batch of examples indexed by the first dimension.
    shapes: (Optional) The shapes for each example. Defaults to the
      inferred shapes for `tensor_list_list[i]`.
    name: (Optional) A name for the operations.

  Returns:
    A list of tensors with the same number and types as
    `tensor_list_list[i]`.
  """
  with ops.op_scope(_flatten(tensor_list_list), name, "batch_join") as scope:
    per_thread_tensors = _validate_join(tensor_list_list)
    element_dtypes = _dtypes(per_thread_tensors)
    element_shapes = _shapes(per_thread_tensors, shapes, enqueue_many)
    # TODO(josh11b,mrry): Switch to BatchQueue once it is written.
    join_queue = data_flow_ops.FIFOQueue(
        capacity=capacity, dtypes=element_dtypes, shapes=element_shapes)
    _enqueue_join(join_queue, per_thread_tensors, enqueue_many)
    # Report how full the queue is as a fraction of its capacity.
    summary_tag = "queue/%s/fraction_of_%d_full" % (join_queue.name, capacity)
    fraction_full = (
        math_ops.cast(join_queue.size(), types.float32) * (1. / capacity))
    summary_ops.scalar_summary(summary_tag, fraction_full)
    return join_queue.dequeue_many(batch_size, name=scope)
def _input_producer(input_tensor, dtype, num_epochs, shuffle, seed, capacity,
                    name, summary_name):
  """Creates a FIFO queue fed from `input_tensor` and registers its runner.

  Args:
    input_tensor: Tensor whose contents are enqueued with `enqueue_many`.
    dtype: Element type of the queue's single component.
    num_epochs: Forwarded to `limit_epochs` together with `input_tensor`.
    shuffle: If true, `input_tensor` is first passed through
      `random_ops.random_shuffle`.
    seed: Seed forwarded to `random_ops.random_shuffle` when shuffling.
    capacity: Capacity of the queue; also used to scale the size summary.
    name: Name given to the queue.
    summary_name: Last path component of the scalar summary tag.

  Returns:
    The `FIFOQueue` that was created.
  """
  source = input_tensor
  if shuffle:
    source = random_ops.random_shuffle(source, seed=seed)
  source = limit_epochs(source, num_epochs)

  fifo = data_flow_ops.FIFOQueue(
      capacity=capacity, dtypes=[dtype], shapes=[[]], name=name)
  enqueue_op = fifo.enqueue_many([source])
  runner = queue_runner.QueueRunner(fifo, [enqueue_op])
  queue_runner.add_queue_runner(runner)

  # Summarize the queue size scaled by its capacity.
  tag = "queue/%s/%s" % (fifo.name, summary_name)
  fill_level = math_ops.cast(fifo.size(), dtypes.float32) * (1.0 / capacity)
  summary_ops.scalar_summary(tag, fill_level)
  return fifo
def _input_producer(input_tensor, dtype, num_epochs, shuffle, seed, capacity,
                    name, summary_name):
  """Builds a queue of the elements of `input_tensor` plus its queue runner.

  Args:
    input_tensor: Tensor whose contents are enqueued with `enqueue_many`.
    dtype: Element type of the queue's single component.
    num_epochs: Forwarded to `limit_epochs` together with `input_tensor`.
    shuffle: If true, `input_tensor` is first passed through
      `random_ops.random_shuffle`.
    seed: Seed forwarded to `random_ops.random_shuffle` when shuffling.
    capacity: Capacity of the queue; also used to scale the size summary.
    name: Name given to the queue.
    summary_name: Last path component of the scalar summary tag.

  Returns:
    The `FIFOQueue` that was created.
  """
  elements = (
      random_ops.random_shuffle(input_tensor, seed=seed)
      if shuffle else input_tensor)
  elements = limit_epochs(elements, num_epochs)

  producer_queue = data_flow_ops.FIFOQueue(
      capacity=capacity, dtypes=[dtype], shapes=[[]], name=name)
  enqueue_all = producer_queue.enqueue_many([elements])
  queue_runner.add_queue_runner(
      queue_runner.QueueRunner(producer_queue, [enqueue_all]))

  # Summarize the queue size scaled by its capacity.
  summary_tag = "queue/%s/%s" % (producer_queue.name, summary_name)
  scaled_size = (
      math_ops.cast(producer_queue.size(), dtypes.float32) * (1. / capacity))
  summary_ops.scalar_summary(summary_tag, scaled_size)
  return producer_queue
def batch(tensor_list, batch_size, num_threads=1, capacity=32,
          enqueue_many=False, shapes=None, name=None):
  """Builds batches from the tensors in `tensor_list` using a queue.

  A `QueueRunner` for the underlying queue is added to the current `Graph`'s
  `QUEUE_RUNNER` collection.

  If `enqueue_many` is `False`, `tensor_list` is assumed to represent a
  single example. An input tensor with shape `[x, y, z]` will be output
  as a tensor with shape `[batch_size, x, y, z]`.

  If `enqueue_many` is `True`, `tensor_list` is assumed to represent a
  batch of examples, where the first dimension is indexed by example,
  and all members of `tensor_list` should have the same size in the
  first dimension. If an input tensor has shape `[*, x, y, z]`, the
  output will have shape `[batch_size, x, y, z]`.

  The `capacity` argument controls how long the prefetching is allowed to
  grow the queues.

  Args:
    tensor_list: The list of tensors to enqueue.
    batch_size: The new batch size pulled from the queue.
    num_threads: The number of threads enqueuing `tensor_list`.
    capacity: An integer. The maximum number of elements in the queue.
    enqueue_many: If `False`, `tensor_list` is a single example; if `True`,
      it is a batch of examples indexed by the first dimension.
    shapes: (Optional) The shapes for each example. Defaults to the
      inferred shapes for `tensor_list`.
    name: (Optional) A name for the operations.

  Returns:
    A list of tensors with the same number and types as `tensor_list`.
  """
  with ops.op_scope(tensor_list, name, "batch") as scope:
    validated = _validate(tensor_list)
    component_dtypes = _dtypes([validated])
    component_shapes = _shapes([validated], shapes, enqueue_many)
    # TODO(josh11b,mrry): Switch to BatchQueue once it is written.
    batch_queue = data_flow_ops.FIFOQueue(
        capacity=capacity, dtypes=component_dtypes, shapes=component_shapes)
    _enqueue(batch_queue, validated, num_threads, enqueue_many)
    # Report how full the queue is as a fraction of its capacity.
    summary_tag = "queue/%s/fraction_of_%d_full" % (batch_queue.name, capacity)
    fraction_full = (
        math_ops.cast(batch_queue.size(), types.float32) * (1. / capacity))
    summary_ops.scalar_summary(summary_tag, fraction_full)
    return batch_queue.dequeue_many(batch_size, name=scope)
def batch(tensor_list, batch_size, num_threads=1, capacity=32,
          enqueue_many=False, shapes=None, name=None):
  """Groups the tensors in `tensor_list` into batches of `batch_size`.

  Batching is done with a queue. A `QueueRunner` for that queue is added to
  the current `Graph`'s `QUEUE_RUNNER` collection.

  If `enqueue_many` is `False`, `tensor_list` is assumed to represent a
  single example. An input tensor with shape `[x, y, z]` will be output
  as a tensor with shape `[batch_size, x, y, z]`.

  If `enqueue_many` is `True`, `tensor_list` is assumed to represent a
  batch of examples, where the first dimension is indexed by example,
  and all members of `tensor_list` should have the same size in the
  first dimension. If an input tensor has shape `[*, x, y, z]`, the
  output will have shape `[batch_size, x, y, z]`.

  The `capacity` argument controls how long the prefetching is allowed to
  grow the queues.

  Args:
    tensor_list: The list of tensors to enqueue.
    batch_size: The new batch size pulled from the queue.
    num_threads: The number of threads enqueuing `tensor_list`.
    capacity: An integer. The maximum number of elements in the queue.
    enqueue_many: If `False`, `tensor_list` is a single example; if `True`,
      it is a batch of examples indexed by the first dimension.
    shapes: (Optional) The shapes for each example. Defaults to the
      inferred shapes for `tensor_list`.
    name: (Optional) A name for the operations.

  Returns:
    A list of tensors with the same number and types as `tensor_list`.
  """
  with ops.op_scope(tensor_list, name, "batch") as scope:
    examples = _validate(tensor_list)
    example_dtypes = _dtypes([examples])
    example_shapes = _shapes([examples], shapes, enqueue_many)
    # TODO(josh11b,mrry): Switch to BatchQueue once it is written.
    fifo = data_flow_ops.FIFOQueue(
        capacity=capacity, dtypes=example_dtypes, shapes=example_shapes)
    _enqueue(fifo, examples, num_threads, enqueue_many)
    # Summarize the queue size scaled by its capacity.
    tag = "queue/%s/fraction_of_%d_full" % (fifo.name, capacity)
    fill_level = math_ops.cast(fifo.size(), types.float32) * (1. / capacity)
    summary_ops.scalar_summary(tag, fill_level)
    return fifo.dequeue_many(batch_size, name=scope)
def shuffle_batch_join(tensor_list_list, batch_size, capacity,
                       min_after_dequeue, seed=None, enqueue_many=False,
                       shapes=None, name=None):
  """Creates shuffled batches from several lists of tensors.

  This version enqueues a different list of tensors in different threads.
  It adds the following to the current `Graph`:

  * A shuffling queue into which tensors from `tensor_list_list` are enqueued.
  * A `dequeue_many` operation to create batches from the queue.
  * A `QueueRunner` to `QUEUE_RUNNER` collection, to enqueue the tensors
    from `tensor_list_list`.

  `len(tensor_list_list)` threads will be started, with thread `i` enqueuing
  the tensors from `tensor_list_list[i]`. `tensor_list_list[i1][j]` must match
  `tensor_list_list[i2][j]` in type and shape, except in the first dimension
  if `enqueue_many` is true.

  If `enqueue_many` is `False`, each `tensor_list_list[i]` is assumed
  to represent a single example. An input tensor with shape `[x, y, z]`
  will be output as a tensor with shape `[batch_size, x, y, z]`.

  If `enqueue_many` is `True`, `tensor_list_list[i]` is assumed to
  represent a batch of examples, where the first dimension is indexed
  by example, and all members of `tensor_list_list[i]` should have the
  same size in the first dimension. If an input tensor has shape
  `[*, x, y, z]`, the output will have shape `[batch_size, x, y, z]`.

  The `capacity` argument controls how long the prefetching is allowed to
  grow the queues.

  The returned operation is a dequeue operation and will throw
  `tf.errors.OutOfRangeError` if the input queue is exhausted. If this
  operation is feeding another input queue, its queue runner will catch
  this exception, however, if this operation is used in your main thread
  you are responsible for catching this yourself.

  Args:
    tensor_list_list: A list of tuples of tensors to enqueue.
    batch_size: An integer. The new batch size pulled from the queue.
    capacity: An integer. The maximum number of elements in the queue.
    min_after_dequeue: Minimum number elements in the queue after a
      dequeue, used to ensure a level of mixing of elements.
    seed: Seed for the random shuffling within the queue.
    enqueue_many: If `False`, each `tensor_list_list[i]` is a single example;
      if `True`, it is a batch of examples indexed by the first dimension.
    shapes: (Optional) The shapes for each example. Defaults to the
      inferred shapes for `tensor_list_list[i]`.
    name: (Optional) A name for the operations.

  Returns:
    A list of tensors with the same number and types as
    `tensor_list_list[i]`.

  Raises:
    ValueError: If the `shapes` are not specified, and cannot be
      inferred from the elements of `tensor_list_list`.
  """
  with ops.op_scope(
      _flatten(tensor_list_list), name, "shuffle_batch_join") as scope:
    validated = _validate_join(tensor_list_list)
    component_dtypes = _dtypes(validated)
    component_shapes = _shapes(validated, shapes, enqueue_many)
    shuffle_queue = data_flow_ops.RandomShuffleQueue(
        capacity=capacity, min_after_dequeue=min_after_dequeue, seed=seed,
        dtypes=component_dtypes, shapes=component_shapes)
    _enqueue_join(shuffle_queue, validated, enqueue_many)
    # Elements above the `min_after_dequeue` floor, clamped at zero.
    excess = math_ops.maximum(0, shuffle_queue.size() - min_after_dequeue)
    excess = math_ops.cast(excess, dtypes.float32)
    headroom = capacity - min_after_dequeue
    full = excess * (1. / headroom)
    # Note that the scope name contains a '/' at the end so we intentionally
    # do not place a '/' after %s below.
    summary_name = (
        "queue/%sfraction_over_%d_of_%d_full" %
        (scope, min_after_dequeue, headroom))
    summary_ops.scalar_summary(summary_name, full)
    return shuffle_queue.dequeue_many(batch_size, name=scope)
def shuffle_batch(tensor_list, batch_size, capacity, min_after_dequeue,
                  num_threads=1, seed=None, enqueue_many=False, shapes=None,
                  name=None):
  """Creates batches of randomly shuffled tensors.

  This function adds the following to the current `Graph`:

  * A shuffling queue into which tensors from `tensor_list` are enqueued.
  * A `dequeue_many` operation to create batches from the queue.
  * A `QueueRunner` to `QUEUE_RUNNER` collection, to enqueue the tensors
    from `tensor_list`.

  If `enqueue_many` is `False`, `tensor_list` is assumed to represent a
  single example. An input tensor with shape `[x, y, z]` will be output
  as a tensor with shape `[batch_size, x, y, z]`.

  If `enqueue_many` is `True`, `tensor_list` is assumed to represent a
  batch of examples, where the first dimension is indexed by example,
  and all members of `tensor_list` should have the same size in the
  first dimension. If an input tensor has shape `[*, x, y, z]`, the
  output will have shape `[batch_size, x, y, z]`.

  The `capacity` argument controls how long the prefetching is allowed to
  grow the queues.

  The returned operation is a dequeue operation and will throw
  `tf.errors.OutOfRangeError` if the input queue is exhausted. If this
  operation is feeding another input queue, its queue runner will catch
  this exception, however, if this operation is used in your main thread
  you are responsible for catching this yourself.

  For example:

  ```python
  # Creates batches of 32 images and 32 labels.
  image_batch, label_batch = tf.train.shuffle_batch(
        [single_image, single_label],
        batch_size=32,
        num_threads=4,
        capacity=50000,
        min_after_dequeue=10000)
  ```

  *N.B.:* You must ensure that either (i) the `shapes` argument is
  passed, or (ii) all of the tensors in `tensor_list` must have
  fully-defined shapes. `ValueError` will be raised if neither of
  these conditions holds.

  Args:
    tensor_list: The list of tensors to enqueue.
    batch_size: The new batch size pulled from the queue.
    capacity: An integer. The maximum number of elements in the queue.
    min_after_dequeue: Minimum number elements in the queue after a
      dequeue, used to ensure a level of mixing of elements.
    num_threads: The number of threads enqueuing `tensor_list`.
    seed: Seed for the random shuffling within the queue.
    enqueue_many: If `False`, `tensor_list` is a single example; if `True`,
      it is a batch of examples indexed by the first dimension.
    shapes: (Optional) The shapes for each example. Defaults to the
      inferred shapes for `tensor_list`.
    name: (Optional) A name for the operations.

  Returns:
    A list of tensors with the same number and types as `tensor_list`.

  Raises:
    ValueError: If the `shapes` are not specified, and cannot be
      inferred from the elements of `tensor_list`.
  """
  with ops.op_scope(tensor_list, name, "shuffle_batch") as scope:
    validated = _validate(tensor_list)
    component_dtypes = _dtypes([validated])
    component_shapes = _shapes([validated], shapes, enqueue_many)
    shuffle_queue = data_flow_ops.RandomShuffleQueue(
        capacity=capacity, min_after_dequeue=min_after_dequeue, seed=seed,
        dtypes=component_dtypes, shapes=component_shapes)
    _enqueue(shuffle_queue, validated, num_threads, enqueue_many)
    # Elements above the `min_after_dequeue` floor, clamped at zero.
    excess = math_ops.maximum(0, shuffle_queue.size() - min_after_dequeue)
    excess = math_ops.cast(excess, dtypes.float32)
    headroom = capacity - min_after_dequeue
    full = excess * (1. / headroom)
    # Note that the scope name contains a '/' at the end so we intentionally
    # do not place a '/' after %s below.
    summary_name = (
        "queue/%sfraction_over_%d_of_%d_full" %
        (scope, min_after_dequeue, headroom))
    summary_ops.scalar_summary(summary_name, full)
    return shuffle_queue.dequeue_many(batch_size, name=scope)
def shuffle_batch_join(tensor_list_list, batch_size, capacity,
                       min_after_dequeue, seed=None, enqueue_many=False,
                       shapes=None, name=None):
  """Create batches by randomly shuffling tensors.

  This version enqueues a different list of tensors in different threads.
  It adds the following to the current `Graph`:

  * A shuffling queue into which tensors from `tensor_list_list` are enqueued.
  * A `dequeue_many` operation to create batches from the queue.
  * A `QueueRunner` to `QUEUE_RUNNER` collection, to enqueue the tensors
    from `tensor_list_list`.

  `len(tensor_list_list)` threads will be started, with thread `i` enqueuing
  the tensors from `tensor_list_list[i]`. `tensor_list_list[i1][j]` must match
  `tensor_list_list[i2][j]` in type and shape, except in the first dimension
  if `enqueue_many` is true.

  If `enqueue_many` is `False`, each `tensor_list_list[i]` is assumed
  to represent a single example. An input tensor with shape `[x, y, z]`
  will be output as a tensor with shape `[batch_size, x, y, z]`.

  If `enqueue_many` is `True`, `tensor_list_list[i]` is assumed to
  represent a batch of examples, where the first dimension is indexed
  by example, and all members of `tensor_list_list[i]` should have the
  same size in the first dimension. If an input tensor has shape
  `[*, x, y, z]`, the output will have shape `[batch_size, x, y, z]`.

  The `capacity` argument controls how long the prefetching is allowed to
  grow the queues.

  A scalar summary is also added that reports the number of queued elements
  above `min_after_dequeue` as a fraction of `capacity - min_after_dequeue`,
  clamped below at zero.

  Args:
    tensor_list_list: A list of tuples of tensors to enqueue.
    batch_size: An integer. The new batch size pulled from the queue.
    capacity: An integer. The maximum number of elements in the queue.
    min_after_dequeue: Minimum number elements in the queue after a
      dequeue, used to ensure a level of mixing of elements.
    seed: Seed for the random shuffling within the queue.
    enqueue_many: If `False`, each `tensor_list_list[i]` is a single example;
      if `True`, it is a batch of examples indexed by the first dimension.
    shapes: (Optional) The shapes for each example. Defaults to the
      inferred shapes for `tensor_list_list[i]`.
    name: (Optional) A name for the operations.

  Returns:
    A list of tensors with the same number and types as
    `tensor_list_list[i]`.
  """
  with ops.op_scope(
      _flatten(tensor_list_list), name, "shuffle_batch_join") as name:
    tensor_list_list = _validate_join(tensor_list_list)
    dtypes = _dtypes(tensor_list_list)
    shapes = _shapes(tensor_list_list, shapes, enqueue_many)
    queue = data_flow_ops.RandomShuffleQueue(
        capacity=capacity, min_after_dequeue=min_after_dequeue, seed=seed,
        dtypes=dtypes, shapes=shapes)
    _enqueue_join(queue, tensor_list_list, enqueue_many)
    # While the queue holds fewer than `min_after_dequeue` elements the
    # difference is negative; clamp it so the summary never reports a
    # negative fraction.
    full = (math_ops.cast(math_ops.maximum(0, queue.size() - min_after_dequeue),
                          types.float32) *
            (1. / (capacity - min_after_dequeue)))
    # Note that name contains a '/' at the end so we intentionally do not place
    # a '/' after %s below.
    summary_name = (
        "queue/%sfraction_over_%d_of_%d_full" %
        (name, min_after_dequeue, capacity - min_after_dequeue))
    summary_ops.scalar_summary(summary_name, full)
    return queue.dequeue_many(batch_size, name=name)
def shuffle_batch(tensor_list, batch_size, capacity, min_after_dequeue,
                  num_threads=1, seed=None, enqueue_many=False, shapes=None,
                  name=None):
  """Creates batches by randomly shuffling tensors.

  This function adds the following to the current `Graph`:

  * A shuffling queue into which tensors from `tensor_list` are enqueued.
  * A `dequeue_many` operation to create batches from the queue.
  * A `QueueRunner` to `QUEUE_RUNNER` collection, to enqueue the tensors
    from `tensor_list`.

  If `enqueue_many` is `False`, `tensor_list` is assumed to represent a
  single example. An input tensor with shape `[x, y, z]` will be output
  as a tensor with shape `[batch_size, x, y, z]`.

  If `enqueue_many` is `True`, `tensor_list` is assumed to represent a
  batch of examples, where the first dimension is indexed by example,
  and all members of `tensor_list` should have the same size in the
  first dimension. If an input tensor has shape `[*, x, y, z]`, the
  output will have shape `[batch_size, x, y, z]`.

  The `capacity` argument controls how long the prefetching is allowed to
  grow the queues.

  A scalar summary is also added that reports the number of queued elements
  above `min_after_dequeue` as a fraction of `capacity - min_after_dequeue`,
  clamped below at zero.

  For example:

  ```python
  # Creates batches of 32 images and 32 labels.
  image_batch, label_batch = tf.train.shuffle_batch(
        [single_image, single_label],
        batch_size=32,
        num_threads=4,
        capacity=50000,
        min_after_dequeue=10000)
  ```

  Args:
    tensor_list: The list of tensors to enqueue.
    batch_size: The new batch size pulled from the queue.
    capacity: An integer. The maximum number of elements in the queue.
    min_after_dequeue: Minimum number elements in the queue after a
      dequeue, used to ensure a level of mixing of elements.
    num_threads: The number of threads enqueuing `tensor_list`.
    seed: Seed for the random shuffling within the queue.
    enqueue_many: If `False`, `tensor_list` is a single example; if `True`,
      it is a batch of examples indexed by the first dimension.
    shapes: (Optional) The shapes for each example. Defaults to the
      inferred shapes for `tensor_list`.
    name: (Optional) A name for the operations.

  Returns:
    A list of tensors with the same number and types as `tensor_list`.
  """
  with ops.op_scope(tensor_list, name, "shuffle_batch") as name:
    tensor_list = _validate(tensor_list)
    dtypes = _dtypes([tensor_list])
    shapes = _shapes([tensor_list], shapes, enqueue_many)
    queue = data_flow_ops.RandomShuffleQueue(
        capacity=capacity, min_after_dequeue=min_after_dequeue, seed=seed,
        dtypes=dtypes, shapes=shapes)
    _enqueue(queue, tensor_list, num_threads, enqueue_many)
    # While the queue holds fewer than `min_after_dequeue` elements the
    # difference is negative; clamp it so the summary never reports a
    # negative fraction.
    full = (math_ops.cast(math_ops.maximum(0, queue.size() - min_after_dequeue),
                          types.float32) *
            (1. / (capacity - min_after_dequeue)))
    # Note that name contains a '/' at the end so we intentionally do not place
    # a '/' after %s below.
    summary_name = (
        "queue/%sfraction_over_%d_of_%d_full" %
        (name, min_after_dequeue, capacity - min_after_dequeue))
    summary_ops.scalar_summary(summary_name, full)
    return queue.dequeue_many(batch_size, name=name)
def shuffle_batch_join(tensor_list_list, batch_size, capacity,
                       min_after_dequeue, seed=None, enqueue_many=False,
                       shapes=None, name=None):
  """Create batches by randomly shuffling tensors.

  This version enqueues a different list of tensors in different threads.
  It adds:

  * a shuffling queue into which tensors from tensor_list_list are enqueued.
  * a dequeue many operation to create batches from the queue,
  * and a QueueRunner is added to the current Graph's QUEUE_RUNNER collection,
    to enqueue the tensors from tensor_list_list.

  A scalar summary is also added that reports the number of queued elements
  above min_after_dequeue as a fraction of capacity - min_after_dequeue,
  clamped below at zero.

  Args:
    tensor_list_list: A list of tuples of tensors to enqueue.
      len(tensor_list_list) threads will be started, with the i-th
      thread enqueuing the tensors from tensor_list_list[i].
      tensor_list_list[i1][j] must match tensor_list_list[i2][j] in type and
      shape (except in the first dimension if enqueue_many is true).
    batch_size: The new batch size pulled from the queue.
    capacity: Maximum number of elements in the queue, controls how far
      ahead the prefetching is allowed to get and memory usage.
    min_after_dequeue: Minimum number elements in the queue after a
      dequeue, used to ensure a level of mixing of elements.
    seed: Seed for the random shuffling within the queue.
    enqueue_many: If False, each tensor_list_list[i] is assumed to
      represent a single example. If True, tensor_list_list[i] is
      assumed to represent a batch of examples, where the first
      dimension is indexed by example, and all members of
      tensor_list_list[i] should have the same size in the first
      dimension.
    shapes: Optional. The shapes for each example. Defaults to the
      inferred shapes for tensor_list_list[i] (which must match, after
      leaving off the first dimension if enqueue_many is True).
    name: A name for the operations (optional).

  Returns:
    A list of tensors with the same number and types as
    tensor_list_list[i]. If enqueue_many is false, then an input
    tensor with shape `[x, y, z]` will be output as a tensor with
    shape `[batch_size, x, y, z]`. If enqueue_many is True, and an
    input tensor has shape `[*, x, y, z]`, the output will have
    shape `[batch_size, x, y, z]`.
  """
  with ops.op_scope(
      _flatten(tensor_list_list), name, "shuffle_batch_join") as name:
    tensor_list_list = _validate_join(tensor_list_list)
    dtypes = _dtypes(tensor_list_list)
    shapes = _shapes(tensor_list_list, shapes, enqueue_many)
    queue = data_flow_ops.RandomShuffleQueue(
        capacity=capacity, min_after_dequeue=min_after_dequeue, seed=seed,
        dtypes=dtypes, shapes=shapes)
    _enqueue_join(queue, tensor_list_list, enqueue_many)
    # While the queue holds fewer than `min_after_dequeue` elements the
    # difference is negative; clamp it so the summary never reports a
    # negative fraction.
    full = (math_ops.cast(math_ops.maximum(0, queue.size() - min_after_dequeue),
                          types.float32) *
            (1. / (capacity - min_after_dequeue)))
    # Note that name contains a '/' at the end so we intentionally do not place
    # a '/' after %s below.
    summary_name = (
        "queue/%sfraction_over_%d_of_%d_full" %
        (name, min_after_dequeue, capacity - min_after_dequeue))
    summary_ops.scalar_summary(summary_name, full)
    return queue.dequeue_many(batch_size, name=name)