Example #1
def batch_inputs(feature_map, data_files, height=2048, width=2448,
                 batch_size=1, is_train=True, num_readers=1, num_preprocess_threads=4):
    # feature_map: feature map describing the Example proto to parse.
    # data_files: list of TFRecord file paths.
    # batch_size: size of one batch.
    # is_train: the DataProvider behaves differently in train and test;
    #     mainly, test does not need a shuffling queue.
    # num_readers: number of parallel readers.
    # num_preprocess_threads: number of data-preprocessing threads.
    with tf.name_scope('reader_definition'):
        # Create the filename queue: shuffled for training, sequential for testing.
        if is_train:
            filename_queue = tf.train.string_input_producer(data_files, shuffle=True, capacity=16)
        else:
            filename_queue = tf.train.string_input_producer(data_files, shuffle=False, capacity=1)
        # Use at least one reader.
        num_readers = 1 if num_readers < 1 else num_readers
        
        if num_readers > 1:
            # Size of the example buffer pool.
            examples_per_shard = 1024
            min_queue_examples = examples_per_shard * 16
            if is_train:
                examples_queue = tf.RandomShuffleQueue(capacity=min_queue_examples + 3 * batch_size,
                                                       min_after_dequeue=min_queue_examples,
                                                       dtypes=[tf.string])
            else:
                examples_queue = tf.FIFOQueue(capacity=examples_per_shard + 3 * batch_size, 
                                              dtypes=[tf.string])
            
            # With multiple readers, feed a shared examples queue via a QueueRunner.
            enqueue_ops = []
            for _ in range(num_readers):
                reader = tf.TFRecordReader()
                _, value = reader.read(filename_queue)
                enqueue_ops.append(examples_queue.enqueue([value]))
            
            tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops))
            example_serialized = examples_queue.dequeue()
        else:
            reader = tf.TFRecordReader()
            _, example_serialized = reader.read(filename_queue)
        
        samples = []
        for _ in range(num_preprocess_threads):
            features = tf.parse_single_example(example_serialized, feature_map)
            samples.append([image_processing(features['image/encoded'], height, width), features['image/format']])
            
        batch_data = tf.train.batch_join(samples, batch_size=batch_size,
                                         capacity=2 * num_preprocess_threads * batch_size)
                
        data = tf.reshape(batch_data[0], [batch_size, -1])
        label = tf.reshape(batch_data[1], [batch_size])
        return (data, label)
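A minimal driver sketch for batch_inputs (an assumption-laden example, not part of the original snippet: it presumes a TF1.x runtime, that the image_processing helper referenced above is defined, and a hypothetical train.tfrecord file). All of the queue-based pipelines on this page are driven the same way: start the registered queue runners under a Coordinator, then run the batch tensors.

feature_map = {
    'image/encoded': tf.FixedLenFeature([], tf.string),
    'image/format': tf.FixedLenFeature([], tf.string),
}
data, label = batch_inputs(feature_map, ['train.tfrecord'], batch_size=8)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    # Starts the reader threads registered via add_queue_runner as well as
    # the batching threads created by tf.train.batch_join.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    data_val, label_val = sess.run([data, label])
    coord.request_stop()
    coord.join(threads)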
Example #2
def _shuffle_inputs(input_tensors, capacity, min_after_dequeue, num_threads):
    """Shuffles tensors in `input_tensors`, maintaining grouping."""
    shuffle_queue = tf.RandomShuffleQueue(
        capacity, min_after_dequeue, dtypes=[t.dtype for t in input_tensors])
    enqueue_op = shuffle_queue.enqueue(input_tensors)
    runner = tf.train.QueueRunner(shuffle_queue, [enqueue_op] * num_threads)
    tf.train.add_queue_runner(runner)

    output_tensors = shuffle_queue.dequeue()

    for i in range(len(input_tensors)):
        output_tensors[i].set_shape(input_tensors[i].shape)

    return output_tensors
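A short usage sketch of _shuffle_inputs (assumptions: TF1.x, a toy in-memory dataset fed through tf.train.slice_input_producer to obtain per-example tensors). It illustrates what "maintaining grouping" means: each (feature, label) pair travels through the shuffle queue as a single queue element, so pairs stay aligned even after shuffling.

# Row i of the features is [2i, 2i+1]; label i is i.
feature, label = tf.train.slice_input_producer(
    [tf.reshape(tf.range(200, dtype=tf.float32), [100, 2]),
     tf.range(100)],
    shuffle=False)
shuffled = _shuffle_inputs([feature, label], capacity=64,
                           min_after_dequeue=16, num_threads=2)
feature_batch, label_batch = tf.train.batch(shuffled, batch_size=8)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    f, l = sess.run([feature_batch, label_batch])  # f[j] == [2*l[j], 2*l[j]+1]
    coord.request_stop()
    coord.join(threads)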
Example #3
    def make_batch(observations):
        queue = tf.RandomShuffleQueue(capacity=queue_capacity,
                                      min_after_dequeue=queue_min,
                                      shapes=[[k, height, width, nch]],
                                      dtypes=[tf.float32])

        example = tf.stack(observations, axis=0)
        enqueue_op = queue.enqueue(example)
        qr = tf.train.QueueRunner(queue, [enqueue_op] * queue_nthreads)
        tf.train.add_queue_runner(qr)

        tf.summary.scalar('queue_size', queue.size())

        return queue.dequeue_many(batch_size)
Example #4
def _shuffling_queue(shuffling_queue_capacity, min_after_dequeue, dtypes, fields_as_list):
    """Creates a shuffling queue with enqueue/dequeue pair. Always a single writing thread."""

    # Named tuples lose the 'named' part when going through the queue
    shuffling_queue = tf.RandomShuffleQueue(shuffling_queue_capacity, min_after_dequeue, dtypes)

    # The following call to .size has a side effect of creating a new node in the TF graph. We are interested
    # in the side effect so we can read the queue size somewhere else, addressing the node by a 'well-known-name'
    shuffling_queue.size(name=RANDOM_SHUFFLING_QUEUE_SIZE)

    # We need the queue only for shuffling, so we use a single enqueueing thread (ideally we would
    # not introduce any threads at all, but TF's queue API does not seem to offer that).
    queue_runner = tf.train.QueueRunner(shuffling_queue, [shuffling_queue.enqueue(fields_as_list)])

    tf.train.add_queue_runner(queue_runner)

    # After passing through the queue we get back a plain ordered list; the order matches the order of fields in the unischema.
    fields_as_list = shuffling_queue.dequeue()
    return fields_as_list
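The 'well-known-name' trick described in the comments above can then be used by any code that holds only a session, without threading the size tensor through the call stack. A sketch (assuming RANDOM_SHUFFLING_QUEUE_SIZE is the module-level constant naming the size op, and that the queue is built outside any enclosing name scope):

queue_size = sess.graph.get_tensor_by_name(RANDOM_SHUFFLING_QUEUE_SIZE + ':0')
print('examples buffered for shuffling:', sess.run(queue_size))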
Example #5
    def _build(self):
        # Find split file from which we are going to read.
        split_path = os.path.join(self._dataset_dir,
                                  '{}.tfrecords'.format(self._split))
        if not tf.gfile.Exists(split_path):
            raise InvalidDataDirectory(
                '"{}" does not exist.'.format(split_path))
        # String input producer allows for a variable number of files to read
        # from. We just know we have a single file.
        filename_queue = tf.train.string_input_producer(
            [split_path], num_epochs=self._num_epochs, seed=self._seed)

        # Define reader to parse records.
        reader = tf.TFRecordReader()
        _, raw_record = reader.read(filename_queue)

        values, dtypes, names = self.read_record(raw_record)

        if self._random_shuffle:
            queue = tf.RandomShuffleQueue(capacity=100,
                                          min_after_dequeue=0,
                                          dtypes=dtypes,
                                          names=names,
                                          name='tfrecord_random_queue',
                                          seed=self._seed)
        else:
            queue = tf.FIFOQueue(capacity=100,
                                 dtypes=dtypes,
                                 names=names,
                                 name='tfrecord_fifo_queue')

        # Generate queueing ops for QueueRunner.
        enqueue_ops = [queue.enqueue(values)] * self._total_queue_ops
        self.queue_runner = tf.train.QueueRunner(queue, enqueue_ops)

        tf.train.add_queue_runner(self.queue_runner)

        return queue.dequeue()
Example #6
def imagenet_inputs(batch_size,
                    image_size,
                    num_readers=1,
                    num_preprocess_threads=4):
    """Loads a batch of imagenet inputs.

  Used as a replacement for inception.image_processing.inputs in
  tensorflow/checkpoint in order to get around the use of hard-coded flags in the
  image_processing module.

  Args:
    batch_size: int, batch size.
    image_size: int. The images will be resized bilinearly to shape
        [image_size, image_size].
    num_readers: int, number of parallel readers. Must be at least 1.
    num_preprocess_threads: int, number of preprocessing threads per tower.
        Must be a multiple of 4.

  Returns:
    4-D tensor of images of shape [batch_size, image_size, image_size, 3], with
    values in [0, 1].

  Raises:
    IOError: If ImageNet data files cannot be found.
    ValueError: If `num_preprocess_threads` is not a multiple of 4 or
        `num_readers` is less than 1.
  """
    imagenet = imagenet_data.ImagenetData('train')

    with tf.name_scope('batch_processing'):
        data_files = imagenet.data_files()
        if data_files is None:
            raise IOError('No ImageNet data files found')

        # Create filename_queue.
        filename_queue = tf.train.string_input_producer(data_files,
                                                        shuffle=True,
                                                        capacity=16)

        if num_preprocess_threads % 4:
            raise ValueError('Please make num_preprocess_threads a multiple '
                             'of 4 (%d %% 4 != 0).' % num_preprocess_threads)

        if num_readers < 1:
            raise ValueError('Please make num_readers at least 1')

        # Approximate number of examples per shard.
        examples_per_shard = 1024
        # Size the random shuffle queue to balance between good global
        # mixing (more examples) and memory use (fewer examples).
        # 1 image uses 299*299*3*4 bytes = 1MB
        # The default input_queue_memory_factor is 16 implying a shuffling queue
        # size: examples_per_shard * 16 * 1MB = 17.6GB
        input_queue_memory_factor = 16
        min_queue_examples = examples_per_shard * input_queue_memory_factor
        examples_queue = tf.RandomShuffleQueue(
            capacity=min_queue_examples + 3 * batch_size,
            min_after_dequeue=min_queue_examples,
            dtypes=[tf.string])

        # Create multiple readers to populate the queue of examples.
        enqueue_ops = []
        for _ in range(num_readers):
            reader = imagenet.reader()
            _, value = reader.read(filename_queue)
            enqueue_ops.append(examples_queue.enqueue([value]))

        tf.train.queue_runner.add_queue_runner(
            tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops))
        example_serialized = examples_queue.dequeue()

        images_and_labels = []
        for _ in range(num_preprocess_threads):
            # Parse a serialized Example proto to extract the image and metadata.
            image_buffer, label_index, _, _ = _parse_example_proto(
                example_serialized)
            image = tf.image.decode_jpeg(image_buffer, channels=3)

            # pylint: disable=protected-access
            image = _aspect_preserving_resize(image, image_size + 2)
            image = _central_crop([image], image_size, image_size)[0]
            # pylint: enable=protected-access
            image.set_shape([image_size, image_size, 3])
            image = tf.to_float(image) / 255.0

            images_and_labels.append([image, label_index])

        images, label_index_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size,
            capacity=2 * num_preprocess_threads * batch_size)

        images = tf.reshape(images,
                            shape=[batch_size, image_size, image_size, 3])

        # Display the training images in the visualizer.
        tf.summary.image('images', images)

        return images, tf.reshape(label_index_batch, [batch_size])
Example #7
def arbitrary_style_image_inputs(style_dataset_file,
                                 batch_size=None,
                                 image_size=None,
                                 center_crop=True,
                                 shuffle=True,
                                 augment_style_images=False,
                                 random_style_image_size=False,
                                 min_rand_image_size=128,
                                 max_rand_image_size=300):
    """Loads a batch of random style image given the path of tfrecord dataset.

  This method does not return pre-computed Gram matrices for the images like
  style_image_inputs does, but it can provide data augmentation. If
  augment_style_images is True, style images will be randomly modified
  (e.g. changes in brightness, hue or saturation) for data augmentation. If
  random_style_image_size is True, all images in one batch will be resized
  to a random size.

  Args:
    style_dataset_file: str, path to the tfrecord dataset of style files.
    batch_size: int. If provided, batches style images. Defaults to None.
    image_size: int. The images will be resized bilinearly so that the smallest
        side has size image_size. Defaults to None.
    center_crop: bool. If True, center-crops to [image_size, image_size].
        Defaults to True.
    shuffle: bool, whether to shuffle style files at random. Defaults to True.
    augment_style_images: bool. Whether to augment style images or not.
    random_style_image_size: bool. If this value is True, then all the style
        images in one batch will be resized to a random size between
        min_rand_image_size and max_rand_image_size.
    min_rand_image_size: int. If random_style_image_size is True, this value
        specifies the minimum image size.
    max_rand_image_size: int. If random_style_image_size is True, this value
        specifies the maximum image size.

  Returns:
    4-D tensor of shape [1, ?, ?, 3] with values in [0, 1] for the style
    image (with random changes for data augmentation if
    augment_style_images is set to True), a 0-D tensor for the style
    label, and a 4-D tensor of shape [1, ?, ?, 3] with values in [0, 1] for
    the style image without random changes for data augmentation.

  Raises:
    ValueError: if center cropping is requested but no image size is provided,
        or if batch size is specified but center-cropping or
        augment-style-images is not requested,
        or if both augment-style-images and center-cropping are requested.
  """
    if center_crop and image_size is None:
        raise ValueError('center-cropping requires specifying the image size.')
    if center_crop and augment_style_images:
        raise ValueError(
            'When augment_style_images is true images will be randomly cropped.'
        )
    if batch_size is not None and not center_crop and not augment_style_images:
        raise ValueError(
            'batching requires same image sizes (Set center-cropping or '
            'augment_style_images to true)')

    with tf.name_scope('style_image_processing'):
        # Force all input processing onto CPU in order to reserve the GPU for the
        # forward inference and back-propagation.
        with tf.device('/cpu:0'):
            filename_queue = tf.train.string_input_producer(
                [style_dataset_file],
                shuffle=False,
                capacity=1,
                name='filename_queue')
            if shuffle:
                examples_queue = tf.RandomShuffleQueue(
                    capacity=64,
                    min_after_dequeue=32,
                    dtypes=[tf.string],
                    name='random_examples_queue')
            else:
                examples_queue = tf.FIFOQueue(capacity=64,
                                              dtypes=[tf.string],
                                              name='fifo_examples_queue')
            reader = tf.TFRecordReader()
            _, value = reader.read(filename_queue)
            enqueue_ops = [examples_queue.enqueue([value])]
            tf.train.queue_runner.add_queue_runner(
                tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops))
            example_serialized = examples_queue.dequeue()
            features = tf.parse_single_example(
                example_serialized,
                features={
                    'label': tf.FixedLenFeature([], tf.int64),
                    'image_raw': tf.FixedLenFeature([], tf.string)
                })
            image = tf.image.decode_jpeg(features['image_raw'])
            image.set_shape([None, None, 3])
            label = features['label']
            # Fallback when image_size is None: the unaugmented copy is the
            # image itself (image_orig would otherwise be undefined below).
            image_orig = image

            if image_size is not None:
                image_channels = int(image.shape[2])
                if augment_style_images:
                    image_orig = image
                    image = tf.image.random_brightness(image, max_delta=0.8)
                    image = tf.image.random_saturation(image,
                                                       lower=0.5,
                                                       upper=1.5)
                    image = tf.image.random_hue(image, max_delta=0.2)
                    image = tf.image.random_flip_left_right(image)
                    image = tf.image.random_flip_up_down(image)
                    random_larger_image_size = tf.random_uniform(
                        [],
                        minval=image_size + 2,
                        maxval=image_size + 200,
                        dtype=tf.int32)
                    image = _aspect_preserving_resize(
                        image, random_larger_image_size)
                    image = tf.random_crop(
                        image, size=[image_size, image_size, image_channels])
                    image.set_shape([image_size, image_size, image_channels])

                    image_orig = _aspect_preserving_resize(
                        image_orig, image_size + 2)
                    image_orig = _central_crop([image_orig], image_size,
                                               image_size)[0]
                    image_orig.set_shape([image_size, image_size, 3])
                elif center_crop:
                    image = _aspect_preserving_resize(image, image_size + 2)
                    image = _central_crop([image], image_size, image_size)[0]
                    image.set_shape([image_size, image_size, image_channels])
                    image_orig = image
                else:
                    image = _aspect_preserving_resize(image, image_size)
                    image_orig = image

            image = tf.to_float(image) / 255.0
            image_orig = tf.to_float(image_orig) / 255.0

            if batch_size is None:
                image = tf.expand_dims(image, 0)
            else:
                [image, image_orig,
                 label] = tf.train.batch([image, image_orig, label],
                                         batch_size=batch_size)

            if random_style_image_size:
                # Selects a random size for the style images and resizes all the images
                # in the batch to that size.
                image = _aspect_preserving_resize(
                    image,
                    tf.random_uniform([],
                                      minval=min_rand_image_size,
                                      maxval=max_rand_image_size,
                                      dtype=tf.int32))

            return image, label, image_orig
Example #8
def style_image_inputs(style_dataset_file,
                       batch_size=None,
                       image_size=None,
                       square_crop=False,
                       shuffle=True):
    """Loads a batch of random style image given the path of tfrecord dataset.

  Args:
    style_dataset_file: str, path to the tfrecord dataset of style files.
        The dataset is produced via the create_style_dataset.py script and is
        made of Example protobufs with the following features:
        * 'image_raw': byte encoding of the JPEG string of the style image.
        * 'label': integer identifier of the style image in [0, N - 1], where
              N is the number of examples in the dataset.
        * 'vgg_16/<LAYER_NAME>': Gram matrix at layer <LAYER_NAME> of the VGG-16
              network (<LAYER_NAME> in {conv,pool}{1,2,3,4,5}) for the style
              image.
    batch_size: int. If provided, batches style images. Defaults to None.
    image_size: int. The images will be resized bilinearly so that the smallest
        side has size image_size. Defaults to None.
    square_crop: bool. If True, square-crops to [image_size, image_size].
        Defaults to False.
    shuffle: bool, whether to shuffle style files at random. Defaults to True.

  Returns:
    If batch_size is defined, a 4-D tensor of shape [batch_size, ?, ?, 3] with
    values in [0, 1] for the style image, and 1-D tensor for the style label.

  Raises:
    ValueError: if center cropping is requested but no image size is provided,
        or if batch size is specified but center-cropping is not requested.
  """
    vgg_layers = [
        'vgg_16/conv1', 'vgg_16/pool1', 'vgg_16/conv2', 'vgg_16/pool2',
        'vgg_16/conv3', 'vgg_16/pool3', 'vgg_16/conv4', 'vgg_16/pool4',
        'vgg_16/conv5', 'vgg_16/pool5'
    ]

    if square_crop and image_size is None:
        raise ValueError('center-cropping requires specifying the image size.')
    if batch_size is not None and not square_crop:
        raise ValueError('batching requires center-cropping.')

    with tf.name_scope('style_image_processing'):
        filename_queue = tf.train.string_input_producer([style_dataset_file],
                                                        shuffle=False,
                                                        capacity=1,
                                                        name='filename_queue')
        if shuffle:
            examples_queue = tf.RandomShuffleQueue(
                capacity=64,
                min_after_dequeue=32,
                dtypes=[tf.string],
                name='random_examples_queue')
        else:
            examples_queue = tf.FIFOQueue(capacity=64,
                                          dtypes=[tf.string],
                                          name='fifo_examples_queue')
        reader = tf.TFRecordReader()
        _, value = reader.read(filename_queue)
        enqueue_ops = [examples_queue.enqueue([value])]
        tf.train.queue_runner.add_queue_runner(
            tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops))
        example_serialized = examples_queue.dequeue()
        features = tf.parse_single_example(
            example_serialized,
            features={
                'label': tf.FixedLenFeature([], tf.int64),
                'image_raw': tf.FixedLenFeature([], tf.string),
                'vgg_16/conv1': tf.FixedLenFeature([64, 64], tf.float32),
                'vgg_16/pool1': tf.FixedLenFeature([64, 64], tf.float32),
                'vgg_16/conv2': tf.FixedLenFeature([128, 128], tf.float32),
                'vgg_16/pool2': tf.FixedLenFeature([128, 128], tf.float32),
                'vgg_16/conv3': tf.FixedLenFeature([256, 256], tf.float32),
                'vgg_16/pool3': tf.FixedLenFeature([256, 256], tf.float32),
                'vgg_16/conv4': tf.FixedLenFeature([512, 512], tf.float32),
                'vgg_16/pool4': tf.FixedLenFeature([512, 512], tf.float32),
                'vgg_16/conv5': tf.FixedLenFeature([512, 512], tf.float32),
                'vgg_16/pool5': tf.FixedLenFeature([512, 512], tf.float32)
            })
        image = tf.image.decode_jpeg(features['image_raw'])
        label = features['label']
        gram_matrices = [features[vgg_layer] for vgg_layer in vgg_layers]
        image.set_shape([None, None, 3])

        if image_size:
            if square_crop:
                image = _aspect_preserving_resize(image, image_size + 2)
                image = _central_crop([image], image_size, image_size)[0]
                image.set_shape([image_size, image_size, 3])
            else:
                image = _aspect_preserving_resize(image, image_size)

        image = tf.to_float(image) / 255.0

        if batch_size is None:
            image = tf.expand_dims(image, 0)
        else:
            image_label_gram_matrices = tf.train.batch([image, label] +
                                                       gram_matrices,
                                                       batch_size=batch_size)
            image, label = image_label_gram_matrices[:2]
            gram_matrices = image_label_gram_matrices[2:]

        gram_matrices = dict(zip(vgg_layers, gram_matrices))
        return image, label, gram_matrices
Example #9
def group_choose_k(named_id_to_fps,
                   k,
                   n=None,
                   with_replacement=False,
                   capacity=4096,
                   min_after_dequeue=2048,
                   nthreads=4):
    assert k > 0

    # Join (variable-length) groups into CSV strings for enqueueing
    avg_group_size = int(
        np.ceil(
            np.mean([len(group_fps)
                     for group_fps in named_id_to_fps.values()])))
    named_id_to_fps = [
        ','.join(group_fps) for group_fps in named_id_to_fps.values()
    ]

    # If n is None, compute a reasonable value (avg group len choose k)
    if n is None:
        f = math.factorial
        # Integer division: n is used below as a tensor shape.
        n = f(avg_group_size) // (f(k) * f(avg_group_size - k))

    # Dequeue one and split it into group
    group_fps = tf.train.string_input_producer(named_id_to_fps).dequeue()
    group_fps = tf.string_split([group_fps], ',').values
    group_size = tf.shape(group_fps)[0]
    tf.summary.histogram('group_size', group_size)

    # Select some at random
    # TODO: There should be a way to sample without replacement here rather than manually filtering.
    tuple_ids = tf.random_uniform([n, k],
                                  minval=0,
                                  maxval=group_size,
                                  dtype=tf.int32)

    # Count num tuples enqueued
    ntotal = tf.Variable(0)
    tf.summary.scalar('tuples_ntotal', ntotal)
    add_total = tf.assign_add(ntotal, n)

    # Filter duplicates if sampling tuples without replacement
    if not with_replacement and k > 1:
        # Find unique tuples: a tuple is unique iff no two of its k ids are equal.
        tuple_unique = tf.ones([n], tf.bool)
        for i in range(k):
            for j in range(k):
                if i == j:
                    continue
                pair_unique = tf.not_equal(tuple_ids[:, i], tuple_ids[:, j])
                tuple_unique = tf.logical_and(tuple_unique, pair_unique)

        # Filter tuples with duplicates
        valid_tuples = tf.where(tuple_unique)[:, 0]

        # Count num valid tuples enqueued
        nvalid = tf.Variable(0)
        tf.summary.scalar('tuples_nvalid', nvalid)
        tf.summary.scalar(
            'tuples_valid_ratio',
            tf.cast(nvalid, tf.float32) / tf.cast(ntotal, tf.float32))
        add_valid = tf.assign_add(nvalid, tf.shape(valid_tuples)[0])

        # Gather valid ids
        with tf.control_dependencies([add_valid]):
            tuple_ids = tf.gather(tuple_ids, valid_tuples)

    # Gather valid tuples
    with tf.control_dependencies([add_total]):
        tuples = tf.gather(group_fps, tuple_ids)

    # Make batches
    tuple_q = tf.RandomShuffleQueue(capacity, min_after_dequeue, tuples.dtype,
                                    [k])
    tuple_enq = tuple_q.enqueue_many(tuples)
    tf.train.add_queue_runner(
        tf.train.QueueRunner(tuple_q, [tuple_enq] * nthreads))

    tf.summary.scalar('tuples_queue_size', tuple_q.size())

    return tuple_q.dequeue()
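A minimal sketch of calling group_choose_k (hypothetical file names, TF1.x): sampling pairs (k=2) of paths that belong to the same group id. Note the counter variables ntotal/nvalid require an initializer run before dequeuing.

named_id_to_fps = {
    'spk0': ['a0.wav', 'a1.wav', 'a2.wav'],
    'spk1': ['b0.wav', 'b1.wav'],
}
pair = group_choose_k(named_id_to_fps, k=2,
                      capacity=32, min_after_dequeue=8, nthreads=1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())  # ntotal / nvalid counters
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    print(sess.run(pair))  # e.g. [b'a2.wav' b'a0.wav']
    coord.request_stop()
    coord.join(threads)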
Example #10
def read_and_push_instance(filename_queue, instance_queue):
    reader = tf.TextLineReader(skip_header_lines=1)
    key, value = reader.read(filename_queue)
    x1, x2, target = tf.decode_csv(value, record_defaults=[[-1.], [-1.], [-1]])
    features = tf.stack([x1, x2])
    enqueue_instance = instance_queue.enqueue([features, target])
    return enqueue_instance


filename_queue = tf.FIFOQueue(capacity=10, dtypes=[tf.string], shapes=[()])
filename = tf.placeholder(tf.string)
enqueue_filename = filename_queue.enqueue([filename])
close_filename_queue = filename_queue.close()

instance_queue = tf.RandomShuffleQueue(capacity=10,
                                       min_after_dequeue=2,
                                       dtypes=[tf.float32, tf.int32],
                                       shapes=[[2], []],
                                       name="instance_q",
                                       shared_name="shared_instance_q")

minibatch_instances, minibatch_targets = instance_queue.dequeue_up_to(2)

read_and_enqueue_ops = [
    read_and_push_instance(filename_queue, instance_queue) for i in range(5)
]
queue_runner = tf.train.QueueRunner(instance_queue, read_and_enqueue_ops)

with tf.Session() as sess:
    sess.run(enqueue_filename, feed_dict={filename: "my_test.csv"})
    sess.run(close_filename_queue)
    coord = tf.train.Coordinator()
    enqueue_threads = queue_runner.create_threads(sess, coord=coord, start=True)
    try:
        while True:
            print(sess.run([minibatch_instances, minibatch_targets]))
    except tf.errors.OutOfRangeError:
        # The queue runner closes the instance queue once the filename queue
        # is exhausted; dequeue_up_to then raises OutOfRangeError.
        pass
    coord.request_stop()
    coord.join(enqueue_threads)