def build_inputs(self):
    """Input prefetching, preprocessing and batching.

    Outputs:
      self.images
      self.input_seqs
      self.target_seqs (training and eval only)
      self.input_mask (training and eval only)
    """
    if self.mode == "inference":
      # In inference mode, images and inputs are fed via placeholders.
      image_feed = tf.placeholder(dtype=tf.string, shape=[], name="image_feed")
      input_feed = tf.placeholder(dtype=tf.int64,
                                  shape=[None],  # batch_size
                                  name="input_feed")

      # Process image and insert batch dimensions.
      images = tf.expand_dims(self.process_image(image_feed), 0)
      input_seqs = tf.expand_dims(input_feed, 1)

      # No target sequences or input mask in inference mode.
      target_seqs = None
      input_mask = None
    else:
      # Prefetch serialized SequenceExample protos.
      input_queue = input_ops.prefetch_input_data(
          self.reader,
          self.config.input_file_pattern,
          is_training=self.is_training(),
          batch_size=self.config.batch_size,
          values_per_shard=self.config.values_per_input_shard,
          input_queue_capacity_factor=self.config.input_queue_capacity_factor,
          num_reader_threads=self.config.num_input_reader_threads)

      # Image processing and random distortion. Split across multiple threads
      # with each thread applying a slightly different distortion.
      assert self.config.num_preprocess_threads % 2 == 0
      images_and_captions = []
      for thread_id in range(self.config.num_preprocess_threads):
        serialized_sequence_example = input_queue.dequeue()
        encoded_image, caption = input_ops.parse_sequence_example(
            serialized_sequence_example,
            image_feature=self.config.image_feature_name,
            caption_feature=self.config.caption_feature_name)
        image = self.process_image(encoded_image, thread_id=thread_id)
        images_and_captions.append([image, caption])

      # Batch inputs.
      queue_capacity = (2 * self.config.num_preprocess_threads *
                        self.config.batch_size)
      images, input_seqs, target_seqs, input_mask = (
          input_ops.batch_with_dynamic_pad(images_and_captions,
                                           batch_size=self.config.batch_size,
                                           queue_capacity=queue_capacity))

    self.images = images
    self.input_seqs = input_seqs
    self.target_seqs = target_seqs
    self.input_mask = input_mask
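# A minimal usage sketch for the inference placeholders above, assuming the
# graph was built with mode == "inference" and `sess` is a tf.Session with the
# trained weights already restored. The fetched name "softmax:0" is a
# hypothetical stand-in for whatever output tensor the model exposes.
import tensorflow as tf

with tf.gfile.GFile("example.jpg", "rb") as f:
  encoded_image = f.read()  # raw JPEG bytes for the scalar string placeholder

probs = sess.run(
    "softmax:0",
    feed_dict={
        "image_feed:0": encoded_image,
        "input_feed:0": [1],  # word ids of the partial caption so far
    })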
Example 2
    def build_inputs(self):
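        """Input prefetching, preprocessing and batching for five
        image/caption pairs per example.

        Outputs:
          self.images_0 ... self.images_4, plus the corresponding
          input_seqs, target_seqs and input_mask attributes
          (target_seqs and input_mask are training and eval only).
        """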
        if self.mode == "inference":
            # In inference mode, images and inputs are fed via placeholders.
            image_feed_0 = tf.placeholder(dtype=tf.string,
                                          shape=[],
                                          name="image_feed_0")
            image_feed_1 = tf.placeholder(dtype=tf.string,
                                          shape=[],
                                          name="image_feed_1")
            image_feed_2 = tf.placeholder(dtype=tf.string,
                                          shape=[],
                                          name="image_feed_2")
            image_feed_3 = tf.placeholder(dtype=tf.string,
                                          shape=[],
                                          name="image_feed_3")
            image_feed_4 = tf.placeholder(dtype=tf.string,
                                          shape=[],
                                          name="image_feed_4")

            input_feed_0 = tf.placeholder(dtype=tf.int64,
                                          shape=[None],
                                          name="input_feed_0")
            input_feed_1 = tf.placeholder(dtype=tf.int64,
                                          shape=[None],
                                          name="input_feed_1")
            input_feed_2 = tf.placeholder(dtype=tf.int64,
                                          shape=[None],
                                          name="input_feed_2")
            input_feed_3 = tf.placeholder(dtype=tf.int64,
                                          shape=[None],
                                          name="input_feed_3")
            input_feed_4 = tf.placeholder(dtype=tf.int64,
                                          shape=[None],
                                          name="input_feed_4")

            # Process each image and insert batch dimensions.
            images_0 = tf.expand_dims(self.process_image(image_feed_0), 0)
            images_1 = tf.expand_dims(self.process_image(image_feed_1), 0)
            images_2 = tf.expand_dims(self.process_image(image_feed_2), 0)
            images_3 = tf.expand_dims(self.process_image(image_feed_3), 0)
            images_4 = tf.expand_dims(self.process_image(image_feed_4), 0)

            input_seqs_0 = tf.expand_dims(input_feed_0, 1)
            input_seqs_1 = tf.expand_dims(input_feed_1, 1)
            input_seqs_2 = tf.expand_dims(input_feed_2, 1)
            input_seqs_3 = tf.expand_dims(input_feed_3, 1)
            input_seqs_4 = tf.expand_dims(input_feed_4, 1)

            # No target sequences or input mask in inference mode.
            target_seqs_0 = None
            target_seqs_1 = None
            target_seqs_2 = None
            target_seqs_3 = None
            target_seqs_4 = None

            input_mask_0 = None
            input_mask_1 = None
            input_mask_2 = None
            input_mask_3 = None
            input_mask_4 = None

        else:
            # Prefetch serialized SequenceExample protos.
            input_queue = input_ops.prefetch_input_data(
                self.reader,
                self.config.input_file_pattern,
                is_training=self.is_training(),
                batch_size=self.config.batch_size,
                values_per_shard=self.config.values_per_input_shard,
                input_queue_capacity_factor=self.config.input_queue_capacity_factor,
                num_reader_threads=self.config.num_input_reader_threads)

            self.input_queue = input_queue

            assert self.config.num_preprocess_threads % 2 == 0

            # Image processing and random distortion. Split across multiple threads
            # with each thread applying a slightly different distortion.
            images_and_captions = []
            for thread_id in range(self.config.num_preprocess_threads):
                serialized_sequence_example = input_queue.dequeue()
                (encoded_image_0, caption_0, encoded_image_1, caption_1,
                 encoded_image_2, caption_2, encoded_image_3, caption_3,
                 encoded_image_4, caption_4) = input_ops.parse_sequence_example(
                    serialized_sequence_example,
                    image_feature=self.config.image_feature_name,
                    caption_feature=self.config.caption_feature_name)
                image_0 = self.process_image(encoded_image_0,
                                             thread_id=thread_id)
                image_1 = self.process_image(encoded_image_1,
                                             thread_id=thread_id)
                image_2 = self.process_image(encoded_image_2,
                                             thread_id=thread_id)
                image_3 = self.process_image(encoded_image_3,
                                             thread_id=thread_id)
                image_4 = self.process_image(encoded_image_4,
                                             thread_id=thread_id)
                images_and_captions.append([
                    image_0, caption_0, image_1, caption_1, image_2, caption_2,
                    image_3, caption_3, image_4, caption_4
                ])

            self.images_and_captions = images_and_captions

            queue_capacity = (2 * self.config.num_preprocess_threads *
                              self.config.batch_size)
            (images_0, input_seqs_0, target_seqs_0, input_mask_0,
             images_1, input_seqs_1, target_seqs_1, input_mask_1,
             images_2, input_seqs_2, target_seqs_2, input_mask_2,
             images_3, input_seqs_3, target_seqs_3, input_mask_3,
             images_4, input_seqs_4, target_seqs_4, input_mask_4) = (
                input_ops.batch_with_dynamic_pad(
                    images_and_captions,
                    batch_size=self.config.batch_size,
                    queue_capacity=queue_capacity))
        self.images_0 = images_0
        self.input_seqs_0 = input_seqs_0
        self.target_seqs_0 = target_seqs_0
        self.input_mask_0 = input_mask_0

        self.images_1 = images_1
        self.input_seqs_1 = input_seqs_1
        self.target_seqs_1 = target_seqs_1
        self.input_mask_1 = input_mask_1

        self.images_2 = images_2
        self.input_seqs_2 = input_seqs_2
        self.target_seqs_2 = target_seqs_2
        self.input_mask_2 = input_mask_2

        self.images_3 = images_3
        self.input_seqs_3 = input_seqs_3
        self.target_seqs_3 = target_seqs_3
        self.input_mask_3 = input_mask_3

        self.images_4 = images_4
        self.input_seqs_4 = input_seqs_4
        self.target_seqs_4 = target_seqs_4
        self.input_mask_4 = input_mask_4
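# The five-way copy-and-paste in the inference branch above could be built in
# a loop instead. A sketch only, assuming self.process_image and the numbered
# attribute names stay exactly as in the class above.
def build_inference_inputs(self):
    """Loop-based equivalent of the numbered inference placeholders (sketch)."""
    for i in range(5):
        image_feed = tf.placeholder(dtype=tf.string, shape=[],
                                    name="image_feed_%d" % i)
        input_feed = tf.placeholder(dtype=tf.int64, shape=[None],
                                    name="input_feed_%d" % i)
        setattr(self, "images_%d" % i,
                tf.expand_dims(self.process_image(image_feed), 0))
        setattr(self, "input_seqs_%d" % i, tf.expand_dims(input_feed, 1))
        setattr(self, "target_seqs_%d" % i, None)
        setattr(self, "input_mask_%d" % i, None)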
Example 3
    def build_inputs(self):
        """Input prefetching, preprocessing and batching.

    Outputs:
      self.images
      self.input_seqs
      self.target_seqs (training and eval only)
      self.input_mask (training and eval only)
    """
        if self.mode == "gradcam":
            image_feed = tf.placeholder(dtype=tf.string,
                                        shape=[],
                                        name="image_feed")
            images = self.process_image(image_feed)
            input_feed = tf.placeholder(dtype=tf.int64,
                                        shape=[None],
                                        name="input_feed")
            # image is a Tensor of shape [height, width, channels]
            # caption is a 1-D Tensor of any length
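            # Grad-CAM is computed for a single image, so force the batch size to 1.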
            self.config.batch_size = 1
            queue_capacity = (2 * self.config.num_preprocess_threads *
                              self.config.batch_size)

            num_queues = 1
            all_images = []
            all_input_seqs = []
            all_target_seqs = []
            all_input_masks = []
            enqueue_list = input_ops.batch_with_dynamic_pad(
                [[images, input_feed]],
                batch_size=self.config.batch_size,
                queue_capacity=queue_capacity,
                return_enqueue_list=True)
            all_images.append(tf.expand_dims(enqueue_list[0][0], 0))
            all_input_seqs.append(tf.expand_dims(enqueue_list[0][1], 0))
            all_target_seqs.append(tf.expand_dims(enqueue_list[0][2], 0))
            all_input_masks.append(tf.expand_dims(enqueue_list[0][3], 0))

            self.target_seqs = all_target_seqs
            self.input_mask = all_input_masks
            self.num_parallel_batches = 1
        elif self.mode == "saliency":
            image_feed = tf.placeholder(dtype=tf.string,
                                        shape=[None],
                                        name="image_feed")

            images = []
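            # Decode and preprocess each image in the batch individually.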
            for i in range(self.config.batch_size):
                images.append(self.process_image(image_feed[i]))
            input_feed = tf.placeholder(dtype=tf.int64,
                                        shape=[None],
                                        name="input_feed")
            # image is a Tensor of shape [height, width, channels]
            # caption is a 1-D Tensor of any length
            queue_capacity = (2 * self.config.num_preprocess_threads *
                              self.config.batch_size)

            images_and_captions = []
            for i in range(self.config.batch_size):
                images_and_captions.append([images[i], input_feed])

            num_queues = 1
            all_images = []
            all_input_seqs = []
            all_target_seqs = []
            all_input_masks = []
            enqueue_list = input_ops.batch_with_dynamic_pad(
                images_and_captions,
                batch_size=self.config.batch_size,
                queue_capacity=queue_capacity,
                return_enqueue_list=True)
            for i in range(self.config.batch_size):
                all_images.append(tf.expand_dims(enqueue_list[i][0], 0))
                all_input_seqs.append(tf.expand_dims(enqueue_list[i][1], 0))
                all_target_seqs.append(tf.expand_dims(enqueue_list[i][2], 0))
                all_input_masks.append(tf.expand_dims(enqueue_list[i][3], 0))

            self.target_seqs = [tf.concat(all_target_seqs, 0)]
            self.input_mask = [tf.concat(all_input_masks, 0)]
            self.num_parallel_batches = 1
            all_input_seqs = [tf.concat(all_input_seqs, 0)]
        elif self.mode == "inference":
            # In inference mode, images and inputs are fed via placeholders.
            image_feed = tf.placeholder(dtype=tf.string,
                                        shape=[],
                                        name="image_feed")
            input_feed = tf.placeholder(
                dtype=tf.int64,
                shape=[None],  # batch_size
                name="input_feed")

            # Process image and insert batch dimensions.
            all_images = [tf.expand_dims(self.process_image(image_feed), 0)]
            all_input_seqs = [tf.expand_dims(input_feed, 1)]

            # No target sequences or input mask in inference mode.
            self.target_seqs = None
            self.input_mask = None
            self.num_parallel_batches = 1
        else:
            # Prefetch serialized SequenceExample protos. input_queues is a
            # list so we can easily handle data from other TFRecord files.
            input_queues = []
            input_queue = input_ops.prefetch_input_data(
                self.reader,
                self.config.input_file_pattern,
                is_training=self.is_training(),
                batch_size=self.config.batch_size,
                values_per_shard=self.config.values_per_input_shard,
                input_queue_capacity_factor=self.config.input_queue_capacity_factor,
                num_reader_threads=self.config.num_input_reader_threads)
            input_queues.append(input_queue)
            if self.flags['blocked_image'] or self.flags['two_input_queues']:
                #start a new input queue for the blocked images
                input_queue2 = input_ops.prefetch_input_data(
                    self.reader,
                    self.config.blocked_input_file_pattern,
                    is_training=self.is_training(),
                    batch_size=self.config.batch_size,
                    values_per_shard=self.config.values_per_input_shard,
                    input_queue_capacity_factor=self.config.input_queue_capacity_factor,
                    num_reader_threads=self.config.num_input_reader_threads)
                input_queues.append(input_queue2)

            self.num_parallel_batches = len(input_queues)

            # Image processing and random distortion. Split across multiple threads
            # with each thread applying a slightly different distortion.
            assert self.config.num_preprocess_threads % 2 == 0

            images_and_captions_list = [[] for _ in range(len(input_queues))]
            for thread_id in range(self.config.num_preprocess_threads):
                for i, input_queue in enumerate(input_queues):
                    serialized_sequence_example = input_queue.dequeue()
                    encoded_image, caption = input_ops.parse_sequence_example(
                        serialized_sequence_example,
                        image_feature=self.config.image_keys[i],  # TODO: change this!
                        caption_feature=self.config.caption_feature_name)
                    image = self.process_image(encoded_image,
                                               thread_id=thread_id)
                    images_and_captions_list[i].append([image, caption])

            # Batch inputs.
            queue_capacity = (2 * self.config.num_preprocess_threads *
                              self.config.batch_size)

            num_queues = len(images_and_captions_list)
            all_images = []
            all_input_seqs = []
            all_target_seqs = []
            all_input_masks = []
            for i in range(len(input_queues)):
                outputs = input_ops.batch_with_dynamic_pad(
                    images_and_captions_list[i],
                    batch_size=self.config.batch_size,
                    num_queues=num_queues,
                    queue_capacity=queue_capacity,
                    loss_weight_value=self.flags['loss_weight_value'])
                all_images.append(outputs[0])
                all_input_seqs.append(outputs[1])
                all_target_seqs.append(outputs[2])
                all_input_masks.append(outputs[3])
            self.target_seqs = all_target_seqs
            self.input_mask = all_input_masks
        self.images = tf.concat(all_images, 0)
        self.input_seqs = all_input_seqs
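# All of the examples above rely on TF1 input queues, so a training loop must
# start queue runners before the batching ops can produce data. A minimal
# sketch, assuming `train_op` and `num_steps` are defined elsewhere (they are
# not part of the code above).
import tensorflow as tf

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(sess=sess, coord=coord)
  try:
    for _ in range(num_steps):
      sess.run(train_op)
  finally:
    coord.request_stop()
    coord.join(threads)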