def build_inputs(self):
    """Input prefetching, preprocessing and batching.

    Outputs:
      self.images
      self.input_seqs
      self.target_seqs (training and eval only)
      self.input_mask (training and eval only)
    """
    if self.mode == "inference":
        # Inference feeds one encoded image and a partial caption through
        # placeholders instead of the input queue.
        image_feed = tf.placeholder(dtype=tf.string, shape=[],
                                    name="image_feed")
        input_feed = tf.placeholder(dtype=tf.int64,
                                    shape=[None],  # batch_size
                                    name="input_feed")

        # Preprocess the image and give both tensors a batch dimension.
        images = tf.expand_dims(self.process_image(image_feed), 0)
        input_seqs = tf.expand_dims(input_feed, 1)

        # Inference has no targets and no padding mask.
        target_seqs = None
        input_mask = None
    else:
        # Prefetch serialized SequenceExample protos.
        input_queue = input_ops.prefetch_input_data(
            self.reader,
            self.config.input_file_pattern,
            is_training=self.is_training(),
            batch_size=self.config.batch_size,
            values_per_shard=self.config.values_per_input_shard,
            input_queue_capacity_factor=self.config.input_queue_capacity_factor,
            num_reader_threads=self.config.num_input_reader_threads)

        # Image processing and random distortion, split across several
        # threads; each thread applies a slightly different distortion.
        assert self.config.num_preprocess_threads % 2 == 0
        images_and_captions = []
        for thread_id in range(self.config.num_preprocess_threads):
            serialized = input_queue.dequeue()
            encoded_image, caption = input_ops.parse_sequence_example(
                serialized,
                image_feature=self.config.image_feature_name,
                caption_feature=self.config.caption_feature_name)
            images_and_captions.append(
                [self.process_image(encoded_image, thread_id=thread_id),
                 caption])

        # Batch inputs with dynamic padding.
        queue_capacity = (2 * self.config.num_preprocess_threads *
                          self.config.batch_size)
        images, input_seqs, target_seqs, input_mask = (
            input_ops.batch_with_dynamic_pad(
                images_and_captions,
                batch_size=self.config.batch_size,
                queue_capacity=queue_capacity))

    self.images = images
    self.input_seqs = input_seqs
    self.target_seqs = target_seqs
    self.input_mask = input_mask
def build_inputs(self):
    """Input prefetching, preprocessing and batching for 5 parallel streams.

    Same contract as the single-stream version, but each SequenceExample
    carries five (image, caption) pairs.

    Outputs, for i in 0..4:
      self.images_<i>
      self.input_seqs_<i>
      self.target_seqs_<i> (training and eval only; None in inference)
      self.input_mask_<i>  (training and eval only; None in inference)
    Also sets self.input_queue and self.images_and_captions in
    training/eval mode.
    """
    # Number of parallel image/caption streams.  Must match the number of
    # (image, caption) pairs returned by input_ops.parse_sequence_example
    # and by input_ops.batch_with_dynamic_pad for this project.
    num_streams = 5

    if self.mode == "inference":
        # In inference mode, images and inputs are fed via placeholders.
        # Placeholder names image_feed_0..4 / input_feed_0..4 are part of
        # the public graph interface and must not change.
        image_feeds = [
            tf.placeholder(dtype=tf.string, shape=[],
                           name="image_feed_%d" % i)
            for i in range(num_streams)
        ]
        input_feeds = [
            tf.placeholder(dtype=tf.int64, shape=[None],
                           name="input_feed_%d" % i)
            for i in range(num_streams)
        ]

        # Process each image and insert batch dimensions.
        images = [
            tf.expand_dims(self.process_image(feed), 0)
            for feed in image_feeds
        ]
        input_seqs = [tf.expand_dims(feed, 1) for feed in input_feeds]

        # No target sequences or input masks in inference mode.
        target_seqs = [None] * num_streams
        input_masks = [None] * num_streams
    else:
        # Prefetch serialized SequenceExample protos.
        input_queue = input_ops.prefetch_input_data(
            self.reader,
            self.config.input_file_pattern,
            is_training=self.is_training(),
            batch_size=self.config.batch_size,
            values_per_shard=self.config.values_per_input_shard,
            input_queue_capacity_factor=self.config.input_queue_capacity_factor,
            num_reader_threads=self.config.num_input_reader_threads)
        self.input_queue = input_queue

        assert self.config.num_preprocess_threads % 2 == 0
        # Image processing and random distortion. Split across multiple
        # threads with each thread applying a slightly different distortion.
        images_and_captions = []
        for thread_id in range(self.config.num_preprocess_threads):
            serialized_sequence_example = input_queue.dequeue()
            # parse_sequence_example returns 2 * num_streams values:
            # (encoded_image_0, caption_0, ..., encoded_image_4, caption_4).
            parsed = input_ops.parse_sequence_example(
                serialized_sequence_example,
                image_feature=self.config.image_feature_name,
                caption_feature=self.config.caption_feature_name)
            entry = []
            for i in range(num_streams):
                entry.append(
                    self.process_image(parsed[2 * i], thread_id=thread_id))
                entry.append(parsed[2 * i + 1])
            images_and_captions.append(entry)
        self.images_and_captions = images_and_captions

        # Batch inputs.
        queue_capacity = (2 * self.config.num_preprocess_threads *
                          self.config.batch_size)
        # batch_with_dynamic_pad returns, per stream, the 4-tuple
        # (images, input_seqs, target_seqs, input_mask), flattened into
        # 4 * num_streams outputs.
        outputs = input_ops.batch_with_dynamic_pad(
            images_and_captions,
            batch_size=self.config.batch_size,
            queue_capacity=queue_capacity)
        images = [outputs[4 * i] for i in range(num_streams)]
        input_seqs = [outputs[4 * i + 1] for i in range(num_streams)]
        target_seqs = [outputs[4 * i + 2] for i in range(num_streams)]
        input_masks = [outputs[4 * i + 3] for i in range(num_streams)]

    # Expose each stream under the numbered attribute names callers expect
    # (self.images_0, self.input_seqs_0, ..., self.input_mask_4).
    for i in range(num_streams):
        setattr(self, "images_%d" % i, images[i])
        setattr(self, "input_seqs_%d" % i, input_seqs[i])
        setattr(self, "target_seqs_%d" % i, target_seqs[i])
        setattr(self, "input_mask_%d" % i, input_masks[i])
def build_inputs(self):
    """Input prefetching, preprocessing and batching.

    Supports four modes:
      "gradcam"   - single image/caption pair fed via placeholders.
      "saliency"  - a batch of images fed via one string placeholder,
                    all sharing one caption placeholder.
      "inference" - single image and caption placeholders, no targets.
      otherwise   - queue-based training/eval input, optionally reading
                    from a second TFRecord queue (blocked images).

    Outputs:
      self.images (concatenated across parallel batches)
      self.input_seqs
      self.target_seqs (not set in inference mode)
      self.input_mask (not set in inference mode)
      self.num_parallel_batches
    """
    if self.mode == "gradcam":
        image_feed = tf.placeholder(dtype=tf.string, shape=[],
                                    name="image_feed")
        images = self.process_image(image_feed)
        input_feed = tf.placeholder(dtype=tf.int64, shape=[None],
                                    name="input_feed")
        # image is a Tensor of shape [height, width, channels]
        # caption is a 1-D Tensor of any length
        # Grad-CAM operates on one example at a time.
        self.config.batch_size = 1
        queue_capacity = (2 * self.config.num_preprocess_threads *
                          self.config.batch_size)
        all_images = []
        all_input_seqs = []
        all_target_seqs = []
        all_input_masks = []
        # return_enqueue_list=True gives back the per-example tensors
        # directly instead of going through a padding queue.
        enqueue_list = input_ops.batch_with_dynamic_pad(
            [[images, input_feed]],
            batch_size=self.config.batch_size,
            queue_capacity=queue_capacity,
            return_enqueue_list=True)
        all_images.append(tf.expand_dims(enqueue_list[0][0], 0))
        all_input_seqs.append(tf.expand_dims(enqueue_list[0][1], 0))
        all_target_seqs.append(tf.expand_dims(enqueue_list[0][2], 0))
        all_input_masks.append(tf.expand_dims(enqueue_list[0][3], 0))
        self.target_seqs = all_target_seqs
        self.input_mask = all_input_masks
        self.num_parallel_batches = 1
    elif self.mode == "saliency":
        # One string placeholder holds batch_size encoded images; every
        # image shares the same caption placeholder.
        image_feed = tf.placeholder(dtype=tf.string, shape=[None],
                                    name="image_feed")
        images = []
        for i in range(self.config.batch_size):
            images.append(self.process_image(image_feed[i]))
        input_feed = tf.placeholder(dtype=tf.int64, shape=[None],
                                    name="input_feed")
        # image is a Tensor of shape [height, width, channels]
        # caption is a 1-D Tensor of any length
        queue_capacity = (2 * self.config.num_preprocess_threads *
                          self.config.batch_size)
        images_and_captions = []
        for i in range(self.config.batch_size):
            images_and_captions.append([images[i], input_feed])
        all_images = []
        all_input_seqs = []
        all_target_seqs = []
        all_input_masks = []
        enqueue_list = input_ops.batch_with_dynamic_pad(
            images_and_captions,
            batch_size=self.config.batch_size,
            queue_capacity=queue_capacity,
            return_enqueue_list=True)
        for i in range(self.config.batch_size):
            all_images.append(tf.expand_dims(enqueue_list[i][0], 0))
            all_input_seqs.append(tf.expand_dims(enqueue_list[i][1], 0))
            all_target_seqs.append(tf.expand_dims(enqueue_list[i][2], 0))
            all_input_masks.append(tf.expand_dims(enqueue_list[i][3], 0))
        self.target_seqs = [tf.concat(all_target_seqs, 0)]
        self.input_mask = [tf.concat(all_input_masks, 0)]
        self.num_parallel_batches = 1
        all_input_seqs = [tf.concat(all_input_seqs, 0)]
    elif self.mode == "inference":
        # In inference mode, images and inputs are fed via placeholders.
        image_feed = tf.placeholder(dtype=tf.string, shape=[],
                                    name="image_feed")
        input_feed = tf.placeholder(
            dtype=tf.int64,
            shape=[None],  # batch_size
            name="input_feed")
        # Process image and insert batch dimensions.
        all_images = [tf.expand_dims(self.process_image(image_feed), 0)]
        all_input_seqs = [tf.expand_dims(input_feed, 1)]
        # No target sequences or input mask in inference mode.
        # No input mask in saliency mode. Single sentence not padded.
        # NOTE(review): this assigns a local only; self.input_mask and
        # self.target_seqs are left unset in inference mode — confirm
        # callers do not read them in this mode.
        input_mask = None
        self.num_parallel_batches = 1
    else:
        # Prefetch serialized SequenceExample protos.  input_queues is a
        # list so we can easily handle data from other tfrecord files.
        input_queues = []
        input_queue = input_ops.prefetch_input_data(
            self.reader,
            self.config.input_file_pattern,
            is_training=self.is_training(),
            batch_size=self.config.batch_size,
            values_per_shard=self.config.values_per_input_shard,
            input_queue_capacity_factor=self.config.input_queue_capacity_factor,
            num_reader_threads=self.config.num_input_reader_threads)
        input_queues.append(input_queue)
        if self.flags['blocked_image'] or self.flags['two_input_queues']:
            # Start a new input queue for the blocked images.
            input_queue2 = input_ops.prefetch_input_data(
                self.reader,
                self.config.blocked_input_file_pattern,
                is_training=self.is_training(),
                batch_size=self.config.batch_size,
                values_per_shard=self.config.values_per_input_shard,
                input_queue_capacity_factor=self.config.input_queue_capacity_factor,
                num_reader_threads=self.config.num_input_reader_threads)
            input_queues.append(input_queue2)
        self.num_parallel_batches = len(input_queues)

        # Image processing and random distortion. Split across multiple
        # threads with each thread applying a slightly different distortion.
        assert self.config.num_preprocess_threads % 2 == 0
        images_and_captions_list = [[] for _ in range(len(input_queues))]
        for thread_id in range(self.config.num_preprocess_threads):
            for i, input_queue in enumerate(input_queues):
                serialized_sequence_example = input_queue.dequeue()
                encoded_image, caption = input_ops.parse_sequence_example(
                    serialized_sequence_example,
                    image_feature=self.config.image_keys[i],  # TODO change this!
                    caption_feature=self.config.caption_feature_name)
                image = self.process_image(encoded_image, thread_id=thread_id)
                images_and_captions_list[i].append([image, caption])

        # Batch inputs.
        queue_capacity = (2 * self.config.num_preprocess_threads *
                          self.config.batch_size)
        num_queues = len(images_and_captions_list)
        all_images = []
        all_input_seqs = []
        all_target_seqs = []
        all_input_masks = []
        for i in range(len(input_queues)):
            outputs = input_ops.batch_with_dynamic_pad(
                images_and_captions_list[i],
                batch_size=self.config.batch_size,
                num_queues=num_queues,
                queue_capacity=queue_capacity,
                loss_weight_value=self.flags['loss_weight_value'])
            all_images.append(outputs[0])
            all_input_seqs.append(outputs[1])
            all_target_seqs.append(outputs[2])
            all_input_masks.append(outputs[3])
        self.target_seqs = all_target_seqs
        self.input_mask = all_input_masks

    # Every branch defines all_images / all_input_seqs; the per-queue
    # image batches are concatenated along the batch dimension.
    self.images = tf.concat(all_images, 0)
    self.input_seqs = all_input_seqs