def build_inputs(self):
  """Builds input ops for the joint word / POS-tag encoder.

  In "encode" mode the ids, mask and labels are fed through placeholders.
  Otherwise two prefetch queues are read in parallel — one of word
  tf.Example protos and one of POS-tag tf.Example protos — and parsed
  into aligned id/mask batches; the POS ids become the labels.

  Outputs:
    self.encode_ids: word id batch.
    self.encode_mask: word mask batch.
    self.encode_labels: POS-tag id batch (int32 in the training branch).
  """
  if self.mode == "encode":
    # NOTE(review): the leading dimension is hard-coded to 200 here while
    # the sibling build_inputs uses (None, None) — confirm this is intended.
    encode_ids = tf.placeholder(tf.int64, (200, None), name="encode_ids")
    encode_mask = tf.placeholder(tf.int8, (200, None), name="encode_mask")
    # NOTE(review): the training branch casts labels to tf.int32, but this
    # placeholder is tf.int64 — confirm the dtype mismatch is harmless for
    # consumers of self.encode_labels.
    encode_labels = tf.placeholder(
        tf.int64, (200, None), name="encode_labels")
  else:
    # Prefetch serialized tf.Example protos for the word stream.
    input_queue_word = input_ops.prefetch_input_data(
        self.reader_word,
        FLAGS.input_file_pattern_word,
        shuffle=FLAGS.shuffle_input_data,
        capacity=FLAGS.input_queue_capacity,
        num_reader_threads=FLAGS.num_input_reader_threads)
    # Deserialize a batch of word examples.
    serialized = input_queue_word.dequeue_many(FLAGS.batch_size)
    encode_word = input_ops.parse_example_batch(serialized)

    # Prefetch serialized tf.Example protos for the POS-tag stream.
    input_queue_POS = input_ops.prefetch_input_data(
        self.reader_POS,
        FLAGS.input_file_pattern_POS,
        shuffle=FLAGS.shuffle_input_data,
        capacity=FLAGS.input_queue_capacity,
        num_reader_threads=FLAGS.num_input_reader_threads)
    # Deserialize a batch of POS examples.
    serialized = input_queue_POS.dequeue_many(FLAGS.batch_size)
    encode_POS = input_ops.parse_example_batch(serialized)

    encode_ids = encode_word.ids
    encode_mask = encode_word.mask
    # POS ids serve as the per-token labels for the encoder.
    encode_labels = tf.cast(encode_POS.ids, tf.int32)

  self.encode_ids = encode_ids
  self.encode_mask = encode_mask
  self.encode_labels = encode_labels
def build_inputs(self):
  """Builds the input ops.

  In "encode" mode the ids and mask are fed through placeholders;
  otherwise a prefetch queue of serialized tf.Example protos is
  dequeued and parsed into an id/mask batch.

  Outputs:
    self.encode_ids
    self.encode_mask
  """
  if self.mode == "encode":
    self.encode_ids = tf.placeholder(
        tf.int64, (None, None), name="encode_ids")
    self.encode_mask = tf.placeholder(
        tf.int8, (None, None), name="encode_mask")
  else:
    # Prefetch serialized tf.Example protos, then dequeue and parse
    # one batch.
    queue = input_ops.prefetch_input_data(
        self.reader,
        FLAGS.input_file_pattern,
        shuffle=FLAGS.shuffle_input_data,
        capacity=FLAGS.input_queue_capacity,
        num_reader_threads=FLAGS.num_input_reader_threads)
    batch = input_ops.parse_example_batch(
        queue.dequeue_many(FLAGS.batch_size))
    self.encode_ids = batch.ids
    self.encode_mask = batch.mask
def build_COCO_inputs(self, reader, pattern, batch_size, reuse):
  """Builds paired caption / visual input ops from a COCO prefetch queue.

  Args:
    reader: Reader used by the prefetch queue.
    pattern: Input file pattern for the serialized tf.Example protos.
    batch_size: Number of examples dequeued per batch.
    reuse: Whether the text-encoder variables are reused.

  Returns:
    A (encoded_caption, encoded_visual) tuple.

  Raises:
    ValueError: If config.video_embedding is not one of "O", "R", "IF".
  """
  # Prefetch serialized tf.Example protos and dequeue a batch.
  input_queue = input_ops.prefetch_input_data(
      reader,
      pattern,
      shuffle=self.config.shuffle_input_data,
      capacity=self.config.visual_capacity,
      num_reader_threads=self.config.num_input_reader_threads,
      dataset="COCO")
  serialized = input_queue.dequeue_many(batch_size)

  if self.config.video_embedding == "O":
    # Object labels: one-hot over the 80 COCO object categories.
    caption, indices = input_ops.parse_HL_batch(serialized)
    indices = tf.cast(indices, tf.int32)
    input_COCO_vector = tf.one_hot(indices[:, 0], 80)
  elif self.config.video_embedding in ("R", "IF"):
    caption, input_COCO_vector = input_ops.parse_image_example_batch(
        serialized)
  else:
    # Previously an unsupported setting fell through with `caption` and
    # `input_COCO_vector` unbound, raising a confusing NameError below.
    # Fail fast with an explicit message instead.
    raise ValueError(
        "Unsupported video_embedding for COCO inputs: %r"
        % self.config.video_embedding)

  encoded_visual = input_COCO_vector
  COCO_ids = caption.ids
  COCO_mask = caption.mask
  caption_word_emb = tf.nn.embedding_lookup(self.word_emb, COCO_ids)
  encoded_caption = self.text_encoder(caption_word_emb, COCO_mask,
                                      reuse=reuse)
  return encoded_caption, encoded_visual
def build_ranking_text_inputs(self, reader, pattern, batch_size, reuse):
  """Builds ops for a batch of sentence pairs used for ranking.

  Args:
    reader: Reader used by the prefetch queue.
    pattern: Input file pattern for the serialized tf.Example protos.
    batch_size: Number of pairs dequeued per batch.
    reuse: Whether the text-encoder variables are reused on the first call.

  Returns:
    A (encoded_s1, encoded_s2, s2_word_emb, s2_ids, s2_mask) tuple.
  """
  # Prefetch serialized tf.Example protos, then dequeue and parse a
  # batch of sentence pairs.
  queue = input_ops.prefetch_input_data(
      reader,
      pattern,
      shuffle=self.config.shuffle_input_data,
      capacity=self.config.visual_capacity,
      num_reader_threads=self.config.num_input_reader_threads,
      dataset="COCO")
  s1, s2 = input_ops.parse_pair_example_batch(
      queue.dequeue_many(batch_size))

  # Look up word embeddings for both sides of the pair.
  s1_word_emb = tf.nn.embedding_lookup(self.word_emb, s1.ids)
  s2_word_emb = tf.nn.embedding_lookup(self.word_emb, s2.ids)

  # The second encoding always reuses the text-encoder variables created
  # by the first.
  encoded_s1 = self.text_encoder(s1_word_emb, s1.mask, reuse=reuse)
  encoded_s2 = self.text_encoder(s2_word_emb, s2.mask, reuse=True)
  return encoded_s1, encoded_s2, s2_word_emb, s2.ids, s2.mask
def build_inputs(self):
  """Builds the ops for reading input data.

  Outputs:
    self.encode_ids
    self.decode_pre_ids
    self.decode_post_ids
    self.encode_mask
    self.decode_pre_mask
    self.decode_post_mask
  """
  if self.mode == "encode":
    # Word embeddings are fed from an external vocabulary which has possibly
    # been expanded (see vocabulary_expansion.py); only the mask is fed.
    self.encode_ids = None
    self.decode_pre_ids = None
    self.decode_post_ids = None
    self.encode_mask = tf.placeholder(
        tf.int8, (None, None), name="encode_mask")
    self.decode_pre_mask = None
    self.decode_post_mask = None
    return

  # Prefetch serialized tf.Example protos, then dequeue and parse one
  # batch into the encoder sentence plus its previous / next decoder
  # sentences.
  queue = input_ops.prefetch_input_data(
      self.reader,
      self.config.input_file_pattern,
      shuffle=self.config.shuffle_input_data,
      capacity=self.config.input_queue_capacity,
      num_reader_threads=self.config.num_input_reader_threads)
  encode, decode_pre, decode_post = input_ops.parse_example_batch(
      queue.dequeue_many(self.config.batch_size))

  self.encode_ids = encode.ids
  self.decode_pre_ids = decode_pre.ids
  self.decode_post_ids = decode_post.ids
  self.encode_mask = encode.mask
  self.decode_pre_mask = decode_pre.mask
  self.decode_post_mask = decode_post.mask
def build_MSVD_inputs(self, reader, pattern, batch_size, reuse):
  """Builds caption / video input ops for the MSVD dataset.

  In "encode" mode everything is fed through named placeholders; otherwise
  batches are read from a prefetch queue of serialized tf.Example protos.

  Returns:
    If config.video_embedding == "TG":
      (MSVD_ids, encoded_caption, video, video_length, encoded_visual).
    Otherwise:
      (encoded_caption, encoded_visual).
  """
  if self.mode == "encode":
    # No parsed caption structure exists in placeholder mode.
    caption = None
    # NOTE(review): input_MSVD_vector and nb_sentences are created but not
    # used within this method — presumably fetched by name from the graph
    # elsewhere; confirm before removing.
    input_MSVD_vector = tf.placeholder(
        tf.float32, (None, None), name="video_vector")
    video = tf.placeholder(
        tf.float32, (None, None, self.config.dim_image), name="video")
    video_length = tf.placeholder(tf.float32, (None), name="video_length")
    MSVD_mask = tf.placeholder(tf.int32, (None, None), name="caption_mask")
    MSVD_ids = tf.placeholder(tf.int32, (None, None), name="caption_ids")
    nb_sentences = tf.placeholder(tf.int32, name="nb_sentences")
    # Caption embeddings are fed directly rather than looked up.
    MSVD_emb = tf.placeholder(
        tf.float32, (None, None, self.config.word_embedding_dim),
        "caption_emb")
    encoded_caption = self.text_encoder(MSVD_emb, MSVD_mask, reuse=reuse)
    # No caption-conditioned hidden state (h_t) in placeholder mode.
    encoded_visual = self.video_encoder(video, video_length,
                                        batch_size=batch_size)
  else:
    # Prefetch serialized tf.Example protos.
    input_queue = input_ops.prefetch_input_data(
        reader,
        pattern,
        shuffle=self.config.shuffle_input_data,
        capacity=self.config.visual_capacity,
        num_reader_threads=self.config.num_input_reader_threads,
        dataset="MSVD")
    # Deserialize a batch.
    serialized = input_queue.dequeue_many(batch_size)
    caption, video, video_length = input_ops.parse_video_example_batch(
        serialized)
    # Restore the (batch, frames, dim_image) frame-feature layout.
    video = tf.reshape(video, [batch_size, -1, self.config.dim_image])
    MSVD_ids = caption.ids
    MSVD_mask = caption.mask
    caption_word_emb = tf.nn.embedding_lookup(self.word_emb, MSVD_ids)
    encoded_caption = self.text_encoder(caption_word_emb, MSVD_mask,
                                        reuse=reuse)
    # Sum of word2vec embeddings over the caption, used to condition the
    # video encoder's hidden state.
    caption_W2V_emb = tf.reduce_sum(
        tf.nn.embedding_lookup(self.w2v_matrix, MSVD_ids), 1)
    encoded_visual = self.video_encoder(video, video_length,
                                        h_t=caption_W2V_emb,
                                        batch_size=batch_size)
  if self.config.video_embedding == "TG":
    # Text-generation mode needs the raw ids and video tensors as well.
    return MSVD_ids, encoded_caption, video, video_length, encoded_visual
  else:
    return encoded_caption, encoded_visual
def build_bookcorpus_inputs(self):
  """Builds BookCorpus input ops (encoder + pre/post decoder sentences).

  In training mode every id/mask tensor is padded up to a fixed
  (bookcorpus_batch_size, len_sentence) shape.

  Outputs:
    self.encode_ids / self.decode_pre_ids / self.decode_post_ids
    self.encode_mask / self.decode_pre_mask / self.decode_post_mask
    self.encode_emb / self.decode_pre_emb / self.decode_post_emb
    self.thought_vectors
  """
  if self.mode == "encode":
    # Word embeddings are fed from an external vocabulary which has possibly
    # been expanded (see vocabulary_expansion.py).
    encode_ids = None
    decode_pre_ids = None
    decode_post_ids = None
    encode_mask = tf.placeholder(tf.int8, (None, None), name="encode_mask")
    decode_pre_mask = None
    decode_post_mask = None
  else:
    # Prefetch serialized tf.Example protos.
    input_queue = input_ops.prefetch_input_data(
        self.reader_bookcorpus,
        self.config.bookcorpus_pattern,
        shuffle=self.config.shuffle_input_data,
        capacity=self.config.bookcorpus_capacity,
        num_reader_threads=self.config.num_input_reader_threads,
        dataset="bookcorpus")
    # Deserialize a batch.
    serialized = input_queue.dequeue_many(
        self.config.bookcorpus_batch_size)
    encode, decode_pre, decode_post = input_ops.parse_text_batch(
        serialized)

    # Pad everything to the same fixed (batch, len_sentence) shape.
    padded_shape = [self.config.bookcorpus_batch_size,
                    self.config.len_sentence]
    encode_ids = pad_up_to(encode.ids, padded_shape, 0)
    decode_pre_ids = pad_up_to(decode_pre.ids, padded_shape, 0)
    decode_post_ids = pad_up_to(decode_post.ids, padded_shape, 0)
    encode_mask = pad_up_to(encode.mask, padded_shape, 0)
    decode_pre_mask = pad_up_to(decode_pre.mask, padded_shape, 0)
    # BUG FIX: this previously padded decode_pre.mask a second time, so
    # the post-sentence decoder received the pre-sentence mask.
    decode_post_mask = pad_up_to(decode_post.mask, padded_shape, 0)

  self.encode_ids = encode_ids
  self.decode_pre_ids = decode_pre_ids
  self.decode_post_ids = decode_post_ids
  self.encode_mask = encode_mask
  self.decode_pre_mask = decode_pre_mask
  self.decode_post_mask = decode_post_mask

  if self.mode == "encode":
    # Embeddings are fed directly; the decoders are unused at encode time.
    self.encode_emb = tf.placeholder(
        tf.float32, (None, None, self.config.word_embedding_dim),
        "encode_emb")
    self.decode_pre_emb = None
    self.decode_post_emb = None
  else:
    self.decode_pre_emb = tf.nn.embedding_lookup(
        self.word_emb, self.decode_pre_ids)
    self.decode_post_emb = tf.nn.embedding_lookup(
        self.word_emb, self.decode_post_ids)
    self.encode_emb = tf.nn.embedding_lookup(self.word_emb,
                                             self.encode_ids)

  # Expose the sentence encodings under a stable graph name.
  self.thought_vectors = tf.identity(
      self.text_encoder(
          self.encode_emb,
          self.encode_mask,
          reuse=(self.config.video_embedding != "T")),
      name="thought_vectors")