Exemplo n.º 1
0
    def build_inputs(self):
        """Builds input ops for word ids, masks, and POS-tag labels.

        In "encode" mode the inputs are fed through placeholders; otherwise
        two prefetch queues (one for words, one for POS tags) are dequeued
        in lock-step batches.

        Outputs:
          self.encode_ids
          self.encode_mask
          self.encode_labels
        """
        if self.mode == "encode":
            # NOTE(review): the batch dimension is hard-coded to 200 here,
            # and encode_labels is int64 while the training branch below
            # casts labels to int32 — confirm downstream consumers accept
            # both dtypes before unifying.
            encode_ids = tf.placeholder(tf.int64, (200, None),
                                        name="encode_ids")
            encode_mask = tf.placeholder(tf.int8, (200, None),
                                         name="encode_mask")
            encode_labels = tf.placeholder(tf.int64, (200, None),
                                           name="encode_labels")
        else:
            # Prefetch serialized tf.Example protos holding word ids.
            input_queue_word = input_ops.prefetch_input_data(
                self.reader_word,
                FLAGS.input_file_pattern_word,
                shuffle=FLAGS.shuffle_input_data,
                capacity=FLAGS.input_queue_capacity,
                num_reader_threads=FLAGS.num_input_reader_threads)
            # Deserialize a batch of word examples.
            serialized = input_queue_word.dequeue_many(FLAGS.batch_size)
            encode_word = input_ops.parse_example_batch(serialized)

            # Prefetch serialized tf.Example protos holding POS tags.
            input_queue_POS = input_ops.prefetch_input_data(
                self.reader_POS,
                FLAGS.input_file_pattern_POS,
                shuffle=FLAGS.shuffle_input_data,
                capacity=FLAGS.input_queue_capacity,
                num_reader_threads=FLAGS.num_input_reader_threads)
            # Deserialize a batch of POS examples.
            serialized = input_queue_POS.dequeue_many(FLAGS.batch_size)
            encode_POS = input_ops.parse_example_batch(serialized)

            encode_ids = encode_word.ids
            encode_mask = encode_word.mask
            encode_labels = tf.cast(encode_POS.ids, tf.int32)

        self.encode_ids = encode_ids
        self.encode_mask = encode_mask
        self.encode_labels = encode_labels
Exemplo n.º 2
0
    def build_inputs(self):
        """Builds the input ops.

        In "encode" mode the ids and mask are fed through placeholders;
        otherwise they are read from a prefetch queue of tf.Example protos.

        Outputs:
          self.encode_ids
          self.encode_mask
        """
        if self.mode == "encode":
            ids = tf.placeholder(tf.int64, (None, None), name="encode_ids")
            mask = tf.placeholder(tf.int8, (None, None), name="encode_mask")
        else:
            # Prefetch serialized tf.Example protos.
            queue = input_ops.prefetch_input_data(
                self.reader,
                FLAGS.input_file_pattern,
                shuffle=FLAGS.shuffle_input_data,
                capacity=FLAGS.input_queue_capacity,
                num_reader_threads=FLAGS.num_input_reader_threads)

            # Deserialize a batch.
            batch = queue.dequeue_many(FLAGS.batch_size)
            parsed = input_ops.parse_example_batch(batch)
            ids, mask = parsed.ids, parsed.mask

        self.encode_ids = ids
        self.encode_mask = mask
Exemplo n.º 3
0
    def build_COCO_inputs(self, reader, pattern, batch_size, reuse):
        """Reads a batch of COCO caption/visual pairs and encodes both.

        Args:
          reader: a record reader instance for the input shards.
          pattern: file pattern of the input shards.
          batch_size: number of examples to dequeue per batch.
          reuse: whether the text-encoder variables should be reused.

        Returns:
          A (encoded_caption, encoded_visual) tuple of tensors.

        Raises:
          ValueError: if self.config.video_embedding is not one of the
            supported modes ("O", "R", "IF").
        """
        # Prefetch serialized tf.Example protos.
        input_queue = input_ops.prefetch_input_data(
            reader,
            pattern,
            shuffle=self.config.shuffle_input_data,
            capacity=self.config.visual_capacity,
            num_reader_threads=self.config.num_input_reader_threads,
            dataset="COCO")

        serialized = input_queue.dequeue_many(batch_size)

        if self.config.video_embedding == "O":
            # "O": one-hot over the 80 COCO object categories.
            caption, indices = input_ops.parse_HL_batch(serialized)
            indices = tf.cast(indices, tf.int32)
            input_COCO_vector = tf.one_hot(indices[:, 0], 80)
        elif self.config.video_embedding in ("R", "IF"):
            # "R"/"IF": precomputed image feature vectors.
            caption, input_COCO_vector = input_ops.parse_image_example_batch(
                serialized)
        else:
            # Previously an unsupported mode fell through to a NameError on
            # input_COCO_vector; fail fast with an explicit error instead.
            raise ValueError("Unsupported video_embedding mode: %r" %
                             self.config.video_embedding)

        encoded_visual = input_COCO_vector

        COCO_ids = caption.ids
        COCO_mask = caption.mask
        caption_word_emb = tf.nn.embedding_lookup(self.word_emb, COCO_ids)

        encoded_caption = self.text_encoder(caption_word_emb,
                                            COCO_mask,
                                            reuse=reuse)

        return encoded_caption, encoded_visual
Exemplo n.º 4
0
    def build_ranking_text_inputs(self, reader, pattern, batch_size, reuse):
        """Reads a batch of sentence pairs and encodes both sides.

        Args:
          reader: a record reader instance for the input shards.
          pattern: file pattern of the input shards.
          batch_size: number of pairs to dequeue per batch.
          reuse: whether the text-encoder variables should be reused for
            the first sentence (the second always reuses them).

        Returns:
          (encoded_s1, encoded_s2, s2_word_emb, s2_ids, s2_mask).
        """
        # Prefetch serialized tf.Example protos.
        queue = input_ops.prefetch_input_data(
            reader,
            pattern,
            shuffle=self.config.shuffle_input_data,
            capacity=self.config.visual_capacity,
            num_reader_threads=self.config.num_input_reader_threads,
            dataset="COCO")

        # Deserialize a batch of sentence pairs.
        batch = queue.dequeue_many(batch_size)
        first, second = input_ops.parse_pair_example_batch(batch)

        first_ids, first_mask = first.ids, first.mask
        second_ids, second_mask = second.ids, second.mask
        first_emb = tf.nn.embedding_lookup(self.word_emb, first_ids)
        second_emb = tf.nn.embedding_lookup(self.word_emb, second_ids)

        encoded_s1 = self.text_encoder(first_emb, first_mask, reuse=reuse)
        # The second encoder call always shares the variables.
        encoded_s2 = self.text_encoder(second_emb, second_mask, reuse=True)

        return encoded_s1, encoded_s2, second_emb, second_ids, second_mask
    def build_inputs(self):
        """Builds the ops for reading input data.

        In "encode" mode only the mask is fed (word embeddings come from an
        external, possibly expanded vocabulary — see vocabulary_expansion.py);
        otherwise the (encode, decode_pre, decode_post) triple is read from a
        prefetch queue.

        Outputs:
          self.encode_ids
          self.decode_pre_ids
          self.decode_post_ids
          self.encode_mask
          self.decode_pre_mask
          self.decode_post_mask
        """
        if self.mode == "encode":
            ids = pre_ids = post_ids = None
            pre_mask = post_mask = None
            mask = tf.placeholder(tf.int8, (None, None), name="encode_mask")
        else:
            # Prefetch serialized tf.Example protos.
            queue = input_ops.prefetch_input_data(
                self.reader,
                self.config.input_file_pattern,
                shuffle=self.config.shuffle_input_data,
                capacity=self.config.input_queue_capacity,
                num_reader_threads=self.config.num_input_reader_threads)

            # Deserialize a batch into the skip-thought triple.
            batch = queue.dequeue_many(self.config.batch_size)
            encode, decode_pre, decode_post = input_ops.parse_example_batch(
                batch)

            ids, mask = encode.ids, encode.mask
            pre_ids, pre_mask = decode_pre.ids, decode_pre.mask
            post_ids, post_mask = decode_post.ids, decode_post.mask

        self.encode_ids = ids
        self.decode_pre_ids = pre_ids
        self.decode_post_ids = post_ids

        self.encode_mask = mask
        self.decode_pre_mask = pre_mask
        self.decode_post_mask = post_mask
Exemplo n.º 6
0
    def build_MSVD_inputs(self, reader, pattern, batch_size, reuse):
        """Builds MSVD caption/video inputs and encodes both modalities.

        In "encode" mode everything is fed through placeholders; otherwise a
        prefetch queue supplies serialized video/caption examples.

        Returns:
          (MSVD_ids, encoded_caption, video, video_length, encoded_visual)
          when config.video_embedding == "TG"; otherwise
          (encoded_caption, encoded_visual).
        """
        if self.mode == "encode":

            caption = None
            input_MSVD_vector = tf.placeholder(tf.float32, (None, None),
                                               name="video_vector")
            video = tf.placeholder(tf.float32,
                                   (None, None, self.config.dim_image),
                                   name="video")
            video_length = tf.placeholder(tf.float32, (None),
                                          name="video_length")
            MSVD_mask = tf.placeholder(tf.int32, (None, None),
                                       name="caption_mask")
            MSVD_ids = tf.placeholder(tf.int32, (None, None),
                                      name="caption_ids")
            nb_sentences = tf.placeholder(tf.int32, name="nb_sentences")
            MSVD_emb = tf.placeholder(
                tf.float32, (None, None, self.config.word_embedding_dim),
                "caption_emb")

            encoded_caption = self.text_encoder(MSVD_emb,
                                                MSVD_mask,
                                                reuse=reuse)
            # NOTE(review): this encoded_visual is unconditionally
            # overwritten by the video_encoder call after the if/else, so
            # video_encoder runs twice in encode mode (once without h_t).
            # This may be intentional for variable creation/reuse — confirm
            # before removing.
            encoded_visual = self.video_encoder(video,
                                                video_length,
                                                batch_size=batch_size)

        else:

            # Prefetch serialized tf.Example protos from the MSVD shards.
            input_queue = input_ops.prefetch_input_data(
                reader,
                pattern,
                shuffle=self.config.shuffle_input_data,
                capacity=self.config.visual_capacity,
                num_reader_threads=self.config.num_input_reader_threads,
                dataset="MSVD")

            # Deserialize a batch.
            serialized = input_queue.dequeue_many(batch_size)
            caption, video, video_length = input_ops.parse_video_example_batch(
                serialized)
            # Restore the frame/feature layout expected by the encoder.
            video = tf.reshape(video, [batch_size, -1, self.config.dim_image])

            MSVD_ids = caption.ids
            MSVD_mask = caption.mask

            caption_word_emb = tf.nn.embedding_lookup(self.word_emb, MSVD_ids)
            encoded_caption = self.text_encoder(caption_word_emb,
                                                MSVD_mask,
                                                reuse=reuse)

        # Bag-of-words word2vec embedding of the caption, summed over time;
        # fed to the video encoder as an extra conditioning vector (h_t).
        caption_W2V_emb = tf.reduce_sum(
            tf.nn.embedding_lookup(self.w2v_matrix, MSVD_ids), 1)
        encoded_visual = self.video_encoder(video,
                                            video_length,
                                            h_t=caption_W2V_emb,
                                            batch_size=batch_size)

        if self.config.video_embedding == "TG":
            return MSVD_ids, encoded_caption, video, video_length, encoded_visual
        else:
            return encoded_caption, encoded_visual
Exemplo n.º 7
0
    def build_bookcorpus_inputs(self):
        """Builds BookCorpus input ops and the thought-vector encoder.

        In "encode" mode only the encode mask and an embedding placeholder
        are fed (word embeddings come from an external, possibly expanded
        vocabulary — see vocabulary_expansion.py); otherwise the
        (encode, decode_pre, decode_post) triple is read from a prefetch
        queue and padded to a fixed shape.

        Outputs:
          self.encode_ids / self.decode_pre_ids / self.decode_post_ids
          self.encode_mask / self.decode_pre_mask / self.decode_post_mask
          self.encode_emb / self.decode_pre_emb / self.decode_post_emb
          self.thought_vectors
        """
        if self.mode == "encode":
            encode_ids = None
            decode_pre_ids = None
            decode_post_ids = None
            encode_mask = tf.placeholder(tf.int8, (None, None),
                                         name="encode_mask")
            decode_pre_mask = None
            decode_post_mask = None
        else:
            # Prefetch serialized tf.Example protos.
            input_queue = input_ops.prefetch_input_data(
                self.reader_bookcorpus,
                self.config.bookcorpus_pattern,
                shuffle=self.config.shuffle_input_data,
                capacity=self.config.bookcorpus_capacity,
                num_reader_threads=self.config.num_input_reader_threads,
                dataset="bookcorpus")

            # Deserialize a batch.
            serialized = input_queue.dequeue_many(
                self.config.bookcorpus_batch_size)
            encode, decode_pre, decode_post = input_ops.parse_text_batch(
                serialized)

            def _pad(tensor):
                # Pad to the fixed [batch_size, len_sentence] shape shared
                # by every id/mask tensor below.
                return pad_up_to(
                    tensor,
                    [self.config.bookcorpus_batch_size,
                     self.config.len_sentence], 0)

            encode_ids = _pad(encode.ids)
            decode_pre_ids = _pad(decode_pre.ids)
            decode_post_ids = _pad(decode_post.ids)

            encode_mask = _pad(encode.mask)
            decode_pre_mask = _pad(decode_pre.mask)
            # BUG FIX: this previously padded decode_pre.mask, so the
            # post-decoder mask silently duplicated the pre-decoder mask.
            decode_post_mask = _pad(decode_post.mask)

        self.encode_ids = encode_ids
        self.decode_pre_ids = decode_pre_ids
        self.decode_post_ids = decode_post_ids

        self.encode_mask = encode_mask
        self.decode_pre_mask = decode_pre_mask
        self.decode_post_mask = decode_post_mask

        if self.mode == "encode":
            self.encode_emb = tf.placeholder(
                tf.float32, (None, None, self.config.word_embedding_dim),
                "encode_emb")
            self.decode_pre_emb = None
            self.decode_post_emb = None
        else:
            self.decode_pre_emb = tf.nn.embedding_lookup(
                self.word_emb, self.decode_pre_ids)
            self.decode_post_emb = tf.nn.embedding_lookup(
                self.word_emb, self.decode_post_ids)
            self.encode_emb = tf.nn.embedding_lookup(self.word_emb,
                                                     self.encode_ids)

        # Reuse the text-encoder variables unless this configuration is the
        # one that trains the text branch ("T").
        self.thought_vectors = tf.identity(self.text_encoder(
            self.encode_emb,
            self.encode_mask,
            reuse=(self.config.video_embedding != "T")),
                                           name="thought_vectors")