Example #1
    def build_inputs(self):

        if self.mode == "encode":
            encode_ids = tf.placeholder(tf.int64, (200, None),
                                        name="encode_ids")
            encode_mask = tf.placeholder(tf.int8, (200, None),
                                         name="encode_mask")
            encode_labels = tf.placeholder(tf.int64, (200, None),
                                           name="encode_labels")
        else:
            # Prefetch serialized tf.Example protos.
            input_queue_word = input_ops.prefetch_input_data(
                self.reader_word,
                FLAGS.input_file_pattern_word,
                shuffle=FLAGS.shuffle_input_data,
                capacity=FLAGS.input_queue_capacity,
                num_reader_threads=FLAGS.num_input_reader_threads)
            # Deserialize a batch.
            serialized = input_queue_word.dequeue_many(FLAGS.batch_size)
            encode_word = input_ops.parse_example_batch(serialized)

            # Prefetch serialized tf.Example protos.
            input_queue_POS = input_ops.prefetch_input_data(
                self.reader_POS,
                FLAGS.input_file_pattern_POS,
                shuffle=FLAGS.shuffle_input_data,
                capacity=FLAGS.input_queue_capacity,
                num_reader_threads=FLAGS.num_input_reader_threads)
            # Deserialize a batch.
            serialized = input_queue_POS.dequeue_many(FLAGS.batch_size)
            encode_POS = input_ops.parse_example_batch(serialized)

            encode_ids = encode_word.ids
            encode_mask = encode_word.mask
            encode_labels = tf.cast(encode_POS.ids, tf.int32)
            encode_mask_labels = encode_POS.mask

        # Debugging aids, left commented out: print a slice of the batch and
        # the per-sentence lengths implied by the word and POS masks.
        # encode_ids = tf.Print(encode_ids, [encode_ids[1, :30]], summarize=30)
        # encode_labels = tf.Print(encode_labels, [encode_labels[1, :30]], summarize=30)
        # encode_mask = tf.Print(encode_mask, [encode_mask[1, :30]], summarize=10)
        # words_length = tf.reduce_sum(encode_mask[1, :30])
        # words_length = tf.Print(words_length, [words_length])
        # POSs_length = tf.reduce_sum(encode_mask_labels[1, :30])
        # POSs_length = tf.Print(POSs_length, [POSs_length])

        self.encode_ids = encode_ids
        self.encode_mask = encode_mask
        self.encode_labels = encode_labels
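
In "encode" mode the three placeholders above have to be fed by the caller. A minimal standalone sketch of that feed pattern follows (the dummy batch data and the session setup are assumptions, not part of the example); the length computation mirrors the commented-out words_length debug code.

import numpy as np
import tensorflow as tf

# Standalone sketch of the encode-mode feed pattern (TF 1.x).
# Names, dtypes and shapes are copied from the placeholders above;
# the batch contents are dummy data.
encode_ids = tf.placeholder(tf.int64, (200, None), name="encode_ids")
encode_mask = tf.placeholder(tf.int8, (200, None), name="encode_mask")
encode_labels = tf.placeholder(tf.int64, (200, None), name="encode_labels")

# Number of real (non-padding) tokens per sentence, recovered from the mask.
lengths = tf.reduce_sum(tf.cast(encode_mask, tf.int32), axis=1)

with tf.Session() as sess:
    batch_ids = np.zeros((200, 30), dtype=np.int64)
    batch_mask = np.ones((200, 30), dtype=np.int8)
    batch_labels = np.zeros((200, 30), dtype=np.int64)
    print(sess.run(lengths, feed_dict={encode_ids: batch_ids,
                                       encode_mask: batch_mask,
                                       encode_labels: batch_labels}))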
Example #2
    def build_inputs(self):

        if self.mode == "encode":
            encode_ids = tf.placeholder(tf.int64, (None, None),
                                        name="encode_ids")
            encode_mask = tf.placeholder(tf.int8, (None, None),
                                         name="encode_mask")
        else:
            # Prefetch serialized tf.Example protos.
            input_queue = input_ops.prefetch_input_data(
                self.reader,
                FLAGS.input_file_pattern,
                shuffle=FLAGS.shuffle_input_data,
                capacity=FLAGS.input_queue_capacity,
                num_reader_threads=FLAGS.num_input_reader_threads)

            # Deserialize a batch.
            serialized = input_queue.dequeue_many(FLAGS.batch_size)
            encode = input_ops.parse_example_batch(serialized)

            encode_ids = encode.ids
            encode_mask = encode.mask

        self.encode_ids = encode_ids
        self.encode_mask = encode_mask
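
Both examples rely on input_ops.parse_example_batch to turn the dequeued serialized protos into an ids tensor and a padding mask. A hypothetical sketch of such a function is shown below; the feature key "features" and the SentenceBatch namedtuple are assumptions, so the real input_ops module may differ.

import collections
import tensorflow as tf

# Hypothetical sketch of a parse_example_batch-style helper (TF 1.x).
# Assumption: each serialized tf.Example stores one sentence as an int64
# VarLenFeature under the key "features".
SentenceBatch = collections.namedtuple("SentenceBatch", ("ids", "mask"))

def parse_example_batch_sketch(serialized):
    """Parses serialized tf.Example protos into dense ids and a 0/1 mask."""
    parsed = tf.parse_example(
        serialized,
        features={"features": tf.VarLenFeature(dtype=tf.int64)})
    sparse_ids = parsed["features"]
    # Densify the variable-length ids and build a mask marking real tokens.
    ids = tf.sparse_tensor_to_dense(sparse_ids)
    mask = tf.sparse_to_dense(sparse_ids.indices, sparse_ids.dense_shape,
                              tf.ones_like(sparse_ids.values, dtype=tf.int32))
    return SentenceBatch(ids=ids, mask=mask)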
Example #3
    def build_inputs(self):
        """Builds the ops for reading input data.

    Outputs:
      self.encode_ids
      self.decode_pre_ids
      self.decode_post_ids
      self.encode_mask
      self.decode_pre_mask
      self.decode_post_mask
    """
        if self.mode == "encode":
            # Word embeddings are fed from an external vocabulary which has possibly
            # been expanded (see vocabulary_expansion.py).
            encode_ids = None
            decode_pre_ids = None
            decode_post_ids = None
            encode_mask = tf.placeholder(tf.int8, (None, None),
                                         name="encode_mask")
            decode_pre_mask = None
            decode_post_mask = None
        else:
            # Prefetch serialized tf.Example protos.
            input_queue = input_ops.prefetch_input_data(
                self.reader,
                self.config.input_file_pattern,
                shuffle=self.config.shuffle_input_data,
                capacity=self.config.input_queue_capacity,
                num_reader_threads=self.config.num_input_reader_threads)

            # Deserialize a batch.
            serialized = input_queue.dequeue_many(self.config.batch_size)
            encode, decode_pre, decode_post = input_ops.parse_example_batch(
                serialized)

            encode_ids = encode.ids
            decode_pre_ids = decode_pre.ids
            decode_post_ids = decode_post.ids

            encode_mask = encode.mask
            decode_pre_mask = decode_pre.mask
            decode_post_mask = decode_post.mask

        self.encode_ids = encode_ids
        self.decode_pre_ids = decode_pre_ids
        self.decode_post_ids = decode_post_ids

        self.encode_mask = encode_mask
        self.decode_pre_mask = decode_pre_mask
        self.decode_post_mask = decode_post_mask
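
In this variant the encode mode feeds word embeddings directly (the id tensors are None), as the docstring notes about the possibly expanded external vocabulary. A small sketch of the placeholders such a caller would pair together is below; the encode_emb name and the embedding width are assumptions, not taken from the code above.

import tensorflow as tf

# Hypothetical encode-mode pairing implied by the docstring: precomputed
# word embeddings are fed instead of ids. The "encode_emb" name and the
# 620-dim embedding size are assumptions.
encode_mask = tf.placeholder(tf.int8, (None, None), name="encode_mask")
encode_emb = tf.placeholder(tf.float32, (None, None, 620), name="encode_emb")

# Per-sentence lengths recovered from the mask, as an RNN encoder needs.
sequence_length = tf.reduce_sum(tf.cast(encode_mask, tf.int32), axis=1)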