Example #1
0
    def __init__(self, mode, config):
        """Build the model's input placeholders and embedding matrices.

        In non-Predict modes, inputs arrive as pre-numbered int32 ids fed
        directly into placeholders. In Predict mode, inputs arrive as raw
        strings and are converted to ids in-graph via HashTable lookups
        built from the config's numberers.

        Args:
            mode: a Mode value; selects which input pipeline is built.
            config: configuration object providing the numberers,
                vocabulary/embedding sizes, max_morph_tags and start_idx.
        """
        self._mode = mode
        self.config = config

        if self._mode != Mode.Predict:
            # Train/eval path: the feeder supplies already-numbered ids.
            self.input = tf.placeholder(name='inputs', dtype=tf.int32, shape=[None, None])
            self.pos_input = tf.placeholder(name='pos_input', dtype=tf.int32, shape=[None])
            self.morph_input = tf.placeholder(name='morph_input', dtype=tf.int32, shape=[None, config.max_morph_tags])
        else:
            # Predict path: build in-graph lookup tables.
            # dec2char is the reverse table (decoder id -> character string);
            # ids missing from the table map to "<UNK>".
            k, v = list(zip(*config.dec_char_numberer.value2num.items()))
            i = KeyValueTensorInitializer(v, k,key_dtype=tf.int64,value_dtype=tf.string)
            self.dec2char = HashTable(i, default_value="<UNK>")
            # Forward (string -> id) tables; see self.lookup for construction.
            self.enc_chars = self.lookup(config.enc_char_numberer)
            self.morph_tags = self.lookup(config.morph_numberer)
            self.pos_tags = self.lookup(config.pos_numberer)

            # String placeholders reuse the same tensor names as the int
            # placeholders above ('inputs', 'pos_input', 'morph_input');
            # the table lookups convert the fed strings to ids so the rest
            # of the graph sees the same self.input / self.pos_input /
            # self.morph_input interface in every mode.
            self.input_plc =tf.placeholder(name='inputs', dtype=tf.string, shape=[None, None])
            self.pos_plc = tf.placeholder(name='pos_input', dtype=tf.string, shape=[None])
            self.morph_plc = tf.placeholder(name='morph_input', dtype=tf.string, shape=[None, config.max_morph_tags])
            self.input = self.enc_chars.lookup(self.input_plc)
            self.pos_input = self.pos_tags.lookup(self.pos_plc)
            self.morph_input = self.morph_tags.lookup(self.morph_plc)


        # Per-example input sequence lengths, fed in every mode.
        self.input_lengths = tf.placeholder(name='input_lengths', dtype=tf.int32, shape=[None])



        # Embedding matrices for encoder and decoder characters; both use
        # char_embedding_size columns but separate vocabularies.
        self.enc_character_embeddings = tf.get_variable('enc_character_embeddings',
                                                        shape=[self.config.enc_vocab_size,
                                                               self.config.char_embedding_size])
        self.dec_character_embeddings = tf.get_variable('dec_character_embeddings',
                                                        shape=[self.config.dec_vocab_size,
                                                               self.config.char_embedding_size])

        # Embedding matrices for POS and morphology tags.
        self.pos_embeddings = tf.get_variable('pos_embeddings',
                                              shape=[self.config.pos_vocab_size,
                                                     self.config.pos_embedding_size])



        self.morph_embeddings = tf.get_variable('morph_embeddings',
                                              shape=[self.config.morph_vocab_size,
                                                     self.config.morph_embedding_size])

        if self._mode != Mode.Train:
            # Decoding (eval/predict) needs one start token per batch row.
            self.start_tokens = tf.tile([config.start_idx], [tf.shape(self.input)[0]])

        if self._mode != Mode.Predict:
            # length +2 since lengths are <bow> + word + <eow>
            self.dec_input = tf.placeholder(name='dec_in', dtype=tf.int32, shape=[None, None])
            # exclude start token from targets for loss-computations since we feed the start token and don't want to
            # predict it
            self.decoder_targets = self.dec_input[:, 1:]
            self.dec_lengths = tf.placeholder(name='dec_lengths', dtype=tf.int32, shape=[None])
Example #2
0
 def __init__(self, returns_dict=False):
   """Set up a tiny fixed-vocabulary embedding lookup.

   The empty string is the first entry, so any OOV token (which the table
   maps to index 0) receives the all-zero embedding.
   """
   vocab = [
       ("", [0, 0, 0, 0]),  # OOV items are mapped to this embedding.
       ("hello world", [1, 2, 3, 4]),
       ("pair-programming", [5, 5, 5, 5]),
   ]
   tokens = tf.constant([tok for tok, _ in vocab], dtype=tf.string)
   row_ids = tf.constant(list(range(len(vocab))), dtype=tf.int64)
   # Token -> row-index table; misses fall back to row 0.
   self.table = HashTable(KeyValueTensorInitializer(tokens, row_ids), 0)
   self.weights = tf.Variable(
       [vec for _, vec in vocab], dtype=tf.float32)
   self.variables = [self.weights]
   self.trainable_variables = self.variables
   self._returns_dict = returns_dict
Example #3
0
def text_module_fn():
  """Define a hub module signature: string batch in, embedding rows out."""
  vocab = [
      ("", [0, 0, 0, 0]),  # OOV items are mapped to this embedding.
      ("hello world", [1, 2, 3, 4]),
      ("pair-programming", [5, 5, 5, 5]),
  ]
  key_tensor = tf.constant([entry[0] for entry in vocab], dtype=tf.string)
  id_tensor = tf.constant(list(range(len(vocab))), dtype=tf.int64)
  # Token -> row-index table; unknown tokens map to row 0 (the zero vector).
  table = HashTable(KeyValueTensorInitializer(key_tensor, id_tensor), 0)

  initial_weights = tf.cast(
      tf.constant([entry[1] for entry in vocab]), tf.float32)

  weights = tf_v1.get_variable(
      "weights", dtype=tf.float32, initializer=initial_weights)

  text_tensor = tf_v1.placeholder(dtype=tf.string, name="text", shape=[None])
  embedding_tensor = tf.gather(weights, table.lookup(text_tensor))
  hub.add_signature(inputs=text_tensor, outputs=embedding_tensor)
Example #4
0
File: data.py  Project: tongda/ImSAT
        def input_fn():
            """Build the (image, caption) dataset pair, pinned to the CPU."""
            with tf.variable_scope("input_fn"), tf.device("/cpu:0"):
                caption_dataset = Dataset.from_tensor_slices(list(captions))
                filename_dataset = Dataset.from_tensor_slices(list(filenames))

                # Word -> id lookup table; unknown words fall back to id 0.
                vocab_table = HashTable(
                    KeyValueTensorInitializer(
                        list(self.word_to_idx.keys()),
                        list(self.word_to_idx.values()),
                        key_dtype=tf.string,
                        value_dtype=tf.int32),
                    default_value=0)

                def split_sentence(sentence):
                    # Tokenize on whitespace, bracket with sentinel tokens,
                    # then map every token to its vocabulary id.
                    tokens = tf.string_split(tf.reshape(sentence, (1, ))).values
                    tokens = tf.concat(
                        [tf.constant(["<START>"]), tokens, tf.constant(["<END>"])],
                        axis=0)
                    return vocab_table.lookup(tokens)

                index_dataset = caption_dataset.map(split_sentence,
                                                    num_threads=8)

                def decode_image(filename):
                    # Read and decode a JPEG to a float32 HxWx3 tensor.
                    raw = tf.read_file(filename)
                    return tf.to_float(tf.image.decode_jpeg(raw, channels=3))

                image_dataset = filename_dataset.map(decode_image,
                                                     num_threads=8)
                caption_structure = {
                    "raw": caption_dataset,
                    "index": index_dataset
                }
            return image_dataset, caption_structure
Example #5
0
 def lookup(self, numberer):
     """Build a string -> id HashTable from a numberer's value2num mapping.

     Keys absent from the table resolve to numberer.unknown_idx.
     """
     keys, ids = zip(*numberer.value2num.items())
     initializer = KeyValueTensorInitializer(keys, ids)
     return HashTable(initializer, default_value=numberer.unknown_idx)