def embed_text(tensors, embeddings):
  """Build embeddings using the word/char ids from `build_tensorize_text_fn`."""
  word_ids = tensors["wids"]
  char_ids = tensors["cids"]
  # Pretrained word vectors are looked up from a frozen (trainable=False)
  # embedding matrix supplied by the `embeddings` object.
  params = embeddings.get_initialized_params(trainable=False)
  word_repr = tf.nn.embedding_lookup(params, word_ids)
  # Character-level features come from a CNN over the character ids.
  char_repr = common_layers.character_cnn(char_ids)
  # Word- and character-level features are concatenated on the last axis.
  return tf.concat([word_repr, char_repr], axis=-1)
def test_character_cnn(self):
  """character_cnn output has shape [batch, words, num_filters].

  Feeds a 3x2 batch of words (ASCII, punctuation art, and CJK characters)
  through `char_utils.batch_word_to_char_ids` and `common_layers.character_cnn`
  and checks the resulting embedding shape is [3, 2, 5].
  """
  with tf.Graph().as_default():
    input_words = [["google", "lumiere"],
                   [u"¯\\_(ツ)_/¯", u"(ᵔᴥᵔ)"],
                   [u"谷", u"歌"]]
    # Pad/truncate every word to 10 character ids.
    char_ids = char_utils.batch_word_to_char_ids(
        tf.constant(input_words), 10)
    output_emb = common_layers.character_cnn(char_ids, num_filters=5)
    # Use tf.compat.v1.Session for consistency with the compat.v1
    # initializer below; a bare tf.Session does not exist under TF2.
    with tf.compat.v1.Session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      actual_output_emb = sess.run(output_emb)
    self.assertAllEqual(actual_output_emb.shape, [3, 2, 5])
def _embed(prefix):
  """Embed the input text based on word and character IDs.

  Args:
    prefix: Feature-name prefix; the function reads
      `features[prefix + "_wid"]` (word ids) and
      `features[prefix + "_cid"]` (character ids). `features`,
      `embedding_weights`, `mode`, and `FLAGS` are closure variables from
      the enclosing function (not visible in this block).

  Returns:
    The word-embedding lookup concatenated with the character-CNN embedding
    along the last axis; dropout is applied only in TRAIN mode.
  """
  # Word-level lookup from the shared pretrained embedding matrix.
  word_emb = tf.nn.embedding_lookup(embedding_weights,
                                    features[prefix + "_wid"])
  # Character-level features from a CNN over the character ids; CNN
  # hyperparameters come from command-line flags.
  char_emb = common_layers.character_cnn(
      char_ids=features[prefix + "_cid"],
      emb_size=FLAGS.char_emb_size,
      kernel_width=FLAGS.char_kernel_width,
      num_filters=FLAGS.num_char_filters)
  concat_emb = tf.concat([word_emb, char_emb], -1)
  if mode == tf.estimator.ModeKeys.TRAIN:
    # NOTE(review): `1.0 - dropout_ratio` is TF1 keep_prob semantics for
    # tf.nn.dropout; under TF2 the second positional argument is `rate`
    # (the drop probability) — confirm which API version this file targets.
    concat_emb = tf.nn.dropout(concat_emb, 1.0 - FLAGS.dropout_ratio)
  return concat_emb