Example #1
 def build_word_decoder(self, word_vectors_3, char_ids_3):
     config = self.config
     with tf.variable_scope('word_condition_projection'):
         word_vectors_3 = layers.mlp(word_vectors_3, self.config['sentence_decoder_projection'])
     with tf.variable_scope('word_decoder'):
         spell_vector_len = config['spell_vector_len']
         spell_vector_size = spell_vector_len * config['char_embed_size']
         spell_vector_size *= 2 # TODO make this factor configurable
         # Embed the chars and concatenate them into spelling-vector representations of words
         char_ids_3 = self.add_go(char_ids_3, axis=2)
         char_embeds_4 = layers.embedding(self.num_chars, config['char_embed_size'], char_ids_3)
         spell_vectors_3 = self.create_spell_vector(char_embeds_4, spell_vector_len)
         # Pass spelling vector through a layer that can see previous chars, but can't see ahead
         with tf.variable_scope('future_masked_spelling'):
             spell_vectors_projected_3 = layers.feed_forward(spell_vectors_3,
                 num_nodes=spell_vector_size, seq_len_for_future_mask=spell_vector_len)
         # Reshape word representation into individual char representations
         batch_size, sentence_len, word_len = tf.unstack(tf.shape(char_ids_3))
         char_size = spell_vectors_projected_3.shape.as_list()[-1] // spell_vector_len  # integer division so reshape gets an int
         char_vectors_4 = tf.reshape(spell_vectors_projected_3,
             [batch_size, sentence_len, spell_vector_len, char_size])
         char_vectors_4 = char_vectors_4[:, :, :word_len, :]
         # Project each char_vector up to the size of the conditioning word_vector
         with tf.variable_scope('char_projection'):
             word_depth = word_vectors_3.shape.as_list()[-1]
             char_vectors_4 = layers.feed_forward(char_vectors_4, num_nodes=word_depth)
         # Add the conditioning word_vector to each char and pass result through an mlp
         char_vectors_4 += tf.expand_dims(word_vectors_3, axis=2)
         char_vectors_4 = layers.mlp(char_vectors_4, config['word_decoder_mlp'])
     with tf.variable_scope('logits'):
         char_logits_4 = layers.feed_forward(char_vectors_4, num_nodes=self.num_chars,
             noise_level=config['noise_level'])
     return char_logits_4
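
For orientation, the config keys this decoder reads could be filled in roughly as below. Every value is a hypothetical placeholder, not taken from the original project; the list-of-dict format for the MLP specs follows the layer_specs shown in Example #4.

# Hypothetical config; all values below are illustrative assumptions.
config = {
    'sentence_decoder_projection': [{'num_nodes': 256}],            # MLP spec for the conditioning projection
    'spell_vector_len': 20,                                         # char slots per word in the spelling vector
    'char_embed_size': 32,                                          # char embedding dimension
    'word_decoder_mlp': [{'num_nodes': 256}, {'num_nodes': 256}],   # MLP applied to the char vectors
    'noise_level': 0.1,                                             # noise level used by the logits layer
}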
Example #2
 def build_positional_char_embeds(self, char_ids_3, char_embed_size, mlp_layer_specs,
     word_len_limit):
     """ """
     char_ids_3 = char_ids_3[:, :, :word_len_limit] # potentially trim long words
     batch_size, max_sentence_len, max_word_len = tf.unstack(tf.shape(char_ids_3))
     # Select char embeddings
     with tf.variable_scope('chars'):
         char_embeds_4 = layers.embedding(self.num_chars, char_embed_size, char_ids_3)
     # Create char position ids for every possible char position in the batch (including padding)
     position_ids_1 = tf.range(max_word_len)
     position_ids_3 = tf.expand_dims(tf.expand_dims(position_ids_1, 0), 0)
     position_ids_3 = tf.tile(position_ids_3, [batch_size, max_sentence_len, 1])
     # Mask position_ids for padding chars
     where = tf.equal(char_ids_3, -1)
     position_ids_3 = tf.where(where, char_ids_3, tf.cast(position_ids_3, char_ids_3.dtype))
     # Convert position_ids to relative position (scalar between 0 and 1)
     word_lengths_3 = tf.reduce_max(position_ids_3, axis=2, keep_dims=True)
     word_lengths_3 = tf.where(tf.equal(word_lengths_3, 0), tf.ones_like(word_lengths_3), word_lengths_3)
     word_lengths_3 = tf.cast(word_lengths_3, char_embeds_4.dtype)
     position_ids_3 = tf.cast(position_ids_3, char_embeds_4.dtype)
     relative_positions_3 = position_ids_3 / word_lengths_3
     # Mask relative_positions for padding chars
     relative_positions_3 = tf.where(where, tf.zeros_like(relative_positions_3), relative_positions_3)
     # Combine char embeddings with their respective positions
     relative_positions_4 = tf.expand_dims(relative_positions_3, axis=3)
     positional_char_embeds_4 = tf.concat([char_embeds_4, relative_positions_4], axis=3)
     positional_char_embeds_4 = layers.mlp(positional_char_embeds_4, mlp_layer_specs)
     return positional_char_embeds_4
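
Shape-wise, char_ids_3 comes in as [batch, max_sentence_len, max_word_len] with -1 marking padding chars (see the tf.equal(char_ids_3, -1) mask), and the result is [batch, max_sentence_len, max_word_len, d], where d is presumably the final num_nodes in mlp_layer_specs, as the mlp test in Example #4 suggests. A hypothetical call, with made-up sizes:

# Hypothetical usage; shapes and values are assumptions for illustration only.
char_ids_3 = tf.constant([[[4, 7, 2, -1], [5, 3, -1, -1]]])   # [batch=1, sentence_len=2, word_len=4], -1 = pad
embeds_4 = self.build_positional_char_embeds(
    char_ids_3,
    char_embed_size=16,
    mlp_layer_specs=[{'num_nodes': 64}],
    word_len_limit=20)
# embeds_4 would have shape [1, 2, 4, 64]: one positional char embedding per char slot.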
Example #3
  def join(self, is_training, hparams, inputs, out_w, out_c, tags):
    """Meta model joins word and char model."""
    with tf.variable_scope('meta_char_word'):
      out_1 = layers.dropout(is_training, hparams.keep_prob, out_w)
      out_2 = layers.dropout(is_training, hparams.keep_prob, out_c)

      outputs = tf.concat([out_1, out_2], axis=2)
      out_fw, out_bw, _ = layers.lstm_layers(is_training, outputs,
                                             hparams.num_layers_meta,
                                             hparams.hidden_meta_size,
                                             hparams.recur_keep_j_prob)
      outputs = tf.concat([out_fw, out_bw], axis=2)
      outputs = layers.mlp(
          is_training,
          outputs,
          output_size=tags,
          keep_prob=hparams.keep_prob)
      preds_w = tf.to_int32(tf.argmax(outputs, axis=-1))
      targets_w = inputs[:, :, 2]
      tokens_to_keep = tf.to_float(tf.greater(inputs[:, :, 0], PAD))
      loss = tf.losses.sparse_softmax_cross_entropy(targets_w, outputs,
                                                    tokens_to_keep)
    if is_training:
      return loss
    else:
      return preds_w
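
Note that this example calls layers.mlp with a different signature from Examples #1, #2, #4 and #6: it passes is_training, a single output_size and a keep_prob instead of a list of layer specs, so it presumably comes from a different layers module. That implementation is not shown on this page; a minimal sketch of what such a helper could look like, under that assumption, is:

import tensorflow as tf

def mlp(is_training, inputs, output_size, keep_prob):
    # Minimal sketch (assumption): one dense layer, with dropout applied only at train time.
    # The real helper may use a different activation, or none at all.
    outputs = tf.layers.dense(inputs, units=output_size, activation=tf.nn.relu)
    if is_training:
        outputs = tf.nn.dropout(outputs, keep_prob=keep_prob)
    return outputs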
Example #4
 def test_compiles(self):
     tf.reset_default_graph()
     with tf.Session() as sess:
         inputs = tf.constant([[0, 0], [1, 1], [2, 2]], dtype=tf.float32)
         layer_specs = [{'num_nodes': 20}, {'num_nodes': 30}]
         outputs = layers.mlp(inputs, layer_specs)
         initialize_vars(sess)
         sess.run(outputs)
         self.assertEqual(outputs.shape, (3, 30))
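
This test pins down the spec-dict flavour of layers.mlp: each entry in layer_specs describes one layer, and the output's last dimension equals the final num_nodes (30 here). The project's real implementation is not reproduced on this page; a minimal sketch that would satisfy the test, assuming each spec carries num_nodes and optionally an activation, is:

import tensorflow as tf

def mlp(inputs, layer_specs):
    # Minimal sketch (assumption): stack one dense layer per spec dict.
    outputs = inputs
    for i, spec in enumerate(layer_specs):
        with tf.variable_scope('layer_%d' % i):
            outputs = tf.layers.dense(
                outputs,
                units=spec['num_nodes'],
                activation=spec.get('activation', tf.nn.relu))
    return outputs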
Example #5
  def char_model(self, is_training, hparams, chars, embedding_char_size, tags,
                 inputs_char, indexs_start, indexs_end, targets_w):
    """Character model."""
    with tf.variable_scope('chars'):
      if is_training:
        embed_dims = [chars, embedding_char_size]
        np.random.seed(seed=1)
        embeddings_char = np.random.randn(*embed_dims).astype(np.float32)
        cembed = tf.get_variable(
            'char_embeddings', dtype=tf.float32, initializer=embeddings_char)
      else:
        cembed = tf.get_variable('char_embeddings')

      # joint for both
      embed_nd = tf.nn.embedding_lookup(cembed, inputs_char[:, :])
      embed = layers.dropout(is_training, hparams.embed_keep_prob_ch, embed_nd)

      output_fw, output_bw, _ = layers.lstm_layers(
          is_training, embed, hparams.num_layers_chars,
          hparams.hidden_char_size, hparams.recur_keep_prob)

      # Gather word-start and word-end positions from the forward char LSTM output.
      output_fw_fst = tf.gather_nd(output_fw, indexs_start)
      output_fw_lst = tf.gather_nd(output_fw, indexs_end)

      # Gather word-start and word-end positions from the backward char LSTM output.
      output_bw_fst = tf.gather_nd(output_bw, indexs_start)
      output_bw_lst = tf.gather_nd(output_bw, indexs_end)

      # Concatenate the gathered start/end outputs from both directions.
      outputs = tf.concat(
          [output_fw_fst, output_fw_lst, output_bw_fst, output_bw_lst], axis=2)

      outputs = layers.mlp(
          is_training,
          outputs,
          output_size=hparams.mlp_size,
          keep_prob=hparams.keep_prob)

      targets = targets_w[:, :]
      tok_keep = tf.to_float(tf.greater(targets, PAD))

      linear = layers.linear_with_dropout(
          is_training, outputs, tags, keep_prob=hparams.keep_prob)
      preds = tf.to_int32(tf.argmax(linear, axis=-1))

      if is_training:
        int_tok_keep = tf.to_int32(tok_keep)
        t_correct = tf.to_int32(tf.equal(preds, targets)) * int_tok_keep
        accuracy = tf.reduce_sum(t_correct) / tf.reduce_sum(int_tok_keep)

        loss = tf.losses.sparse_softmax_cross_entropy(targets, linear, tok_keep)
        return loss, accuracy
      else:
        return preds, outputs
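
The tf.gather_nd calls above turn the per-char LSTM outputs, shaped [batch, max_chars, hidden], into per-word tensors, which implies indexs_start and indexs_end are [batch, max_words, 2] index tensors holding (batch_index, char_position) pairs at word boundaries. That is an inference from tf.gather_nd's semantics rather than something stated in the example; a toy illustration:

# Toy illustration (assumption): indices pointing at word-boundary char positions.
output_fw = tf.zeros([2, 10, 8])                      # [batch, max_chars, hidden]
indexs_start = tf.constant([[[0, 0], [0, 4]],         # batch 0: words start at chars 0 and 4
                            [[1, 0], [1, 3]]])        # batch 1: words start at chars 0 and 3
output_fw_fst = tf.gather_nd(output_fw, indexs_start)  # -> shape [2, 2, 8], one vector per word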
Example #6
 def build_word_encoder(self, char_ids_3, reuse_vars=None):
     config = self.config
     with tf.variable_scope('char_encoder', reuse=reuse_vars):
         char_embeds_4 = self.build_positional_char_embeds(char_ids_3, config['char_embed_size'],
             config['char_encoder_mlp'], config['max_word_len'])
     with tf.variable_scope('word_encoder', reuse=reuse_vars):
         # Sum positional_char_embeds to get a word_vector, normalize and noise it.
         word_vectors_3 = layers.do_layer_norm(tf.reduce_sum(char_embeds_4, axis=2))
         shape_1 = tf.shape(word_vectors_3)
         word_vectors_2 = tf.reshape(word_vectors_3, [-1, shape_1[-1]])
         word_vectors_2 += layers.gaussian_noise(word_vectors_2, self.config['noise_level'])
         word_vectors_3 = tf.reshape(word_vectors_2, shape_1)
         # Pass word_vectors through an MLP
         word_vectors_3 = layers.mlp(word_vectors_3, config['word_encoder_mlp'])
     return word_vectors_3
Example #7
  def word_model(self, is_training, hparams, words, embedding_word_size,
                 tags, pretrained_embed, inputs):
    """Word model."""
    with tf.variable_scope('words'):
      embedding = tf.get_variable(
          'word_embedding', [words, embedding_word_size],
          dtype=tf.float32,
          initializer=tf.zeros_initializer())
      word_inputs = tf.nn.embedding_lookup(embedding, inputs[:, :, 0])
      word_inputs = layers.dropout(is_training, hparams.embed_keep_prob,
                                   word_inputs)
      pret_inputs = tf.nn.embedding_lookup(pretrained_embed, inputs[:, :, 1])
      pret_inputs = layers.dropout(is_training, hparams.embed_keep_prob,
                                   pret_inputs)
      word_inputs += pret_inputs

      targets_w = inputs[:, :, 2]
      outputs = word_inputs

      output_fw, output_bw, _ = layers.lstm_layers(
          is_training, outputs, hparams.num_layers_words,
          hparams.hidden_word_size, hparams.recur_keep_w_prob)
      outputs = tf.concat([output_fw, output_bw], axis=2)

      outputs = layers.mlp(
          is_training,
          outputs,
          output_size=hparams.mlp_size,
          keep_prob=hparams.keep_prob)

      logits = layers.linear_with_dropout(
          is_training,
          outputs,
          tags,
          keep_prob=hparams.keep_prob)
      preds_w = tf.to_int32(tf.argmax(logits, axis=-1))
      tag_correct_w = tf.to_int32(tf.equal(preds_w, targets_w))
      correct = tf.reduce_sum(tag_correct_w) / tf.size(tag_correct_w)
      tokens_to_keep = tf.to_float(tf.greater(inputs[:, :, 0], PAD))
      loss_w = tf.losses.sparse_softmax_cross_entropy(targets_w, logits,
                                                      tokens_to_keep)

      if is_training:
        return loss_w, correct
      else:
        return preds_w, outputs
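
At evaluation time, char_model (Example #5) and word_model (this example) each return their predictions together with their MLP outputs, which is presumably what join in Example #3 consumes as out_c and out_w. A hypothetical wiring, using only the signatures shown on this page:

# Hypothetical wiring (assumption): combining the three models at eval time.
preds_c, out_c = self.char_model(False, hparams, chars, embedding_char_size, tags,
                                 inputs_char, indexs_start, indexs_end, targets_w)
preds_word, out_w = self.word_model(False, hparams, words, embedding_word_size,
                                    tags, pretrained_embed, inputs)
meta_preds = self.join(False, hparams, inputs, out_w, out_c, tags)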