def build_word_decoder(self, word_vectors_3, char_ids_3):
    config = self.config
    with tf.variable_scope('word_condition_projection'):
        word_vectors_3 = layers.mlp(word_vectors_3, self.config['sentence_decoder_projection'])
    with tf.variable_scope('word_decoder'):
        spell_vector_len = config['spell_vector_len']
        spell_vector_size = spell_vector_len * config['char_embed_size']
        spell_vector_size *= 2  # TODO: make this factor configurable
        # Grab char embeds and concat them into spelling vector representations of words
        char_ids_3 = self.add_go(char_ids_3, axis=2)
        char_embeds_4 = layers.embedding(self.num_chars, config['char_embed_size'], char_ids_3)
        spell_vectors_3 = self.create_spell_vector(char_embeds_4, spell_vector_len)
        # Pass the spelling vector through a layer that can see previous chars but can't see ahead
        with tf.variable_scope('future_masked_spelling'):
            spell_vectors_projected_3 = layers.feed_forward(spell_vectors_3,
                num_nodes=spell_vector_size, seq_len_for_future_mask=spell_vector_len)
        # Reshape the word representation into individual char representations
        batch_size, sentence_len, word_len = tf.unstack(tf.shape(char_ids_3))
        # Integer division: tf.reshape needs an int dimension, not a float
        char_size = spell_vectors_projected_3.shape.as_list()[-1] // spell_vector_len
        char_vectors_4 = tf.reshape(spell_vectors_projected_3,
            [batch_size, sentence_len, spell_vector_len, char_size])
        char_vectors_4 = char_vectors_4[:, :, :word_len, :]
        # Project each char vector up to the size of the conditioning word vector
        with tf.variable_scope('char_projection'):
            word_depth = word_vectors_3.shape.as_list()[-1]
            char_vectors_4 = layers.feed_forward(char_vectors_4, num_nodes=word_depth)
        # Add the conditioning word vector to each char and pass the result through an MLP
        char_vectors_4 += tf.expand_dims(word_vectors_3, axis=2)
        char_vectors_4 = layers.mlp(char_vectors_4, config['word_decoder_mlp'])
        with tf.variable_scope('logits'):
            char_logits_4 = layers.feed_forward(char_vectors_4, num_nodes=self.num_chars,
                noise_level=config['noise_level'])
    return char_logits_4
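# A minimal sketch of the future masking assumed behind `seq_len_for_future_mask`
# above: the spelling vector is treated as spell_vector_len char-sized blocks, and
# the dense kernel is masked block-triangularly so output position j only receives
# input from positions i <= j. This helper and its weight-masking approach are
# illustrative assumptions, not the actual layers.feed_forward implementation.
def future_masked_kernel_sketch(kernel_2, seq_len):
    import numpy as np
    in_size, out_size = kernel_2.shape.as_list()
    # Kernel block (i, j) survives only when input block i <= output block j,
    # so no position can peek at chars ahead of its own slot.
    block_mask = np.kron(np.triu(np.ones([seq_len, seq_len])),
                         np.ones([in_size // seq_len, out_size // seq_len]))
    return kernel_2 * tf.constant(block_mask, dtype=kernel_2.dtype)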
def test_compiles(self):
    tf.reset_default_graph()
    with tf.Session() as sess:
        inputs = tf.constant([[0, 0], [1, 1], [2, 2]], dtype=tf.float32)
        outputs = layers.feed_forward(inputs, num_nodes=20)
        initialize_vars(sess)
        outputs = sess.run(outputs)
        self.assertEqual(outputs.shape, (3, 20))
def test_layer_norm(self):
    tf.reset_default_graph()
    def check_for_var(varname, count):
        # Match on the requested variable name rather than hard-coding 'gamma',
        # and materialize the result, since filter() returns an iterator in Python 3.
        matches = [var for var in tf.trainable_variables() if varname in var.name]
        self.assertEqual(len(matches), count)
    with tf.Session() as sess:
        inputs = tf.constant([[0, 0], [1, 1], [2, 2]], dtype=tf.float32)
        outputs = layers.feed_forward(inputs, num_nodes=20, layer_norm=False)
        check_for_var('gamma', 0)
        check_for_var('beta', 0)
        with tf.variable_scope('norm'):
            outputs = layers.feed_forward(outputs, num_nodes=10, layer_norm=True)
        check_for_var('gamma', 1)
        check_for_var('beta', 1)
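# For context on the gamma/beta counts checked above: layer normalization learns a
# per-feature scale (gamma) and shift (beta), so enabling layer_norm should add
# exactly one trainable variable of each. A minimal sketch, assuming the standard
# formulation (not necessarily how layers.feed_forward implements it):
def layer_norm_sketch(inputs, epsilon=1e-6):
    # Normalize over the feature axis, then apply the learned scale and shift
    mean, variance = tf.nn.moments(inputs, axes=[-1], keep_dims=True)
    gamma = tf.get_variable('gamma', inputs.shape[-1:], initializer=tf.ones_initializer())
    beta = tf.get_variable('beta', inputs.shape[-1:], initializer=tf.zeros_initializer())
    return gamma * (inputs - mean) / tf.sqrt(variance + epsilon) + beta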
def test_activation_fn(self):
    tf.reset_default_graph()
    def to_zero(tensor):
        return tensor * 0
    with tf.Session() as sess:
        inputs = tf.constant([[0, 0], [1, 1], [2, 2]], dtype=tf.float32)
        outputs = layers.feed_forward(inputs, num_nodes=20, activation_fn=to_zero)
        initialize_vars(sess)
        outputs = sess.run(outputs)
        self.assertEqual(np.sum(outputs), 0.0)
def test_dropout(self):
    tf.reset_default_graph()
    with tf.Session() as sess:
        inputs = tf.constant([[0.5, 0.5], [1, 1], [2, 2]], dtype=tf.float32)
        with tf.variable_scope('dropout_test'):
            outputs_dropped = layers.feed_forward(inputs, num_nodes=1024, keep_prob=0.5)
        with tf.variable_scope('dropout_test', reuse=True):
            outputs_kept = layers.feed_forward(inputs, num_nodes=1024, keep_prob=1.0)
        sess.run(tf.global_variables_initializer())
        outputs_dropped = sess.run(outputs_dropped)
        outputs_kept = sess.run(outputs_kept)
        # Values that haven't been dropped should be the same (after scaling),
        # so the only values that differ should be the dropped ones.
        zero_indices = np.where(np.not_equal(outputs_dropped, 2 * outputs_kept))
        self.assertEqual(np.sum(outputs_dropped[zero_indices]), 0)
        # Roughly half of the values should be dropped
        drop_rate = float(zero_indices[0].size) / outputs_kept.size
        self.assertTrue(abs(0.5 - drop_rate) < 0.05)
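# The assertions above rely on inverted dropout: with keep_prob=0.5, tf.nn.dropout
# rescales surviving activations by 1/keep_prob, so every kept unit in
# outputs_dropped equals exactly 2 * the matching unit in outputs_kept, and every
# dropped unit is exactly 0. A standalone illustration of that scaling
# (hypothetical helper, not part of the layers API):
def inverted_dropout_demo():
    with tf.Session() as sess:
        dropped = tf.nn.dropout(tf.ones([4, 1024]), keep_prob=0.5)
        vals = sess.run(dropped)
    # Each entry is either 0.0 (dropped) or 1 / 0.5 = 2.0 (kept and rescaled)
    assert set(np.unique(vals)) <= {0.0, 2.0}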