def _linear(self, x, h, bias_default=0.0):
    """Combine input ``x`` and state ``h`` as ``x.W + h.U + b``.

    ``W`` is requested with shape ``[I, D]`` and ``U`` with ``[D, D]``, where
    ``I`` is the second static dimension of ``x`` and ``D`` is
    ``self._num_units``.  The bias ``b`` is requested with size ``D`` and
    ``bias_default`` as its third argument.

    When ``self.batch_norm`` is truthy, each matmul result goes through
    ``batch_norm`` inside its own variable scope (``Linear1`` / ``Linear2``)
    before the terms are summed; otherwise the raw products are summed.
    """
    in_dim = x.get_shape().as_list()[1]
    units = self._num_units

    w_x = weight("W", [in_dim, units])
    w_h = weight("U", [units, units])
    offset = bias("b", units, bias_default)

    # Plain affine path first, so the normalised path reads without nesting.
    if not self.batch_norm:
        return tf.matmul(x, w_x) + tf.matmul(h, w_h) + offset

    with tf.variable_scope("Linear1"):
        x_term = batch_norm(tf.matmul(x, w_x), is_training=self.is_training)
    with tf.variable_scope("Linear2"):
        h_term = batch_norm(tf.matmul(h, w_h), is_training=self.is_training)
    return x_term + h_term + offset
def _linear(self, x, h, bias_default=0.0):
    """Return ``matmul(x, W) + matmul(h, U) + b`` for the gate computations.

    Shapes requested from the helpers: ``W`` is ``[I, D]``, ``U`` is
    ``[D, D]`` and ``b`` has size ``D``, with ``I`` taken from the second
    static dimension of ``x`` and ``D`` from ``self._num_units``.

    If the batch-norm flag is set, both products are normalised separately
    (variable scopes ``Linear1`` and ``Linear2``) before being added.
    """
    input_size = x.get_shape().as_list()[1]
    hidden_size = self._num_units

    input_weights = weight('W', [input_size, hidden_size])
    state_weights = weight('U', [hidden_size, hidden_size])
    b = bias('b', hidden_size, bias_default)

    # NOTE(review): the flag is read as ``self.batch_normx`` (sic) -- confirm
    # the enclosing class really defines an attribute with that spelling.
    use_batch_norm = self.batch_normx

    if use_batch_norm:
        with tf.variable_scope('Linear1'):
            projected_x = tf.matmul(x, input_weights)
            x_w = batch_norm(projected_x, is_training=self.is_training)
        with tf.variable_scope('Linear2'):
            projected_h = tf.matmul(h, state_weights)
            h_u = batch_norm(projected_h, is_training=self.is_training)
        result = x_w + h_u + b
    else:
        result = tf.matmul(x, input_weights) + tf.matmul(h, state_weights) + b
    return result
def build(self):
    """Build the training graph and publish its handles on ``self``.

    Stages, in construction order:

    1. Placeholders for pre-embedded input ``[N, L, V]``, question
       ``[N, Q, V]``, int64 answer ids ``[N]``, a boolean input mask
       ``[N, L]`` and the ``is_training`` switch.
    2. Input and question readers: one GRU cell run through
       ``seq2seq.rnn_decoder`` inside the ``input`` variable scope, with
       variables reused for the question pass.
    3. Fact extraction: per example, the reader outputs selected by the
       mask are zero-padded to ``F`` rows.
    4. Episodic memory: ``params.memory_step`` GRU updates driven by
       ``EpisodeModule.new``.
    5. Optional batch norm, dropout, a single weight-matrix answer layer,
       sparse softmax cross-entropy plus weight decay over the ``'l2'``
       collection, accuracy, and an Adadelta training op.

    Sets ``self.x``, ``self.q``, ``self.y``, ``self.mask``,
    ``self.is_training``, ``self.total_loss``, ``self.num_corrects``,
    ``self.accuracy`` and ``self.opt_op``.  Returns ``None``.
    """
    params = self.params
    N, L, Q, F = (params.batch_size, params.max_sent_size,
                  params.max_ques_size, params.max_fact_count)
    V, d, A = params.glove_size, params.hidden_size, self.words.vocab_size

    # placeholders
    inputs = tf.placeholder(tf.float32, shape=[N, L, V], name='x')  # [num_batch, sentence_len, glove_dim]
    question = tf.placeholder(tf.float32, shape=[N, Q, V], name='q')  # [num_batch, question_len, glove_dim]
    answer = tf.placeholder(tf.int64, shape=[N], name='y')  # [num_batch] - one word answer
    input_mask = tf.placeholder(tf.bool, shape=[N, L], name='x_mask')  # [num_batch, sentence_len]
    is_training = tf.placeholder(tf.bool)

    # Prepare parameters
    gru = rnn_cell.GRUCell(d)

    # Input module
    with tf.variable_scope('input') as scope:
        input_list = self.make_decoder_batch_input(inputs)
        input_states, _ = seq2seq.rnn_decoder(input_list, gru.zero_state(N, tf.float32), gru)

        # Question module: same scope with reuse switched on, so the
        # question pass shares the variables created by the input pass.
        scope.reuse_variables()

        ques_list = self.make_decoder_batch_input(question)
        questions, _ = seq2seq.rnn_decoder(ques_list, gru.zero_state(N, tf.float32), gru)
        question_vec = questions[-1]  # last decoder output

    # Masking: to extract fact vectors at end of sentence. (details in paper)
    input_states = tf.transpose(tf.pack(input_states), [1, 0, 2])  # [N, L, D]
    facts = []
    for n in range(N):
        filtered = tf.boolean_mask(input_states[n, :, :], input_mask[n, :])  # [?, D]
        # Pad with zero rows up to F so every example has the same shape.
        padding = tf.zeros(tf.pack([F - tf.shape(filtered)[0], d]))
        facts.append(tf.concat(0, [filtered, padding]))  # [F, D]

    # Pack to one tensor so the batch and fact axes can be swapped, then
    # unpack into a list with one entry per fact position.
    facked = tf.pack(facts)
    facts = tf.unpack(tf.transpose(facked, [1, 0, 2]), num=F)  # F x [N, D]

    # Episodic Memory
    with tf.variable_scope('episodic') as scope:
        episode = EpisodeModule(d, question_vec, facts)

        memory = tf.identity(question_vec)
        for _ in range(params.memory_step):
            memory = gru(episode.new(memory), memory)[0]
            # Later passes reuse the variables created by the first one.
            scope.reuse_variables()

    # Regularizations
    if params.batch_norm:
        memory = batch_norm(memory, is_training=is_training)
    memory = dropout(memory, params.keep_prob, is_training)

    with tf.name_scope('Answer'):
        # Answer module : feed-forward version (for it is one word answer)
        w_a = weight('w_a', [d, A])
        logits = tf.matmul(memory, w_a)  # [N, A]

    with tf.name_scope('Loss'):
        # Cross-Entropy loss
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, answer)
        loss = tf.reduce_mean(cross_entropy)
        total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2'))

    with tf.variable_scope('Accuracy'):
        # Cast predictions to the label dtype: the labels are int64 here,
        # and tf.equal requires both operands to share one dtype (the
        # previous hard-coded 'int32' cast did not match).
        predicts = tf.cast(tf.argmax(logits, 1), answer.dtype)
        corrects = tf.equal(predicts, answer)
        num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
        accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

    # Training
    optimizer = tf.train.AdadeltaOptimizer(params.learning_rate)
    opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

    # placeholders
    self.x = inputs
    self.q = question
    self.y = answer
    self.mask = input_mask
    self.is_training = is_training

    # tensors
    self.total_loss = total_loss
    self.num_corrects = num_corrects
    self.accuracy = accuracy
    self.opt_op = opt_op
def build(self):
    """Build the training graph and publish its handles on ``self``.

    Stages, in construction order:

    1. Placeholders for word ids ``[N, F, L]``, question ids ``[N, Q]``,
       answer ids ``[N]``, per-example fact counts ``[N]``, a float input
       mask ``[N, F, L, V]`` and the ``is_training`` switch.
    2. Sentence reader: words are embedded, multiplied by the result of
       ``self.positional_encoding()`` and by the mask, then summed over the
       word axis to give one vector per fact.
    3. Input fusion: a forward GRU pass plus a backward GRU pass over the
       facts, summed position by position.
    4. Question reader: the final state of ``tf.nn.rnn`` over the embedded
       question.
    5. Episodic memory: ``params.memory_step`` layers, each updating the
       memory either with the GRU cell (``params.memory_update == 'gru'``)
       or with a ReLU layer over ``[memory, episode, question]``.
    6. Optional batch norm, dropout, a single weight-matrix answer layer,
       sparse softmax cross-entropy plus weight decay over the ``'l2'``
       collection, accuracy, and an Adam training op.

    Sets ``self.att``, ``self.x``, ``self.xm``, ``self.q``, ``self.y``,
    ``self.fc``, ``self.is_training``, ``self.total_loss``,
    ``self.num_corrects``, ``self.accuracy`` and ``self.opt_op``.
    Returns ``None``.
    """
    params = self.params
    N, L, Q, F = (params.batch_size, params.max_sent_size,
                  params.max_ques_size, params.max_fact_count)
    V, d, A = params.embed_size, params.hidden_size, self.words.vocab_size

    # placeholders
    inputs = tf.placeholder('int32', shape=[N, F, L], name='x')  # [num_batch, fact_count, sentence_len]
    question = tf.placeholder('int32', shape=[N, Q], name='q')  # [num_batch, question_len]
    answer = tf.placeholder('int32', shape=[N], name='y')  # [num_batch] - one word answer
    fact_counts = tf.placeholder('int64', shape=[N], name='fc')
    input_mask = tf.placeholder('float32', shape=[N, F, L, V], name='xm')
    is_training = tf.placeholder(tf.bool)
    self.att = tf.constant(0.)

    # Prepare parameters
    gru = rnn_cell.GRUCell(d)
    pos_enc = self.positional_encoding()
    # sqrt(3), written as a float exponent: ``3**(1 / 2)`` collapses to
    # ``3**0 == 1`` wherever ``/`` is integer division (Python 2).
    embedding = weight('embedding', [A, V], init='uniform', range=3 ** 0.5)

    with tf.name_scope('SentenceReader'):
        input_list = tf.unpack(tf.transpose(inputs))  # L x [F, N]
        input_embed = []
        for word_slice in input_list:
            fact_rows = tf.unpack(word_slice)
            embed = tf.pack([tf.nn.embedding_lookup(embedding, w) for w in fact_rows])  # [F, N, V]
            input_embed.append(embed)

        # apply positional encoding
        input_embed = tf.transpose(tf.pack(input_embed), [2, 1, 0, 3])  # [N, F, L, V]
        encoded = pos_enc * input_embed * input_mask
        facts = tf.reduce_sum(encoded, 2)  # [N, F, V]

    # dropout time
    facts = dropout(facts, params.keep_prob, is_training)

    with tf.name_scope('InputFusion'):
        # Bidirectional RNN
        with tf.variable_scope('Forward'):
            forward_states, _ = tf.nn.dynamic_rnn(gru, facts, fact_counts, dtype=tf.float32)

        with tf.variable_scope('Backward'):
            facts_reverse = tf.reverse_sequence(facts, fact_counts, 1)
            backward_states, _ = tf.nn.dynamic_rnn(gru, facts_reverse, fact_counts, dtype=tf.float32)
            # The backward pass ran over the reversed facts, so its outputs
            # are in reversed order too.  Flip them back so that row i of
            # both passes refers to the same fact before they are summed.
            backward_states = tf.reverse_sequence(backward_states, fact_counts, 1)

        # Use forward and backward states both
        facts = forward_states + backward_states  # [N, F, d]

    with tf.variable_scope('Question'):
        ques_list = tf.unpack(tf.transpose(question))
        ques_embed = [tf.nn.embedding_lookup(embedding, w) for w in ques_list]
        _, question_vec = tf.nn.rnn(gru, ques_embed, dtype=tf.float32)

    # Episodic Memory
    with tf.variable_scope('Episodic'):
        episode = EpisodeModule(d, question_vec, facts, is_training, params.batch_norm)
        memory = tf.identity(question_vec)

        for t in range(params.memory_step):
            with tf.variable_scope('Layer%d' % t) as scope:
                if params.memory_update == 'gru':
                    memory = gru(episode.new(memory), memory)[0]
                else:
                    # ReLU update
                    c = episode.new(memory)
                    concated = tf.concat(1, [memory, c, question_vec])

                    w_t = weight('w_t', [3 * d, d])
                    z = tf.matmul(concated, w_t)
                    if params.batch_norm:
                        z = batch_norm(z, is_training)
                    else:
                        b_t = bias('b_t', d)
                        z = z + b_t
                    memory = tf.nn.relu(z)  # [N, d]

                scope.reuse_variables()

    # Regularizations
    if params.batch_norm:
        memory = batch_norm(memory, is_training=is_training)
    memory = dropout(memory, params.keep_prob, is_training)

    with tf.name_scope('Answer'):
        # Answer module : feed-forward version (for it is one word answer)
        w_a = weight('w_a', [d, A], init='xavier')
        logits = tf.matmul(memory, w_a)  # [N, A]

    with tf.name_scope('Loss'):
        # Cross-Entropy loss
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, answer)
        loss = tf.reduce_mean(cross_entropy)
        total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2'))

    with tf.variable_scope('Accuracy'):
        # Accuracy
        predicts = tf.cast(tf.argmax(logits, 1), 'int32')
        corrects = tf.equal(predicts, answer)
        num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
        accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

    # Training
    optimizer = tf.train.AdamOptimizer(params.learning_rate)
    opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

    # placeholders
    self.x = inputs
    self.xm = input_mask
    self.q = question
    self.y = answer
    self.fc = fact_counts
    self.is_training = is_training

    # tensors
    self.total_loss = total_loss
    self.num_corrects = num_corrects
    self.accuracy = accuracy
    self.opt_op = opt_op
def build(self):
    """Construct the model graph and store its endpoints on the instance.

    The graph reads int32 word ids for the input ``[N, L]`` and question
    ``[N, Q]``, runs both through ``seq2seq.embedding_rnn_decoder`` with a
    shared GRU cell (variables reused inside the ``input`` scope), selects
    the reader outputs picked by the boolean mask and zero-pads them to
    ``F`` rows per example, iterates the episodic memory for
    ``params.memory_step`` steps, and finishes with the answer layer,
    loss, accuracy and an Adadelta training op.

    Attributes written: ``x``, ``q``, ``y``, ``mask``, ``is_training``,
    ``total_loss``, ``num_corrects``, ``accuracy``, ``opt_op``.
    """
    cfg = self.params
    batch = cfg.batch_size
    sent_len = cfg.max_sent_size
    ques_len = cfg.max_ques_size
    max_facts = cfg.max_fact_count
    embed_dim = cfg.embed_size
    hidden = cfg.hidden_size
    vocab = self.words.vocab_size

    # --- placeholders -----------------------------------------------------
    x_ph = tf.placeholder('int32', shape=[batch, sent_len], name='x')  # [num_batch, sentence_len]
    q_ph = tf.placeholder('int32', shape=[batch, ques_len], name='q')  # [num_batch, question_len]
    y_ph = tf.placeholder('int32', shape=[batch], name='y')  # [num_batch], one word answer
    mask_ph = tf.placeholder(tf.bool, shape=[batch, sent_len], name='x_mask')  # [num_batch, sentence_len]
    training_ph = tf.placeholder(tf.bool)

    # --- shared recurrent cell --------------------------------------------
    cell = rnn_cell.GRUCell(hidden)

    # --- input and question readers ---------------------------------------
    with tf.variable_scope('input') as reader_scope:
        x_steps = tf.unpack(tf.transpose(x_ph))
        x_init = cell.zero_state(batch, tf.float32)
        reader_outputs, _ = seq2seq.embedding_rnn_decoder(x_steps, x_init, cell, vocab, embed_dim)

        # The question pass reuses the variables created just above.
        reader_scope.reuse_variables()

        q_steps = tf.unpack(tf.transpose(q_ph))
        q_init = cell.zero_state(batch, tf.float32)
        q_outputs, _ = seq2seq.embedding_rnn_decoder(q_steps, q_init, cell, vocab, embed_dim)
        question_vec = q_outputs[-1]  # last decoder output

    # --- fact extraction ---------------------------------------------------
    # Keep only the masked positions (sentence ends, per the original note)
    # and zero-pad every example to max_facts rows.
    stacked_outputs = tf.transpose(tf.pack(reader_outputs), [1, 0, 2])  # [N, L, D]

    def padded_facts(row):
        """Return the masked outputs of one example, padded to [F, D]."""
        kept = tf.boolean_mask(stacked_outputs[row, :, :], mask_ph[row, :])  # [?, D]
        filler = tf.zeros(tf.pack([max_facts - tf.shape(kept)[0], hidden]))
        return tf.concat(0, [kept, filler])  # [F, D]

    per_example = [padded_facts(row) for row in range(batch)]
    fact_tensor = tf.pack(per_example)  # packed only so the axes can be swapped
    fact_list = tf.unpack(tf.transpose(fact_tensor, [1, 0, 2]), num=max_facts)  # F x [N, D]

    # --- episodic memory ---------------------------------------------------
    with tf.variable_scope('episodic') as memory_scope:
        episode = EpisodeModule(hidden, question_vec, fact_list)

        memory = tf.identity(question_vec)
        for _ in range(cfg.memory_step):
            update = cell(episode.new(memory), memory)
            memory = update[0]
            memory_scope.reuse_variables()

    # --- regularisation ----------------------------------------------------
    if cfg.batch_norm:
        memory = batch_norm(memory, is_training=training_ph)
    memory = dropout(memory, cfg.keep_prob, training_ph)

    # --- answer: single feed-forward layer (one-word answers) ---------------
    with tf.name_scope('Answer'):
        answer_weights = weight('w_a', [hidden, vocab])
        logits = tf.matmul(memory, answer_weights)  # [N, A]

    # --- loss: cross-entropy plus weight decay -----------------------------
    with tf.name_scope('Loss'):
        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y_ph)
        mean_xent = tf.reduce_mean(xent)
        l2_terms = tf.add_n(tf.get_collection('l2'))
        total_loss = mean_xent + cfg.weight_decay * l2_terms

    # --- accuracy ----------------------------------------------------------
    with tf.variable_scope('Accuracy'):
        predicted_ids = tf.cast(tf.argmax(logits, 1), 'int32')
        hits = tf.equal(predicted_ids, y_ph)
        num_corrects = tf.reduce_sum(tf.cast(hits, tf.float32))
        accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))

    # --- training op -------------------------------------------------------
    trainer = tf.train.AdadeltaOptimizer(cfg.learning_rate)
    train_step = trainer.minimize(total_loss, global_step=self.global_step)

    # --- publish placeholders and tensors ----------------------------------
    self.x = x_ph
    self.q = q_ph
    self.y = y_ph
    self.mask = mask_ph
    self.is_training = training_ph

    self.total_loss = total_loss
    self.num_corrects = num_corrects
    self.accuracy = accuracy
    self.opt_op = train_step
def build(self):
    """Create placeholders, the memory-network graph and the training op.

    Construction order:

    * placeholders: word ids ``[N, F, L]``, question ids ``[N, Q]``,
      answer ids ``[N]``, fact counts ``[N]``, float mask ``[N, F, L, V]``
      and the ``is_training`` flag;
    * sentence reader: embedded words are scaled by
      ``self.positional_encoding()`` and the mask, then summed over the
      word axis;
    * input fusion: forward and backward GRU passes over the facts, added
      together position by position;
    * question vector: final state of ``tf.nn.rnn`` over the embedded
      question;
    * episodic memory: ``params.memory_step`` layers, using either the GRU
      cell or a ReLU layer depending on ``params.memory_update``;
    * optional batch norm, dropout, answer layer, cross-entropy with
      weight decay over the ``'l2'`` collection, accuracy, Adam optimizer.

    Stores ``att``, ``x``, ``xm``, ``q``, ``y``, ``fc``, ``is_training``,
    ``total_loss``, ``num_corrects``, ``accuracy`` and ``opt_op`` on
    ``self``.  Returns ``None``.
    """
    params = self.params
    N, L, Q, F = (params.batch_size, params.max_sent_size,
                  params.max_ques_size, params.max_fact_count)
    V, d, A = params.embed_size, params.hidden_size, self.words.vocab_size

    # placeholders
    inputs = tf.placeholder('int32', shape=[N, F, L], name='x')  # [num_batch, fact_count, sentence_len]
    question = tf.placeholder('int32', shape=[N, Q], name='q')  # [num_batch, question_len]
    answer = tf.placeholder('int32', shape=[N], name='y')  # [num_batch] - one word answer
    fact_counts = tf.placeholder('int64', shape=[N], name='fc')
    input_mask = tf.placeholder('float32', shape=[N, F, L, V], name='xm')
    is_training = tf.placeholder(tf.bool)
    self.att = tf.constant(0.)

    # Prepare parameters
    gru = rnn_cell.GRUCell(d)
    pos_enc = self.positional_encoding()
    # Use a float exponent for sqrt(3); ``3**(1/2)`` evaluates to 1 when
    # ``/`` performs integer division (Python 2 without true division).
    embedding = weight('embedding', [A, V], init='uniform', range=3 ** 0.5)

    with tf.name_scope('SentenceReader'):
        input_list = tf.unpack(tf.transpose(inputs))  # L x [F, N]
        input_embed = []
        for word_slice in input_list:
            fact_rows = tf.unpack(word_slice)
            embed = tf.pack([tf.nn.embedding_lookup(embedding, w) for w in fact_rows])  # [F, N, V]
            input_embed.append(embed)

        # apply positional encoding
        input_embed = tf.transpose(tf.pack(input_embed), [2, 1, 0, 3])  # [N, F, L, V]
        encoded = pos_enc * input_embed * input_mask
        facts = tf.reduce_sum(encoded, 2)  # [N, F, V]

    # dropout time
    facts = dropout(facts, params.keep_prob, is_training)

    with tf.name_scope('InputFusion'):
        # Bidirectional RNN
        with tf.variable_scope('Forward'):
            forward_states, _ = tf.nn.dynamic_rnn(gru, facts, fact_counts, dtype=tf.float32)

        with tf.variable_scope('Backward'):
            facts_reverse = tf.reverse_sequence(facts, fact_counts, 1)
            backward_states, _ = tf.nn.dynamic_rnn(gru, facts_reverse, fact_counts, dtype=tf.float32)
            # Outputs of the backward pass follow the reversed fact order;
            # undo the reversal so they line up with the forward outputs
            # before the element-wise sum below.
            backward_states = tf.reverse_sequence(backward_states, fact_counts, 1)

        # Use forward and backward states both
        facts = forward_states + backward_states  # [N, F, d]

    with tf.variable_scope('Question'):
        ques_list = tf.unpack(tf.transpose(question))
        ques_embed = [tf.nn.embedding_lookup(embedding, w) for w in ques_list]
        _, question_vec = tf.nn.rnn(gru, ques_embed, dtype=tf.float32)

    # Episodic Memory
    with tf.variable_scope('Episodic'):
        episode = EpisodeModule(d, question_vec, facts, is_training, params.batch_norm)
        memory = tf.identity(question_vec)

        for t in range(params.memory_step):
            with tf.variable_scope('Layer%d' % t) as scope:
                if params.memory_update == 'gru':
                    memory = gru(episode.new(memory), memory)[0]
                else:
                    # ReLU update
                    c = episode.new(memory)
                    concated = tf.concat(1, [memory, c, question_vec])

                    w_t = weight('w_t', [3 * d, d])
                    z = tf.matmul(concated, w_t)
                    if params.batch_norm:
                        z = batch_norm(z, is_training)
                    else:
                        b_t = bias('b_t', d)
                        z = z + b_t
                    memory = tf.nn.relu(z)  # [N, d]

                scope.reuse_variables()

    # Regularizations
    if params.batch_norm:
        memory = batch_norm(memory, is_training=is_training)
    memory = dropout(memory, params.keep_prob, is_training)

    with tf.name_scope('Answer'):
        # Answer module : feed-forward version (for it is one word answer)
        w_a = weight('w_a', [d, A], init='xavier')
        logits = tf.matmul(memory, w_a)  # [N, A]

    with tf.name_scope('Loss'):
        # Cross-Entropy loss
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, answer)
        loss = tf.reduce_mean(cross_entropy)
        total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2'))

    with tf.variable_scope('Accuracy'):
        # Accuracy
        predicts = tf.cast(tf.argmax(logits, 1), 'int32')
        corrects = tf.equal(predicts, answer)
        num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
        accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

    # Training
    optimizer = tf.train.AdamOptimizer(params.learning_rate)
    opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

    # placeholders
    self.x = inputs
    self.xm = input_mask
    self.q = question
    self.y = answer
    self.fc = fact_counts
    self.is_training = is_training

    # tensors
    self.total_loss = total_loss
    self.num_corrects = num_corrects
    self.accuracy = accuracy
    self.opt_op = opt_op