def inference(self):
    """Performs inference on the DMN model."""
    # Set up the embedding matrix from the pre-initialised word embeddings
    # (sized by the input vocabulary).
    embeddings = tf.Variable(
        self.word_embedding.astype(np.float32), name="Embedding")

    with tf.variable_scope("input", initializer=_xavier_weight_init()):
        print('==> get input representation')
        fact_vecs = self.get_input_representation(embeddings)

    # keep track of attentions for possible strong supervision
    self.attentions = []

    # memory module
    with tf.variable_scope("memory", initializer=_xavier_weight_init(),
                           reuse=None):
        # Initial decoder state: the last fact vector and an all-zero
        # previous-output distribution.
        prev_a = tf.transpose(fact_vecs, perm=[1, 0, 2])[-1]
        prev_y = tf.zeros([self.config.batch_size, self.target_vocab_size])

        output = []
        prev_a_all = []
        prev_y_all = []

        # debug: sequence lengths
        print(self.max_input_len)
        print(self.max_t_len)

        # One decode step per output token position.
        for i in range(0, self.max_t_len):
            prev_a_all.append(prev_a)
            prev_y_all.append(prev_y)
            (prev_a, prev_y, vocab_probs, attn_iters_step,
             attn_halt_probs_step) = self.attention_decode_for_each_output_step(
                 prev_a_all, prev_y_all, fact_vecs)
            output.append(vocab_probs)
            self.attn_iters.append(attn_iters_step)
            self.attn_halt_probs.append(attn_halt_probs_step)

    return output
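# A rough usage sketch (an assumption, not part of the original file): the list
# returned by inference() holds one unnormalised distribution over the target
# vocabulary per output step, so greedy predictions could be read off with
# something like the following, where `model` is a hypothetical instance of
# this class:
#
#     logits = model.inference()   # max_t_len tensors, each of shape
#                                  # (batch_size, target_vocab_size)
#     preds = [tf.argmax(tf.nn.softmax(step), 1) for step in logits]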
def decoder_step(self, rnn_output):
    """Linear softmax answer module."""
    with tf.variable_scope("answer", reuse=True,
                           initializer=_xavier_weight_init()):
        rnn_output = tf.nn.dropout(rnn_output, self.dropout_placeholder)
        U_p = tf.get_variable("U")
        b_p = tf.get_variable("bias_p")
        vocab_probs = tf.matmul(rnn_output, U_p) + b_p
        output_probs = tf.nn.softmax(vocab_probs)
    return vocab_probs, output_probs
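# A minimal sketch (an assumption, not code from the original file) of why the
# raw vocab_probs logits are returned alongside the softmaxed output_probs:
# the logits can feed a numerically stable cross-entropy loss directly, e.g.
#
#     step_loss = tf.nn.softmax_cross_entropy_with_logits(vocab_probs,
#                                                         answer_labels)
#
# where answer_labels is a hypothetical one-hot (batch_size, target_vocab_size)
# tensor of gold answer tokens for this decode step.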
def add_decode_variables(self):
    """Adds the decoder's trainable variables (untied weights for each hop).

    Based on github.com/tensorflow/tensorflow/issues/5608#issuecomment-260549420.
    """
    # An earlier version created a separate variable scope per hop instead:
    # for i in range(self.total_input_hops):
    #     with tf.variable_scope("memory/decode/" + str(i),
    #                            initializer=_xavier_weight_init()):
    #         Wt = tf.get_variable(
    #             "W_t", (2 * self.config.hidden_size, self.config.hidden_size))
    #         bt = tf.get_variable("bias_t", (self.config.hidden_size,))

    self.total_input_hops = self.config.num_hops
    # Alternative: derive the hop count from the input length.
    # self.total_input_hops = self.max_input_len - 6  # don't need to attend to headers

    with tf.variable_scope("memory/decode", initializer=_xavier_weight_init()):
        untied_weights = tf.get_variable(
            "W_t", (self.total_input_hops, 2 * self.config.hidden_size,
                    self.config.hidden_size))
        untied_biases = tf.get_variable(
            "bias_t", (self.total_input_hops, self.config.hidden_size))

    # clear_after_read must be False, otherwise the TensorArray only lets
    # each index be read once.
    self.weight_container = tf.TensorArray(
        tf.float32, self.total_input_hops, clear_after_read=False,
        dynamic_size=None, name="w_container")
    self.bias_container = tf.TensorArray(
        tf.float32, self.total_input_hops, clear_after_read=False,
        dynamic_size=None, name="b_container")

    # Initialise the TensorArrays with the weights split up per hop. A
    # TensorArray is used (rather than a Python list) so that it can be
    # indexed with a tensor.
    self.weight_container = self.weight_container.unpack(untied_weights)
    self.bias_container = self.bias_container.unpack(untied_biases)
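# A brief usage sketch (an assumption, not code from the original file) of the
# tensor-indexing trick mentioned above: inside a tf.while_loop over hops, the
# per-hop weights can be fetched with a tensor index, which a plain Python
# list of variables would not allow.
#
#     hop = tf.constant(0)                      # e.g. a while_loop counter
#     W_hop = self.weight_container.read(hop)   # (2 * hidden_size, hidden_size)
#     b_hop = self.bias_container.read(hop)     # (hidden_size,)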
def _attention_GRU_step(self, rnn_input, h, g):
    """Attention GRU step as described in https://arxiv.org/abs/1603.01417.

    The attention gate g replaces the usual GRU update gate:
        r     = sigmoid(W_r x + U_r h + b_r)
        h_hat = tanh(W x + r * (U h) + b_h)
        h'    = g * h_hat + (1 - g) * h
    """
    with tf.variable_scope("attention_gru", reuse=True,
                           initializer=_xavier_weight_init()):
        Wr = tf.get_variable("Wr")
        Ur = tf.get_variable("Ur")
        br = tf.get_variable("bias_r")
        W = tf.get_variable("W")
        U = tf.get_variable("U")
        bh = tf.get_variable("bias_h")

        r = tf.sigmoid(tf.matmul(rnn_input, Wr) + tf.matmul(h, Ur) + br)
        h_hat = tf.tanh(tf.matmul(rnn_input, W) + r * tf.matmul(h, U) + bh)
        rnn_output = g * h_hat + (1 - g) * h
    return rnn_output
def get_attention(self, prev_memory, fact_vec):
    """Use the previous memory to compute a scalar attention score for the current fact."""
    with tf.variable_scope("attention", reuse=True,
                           initializer=_xavier_weight_init()):
        W_1 = tf.get_variable("W_1")
        b_1 = tf.get_variable("bias_1")
        W_2 = tf.get_variable("W_2")
        b_2 = tf.get_variable("bias_2")

        # Interaction features between the fact vector and the previous
        # memory; their count must match config.num_attention_features,
        # which sizes W_1 in add_reused_variables.
        features = [fact_vec * prev_memory, tf.abs(fact_vec - prev_memory)]
        feature_vec = tf.concat(1, features)

        attention = tf.matmul(
            tf.tanh(tf.matmul(feature_vec, W_1) + b_1), W_2) + b_2
    return attention
def normal_GRU_step(self, rnn_input, h):
    """Implement normal GRU"""
    with tf.variable_scope("normal_gru", reuse=True,
                           initializer=_xavier_weight_init()):
        Wu = tf.get_variable("Wu")
        Uu = tf.get_variable("Uu")
        bu = tf.get_variable("bias_u")
        Wr = tf.get_variable("Wr")
        Ur = tf.get_variable("Ur")
        br = tf.get_variable("bias_r")
        W = tf.get_variable("W")
        U = tf.get_variable("U")
        bh = tf.get_variable("bias_h")

        u = tf.sigmoid(tf.matmul(rnn_input, Wu) + tf.matmul(h, Uu) + bu)
        r = tf.sigmoid(tf.matmul(rnn_input, Wr) + tf.matmul(h, Ur) + br)
        h_hat = tf.tanh(tf.matmul(rnn_input, W) + r * tf.matmul(h, U) + bh)
        rnn_output = u * h_hat + (1 - u) * h
    return rnn_output
def add_reused_variables(self):
    """Adds trainable variables which are later reused."""
    gru_cell = tf.nn.rnn_cell.GRUCell(self.config.hidden_size)
    self.shared_gru_cell_before_dropout = SharedGRUCell(self.config.hidden_size)

    attn_length = 1  # TODO: definitely try out different attention lengths
    with tf.variable_scope('input/forward',
                           initializer=_xavier_weight_init(), reuse=True):
        self.intra_attention_GRU_cell_fw = tf.nn.rnn_cell.DropoutWrapper(
            tf.contrib.rnn.AttentionCellWrapper(
                self.shared_gru_cell_before_dropout, attn_length,
                state_is_tuple=False),
            input_keep_prob=self.dropout_placeholder,
            output_keep_prob=self.dropout_placeholder)
    with tf.variable_scope('input/backward',
                           initializer=_xavier_weight_init(), reuse=True):
        self.intra_attention_GRU_cell_bw = tf.nn.rnn_cell.DropoutWrapper(
            tf.contrib.rnn.AttentionCellWrapper(
                self.shared_gru_cell_before_dropout, attn_length,
                state_is_tuple=False),
            input_keep_prob=self.dropout_placeholder,
            output_keep_prob=self.dropout_placeholder)

    # apply dropout to the GRUs if the flag is set
    if self.config.drop_grus:
        self.gru_cell = tf.nn.rnn_cell.DropoutWrapper(
            gru_cell,
            input_keep_prob=self.dropout_placeholder,
            output_keep_prob=self.dropout_placeholder)
    else:
        self.gru_cell = gru_cell

    with tf.variable_scope("memory/attention",
                           initializer=_xavier_weight_init()):
        b_1 = tf.get_variable("bias_1", (self.config.embed_size,))
        W_1 = tf.get_variable(
            "W_1", (self.config.embed_size * self.config.num_attention_features,
                    self.config.embed_size))
        W_2 = tf.get_variable("W_2", (self.config.embed_size, 1))
        b_2 = tf.get_variable("bias_2", 1)

    with tf.variable_scope("memory/attention_gru",
                           initializer=_xavier_weight_init()):
        Wr = tf.get_variable("Wr", (self.config.embed_size, self.config.hidden_size))
        Ur = tf.get_variable("Ur", (self.config.hidden_size, self.config.hidden_size))
        br = tf.get_variable("bias_r", (1, self.config.hidden_size))
        W = tf.get_variable("W", (self.config.embed_size, self.config.hidden_size))
        U = tf.get_variable("U", (self.config.hidden_size, self.config.hidden_size))
        bh = tf.get_variable("bias_h", (1, self.config.hidden_size))

    with tf.variable_scope("memory/normal_gru",
                           initializer=_xavier_weight_init()):
        Wu = tf.get_variable(
            "Wu", (self.config.embed_size + self.target_vocab_size,
                   self.config.hidden_size))
        Uu = tf.get_variable("Uu", (self.config.hidden_size, self.config.hidden_size))
        bu = tf.get_variable("bias_u", (1, self.config.hidden_size))
        Wr = tf.get_variable(
            "Wr", (self.config.embed_size + self.target_vocab_size,
                   self.config.hidden_size))
        Ur = tf.get_variable("Ur", (self.config.hidden_size, self.config.hidden_size))
        br = tf.get_variable("bias_r", (1, self.config.hidden_size))
        W = tf.get_variable(
            "W", (self.config.embed_size + self.target_vocab_size,
                  self.config.hidden_size))
        U = tf.get_variable("U", (self.config.hidden_size, self.config.hidden_size))
        bh = tf.get_variable("bias_h", (1, self.config.hidden_size))

    with tf.variable_scope("memory/answer",
                           initializer=_xavier_weight_init()):
        U_p = tf.get_variable("U", (self.config.embed_size, self.target_vocab_size))
        b_p = tf.get_variable("bias_p", (self.target_vocab_size,))
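# Note on variable reuse (an inference from this file, not a statement by the
# author): add_reused_variables() creates variables such as "memory/answer/U",
# and the step functions above re-open only the inner scope with reuse=True,
# e.g.
#
#     with tf.variable_scope("answer", reuse=True):
#         U_p = tf.get_variable("U")
#
# which appears to resolve to "memory/answer/U" because those step functions
# are invoked from inside the "memory" scope opened in inference().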