import tensorflow as tf


def _decode(self):
    """
    Runs two additional Bi-LSTM passes and linear projections to score each
    position as the start or end of the predicted answer.
    Note that we concat the fuse_p_encodes for the passages in the same
    document. And since the encodes of queries in the same document are the
    same, we select the first one.
    """
    with tf.variable_scope('start_pos_predict'):
        self.fuse_p_encodes, _ = bilstm_layer(self.match_p_encodes,
                                              self.p_length,
                                              self.hidden_size,
                                              layer_num=1)
        start_weight = tf.get_variable("start_weight", self.hidden_size * 2)
        # Project each position's encoding to a scalar start score.
        start_logits = tf.tensordot(self.fuse_p_encodes, start_weight,
                                    axes=[[2], [0]])
    with tf.variable_scope('end_pos_predict'):
        # The end predictor also sees the start-side encodings.
        concat_GM_2 = tf.concat([self.match_p_encodes, self.fuse_p_encodes],
                                -1)
        self.end_p_encodes, _ = bilstm_layer(concat_GM_2, self.p_length,
                                             self.hidden_size, layer_num=1)
        end_weight = tf.get_variable("end_weight", self.hidden_size * 2)
        end_logits = tf.tensordot(self.end_p_encodes, end_weight,
                                  axes=[[2], [0]])
    with tf.variable_scope('same_question_concat'):
        # Flatten the per-passage scores so all passages belonging to one
        # question compete in a single softmax.
        batch_size = tf.shape(self.start_label)[0]
        concat_start_logits = tf.reshape(start_logits, [batch_size, -1])
        concat_end_logits = tf.reshape(end_logits, [batch_size, -1])
        self.start_probs = tf.nn.softmax(concat_start_logits, axis=1)
        self.end_probs = tf.nn.softmax(concat_end_logits, axis=1)
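
# Usage note (a hedged sketch, not part of the original model): the
# concatenated start/end distributions from _decode are typically trained
# with a span negative log-likelihood. self.start_label is referenced above;
# self.end_label is an assumption introduced here for illustration.
def _compute_span_loss(self):
    epsilon = 1e-9
    num_positions = tf.shape(self.start_probs)[1]
    # Probability mass assigned to the gold start/end positions.
    gold_start_prob = tf.reduce_sum(
        self.start_probs * tf.one_hot(self.start_label, num_positions),
        axis=1)
    gold_end_prob = tf.reduce_sum(
        self.end_probs * tf.one_hot(self.end_label, num_positions), axis=1)
    self.loss = -tf.reduce_mean(tf.log(gold_start_prob + epsilon) +
                                tf.log(gold_end_prob + epsilon))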

def _encode(self):
    """
    Employs a Bi-LSTM with shared weights to encode passage and question
    separately.
    """
    with tf.variable_scope('encoding'):
        self.sep_p_encodes, _ = bilstm_layer(self.p_emb, self.p_length,
                                             self.hidden_size)
        # Reuse the same Bi-LSTM weights for the question.
        tf.get_variable_scope().reuse_variables()
        self.sep_q_encodes, _ = bilstm_layer(self.q_emb, self.q_length,
                                             self.hidden_size)
    if self.use_dropout:
        self.sep_p_encodes = tf.nn.dropout(self.sep_p_encodes,
                                           1 - self.dropout)
        self.sep_q_encodes = tf.nn.dropout(self.sep_q_encodes,
                                           1 - self.dropout)

def _encode(self):
    """
    Employs two Bi-LSTMs to encode passage and question separately.
    """
    if self.use_dropout:
        self.p_emb = tf.nn.dropout(self.p_emb, self.dropout_keep_prob)
        self.q_emb = tf.nn.dropout(self.q_emb, self.dropout_keep_prob)
    with tf.variable_scope('passage_encoding'):
        self.sep_p_encodes, _ = bilstm_layer(self.p_emb, self.p_length,
                                             self.hidden_size)
    with tf.variable_scope('question_encoding'):
        self.sep_q_encodes, _ = bilstm_layer(self.q_emb, self.q_length,
                                             self.hidden_size)
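
# NOTE: bilstm_layer is called throughout this file but not defined in this
# section. A minimal TF 1.x sketch of a compatible implementation follows;
# the LSTMCell choice and scope names are assumptions, not necessarily what
# the project uses, and callers that invoke it twice in one scope would need
# distinct outer scopes or reuse handling. It returns outputs of shape
# [batch, time, 2 * hidden_size], matching how the encodes are consumed above.
def bilstm_layer(inputs, length, hidden_size, layer_num=1):
    outputs, states = inputs, None
    for i in range(layer_num):
        with tf.variable_scope('bilstm_{}'.format(i)):
            cell_fw = tf.nn.rnn_cell.LSTMCell(hidden_size)
            cell_bw = tf.nn.rnn_cell.LSTMCell(hidden_size)
            (out_fw, out_bw), states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, outputs, sequence_length=length,
                dtype=tf.float32)
            # Concatenate the forward and backward passes.
            outputs = tf.concat([out_fw, out_bw], -1)
    return outputs, states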

def _fuse(self):
    """
    Scaled dot-product self-attention over the match encodings, followed by
    a gated residual combination of the two Bi-LSTM passes.
    """
    with tf.variable_scope('self-attention'):
        self.fuse_p_encodes, _ = bilstm_layer(self.match_p_encodes,
                                              self.p_length,
                                              self.hidden_size)
        JX = tf.shape(self.fuse_p_encodes)[1]
        # Scaled dot-product similarity between every pair of positions.
        sim_matrix = tf.matmul(self.fuse_p_encodes, self.fuse_p_encodes,
                               transpose_b=True)
        sim_matrix /= self.hidden_size ** 0.5
        # Mask out padding positions before the softmax.
        mask_c = tf.tile(tf.expand_dims(self.c_mask, axis=1), [1, JX, 1])
        context2context_attn = tf.matmul(
            tf.nn.softmax(softmax_mask(sim_matrix, mask_c), -1),
            self.fuse_p_encodes)
        self.fuse_p_encodes2, _ = bilstm_layer(
            context2context_attn + self.match_p_encodes, self.p_length,
            self.hidden_size)
        # Gated combination: g decides how much of the transformed features r
        # to mix into the first-pass encodings.
        concat_encodes = tf.concat([self.fuse_p_encodes,
                                    self.fuse_p_encodes2], -1)
        r = tf.tanh(tf.layers.dense(concat_encodes, self.hidden_size * 2,
                                    activation=None))
        g = tf.sigmoid(tf.layers.dense(concat_encodes, self.hidden_size * 2,
                                       activation=None))
        self.concat_p_encodes = g * r + (1 - g) * self.fuse_p_encodes
    if self.use_dropout:
        self.concat_p_encodes = tf.nn.dropout(self.concat_p_encodes,
                                              1 - self.dropout)
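
# NOTE: softmax_mask is used above but not defined in this section. A common
# implementation (an assumption about this repo, not confirmed by it) pushes
# padded positions to a large negative value so they receive ~0 attention
# weight after the softmax:
def softmax_mask(val, mask, very_negative=-1e30):
    # mask holds 1 for valid positions and 0 for padding.
    return val + very_negative * (1.0 - tf.cast(mask, tf.float32))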

def _decode_yesno(self):
    """
    Classifies the opinion type of the answer (three classes, e.g.
    Yes / No / Depends) by max-pooling the fused passage encodings and
    projecting to class logits.
    """
    with tf.variable_scope('class_predict'):
        self.fuse_p_encodes, _ = bilstm_layer(self.match_p_encodes,
                                              self.p_length,
                                              self.hidden_size,
                                              layer_num=1)
        # Max-pool over time to get a fixed-size passage representation.
        # TODO: use self.p_length as a mask here?
        fuse_p_encodes_pool = tf.reduce_max(self.fuse_p_encodes, axis=1)
        classify_weight = tf.get_variable(
            "classify_weight",
            shape=[self.hidden_size * 2, 3],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        bias = tf.get_variable("bias",
                               shape=[3],
                               dtype=tf.float32,
                               initializer=tf.constant_initializer(0))
        class_logits = tf.nn.bias_add(
            tf.matmul(fuse_p_encodes_pool, classify_weight), bias)
        self.classprobs = tf.nn.softmax(class_logits, axis=1)
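
# Usage note (a hedged sketch, not part of the original model): the 3-way
# class probabilities from _decode_yesno can be trained with a plain negative
# log-likelihood. self.class_label is an assumption introduced here for
# illustration.
def _compute_yesno_loss(self):
    epsilon = 1e-9
    gold = tf.one_hot(self.class_label, 3)
    self.yesno_loss = -tf.reduce_mean(
        tf.reduce_sum(gold * tf.log(self.classprobs + epsilon), axis=1))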

def _fuse(self):
    """
    Multi-head (4 heads) scaled dot-product self-attention over the match
    encodings, with a residual connection and a final modeling Bi-LSTM.
    """
    with tf.variable_scope('self-attention'):
        self.fuse_p_encodes, _ = bilstm_layer(self.match_p_encodes,
                                              self.p_length,
                                              self.hidden_size)
        # Linear projections for queries, keys and values.
        Q = tf.layers.dense(self.fuse_p_encodes, 2 * self.hidden_size,
                            use_bias=False)  # (N, T_q, d_model)
        K = tf.layers.dense(self.fuse_p_encodes, 2 * self.hidden_size,
                            use_bias=False)  # (N, T_k, d_model)
        V = tf.layers.dense(self.fuse_p_encodes, 2 * self.hidden_size,
                            use_bias=False)  # (N, T_k, d_model)
        # Split into 4 heads and fold them into the batch dimension.
        Q_ = tf.concat(tf.split(Q, 4, axis=2), axis=0)  # (h*N, T_q, d_model/h)
        K_ = tf.concat(tf.split(K, 4, axis=2), axis=0)  # (h*N, T_k, d_model/h)
        V_ = tf.concat(tf.split(V, 4, axis=2), axis=0)  # (h*N, T_k, d_model/h)
        d_k = Q_.get_shape().as_list()[-1]
        sim_matrix = tf.matmul(Q_, K_, transpose_b=True)
        sim_matrix /= d_k ** 0.5
        context2context_attn = tf.matmul(tf.nn.softmax(sim_matrix, -1), V_)
        # Restore the original batch layout.  (N, T_q, d_model)
        context2context_attn = tf.concat(
            tf.split(context2context_attn, 4, axis=0), axis=2)
        # Residual connection around a fused projection of the attention
        # inputs, outputs and their elementwise product.
        self.residual_match = self.match_p_encodes + tf.nn.dropout(
            tf.layers.dense(tf.concat([
                self.fuse_p_encodes, context2context_attn,
                self.fuse_p_encodes * context2context_attn
            ], -1),
                            self.hidden_size * 2,
                            activation=tf.nn.relu), 1 - self.dropout)
    with tf.variable_scope('modeling'):
        self.fuse_p_encodes2, _ = bilstm_layer(self.residual_match,
                                               self.p_length,
                                               self.hidden_size)
    if self.use_dropout:
        self.fuse_p_encodes2 = tf.nn.dropout(self.fuse_p_encodes2,
                                             1 - self.dropout)
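
# Sanity check (a standalone sketch, not part of the model) of the head
# split used above: tf.concat(tf.split(x, h, axis=2), axis=0) turns
# (N, T, d) into (h*N, T, d/h), so a single batched matmul runs one
# attention computation per head.
with tf.Graph().as_default(), tf.Session() as sess:
    x = tf.zeros([2, 3, 8])                          # N=2, T=3, d=8
    x_split = tf.concat(tf.split(x, 4, axis=2), axis=0)
    print(sess.run(tf.shape(x_split)))               # -> [8 3 2]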

def _fuse(self):
    """
    Employs a Bi-LSTM again to fuse the context information after the match
    layer.
    """
    with tf.variable_scope('fusion'):
        self.fuse_p_encodes, _ = bilstm_layer(self.match_p_encodes,
                                              self.p_length,
                                              self.hidden_size)
        if self.use_dropout:
            self.fuse_p_encodes = tf.nn.dropout(self.fuse_p_encodes,
                                                1 - self.dropout)

def _fuse(self):
    """
    Employs a Bi-LSTM again to fuse the context information after the match
    layer, then adds a residual self-attention block built on a trilinear
    similarity.
    """
    with tf.variable_scope('fusion'):
        self.match_p_encodes = tf.layers.dense(self.match_p_encodes,
                                               self.hidden_size * 2,
                                               activation=tf.nn.relu)
        self.residual_p_emb = self.match_p_encodes
        if self.use_dropout:
            self.residual_p_emb = tf.nn.dropout(self.match_p_encodes,
                                                self.dropout_keep_prob)
        self.residual_p_encodes, _ = bilstm_layer(self.residual_p_emb,
                                                  self.p_length,
                                                  self.hidden_size,
                                                  layer_num=1)
        if self.use_dropout:
            self.residual_p_encodes = tf.nn.dropout(self.residual_p_encodes,
                                                    self.dropout_keep_prob)
        # Note: the Bi-LSTM output cannot be connected straight to a dense
        # layer here; doing so raises
        # AttributeError: 'Bidirectional' object has no attribute 'outbound_nodes'
        # Trilinear similarity w1.(c * c') + w2.c + w3.c' between all pairs
        # of positions (self-attention, so both sides are the passage
        # encodes).
        sim_weight_1 = tf.get_variable("sim_weight_1", self.hidden_size * 2)
        weight_passage_encodes = self.residual_p_encodes * sim_weight_1
        dot_sim_matrix = tf.matmul(weight_passage_encodes,
                                   self.residual_p_encodes,
                                   transpose_b=True)
        sim_weight_2 = tf.get_variable("sim_weight_2", self.hidden_size * 2)
        passage_sim = tf.tensordot(self.residual_p_encodes, sim_weight_2,
                                   axes=[[2], [0]])
        sim_weight_3 = tf.get_variable("sim_weight_3", self.hidden_size * 2)
        question_sim = tf.tensordot(self.residual_p_encodes, sim_weight_3,
                                    axes=[[2], [0]])
        sim_matrix = (dot_sim_matrix + tf.expand_dims(passage_sim, 2) +
                      tf.expand_dims(question_sim, 1))
        # sim_matrix = tf.matmul(self.residual_p_encodes,
        #                        self.residual_p_encodes, transpose_b=True)
        # Mask the diagonal so a position does not attend to itself.
        batch_size = tf.shape(sim_matrix)[0:1]
        num_rows = tf.shape(sim_matrix)[1]
        mask = tf.eye(num_rows, batch_shape=batch_size)
        sim_matrix = sim_matrix + -1e9 * mask
        context2question_attn = tf.matmul(tf.nn.softmax(sim_matrix, -1),
                                          self.residual_p_encodes)
        concat_outputs = tf.concat([
            self.residual_p_encodes, context2question_attn,
            self.residual_p_encodes * context2question_attn
        ], -1)
        self.residual_match_p_encodes = tf.layers.dense(
            concat_outputs, self.hidden_size * 2, activation=tf.nn.relu)
        # Residual connection back onto the projected match encodings.
        self.match_p_encodes = tf.add(self.match_p_encodes,
                                      self.residual_match_p_encodes)
        if self.use_dropout:
            self.match_p_encodes = tf.nn.dropout(self.match_p_encodes,
                                                 self.dropout_keep_prob)

def _fuse(self):
    """
    Single-pass scaled dot-product self-attention with a residual fused
    projection, without a second modeling Bi-LSTM.
    """
    with tf.variable_scope('self-attention'):
        self.context2context, _ = bilstm_layer(self.match_p_encodes,
                                               self.p_length,
                                               self.hidden_size)
        if self.use_dropout:
            self.context2context = tf.nn.dropout(self.context2context,
                                                 1 - self.dropout)
        sim_matrix = tf.matmul(self.context2context, self.context2context,
                               transpose_b=True)
        sim_matrix /= self.hidden_size ** 0.5
        context2context_attn = tf.matmul(tf.nn.softmax(sim_matrix, -1),
                                         self.context2context)
        # Residual connection around a fused projection of the attention
        # inputs, outputs and their elementwise product.
        self.fuse_p_encodes = self.match_p_encodes + tf.nn.dropout(
            tf.layers.dense(tf.concat([
                self.context2context, context2context_attn,
                self.context2context * context2context_attn
            ], -1),
                            self.hidden_size * 2,
                            activation=tf.nn.relu), 1 - self.dropout)