import tensorflow as tf  # TF1.x; helper ops (bn_dense_layer, get_logits, softsel, linear, ...) come from this repo's nn utils


def traditional_attention(rep_tensor, rep_mask, scope=None,
                          keep_prob=1., is_train=None, wd=0., activation='elu',
                          tensor_dict=None, name=None):
    bs, sl, vec = tf.shape(rep_tensor)[0], tf.shape(rep_tensor)[1], tf.shape(rep_tensor)[2]
    ivec = rep_tensor.get_shape()[2]
    with tf.variable_scope(scope or 'traditional_attention'):
        rep_tensor_map = bn_dense_layer(rep_tensor, ivec, True, 0., 'bn_dense_map', activation,
                                        False, wd, keep_prob, is_train)

        rep_tensor_logits = get_logits([rep_tensor_map], None, False, scope='self_attn_logits',
                                       mask=rep_mask, input_keep_prob=keep_prob, is_train=is_train)  # bs,sl
        attn_res = softsel(rep_tensor, rep_tensor_logits, rep_mask)  # bs,vec

        # save attn
        if tensor_dict is not None and name is not None:
            tensor_dict[name] = tf.nn.softmax(rep_tensor_logits)

        return attn_res
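# A minimal usage sketch (not part of the original code): how `traditional_attention`
# might be wired into a TF1 graph. The placeholder names and the embedding size 300
# are illustrative assumptions; the call relies on the repo's own helpers
# (bn_dense_layer, get_logits, softsel) being importable.
def _demo_traditional_attention():
    rep = tf.placeholder(tf.float32, [None, None, 300], 'rep')        # bs,sl,vec
    rep_mask = tf.placeholder(tf.bool, [None, None], 'rep_mask')      # bs,sl
    is_train = tf.placeholder(tf.bool, [], 'is_train')
    # pool the masked sequence into a single vector per example
    pooled = traditional_attention(rep, rep_mask, scope='demo_traditional_attention',
                                   keep_prob=0.8, is_train=is_train)  # bs,vec
    return pooled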
def normal_attention(rep_tensor, rep_mask, scope=None,
                     keep_prob=1., is_train=None, wd=0., activation='elu',
                     tensor_dict=None, name=None):
    batch_size, code_len, vec_size = tf.shape(rep_tensor)[0], tf.shape(rep_tensor)[1], tf.shape(rep_tensor)[2]
    ivec = rep_tensor.get_shape()[2]
    with tf.variable_scope(scope or 'normal_attention'):
        rep_tensor_map = bn_dense_layer(rep_tensor, ivec, True, 0., 'bn_dense_map', activation,
                                        False, wd, keep_prob, is_train)

        rep_tensor_logits = get_logits([rep_tensor_map], None, False, scope='self_attn_logits',
                                       mask=rep_mask, input_keep_prob=keep_prob, is_train=is_train)  # bs,sl
        attn_result = softsel(rep_tensor, rep_tensor_logits, rep_mask)  # bs,vec

        # save attn
        if tensor_dict is not None and name is not None:
            tensor_dict[name] = tf.nn.softmax(rep_tensor_logits)

        with tf.variable_scope('output'):
            o_bias = tf.get_variable('o_bias', [ivec], tf.float32, tf.constant_initializer(0.))
            # input gate
            fusion_gate = tf.nn.sigmoid(
                linear(rep_tensor_map, ivec, True, 0., 'linear_fusion_i', False, wd, keep_prob, is_train) +
                linear(attn_result, ivec, True, 0., 'linear_fusion_a', False, wd, keep_prob, is_train) +
                o_bias)
            output = fusion_gate * rep_tensor_map + (1 - fusion_gate) * attn_result
            output = mask_for_high_rank(output, rep_mask)  # bs,sl,vec
        return output
def self_choose_attention(rep_tensor, rep_mask, hn,  # correct
                          keep_prob=1., is_train=None, scope=None, simplify=False):
    """
    Self soft-choose attention.
    :param rep_tensor: rank must be 3 [bs,sl,hn]
    :param rep_mask: [bs,sl]
    :param hn: hidden size of the linear map
    :param keep_prob: dropout keep probability
    :param is_train: training flag
    :param scope: variable scope name
    :param simplify: if True, skip the linear map over `rep_tensor`
    :return: attended vector [bs,hn]
    """
    with tf.variable_scope(scope or 'self_choose_attention'):
        if not simplify:
            rep_tensor_map = tf.nn.relu(linear([rep_tensor], hn, True, scope='linear_map',
                                               input_keep_prob=keep_prob, is_train=is_train))
        else:
            rep_tensor_map = tf.identity(rep_tensor)
        rep_tensor_logits = get_logits([rep_tensor_map], None, False, scope='self_attn_logits',
                                       mask=rep_mask, input_keep_prob=keep_prob, is_train=is_train)  # bs,sl
        attn_res = softsel(rep_tensor, rep_tensor_logits, rep_mask)  # bs,vec
        return attn_res
def gene_similarity_mat_and_mask(tensor_row, tensor_col, mask_for_tensor_row, mask_for_tensor_col,
                                 similarity_method='inner', hn=100, scope=None):
    with tf.variable_scope(scope or 'gene_similarity_mat_and_mask'):
        # --------parameters--------
        t_main = tensor_row  # [bs,sl,vec]
        t_sec = tensor_col  # [bs,ql,vec]
        mask_main = mask_for_tensor_row  # [bs,sl]
        mask_sec = mask_for_tensor_col  # [bs,ql]

        bs, sl, vec = tf.shape(t_main)[0], tf.shape(t_main)[1], tf.shape(t_main)[2]
        ql = tf.shape(t_sec)[1]
        # -------------------------------

        # --------similarity_mat--------
        mask_main_etd = tf.expand_dims(mask_main, 2)  # bs,sl,1
        mask_sec_etd = tf.expand_dims(mask_sec, 1)  # bs,1,ql
        mask_similarity_mat = tf.logical_and(mask_main_etd, mask_sec_etd)  # bs,sl,ql

        if similarity_method == 'inner':
            t_main_etd = tf.expand_dims(t_main, 2)  # bs,sl,1,vec
            t_sec_etd = tf.expand_dims(t_sec, 1)  # bs,1,ql,vec
            similarity_mat = tf.reduce_sum(t_main_etd * t_sec_etd, -1)  # bs,sl,ql
        elif similarity_method == 'tri_linear':
            t_main_tiled = tf.tile(tf.expand_dims(t_main, 2), [1, 1, ql, 1])  # bs,sl,ql,vec
            t_sec_tiled = tf.tile(tf.expand_dims(t_sec, 1), [1, sl, 1, 1])  # bs,sl,ql,vec
            similarity_mat = get_logits([t_main_tiled, t_sec_tiled], None, False,
                                        scope='tri_linear_tri_linear', func='tri_linear')
        elif similarity_method == 'map_linear':
            t_main_map = tf.nn.relu(linear([t_main], hn, True, scope='linear_map_main'))
            t_sec_map = tf.nn.relu(linear([t_sec], hn, True, scope='linear_map_sec'))
            t_main_map_etd = tf.expand_dims(t_main_map, 2)  # bs,sl,1,hn
            t_sec_map_etd = tf.expand_dims(t_sec_map, 1)  # bs,1,ql,hn
            similarity_mat = tf.reduce_sum(t_main_map_etd * t_sec_map_etd, -1)  # bs,sl,ql
        else:
            raise AttributeError('No similarity matrix calculation method \'%s\'' % similarity_method)

        return similarity_mat, mask_similarity_mat
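# A usage sketch (an assumption, not from the original repo) showing two of the
# `similarity_method` options of `gene_similarity_mat_and_mask`. Tensor names and
# sizes are illustrative; 'tri_linear' and 'map_linear' create trainable variables
# via the repo's get_logits/linear helpers.
def _demo_similarity_mat():
    t_row = tf.placeholder(tf.float32, [None, None, 200], 't_row')        # bs,sl,vec
    t_col = tf.placeholder(tf.float32, [None, None, 200], 't_col')        # bs,ql,vec
    row_mask = tf.placeholder(tf.bool, [None, None], 'row_mask')          # bs,sl
    col_mask = tf.placeholder(tf.bool, [None, None], 'col_mask')          # bs,ql

    # plain dot-product similarity, no trainable parameters
    sim_inner, sim_mask = gene_similarity_mat_and_mask(
        t_row, t_col, row_mask, col_mask,
        similarity_method='inner', scope='sim_inner')                     # bs,sl,ql

    # projected dot product through a trainable map of size hn
    sim_map, _ = gene_similarity_mat_and_mask(
        t_row, t_col, row_mask, col_mask,
        similarity_method='map_linear', hn=100, scope='sim_map_linear')   # bs,sl,ql
    return sim_inner, sim_mask, sim_map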
def normal_attention(tensor_base, tensor_to_attend, mask_for_tensor_base, mask_for_tensor_to_attend,
                     similarity_method='inner', hn=100,
                     use_pooling=False, pooling_method='max',
                     reverse=False, scope=None):
    """
    normal_attention for attention strategy 2
    :param tensor_base: rank 3 [bs,sl,vec]
    :param tensor_to_attend: rank 3 [bs,ql,vec]
    :param mask_for_tensor_base: [bs,sl]
    :param mask_for_tensor_to_attend: [bs,ql]
    :param similarity_method: 'inner' 'tri_linear' 'map_linear'
    :param hn: hidden size, only needed by some similarity methods
    :param use_pooling: True or False
    :param pooling_method: 'max' or 'mean'
    :param reverse: if True, use attention strategy 3
    :param scope:
    :return: use_pooling==True: [bs,vec], else: [bs,sl,vec]
    """
    with tf.variable_scope(scope or 'normal_attention'):
        # --------parameters--------
        t_main = tensor_base  # [bs,sl,vec]
        t_sec = tensor_to_attend  # [bs,ql,vec]
        mask_main = mask_for_tensor_base  # [bs,sl]
        mask_sec = mask_for_tensor_to_attend  # [bs,ql]

        bs, sl, vec = tf.shape(t_main)[0], tf.shape(t_main)[1], tf.shape(t_main)[2]
        ql = tf.shape(t_sec)[1]
        # -------------------------------

        # --------similarity_mat--------
        mask_main_etd = tf.expand_dims(mask_main, 2)  # bs,sl,1
        mask_sec_etd = tf.expand_dims(mask_sec, 1)  # bs,1,ql
        mask_similarity_mat = tf.logical_and(mask_main_etd, mask_sec_etd)  # bs,sl,ql

        if similarity_method == 'inner':
            t_main_etd = tf.expand_dims(t_main, 2)  # bs,sl,1,vec
            t_sec_etd = tf.expand_dims(t_sec, 1)  # bs,1,ql,vec
            similarity_mat = tf.reduce_sum(t_main_etd * t_sec_etd, -1)  # bs,sl,ql
        elif similarity_method == 'tri_linear':
            t_main_tiled = tf.tile(tf.expand_dims(t_main, 2), [1, 1, ql, 1])  # bs,sl,ql,vec
            t_sec_tiled = tf.tile(tf.expand_dims(t_sec, 1), [1, sl, 1, 1])  # bs,sl,ql,vec
            similarity_mat = get_logits([t_main_tiled, t_sec_tiled], None, False,
                                        scope='tri_linear_tri_linear', func='tri_linear')
        elif similarity_method == 'map_linear':
            t_main_map = tf.nn.relu(linear([t_main], hn, True, scope='linear_map_main'))
            t_sec_map = tf.nn.relu(linear([t_sec], hn, True, scope='linear_map_sec'))
            t_main_map_etd = tf.expand_dims(t_main_map, 2)  # bs,sl,1,hn
            t_sec_map_etd = tf.expand_dims(t_sec_map, 1)  # bs,1,ql,hn
            similarity_mat = tf.reduce_sum(t_main_map_etd * t_sec_map_etd, -1)  # bs,sl,ql
        else:
            raise AttributeError('No similarity matrix calculation method \'%s\'' % similarity_method)
        # -------------------------------

        if use_pooling:
            # pool mat along -2
            if pooling_method == 'max':
                pooling_out = tf.reduce_max(exp_mask(similarity_mat, mask_similarity_mat), -2)  # bs,sl,ql -> bs,ql
            elif pooling_method == 'mean':
                sum_out = tf.reduce_sum(normal_mask(similarity_mat, mask_similarity_mat), -2)  # bs,sl,ql -> bs,ql
                num = tf.reduce_sum(tf.cast(mask_similarity_mat, tf.int32), -2)  # bs,ql
                num = tf.where(tf.equal(num, tf.zeros_like(num, tf.int32)),
                               tf.ones_like(num, tf.int32), num)
                pooling_out = sum_out / tf.cast(num, tf.float32)  # bs,ql
            else:
                raise AttributeError('No pooling method \'%s\'' % pooling_method)
            return softsel(t_sec, pooling_out, mask_sec)  # bs,ql,vec -> bs,vec
        else:
            t_sec_tiled = tf.tile(tf.expand_dims(t_sec, 1), [1, sl, 1, 1])  # bs,sl,ql,vec
            # target: q_tiled:[bs,sl,ql,hn]; logits: [bs,sl,ql]
            if not reverse:
                out = normal_softsel(t_sec_tiled, similarity_mat, mask_similarity_mat)
            else:
                out = reverse_softsel(t_sec_tiled, similarity_mat, mask_similarity_mat)
            return out  # bs,sl,vec
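# A usage sketch (an assumption, not from the original repo) contrasting the two modes
# of the pair-attention `normal_attention` defined above: with `use_pooling=True` it
# collapses the pair into one vector per example, otherwise it returns one attended
# vector per base position. Shapes and placeholder names are illustrative.
def _demo_normal_attention_pair():
    base = tf.placeholder(tf.float32, [None, None, 200], 'base')          # bs,sl,vec
    other = tf.placeholder(tf.float32, [None, None, 200], 'other')        # bs,ql,vec
    base_mask = tf.placeholder(tf.bool, [None, None], 'base_mask')        # bs,sl
    other_mask = tf.placeholder(tf.bool, [None, None], 'other_mask')      # bs,ql

    pooled = normal_attention(base, other, base_mask, other_mask,
                              similarity_method='inner',
                              use_pooling=True, pooling_method='max',
                              scope='attn_pooled')                        # bs,vec
    per_token = normal_attention(base, other, base_mask, other_mask,
                                 similarity_method='inner',
                                 use_pooling=False,
                                 scope='attn_per_token')                  # bs,sl,vec
    return pooled, per_token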
def build_network(self):
    tds, tel, hn = self.tds, self.tel, self.hn
    bs, sn, sl, ql = self.bs, self.sn, self.sl, self.ql

    with tf.variable_scope('emb'):
        token_emb_mat = generate_embedding_mat(tds, tel, init_mat=self.token_emb_mat,
                                               extra_mat=self.glove_emb_mat,
                                               scope='gene_token_emb_mat')
        c_emb = tf.nn.embedding_lookup(token_emb_mat, self.context_token)  # bs,sn,sl,tel
        q_emb = tf.nn.embedding_lookup(token_emb_mat, self.question_token)  # bs,ql,tel

    with tf.variable_scope('prepro'):
        q_rep = multi_dimensional_attention(
            q_emb, self.question_token_mask, 'q2coding', cfg.dropout,
            self.is_train, cfg.wd, 'relu')  # bs, hn
        q_rep_map = bn_dense_layer(q_rep, hn, True, 0., 'q_rep_map', 'relu',
                                   False, cfg.wd, cfg.dropout, self.is_train)  # bs, hn

    with tf.variable_scope('sent_emb'):
        c_emb_rshp = tf.reshape(c_emb, [bs * sn, sl, tel], 'c_emb_rshp')  # bs*sn,sl,tel
        c_mask_rshp = tf.reshape(self.context_token_mask, [bs * sn, sl], 'c_mask_rshp')  # bs*sn,sl
        sent_enc_rshp = sentence_encoding_models(
            c_emb_rshp, c_mask_rshp, cfg.context_fusion_method, 'relu',
            'sent2enc', cfg.wd, self.is_train, cfg.dropout, hn,
            block_len=cfg.block_len)  # bs*sn, 2*hn
        sent_enc = tf.reshape(sent_enc_rshp, [bs, sn, 2 * hn])  # bs,sn,2*hn
        sent_enc_map = bn_dense_layer(sent_enc, hn, True, 0., 'sent_enc_map', 'relu',
                                      False, cfg.wd, cfg.dropout, self.is_train)

    with tf.variable_scope('fusion'):
        q_rep_map_ex = tf.tile(tf.expand_dims(q_rep_map, 1), [1, sn, 1])  # bs,sn,hn
        fusion_rep = tf.concat(
            [sent_enc_map, q_rep_map_ex, sent_enc_map - q_rep_map_ex, sent_enc_map * q_rep_map_ex],
            -1)  # bs,sn,4hn

    with tf.variable_scope('output'):
        out_cf = context_fusion_layers(
            fusion_rep, self.context_sent_mask, cfg.context_fusion_method, 'relu',
            'out_cf', cfg.wd, self.is_train, cfg.dropout, hn, block_len=4)
        pre_output = bn_dense_layer(out_cf, hn, True, 0., 'pre_output', 'relu',
                                    False, cfg.wd, cfg.dropout, self.is_train)
        logits = get_logits(  # exp masked
            pre_output, None, True, 0., 'logits', self.context_sent_mask,
            cfg.wd, cfg.dropout, self.is_train, 'linear')
    return logits