Example #1
def multi_self_choose_attention(tensor_rep, tensor_mask, hn, channel_num,
                                wd, keep_prob, is_train, scope=None):
    bs, sl, vec = tf.shape(tensor_rep)[0], tf.shape(tensor_rep)[1], tf.shape(tensor_rep)[2]
    fixed_shape = tensor_rep.get_shape().as_list()
    ibs = fixed_shape[0] or bs
    isl = fixed_shape[1] or sl
    ivec = fixed_shape[2]
    each_hn = int(hn / channel_num)
    with tf.variable_scope(scope or 'multi_self_choose_attention'):
        tensor_rep_re = tf.reshape(tensor_rep, [ibs*isl, ivec])
        tensor_rep_re = tf.tile(tf.expand_dims(tensor_rep_re, 0), [channel_num, 1, 1])  # n,bs*sl,vec
        tensor_map_re = linear_3d(tensor_rep_re, each_hn, True, 0., 'linear_3d_map',
                                  False, wd, keep_prob, is_train)  # n,bs*sl,ehn
        tensor_map = tf.reshape(tensor_map_re, [channel_num, ibs, isl, each_hn])  # n,bs, sl,ehn
        mask_tile = tf.tile(tf.expand_dims(tensor_mask, 0), [channel_num, 1, 1])

        # attention score
        attn_pre = linear_3d(tensor_map_re, each_hn, True, 0., 'linear_3d_pre', False,
                             wd, keep_prob, is_train)  # n,bs*sl,ehn
        attn_score = linear_3d(attn_pre, 1, True, 0., 'linear_3d_logits', True,
                               wd, keep_prob, is_train)  # n,bs*sl
        attn_score = tf.reshape(attn_score, [channel_num, ibs, isl])

        # execute attention (a softsel_with_dropout variant existed here but was disabled
        # behind a hard-coded `if False`, so only the plain softsel path is kept)
        output = softsel(tensor_map, attn_score, mask_tile)  # n,bs,ehn
        output = tf.transpose(output, [1, 0, 2])  # bs, n, ehn
        return tf.reshape(output, [ibs, channel_num*each_hn])
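
All of these examples lean on a `softsel` helper that is not shown here. Based on how it is called (logits plus a boolean mask, with the weighted sum taken over the second-to-last axis of the attended tensor), a minimal reconstruction might look like the sketch below; the `VERY_NEGATIVE_NUMBER` constant and the exact masking behaviour are assumptions, and the real helper in the codebase may differ.

import tensorflow as tf

VERY_NEGATIVE_NUMBER = -1e30  # assumed constant used to mask out logits

def exp_mask(logits, mask):
    # Push masked-out positions towards -inf so softmax assigns them ~0 probability.
    return logits + (1.0 - tf.cast(mask, tf.float32)) * VERY_NEGATIVE_NUMBER

def softsel(target, logits, mask=None):
    # target: [..., sl, vec]; logits/mask: [..., sl] -> weighted sum over sl: [..., vec]
    if mask is not None:
        logits = exp_mask(logits, mask)
    probs = tf.nn.softmax(logits)
    return tf.reduce_sum(tf.expand_dims(probs, -1) * target, -2)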
Example #2
def traditional_attention(rep_tensor,
                          rep_mask,
                          scope=None,
                          keep_prob=1.,
                          is_train=None,
                          wd=0.,
                          activation='elu',
                          tensor_dict=None,
                          name=None):
    bs, sl, vec = tf.shape(rep_tensor)[0], tf.shape(rep_tensor)[1], tf.shape(
        rep_tensor)[2]
    ivec = rep_tensor.get_shape()[2]
    with tf.variable_scope(scope or 'traditional_attention'):
        rep_tensor_map = bn_dense_layer(rep_tensor, ivec, True, 0.,
                                        'bn_dense_map', activation, False, wd,
                                        keep_prob, is_train)

        rep_tensor_logits = get_logits([rep_tensor_map],
                                       None,
                                       False,
                                       scope='self_attn_logits',
                                       mask=rep_mask,
                                       input_keep_prob=keep_prob,
                                       is_train=is_train)  # bs,sl
        attn_res = softsel(rep_tensor, rep_tensor_logits, rep_mask)  # bs,vec

        # save attn
        if tensor_dict is not None and name is not None:
            tensor_dict[name] = tf.nn.softmax(rep_tensor_logits)

        return attn_res
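
A hypothetical call of the function above (TensorFlow 1.x graph mode; the placeholder shapes, the 300-dimensional embedding size and the scope name are illustrative assumptions, not from the original code):

rep_tensor = tf.placeholder(tf.float32, [None, None, 300])  # bs, sl, vec
rep_mask = tf.placeholder(tf.bool, [None, None])            # bs, sl
is_train = tf.placeholder(tf.bool, [])
sent_vec = traditional_attention(rep_tensor, rep_mask, scope='sent_attn',
                                 keep_prob=0.8, is_train=is_train, wd=1e-4)  # bs, vec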
Example #3
def attention_with_similarity_mat(similarity_mat, mask_similarity_mat,
                                  tensor_to_attend, mask_for_tensor_to_attend,
                                  use_pooling=False, pooling_method='max',
                                  reverse=False, scope=None):
    if use_pooling:
        # pool mat along -2
        pooling_out = pooling_with_mask(similarity_mat, mask_similarity_mat, -2, pooling_method)  # bs,ql
        return softsel(tensor_to_attend, pooling_out, mask_for_tensor_to_attend)  # bs,ql,vec -> bs,vec
    else:
        t_sec_tiled = tf.tile(tf.expand_dims(tensor_to_attend, 1),
                              [1, tf.shape(similarity_mat)[-2], 1, 1])  # bs,sl,ql,vec
        # target: q_tiled:[bs,sl,ql,hn]; logits: [bs,sl,ql]
        if not reverse:
            out = normal_softsel(t_sec_tiled, similarity_mat, mask_similarity_mat)
        else:
            out = reverse_softsel(t_sec_tiled, similarity_mat, mask_similarity_mat)
        return out  # bs,sl,vec
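
`pooling_with_mask` is not defined in these examples, but the inline pooling branch in the last example below performs the same job, so a plausible reconstruction (reusing the assumed `exp_mask` sketched earlier) is the following; the real helper may differ.

def pooling_with_mask(mat, mat_mask, axis, method='max'):
    # Sketch based on the inline max/mean pooling in the last example.
    if method == 'max':
        return tf.reduce_max(exp_mask(mat, mat_mask), axis)
    elif method == 'mean':
        total = tf.reduce_sum(mat * tf.cast(mat_mask, tf.float32), axis)
        count = tf.reduce_sum(tf.cast(mat_mask, tf.int32), axis)
        count = tf.maximum(count, 1)  # avoid division by zero on fully masked rows
        return total / tf.cast(count, tf.float32)
    raise AttributeError('No pooling method \'%s\'' % method)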
Example #4
def normal_attention(rep_tensor,
                     rep_mask,
                     scope=None,
                     keep_prob=1.,
                     is_train=None,
                     wd=0.,
                     activation='elu',
                     tensor_dict=None,
                     name=None):
    batch_size, code_len, vec_size = tf.shape(rep_tensor)[0], tf.shape(
        rep_tensor)[1], tf.shape(rep_tensor)[2]
    ivec = rep_tensor.get_shape()[2]
    with tf.variable_scope(scope or 'normal_attention'):
        rep_tensor_map = bn_dense_layer(rep_tensor, ivec, True, 0.,
                                        'bn_dense_map', activation, False, wd,
                                        keep_prob, is_train)

        rep_tensor_logits = get_logits([rep_tensor_map],
                                       None,
                                       False,
                                       scope='self_attn_logits',
                                       mask=rep_mask,
                                       input_keep_prob=keep_prob,
                                       is_train=is_train)  # bs,sl
        attn_result = softsel(rep_tensor, rep_tensor_logits,
                              rep_mask)  # bs,vec

        # save attn
        if tensor_dict is not None and name is not None:
            tensor_dict[name] = tf.nn.softmax(rep_tensor_logits)

        with tf.variable_scope('output'):
            o_bias = tf.get_variable('o_bias', [ivec], tf.float32,
                                     tf.constant_initializer(0.))
            # input gate
            fusion_gate = tf.nn.sigmoid(
                linear(rep_tensor_map, ivec, True, 0., 'linear_fusion_i',
                       False, wd, keep_prob, is_train) +
                linear(attn_result, ivec, True, 0., 'linear_fusion_a', False,
                       wd, keep_prob, is_train) + o_bias)
            output = fusion_gate * rep_tensor_map + (1 -
                                                     fusion_gate) * attn_result
            output = mask_for_high_rank(output, rep_mask)  # bs,sl,vec
        return output
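
The fusion gate at the end interpolates element-wise between the mapped input and the attention result. A toy numpy illustration of just that interpolation (the shapes and the stand-in gate pre-activation are illustrative only, not the project's `linear` layers):

import numpy as np

rng = np.random.default_rng(0)
rep_map = rng.normal(size=(2, 4))    # stand-in for rep_tensor_map
attn_res = rng.normal(size=(2, 4))   # stand-in for attn_result
gate = 1.0 / (1.0 + np.exp(-(rep_map + attn_res)))  # sigmoid of a stand-in pre-activation
output = gate * rep_map + (1.0 - gate) * attn_res   # per-element convex combination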
Example #5
def self_align_attention(rep_tensor, mask, scope=None, simplify=True, hn=None):  # correct
    """
    attention strategy 4: self * self => attention self
    :param rep_tensor: rank is three [bs,sl,hn]
    :param mask: [bs,sl] tf.bool
    :param scope
    :param simplify:
    :return:  attended tensor [bs,sl,hn]
    """
    with tf.name_scope(scope or 'self_attention'):
        bs = tf.shape(rep_tensor)[0]
        sl = tf.shape(rep_tensor)[1]
        #vec = tf.shape(rep_tensor)[2]
        ivec = rep_tensor.get_shape().as_list()[-1]

        to_be_attended = tf.tile(tf.expand_dims(rep_tensor, 1), [1, sl, 1, 1])
        if not simplify:
            assert hn is not None
            rep_tensor = tf.nn.relu(linear([rep_tensor], hn, True, 0., 'linear_transform'))
        # 1. self alignment
        mask_tiled_sec = tf.tile(tf.expand_dims(mask, 1), [1, sl, 1])  # bs,sl,sl
        mask_tiled_main = tf.tile(tf.expand_dims(mask, 2), [1, 1, sl])  # bs,sl,sl
        mask_tiled = tf.logical_and(mask_tiled_sec, mask_tiled_main)
        input_sec = tf.tile(tf.expand_dims(rep_tensor, 1), [1, sl, 1, 1])  # bs,sl,sl,hn
        input_main = tf.tile(tf.expand_dims(rep_tensor, 2), [1, 1, sl, 1])  # bs,sl,sl,hn
        # scaled inner product between every pair of positions
        self_alignment = (1.0 / ivec) * tf.reduce_sum(input_sec * input_main, -1)  # bs,sl,sl
        # 2. generate a mask that removes the diagonal, so a position does not attend to itself
        diag = tf.expand_dims(tf.logical_not(
                tf.cast(tf.diag(tf.ones([sl], tf.int32)), tf.bool)), 0)  # 1,sl,sl
        diag = tf.tile(diag, [bs, 1, 1])  # bs, sl, sl
        # 3. attend data
        context = softsel(to_be_attended, self_alignment, tf.logical_and(mask_tiled, diag))  # [bs,sl,sl],  bs,sl,hn
        return context
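
The tiled element-wise product above materialises a [bs,sl,sl,hn] tensor just to compute pairwise inner products. The same alignment scores can be obtained with a batched matmul; this is an alternative sketch, not part of the original code:

def scaled_self_alignment(rep_tensor, ivec):
    # rep_tensor: [bs, sl, hn] -> pairwise scaled inner products [bs, sl, sl],
    # identical to (1/ivec) * reduce_sum(input_sec * input_main, -1) above.
    return tf.matmul(rep_tensor, rep_tensor, transpose_b=True) / float(ivec)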
Example #6
def self_choose_attention(rep_tensor, rep_mask, hn,  # correct
                          keep_prob=1., is_train=None, scope=None, simplify=False):
    """
    self soft choose attention with 
    :param rep_tensor: rank must be 3 [bs,sl,hn]
    :param rep_mask: [bs,sl]
    :param hn: 
    :param keep_prob: 
    :param is_train: 
    :param scope:
    :param simplify
    :return: 
    """
    with tf.variable_scope(scope or 'self_choose_attention'):
        if not simplify:
            rep_tensor_map = tf.nn.relu(linear([rep_tensor], hn, True, scope='linear_map',
                                        input_keep_prob=keep_prob, is_train=is_train))
        else:
            rep_tensor_map = tf.identity(rep_tensor)
        rep_tensor_logits = get_logits([rep_tensor_map], None, False, scope='self_attn_logits',
                                       mask=rep_mask, input_keep_prob=keep_prob, is_train=is_train)  # bs,sl
        attn_res = softsel(rep_tensor, rep_tensor_logits, rep_mask)  # bs,vec
        return attn_res
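
A hypothetical invocation (TensorFlow 1.x; the placeholder shapes, hidden size 200 and scope name are illustrative assumptions):

seq_rep = tf.placeholder(tf.float32, [None, None, 300])  # bs, sl, hn
seq_mask = tf.placeholder(tf.bool, [None, None])          # bs, sl
pooled = self_choose_attention(seq_rep, seq_mask, hn=200,
                               keep_prob=0.8, is_train=tf.constant(True),
                               scope='choose_attn')  # bs, vec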
Example #7
def normal_attention(tensor_base, tensor_to_attend,
                     mask_for_tensor_base,
                     mask_for_tensor_to_attend,
                     similarity_method='inner', hn=100,
                     use_pooling=False, pooling_method='max',
                     reverse=False, scope=None):
    """
    normal_attention for attention strategy 2 
    :param tensor_base: rank 3 [bs,sl,vec]
    :param tensor_to_attend: rank 3 [bs,ql,vec]
    :param mask_for_tensor_base: [bs,ql]
    :param mask_for_tensor_to_attend: [bs,sl]
    :param similarity_method: 'inner' 'tri_linear' 'map_linear'
    :param hn: some method need 
    :param use_pooling: True or False
    :param pooling_method: 'max' or 'mean'
    :param reverse: if use strategy 3
    :param scope: 
    :return: use_pooling==True: [bs,sl,hn] else [bs,hn]
    """
    with tf.variable_scope(scope or 'normal_attention'):
        # --------parameters--------
        t_main = tensor_base  # [bs,sl,vec]
        t_sec = tensor_to_attend  # [bs,ql,vec]
        mask_main = mask_for_tensor_base  # [bs,sl]
        mask_sec = mask_for_tensor_to_attend  # [bs,ql]

        bs, sl, vec = tf.shape(t_main)[0], tf.shape(t_main)[1], tf.shape(t_main)[2]
        ql = tf.shape(t_sec)[1]
        # -------------------------------
        # --------similarity_mat--------
        mask_main_etd = tf.expand_dims(mask_main, 2)  # bs,sl,1
        mask_sec_etd = tf.expand_dims(mask_sec, 1)  # bs,1,ql
        mask_similarity_mat = tf.logical_and(mask_main_etd, mask_sec_etd)  # bs,sl,ql
        if similarity_method == 'inner':
            t_main_etd = tf.expand_dims(t_main, 2)  # bs,sl,1,vec
            t_sec_etd = tf.expand_dims(t_sec, 1)  # bs,1,ql,vec
            similarity_mat = tf.reduce_sum(t_main_etd*t_sec_etd, -1)  # bs,sl,ql
        elif similarity_method == 'tri_linear':
            t_main_tiled = tf.tile(tf.expand_dims(t_main, 2), [1, 1, ql, 1])  # bs,sl,ql,vec
            t_sec_tiled = tf.tile(tf.expand_dims(t_sec, 1), [1, sl, 1, 1])  # bs,sl,ql,vec
            similarity_mat = get_logits([t_main_tiled, t_sec_tiled], None, False,
                                        scope='tri_linear_tri_linear', func='tri_linear')
        elif similarity_method == 'map_linear':
            t_main_map = tf.nn.relu(linear([t_main], hn, True, scope='linear_map_main'))
            t_sec_map = tf.nn.relu(linear([t_sec], hn, True, scope='linear_map_sec'))
            t_main_map_etd = tf.expand_dims(t_main_map, 2)  # bs,sl,1,hn
            t_sec_map_etd = tf.expand_dims(t_sec_map, 1)  # bs,1,ql,hn
            similarity_mat = tf.reduce_sum(t_main_map_etd * t_sec_map_etd, -1)  # bs,sl,ql
        else:
            raise AttributeError('No similarity matrix calculation method \'%s\'' % similarity_method)
        # -------------------------------
        if use_pooling:
            # pool mat along -2
            if pooling_method == 'max':
                pooling_out = tf.reduce_max(exp_mask(similarity_mat, mask_similarity_mat), -2)  # bs,sl,ql -> bs,ql
            elif pooling_method == 'mean':
                sum_out = tf.reduce_sum(normal_mask(similarity_mat, mask_similarity_mat), -2)  # bs,sl,ql -> bs,ql
                num = tf.reduce_sum(tf.cast(mask_similarity_mat, tf.int32), -2)  # bs,ql
                num = tf.where(tf.equal(num, tf.zeros_like(num, tf.int32)),
                               tf.ones_like(num, tf.int32), num)
                pooling_out = sum_out / tf.cast(num, tf.float32)  # bs,ql
            else:
                raise AttributeError('No pooling method \'%s\'' % pooling_method)
            return softsel(t_sec, pooling_out, mask_sec)  # bs,ql,vec -> bs,vec
        else:
            t_sec_tiled = tf.tile(tf.expand_dims(t_sec, 1), [1, sl, 1, 1])  # bs,sl,ql,vec
            # target: q_tiled:[bs,sl,ql,hn]; logits: [bs,sl,ql]
            if not reverse:
                out = normal_softsel(t_sec_tiled, similarity_mat, mask_similarity_mat)
            else:
                out = reverse_softsel(t_sec_tiled, similarity_mat, mask_similarity_mat)
            return out  # bs,sl,vec
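
A hypothetical context-to-query call of this function (TensorFlow 1.x; the names, placeholder shapes and chosen similarity method are illustrative, not from the original code):

context = tf.placeholder(tf.float32, [None, None, 300])   # bs, sl, vec
query = tf.placeholder(tf.float32, [None, None, 300])     # bs, ql, vec
context_mask = tf.placeholder(tf.bool, [None, None])      # bs, sl
query_mask = tf.placeholder(tf.bool, [None, None])        # bs, ql
attended = normal_attention(context, query, context_mask, query_mask,
                            similarity_method='inner', use_pooling=False,
                            scope='ctx2query_attn')  # bs, sl, vec

With use_pooling=True the query axis is collapsed first, so the call returns a single summary vector per example instead of one attended vector per context position.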