Example #1
def double_linear_logits(args,
                         size,
                         bias,
                         bias_start=0.0,
                         scope=None,
                         mask=None,
                         wd=0.0,
                         input_keep_prob=1.0,
                         is_train=None):
    with tf.variable_scope(scope or "Double_Linear_Logits"):
        first = tf.tanh(
            linear(args,
                   size,
                   bias,
                   bias_start=bias_start,
                   scope='first',
                   wd=wd,
                   input_keep_prob=input_keep_prob,
                   is_train=is_train))
        second = linear(first,
                        1,
                        bias,
                        bias_start=bias_start,
                        squeeze=True,
                        scope='second',
                        wd=wd,
                        input_keep_prob=input_keep_prob,
                        is_train=is_train)
        if mask is not None:
            second = exp_mask(second, mask)
        return second
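Every example in this listing leans on an exp_mask helper that is never shown. A minimal sketch consistent with how it is called here, assuming the usual convention of adding a very large negative number at masked positions so a downstream softmax assigns them near-zero probability:

import tensorflow as tf

VERY_NEGATIVE_NUMBER = -1e30  # assumed constant; any large enough magnitude works

def exp_mask(val, mask, name=None):
    # Push masked-out positions toward -inf so that softmax(exp_mask(x, m))
    # puts ~0 probability wherever mask is False.
    with tf.name_scope(name or "exp_mask"):
        return val + (1.0 - tf.cast(mask, tf.float32)) * VERY_NEGATIVE_NUMBER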
Example #2
def reverse_softsel(target, logits, mask=None, scope=None):
    with tf.name_scope(scope or "reverse_softsel"):
        logits_rank = len(logits.get_shape().as_list())
        if mask is not None:
            logits = exp_mask(logits, mask)
        # softmax over the second-to-last axis instead of the last one
        a = tf.nn.softmax(logits, logits_rank - 2)
        target_rank = len(target.get_shape().as_list())
        out = tf.reduce_sum(tf.expand_dims(a, -1) * target, target_rank - 2)
        return out
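reverse_softsel differs from the usual softsel only in the softmax axis: it normalizes the attention weights over the second-to-last dimension rather than the last. For contrast, a sketch of the forward counterpart, assuming the masked softmax from Example #7 below; the repo's own softsel may differ in details:

def softsel(target, logits, mask=None, scope=None):
    # Weighted sum of `target` along its second-to-last axis, with weights
    # from a softmax over the last axis of `logits`.
    with tf.name_scope(scope or "softsel"):
        a = softmax(logits, mask=mask)
        target_rank = len(target.get_shape().as_list())
        return tf.reduce_sum(tf.expand_dims(a, -1) * target, target_rank - 2)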
Example #3
def sum_logits(args, mask=None, name=None):
    with tf.name_scope(name or "sum_logits"):
        if args is None or (isinstance(args, (tuple, list)) and not args):
            raise ValueError("`args` must be specified")
        if not isinstance(args, (tuple, list)):
            args = [args]
        rank = len(args[0].get_shape())
        logits = sum(tf.reduce_sum(arg, rank - 1) for arg in args)
        if mask is not None:
            logits = exp_mask(logits, mask)
        return logits
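A hypothetical usage, just to pin down shapes: summing over the last axis turns per-feature scores into one logit per position, and the optional mask blanks out padded positions:

x = tf.placeholder(tf.float32, [None, 20, 100])  # bs, sl, vec (hypothetical shapes)
m = tf.placeholder(tf.bool, [None, 20])          # bs, sl
logits = sum_logits([x], mask=m)                 # -> bs, sl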
Example #4
    def forward(self, hidden_states, rep_mask, attn_mask, **kwargs):
        bs, sl, hn = hidden_states.size()
        # scaled dot-product self-attention scores (scaled by sqrt of hidden size)
        attn_scores = torch.bmm(  # bs,sl,sl
            hidden_states, torch.transpose(hidden_states, 1, 2)) / (hn ** 0.5)
        # the attention mask doubles as the graph mask here
        graph_mask = attn_mask

        attn_prob = self._attn_softmax(exp_mask(graph_mask, attn_scores))  # bs,sl,sl
        attn_res = torch.bmm(attn_prob, hidden_states)  # [bs,sl,sl]x[bs,sl,hn] ==> [bs,sl,hn]
        final_res = zero_mask(rep_mask, attn_res, high_rank=True)
        return final_res
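Unlike the TensorFlow snippets, this PyTorch code passes the mask as the first argument. A sketch of exp_mask/zero_mask signatures consistent with these calls (inferred from the snippet, not confirmed against the repo):

import torch

VERY_NEGATIVE_NUMBER = -1e30

def exp_mask(mask, val):
    # mask-first argument order, matching the call above
    return val + (1.0 - mask.float()) * VERY_NEGATIVE_NUMBER

def zero_mask(mask, val, high_rank=False):
    # Zero out padded positions; high_rank broadcasts a [bs,sl] mask
    # over a [bs,sl,hn] tensor.
    if high_rank:
        mask = mask.unsqueeze(-1)
    return val * mask.float()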
Example #5
def pooling_with_mask(rep_tensor, rep_mask, dim=-1, pooling_method='max', scope=None):
    # rep_tensor and rep_mask must have the same shape
    with tf.name_scope(scope or '%s_pooling_with_mask_' % pooling_method):
        if pooling_method == 'max':
            pooling_out = tf.reduce_max(exp_mask(rep_tensor, rep_mask), dim)  # bs,sl,ql -> bs,xl
        elif pooling_method == 'mean':
            sum_out = tf.reduce_sum(normal_mask(rep_tensor, rep_mask), dim)  # bs,sl,ql -> bs,xl
            num = tf.reduce_sum(tf.cast(rep_mask, tf.int32), dim)  # count of valid positions; bs,xl
            num = tf.where(tf.equal(num, tf.zeros_like(num, tf.int32)),
                           tf.ones_like(num, tf.int32), num)
            pooling_out = sum_out / tf.cast(num, tf.float32)  # bs,xl
        else:
            raise AttributeError('No pooling method \'%s\'' % pooling_method)
        return pooling_out
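The 'mean' branch also relies on a normal_mask helper (not shown) that zeroes out masked entries before the summation; a minimal sketch under that assumption:

def normal_mask(val, mask, name=None):
    # Zero masked entries so reduce_sum ignores padding.
    with tf.name_scope(name or "normal_mask"):
        return val * tf.cast(mask, tf.float32)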
Example #6
def linear_logits(args,
                  bias,
                  bias_start=0.0,
                  scope=None,
                  mask=None,
                  wd=0.0,
                  input_keep_prob=1.0,
                  is_train=None):
    with tf.variable_scope(scope or "Linear_Logits"):
        logits = linear(args,
                        1,
                        bias,
                        bias_start=bias_start,
                        squeeze=True,
                        scope='first',
                        wd=wd,
                        input_keep_prob=input_keep_prob,
                        is_train=is_train)
        if mask is not None:
            logits = exp_mask(logits, mask)
        return logits
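This is the one-layer variant of Example #1: a single linear projection straight to a scalar logit per position (squeeze=True drops the trailing size-1 axis), followed by the same optional exp_mask.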
Example #7
def softmax(logits, mask=None, scope=None):
    with tf.name_scope(scope or "Softmax"):
        if mask is not None:
            logits = exp_mask(logits, mask)
        out = tf.nn.softmax(logits, -1)
        return out
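A quick hypothetical check of the masked softmax: the masked position receives ~zero probability and the remaining entries renormalize among themselves:

logits = tf.constant([[1.0, 2.0, 3.0]])
mask = tf.constant([[True, True, False]])
probs = softmax(logits, mask)  # ~[[0.27, 0.73, 0.00]]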
Example #8
def normal_attention(tensor_base, tensor_to_attend,
                     mask_for_tensor_base,
                     mask_for_tensor_to_attend,
                     similarity_method='inner', hn=100,
                     use_pooling=False, pooling_method='max',
                     reverse=False, scope=None):
    """
    normal_attention for attention strategy 2 
    :param tensor_base: rank 3 [bs,sl,vec]
    :param tensor_to_attend: rank 3 [bs,ql,vec]
    :param mask_for_tensor_base: [bs,ql]
    :param mask_for_tensor_to_attend: [bs,sl]
    :param similarity_method: 'inner' 'tri_linear' 'map_linear'
    :param hn: some method need 
    :param use_pooling: True or False
    :param pooling_method: 'max' or 'mean'
    :param reverse: if use strategy 3
    :param scope: 
    :return: use_pooling==True: [bs,sl,hn] else [bs,hn]
    """
    with tf.variable_scope(scope or 'normal_attention'):
        # --------parameters--------
        t_main = tensor_base  # [bs,sl,vec]
        t_sec = tensor_to_attend  # [bs,ql,vec]
        mask_main = mask_for_tensor_base  # [bs,sl]
        mask_sec = mask_for_tensor_to_attend  # [bs,ql]

        bs, sl, vec = tf.shape(t_main)[0], tf.shape(t_main)[1], tf.shape(t_main)[2]
        ql = tf.shape(t_sec)[1]
        # -------------------------------
        # --------similarity_mat--------
        mask_main_etd = tf.expand_dims(mask_main, 2)  # bs,sl,1
        mask_sec_etd = tf.expand_dims(mask_sec, 1)  # bs,1,ql
        mask_similarity_mat = tf.logical_and(mask_main_etd, mask_sec_etd)  # bs,sl,ql
        if similarity_method == 'inner':
            t_main_etd = tf.expand_dims(t_main, 2)  # bs,sl,1,vec
            t_sec_etd = tf.expand_dims(t_sec, 1)  # bs,1,ql,vec
            similarity_mat = tf.reduce_sum(t_main_etd*t_sec_etd, -1)  # bs,sl,ql
        elif similarity_method == 'tri_linear':
            t_main_tiled = tf.tile(tf.expand_dims(t_main, 2), [1, 1, ql, 1])  # bs,sl,ql,vec
            t_sec_tiled = tf.tile(tf.expand_dims(t_sec, 1), [1, sl, 1, 1])  # bs,sl,ql,vec
            similarity_mat = get_logits([t_main_tiled, t_sec_tiled], None, False,
                                        scope='tri_linear_tri_linear', func='tri_linear')
        elif similarity_method == 'map_linear':
            t_main_map = tf.nn.relu(linear([t_main], hn, True, scope='linear_map_main'))
            t_sec_map = tf.nn.relu(linear([t_sec], hn, True, scope='linear_map_sec'))
            t_main_map_etd = tf.expand_dims(t_main_map, 2)  # bs,sl,1,hn
            t_sec_map_etd = tf.expand_dims(t_sec_map, 1)  # bs,1,ql,hn
            similarity_mat = tf.reduce_sum(t_main_map_etd * t_sec_map_etd, -1)  # bs,sl,ql
        else:
            raise AttributeError('No similarity matrix calculation method \'%s\'' % similarity_method)
        # -------------------------------
        if use_pooling:
            # pool mat along -2
            if pooling_method == 'max':
                pooling_out = tf.reduce_max(exp_mask(similarity_mat, mask_similarity_mat), -2)  # bs,sl,ql -> bs,ql
            elif pooling_method == 'mean':
                sum_out = tf.reduce_sum(normal_mask(similarity_mat, mask_similarity_mat), -2)  # bs,sl,ql -> bs,ql
                num = tf.reduce_sum(tf.cast(mask_similarity_mat, tf.int32), -2)  # bs,ql
                num = tf.where(tf.equal(num, tf.zeros_like(num, tf.int32)),
                               tf.ones_like(num, tf.int32), num)
                pooling_out = sum_out / tf.cast(num, tf.float32)  # bs,ql
            else:
                raise AttributeError('No pooling method \'%s\'' % pooling_method)
            return softsel(t_sec, pooling_out, mask_sec)  # bs,ql,vec -> bs,vec
        else:
            t_sec_tiled = tf.tile(tf.expand_dims(t_sec, 1), [1, sl, 1, 1])  # bs,sl,ql,vec
            # target: q_tiled:[bs,sl,ql,hn]; logits: [bs,sl,ql]
            if not reverse:
                out = normal_softsel(t_sec_tiled, similarity_mat, mask_similarity_mat)
            else:
                out = reverse_softsel(t_sec_tiled, similarity_mat, mask_similarity_mat)
            return out  # bs,sl,vec
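The 'tri_linear' branch delegates to get_logits with func='tri_linear'. A sketch of a BiDAF-style trilinear similarity that matches the shapes used above (an assumption about get_logits internals; the repo's version also handles bias, weight decay and dropout):

def tri_linear_logits(a, b, scope=None):
    # w^T [a; b; a*b] per position: a, b are [bs,sl,ql,vec] -> [bs,sl,ql]
    with tf.variable_scope(scope or "tri_linear_logits"):
        d = a.get_shape().as_list()[-1]
        w_a = tf.get_variable("w_a", [d], tf.float32)
        w_b = tf.get_variable("w_b", [d], tf.float32)
        w_ab = tf.get_variable("w_ab", [d], tf.float32)
        return (tf.reduce_sum(a * w_a, -1)
                + tf.reduce_sum(b * w_b, -1)
                + tf.reduce_sum(a * b * w_ab, -1))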
Example #9
def multi_head_attention(rep_tensor,
                         rep_mask,
                         head_num=8,
                         hidden_units_num=64,
                         scope=None,
                         is_train=None,
                         keep_prob=1.,
                         wd=0.):
    bs, sl, vec = tf.shape(rep_tensor)[0], tf.shape(rep_tensor)[1], tf.shape(
        rep_tensor)[2]
    ivec = rep_tensor.get_shape().as_list()[2]

    with tf.variable_scope(scope or 'multi_head_attention'):

        with tf.variable_scope('positional_encoding'):
            seq_idxs = tf.tile(tf.expand_dims(tf.range(sl), 1),
                               [1, ivec])  # sl, ivec
            feature_idxs = tf.tile(tf.expand_dims(tf.range(ivec), 0),
                                   [sl, 1])  # sl, ivec
            pos_enc = tf.where(
                tf.equal(tf.mod(feature_idxs, 2), 0),
                tf.sin(
                    tf.cast(seq_idxs, tf.float32) / tf.pow(
                        10000., 2.0 * tf.cast(feature_idxs, tf.float32) /
                        (1.0 * ivec))),
                tf.cos(
                    tf.cast(seq_idxs, tf.float32) / tf.pow(
                        10000., 2.0 * tf.cast(feature_idxs - 1, tf.float32) /
                        (1.0 * ivec))),
            )
            rep_tensor_pos = mask_for_high_rank(rep_tensor + pos_enc,
                                                rep_mask)  # bs, sl, ivec

        with tf.variable_scope('multi_head_attention'):
            W = tf.get_variable('W', [3, head_num, ivec, hidden_units_num],
                                tf.float32)
            rep_tile = tf.tile(
                tf.expand_dims(tf.expand_dims(rep_tensor_pos, 0), 0),
                [3, head_num, 1, 1, 1])  # 3,head_num,bs,sl,ivec
            rep_tile_reshape = tf.reshape(
                rep_tile, [3, head_num, bs * sl, ivec])  # 3,head_num,bs*sl,ivec

            maps = tf.reshape(  # 3,head_num,bs*sl,hn ->  3,head_num,bs,sl,hn
                tf.matmul(dropout(rep_tile_reshape, keep_prob, is_train), W),
                [3, head_num, bs, sl, hidden_units_num])
            Q_map, K_map, V_map = tf.split(maps, 3, 0)
            Q_map = tf.squeeze(Q_map, [0])  # head_num,bs,sl,hn
            K_map = tf.squeeze(K_map, [0])  # head_num,bs,sl,hn
            V_map = tf.squeeze(V_map, [0])  # head_num,bs,sl,hn

            # head_num,bs,sl,sl
            # similarity_mat = tf.reduce_sum(Q_map_tile * K_map_tile, -1) / math.sqrt(1. * hidden_units_num)
            similarity_mat = tf.matmul(Q_map, tf.transpose(
                K_map, [0, 1, 3, 2])) / math.sqrt(1. * hidden_units_num)

            # mask: bs,sl -> head_num,bs,sl
            multi_mask = tf.tile(tf.expand_dims(rep_mask, 0),
                                 [head_num, 1, 1])  # head_num,bs,sl
            multi_mask_tile_1 = tf.expand_dims(multi_mask,
                                               2)  # head_num,bs,1,sl
            multi_mask_tile_2 = tf.expand_dims(multi_mask,
                                               3)  # head_num,bs,sl,1
            multi_mask_tile = tf.logical_and(
                multi_mask_tile_1, multi_mask_tile_2)  # head_num,bs,sl,sl
            similarity_mat_masked = exp_mask(
                similarity_mat, multi_mask_tile)  # head_num,bs,sl,sl
            prob_dist = tf.nn.softmax(
                similarity_mat_masked)  # head_num,bs,sl,sl
            prob_dist_dp = dropout(prob_dist, keep_prob, is_train)

            attn_res = tf.matmul(prob_dist_dp, V_map)  # head_num,bs,sl,hn

            attn_res_tran = tf.transpose(attn_res, [1, 2, 0, 3])
            output = tf.reshape(attn_res_tran,
                                [bs, sl, head_num * hidden_units_num])

            if wd > 0.:
                add_reg_without_bias()

            return output
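A hypothetical invocation (the snippet additionally assumes import math and the repo's helpers mask_for_high_rank, dropout and add_reg_without_bias): 8 heads of 64 units each give a [bs, sl, 512] output.

rep = tf.placeholder(tf.float32, [None, None, 300])  # bs, sl, ivec (ivec must be static)
mask = tf.placeholder(tf.bool, [None, None])         # bs, sl
out = multi_head_attention(rep, mask, head_num=8, hidden_units_num=64,
                           is_train=tf.constant(False), keep_prob=1.)
# out: bs, sl, 8*64 = bs, sl, 512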