Example #1
def multi_dimensional_attention(rep_tensor,
                                scope=None,
                                keep_prob=1.,
                                is_train=None,
                                wd=0.,
                                activation='elu',
                                tensor_dict=None,
                                name=None):
    bs, sl, vec = tf.shape(rep_tensor)[0], tf.shape(rep_tensor)[1], tf.shape(
        rep_tensor)[2]
    ivec = rep_tensor.get_shape()[-1]  # feature size of the last axis
    with tf.variable_scope(scope or 'multi_dimensional_attention'):
        map1 = layers.bn_dense_layer(rep_tensor, ivec, True, 0.,
                                     'bn_dense_map1', activation, False, wd,
                                     keep_prob, is_train)
        map2 = layers.bn_dense_layer(map1, ivec, True, 0., 'bn_dense_map2',
                                     'linear', False, wd, keep_prob, is_train)
        # map2_masked = layers.exp_mask_for_high_rank(map2, rep_mask)

        soft = tf.nn.softmax(map2, 1)  # bs,sl,vec
        print("soft.shape :: ", soft.shape)

        # save attn
        if tensor_dict is not None and name is not None:
            tensor_dict[name] = soft

        # attn_output = tf.reduce_sum(soft * rep_tensor, 1)  # bs,vec
        return soft * rep_tensor
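For reference, a minimal standalone sketch of the same feature-wise ("multi-dimensional") attention in plain TF 1.x, with tf.layers.dense standing in for the project's layers.bn_dense_layer helper (an assumed substitution, not the original API). Unlike Example #1, which returns the element-wise weighted tensor, the sketch also reduces over the time axis to produce the bs,vec summary vector.

import tensorflow as tf


def multi_dim_attention_sketch(rep_tensor, activation=tf.nn.elu, scope=None):
    # rep_tensor: bs,sl,vec
    ivec = rep_tensor.get_shape().as_list()[-1]
    with tf.variable_scope(scope or 'multi_dim_attention_sketch'):
        map1 = tf.layers.dense(rep_tensor, ivec, activation, name='map1')
        map2 = tf.layers.dense(map1, ivec, name='map2')   # linear logits: bs,sl,vec
        soft = tf.nn.softmax(map2, axis=1)                # normalize over the time axis
        weighted = soft * rep_tensor                      # bs,sl,vec (what Example #1 returns)
        return tf.reduce_sum(weighted, axis=1)            # bs,vec summary

# Usage: x = tf.placeholder(tf.float32, [None, 20, 300]); s = multi_dim_attention_sketch(x)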
Example #2
def MultiDm_attention_with_dense(rep_tensor,
                                 rep_mask,
                                 img_tensor,
                                 direction=None,
                                 scope=None,
                                 keep_prob=1.,
                                 is_train=None,
                                 wd=0.,
                                 activation='elu',
                                 tensor_dict=None,
                                 name=None):
    def scaled_tanh(x, scale=5.):
        return scale * tf.nn.tanh(1. / scale * x)

    bs, sl, vec = tf.shape(rep_tensor)[0], tf.shape(rep_tensor)[1], tf.shape(
        rep_tensor)[2]
    ivec = rep_tensor.get_shape()[2]
    with tf.variable_scope(scope or 'directional_attention_%s' %
                           (direction or 'diag')):
        # mask generation
        sl_indices = tf.range(sl, dtype=tf.int32)
        sl_col, sl_row = tf.meshgrid(sl_indices, sl_indices)
        # if direction is None:
        # True everywhere except the diagonal (each token attends to every other position)
        direct_mask = tf.cast(tf.diag(-tf.ones([sl], tf.int32)) + 1, tf.bool)
        # else:
        #     if direction == 'forward':
        #         direct_mask = tf.greater(sl_row, sl_col)
        #     else:
        #         direct_mask = tf.greater(sl_col, sl_row)
        direct_mask_tile = tf.tile(tf.expand_dims(direct_mask, 0),
                                   [bs, 1, 1])  # bs,sl,sl
        rep_mask_tile = tf.tile(tf.expand_dims(rep_mask, 1),
                                [1, sl, 1])  # bs,sl,sl
        attn_mask = tf.logical_and(direct_mask_tile, rep_mask_tile)  # bs,sl,sl

        # non-linear
        rep_map = layers.bn_dense_layer(rep_tensor, ivec, True, 0.,
                                        'bn_dense_map', activation, False, wd,
                                        keep_prob, is_train)

        img_tensor_ = layers.bn_dense_layer(img_tensor, ivec, True, 0.,
                                            'img_tensor_', activation, False,
                                            wd, keep_prob, is_train)

        print("rep_map shape :: ", rep_map.shape)
        print("img_tensor_.shape :: ", img_tensor_.shape)

        rep_map_tile = tf.tile(tf.expand_dims(rep_map, 1),
                               [1, sl, 1, 1])  # bs,sl,sl,vec

        img_tensor_tile = tf.tile(tf.expand_dims(img_tensor_, 1),
                                  [1, sl, 1, 1])

        rep_map_dp = layers.dropout(rep_map, keep_prob, is_train)

        img_tensor_dp = layers.dropout(img_tensor_, keep_prob, is_train)

        # attention
        with tf.variable_scope('attention'):  # bs,sl,sl,vec
            f_bias = tf.get_variable('f_bias', [ivec], tf.float32,
                                     tf.constant_initializer(0.))
            dependent = layers.linear(img_tensor_dp,
                                      ivec,
                                      False,
                                      scope='linear_dependent')  # bs,n_img,vec
            dependent_etd = tf.expand_dims(dependent, 1)  # bs,1,n_img,vec

            head = layers.linear(rep_map_dp, ivec, False,
                                 scope='linear_head')  # bs,sl,vec
            head_etd = tf.expand_dims(head, 2)  # bs,sl,1,vec

            print("head_etd :: ", head_etd.shape)
            print("dependent_etd :: ", dependent_etd.shape)

            logits = scaled_tanh(dependent_etd + head_etd + f_bias,
                                 5.0)  # bs,sl,n_img,vec

            print("logits.sape :: ", logits.shape)

            # logits_masked = layers.exp_mask_for_high_rank(logits, attn_mask)
            attn_score = tf.nn.softmax(logits, 2)  # softmax over image regions: bs,sl,n_img,vec
            # attn_score = layers.mask_for_high_rank(attn_score, attn_mask)

            print("attn_score ", attn_score.shape)

            print("img_tensor_tile ", img_tensor_tile.shape)

            # concat_images = multi_dimensional_attention(attn_score * img_tensor_tile)

            # print("concat_images.shape : ",concat_images.shape).

            apply_attention = attn_score * img_tensor_tile

            simple = True

            if simple:

                attn_result = tf.reduce_sum(apply_attention, 2)
                # attn_result = tf.reduce_sum(attn_result, 2)

                attn_result = tf.layers.dense(attn_result, ivec)

                # bs,sl,vec
                #attn_result = fc1
                print(" attn_result :: ", attn_result.shape)

            else:

                # print("apply_attention ",apply_attention.shape)

                i = tf.constant(0)

                matrix_rows = tf.shape(apply_attention)[0]

                while_condition = lambda i, data: i < matrix_rows  #tf.less(i, 32)

                data = tf.TensorArray(dtype=tf.float32, size=matrix_rows)

                # init_state = (i, data)

                def body(i, data):

                    inp = tf.expand_dims(apply_attention[i], 3)
                    print("input size :: ", inp.shape)
                    conv1 = tf.layers.conv2d(inp, 32, 5)
                    # Max Pooling (down-sampling) with strides of 2 and kernel size of 2
                    conv1 = tf.layers.max_pooling2d(conv1, 2, 2)

                    print("conv1 ", conv1.shape)

                    data = data.write(i, conv1)

                    i = i + 1
                    return [i, data]

                _, ta_final = tf.while_loop(while_condition, body, [i, data])
                # shape_invariants=[i.get_shape(), tf.TensorShape([None, 128])]

                # fc1 = tf.contrib.layers.flatten(conv1)

                ta_final_result = ta_final.stack()

                print("all_data ", ta_final_result.shape)

                # print("conv1.shape :: ",conv1.shape)

                # Fully connected layer (in tf contrib folder for now)

                attn_result = tf.reduce_sum(ta_final_result, 2)
                attn_result = tf.reduce_sum(attn_result, 2)

                attn_result = tf.layers.dense(attn_result, 600)

                # bs,sl,vec
                #attn_result = fc1
                print(" attn_result :: ", attn_result.shape)

                # return attn_result

        with tf.variable_scope('output'):
            o_bias = tf.get_variable('o_bias', [ivec], tf.float32,
                                     tf.constant_initializer(0.))
            # input gate
            fusion_gate = tf.nn.sigmoid(
                layers.linear(rep_map, ivec, True, 0., 'linear_fusion_i',
                              False, wd, keep_prob, is_train) +
                layers.linear(attn_result, ivec, True, 0., 'linear_fusion_a',
                              False, wd, keep_prob, is_train) + o_bias)
            output = fusion_gate * rep_map + (1 - fusion_gate) * attn_result
            # output = layers.mask_for_high_rank(output, rep_mask)

        # save attn
        if tensor_dict is not None and name is not None:
            tensor_dict[name + '_dependent'] = dependent
            tensor_dict[name + '_head'] = head
            tensor_dict[name] = attn_score
            tensor_dict[name + '_gate'] = fusion_gate
        return output
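A standalone sketch of the token-to-image-region attention core that Example #2 builds (scaled-tanh alignment logits, softmax over the image regions, weighted sum, gated fusion), again using tf.layers.dense in place of the project's layers.linear / layers.bn_dense_layer helpers (an assumed substitution). The shape comments assume rep_tensor is bs,sl,d_text and img_tensor is bs,n_img,d_img.

import tensorflow as tf


def scaled_tanh(x, scale=5.):
    return scale * tf.nn.tanh(x / scale)


def token_image_attention_sketch(rep_tensor, img_tensor, ivec, scope=None):
    with tf.variable_scope(scope or 'token_image_attention_sketch'):
        rep_map = tf.layers.dense(rep_tensor, ivec, tf.nn.elu, name='rep_map')  # bs,sl,ivec
        img_map = tf.layers.dense(img_tensor, ivec, tf.nn.elu, name='img_map')  # bs,n_img,ivec
        dependent = tf.expand_dims(
            tf.layers.dense(img_map, ivec, use_bias=False, name='dep'), 1)      # bs,1,n_img,ivec
        head = tf.expand_dims(
            tf.layers.dense(rep_map, ivec, use_bias=False, name='head'), 2)     # bs,sl,1,ivec
        f_bias = tf.get_variable('f_bias', [ivec], tf.float32,
                                 tf.constant_initializer(0.))
        logits = scaled_tanh(dependent + head + f_bias)                         # bs,sl,n_img,ivec
        attn = tf.nn.softmax(logits, axis=2)                                    # over image regions
        img_tile = tf.tile(tf.expand_dims(img_map, 1),
                           [1, tf.shape(rep_tensor)[1], 1, 1])                  # bs,sl,n_img,ivec
        attn_result = tf.reduce_sum(attn * img_tile, axis=2)                    # bs,sl,ivec
        gate = tf.nn.sigmoid(tf.layers.dense(rep_map, ivec, name='gate_i') +
                             tf.layers.dense(attn_result, ivec, name='gate_a'))
        return gate * rep_map + (1. - gate) * attn_result                       # bs,sl,ivec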
Example #3
def directional_attention_Image_dense(rep_tensor,
                                      rep_mask,
                                      img_tensor,
                                      direction=None,
                                      scope=None,
                                      keep_prob=1.,
                                      is_train=None,
                                      wd=0.,
                                      activation='elu',
                                      tensor_dict=None,
                                      name=None):
    def scaled_tanh(x, scale=5.):
        return scale * tf.nn.tanh(1. / scale * x)

    bs, sl, vec = tf.shape(rep_tensor)[0], tf.shape(rep_tensor)[1], tf.shape(
        rep_tensor)[2]
    ivec = rep_tensor.get_shape()[2]
    with tf.variable_scope(scope or 'directional_attention_%s' %
                           (direction or 'diag')):
        # mask generation
        sl_indices = tf.range(sl, dtype=tf.int32)
        sl_col, sl_row = tf.meshgrid(sl_indices, sl_indices)
        if direction is None:
            direct_mask = tf.cast(
                tf.diag(-tf.ones([sl], tf.int32)) + 1, tf.bool)
        else:
            if direction == 'forward':
                direct_mask = tf.greater(sl_row, sl_col)
            else:
                direct_mask = tf.greater(sl_col, sl_row)
        direct_mask_tile = tf.tile(tf.expand_dims(direct_mask, 0),
                                   [bs, 1, 1])  # bs,sl,sl
        rep_mask_tile = tf.tile(tf.expand_dims(rep_mask, 1),
                                [1, sl, 1])  # bs,sl,sl
        attn_mask = tf.logical_and(direct_mask_tile, rep_mask_tile)  # bs,sl,sl

        # non-linear
        rep_map = layers.bn_dense_layer(rep_tensor, ivec, True, 0.,
                                        'bn_dense_map', activation, False, wd,
                                        keep_prob, is_train)
        rep_map_tile = tf.tile(tf.expand_dims(rep_map, 1),
                               [1, sl, 1, 1])  # bs,sl,sl,vec

        img_tensor_ = layers.bn_dense_layer(img_tensor, ivec, True, 0.,
                                            'img_tensor_', activation, False,
                                            wd, keep_prob, is_train)

        img_tensor_dp = layers.dropout(img_tensor_, keep_prob, is_train)

        img_tensor_tile = tf.tile(tf.expand_dims(img_tensor_, 1),
                                  [1, sl, 1, 1])

        # img_tensor_tile = tf.tile(tf.expand_dims(img_tensor_tile, 1), [1, sl, 1, 1,1])

        # print("img_tensor_tile.shape ", img_tensor_tile.shape)

        rep_map_dp = layers.dropout(rep_map, keep_prob, is_train)

        shape = tf.shape(img_tensor_tile)

        T1 = False

        # attention
        with tf.variable_scope('attention'):  # bs,sl,sl,vec
            f_bias = tf.get_variable('f_bias', [ivec], tf.float32,
                                     tf.constant_initializer(0.))
            dependent = layers.linear(rep_map_dp,
                                      ivec,
                                      False,
                                      scope='linear_dependent')  # bs,sl,vec
            dependent_etd = tf.expand_dims(dependent, 1)  # bs,1,sl,vec

            # dependent_etd = tf.concat([dependent_etd,img_tensor_tile],2)

            # print("dependent_etd.shape :: ",dependent_etd.shape)

            dependent2 = layers.linear(img_tensor_dp,
                                       ivec,
                                       False,
                                       scope='linear_dependent2')  # bs,sl,vec

            if T1:
                dependent_etd2 = tf.expand_dims(dependent2, 2)
            else:

                dependent_etd2 = tf.expand_dims(dependent2, 1)  # bs,1,sl,vec
                dependent_etd2 = tf.expand_dims(dependent_etd2,
                                                2)  # bs,1,sl,vec

            head = layers.linear(rep_map_dp, ivec, False,
                                 scope='linear_head')  # bs,sl,vec
            head_etd = tf.expand_dims(head, 2)  # bs,sl,1,vec

            # img_tensor_tile = layers.linear(img_tensor_tile, ivec, False, scope='img_tensor_tile2')
            img_tensor_tile = tf.expand_dims(img_tensor_tile, 2)
            print("new img_tensor_tile.shape ", img_tensor_tile.shape)

            logits = scaled_tanh(dependent_etd + head_etd + f_bias,
                                 5.0)  # bs,sl,sl,vec

            logits_ = layers.bn_dense_layer(logits, ivec, True, 0., 'logits_',
                                            activation, False, wd, keep_prob,
                                            is_train)

            logits_ = layers.dropout(logits_, keep_prob, is_train)

            logits_ = layers.linear(logits_, ivec, False, scope='logits_2')

            logits_etd = tf.expand_dims(logits_, 3)

            new_logits = scaled_tanh(logits_etd + dependent_etd2 + f_bias, 5.0)

            print("dependent_etd2.shape ", dependent_etd2.shape)

            print("dir new_logits : ", new_logits.shape)
            attn = tf.nn.softmax(new_logits, 3)

            apply_Atten = img_tensor_tile * attn
            # print("apply_Atten ", apply_Atten.shape)
            # new_logits = tf.reduce_sum(apply_Atten,3)
            new_logits = tf.reduce_sum(apply_Atten, 3)

            print("new_logits.shape :: ", new_logits.shape)

            # logits = new_logits + logits

            o_bias = tf.get_variable('o_bias1', [ivec], tf.float32,
                                     tf.constant_initializer(1.))

            fusion_gate = tf.nn.sigmoid(
                layers.linear(new_logits, ivec, True, 0., 'linear_fusion_1i',
                              False, wd, keep_prob, is_train) +
                layers.linear(logits, ivec, True, 0., 'linear_fusion_1a',
                              False, wd, keep_prob, is_train) + o_bias)
            logits = fusion_gate * logits + (1 - fusion_gate) * new_logits

            print("logits.shape ", logits.shape)

            logits_masked = layers.exp_mask_for_high_rank(logits, attn_mask)

            print("logits_masked : ", logits_masked.shape)

            attn_score = tf.nn.softmax(logits_masked, 2)  # bs,sl,sl,vec

            # attn_score = tf.clip_by_value(
            #                             attn_score,
            #                             0.,
            #                             1.0,
            #                             name=None
            #                         )

            attn_score = layers.mask_for_high_rank(attn_score, attn_mask)

            print("attn_score : ", attn_score.shape)

            attn_result = tf.reduce_sum(attn_score * rep_map_tile,
                                        2)  # bs,sl,vec

            print("attn_result : ", attn_result.shape)

        # img_atten = MultiDm_attention_with_dense( attn_result, rep_mask,img_tensor,None, 'img_atten',
        #                0.80, is_train,1e-4, 'relu',tensor_dict={}, name='fw_fw_attn2')

        with tf.variable_scope('output'):
            o_bias = tf.get_variable('o_bias', [ivec], tf.float32,
                                     tf.constant_initializer(0.))
            # input gate
            fusion_gate = tf.nn.sigmoid(
                layers.linear(rep_map, ivec, True, 0., 'linear_fusion_i',
                              False, wd, keep_prob, is_train) +
                layers.linear(attn_result, ivec, True, 0., 'linear_fusion_a',
                              False, wd, keep_prob, is_train) + o_bias)
            output = fusion_gate * rep_map + (1 - fusion_gate) * attn_result
            output = layers.mask_for_high_rank(output, rep_mask)

        #output = tf.add(new_logits,output)

        # with tf.variable_scope('output2'):
        #     o_bias = tf.get_variable('o_bias2',[ivec], tf.float32, tf.constant_initializer(0.))
        #     # input gate
        #     fusion_gate = tf.nn.sigmoid(
        #         layers.linear(output, ivec, True, 0., 'linear_fusion_2i', False, wd, keep_prob, is_train) +
        #         layers.linear(new_logits, ivec, True, 0., 'linear_fusion_2a', False, wd, keep_prob, is_train) +
        #         o_bias)
        #     output = fusion_gate * output + (1-fusion_gate) * new_logits
        #     output = layers.mask_for_high_rank(output, rep_mask)

        # with tf.variable_scope('output2'):
        #     o_bias = tf.get_variable('o_bias2',[ivec], tf.float32, tf.constant_initializer(0.))
        #     # input gate
        #     fusion_gate = tf.nn.sigmoid(
        #         layers.linear(output, ivec, True, 0., 'linear_fusion_i2', False, wd, keep_prob, is_train) +
        #         layers.linear(img_atten, ivec, True, 0., 'linear_fusion_a2', False, wd, keep_prob, is_train) +
        #         o_bias)
        #     output = fusion_gate * output + (1-fusion_gate) * img_atten
        #     output = layers.mask_for_high_rank(output, rep_mask)

        # save attn
        if tensor_dict is not None and name is not None:
            tensor_dict[name + '_dependent'] = dependent
            tensor_dict[name + '_head'] = head
            tensor_dict[name] = attn_score
            tensor_dict[name + '_gate'] = fusion_gate

        # output = layers.layer_normalize(output)

        return output, apply_Atten, attn
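Example #3 depends on layers.exp_mask_for_high_rank and layers.mask_for_high_rank, which are not shown on this page. A plausible standalone sketch of what such helpers usually do (an assumption about the project's layers module, not its actual code): the first pushes masked logits toward a very large negative value so the following softmax assigns them near-zero probability, the second simply zeroes the scores at masked positions.

import tensorflow as tf

VERY_NEGATIVE_NUMBER = -1e30


def exp_mask_for_high_rank_sketch(val, val_mask):
    # val: bs,sl,sl,vec logits; val_mask: bool bs,sl,sl, broadcast over the feature axis
    val_mask = tf.expand_dims(tf.cast(val_mask, tf.float32), -1)
    return val + (1. - val_mask) * VERY_NEGATIVE_NUMBER


def mask_for_high_rank_sketch(val, val_mask):
    # zero out the attention scores at masked positions
    return val * tf.expand_dims(tf.cast(val_mask, tf.float32), -1)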
Example #4
def MultiDm_attention_toktag(rep_tensor,
                             rep_mask,
                             img_tensor,
                             direction=None,
                             scope=None,
                             keep_prob=1.,
                             is_train=None,
                             wd=0.,
                             activation='elu',
                             tensor_dict=None,
                             name=None):
    def scaled_tanh(x, scale=5.):
        return scale * tf.nn.tanh(1. / scale * x)

    bs, sl, vec = tf.shape(rep_tensor)[0], tf.shape(rep_tensor)[1], tf.shape(
        rep_tensor)[2]
    ivec = rep_tensor.get_shape()[2]
    with tf.variable_scope(scope or 'directional_attention_%s' %
                           (direction or 'diag')):
        # mask generation
        sl_indices = tf.range(sl, dtype=tf.int32)
        sl_col, sl_row = tf.meshgrid(sl_indices, sl_indices)
        # if direction is None:
        # True everywhere except the diagonal (each token attends to every other position)
        direct_mask = tf.cast(tf.diag(-tf.ones([sl], tf.int32)) + 1, tf.bool)
        # else:
        #     if direction == 'forward':
        #         direct_mask = tf.greater(sl_row, sl_col)
        #     else:
        #         direct_mask = tf.greater(sl_col, sl_row)
        direct_mask_tile = tf.tile(tf.expand_dims(direct_mask, 0),
                                   [bs, 1, 1])  # bs,sl,sl
        rep_mask_tile = tf.tile(tf.expand_dims(rep_mask, 1),
                                [1, sl, 1])  # bs,sl,sl
        attn_mask = tf.logical_and(direct_mask_tile, rep_mask_tile)  # bs,sl,sl

        # non-linear
        rep_map = layers.bn_dense_layer(rep_tensor, ivec, True, 0.,
                                        'bn_dense_map', activation, False, wd,
                                        keep_prob, is_train)

        img_tensor_ = layers.bn_dense_layer(img_tensor, ivec, True, 0.,
                                            'img_tensor_', activation, False,
                                            wd, keep_prob, is_train)

        print("rep_map shape :: ", rep_map.shape)
        print("img_tensor_.shape :: ", img_tensor_.shape)

        rep_map_tile = tf.tile(tf.expand_dims(rep_map, 1),
                               [1, sl, 1, 1])  # bs,sl,sl,vec

        img_tensor_ = tf.expand_dims(img_tensor_, 0)  # add batch axis; region features are shared across the batch
        img_tensor_tile = tf.tile(tf.expand_dims(img_tensor_, 1),
                                  [1, sl, 1, 1])

        rep_map_dp = layers.dropout(rep_map, keep_prob, is_train)

        img_tensor_dp = layers.dropout(img_tensor_, keep_prob, is_train)

        # attention
        with tf.variable_scope('attention'):  # bs,sl,sl,vec
            f_bias = tf.get_variable('f_bias', [ivec], tf.float32,
                                     tf.constant_initializer(0.))
            dependent = layers.linear(img_tensor_tile,
                                      ivec,
                                      False,
                                      scope='linear_dependent')  # bs,sl,vec
            # dependent_etd = tf.expand_dims(dependent, 1)  # bs,1,sl,vec

            head = layers.linear(rep_map_dp, ivec, False,
                                 scope='linear_head')  # bs,sl,vec
            head_etd = tf.expand_dims(head, 2)  # bs,sl,1,vec

            print("head_etd :: ", head_etd.shape)
            print("dependent :: ", dependent.shape)

            logits = scaled_tanh(dependent + head_etd + f_bias,
                                 5.0)  # bs,sl,sl,vec

            print("logits.sape :: ", logits.shape)

            # logits_masked = layers.exp_mask_for_high_rank(logits, attn_mask)
            attn_score = tf.nn.softmax(logits, 2)  # bs,sl,sl,vec
            # attn_score = layers.mask_for_high_rank(attn_score, attn_mask)

            print("attn_score ", attn_score.shape)

            print("img_tensor_tile ", img_tensor_tile.shape)

            # concat_images = multi_dimensional_attention(attn_score * img_tensor_tile)

            # print("concat_images.shape : ",concat_images.shape).

            apply_attention = attn_score * img_tensor_tile

            simple = True

            if simple:

                attn_result = tf.reduce_sum(apply_attention, 2)
                # attn_result = tf.reduce_sum(attn_result, 2)

                attn_result = tf.layers.dense(attn_result, ivec)

                # bs,sl,vec
                #attn_result = fc1
                print(" attn_result :: ", attn_result.shape)

            # return attn_result

        with tf.variable_scope('output'):
            o_bias = tf.get_variable('o_bias', [ivec], tf.float32,
                                     tf.constant_initializer(0.))
            # input gate
            fusion_gate = tf.nn.sigmoid(
                layers.linear(rep_map, ivec, True, 0., 'linear_fusion_i',
                              False, wd, keep_prob, is_train) +
                layers.linear(attn_result, ivec, True, 0., 'linear_fusion_a',
                              False, wd, keep_prob, is_train) + o_bias)
            output = fusion_gate * rep_map + (1 - fusion_gate) * attn_result
            # output = layers.mask_for_high_rank(output, rep_mask)

        # save attn
        if tensor_dict is not None and name is not None:
            tensor_dict[name + '_dependent'] = dependent
            tensor_dict[name + '_head'] = head
            tensor_dict[name] = attn_score
            tensor_dict[name + '_gate'] = fusion_gate
        return output
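All four examples end with the same gated-fusion output block. A minimal standalone sketch of that pattern, with tf.layers.dense standing in for the project's layers.linear helper (an assumed substitution): the sigmoid gate learns, per position and per feature, how much of the original representation to keep versus how much of the attention summary to mix in.

import tensorflow as tf


def fusion_gate_sketch(rep_map, attn_result, ivec, scope='output'):
    with tf.variable_scope(scope):
        o_bias = tf.get_variable('o_bias', [ivec], tf.float32,
                                 tf.constant_initializer(0.))
        fusion_gate = tf.nn.sigmoid(
            tf.layers.dense(rep_map, ivec, use_bias=False, name='fusion_i') +
            tf.layers.dense(attn_result, ivec, use_bias=False, name='fusion_a') +
            o_bias)
        # convex combination of the token representation and its attention summary
        return fusion_gate * rep_map + (1. - fusion_gate) * attn_result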