Example #1
def lstm_match(mid, pat, mid_mask, pat_mask, mid_len, pat_len, hidden,
               keep_prob, is_train):

    rnn = Cudnn_RNN(num_layers=1, num_units=hidden // 2)
    mid, _ = rnn(mid, seq_len=mid_len, concat_layers=False)
    pat, _ = rnn(pat, seq_len=pat_len, concat_layers=False)

    mid_d = dropout(mid, keep_prob=keep_prob, is_train=is_train)
    pat_d = dropout(pat, keep_prob=keep_prob, is_train=is_train)
    mid_a = attention(mid_d, hidden, mask=mid_mask)
    pat_a = attention(pat_d, hidden, mask=pat_mask)

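    # Attention-pool each sequence into a single vector, then compare the
    # pooled vectors with cosine similarity.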
    mid_v = tf.reduce_sum(tf.expand_dims(mid_a, axis=2) * mid, axis=1)
    pat_v = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat, axis=1)
    pat_v_d = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat_d, axis=1)
    sur_sim = cosine(mid_v, pat_v_d)
    pat_sim = cosine(pat_v, pat_v_d)
    return sur_sim, pat_sim
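
`lstm_match` relies on helpers (`Cudnn_RNN`, `dropout`, `attention`, `cosine`) defined elsewhere in the repository. A minimal sketch of the two pooling helpers, assuming `attention` returns masked softmax weights over the time axis and `cosine` returns a pairwise similarity matrix between row vectors (an assumption for illustration, not the repository's code):

import tensorflow as tf

def attention(inputs, hidden, mask=None, scope="attention"):
    # inputs: [batch, seq_len, dim] -> [batch, seq_len] attention weights.
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        proj = tf.layers.dense(inputs, hidden, activation=tf.nn.tanh)
        logits = tf.squeeze(tf.layers.dense(proj, 1, use_bias=False), axis=2)
        if mask is not None:
            logits += (1.0 - tf.cast(mask, tf.float32)) * -1e30
        return tf.nn.softmax(logits, axis=1)

def cosine(a, b):
    # a: [m, dim], b: [n, dim] -> [m, n] cosine similarities.
    a = tf.nn.l2_normalize(a, axis=1)
    b = tf.nn.l2_normalize(b, axis=1)
    return tf.matmul(a, b, transpose_b=True)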
Example #2
    def build_GAT(self, scope='MOGANED_Trigger'):
        maxlen = self.maxlen
        num_class = len(constant.EVENT_TYPE_TO_ID)
        keepprob = constant.t_keepprob
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            with tf.variable_scope('Initialize'):
                posi_mat = tf.concat([
                    tf.zeros([1, constant.posi_embedding_dim], tf.float32),
                    tf.get_variable(
                        'posi_emb', [2 * maxlen, constant.posi_embedding_dim],
                        tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())
                ],
                                     axis=0)
                word_mat = tf.concat([
                    tf.zeros((1, constant.embedding_dim), dtype=tf.float32),
                    tf.get_variable(
                        "unk_word_embedding", [1, constant.embedding_dim],
                        dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer()),
                    tf.get_variable(
                        "wordemb", initializer=self.wordemb, trainable=True)
                ],
                                     axis=0)
                pos_mat = tf.concat([
                    tf.zeros((1, constant.pos_dim), dtype=tf.float32),
                    tf.get_variable(
                        "pos_embedding",
                        [len(constant.POS_TO_ID) - 1, constant.pos_dim],
                        dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())
                ],
                                    axis=0)
                ner_mat = tf.concat([
                    tf.zeros((1, constant.ner_dim), dtype=tf.float32),
                    tf.get_variable(
                        "ner_embedding",
                        [len(constant.NER_TO_ID) - 1, constant.ner_dim],
                        dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())
                ],
                                    axis=0)

            with tf.variable_scope("Placeholder"):
                self.sents = sents = tf.placeholder(tf.int32, [None, maxlen],
                                                    'sents')
                self.posis = posis = tf.placeholder(tf.int32, [None, maxlen],
                                                    'posis')
                self.maskls = maskls = tf.placeholder(tf.float32,
                                                      [None, maxlen], 'maskls')
                self.maskrs = maskrs = tf.placeholder(tf.float32,
                                                      [None, maxlen], 'maskrs')
                self._labels = _labels = tf.placeholder(
                    tf.int32, [None], 'labels')
                labels = tf.one_hot(_labels, num_class)
                self.is_train = is_train = tf.placeholder(
                    tf.bool, [], 'is_train')
                self.lexical = lexical = tf.placeholder(
                    tf.int32, [None, 3], 'lexicals')

                self.ner_idx = ner_idx = tf.placeholder(
                    tf.int32, [None, maxlen], 'ner_tags')
                self.pos_idx = pos_idx = tf.placeholder(
                    tf.int32, [None, maxlen], 'pos_tags')

                self.subg_a = tf.sparse_placeholder(tf.float32,
                                                    [None, maxlen, maxlen],
                                                    'subg')

                self.subg_b = tf.sparse_transpose(self.subg_a, [0, 2, 1])

                subg_a = tf.sparse_tensor_to_dense(self.subg_a,
                                                   validate_indices=False)
                subg_b = tf.sparse_tensor_to_dense(self.subg_b,
                                                   validate_indices=False)

                self.gather_idxs = tf.placeholder(tf.int32, [None, 2],
                                                  'gather_idxs')

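                # Sequence lengths and mask derived from the non-zero
                # (non-padding) token ids.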
                sents_len = tf.reduce_sum(tf.cast(tf.cast(sents, tf.bool),
                                                  tf.int32),
                                          axis=1)
                sents_mask = tf.expand_dims(tf.sequence_mask(
                    sents_len, maxlen, tf.float32),
                                            axis=2)

                eyes = tf.tile(tf.expand_dims(tf.eye(maxlen), 0),
                               [tf.shape(pos_idx)[0], 1, 1])

            with tf.variable_scope("Embedding"):
                sents_emb = tf.nn.embedding_lookup(word_mat, sents)
                posis_emb = tf.nn.embedding_lookup(posi_mat, posis)
                pos_emb = tf.nn.embedding_lookup(pos_mat, pos_idx)
                ner_emb = tf.nn.embedding_lookup(ner_mat, ner_idx)
                concat_emb = tf.concat(
                    [sents_emb, posis_emb, pos_emb, ner_emb], axis=2)

            with tf.variable_scope("Lstm_layer"):
                rnn = Cudnn_RNN(num_layers=1,
                                num_units=constant.hidden_dim,
                                keep_prob=keepprob,
                                is_train=self.is_train)
                ps, _ = rnn(concat_emb,
                            seq_len=sents_len,
                            concat_layers=False,
                            keep_prob=keepprob,
                            is_train=self.is_train)

            with tf.variable_scope("GAC"):
                hs = []
                for layer in range(1, constant.K + 1):
                    h_layer = GAC_func(ps, matmuls(
                        subg_a, layer), maxlen, 'a', layer) + GAC_func(
                            ps, matmuls(subg_b, layer), maxlen, 'b',
                            layer) + GAC_func(ps, eyes, maxlen, 'c', layer)
                    hs.append(h_layer)

            with tf.variable_scope("Aggregation"):
                s_ctxs = []
                for layer in range(1, constant.K + 1):
                    s_raw = tf.layers.dense(hs[layer - 1],
                                            constant.s_dim,
                                            name='Wawa')
                    s_layer = tf.nn.tanh(s_raw)
                    ctx_apply = tf.layers.dense(s_layer,
                                                1,
                                                name='ctx',
                                                use_bias=False)
                    s_ctxs.append(ctx_apply)
                vs = tf.nn.softmax(tf.concat(s_ctxs, axis=2),
                                   axis=2)  #[None,maxlen,3]
                h_concats = tf.concat([
                    tf.expand_dims(hs[layer], 2) for layer in range(constant.K)
                ],
                                      axis=2)
                final_h = tf.reduce_sum(tf.multiply(tf.expand_dims(vs, 3),
                                                    h_concats),
                                        axis=2)
                gather_final_h = tf.gather_nd(final_h, self.gather_idxs)

            with tf.variable_scope('classifier'):
                bias_weight = (constant.t_bias_lambda - 1) * (
                    1 - tf.cast(tf.equal(_labels, 0), tf.float32)) + 1
                self.logits = logits = tf.layers.dense(
                    gather_final_h,
                    num_class,
                    kernel_initializer=tf.contrib.layers.xavier_initializer(),
                    bias_initializer=tf.contrib.layers.xavier_initializer(),
                    name='Wo')
                self.pred = pred = tf.nn.softmax(logits, axis=1)
                self.pred_label = pred_label = tf.argmax(pred, axis=1)
                self.loss = loss = tf.reduce_sum(
                    bias_weight * tf.nn.softmax_cross_entropy_with_logits_v2(
                        labels=labels, logits=logits),
                    axis=0) / tf.reduce_sum(bias_weight, axis=0)
                self.train_op = train_op = tf.train.AdamOptimizer(
                    constant.t_lr).minimize(loss)
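
The GAC block above calls `matmuls` and `GAC_func` from the surrounding codebase. A hedged sketch of plausible definitions, assuming `matmuls(adj, k)` raises the dense adjacency to the k-th power (so layer k aggregates k-hop neighbours) and `GAC_func` is a single projection-plus-aggregation graph convolution step:

import tensorflow as tf

def matmuls(adj, k):
    # adj: [batch, maxlen, maxlen] dense adjacency; returns its k-th power.
    out = adj
    for _ in range(k - 1):
        out = tf.matmul(out, adj)
    return out

def GAC_func(h, adj, maxlen, tag, layer):
    # h: [batch, maxlen, d] node states. Project, then aggregate over `adj`;
    # the output keeps width d so the 'a', 'b' and 'c' branches can be summed.
    with tf.variable_scope('GAC_%s_%d' % (tag, layer), reuse=tf.AUTO_REUSE):
        d = h.get_shape().as_list()[-1]
        trans = tf.layers.dense(h, d, use_bias=False, name='proj')
        return tf.nn.relu(tf.matmul(adj, trans))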
Example #3
    def ready(self):
        config = self.config
        d = config.hidden

        batch_size = tf.shape(self.sent)[0]
        sent_mask = tf.cast(self.sent, tf.bool)
        sent_len = tf.reduce_sum(tf.cast(sent_mask, tf.int32), axis=1)
        sent_maxlen = tf.reduce_max(sent_len)
        sent_mask = tf.slice(sent_mask, [0, 0], [batch_size, sent_maxlen])
        sent = tf.slice(self.sent, [0, 0], [batch_size, sent_maxlen])

        mid_mask = tf.cast(self.mid, tf.bool)
        mid_len = tf.reduce_sum(tf.cast(mid_mask, tf.int32), axis=1)
        mid_maxlen = tf.reduce_max(mid_len)
        mid_mask = tf.slice(mid_mask, [0, 0], [batch_size, mid_maxlen])
        mid = tf.slice(self.mid, [0, 0], [batch_size, mid_maxlen])

        pat_mask = tf.cast(self.pats, tf.bool)
        pat_len = tf.reduce_sum(tf.cast(pat_mask, tf.int32), axis=1)

        with tf.variable_scope("embedding"):
            sent_emb = tf.nn.embedding_lookup(self.word_mat, sent)
            mid_emb = tf.nn.embedding_lookup(self.word_mat, mid)
            sent_emb = dropout(sent_emb,
                               keep_prob=config.word_keep_prob,
                               is_train=self.is_train,
                               mode="embedding")
            pat_emb = tf.nn.embedding_lookup(self.word_mat, self.pats)

        with tf.variable_scope("encoder"):
            rnn = Cudnn_RNN(num_layers=2, num_units=d // 2)
            cont, _ = rnn(sent_emb, seq_len=sent_len, concat_layers=False)
            pat, _ = rnn(pat_emb, seq_len=pat_len, concat_layers=False)

            cont_d = dropout(cont,
                             keep_prob=config.keep_prob,
                             is_train=self.is_train)
            pat_d = dropout(pat,
                            keep_prob=config.keep_prob,
                            is_train=self.is_train)

        with tf.variable_scope("attention"):
            att_a = attention(cont_d, config.att_hidden, mask=sent_mask)
            pat_a = self.pat_a = attention(pat_d,
                                           config.att_hidden,
                                           mask=pat_mask)

        with tf.variable_scope("sim"):
            sim, pat_sim = att_match(mid_emb,
                                     pat_emb,
                                     mid_mask,
                                     pat_mask,
                                     d,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)

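            # Margin loss over pattern pairs: same-relation pairs are pushed
            # above tau, different-relation pairs toward non-positive
            # similarity (hardest pair per row via the 1e30 masking).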
            neg_idxs = tf.matmul(self.rels, tf.transpose(self.rels, [1, 0]))
            pat_pos = tf.square(tf.maximum(config.tau - pat_sim, 0.))
            pat_pos = tf.reduce_max(pat_pos - (1 - neg_idxs) * 1e30, axis=1)
            pat_neg = tf.square(tf.maximum(pat_sim, 0.))
            pat_neg = tf.reduce_max(pat_neg - 1e30 * neg_idxs, axis=1)
            l_sim = tf.reduce_sum(self.weight * (pat_pos + pat_neg), axis=0)

            with tf.variable_scope("pred"):
                att2_d = tf.reduce_sum(tf.expand_dims(att_a, axis=2) * cont_d,
                                       axis=1)
                pat2_d = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat_d,
                                       axis=1)

                logit = self.logit = dense(att2_d,
                                           config.num_class,
                                           use_bias=False)
                pred = tf.nn.softmax(logit)
                l_a = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit[:config.batch_size],
                        labels=self.rel[:config.batch_size]),
                    axis=0)

                xsim = tf.stop_gradient(sim[config.batch_size:])
                pseudo_rel = tf.gather(self.rels, tf.argmax(xsim, axis=1))
                bound = tf.reduce_max(xsim, axis=1)
                weight = tf.nn.softmax(10 * bound)
                l_u = tf.reduce_sum(
                    weight * tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit[config.batch_size:], labels=pseudo_rel),
                    axis=0)

                logit = dense(pat2_d, config.num_class, use_bias=False)
                l_pat = self.pat_loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit, labels=self.rels),
                    axis=0)

        self.max_val = tf.reduce_sum(pred * -log(pred), axis=1)
        self.pred = tf.argmax(pred, axis=1)

        self.loss = l_a + config.alpha * l_pat + config.beta * l_sim + config.gamma * l_u
        self.sim_pred = tf.argmax(tf.gather(self.rels,
                                            tf.argmax(self.sim, axis=1)),
                                  axis=1)
        self.sim_max_val = tf.reduce_max(self.sim, axis=1)
        self.gold = tf.argmax(self.rel, axis=1)
        self.max_logit = tf.reduce_max(self.logit, axis=1)
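
The `pat_pos` / `pat_neg` terms above form a hard-example max-margin loss over pattern-pattern similarities. The same computation, factored into a standalone function for clarity (names are illustrative; the logic is copied from the block above):

import tensorflow as tf

def pattern_margin_loss(pat_sim, rels, tau, weight):
    # pat_sim: [num_pats, num_pats] similarities between patterns.
    # rels:    [num_pats, num_class] one-hot relation labels of the patterns.
    same_rel = tf.matmul(rels, tf.transpose(rels, [1, 0]))
    pos = tf.square(tf.maximum(tau - pat_sim, 0.))
    pos = tf.reduce_max(pos - (1 - same_rel) * 1e30, axis=1)  # hardest positive
    neg = tf.square(tf.maximum(pat_sim, 0.))
    neg = tf.reduce_max(neg - 1e30 * same_rel, axis=1)        # hardest negative
    return tf.reduce_sum(weight * (pos + neg), axis=0)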
Example #4
    def build_HMEAE(self, scope="HMEAE"):
        maxlen = self.maxlen
        num_class = len(constant.ROLE_TO_ID)
        # Newly added variable.
        keepprob = constant.t_keepprob

        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            with tf.variable_scope('Initialize'):
                posi_mat = tf.concat([
                    tf.zeros([1, constant.posi_embedding_dim], tf.float32),
                    tf.get_variable(
                        'posi_emb', [2 * maxlen, constant.posi_embedding_dim],
                        tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())
                ],
                                     axis=0)
                word_mat = tf.concat([
                    tf.zeros((1, constant.embedding_dim), dtype=tf.float32),
                    tf.get_variable(
                        "unk_word_embedding", [1, constant.embedding_dim],
                        dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer()),
                    tf.get_variable(
                        "word_emb", initializer=self.wordemb, trainable=True)
                ],
                                     axis=0)

                event_mat = tf.concat([
                    tf.zeros((1, constant.event_type_embedding_dim),
                             dtype=tf.float32),
                    tf.get_variable(
                        "event_emb", [
                            len(constant.EVENT_TYPE_TO_ID) - 1,
                            constant.event_type_embedding_dim
                        ],
                        initializer=tf.contrib.layers.xavier_initializer(),
                        trainable=True)
                ],
                                      axis=0)

                u_c = tf.get_variable(
                    'feat_vatiable',
                    [constant.module_num, 1, constant.a_u_c_dim],
                    initializer=tf.contrib.layers.xavier_initializer())
                module_design = tf.constant(constant.module_design, tf.float32)

            with tf.variable_scope('placeholder'):
                self.sents = sents = tf.placeholder(tf.int32, [None, maxlen],
                                                    'sents')
                self.trigger_posis = trigger_posis = tf.placeholder(
                    tf.int32, [None, maxlen], 'trigger_posis')
                self.argument_posis = argument_posis = tf.placeholder(
                    tf.int32, [None, maxlen], 'argument_posis')
                self.maskls = maskls = tf.placeholder(tf.float32,
                                                      [None, maxlen], 'maskls')
                self.maskms = maskms = tf.placeholder(tf.float32,
                                                      [None, maxlen], 'maskms')
                self.maskrs = maskrs = tf.placeholder(tf.float32,
                                                      [None, maxlen], 'maskrs')
                self.event_types = event_types = tf.placeholder(
                    tf.int32, [None], 'event_types')
                self.trigger_lexical = trigger_lexical = tf.placeholder(
                    tf.int32, [None, 3], 'trigger_lexicals')
                self.argument_lexical = argument_lexical = tf.placeholder(
                    tf.int32, [None, 2 + self.max_argument_len],
                    'argument_lexicals')
                self._labels = _labels = tf.placeholder(
                    tf.int32, [None], 'labels')
                labels = tf.one_hot(_labels, num_class)
                self.is_train = is_train = tf.placeholder(
                    tf.bool, [], 'is_train')

                #sents,event_types,roles,maskl,maskm,maskr,\
                #trigger_lexical,argument_lexical,trigger_maskl,trigger_maskr,trigger_posis,argument_posis

                # Newly added variable.
                self.is_negative = tf.placeholder(tf.float32, [None])
                # self.pos_idx = pos_idx = tf.placeholder(tf.int32,[None,maxlen],'pos_tags')
                # self.subg_a =  tf.sparse_placeholder(tf.float32,[None,maxlen,maxlen],'subg')
                # self.subg_b =  tf.sparse_transpose(self.subg_a,[0,2,1])
                # self.gather_idxs = tf.placeholder(tf.int32,[None,2],'gather_idxs')
                # subg_a = tf.sparse_tensor_to_dense(self.subg_a,validate_indices=False)
                # subg_b = tf.sparse_tensor_to_dense(self.subg_b,validate_indices=False)

                # eyes = tf.tile(tf.expand_dims(tf.eye(maxlen),0),[tf.shape(pos_idx)[0],1,1])

                sents_len = tf.reduce_sum(tf.cast(tf.cast(sents, tf.bool),
                                                  tf.int32),
                                          axis=1)
                sents_mask = tf.sequence_mask(sents_len, maxlen, tf.float32)
                event_types = tf.tile(tf.expand_dims(event_types, axis=1),
                                      [1, maxlen]) * tf.cast(
                                          sents_mask, tf.int32)
                batch_size = tf.shape(sents)[0]

            with tf.variable_scope('embedding'):
                sents_emb = tf.nn.embedding_lookup(word_mat, sents)
                trigger_posis_emb = tf.nn.embedding_lookup(
                    posi_mat, trigger_posis)
                trigger_lexical_emb = tf.nn.embedding_lookup(
                    word_mat, trigger_lexical)
                argument_posis_emb = tf.nn.embedding_lookup(
                    posi_mat, argument_posis)
                argument_lexical_emb = tf.nn.embedding_lookup(
                    word_mat, argument_lexical)
                event_type_emb = tf.nn.embedding_lookup(event_mat, event_types)

            with tf.variable_scope('lexical_feature'):
                trigger_lexical_feature = tf.reshape(
                    trigger_lexical_emb, [-1, 3 * constant.embedding_dim])
                argument_len = tf.reduce_sum(tf.cast(
                    tf.cast(argument_lexical[:, 1:-1], tf.bool), tf.float32),
                                             axis=1,
                                             keepdims=True)
                argument_lexical_mid = tf.reduce_sum(
                    argument_lexical_emb[:, 1:-1, :], axis=1) / argument_len
                argument_lexical_feature = tf.concat([
                    argument_lexical_emb[:, 0, :], argument_lexical_mid,
                    argument_lexical_emb[:, -1, :]
                ],
                                                     axis=1)
                lexical_feature = tf.concat(
                    [trigger_lexical_feature, argument_lexical_feature],
                    axis=1)

            with tf.variable_scope('encoder'):
                emb = tf.concat([
                    sents_emb, trigger_posis_emb, argument_posis_emb,
                    event_type_emb
                ],
                                axis=2)
                # emb_shape = tf.shape(emb)
                # pad = tf.zeros([emb_shape[0],1,emb_shape[2]],tf.float32)
                # conv_input = tf.concat([pad,emb,pad],axis=1)
                # conv_res = tf.layers.conv1d(
                #         inputs=conv_input,
                #         filters=constant.a_filters, kernel_size=3,
                #         strides=1,
                #         padding='valid',
                #         activation=tf.nn.relu,
                #         kernel_initializer=tf.contrib.layers.xavier_initializer(),
                #         name='convlution_layer')
                # conv_res = tf.reshape(conv_res,[-1,maxlen,constant.a_filters])
                rnn = Cudnn_RNN(num_layers=1,
                                num_units=constant.hidden_dim,
                                keep_prob=keepprob,
                                is_train=self.is_train)
                conv_res, _ = rnn(emb,
                                  seq_len=sents_len,
                                  concat_layers=False,
                                  keep_prob=keepprob,
                                  is_train=self.is_train)

                #GAT
                # hs = []
                # for layer in range(1,constant.K+1):
                #     # h_layer= GAC_func(conv_res,matmuls(subg_a,layer),maxlen,'a',layer)+GAC_func(conv_res,matmuls(subg_b,layer),maxlen,'b',layer)+GAC_func(conv_res,eyes,maxlen,'c',layer)
                #     h_layer= GAC_func(conv_res,matmuls(subg_a,layer),maxlen,'a',layer)+GAC_func(conv_res,matmuls(subg_b,layer),maxlen,'b',layer)
                #     hs.append(h_layer)

                # s_ctxs = []
                # for layer in range(1,constant.K+1):
                #     s_raw = tf.layers.dense(hs[layer-1],constant.s_dim,name='Wawa')
                #     s_layer = tf.nn.tanh(s_raw)
                #     ctx_apply = tf.layers.dense(s_layer,1,name='ctx',use_bias=False)
                #     s_ctxs.append(ctx_apply)
                # vs = tf.nn.softmax(tf.concat(s_ctxs,axis=2),axis=2) #[None,maxlen,3]
                # h_concats = tf.concat([tf.expand_dims(hs[layer],2) for layer in range(constant.K)],axis=2)
                # final_h = tf.reduce_sum(tf.multiply(tf.expand_dims(vs,3),h_concats),axis=2)
                # gather_final_h = tf.gather_nd(final_h,self.gather_idxs)

                # # Newly added dimension reshape
                # conv_res = tf.reshape(gather_final_h,[-1,maxlen,constant.a_filters])

            with tf.variable_scope("attention"):
                conv_res_extend = tf.tile(tf.expand_dims(conv_res, axis=1),
                                          [1, constant.module_num, 1, 1])
                u_c_feat = tf.tile(tf.expand_dims(
                    u_c, axis=0), [batch_size, 1, maxlen, 1]) * tf.tile(
                        tf.expand_dims(tf.expand_dims(sents_mask, axis=2),
                                       axis=1), [1, constant.module_num, 1, 1])
                hidden_state = tf.layers.dense(
                    tf.concat([conv_res_extend, u_c_feat], axis=3),
                    constant.a_W_a_dim,
                    use_bias=False,
                    kernel_initializer=tf.contrib.layers.xavier_initializer(),
                    activation=tf.nn.tanh)
                score_logit = tf.reshape(
                    tf.layers.dense(hidden_state,
                                    1,
                                    use_bias=False,
                                    kernel_initializer=tf.contrib.layers.
                                    xavier_initializer()),
                    [batch_size, constant.module_num, maxlen])
                score_mask = tf.tile(tf.expand_dims(sents_mask, axis=1),
                                     [1, constant.module_num, 1])
                score_logit = score_logit * score_mask - (
                    1 - score_mask) * constant.INF
                module_score = tf.nn.softmax(score_logit, axis=2)
                module_mask = tf.tile(
                    tf.expand_dims(tf.expand_dims(module_design, axis=0),
                                   axis=3), [batch_size, 1, 1, maxlen])
                score_mask = tf.tile(
                    tf.expand_dims(module_score, axis=1),
                    [1, len(constant.ROLE_TO_ID), 1, 1]) * module_mask
                module_of_role = tf.expand_dims(tf.expand_dims(tf.reduce_sum(
                    module_design, axis=1),
                                                               axis=0),
                                                axis=2)
                role_score = tf.reduce_sum(score_mask, axis=2) / module_of_role
                role_oriented_emb = tf.reduce_sum(
                    tf.expand_dims(role_score, axis=3) *
                    tf.tile(tf.expand_dims(conv_res, axis=1),
                            [1, len(constant.ROLE_TO_ID), 1, 1]),
                    axis=2)

            with tf.variable_scope('maxpooling'):
                maskl = tf.tile(tf.expand_dims(maskls, axis=2),
                                [1, 1, constant.a_filters])
                left = maskl * conv_res
                maskm = tf.tile(tf.expand_dims(maskms, axis=2),
                                [1, 1, constant.a_filters])
                mid = maskm * conv_res
                maskr = tf.tile(tf.expand_dims(maskrs, axis=2),
                                [1, 1, constant.a_filters])
                right = maskr * conv_res
                sentence_feature = tf.concat([
                    tf.reduce_max(left, axis=1),
                    tf.reduce_max(mid, axis=1),
                    tf.reduce_max(right, axis=1)
                ],
                                             axis=1)

            with tf.variable_scope('classifier'):
                dmcnn_feature = tf.concat([sentence_feature, lexical_feature],
                                          axis=1)
                hmeae_feature = tf.concat([
                    tf.tile(tf.expand_dims(dmcnn_feature, axis=1),
                            [1, len(constant.ROLE_TO_ID), 1]),
                    role_oriented_emb
                ],
                                          axis=2)
                feature = tf.layers.dropout(hmeae_feature,
                                            1 - constant.a_keepprob,
                                            training=is_train)
                eye_mask = tf.tile(tf.expand_dims(tf.eye(num_class), axis=0),
                                   [batch_size, 1, 1])
                dense_res = tf.layers.dense(
                    feature,
                    num_class,
                    kernel_initializer=tf.contrib.layers.xavier_initializer(),
                    bias_initializer=tf.contrib.layers.xavier_initializer())
                self.logits = logits = tf.reduce_max(
                    eye_mask * dense_res - (1 - eye_mask) * constant.INF,
                    axis=2)
                self.pred = pred = tf.nn.softmax(logits, axis=1)
                self.pred_label = pred_label = tf.argmax(pred, axis=1)
                # Newly added variables.
                wrong_confusion_matrix = [[0.0] * num_class] * num_class
                correct_class_weight = [1.0] * num_class
                positive_idx = 15.0
                negative_idx = 1 - positive_idx
                self.loss = loss = tf.reduce_mean(
                    f1_confusion_loss(_labels, logits, positive_idx,
                                      negative_idx, correct_class_weight,
                                      wrong_confusion_matrix, num_class))
                # self.loss = loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels,logits=logits),axis=0)
                self.train_op = train_op = tf.train.AdamOptimizer(
                    constant.a_lr).minimize(loss)
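
Incidentally, the `eye_mask` / `reduce_max` construction in the classifier reads the diagonal of the `[batch, num_class, num_class]` dense output, so role r's logit comes from role r's oriented feature. Assuming `constant.INF` is large enough to dominate, the same logits can be computed directly (a sketch, equivalent rather than the code above):

import tensorflow as tf

def diag_logits(dense_res):
    # dense_res: [batch, num_class, num_class]; returns [batch, num_class],
    # matching eye_mask * dense_res - (1 - eye_mask) * INF followed by reduce_max.
    return tf.matrix_diag_part(dense_res)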
Example #5
    def ready(self):
        config = self.config
        d = config.hidden

        batch_size = tf.shape(self.sent_word)[0]
        sent_mask = tf.cast(self.sent_word, tf.bool)
        sent_len = tf.reduce_sum(tf.cast(sent_mask, tf.int32), axis=1)
        sent_maxlen = config.length

        sent = self.sent_word

        pretrain_sent_mask = tf.cast(self.pretrain_sents, tf.bool)
        rnn = Cudnn_RNN(num_layers=2,
                        num_units=d // 2,
                        keep_prob=config.keep_prob,
                        is_train=self.is_train)
        label_mat, _ = FIND_module(sent, self.raw_pats, self.word_mat, config,
                                   tf.constant(False, tf.bool), rnn)
        label_mat = tf.sigmoid(label_mat) * tf.tile(
            tf.reshape(tf.cast(sent_mask, tf.float32),
                       [batch_size, sent_maxlen, 1]),
            [1, 1, self.raw_pats.get_shape()[0]])

        # label_mat = tf.cast(tf.greater(label_mat,0.7),tf.float32)

        _, keywords_sim = FIND_module(sent, self.pats, self.word_mat, config,
                                      self.is_train, rnn)
        # keywords_sim = tf.sigmoid(keywords_sim)

        pretrain_pred_labels, _ = FIND_module(self.pretrain_sents,
                                              self.pretrain_pats,
                                              self.word_mat, config,
                                              self.is_train, rnn)
        pretrain_pred_labels = tf.transpose(pretrain_pred_labels, [0, 2, 1])
        # Take the (i, i) entries: pretraining sentence i scored against its
        # paired pattern i.
        gather_order = tf.tile(
            tf.reshape(
                tf.range(
                    max(config.pretrain_size, config.pretrain_size_together)),
                [-1, 1]), [1, 2])
        pretrain_pred_labels = tf.gather_nd(pretrain_pred_labels, gather_order)
        self.pretrain_loss = tf.reduce_mean(
            tf.reduce_sum(
                tf.nn.weighted_cross_entropy_with_logits(
                    targets=self.pretrain_labels,
                    logits=pretrain_pred_labels,
                    pos_weight=config.pos_weight) *
                tf.cast(pretrain_sent_mask, tf.float32),
                axis=1) /
            tf.reduce_sum(tf.cast(pretrain_sent_mask, tf.float32), axis=1))
        # Alternative: tf.losses.mean_squared_error(labels=self.pretrain_labels,
        #                                           predictions=pretrain_pred_labels)

        self.prt_loss = tf.nn.weighted_cross_entropy_with_logits(
            targets=self.pretrain_labels,
            logits=pretrain_pred_labels,
            pos_weight=config.pos_weight) * tf.cast(pretrain_sent_mask,
                                                    tf.float32)
        self.prt_pred = tf.sigmoid(pretrain_pred_labels) * tf.cast(
            pretrain_sent_mask, tf.float32)
        self.pretrain_pred_labels = tf.reshape(
            tf.cast(
                tf.greater(
                    tf.sigmoid(pretrain_pred_labels) *
                    tf.cast(pretrain_sent_mask, tf.float32),
                    config.pretrain_threshold), tf.int32), [-1])

        neg_idxs = tf.matmul(self.keywords_rels,
                             tf.transpose(self.keywords_rels, [1, 0]))
        pat_pos = tf.square(tf.maximum(0.9 - keywords_sim, 0.))
        pat_pos = tf.reduce_max(
            pat_pos - tf.cast(1 - neg_idxs, tf.float32) * 1e30, axis=1)

        pat_neg = tf.square(tf.maximum(keywords_sim, 0.))
        pat_neg = tf.reduce_max(
            pat_neg - 1e30 * tf.cast(neg_idxs, tf.float32), axis=1)
        pat_simloss = tf.reduce_mean(pat_pos + pat_neg, axis=0)

        # Clustering loss.
        self.sim_loss = sim_loss = pat_simloss

        self.pretrain_loss_v2 = (self.pretrain_loss +
                                 self.pretrain_alpha * self.sim_loss)

        sim_raw = []

        for i, soft_labeling_function in enumerate(
                self.labeling_functions_soft):
            try:
                sim_raw.append(
                    soft_labeling_function(label_mat, self.raw_keyword_dict,
                                           self.mask_mat)(self.phrases_input) *
                    self.type_restrict(i))
            except:
                print(i)
                sim_raw.append(
                    tf.cast(
                        tf.reshape(0 * self.phrases_input[:, 0], [1, -1]),
                        tf.float32))

        # One row per labeling function; transpose to [batch_size, num_functions].
        self.sim = sim = tf.transpose(tf.concat(sim_raw, axis=0), [1, 0])
        with tf.variable_scope("classifier"):
            sent_emb = tf.nn.embedding_lookup(self.word_mat, sent)
            sent_emb = dropout(sent_emb,
                               keep_prob=config.word_keep_prob,
                               is_train=self.is_train,
                               mode="embedding")
            rnn = Cudnn_RNN(num_layers=2,
                            num_units=d // 2,
                            keep_prob=config.keep_prob,
                            is_train=self.is_train)
            cont, _ = rnn(sent_emb, seq_len=sent_len, concat_layers=False)
            cont_d = dropout(cont,
                             keep_prob=config.keep_prob,
                             is_train=self.is_train)
            att_a = attention(cont_d, config.att_hidden, mask=sent_mask)
            att2_d = tf.reduce_sum(tf.expand_dims(att_a, axis=2) * cont_d,
                                   axis=1)
            logit = dense(att2_d, config.num_class, use_bias=False)
            pred = tf.nn.softmax(logit)
            with tf.variable_scope("pred"):

                if not self.pseudo:

                    sent_loss = self.sent_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logit, labels=self.rel), axis=0)
                else:

                    self.hard_train_loss = sent_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit[:config.batch_size], labels=self.rel[:config.batch_size]), axis=0)

                    lsim = sim[:config.batch_size]
                    index_tensor = tf.reshape(tf.constant(np.arange(config.batch_size),tf.int32),[config.batch_size,1])
                    select_tensor = tf.reshape(self.hard_match_func_idx,[config.batch_size,1])
                    probs = tf.reshape(tf.gather_nd(lsim,tf.concat([index_tensor,select_tensor],axis=1)),[config.batch_size,1])
                    self.labeled_loss = labeled_loss = tf.reduce_mean(tf.square((1-probs)))

                    xsim = tf.stop_gradient(sim[config.batch_size:])

                    pseudo_rel = tf.gather(self.rels, tf.argmax(xsim, axis=1))
                    bound = tf.reduce_max(xsim, axis=1)
                    weight = tf.nn.softmax(10.0 * bound)

                    self.unlabeled_loss = unlabeled_loss = tf.reduce_sum(weight * tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit[config.batch_size:], labels=pseudo_rel), axis=0)

                    sent_loss = self.sent_loss = sent_loss + self.gamma * unlabeled_loss+self.alpha*self.pretrain_loss#+self.alpha*labeled_loss

        # Prediction entropy, used to infer no_relation.
        self.max_val = entropy = tf.reduce_sum(pred * -log(pred), axis=1)
        # pred is what is used at test time.
        self.pred = tf.argmax(pred, axis=1)
        self.loss = sent_loss + self.beta * sim_loss
        # Prediction of the similarity model.
        self.sim_pred = tf.argmax(tf.gather(self.rels,
                                            tf.argmax(self.sim, axis=1)),
                                  axis=1)
        self.sim_max_val = tf.reduce_max(self.sim, axis=1)
        # True label.
        self.gold = tf.argmax(self.rel, axis=1)
        self.entropy = tf.reduce_mean(entropy, axis=0)
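
Both `ready` methods compute the prediction entropy with a `log` helper from the repository; a minimal numerically-safe stand-in (an assumption, not the repository's definition) clips its argument before taking the log:

import tensorflow as tf

def log(x, eps=1e-8):
    # Clipped log so that pred * -log(pred) stays finite as pred -> 0.
    return tf.log(tf.clip_by_value(x, eps, 1.0))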