Example #1
def att_match(mid, pat, mid_mask, pat_mask, hidden, keep_prob, is_train):
    """Attention-pool `mid` and `pat` and score the pair with cosine similarity."""
    mid_d = dropout(mid, keep_prob=keep_prob, is_train=is_train)
    pat_d = dropout(pat, keep_prob=keep_prob, is_train=is_train)
    mid_a = attention(mid_d, hidden, mask=mid_mask)
    pat_a = attention(pat_d, hidden, mask=pat_mask)
    mid_v = tf.reduce_sum(tf.expand_dims(mid_a, axis=2) * mid, axis=1)
    pat_v = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat, axis=1)
    pat_v_d = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat_d, axis=1)
    sur_sim = cosine(mid_v, pat_v_d)
    pat_sim = cosine(pat_v, pat_v_d)
    return sur_sim, pat_sim
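
The pooling step above turns a [batch, length, dim] sequence into a single [batch, dim] vector by broadcasting the attention weights over the feature axis and summing over time. Below is a minimal TensorFlow 1.x sketch of just that step, independent of the helper functions (dropout, attention, cosine) used in the snippet:

import numpy as np
import tensorflow as tf  # TF 1.x, matching the snippets on this page

batch, length, dim = 2, 5, 4
x = tf.constant(np.random.rand(batch, length, dim), tf.float32)            # a sequence such as `mid`
a = tf.nn.softmax(tf.constant(np.random.rand(batch, length), tf.float32))  # attention weights over time

# [batch, length, 1] * [batch, length, dim], summed over time -> [batch, dim]
v = tf.reduce_sum(tf.expand_dims(a, axis=2) * x, axis=1)

with tf.Session() as sess:
    print(sess.run(v).shape)  # (2, 4)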
Example #2
def lstm_match(mid, pat, mid_mask, pat_mask, mid_len, pat_len, hidden,
               keep_prob, is_train):
    """Like att_match, but first encodes both sequences with a Cudnn_RNN."""

    rnn = Cudnn_RNN(num_layers=1, num_units=hidden // 2)
    mid, _ = rnn(mid, seq_len=mid_len, concat_layers=False)
    pat, _ = rnn(pat, seq_len=pat_len, concat_layers=False)

    mid_d = dropout(mid, keep_prob=keep_prob, is_train=is_train)
    pat_d = dropout(pat, keep_prob=keep_prob, is_train=is_train)
    mid_a = attention(mid_d, hidden, mask=mid_mask)
    pat_a = attention(pat_d, hidden, mask=pat_mask)

    mid_v = tf.reduce_sum(tf.expand_dims(mid_a, axis=2) * mid, axis=1)
    pat_v = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat, axis=1)
    pat_v_d = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat_d, axis=1)
    sur_sim = cosine(mid_v, pat_v_d)
    pat_sim = cosine(pat_v, pat_v_d)
    return sur_sim, pat_sim
Example #3
    def ready(self):
        config = self.config
        N, PL, QL, CL = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit
        d, dc, dg = config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        gi = []
        att_vP = []
        for i in range(config.max_para):
            with tf.variable_scope("emb"):
                with tf.variable_scope("char"):
                    ch_emb = tf.reshape(
                        tf.nn.embedding_lookup(self.char_mat, self.pr_ch),
                        [N * PL, CL, dc])
                    # self.char_mat, self.ch), [N * PL, CL, dc])
                    qh_emb = tf.reshape(
                        tf.nn.embedding_lookup(self.char_mat, self.qh),
                        [N * QL, CL, dc])
                    ch_emb = dropout(ch_emb,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
                    qh_emb = dropout(qh_emb,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
                    cell_fw = tf.contrib.rnn.GRUCell(dg)
                    cell_bw = tf.contrib.rnn.GRUCell(dg)
                    _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        ch_emb,
                        self.ch_len,
                        dtype=tf.float32)
                    ch_emb = tf.concat([state_fw, state_bw], axis=1)
                    _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        qh_emb,
                        self.qh_len,
                        dtype=tf.float32)
                    qh_emb = tf.concat([state_fw, state_bw], axis=1)
                    qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                    ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

                with tf.name_scope("word"):
                    c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                    q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

                c_emb = tf.concat([c_emb, ch_emb], axis=2)
                q_emb = tf.concat([q_emb, qh_emb], axis=2)

            with tf.variable_scope("encoding"):
                rnn = gru(num_layers=3,
                          num_units=d,
                          batch_size=N,
                          input_size=c_emb.get_shape().as_list()[-1],
                          keep_prob=config.keep_prob,
                          is_train=self.is_train)
                c = rnn(c_emb, seq_len=self.c_len)
                q = rnn(q_emb, seq_len=self.q_len)

            with tf.variable_scope("attention"):
                qc_att = dot_attention(c,
                                       q,
                                       mask=self.q_mask,
                                       hidden=d,
                                       keep_prob=config.keep_prob,
                                       is_train=self.is_train)
                rnn = gru(num_layers=1,
                          num_units=d,
                          batch_size=N,
                          input_size=qc_att.get_shape().as_list()[-1],
                          keep_prob=config.keep_prob,
                          is_train=self.is_train)
                att = rnn(qc_att, seq_len=self.c_len)
                # att is the v_P
                att_vP.append(att)
            """
			with tf.variable_scope("match"):
				self_att = dot_attention(
					att, att, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train)
				rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape(
				).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
				match = rnn(self_att, seq_len=self.c_len)
			"""
            with tf.variable_scope("pointer"):

                # r_Q:
                init = summ(q[:, :, -2 * d:],
                            d,
                            mask=self.q_mask,
                            keep_prob=config.ptr_keep_prob,
                            is_train=self.is_train)

                pointer = ptr_net(batch=N,
                                  hidden=init.get_shape().as_list()[-1],
                                  keep_prob=config.ptr_keep_prob,
                                  is_train=self.is_train)
                logits1, logits2 = pointer(init, att, d, self.c_mask)

            with tf.variable_scope("predict"):
                outer = tf.matmul(
                    tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                    tf.expand_dims(tf.nn.softmax(logits2), axis=1))
                outer = tf.matrix_band_part(outer, 0, 15)
                self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
                self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
                losses = tf.nn.softmax_cross_entropy_with_logits(
                    logits=logits1, labels=self.y1)
                losses2 = tf.nn.softmax_cross_entropy_with_logits(
                    logits=logits2, labels=self.y2)
                self.loss = tf.reduce_mean(losses + losses2)

                # print losses
                #condition = tf.greater(self.loss, 11)
                #self.yp1 = tf.where(condition, tf.Print(self.yp1,[self.yp1],message="Yp1:"), self.yp1)
                #self.yp2 = tf.where(condition, tf.Print(self.yp2,[self.yp2],message="Yp2:"), self.yp1)

        for i in range(config.max_para):
            # Passage ranking
            with tf.variable_scope("passage-ranking-attention"):
                vj_P = dropout(att, keep_prob=config.keep_prob,
                               is_train=self.is_train)
                r_Q = dropout(init, keep_prob=config.keep_prob,
                              is_train=self.is_train)
                r_P = attention(r_Q,
                                vj_P,
                                mask=self.c_mask,
                                hidden=d,
                                keep_prob=config.keep_prob,
                                is_train=self.is_train)

                #rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=pr_att.get_shape(
                #).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
                #att_rp = rnn(qc_att, seq_len=self.c_len)

                # g = tanh(W_g [r_Q, r_P])
                concatenate = tf.concat([init, r_P], axis=2)
                g = tf.nn.tanh(
                    dense(concatenate, hidden=d, use_bias=False, scope="g"))
                g_ = dense(g, 1, use_bias=False, scope="g_")
                gi.append(g_)
        gi_ = tf.convert_to_tensor(gi)
        gi = tf.nn.softmax(gi_)
        # the cross entropy expects unnormalized logits, so feed gi_ rather than the softmax
        self.pr_loss = tf.nn.softmax_cross_entropy_with_logits(logits=gi_,
                                                               labels=self.pr)
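
The predict scope decodes the answer span from the two pointer distributions: the outer product scores every (start, end) pair, and tf.matrix_band_part(outer, 0, 15) keeps only pairs where the end index is at, or at most 15 positions after, the start. A standalone TF 1.x sketch of that decoding step with made-up logits:

import numpy as np
import tensorflow as tf  # TF 1.x

passage_len = 8
logits1 = tf.constant(np.random.randn(1, passage_len), tf.float32)  # start-pointer logits
logits2 = tf.constant(np.random.randn(1, passage_len), tf.float32)  # end-pointer logits

outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                  tf.expand_dims(tf.nn.softmax(logits2), axis=1))
# zero out pairs with end < start or end more than 15 positions after start
outer = tf.matrix_band_part(outer, 0, 15)
yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)  # best start index
yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)  # best end index

with tf.Session() as sess:
    print(sess.run([yp1, yp2]))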
Example #4
    def ready(self):
        config = self.config
        d = config.hidden

        batch_size = tf.shape(self.sent)[0]
        sent_mask = tf.cast(self.sent, tf.bool)
        sent_len = tf.reduce_sum(tf.cast(sent_mask, tf.int32), axis=1)
        sent_maxlen = tf.reduce_max(sent_len)
        sent_mask = tf.slice(sent_mask, [0, 0], [batch_size, sent_maxlen])
        sent = tf.slice(self.sent, [0, 0], [batch_size, sent_maxlen])

        mid_mask = tf.cast(self.mid, tf.bool)
        mid_len = tf.reduce_sum(tf.cast(mid_mask, tf.int32), axis=1)
        mid_maxlen = tf.reduce_max(mid_len)
        mid_mask = tf.slice(mid_mask, [0, 0], [batch_size, mid_maxlen])
        mid = tf.slice(self.mid, [0, 0], [batch_size, mid_maxlen])

        pat_mask = tf.cast(self.pats, tf.bool)
        pat_len = tf.reduce_sum(tf.cast(pat_mask, tf.int32), axis=1)

        with tf.variable_scope("embedding"):
            sent_emb = tf.nn.embedding_lookup(self.word_mat, sent)
            mid_emb = tf.nn.embedding_lookup(self.word_mat, mid)
            sent_emb = dropout(sent_emb,
                               keep_prob=config.word_keep_prob,
                               is_train=self.is_train,
                               mode="embedding")
            pat_emb = tf.nn.embedding_lookup(self.word_mat, self.pats)

        with tf.variable_scope("encoder"):
            rnn = Cudnn_RNN(num_layers=2, num_units=d // 2)
            cont, _ = rnn(sent_emb, seq_len=sent_len, concat_layers=False)
            pat, _ = rnn(pat_emb, seq_len=pat_len, concat_layers=False)

            cont_d = dropout(cont,
                             keep_prob=config.keep_prob,
                             is_train=self.is_train)
            pat_d = dropout(pat,
                            keep_prob=config.keep_prob,
                            is_train=self.is_train)

        with tf.variable_scope("attention"):
            att_a = attention(cont_d, config.att_hidden, mask=sent_mask)
            pat_a = self.pat_a = attention(pat_d,
                                           config.att_hidden,
                                           mask=pat_mask)

        with tf.variable_scope("sim"):
            sim, pat_sim = att_match(mid_emb,
                                     pat_emb,
                                     mid_mask,
                                     pat_mask,
                                     d,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
            self.sim = sim

            neg_idxs = tf.matmul(self.rels, tf.transpose(self.rels, [1, 0]))
            pat_pos = tf.square(tf.maximum(config.tau - pat_sim, 0.))
            pat_pos = tf.reduce_max(pat_pos - (1 - neg_idxs) * 1e30, axis=1)
            pat_neg = tf.square(tf.maximum(pat_sim, 0.))
            pat_neg = tf.reduce_max(pat_neg - 1e30 * neg_idxs, axis=1)
            l_sim = tf.reduce_sum(self.weight * (pat_pos + pat_neg), axis=0)

            with tf.variable_scope("pred"):
                att2_d = tf.reduce_sum(tf.expand_dims(att_a, axis=2) * cont_d,
                                       axis=1)
                pat2_d = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat_d,
                                       axis=1)

                logit = self.logit = dense(att2_d,
                                           config.num_class,
                                           use_bias=False)
                pred = tf.nn.softmax(logit)
                l_a = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit[:config.batch_size],
                        labels=self.rel[:config.batch_size]),
                    axis=0)

                xsim = tf.stop_gradient(sim[config.batch_size:])
                pseudo_rel = tf.gather(self.rels, tf.argmax(xsim, axis=1))
                bound = tf.reduce_max(xsim, axis=1)
                weight = tf.nn.softmax(10 * bound)
                l_u = tf.reduce_sum(
                    weight * tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit[config.batch_size:], labels=pseudo_rel),
                    axis=0)

                logit = dense(pat2_d, config.num_class, use_bias=False)
                l_pat = self.pat_loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit, labels=self.rels),
                    axis=0)

        self.max_val = tf.reduce_sum(pred * -log(pred), axis=1)
        self.pred = tf.argmax(pred, axis=1)

        self.loss = l_a + config.alpha * l_pat + config.beta * l_sim + config.gamma * l_u
        self.sim_pred = tf.argmax(tf.gather(self.rels,
                                            tf.argmax(self.sim, axis=1)),
                                  axis=1)
        self.sim_max_val = tf.reduce_max(self.sim, axis=1)
        self.gold = tf.argmax(self.rel, axis=1)
        self.max_logit = tf.reduce_max(self.logit, axis=1)
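
The sim scope's pat_pos / pat_neg terms form a contrastive margin loss over pattern-pattern similarities: neg_idxs marks pairs of patterns that share a relation, and subtracting 1e30 from masked entries pushes them out of the reduce_max, so pat_pos picks the least similar same-relation pair and pat_neg the most similar different-relation pair. A small numeric sketch of that masking trick; the margin tau and the toy similarities below are assumptions, not values from the model:

import tensorflow as tf  # TF 1.x

tau = 0.8  # assumed margin, standing in for config.tau
# one-hot relation labels for 3 patterns: patterns 0 and 1 share a relation
rels = tf.constant([[1., 0.], [1., 0.], [0., 1.]])
# toy pairwise cosine similarities between the 3 patterns
pat_sim = tf.constant([[1.0, 0.3, 0.6],
                       [0.3, 1.0, 0.2],
                       [0.6, 0.2, 1.0]])

neg_idxs = tf.matmul(rels, tf.transpose(rels, [1, 0]))              # 1 where relations match
pat_pos = tf.square(tf.maximum(tau - pat_sim, 0.))
pat_pos = tf.reduce_max(pat_pos - (1 - neg_idxs) * 1e30, axis=1)    # least similar same-relation pair
pat_neg = tf.square(tf.maximum(pat_sim, 0.))
pat_neg = tf.reduce_max(pat_neg - 1e30 * neg_idxs, axis=1)          # most similar different-relation pair

with tf.Session() as sess:
    print(sess.run([pat_pos, pat_neg]))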
Example #5
def FIND_module(sent, pats, word_mat, config, is_train, rnn, scope='Find_module'):
    # sent: [batch, maxlength_sent] token ids; pats: [num_pats, maxlength_pat] token ids
    # sent_emb after embedding lookup: [batch, maxlength_sent, dim]
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        keep_prob = config.keep_prob
        d = config.hidden
        batch_size = tf.shape(sent)[0]
        maxlength_sent = tf.shape(sent)[1]
        dim = tf.shape(word_mat)[1]
        num_pats = tf.shape(pats)[0]

        sent_mask = tf.cast(sent, tf.bool)

        pat_mask = tf.cast(pats, tf.bool)
        pat_len = tf.reduce_sum(tf.cast(pat_mask, tf.int32), axis=1)

        with tf.variable_scope('embedding'):
            sent_emb = tf.nn.embedding_lookup(word_mat, sent)
            sent_emb_d = dropout(sent_emb, keep_prob=config.word_keep_prob, is_train=is_train, mode="embedding")
            pat_emb = tf.nn.embedding_lookup(word_mat, pats)
            pat_emb_d = dropout(pat_emb, keep_prob=config.word_keep_prob, is_train=is_train,mode='embedding')

        with tf.variable_scope('stack'):
            pad = tf.zeros([batch_size,1,dim],tf.float32)

            sent_emb_pad = tf.concat([pad,sent_emb,pad],axis=1)
            sent_emb_stack_2 = tf.reshape(sent_emb_pad,[batch_size,maxlength_sent+2,1,dim])
            sent_emb_stack_2 = tf.concat([sent_emb_stack_2[:,0:-1,:],sent_emb_stack_2[:,1:,:]],axis=2)
            sent_emb_stack_2 = tf.reshape(sent_emb_stack_2,[batch_size*(maxlength_sent+1),2,dim])

            sent_emb_pad2 = tf.concat([pad,pad,sent_emb,pad,pad],axis=1)
            sent_emb_stack_3 = tf.reshape(sent_emb_pad2,[batch_size,maxlength_sent+4,1,dim])
            sent_emb_stack_3 = tf.concat([sent_emb_stack_3[:, 0:-2, :], sent_emb_stack_3[:, 1:-1, :], sent_emb_stack_3[:, 2:, :]], axis=2)
            sent_emb_stack_3 = tf.reshape(sent_emb_stack_3,[batch_size*(maxlength_sent+2),3,dim])

            sent_emb_stack_1 = tf.reshape(sent_emb,[batch_size*maxlength_sent,1,dim])

        with tf.variable_scope('stack_d'):
            pad = tf.zeros([batch_size,1,dim],tf.float32)

            sent_emb_pad_d = tf.concat([pad,sent_emb_d,pad],axis=1)
            sent_emb_stack_2_d = tf.reshape(sent_emb_pad_d,[batch_size,maxlength_sent+2,1,dim])
            sent_emb_stack_2_d = tf.concat([sent_emb_stack_2_d[:,0:-1,:],sent_emb_stack_2_d[:,1:,:]],axis=2)
            sent_emb_stack_2_d = tf.reshape(sent_emb_stack_2_d,[batch_size*(maxlength_sent+1),2,dim])

            sent_emb_pad2_d = tf.concat([pad,pad,sent_emb_d,pad,pad],axis=1)
            sent_emb_stack_3_d = tf.reshape(sent_emb_pad2_d,[batch_size,maxlength_sent+4,1,dim])
            sent_emb_stack_3_d = tf.concat([sent_emb_stack_3_d[:, 0:-2, :], sent_emb_stack_3_d[:, 1:-1, :], sent_emb_stack_3_d[:, 2:, :]], axis=2)
            sent_emb_stack_3_d = tf.reshape(sent_emb_stack_3_d,[batch_size*(maxlength_sent+2),3,dim])

            sent_emb_stack_1_d = tf.reshape(sent_emb_d,[batch_size*maxlength_sent,1,dim])

        with tf.variable_scope("encoder"):
            with tf.variable_scope('encode_pat'):
                pat, _ = rnn(pat_emb, seq_len=pat_len, concat_layers=False)       #[numpats,d]
                pat_d = dropout(pat, keep_prob=config.keep_prob, is_train=is_train)
            with tf.variable_scope('encode_sent'):
                cont_stack_3, _ = rnn(sent_emb_stack_3,seq_len=3 * tf.ones([batch_size * (maxlength_sent + 2)], tf.int32),concat_layers=False)
                cont_stack_2, _ = rnn(sent_emb_stack_2, seq_len=2*tf.ones([batch_size*(maxlength_sent+1)],tf.int32), concat_layers=False)  #[batch_size*(maxlength_sent+1),d]
                cont_stack_1, _ = rnn(sent_emb_stack_1, seq_len=tf.ones([batch_size*maxlength_sent],tf.int32), concat_layers=False)  #[batch_size*maxlength_sent,d]
                cont_stack_3_d = dropout(cont_stack_3, keep_prob=keep_prob, is_train=is_train)
                cont_stack_2_d = dropout(cont_stack_2, keep_prob=keep_prob, is_train=is_train)
                cont_stack_1_d = dropout(cont_stack_1, keep_prob=keep_prob, is_train=is_train)

        with tf.variable_scope('attention'):
            pat_d_a = attention(pat_d,config.att_hidden, mask=pat_mask)
            cont_stack_2_d_a = attention(cont_stack_2_d,config.att_hidden)
            cont_stack_3_d_a = attention(cont_stack_3_d,config.att_hidden)

            cont_stack_3_att = tf.reduce_sum(tf.expand_dims(cont_stack_3_d_a, axis=2) * cont_stack_3, axis=1)
            cont_stack_2_att = tf.reduce_sum(tf.expand_dims(cont_stack_2_d_a, axis=2) * cont_stack_2, axis=1)
            pat_d_att = tf.reduce_sum(tf.expand_dims(pat_d_a, axis=2) * pat_d, axis=1)
            pat_att = tf.reduce_sum(tf.expand_dims(pat_d_a, axis=2) * pat, axis=1)
            cont_stack_1_att = tf.squeeze(cont_stack_1)
        with tf.variable_scope('emb_attention'):
            pat_emb_d_a = attention(pat_emb_d, config.att_hidden, mask=pat_mask)
            pat_emb_d_att = tf.reduce_sum(tf.expand_dims(pat_emb_d_a, axis=2) * pat_emb_d, axis=1)
            pat_emb_att = tf.reduce_sum(tf.expand_dims(pat_emb_d_a, axis=2) * pat_emb, axis=1)

            sent_emb_stack_3_d_a = attention(sent_emb_stack_3_d, config.att_hidden)
            sent_emb_stack_3_att = tf.reduce_sum(tf.expand_dims(sent_emb_stack_3_d_a, axis=2) * sent_emb_stack_3, axis=1)

            sent_emb_stack_2_d_a = attention(sent_emb_stack_2_d, config.att_hidden)
            sent_emb_stack_2_att = tf.reduce_sum(tf.expand_dims(sent_emb_stack_2_d_a, axis=2) * sent_emb_stack_2, axis=1)

            sent_emb_stack_1_att = tf.squeeze(sent_emb_stack_1)

        with tf.variable_scope('Score'):
            scores_stack_2 = cosine(cont_stack_2_att,pat_d_att,weighted=False)
            scores_stack_1 = cosine(cont_stack_1_att,pat_d_att,weighted=False)
            scores_stack_3 = cosine(cont_stack_3_att, pat_d_att, weighted=False)

            scores_stack_3 = tf.reshape(scores_stack_3, [batch_size, 1, maxlength_sent + 2, num_pats])
            scores_stack_2 = tf.reshape(scores_stack_2,[batch_size,1,maxlength_sent+1,num_pats])
            scores_stack_1 = tf.reshape(scores_stack_1,[batch_size,1,maxlength_sent,num_pats])
            scores_sim = cosine(pat_att, pat_d_att, weighted=False)

        with tf.variable_scope('emb_Score'):
            scores_stack_3_emb = cosine(sent_emb_stack_3_att,pat_emb_d_att)
            scores_stack_2_emb = cosine(sent_emb_stack_2_att,pat_emb_d_att)
            scores_stack_1_emb = cosine(sent_emb_stack_1_att,pat_emb_d_att)

            scores_stack_3_emb = tf.reshape(scores_stack_3_emb, [batch_size, 1, maxlength_sent + 2, num_pats])
            scores_stack_2_emb = tf.reshape(scores_stack_2_emb,[batch_size,1,maxlength_sent+1,num_pats])
            scores_stack_1_emb = tf.reshape(scores_stack_1_emb,[batch_size,1,maxlength_sent,num_pats])

            phi = 0
            scores_stack_3 = phi * scores_stack_3_emb + (1 - phi) * scores_stack_3
            scores_stack_2 = phi*scores_stack_2_emb+(1-phi)*scores_stack_2
            scores_stack_1 = phi*scores_stack_1_emb+(1-phi)*scores_stack_1

            scores = tf.concat([scores_stack_3[:, :, 0:-2, :],
                                scores_stack_3[:, :, 1:-1, :],
                                scores_stack_3[:, :, 2:, :],
                                scores_stack_2[:, :, 0:-1, :],
                                scores_stack_2[:, :, 1:, :],
                                scores_stack_1], axis=1)
            scores = tf.reshape(scores,[batch_size,6,maxlength_sent,num_pats])
            scores = tf.transpose(scores,[0,3,1,2])
            scores = tf.reshape(scores,[batch_size*num_pats,6,maxlength_sent])

            scores_sim_emb = cosine(pat_emb_att, pat_emb_d_att)
            scores_sim = phi*scores_sim_emb+(1-phi)*scores_sim

        with tf.variable_scope('SeqLabel'):
            seq = tf.layers.dense(tf.transpose(scores,[0,2,1]),1)
            seq = tf.squeeze(seq)
            seq = tf.reshape(seq,[batch_size,num_pats,maxlength_sent])
            #seq = tf.reshape(tf.reduce_max(scores,axis=1),[batch_size,num_pats,maxlength_sent])
            seq = tf.transpose(seq,[0,2,1])
            seq = seq * tf.tile(
                tf.cast(tf.reshape(sent_mask, [batch_size, maxlength_sent, 1]), tf.float32),
                [1, 1, num_pats])

        return seq,scores_sim
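
The stack scope builds 1-, 2- and 3-token windows by padding the sentence embedding and concatenating shifted copies, so every position yields a short n-gram that the encoder can score against a pattern. A shape-only TF 1.x sketch of the 2-gram case with toy data, not tied to the rest of FIND_module:

import numpy as np
import tensorflow as tf  # TF 1.x

batch_size, maxlen, dim = 2, 4, 3
sent_emb = tf.constant(np.random.rand(batch_size, maxlen, dim), tf.float32)

pad = tf.zeros([batch_size, 1, dim], tf.float32)
sent_emb_pad = tf.concat([pad, sent_emb, pad], axis=1)                  # [B, L+2, D]
stack_2 = tf.reshape(sent_emb_pad, [batch_size, maxlen + 2, 1, dim])
# pair every position with its right neighbour -> [B, L+1, 2, D]
stack_2 = tf.concat([stack_2[:, 0:-1, :], stack_2[:, 1:, :]], axis=2)
# flatten so each 2-gram becomes its own short "sentence" for the encoder
stack_2 = tf.reshape(stack_2, [batch_size * (maxlen + 1), 2, dim])

with tf.Session() as sess:
    print(sess.run(stack_2).shape)  # (10, 2, 3)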
Example #6
    def ready(self):
        config = self.config
        d = config.hidden

        batch_size = tf.shape(self.sent_word)[0]
        sent_mask = tf.cast(self.sent_word, tf.bool)
        sent_len = tf.reduce_sum(tf.cast(sent_mask, tf.int32), axis=1)
        sent_maxlen = config.length

        sent = self.sent_word

        pretrain_sent_mask = tf.cast(self.pretrain_sents, tf.bool)
        rnn = Cudnn_RNN(num_layers=2, num_units=d // 2, keep_prob=config.keep_prob,
                        is_train=self.is_train)
        label_mat, _ = FIND_module(sent, self.raw_pats, self.word_mat, config,
                                   tf.constant(False, tf.bool), rnn)
        label_mat = tf.sigmoid(label_mat) * tf.tile(
            tf.reshape(tf.cast(sent_mask, tf.float32), [batch_size, sent_maxlen, 1]),
            [1, 1, self.raw_pats.get_shape()[0]])

        # label_mat = tf.cast(tf.greater(label_mat,0.7),tf.float32)

        _,keywords_sim= FIND_module(sent,self.pats,self.word_mat,config,self.is_train,rnn)
        # keywords_sim = tf.sigmoid(keywords_sim)

        pretrain_pred_labels, _ = FIND_module(self.pretrain_sents, self.pretrain_pats,
                                              self.word_mat, config, self.is_train, rnn)
        pretrain_pred_labels = tf.transpose(pretrain_pred_labels, [0, 2, 1])
        gather_order = tf.tile(
            tf.reshape(tf.range(max(config.pretrain_size, config.pretrain_size_together)), [-1, 1]),
            [1, 2])
        pretrain_pred_labels = tf.gather_nd(pretrain_pred_labels, gather_order)

        masked_pretrain_loss = tf.nn.weighted_cross_entropy_with_logits(
            targets=self.pretrain_labels,
            logits=pretrain_pred_labels,
            pos_weight=config.pos_weight) * tf.cast(pretrain_sent_mask, tf.float32)
        self.pretrain_loss = tf.reduce_mean(
            tf.reduce_sum(masked_pretrain_loss, axis=1) /
            tf.reduce_sum(tf.cast(pretrain_sent_mask, tf.float32), axis=1))
        # alternative: tf.losses.mean_squared_error(labels=self.pretrain_labels,
        #                                           predictions=pretrain_pred_labels)

        self.prt_loss = masked_pretrain_loss
        self.prt_pred = tf.sigmoid(pretrain_pred_labels) * tf.cast(pretrain_sent_mask, tf.float32)
        self.pretrain_pred_labels = tf.reshape(
            tf.cast(tf.greater(tf.sigmoid(pretrain_pred_labels) * tf.cast(pretrain_sent_mask, tf.float32),
                               config.pretrain_threshold),
                    tf.int32),
            [-1])

        neg_idxs = tf.matmul(self.keywords_rels, tf.transpose(self.keywords_rels, [1, 0]))
        pat_pos = tf.square(tf.maximum(0.9 - keywords_sim, 0.))
        pat_pos = tf.reduce_max(pat_pos - tf.cast(1 - neg_idxs,tf.float32)*tf.constant(1e30,tf.float32), axis=1)

        pat_neg = tf.square(tf.maximum(keywords_sim, 0.))
        pat_neg = tf.reduce_max(pat_neg - tf.constant(1e30,tf.float32) * tf.cast(neg_idxs,tf.float32), axis=1)
        pat_simloss = tf.reduce_mean(pat_pos + pat_neg,axis=0)

        # clustering loss
        self.sim_loss = sim_loss = pat_simloss

        self.pretrain_loss_v2 = self.pretrain_loss + self.pretrain_alpha * self.sim_loss

        sim_raw = []

        for i, soft_labeling_function in enumerate(self.labeling_functions_soft):
            try:
                sim_raw.append(soft_labeling_function(label_mat, self.raw_keyword_dict, self.mask_mat)(
                    self.phrases_input) * self.type_restrict(i))
            except Exception:
                # fall back to an all-zero score row if a labeling function fails
                print(i)
                sim_raw.append(tf.cast(tf.reshape(0 * self.phrases_input[:, 0], [1, -1]), tf.float32))

        # stack the per-function scores: list of [1, batch_size] -> [batch_size, num_functions]
        self.sim = sim = tf.transpose(tf.concat(sim_raw, axis=0), [1, 0])
        with tf.variable_scope("classifier"):
            sent_emb = tf.nn.embedding_lookup(self.word_mat, sent)
            sent_emb = dropout(sent_emb, keep_prob=config.word_keep_prob, is_train=self.is_train, mode="embedding")
            rnn = Cudnn_RNN(num_layers=2, num_units=d // 2, keep_prob=config.keep_prob, is_train=self.is_train)
            cont, _ = rnn(sent_emb, seq_len=sent_len, concat_layers=False)
            cont_d = dropout(cont, keep_prob=config.keep_prob, is_train=self.is_train)
            att_a = attention(cont_d, config.att_hidden, mask=sent_mask)
            att2_d = tf.reduce_sum(tf.expand_dims(att_a, axis=2) * cont_d, axis=1)
            logit = dense(att2_d, config.num_class, use_bias=False)
            pred = tf.nn.softmax(logit)
            with tf.variable_scope("pred"):

                if not self.pseudo:

                    sent_loss = self.sent_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logit, labels=self.rel), axis=0)
                else:

                    self.hard_train_loss = sent_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit[:config.batch_size], labels=self.rel[:config.batch_size]), axis=0)

                    lsim = sim[:config.batch_size]
                    index_tensor = tf.reshape(tf.constant(np.arange(config.batch_size),tf.int32),[config.batch_size,1])
                    select_tensor = tf.reshape(self.hard_match_func_idx,[config.batch_size,1])
                    probs = tf.reshape(tf.gather_nd(lsim,tf.concat([index_tensor,select_tensor],axis=1)),[config.batch_size,1])
                    self.labeled_loss = labeled_loss = tf.reduce_mean(tf.square((1-probs)))

                    xsim = tf.stop_gradient(sim[config.batch_size:])

                    pseudo_rel = tf.gather(self.rels, tf.argmax(xsim, axis=1))
                    bound = tf.reduce_max(xsim, axis=1)
                    weight = tf.nn.softmax(10.0 * bound)

                    self.unlabeled_loss = unlabeled_loss = tf.reduce_sum(weight * tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logit[config.batch_size:], labels=pseudo_rel), axis=0)

                    sent_loss = self.sent_loss = (sent_loss + self.gamma * unlabeled_loss
                                                  + self.alpha * self.pretrain_loss)  # + self.alpha * labeled_loss

        # use the entropy of the prediction to flag likely no_relation cases
        self.max_val = entropy = tf.reduce_sum(pred * -log(pred), axis=1)
        # pred is used at test time
        self.pred = tf.argmax(pred, axis=1)
        self.loss = sent_loss + self.beta * sim_loss
        # prediction made by the similarity (soft matching) model
        self.sim_pred = tf.argmax(tf.gather(self.rels, tf.argmax(self.sim, axis=1)), axis=1)
        self.sim_max_val = tf.reduce_max(self.sim, axis=1)
        #true label
        self.gold = tf.argmax(self.rel, axis=1)
        self.entropy = tf.reduce_mean(entropy, axis=0)
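
For the unlabeled part of the batch, the pred scope turns matching scores into pseudo labels: argmax over sim selects the best-matching rule, tf.gather maps it to that rule's relation vector, and softmax(10 * bound) up-weights the most confident matches across the batch. A small standalone sketch of that step; the rule-to-relation matrix and the scores below are made up:

import tensorflow as tf  # TF 1.x

# 3 rules mapped to 2 relation classes (one-hot rows), as in self.rels
rels = tf.constant([[1., 0.], [1., 0.], [0., 1.]])
# matching scores of 4 unlabeled sentences against the 3 rules, as in sim
xsim = tf.constant([[0.9, 0.1, 0.2],
                    [0.2, 0.8, 0.1],
                    [0.1, 0.2, 0.7],
                    [0.3, 0.3, 0.3]])

xsim = tf.stop_gradient(xsim)                             # do not backprop through the scores
pseudo_rel = tf.gather(rels, tf.argmax(xsim, axis=1))     # one-hot pseudo label per sentence
bound = tf.reduce_max(xsim, axis=1)                       # confidence of the best-matching rule
weight = tf.nn.softmax(10.0 * bound)                      # sharp weighting across the batch

with tf.Session() as sess:
    print(sess.run([pseudo_rel, weight]))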