def lstm_match(mid, pat, mid_mask, pat_mask, mid_len, pat_len, hidden, keep_prob, is_train):
    rnn = Cudnn_RNN(num_layers=1, num_units=hidden // 2)
    mid, _ = rnn(mid, seq_len=mid_len, concat_layers=False)
    pat, _ = rnn(pat, seq_len=pat_len, concat_layers=False)

    mid_d = dropout(mid, keep_prob=keep_prob, is_train=is_train)
    pat_d = dropout(pat, keep_prob=keep_prob, is_train=is_train)
    mid_a = attention(mid_d, hidden, mask=mid_mask)
    pat_a = attention(pat_d, hidden, mask=pat_mask)

    mid_v = tf.reduce_sum(tf.expand_dims(mid_a, axis=2) * mid, axis=1)
    pat_v = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat, axis=1)
    pat_v_d = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat_d, axis=1)

    sur_sim = cosine(mid_v, pat_v_d)
    pat_sim = cosine(pat_v, pat_v_d)
    return sur_sim, pat_sim
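# Illustrative sketch (not part of the model): the masked attention pooling and
# pairwise cosine similarity that lstm_match relies on, written in plain NumPy so
# the tensor shapes are explicit. The helpers softmax_masked and cosine_np are
# hypothetical stand-ins for the repo's attention/cosine utilities.
import numpy as np

def softmax_masked(scores, mask):
    # scores, mask: [batch, seq_len]; masked positions get ~zero probability
    scores = scores - 1e30 * (1.0 - mask)
    e = np.exp(scores - scores.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def cosine_np(a, b, eps=1e-8):
    # a: [batch, d], b: [num_pat, d] -> [batch, num_pat] cosine similarities
    a = a / (np.linalg.norm(a, axis=1, keepdims=True) + eps)
    b = b / (np.linalg.norm(b, axis=1, keepdims=True) + eps)
    return a @ b.T

# toy shapes: 2 sentences of length 4, 3 patterns, hidden size 8
rng = np.random.RandomState(0)
mid = rng.randn(2, 4, 8)
mid_mask = np.array([[1, 1, 1, 0], [1, 1, 0, 0]], dtype=np.float32)
att = softmax_masked(rng.randn(2, 4), mid_mask)   # attention over tokens
mid_v = (att[:, :, None] * mid).sum(axis=1)       # attention-pooled sentence vector
pat_v = rng.randn(3, 8)                           # pooled pattern vectors
print(cosine_np(mid_v, pat_v).shape)              # (2, 3)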
def build_GAT(self, scope='MOGANED_Trigger'):
    maxlen = self.maxlen
    num_class = len(constant.EVENT_TYPE_TO_ID)
    keepprob = constant.t_keepprob
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        with tf.variable_scope('Initialize'):
            posi_mat = tf.concat([
                tf.zeros([1, constant.posi_embedding_dim], tf.float32),
                tf.get_variable(
                    'posi_emb', [2 * maxlen, constant.posi_embedding_dim],
                    tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer())
            ], axis=0)
            word_mat = tf.concat([
                tf.zeros((1, constant.embedding_dim), dtype=tf.float32),
                tf.get_variable(
                    "unk_word_embedding", [1, constant.embedding_dim],
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer()),
                tf.get_variable(
                    "wordemb", initializer=self.wordemb, trainable=True)
            ], axis=0)
            pos_mat = tf.concat([
                tf.zeros((1, constant.pos_dim), dtype=tf.float32),
                tf.get_variable(
                    "pos_embedding",
                    [len(constant.POS_TO_ID) - 1, constant.pos_dim],
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer())
            ], axis=0)
            ner_mat = tf.concat([
                tf.zeros((1, constant.ner_dim), dtype=tf.float32),
                tf.get_variable(
                    "ner_embedding",
                    [len(constant.NER_TO_ID) - 1, constant.ner_dim],
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer())
            ], axis=0)
        with tf.variable_scope("Placeholder"):
            self.sents = sents = tf.placeholder(tf.int32, [None, maxlen], 'sents')
            self.posis = posis = tf.placeholder(tf.int32, [None, maxlen], 'posis')
            self.maskls = maskls = tf.placeholder(tf.float32, [None, maxlen], 'maskls')
            self.maskrs = maskrs = tf.placeholder(tf.float32, [None, maxlen], 'maskrs')
            self._labels = _labels = tf.placeholder(tf.int32, [None], 'labels')
            labels = tf.one_hot(_labels, num_class)
            self.is_train = is_train = tf.placeholder(tf.bool, [], 'is_train')
            self.lexical = lexical = tf.placeholder(tf.int32, [None, 3], 'lexicals')
            self.ner_idx = ner_idx = tf.placeholder(tf.int32, [None, maxlen], 'ner_tags')
            self.pos_idx = pos_idx = tf.placeholder(tf.int32, [None, maxlen], 'pos_tags')
            self.subg_a = tf.sparse_placeholder(tf.float32, [None, maxlen, maxlen], 'subg')
            self.subg_b = tf.sparse_transpose(self.subg_a, [0, 2, 1])
            subg_a = tf.sparse_tensor_to_dense(self.subg_a, validate_indices=False)
            subg_b = tf.sparse_tensor_to_dense(self.subg_b, validate_indices=False)
            self.gather_idxs = tf.placeholder(tf.int32, [None, 2], 'gather_idxs')
            sents_len = tf.reduce_sum(tf.cast(tf.cast(sents, tf.bool), tf.int32), axis=1)
            sents_mask = tf.expand_dims(
                tf.sequence_mask(sents_len, maxlen, tf.float32), axis=2)
            eyes = tf.tile(tf.expand_dims(tf.eye(maxlen), 0), [tf.shape(pos_idx)[0], 1, 1])
        with tf.variable_scope("Embedding"):
            sents_emb = tf.nn.embedding_lookup(word_mat, sents)
            posis_emb = tf.nn.embedding_lookup(posi_mat, posis)
            pos_emb = tf.nn.embedding_lookup(pos_mat, pos_idx)
            ner_emb = tf.nn.embedding_lookup(ner_mat, ner_idx)
            concat_emb = tf.concat([sents_emb, posis_emb, pos_emb, ner_emb], axis=2)
        with tf.variable_scope("Lstm_layer"):
            rnn = Cudnn_RNN(num_layers=1, num_units=constant.hidden_dim,
                            keep_prob=keepprob, is_train=self.is_train)
            ps, _ = rnn(concat_emb, seq_len=sents_len, concat_layers=False,
                        keep_prob=keepprob, is_train=self.is_train)
        with tf.variable_scope("GAC"):
            hs = []
            for layer in range(1, constant.K + 1):
                h_layer = (GAC_func(ps, matmuls(subg_a, layer), maxlen, 'a', layer)
                           + GAC_func(ps, matmuls(subg_b, layer), maxlen, 'b', layer)
                           + GAC_func(ps, eyes, maxlen, 'c', layer))
                hs.append(h_layer)
        with tf.variable_scope("Aggregation"):
            s_ctxs = []
            for layer in range(1, constant.K + 1):
                s_raw = tf.layers.dense(hs[layer - 1], constant.s_dim, name='Wawa')
                s_layer = tf.nn.tanh(s_raw)
                ctx_apply = tf.layers.dense(s_layer, 1, name='ctx', use_bias=False)
                s_ctxs.append(ctx_apply)
            vs = tf.nn.softmax(tf.concat(s_ctxs, axis=2), axis=2)  # [None, maxlen, K]
            h_concats = tf.concat(
                [tf.expand_dims(hs[layer], 2) for layer in range(constant.K)], axis=2)
            final_h = tf.reduce_sum(tf.multiply(tf.expand_dims(vs, 3), h_concats), axis=2)
            gather_final_h = tf.gather_nd(final_h, self.gather_idxs)
        with tf.variable_scope('classifier'):
            bias_weight = (constant.t_bias_lambda - 1) * (
                1 - tf.cast(tf.equal(_labels, 0), tf.float32)) + 1
            self.logits = logits = tf.layers.dense(
                gather_final_h, num_class,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                bias_initializer=tf.contrib.layers.xavier_initializer(),
                name='Wo')
            self.pred = pred = tf.nn.softmax(logits, axis=1)
            self.pred_label = pred_label = tf.argmax(pred, axis=1)
            self.loss = loss = tf.reduce_sum(
                bias_weight * tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=labels, logits=logits),
                axis=0) / tf.reduce_sum(bias_weight, axis=0)
            self.train_op = train_op = tf.train.AdamOptimizer(
                constant.t_lr).minimize(loss)
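# Illustrative sketch (not the model code): the idea behind the GAC block above.
# matmuls(subg_a, layer) raises the dependency adjacency matrix to the layer-th
# power (paths of length "layer"), each power yields one graph convolution, and the
# K results are combined with softmax attention weights. Plain NumPy, one sentence,
# hypothetical toy values.
import numpy as np

def matmuls_np(adj, k):
    # k-th power of the adjacency matrix: connectivity via paths of length k
    out = adj.copy()
    for _ in range(k - 1):
        out = out @ adj
    return out

rng = np.random.RandomState(1)
K, maxlen, dim = 3, 5, 4
adj = (rng.rand(maxlen, maxlen) > 0.7).astype(np.float32)   # toy dependency graph
ps = rng.randn(maxlen, dim)                                  # BiLSTM token states

hs = [matmuls_np(adj, k) @ ps for k in range(1, K + 1)]      # one hop ... K hops
scores = rng.randn(maxlen, K)                                # per-token layer scores
vs = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)  # softmax over K
final_h = sum(vs[:, k:k + 1] * hs[k] for k in range(K))      # weighted aggregation
print(final_h.shape)                                         # (5, 4)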
def ready(self):
    config = self.config
    d = config.hidden
    batch_size = tf.shape(self.sent)[0]

    sent_mask = tf.cast(self.sent, tf.bool)
    sent_len = tf.reduce_sum(tf.cast(sent_mask, tf.int32), axis=1)
    sent_maxlen = tf.reduce_max(sent_len)
    sent_mask = tf.slice(sent_mask, [0, 0], [batch_size, sent_maxlen])
    sent = tf.slice(self.sent, [0, 0], [batch_size, sent_maxlen])

    mid_mask = tf.cast(self.mid, tf.bool)
    mid_len = tf.reduce_sum(tf.cast(mid_mask, tf.int32), axis=1)
    mid_maxlen = tf.reduce_max(mid_len)
    mid_mask = tf.slice(mid_mask, [0, 0], [batch_size, mid_maxlen])
    mid = tf.slice(self.mid, [0, 0], [batch_size, mid_maxlen])

    pat_mask = tf.cast(self.pats, tf.bool)
    pat_len = tf.reduce_sum(tf.cast(pat_mask, tf.int32), axis=1)

    with tf.variable_scope("embedding"):
        sent_emb = tf.nn.embedding_lookup(self.word_mat, sent)
        mid_emb = tf.nn.embedding_lookup(self.word_mat, mid)
        sent_emb = dropout(sent_emb, keep_prob=config.word_keep_prob,
                           is_train=self.is_train, mode="embedding")
        pat_emb = tf.nn.embedding_lookup(self.word_mat, self.pats)

    with tf.variable_scope("encoder"):
        rnn = Cudnn_RNN(num_layers=2, num_units=d // 2)
        cont, _ = rnn(sent_emb, seq_len=sent_len, concat_layers=False)
        pat, _ = rnn(pat_emb, seq_len=pat_len, concat_layers=False)
        cont_d = dropout(cont, keep_prob=config.keep_prob, is_train=self.is_train)
        pat_d = dropout(pat, keep_prob=config.keep_prob, is_train=self.is_train)

    with tf.variable_scope("attention"):
        att_a = attention(cont_d, config.att_hidden, mask=sent_mask)
        pat_a = self.pat_a = attention(pat_d, config.att_hidden, mask=pat_mask)

    with tf.variable_scope("sim"):
        sim, pat_sim = att_match(mid_emb, pat_emb, mid_mask, pat_mask, d,
                                 keep_prob=config.keep_prob, is_train=self.is_train)
        # keep the mid-pattern similarities for the sim-based predictions below
        self.sim = sim
        neg_idxs = tf.matmul(self.rels, tf.transpose(self.rels, [1, 0]))
        pat_pos = tf.square(tf.maximum(config.tau - pat_sim, 0.))
        pat_pos = tf.reduce_max(pat_pos - (1 - neg_idxs) * 1e30, axis=1)
        pat_neg = tf.square(tf.maximum(pat_sim, 0.))
        pat_neg = tf.reduce_max(pat_neg - 1e30 * neg_idxs, axis=1)
        l_sim = tf.reduce_sum(self.weight * (pat_pos + pat_neg), axis=0)

    with tf.variable_scope("pred"):
        att2_d = tf.reduce_sum(tf.expand_dims(att_a, axis=2) * cont_d, axis=1)
        pat2_d = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat_d, axis=1)
        logit = self.logit = dense(att2_d, config.num_class, use_bias=False)
        pred = tf.nn.softmax(logit)
        l_a = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logit[:config.batch_size],
                labels=self.rel[:config.batch_size]),
            axis=0)
        xsim = tf.stop_gradient(sim[config.batch_size:])
        pseudo_rel = tf.gather(self.rels, tf.argmax(xsim, axis=1))
        bound = tf.reduce_max(xsim, axis=1)
        weight = tf.nn.softmax(10 * bound)
        l_u = tf.reduce_sum(
            weight * tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logit[config.batch_size:], labels=pseudo_rel),
            axis=0)
        logit = dense(pat2_d, config.num_class, use_bias=False)
        l_pat = self.pat_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logit, labels=self.rels),
            axis=0)

    self.max_val = tf.reduce_sum(pred * -log(pred), axis=1)
    self.pred = tf.argmax(pred, axis=1)
    self.loss = l_a + config.alpha * l_pat + config.beta * l_sim + config.gamma * l_u
    self.sim_pred = tf.argmax(tf.gather(self.rels, tf.argmax(self.sim, axis=1)), axis=1)
    self.sim_max_val = tf.reduce_max(self.sim, axis=1)
    self.gold = tf.argmax(self.rel, axis=1)
    self.max_logit = tf.reduce_max(self.logit, axis=1)
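# Illustrative sketch (not the model code): the margin-based pattern clustering loss
# built from pat_sim and neg_idxs in the "sim" scope above. Patterns of the same
# relation are pulled above the margin tau, patterns of different relations are pushed
# toward zero similarity. Plain NumPy with made-up numbers.
import numpy as np

tau = 0.7
pat_sim = np.array([[1.0, 0.6, 0.2],
                    [0.6, 1.0, 0.1],
                    [0.2, 0.1, 1.0]])                         # pairwise pattern similarities
rels = np.array([[1, 0], [1, 0], [0, 1]], dtype=np.float32)   # one-hot relation per pattern
neg_idxs = rels @ rels.T                                      # 1 where two patterns share a relation

pat_pos = np.square(np.maximum(tau - pat_sim, 0.0))
pat_pos = np.max(pat_pos - (1 - neg_idxs) * 1e30, axis=1)     # hardest same-relation pair
pat_neg = np.square(np.maximum(pat_sim, 0.0))
pat_neg = np.max(pat_neg - 1e30 * neg_idxs, axis=1)           # hardest cross-relation pair
print(pat_pos + pat_neg)                                      # per-pattern clustering loss terms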
def build_HMEAE(self, scope="HMEAE"):
    maxlen = self.maxlen
    num_class = len(constant.ROLE_TO_ID)
    # newly added variable
    keepprob = constant.t_keepprob
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        with tf.variable_scope('Initialize'):
            posi_mat = tf.concat([
                tf.zeros([1, constant.posi_embedding_dim], tf.float32),
                tf.get_variable(
                    'posi_emb', [2 * maxlen, constant.posi_embedding_dim],
                    tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer())
            ], axis=0)
            word_mat = tf.concat([
                tf.zeros((1, constant.embedding_dim), dtype=tf.float32),
                tf.get_variable(
                    "unk_word_embedding", [1, constant.embedding_dim],
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer()),
                tf.get_variable(
                    "word_emb", initializer=self.wordemb, trainable=True)
            ], axis=0)
            event_mat = tf.concat([
                tf.zeros((1, constant.event_type_embedding_dim), dtype=tf.float32),
                tf.get_variable(
                    "event_emb",
                    [len(constant.EVENT_TYPE_TO_ID) - 1,
                     constant.event_type_embedding_dim],
                    initializer=tf.contrib.layers.xavier_initializer(),
                    trainable=True)
            ], axis=0)
            u_c = tf.get_variable(
                'feat_vatiable', [constant.module_num, 1, constant.a_u_c_dim],
                initializer=tf.contrib.layers.xavier_initializer())
            module_design = tf.constant(constant.module_design, tf.float32)
        with tf.variable_scope('placeholder'):
            self.sents = sents = tf.placeholder(tf.int32, [None, maxlen], 'sents')
            self.trigger_posis = trigger_posis = tf.placeholder(
                tf.int32, [None, maxlen], 'trigger_posis')
            self.argument_posis = argument_posis = tf.placeholder(
                tf.int32, [None, maxlen], 'argument_posis')
            self.maskls = maskls = tf.placeholder(tf.float32, [None, maxlen], 'maskls')
            self.maskms = maskms = tf.placeholder(tf.float32, [None, maxlen], 'maskms')
            self.maskrs = maskrs = tf.placeholder(tf.float32, [None, maxlen], 'maskrs')
            self.event_types = event_types = tf.placeholder(
                tf.int32, [None], 'event_types')
            self.trigger_lexical = trigger_lexical = tf.placeholder(
                tf.int32, [None, 3], 'trigger_lexicals')
            self.argument_lexical = argument_lexical = tf.placeholder(
                tf.int32, [None, 2 + self.max_argument_len], 'argument_lexicals')
            self._labels = _labels = tf.placeholder(tf.int32, [None], 'labels')
            labels = tf.one_hot(_labels, num_class)
            self.is_train = is_train = tf.placeholder(tf.bool, [], 'is_train')
            # inputs: sents, event_types, roles, maskl, maskm, maskr,
            # trigger_lexical, argument_lexical, trigger_maskl, trigger_maskr,
            # trigger_posis, argument_posis
            # newly added variable
            self.is_negative = tf.placeholder(tf.float32, [None])
            # (disabled) GAT-related placeholders:
            # self.pos_idx = pos_idx = tf.placeholder(tf.int32, [None, maxlen], 'pos_tags')
            # self.subg_a = tf.sparse_placeholder(tf.float32, [None, maxlen, maxlen], 'subg')
            # self.subg_b = tf.sparse_transpose(self.subg_a, [0, 2, 1])
            # self.gather_idxs = tf.placeholder(tf.int32, [None, 2], 'gather_idxs')
            # subg_a = tf.sparse_tensor_to_dense(self.subg_a, validate_indices=False)
            # subg_b = tf.sparse_tensor_to_dense(self.subg_b, validate_indices=False)
            # eyes = tf.tile(tf.expand_dims(tf.eye(maxlen), 0), [tf.shape(pos_idx)[0], 1, 1])
            sents_len = tf.reduce_sum(tf.cast(tf.cast(sents, tf.bool), tf.int32), axis=1)
            sents_mask = tf.sequence_mask(sents_len, maxlen, tf.float32)
            event_types = tf.tile(tf.expand_dims(event_types, axis=1),
                                  [1, maxlen]) * tf.cast(sents_mask, tf.int32)
            batch_size = tf.shape(sents)[0]
        with tf.variable_scope('embedding'):
            sents_emb = tf.nn.embedding_lookup(word_mat, sents)
            trigger_posis_emb = tf.nn.embedding_lookup(posi_mat, trigger_posis)
            trigger_lexical_emb = tf.nn.embedding_lookup(word_mat, trigger_lexical)
            argument_posis_emb = tf.nn.embedding_lookup(posi_mat, argument_posis)
            argument_lexical_emb = tf.nn.embedding_lookup(word_mat, argument_lexical)
            event_type_emb = tf.nn.embedding_lookup(event_mat, event_types)
        with tf.variable_scope('lexical_feature'):
            trigger_lexical_feature = tf.reshape(
                trigger_lexical_emb, [-1, 3 * constant.embedding_dim])
            argument_len = tf.reduce_sum(
                tf.cast(tf.cast(argument_lexical[:, 1:-1], tf.bool), tf.float32),
                axis=1, keepdims=True)
            argument_lexical_mid = tf.reduce_sum(
                argument_lexical_emb[:, 1:-1, :], axis=1) / argument_len
            argument_lexical_feature = tf.concat([
                argument_lexical_emb[:, 0, :],
                argument_lexical_mid,
                argument_lexical_emb[:, -1, :]
            ], axis=1)
            lexical_feature = tf.concat(
                [trigger_lexical_feature, argument_lexical_feature], axis=1)
        with tf.variable_scope('encoder'):
            emb = tf.concat([
                sents_emb, trigger_posis_emb, argument_posis_emb, event_type_emb
            ], axis=2)
            # (disabled) original convolutional encoder:
            # emb_shape = tf.shape(emb)
            # pad = tf.zeros([emb_shape[0], 1, emb_shape[2]], tf.float32)
            # conv_input = tf.concat([pad, emb, pad], axis=1)
            # conv_res = tf.layers.conv1d(
            #     inputs=conv_input,
            #     filters=constant.a_filters,
            #     kernel_size=3,
            #     strides=1,
            #     padding='valid',
            #     activation=tf.nn.relu,
            #     kernel_initializer=tf.contrib.layers.xavier_initializer(),
            #     name='convlution_layer')
            # conv_res = tf.reshape(conv_res, [-1, maxlen, constant.a_filters])
            rnn = Cudnn_RNN(num_layers=1, num_units=constant.hidden_dim,
                            keep_prob=keepprob, is_train=self.is_train)
            conv_res, _ = rnn(emb, seq_len=sents_len, concat_layers=False,
                              keep_prob=keepprob, is_train=self.is_train)
            # (disabled) GAT aggregation over the dependency graph:
            # hs = []
            # for layer in range(1, constant.K + 1):
            #     # h_layer = (GAC_func(conv_res, matmuls(subg_a, layer), maxlen, 'a', layer)
            #     #            + GAC_func(conv_res, matmuls(subg_b, layer), maxlen, 'b', layer)
            #     #            + GAC_func(conv_res, eyes, maxlen, 'c', layer))
            #     h_layer = (GAC_func(conv_res, matmuls(subg_a, layer), maxlen, 'a', layer)
            #                + GAC_func(conv_res, matmuls(subg_b, layer), maxlen, 'b', layer))
            #     hs.append(h_layer)
            # s_ctxs = []
            # for layer in range(1, constant.K + 1):
            #     s_raw = tf.layers.dense(hs[layer - 1], constant.s_dim, name='Wawa')
            #     s_layer = tf.nn.tanh(s_raw)
            #     ctx_apply = tf.layers.dense(s_layer, 1, name='ctx', use_bias=False)
            #     s_ctxs.append(ctx_apply)
            # vs = tf.nn.softmax(tf.concat(s_ctxs, axis=2), axis=2)  # [None, maxlen, K]
            # h_concats = tf.concat(
            #     [tf.expand_dims(hs[layer], 2) for layer in range(constant.K)], axis=2)
            # final_h = tf.reduce_sum(tf.multiply(tf.expand_dims(vs, 3), h_concats), axis=2)
            # gather_final_h = tf.gather_nd(final_h, self.gather_idxs)
            # # newly added reshape
            # conv_res = tf.reshape(gather_final_h, [-1, maxlen, constant.a_filters])
        with tf.variable_scope("attention"):
            conv_res_extend = tf.tile(tf.expand_dims(conv_res, axis=1),
                                      [1, constant.module_num, 1, 1])
            u_c_feat = tf.tile(
                tf.expand_dims(u_c, axis=0),
                [batch_size, 1, maxlen, 1]) * tf.tile(
                    tf.expand_dims(tf.expand_dims(sents_mask, axis=2), axis=1),
                    [1, constant.module_num, 1, 1])
            hidden_state = tf.layers.dense(
                tf.concat([conv_res_extend, u_c_feat], axis=3),
                constant.a_W_a_dim,
                use_bias=False,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                activation=tf.nn.tanh)
            score_logit = tf.reshape(
                tf.layers.dense(
                    hidden_state, 1, use_bias=False,
                    kernel_initializer=tf.contrib.layers.xavier_initializer()),
                [batch_size, constant.module_num, maxlen])
            score_mask = tf.tile(tf.expand_dims(sents_mask, axis=1),
                                 [1, constant.module_num, 1])
            score_logit = score_logit * score_mask - (1 - score_mask) * constant.INF
            module_score = tf.nn.softmax(score_logit, axis=2)
            module_mask = tf.tile(
                tf.expand_dims(tf.expand_dims(module_design, axis=0), axis=3),
                [batch_size, 1, 1, maxlen])
            score_mask = tf.tile(
                tf.expand_dims(module_score, axis=1),
                [1, len(constant.ROLE_TO_ID), 1, 1]) * module_mask
            module_of_role = tf.expand_dims(
                tf.expand_dims(tf.reduce_sum(module_design, axis=1), axis=0), axis=2)
            role_score = tf.reduce_sum(score_mask, axis=2) / module_of_role
            role_oriented_emb = tf.reduce_sum(
                tf.expand_dims(role_score, axis=3) *
                tf.tile(tf.expand_dims(conv_res, axis=1),
                        [1, len(constant.ROLE_TO_ID), 1, 1]),
                axis=2)
        with tf.variable_scope('maxpooling'):
            maskl = tf.tile(tf.expand_dims(maskls, axis=2), [1, 1, constant.a_filters])
            left = maskl * conv_res
            maskm = tf.tile(tf.expand_dims(maskms, axis=2), [1, 1, constant.a_filters])
            mid = maskm * conv_res
            maskr = tf.tile(tf.expand_dims(maskrs, axis=2), [1, 1, constant.a_filters])
            right = maskr * conv_res
            sentence_feature = tf.concat([
                tf.reduce_max(left, axis=1),
                tf.reduce_max(mid, axis=1),
                tf.reduce_max(right, axis=1)
            ], axis=1)
        with tf.variable_scope('classifier'):
            dmcnn_feature = tf.concat([sentence_feature, lexical_feature], axis=1)
            hmeae_feature = tf.concat([
                tf.tile(tf.expand_dims(dmcnn_feature, axis=1),
                        [1, len(constant.ROLE_TO_ID), 1]),
                role_oriented_emb
            ], axis=2)
            feature = tf.layers.dropout(hmeae_feature, 1 - constant.a_keepprob,
                                        training=is_train)
            eye_mask = tf.tile(tf.expand_dims(tf.eye(num_class), axis=0),
                               [batch_size, 1, 1])
            dense_res = tf.layers.dense(
                feature, num_class,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                bias_initializer=tf.contrib.layers.xavier_initializer())
            self.logits = logits = tf.reduce_max(
                eye_mask * dense_res - (1 - eye_mask) * constant.INF, axis=2)
            self.pred = pred = tf.nn.softmax(logits, axis=1)
            self.pred_label = pred_label = tf.argmax(pred, axis=1)
            # newly added variables for the confusion-weighted loss
            wrong_confusion_matrix = [[0.0] * num_class] * num_class
            correct_class_weight = [1.0] * num_class
            positive_idx = 15.0
            negative_idx = 1 - positive_idx
            self.loss = loss = tf.reduce_mean(
                f1_confusion_loss(_labels, logits, positive_idx, negative_idx,
                                  correct_class_weight, wrong_confusion_matrix,
                                  num_class))
            # self.loss = loss = tf.reduce_mean(
            #     tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits),
            #     axis=0)
            self.train_op = train_op = tf.train.AdamOptimizer(
                constant.a_lr).minimize(loss)
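# Illustrative sketch (not the model code): the eye_mask trick in the classifier above.
# The dense layer produces a [num_class, num_class] score block per example; multiplying
# by an identity mask and max-reducing (with -INF off the diagonal) keeps exactly one
# role-specific logit per role. Plain NumPy with toy values.
import numpy as np

INF = 1e30
num_class, batch_size = 4, 2
rng = np.random.RandomState(2)
dense_res = rng.randn(batch_size, num_class, num_class)    # per-role score rows
eye_mask = np.tile(np.eye(num_class)[None, :, :], [batch_size, 1, 1])

logits = np.max(eye_mask * dense_res - (1 - eye_mask) * INF, axis=2)
# logits[b, r] equals dense_res[b, r, r]: the score of role r computed from the
# role-r-oriented feature row
assert np.allclose(logits, dense_res[:, np.arange(num_class), np.arange(num_class)])
print(logits.shape)                                         # (2, 4)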
def ready(self):
    config = self.config
    d = config.hidden
    batch_size = tf.shape(self.sent_word)[0]
    sent_mask = tf.cast(self.sent_word, tf.bool)
    sent_len = tf.reduce_sum(tf.cast(sent_mask, tf.int32), axis=1)
    sent_maxlen = config.length
    sent = self.sent_word
    pretrain_sent_mask = tf.cast(self.pretrain_sents, tf.bool)

    rnn = Cudnn_RNN(num_layers=2, num_units=d // 2,
                    keep_prob=config.keep_prob, is_train=self.is_train)

    label_mat, _ = FIND_module(sent, self.raw_pats, self.word_mat, config,
                               tf.constant(False, tf.bool), rnn)
    label_mat = tf.sigmoid(label_mat) * tf.tile(
        tf.reshape(tf.cast(sent_mask, tf.float32), [batch_size, sent_maxlen, 1]),
        [1, 1, self.raw_pats.get_shape()[0]])
    # label_mat = tf.cast(tf.greater(label_mat, 0.7), tf.float32)
    _, keywords_sim = FIND_module(sent, self.pats, self.word_mat, config,
                                  self.is_train, rnn)
    # keywords_sim = tf.sigmoid(keywords_sim)
    pretrain_pred_labels, _ = FIND_module(self.pretrain_sents, self.pretrain_pats,
                                          self.word_mat, config, self.is_train, rnn)
    pretrain_pred_labels = tf.transpose(pretrain_pred_labels, [0, 2, 1])
    gather_order = tf.tile(
        tf.reshape(tf.range(max(config.pretrain_size, config.pretrain_size_together)),
                   [-1, 1]),
        [1, 2])
    pretrain_pred_labels = tf.gather_nd(pretrain_pred_labels, gather_order)
    self.pretrain_loss = tf.reduce_mean(
        tf.reduce_sum(
            tf.nn.weighted_cross_entropy_with_logits(
                targets=self.pretrain_labels,
                logits=pretrain_pred_labels,
                pos_weight=config.pos_weight) * tf.cast(pretrain_sent_mask, tf.float32),
            axis=1) / tf.reduce_sum(tf.cast(pretrain_sent_mask, tf.float32), axis=1))
    # alternative: tf.losses.mean_squared_error(labels=self.pretrain_labels,
    #                                           predictions=pretrain_pred_labels)
    self.prt_loss = tf.nn.weighted_cross_entropy_with_logits(
        targets=self.pretrain_labels,
        logits=pretrain_pred_labels,
        pos_weight=config.pos_weight) * tf.cast(pretrain_sent_mask, tf.float32)
    self.prt_pred = tf.sigmoid(pretrain_pred_labels) * tf.cast(pretrain_sent_mask, tf.float32)
    self.pretrain_pred_labels = tf.reshape(
        tf.cast(
            tf.greater(
                tf.sigmoid(pretrain_pred_labels) * tf.cast(pretrain_sent_mask, tf.float32),
                config.pretrain_threshold),
            tf.int32),
        [-1])

    neg_idxs = tf.matmul(self.keywords_rels, tf.transpose(self.keywords_rels, [1, 0]))
    pat_pos = tf.square(tf.maximum(0.9 - keywords_sim, 0.))
    pat_pos = tf.reduce_max(
        pat_pos - tf.cast(1 - neg_idxs, tf.float32) * tf.constant(1e30, tf.float32),
        axis=1)
    pat_neg = tf.square(tf.maximum(keywords_sim, 0.))
    pat_neg = tf.reduce_max(
        pat_neg - tf.constant(1e30, tf.float32) * tf.cast(neg_idxs, tf.float32),
        axis=1)
    pat_simloss = tf.reduce_mean(pat_pos + pat_neg, axis=0)

    # clustering loss
    self.sim_loss = sim_loss = pat_simloss
    self.pretrain_loss_v2 = self.pretrain_loss + self.pretrain_alpha * self.sim_loss

    sim_raw = []
    for i, soft_labeling_function in enumerate(self.labeling_functions_soft):
        try:
            sim_raw.append(
                soft_labeling_function(label_mat, self.raw_keyword_dict, self.mask_mat)(
                    self.phrases_input) * self.type_restrict(i))
        except Exception:
            print(i)
            sim_raw.append(
                tf.cast(tf.reshape(0 * self.phrases_input[:, 0], [1, -1]), tf.float32))
    # concat per-function scores and transpose -> [batch_size, num_functions]
    self.sim = sim = tf.transpose(tf.concat(sim_raw, axis=0), [1, 0])

    with tf.variable_scope("classifier"):
        sent_emb = tf.nn.embedding_lookup(self.word_mat, sent)
        sent_emb = dropout(sent_emb, keep_prob=config.word_keep_prob,
                           is_train=self.is_train, mode="embedding")
        rnn = Cudnn_RNN(num_layers=2, num_units=d // 2,
                        keep_prob=config.keep_prob, is_train=self.is_train)
        cont, _ = rnn(sent_emb, seq_len=sent_len, concat_layers=False)
        cont_d = dropout(cont, keep_prob=config.keep_prob, is_train=self.is_train)
        att_a = attention(cont_d, config.att_hidden, mask=sent_mask)
        att2_d = tf.reduce_sum(tf.expand_dims(att_a, axis=2) * cont_d, axis=1)
        logit = dense(att2_d, config.num_class, use_bias=False)
        pred = tf.nn.softmax(logit)

    with tf.variable_scope("pred"):
        if not self.pseudo:
            sent_loss = self.sent_loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=logit, labels=self.rel),
                axis=0)
        else:
            self.hard_train_loss = sent_loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=logit[:config.batch_size],
                    labels=self.rel[:config.batch_size]),
                axis=0)
            lsim = sim[:config.batch_size]
            index_tensor = tf.reshape(
                tf.constant(np.arange(config.batch_size), tf.int32),
                [config.batch_size, 1])
            select_tensor = tf.reshape(self.hard_match_func_idx, [config.batch_size, 1])
            probs = tf.reshape(
                tf.gather_nd(lsim, tf.concat([index_tensor, select_tensor], axis=1)),
                [config.batch_size, 1])
            self.labeled_loss = labeled_loss = tf.reduce_mean(tf.square(1 - probs))
            xsim = tf.stop_gradient(sim[config.batch_size:])
            pseudo_rel = tf.gather(self.rels, tf.argmax(xsim, axis=1))
            bound = tf.reduce_max(xsim, axis=1)
            weight = tf.nn.softmax(10.0 * bound)
            self.unlabeled_loss = unlabeled_loss = tf.reduce_sum(
                weight * tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=logit[config.batch_size:], labels=pseudo_rel),
                axis=0)
            sent_loss = self.sent_loss = (sent_loss + self.gamma * unlabeled_loss
                                          + self.alpha * self.pretrain_loss)
            # + self.alpha * labeled_loss

        # entropy used to infer no_relation
        self.max_val = entropy = tf.reduce_sum(pred * -log(pred), axis=1)
        # pred is used at test time
        self.pred = tf.argmax(pred, axis=1)
        self.loss = sent_loss + self.beta * sim_loss
        # predictions from the similarity (soft-rule) model
        self.sim_pred = tf.argmax(tf.gather(self.rels, tf.argmax(self.sim, axis=1)), axis=1)
        self.sim_max_val = tf.reduce_max(self.sim, axis=1)
        # gold labels
        self.gold = tf.argmax(self.rel, axis=1)
        self.entropy = tf.reduce_mean(entropy, axis=0)
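# Illustrative sketch (not the model code): the confidence weighting used for the
# unlabeled_loss above. Each unlabeled sentence gets the relation of its best-matching
# soft rule as a pseudo label, and its cross-entropy is weighted by a softmax over the
# best matching scores (the 10.0 temperature sharpens the weights). Plain NumPy with
# hypothetical toy sizes.
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

rng = np.random.RandomState(3)
num_unlabeled, num_rules, num_class = 4, 5, 3
xsim = rng.rand(num_unlabeled, num_rules)                          # soft-rule matching scores
rels = np.eye(num_class)[rng.randint(num_class, size=num_rules)]   # rule -> one-hot relation

pseudo_rel = rels[np.argmax(xsim, axis=1)]          # pseudo one-hot labels
bound = xsim.max(axis=1)                            # best matching score per sentence
weight = softmax(10.0 * bound)                      # confident sentences dominate
logit = rng.randn(num_unlabeled, num_class)
ce = -(pseudo_rel * np.log(softmax(logit))).sum(axis=1)
unlabeled_loss = np.sum(weight * ce)
print(unlabeled_loss)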