def SegPredic_api(inputtext):
    """Tag one test sequence from a JSON payload with the saved CRF model.

    Args:
        inputtext: JSON document (string) containing a ``'testdata'`` entry,
            which is passed to ``testdataconvert``.

    Returns:
        The label sequence returned by ``tagger.tag`` for the last sequence
        produced by ``testdataconvert``.
    """
    filename = 'model'
    charstop = True  # True means label attributes to previous char

    # Convert the JSON payload into test sequences.
    rawalldata = json.loads(inputtext)
    testdata = testdataconvert(rawalldata['testdata'])

    print(datetime.datetime.now())
    modelname = filename.replace('/', '').replace('*', '') + str(charstop) + ".m"
    tagger = pycrfsuite.Tagger()
    print(modelname)
    tagger.open(modelname)
    print(datetime.datetime.now())
    print("Start testing...")

    # Only the last converted sequence is tagged and scored.
    x, yref = testdata.pop()
    yout = tagger.tag(x)
    tp, fp, fn, tn = util.eval(yref, yout, "S")

    # With at least one true positive every denominator below is non-zero.
    # fp == 0 is a perfect precision, not an error case, so it must not
    # zero the metrics.
    if tp <= 0:
        p = 0
        r = 0
        f_score = 0
    else:
        p, r = tp / (tp + fp), tp / (tp + fn)
        f_score = 2 * p * r / (p + r)

    print("Total tokens in Test Set:", tp + fp + fn + tn)
    print("Total S in REF:", tp + fn)
    print("Total S in OUT:", tp + fp)
    print("Presicion:", p)
    print("Recall:", r)
    print("F1-score:", f_score)
    return yout
def solve(input_path):
    """Sum ``util.eval`` over every line of the file at *input_path*.

    Each line's result is printed together with the line itself.

    Args:
        input_path: Path of the text file to read.

    Returns:
        The sum of ``util.eval(line)`` over all lines (0 for an empty file).
    """
    result = 0
    # The context manager closes the file even if util.eval raises.
    with open(input_path) as f:
        for line in f:
            e = util.eval(line)
            print('Line result', e, line)
            result += e
    return result
def advance_batch():
    """Report the current step, re-run the label initializers and step ops.

    ``advance_batch_op`` is run (and timed) only when ``args.advance_batch``
    is set; ``advance_step_op`` is run on every call.
    """
    model_label = model.name
    step_value = u.eval(model.step)
    print("Step for model(%s) is %s" % (model_label, step_value))

    # The session object itself is not used below; the call is kept as-is.
    sess = u.get_default_session()

    # TODO: get rid of _sampled_labels
    label_initializers = [sampled_labels.initializer,
                          _sampled_labels.initializer]
    sessrun(label_initializers)

    if args.advance_batch:
        with u.timeit("advance_batch"):
            sessrun(advance_batch_op)
    sessrun(advance_step_op)
def evaluate(self, ann_file, res_file, style_name, return_img_scores=False):
    """Score a result file and add the enabled extra per-caption scores.

    Args:
        ann_file: Annotation file forwarded to ``util.eval``.
        res_file: JSON result file; each entry must have a ``'caption'`` key.
        style_name: Key used to pick the per-style scorer objects.
        return_img_scores: When true, also return the per-entry scores.

    Returns:
        ``metrics`` alone, or ``(metrics, img_scores)`` when
        ``return_img_scores`` is true.
    """
    metrics, img_scores = util.eval(ann_file, res_file, True)
    with open(res_file, 'r') as handle:
        records = json.load(handle)
    sent_list = [record['caption'] for record in records]

    def attach(key, overall, per_item, convert=None):
        # Store the per-item value on every score entry, then the overall one.
        for idx, entry in enumerate(img_scores):
            value = per_item[idx]
            entry[key] = value if convert is None else convert(value)
        metrics[key] = overall

    if self.use_lm:
        overall, per_item = lm_kenlm.eval_sentences(self.lm[style_name],
                                                    sent_list)
        attach('lm', overall, per_item)

    if self.use_clf:
        overall, per_item = clf_path.eval_sentences(self.clf[style_name],
                                                    sent_list)
        attach('clf', overall, per_item, convert=int)

    if self.use_srilm:
        overall, per_item = lm_srilm.eval_sentences(self.srilm[style_name],
                                                    sent_list)
        attach('srilm', overall, per_item)

    if self.use_nnclf:
        overall, per_item = self.nn_clf[style_name].evaluate(ann_file,
                                                             res_file)
        attach('nnclf', overall, per_item)

    if not return_img_scores:
        return metrics
    return metrics, img_scores
yref.append(a) #LSTM需要轉換 x_test_seq = token.texts_to_sequences(x) x_test = sequence.pad_sequences(x_test_seq, maxlen=MAX_LEN_OF_TOKEN) yout = model.predict_classes(x_test) #pr = tagger.probability(yref) p_1 = 0 p_2 = 0 prob = model.predict_proba(x_test) for i in range(len(yout)): p_1 = prob[i, 0] Spp.append(p_1) #標記的機率 np_2p = prob[i, 1] Npp.append(p_2) #標記的機率 results.append(util.eval(yref, yout, "S")) score_array = [] All_u_score = 0 p_Scount = 0 p_Ncount = 0 for i in range(len(Spp)): _s = 0 if Spp[i] > Npp[i]: _s = Spp[i] else: _s = Npp[i] #_s = (_s - 0.5) * 10 _s = (1 - _s) #U_score = U_score + _s
def test_eval(self):
    """Check ``util.eval`` against known results for sample expressions."""
    expectations = (
        (51, '1 + (2 * 3) + (4 * (5 + 6))'),
        (26, '2 * 3 + (4 * 5)'),
        (437, '5 + (8 * 3 + 9 + 3 * 4 * 3)'),
        (12240, '5 * 9 * (7 * 3 * 3 + 9 * 3 + (8 + 6 * 4))'),
        (13632, '((2 + 4 * 9) * (6 + 9 * 8 + 6) + 6) + 2 + 4 * 2'),
    )
    for wanted, expression in expectations:
        self.assertEqual(wanted, util.eval(expression))
def predic_unscore_api(inputtext):
    """Tag every line of *inputtext* with the stored CRF model.

    Args:
        inputtext: Raw text; it is split with ``util.text_to_lines``.

    Returns:
        A list with one label sequence (as returned by ``tagger.tag``) per
        input line; empty when the text yields no lines.
    """
    charstop = True  # True means label attributes to previous char
    features = 3  # 1=discrete; 2=vectors; 3=both
    dictfile = 'vector/24scbow50.txt'
    modelname = 'datalunyu5001.m'
    vdict = util.readvec(dictfile)

    li = util.text_to_lines(inputtext)
    print(li)
    data = []
    for line in li:
        x, y = util.line_toseq(line, charstop)
        print(x)
        if features == 1:
            d = crf.x_seq_to_features_discrete(x, charstop), y
        elif features == 2:
            d = crf.x_seq_to_features_vector(x, vdict, charstop), y
        elif features == 3:
            d = crf.x_seq_to_features_both(x, vdict, charstop), y
        data.append(d)

    tagger = pycrfsuite.Tagger()
    tagger.open(modelname)
    print("Start testing...")
    results = []
    lines = []
    Spp = []  # marginal probability of label 'S' for every tagged position
    Npp = []  # marginal probability of label 'N' for every tagged position
    out = []
    while data:
        print(len(data))
        xseq, yref = data.pop(0)
        yout = tagger.tag(xseq)
        for i in range(len(yout)):
            s_prob = tagger.marginal('S', i)
            Spp.append(s_prob)
            print(s_prob)
            n_prob = tagger.marginal('N', i)
            Npp.append(n_prob)
            print(n_prob)
        results.append(util.eval(yref, yout, "S"))
        lines.append(
            util.seq_to_line([x['gs0'] for x in xseq], yout, charstop, Spp,
                             Npp))
        out.append(yout)

    # Empty input or a run without any 'S' must not crash the report.
    if results:
        tp, fp, fn, tn = (sum(column) for column in zip(*results))
    else:
        tp = fp = fn = tn = 0
    p = tp / (tp + fp) if tp + fp else 0.0
    r = tp / (tp + fn) if tp + fn else 0.0
    f_score = 2 * p * r / (p + r) if p + r else 0.0

    report_rows = [
        "Total tokens in Test Set:" + repr(tp + fp + fn + tn),
        "Total S in REF:" + repr(tp + fn),
        "Total S in OUT:" + repr(tp + fp),
        "Presicion:" + repr(p),
        "Recall:" + repr(r),
        "*******************F1-score:" + repr(f_score),
    ]
    score = ''.join('<br>' + row for row in report_rows)

    print(lines)
    for line in lines:
        print("output:")
        print(line.encode('utf8'))
        print(line)
    # NOTE(review): this HTML report is assembled but not returned; callers
    # only receive the label sequences.
    output = score + '<br>' + ''.join(lines)
    return out
def predic():
    """Tag the submitted ``input_text`` form field and return an HTML report.

    The text is split with ``util.text_to_lines``, each line is tagged by the
    stored CRF model, and the result is scored against the labels derived
    from the input itself.

    Returns:
        A string of ``<br>``-separated report rows followed by the
        re-segmented lines.
    """
    charstop = True  # True means label attributes to previous char
    features = 3  # 1=discrete; 2=vectors; 3=both
    dictfile = 'vector/24scbow50.txt'
    modelname = 'datalunyu5001.m'
    vdict = util.readvec(dictfile)
    inputtext = request.form.get('input_text', '')

    li = util.text_to_lines(inputtext)
    print(li)
    data = []
    for line in li:
        x, y = util.line_toseq(line, charstop)
        print(x)
        if features == 1:
            d = crf.x_seq_to_features_discrete(x, charstop), y
        elif features == 2:
            d = crf.x_seq_to_features_vector(x, vdict, charstop), y
        elif features == 3:
            d = crf.x_seq_to_features_both(x, vdict, charstop), y
        data.append(d)

    tagger = pycrfsuite.Tagger()
    tagger.open(modelname)
    print("Start testing...")
    results = []
    lines = []
    Spp = []  # marginal probability of label 'S' for every tagged position
    Npp = []  # marginal probability of label 'N' for every tagged position
    while data:
        print(len(data))
        xseq, yref = data.pop(0)
        yout = tagger.tag(xseq)
        for i in range(len(yout)):
            s_prob = tagger.marginal('S', i)
            Spp.append(s_prob)
            print(s_prob)
            n_prob = tagger.marginal('N', i)
            Npp.append(n_prob)
            print(n_prob)
        results.append(util.eval(yref, yout, "S"))
        lines.append(
            util.seq_to_line([x['gs0'] for x in xseq], yout, charstop, Spp,
                             Npp))

    # Per position: how far the winning marginal is above 0.5, scaled by 10.
    U_score = 0
    for s_prob, n_prob in zip(Spp, Npp):
        best = s_prob if s_prob > n_prob else n_prob
        U_score = U_score + (best - 0.5) * 10
    uncertain_rate = U_score / len(Spp) if Spp else 0

    # Empty input or a run without any 'S' must not crash the report.
    if results:
        tp, fp, fn, tn = (sum(column) for column in zip(*results))
    else:
        tp = fp = fn = tn = 0
    p = tp / (tp + fp) if tp + fp else 0.0
    r = tp / (tp + fn) if tp + fn else 0.0
    f_score = 2 * p * r / (p + r) if p + r else 0.0

    report_rows = [
        "Total tokens in Test Set:" + repr(tp + fp + fn + tn),
        "Total S in REF:" + repr(tp + fn),
        "Total S in OUT:" + repr(tp + fp),
        "Presicion:" + repr(p),
        "Recall:" + repr(r),
        "*******************F1-score:" + repr(f_score),
        "=======================",
        "character count:" + str(len(Spp)),
        "block uncertain rate:" + str(uncertain_rate),
    ]
    score = ''.join('<br>' + row for row in report_rows)
    output = ''.join('<br>' + line for line in lines)
    return score + '<br>' + output
def buildCrf(inputtext):
    """Train a CRF model on a random sample of lines and test it on them.

    Args:
        inputtext: Glob pattern of the material files; it is expanded with
            ``glob.glob`` and read through ``util.file_to_lines``.

    Returns:
        The file name of the trained model.
    """
    material = inputtext
    filename = 'model'
    size = 80
    trainportion = 0.9
    dictfile = 'data/vector/24scbow300.txt'
    crfmethod = "l2sgd"  # {'lbfgs', 'l2sgd', 'ap', 'pa', 'arow'}
    charstop = True  # True means label attributes to previous char
    features = 1  # 1=discrete; 2=vectors; 3=both
    random.seed(101)

    cut = int(size * trainportion)
    # Name of the model file to train.
    modelname = filename.replace('/', '').replace(
        '*', '') + str(size) + str(charstop) + ".m"
    print(modelname)
    print("Material:", material)
    print("Size:", size, "entries,", trainportion, "as training")
    print(datetime.datetime.now())

    # Prepare li: list of random lines
    if features > 1:
        vdict = util.readvec(dictfile)
        print("Dict:", dictfile)
    li = list(util.file_to_lines(glob.glob(material)))
    random.shuffle(li)  # random sampling
    li = li[:size]

    # Prepare data: list of x(char), y(label) sequences
    data = []
    for line in li:
        x, y = util.line_toseq(line, charstop)
        if features == 1:
            d = crf.x_seq_to_features_discrete(x, charstop), y
        elif features == 2:
            d = crf.x_seq_to_features_vector(x, vdict, charstop), y
        elif features == 3:
            d = crf.x_seq_to_features_both(x, vdict, charstop), y
        data.append(d)
    traindata = data[:cut]

    trainer = pycrfsuite.Trainer()
    for x, y in traindata:
        trainer.append(x, y)
    trainer.select(crfmethod)
    trainer.set('max_iterations', 10)
    trainer.train(modelname)

    # Load the trained model and dump it next to the model file.
    tagger = pycrfsuite.Tagger()
    tagger.open(modelname)
    tagger.dump(modelname + ".txt")

    print(datetime.datetime.now())
    print("Start closed testing...")
    results = []
    print(traindata)
    # Stay defined (as None) when there is no training data at all.
    pr = pp = yout = None
    # Same order as popping from the end of the list.
    for x, yref in reversed(traindata):
        yout = tagger.tag(x)
        pr = tagger.marginal('S', 0)
        pp = tagger.probability(yout)
        results.append(util.eval(yref, yout, "S"))

    # Empty input or a run without any 'S' must not crash the report.
    if results:
        tp, fp, fn, tn = (sum(column) for column in zip(*results))
    else:
        tp = fp = fn = tn = 0
    p = tp / (tp + fp) if tp + fp else 0.0
    r = tp / (tp + fn) if tp + fn else 0.0
    f_score = 2 * p * r / (p + r) if p + r else 0.0

    print("Total tokens in Train Set:", tp + fp + fn + tn)
    print("Total S in REF:", tp + fn)
    print("Total S in OUT:", tp + fp)
    print("Presicion:", p)
    print("Recall:", r)
    print("*******************F1-score:", f_score)
    print("*******************:", pr)
    print("*******************:", pp)
    print("*******************:", yout)
    print(datetime.datetime.now())
    return modelname
def trainAndpredic_api(inputtext):
    """Train a CRF on the payload's training data and score its test blocks.

    Args:
        inputtext: JSON document (string) with ``'traindata'`` and
            ``'testdata'`` entries; every test entry must carry a ``'text'``
            field.

    Returns:
        A list of ``[block_id, uncertainty]`` pairs, one per test block,
        where ``uncertainty`` is the mean of ``1 - max(P(S), P(N))`` over the
        block's positions.

    Side effects:
        Writes the trained model, ``reslog.txt`` (raw inputs and outputs) and
        ``UserRES.txt`` (summary metrics) to the working directory.
    """
    filename = 'model'
    charstop = True  # True means label attributes to previous char
    crfmethod = "lbfgs"  # {'lbfgs', 'l2sgd', 'ap', 'pa', 'arow'}

    # Convert the JSON payload.
    rawalldata = json.loads(inputtext)
    print(rawalldata)
    traindata = traindataconvert(rawalldata['traindata'])
    testdata = testdataconvert(rawalldata['testdata'])

    # Per test block: [number of characters, uncertainty score].
    testidx = []
    text_obj = {}
    for block_id in rawalldata['testdata']:
        testidx.append(block_id)
        text_obj[block_id] = [len(rawalldata['testdata'][block_id]['text']), 0]
    print('info:', text_obj)
    print(datetime.datetime.now())

    modelname = filename.replace('/', '').replace('*', '') + str(charstop) + ".m"
    print(modelname)
    trainer = pycrfsuite.Trainer()
    trainer.append(traindata[0], traindata[1])
    trainer.select(crfmethod)
    trainer.set('max_iterations', 30)
    trainer.train(modelname)

    tagger = pycrfsuite.Tagger()
    tagger.open(modelname)
    print(datetime.datetime.now())
    print("Start testing...")

    results = []
    Spp = []  # marginal probability of label 'S' for every tagged position
    Npp = []  # marginal probability of label 'N' for every tagged position
    with open('reslog.txt', 'w') as ftt:
        for x, yref in testdata:
            ftt.write(str(x))
            ftt.write(str(yref))
            yout = tagger.tag(x)
            ftt.write(str(yout))
            for i in range(len(yout)):
                Spp.append(tagger.marginal('S', i))
                Npp.append(tagger.marginal('N', i))
            results.append(util.eval(yref, yout, "S"))

    # Uncertainty of a position: 1 minus the marginal of the likelier label.
    score_array = [1 - (s if s > n else n) for s, n in zip(Spp, Npp)]

    # Walk through score_array block by block. Each block starts where the
    # previous one ended; restarting at 0 for every block would score all
    # blocks on the first block's positions.
    # NOTE(review): assumes one tagged position per character of 'text' and
    # the same block order as testdataconvert — confirm.
    text_score = []
    start = 0
    for block_id in testidx:
        text_count = text_obj[block_id][0]
        end = start + text_count
        U_score = sum(score_array[start:end])
        print('text_count:', text_count)
        print('U_score:', U_score)
        U_score = U_score / text_count if text_count else 0
        text_obj[block_id][1] = U_score
        text_score.append([str(block_id), U_score])
        start = end

    if results:
        tp, fp, fn, tn = (sum(column) for column in zip(*results))
    else:
        tp = fp = fn = tn = 0
    # With at least one true positive every denominator below is non-zero.
    # fp == 0 is a perfect precision, so it must not zero the metrics.
    if tp <= 0:
        p = 0
        r = 0
        f_score = 0
    else:
        p, r = tp / (tp + fp), tp / (tp + fn)
        f_score = 2 * p * r / (p + r)

    print("Total tokens in Test Set:", tp + fp + fn + tn)
    print("Total S in REF:", tp + fn)
    print("Total S in OUT:", tp + fp)
    print("Presicion:", p)
    print("Recall:", r)
    print("F1-score:", f_score)
    print(text_score)

    log_text = ''.join([
        "----Doc Result-----" + "\n",
        "Total tokens in Test Set:" + str(tp + fp + fn + tn) + '\n',
        "Total S in REF:" + str(tp + fn) + '\n',
        "Total S in OUT:" + str(tp + fp) + '\n',
        "Presicion:" + str(p) + '\n',
        "Recall:" + str(r) + '\n',
        "F1-Score:" + str(f_score) + '\n',
        '\n' + "=============" + '\n',
        'End Time:' + str(datetime.datetime.now()) + '\n',
        '\n',
    ])
    # Opened only once the summary exists, and closed even on a write error.
    with open('UserRES.txt', 'w') as f:
        f.write(log_text)
    return text_score
def fit(self, X_train, y_train, len_train, pos_train, length_train, position_train,
        X_validation, y_validation, len_validation, pos_validation, length_validation, position_validation,
        name, print_log=True):
    """Build the two-level (PW, PPH) BiLSTM graph and train it.

    After every epoch the model is evaluated on the validation set, the
    checkpoint is saved under ``./models/<name>/bilstm`` whenever the
    validation loss improves, and the validation predictions are written out
    with ``util.recover2``.

    Args:
        X_train, X_validation: Input id arrays; ``X_train.shape[0]`` is used
            as the number of training samples.
        y_train, y_validation: Sequences whose items 0 and 1 are the PW and
            PPH label arrays.
        len_train, len_validation: Sequence lengths fed to ``seq_len_p``.
        pos_train, pos_validation: Values fed to ``pos_p``.
        length_train, length_validation: Values fed to ``length_p``.
        position_train, position_validation: Values fed to ``position_p``.
        name: Sub-directory of ``./models/`` for the checkpoint.
        print_log: Unused; kept for interface compatibility.
    """
    y_train_pw = y_train[0]
    y_train_pph = y_train[1]
    y_validation_pw = y_validation[0]
    y_validation_pph = y_validation[1]

    # --------------------------------- define graph ---------------------------------
    with self.graph.as_default():
        sentence_shape = (None, self.max_sentence_size)
        # data placeholder
        self.X_p = tf.placeholder(dtype=tf.int32, shape=sentence_shape, name="input_placeholder")
        # pos info placeholder
        self.pos_p = tf.placeholder(dtype=tf.int32, shape=sentence_shape, name="pos_placeholder")
        # length info placeholder
        self.length_p = tf.placeholder(dtype=tf.int32, shape=sentence_shape, name="length_placeholder")
        # position info placeholder (op name kept exactly as before)
        self.position_p = tf.placeholder(dtype=tf.int32, shape=sentence_shape, name="length_placeholder")
        self.y_p_pw = tf.placeholder(dtype=tf.int32, shape=sentence_shape, name="label_placeholder_pw")
        self.y_p_pph = tf.placeholder(dtype=tf.int32, shape=sentence_shape, name="label_placeholder_pph")

        # dropout placeholders
        self.keep_prob_p = tf.placeholder(dtype=tf.float32, shape=[], name="keep_prob_p")
        self.input_keep_prob_p = tf.placeholder(dtype=tf.float32, shape=[], name="input_keep_prob_p")
        self.output_keep_prob_p = tf.placeholder(dtype=tf.float32, shape=[], name="output_keep_prob_p")

        # length of every sequence
        self.seq_len_p = tf.placeholder(dtype=tf.int32, shape=(None,), name="seq_len")

        # mask used to drop the padding
        self.mask = tf.sequence_mask(
            lengths=self.seq_len_p,
            maxlen=self.max_sentence_size,
            name="mask"
        )

        # labels with the padding removed
        y_p_pw_masked = tf.boolean_mask(tensor=self.y_p_pw, mask=self.mask, name="y_p_pw_masked")
        y_p_pph_masked = tf.boolean_mask(tensor=self.y_p_pph, mask=self.mask, name="y_p_pph_masked")

        # embeddings
        self.word_embeddings = tf.Variable(
            initial_value=util.getCWE(
                word_embed_file="../data/embeddings/word_vec.txt",
                char_embed_file="../data/embeddings/char_vec.txt"
            ),
            name="word_embeddings"
        )
        print("word_embeddings.shape", self.word_embeddings.shape)

        # one-hot side information (op names kept exactly as before)
        self.pos_one_hot = tf.one_hot(indices=self.pos_p, depth=self.pos_num, name="pos_one_hot")
        print("shape of pos_one_hot:", self.pos_one_hot.shape)
        self.length_one_hot = tf.one_hot(indices=self.length_p, depth=self.length_num, name="pos_one_hot")
        print("shape of length_one_hot:", self.length_one_hot.shape)
        self.position_one_hot = tf.one_hot(
            indices=self.position_p, depth=self.max_sentence_size, name="pos_one_hot"
        )
        print("shape of position_one_hot:", self.position_one_hot.shape)

        # ------------------------------------- PW -------------------------------------
        inputs_pw = tf.nn.embedding_lookup(params=self.word_embeddings, ids=self.X_p, name="embeded_input_pw")
        print("shape of inputs_pw:", inputs_pw.shape)
        # concat all information
        inputs_pw = tf.concat(
            values=[inputs_pw, self.pos_one_hot, self.length_one_hot, self.position_one_hot],
            axis=2,
            name="input_pw"
        )
        print("shape of cancated inputs_pw:", inputs_pw.shape)
        self.loss_pw, pred_pw, pred_pw_masked, pred_normal_one_hot_pw, h_pw = \
            self._build_tagging_branch("pw", inputs_pw, y_p_pw_masked, self.lambda_pw)
        print("h_pw.shape", h_pw.shape)

        # ------------------------------------- PPH ------------------------------------
        inputs_pph = tf.nn.embedding_lookup(params=self.word_embeddings, ids=self.X_p, name="embeded_input_pph")
        print("shape of input_pph:", inputs_pph.shape)
        # concat all information, including the one-hot PW prediction
        inputs_pph = tf.concat(
            values=[inputs_pph, self.pos_one_hot, self.length_one_hot, self.position_one_hot,
                    pred_normal_one_hot_pw],
            axis=2,
            name="inputs_pph"
        )
        print("shape of input_pph:", inputs_pph.shape)
        self.loss_pph, pred_pph, pred_pph_masked, pred_normal_one_hot_pph, h_pph = \
            self._build_tagging_branch("pph", inputs_pph, y_p_pph_masked, self.lambda_pph)

        # adjust learning rate
        global_step = tf.Variable(initial_value=1, trainable=False)
        start_learning_rate = self.learning_rate
        learning_rate = tf.train.exponential_decay(
            learning_rate=start_learning_rate,
            global_step=global_step,
            decay_steps=(X_train.shape[0] // self.batch_size) + 1,
            decay_rate=self.decay_rate,
            staircase=True,
            name="decay_learning_rate"
        )

        # loss
        self.loss = self.loss_pw + self.loss_pph

        # optimizer
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.loss, global_step)
        self.init_op = tf.global_variables_initializer()
        self.init_local_op = tf.local_variables_initializer()

    # ----------------------------------- Session -----------------------------------
    with self.session as sess:
        print("Training Start")
        sess.run(self.init_op)  # initialize all variables
        sess.run(self.init_local_op)

        train_Size = X_train.shape[0]
        # Lowest validation loss seen so far; any first epoch improves on it.
        self.best_validation_loss = float("inf")

        for epoch in range(1, self.max_epoch + 1):
            print("Epoch:", epoch)
            start_time = time.time()  # time evaluation
            # training loss/accuracy in every mini-batch
            self.train_losses = []
            self.train_accus_pw = []
            self.train_accus_pph = []
            # each class's f1 score
            self.c1_f_pw = []
            self.c2_f_pw = []
            self.c1_f_pph = []
            self.c2_f_pph = []
            lrs = []

            # mini batch
            for i in range(0, (train_Size // self.batch_size)):
                batch = slice(i * self.batch_size, (i + 1) * self.batch_size)
                # note: the fetched labels and predictions are already masked
                _, train_loss, y_train_pw_masked, y_train_pph_masked, \
                    train_pred_pw, train_pred_pph, lr = sess.run(
                        fetches=[self.optimizer, self.loss, y_p_pw_masked, y_p_pph_masked,
                                 pred_pw_masked, pred_pph_masked, learning_rate],
                        feed_dict={
                            self.X_p: X_train[batch],
                            self.y_p_pw: y_train_pw[batch],
                            self.y_p_pph: y_train_pph[batch],
                            self.seq_len_p: len_train[batch],
                            self.pos_p: pos_train[batch],
                            self.length_p: length_train[batch],
                            self.position_p: position_train[batch],
                            self.keep_prob_p: self.keep_prob,
                            self.input_keep_prob_p: self.input_keep_prob,
                            self.output_keep_prob_p: self.output_keep_prob
                        }
                    )
                lrs.append(lr)

                # loss
                self.train_losses.append(train_loss)

                # metrics
                accuracy_pw, f1_pw = util.eval(y_true=y_train_pw_masked, y_pred=train_pred_pw)
                accuracy_pph, f1_pph = util.eval(y_true=y_train_pph_masked, y_pred=train_pred_pph)
                self.train_accus_pw.append(accuracy_pw)
                self.train_accus_pph.append(accuracy_pph)
                # F1-score
                self.c1_f_pw.append(f1_pw[0])
                self.c2_f_pw.append(f1_pw[1])
                self.c1_f_pph.append(f1_pph[0])
                self.c2_f_pph.append(f1_pph[1])

            # No batch was run when the training set is smaller than one batch.
            if lrs:
                print("learning rate:", sum(lrs) / len(lrs))

            # validation in every epoch
            self.validation_loss, y_valid_pw_masked, y_valid_pph_masked, \
                valid_pred_pw, valid_pred_pph = sess.run(
                    fetches=[self.loss, y_p_pw_masked, y_p_pph_masked,
                             pred_pw_masked, pred_pph_masked],
                    feed_dict={
                        self.X_p: X_validation,
                        self.y_p_pw: y_validation_pw,
                        self.y_p_pph: y_validation_pph,
                        self.seq_len_p: len_validation,
                        self.pos_p: pos_validation,
                        self.length_p: length_validation,
                        self.position_p: position_validation,
                        self.keep_prob_p: 1.0,
                        self.input_keep_prob_p: 1.0,
                        self.output_keep_prob_p: 1.0
                    }
                )

            # metrics
            self.valid_accuracy_pw, self.valid_f1_pw = util.eval(
                y_true=y_valid_pw_masked, y_pred=valid_pred_pw)
            self.valid_accuracy_pph, self.valid_f1_pph = util.eval(
                y_true=y_valid_pph_masked, y_pred=valid_pred_pph)

            print("Epoch ", epoch, " finished.", "spend ", round((time.time() - start_time) / 60, 2), " mins")
            self.showInfo(type="training")
            self.showInfo(type="validation")

            # Store the model whenever the validation loss improves (gets
            # lower than the best one seen so far).
            if self.validation_loss < self.best_validation_loss:
                self.best_validation_loss = self.validation_loss
                print("New Best loss ", self.best_validation_loss, " On Validation set! ")
                print("Saving Models......\n\n")
                # make sure ./models/<name>/bilstm exists
                os.makedirs("./models/" + name + "/bilstm", exist_ok=True)
                # create saver
                saver = tf.train.Saver()
                saver.save(sess, "./models/" + name + "/bilstm/my-model-10000")
                # Generates MetaGraphDef.
                saver.export_meta_graph("./models/" + name + "/bilstm/my-model-10000.meta")
            print("\n\n")

            # test: using the validation inputs, unmasked predictions
            test_pred_pw, test_pred_pph = sess.run(
                fetches=[pred_pw, pred_pph],
                feed_dict={
                    self.X_p: X_validation,
                    self.seq_len_p: len_validation,
                    self.pos_p: pos_validation,
                    self.length_p: length_validation,
                    self.position_p: position_validation,
                    self.keep_prob_p: 1.0,
                    self.input_keep_prob_p: 1.0,
                    self.output_keep_prob_p: 1.0
                }
            )
            # recover to original corpus txt
            util.recover2(
                X=X_validation,
                preds_pw=test_pred_pw,
                preds_pph=test_pred_pph,
                filename="../result/bilstm_cwe/recover_epoch_" + str(epoch) + ".txt"
            )

def _build_tagging_branch(self, tag, inputs, labels_masked, l2_scale):
    """Build one BiLSTM tagging branch (two LSTM layers per direction).

    Args:
        tag: Suffix used for the RNN scope and all op names ("pw" / "pph").
        inputs: Input tensor handed to ``bidirectional_dynamic_rnn``.
        labels_masked: Padding-free labels for the cross-entropy loss.
        l2_scale: Scale of the L2 penalty on the projection weights.

    Returns:
        ``(loss, pred, pred_masked, pred_one_hot, h)`` where ``pred`` is the
        flat argmax prediction, ``pred_masked`` the padding-free one,
        ``pred_one_hot`` the per-sentence one-hot prediction and ``h`` the
        reshaped RNN output before the dropout.
    """
    def stacked_cell():
        # two LSTM layers wrapped in input/output dropout
        cell = rnn.MultiRNNCell(cells=[
            rnn.BasicLSTMCell(num_units=self.hidden_units_num),
            rnn.BasicLSTMCell(num_units=self.hidden_units_num2),
        ])
        return rnn.DropoutWrapper(
            cell=cell,
            input_keep_prob=self.input_keep_prob_p,
            output_keep_prob=self.output_keep_prob_p
        )

    cell_fw = stacked_cell()  # forward part
    cell_bw = stacked_cell()  # backward part
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw,
        cell_bw=cell_bw,
        inputs=inputs,
        sequence_length=self.seq_len_p,
        dtype=tf.float32,
        scope=tag
    )
    # concat forward and backward outputs along the feature axis
    h = tf.concat(values=[outputs[0], outputs[1]], axis=2)
    # NOTE(review): this reshape looks like it assumes
    # hidden_units_num2 == hidden_units_num — TODO confirm.
    h = tf.reshape(tensor=h, shape=(-1, self.hidden_units_num * 2), name="h_" + tag)

    # dropout before the fully connected layer
    h_dropped = tf.nn.dropout(x=h, keep_prob=self.keep_prob_p, name="dropout_h_" + tag)

    # fully connect layer (projection)
    weights = tf.Variable(
        initial_value=tf.random_normal(shape=(self.hidden_units_num * 2, self.class_num)),
        name="weights_" + tag
    )
    bias = tf.Variable(
        initial_value=tf.random_normal(shape=(self.class_num,)),
        name="bias_" + tag
    )

    # logits, flat and per sentence
    logits = tf.matmul(h_dropped, weights) + bias
    logits_normal = tf.reshape(
        tensor=logits,
        shape=(-1, self.max_sentence_size, self.class_num),
        name="logits_normal_" + tag
    )
    logits_masked = tf.boolean_mask(
        tensor=logits_normal, mask=self.mask, name="logits_" + tag + "_masked"
    )

    # prediction, flat / per sentence / padding-free / one-hot
    pred = tf.cast(tf.argmax(logits, 1), tf.int32, name="pred_" + tag)
    pred_normal = tf.reshape(
        tensor=pred, shape=(-1, self.max_sentence_size), name="pred_normal_" + tag
    )
    pred_masked = tf.boolean_mask(
        tensor=pred_normal, mask=self.mask, name="pred_" + tag + "_masked"
    )
    pred_one_hot = tf.one_hot(
        indices=pred_normal, depth=self.class_num, name="pred_normal_one_hot_" + tag
    )

    # loss: cross entropy on the padding-free positions plus the L2 penalty
    loss = tf.losses.sparse_softmax_cross_entropy(
        labels=labels_masked,
        logits=logits_masked
    ) + tf.contrib.layers.l2_regularizer(l2_scale)(weights)
    return loss, pred, pred_masked, pred_one_hot, h
def _build_level(self, tag, y_masked, prev_one_hot=None):
    """Build one encoder/decoder tagging stack and return its loss and predictions.

    The stack looks up ``self.embeddings`` for ``self.X_p``, optionally
    concatenates the one-hot predictions of the previous level, runs
    ``self.encoder`` / ``self.decoder`` and projects the decoder output to
    ``self.class_num`` logits.

    Args:
        tag: Suffix used in every op/variable/scope name of this level
            (``"pw"``, ``"pph"`` or ``"iph"`` when called from ``fit``).
        y_masked: Labels of this level with padding positions removed.
        prev_one_hot: One-hot predictions of the previous level, or ``None``
            for the first level.

    Returns:
        Tuple ``(loss, pred, pred_masked, pred_one_hot)``: the cross-entropy
        loss over non-padding positions, the flat argmax predictions, the
        predictions with padding removed, and the one-hot predictions
        reshaped to one row per sentence.
    """
    inputs = tf.nn.embedding_lookup(params=self.embeddings,
                                    ids=self.X_p,
                                    name="embeded_input_" + tag)
    if prev_one_hot is not None:
        # Feed the previous level's decisions as extra input features.
        inputs = tf.concat(values=[inputs, prev_one_hot],
                           axis=2,
                           name="inputs_" + tag)

    # Encoder cells (forward/backward) and a decoder cell twice as wide.
    cell_forward = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
    cell_backward = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
    cell_decoder = rnn.BasicLSTMCell(num_units=self.hidden_units_num * 2)

    encoder_outputs, encoder_states = self.encoder(
        cell_forward=cell_forward,
        cell_backward=cell_backward,
        inputs=inputs,
        seq_length=self.seq_len_p,
        scope_name="en_lstm_" + tag)
    # The original comments give h as [batch*time_steps, hidden_units*2].
    hidden = self.decoder(cell=cell_decoder,
                          initial_state=encoder_states,
                          inputs=encoder_outputs,
                          scope_name="de_lstm_" + tag)

    # Fully connected projection to class scores.
    weights = tf.Variable(initial_value=tf.random_normal(
        shape=(self.hidden_units_num * 2, self.class_num)),
                          name="weights_" + tag)
    bias = tf.Variable(
        initial_value=tf.random_normal(shape=(self.class_num, )),
        name="bias_" + tag)
    logits = tf.matmul(hidden, weights) + bias

    # Restore the per-sentence layout so the padding mask can be applied.
    # The last dimension follows class_num instead of a hard-coded 3.
    logits_normal = tf.reshape(
        tensor=logits,
        shape=(-1, self.max_sentence_size, self.class_num),
        name="logits_normal_" + tag)
    logits_masked = tf.boolean_mask(tensor=logits_normal,
                                    mask=self.mask,
                                    name="logits_" + tag + "_masked")

    pred = tf.cast(tf.argmax(logits, 1), tf.int32, name="pred_" + tag)
    pred_normal = tf.reshape(tensor=pred,
                             shape=(-1, self.max_sentence_size),
                             name="pred_normal_" + tag)
    pred_masked = tf.boolean_mask(tensor=pred_normal,
                                  mask=self.mask,
                                  name="pred_" + tag + "_masked")
    pred_one_hot = tf.one_hot(indices=pred_normal,
                              depth=self.class_num,
                              name="pred_normal_one_hot_" + tag)

    loss = tf.losses.sparse_softmax_cross_entropy(labels=y_masked,
                                                  logits=logits_masked)
    return loss, pred, pred_masked, pred_one_hot


def fit(self, X_train, y_train, len_train, X_validation, y_validation,
        len_validation, name, print_log=True):
    """Build the three-level tagging graph and train it.

    Three stacked levels (PW, PPH, IPH -- presumably prosodic word, prosodic
    phrase and intonational phrase; confirm with the data owner) share one
    embedding table; each later level also receives the one-hot predictions
    of the level before it. The summed loss is minimised with Adam. After
    every epoch the validation set is evaluated, metrics are printed, the
    model is saved when the validation loss improves, and predictions are
    written out through ``util.recover``.

    Args:
        X_train: Training inputs; must support ``.shape[0]`` and slicing.
            Fed to an int32 placeholder of shape (None, max_sentence_size).
        y_train: Indexable; items 0, 1, 2 are the PW, PPH and IPH labels.
        len_train: Sequence lengths for ``X_train``.
        X_validation: Validation inputs.
        y_validation: Indexable; items 0, 1, 2 are the PW, PPH, IPH labels.
        len_validation: Sequence lengths for ``X_validation``.
        name: Sub-directory of ``./models/`` where checkpoints are stored.
        print_log: Unused; kept for interface compatibility.
    """
    y_train_pw = y_train[0]
    y_train_pph = y_train[1]
    y_train_iph = y_train[2]

    y_validation_pw = y_validation[0]
    y_validation_pph = y_validation[1]
    y_validation_iph = y_validation[2]

    # --------------------------- define graph ---------------------------
    with self.graph.as_default():
        # Data placeholders.
        self.X_p = tf.placeholder(dtype=tf.int32,
                                  shape=(None, self.max_sentence_size),
                                  name="input_placeholder")
        self.y_p_pw = tf.placeholder(dtype=tf.int32,
                                     shape=(None, self.max_sentence_size),
                                     name="label_placeholder_pw")
        self.y_p_pph = tf.placeholder(dtype=tf.int32,
                                      shape=(None, self.max_sentence_size),
                                      name="label_placeholder_pph")
        self.y_p_iph = tf.placeholder(dtype=tf.int32,
                                      shape=(None, self.max_sentence_size),
                                      name="label_placeholder_iph")
        # Placeholder for the length of each sequence.
        self.seq_len_p = tf.placeholder(dtype=tf.int32,
                                        shape=(None, ),
                                        name="seq_len")
        # Mask used to strip padding positions.
        self.mask = tf.sequence_mask(lengths=self.seq_len_p,
                                     maxlen=self.max_sentence_size,
                                     name="mask")

        # Labels with padding removed, flattened over all sentences.
        y_p_pw_masked = tf.boolean_mask(tensor=self.y_p_pw,
                                        mask=self.mask,
                                        name="y_p_pw_masked")
        y_p_pph_masked = tf.boolean_mask(tensor=self.y_p_pph,
                                         mask=self.mask,
                                         name="y_p_pph_masked")
        y_p_iph_masked = tf.boolean_mask(tensor=self.y_p_iph,
                                         mask=self.mask,
                                         name="y_p_iph_masked")

        # Embedding table shared by the three levels (zero-initialised).
        self.embeddings = tf.Variable(initial_value=tf.zeros(
            shape=(self.vocab_size, self.embedding_size), dtype=tf.float32),
                                      name="embeddings")

        # Each level consumes the one-hot predictions of the previous one.
        self.loss_pw, pred_pw, pred_pw_masked, one_hot_pw = \
            self._build_level("pw", y_p_pw_masked)
        self.loss_pph, pred_pph, pred_pph_masked, one_hot_pph = \
            self._build_level("pph", y_p_pph_masked, one_hot_pw)
        self.loss_iph, pred_iph, pred_iph_masked, _ = \
            self._build_level("iph", y_p_iph_masked, one_hot_pph)

        # Total loss and optimizer.
        self.loss = self.loss_pw + self.loss_pph + self.loss_iph
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(self.loss)
        self.init_op = tf.global_variables_initializer()
        self.init_local_op = tf.local_variables_initializer()

    # ------------------------------ session ------------------------------
    with self.session as sess:
        print("Training Start")
        sess.run(self.init_op)  # initialize all variables
        sess.run(self.init_local_op)

        train_Size = X_train.shape[0]
        best_validation_loss = 1000  # lowest validation loss seen so far

        for epoch in range(1, self.max_epoch + 1):
            print("Epoch:", epoch)
            start_time = time.time()

            # Per-mini-batch training loss / accuracy / per-class F1.
            train_losses = []
            train_accus_pw = []
            train_accus_pph = []
            train_accus_iph = []
            c1_f_pw = []
            c2_f_pw = []
            c1_f_pph = []
            c2_f_pph = []
            c1_f_iph = []
            c2_f_iph = []

            # Trailing samples that do not fill a whole batch are skipped.
            for i in range(0, (train_Size // self.batch_size)):
                batch = slice(i * self.batch_size, (i + 1) * self.batch_size)
                # Note: everything fetched here is already masked.
                (_, train_loss, y_train_pw_masked, y_train_pph_masked,
                 y_train_iph_masked, train_pred_pw, train_pred_pph,
                 train_pred_iph) = sess.run(
                     fetches=[
                         self.optimizer, self.loss, y_p_pw_masked,
                         y_p_pph_masked, y_p_iph_masked, pred_pw_masked,
                         pred_pph_masked, pred_iph_masked
                     ],
                     feed_dict={
                         self.X_p: X_train[batch],
                         self.y_p_pw: y_train_pw[batch],
                         self.y_p_pph: y_train_pph[batch],
                         self.y_p_iph: y_train_iph[batch],
                         self.seq_len_p: len_train[batch]
                     })

                train_losses.append(train_loss)

                # Metrics for this mini-batch.
                accuracy_pw, f1_1_pw, f1_2_pw = util.eval(
                    y_true=y_train_pw_masked, y_pred=train_pred_pw)
                accuracy_pph, f1_1_pph, f1_2_pph = util.eval(
                    y_true=y_train_pph_masked, y_pred=train_pred_pph)
                accuracy_iph, f1_1_iph, f1_2_iph = util.eval(
                    y_true=y_train_iph_masked, y_pred=train_pred_iph)

                train_accus_pw.append(accuracy_pw)
                train_accus_pph.append(accuracy_pph)
                train_accus_iph.append(accuracy_iph)
                c1_f_pw.append(f1_1_pw)
                c2_f_pw.append(f1_2_pw)
                c1_f_pph.append(f1_1_pph)
                c2_f_pph.append(f1_2_pph)
                c1_f_iph.append(f1_1_iph)
                c2_f_iph.append(f1_2_iph)

            # Validation once per epoch, on the whole validation set.
            (validation_loss, y_valid_pw_masked, y_valid_pph_masked,
             y_valid_iph_masked, valid_pred_pw, valid_pred_pph,
             valid_pred_iph) = sess.run(
                 fetches=[
                     self.loss, y_p_pw_masked, y_p_pph_masked,
                     y_p_iph_masked, pred_pw_masked, pred_pph_masked,
                     pred_iph_masked
                 ],
                 feed_dict={
                     self.X_p: X_validation,
                     self.y_p_pw: y_validation_pw,
                     self.y_p_pph: y_validation_pph,
                     self.y_p_iph: y_validation_iph,
                     self.seq_len_p: len_validation
                 })

            valid_accuracy_pw, valid_f1_1_pw, valid_f1_2_pw = util.eval(
                y_true=y_valid_pw_masked, y_pred=valid_pred_pw)
            valid_accuracy_pph, valid_f1_1_pph, valid_f1_2_pph = util.eval(
                y_true=y_valid_pph_masked, y_pred=valid_pred_pph)
            valid_accuracy_iph, valid_f1_1_iph, valid_f1_2_iph = util.eval(
                y_true=y_valid_iph_masked, y_pred=valid_pred_iph)

            # Show information.
            print("Epoch ", epoch, " finished.", "spend ",
                  round((time.time() - start_time) / 60, 2), " mins")
            print(" /**Training info**/")
            print("----avarage training loss:",
                  sum(train_losses) / len(train_losses))
            print("PW:")
            print("----avarage accuracy:",
                  sum(train_accus_pw) / len(train_accus_pw))
            print("----avarage f1-Score of N:", sum(c1_f_pw) / len(c1_f_pw))
            print("----avarage f1-Score of B:", sum(c2_f_pw) / len(c2_f_pw))
            print("PPH:")
            print("----avarage accuracy :",
                  sum(train_accus_pph) / len(train_accus_pph))
            print("----avarage f1-Score of N:",
                  sum(c1_f_pph) / len(c1_f_pph))
            print("----avarage f1-Score of B:",
                  sum(c2_f_pph) / len(c2_f_pph))
            print("IPH:")
            print("----avarage accuracy:",
                  sum(train_accus_iph) / len(train_accus_iph))
            print("----avarage f1-Score of N:",
                  sum(c1_f_iph) / len(c1_f_iph))
            print("----avarage f1-Score of B:",
                  sum(c2_f_iph) / len(c2_f_iph))

            print(" /**Validation info**/")
            print("----avarage validation loss:", validation_loss)
            print("PW:")
            print("----avarage accuracy:", valid_accuracy_pw)
            print("----avarage f1-Score of N:", valid_f1_1_pw)
            print("----avarage f1-Score of B:", valid_f1_2_pw)
            print("PPH:")
            print("----avarage accuracy :", valid_accuracy_pph)
            print("----avarage f1-Score of N:", valid_f1_1_pph)
            print("----avarage f1-Score of B:", valid_f1_2_pph)
            print("IPH:")
            print("----avarage accuracy:", valid_accuracy_iph)
            print("----avarage f1-Score of N:", valid_f1_1_iph)
            print("----avarage f1-Score of B:", valid_f1_2_iph)

            # Store the model whenever the validation loss improves. The
            # comparison used to be inverted, so with the 1000 sentinel the
            # save branch was effectively never taken.
            if validation_loss < best_validation_loss:
                best_validation_loss = validation_loss
                print("New Best loss ", best_validation_loss,
                      " On Validation set! ")
                print("Saving Models......\n\n")
                model_dir = "./models/" + name + "/bilstm"
                if not os.path.isdir(model_dir):
                    os.makedirs(model_dir)
                saver = tf.train.Saver()
                saver.save(sess, model_dir + "/my-model-10000")
                # Generates MetaGraphDef.
                saver.export_meta_graph(model_dir + "/my-model-10000.meta")
            print("\n\n")

            # Predict on the validation inputs (unmasked predictions).
            test_pred_pw, test_pred_pph, test_pred_iph = sess.run(
                fetches=[pred_pw, pred_pph, pred_iph],
                feed_dict={
                    self.X_p: X_validation,
                    self.seq_len_p: len_validation
                })
            # Recover to original corpus text; the original comment gives the
            # prediction shape as [corpus_size*time_steps].
            util.recover(X=X_validation,
                         preds_pw=test_pred_pw,
                         preds_pph=test_pred_pph,
                         preds_iph=test_pred_iph,
                         filename="recover_epoch_" + str(epoch) + ".txt")
# traindata shape: [[(x,y),(x,y), ...],[],[],...]
# testdata shape: [([x1, x2, ...],[y1,y2,...]),([],[])]

# Train the HMM tagger and report how long training took.
# The prints below use a single pre-formatted string so they produce the same
# text under the Python 2 print statement and the Python 3 print function.
stt = datetime.datetime.now()
print("Start training... %s" % stt)
hmmtagger = nt.hmm.HiddenMarkovModelTagger.train(traindata)
print("################# Training took: %s" % (datetime.datetime.now() - stt))

# Tag every test line and collect per-line (tp, fp, fn, tn) counts for tag "S".
results = []
for line in testdata:
    x, yref = util.line_toseq(line, charstop)
    out = hmmtagger.tag(x)
    _, yout = zip(*out)
    results.append(util.eval(yref, yout, "S"))

# Sum the counts over all lines; an empty test set yields all-zero counts
# instead of failing on the four-way unpack.
if results:
    tp, fp, fn, tn = [sum(column) for column in zip(*results)]
else:
    tp = fp = fn = tn = 0

# float() forces true division even under Python 2 integer semantics, and the
# guards avoid ZeroDivisionError when a denominator is zero.
p = float(tp) / (tp + fp) if (tp + fp) else 0.0
r = float(tp) / (tp + fn) if (tp + fn) else 0.0
f_score = 2 * p * r / (p + r) if (p + r) else 0.0

print("Total tokens in Test Set: %s" % (tp + fp + fn + tn))
print("Total S in REF: %s" % (tp + fn))
print("Total S in OUT: %s" % (tp + fp))
print("Presicion: %s" % p)
print("Recall: %s" % r)
print("F1-score: %s" % f_score)

print("Start close testing... %s" % datetime.datetime.now())
results = []
def fit(self, X_train, y_train, len_train, pos_train, length_train,
        position_train, X_valid, y_valid, len_valid, pos_valid, length_valid,
        position_valid, X_test, y_test, len_test, pos_test, length_test,
        position_test, name, print_log=True):
    """Build the two-level (PW, PPH) tagging graph and train it.

    Word embeddings are read from ``../data/embeddings/word_vec.txt`` and kept
    frozen. They are concatenated with one-hot POS, length and position
    features; the PPH level additionally receives the one-hot PW predictions.
    Each level is built by ``self.hierarchy``. The summed loss is minimised
    with Adam under a staircase exponentially decaying learning rate.

    After every epoch the validation and test sets are evaluated, their
    probabilities and recovered text are written under ``../result/bilstm/``,
    metrics are shown through ``self.showInfo``, and the model is saved under
    ``./models/<name>/bilstm`` when the validation loss improves.

    Args:
        X_train, X_valid, X_test: Inputs fed to an int32 placeholder of shape
            (None, max_sentence_size); ``X_train`` must support ``.shape[0]``.
        y_train, y_valid, y_test: Indexable; item 0 is the PW labels and
            item 1 the PPH labels.
        len_train, len_valid, len_test: Sequence lengths per sentence.
        pos_train, pos_valid, pos_test: Integer POS ids per token.
        length_train, length_valid, length_test: Integer length ids per token.
        position_train, position_valid, position_test: Integer position ids
            per token.
        name: Sub-directory of ``./models/`` where checkpoints are stored.
        print_log: Unused; kept for interface compatibility.
    """
    # Split the label containers per level.
    y_train_pw = y_train[0]
    y_train_pph = y_train[1]

    y_valid_pw = y_valid[0]
    y_valid_pph = y_valid[1]

    y_test_pw = y_test[0]
    y_test_pph = y_test[1]

    # --------------------------- define graph ---------------------------
    with self.graph.as_default():
        # Dataset API: endlessly repeated, batched training stream.
        dataset_train = tf.data.Dataset.from_tensor_slices(
            tensors=(X_train, y_train_pw, y_train_pph, len_train, pos_train,
                     length_train, position_train)).repeat().batch(
                         batch_size=self.batch_size).shuffle(buffer_size=2)
        iterator_train = dataset_train.make_one_shot_iterator()
        batch_train = iterator_train.get_next()

        # Data placeholders.
        self.X_p = tf.placeholder(dtype=tf.int32,
                                  shape=(None, self.max_sentence_size),
                                  name="input_p")
        self.y_p_pw = tf.placeholder(dtype=tf.int32,
                                     shape=(None, self.max_sentence_size),
                                     name="label_p_pw")
        self.y_p_pph = tf.placeholder(dtype=tf.int32,
                                      shape=(None, self.max_sentence_size),
                                      name="label_p_pph")

        # Placeholder for the length of each sequence.
        self.seq_len_p = tf.placeholder(dtype=tf.int32,
                                        shape=(None, ),
                                        name="seq_len")
        # Mask used to strip padding positions.
        self.mask = tf.sequence_mask(lengths=self.seq_len_p,
                                     maxlen=self.max_sentence_size,
                                     name="mask")

        # Labels with padding removed, flattened over all sentences.
        y_p_pw_masked = tf.boolean_mask(tensor=self.y_p_pw,
                                        mask=self.mask,
                                        name="y_p_pw_masked")
        y_p_pph_masked = tf.boolean_mask(tensor=self.y_p_pph,
                                         mask=self.mask,
                                         name="y_p_pph_masked")

        # POS feature.
        self.pos_p = tf.placeholder(dtype=tf.int32,
                                    shape=(None, self.max_sentence_size),
                                    name="pos_p")
        self.pos_one_hot = tf.one_hot(indices=self.pos_p,
                                      depth=self.pos_num,
                                      name="pos_one_hot")

        # Length feature. The op name used to be a copy-pasted "pos_one_hot".
        self.length_p = tf.placeholder(dtype=tf.int32,
                                       shape=(None, self.max_sentence_size),
                                       name="length_p")
        self.length_one_hot = tf.one_hot(indices=self.length_p,
                                         depth=self.length_num,
                                         name="length_one_hot")

        # Position feature. The op name used to be a copy-pasted "pos_one_hot".
        self.position_p = tf.placeholder(dtype=tf.int32,
                                         shape=(None, self.max_sentence_size),
                                         name="position_p")
        self.position_one_hot = tf.one_hot(indices=self.position_p,
                                           depth=self.max_sentence_size,
                                           name="position_one_hot")

        # Dropout placeholders (presumably consumed inside self.hierarchy;
        # that method is not visible from here).
        self.keep_prob_p = tf.placeholder(dtype=tf.float32,
                                          shape=[],
                                          name="keep_prob_p")
        self.input_keep_prob_p = tf.placeholder(dtype=tf.float32,
                                                shape=[],
                                                name="input_keep_prob_p")
        self.output_keep_prob_p = tf.placeholder(dtype=tf.float32,
                                                 shape=[],
                                                 name="output_keep_prob_p")

        # Frozen pre-trained word embeddings.
        self.word_embeddings = tf.Variable(
            initial_value=util.readEmbeddings(
                file="../data/embeddings/word_vec.txt"),
            trainable=False,
            name="word_embeddings")
        print("wordembedding.shape", self.word_embeddings.shape)

        # ------------------------------- PW -------------------------------
        inputs_pw = tf.nn.embedding_lookup(params=self.word_embeddings,
                                           ids=self.X_p,
                                           name="embeded_input_pw")
        print("shape of inputs_pw:", inputs_pw.shape)
        inputs_pw = tf.concat(values=[
            inputs_pw, self.pos_one_hot, self.length_one_hot,
            self.position_one_hot
        ],
                              axis=2,
                              name="input_pw")
        print("shape of cancated inputs_pw:", inputs_pw.shape)

        (self.loss_pw, prob_pw_masked, pred_pw, pred_pw_masked,
         pred_normal_one_hot_pw) = self.hierarchy(
             inputs=inputs_pw,
             y_masked=y_p_pw_masked,
             seq_length=self.seq_len_p,
             scope_name="pw")

        # ------------------------------- PPH ------------------------------
        inputs_pph = tf.nn.embedding_lookup(params=self.word_embeddings,
                                            ids=self.X_p,
                                            name="embeded_input_pph")
        print("input_pph.shape", inputs_pph.shape)
        # Same features as PW plus the PW predictions.
        inputs_pph = tf.concat(values=[
            inputs_pph, self.pos_one_hot, self.length_one_hot,
            self.position_one_hot, pred_normal_one_hot_pw
        ],
                               axis=2,
                               name="inputs_pph")
        print("shape of input_pph:", inputs_pph.shape)

        (self.loss_pph, prob_pph_masked, pred_pph, pred_pph_masked,
         pred_normal_one_hot_pph) = self.hierarchy(
             inputs=inputs_pph,
             y_masked=y_p_pph_masked,
             seq_length=self.seq_len_p,
             scope_name="pph")

        # Learning rate decays once every (batches per epoch + 1) steps.
        global_step = tf.Variable(initial_value=1, trainable=False)
        start_learning_rate = self.learning_rate
        learning_rate = tf.train.exponential_decay(
            learning_rate=start_learning_rate,
            global_step=global_step,
            decay_steps=(X_train.shape[0] // self.batch_size) + 1,
            decay_rate=self.decay_rate,
            staircase=True,
            name="decay_learning_rate")

        # Total loss and optimizer.
        self.loss = self.loss_pw + self.loss_pph
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(self.loss,
                                                  global_step=global_step)
        self.init_op = tf.global_variables_initializer()
        self.init_local_op = tf.local_variables_initializer()

    # ------------------------------ session ------------------------------
    with self.session as sess:
        print("Training Start")
        sess.run(self.init_op)  # initialize all variables
        sess.run(self.init_local_op)

        train_Size = X_train.shape[0]
        self.best_validation_loss = 1000  # lowest validation loss so far

        # Result directory; makedirs also creates a missing "../result".
        if not os.path.exists("../result/bilstm/"):
            os.makedirs("../result/bilstm/")

        def evaluate_split(split, X, y_pw, y_pph, seq_len, pos, length,
                           position, epoch):
            """Run one full-set pass with dropout off and write its outputs.

            Returns (loss, accuracy_pw, f1_pw, accuracy_pph, f1_pph).
            """
            (loss_value, true_pw, true_pph, hat_pw_masked, hat_pph_masked,
             hat_pw, hat_pph, probs_pw, probs_pph) = sess.run(
                 fetches=[
                     self.loss, y_p_pw_masked, y_p_pph_masked,
                     pred_pw_masked, pred_pph_masked, pred_pw, pred_pph,
                     prob_pw_masked, prob_pph_masked
                 ],
                 feed_dict={
                     self.X_p: X,
                     self.y_p_pw: y_pw,
                     self.y_p_pph: y_pph,
                     self.seq_len_p: seq_len,
                     self.pos_p: pos,
                     self.length_p: length,
                     self.position_p: position,
                     self.keep_prob_p: 1.0,
                     self.input_keep_prob_p: 1.0,
                     self.output_keep_prob_p: 1.0
                 })
            # Write the probabilities to a per-epoch file.
            util.writeProb(prob_pw=probs_pw,
                           prob_pph=probs_pph,
                           outFile="../result/bilstm/bilstm_prob_" + split +
                           "_epoch" + str(epoch) + ".txt")
            accuracy_pw, f1_pw = util.eval(y_true=true_pw,
                                           y_pred=hat_pw_masked)
            accuracy_pph, f1_pph = util.eval(y_true=true_pph,
                                             y_pred=hat_pph_masked)
            # Recover to original corpus text using unmasked predictions.
            util.recover2(X=X,
                          preds_pw=hat_pw,
                          preds_pph=hat_pph,
                          filename="../result/bilstm/" + split +
                          "_recover_epoch_" + str(epoch) + ".txt")
            return loss_value, accuracy_pw, f1_pw, accuracy_pph, f1_pph

        for epoch in range(1, self.max_epoch + 1):
            print("Epoch:", epoch)
            start_time = time.time()

            # Per-mini-batch training loss / accuracy / per-class F1; kept on
            # self, presumably so self.showInfo can read them.
            self.train_losses = []
            self.train_accus_pw = []
            self.train_accus_pph = []
            self.c1_f_pw = []
            self.c2_f_pw = []
            self.c1_f_pph = []
            self.c2_f_pph = []
            lrs = []

            for _step in range(0, (train_Size // self.batch_size)):
                elements = sess.run(batch_train)
                # Note: everything fetched here is already masked.
                (_, train_loss, lr, y_train_pw_masked, y_train_pph_masked,
                 train_pred_pw, train_pred_pph, train_prob_pw_masked,
                 train_prob_pph_masked) = sess.run(
                     fetches=[
                         self.optimizer, self.loss, learning_rate,
                         y_p_pw_masked, y_p_pph_masked, pred_pw_masked,
                         pred_pph_masked, prob_pw_masked, prob_pph_masked
                     ],
                     feed_dict={
                         self.X_p: elements[0],
                         self.y_p_pw: elements[1],
                         self.y_p_pph: elements[2],
                         self.seq_len_p: elements[3],
                         self.pos_p: elements[4],
                         self.length_p: elements[5],
                         self.position_p: elements[6],
                         self.keep_prob_p: self.keep_prob,
                         self.input_keep_prob_p: self.input_keep_prob,
                         self.output_keep_prob_p: self.output_keep_prob
                     })

                # Write the probabilities to files.
                util.writeProb(
                    prob_pw=train_prob_pw_masked,
                    prob_pph=train_prob_pph_masked,
                    outFile="../result/bilstm/bilstm_prob_train_epoch" +
                    str(epoch) + ".txt")

                lrs.append(lr)
                self.train_losses.append(train_loss)

                # Metrics for this mini-batch.
                accuracy_pw, f1_pw = util.eval(y_true=y_train_pw_masked,
                                               y_pred=train_pred_pw)
                accuracy_pph, f1_pph = util.eval(y_true=y_train_pph_masked,
                                                 y_pred=train_pred_pph)
                self.train_accus_pw.append(accuracy_pw)
                self.train_accus_pph.append(accuracy_pph)
                self.c1_f_pw.append(f1_pw[0])
                self.c2_f_pw.append(f1_pw[1])
                self.c1_f_pph.append(f1_pph[0])
                self.c2_f_pph.append(f1_pph[1])

            # Validation and test passes, once per epoch.
            (self.valid_loss, self.valid_accuracy_pw, self.valid_f1_pw,
             self.valid_accuracy_pph, self.valid_f1_pph) = evaluate_split(
                 "valid", X_valid, y_valid_pw, y_valid_pph, len_valid,
                 pos_valid, length_valid, position_valid, epoch)
            (self.test_loss, self.test_accuracy_pw, self.test_f1_pw,
             self.test_accuracy_pph, self.test_f1_pph) = evaluate_split(
                 "test", X_test, y_test_pw, y_test_pph, len_test, pos_test,
                 length_test, position_test, epoch)

            # Show information.
            print("Epoch ", epoch, " finished.", "spend ",
                  round((time.time() - start_time) / 60, 2), " mins")
            print("learning rate:", sum(lrs) / len(lrs))
            self.showInfo(type="training")
            self.showInfo(type="validation")
            self.showInfo(type="test")

            # Store the model whenever the validation loss improves. The
            # comparison used to be inverted, so with the 1000 sentinel the
            # save branch was effectively never taken.
            if self.valid_loss < self.best_validation_loss:
                self.best_validation_loss = self.valid_loss
                print("New Best loss ", self.best_validation_loss,
                      " On Validation set! ")
                print("Saving Models......\n\n")
                model_dir = "./models/" + name + "/bilstm"
                if not os.path.isdir(model_dir):
                    os.makedirs(model_dir)
                saver = tf.train.Saver()
                saver.save(sess, model_dir + "/my-model-10000")
                # Generates MetaGraphDef.
                saver.export_meta_graph(model_dir + "/my-model-10000.meta")
            print("\n\n")
def fit(self, X_train, y_train, X_validation, y_validation, name, print_log=True): #---------------------------------------forward computation--------------------------------------------# X_train_pw = X_train[0] X_train_pph = X_train[1] X_train_iph = X_train[2] y_train_pw = y_train[0] y_train_pph = y_train[1] y_train_iph = y_train[2] X_validation_pw = X_validation[0] X_validation_pph = X_validation[1] X_validation_iph = X_validation[2] y_validation_pw = y_validation[0] y_validation_pph = y_validation[1] y_validation_iph = y_validation[2] #---------------------------------------define graph---------------------------------------------# with self.graph.as_default(): # data place holder self.X_p_pw = tf.placeholder(dtype=tf.int32, shape=(None, self.max_sentence_size), name="input_placeholder_pw") self.y_p_pw = tf.placeholder(dtype=tf.int32, shape=(None, self.max_sentence_size), name="label_placeholder_pw") self.X_p_pph = tf.placeholder(dtype=tf.int32, shape=(None, self.max_sentence_size), name="input_placeholder_pph") self.y_p_pph = tf.placeholder(dtype=tf.int32, shape=(None, self.max_sentence_size), name="label_placeholder_pph") self.X_p_iph = tf.placeholder(dtype=tf.int32, shape=(None, self.max_sentence_size), name="input_placeholder_iph") self.y_p_iph = tf.placeholder(dtype=tf.int32, shape=(None, self.max_sentence_size), name="label_placeholder_iph") #attention variables self.attention_W = tf.Variable(tf.random_uniform( [self.hidden_units_num, self.hidden_units_num], 0.0, 1.0), name="attention_W") self.attention_U = tf.Variable(tf.random_uniform( [self.hidden_units_num * 2, self.hidden_units_num], 0.0, 1.0), name="attention_U") self.attention_V = tf.Variable(tf.random_uniform( [self.hidden_units_num, 1], 0.0, 1.0), name="attention_V") #embeddings self.embeddings = tf.Variable(initial_value=tf.zeros( shape=(self.vocab_size, self.embedding_size), dtype=tf.float32), name="embeddings") 
#-------------------------------------PW----------------------------------------------------- #embeded inputs:[batch_size,MAX_TIME_STPES,embedding_size] inputs_pw = tf.nn.embedding_lookup(params=self.embeddings, ids=self.X_p_pw, name="embeded_input_pw") # encoder cells # forward part en_lstm_forward1_pw = rnn.BasicLSTMCell( num_units=self.hidden_units_num) # en_lstm_forward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2) # en_lstm_forward=rnn.MultiRNNCell(cells=[en_lstm_forward1,en_lstm_forward2]) # backward part en_lstm_backward1_pw = rnn.BasicLSTMCell( num_units=self.hidden_units_num) # en_lstm_backward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2) # en_lstm_backward=rnn.MultiRNNCell(cells=[en_lstm_backward1,en_lstm_backward2]) # decoder cells de_lstm_pw = rnn.BasicLSTMCell(num_units=self.hidden_units_num, reuse=tf.AUTO_REUSE) # encode encoder_outputs_pw, encoder_states_pw = self.encoder( cell_forward=en_lstm_forward1_pw, cell_backward=en_lstm_backward1_pw, inputs=inputs_pw, scope_name="en_lstm_pw") #print("shape of encoder_outputs:",encoder_outputs_pw.shape) #print("shape encoder_states_pw.h",encoder_states_pw.h.shape) #print("shape encoder_states_pw.c",encoder_states_pw.c.shape) #attention test #self.attention(prev_state=encoder_states_pw,enc_outputs=encoder_outputs_pw) #decode test h_pw = self.decode(cell=de_lstm_pw, init_state=encoder_states_pw, enc_outputs=encoder_outputs_pw) #h_pw = self.decode(self.dec_lstm_cell, enc_state, enc_outputs) #h_pw = self.decoder( # cell=de_lstm_pw, # initial_state=encoder_states_pw, # inputs=encoder_outputs_pw, # scope_name="de_lstm_pw" #) ''' ) if is_training: self. 
else: self.dec_outputs = self.decode(self.dec_lstm_cell, enc_state, enc_outputs, self.loop_function) # shape of h is [batch*time_steps,hidden_units] ''' # fully connect layer(projection) w_pw = tf.Variable( initial_value=tf.random_normal(shape=(self.hidden_units_num2, self.class_num)), name="weights_pw") b_pw = tf.Variable( initial_value=tf.random_normal(shape=(self.class_num, )), name="bias_pw") logits_pw = tf.matmul( h_pw, w_pw) + b_pw # shape of logits:[batch_size*max_time, 3] # prediction # shape of pred[batch_size*max_time, 1] pred_pw = tf.cast(tf.argmax(logits_pw, 1), tf.int32, name="pred_pw") # pred in an normal way,shape is [batch_size, max_time,1] pred_normal_pw = tf.reshape(tensor=pred_pw, shape=(-1, self.max_sentence_size), name="pred_normal") # one-hot the pred_normal:[batch_size, max_time,class_num] pred_normal_one_hot_pw = tf.one_hot(indices=pred_normal_pw, depth=self.class_num, name="pred_normal_one_hot_pw") # loss self.loss_pw = tf.losses.sparse_softmax_cross_entropy( labels=tf.reshape(self.y_p_pw, shape=[-1]), logits=logits_pw) #--------------------------------------------------------------------------------------- ''' #----------------------------------PPH-------------------------------------------------- # embeded inputs:[batch_size,MAX_TIME_STPES,embedding_size] inputs_pph = tf.nn.embedding_lookup(params=self.embeddings, ids=self.X_p_pph, name="embeded_input_pph") # shape of inputs[batch_size,max_time_stpes,embeddings_dims+class_num] inputs_pph = tf.concat(values=[inputs_pph, pred_normal_one_hot_pw], axis=2, name="inputs_pph") print("shape of input_pph:", inputs_pph.shape) # encoder cells # forward part en_lstm_forward1_pph = rnn.BasicLSTMCell(num_units=self.hidden_units_num) # en_lstm_forward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2) # en_lstm_forward=rnn.MultiRNNCell(cells=[en_lstm_forward1,en_lstm_forward2]) # backward part en_lstm_backward1_pph = rnn.BasicLSTMCell(num_units=self.hidden_units_num) # 
en_lstm_backward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2) # en_lstm_backward=rnn.MultiRNNCell(cells=[en_lstm_backward1,en_lstm_backward2]) # decoder cells de_lstm_pph = rnn.BasicLSTMCell(num_units=self.hidden_units_num) # encode encoder_outputs_pph, encoder_states_pph = self.encoder( cell_forward=en_lstm_forward1_pph, cell_backward=en_lstm_backward1_pph, inputs=inputs_pph, scope_name="en_lstm_pph" ) # shape of h is [batch*time_steps,hidden_units] h_pph = self.decoder( cell=de_lstm_pph, initial_state=encoder_states_pph, inputs=encoder_outputs_pph, scope_name="de_lstm_pph" ) # fully connect layer(projection) w_pph = tf.Variable( initial_value=tf.random_normal(shape=(self.hidden_units_num2, self.class_num)), name="weights_pph" ) b_pph = tf.Variable( initial_value=tf.random_normal(shape=(self.class_num,)), name="bias_pph" ) logits_pph = tf.matmul(h_pph, w_pph) + b_pph # shape of logits:[batch_size*max_time, 5] # prediction # shape of pred[batch_size*max_time, 1] pred_pph = tf.cast(tf.argmax(logits_pph, 1), tf.int32, name="pred_pph") # pred in an normal way,shape is [batch_size, max_time,1] pred_normal_pph = tf.reshape( tensor=pred_pph, shape=(-1, self.max_sentence_size), name="pred_normal" ) # one-hot the pred_normal:[batch_size, max_time,class_num] pred_normal_one_hot_pph = tf.one_hot( indices=pred_normal_pph, depth=self.class_num, name="pred_normal_one_hot_pph" ) # loss self.loss_pph = tf.losses.sparse_softmax_cross_entropy( labels=tf.reshape(self.y_p_pph, shape=[-1]), logits=logits_pph ) #------------------------------------------------------------------------------------ #---------------------------------------IPH------------------------------------------ # embeded inputs:[batch_size,MAX_TIME_STPES,embedding_size] inputs_iph = tf.nn.embedding_lookup(params=self.embeddings, ids=self.X_p_iph, name="embeded_input_iph") # shape of inputs[batch_size,max_time_stpes,embeddings_dims+class_num] inputs_iph = tf.concat(values=[inputs_iph, pred_normal_one_hot_pph], 
axis=2, name="inputs_pph") print("shape of input_pph:", inputs_pph.shape) # encoder cells # forward part en_lstm_forward1_iph = rnn.BasicLSTMCell(num_units=self.hidden_units_num) # en_lstm_forward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2) # en_lstm_forward=rnn.MultiRNNCell(cells=[en_lstm_forward1,en_lstm_forward2]) # backward part en_lstm_backward1_iph = rnn.BasicLSTMCell(num_units=self.hidden_units_num) # en_lstm_backward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2) # en_lstm_backward=rnn.MultiRNNCell(cells=[en_lstm_backward1,en_lstm_backward2]) # decoder cells de_lstm_iph = rnn.BasicLSTMCell(num_units=self.hidden_units_num) # encode encoder_outputs_iph, encoder_states_iph = self.encoder( cell_forward=en_lstm_forward1_iph, cell_backward=en_lstm_backward1_iph, inputs=inputs_iph, scope_name="en_lstm_iph" ) # shape of h is [batch*time_steps,hidden_units] h_iph = self.decoder( cell=de_lstm_iph, initial_state=encoder_states_iph, inputs=encoder_outputs_iph, scope_name="de_lstm_iph" ) # fully connect layer(projection) w_iph = tf.Variable( initial_value=tf.random_normal(shape=(self.hidden_units_num2, self.class_num)), name="weights_iph" ) b_iph = tf.Variable( initial_value=tf.random_normal(shape=(self.class_num,)), name="bias_iph" ) logits_iph = tf.matmul(h_iph, w_iph) + b_iph # shape of logits:[batch_size*max_time, 5] # prediction # shape of pred[batch_size*max_time, 1] pred_iph = tf.cast(tf.argmax(logits_iph, 1), tf.int32, name="pred_iph") # pred in an normal way,shape is [batch_size, max_time,1] pred_normal_iph = tf.reshape( tensor=pred_iph, shape=(-1, self.max_sentence_size), name="pred_normal" ) # one-hot the pred_normal:[batch_size, max_time,class_num] pred_normal_one_hot_iph = tf.one_hot( indices=pred_normal_iph, depth=self.class_num, name="pred_normal_one_hot_iph" ) # loss self.loss_iph = tf.losses.sparse_softmax_cross_entropy( labels=tf.reshape(self.y_p_iph, shape=[-1]), logits=logits_iph ) 
#--------------------------------------------------------------------------------------- ''' #loss self.loss = self.loss_pw #+self.loss_pph+self.loss_iph #optimizer self.optimizer = tf.train.AdamOptimizer( learning_rate=self.learning_rate).minimize(self.loss) self.init_op = tf.global_variables_initializer() self.init_local_op = tf.local_variables_initializer() #------------------------------------Session----------------------------------------- with self.session as sess: print("Training Start") sess.run(self.init_op) # initialize all variables sess.run(self.init_local_op) train_Size = X_train_pw.shape[0] validation_Size = X_validation_pw.shape[0] best_validation_loss = 0 # best validation accuracy in training process #epoch for epoch in range(1, self.max_epoch + 1): print("Epoch:", epoch) start_time = time.time() # time evaluation # training loss/accuracy in every mini-batch train_losses = [] train_accus_pw = [] train_accus_pph = [] train_accus_iph = [] c1_f_pw = [] c2_f_pw = [] # each class's f1 score c1_f_pph = [] c2_f_pph = [] c1_f_iph = [] c2_f_iph = [] # mini batch for i in range(0, (train_Size // self.batch_size)): _, train_loss, train_pred_pw = sess.run( fetches=[self.optimizer, self.loss, pred_pw], feed_dict={ self.X_p_pw: X_train_pw[i * self.batch_size:(i + 1) * self.batch_size], self.y_p_pw: y_train_pw[i * self.batch_size:(i + 1) * self.batch_size], }) # loss train_losses.append(train_loss) # metrics # pw accuracy_pw, f1_1_pw, f1_2_pw = util.eval( y_true=np.reshape( y_train_pw[i * self.batch_size:(i + 1) * self.batch_size], [-1]), y_pred=train_pred_pw) print("f1_score of N:", f1_1_pw) print("f1_score of B:", f1_2_pw) print() #c1_f_pw.append(f1_1_pw); #c2_f_pw.append(f1_2_pw) '''