def evaluate(self, labels, seq_lens, data, epoch, step, raw_data):
    """Pair each test token with its gold and predicted tag and print the conlleval result.

    :param labels: per-sentence sequences of predicted label ids; every id must
        be a value of ``self.vocab_t``
    :param seq_lens: unused by this method
    :param data: unused by this method
    :param epoch: unused by this method
    :param step: unused by this method
    :param raw_data: mapping providing ``'test_data'`` (sentences) and
        ``'test_label'`` (gold tag sequences), aligned with ``labels``
    :return: None
    """
    # Invert the tag -> id vocabulary into id -> tag (every id is mapped,
    # including 0).
    label2tag = {label: tag for tag, label in self.vocab_t.items()}

    prediction = []
    for label, sent, tag in zip(labels, raw_data['test_data'],
                                raw_data['test_label']):
        tag_ = [label2tag[label_id] for label_id in label]
        # Indexed by the prediction length: a sentence or gold sequence that is
        # shorter than the prediction raises IndexError instead of being
        # silently truncated.
        prediction.append(
            [[sent[i], tag[i], tag_[i]] for i in range(len(tag_))])

    # The call form of print is valid on both Python 2 and Python 3; the
    # former ``print expr`` statement is a SyntaxError on Python 3.
    print(conlleval(prediction))
def evaluate(self, label_list, seq_len_list, data, epoch=None):
    """Pair every token with its gold and predicted tag and log the conlleval report.

    :param label_list: per-sentence sequences of predicted label ids; every id
        must be a value of ``self.tag2label``
    :param seq_len_list: unused by this method
    :param data: iterable of ``(sent, tag)`` pairs aligned with ``label_list``
        (token sequence and gold tag sequence)
    :param epoch: epoch index; output files are suffixed with ``epoch + 1``,
        or with ``'test'`` when ``epoch`` is ``None``
    :return: None
    """
    # id -> tag, except that id 0 is kept as the integer 0 instead of its tag.
    label2tag = {label: (tag if label != 0 else label)
                 for tag, label in self.tag2label.items()}

    model_predict = []
    for label_, (sent, tag) in zip(label_list, data):
        tag_ = [label2tag[label__] for label__ in label_]
        # When the prediction and the sentence differ in length, dump the
        # sentence, the prediction length and the gold tags for inspection.
        if len(label_) != len(sent):
            print(sent)
            print(len(label_))
            print(tag)
        # Bundle [token, gold tag, predicted tag]; zip stops at the shortest
        # of the three sequences, so mismatched lengths are truncated.
        model_predict.append(
            [[word, gold, pred] for word, gold, pred in zip(sent, tag, tag_)])

    # Build the per-epoch file names for the label dump and the metric report.
    epoch_num = str(epoch + 1) if epoch is not None else 'test'
    label_path = os.path.join(self.result_path, 'label_' + epoch_num)
    metric_path = os.path.join(self.result_path, 'result_metric_' + epoch_num)
    # conlleval is defined elsewhere; each item it yields is logged as-is.
    for line in conlleval(model_predict, label_path, metric_path):
        self.logger.info(line)
def evaluate(self, label_list, seq_len_list, data, epoch=None):
    """Pair every token with its gold and predicted tag and log the conlleval report.

    Unlike id-based variants, the predictions are used verbatim: no id -> tag
    lookup is applied to ``label_list``.

    :param label_list: per-sentence sequences of predicted tags, used as-is
    :param seq_len_list: unused by this method
    :param data: iterable of ``(sent, tag)`` pairs aligned with ``label_list``
        (token sequence and gold tag sequence)
    :param epoch: epoch index; output files are suffixed with ``epoch + 1``,
        or with ``'test'`` when ``epoch`` is ``None``
    :return: None
    """
    model_predict = []
    for label_, (sent, tag) in zip(label_list, data):
        tag_ = label_
        # Length mismatch: dump diagnostics. Nothing is skipped, so a
        # prediction shorter than the sentence raises IndexError just below.
        if len(label_) != len(sent):
            print(sent)
            print(len(label_))
            print(tag)
        model_predict.append(
            [[word, tag[i], tag_[i]] for i, word in enumerate(sent)])

    epoch_num = str(epoch + 1) if epoch is not None else 'test'
    label_path = os.path.join(self.result_path, 'label_' + epoch_num)
    metric_path = os.path.join(self.result_path, 'result_metric_' + epoch_num)
    # conlleval is defined elsewhere; each item it yields is logged as-is.
    for line in conlleval(model_predict, label_path, metric_path):
        self.logger.info(line)
def evaluate(self, label_list, seq_len_list, data, epoch=None):
    """Score predictions with conlleval and return precision, recall and F1.

    :param label_list: per-sentence sequences of predicted label ids; every id
        must be a value of ``self.tag2label``
    :param seq_len_list: unused by this method
    :param data: iterable of ``(sent, tag)`` pairs aligned with ``label_list``
        (token sequence and gold tag sequence)
    :param epoch: selects the output file suffix: ``1`` -> ``'test'``,
        ``None`` -> ``'dev'``, anything else -> ``str(epoch + 1)``
    :return: the ``(pre, recall, f1)`` triple produced by ``conlleval``
    """
    # id -> tag, except that id 0 is kept as the integer 0 instead of its tag.
    label2tag = {label: (tag if label != 0 else label)
                 for tag, label in self.tag2label.items()}

    model_predict = []
    for label_, (sent, tag) in zip(label_list, data):
        tag_ = [label2tag[label__] for label__ in label_]
        # Indexed access: a gold or predicted sequence shorter than the
        # sentence raises IndexError rather than being truncated.
        model_predict.append(
            [[word, tag[i], tag_[i]] for i, word in enumerate(sent)])

    # NOTE(review): epoch == 1 is treated as a sentinel for the test set, so a
    # genuine epoch index of 1 is also written to the 'test' files — confirm
    # this is what callers intend.
    if epoch == 1:
        epoch_num = 'test'
    elif epoch is None:
        epoch_num = 'dev'
    else:
        epoch_num = str(epoch + 1)

    label_path = os.path.join(self.result_path, 'label_' + epoch_num)
    metric_path = os.path.join(self.result_path, 'result_metric_' + epoch_num)
    pre, recall, f1 = conlleval(model_predict, label_path, metric_path)
    print("pre {}".format(pre))
    print("recall {}".format(recall))
    print("f1 {}".format(f1))
    return pre, recall, f1
def evaluate(self, label_list, seq_len_list, data, epoch=None):
    """Pair every token with its gold and predicted tag and log the conlleval report.

    :param label_list: per-sentence sequences of predicted label ids; every id
        must be a value of ``self.tag2label``
    :param seq_len_list: unused by this method
    :param data: iterable of ``(sent, tag)`` pairs aligned with ``label_list``
        (token sequence and gold tag sequence)
    :param epoch: epoch index; output files are suffixed with ``epoch + 1``,
        or with ``'test'`` when ``epoch`` is ``None``
    :return: None
    """
    # Plain inverse of tag2label: every id, including 0, maps back to its tag
    # string (no special-casing of id 0 in this variant).
    label2tag = {label: tag for tag, label in self.tag2label.items()}

    model_predict = []
    for label_, (sent, tag) in zip(label_list, data):
        tag_ = [label2tag[label__] for label__ in label_]
        # Length mismatch: dump diagnostics. Nothing is skipped, so a
        # prediction shorter than the sentence raises IndexError just below.
        if len(label_) != len(sent):
            print(sent)
            print(len(label_))
            print(tag)
        model_predict.append(
            [[word, tag[i], tag_[i]] for i, word in enumerate(sent)])

    epoch_num = str(epoch + 1) if epoch is not None else 'test'
    label_path = os.path.join(self.result_path, 'label_' + epoch_num)
    metric_path = os.path.join(self.result_path, 'result_metric_' + epoch_num)
    # conlleval is defined elsewhere; each item it yields is logged as-is.
    for line in conlleval(model_predict, label_path, metric_path):
        self.logger.info(line)
def evaluate(self, label_list, seq_len_list, data, epoch=None):
    """Evaluate predictions: build [token, gold, predicted] rows and log the conlleval report.

    :param label_list: per-sentence sequences of predicted label ids; every id
        must be a value of ``self.tag2label``
    :param seq_len_list: list of sentence lengths; unused by this method
    :param data: iterable of ``(sent, tag)`` pairs aligned with ``label_list``
        (token sequence and gold tag sequence)
    :param epoch: epoch index; output files are suffixed with ``epoch + 1``,
        or with ``'test'`` when ``epoch`` is ``None``
    :return: None
    """
    # id -> tag, except that id 0 is kept as the integer 0 instead of its tag.
    label2tag = {label: (tag if label != 0 else label)
                 for tag, label in self.tag2label.items()}

    model_predict = []
    for label_, (sent, tag) in zip(label_list, data):
        tag_ = [label2tag[label__] for label__ in label_]
        # Length mismatch: dump diagnostics. Nothing is skipped, so a
        # prediction shorter than the sentence raises IndexError just below.
        if len(label_) != len(sent):
            print(sent)
            print(len(label_))
            print(tag)
        model_predict.append(
            [[word, tag[i], tag_[i]] for i, word in enumerate(sent)])

    epoch_num = str(epoch + 1) if epoch is not None else 'test'
    label_path = os.path.join(self.result_path, 'label_' + epoch_num)
    metric_path = os.path.join(self.result_path, 'result_metric_' + epoch_num)
    # conlleval is defined elsewhere; each item it yields is logged as-is.
    for line in conlleval(model_predict, label_path, metric_path):
        self.logger.info(line)
def evaluate(self, label_list, seq_len_list, data, epoch=None):
    """Pair every token with its gold and predicted tag and log the conlleval report.

    :param label_list: per-sentence sequences of predicted label ids; every id
        must be a value of ``self.tag2label``
    :param seq_len_list: unused by this method
    :param data: iterable of ``(sent, tag)`` pairs aligned with ``label_list``
        (token sequence and gold tag sequence)
    :param epoch: epoch index; output files are suffixed with ``epoch + 1``,
        or with ``'test'`` when ``epoch`` is ``None``
    :return: None
    """
    # tag id -> tag name, except that id 0 is kept as the integer 0.
    label2tag = {label: (tag if label != 0 else label)
                 for tag, label in self.tag2label.items()}

    model_predict = []
    for label_, (sent, tag) in zip(label_list, data):
        tag_ = [label2tag[label__] for label__ in label_]
        # The predicted labels for this sentence differ in length from the
        # sentence: dump diagnostics. Nothing is skipped, so a prediction
        # shorter than the sentence raises IndexError just below.
        if len(label_) != len(sent):
            print(sent)
            print(len(label_))
            print(tag)
        # One [word, gold tag, predicted tag] row per token of this sentence.
        sent_res = [[word, tag[i], tag_[i]] for i, word in enumerate(sent)]
        # Collect the rows of every sentence.
        model_predict.append(sent_res)

    epoch_num = str(epoch + 1) if epoch is not None else 'test'
    label_path = os.path.join(self.result_path, 'label_' + epoch_num)
    metric_path = os.path.join(self.result_path, 'result_metric_' + epoch_num)
    # conlleval is defined elsewhere; each item it yields is logged as-is.
    for line in conlleval(model_predict, label_path, metric_path):
        self.logger.info(line)
def run_one_epoch(self, sess, train_vecs, train_lens, train_tags, test_data,
                  test_lens, test_tags, epoch, raw_data):
    """Run one training pass, then predict on the test set and log the conlleval report.

    :param sess: session object whose ``run`` executes the training ops
    :param train_vecs: training sequences, zipped with ``train_lens`` and
        ``train_tags`` before batching
    :param train_lens: training sequence lengths
    :param train_tags: training label sequences
    :param test_data: test sequences, zipped with ``test_lens`` and
        ``test_tags`` before batching
    :param test_lens: test sequence lengths
    :param test_tags: test label sequences
    :param epoch: epoch index; logged as-is and used (as ``epoch + 1``) to name
        the output files, or ``'test'`` when ``None``
    :param raw_data: mapping providing ``'test_data'`` (sentences) and
        ``'test_label'`` (gold tag sequences) aligned with the predictions
    :return: None
    """
    logging.info("++++epoch+++" + str(epoch))

    # --- training pass -------------------------------------------------
    train_batches = generator(zip(train_vecs, train_lens, train_tags),
                              self.batch_size)
    for seqs, seq_lens, labels in train_batches:
        feed_dict, _ = self.get_feed_dict(seqs, seq_lens, labels, self.lr,
                                          self.dropout_keep_prob)
        # The fetched loss / global step values were never used, so they are
        # not bound to names.
        sess.run([self.train_op, self.loss, self.global_step],
                 feed_dict=feed_dict)

    # --- prediction on the test set --------------------------------------
    logging.info('===========validation / test===========')
    label_list_test = []
    test_batches = generator(zip(test_data, test_lens, test_tags),
                             self.batch_size)
    for seqs, seq_lens, labels in test_batches:
        label_list, _ = self.predict_one_batch(sess, seqs, seq_lens, labels)
        label_list_test.extend(label_list)

    # id -> tag, except that id 0 is kept as the integer 0 instead of its tag.
    label2tag = {label: (tag if label != 0 else label)
                 for tag, label in self.tag2label.items()}

    model_predict = []
    for label_, sent_, tag in zip(label_list_test, raw_data['test_data'],
                                  raw_data['test_label']):
        tag_ = [label2tag[label__] for label__ in label_]
        # Indexed access: a gold or predicted sequence shorter than the
        # sentence raises IndexError rather than being truncated.
        model_predict.append(
            [[word, tag[i], tag_[i]] for i, word in enumerate(sent_)])

    epoch_num = str(epoch + 1) if epoch is not None else 'test'
    label_path = os.path.join(self.result_path, 'label_' + epoch_num)
    metric_path = os.path.join(self.result_path, 'result_metric_' + epoch_num)
    # conlleval is defined elsewhere; each item it yields is logged as-is.
    for line in conlleval(model_predict, label_path, metric_path):
        logging.info(line)
def run_one_epoch(self, sess, train, test, epoch, vocab_index_dict):
    """Run one training pass, then predict on ``test`` and log the conlleval report.

    :param sess: session object whose ``run`` executes the training ops
    :param train: training data handed to ``batch_yield``
    :param test: test data; handed to ``batch_yield`` and also iterated as
        ``(sent, tag)`` pairs when building the evaluation rows
    :param epoch: epoch index; logged as-is and used (as ``epoch + 1``) to name
        the output files, or ``'test'`` when ``None``
    :param vocab_index_dict: vocabulary passed through to ``batch_yield``
    :return: None
    """
    logging.info("++++epoch+++" + str(epoch))

    # --- training pass -------------------------------------------------
    batches = batch_yield(train, self.batch_size, vocab_index_dict,
                          self.tag2label, shuffle=False)
    for seqs, labels in batches:
        feed_dict, _ = self.get_feed_dict(seqs, labels, self.lr,
                                          self.dropout_keep_prob)
        # The fetched loss / global step values were never used, so they are
        # not bound to names.
        sess.run([self.train_op, self.loss, self.global_step],
                 feed_dict=feed_dict)

    # --- prediction on the test set --------------------------------------
    logging.info('===========validation / test===========')
    label_list_test = []
    for seqs, labels in batch_yield(test, self.batch_size, vocab_index_dict,
                                    self.tag2label, shuffle=False):
        label_list, _ = self.predict_one_batch(sess, seqs)
        label_list_test.extend(label_list)

    # id -> tag, except that id 0 is kept as the integer 0 instead of its tag.
    label2tag = {label: (tag if label != 0 else label)
                 for tag, label in self.tag2label.items()}

    model_predict = []
    for label_, (sent_, tag) in zip(label_list_test, test):
        tag_ = [label2tag[label__] for label__ in label_]
        # Indexed access: a gold or predicted sequence shorter than the
        # sentence raises IndexError rather than being truncated.
        model_predict.append(
            [[word, tag[i], tag_[i]] for i, word in enumerate(sent_)])

    epoch_num = str(epoch + 1) if epoch is not None else 'test'
    label_path = os.path.join(self.result_path, 'label_' + epoch_num)
    metric_path = os.path.join(self.result_path, 'result_metric_' + epoch_num)
    # conlleval is defined elsewhere; each item it yields is logged as-is.
    for line in conlleval(model_predict, label_path, metric_path):
        logging.info(line)
def evaluate(self, label_list, seq_len_list, data, epoch=None):
    """Pair every token with its gold and predicted tag and log the conlleval report.

    Sentences whose prediction length differs from the sentence length are
    reported on stdout and left out of the evaluation.

    :param label_list: per-sentence sequences of predicted label ids; every id
        must be a value of ``self.tag2label``
    :param seq_len_list: unused by this method
    :param data: iterable of 3-tuples aligned with ``label_list``; the first
        two items are the token sequence and the gold tag sequence, the third
        is ignored here
    :param epoch: epoch index; output files are suffixed with ``epoch + 1``,
        or with ``'test'`` when ``epoch`` is ``None``
    :return: None
    """
    # id -> tag, except that id 0 is kept as the integer 0 instead of its tag.
    label2tag = {label: (tag if label != 0 else label)
                 for tag, label in self.tag2label.items()}

    model_predict = []
    for label_, (sent, tag, _) in zip(label_list, data):
        tag_ = [label2tag[label__] for label__ in label_]
        if len(label_) != len(sent):
            # Mismatched sentence: dump everything needed to debug it and
            # exclude it from the evaluation.
            print('label_: ', label_)
            print(len(label_))
            print('sent: ', sent)
            print(len(sent))
            print('tag: ', tag)
            print(len(tag))
            print('tag_: ', tag_)
            print('tag_: ', len(tag_))
            continue
        model_predict.append(
            [[word, tag[i], tag_[i]] for i, word in enumerate(sent)])

    # A missing epoch marks the run as the final test evaluation.
    epoch_num = str(epoch + 1) if epoch is not None else 'test'
    label_path = os.path.join(self.result_path, 'label_' + epoch_num)
    metric_path = os.path.join(self.result_path, 'result_metric_' + epoch_num)
    # conlleval is defined elsewhere; each item it yields is logged as-is.
    for line in conlleval(model_predict, label_path, metric_path):
        self.logger.info(line)
def evaluate(self, label_list, seq_len_list, data, raw_data=None, epoch=None,
             mode='train'):
    """Score predictions with conlleval, log recall/precision/F1 and return the F1 score.

    :param label_list: per-sentence sequences of predicted label ids; every id
        must be a value of ``self.tag2label``. Per the original author's note,
        each sequence is as long as the sentence without its padding.
    :param seq_len_list: unused by this method
    :param data: iterable of ``(sent, tag)`` pairs aligned with ``label_list``
        (token sequence and gold tag sequence); per the original author's
        note, not padded
    :param raw_data: unused by this method
    :param epoch: epoch index, logged as ``epoch + 1``; ``None`` is logged as
        ``'test'``
    :param mode: passed through to ``conlleval``; ``'train'`` selects the
        training-set log message, any other value the validation-set message
    :return: the F1 score returned by ``conlleval``
    """
    # Plain inverse of tag2label: every id, including 0, maps back to its tag
    # string (this variant deliberately does not replace id 0).
    label2tag = {label: tag for tag, label in self.tag2label.items()}

    model_predict = []
    for label_, (sent, tag) in zip(label_list, data):
        tag_ = [label2tag[label__] for label__ in label_]
        # Length mismatch: dump diagnostics. Nothing is skipped, so a
        # prediction shorter than the sentence raises IndexError just below.
        if len(label_) != len(sent):
            print(sent)
            print(len(label_))
            print(tag)
        model_predict.append(
            [[word, tag[i], tag_[i]] for i, word in enumerate(sent)])

    result_file = os.path.join(self.result_path, 'result_file')
    f1score, precision, recall = conlleval(
        mode, model_predict, result_file, self.negative_label, self.iob2iobes)

    # epoch defaults to None; computing ``epoch + 1`` unconditionally raised
    # TypeError in that case, so fall back to a textual label.
    epoch_label = epoch + 1 if epoch is not None else 'test'
    if mode == 'train':
        self.logger.info(
            'epochs: {} 训练集: recall:{}, precision:{}, f1score: {}'.format(
                epoch_label, recall, precision, f1score))
    else:
        self.logger.info(
            'epochs: {} 验证集: recall:{}, precision:{}, f1score: {}'.format(
                epoch_label, recall, precision, f1score))
    return f1score
def evaluate(self, label_list, seq_len_list, data, raw_data=None, epoch=None):
    """Score predictions with conlleval, log the F1 score and return it.

    :param label_list: per-sentence sequences of predicted label ids; every id
        must be a value of ``self.tag2label``. Per the original author's note,
        each sequence is as long as the sentence without its padding.
    :param seq_len_list: unused by this method
    :param data: sized collection of ``(sent, tag)`` pairs aligned with
        ``label_list``; per the original author's note, not padded
    :param raw_data: passed through to ``conlleval``
    :param epoch: epoch index; file names and log messages use ``epoch + 1``,
        or ``'test'`` when ``epoch`` is ``None``
    :return: the F1 score returned by ``conlleval``
    """
    # Plain inverse of tag2label: every id, including 0, maps back to its tag
    # string (this variant deliberately does not replace id 0).
    label2tag = {label: tag for tag, label in self.tag2label.items()}

    model_predict = []
    for label_, (sent, tag) in zip(label_list, data):
        tag_ = [label2tag[label__] for label__ in label_]
        # Length mismatch: dump diagnostics. Nothing is skipped, so a
        # prediction shorter than the sentence raises IndexError just below.
        if len(label_) != len(sent):
            print(sent)
            print(len(label_))
            print(tag)
        model_predict.append(
            [[word, tag[i], tag_[i]] for i, word in enumerate(sent)])

    epoch_num = str(epoch + 1) if epoch is not None else 'test'
    dg_file_path = os.path.join(self.result_path, 'dg_file')
    result_file_path = os.path.join(self.result_path,
                                    'result_file' + epoch_num)
    metric_path = os.path.join(self.result_path, 'result_metric_' + epoch_num)
    f1score = conlleval(model_predict, dg_file_path, metric_path, raw_data,
                        result_file_path)

    # NOTE(review): the data-set kind is guessed from its size (more than
    # 10000 items is reported as the training set) — confirm with callers.
    # epoch_num is logged instead of ``epoch + 1`` so that the default
    # epoch=None no longer raises TypeError here.
    if len(data) > 10000:
        self.logger.info('epochs: {} 训练集f1score: {}'.format(
            epoch_num, f1score))
    else:
        self.logger.info('epochs: {} 验证集f1score: {}'.format(
            epoch_num, f1score))
    return f1score
def evaluate(self, labels, seq_lens, data, epoch, step, raw_data):
    """Build [token, gold, predicted] rows for the test set and log the conlleval report.

    :param labels: per-sentence sequences of predicted label ids; every id must
        be a value of ``self.vocab_t``
    :param seq_lens: unused by this method
    :param data: unused by this method
    :param epoch: used, together with ``step``, to name the output files
    :param step: used, together with ``epoch``, to name the output files
    :param raw_data: mapping providing ``'test_data'`` (sentences) and
        ``'test_label'`` (gold tag sequences), aligned with ``labels``
    :return: None
    """
    # id -> tag, except that id 0 is kept as the integer 0 instead of its tag.
    id_to_tag = {idx: (name if idx != 0 else idx)
                 for name, idx in self.vocab_t.items()}

    prediction = []
    sentences = raw_data['test_data']
    gold_tags = raw_data['test_label']
    for predicted_ids, sent, gold in zip(labels, sentences, gold_tags):
        predicted = [id_to_tag[idx] for idx in predicted_ids]
        # Rows are indexed by the prediction length, so a shorter sentence or
        # gold sequence raises IndexError.
        rows = [[sent[pos], gold[pos], predicted[pos]]
                for pos in range(len(predicted))]
        prediction.append(rows)

    # Output goes to a per-model directory, created on first use.
    output_path = './trained_models/' + str(self.model_name)
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    suffix = str(epoch) + '_' + str(step)
    label_path = os.path.join(output_path + '/label_' + suffix)
    metric_path = os.path.join(output_path + '/metric_' + suffix)
    # conlleval is defined elsewhere; each item it yields is logged as-is.
    for report_line in conlleval(prediction, label_path, metric_path):
        tf.logging.info(report_line)
def evaluate(epoch):
    """Evaluate the module-level ``model`` on ``test_data`` and print the conlleval report.

    Relies on module-level names: ``model``, ``tag2label``, ``test_data``,
    ``data_origin`` and ``args``.

    :param epoch: used to name the label and metric output files
    :return: accumulated batch loss divided by ``test_data._stop_step``
    """
    model.eval()
    eval_loss = 0

    # id -> tag, except that id 0 is kept as the integer 0 instead of its tag.
    label2tag = {lb: (tag if lb != 0 else lb) for tag, lb in tag2label.items()}

    # Collect one predicted id sequence per sentence, cut to its true length.
    label_list = []
    for word, label, seq_lengths, unsort_idx in test_data:
        loss, _ = model(word, label, seq_lengths)
        pred = model.predict(word, seq_lengths)
        # presumably unsort_idx restores the original sentence order of a
        # length-sorted batch — TODO confirm against the data loader
        pred = pred[unsort_idx]
        seq_lengths = seq_lengths[unsort_idx]
        for i, seq_len in enumerate(seq_lengths.cpu().numpy()):
            label_list.append(list(pred[i][:seq_len].cpu().numpy()))
        eval_loss += loss.detach().item()

    model_predict = []
    for label_, (sent, tag) in zip(label_list, data_origin):
        tag_ = [label2tag[label__] for label__ in label_]
        # Length mismatch: print both lengths. Nothing is skipped, so a
        # prediction shorter than the sentence raises IndexError just below.
        if len(label_) != len(sent):
            print(len(sent))
            print(len(label_))
        model_predict.append(
            [[token, tag[i], tag_[i]] for i, token in enumerate(sent)])

    label_path = os.path.join(args.result_path, 'label_' + str(epoch))
    metric_path = os.path.join(args.result_path,
                               'result_metric_' + str(epoch))
    # conlleval is defined elsewhere; each item it yields is printed as-is.
    for line in conlleval(model_predict, label_path, metric_path):
        print(line)

    return eval_loss / test_data._stop_step
def evaluate(self, label_list, seq_len_list, data, epoch=None):
    """Pair every token with its gold tag and raw prediction and log the conlleval report.

    The predictions are used verbatim: no id -> tag lookup is applied.

    :param label_list: per-sentence prediction sequences, indexable by token
        position
    :param seq_len_list: unused by this method
    :param data: iterable of ``(sent, tag)`` pairs aligned with ``label_list``
        (token sequence and gold tag sequence)
    :param epoch: epoch index; output files are suffixed with ``epoch + 1``,
        or with ``'test'`` when ``epoch`` is ``None``
    :return: None
    """
    model_predict = []
    for label_, (sent, tag) in zip(label_list, data):
        # Indexed access: a gold or predicted sequence shorter than the
        # sentence raises IndexError rather than being truncated.
        model_predict.append(
            [[word, tag[i], label_[i]] for i, word in enumerate(sent)])

    epoch_num = str(epoch + 1) if epoch is not None else 'test'
    label_path = os.path.join(self.result_path, 'label_' + epoch_num)
    metric_path = os.path.join(self.result_path, 'result_metric_' + epoch_num)
    # conlleval is defined elsewhere; each item it yields is logged as-is.
    for line in conlleval(model_predict, label_path, metric_path):
        self.logger.info(line)
def evaluate(self, label_list, seq_len_list, data, epoch=None):
    """Log the conlleval report and a token-level precision / recall / F-value.

    The token-level score only distinguishes "predicted id 0" from "any other
    id" and gold tag ``'O'`` from any other tag. It is printed and appended to
    the metric file after ``conlleval`` has run.

    :param label_list: per-sentence sequences of predicted label ids; every id
        must be a value of ``self.tag2label``
    :param seq_len_list: unused by this method
    :param data: iterable of ``(sent, tag)`` pairs aligned with ``label_list``
        (token sequence and gold tag sequence)
    :param epoch: epoch index; output files are suffixed with ``epoch + 1``,
        or with ``'test'`` when ``epoch`` is ``None``
    :return: None
    """
    # id -> tag, except that id 0 is kept as the integer 0 instead of its tag.
    label2tag = {label: (tag if label != 0 else label)
                 for tag, label in self.tag2label.items()}

    model_predict = []
    # True negatives are not needed for precision / recall and are not counted.
    TP, FP, FN = 0, 0, 0
    for label_, (sent, tag) in zip(label_list, data):
        for p, t in zip(label_, tag):
            if p == 0:
                if t != 'O':
                    FN += 1
            elif t != 'O':
                TP += 1
            else:
                FP += 1

        tag_ = [label2tag[label__] for label__ in label_]
        # Length mismatch: dump diagnostics. Nothing is skipped, so a
        # prediction shorter than the sentence raises IndexError just below.
        if len(label_) != len(sent):
            print(sent)
            print(len(label_))
            print(tag)
        model_predict.append(
            [[word, tag[i], tag_[i]] for i, word in enumerate(sent)])

    # Guard every ratio: with no predicted or no gold entities the
    # denominators are zero and the unguarded divisions raised
    # ZeroDivisionError.
    precision = TP / (TP + FP) if (TP + FP) else 0.0
    recall = TP / (TP + FN) if (TP + FN) else 0.0
    if precision + recall:
        F = 2 * precision * recall / (precision + recall)
    else:
        F = 0.0
    print("precision is ", precision, " recall is ", recall, " F-value is ", F)

    epoch_num = str(epoch + 1) if epoch is not None else 'test'
    label_path = os.path.join(self.result_path, 'label_' + epoch_num)
    metric_path = os.path.join(self.result_path, 'result_metric_' + epoch_num)
    # conlleval is defined elsewhere; each item it yields is logged as-is.
    for line in conlleval(model_predict, label_path, metric_path):
        self.logger.info(line)

    # Append the token-level summary to the metric file.
    with open(metric_path, 'a+') as f:
        f.write("precision is " + str(precision) + " recall is " + str(recall)
                + " F-value is " + str(F))
def evaluate(self, label_list, seq_len_list, data, epoch=None):
    """Log the conlleval report and return the numbers scraped from it.

    :param label_list: per-sentence sequences of predicted label ids; every id
        must be a value of ``self.tag2label``
    :param seq_len_list: unused by this method
    :param data: iterable of ``(sent, tag)`` pairs aligned with ``label_list``
        (token sequence and gold tag sequence)
    :param epoch: epoch index; output files are suffixed with ``epoch + 1``,
        or with ``'test'`` when ``epoch`` is ``None``
    :return: the list of 24 numeric substrings found in the report when exactly
        24 are present; otherwise a list holding one random float repeated 24
        times
    """
    # id -> tag, except that id 0 is kept as the integer 0 instead of its tag.
    label2tag = {label: (tag if label != 0 else label)
                 for tag, label in self.tag2label.items()}

    model_predict = []
    for label_, (sent, tag) in zip(label_list, data):
        tag_ = [label2tag[label__] for label__ in label_]
        # Length mismatch: dump diagnostics. Nothing is skipped, so a
        # prediction shorter than the sentence raises IndexError just below.
        if len(label_) != len(sent):
            print(sent)
            print(len(label_))
            print(tag)
        model_predict.append(
            [[word, tag[i], tag_[i]] for i, word in enumerate(sent)])

    epoch_num = str(epoch + 1) if epoch is not None else 'test'
    label_path = os.path.join(self.result_path, 'label_' + epoch_num)
    metric_path = os.path.join(self.result_path, 'result_metric_' + epoch_num)

    # Log every report item and keep its text for number extraction.
    report_parts = []
    for line in conlleval(model_predict, label_path, metric_path):
        report_parts.append(str(line))
        self.logger.info(line)
    temp = ''.join(report_parts)

    # Every run of digits and dots in the report; the raw string keeps the
    # same pattern without the invalid "\." escape of a normal literal.
    res = re.findall(r'[\.0-9]{1,}', temp)
    self.logger.info(res)
    if len(res) == 24:
        return res
    # NOTE(review): the fallback returns placeholder random values (floats,
    # while the normal path returns strings) — confirm callers expect this.
    # random.random() takes no argument; the former random.random(1) raised
    # TypeError, so this branch could never return.
    return [random.random()] * 24
def evaluate(label_list, seq_len_list, data, config, epoch=None):
    """Pair every token with its gold and predicted tag and log the conlleval report.

    Relies on the module-level ``tag2label`` mapping.

    :param label_list: per-sentence sequences of predicted label ids; every id
        must be a value of ``tag2label``
    :param seq_len_list: unused by this function
    :param data: iterable of ``(sent, tag)`` pairs aligned with ``label_list``
        (token sequence and gold tag sequence)
    :param config: object providing ``result_path`` (output directory) and
        ``logger``
    :param epoch: epoch index; output files are suffixed with ``epoch + 1``,
        or with ``'test'`` when ``epoch`` is ``None``
    :return: None
    """
    # id -> tag, except that id 0 is kept as the integer 0 instead of its tag.
    label2tag = {label: (tag if label != 0 else label)
                 for tag, label in tag2label.items()}

    model_predict = []
    for label_, (sent, tag) in zip(label_list, data):
        tag_ = [label2tag[label__] for label__ in label_]
        # Length mismatch: dump diagnostics. Nothing is skipped, so a
        # prediction shorter than the sentence raises IndexError just below.
        if len(label_) != len(sent):
            print(sent)
            print(len(label_))
            print(tag)
        model_predict.append(
            [[word, tag[i], tag_[i]] for i, word in enumerate(sent)])

    epoch_num = str(epoch + 1) if epoch is not None else 'test'
    label_path = os.path.join(config.result_path, 'label_' + epoch_num)
    metric_path = os.path.join(config.result_path,
                               'result_metric_' + epoch_num)
    # conlleval is defined elsewhere; each item it yields is logged as-is.
    for line in conlleval(model_predict, label_path, metric_path):
        config.logger.info(line)
def evaluate(self, label_list, seq_len_list, data, epoch=None):
    """Pair every token with its gold and predicted tag and log the conlleval report.

    :param label_list: per-sentence sequences of predicted label ids; every id
        must be a value of ``self.tag2label``
    :param seq_len_list: unused by this method
    :param data: iterable of ``(sent, tag)`` pairs aligned with ``label_list``
        (token sequence and gold tag sequence)
    :param epoch: epoch index; output files are suffixed with ``epoch + 1``,
        or with ``'test'`` when ``epoch`` is ``None``
    :return: None
    """
    # id -> tag, except that id 0 is kept as the integer 0 instead of its tag.
    label2tag = {label: (tag if label != 0 else label)
                 for tag, label in self.tag2label.items()}

    model_predict = []
    for label_, (sent, tag) in zip(label_list, data):
        # tag_ holds the predicted ids translated back to tags.
        tag_ = [label2tag[label___] for label___ in label_]
        # Length mismatch: dump diagnostics. Nothing is skipped, so a
        # prediction shorter than the sentence raises IndexError just below.
        if len(label_) != len(sent):
            print(sent)
            print(len(label_))
            print(tag)
        model_predict.append(
            [[word, tag[i], tag_[i]] for i, word in enumerate(sent)])

    epoch_num = str(epoch + 1) if epoch is not None else 'test'
    label_path = os.path.join(self.result_path, 'label_' + epoch_num)
    metric_path = os.path.join(self.result_path, 'result_metric_' + epoch_num)
    # conlleval is defined elsewhere (the original comment says it writes the
    # results); each item it yields is logged as-is.
    for line in conlleval(model_predict, label_path, metric_path):
        self.logger.info(line)
def evaluate(model, label_list, seq_len_list, data, epoch=None):
    """Pair every token with its gold and predicted tag and print the conlleval report.

    :param model: object providing ``tag2label`` (tag -> id mapping) and
        ``result_path`` (output directory)
    :param label_list: per-sentence sequences of predicted label ids; every id
        must be a value of ``model.tag2label``
    :param seq_len_list: unused by this function
    :param data: iterable of ``(sent, tag)`` pairs aligned with ``label_list``
        (token sequence and gold tag sequence)
    :param epoch: epoch index; output files are suffixed with ``epoch + 1``,
        or with ``'test'`` when ``epoch`` is ``None``
    :return: None
    """
    # id -> tag, except that id 0 is kept as the integer 0 instead of its tag.
    label2tag = {label: (tag if label != 0 else label)
                 for tag, label in model.tag2label.items()}

    # Map predicted label ids back to tag strings; per the original author's
    # note, the downstream perl scorer counts tags by their B / I style prefix.
    model_predict = []
    for label_, (sent, tag) in zip(label_list, data):
        tag_ = [label2tag[label__] for label__ in label_]
        # Length mismatch: print the three lengths. Nothing is skipped, so a
        # prediction shorter than the sentence raises IndexError just below.
        if len(label_) != len(sent):
            print("len=", len(sent), len(label_), len(tag_))
        model_predict.append(
            [[word, tag[i], tag_[i]] for i, word in enumerate(sent)])

    epoch_num = str(epoch + 1) if epoch is not None else 'test'
    label_path = os.path.join(model.result_path, 'label_' + epoch_num)
    metric_path = os.path.join(model.result_path, 'result_metric_' + epoch_num)
    # conlleval is defined elsewhere; each item it yields is printed as-is.
    for item in conlleval(model_predict, label_path, metric_path):
        print(item)