import numpy as np

def eval(sentence, predict, tags):
    # Per-sentence precision, recall and F1 over extracted entities.
    entityres, entityall = [], []
    # predict is (seq_len, batch); permute to batch-first before decoding entities.
    entityres = calculate(sentence, predict.permute(1, 0), id2word, id2tag, entityres)
    entityall = calculate(sentence, tags, id2word, id2tag, entityall)

    intersection = []
    acc, recall, f1 = [], [], []
    for i in range(len(entityres)):
        intersection.append([j for j in entityres[i] if j in entityall[i]])
    for i in range(len(intersection)):
        # Precision: correct entities / predicted entities (1.0 when nothing was predicted).
        acc_ = 1. if len(entityres[i]) == 0 else len(intersection[i]) / len(entityres[i])
        # Recall: correct entities / gold entities (1.0 when the sentence has no entities).
        recall_ = 1. if len(entityall[i]) == 0 else len(intersection[i]) / len(entityall[i])
        f1_ = 0. if acc_ + recall_ == 0. else (2 * acc_ * recall_) / (acc_ + recall_)
        acc.append(acc_)
        recall.append(recall_)
        f1.append(f1_)
    return np.mean(acc), np.mean(recall), np.mean(f1)
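Every snippet in this section depends on a `calculate` helper that maps id sequences back to entity strings, so that predicted and gold entity lists can be intersected. Its definition is not shown here; note also that `eval` above indexes `entityres[i]` per sentence, while the later snippets treat the result as one flat list. The following is a minimal sketch of the flat batch variant, assuming a B/M/E tag scheme and a `word/tag ... i j` entity encoding; the details are assumptions, not the original implementation.

def calculate(x, y, id2word, id2tag, res):
    # Hypothetical sketch: scan each sentence in the batch and collect
    # maximal B-/M-/E- spans, appending each finished entity to res.
    entity = []
    for i in range(len(x)):            # each sentence in the batch
        for j in range(len(x[0])):     # each token position
            if x[i][j] == 0 or y[i][j] == 0:   # padding ids
                entity = []
                continue
            tag = id2tag[y[i][j]]
            if tag[0] == 'B':                      # entity begins
                entity = [id2word[x[i][j]] + '/' + tag]
            elif tag[0] == 'M' and entity:         # entity continues
                entity.append(id2word[x[i][j]] + '/' + tag)
            elif tag[0] == 'E' and entity:         # entity ends: record it
                entity.append(id2word[x[i][j]] + '/' + tag)
                res.append(' '.join(entity) + ' ' + str(i) + ' ' + str(j))
                entity = []
            else:
                entity = []
    return res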
def train(model, sess):
    for epoch in range(training_epochs):
        for batch in range(batch_num):
            x_batch, y_batch = data_train.next_batch(batch_size)
            feed_dict = {model.input_data: x_batch, model.labels: y_batch}
            pre, _ = sess.run([model.viterbi_sequence, model.train_op], feed_dict)
            if batch % 100 == 0:
                # Token-level accuracy on the current batch.
                acc = 0
                for i in range(len(y_batch)):
                    for j in range(len(y_batch[0])):
                        if y_batch[i][j] == pre[i][j]:
                            acc += 1
                print(float(acc) / (len(y_batch) * len(y_batch[0])))
        path_name = "./model/model" + str(epoch) + ".ckpt"
        print(path_name)
        if epoch % 2 == 0:
            saver.save(sess, path_name)
            print("model has been saved")
            # Entity-level evaluation on the training set.
            entityres = []
            entityall = []
            for batch in range(batch_num):
                x_batch, y_batch = data_train.next_batch(batch_size)
                feed_dict = {model.input_data: x_batch, model.labels: y_batch}
                pre = sess.run([model.viterbi_sequence], feed_dict)[0]
                entityres = calculate(x_batch, pre, id2word, id2tag, entityres)
                entityall = calculate(x_batch, y_batch, id2word, id2tag, entityall)
            jiaoji = [i for i in entityres if i in entityall]  # predicted-gold intersection
            if len(jiaoji) != 0:
                zhun = float(len(jiaoji)) / len(entityres)  # precision
                zhao = float(len(jiaoji)) / len(entityall)  # recall
                print("train")
                print("precision:", zhun)
                print("recall:", zhao)
                print("f1:", (2 * zhun * zhao) / (zhun + zhao))
            else:
                print("precision:", 0)
            # Entity-level evaluation on the test set.
            entityres = []
            entityall = []
            for batch in range(batch_num_test):
                x_batch, y_batch = data_test.next_batch(batch_size)
                feed_dict = {model.input_data: x_batch, model.labels: y_batch}
                pre = sess.run([model.viterbi_sequence], feed_dict)[0]
                entityres = calculate(x_batch, pre, id2word, id2tag, entityres)
                entityall = calculate(x_batch, y_batch, id2word, id2tag, entityall)
            jiaoji = [i for i in entityres if i in entityall]
            if len(jiaoji) != 0:
                zhun = float(len(jiaoji)) / len(entityres)  # precision
                zhao = float(len(jiaoji)) / len(entityall)  # recall
                print("test")
                print("precision:", zhun)
                print("recall:", zhao)
                print("f1:", (2 * zhun * zhao) / (zhun + zhao))
            else:
                print("precision:", 0)
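The precision/recall/F1 block above is written out twice, once for the training batches and once for the test batches. A small helper (hypothetical, not part of the original code) would remove the duplication; it reproduces the inline computation exactly:

def report(name, entityres, entityall):
    # Micro-averaged precision/recall/F1 over flat entity lists,
    # mirroring the inline blocks in train() above.
    jiaoji = [i for i in entityres if i in entityall]
    if jiaoji:
        zhun = float(len(jiaoji)) / len(entityres)   # precision
        zhao = float(len(jiaoji)) / len(entityall)   # recall
        print(name)
        print("precision:", zhun)
        print("recall:", zhao)
        print("f1:", (2 * zhun * zhao) / (zhun + zhao))
    else:
        print("precision:", 0)

# Usage inside train(): report("train", entityres, entityall)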
def train():
    model = BiLSTM_CRF(len(word2id) + 1, tag2id, EMBEDDING_DIM, HIDDEN_DIM)  # instantiate the model
    optimizer = optim.SGD(model.parameters(), lr=0.005, weight_decay=1e-4)  # stochastic gradient descent

    # Training
    for epoch in range(EPOCHS):
        index = 0
        for sentence, tags in zip(x_train, y_train):
            index += 1  # count steps within the epoch
            model.zero_grad()  # clear accumulated gradients
            sentence = torch.tensor(sentence, dtype=torch.long)
            tags = torch.tensor([tag2id[t] for t in tags], dtype=torch.long)
            loss = model.neg_log_likelihood(sentence, tags)  # loss defined by the model
            loss.backward()
            optimizer.step()
            if index % 300 == 0:
                print("epoch", epoch, "batch", index)

        # Containers for the test results; evaluate after each epoch.
        entityres = []
        entityall = []
        for sentence, tags in zip(x_test, y_test):
            sentence = torch.tensor(sentence, dtype=torch.long)
            tags = torch.tensor([tag2id[t] for t in tags], dtype=torch.long)
            score, predict = model(sentence)
            entityres = calculate(sentence, predict, id2word, id2tag, entityres)
            entityall = calculate(sentence, tags.numpy(), id2word, id2tag, entityall)

        jiaoji = [i for i in entityres if i in entityall]  # predicted-gold intersection
        if len(jiaoji) != 0:
            zhun = float(len(jiaoji)) / len(entityres)  # precision
            zhao = float(len(jiaoji)) / len(entityall)  # recall
            print("test:")
            print("precision:", zhun)
            print("recall:", zhao)
            print("F:", (2 * zhun * zhao) / (zhun + zhao))
        else:
            print("precision:", 0)

        path_name = "./model/model.pkl"
        print(path_name)
        torch.save(model, path_name)
        print("model has been saved")
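`torch.save(model, path_name)` pickles the whole module, which ties the checkpoint to the exact class definition and module path. A common alternative, sketched here with the same names as above, saves only the parameters and rebuilds the model when loading; this is an optional refinement, not what the code above does:

# Save only the parameters instead of the pickled module.
torch.save(model.state_dict(), "./model/model_params.pkl")

# Loading then requires constructing the model with the same arguments.
model = BiLSTM_CRF(len(word2id) + 1, tag2id, EMBEDDING_DIM, HIDDEN_DIM)
model.load_state_dict(torch.load("./model/model_params.pkl"))
model.eval()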
def test():
    path_name = "./model/model.pkl"
    model = torch.load(path_name)  # load the full model saved by train()

    entityres = []
    entityall = []
    # Test
    for sentence, tags in zip(x_test, y_test):
        sentence = torch.tensor(sentence, dtype=torch.long)
        # Convert gold tags to ids, matching the conversion used during training.
        tags = torch.tensor([tag2id[t] for t in tags], dtype=torch.long)
        score, predict = model(sentence)
        entityres = calculate(sentence, predict, id2word, id2tag, entityres)
        entityall = calculate(sentence, tags.numpy(), id2word, id2tag, entityall)

    jiaoji = [i for i in entityres if i in entityall]  # predicted-gold intersection
    if len(jiaoji) != 0:
        zhun = float(len(jiaoji)) / len(entityres)  # precision
        zhao = float(len(jiaoji)) / len(entityall)  # recall
        print("test:")
        print("precision:", zhun)
        print("recall:", zhao)
        print("F:", (2 * zhun * zhao) / (zhun + zhao))
    else:
        print("precision:", 0)
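One detail worth noting: the loop above runs the model with autograd enabled, which costs memory and time at inference. Wrapping the forward pass as below (standard PyTorch, but a suggested change rather than part of the original) avoids that:

model.eval()              # switch off training-only behaviour such as dropout
with torch.no_grad():     # no gradient tracking needed at test time
    score, predict = model(sentence)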
            sentence = torch.tensor(sentence, dtype=torch.long)
            tags = torch.tensor([tag2id[t] for t in tags], dtype=torch.long)
            loss = model.neg_log_likelihood(sentence, tags)
            loss.backward()
            optimizer.step()
            if index % 300 == 0:
                print("epoch", epoch, "index", index)

        entityres = []
        entityall = []
        for sentence, tags in zip(x_test, y_test):
            sentence = torch.tensor(sentence, dtype=torch.long)
            score, predict_tags = model(sentence)
            entityres = calculate(sentence, predict_tags, id2word, id2tag, entityres)
            entityall = calculate(sentence, tags, id2word, id2tag, entityall)

        common_entity = [i for i in entityres if i in entityall]
        if len(common_entity) != 0:
            zhun = float(len(common_entity)) / len(entityres)  # precision
            zhao = float(len(common_entity)) / len(entityall)  # recall
            print("test:")
            print("Precision:", zhun)
            print("Recall:", zhao)
            print("F1:", (2 * zhun * zhao) / (zhun + zhao))
        else:
            print("Precision:", 0)

        path_name = "C:\\Users\\Administrator\\PycharmProjects\\ChineseNER\\save_model\\save_model" + str(epoch) + ".pkl"
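The hard-coded absolute Windows path makes this script machine-specific. A portable equivalent that keeps the same file naming (a sketch; the directory is resolved relative to the working directory) could be:

import os

save_dir = "save_model"
os.makedirs(save_dir, exist_ok=True)  # create the directory if missing
path_name = os.path.join(save_dir, "save_model" + str(epoch) + ".pkl")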
                for j in range(len(y_batch[0])):
                    if y_batch[i][j] == pre[i][j]:
                        acc += 1
                print(float(acc) / (len(y_batch) * len(y_batch[0])))
        path_name = "./model/model" + str(epoch) + ".ckpt"
        print(path_name)
        saver.save(sess, path_name)
        print("model has been saved")
        # Entity-level evaluation on the training set.
        entityres = []
        entityall = []
        for batch in range(batch_num):
            x_batch, y_batch = data_train.next_batch(batch_size)
            feed_dict = {input_data: x_batch, labels: y_batch}
            pre = sess.run([viterbi_sequence], feed_dict)[0]
            entityres = calculate(x_batch, pre, id2word, id2tag, entityres)
            entityall = calculate(x_batch, y_batch, id2word, id2tag, entityall)
        jiaoji = [i for i in entityres if i in entityall]  # predicted-gold intersection
        if len(jiaoji) != 0:
            zhun = float(len(jiaoji)) / len(entityres)  # precision
            zhao = float(len(jiaoji)) / len(entityall)  # recall
            print("train")
            print("precision:", zhun)
            print("recall:", zhao)
            print("f1:", (2 * zhun * zhao) / (zhun + zhao))
        else:
            print("precision:", 0)
        entityres = []
        entityall = []
        for batch in range(batch_num_test):
valLoader = DataLoader(dataset=validation, batch_size=batch_size, shuffle=True)
testLoader = DataLoader(dataset=test, batch_size=batch_size, shuffle=True)

for epoch in range(EPOCHS):
    train_loss = 0.0
    entityres = []
    entityall = []
    for i, data in enumerate(trainLoader):
        x, y = data
        x = x.permute(1, 0, 2)  # batch-first -> sequence-first
        # y = y.permute(1, 0, 2)
        optimizer.zero_grad()
        loss = model.neg_log_likelihood(x, y)
        train_loss += loss.item()  # accumulate a float, not the graph-holding tensor
        _, pred = model(x)
        entityres = calculate(x, pred, id2word, id2tag, entityres)
        entityall = calculate(x, y, id2word, id2tag, entityall)
        train_acc, train_recall, train_f1 = matrix(entityres, entityall)
        iter = epoch * len(trainLoader) + i  # global step for TensorBoard
        writer.add_scalar("train_loss", train_loss, iter)
        writer.add_scalar("train_acc", train_acc, iter)
        writer.add_scalar("train_recall", train_recall, iter)
        writer.add_scalar("train_f1", train_f1, iter)
        loss.backward()
        optimizer.step()

    val_loss = 0.0
    val_res = []
    val_all = []
    for i, data in enumerate(valLoader):
        x, y = data
        x = x.permute(1, 0, 2)
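The training loop above calls a `matrix` helper that is not defined in this excerpt. Judging from how its three return values are logged, it computes micro-averaged precision, recall and F1 over the flat entity lists; a plausible sketch (an assumption, not the original code):

def matrix(entityres, entityall):
    # Hypothetical: micro-averaged precision/recall/F1 over flat entity lists.
    common = [e for e in entityres if e in entityall]
    acc = len(common) / len(entityres) if entityres else 1.0
    recall = len(common) / len(entityall) if entityall else 1.0
    f1 = (2 * acc * recall) / (acc + recall) if acc + recall else 0.0
    return acc, recall, f1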