def load_model_decode(model_dir, data, name, gpu, seg=True):
    """Rebuild the configured model, load saved weights and decode one split.

    Args:
        model_dir: Path of the saved ``state_dict`` handed to ``torch.load``.
        data: Project data/config object. ``data.HP_gpu`` is overwritten with
            ``gpu`` and ``data.model_name`` selects the architecture.
        name: Name of the split passed to ``evaluate``; also used in the log
            output.
        gpu: Whether decoding should use the GPU. When falsy, the checkpoint
            tensors are mapped to the CPU while loading.
        seg: True for tasks reported with precision/recall/F (e.g. word
            segmentation, NER, chunking); False for tasks reported with token
            accuracy only (e.g. POS, CCG).

    Returns:
        The last value returned by ``evaluate`` (the predicted results).

    Raises:
        AssertionError: If ``data.model_name`` is not one of
            'WC-LSTM_model', 'CNN_model' or 'LSTM_model'.
    """
    data.HP_gpu = gpu
    print("Load Model from file: ", model_dir)

    model = None
    if data.model_name == 'WC-LSTM_model':
        model = CW_NER(data)
    elif data.model_name == 'CNN_model':
        model = CNNmodel(data)
    elif data.model_name == 'LSTM_model':
        model = BiLSTM_CRF(data)
    assert (model is not None)

    # A checkpoint written from CUDA tensors cannot be deserialized on a
    # CPU-only host unless the storages are remapped, so force CPU placement
    # whenever GPU decoding was not requested. With gpu enabled the default
    # torch.load behaviour is kept.
    map_location = None if gpu else 'cpu'
    model.load_state_dict(torch.load(model_dir, map_location=map_location))

    print("Decode %s data ..." % name)
    start_time = time.time()
    speed, acc, p, r, f, pred_results = evaluate(data, model, name)
    time_cost = time.time() - start_time

    # seg selects the report format: F-value tasks print acc/p/r/f, token
    # accuracy tasks print acc only.
    if seg:
        print(
            "%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (name, time_cost, speed, acc, p, r, f))
    else:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f"
              % (name, time_cost, speed, acc))
    return pred_results
def train(data, save_model_dir, dset_dir, seg=True):
    """Train the model selected by ``data.model_name``, saving on dev improvement.

    For every epoch in ``range(data.HP_iteration)`` the optimizer is passed
    through ``lr_decay``, ``data.train_Ids`` is shuffled in place, the training
    batches are run, and the model is evaluated on "dev". When the dev score
    (F if ``seg`` else accuracy) exceeds the best score seen so far, the
    model's ``state_dict`` is saved to
    ``save_model_dir + '-' + <epoch> + '-' + <score * 100, 1 decimal> + '.model'``.
    The model is then evaluated on "test".

    Args:
        data: Project data/config object providing the hyper-parameters
            (``HP_*``), ``model_name`` and ``train_Ids``.
        save_model_dir: Path prefix used to build checkpoint file names.
        dset_dir: Destination passed to ``save_data_setting``.
        seg: True to select/report by F-value, False to use token accuracy.

    Returns:
        None.

    Raises:
        AssertionError: If ``data.model_name`` is not one of
            'WC-LSTM_model', 'CNN_model' or 'LSTM_model'.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_setting(data, dset_dir)

    model = None
    if data.model_name == 'WC-LSTM_model':
        model = CW_NER(data, type=2)
    elif data.model_name == 'CNN_model':
        model = CNNmodel(data)
    elif data.model_name == 'LSTM_model':
        model = BiLSTM_CRF(data)
    assert (model is not None)
    print("finished built model.")

    # Only parameters with requires_grad set are handed to the optimizer;
    # frozen parameters are skipped.
    parameters = filter(lambda p: p.requires_grad, model.parameters())

    # Background on the optimizer choice (SGD/Adagrad/AdaDelta/RMSprop/Adam):
    # - SGD (stochastic gradient descent) updates the parameters from the
    #   cost-function gradient as soon as each sample is read. It converges
    #   quickly, supports online updates and has a chance of escaping poor
    #   local optima, but it is prone to settling in local optima or saddle
    #   points, and because the update direction depends entirely on the
    #   current batch's gradient it is quite unstable.
    # - SGD + momentum keeps part of the previous update direction and uses
    #   the current batch's gradient to fine-tune it, which adds stability,
    #   learns faster and retains some ability to escape local optima.
    # Alternative:
    #   optim.SGD(parameters, lr=data.HP_lr, momentum=data.HP_momentum)
    optimizer = optim.Adagrad(parameters, lr=data.HP_lr)
    best_dev = -1

    for idx in tqdm(range(data.HP_iteration)):
        epoch_start = time.time()
        print("\nEpoch: %s/%s" % (idx, data.HP_iteration))
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        batch_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)

        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        # The "+ 1" covers a trailing partial batch; when train_num is an exact
        # multiple of batch_size the extra slice is empty and is skipped below.
        total_batch = train_num // batch_size + 1

        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = min((batch_id + 1) * batch_size, train_num)
            instance = data.train_Ids[start:end]
            if not instance:
                continue

            tag_seq, batch_label, mask, loss = None, None, None, None
            if data.model_name == 'WC-LSTM_model':
                (gaz_list, reverse_gaz_list, batch_char, batch_bichar,
                 batch_charlen, batch_charrecover, batch_label,
                 mask) = batchify_with_label_3(
                    instance, data.HP_gpu, data.HP_num_layer)
                instance_count += 1
                loss, tag_seq = model.neg_log_likelihood_loss(
                    gaz_list, reverse_gaz_list, batch_char, batch_charlen,
                    batch_label, mask)
            elif data.model_name == 'CNN_model':
                (gaz_list, batch_char, batch_bichar, batch_charlen,
                 batch_label, layer_gaz, gaz_mask,
                 mask) = batchify_with_label_2(
                    instance, data.HP_gpu, data.HP_num_layer)
                instance_count += 1
                loss, tag_seq = model.neg_log_likelihood_loss(
                    gaz_list, batch_char, batch_bichar, batch_charlen,
                    layer_gaz, gaz_mask, mask, batch_label)
            elif data.model_name == 'LSTM_model':
                (gaz_list, batch_char, batch_bichar, batch_charlen,
                 batch_wordrecover, batch_label,
                 mask) = batchify_with_label(
                    instance, data.HP_gpu, data.HP_num_layer)
                instance_count += 1
                loss, tag_seq = model.neg_log_likelihood_loss(
                    gaz_list, batch_char, batch_bichar, batch_charlen,
                    batch_label, mask)

            # NOTE(review): `loss.size` is a bound method, so comparing it with
            # a torch.Size is always True and this assertion can never fail.
            # Kept unchanged because the intended check is unclear.
            assert (loss.size != torch.Size([]))

            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            total_loss += loss.item()
            batch_loss += loss

            # NOTE(review): the update cadence is keyed on data.HP_clip;
            # confirm this is the intended accumulation interval.
            if end % data.HP_clip == 0:
                batch_loss.backward()
                optimizer.step()
                model.zero_grad()
                batch_loss = 0

        # Apply whatever loss is still accumulated after the last batch.
        # Without this, the tail of every epoch is discarded when batch_loss
        # is reset at the start of the next epoch. batch_loss is the int 0
        # whenever nothing is pending, so the tensor check doubles as the
        # "pending" flag.
        if torch.is_tensor(batch_loss):
            batch_loss.backward()
            optimizer.step()
            model.zero_grad()
            batch_loss = 0

        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))

        speed, acc, p, r, f, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        current_score = f if seg else acc
        if seg:
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f"
                  % (dev_cost, speed, acc))

        if current_score > best_dev:
            if seg:
                print(
                    "Exceed previous best f score: %.4f, \033[35mnew best f: %.4f\033[0m"
                    % (best_dev, current_score))
            else:
                print(
                    "Exceed previous best acc score:%.4f, \033[35mnew best acc: %.4f\033[0m"
                    % (best_dev, current_score))
            save_model_name = save_model_dir + '-' + str(idx) + '-' + str(
                round(current_score * 100, 1)) + ".model"
            torch.save(model.state_dict(), save_model_name)
            best_dev = current_score

        # NOTE(review): the original indentation was lost; the test pass and
        # gc.collect() are assumed to run once per epoch, not only when the dev
        # score improves - confirm against the upstream layout.
        speed, acc, p, r, f, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if seg:
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f"
                  % (test_cost, speed, acc))
        gc.collect()