def ensemble_test(config):
    '''Combine the four binary classifiers into a 4-way prediction on the test set.'''
    choice = "cuda" if torch.cuda.is_available() else "cpu"
    print(choice + " is available")
    device = torch.device(choice)

    # Load one trained binary model per relation type.
    models = []
    for relation_type in config.types:
        models.append(
            torch.load(config.resourses.model_path + relation_type + "_" +
                       config.resourses.model_name).to(device))

    pdtb = PDTB(config)
    # Load all splits; only the test split is scored here, but loading the full
    # corpus keeps build_vocab() consistent with the vocabulary used in training.
    train_arg1_sents, train_arg2_sents, train_labels = pdtb.load_PDTB("train")
    dev_arg1_sents, dev_arg2_sents, dev_labels = pdtb.load_PDTB("dev")
    test_arg1_sents, test_arg2_sents, test_labels = pdtb.load_PDTB("test")
    word_to_id = pdtb.build_vocab()

    batch_size = config.training.batch_size
    max_seq_len = config.model.max_seq_len

    # test
    with torch.no_grad():
        result = []
        for i in range(0, len(test_arg1_sents), batch_size):
            arg1 = test_arg1_sents[i:i + batch_size]
            arg2 = test_arg2_sents[i:i + batch_size]
            label = test_labels[i:i + batch_size]

            arg1 = sent_to_tensor(arg1, word_to_id, max_seq_len).to(device)
            arg2 = sent_to_tensor(arg2, word_to_id, max_seq_len).to(device)
            label = torch.LongTensor(label).to(device)

            # Run every binary model on the batch.
            outputs = [model(arg1, arg2) for model in models]

            # For each example, predict the relation whose binary model assigns
            # the highest positive-class score.
            for j in range(len(arg1)):
                result.append(
                    np.argmax([outputs[m][j][1].item() for m in range(len(models))]))

    print(classification_report(test_labels, result))
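# `sent_to_tensor` is used throughout these functions but not defined in this file.
# The sketch below is a minimal guess at its behavior, assuming it maps each token to
# its vocabulary id (with an unknown-word fallback) and pads or truncates every
# sentence to max_seq_len; the pad_id/unk_id values are assumptions, and the actual
# helper in the repo may differ.
def sent_to_tensor(sents, word_to_id, max_seq_len, pad_id=0, unk_id=1):
    # sents: list of tokenized sentences (lists of token strings).
    batch = []
    for sent in sents:
        ids = [word_to_id.get(w, unk_id) for w in sent[:max_seq_len]]
        ids += [pad_id] * (max_seq_len - len(ids))  # right-pad to a fixed length
        batch.append(ids)
    return torch.LongTensor(batch)  # shape: (batch_size, max_seq_len)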
def test(config):
    choice = "cuda" if torch.cuda.is_available() else "cpu"
    print(choice + " is available")
    device = torch.device(choice)

    daguan = Daguan(config)
    dataset, word_to_id = daguan.load_dataset()

    try:
        # map_location keeps the checkpoint loadable even if it was saved on a
        # different device than the one available now.
        net = torch.load(config.resourses.model_path + "_" +
                         config.resourses.model_name,
                         map_location=device).to(device)
    except FileNotFoundError:
        raise FileNotFoundError("No model!")

    batch_size = config.training.batch_size
    max_seq_len = config.model.max_seq_len

    # test
    with torch.no_grad():
        result = []
        for i in tqdm(range(0, len(dataset), batch_size)):
            x = dataset[i:i + batch_size]
            x = sent_to_tensor(x, word_to_id, max_seq_len).to(device)
            output = net(x)
            result.extend(list(torch.max(output, 1)[1].cpu().numpy()))

    # Shift predictions back to 1-based class ids for the output file.
    result = [n + 1 for n in result]
    ids = list(range(len(result)))
    df = pd.DataFrame({"id": ids, "class": result})
    df.to_csv("result.csv", index=False, encoding='utf-8')
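# Optional sanity check of the submission file written above (illustrative only,
# assuming it was produced by test(config)): one row per example, consecutive
# 0-based ids, and 1-based class labels.
def check_submission(path="result.csv"):
    df = pd.read_csv(path)
    assert list(df.columns) == ["id", "class"], "unexpected columns"
    assert df["id"].tolist() == list(range(len(df))), "ids should be consecutive from 0"
    assert df["class"].min() >= 1, "class labels are expected to be 1-based"
    print("%d predictions, %d distinct classes" % (len(df), df["class"].nunique()))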
def test(config):
    choice = "cuda" if torch.cuda.is_available() else "cpu"
    print(choice + " is available")
    device = torch.device(choice)

    # Load the trained binary model for the configured relation type.
    model = torch.load(config.resourses.model_path + config.type + "_" +
                       config.resourses.model_name).to(device)

    pdtb = PDTB(config)
    # Load all splits; only the test split is scored here, but loading the full
    # corpus keeps build_vocab() consistent with the vocabulary used in training.
    train_arg1_sents, train_arg2_sents, train_labels = pdtb.load_PDTB("train")
    dev_arg1_sents, dev_arg2_sents, dev_labels = pdtb.load_PDTB("dev")
    test_arg1_sents, test_arg2_sents, test_labels = pdtb.load_PDTB("test")
    word_to_id = pdtb.build_vocab()

    batch_size = config.training.batch_size
    max_seq_len = config.model.max_seq_len

    # test
    with torch.no_grad():
        result = []
        for i in range(0, len(test_arg1_sents), batch_size):
            arg1 = test_arg1_sents[i:i + batch_size]
            arg2 = test_arg2_sents[i:i + batch_size]
            label = test_labels[i:i + batch_size]

            arg1 = sent_to_tensor(arg1, word_to_id, max_seq_len).to(device)
            arg2 = sent_to_tensor(arg2, word_to_id, max_seq_len).to(device)
            label = torch.LongTensor(label).to(device)

            output = model(arg1, arg2)
            result.extend(list(torch.max(output, 1)[1].cpu().numpy()))

    # F1 score
    precision, recall, f1, _ = score(test_labels, result, average='binary')
    print("f1 score: %.2f  precision: %.2f  recall: %.2f" %
          (100 * f1, 100 * precision, 100 * recall))
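# `score` is presumably sklearn's precision_recall_fscore_support imported under an
# alias. A minimal sketch of the assumed import and of what average='binary' reports
# (precision/recall/F1 for the positive class only), which matches the one-vs-rest
# setup where each model detects a single relation type:
from sklearn.metrics import precision_recall_fscore_support as score

if __name__ == "__main__":
    y_true = [1, 0, 1, 1, 0]
    y_pred = [1, 0, 0, 1, 1]
    p, r, f1, _ = score(y_true, y_pred, average='binary')
    print("f1 score: %.2f  precision: %.2f  recall: %.2f" % (100 * f1, 100 * p, 100 * r))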
def train(config):
    choice = "cuda" if torch.cuda.is_available() else "cpu"
    print(choice + " is available")
    device = torch.device(choice)

    daguan = Daguan(config)
    dataset, labels, word_to_id = daguan.load_dataset()
    config.model.class_num = len(set(labels))
    print('class num:', config.model.class_num)
    size = len(dataset)
    print('data size:', size)

    # Hold out the last 10% of the data as a dev set.
    divide = int(0.9 * size)
    train_dataset = dataset[:divide]
    train_labels = labels[:divide]
    dev_dataset = dataset[divide:]
    dev_labels = labels[divide:]

    # Load the pretrained embedding matrix.
    with open('word2vec.pkl', 'rb') as f:
        embedding = torch.FloatTensor(pickle.load(f))

    print("Training from scratch!")
    if config.model.module == "BiLSTM":
        net = BiLSTMNet(config.model.vocab_size, config.model.embedd_size,
                        config.model.hidden_size, config.model.max_seq_len,
                        config.model.class_num, config.model.dropout,
                        embedding, config.training.fix, config.model.n_layers)
    elif config.model.module == "BiGRU":
        net = BiGRUNet(config.model.vocab_size, config.model.embedd_size,
                       config.model.hidden_size, config.model.max_seq_len,
                       config.model.class_num, config.model.dropout,
                       config.model.n_layers)
    elif config.model.module == "BiLSTMSum":
        net = BiLSTMSumNet(config.model.vocab_size, config.model.embedd_size,
                           config.model.hidden_size, config.model.max_seq_len,
                           config.model.class_num, config.model.dropout,
                           embedding, config.training.fix, config.model.n_layers)
    else:
        raise ValueError("Undefined network")
    net.to(device)

    batch_size = config.training.batch_size
    max_seq_len = config.model.max_seq_len
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adagrad(filter(lambda p: p.requires_grad, net.parameters()),
                              lr=config.training.lr,
                              weight_decay=config.training.weight_decay)  # L2

    print("Start training!")
    best_f1 = 0.0
    for epoch in range(config.training.epochs):
        total_loss = 0.0
        start = time.time()
        result = []

        # train
        for i in tqdm(range(0, len(train_dataset), batch_size)):
            optimizer.zero_grad()
            x = train_dataset[i:i + batch_size]
            label = train_labels[i:i + batch_size]
            x = sent_to_tensor(x, word_to_id, max_seq_len).to(device)
            label = torch.LongTensor(label).to(device)

            output = net(x)
            result.extend(list(torch.max(output, 1)[1].cpu().numpy()))
            loss = loss_func(output, label)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        precision, recall, f1, _ = score(train_labels, result, average='macro')
        print("Epoch %d: train f1 score: %.2f  precision: %.2f  recall: %.2f" %
              (epoch, 100 * f1, 100 * precision, 100 * recall))
        print("Epoch %d train loss: %.3f  time: %.3f s" %
              (epoch, total_loss / len(train_dataset), time.time() - start))

        # dev
        with torch.no_grad():
            result = []
            for i in range(0, len(dev_dataset), batch_size):
                x = dev_dataset[i:i + batch_size]
                label = dev_labels[i:i + batch_size]
                x = sent_to_tensor(x, word_to_id, max_seq_len).to(device)
                label = torch.LongTensor(label).to(device)

                output = net(x)
                result.extend(list(torch.max(output, 1)[1].cpu().numpy()))

        # F1 score
        precision, recall, f1, _ = score(dev_labels, result, average='macro')
        print("Epoch %d: dev f1 score: %.2f  precision: %.2f  recall: %.2f" %
              (epoch, 100 * f1, 100 * precision, 100 * recall))

        # Keep the checkpoint with the best dev F1.
        if f1 > best_f1:
            best_f1 = f1
            torch.save(net,
                       config.resourses.model_path + "_" +
                       config.resourses.model_name)
            print("net saved!")
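# How 'word2vec.pkl' is produced is not shown in this file. Below is a minimal sketch
# of one way to build it, assuming pretrained vectors stored as a plain-text file
# ("word v1 v2 ...") and the same word_to_id mapping used above; rows for words
# missing from the vectors file are left as small random values. The function name,
# file format, and initialization scheme are assumptions, not the repo's actual code.
def build_embedding_pickle(vectors_path, word_to_id, embedd_size, out_path="word2vec.pkl"):
    matrix = np.random.uniform(-0.05, 0.05, (len(word_to_id), embedd_size))
    with open(vectors_path, encoding="utf-8") as f:
        for line in f:
            parts = line.rstrip().split(" ")
            word, vec = parts[0], parts[1:]
            if word in word_to_id and len(vec) == embedd_size:
                matrix[word_to_id[word]] = np.asarray(vec, dtype=np.float32)
    with open(out_path, "wb") as f:
        pickle.dump(matrix.astype(np.float32), f)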
def train(config):
    choice = "cuda" if torch.cuda.is_available() else "cpu"
    print(choice + " is available")
    device = torch.device(choice)

    print("Training from scratch!")
    model = Model(config.model.vocab_size, config.model.embedd_size,
                  config.model.hidden_size, config.model.max_seq_len,
                  config.model.n_layers)

    pdtb = PDTB(config)
    train_arg1_sents, train_arg2_sents, train_labels = pdtb.load_PDTB("train")
    dev_arg1_sents, dev_arg2_sents, dev_labels = pdtb.load_PDTB("dev")
    word_to_id = pdtb.build_vocab()

    model.to(device)
    start = time.time()
    model.load_pretrained_embedding(config.training.fix_embed,
                                    config.resourses.glove_path, word_to_id)
    print("Loading embedding took %.3f s" % (time.time() - start))

    batch_size = config.training.batch_size
    max_seq_len = config.model.max_seq_len
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                           lr=config.training.lr,
                           weight_decay=config.training.weight_decay)  # L2

    print("Start training!")
    best_f1 = 0.0
    for epoch in range(config.training.epochs):
        total_loss = 0.0
        start = time.time()
        result = []

        # train
        for i in range(0, len(train_arg1_sents), batch_size):
            optimizer.zero_grad()
            arg1 = train_arg1_sents[i:i + batch_size]
            arg2 = train_arg2_sents[i:i + batch_size]
            label = train_labels[i:i + batch_size]

            arg1 = sent_to_tensor(arg1, word_to_id, max_seq_len).to(device)
            arg2 = sent_to_tensor(arg2, word_to_id, max_seq_len).to(device)
            label = torch.LongTensor(label).to(device)

            output = model(arg1, arg2)
            result.extend(list(torch.max(output, 1)[1].cpu().numpy()))
            loss = loss_func(output, label)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        precision, recall, f1, _ = score(train_labels, result, average='binary')
        print("Epoch %d: train f1 score: %.2f  precision: %.2f  recall: %.2f" %
              (epoch, 100 * f1, 100 * precision, 100 * recall))
        print("Epoch %d train loss: %.3f  time: %.3f s" %
              (epoch, total_loss / len(train_arg1_sents), time.time() - start))

        # dev
        with torch.no_grad():
            result = []
            for i in range(0, len(dev_arg1_sents), batch_size):
                arg1 = dev_arg1_sents[i:i + batch_size]
                arg2 = dev_arg2_sents[i:i + batch_size]
                label = dev_labels[i:i + batch_size]

                arg1 = sent_to_tensor(arg1, word_to_id, max_seq_len).to(device)
                arg2 = sent_to_tensor(arg2, word_to_id, max_seq_len).to(device)
                label = torch.LongTensor(label).to(device)

                output = model(arg1, arg2)
                result.extend(list(torch.max(output, 1)[1].cpu().numpy()))

        # F1 score
        precision, recall, f1, _ = score(dev_labels, result, average='binary')
        print("Epoch %d: dev f1 score: %.2f  precision: %.2f  recall: %.2f" %
              (epoch, 100 * f1, 100 * precision, 100 * recall))

        # Keep the checkpoint with the best dev F1.
        if f1 > best_f1:
            best_f1 = f1
            torch.save(model,
                       config.resourses.model_path + config.type + "_" +
                       config.resourses.model_name)
            print("Model saved!")
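# `Model.load_pretrained_embedding` is defined elsewhere in the repo. Below is a rough
# sketch of what such a method typically does, assuming the model holds an nn.Embedding
# attribute named `self.embedding` and that glove_path points at a standard GloVe text
# file ("word v1 v2 ..."); the attribute name and details are assumptions, not the
# repo's actual implementation.
def load_pretrained_embedding(self, fix_embed, glove_path, word_to_id):
    weight = self.embedding.weight.data  # shape: (vocab_size, embedd_size)
    embedd_size = weight.size(1)
    with open(glove_path, encoding="utf-8") as f:
        for line in f:
            parts = line.rstrip().split(" ")
            word, vec = parts[0], parts[1:]
            if word in word_to_id and len(vec) == embedd_size:
                weight[word_to_id[word]] = torch.tensor(
                    [float(v) for v in vec], dtype=torch.float, device=weight.device)
    # Optionally freeze the embedding layer; the requires_grad filter used when
    # building the optimizer above then skips it.
    self.embedding.weight.requires_grad = not fix_embed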