def train_model():
    rnn_clf = RNNSequenceClassifier(num_classes=2, embedding_dim=300 + 1024 + 50,
                                    hidden_size=300, num_layers=1, bidir=True,
                                    dropout1=0.3, dropout2=0.2, dropout3=0.2)
    # Move the model to the GPU if available
    if using_GPU:
        rnn_clf = rnn_clf.cuda()
    # Set up criterion for calculating loss
    nll_criterion = nn.NLLLoss()
    # Set up an optimizer for updating the parameters of the rnn_clf
    rnn_clf_optimizer = optim.SGD(rnn_clf.parameters(), lr=0.01, momentum=0.9)
    # Number of epochs (passes through the dataset) to train the model for.
    num_epochs = 20

    ''' 3. 2 train model '''
    training_loss = []
    val_loss = []
    training_f1 = []
    val_f1 = []
    # A counter for the number of gradient updates
    num_iter = 0
    for epoch in tqdm(range(num_epochs)):
        # print("Starting epoch {}".format(epoch + 1))
        for (example_text, example_lengths, labels) in train_dataloader_vua:
            example_text = Variable(example_text)
            example_lengths = Variable(example_lengths)
            labels = Variable(labels)
            if using_GPU:
                example_text = example_text.cuda()
                example_lengths = example_lengths.cuda()
                labels = labels.cuda()
            # predicted shape: (batch_size, 2)
            predicted = rnn_clf(example_text, example_lengths)
            batch_loss = nll_criterion(predicted, labels)
            rnn_clf_optimizer.zero_grad()
            batch_loss.backward()
            rnn_clf_optimizer.step()
            num_iter += 1
            # Calculate validation and training set loss and accuracy every 200 gradient updates
            if num_iter % 200 == 0:
                avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(
                    val_dataloader_vua, rnn_clf, nll_criterion, using_GPU)
                val_loss.append(avg_eval_loss)
                val_f1.append(f1)
                print("Iteration {}. Validation Loss {}. Accuracy {}. Precision {}. Recall {}. F1 {}. class-wise F1 {}.".format(
                    num_iter, avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1))
                filename = f'../models/classification/VUA_iter_{str(num_iter)}.pt'
                torch.save(rnn_clf.state_dict(), filename)
    # print("Training done!")
    return rnn_clf, nll_criterion
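The training loop above calls an evaluate() helper that is not defined in this excerpt. Below is a minimal sketch of what such a helper could look like, assuming the classifier returns log-probabilities (matching nn.NLLLoss) and that precision/recall/F1 are computed with scikit-learn for the positive class; the name evaluate_sketch and the exact metric choices are assumptions, not the original implementation.

# Hypothetical sketch of an evaluate() helper compatible with the loop above.
import torch
from sklearn.metrics import precision_recall_fscore_support, f1_score

def evaluate_sketch(dataloader, model, criterion, using_GPU):
    model.eval()
    total_loss, num_batches = 0.0, 0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for example_text, example_lengths, labels in dataloader:
            if using_GPU:
                example_text = example_text.cuda()
                example_lengths = example_lengths.cuda()
                labels = labels.cuda()
            log_probs = model(example_text, example_lengths)
            total_loss += criterion(log_probs, labels).item()
            num_batches += 1
            all_preds.extend(log_probs.argmax(dim=1).cpu().tolist())
            all_labels.extend(labels.cpu().tolist())
    model.train()
    accuracy = sum(p == l for p, l in zip(all_preds, all_labels)) / len(all_labels)
    precision, recall, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, average='binary')
    class_wise_f1 = f1_score(all_labels, all_preds, average=None)
    return total_loss / num_batches, accuracy, precision, recall, f1, class_wise_f1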
def load_model(filename):
    rnn_clf = RNNSequenceClassifier(num_classes=2, embedding_dim=300 + 1024 + 50,
                                    hidden_size=300, num_layers=1, bidir=True,
                                    dropout1=0.3, dropout2=0.2, dropout3=0.2)
    rnn_clf.load_state_dict(torch.load(filename))
    rnn_clf.cuda()
    return rnn_clf
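A possible usage of the two helpers above, assuming the VUA dataloaders and the ../models/classification/ directory already exist; the iteration number 200 is illustrative and assumes at least one checkpoint was written by train_model(). Note that load_model() moves the model to the GPU unconditionally; on a CPU-only machine one would need torch.load(filename, map_location='cpu') instead.

# Illustrative round-trip: train, then reload a saved checkpoint for evaluation.
rnn_clf, nll_criterion = train_model()
rnn_clf = load_model('../models/classification/VUA_iter_200.pt')
rnn_clf.eval()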
                                 collate_fn=TextDataset.collate_fn)
val_dataloader_mohX = DataLoader(dataset=val_dataset_mohX, batch_size=batch_size,
                                 shuffle=True, collate_fn=TextDataset.collate_fn)

""" 3. Model training """
''' 3. 1 set up model, loss criterion, optimizer '''
# Instantiate the model
# embedding_dim = glove + elmo + suffix indicator
# dropout1: dropout on input to RNN
# dropout2: dropout in RNN; would be used if num_layers != 1
# dropout3: dropout on hidden state of RNN to linear layer
rnn_clf = RNNSequenceClassifier(num_classes=2, embedding_dim=300 + 1024 + 50,
                                hidden_size=300, num_layers=1, bidir=True,
                                dropout1=0.2, dropout2=0, dropout3=0.2)
# Move the model to the GPU if available
if using_GPU:
    rnn_clf = rnn_clf.cuda()
# Set up criterion for calculating loss
nll_criterion = nn.NLLLoss()
# Set up an optimizer for updating the parameters of the rnn_clf
rnn_clf_optimizer = optim.SGD(rnn_clf.parameters(), lr=0.02, momentum=0.9)
# Number of epochs (passes through the dataset) to train the model for.
num_epochs = 30

''' 3. 2 train model '''
training_loss = []
3. Model training """
''' 3. 1 set up model, loss criterion, optimizer '''
# Instantiate the model
# embedding_dim = glove + elmo (None, not used here) + suffix indicator
# dropout1: dropout on input to RNN
# dropout2: dropout in RNN; would be used if num_layers != 1
# dropout3: dropout on hidden state of RNN to linear layer
rnn_clf = RNNSequenceClassifier(num_classes=2, embedding_dim=350, hidden_size=300,
                                num_layers=1, bidir=True,
                                dropout1=0.2, dropout2=0, dropout3=0)
# Move the model to the GPU if available
if using_GPU:
    rnn_clf = rnn_clf.cuda()
# Set up criterion for calculating loss
nll_criterion = nn.NLLLoss()
# Set up an optimizer for updating the parameters of the rnn_clf
rnn_clf_optimizer = optim.Adam(rnn_clf.parameters(), lr=0.001)
# Number of epochs (passes through the dataset) to train the model for.
num_epochs = 40

''' 3. 2 train model
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Path options.
    parser.add_argument("--pretrained_w2v_model_path", required=True, type=str,
                        help="Path of the w2v pretrained model.")
    parser.add_argument("--query_matrix_path", required=True, type=str,
                        help="Path of the query matrix.")
    parser.add_argument("--summary_result_path", required=True, type=str,
                        help="Path of the summary result.")
    parser.add_argument("--output_result_path", required=True, type=str,
                        help="Path of the output result.")
    parser.add_argument("--train_path", type=str, required=True,
                        help="Path of the trainset.")
    parser.add_argument("--dev_path", type=str, required=True,
                        help="Path of the devset.")
    parser.add_argument("--test_path", type=str, required=True,
                        help="Path of the testset.")
    parser.add_argument("--vocab_path", type=str, required=True,
                        help="Path of the vocab.")
    parser.add_argument("--elmo_path", type=str, required=True,
                        help="Path of the elmo features.")

    # Model options.
    parser.add_argument("--language_type", type=str, choices=["en", "zh"], required=True,
                        help="Language type.")
    parser.add_argument("--num_classes", type=int, default=3,
                        help="Num of the classes.")
    parser.add_argument("--batch_size", type=int, default=64,
                        help="Batch size.")
    parser.add_argument("--require_improvement", type=int, default=5,
                        help="Require improvement.")
    parser.add_argument("--epochs_num", type=int, default=100,
                        help="Number of epochs.")
    parser.add_argument("--w2v_embedding_dim", type=int, required=True,
                        help="w2v embedding dim.")
    parser.add_argument("--elmo_embedding_dim", type=int, default=1024,
                        help="elmo embedding dim.")
    parser.add_argument("--input_dim", type=int, required=True,
                        help="input embedding dim.")
    parser.add_argument("--seq_length", type=int, default=128,
                        help="Sequence length.")
    parser.add_argument("--hidden_size", type=int, default=200,
                        help="hidden size.")
    parser.add_argument("--layers_num", type=int, default=2,
                        help="Number of layers.")
    parser.add_argument("--attention_query_size", type=int, default=200,
                        help="Size of attention query matrix.")
    parser.add_argument("--attention_layer",
                        choices=["att", "m_a", "m_pre_orl_a", "m_pre_orl_pun_a",
                                 "m_pol_untrain_a", "mpa", "mpoa"],
                        required=True, help="attention type.")
    parser.add_argument("--pretrain_model_type", choices=["w2v", "elmo", "none"],
                        required=True, help="pretrain model type.")

    # Optimizer options.
    parser.add_argument("--learning_rate", type=float, default=0.1,
                        help="Learning rate.")
    parser.add_argument("--momentum", type=float, default=0.9,
                        help="momentum.")

    # Training options.
    parser.add_argument("--dropout", type=float, default=0.2,
                        help="Dropout.")
    parser.add_argument("--is_bidir", type=int, default=2,
                        help="2 for bidirectional RNN, 1 for unidirectional.")
    parser.add_argument("--report_steps", type=int, default=100,
                        help="Specific steps to print prompt.")
    parser.add_argument("--seed", type=int, default=7,
                        help="Random seed.")
    parser.add_argument("--run_type", type=str, required=True,
                        help="usage: python main_vua.py [train / test]")

    args = parser.parse_args()

    # Set numpy, random, etc. seeds
    set_seed(args.seed)

    # Set vocab
    vocab = Vocab()
    vocab.load(args.vocab_path)
    label_columns = read_cataloge(args.dev_path)

    # Set embedding
    embeddings = get_embedding_matrix(args, vocab, normalization=False)
    elmo_embedding = h5py.File(args.elmo_path, 'r')
    query_matrix = get_query_matrix(args)

    # For simplicity, we use DataParallel wrapper to use multiple GPUs.
    model = RNNSequenceClassifier(args, embeddings, query_matrix)
    model = model.cuda()

    best_json = {
        'F_macro': 0, 'P_macro': 0, 'R_macro': 0, 'Best_F_macro': 0, 'ACC': 0,
        'F_negative': 0, 'F_positive': 0, 'Predict': [], 'Label': [],
        'Weights': [], 'Last_up_epoch': 0, 'Total_batch_loss': 0,
        'F_neutral': 0, 'Time': 0, 'Total_orthogonal_loss': 0,
        'train_num': 0, 'test_num': 0, 'dev_num': 0
    }

    def evaluate(args, is_test):
        model.eval()
        if is_test:
            print("Start testing.")
            dataset = read_dataset(args, args.test_path, label_columns, vocab)
            best_json['test_num'] = len(dataset)
            writer_result = open(os.path.join(args.output_result_path, 'result.txt'),
                                 encoding='utf-8', mode='w')
            writer_summary_result = open(os.path.join(args.summary_result_path,
                                                      'summary_result.txt'), mode='a')
        else:
            dataset = read_dataset(args, args.dev_path, label_columns, vocab)
            best_json['dev_num'] = len(dataset)
        random.shuffle(dataset)
        input_ids = torch.LongTensor([example[0] for example in dataset])
        label_ids = torch.LongTensor([example[1] for example in dataset])
        length_ids = torch.LongTensor([example[2] for example in dataset])
        input = [example[3] for example in dataset]
        if is_test:
            batch_size = 1
        else:
            batch_size = args.batch_size
        for i, (input_ids_batch, label_ids_batch, length_ids_batch) in enumerate(
                batch_loader(batch_size, input_ids, label_ids, length_ids)):
            model.zero_grad()
            input_ids_batch = input_ids_batch.cuda()
            label_ids_batch = label_ids_batch.cuda()
            length_ids_batch = length_ids_batch.cuda()
            if args.attention_layer == 'att':
                predicted, weight = model(input_ids_batch, length_ids_batch,
                                          elmo_embedding)
            else:
                predicted, weight, _ = model(input_ids_batch, length_ids_batch,
                                             elmo_embedding)
            best_json['Weights'] += weight.squeeze(dim=1).cpu().detach().numpy().tolist()
            _, predicted_labels = torch.max(predicted.data, 1)
            best_json['Predict'] += predicted_labels.cpu().numpy().tolist()
            best_json['Label'] += label_ids_batch.data.cpu().numpy().tolist()
        if is_test:
            details_result = metrics.classification_report(best_json['Label'],
                                                           best_json['Predict'])
            best_json['P_macro'], best_json['R_macro'], best_json['F_macro'], _ = \
                metrics.precision_recall_fscore_support(best_json['Label'],
                                                        best_json['Predict'],
                                                        average="macro")
            best_json['ACC'] = metrics.accuracy_score(best_json['Label'],
                                                      best_json['Predict'])
            saveSenResult(input, best_json['Label'], best_json['Predict'], args,
                          best_json['Weights'])
            writer_result.writelines(details_result)
            print("Testing Acc: {:.4f}, F_macro: {:.4f}, P_macro: {:.4f}, R_macro: {:.4f}".format(
                best_json['ACC'], best_json['F_macro'],
                best_json['P_macro'], best_json['R_macro']))
            writer_result.writelines(
                "Testing Acc: {:.4f}, F_macro: {:.4f}, P_macro: {:.4f}, R_macro: {:.4f}".format(
                    best_json['ACC'], best_json['F_macro'],
                    best_json['P_macro'], best_json['R_macro']))
            writer_summary_result.writelines('Save path: ' + args.output_result_path + '\n')
            writer_summary_result.writelines(
                "Testing Acc: {:.4f}, F_macro: {:.4f}, P_macro: {:.4f}, R_macro: {:.4f}\n\n".format(
                    best_json['ACC'], best_json['F_macro'],
                    best_json['P_macro'], best_json['R_macro']))
            writer_summary_result.writelines(details_result)
        else:
            best_json['P_macro'], best_json['R_macro'], best_json['F_macro'], _ = \
                metrics.precision_recall_fscore_support(best_json['Label'],
                                                        best_json['Predict'],
                                                        average="macro")
            best_json['ACC'] = metrics.accuracy_score(best_json['Label'],
                                                      best_json['Predict'])

    def train():
        print("Start training.")
        mkdir(args.output_result_path)
        writer_process = open(os.path.join(args.output_result_path,
                                           'process.txt'), mode='w')
        writer_process.writelines("Start training.")
        trainset = read_dataset(args, args.train_path, label_columns, vocab)
        random.shuffle(trainset)
        best_json['train_num'] = len(trainset)
        input_ids = torch.LongTensor([example[0] for example in trainset])
        label_ids = torch.LongTensor([example[1] for example in trainset])
        length_ids = torch.LongTensor([example[2] for example in trainset])
        print("Batch size: ", args.batch_size)
        print("The number of training instances:", best_json['train_num'])
        start_time = time.time()
        best_json['Time'] = get_time_dif(start_time)
        print("Time usage:", best_json['Time'])
        param_optimizer = list(model.named_parameters())
        nll_criterion = nn.NLLLoss()
        if args.attention_layer == 'm_pol_untrain_a':
            optimizer_grouped_parameters = [{
                'params': [p for n, p in param_optimizer
                           if ('query_embedding.weight' not in n)],
                'weight_decay_rate': 0.01
            }]
        else:
            optimizer_grouped_parameters = [{
                'params': [p for n, p in param_optimizer],
                'weight_decay_rate': 0.01
            }]
        optimizer = optim.SGD(optimizer_grouped_parameters, lr=args.learning_rate,
                              momentum=args.momentum)
        for epoch in range(1, args.epochs_num + 1):
            model.train()
            for i, (input_ids_batch, label_ids_batch, length_ids_batch) in enumerate(
                    batch_loader(args.batch_size, input_ids, label_ids, length_ids)):
                model.zero_grad()
                input_ids_batch = input_ids_batch.cuda()
                label_ids_batch = label_ids_batch.cuda()
                length_ids_batch = length_ids_batch.cuda()
                if args.attention_layer == 'att':
                    predicted_ids_batch, _ = model(input_ids_batch, length_ids_batch,
                                                   elmo_embedding)
                else:
                    predicted_ids_batch, _, orthogonal_loss = model(
                        input_ids_batch, length_ids_batch, elmo_embedding)
                    best_json['Total_orthogonal_loss'] += orthogonal_loss
                batch_loss = nll_criterion(predicted_ids_batch, label_ids_batch)
                best_json['Total_batch_loss'] += batch_loss
                if args.attention_layer != 'm_pre_orl_pun_a' and args.attention_layer != 'mpoa':
                    optimizer.zero_grad()
                    batch_loss.backward()
                    optimizer.step()
                else:
                    optimizer.zero_grad()
                    (0.1 * orthogonal_loss).backward(retain_graph=True)
                    (0.9 * batch_loss).backward()
                    optimizer.step()
                best_json['Time'] = get_time_dif(start_time)
                if (i + 1) % args.report_steps == 0:
                    if args.attention_layer == 'att':
                        print("Epoch id: {}, Training steps: {}, Avg batch loss: {:.4f}, Time: {}".format(
                            epoch, i + 1,
                            best_json['Total_batch_loss'] / args.report_steps,
                            best_json['Time']))
                        writer_process.writelines(
                            "Epoch id: {}, Training steps: {}, Avg batch loss: {:.4f}, Time: {}".format(
                                epoch, i + 1,
                                best_json['Total_batch_loss'] / args.report_steps,
                                best_json['Time']))
                    else:
                        print("Epoch id: {}, Training steps: {}, Avg batch loss: {:.4f}, Avg orthogonal loss: {:.4f}, Time: {}".format(
                            epoch, i + 1,
                            best_json['Total_batch_loss'] / args.report_steps,
                            best_json['Total_orthogonal_loss'] / args.report_steps,
                            best_json['Time']))
                        writer_process.writelines(
                            "Epoch id: {}, Training steps: {}, Avg batch loss: {:.4f}, Avg orthogonal loss: {:.4f}, Time: {}".format(
                                epoch, i + 1,
                                best_json['Total_batch_loss'] / args.report_steps,
                                best_json['Total_orthogonal_loss'] / args.report_steps,
                                best_json['Time']))
                    best_json['Total_batch_loss'] = 0
                    best_json['Total_orthogonal_loss'] = 0
            # Evaluate on the dev set
            evaluate(args, False)
            best_json['Time'] = get_time_dif(start_time)
            if best_json['F_macro'] > best_json['Best_F_macro'] + 0.001:
                best_json['Best_F_macro'] = best_json['F_macro']
                best_json['Last_up_epoch'] = epoch
                torch.save(model, os.path.join(args.output_result_path, 'result.pkl'))
                print("Dev Acc: {:.4f}, F_macro: {:.4f}, Time: {} *".format(
                    best_json['ACC'], best_json['F_macro'], best_json['Time']))
                writer_process.writelines(
                    "Dev Acc: {:.4f}, F_macro: {:.4f}, Time: {} *".format(
                        best_json['ACC'], best_json['F_macro'], best_json['Time']))
            elif epoch - best_json['Last_up_epoch'] == args.require_improvement:
                print("No optimization for a long time, auto-stopping...")
                writer_process.writelines(
                    "No optimization for a long time, auto-stopping...")
                break
            else:
                print("Dev Acc: {:.4f}, F_macro: {:.4f}, Time: {} ".format(
                    best_json['ACC'], best_json['F_macro'], best_json['Time']))
                writer_process.writelines(
                    "Dev Acc: {:.4f}, F_macro: {:.4f}, Time: {} ".format(
                        best_json['ACC'], best_json['F_macro'], best_json['Time']))

    if args.run_type == 'train':
        train()
    else:
        model = torch.load(os.path.join(args.output_result_path, 'result.pkl'))
        evaluate(args, True)
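The train() and evaluate() closures above iterate over mini-batches via a batch_loader() helper that is not shown in this excerpt. A minimal sketch of what such a generator could look like, assuming it simply yields aligned slices of the three id tensors; the name batch_loader_sketch is hypothetical.

# Hypothetical sketch of the batch_loader generator used above.
def batch_loader_sketch(batch_size, input_ids, label_ids, length_ids):
    instances_num = input_ids.size(0)
    for start in range(0, instances_num, batch_size):
        # Yield one aligned mini-batch of inputs, labels, and lengths.
        yield (input_ids[start:start + batch_size],
               label_ids[start:start + batch_size],
               length_ids[start:start + batch_size])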
def train_model():
    optimal_f1s = []
    optimal_ps = []
    optimal_rs = []
    optimal_accs = []
    # predictions_all = []
    for i in tqdm(range(10)):
        ''' 2. 3 set up Dataloader for batching '''
        training_sentences = []
        training_labels = []
        for j in range(10):
            if j != i:
                training_sentences.extend(ten_folds[j][0])
                training_labels.extend(ten_folds[j][1])
        training_dataset_trofi = TextDataset(training_sentences, training_labels)
        val_dataset_trofi = TextDataset(ten_folds[i][0], ten_folds[i][1])

        # Data-related hyperparameters
        batch_size = 10
        # Set up a DataLoader for the training, validation, and test dataset
        train_dataloader_trofi = DataLoader(dataset=training_dataset_trofi,
                                            batch_size=batch_size, shuffle=True,
                                            collate_fn=TextDataset.collate_fn)
        val_dataloader_trofi = DataLoader(dataset=val_dataset_trofi,
                                          batch_size=batch_size, shuffle=False,
                                          collate_fn=TextDataset.collate_fn)

        """ 3. Model training """
        ''' 3. 1 set up model, loss criterion, optimizer '''
        # Instantiate the model
        # embedding_dim = glove + elmo + suffix indicator
        # dropout1: dropout on input to RNN
        # dropout2: dropout in RNN; would be used if num_layers != 1
        # dropout3: dropout on hidden state of RNN to linear layer
        rnn_clf = RNNSequenceClassifier(num_classes=2, embedding_dim=300 + 1024 + 50,
                                        hidden_size=300, num_layers=1, bidir=True,
                                        dropout1=0.2, dropout2=0, dropout3=0)
        # Move the model to the GPU if available
        if using_GPU:
            rnn_clf = rnn_clf.cuda()
        # Set up criterion for calculating loss
        nll_criterion = nn.NLLLoss()
        # Set up an optimizer for updating the parameters of the rnn_clf
        rnn_clf_optimizer = optim.Adam(rnn_clf.parameters(), lr=0.001)
        # Number of epochs (passes through the dataset) to train the model for.
        num_epochs = 15

        ''' 3. 2 train model '''
        training_loss = []
        val_loss = []
        training_f1 = []
        val_f1 = []
        val_p = []
        val_r = []
        val_acc = []
        # A counter for the number of gradient updates
        num_iter = 0
        train_dataloader = train_dataloader_trofi
        val_dataloader = val_dataloader_trofi
        model_index = 0
        for epoch in range(num_epochs):
            # print("Starting epoch {}".format(epoch + 1))
            for (example_text, example_lengths, labels) in train_dataloader:
                example_text = Variable(example_text)
                example_lengths = Variable(example_lengths)
                labels = Variable(labels)
                if using_GPU:
                    example_text = example_text.cuda()
                    example_lengths = example_lengths.cuda()
                    labels = labels.cuda()
                # predicted shape: (batch_size, 2)
                predicted = rnn_clf(example_text, example_lengths)
                batch_loss = nll_criterion(predicted, labels)
                rnn_clf_optimizer.zero_grad()
                batch_loss.backward()
                rnn_clf_optimizer.step()
                num_iter += 1
                # Calculate validation and training set loss and accuracy every 200 gradient updates
                if num_iter % 200 == 0:
                    avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(
                        val_dataloader, rnn_clf, nll_criterion, using_GPU)
                    val_loss.append(avg_eval_loss)
                    val_f1.append(f1)
                    val_p.append(precision)
                    val_r.append(recall)
                    val_acc.append(eval_accuracy.item())
                    # print("Iteration {}. Validation Loss {}. Validation Accuracy {}. Validation Precision {}. Validation Recall {}. Validation F1 {}. Validation class-wise F1 {}.".format(
                    #     num_iter, avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1))
                    # filename = f'../models/classification/TroFi_fold_{str(i)}_iter_{str(num_iter)}.pt'
                    # torch.save(rnn_clf, filename)
                    model_index += 1
                    # avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(
                    #     train_dataloader, rnn_clf, nll_criterion, using_GPU)
                    # training_loss.append(avg_eval_loss)
                    # training_f1.append(f1)
                    # print("Iteration {}. Training Loss {}. Training Accuracy {}. Training Precision {}. Training Recall {}. Training F1 {}. Training class-wise F1 {}.".format(
                    #     num_iter, avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1))

        """ additional training! """
        # rnn_clf_optimizer = optim.Adam(rnn_clf.parameters(), lr=0.0005)
        # for epoch in range(num_epochs):
        #     print("Starting epoch {}".format(epoch + 1))
        #     for (example_text, example_lengths, labels) in train_dataloader:
        #         example_text = Variable(example_text)
        #         example_lengths = Variable(example_lengths)
        #         labels = Variable(labels)
        #         if using_GPU:
        #             example_text = example_text.cuda()
        #             example_lengths = example_lengths.cuda()
        #             labels = labels.cuda()
        #         # predicted shape: (batch_size, 2)
        #         predicted = rnn_clf(example_text, example_lengths)
        #         batch_loss = nll_criterion(predicted, labels)
        #         rnn_clf_optimizer.zero_grad()
        #         batch_loss.backward()
        #         rnn_clf_optimizer.step()
        #         num_iter += 1
        #         # Calculate validation and training set loss and accuracy every 100 gradient updates
        #         if num_iter % 100 == 0:
        #             avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(
        #                 val_dataloader, rnn_clf, nll_criterion, using_GPU)
        #             val_loss.append(avg_eval_loss)
        #             val_f1.append(f1)
        #             val_p.append(precision)
        #             val_r.append(recall)
        #             val_acc.append(eval_accuracy)
        #             print("Iteration {}. Validation Loss {}. Validation Accuracy {}. Validation Precision {}. Validation Recall {}. Validation F1 {}. Validation class-wise F1 {}.".format(
        #                 num_iter, avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1))
        #             model_index += 1
        # print("Training done for fold {}".format(i))

        """ 3.3 plot the training process: MET F1 and losses for validation and training dataset """
        # plt.figure(0)
        # plt.title('F1 for TroFi dataset on fold ' + str(i))
        # plt.xlabel('iteration (unit:200)')
        # plt.ylabel('F1')
        # plt.plot(val_f1, 'g')
        # plt.plot(val_p, 'r')
        # plt.plot(val_r, 'b')
        # plt.plot(val_acc, 'c')
        # plt.plot(training_f1, 'b')
        # plt.legend(['Validation F1', 'Validation precision', 'Validation recall',
        #             'Validation accuracy', 'Training F1'], loc='upper right')
        # plt.show()
        # plt.figure(1)
        # plt.title('Loss for TroFi dataset on fold ' + str(i))
        # plt.xlabel('iteration (unit:200)')
        # plt.ylabel('Loss')
        # plt.plot(val_loss, 'g')
        # plt.plot(training_loss, 'b')
        # plt.legend(['Validation loss', 'Training loss'], loc='upper right')
        # plt.show()

        """ store the best f1 """
        # print('val_f1: ', val_f1)
        idx = 0
        if math.isnan(max(val_f1)):
            optimal_f1s.append(max(val_f1[6:]))
            idx = val_f1.index(optimal_f1s[-1])
            optimal_ps.append(val_p[idx])
            optimal_rs.append(val_r[idx])
            optimal_accs.append(val_acc[idx])
        else:
            optimal_f1s.append(max(val_f1))
            idx = val_f1.index(optimal_f1s[-1])
            optimal_ps.append(val_p[idx])
            optimal_rs.append(val_r[idx])
            optimal_accs.append(val_acc[idx])
        # filename = '../models/LSTMSuffixElmoAtt_TroFi_fold_' + str(i) + '_epoch_' + str(idx) + '.pt'
        # temp_model = torch.load(filename)
        # print('best model: ', filename)
        # predictions_all.extend(test(val_dataloader_TroFi, temp_model, using_GPU))
    return (np.mean(np.array(optimal_ps)), np.mean(np.array(optimal_rs)),
            np.mean(np.array(optimal_f1s)), np.mean(np.array(optimal_accs)))
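A possible way to call the 10-fold TroFi routine above and report the fold-averaged scores it returns (the variable names below are illustrative):

# Run 10-fold cross-validation on TroFi and print the averaged metrics.
mean_p, mean_r, mean_f1, mean_acc = train_model()
print('TroFi 10-fold averages: P={:.4f} R={:.4f} F1={:.4f} Acc={:.4f}'.format(
    mean_p, mean_r, mean_f1, mean_acc))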
def train_model():
    optimal_f1s = []
    optimal_ps = []
    optimal_rs = []
    optimal_accs = []
    for i in tqdm(range(10)):
        ''' 2. 3 set up Dataloader for batching '''
        training_sentences = []
        training_labels = []
        for j in range(10):
            if j != i:
                training_sentences.extend(ten_folds[j][0])
                training_labels.extend(ten_folds[j][1])
        training_dataset_mohX = TextDataset(training_sentences, training_labels)
        val_dataset_mohX = TextDataset(ten_folds[i][0], ten_folds[i][1])

        # Data-related hyperparameters
        batch_size = 10
        # Set up a DataLoader for the training, validation, and test dataset
        train_dataloader_mohX = DataLoader(dataset=training_dataset_mohX,
                                           batch_size=batch_size, shuffle=True,
                                           collate_fn=TextDataset.collate_fn)
        val_dataloader_mohX = DataLoader(dataset=val_dataset_mohX,
                                         batch_size=batch_size, shuffle=True,
                                         collate_fn=TextDataset.collate_fn)

        """ 3. Model training """
        ''' 3. 1 set up model, loss criterion, optimizer '''
        # Instantiate the model
        # embedding_dim = glove + elmo + suffix indicator
        # dropout1: dropout on input to RNN
        # dropout2: dropout in RNN; would be used if num_layers != 1
        # dropout3: dropout on hidden state of RNN to linear layer
        rnn_clf = RNNSequenceClassifier(num_classes=2, embedding_dim=300 + 1024 + 50,
                                        hidden_size=300, num_layers=1, bidir=True,
                                        dropout1=0.2, dropout2=0, dropout3=0.2)
        # Move the model to the GPU if available
        if using_GPU:
            rnn_clf = rnn_clf.cuda()
        # Set up criterion for calculating loss
        nll_criterion = nn.NLLLoss()
        # Set up an optimizer for updating the parameters of the rnn_clf
        rnn_clf_optimizer = optim.SGD(rnn_clf.parameters(), lr=0.02, momentum=0.9)
        # Number of epochs (passes through the dataset) to train the model for.
        num_epochs = 30

        ''' 3. 2 train model '''
        training_loss = []
        val_loss = []
        training_f1 = []
        val_f1 = []
        val_p = []
        val_r = []
        val_acc = []
        # A counter for the number of gradient updates
        num_iter = 0
        train_dataloader = train_dataloader_mohX
        val_dataloader = val_dataloader_mohX
        for epoch in range(num_epochs):
            # print("Starting epoch {}".format(epoch + 1))
            for (example_text, example_lengths, labels) in train_dataloader:
                example_text = Variable(example_text)
                example_lengths = Variable(example_lengths)
                labels = Variable(labels)
                if using_GPU:
                    example_text = example_text.cuda()
                    example_lengths = example_lengths.cuda()
                    labels = labels.cuda()
                # predicted shape: (batch_size, 2)
                predicted = rnn_clf(example_text, example_lengths)
                batch_loss = nll_criterion(predicted, labels)
                rnn_clf_optimizer.zero_grad()
                batch_loss.backward()
                rnn_clf_optimizer.step()
                num_iter += 1
                # Calculate validation and training set loss and accuracy every 200 gradient updates
                if num_iter % 200 == 0:
                    avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(
                        val_dataloader, rnn_clf, nll_criterion, using_GPU)
                    val_loss.append(avg_eval_loss)
                    val_f1.append(f1)
                    val_p.append(precision)
                    val_r.append(recall)
                    val_acc.append(eval_accuracy.item())
                    # print("Iteration {}. Validation Loss {}. Validation Accuracy {}. Validation Precision {}. Validation Recall {}. Validation F1 {}. Validation class-wise F1 {}.".format(
                    #     num_iter, avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1))
                    # filename = f'../models/classification/MOHX_fold_{str(i)}_iter_{str(num_iter)}.pt'
                    # torch.save(rnn_clf, filename)
                    avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(
                        train_dataloader, rnn_clf, nll_criterion, using_GPU)
                    training_loss.append(avg_eval_loss)
                    training_f1.append(f1)
                    # print("Iteration {}. Training Loss {}. Training Accuracy {}. Training Precision {}. Training Recall {}. Training F1 {}. Training class-wise F1 {}.".format(
                    #     num_iter, avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1))
        # print("Training done for fold {}".format(i))

        # store the best f1
        idx = 0
        if math.isnan(max(val_f1)):
            optimal_f1s.append(max(val_f1[6:]))
            idx = val_f1.index(optimal_f1s[-1])
            optimal_ps.append(val_p[idx])
            optimal_rs.append(val_r[idx])
            optimal_accs.append(val_acc[idx])
        else:
            optimal_f1s.append(max(val_f1))
            idx = val_f1.index(optimal_f1s[-1])
            optimal_ps.append(val_p[idx])
            optimal_rs.append(val_r[idx])
            optimal_accs.append(val_acc[idx])
    return (np.mean(np.array(optimal_ps)), np.mean(np.array(optimal_rs)),
            np.mean(np.array(optimal_f1s)), np.mean(np.array(optimal_accs)))

# print('F1 on MOH-X by 10-fold = ', optimal_f1s)
# print('F1 on MOH-X = ', np.mean(np.array(optimal_f1s)))
"""
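The DataLoaders in both cross-validation routines rely on TextDataset.collate_fn, which is not defined in this excerpt. Below is a minimal sketch of a compatible collate function, assuming each dataset item is an (embedded_sentence, label) pair and that batches are unpacked as (example_text, example_lengths, labels) as in the training loops above; the name collate_fn_sketch and the length-descending sort (commonly used for packed RNN inputs) are assumptions.

# Hypothetical sketch of a collate function compatible with the loops above.
import torch

def collate_fn_sketch(batch):
    # batch: list of (sentence_tensor, label) pairs, where each sentence
    # tensor has shape (seq_len, embedding_dim)
    batch.sort(key=lambda pair: pair[0].size(0), reverse=True)
    lengths = torch.LongTensor([sent.size(0) for sent, _ in batch])
    labels = torch.LongTensor([label for _, label in batch])
    max_len = lengths.max().item()
    emb_dim = batch[0][0].size(1)
    padded = torch.zeros(len(batch), max_len, emb_dim)
    for i, (sent, _) in enumerate(batch):
        # Copy each sentence into the zero-padded batch tensor.
        padded[i, :sent.size(0), :] = sent
    return padded, lengths, labels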