def evaluate_classifier(model, dataloaders, device):
    """Evaluate a fine-tuned classifier on the test split.

    Args:
        model: HuggingFace-style classifier; called with ``labels=`` so the
            output tuple starts with (loss, logits).
        dataloaders: dict containing 'test_dataloader' whose batches are
            (input_ids, attention_masks, labels) tensors.
        device: torch device to move batch tensors onto.

    Returns:
        (performance dict with 'test_loss'/'test_accuracy',
         predictions tensor, logits tensor, true-label tensor) — all on CPU.
    """
    model.eval()
    set_seed()
    with torch.no_grad():
        test_loss = 0
        predictions_list = []
        prediction_probs_list = []
        true_labels_list = []
        num_correct_preds = 0
        num_samples = 0
        for step, batch in tqdm(enumerate(dataloaders['test_dataloader']),
                                total=len(dataloaders['test_dataloader'])):
            # Load and feed data to model
            input_ids = batch[0].to(device)
            attention_masks = batch[1].to(device)
            labels = batch[2].to(device)
            # Pass labels so the model returns (loss, logits); the original
            # omitted labels= yet still unpacked a loss.
            outputs = model(input_ids, attention_mask=attention_masks, labels=labels)
            loss, logits = outputs[:2]
            test_loss += loss.item()
            # Bug fix: argmax must be taken over the logits, not the output
            # tuple, and yields a single tensor (no tuple unpacking).
            predictions = torch.argmax(logits, dim=1)
            num_correct_preds += torch.sum(predictions == labels.data)
            num_samples += labels.size(0)
            # Accumulate per-batch tensors; lists no longer shadow the
            # per-batch `predictions` tensor.
            predictions_list.append(predictions)
            prediction_probs_list.append(logits)
            true_labels_list.append(labels)
            torch.cuda.empty_cache()
        test_loss = test_loss / len(dataloaders['test_dataloader'])
        # Bug fix: accuracy uses the number of test samples seen, not the
        # length of the validation dataloader.
        test_acc = num_correct_preds / num_samples
        predictions = torch.cat(predictions_list).cpu()
        prediction_probs = torch.cat(prediction_probs_list).cpu()  # .cpu was missing parens
        true_labels = torch.cat(true_labels_list).cpu()  # was stacking only the last batch's labels
        performance = {}
        performance['test_loss'] = test_loss
        performance['test_accuracy'] = test_acc
        return performance, predictions, prediction_probs, true_labels
def evaluate_classifier(model, dataloader, device):
    """Evaluate a classifier over `dataloader` and report test accuracy.

    Args:
        model: classifier called as model(input_ids, attention_masks); the
            first element of its output is the logits tensor.
        dataloader: yields (input_ids, attention_masks, labels) batches.
        device: torch device to move batch tensors onto.

    Returns:
        ({'test_accuracy': acc}, concatenated predictions, concatenated labels)
    """
    model.eval()
    set_seed()
    with torch.no_grad():
        # Running totals and per-batch accumulators.
        test_acc = 0
        test_loss = 0
        seen = 0
        correct = 0
        all_preds = []
        all_logits = []
        all_labels = []
        for step, batch in tqdm(enumerate(dataloader), total=len(dataloader)):
            # Move the batch onto the target device and run the forward pass.
            ids, masks, gold = (t.to(device) for t in batch[:3])
            logits = model(ids, masks)[0]
            batch_preds = torch.argmax(logits, dim=1)
            correct += torch.sum(batch_preds == gold.data)
            seen += batch_preds.shape[0]
            all_preds.append(batch_preds)
            all_logits.append(logits)
            all_labels.append(gold)
            torch.cuda.empty_cache()
        acc = correct / seen
        performance = {'test_accuracy': acc}
        print(f' test accuracy: {acc:.6f}')
        return performance, torch.cat(all_preds), torch.cat(all_labels)
def bert_train_val(model, dataloaders, starting_epoch, optimizer, scheduler, epochs, device):
    """Train and validate a BERT-style sequence classifier.

    Runs epochs in [starting_epoch, epochs), accumulating per-epoch loss and
    accuracy for both splits, and saves a checkpoint (weights, optimizer and
    scheduler state, history) after the final epoch.

    Args:
        model: HuggingFace-style classifier; called with ``labels=`` so the
            output exposes ``.loss`` and ``.logits``.
        dataloaders: dict with 'train_dataloader' and 'val_dataloader', each
            yielding (input_ids, attention_masks, labels) batches.
        starting_epoch: epoch index to resume from.
        optimizer: stepped once per batch.
        scheduler: LR scheduler, stepped once per batch.
        epochs: total number of epochs.
        device: torch device for batch tensors.

    Returns:
        dict of per-epoch train/val loss and accuracy histories.
    """
    print("\n\n" + "-" * 15)
    print("| TRAINING... |")
    print("-" * 15)
    set_seed()
    start_training_time = time.time()
    # Running history for train and val
    train_loss_history = []
    val_loss_history = []
    train_acc_history = []
    val_acc_history = []
    # Training loop
    for epoch in range(starting_epoch, epochs):
        train_loss = 0
        train_acc = 0
        model.train()
        for step, batch in tqdm(enumerate(dataloaders['train_dataloader']),
                                total=len(dataloaders['train_dataloader'])):
            # Load and feed data to model
            input_ids = batch[0].to(device)
            attention_masks = batch[1].to(device)
            labels = batch[2].to(device)
            model.zero_grad()
            outputs = model(input_ids, labels=labels, attention_mask=attention_masks)
            loss = outputs.loss
            logits = outputs.logits
            train_loss += loss.item()
            # Batch accuracy on CPU; vectorized comparison replaces the
            # original element-by-element Python loop.
            logits = logits.detach().cpu().numpy()
            labels = labels.to('cpu').numpy()
            predictions = np.argmax(logits, axis=1).flatten()
            correct = int((predictions == labels).sum())
            batch_accuracy = correct / len(labels)
            train_acc += batch_accuracy
            if step % 100 == 0:
                print("Epoch: ", epoch + 1, "/", epochs, "Batch: ", step + 1, "/",
                      len(dataloaders['train_dataloader']), "Loss: ",
                      train_loss / (step + 1), "Accuracy: ", batch_accuracy)
            loss.backward()
            # Gradient clipping guards against exploding gradients
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            # Optimizer / learning-rate scheduler step
            optimizer.step()
            scheduler.step()
            torch.cuda.empty_cache()
        # Loss and accuracy results by epoch
        end_epoch_time = time.time()
        epoch_train_accuracy = train_acc / len(dataloaders['train_dataloader'])
        epoch_train_loss = train_loss / len(dataloaders['train_dataloader'])
        epoch_train_time = format_time(start_training_time, end_epoch_time)
        train_loss_history.append(epoch_train_loss)
        train_acc_history.append(epoch_train_accuracy)
        print(
            f' epoch: {epoch + 1}, train loss: {epoch_train_loss:.6f}, train accuracy: {epoch_train_accuracy:.6f}, train time:{epoch_train_time}'
        )
        # Switch to evaluation mode and run validation
        print("Validating...")
        start_val_time = time.time()
        model.eval()
        val_loss = 0
        val_acc = 0
        with torch.no_grad():
            for step, batch in tqdm(enumerate(dataloaders['val_dataloader']),
                                    total=len(dataloaders['val_dataloader'])):
                # Load and feed data to model. The original called
                # model.zero_grad() here, but no gradients exist under
                # no_grad, so the call was a no-op and has been removed.
                input_ids = batch[0].to(device)
                attention_masks = batch[1].to(device)
                labels = batch[2].to(device)
                outputs = model(input_ids, labels=labels, attention_mask=attention_masks)
                val_loss += outputs.loss.item()
                logits = outputs.logits.detach().cpu().numpy()
                labels = labels.to('cpu').numpy()
                predictions = np.argmax(logits, axis=1).flatten()
                correct = int((predictions == labels).sum())
                val_acc += correct / len(labels)
                torch.cuda.empty_cache()
        end_val_time = time.time()
        epoch_val_time = format_time(start_val_time, end_val_time)
        epoch_val_loss = val_loss / len(dataloaders['val_dataloader'])
        epoch_val_acc = val_acc / len(dataloaders['val_dataloader'])
        val_loss_history.append(epoch_val_loss)
        val_acc_history.append(epoch_val_acc)
        print(
            f' epoch: {epoch + 1}, val loss: {epoch_val_loss:.6f}, val accuracy: {epoch_val_acc:.6f}, val_time: {epoch_val_time}'
        )
        # Record results to dictionary to return
        performance_history = {
            'train_loss': train_loss_history,
            'val_loss': val_loss_history,
            'train_accuracy': train_acc_history,
            'val_accuracy': val_acc_history,
            'num_epochs': epochs
        }
        # Save model checkpoint at end of train_val run, also saves performance history
        if epoch == epochs - 1:
            checkpoint = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'performance_history': performance_history,
                'epoch': epoch + 1,
            }
            save_checkpoint(checkpoint, f"./BERTcheckpoint_{checkpoint['epoch']}.pth.tar")
    print("")
    print("Training Finished")
    return performance_history
def gpt2_train_val(model, dataloaders, tokenizer, starting_epoch, optimizer, scheduler, epochs, device):
    """Train and validate a GPT-2 language model.

    Runs epochs in [starting_epoch, epochs), tracking loss and perplexity for
    both splits, periodically printing a generated sample to visualise
    learning, and saving a checkpoint after the final epoch.

    Args:
        model: GPT-2-style LM; model(input_ids, labels=input_ids) returns a
            tuple whose first element is the LM loss.
        dataloaders: dict with 'train_dataloader' (yields input_ids tensors)
            and 'val_dataloader'.
        tokenizer: used to decode generated samples for display.
        starting_epoch: epoch index to resume from.
        optimizer: stepped once per batch.
        scheduler: LR scheduler, stepped once per batch.
        epochs: total number of epochs.
        device: torch device for batch tensors.

    Returns:
        dict of per-epoch train/val loss and perplexity histories.
    """
    print("\n\n" + "-" * 15)
    print("| TRAINING... |")
    print("-" * 15)
    set_seed()
    start_training_time = time.time()
    # Running history for train and val
    train_loss_history = []
    val_loss_history = []
    train_perplexity_history = []
    val_perplexity_history = []
    # Training loop
    for epoch in range(starting_epoch, epochs):
        train_loss = 0          # full-epoch total, used for epoch loss/ppl
        running_loss = 0.0      # 200-step window, used only for logging
        model.train()
        for step, batch in tqdm(enumerate(dataloaders['train_dataloader']),
                                total=len(dataloaders['train_dataloader'])):
            # Load and feed data to model (LM objective: labels == inputs)
            input_ids = batch.to(device)
            model.zero_grad()
            outputs = model(input_ids, labels=input_ids)
            loss = outputs[0]
            batch_loss = loss.item()
            # Bug fix: the original reset `train_loss` to 0 every 200 steps
            # for logging, so the epoch loss reported below only covered the
            # final window. A separate running accumulator fixes this.
            train_loss += batch_loss
            running_loss += batch_loss
            if step % 200 == 199:
                print("Epoch:", epoch + 1, "/", epochs, "Batch:", step + 1, "/",
                      len(dataloaders['train_dataloader']), "Loss", running_loss / 200)
                running_loss = 0.0
            # Periodically generate a sample (special tokens included) to
            # visualise the training process and model learning.
            if step % 100 == 0 and step != 0:
                model.eval()
                samples = model.generate(
                    # decoder_start_token_id=50258,
                    bos_token_id=50257,
                    do_sample=True,
                    top_k=50,
                    max_length=50,
                    min_length=15,
                    top_p=0.95,
                    num_return_sequences=1,
                    repetition_penalty=1.1,  # bug fix: was misspelled 'repition_penalty' and silently ignored
                    no_repeat_ngram_size=2,
                    temperature=1.1)
                for i, sample in enumerate(samples):
                    print("{}".format(
                        tokenizer.decode(sample, skip_special_tokens=False)))
                # Return to train mode before back-propagating
                model.train()
            loss.backward()
            # Apply gradient clipping
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            # Optimizer/Learning rate scheduler step
            optimizer.step()
            scheduler.step()
            torch.cuda.empty_cache()
        # Loss and perplexity results by epoch
        end_epoch_time = time.time()
        epoch_train_loss = train_loss / len(dataloaders['train_dataloader'])
        epoch_train_perplexity = torch.exp(torch.tensor(epoch_train_loss))
        epoch_train_time = format_time(start_training_time, end_epoch_time)
        train_loss_history.append(epoch_train_loss)
        train_perplexity_history.append(epoch_train_perplexity)
        print(
            f' epoch: {epoch + 1}, train loss: {epoch_train_loss:.6f}, train ppl: {epoch_train_perplexity:.6f}, train time:{epoch_train_time}'
        )
        # Switch to evaluation mode and run validation
        print("Validating...")
        start_val_time = time.time()
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for step, batch in tqdm(enumerate(dataloaders['val_dataloader']),
                                    total=len(dataloaders['val_dataloader'])):
                # NOTE(review): the train loop uses `batch.to(device)` while
                # this uses `batch[0]` — confirm the val dataloader really
                # yields tuples, otherwise only the first sequence is scored.
                input_ids = batch[0].to(device)
                outputs = model(input_ids, labels=input_ids)
                loss = outputs[0]
                val_loss += loss.item()
                torch.cuda.empty_cache()
        end_val_time = time.time()
        epoch_val_time = format_time(start_val_time, end_val_time)
        epoch_val_loss = val_loss / len(dataloaders['val_dataloader'])
        epoch_val_perplexity = torch.exp(torch.tensor(epoch_val_loss))
        val_loss_history.append(epoch_val_loss)
        val_perplexity_history.append(epoch_val_perplexity)
        print(
            f' epoch: {epoch + 1}, val loss: {epoch_val_loss:.6f}, val ppl: {epoch_val_perplexity:.6f}, val_time: {epoch_val_time}'
        )
        # Record results to dictionary to return
        performance_history = {
            'train_loss': train_loss_history,
            'val_loss': val_loss_history,
            'train_perplexity': train_perplexity_history,
            'val_perplexity': val_perplexity_history,
            'num_epochs': epochs
        }
        # Save model checkpoint at end of train_val run, also saves performance history
        if epoch == epochs - 1:
            checkpoint = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'performance_history': performance_history,
                'epoch': epoch + 1,
            }
            save_checkpoint(checkpoint, f"./checkpoint_{checkpoint['epoch']}.pth.tar")
    print("")
    print("Training Finished")
    return performance_history
def test_generate(model, tokenizer, dataloaders, device):
    """Score GPT-2 generation on the test split: perplexity plus BLEU.

    For each test batch, the decoded inputs become BLEU references and their
    first token (the genre) conditions fresh samples from `generate_text`;
    the samples are then scored against the references with corpus BLEU.

    Args:
        model: GPT-2-style LM; model(input_ids, labels=input_ids) returns
            (loss, logits, ...).
        tokenizer: decodes input ids back to text.
        dataloaders: dict with 'test_dataloader' yielding input_ids tensors.
        device: torch device for batch tensors.

    Returns:
        dict of mean test loss/perplexity and three BLEU variants.
    """
    model.eval()
    set_seed()
    with torch.no_grad():
        test_loss = 0
        plots = []
        references = []
        for step, batch in tqdm(enumerate(dataloaders['test_dataloader']),
                                total=len(dataloaders['test_dataloader'])):
            input_ids = batch.to(device)
            outputs = model(input_ids, labels=input_ids)
            loss, logits = outputs[:2]
            test_loss += loss.item()
            # List of genres decoded from each input_id
            genre_list = []
            # Pass input_id to references for BLEU score comparison with generated samples.
            for input_id in input_ids:
                # Bug fix: this decode was commented out, leaving `reference`
                # undefined on first use below.
                reference = tokenizer.decode(input_id, skip_special_tokens=True)
                genre = reference.split()[0]
                references.append([reference])
                genre_list.append(genre)
            # Generate samples conditioned on the same genres as the inputs
            generated = generate_text(model, tokenizer, device,
                                      num_samples=len(input_ids),
                                      input_genres=genre_list)
            plots += generated
            torch.cuda.empty_cache()
        # Smoothing function for BLEU score
        cc = SmoothingFunction()
        # BLEU score with default settings
        bleu_score_default = corpus_bleu(references, plots,
                                         smoothing_function=cc.method1)
        # BLEU scores with modified weights to penalize higher n-gram precision
        bleu_score_modified1 = corpus_bleu(references, plots,
                                           weights=(0.5, 0.25, 0.25, 0),
                                           smoothing_function=cc.method1)
        bleu_score_modified2 = corpus_bleu(references, plots,
                                           weights=(0.5, 0.5),
                                           smoothing_function=cc.method1)
        # Loss and perplexity results
        mean_test_loss = test_loss / len(dataloaders['test_dataloader'])
        mean_test_perplexity = torch.exp(torch.tensor(mean_test_loss))
        mean_test_perplexity = mean_test_perplexity.cpu().numpy()
        print(
            f'test loss: {mean_test_loss:.6f}, test ppl: {mean_test_perplexity:.6f}, bleu score default:{bleu_score_default}, bleu score modified 1: {bleu_score_modified1}, bleu score modified 2: {bleu_score_modified2}'
        )
        # Save test_performance
        test_performance = {
            'mean_test_loss': mean_test_loss,
            'mean_test_perplexity': mean_test_perplexity,
            'bleu_score_default': bleu_score_default,
            'bleu_score_modified1': bleu_score_modified1,
            'bleu_score_modified2': bleu_score_modified2
        }
        return test_performance