out, err = process.communicate()
print("Process output: ", out)
print("Process error: ", err)

for epoch in tqdm.trange(4):
    utils.train(model, train_loader, loss, optimizer, device, epoch,
                tb_logger=tb_logger)
    step = (epoch + 1) * len(train_loader)
    utils.validate(model, val_loader, loss, device, step, tb_logger=tb_logger)

test_imgs, test_labels = utils.load_cifar(os.path.join(cifar_dir, 'test'))
test_data = utils.DatasetWithTransform(test_imgs, test_labels, transform=trafos)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=val_batch_size)

pred, test_labels = utils.validate(model, test_loader, loss, device, 0)
accuracy = sklearn.metrics.accuracy_score(test_labels, pred)
print("Test Accuracy: ", accuracy)

fig, ax = plt.subplots(1, figsize=(8, 8))
utils.make_confusion_matrix(test_labels, pred, categories, ax)
plt.show()
test_predictions, test_labels = validate(model, test_loader, loss_function,
                                         device, 0, tb_logger=None)

# to evaluate the model we compute the overall accuracy and
# the class confusion matrix
accuracy = metrics.accuracy_score(test_labels, test_predictions)
print("Test accuracy")
print(accuracy)
print()

# we have implemented generating the confusion matrix in the utils already
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, figsize=(8, 8))
utils.make_confusion_matrix(test_labels, test_predictions, categories, ax)

"""## Logistic Regression with preset filters

Now, we will try to improve the model's performance by presenting additional
features to it. For this, we can compute convolutional filters (see the
previous notebook) on the image and present the filter responses as
(additional) features to the model. Hopefully, these features provide
additional context that improves the model's performance.

We will implement these preset filters using the transform mechanism we
have implemented in the previous exercise.
"""


# apply a list of filters as an on-the-fly transformation
def apply_filters(image, target, filter_list, keep_image=False):
    filtered = [image] if keep_image else []
    for filter_function in filter_list:
        filtered.append(filter_function(image))
    # stacking the responses along the channel axis assumes a channel-first
    # image layout; adapt the axis if your images are stored differently
    return np.concatenate(filtered, axis=0), target
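
# A minimal sketch of how such a filter_list could look, assuming scipy is
# available; the concrete filters (Gaussian smoothing and a Laplacian) and
# the per-channel helper functions below are illustrative choices, not
# prescribed by the exercise.
from functools import partial

import numpy as np
from scipy import ndimage


def gaussian_per_channel(image, sigma):
    # smooth each channel of a channel-first image independently
    return np.stack([ndimage.gaussian_filter(chan, sigma) for chan in image])


def laplace_per_channel(image):
    # edge-like response per channel
    return np.stack([ndimage.laplace(chan) for chan in image])


filter_list = [partial(gaussian_per_channel, sigma=1.0), laplace_per_channel]
# wire the filters into the transform mechanism, keeping the raw image as well
filter_trafo = partial(apply_filters, filter_list=filter_list, keep_image=True)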
def test_bert(model, criterion, device, data_loader, data='bnc_rb', writer=None,
              global_iteration=0, set='validation', print_metrics=True,
              plot_cm=False, save_fig=True, show_fig=False, model_type='bert',
              mode='train'):
    # for the confusion matrix
    y_pred = []
    y_true = []

    # set model to evaluation mode
    model.eval()

    # initialize loss and number of correct predictions
    set_loss = 0
    total_correct = 0

    # start eval timer
    eval_start_time = datetime.now()

    with torch.no_grad():
        for iteration, (batch_inputs, batch_labels, batch_lengths) in enumerate(data_loader):
            # move everything to device
            batch_inputs, batch_labels, batch_lengths = \
                batch_inputs.to(device), batch_labels.to(device), batch_lengths.to(device)

            loss, text_fea = model(batch_inputs, batch_labels)
            # accumulate the scalar loss
            set_loss += loss.item()

            predictions = torch.argmax(text_fea, 1)
            y_pred.extend(predictions.tolist())
            y_true.extend(batch_labels.tolist())
            total_correct += predictions.eq(batch_labels.view_as(predictions)).sum().item()

    # average loss and accuracy
    set_loss /= len(data_loader.dataset)
    accuracy = total_correct / len(data_loader.dataset)

    if print_metrics:
        print('-' * 91)
        print(
            "| " + set + " set "
            "| time {}"
            "| loss: {:.5f} | Accuracy: {}/{} ({:.5f})".format(
                datetime.now() - eval_start_time, set_loss, total_correct,
                len(data_loader.dataset), accuracy
            )
        )
        print('-' * 91)

    if writer:
        if set == 'validation':
            writer.add_scalar('Accuracy/val', accuracy, global_iteration)
            writer.add_scalar('Loss/val', set_loss, global_iteration)

    print(91 * '-')
    print(34 * '-' + ' Classification Report ' + 34 * '-')
    labels = list(range(data_loader.dataset.num_classes))
    print(classification_report(y_true, y_pred, labels=labels, digits=5,
                                zero_division=0))
    print(91 * '-')
    print('| Confusion Matrix |')
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    df_confusion = pd.DataFrame(cm)
    print(" Predicted")
    print(df_confusion)
    print("True -->")

    if plot_cm:
        # note: `data == 'bnc' or 'bnc_rb'` was always truthy; a membership
        # test gives the intended behavior
        if data in ('bnc', 'bnc_rb'):
            tick_labels = ['19_29', '50_plus']
        elif data == 'blog':
            tick_labels = ['13-17', '23-27', '33-47']
        else:
            raise ValueError(f"Unknown dataset: {data}")
        make_confusion_matrix(cf=cm, categories=tick_labels,
                              title=f'Confusion Matrix for {data} on {set} set',
                              num_labels=labels, y_true=y_true, y_pred=y_pred,
                              figsize=FIGSIZE)
        if save_fig:
            cur_datetime = datetime.now().strftime('%d_%b_%Y_%H_%M_%S')
            plt.savefig(f"{FIGDIR}{data}/cm_{model_type}_{set}_dt_{cur_datetime}.png",
                        bbox_inches='tight')
        if show_fig:
            plt.show()

    if mode == 'tvt':
        f1_scores = f1_score(y_true, y_pred, average=None)
        return set_loss, accuracy, f1_scores
    else:
        return set_loss, accuracy, y_pred
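
# A hypothetical invocation, assuming `model`, `criterion` and `val_loader`
# have been set up as in the rest of the training script; the argument values
# are illustrative. With the default mode='train' the third return value is
# the list of predictions.
val_loss, val_accuracy, val_predictions = test_bert(
    model, criterion, device, val_loader, data='bnc_rb',
    set='validation', plot_cm=False)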
                    name='resnet18_augmented', n_epochs=n_epochs)

# evaluate the model on test data
test_dataset = utils.make_cifar_test_dataset(cifar_dir)
test_loader = DataLoader(test_dataset, batch_size=25)
predictions, labels = utils.validate(model, test_loader, nn.NLLLoss(),
                                     device, step=0, tb_logger=None)

print("Test accuracy:")
accuracy = metrics.accuracy_score(labels, predictions)
print(accuracy)

fig, ax = plt.subplots(1, figsize=(8, 8))
utils.make_confusion_matrix(labels, predictions, categories, ax)

"""## Tasks and Questions

Tasks:
- Read up on some of the models in [torchvision.models](https://pytorch.org/docs/stable/torchvision/models.html) and train at least one of them on this data (see the sketch after this section).
- Combine the best performing model in this exercise with data augmentation (previous exercise).

Questions:
- What's the best accuracy you have achieved on CIFAR10 over all the exercises? Which model and training procedure led to it?
- What would your next steps be to improve this performance?
- Do you think the performance achievable on CIFAR10 will improve significantly with much larger models (i.e. models with a lot more parameters)?
"""
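
# A minimal sketch for the first task, assuming the same `device` and training
# utilities as above; the choice of resnet34 and of replacing the final fully
# connected layer is illustrative. The LogSoftmax output matches the NLLLoss
# used for evaluation above.
import torch.nn as nn
from torchvision import models

tv_model = models.resnet34()  # untrained; pass pretrained weights if desired
# CIFAR10 has 10 classes, so replace the 1000-class ImageNet head
tv_model.fc = nn.Sequential(nn.Linear(tv_model.fc.in_features, 10),
                            nn.LogSoftmax(dim=1))
tv_model = tv_model.to(device)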
def evaluate_performance(model, data_loader, device, criterion, data,
                         writer=None, global_iteration=0, set='validation',
                         print_metrics=True, plot_cm=False, save_fig=True,
                         show_fig=False, model_type='lstm'):
    # for the confusion matrix
    y_pred = []
    y_true = []

    # set model to evaluation mode
    model.eval()

    # initialize loss and number of correct predictions
    set_loss = 0
    total_correct = 0

    # start eval timer
    eval_start_time = datetime.now()

    with torch.no_grad():
        for iteration, (batch_inputs, batch_labels, batch_lengths) in enumerate(tqdm(data_loader)):
            # move everything to device
            batch_inputs, batch_labels, batch_lengths = \
                batch_inputs.to(device), batch_labels.to(device), batch_lengths.to(device)

            # forward pass through model; log_probs shape: (batch_size, num_classes)
            log_probs = model(batch_inputs, batch_lengths)

            # compute and sum up batch loss and correct predictions
            set_loss += criterion(log_probs, batch_labels).item()
            predictions = torch.argmax(log_probs, dim=1, keepdim=True)
            batch_pred = [int(item[0]) for item in predictions.tolist()]
            y_pred.extend(batch_pred)
            y_true.extend(batch_labels.tolist())
            total_correct += predictions.eq(batch_labels.view_as(predictions)).sum().item()

    # average loss and accuracy
    set_loss /= len(data_loader.dataset)
    accuracy = total_correct / len(data_loader.dataset)

    if print_metrics:
        print('-' * 91)
        print(
            "| " + set + " set "
            "| time {}"
            "| loss: {:.5f} | Accuracy: {}/{} ({:.5f})".format(
                datetime.now() - eval_start_time, set_loss, total_correct,
                len(data_loader.dataset), accuracy
            )
        )
        print('-' * 91)

    if writer:
        if set == 'validation':
            writer.add_scalar('Accuracy/val', accuracy, global_iteration)
            writer.add_scalar('Loss/val', set_loss, global_iteration)

    print(91 * '-')
    print(34 * '-' + ' Classification Report ' + 34 * '-')
    labels = list(range(data_loader.dataset.num_classes))
    print(classification_report(y_true, y_pred, labels=labels, digits=5,
                                zero_division=0))
    print(91 * '-')
    print('| Confusion Matrix |')
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    df_confusion = pd.DataFrame(cm)
    print(" Predicted")
    print(df_confusion)
    print("True -->")

    if plot_cm:
        # note: `data == 'bnc' or 'bnc_rb'` was always truthy; a membership
        # test gives the intended behavior
        if data in ('bnc', 'bnc_rb'):
            tick_labels = ['19_29', '50_plus']
        elif data == 'blog':
            tick_labels = ['13-17', '23-27', '33-47']
        else:
            raise ValueError(f"Unknown dataset: {data}")
        # make_confusion_matrix follows the recipe from
        # https://github.com/DTrimarchi10/confusion_matrix/blob/master/cf_matrix.py
        make_confusion_matrix(cf=cm, categories=tick_labels,
                              title=f'Confusion Matrix for {data} on {set} set',
                              num_labels=labels, y_true=y_true, y_pred=y_pred,
                              figsize=FIGSIZE)
        if save_fig:
            cur_datetime = datetime.now().strftime('%d_%b_%Y_%H_%M_%S')
            plt.savefig(f"{FIGDIR}{data}/cm_{model_type}_{set}_dt_{cur_datetime}.png",
                        bbox_inches='tight')
        if show_fig:
            plt.show()

    return set_loss, accuracy
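
# A hypothetical invocation, assuming an LSTM classifier and a validation
# DataLoader set up as elsewhere in this project; the argument values are
# illustrative.
val_loss, val_accuracy = evaluate_performance(
    model, val_loader, device, criterion, data='blog',
    set='validation', plot_cm=False)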