def _evaluate(data_loader, model, criterion, observer, observer_criterion, writer, epoch, logging_label,
              no_cuda=False, log_interval=10, **kwargs):
    """
    The evaluation routine for a model with an attached observer.

    Besides evaluating `model`, the activations stored in `model.module.features` are fed to
    `observer` (through `evaluate_one_mini_batch`) and the observer loss/accuracy are logged too.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used. It must expose `model.module.features`.
    criterion : torch.nn.loss
        The loss function used to compute the loss of the model
    observer : torch.nn.module
        The model evaluated on the features of `model`
    observer_criterion : torch.nn.loss
        The loss function used to compute the loss of the observer
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. Its prepended to the logging output path and
        messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.

    Returns
    -------
    top1.avg : float
        Accuracy of the model of the evaluated split
    """
    # When a 'run' entry is present in kwargs its value is appended to every tensorboard tag
    multi_run = kwargs.get('run')
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    observer_loss_meter = AverageMeter()
    top1 = AverageMeter()
    observer_acc_meter = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    # Empty lists to store the predictions and target values
    preds = []
    targets = []

    num_batches = len(data_loader)
    pbar = tqdm(enumerate(data_loader), total=num_batches, unit='batch', ncols=150, leave=False)

    # No gradients are needed for evaluation (replaces the removed `volatile=True` flag)
    with torch.no_grad():
        for batch_idx, (inputs, target) in pbar:
            # Measure data loading time
            data_time.update(time.time() - end)

            # Moving data to GPU ('async' is a reserved keyword since Python 3.7)
            if not no_cuda:
                inputs = inputs.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            # Compute output
            output = model(inputs)

            # Get the features stored by the model during the forward pass
            input_features = model.module.features.data

            # Use observer on the features
            observer_acc, observer_loss = evaluate_one_mini_batch(
                observer, observer_criterion, input_features, target, observer_loss_meter, observer_acc_meter)

            # Compute and record the loss
            loss = criterion(output, target)
            losses.update(loss.item(), inputs.size(0))

            # Compute and record the accuracy
            acc1 = accuracy(output.data, target, topk=(1,))[0]
            top1.update(acc1[0], inputs.size(0))

            # Get the predictions
            preds.extend(np.argmax(item) for item in output.data.cpu().numpy())
            targets.extend(target.cpu().numpy())

            # Add loss and accuracy to Tensorboard
            global_step = epoch * num_batches + batch_idx
            writer.add_scalar(logging_label + '/mb_loss' + tag_suffix, loss.item(), global_step)
            writer.add_scalar(logging_label + '/mb_accuracy' + tag_suffix, acc1.cpu().numpy(), global_step)
            writer.add_scalar(logging_label + '/obs_mb_loss' + tag_suffix, observer_loss.item(), global_step)
            writer.add_scalar(logging_label + '/obs_mb_accuracy' + tag_suffix, observer_acc.cpu().numpy(),
                              global_step)

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % log_interval == 0:
                pbar.set_description(logging_label +
                                     ' epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, num_batches))
                pbar.set_postfix(Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                                 Loss='{loss.avg:.4f}\t'.format(loss=losses),
                                 Acc1='{top1.avg:.3f}\t'.format(top1=top1),
                                 Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Make a confusion matrix
    try:
        cm = confusion_matrix(y_true=targets, y_pred=preds)
        confusion_matrix_heatmap = make_heatmap(cm, data_loader.dataset.classes)
    except ValueError:
        logging.warning('Confusion Matrix did not work as expected')
        # Fall back to a blank placeholder image so that logging can proceed
        confusion_matrix_heatmap = np.zeros((10, 10, 3))

    # Logging the epoch-wise accuracy and confusion matrix
    writer.add_scalar(logging_label + '/accuracy' + tag_suffix, top1.avg, epoch)
    writer.add_scalar(logging_label + '/obs_accuracy' + tag_suffix, observer_acc_meter.avg, epoch)
    save_image_and_log_to_tensorboard(writer,
                                      tag=logging_label + '/confusion_matrix' + tag_suffix,
                                      image_tensor=confusion_matrix_heatmap,
                                      global_step=epoch)

    logging.info(_prettyprint_logging_label(logging_label) +
                 (' epoch[{}]: '
                  'Acc@1={top1.avg:.3f}\t'
                  'Loss={loss.avg:.4f}\t'
                  'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)')
                 .format(epoch, batch_time=batch_time, data_time=data_time, loss=losses, top1=top1))

    # Generate a classification report for each epoch
    _log_classification_report(data_loader, epoch, preds, targets, writer)

    return top1.avg
def _evaluate(data_loader, model, criterion, writer, epoch, logging_label, no_cuda=False, log_interval=10,
              **kwargs):
    """
    The evaluation routine with support for multi-crop inputs.

    A 5-dimensional input batch is unpacked as (batch size, number of crops, channels, height, width):
    the crops are fused into the batch dimension for the forward pass and the outputs of the crops
    belonging to the same sample are then averaged.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion : torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. Its prepended to the logging output path and
        messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.

    Returns
    -------
    top1.avg : float
        Accuracy of the model of the evaluated split
    """
    # When a 'run' entry is present in kwargs its value is appended to every tensorboard tag.
    # NOTE: the multi-crop flag is kept in a separate per-batch variable so that it cannot
    # overwrite the run identifier (which used to turn every tag into '..._False' / '..._True').
    multi_run = kwargs.get('run')
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    # Empty lists to store the predictions and target values
    preds = []
    targets = []

    num_batches = len(data_loader)
    pbar = tqdm(enumerate(data_loader), total=num_batches, unit='batch', ncols=150, leave=False)

    # No gradients are needed for evaluation (replaces the removed `volatile=True` flag)
    with torch.no_grad():
        for batch_idx, (inputs, target) in pbar:
            # Measure data loading time
            data_time.update(time.time() - end)

            # TODO: how to implement a sliding window across batches
            multi_crop = inputs.dim() == 5
            if multi_crop:
                # e.g. [64, 5, 3, 299, 299] -> [320, 3, 299, 299]: fuse batch size and ncrops
                bs, ncrops, c, h, w = inputs.size()
                inputs = inputs.view(-1, c, h, w)

            # Moving data to GPU ('async' is a reserved keyword since Python 3.7)
            if not no_cuda:
                inputs = inputs.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            # Compute output
            output = model(inputs)
            if multi_crop:
                # Average the outputs over the crops of each sample
                output = output.view(bs, ncrops, -1).mean(1)

            # Compute and record the loss
            loss = criterion(output, target)
            losses.update(loss.item(), inputs.size(0))

            # Compute and record the accuracy
            acc1 = accuracy(output.data, target, topk=(1,))[0]
            top1.update(acc1[0], inputs.size(0))

            # Get the predictions
            preds.extend(np.argmax(item) for item in output.data.cpu().numpy())
            targets.extend(target.cpu().numpy())

            # Add loss and accuracy to Tensorboard
            global_step = epoch * num_batches + batch_idx
            writer.add_scalar(logging_label + '/mb_loss' + tag_suffix, loss.item(), global_step)
            writer.add_scalar(logging_label + '/mb_accuracy' + tag_suffix, acc1.cpu().numpy(), global_step)

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % log_interval == 0:
                pbar.set_description(logging_label +
                                     ' epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, num_batches))
                pbar.set_postfix(Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                                 Loss='{loss.avg:.4f}\t'.format(loss=losses),
                                 Acc1='{top1.avg:.3f}\t'.format(top1=top1),
                                 Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Make a confusion matrix
    try:
        cm = confusion_matrix(y_true=targets, y_pred=preds)
        confusion_matrix_heatmap = make_heatmap(cm, data_loader.dataset.classes)
    except ValueError:
        logging.warning('Confusion Matrix did not work as expected')
        # Fall back to a blank placeholder image so that logging can proceed
        confusion_matrix_heatmap = np.zeros((10, 10, 3))

    # Logging the epoch-wise accuracy and confusion matrix
    writer.add_scalar(logging_label + '/accuracy' + tag_suffix, top1.avg, epoch)
    # The image is passed with the keyword 'image' (not 'image_tensor'), as this variant already did
    save_image_and_log_to_tensorboard(writer,
                                      tag=logging_label + '/confusion_matrix' + tag_suffix,
                                      image=confusion_matrix_heatmap,
                                      global_step=epoch)

    logging.info(_prettyprint_logging_label(logging_label) +
                 (' epoch[{}]: '
                  'Acc@1={top1.avg:.3f}\t'
                  'Loss={loss.avg:.4f}\t'
                  'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)')
                 .format(epoch, batch_time=batch_time, data_time=data_time, loss=losses, top1=top1))

    # Generate a classification report for each epoch
    _log_classification_report(data_loader, epoch, preds, targets, writer)

    return top1.avg
def _evaluate(data_loader, model, criterion, writer, epoch, logging_label, no_cuda=False, log_interval=10,
              **kwargs):
    """
    The evaluation routine for image-to-image models.

    Every input batch is split in two halves along dimension 3: the first half is fed to the model
    and the second half is used as ground truth for the loss.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion : torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. Its prepended to the logging output path and
        messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.

    Returns
    -------
    None
        No accuracy is computed yet (see the TODO below), hence nothing is returned.
    """
    # TODO No accuracy is computed or logged at the moment. It is necessary to
    # TODO implement a 2D softmax and instead of regressing the output class have it
    # TODO work with class labels. Notice that, however, it would be
    # TODO of interest leaving open the possibility to work with soft labels
    # TODO (e.g. the ground truth for pixel X,Y is an array of probabilities instead
    # TODO of an integer.

    # When a 'run' entry is present in kwargs its value is appended to the tensorboard tags
    multi_run = kwargs.get('run')
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    # Tensors of the last processed mini-batch; they stay None if the loader is empty
    output = satel_image = map_image = None

    num_batches = len(data_loader)
    pbar = tqdm(enumerate(data_loader), total=num_batches, unit='batch', ncols=150, leave=False)

    # No gradients are needed for evaluation
    with torch.no_grad():
        for batch_idx, (inputs, _) in pbar:
            # Measure data loading time
            data_time.update(time.time() - end)

            # Moving data to GPU ('async' is a reserved keyword since Python 3.7)
            if not no_cuda:
                inputs = inputs.cuda(non_blocking=True)

            # Split the data into halves to separate the input from the GT
            satel_image, map_image = torch.chunk(inputs, chunks=2, dim=3)

            # Compute output
            output = model(satel_image)

            # Compute and record the loss
            loss = criterion(output, map_image)
            losses.update(loss.item(), inputs.size(0))

            # Add loss to Tensorboard
            writer.add_scalar(logging_label + '/mb_loss' + tag_suffix, loss.item(),
                              epoch * num_batches + batch_idx)

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % log_interval == 0:
                pbar.set_description(logging_label +
                                     ' epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, num_batches))
                pbar.set_postfix(Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                                 Loss='{loss.avg:.4f}\t'.format(loss=losses),
                                 Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Log the first sample of the last mini-batch (output, input and target) as images
    if output is not None:
        save_image_and_log_to_tensorboard(writer, tag=logging_label + '/output' + tag_suffix,
                                          image=output[:1], global_step=epoch)
        save_image_and_log_to_tensorboard(writer, tag=logging_label + '/input' + tag_suffix,
                                          image=satel_image[:1])
        # NOTE(review): unlike 'output' and 'input', the 'target' tag carries no run suffix in
        # multi-run mode. Kept as is; confirm whether this is intended.
        save_image_and_log_to_tensorboard(writer, tag=logging_label + '/target', image=map_image[:1])

    logging.info(_prettyprint_logging_label(logging_label) +
                 (' epoch[{}]: '
                  'Loss={loss.avg:.4f}\t'
                  'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)')
                 .format(epoch, batch_time=batch_time, data_time=data_time, loss=losses))
def validate(val_loader, model, criterion, writer, epoch, class_encodings, no_cuda=False, log_interval=10,
             **kwargs):
    """
    The evaluation routine

    Parameters
    ----------
    val_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion : torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    class_encodings : List
        Contains the classes (range of ints)
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.

    Returns
    -------
    meanIU.avg : float
        MeanIU of the model of the evaluated split
    """
    # 'Run' is injected in kwargs at runtime IFF it is a multi-run event
    multi_run = kwargs.get('run')
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    num_classes = len(class_encodings)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    meanIU = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    num_batches = len(val_loader)
    pbar = tqdm(enumerate(val_loader), total=num_batches, unit='batch', ncols=150, leave=False)

    # No gradients are needed for validation: avoid building the autograd graph
    with torch.no_grad():
        for batch_idx, (inputs, target) in pbar:
            # Measure data loading time
            data_time.update(time.time() - end)

            # Moving data to GPU
            if not no_cuda:
                inputs = inputs.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            # Compute output
            output = model(inputs)

            # Compute and record the loss
            loss = criterion(output, target)
            losses.update(loss.item(), inputs.size(0))

            # Compute and record the accuracy
            _, _, mean_iu_batch, _ = accuracy_segmentation(target.cpu().numpy(), get_argmax(output), num_classes)
            meanIU.update(mean_iu_batch, inputs.size(0))

            # Add loss and meanIU to Tensorboard
            global_step = epoch * num_batches + batch_idx
            writer.add_scalar('val/mb_loss' + tag_suffix, loss.item(), global_step)
            writer.add_scalar('val/mb_meanIU' + tag_suffix, mean_iu_batch, global_step)

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % log_interval == 0:
                pbar.set_description('val epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, num_batches))
                pbar.set_postfix(Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                                 Loss='{loss.avg:.4f}\t'.format(loss=losses),
                                 meanIU='{meanIU.avg:.3f}\t'.format(meanIU=meanIU),
                                 Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Logging the epoch-wise meanIU
    writer.add_scalar('val/meanIU' + tag_suffix, meanIU.avg, epoch)

    logging.info(_prettyprint_logging_label("val") +
                 (' epoch[{}]: '
                  'MeanIU={meanIU.avg:.3f}\t'
                  'Loss={loss.avg:.4f}\t'
                  'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)')
                 .format(epoch, batch_time=batch_time, data_time=data_time, loss=losses, meanIU=meanIU))

    return meanIU.avg
def test(test_loader, model, criterion, writer, epoch, class_encodings, img_names_sizes_dict, dataset_folder,
         post_process, no_cuda=False, log_interval=10, **kwargs):
    """
    The evaluation routine

    The network outputs of the individual patches are merged on a per-image canvas. As soon as a
    canvas contains no NaN anymore the full image is handed to `process_full_image` and its mean IU
    is recorded.

    Parameters
    ----------
    test_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set. Each element is unpacked as
        ((input, top_left_coordinates, test_img_names), target).
    model : torch.nn.module
        The network model being used
    criterion : torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    class_encodings : List
        Contains the range of encoded classes
    img_names_sizes_dict : dictionary {str: (int, int)}
        Key: gt image name (with extension), Value: image size
    dataset_folder : str
        Forwarded as is to `process_full_image`
    post_process : Boolean
        apply post-processing to the output of the network
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.

    Returns
    -------
    meanIU.avg : float
        MeanIU of the model of the evaluated split
    """
    # 'Run' is injected in kwargs at runtime IFF it is a multi-run event
    multi_run = kwargs.get('run')
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    num_classes = len(class_encodings)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    meanIU = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    # Need to store the images currently being processed
    canvas = {}

    num_batches = len(test_loader)
    pbar = tqdm(enumerate(test_loader), total=num_batches, unit='batch', ncols=150, leave=False)

    # No gradients are needed for testing: avoid building the autograd graph
    with torch.no_grad():
        for batch_idx, (packed_input, target) in pbar:
            # Unpack input
            inputs, top_left_coordinates, test_img_names = packed_input

            # Measure data loading time
            data_time.update(time.time() - end)

            # Moving data to GPU
            if not no_cuda:
                inputs = inputs.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            # Compute output
            output = model(inputs)

            # Compute and record the loss
            loss = criterion(output, target)
            losses.update(loss.item(), inputs.size(0))

            # Compute and record the batch meanIU
            _, _, mean_iu_batch, _ = accuracy_segmentation(target.cpu().numpy(), get_argmax(output), num_classes)

            # Add loss and meanIU to Tensorboard
            global_step = epoch * num_batches + batch_idx
            writer.add_scalar('test/mb_loss' + tag_suffix, loss.item(), global_step)
            writer.add_scalar('test/mb_meanIU' + tag_suffix, mean_iu_batch, global_step)

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % log_interval == 0:
                pbar.set_description('test epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, num_batches))
                pbar.set_postfix(Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                                 Loss='{loss.avg:.4f}\t'.format(loss=losses),
                                 meanIU='{meanIU.avg:.3f}\t'.format(meanIU=meanIU),
                                 Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

            # Output needs to be patched together to form the complete output of the full image
            # patches are returned as a sliding window over the full image, overlapping sections are averaged
            for patch, x, y, img_name in zip(output.data.cpu().numpy(),
                                             top_left_coordinates[0].numpy(),
                                             top_left_coordinates[1].numpy(),
                                             test_img_names):
                # Is a new image?
                if img_name not in canvas:
                    # Create a new image of the right size filled with NaNs
                    canvas[img_name] = np.full((num_classes, *img_names_sizes_dict[img_name]), np.nan)

                # Add the patch to the image
                canvas[img_name] = merge_patches(patch, (x, y), canvas[img_name])

                # Save the image when done: the sum is NaN as long as any location is still NaN
                if not np.isnan(np.sum(canvas[img_name])):
                    # Save the final image
                    mean_iu = process_full_image(img_name, canvas[img_name], multi_run, dataset_folder,
                                                 class_encodings, post_process)
                    # Update the meanIU
                    meanIU.update(mean_iu, 1)
                    # Remove the entry
                    canvas.pop(img_name)
                    logging.info("\nProcessed image {} with mean IU={}".format(img_name, mean_iu))

    # Canvas MUST be empty or something was wrong with coverage of all images
    assert len(canvas) == 0, 'Incomplete coverage for images: {}'.format(sorted(canvas))

    # Logging the epoch-wise meanIU. It gets its own tag (as 'val/meanIU' in validate()) instead of
    # being written into the per-mini-batch 'test/mb_meanIU' series.
    writer.add_scalar('test/meanIU' + tag_suffix, meanIU.avg, epoch)

    logging.info(_prettyprint_logging_label("test") +
                 (' epoch[{}]: '
                  'MeanIU={meanIU.avg:.3f}\t'
                  'Loss={loss.avg:.4f}\t'
                  'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)')
                 .format(epoch, batch_time=batch_time, data_time=data_time, loss=losses, meanIU=meanIU))

    return meanIU.avg
def _evaluate(data_loader, model, criterion, writer, epoch, logging_label, no_cuda=False, log_interval=10,
              **kwargs):
    """
    The evaluation routine for multi-label models.

    The outputs of the model are squashed with a sigmoid and converted to predictions by
    `get_preds_from_minibatch`. The score is computed once per epoch by `compute_jss` on the
    predictions and targets of the whole split.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion : torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. Its prepended to the logging output path and
        messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.

    Returns
    -------
    jss_epoch
        Value returned by `compute_jss` for the evaluated split (logged as 'jaccard_similarity')
    """
    # When a 'run' entry is present in kwargs its value is appended to every tensorboard tag
    multi_run = kwargs.get('run')
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    # Empty lists to store the predictions and target values
    preds = []
    targets = []

    num_batches = len(data_loader)
    pbar = tqdm(enumerate(data_loader), total=num_batches, unit='batch', ncols=150, leave=False)

    # No gradients are needed for evaluation (replaces the removed `volatile=True` flag)
    with torch.no_grad():
        for batch_idx, (inputs, target) in pbar:
            # Measure data loading time
            data_time.update(time.time() - end)

            # Moving data to GPU ('async' is a reserved keyword since Python 3.7)
            if not no_cuda:
                inputs = inputs.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            # Compute output
            output = model(inputs)

            # Compute and record the loss
            loss = criterion(output, target)
            losses.update(loss.item(), inputs.size(0))

            # Apply sigmoid; the conversion to predictions is done by get_preds_from_minibatch
            squashed_output = torch.sigmoid(output).data.cpu().numpy()

            # Store results of each minibatch
            preds.extend(get_preds_from_minibatch(squashed_output))
            targets.extend(target.cpu().numpy())

            # Add loss to Tensorboard
            writer.add_scalar(logging_label + '/mb_loss' + tag_suffix, loss.item(),
                              epoch * num_batches + batch_idx)

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % log_interval == 0:
                pbar.set_description(logging_label +
                                     ' epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, num_batches))
                pbar.set_postfix(Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                                 Loss='{loss.avg:.4f}\t'.format(loss=losses),
                                 Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Generate a classification report for each epoch
    # (`np.int` was an alias of the builtin `int` and has been removed from NumPy)
    targets = np.array(targets).astype(int)
    preds = np.array(preds).astype(int)
    _log_classification_report(data_loader, epoch, preds, targets, writer)
    jss_epoch = compute_jss(targets, preds)

    # Logging the epoch-wise JSS
    writer.add_scalar(logging_label + '/loss' + tag_suffix, losses.avg, epoch)
    writer.add_scalar(logging_label + '/jaccard_similarity' + tag_suffix, jss_epoch, epoch)

    logging.info(_prettyprint_logging_label(logging_label) +
                 (' epoch[{}]: '
                  'JSS={jss_epoch:.3f}\t'
                  'Loss={loss.avg:.4f}\t'
                  'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)')
                 .format(epoch, batch_time=batch_time, data_time=data_time, loss=losses, jss_epoch=jss_epoch))

    return jss_epoch
save_image_and_log_to_tensorboard( writer, tag=logging_label + '/confusion_matrix', image_tensor=confusion_matrix_heatmap, global_step=epoch) else: writer.add_scalar(logging_label + '/accuracy_{}'.format(multi_run), top1.avg, epoch) save_image_and_log_to_tensorboard( writer, tag=logging_label + '/confusion_matrix_{}'.format(multi_run), image_tensor=confusion_matrix_heatmap, global_step=epoch) logging.info( _prettyprint_logging_label(logging_label) + ' epoch[{}]: ' 'Acc@1={top1.avg:.3f}\t' 'Loss={loss.avg:.4f}\t' 'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'. format(epoch, batch_time=batch_time, data_time=data_time, loss=losses, top1=top1)) # Generate a classification report for each epoch _log_classification_report(data_loader, epoch, preds, targets, writer) return top1.avg