def train(trainloader, model, criterion, optimizer, epoch, cuda=False):
    """Run one training epoch, logging running metrics after every batch.

    Args:
        trainloader: iterable of ``(inputs, targets)`` batches; must also
            support ``len()``.
        model: network to train; switched to training mode here.
        criterion: called as ``criterion(outputs, targets)``; must return a
            scalar tensor supporting ``.item()`` and ``.backward()``.
        optimizer: object providing ``zero_grad()`` and ``step()``.
        epoch: index of the current epoch; combined with the batch index to
            form the iteration number passed to ``track.metric``.
        cuda: when true, each batch is moved to the GPU before the forward
            pass.

    Returns:
        Tuple ``(losses.avg, top1.avg)`` taken from the running meters.
    """
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    num_batches = len(trainloader)
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # measure data loading time
        data_time.update(time.time() - end)

        if cuda:
            # ``async`` is a reserved word since Python 3.7; the keyword
            # argument is spelled ``non_blocking`` in PyTorch.
            inputs = inputs.cuda()
            targets = targets.cuda(non_blocking=True)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss; the outputs are detached so the
        # accuracy computation is not tracked by autograd
        # NOTE(review): accuracy() is assumed to return one tensor per
        # requested k -- confirm against its definition.
        prec1, prec5 = accuracy(outputs.detach(), targets, topk=(1, 5))
        batch_size = inputs.size(0)
        losses.update(loss.item(), batch_size)
        top1.update(prec1.item(), batch_size)
        top5.update(prec5.item(), batch_size)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        progress_str = 'Loss: %.3f | Acc: %.3f%% (%d/%d)'\
            % (losses.avg, top1.avg, top1.sum, top1.count)
        progress_bar(batch_idx, num_batches, progress_str)

        iteration = epoch * num_batches + batch_idx
        track.metric(iteration=iteration, epoch=epoch,
                     avg_train_loss=losses.avg,
                     avg_train_acc=top1.avg,
                     cur_train_loss=loss.item(),
                     cur_train_acc=prec1.item())
    return (losses.avg, top1.avg)
def test(testloader, model, criterion, epoch, cuda=False):
    """Evaluate ``model`` on ``testloader`` and log the averaged metrics.

    Args:
        testloader: iterable of ``(inputs, targets)`` batches; must also
            support ``len()``.
        model: network to evaluate; switched to evaluation mode here.
        criterion: called as ``criterion(outputs, targets)``; must return a
            scalar tensor supporting ``.item()``.
        epoch: epoch index recorded alongside the metrics.
        cuda: when true, each batch is moved to the GPU before the forward
            pass.

    Returns:
        Tuple ``(losses.avg, top1.avg)`` taken from the running meters.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    num_batches = len(testloader)
    # torch.no_grad() already disables gradient tracking, so the removed
    # ``Variable(..., volatile=True)`` wrappers are not needed.
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            # measure data loading time
            data_time.update(time.time() - end)

            if cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            # NOTE(review): accuracy() is assumed to return one tensor per
            # requested k -- confirm against its definition.
            prec1, prec5 = accuracy(outputs, targets, topk=(1, 5))
            batch_size = inputs.size(0)
            losses.update(loss.item(), batch_size)
            top1.update(prec1.item(), batch_size)
            top5.update(prec5.item(), batch_size)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            progress_str = 'Loss: %.3f | Acc: %.3f%% (%d/%d)'\
                % (losses.avg, top1.avg, top1.sum, top1.count)
            progress_bar(batch_idx, num_batches, progress_str)

    # a single record per evaluation pass, always logged with iteration=0
    track.metric(iteration=0, epoch=epoch,
                 avg_test_loss=losses.avg,
                 avg_test_acc=top1.avg)
    return (losses.avg, top1.avg)
def test(testloader, model, epoch, device):
    """Evaluate a detection model on ``testloader`` with a COCO bbox evaluator.

    For every batch the raw network output is transformed, filtered by the
    ``inf_confidence`` hyperparameter, reduced with NMS using
    ``inf_iou_threshold``, converted to absolute xyxy boxes and passed to the
    evaluator. The summary statistics are logged through ``track.metric``.

    Args:
        testloader: iterable of ``(images, targets)`` batches whose
            ``dataset`` attribute is handed to
            ``coco_utils.get_coco_api_from_dataset``. Each target is a dict
            containing at least ``'img_size'`` and ``'image_id'``.
        model: detection network, optionally wrapped in ``nn.DataParallel``.
            Must expose ``hp`` and (directly or via ``.module``) ``inp_dim``,
            ``pw_ph``, ``cx_cy`` and ``stride``.
        epoch: epoch index recorded alongside the metrics.
        device: device the batches and anchor tensors are moved to; also
            forwarded as the second argument of ``model``.

    Returns:
        ``metrics[0]``, the first statistic reported by the evaluator.
    """
    import contextlib

    # predictions are moved to the CPU before they reach the evaluator
    cpu_device = torch.device("cpu")

    batch_time = AverageMeter()
    data_time = AverageMeter()

    hyperparameters = model.hp
    confidence = hyperparameters['inf_confidence']
    iou_threshold = hyperparameters['inf_iou_threshold']

    # DataParallel does not forward these attributes; read them from the
    # wrapped module instead
    net = model.module if isinstance(model, nn.DataParallel) else model
    inp_dim = net.inp_dim
    pw_ph = net.pw_ph.to(device)
    cx_cy = net.cx_cy.to(device)
    stride = net.stride.to(device)

    # Silence the hard-coded printing of the COCO helpers. redirect_stdout
    # restores whatever stream was active before (not unconditionally
    # sys.__stdout__) and the devnull handle is closed on exit.
    with open(os.devnull, 'w') as devnull, \
            contextlib.redirect_stdout(devnull):
        coco = coco_utils.get_coco_api_from_dataset(testloader.dataset)
        iou_types = ["bbox"]
        coco_evaluator = coco_eval.CocoEvaluator(coco, iou_types)

    # switch to evaluate mode
    model.eval()

    end = time.time()
    with torch.no_grad():
        for batch_idx, (images, targets) in enumerate(testloader):
            # measure data loading time
            data_time.update(time.time() - end)

            images = images.to(device)
            # every target entry except 'img_size' is moved to the device
            targets = [
                {k: (v if k == 'img_size' else v.to(device))
                 for k, v in t.items()}
                for t in targets
            ]

            raw_pred = model(images, device)
            true_pred = util.transform(raw_pred.clone().detach(),
                                       pw_ph, cx_cy, stride)

            # rank predictions by column 4 times the best value of
            # columns 5 onwards
            # NOTE(review): presumably objectness * best class score --
            # confirm against util.transform.
            scores, order = torch.sort(
                true_pred[:, :, 4] * (true_pred[:, :, 5:].max(axis=2)[0]),
                descending=True)
            pred_mask = scores > confidence
            indices = [order[e, :][pred_mask[e, :]]
                       for e in range(pred_mask.shape[0])]
            pred_final = [true_pred[i, idx, :]
                          for i, idx in enumerate(indices)]

            pred_final_coord = [util.get_abs_coord(p.unsqueeze(-2))
                                for p in pred_final]
            keep = [nms_box.nms(coord[0], p[:, 4], iou_threshold)
                    for coord, p in zip(pred_final_coord, pred_final)]
            pred_final = [p[k, :] for p, k in zip(pred_final, keep)]

            abs_pred_final = [
                helper.convert2_abs_xyxy(p, t['img_size'], inp_dim)
                for p, t in zip(pred_final, targets)
            ]

            outputs = []
            for boxes_abs, pred in zip(abs_pred_final, pred_final):
                class_scores = pred[:, 5:]
                if class_scores.numel() == 0:
                    # no surviving predictions: emit empty labels instead
                    # of relying on max() raising
                    labels = torch.tensor([])
                else:
                    labels = class_scores.max(axis=1)[1] + 1
                outputs.append({
                    'boxes': boxes_abs[:, :4].to(cpu_device),
                    'scores': pred[:, 4].to(cpu_device),
                    'labels': labels.to(cpu_device),
                })

            res = {
                target["image_id"].item(): output
                for target, output in zip(targets, outputs)
            }
            coco_evaluator.update(res)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

    with open(os.devnull, 'w') as devnull, \
            contextlib.redirect_stdout(devnull):
        coco_evaluator.synchronize_between_processes()
        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
        metrics = coco_evaluator.get_stats()

    # Entries 1 and 2 need distinct keys; with a duplicated key the second
    # entry silently overwrites the first.
    # NOTE(review): the index-to-name mapping assumes the standard COCOeval
    # stats ordering, and the 'map@0.5' / 'map@0.75' spellings are
    # reconstructed -- confirm against get_stats() and metric consumers.
    coco_stats = {
        'map_all': metrics[0],
        'map@0.5': metrics[1],
        'map@0.75': metrics[2],
        'map_small': metrics[3],
        'map_med': metrics[4],
        'map_large': metrics[5],
        'recall@1': metrics[6],
        'recall@10': metrics[7],
        'recall@100': metrics[8],
        'recall@small': metrics[9],
        'recall@medium': metrics[10],
        'recall@large': metrics[11],
    }

    track.metric(iteration=0, epoch=epoch, coco_stats=coco_stats)

    return (metrics[0])
def train(trainloader, model, optimizer, epoch, cuda=True):
    """Run one training epoch of the detection model.

    Args:
        trainloader: iterable of ``(inputs, targets)`` batches; must also
            support ``len()``.
        model: detection network, optionally wrapped in ``nn.DataParallel``.
            Must expose ``hp`` and ``mode`` and (directly or via ``.module``)
            ``inp_dim``, ``pw_ph``, ``cx_cy`` and ``stride``.
        optimizer: object providing ``zero_grad()`` and ``step()``.
        epoch: index of the current epoch; combined with the batch index to
            form the iteration number used for logging.
        cuda: when true, inputs and anchor tensors are moved to the GPU.

    Returns:
        Dict with the running averages ``avg_loss``, ``avg_iou``,
        ``avg_pos``, ``avg_neg``, ``avg_conf``, ``avg_no_conf`` and a
        ``broken`` flag that is 1 when the loss computation raised
        ``RuntimeError`` and the epoch was aborted, else 0.
    """
    # switch to train mode
    model.train()

    hyperparameters = model.hp
    mode = model.mode
    show_summary = bool(mode['show_temp_summary'])
    show_output = bool(mode['show_output'])
    use_wasserstein = bool(hyperparameters['wasserstein'])

    # DataParallel does not forward these attributes; read them from the
    # wrapped module instead
    net = model.module if isinstance(model, nn.DataParallel) else model
    inp_dim = net.inp_dim
    pw_ph = net.pw_ph
    cx_cy = net.cx_cy
    stride = net.stride

    if cuda:
        pw_ph = pw_ph.cuda()
        cx_cy = cx_cy.cuda()
        stride = stride.cuda()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    avg_loss = AverageMeter()
    avg_iou = AverageMeter()
    avg_conf = AverageMeter()
    avg_no_conf = AverageMeter()
    avg_pos = AverageMeter()
    avg_neg = AverageMeter()

    end = time.time()
    break_flag = 0
    num_batches = len(trainloader)

    writer = None
    if show_summary:
        writer = SummaryWriter(os.path.join(track.trial_dir(), 'temp_vis/'))

    try:
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            # measure data loading time
            data_time.update(time.time() - end)

            if cuda:
                inputs = inputs.cuda()

            # compute output
            # NOTE(review): the model's second argument is driven by CUDA
            # availability here, not by the ``cuda`` parameter -- confirm
            # this is intended.
            raw_pred = model(inputs, torch.cuda.is_available())
            true_pred = util.transform(raw_pred.clone().detach(),
                                       pw_ph, cx_cy, stride)
            iou_list = util.get_iou_list(true_pred, targets,
                                         hyperparameters, inp_dim)

            (resp_raw_pred, resp_cx_cy, resp_pw_ph,
             resp_stride, no_obj) = util.build_tensors(
                raw_pred, iou_list, pw_ph, cx_cy, stride, hyperparameters)

            # stats use the no_obj from build_tensors, before the optional
            # wasserstein replacement below
            stats = helper.get_progress_stats(true_pred, no_obj,
                                              iou_list, targets)
            if use_wasserstein:
                no_obj = util.get_wasserstein_matrices(raw_pred, iou_list,
                                                       inp_dim)

            try:
                loss = util.yolo_loss(resp_raw_pred, targets, no_obj,
                                      resp_pw_ph, resp_cx_cy, resp_stride,
                                      inp_dim, hyperparameters)
            except RuntimeError:
                # abort the epoch but still return the averages so far
                print('bayes opt failed')
                break_flag = 1
                break

            # measure accuracy and record loss
            avg_loss.update(loss.item())
            avg_iou.update(stats['iou'])
            avg_conf.update(stats['pos_conf'])
            avg_no_conf.update(stats['neg_conf'])
            avg_pos.update(stats['pos_class'])
            avg_neg.update(stats['neg_class'])

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if show_output:
                # plot progress
                progress_str = 'Loss: %.4f | AvIoU: %.3f | AvPConf: %.3f | AvNConf: %.5f | AvClass: %.3f | AvNClass: %.5f'\
                    % (loss.item(), stats['iou'], stats['pos_conf'], stats['neg_conf'],stats['pos_class'],stats['neg_class'])
                progress_bar(batch_idx, num_batches, progress_str)

            iteration = epoch * num_batches + batch_idx

            if writer is not None:
                writer.add_scalar('AvLoss/train', avg_loss.avg, iteration)
                writer.add_scalar('AvIoU/train', avg_iou.avg, iteration)
                writer.add_scalar('AvPConf/train', avg_conf.avg, iteration)
                writer.add_scalar('AvNConf/train', avg_no_conf.avg,
                                  iteration)
                writer.add_scalar('AvClass/train', avg_pos.avg, iteration)
                writer.add_scalar('AvNClass/train', avg_neg.avg, iteration)

            track.metric(iteration=iteration, epoch=epoch,
                         avg_train_loss=avg_loss.avg,
                         avg_train_iou=avg_iou.avg,
                         avg_train_conf=avg_conf.avg,
                         avg_train_neg_conf=avg_no_conf.avg,
                         avg_train_pos=avg_pos.avg,
                         avg_train_neg=avg_neg.avg)
    finally:
        # close the summary writer even when the epoch ends early or raises
        if writer is not None:
            writer.close()

    outcome = {
        'avg_loss': avg_loss.avg,
        'avg_iou': avg_iou.avg,
        'avg_pos': avg_pos.avg,
        'avg_neg': avg_neg.avg,
        'avg_conf': avg_conf.avg,
        'avg_no_conf': avg_no_conf.avg,
        'broken': break_flag
    }

    return outcome
def test(testloader, model, criterion, epoch, cuda=False, metric=True,
         criterion_has_labels=True, compute_acc=True):
    """Evaluate ``model`` on ``testloader``.

    Args:
        testloader: iterable of ``(inputs, targets)`` batches; must also
            support ``len()``.
        model: network to evaluate; switched to evaluation mode here.
        criterion: loss callable returning a scalar tensor that supports
            ``.item()``.
        epoch: epoch index recorded alongside the metrics.
        cuda: when true, each batch is moved to the GPU before the forward
            pass.
        metric: when true, the averaged loss and accuracy are logged via
            ``track.metric`` after the pass.
        criterion_has_labels: if true, the criterion is called as
            ``criterion(outputs, targets)``; otherwise as
            ``criterion(outputs)``.
        compute_acc: if true, accuracy is computed for every batch and
            returned.

    Returns:
        ``(test_loss, test_acc)`` if ``compute_acc`` is true, otherwise
        ``test_loss`` alone.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    num_batches = len(testloader)
    # one no_grad context covers the whole evaluation loop
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            # measure data loading time
            data_time.update(time.time() - end)

            if cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # compute output
            outputs = model(inputs)
            if criterion_has_labels:
                loss = criterion(outputs, targets)
            else:
                loss = criterion(outputs)

            # measure accuracy and record loss
            batch_size = inputs.size(0)
            losses.update(loss.item(), batch_size)
            if compute_acc:
                # NOTE(review): accuracy() is assumed to return one tensor
                # per requested k -- confirm against its definition.
                prec1, prec5 = accuracy(outputs, targets, topk=(1, 5))
                top1.update(prec1.item(), batch_size)
                top5.update(prec5.item(), batch_size)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            if compute_acc:
                progress_str = 'Loss: %.3f | Acc: %.3f%% (%d/%d)'\
                    % (losses.avg, top1.avg, top1.sum, top1.count)
            else:
                progress_str = 'Loss: %.3f (%d/%d)'\
                    % (losses.avg, batch_idx*inputs.size(0), losses.count)
            progress_bar(batch_idx, num_batches, progress_str)

    if metric:
        # NOTE(review): avg_test_acc is logged even when compute_acc is
        # false, in which case top1 was never updated -- confirm consumers
        # expect that.
        track.metric(iteration=0, epoch=epoch,
                     avg_test_loss=losses.avg,
                     avg_test_acc=top1.avg)

    if compute_acc:
        return (losses.avg, top1.avg)
    return losses.avg
def train(trainloader, model, criterion, optimizer, epoch, cuda=False,
          num_chunks=4):
    """Run one training epoch, accumulating gradients over batch chunks.

    Every loaded batch is split into up to ``num_chunks`` pieces that are
    pushed through the model one at a time. Their gradients are accumulated
    and a single optimizer step is taken per loaded batch.

    Args:
        trainloader: iterable of ``(inputs, targets)`` tensor batches; must
            also support ``len()``.
        model: network to train; switched to training mode here.
        criterion: called as ``criterion(outputs, targets)``; must return a
            scalar tensor.
            NOTE(review): the chunk weighting assumes the criterion averages
            over the samples it is given -- confirm.
        optimizer: object providing ``zero_grad()`` and ``step(epoch)``.
            NOTE(review): stepping with ``epoch`` as the argument suggests a
            project-specific optimizer; stock ``torch.optim`` optimizers
            treat the first positional argument as a closure -- confirm.
        epoch: index of the current epoch; passed to ``optimizer.step`` and
            used to compute the logged iteration number.
        cuda: when true, each chunk is moved to the GPU before the forward
            pass.
        num_chunks: requested number of chunks per batch.

    Returns:
        Tuple ``(losses.avg, top1.avg)`` taken from the running meters.
    """
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    num_batches = len(trainloader)
    for batch_idx, (all_inputs, all_targets) in enumerate(trainloader):
        # measure data loading time
        data_time.update(time.time() - end)

        # do mini-mini-batching for large batch sizes
        xs = all_inputs.chunk(num_chunks)
        ys = all_targets.chunk(num_chunks)
        total = all_inputs.size(0)

        optimizer.zero_grad()

        batch_prec1 = 0.0
        batch_loss = 0.0
        for inputs, targets in zip(xs, ys):
            if cuda:
                # ``async`` is a reserved word since Python 3.7; the keyword
                # argument is spelled ``non_blocking`` in PyTorch.
                inputs = inputs.cuda()
                targets = targets.cuda(non_blocking=True)

            # chunk() can return fewer, or unevenly sized, chunks than
            # requested, so each chunk is weighted by its share of the
            # batch rather than by 1 / num_chunks
            chunk_size = inputs.size(0)
            weight = chunk_size / total

            # compute output
            outputs = model(inputs)
            chunk_loss = criterion(outputs, targets)
            mini_loss = chunk_loss * weight
            batch_loss += mini_loss.item()
            mini_loss.backward()

            # measure accuracy and record loss
            # NOTE(review): accuracy() is assumed to return one tensor per
            # requested k -- confirm against its definition.
            prec1, prec5 = accuracy(outputs.detach(), targets, topk=(1, 5))
            batch_prec1 += prec1.item() * weight
            losses.update(chunk_loss.item(), chunk_size)
            top1.update(prec1.item(), chunk_size)
            top5.update(prec5.item(), chunk_size)

        # compute gradient and do SGD step
        optimizer.step(epoch)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        progress_str = 'Loss: %.3f | Acc: %.3f%% (%d/%d)'\
            % (losses.avg, top1.avg, top1.sum, top1.count)
        progress_bar(batch_idx, num_batches, progress_str)

        iteration = epoch * num_batches + batch_idx
        track.metric(iteration=iteration, epoch=epoch,
                     avg_train_loss=losses.avg,
                     avg_train_acc=top1.avg,
                     cur_train_loss=batch_loss,
                     cur_train_acc=batch_prec1)
    return (losses.avg, top1.avg)