def validate(config, testloader, model, writer_dict):
    """Evaluate ``model`` on ``testloader`` and log loss and mean IoU.

    Args:
        config: Configuration object; ``DATASET.NUM_CLASSES`` and
            ``TRAIN.IGNORE_LABEL`` are read.
        testloader: Iterable yielding 4-item batches whose first two items
            are the image batch and the label batch.
        model: Callable as ``model(image, label)`` returning
            ``(losses, pred)``.
        writer_dict: Dict with a ``'writer'`` exposing ``add_scalar`` and an
            integer ``'valid_global_steps'``; the counter is advanced by one.

    Returns:
        Tuple ``(average loss, mean IoU, per-class IoU array)``.
    """
    model.eval()
    ave_loss = AverageMeter()
    num_classes = config.DATASET.NUM_CLASSES
    confusion_matrix = np.zeros((num_classes, num_classes))

    with torch.no_grad():
        for image, label, _, _ in testloader:
            size = label.size()
            label = label.long().cuda()

            losses, pred = model(image, label)
            # F.upsample is deprecated; F.interpolate with
            # align_corners=False is what it resolved to for bilinear mode.
            pred = F.interpolate(
                input=pred,
                size=(size[-2], size[-1]),
                mode='bilinear',
                align_corners=False,
            )
            ave_loss.update(losses.mean().item())

            confusion_matrix += get_confusion_matrix(
                label,
                pred,
                size,
                num_classes,
                config.TRAIN.IGNORE_LABEL,
            )

    # IoU per class = diagonal / (row sum + column sum - diagonal); the
    # denominator is clamped to 1 so classes that never occur give 0.
    pos = confusion_matrix.sum(1)
    res = confusion_matrix.sum(0)
    tp = np.diag(confusion_matrix)
    IoU_array = tp / np.maximum(1.0, pos + res - tp)
    mean_IoU = IoU_array.mean()

    writer = writer_dict['writer']
    global_steps = writer_dict['valid_global_steps']
    writer.add_scalar('valid_loss', ave_loss.average(), global_steps)
    writer.add_scalar('valid_mIoU', mean_IoU, global_steps)
    writer_dict['valid_global_steps'] = global_steps + 1
    return ave_loss.average(), mean_IoU, IoU_array
def validate(config, testloader, model, writer_dict):
    """Evaluate a (possibly multi-output) model and log loss and mean IoU.

    One confusion matrix is kept per model output
    (``config.MODEL.NUM_OUTPUTS`` of them). When ``dist.is_distributed()``
    is true the loss and the confusion matrices go through
    ``reduce_tensor``.

    Args:
        config: Configuration object; reads ``MODEL.NUM_OUTPUTS``,
            ``MODEL.ALIGN_CORNERS``, ``DATASET.NUM_CLASSES`` and
            ``TRAIN.IGNORE_LABEL``.
        testloader: Iterable yielding 4-item batches ``(image, label, _, _)``.
        model: Callable as ``model(image, label)`` returning
            ``(losses, pred)``; ``pred`` may be one tensor or a list/tuple.
        writer_dict: Dict with ``'writer'`` and ``'valid_global_steps'``;
            the counter is advanced by one.

    Returns:
        Tuple ``(average loss, mean IoU, IoU array)``; the IoU values are
        those of the last output index.
    """
    model.eval()
    loss_meter = AverageMeter()
    num_outputs = config.MODEL.NUM_OUTPUTS
    num_classes = config.DATASET.NUM_CLASSES
    conf_mat = np.zeros((num_classes, num_classes, num_outputs))

    with torch.no_grad():
        for batch_idx, (image, label, _, _) in enumerate(testloader):
            label_size = label.size()
            image = image.cuda()
            label = label.long().cuda()

            losses, outputs = model(image, label)
            if not isinstance(outputs, (list, tuple)):
                outputs = [outputs]

            for head, logits in enumerate(outputs):
                # Resize each output to the label's spatial size.
                resized = F.interpolate(
                    input=logits,
                    size=label_size[-2:],
                    mode='bilinear',
                    align_corners=config.MODEL.ALIGN_CORNERS,
                )
                conf_mat[..., head] += get_confusion_matrix(
                    label,
                    resized,
                    label_size,
                    num_classes,
                    config.TRAIN.IGNORE_LABEL,
                )

            if batch_idx % 10 == 0:
                print(batch_idx)

            batch_loss = losses.mean()
            reduced = reduce_tensor(batch_loss) if dist.is_distributed() else batch_loss
            loss_meter.update(reduced.item())

    if dist.is_distributed():
        conf_mat = reduce_tensor(torch.from_numpy(conf_mat).cuda()).cpu().numpy()

    for head in range(num_outputs):
        head_mat = conf_mat[..., head]
        pos = head_mat.sum(1)
        res = head_mat.sum(0)
        tp = np.diag(head_mat)
        IoU_array = tp / np.maximum(1.0, pos + res - tp)
        mean_IoU = IoU_array.mean()
        if dist.get_rank() <= 0:
            logging.info('{} {} {}'.format(head, IoU_array, mean_IoU))

    writer = writer_dict['writer']
    step = writer_dict['valid_global_steps']
    writer.add_scalar('valid_loss', loss_meter.average(), step)
    writer.add_scalar('valid_mIoU', mean_IoU, step)
    writer_dict['valid_global_steps'] = step + 1
    return loss_meter.average(), mean_IoU, IoU_array
def train(config, epoch, num_epoch, epoch_iters, base_lr, num_iters,
          trainloader, optimizer, model, writer_dict, device):
    """Run one training epoch with main, auxiliary and error losses.

    The optimised loss is
    ``losses.mean() + 0.4 * aux_loss.mean() + 1 * error_loss.mean()``.
    Every ``config.PRINT_FREQ`` iterations rank 0 logs the running averages
    (divided by the world size) and writes ``train_loss`` to the summary
    writer.

    Args:
        config: Configuration object; ``PRINT_FREQ`` is read.
        epoch: Index of the current epoch.
        num_epoch: Total number of epochs (used in the log message only).
        epoch_iters: Iterations per epoch.
        base_lr: Base learning rate passed to ``adjust_learning_rate``.
        num_iters: Total iteration count passed to ``adjust_learning_rate``.
        trainloader: Iterable yielding 4-item batches
            ``(images, labels, _, _)``.
        optimizer: Optimizer stepped once per batch.
        model: Callable as ``model(images, labels)`` returning
            ``(losses, aux_loss, error_loss, _)``.
        writer_dict: Dict with ``'writer'`` and ``'train_global_steps'``.
        device: Device the batch tensors are moved to.
    """
    model.train()
    batch_time = AverageMeter()
    ave_loss = AverageMeter()
    ave_loss1 = AverageMeter()
    ave_aux_loss = AverageMeter()
    ave_error_loss = AverageMeter()
    tic = time.time()
    cur_iters = epoch * epoch_iters
    writer = writer_dict['writer']
    global_steps = writer_dict['train_global_steps']
    rank = get_rank()
    world_size = get_world_size()

    for i_iter, batch in enumerate(trainloader):
        images, labels, _, _ = batch
        images = images.to(device)
        labels = labels.long().to(device)

        losses, aux_loss, error_loss, _ = model(images, labels)
        loss = losses.mean() + 0.4 * aux_loss.mean() + 1 * error_loss.mean()

        reduced_loss = reduce_tensor(loss)
        loss1 = reduce_tensor(losses)
        aux_loss = reduce_tensor(aux_loss)
        error_losses = reduce_tensor(error_loss)

        model.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        # update average loss
        ave_loss.update(reduced_loss.item())
        ave_loss1.update(loss1.item())
        ave_aux_loss.update(aux_loss.item())
        ave_error_loss.update(error_losses.item())

        lr = adjust_learning_rate(optimizer,
                                  base_lr,
                                  num_iters,
                                  i_iter + cur_iters)

        if i_iter % config.PRINT_FREQ == 0 and rank == 0:
            print_loss = ave_loss.average() / world_size
            print_loss1 = ave_loss1.average() / world_size
            print_loss_aux = ave_aux_loss.average() / world_size
            print_error_loss = ave_error_loss.average() / world_size
            msg = 'Epoch: [{}/{}] Iter:[{}/{}], Time: {:.2f}, ' \
                  'lr: {:.6f}, Loss: {:.6f}, Loss_1: {:.6f}, Loss_aux: {:.6f}, error_loss: {:.6f}' .format(
                      epoch, num_epoch, i_iter, epoch_iters,
                      batch_time.average(), lr, print_loss, print_loss1,
                      print_loss_aux, print_error_loss)
            logging.info(msg)

            writer.add_scalar('train_loss', print_loss, global_steps)
            # Advance the step after every write; previously the counter was
            # read once before the loop, so all points logged in an epoch
            # shared one step and the stored counter moved by at most one.
            global_steps += 1
            writer_dict['train_global_steps'] = global_steps
def validate(testloader, model, test_size, local_rank):
    """Compute the average validation loss over ``testloader``.

    The per-batch loss is the sum of the means of the first three model
    outputs. Each batch loss goes through ``reduce_tensor`` before being
    accumulated.

    Args:
        testloader: Iterable yielding 5-item batches ``(a, fg, bg, _, _)``.
        model: Callable as ``model(a, fg, bg)`` returning an indexable of at
            least three tensors.
        test_size: Unused here; kept for interface compatibility.
        local_rank: Ranks ``<= 0`` emit log messages and show a progress bar.

    Returns:
        The value of ``AverageMeter.average()`` after all batches.
    """
    is_main = local_rank <= 0
    if is_main:
        logging.info('Start evaluation...')

    model.eval()
    loss_meter = AverageMeter()

    def batch_loss(batch):
        # Kept as a function so the intermediate outputs go out of scope
        # as soon as the detached loss is returned.
        a, fg, bg, _, _ = batch  # [B, 3, 3 or 1, H, W]
        out = model(a, fg, bg)
        total = out[0].mean() + out[1].mean() + out[2].mean()
        return total.detach()

    with torch.no_grad():
        batches = tqdm(testloader, ascii=True) if is_main else testloader
        for batch in batches:
            reduced = reduce_tensor(batch_loss(batch))
            loss_meter.update(reduced.item())

    if is_main:
        logging.info('Validation loss: {:.6f}'.format(loss_meter.average()))
    return loss_meter.average()
def inference():
    """Evaluate a checkpointed RedNet on the SUNRGBD validation split.

    Reads the module-level ``args`` (``last_ckpt``, ``data_dir``,
    ``num_class``) and ``device``. Prints per-iteration pixel accuracy,
    per-class IoU, mean class accuracy and a final summary; returns nothing.
    """
    model = DFSeg_model.RedNet(num_classes=40, pretrained=False)
    load_ckpt(model, None, args.last_ckpt, device)
    model.eval()
    model.to(device)

    transform = torchvision.transforms.Compose(
        [scaleNorm(), ToTensor(), Normalize()])
    val_data = SUNRGBD(transform=transform,
                       phase_train=False,
                       data_dir=args.data_dir)
    val_loader = DataLoader(val_data, batch_size=1, shuffle=False,
                            num_workers=1, pin_memory=True)

    acc_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    a_meter = AverageMeter()
    b_meter = AverageMeter()

    with torch.no_grad():
        for batch_idx, sample in enumerate(val_loader):
            image = sample['image'].to(device)
            depth = sample['depth'].to(device)
            label = sample['label'].numpy()

            pred = model(image, depth)
            # Predicted class indices are shifted by one before comparison
            # with the labels (presumably labels are 1-based -- confirm).
            _, best = torch.max(pred, 1)
            output = (best + 1).squeeze(0).cpu().numpy()

            acc, pix = accuracy(output, label)
            intersection, union = intersectionAndUnion(
                output, label, args.num_class)
            a_m, b_m = macc(output, label, args.num_class)

            acc_meter.update(acc, pix)
            intersection_meter.update(intersection)
            union_meter.update(union)
            a_meter.update(a_m)
            b_meter.update(b_m)

            timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            print('[{}] iter {}, accuracy: {}'
                  .format(timestamp, batch_idx, acc))

    # The small epsilon guards against division by zero.
    iou = intersection_meter.sum / (union_meter.sum + 1e-10)
    for class_idx, class_iou in enumerate(iou):
        print('class [{}], IoU: {}'.format(class_idx, class_iou))

    mAcc = a_meter.average() / (b_meter.average() + 1e-10)
    print(mAcc.mean())
    print('[Eval Summary]:')
    print('Mean IoU: {:.4}, Accuracy: {:.2f}%'
          .format(iou.mean(), acc_meter.average() * 100))
def train(config, epoch, num_epoch, epoch_iters, base_lr, num_iters,
          trainloader, optimizer, model, writer_dict):
    """Run one mixed-precision training epoch.

    The forward pass runs under ``autocast`` and the backward pass and
    optimizer step go through a ``GradScaler``. Rank 0 logs running loss
    and accuracy every ``config.PRINT_FREQ`` iterations; after the loop the
    epoch-average loss is written to the summary writer and
    ``writer_dict['train_global_steps']`` is advanced by one.

    Args:
        config: Configuration object; ``PRINT_FREQ`` is read.
        epoch: Index of the current epoch.
        num_epoch: Total number of epochs (used in the log message only).
        epoch_iters: Iterations per epoch.
        base_lr: Base learning rate passed to ``adjust_learning_rate``.
        num_iters: Total iteration count passed to ``adjust_learning_rate``.
        trainloader: Iterable yielding 4-item batches
            ``(images, labels, _, _)``.
        optimizer: Optimizer stepped through the gradient scaler.
        model: Callable as ``model(images, labels)`` returning
            ``(losses, _, acc)``.
        writer_dict: Dict with ``'writer'`` and ``'train_global_steps'``.
    """
    model.train()
    scaler = GradScaler()
    step_timer = AverageMeter()
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    last_tick = time.time()
    iter_offset = epoch * epoch_iters
    writer = writer_dict['writer']
    global_steps = writer_dict['train_global_steps']

    for i_iter, (images, labels, _, _) in enumerate(trainloader, 0):
        images = images.cuda()
        labels = labels.long().cuda()

        with autocast():
            losses, _, acc = model(images, labels)
            loss = losses.mean()
            acc = acc.mean()

        reduced_loss = reduce_tensor(loss) if dist.is_distributed() else loss

        model.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # measure elapsed time
        step_timer.update(time.time() - last_tick)
        last_tick = time.time()

        # update running averages
        loss_meter.update(reduced_loss.item())
        acc_meter.update(acc.item())

        adjust_learning_rate(optimizer, base_lr, num_iters,
                             i_iter + iter_offset)

        if i_iter % config.PRINT_FREQ == 0 and dist.get_rank() == 0:
            current_lrs = [group['lr'] for group in optimizer.param_groups]
            msg = 'Epoch: [{}/{}] Iter:[{}/{}], Time: {:.2f}, ' \
                  'lr: {}, Loss: {:.6f}, Acc:{:.6f}' .format(
                      epoch, num_epoch, i_iter, epoch_iters,
                      step_timer.average(), current_lrs,
                      loss_meter.average(), acc_meter.average())
            logging.info(msg)

    writer.add_scalar('train_loss', loss_meter.average(), global_steps)
    writer_dict['train_global_steps'] = global_steps + 1
def train(config, epoch, num_epoch, epoch_iters, base_lr, num_iters,
          trainloader, optimizer, lr_scheduler, model, writer_dict, device):
    """Run one training epoch, passing the scheduler step count to the model.

    Every ``config.PRINT_FREQ`` iterations rank 0 logs the running loss
    (divided by the world size), writes ``train_loss`` to the summary
    writer and restarts the batch-time meter.

    Args:
        config: Configuration object; ``PRINT_FREQ`` and
            ``TRAIN.LR_SCHEDULER`` are read.
        epoch: Index of the current epoch.
        num_epoch: Total number of epochs (used in the log message only).
        epoch_iters: Iterations per epoch.
        base_lr: Base learning rate passed to ``adjust_learning_rate``.
        num_iters: Total iteration count passed to ``adjust_learning_rate``.
        trainloader: Iterable yielding 4-item batches
            ``(images, labels, _, _)``.
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Scheduler; stepped per batch unless
            ``config.TRAIN.LR_SCHEDULER == 'step'``. Its ``_step_count``
            minus one is passed to the model as ``train_step``.
        model: Callable as ``model(images, labels, train_step=...)``
            returning ``(losses, _)``.
        writer_dict: Dict with ``'writer'`` and ``'train_global_steps'``.
        device: Device the batch tensors are moved to.
    """
    model.train()
    batch_time = AverageMeter()
    ave_loss = AverageMeter()
    tic = time.time()
    cur_iters = epoch * epoch_iters
    writer = writer_dict['writer']
    global_steps = writer_dict['train_global_steps']
    rank = get_rank()
    world_size = get_world_size()

    for i_iter, batch in enumerate(trainloader):
        images, labels, _, _ = batch
        images = images.to(device)
        labels = labels.long().to(device)

        losses, _ = model(images, labels,
                          train_step=(lr_scheduler._step_count - 1))
        loss = losses.mean()

        reduced_loss = reduce_tensor(loss)

        model.zero_grad()
        loss.backward()
        optimizer.step()
        if config.TRAIN.LR_SCHEDULER != 'step':
            lr_scheduler.step()

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        # update average loss
        ave_loss.update(reduced_loss.item())

        lr = adjust_learning_rate(optimizer,
                                  base_lr,
                                  num_iters,
                                  i_iter + cur_iters)

        if i_iter % config.PRINT_FREQ == 0 and rank == 0:
            print_loss = ave_loss.average() / world_size
            msg = 'Epoch: [{}/{}] Iter:[{}/{}], Time: {:.2f}, ' \
                  'lr: {:.6f}, Loss: {:.6f}' .format(
                      epoch, num_epoch, i_iter, epoch_iters,
                      batch_time.average(), lr, print_loss)
            logging.info(msg)

            writer.add_scalar('train_loss', print_loss, global_steps)
            # Advance the step after every write; previously the counter was
            # read once before the loop, so all points logged in an epoch
            # shared one step and the stored counter moved by at most one.
            global_steps += 1
            writer_dict['train_global_steps'] = global_steps
            # Restart the timing window after each report.
            batch_time = AverageMeter()
def train(config, epoch, num_epoch, epoch_iters, trainloader, optimizer,
          lr_scheduler, model, writer_dict, device):
    """Run one training epoch with a per-iteration LR scheduler.

    Rank 0 logs the running loss (divided by the world size) every
    ``config.PRINT_FREQ`` iterations. After the loop rank 0 writes the
    running loss to the summary writer and advances
    ``writer_dict['train_global_steps']`` by one.

    Args:
        config: Configuration object; ``PRINT_FREQ`` is read.
        epoch: Index of the current epoch.
        num_epoch: Total number of epochs (used in the log message only).
        epoch_iters: Iterations per epoch (used in the log message only).
        trainloader: Iterable yielding 4-item batches
            ``(images, labels, _, _)``.
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Scheduler stepped once per batch.
        model: Callable as ``model(images, labels)`` returning ``(loss, _)``.
        writer_dict: Dict with ``'writer'`` and ``'train_global_steps'``.
        device: Device the batch tensors are moved to.
    """
    model.train()
    step_timer = AverageMeter()
    loss_meter = AverageMeter()
    last_tick = time.time()
    rank = get_rank()
    world_size = get_world_size()
    is_main = rank == 0

    for i_iter, (images, labels, _, _) in enumerate(trainloader, 0):
        labels = labels.long().to(device)
        images = images.to(device)

        loss, _ = model(images, labels)
        reduced_loss = reduce_tensor(loss)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()

        # measure elapsed time
        step_timer.update(time.time() - last_tick)
        last_tick = time.time()

        # update running loss
        loss_meter.update(reduced_loss.item())

        lr = optimizer.param_groups[0]['lr']
        if is_main and i_iter % config.PRINT_FREQ == 0:
            print_loss = loss_meter.average() / world_size
            msg = 'Epoch: [{}/{}] Iter:[{}/{}], Time: {:.2f}, ' \
                  'lr: {:.6f}, Loss: {:.6f}' .format(
                      epoch, num_epoch, i_iter, epoch_iters,
                      step_timer.average(), lr, print_loss)
            logging.info(msg)

    if not is_main:
        return

    writer = writer_dict['writer']
    step = writer_dict['train_global_steps']
    writer.add_scalar('train_loss', loss_meter.average() / world_size, step)
    writer_dict['train_global_steps'] = step + 1
def train(config, epoch, num_epoch, epoch_iters, base_lr, num_iters,
          trainloader, optimizer, model, writer_dict):
    """Run one training epoch.

    When ``config.DATASET.DATASET`` is ``"pneumothorax"`` the dataset's
    ``update_train_ds`` is called with ``config.DATASET.WEIGHT_POSITIVE``
    before training starts. Every ``config.PRINT_FREQ`` iterations the
    running loss is logged and written to the summary writer.

    Args:
        config: Configuration object; reads ``DATASET.DATASET``,
            ``DATASET.WEIGHT_POSITIVE`` and ``PRINT_FREQ``.
        epoch: Index of the current epoch.
        num_epoch: Total number of epochs (used in the log message only).
        epoch_iters: Iterations per epoch.
        base_lr: Base learning rate passed to ``adjust_learning_rate``.
        num_iters: Total iteration count passed to ``adjust_learning_rate``.
        trainloader: Iterable yielding 4-item batches
            ``(images, labels, _, _)``.
        optimizer: Optimizer stepped once per batch.
        model: Callable as ``model(images, labels)`` returning
            ``(losses, _)``.
        writer_dict: Dict with ``'writer'`` and ``'train_global_steps'``.
    """
    if config.DATASET.DATASET == "pneumothorax":
        trainloader.dataset.update_train_ds(config.DATASET.WEIGHT_POSITIVE)

    model.train()
    batch_time = AverageMeter()
    ave_loss = AverageMeter()
    tic = time.time()
    cur_iters = epoch * epoch_iters
    writer = writer_dict['writer']
    global_steps = writer_dict['train_global_steps']

    for i_iter, batch in enumerate(trainloader, 0):
        images, labels, _, _ = batch
        labels = labels.long().cuda()

        losses, _ = model(images, labels)
        loss = losses.mean()

        model.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        # update average loss
        ave_loss.update(loss.item())

        lr = adjust_learning_rate(optimizer,
                                  base_lr,
                                  num_iters,
                                  i_iter + cur_iters)

        if i_iter % config.PRINT_FREQ == 0:
            msg = 'Epoch: [{}/{}] Iter:[{}/{}], Time: {:.2f}, ' \
                  'lr: {:.6f}, Loss: {:.6f}' .format(
                      epoch, num_epoch, i_iter, epoch_iters,
                      batch_time.average(), lr, ave_loss.average())
            logging.info(msg)

            writer.add_scalar('train_loss', ave_loss.average(), global_steps)
            # Advance the step after every write; previously the counter was
            # read once before the loop, so all points logged in an epoch
            # shared one step and the stored counter moved by at most one.
            global_steps += 1
            writer_dict['train_global_steps'] = global_steps
def validate(config, testloader, model, writer_dict, device):
    """Evaluate a boundary-aware model and log loss and mean IoU.

    The per-batch loss is the mean of
    ``losses + 0.4*aux_loss + 4*error_loss`` summed over both loss groups
    returned by the model. Loss and confusion matrix go through
    ``reduce_tensor``. On rank 0 the scalars are written to the summary
    writer and ``preds[2][0][0]`` of the last batch is saved (scaled by 255,
    as ``uint8``) to ``'<step>_error.png'`` in the working directory.

    Args:
        config: Configuration object; ``DATASET.NUM_CLASSES`` and
            ``TRAIN.IGNORE_LABEL`` are read.
        testloader: Iterable yielding 5-item batches
            ``(image, label, boundary_gt, _, _)``.
        model: Callable as ``model(image, label, boundary_gt)`` returning
            six loss tensors followed by ``preds``.
        writer_dict: Dict with ``'writer'`` and ``'valid_global_steps'``;
            the counter is advanced by one on rank 0.
        device: Device tensors are moved to.

    Returns:
        Tuple ``(average loss / world size, mean IoU, per-class IoU array)``.
    """
    rank = get_rank()
    world_size = get_world_size()
    model.eval()
    ave_loss = AverageMeter()
    num_classes = config.DATASET.NUM_CLASSES
    confusion_matrix = np.zeros((num_classes, num_classes))

    with torch.no_grad():
        for image, label, boundary_gt, _, _ in testloader:
            size = label.size()
            image = image.to(device)
            boundary_gt = boundary_gt.to(device)
            label = label.long().to(device)

            (losses, aux_loss, error_loss,
             losses_2, aux_loss_2, error_loss_2, preds) = model(
                image, label, boundary_gt.float())
            # F.upsample is deprecated; F.interpolate with
            # align_corners=False is what it resolved to for bilinear mode.
            pred = F.interpolate(
                input=preds[0],
                size=(size[-2], size[-1]),
                mode='bilinear',
                align_corners=False,
            )
            loss = (losses + 0.4 * aux_loss + 4 * error_loss
                    + losses_2 + 0.4 * aux_loss_2 + 4 * error_loss_2).mean()
            reduced_loss = reduce_tensor(loss)
            ave_loss.update(reduced_loss.item())

            confusion_matrix += get_confusion_matrix(
                label,
                pred,
                size,
                num_classes,
                config.TRAIN.IGNORE_LABEL,
            )

    confusion_matrix = torch.from_numpy(confusion_matrix).to(device)
    reduced_confusion_matrix = reduce_tensor(confusion_matrix)
    confusion_matrix = reduced_confusion_matrix.cpu().numpy()

    # IoU per class = diagonal / (row sum + column sum - diagonal); the
    # denominator is clamped to 1 so classes that never occur give 0.
    pos = confusion_matrix.sum(1)
    res = confusion_matrix.sum(0)
    tp = np.diag(confusion_matrix)
    IoU_array = tp / np.maximum(1.0, pos + res - tp)
    mean_IoU = IoU_array.mean()
    print_loss = ave_loss.average() / world_size

    if rank == 0:
        writer = writer_dict['writer']
        global_steps = writer_dict['valid_global_steps']
        writer.add_scalar('valid_loss', print_loss, global_steps)
        writer.add_scalar('valid_mIoU', mean_IoU, global_steps)
        writer_dict['valid_global_steps'] = global_steps + 1
        # Dump one map from the last batch for visual inspection.
        cv2.imwrite(
            str(global_steps) + '_error.png',
            (preds[2][0][0].data.cpu().numpy() * 255).astype(np.uint8))
    return print_loss, mean_IoU, IoU_array
def validate(config, testloader, model, writer_dict, device):
    """Evaluate ``model`` using accumulated intersection/union counts.

    Per-class intersection and union totals are summed over all batches,
    passed through ``reduce_tensor``, and turned into IoU. Rank 0 logs the
    running loss every ``config.PRINT_FREQ`` iterations and writes the final
    scalars to the summary writer.

    Args:
        config: Configuration object; ``DATASET.NUM_CLASSES`` and
            ``PRINT_FREQ`` are read.
        testloader: Iterable yielding 4-item batches ``(image, label, _, _)``.
        model: Callable as ``model(image, label)`` returning ``(loss, pred)``.
        writer_dict: Dict with ``'writer'`` and ``'valid_global_steps'``;
            the counter is advanced by one on rank 0.
        device: Device tensors are moved to.

    Returns:
        Tuple ``(average loss / world size, mean IoU)``.
    """
    rank = get_rank()
    world_size = get_world_size()
    model.eval()
    loss_meter = AverageMeter()
    num_classes = config.DATASET.NUM_CLASSES
    inter_total = np.zeros(num_classes)
    union_total = np.zeros(num_classes)

    with torch.no_grad():
        for step, (image, label, _, _) in enumerate(testloader):
            target_hw = tuple(label.size()[-2:])
            label = label.long().to(device)
            image = image.to(device)

            loss, pred = model(image, label)
            # Resize only when the prediction and label sizes differ.
            if tuple(pred.size()[-2:]) != target_hw:
                pred = F.interpolate(pred, size=target_hw,
                                     mode='bilinear', align_corners=False)

            loss_meter.update(reduce_tensor(loss).item())

            batch_inter, batch_union = batch_intersection_union(
                pred, label, num_classes)
            inter_total += batch_inter
            union_total += batch_union

            if rank == 0 and step % config.PRINT_FREQ == 0:
                logging.info(
                    f'Iter: {step}, Loss: {loss_meter.average() / world_size:.6f}')

    inter_total = reduce_tensor(
        torch.from_numpy(inter_total).to(device)).cpu().numpy()
    union_total = reduce_tensor(
        torch.from_numpy(union_total).to(device)).cpu().numpy()

    # The smallest float64 spacing keeps the division defined when a class
    # has an empty union.
    eps = np.spacing(1, dtype=np.float64)
    IoU = np.float64(1.0) * inter_total / (eps + union_total)
    mean_IoU = IoU.mean()
    print_loss = loss_meter.average() / world_size

    if rank == 0:
        writer = writer_dict['writer']
        global_steps = writer_dict['valid_global_steps']
        writer.add_scalar('valid_loss', print_loss, global_steps)
        writer.add_scalar('valid_mIoU', mean_IoU, global_steps)
        writer_dict['valid_global_steps'] = global_steps + 1
    return print_loss, mean_IoU
def validate(config, testloader, model, writer_dict, device):
    """Evaluate ``model`` on ``testloader`` and log loss and mean IoU.

    Loss and confusion matrix go through ``reduce_tensor``; only rank 0
    writes to the summary writer.

    Args:
        config: Configuration object; ``DATASET.NUM_CLASSES`` and
            ``TRAIN.IGNORE_LABEL`` are read.
        testloader: Iterable yielding 4-item batches ``(image, label, _, _)``.
        model: Callable as ``model(image, label)`` returning
            ``(losses, pred)``.
        writer_dict: Dict with ``'writer'`` and ``'valid_global_steps'``;
            the counter is advanced by one on rank 0.
        device: Device tensors are moved to.

    Returns:
        Tuple ``(average loss / world size, mean IoU, per-class IoU array)``.
    """
    rank = get_rank()
    world_size = get_world_size()
    model.eval()
    ave_loss = AverageMeter()
    num_classes = config.DATASET.NUM_CLASSES
    confusion_matrix = np.zeros((num_classes, num_classes))

    with torch.no_grad():
        for image, label, _, _ in testloader:
            size = label.size()
            image = image.to(device)
            label = label.long().to(device)

            losses, pred = model(image, label)
            # F.upsample is deprecated; F.interpolate with
            # align_corners=False is what it resolved to for bilinear mode.
            pred = F.interpolate(
                input=pred,
                size=(size[-2], size[-1]),
                mode='bilinear',
                align_corners=False,
            )
            loss = losses.mean()
            reduced_loss = reduce_tensor(loss)
            ave_loss.update(reduced_loss.item())

            confusion_matrix += get_confusion_matrix(
                label,
                pred,
                size,
                num_classes,
                config.TRAIN.IGNORE_LABEL,
            )

    confusion_matrix = torch.from_numpy(confusion_matrix).to(device)
    reduced_confusion_matrix = reduce_tensor(confusion_matrix)
    confusion_matrix = reduced_confusion_matrix.cpu().numpy()

    # IoU per class = diagonal / (row sum + column sum - diagonal); the
    # denominator is clamped to 1 so classes that never occur give 0.
    pos = confusion_matrix.sum(1)
    res = confusion_matrix.sum(0)
    tp = np.diag(confusion_matrix)
    IoU_array = tp / np.maximum(1.0, pos + res - tp)
    mean_IoU = IoU_array.mean()
    print_loss = ave_loss.average() / world_size

    if rank == 0:
        writer = writer_dict['writer']
        global_steps = writer_dict['valid_global_steps']
        writer.add_scalar('valid_loss', print_loss, global_steps)
        writer.add_scalar('valid_mIoU', mean_IoU, global_steps)
        writer_dict['valid_global_steps'] = global_steps + 1
    return print_loss, mean_IoU, IoU_array
def validate(val_loader, net, criterion, curr_epoch):
    """Run validation and report loss, accuracy and frequency-weighted IoU.

    Reads the module-level ``args`` dict (``'gpu'``, ``'predict_step'``,
    ``'pred_dir'``), ``writer`` and ``NET_NAME``. When ``curr_epoch`` is a
    multiple of ``args['predict_step']`` the first prediction of the first
    batch is colourised and saved as ``NET_NAME + '.png'``.

    Args:
        val_loader: Iterable yielding ``(imgs, labels)`` pairs.
        net: Callable as ``net(imgs)`` returning ``(outputs, aux)``.
        criterion: Callable as ``criterion(outputs, labels)``.
        curr_epoch: Epoch index used as the step for the summary scalars.

    Returns:
        Tuple ``(acc_meter.avg, fwIoU_meter.avg, val_loss.avg)``.
    """
    # the following code is written assuming that batch size is 1
    net.eval()
    use_gpu = args['gpu']
    if use_gpu:
        torch.cuda.empty_cache()
    start = time.time()

    val_loss = AverageMeter()
    acc_meter = AverageMeter()
    fwIoU_meter = AverageMeter()

    for vi, (imgs, labels) in enumerate(val_loader):
        imgs = imgs.float()
        labels = labels.long()
        if use_gpu:
            imgs, labels = imgs.cuda().float(), labels.cuda().long()

        with torch.no_grad():
            outputs, _ = net(imgs)
            loss = criterion(outputs, labels)
        val_loss.update(loss.cpu().detach().numpy())

        outputs = outputs.cpu().detach()
        labels = labels.cpu().detach().numpy()
        preds = torch.max(outputs, dim=1)[1].numpy()

        for pred, label in zip(preds, labels):
            acc, _ = accuracy(pred, label)
            fwiou = FWIoU(pred.squeeze(), label.squeeze(), ignore_zero=True)
            acc_meter.update(acc)
            fwIoU_meter.update(fwiou)

        if vi == 0 and curr_epoch % args['predict_step'] == 0:
            pred_color = RS.Index2Color(preds[0])
            save_path = os.path.join(args['pred_dir'], NET_NAME + '.png')
            io.imsave(save_path, pred_color)
            print('Prediction saved!')

    curr_time = time.time() - start
    print('%.1fs Val loss: %.2f, Accuracy: %.2f, fwIoU: %.2f'
          % (curr_time, val_loss.average(),
             acc_meter.average() * 100, fwIoU_meter.average() * 100))

    for tag, meter in (('val_loss', val_loss),
                       ('val_Accuracy', acc_meter),
                       ('val_fwIoU', fwIoU_meter)):
        writer.add_scalar(tag, meter.average(), curr_epoch)

    return acc_meter.avg, fwIoU_meter.avg, val_loss.avg
def evaluate(models, val_loader, interp, criterion, args):
    """Multi-scale evaluation: prints loss, per-class IoU and accuracy.

    For every batch the images are rescaled by each factor in
    ``args.scales``, run through ``models``, passed through ``interp`` and
    averaged. Inference time per batch is measured between
    ``torch.cuda.synchronize()`` calls.

    Args:
        models: Module callable as ``models(images)`` returning
            ``(prediction, _)``.
        val_loader: Iterable yielding 3-item batches ``(images, labels, _)``.
        interp: Callable applied to each prediction before averaging.
        criterion: Callable as ``criterion(pred_seg, seg_labels)``.
        args: Namespace; reads ``num_classes``, ``gpu_id``, ``scales`` and
            ``visualize``.
    """
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    time_meter = AverageMeter()

    models.eval()

    # Evaluation needs no gradients; without no_grad every forward pass
    # would build and keep an autograd graph.
    with torch.no_grad():
        for i, batch_data in enumerate(val_loader):
            # forward pass
            images, labels, _ = batch_data

            torch.cuda.synchronize()
            tic = time.perf_counter()

            pred_seg = torch.zeros(images.size(0), args.num_classes,
                                   labels.size(1), labels.size(2))
            pred_seg = pred_seg.cuda(args.gpu_id, non_blocking=True)

            for scale in args.scales:
                imgs_scale = zoom(images.numpy(),
                                  (1., 1., scale, scale),
                                  order=1,
                                  prefilter=False,
                                  mode='nearest')
                input_images = torch.from_numpy(imgs_scale)
                if args.gpu_id is not None:
                    input_images = input_images.cuda(args.gpu_id,
                                                     non_blocking=True)

                pred_scale, _ = models(input_images)
                pred_scale = interp(pred_scale)

                # average the prediction over all scales
                pred_seg = pred_seg + pred_scale / len(args.scales)

            seg_labels = labels.cuda(args.gpu_id, non_blocking=True)
            loss = criterion(pred_seg, seg_labels)
            loss_value = loss.item()
            loss_meter.update(loss_value)
            print('[Eval] iter {}, loss: {}'.format(i, loss_value))

            labels = as_numpy(labels)
            _, pred = torch.max(pred_seg, dim=1)
            pred = as_numpy(pred.squeeze(0).cpu())

            # calculate accuracy
            acc, pix = accuracy(pred, labels)
            intersection, union = intersectionAndUnion(
                pred, labels, args.num_classes)
            acc_meter.update(acc, pix)
            intersection_meter.update(intersection)
            union_meter.update(union)

            torch.cuda.synchronize()
            time_meter.update(time.perf_counter() - tic)

            if args.visualize:
                visualize_result(batch_data, pred_seg, args)

    # summary
    iou = intersection_meter.sum / (union_meter.sum + 1e-10)
    for i, _iou in enumerate(iou):
        print('class [ {} ], IoU: {:.4f}'.format(i, _iou))

    print('[Eval Summary]:')
    print(
        'loss: {:.6f}, Mean IoU: {:.2f}, Accuracy: {:.2f}%, Inference Time: {:.4f}s'
        .format(loss_meter.average(),
                iou.mean() * 100,
                acc_meter.average() * 100,
                time_meter.average()))
def train():
    """Train ``MultiTaskCNN_Atten`` on RGB-D data, validating every epoch.

    Reads module-level ``args``, ``device``, ``image_h``, ``image_w`` and
    ``log_file``. Each epoch:

    * halves ``args.lr`` once ``loss_count`` reaches 10, then sets the first
      parameter group's learning rate from ``poly_lr_scheduler``;
    * trains on the sum of three cross-entropy losses (main output plus two
      supervised side outputs);
    * periodically writes histograms, the model graph and image grids to
      TensorBoard;
    * runs validation, appends the mean train loss and mean class accuracy
      to ``log_file``;
    * for every epoch except the start epoch, saves a checkpoint when the
      mean IoU is at least the best seen so far, otherwise increments
      ``loss_count``.

    Returns:
        ``None``. Returns early if ``args.optimizer`` is not one of
        ``'rmsprop'``, ``'sgd'`` or ``'adam'``.
    """
    # Record data for display in TensorBoard
    writer_loss = SummaryWriter(os.path.join(args.summary_dir, 'loss'))
    # writer_loss1 = SummaryWriter(os.path.join(args.summary_dir, 'loss', 'loss1'))
    # writer_loss2 = SummaryWriter(os.path.join(args.summary_dir, 'loss', 'loss2'))
    # writer_loss3 = SummaryWriter(os.path.join(args.summary_dir, 'loss', 'loss3'))
    writer_acc = SummaryWriter(os.path.join(args.summary_dir, 'macc'))

    # Prepare the datasets
    train_data = data_eval.ReadData(transform=transforms.Compose([
        data_eval.scaleNorm(),
        data_eval.RandomScale((1.0, 1.4)),
        data_eval.RandomHSV((0.9, 1.1), (0.9, 1.1), (25, 25)),
        data_eval.RandomCrop(image_h, image_w),
        data_eval.RandomFlip(),
        data_eval.ToTensor(),
        data_eval.Normalize()
    ]), data_dir=args.train_data_dir)
    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
                              num_workers=args.workers, pin_memory=False, drop_last=True)
    # NOTE(review): the validation pipeline also applies random scale/crop
    # and shuffles -- confirm this is intended.
    val_data = data_eval.ReadData(transform=transforms.Compose([
        data_eval.scaleNorm(),
        data_eval.RandomScale((1.0, 1.4)),
        data_eval.RandomCrop(image_h, image_w),
        data_eval.ToTensor(),
        data_eval.Normalize()
    ]), data_dir=args.val_data_dir)
    val_loader = DataLoader(val_data, batch_size=args.batch_size, shuffle=True,
                            num_workers=args.workers, pin_memory=False, drop_last=True)
    num_train = len(train_data)
    # num_val = len(val_data)

    # build model; pretrained weights are only requested when not resuming
    if args.last_ckpt:
        model = MultiTaskCNN_Atten(38, depth_channel=1, pretrained=False, arch='resnet50', use_aspp=True)
    else:
        model = MultiTaskCNN_Atten(38, depth_channel=1, pretrained=True, arch='resnet50', use_aspp=True)

    # build optimizer
    if args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(model.parameters(), args.lr)
    elif args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=0.9, weight_decay=1e-4)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), args.lr)
    else:
        # unsupported optimizer name: report and abort
        print('not supported optimizer \n')
        return None

    global_step = 0
    max_miou_val = 0
    # Number of epochs whose mIoU did not reach the best so far.
    loss_count = 0

    # If a checkpoint is given, restore global_step and start_epoch from it
    if args.last_ckpt:
        global_step, args.start_epoch = load_ckpt(model, optimizer, args.last_ckpt, device)
    # if torch.cuda.device_count() > 1 and args.cuda and torch.cuda.is_available():
    #     print("Let's use", torch.cuda.device_count(), "GPUs!")
    #     model = torch.nn.DataParallel(model).to(device)
    model = model.to(device)
    model.train()
    # cal_param(model, data)
    loss_func = nn.CrossEntropyLoss()

    for epoch in range(int(args.start_epoch), args.epochs):
        torch.cuda.empty_cache()
        # if epoch <= freeze_epoch:
        #     for layer in [model.conv1, model.maxpool,model.layer1, model.layer2, model.layer3, model.layer4]:
        #         for param in layer.parameters():
        #             param.requires_grad = False
        tq = tqdm(total=len(train_loader) * args.batch_size)
        # Halve the base learning rate after 10 non-improving epochs.
        if loss_count >= 10:
            args.lr = 0.5 * args.lr
            loss_count = 0
        lr = poly_lr_scheduler(optimizer, args.lr, iter=epoch, max_iter=args.epochs)
        # Only the first parameter group receives the scheduled rate here.
        optimizer.param_groups[0]['lr'] = lr
        # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 30, gamma=0.5)
        tq.set_description('epoch %d, lr %f' % (epoch, args.lr))
        loss_record = []
        # loss1_record = []
        # loss2_record = []
        # loss3_record = []
        local_count = 0
        # print('1')
        for batch_idx, data in enumerate(train_loader):
            image = data['image'].to(device)
            depth = data['depth'].to(device)
            label = data['label'].long().to(device)
            # print('label', label.shape)
            output, output_sup1, output_sup2 = model(image, depth)
            loss1 = loss_func(output, label)
            loss2 = loss_func(output_sup1, label)
            loss3 = loss_func(output_sup2, label)
            loss = loss1 + loss2 + loss3
            tq.update(args.batch_size)
            tq.set_postfix(loss='%.6f' % loss)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            global_step += 1
            local_count += image.data.shape[0]
            # writer_loss.add_scalar('loss_step', loss, global_step)
            # writer_loss1.add_scalar('loss1_step', loss1, global_step)
            # writer_loss2.add_scalar('loss2_step', loss2, global_step)
            # writer_loss3.add_scalar('loss3_step', loss3, global_step)
            loss_record.append(loss.item())
            # loss1_record.append(loss1.item())
            # loss2_record.append(loss2.item())
            # loss3_record.append(loss3.item())
            # Periodic TensorBoard dump: parameter histograms, model graph
            # and grids of the first three inputs / predictions / labels.
            if global_step % args.print_freq == 0 or global_step == 1:
                for name, param in model.named_parameters():
                    writer_loss.add_histogram(name, param.clone().cpu().data.numpy(), global_step, bins='doane')
                writer_loss.add_graph(model, [image, depth])
                grid_image1 = make_grid(image[:3].clone().cpu().data, 3, normalize=True)
                writer_loss.add_image('image', grid_image1, global_step)
                grid_image2 = make_grid(depth[:3].clone().cpu().data, 3, normalize=True)
                writer_loss.add_image('depth', grid_image2, global_step)
                grid_image3 = make_grid(utils.color_label(
                    torch.max(output[:3], 1)[1]), 3, normalize=False, range=(0, 255))
                writer_loss.add_image('Predicted label', grid_image3, global_step)
                grid_image4 = make_grid(utils.color_label(label[:3]), 3, normalize=False, range=(0, 255))
                writer_loss.add_image('Groundtruth label', grid_image4, global_step)
        tq.close()
        loss_train_mean = np.mean(loss_record)
        # Start this epoch's log line; the validation score is appended below.
        with open(log_file, 'a') as f:
            f.write(str(epoch) + '\t' + str(loss_train_mean))
        # loss1_train_mean = np.mean(loss1_record)
        # loss2_train_mean = np.mean(loss2_record)
        # loss3_train_mean = np.mean(loss3_record)
        writer_loss.add_scalar('epoch/loss_epoch_train', float(loss_train_mean), epoch)
        # writer_loss1.add_scalar('epoch/sub_loss_epoch_train', float(loss1_train_mean), epoch)
        # writer_loss2.add_scalar('epoch/sub_loss_epoch_train', float(loss2_train_mean), epoch)
        # writer_loss3.add_scalar('epoch/sub_loss_epoch_train', float(loss3_train_mean), epoch)
        print('loss for train : %f' % loss_train_mean)

        print('----validation starting----')
        # tq_val = tqdm(total=len(val_loader) * args.batch_size)
        # tq_val.set_description('epoch %d' % epoch)
        model.eval()
        val_total_time = 0
        with torch.no_grad():
            sys.stdout.flush()
            tbar = tqdm(val_loader)
            acc_meter = AverageMeter()
            intersection_meter = AverageMeter()
            union_meter = AverageMeter()
            a_meter = AverageMeter()
            b_meter = AverageMeter()
            for batch_idx, sample in enumerate(tbar):
                # origin_image = sample['origin_image'].numpy()
                # origin_depth = sample['origin_depth'].numpy()
                image_val = sample['image'].to(device)
                depth_val = sample['depth'].to(device)
                label_val = sample['label'].numpy()
                with torch.no_grad():
                    # Wall-clock time of the forward call only.
                    start = time.time()
                    pred = model(image_val, depth_val)
                    end = time.time()
                    duration = end - start
                    val_total_time += duration
                # tq_val.set_postfix(fps ='%.4f' % (args.batch_size / (end - start)))
                print_str = 'Test step [{}/{}].'.format(
                    batch_idx + 1, len(val_loader))
                tbar.set_description(print_str)
                output_val = torch.max(pred, 1)[1]
                output_val = output_val.squeeze(0).cpu().numpy()
                acc, pix = accuracy(output_val, label_val)
                intersection, union = intersectionAndUnion(
                    output_val, label_val, args.num_class)
                acc_meter.update(acc, pix)
                a_m, b_m = macc(output_val, label_val, args.num_class)
                intersection_meter.update(intersection)
                union_meter.update(union)
                a_meter.update(a_m)
                b_meter.update(b_m)
            # Batches per second of accumulated forward time.
            fps = len(val_loader) / val_total_time
            print('fps = %.4f' % fps)
            tbar.close()
        mAcc = (a_meter.average() / (b_meter.average() + 1e-10))
        with open(log_file, 'a') as f:
            f.write(' ' + str(mAcc.mean()) + '\n')
        iou = intersection_meter.sum / (union_meter.sum + 1e-10)
        writer_acc.add_scalar('epoch/Acc_epoch_train', mAcc.mean(), epoch)
        print('----validation finished----')
        model.train()

        # The original comment described saving weights every
        # save_epoch_freq epochs; the code instead saves whenever the
        # validation mIoU is at least the best so far, and skips the start
        # epoch entirely.
        if epoch != args.start_epoch:
            if iou.mean() >= max_miou_val:
                print('mIoU:', iou.mean())
                if not os.path.isdir(args.ckpt_dir):
                    os.mkdir(args.ckpt_dir)
                save_ckpt(args.ckpt_dir, model, optimizer, global_step, epoch, local_count, num_train)
                max_miou_val = iou.mean()
                # max_macc_val = mAcc.mean()
            else:
                loss_count += 1
    torch.cuda.empty_cache()
def evaluate(nets, loader, args):
    """Evaluate two prediction heads and print per-class IoU and a summary.

    Args:
        nets: Iterable of modules; each is switched to eval mode and the
            collection is passed to ``forward_multiscale``.
        loader: Iterable of batches passed to ``forward_multiscale``.
        args: Namespace; reads ``num_classes`` and ``visualize``.
    """
    head_count = 2
    loss_meters = [AverageMeter() for _ in range(head_count)]
    acc_meters = [AverageMeter() for _ in range(head_count)]
    intersection_meters = [AverageMeter() for _ in range(head_count)]
    union_meters = [AverageMeter() for _ in range(head_count)]

    for model in nets:
        model.eval()

    for i, batch_data in enumerate(loader):
        if i % 100 == 0:
            print('{:d} processd'.format(i))

        # forward pass
        pred1, pred2, loss_pred1, loss_pred2 = forward_multiscale(
            nets, batch_data, args)
        loss_values = (loss_pred1.data[0], loss_pred2.data[0])

        # accumulate loss, accuracy and intersection/union per head
        accs = []
        for head, pred in enumerate((pred1, pred2)):
            loss_meters[head].update(loss_values[head])
            acc, pix = accuracy(batch_data, pred)
            intersection, union = intersectionAndUnion(
                batch_data, pred, args.num_classes)
            acc_meters[head].update(acc, pix)
            intersection_meters[head].update(intersection)
            union_meters[head].update(union)
            accs.append(acc)

        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print('[{}] iter {}, loss_pred1: {} loss_pred2: {}, Accurarcy_pred1: {} Accurarcy_pred2: {}'
              .format(timestamp, i, loss_values[0], loss_values[1],
                      accs[0], accs[1]))

        # visualization
        if args.visualize:
            visualize_result(batch_data, pred1, pred2, args)

    # The small epsilon guards against division by zero.
    iou_pred1 = intersection_meters[0].sum / (union_meters[0].sum + 1e-10)
    iou_pred2 = intersection_meters[1].sum / (union_meters[1].sum + 1e-10)

    for class_idx, (class_iou1, class_iou2) in enumerate(zip(iou_pred1, iou_pred2)):
        print('class [{}],\n IoU_pred1: {},\n IoU_pred2: {}\n'.format(
            class_idx, class_iou1, class_iou2))

    print('[Eval Summary]:')
    print('Loss_pred1: {},\n Loss_pred2: {},\n Mean IoU_pred1: {:.2f}%,\n Mean IoU_pred2: {:.2f}%,\n Accurarcy_pred1: {:.2f}%,\n Accurarcy_pred2: {:.2f}%,\n'
          .format(loss_meters[0].average(),
                  loss_meters[1].average(),
                  iou_pred1.mean() * 100,
                  iou_pred2.mean() * 100,
                  acc_meters[0].average() * 100,
                  acc_meters[1].average() * 100))
def evaluate(models, val_loader, interp, criterion, history, epoch, args):
    """Run one validation pass, record loss/accuracy and refresh the plots.

    Args:
        models: network called as ``models(images)``; its first output is the
            segmentation prediction, the second is ignored.
        val_loader: iterable yielding ``(images, labels, _)`` batches.
        interp: callable applied to the raw prediction before the loss.
        criterion: loss callable ``criterion(pred, labels)``.
        history: dict with ``'train'`` and ``'val'`` sub-dicts holding the
            ``'epoch'``, ``'loss'`` and ``'acc'`` lists; the ``'val'`` lists
            are appended to here.
        epoch: current epoch number; plots are only written when it is > 0.
        args: options object; ``visualize`` and ``checkpoints_dir`` are read.

    Side effects:
        Prints per-iteration and summary statistics and, for ``epoch > 0``,
        writes ``loss.png`` and ``accuracy.png`` into ``args.checkpoints_dir``.
    """
    print('***Evaluating at {} epoch ...'.format(epoch))
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()

    models.eval()

    # No backward pass happens during validation, so autograd is disabled to
    # avoid keeping the graph of every forward pass alive.
    with torch.no_grad():
        for i, batch_data in enumerate(val_loader):
            torch.cuda.synchronize()

            # forward pass
            images, labels, _ = batch_data
            images = images.to(device)
            seg_labels = labels.to(device)

            pred_seg, _ = models(images)
            pred_seg = interp(pred_seg)

            loss = criterion(pred_seg, seg_labels)
            loss_value = loss.item()
            loss_meter.update(loss_value)
            print('[Eval] iter {}, loss: {}'.format(i, loss_value))

            # Accuracy is computed on numpy arrays of hard class indices.
            labels = as_numpy(labels)
            _, pred = torch.max(pred_seg, dim=1)
            pred = as_numpy(pred.squeeze(0).cpu())
            acc, pix = accuracy(pred, labels)
            acc_meter.update(acc, pix)

            if args.visualize:
                visualize_result(batch_data, pred_seg, args)

    history['val']['epoch'].append(epoch)
    history['val']['loss'].append(loss_meter.average())
    history['val']['acc'].append(acc_meter.average())
    print('[Eval Summary] Epoch: {}, Loss: {}, Accurarcy: {:4.2f}%'.format(
        epoch, loss_meter.average(), acc_meter.average() * 100))

    # Plot figure
    if epoch > 0:
        print('Plotting loss figure...')
        fig = plt.figure()
        plt.plot(np.asarray(history['train']['epoch']),
                 np.log(np.asarray(history['train']['loss'])),
                 color='b', label='training')
        plt.plot(np.asarray(history['val']['epoch']),
                 np.log(np.asarray(history['val']['loss'])),
                 color='c', label='validation')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Log(loss)')
        fig.savefig('{}/loss.png'.format(args.checkpoints_dir), dpi=225)
        plt.close('all')

        fig = plt.figure()
        plt.plot(history['train']['epoch'], history['train']['acc'],
                 color='b', label='training')
        plt.plot(history['val']['epoch'], history['val']['acc'],
                 color='c', label='validation')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        fig.savefig('{}/accuracy.png'.format(args.checkpoints_dir), dpi=225)
        plt.close('all')
def train(models, train_loader, interp, optimizers, criterion, history, epoch, args):
    """Train for one epoch with a main head, a branch head and a weak signal.

    The branch prediction's arg-max is used as a pseudo label for the main
    head.  Two backward passes are run per step: one on the branch loss and
    one on ``loss + args.lambda_branch * loss_weakly``.

    Args:
        models: network returning ``(pred_seg, pred_branch)``.
        train_loader: iterable yielding ``(images, labels, _)`` batches.
        interp: callable applied to both predictions before the losses.
        optimizers: optimizer exposing ``zero_grad()`` and ``step()``.
        criterion: loss callable ``criterion(pred, target)``.
        history: dict whose ``history['train']`` lists ``'epoch'``, ``'loss'``
            and ``'acc'`` are appended to on display iterations.
        epoch: 1-based epoch index (used for the fractional epoch and the
            global iteration count).
        args: options object; ``lambda_branch``, ``display_iter``,
            ``epoch_iters`` and ``running_lr`` are read.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    total_loss_meter = AverageMeter()
    branch_loss_meter = AverageMeter()
    weakly_loss_meter = AverageMeter()
    acc_meter = AverageMeter()

    # Switch to train mode
    models.train()

    tic = time.time()
    for i_iter, batch_data in enumerate(train_loader):
        # measure data loading time
        torch.cuda.synchronize()
        data_time.update(time.time() - tic)

        optimizers.zero_grad()

        # forward pass
        images, labels, _ = batch_data
        images = images.to(device)
        labels = labels.to(device)

        pred_seg, pred_branch = models(images)
        pred_seg = interp(pred_seg)
        pred_branch = interp(pred_branch)

        loss = criterion(pred_seg, labels)
        acc = pixel_acc(pred_seg, labels)
        loss_branch = criterion(pred_branch, labels)

        # Weak supervision: the branch arg-max (taken via numpy) becomes the
        # target for the main head.
        weakly_signal = torch.from_numpy(
            as_numpy(pred_branch).argmax(axis=1)).long().to(device)
        loss_weakly = criterion(pred_seg, weakly_signal)

        # The joint loss is built from the un-averaged losses, as before.
        loss_joint = loss + args.lambda_branch * loss_weakly

        loss = loss.mean()
        loss_branch = loss_branch.mean()
        loss_weakly = loss_weakly.mean()
        acc = acc.mean()

        # Backward / compute gradient and do SGD step
        loss_branch.backward(retain_graph=True)
        loss_joint.backward()
        optimizers.step()

        # Measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        # Update average loss and acc
        loss_value = loss.data.item()
        acc_value = acc.data.item()
        total_loss_meter.update(loss_value)
        branch_loss_meter.update(loss_branch.data.item())
        weakly_loss_meter.update(loss_weakly.data.item())
        acc_meter.update(acc_value)

        # Display and record history
        if i_iter % args.display_iter == 0:
            print(
                'Epoch: [{}][{}/{}], Time: {:.2f}, Data: {:.2f}, '
                'LR: {:.6f} '
                'Accurary: {:4.2f}, Loss: {:.6f}, Loss_branch: {:.6f}, Loss_weakly: {:.6f} '
                .format(epoch, i_iter, args.epoch_iters,
                        batch_time.average(), data_time.average(),
                        args.running_lr,
                        acc_meter.average() * 100,
                        total_loss_meter.average(),
                        branch_loss_meter.average(),
                        weakly_loss_meter.average()))

            train_log = history['train']
            train_log['epoch'].append(
                epoch - 1 + 1. * i_iter / args.epoch_iters)
            train_log['loss'].append(loss_value)
            train_log['acc'].append(acc_value)

        # Adjust learning rate
        cur_iter = i_iter + (epoch - 1) * args.epoch_iters
        adjust_learning_rate(optimizers, cur_iter, args)
def train(epoch, trainloader, steps_per_val, base_lr, total_epochs, optimizer,
          model, adjust_learning_rate, print_freq, image_freq, image_outdir,
          local_rank, sub_losses):
    """Run one training round over ``trainloader``.

    The model is called as ``model(a, fg, bg)``; its first three outputs are
    averaged and summed into the optimised loss, the remaining outputs are
    passed to ``write_image`` for visualisation.

    Args:
        epoch: index of this round; the global iteration offset is
            ``epoch * steps_per_val``.
        trainloader: iterable yielding ``(a, fg, bg)`` batches.
        steps_per_val: number of iterations per round.
        base_lr: base learning rate forwarded to ``adjust_learning_rate``.
        total_epochs: total number of rounds (for the schedule and the log).
        optimizer: optimizer stepped after each backward pass.
        model: network returning a sequence whose first three items are losses.
        adjust_learning_rate: callable
            ``(optimizer, base_lr, max_iters, cur_iter)``.
        print_freq: log every ``print_freq`` iterations.
        image_freq: dump images every ``image_freq`` iterations.
        image_outdir: output directory handed to ``write_image``.
        local_rank: only ranks ``<= 0`` log and write images.
        sub_losses: names used to label the three loss terms in the log.
    """
    model.train()
    batch_time = AverageMeter()
    ave_loss = AverageMeter()
    tic = time.time()
    cur_iters = epoch*steps_per_val
    max_iters = total_epochs * steps_per_val

    def run_step(batch):
        # Kept in its own scope so the intermediate tensors of the step are
        # dropped as soon as it returns.
        a, fg, bg = batch  # [B, 3, 3 or 1, H, W]
        out = model(a, fg, bg)
        l_alpha, l_comp, l_grad = out[0].mean(), out[1].mean(), out[2].mean()
        scalars = (l_alpha.detach().item(),
                   l_comp.detach().item(),
                   l_grad.detach().item())
        total = l_alpha + l_comp + l_grad

        model.zero_grad()
        total.backward()
        optimizer.step()
        return total.detach(), scalars[0], scalars[1], scalars[2], out[3:]

    for i_iter, dp in enumerate(trainloader):
        loss, vis_alpha, vis_comp, vis_grad, vis_out = run_step(dp)
        reduced_loss = reduce_tensor(loss)

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        # update average loss
        ave_loss.update(reduced_loss.item())
        torch_barrier()

        global_iter = i_iter + cur_iters
        adjust_learning_rate(optimizer, base_lr, max_iters, global_iter)

        is_main = local_rank <= 0
        if i_iter % print_freq == 0 and is_main:
            parts = [
                'Iter:[{}/{}], Time: {:.2f}, '.format(
                    global_iter, max_iters, batch_time.average()),
                'lr: {}, Avg. Loss: {:.6f} | Current: Loss: {:.6f}, '.format(
                    [group['lr'] for group in optimizer.param_groups],
                    ave_loss.average(), ave_loss.value()),
                '{}: {:.4f} {}: {:.4f} {}: {:.4f}'.format(
                    sub_losses[0], vis_alpha,
                    sub_losses[1], vis_comp,
                    sub_losses[2], vis_grad),
            ]
            logging.info(''.join(parts))

        if i_iter % image_freq == 0 and is_main:
            write_image(image_outdir, vis_out, global_iter)
def validate(config, testloader, model, writer_dict, epoch):
    """Validate a binary segmentation model, save its masks and log IoU.

    Every predicted mask is written as a ``.tif`` below
    ``<DATASET.ROOT>/<DATASET.TEST_RESULT>/<threshold>/`` and a per-image IoU
    is computed.  When the ground truth of an image is empty, both the label
    and the prediction are inverted first, so the IoU is measured on the
    background instead.

    Args:
        config: configuration object; ``DATASET.ROOT`` and
            ``DATASET.TEST_RESULT`` are read.
        testloader: iterable yielding ``(image, label, _, name)`` batches.
        model: callable ``model(image, label)`` returning ``(losses, pred)``.
        writer_dict: dict with a ``'writer'`` (``add_scalar`` API) and the
            ``'valid_global_steps'`` counter, which is incremented here.
        epoch: epoch number embedded in the saved file names.

    Returns:
        ``(average_loss, mean_IoU)`` over the whole loader.
    """
    model.eval()
    ave_loss = AverageMeter()

    # The threshold and output directory are constant for the whole run, so
    # the directory is created once, not once per image.
    threshold = 0.5
    img_save_path = os.path.join(config.DATASET.ROOT,
                                 config.DATASET.TEST_RESULT,
                                 str(threshold))
    os.makedirs(img_save_path, exist_ok=True)

    iou_score_list_1 = []
    c = 0  # number of images processed so far (drives the file numbering)
    with torch.no_grad():
        for _, batch in enumerate(testloader):
            image, label, _, _ = batch
            size = label.size()
            label = label.long().cuda()

            losses, pred = model(image, label)
            # align_corners=False is what the deprecated F.upsample defaulted
            # to for bilinear mode, so the output is unchanged.
            pred = F.interpolate(input=pred, size=(size[-2], size[-1]),
                                 mode='bilinear', align_corners=False)
            loss = losses.mean()
            ave_loss.update(loss.item())

            # NCHW scores -> per-pixel class index.
            pred = pred.cpu().numpy().transpose(0, 2, 3, 1)
            pred = np.asarray(np.argmax(pred, axis=3), dtype=np.uint8)
            label = label.cpu().numpy()
            num = pred.shape[0]

            for offset in range(num):
                index = c + offset

                # save the result
                pred1 = np.where(pred[offset, :, :] < threshold, 0.0, 1.0)
                file_name = img_save_path + '/epoch' + '%02d' % epoch + '_' + 'WTR000' + '%02d' % (
                    index + 1) + '.tif'
                Image.fromarray(pred1).save(file_name)

                # calculate IoU
                gt = label[offset, :, :]
                if np.sum(gt) == 0:
                    # Empty ground truth: score the background instead.
                    gt = np.logical_not(gt)
                    pred1 = np.logical_not(pred1)
                intersection = np.logical_and(gt, pred1)
                union = np.logical_or(gt, pred1)
                iou_score_list_1.append(np.sum(intersection) / np.sum(union))

            c += num

    print(iou_score_list_1)
    mean_IoU = sum(iou_score_list_1) / len(iou_score_list_1)
    print('average IoU 1: ', mean_IoU)

    writer = writer_dict['writer']
    global_steps = writer_dict['valid_global_steps']
    writer.add_scalar('valid_loss', ave_loss.average(), global_steps)
    writer.add_scalar('valid_mIoU', mean_IoU, global_steps)
    writer_dict['valid_global_steps'] = global_steps + 1
    return ave_loss.average(), mean_IoU
def validate(testloader, model, test_size, local_rank, dataset_samples,
             tmp_folder='/dev/shm/val_tmp'):
    """Validate the model and add a temporal-consistency indicator (L_dt).

    Predictions for the middle frame of every sample are first written to
    ``tmp_folder`` as 3-channel images (prediction, unknown-region mask,
    ground truth).  They are then read back to compare the frame-to-frame
    change of the prediction with that of the ground truth.

    Args:
        testloader: iterable yielding ``(fg, bg, a, idx)`` batches.
        model: callable ``model(a, fg, bg)``; outputs 0-2 are losses, outputs
            6 and 7 are used as trimap and alpha prediction.
        test_size: unused in this function.
        local_rank: ranks ``<= 0`` show progress bars and log the loss.
        dataset_samples: sequence of per-sample file-name sequences;
            ``len(dataset_samples[0]) // 2`` selects the middle frame.
        tmp_folder: scratch directory; it is removed before returning.

    Returns:
        Average validation loss plus the average L_dt.
    """
    main_rank = local_rank <= 0
    if main_rank:
        logging.info('Start evaluation...')
    model.eval()
    ave_loss = AverageMeter()
    c = len(dataset_samples[0]) // 2

    def _read_output(fn):
        # Split a saved BGR-order image back into (alpha, gt, mask).
        stacked = cv.imread(os.path.join(tmp_folder, fn))
        a, m, g = np.split(stacked, 3, axis=-1)
        a = np.float32(np.squeeze(a)) / 255.0
        m = np.squeeze(m) != 0
        g = np.float32(np.squeeze(g)) / 255.0
        return a, g, m

    # L_dt is only an indicator of temporal consistency.  Each validation
    # sample yields a single prediction (the middle frame), so predictions
    # are saved first and L_dt is computed afterwards in one pass.
    with torch.no_grad():
        batches = tqdm(testloader, ascii=True) if main_rank else testloader
        for batch in batches:
            fg, bg, a, idx = batch  # [B, 3, 3 or 1, H, W]

            def handle_batch():
                out = model(a, fg, bg)
                total = out[0].mean() + out[1].mean() + out[2].mean()
                return total.detach(), out[6].detach(), out[7].detach()

            loss, tris, alphas = handle_batch()
            reduced_loss = reduce_tensor(loss)

            for b in range(tris.shape[0]):
                outpath = os.path.join(
                    tmp_folder, dataset_samples[idx[b].item()][c])
                os.makedirs(os.path.dirname(outpath), exist_ok=True)

                pred = np.uint8((alphas[b, c, 0] * 255).cpu().numpy())
                tri = tris[b, c, 0] * 255
                # Keep only strictly-between values, i.e. the unknown region.
                tri = np.uint8(((tri > 0) * (tri < 255)).cpu().numpy() * 255)
                gt = np.uint8(a[b, c, 0].numpy())
                cv.imwrite(outpath, np.stack([pred, tri, gt], axis=-1))

            ave_loss.update(reduced_loss.item())

    loss = ave_loss.average()
    if main_rank:
        logging.info('Validation loss: {:.6f}'.format(ave_loss.average()))

    res = 0.
    for sample in tqdm(dataset_samples, ascii=True):
        cur_alpha, cur_gt, mask = _read_output(sample[c])
        next_alpha, next_gt, _ = _read_output(sample[c+1])
        dadt = cur_alpha - next_alpha
        dgtdt = cur_gt - next_gt
        if np.sum(mask) == 0:
            # No unknown pixels: this sample contributes nothing.
            continue
        res += np.mean(np.abs(dadt[mask] - dgtdt[mask]))
    res /= float(len(dataset_samples))

    logging.info('Average L_dt: {:.6f}'.format(res))
    loss += res
    shutil.rmtree(tmp_folder)
    torch_barrier()
    return loss
def train(models, train_loader, interp, optimizers, criterion, history, epoch, args):
    """Train a single-head segmentation network for one epoch.

    Args:
        models: network called as ``models(images)``.
        train_loader: iterable yielding ``(images, labels, _)`` batches.
        interp: callable applied to the prediction before the loss.
        optimizers: optimizer exposing ``zero_grad()`` and ``step()``.
        criterion: loss callable ``criterion(pred, labels)``.
        history: dict whose ``history['train']`` lists ``'epoch'``, ``'loss'``
            and ``'acc'`` are appended to on display iterations.
        epoch: 1-based epoch index.
        args: options object; ``epoch_iters``, ``display_iter`` and
            ``running_lr`` are read.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()

    # Switch to train mode
    models.train()

    tic = time.time()
    for i_iter, batch_data in enumerate(train_loader):
        cur_iter = i_iter + (epoch - 1) * args.epoch_iters

        # measure data loading time
        torch.cuda.synchronize()
        data_time.update(time.time() - tic)

        images, labels, _ = batch_data
        images, labels = images.to(device), labels.to(device)

        optimizers.zero_grad()
        adjust_learning_rate(optimizers, cur_iter, args)

        # forward / backward / update
        pred_seg = interp(models(images))
        loss = criterion(pred_seg, labels)
        loss.backward()
        optimizers.step()

        # Measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        if i_iter % args.display_iter != 0:
            continue

        # Accuracy is only computed on the iterations that are displayed.
        acc = pixel_acc(pred_seg, labels)
        loss_value = loss.data.item()
        acc_value = acc.data.item()
        print('Epoch: [{}][{}/{}], Time: {:.2f}, Data: {:.2f}, '
              'LR: {:.6f} '
              'Accurary: {:4.2f}, Loss: {:.6f} '.format(
                  epoch, i_iter, args.epoch_iters,
                  batch_time.average(), data_time.average(),
                  args.running_lr, acc_value * 100, loss_value))

        train_log = history['train']
        train_log['epoch'].append(epoch - 1 + 1. * i_iter / args.epoch_iters)
        train_log['loss'].append(loss_value)
        train_log['acc'].append(acc_value)
def train(genertor, discriminator, iterator, interp, optimizer, optimizer_D,
          criterion, criterion_bce, history, epoch, args):
    """Run ``args.epoch_iters`` adversarial training steps.

    Each step first updates the generator losses with the discriminator
    frozen, then re-enables the discriminator's gradients and back-propagates
    its loss on the detached generator outputs.  Both optimizers are stepped
    at the end of the iteration.

    Args:
        genertor: segmentation network; called as ``genertor(input_img)`` and
            unpacked as ``(pred_S2, _, pred_S1)``.
        discriminator: network applied to the soft-maxed predictions.
        iterator: iterator whose ``next()`` yields ``(index, batch_data)``.
        interp: callable applied to generator and discriminator outputs.
        optimizer: optimizer of the generator.
        optimizer_D: optimizer of the discriminator.
        criterion: segmentation loss ``criterion(pred, label)``.
        criterion_bce: loss used on discriminator outputs.
        history: dict; ``history['train']`` lists ``'epoch'``,
            ``'loss_pred_outputs'`` and ``'acc_pred_outputs'`` are appended to.
        epoch: current epoch number.
        args: options object; reads ``epoch_iters``, ``lambda_seg_La``,
            ``lambda_seg_S1``, ``lambda_adv_pred``, ``disp_iter``,
            ``num_epoches``, ``checkpoints_dir``, ``generatormodel`` and
            reads/writes ``best_loss``.

    NOTE(review): the indentation of this function was lost in the source;
    the nesting of the checkpoint section below is a reconstruction and
    should be confirmed against the original file.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()

    # labels for adversarial training
    S1_label = 0
    S2_label = 1

    genertor.train()
    discriminator.train()

    # main loop
    tic = time.time()
    for i_iter in range(args.epoch_iters):
        # The running values are reset every iteration, so the numbers
        # printed below are per-iteration losses.
        loss_seg_value_S1 = 0
        loss_seg_value_S2 = 0
        loss_seg_value_La = 0
        loss_adv_pred_value = 0
        loss_D_value = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)
        optimizer_D.zero_grad()
        adjust_learning_rate_D(optimizer_D, i_iter)

        # ---- generator phase: freeze the discriminator ----
        for param in discriminator.parameters():
            param.requires_grad = False

        _, batch_data = next(iterator)  # iterator comes from enumerate()
        data_time.update(time.time() - tic)
        images, labels, infos = batch_data

        # feed input data
        input_img = Variable(images, volatile=False)
        label_seg = Variable(labels.long(), volatile=False)
        input_img = input_img.cuda()
        label_seg = label_seg.cuda()

        pred_S2, _, pred_S1 = genertor(input_img)
        pred_S1 = interp(pred_S1)
        pred_S2 = interp(pred_S2)

        loss_seg_S1 = criterion(pred_S1, label_seg)
        loss_seg_S2 = criterion(pred_S2, label_seg)

        # Hard labels from S1; computed but not used by any loss below.
        pred_label = pred_S1.data.cpu().numpy().argmax(axis=1)
        pred_label = torch.from_numpy(pred_label)
        pred_label = Variable(pred_label.long()).cuda()
        # Currently the same expression as loss_seg_S2 (ground-truth target).
        loss_seg_La = criterion(pred_S2, label_seg)

        # D_out_S1 is not used before it is recomputed in the discriminator
        # phase below.
        D_out_S1 = interp(discriminator(
            F.softmax(pred_S1)))
        D_out_S2 = interp(discriminator(F.softmax(pred_S2)))

        # Adversarial term: the S2 output is scored against the S1 label.
        loss_adv_pred = criterion_bce(
            D_out_S2, Variable(torch.FloatTensor(
                D_out_S2.data.size()).fill_(S1_label)).cuda())

        loss_weakly = args.lambda_seg_La * loss_seg_La
        loss = args.lambda_seg_S1 * loss_seg_S1 + args.lambda_adv_pred * loss_adv_pred

        # Two backward passes over the same graph; the first must retain it.
        loss_weakly.backward(retain_graph=True)
        loss.backward()

        loss_seg_value_S1 += loss_seg_S1.data.cpu().numpy()[0]
        loss_seg_value_S2 += loss_seg_S2.data.cpu().numpy()[0]
        loss_seg_value_La += loss_seg_La.data.cpu().numpy()[0]
        loss_adv_pred_value += loss_adv_pred.data.cpu().numpy()[0]

        # ---- discriminator phase: bring back requires_grad ----
        for param in discriminator.parameters():
            param.requires_grad = True

        # train on S1 (detached so no gradient reaches the generator)
        pred_S1 = pred_S1.detach()
        D_out_S1 = interp(discriminator(F.softmax(pred_S1)))
        loss_D = criterion_bce(
            D_out_S1, Variable(torch.FloatTensor(
                D_out_S1.data.size()).fill_(S1_label)).cuda())
        loss_D = loss_D / 2.0
        loss_D.backward()
        loss_D_value += loss_D.data.cpu().numpy()[0]

        # train on S2
        pred_S2 = pred_S2.detach()
        D_out_S2 = interp(discriminator(F.softmax(pred_S2)))
        loss_D = criterion_bce(
            D_out_S2, Variable(torch.FloatTensor(
                D_out_S2.data.size()).fill_(S2_label)).cuda())
        loss_D = loss_D / 2.0
        loss_D.backward()
        loss_D_value += loss_D.data.cpu().numpy()[0]

        optimizer.step()
        optimizer_D.step()

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        # calculate accuracy and display
        if i_iter % args.disp_iter == 0:
            acc_pred_outputs, pix_pred_outputs = accuracy(batch_data, pred_S1)
            print(
                'iter =[{0:d}]/[{1:d}/{2:d}], Time: {3:.2f}, Data: {4:.2f}, loss_seg_S1 = {5:.4f} loss_seg_S2 = {6:.4f} loss_seg_La = {7:.4f}, loss_adv_pred = {8:.4f}, loss_D = {9:.4f}, Accurarcy: {10:4.2f}%'
                .format(epoch, i_iter, args.epoch_iters, batch_time.average(),
                        data_time.average(), loss_seg_value_S1,
                        loss_seg_value_S2, loss_seg_value_La,
                        loss_adv_pred_value, loss_D_value,
                        acc_pred_outputs * 100))

            fractional_epoch = epoch - 1 + 1. * i_iter / args.epoch_iters
            history['train']['epoch'].append(fractional_epoch)
            history['train']['loss_pred_outputs'].append(loss_seg_S1.data[0])
            history['train']['acc_pred_outputs'].append(acc_pred_outputs)

        # checkpoint: "latest" is written on the last iteration of the last
        # epoch only
        if epoch == args.num_epoches and i_iter >= args.epoch_iters - 1:
            print('taking checkpoints latest ...')
            torch.save(
                genertor.state_dict(),
                osp.join(
                    args.checkpoints_dir,
                    str(args.generatormodel) + '_' + str(epoch) + 'epoch_' +
                    str(args.epoch_iters) + '_latest.pth'))
            torch.save(
                discriminator.state_dict(),
                osp.join(
                    args.checkpoints_dir,
                    str(args.generatormodel) + '_' + str(epoch) + 'epoch_' +
                    str(args.epoch_iters) + '_D_latest.pth'))

        # "best" compares the most recently recorded S1 loss with
        # args.best_loss.
        # NOTE(review): placed at loop level here; it may originally have
        # been nested differently — confirm.
        loss_seg_S1 = history['train']['loss_pred_outputs'][-1]
        if loss_seg_S1 < args.best_loss:
            args.best_loss = loss_seg_S1
            print('taking checkpoints best ...')
            torch.save(
                genertor.state_dict(),
                osp.join(
                    args.checkpoints_dir,
                    str(args.generatormodel) + '_' + str(args.epoch_iters) +
                    '_train_best.pth'))
            torch.save(
                discriminator.state_dict(),
                osp.join(
                    args.checkpoints_dir,
                    str(args.generatormodel) + '_' + str(args.epoch_iters) +
                    '_D_train_best.pth'))
def train(config, epoch, num_epoch, epoch_iters, base_lr, num_iters,
          trainloader, optimizer, model, writer_dict, device):
    """Train the joint/depth model for one epoch and dump debug images.

    Channels ``0:15`` of the label/prediction tensors are treated as joint
    heatmaps and channel ``15`` is written out as the depth image.  On every
    iteration the ground-truth and predicted joints and depth of the batch
    are saved below ``results/full_RGBD/train/``.

    Args:
        config: configuration object; ``PRINT_FREQ`` is read.
        epoch: current epoch index (0-based offset for the LR schedule).
        num_epoch: total number of epochs (log only).
        epoch_iters: iterations per epoch.
        base_lr: base learning rate for ``adjust_learning_rate``.
        num_iters: total number of iterations for the schedule.
        trainloader: iterable yielding 7-tuples
            ``(images, labels, target_weight, _, name, joints, joints_vis)``.
        optimizer: optimizer stepped after each backward pass.
        model: callable ``model(images, labels, target_weight)`` returning
            ``(losses, losses_joints, losses_inp, pred)``.
        writer_dict: dict with ``'writer'`` and ``'train_global_steps'``.
        device: device the images and labels are moved to.
    """
    model.train()

    batch_time = AverageMeter()
    ave_loss = AverageMeter()
    ave_loss_joints = AverageMeter()
    ave_loss_inp = AverageMeter()
    ave_acc = AverageMeter()
    tic = time.time()
    cur_iters = epoch * epoch_iters
    writer = writer_dict['writer']
    # Read once: every log written by this call uses this same step value.
    global_steps = writer_dict['train_global_steps']
    rank = get_rank()
    world_size = get_world_size()

    def joint_maps(tensor):
        # First 15 channels as a fresh numpy array on the CPU.
        return tensor[:, 0:15, :, :].detach().cpu().numpy()

    out_root = 'results/full_RGBD/train'

    for i_iter, batch in enumerate(trainloader):
        images, labels, target_weight, _, _, _, joints_vis = batch
        images = images.to(device)
        labels = labels.to(device)

        # forward
        losses, losses_joints, losses_inp, pred = model(
            images, labels, target_weight)

        label_joints, _ = get_max_preds(joint_maps(labels))
        pred_joints, _ = get_max_preds(joint_maps(pred))
        _, acc, _, _ = accuracy(joint_maps(pred), joint_maps(labels))

        rgb = images[:, 0:3, :, :]
        save_batch_image_with_joints(
            rgb, label_joints * 4, joints_vis,
            'results/full_RGBD/train/joint_gt/{}_gt.png'.format(i_iter))
        save_batch_image_with_joints(
            images[:, 0:3, :, :], pred_joints * 4, joints_vis,
            'results/full_RGBD/train/joint_pred/{}_pred.png'.format(i_iter))

        # Upsample only for the depth dumps; the loss was already computed
        # inside the model.
        labels = F.upsample(input=labels, size=(256, 256), mode='bilinear')
        pred = F.upsample(input=pred, size=(256, 256), mode='bilinear')
        cv2.imwrite(
            out_root + '/depth_gt/{}_gt.png'.format(i_iter),
            labels[0, 15, :, :].detach().cpu().numpy())
        cv2.imwrite(
            out_root + '/depth_pred/{}_pred.png'.format(i_iter),
            pred[0, 15, :, :].detach().cpu().numpy())

        loss = losses.mean()
        loss_joints = losses_joints.mean()
        loss_inp = losses_inp.mean()

        reduced_loss = reduce_tensor(loss)
        reduced_loss_joints = reduce_tensor(loss_joints)
        reduced_loss_inp = reduce_tensor(loss_inp)

        model.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        # update average loss
        ave_loss.update(reduced_loss.item())
        ave_loss_joints.update(reduced_loss_joints.item())
        ave_loss_inp.update(reduced_loss_inp.item())
        ave_acc.update(acc)

        lr = adjust_learning_rate(optimizer, base_lr, num_iters,
                                  i_iter + cur_iters)

        if i_iter % config.PRINT_FREQ == 0 and rank == 0:
            print_loss = ave_loss.average() / world_size
            print_loss_joints = ave_loss_joints.average() / world_size
            print_loss_inp = ave_loss_inp.average() / world_size
            print_acc = ave_acc.average() / world_size

            template = ('Epoch: [{}/{}] Iter:[{}/{}], Time: {:.2f}, '
                        'lr: {:.6f}, Loss: {:.6f}, {:.6f}, {:.6f}, Acc: {:.6f}')
            logging.info(template.format(
                epoch, num_epoch, i_iter, epoch_iters,
                batch_time.average(), lr, print_loss, print_loss_joints,
                print_loss_inp, print_acc))

            for tag, value in (('train_loss', print_loss),
                               ('train_loss_joint', print_loss_joints),
                               ('train_loss_depth', print_loss_inp),
                               ('train_accuracy', print_acc)):
                writer.add_scalar(tag, value, global_steps)
            writer_dict['train_global_steps'] = global_steps + 1
# Validate the generator on `val_loader`, append the averaged loss and
# accuracy to history['val'] and, for epoch > 0, redraw loss.png and
# accuracy.png inside args.checkpoints_dir.
#
# genertor   -- network called as genertor(input_img); unpacked as
#               (pred1, _, pred2)
# val_loader -- iterable yielding (images, labels, infos) batches
# interp     -- callable applied to both predictions
# criterion  -- loss callable criterion(pred, label)
# history    -- dict with 'train'/'val' sub-dicts holding the 'epoch',
#               'loss_pred_outputs' and 'acc_pred_outputs' lists
# epoch      -- current epoch number
# args       -- options object; `visualize` and `checkpoints_dir` are read
def evaluate(genertor, val_loader, interp, criterion, history, epoch, args):
    print('Evaluating at {} epochs...'.format(epoch))
    loss_pred_outputs_meter = AverageMeter()
    acc_pred_outputs_meter = AverageMeter()

    # switch to eval mode
    genertor.eval()

    for i, batch_data in enumerate(val_loader):
        # forward pass
        images, labels, infos = batch_data

        # feed input data (volatile=True: inputs are marked as inference-only)
        input_img = Variable(images, volatile=True)
        label_seg = Variable(labels.long(), volatile=True)
        input_img = input_img.cuda()
        label_seg = label_seg.cuda()

        pred1, _, pred2 = genertor(input_img)
        pred1 = interp(pred1)
        pred2 = interp(pred2)

        # Loss and accuracy are measured on the third generator output only;
        # pred1 is only passed to the visualisation.
        loss_pred_outputs = criterion(pred2, label_seg)
        loss_pred_outputs_meter.update(loss_pred_outputs.data[0])
        print('[Eval] iter {}, loss_pred_outputs:{}'.format(
            i, loss_pred_outputs.data[0]))

        acc_pred_outputs, pix_pred_outputs = accuracy(batch_data, pred2)
        acc_pred_outputs_meter.update(acc_pred_outputs, pix_pred_outputs)

        if args.visualize:
            visualize_tv(batch_data, pred1, pred2, args)

    history['val']['epoch'].append(epoch)
    history['val']['loss_pred_outputs'].append(
        loss_pred_outputs_meter.average())
    history['val']['acc_pred_outputs'].append(acc_pred_outputs_meter.average())
    print('[Eval Summary] Epoch: {}, Loss: {}, Accurarcy: {:4.2f}%'.format(
        epoch, loss_pred_outputs_meter.average(),
        acc_pred_outputs_meter.average() * 100))

    # plot figure
    if epoch > 0:
        print('Plotting loss figure...')
        fig = plt.figure()
        # Losses are plotted on a log scale.
        plt.plot(np.asarray(history['train']['epoch']),
                 np.log(np.asarray(history['train']['loss_pred_outputs'])),
                 color='b', label='training')
        plt.plot(np.asarray(history['val']['epoch']),
                 np.log(np.asarray(history['val']['loss_pred_outputs'])),
                 color='c', label='validation')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Log(loss)')
        fig.savefig('{}/loss.png'.format(args.checkpoints_dir), dpi=200)
        plt.close('all')

        fig = plt.figure()
        plt.plot(history['train']['epoch'],
                 history['train']['acc_pred_outputs'],
                 color='b', label='training')
        plt.plot(history['val']['epoch'],
                 history['val']['acc_pred_outputs'],
                 color='c', label='validation')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        fig.savefig('{}/accuracy.png'.format(args.checkpoints_dir), dpi=200)
        plt.close('all')
# NOTE(review): the triple quote below is not part of this function; it looks
# like one end of a module-level string whose other end is outside this
# excerpt — confirm before removing it.
"""
def evaluate():
    """Evaluate a 5-class ACNet checkpoint on the Freiburg Forest test split.

    Reads the module-level ``args`` (``last_ckpt``, ``test_dir``, ``modal1``,
    ``modal2``, ``num_class``, ``visualize``, ``save_predictions``,
    ``output_dir``) and ``device``.  Prints per-sample accuracy, per-class
    IoU, mean class accuracy and a final summary; optionally visualises
    and/or saves the coloured predictions.
    """
    model = ACNet_models_V1.ACNet(num_class=5, pretrained=False)
    load_ckpt(model, None, None, args.last_ckpt, device)
    model.eval()
    model.to(device)

    transform = torchvision.transforms.Compose([
        ACNet_data.ScaleNorm(),
        ACNet_data.ToTensor(),
        ACNet_data.Normalize(),
    ])
    val_data = ACNet_data.FreiburgForest(
        transform=transform,
        data_dirs=[args.test_dir],
        modal1_name=args.modal1,
        modal2_name=args.modal2,
    )
    val_loader = DataLoader(val_data, batch_size=1, shuffle=False,
                            num_workers=1, pin_memory=True)

    acc_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    a_meter = AverageMeter()
    b_meter = AverageMeter()

    # A single no_grad context covers the whole evaluation loop.
    with torch.no_grad():
        for batch_idx, sample in enumerate(val_loader):
            modal1 = sample['modal1'].to(device)
            modal2 = sample['modal2'].to(device)
            label = sample['label'].numpy()
            basename = sample['basename'][0]

            pred = model(modal1, modal2)

            # Predicted class indices are shifted by one before they are
            # compared with the labels.
            output = (torch.argmax(pred, 1) + 1).squeeze(0).cpu().numpy()

            acc, pix = accuracy(output, label)
            intersection, union = intersectionAndUnion(
                output, label, args.num_class)
            acc_meter.update(acc, pix)

            a_m, b_m = macc(output, label, args.num_class)
            intersection_meter.update(intersection)
            union_meter.update(union)
            a_meter.update(a_m)
            b_meter.update(b_m)

            now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            print('[{}] iter {}, accuracy: {}'
                  .format(now, batch_idx, acc))

            if args.visualize:
                visualize_result(modal1, modal2, label, output, batch_idx, args)

            if args.save_predictions:
                colored_output = utils.color_label_eval(output).astype(np.uint8)
                # transpose moves the first axis of the coloured output last
                imageio.imwrite(f'{args.output_dir}/{basename}_pred.png',
                                colored_output.transpose([1, 2, 0]))

    iou = intersection_meter.sum / (union_meter.sum + 1e-10)
    for class_idx, class_iou in enumerate(iou):
        print('class [{}], IoU: {}'.format(class_idx, class_iou))

    mAcc = (a_meter.average() / (b_meter.average() + 1e-10))
    print(mAcc.mean())

    print('[Eval Summary]:')
    print('Mean IoU: {:.4}, Accuracy: {:.2f}%'
          .format(iou.mean(), acc_meter.average() * 100))
def inference():
    """Evaluate a MultiTaskCNN checkpoint and report accuracy, IoU and speed.

    Reads the module-level ``args`` (``summary_dir``, ``last_ckpt``,
    ``data_dir``, ``num_class``) and ``device``.  Every 50th sample the input
    image, depth, predicted labels and ground-truth labels are written to a
    ``SummaryWriter`` under ``<summary_dir>/segtest``.  Prints per-sample
    accuracy, average forward time / fps, per-class IoU, mean class accuracy
    and a final summary.
    """
    writer_image = SummaryWriter(os.path.join(args.summary_dir, 'segtest'))
    # The writer buffers events; closing it in ``finally`` makes sure they
    # are flushed to disk even if evaluation fails part-way.
    try:
        model = MultiTaskCNN(38, depth_channel=1, pretrained=False,
                             arch='resnet50', use_aspp=False)
        load_ckpt(model, None, args.last_ckpt, device)
        model.eval()
        model = model.to(device)

        val_data = data_eval.ReadData(
            transform=torchvision.transforms.Compose(
                [data_eval.scaleNorm(), data_eval.ToTensor(), Normalize()]),
            data_dir=args.data_dir)
        val_loader = DataLoader(val_data, batch_size=1, shuffle=False,
                                num_workers=4, pin_memory=False)

        acc_meter = AverageMeter()
        intersection_meter = AverageMeter()
        union_meter = AverageMeter()
        a_meter = AverageMeter()
        b_meter = AverageMeter()
        test_total_time = 0

        with torch.no_grad():
            for batch_idx, sample in enumerate(val_loader):
                image = sample['image'].to(device)
                depth = sample['depth'].to(device)
                label = sample['label'].numpy()
                show_label = sample['label'].long().to(device)

                # CUDA kernels run asynchronously, so the device has to be
                # synchronised on both sides of the forward pass; otherwise
                # the timer mostly measures kernel launch overhead.
                if image.is_cuda:
                    torch.cuda.synchronize()
                time1 = time.time()
                pred = model(image, depth)
                if image.is_cuda:
                    torch.cuda.synchronize()
                time2 = time.time()
                test_total_time += (time2 - time1)

                output = torch.max(pred, 1)[1]
                output = output.cpu().numpy()

                acc, pix = accuracy(output, label)
                intersection, union = intersectionAndUnion(
                    output, label, args.num_class)
                acc_meter.update(acc, pix)

                a_m, b_m = macc(output, label, args.num_class)
                intersection_meter.update(intersection)
                union_meter.update(union)
                a_meter.update(a_m)
                b_meter.update(b_m)

                if batch_idx % 50 == 0:
                    grid_image1 = make_grid(image[:1].clone().cpu().data, 1,
                                            normalize=True)
                    writer_image.add_image('image', grid_image1, batch_idx)
                    grid_image2 = make_grid(depth[:1].clone().cpu().data, 1,
                                            normalize=True)
                    writer_image.add_image('depth', grid_image2, batch_idx)
                    grid_image3 = make_grid(
                        utils.color_label(torch.max(pred[:1], 1)[1]), 1,
                        normalize=False, range=(0, 255))
                    writer_image.add_image('Predicted label', grid_image3,
                                           batch_idx)
                    grid_image4 = make_grid(
                        utils.color_label(show_label[:1]), 1,
                        normalize=False, range=(0, 255))
                    writer_image.add_image('Groundtruth label', grid_image4,
                                           batch_idx)

                print('[{}] iter {}, accuracy: {}'.format(
                    datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    batch_idx, acc))

        # Average forward time per sample and the resulting throughput.
        print('推理时间:', test_total_time / len(val_data),
              '\nfps:', len(val_data) / test_total_time)

        iou = intersection_meter.sum / (union_meter.sum + 1e-10)
        for i, _iou in enumerate(iou):
            print('class [{}], IoU: {}'.format(i, _iou))

        # mAcc: per-pixel classification accuracy between the prediction and
        # the ground truth at corresponding positions.
        mAcc = (a_meter.average() / (b_meter.average() + 1e-10))
        print(mAcc.mean())

        print('[Eval Summary]:')
        print('Mean IoU: {:.4}, Accuracy: {:.2f}%'.format(
            iou.mean(), acc_meter.average() * 100))
    finally:
        writer_image.close()
def validate(config, testloader, model, writer_dict, device):
    """Run one validation pass, dump visualisations and log averaged metrics.

    For every batch the model is called as ``model(image, label,
    target_weight)`` and returns a total loss, a joint loss, an ``inp`` loss
    (logged as ``valid_loss_depth``) and a prediction tensor. Channels
    ``0:15`` of the label and prediction are treated as joint heatmaps and
    channel ``15`` is written out as the depth image. Ground-truth and
    predicted joints and depth maps are saved under
    ``results/full_RGBD/val/``.

    Args:
        config: Experiment configuration. Not read by this function; kept so
            existing callers keep working.
        testloader: Iterable yielding ``(image, label, target_weight, _,
            name, joints, joints_vis)`` batches.
        model: Callable returning ``(losses, losses_joints, losses_inp,
            pred)``.
        writer_dict: Dict holding a summary ``'writer'`` and the
            ``'valid_global_steps'`` counter; the counter is incremented on
            rank 0.
        device: Device the image and label tensors are moved to.

    Returns:
        Tuple ``(loss, joint_loss, inp_loss, accuracy)``, each being the
        meter average divided by the world size.
    """
    import os  # local import: only needed to prepare the output folders

    rank = get_rank()
    world_size = get_world_size()
    model.eval()

    ave_loss = AverageMeter()
    ave_loss_joints = AverageMeter()
    ave_loss_inp = AverageMeter()
    ave_accs = AverageMeter()
    ave_acc = AverageMeter()

    num_joints = 15     # channels [0, 15) hold the joint heatmaps
    depth_channel = 15  # channel written out as the depth image

    # cv2.imwrite can fail without raising when the target directory is
    # missing, so make sure every output directory exists up front.
    for out_dir in ('results/full_RGBD/val/joint_gt',
                    'results/full_RGBD/val/joint_pred',
                    'results/full_RGBD/val/depth_gt',
                    'results/full_RGBD/val/depth_pred'):
        os.makedirs(out_dir, exist_ok=True)

    with torch.no_grad():
        for i_iter, batch in enumerate(testloader):
            image, label, target_weight, _, name, joints, joints_vis = batch
            image = image.to(device)
            label = label.to(device)

            losses, losses_joints, losses_inp, pred = model(
                image, label, target_weight)

            # Convert each heatmap stack to NumPy once and reuse it (assumes
            # get_max_preds/accuracy do not modify their inputs in place --
            # TODO confirm).
            label_heatmaps = label[:, 0:num_joints, :, :].detach().cpu().numpy()
            pred_heatmaps = pred[:, 0:num_joints, :, :].detach().cpu().numpy()

            label_joints, _ = get_max_preds(label_heatmaps)
            pred_joints, _ = get_max_preds(pred_heatmaps)
            accs, acc, _, _ = accuracy(pred_heatmaps, label_heatmaps)

            # Joint coordinates are scaled by 4 before drawing (presumably
            # the heatmap-to-image stride -- confirm against the data loader).
            save_batch_image_with_joints(
                image[:, 0:3, :, :], label_joints * 4, joints_vis,
                'results/full_RGBD/val/joint_gt/{}_gt.png'.format(i_iter))
            save_batch_image_with_joints(
                image[:, 0:3, :, :], pred_joints * 4, joints_vis,
                'results/full_RGBD/val/joint_pred/{}_pred.png'.format(i_iter))

            # F.upsample is deprecated; F.interpolate with
            # align_corners=False is the equivalent call.
            label = F.interpolate(input=label, size=(256, 256),
                                  mode='bilinear', align_corners=False)
            pred = F.interpolate(input=pred, size=(256, 256),
                                 mode='bilinear', align_corners=False)

            cv2.imwrite(
                'results/full_RGBD/val/depth_gt/{}_gt.png'.format(i_iter),
                label[0, depth_channel, :, :].detach().cpu().numpy())
            cv2.imwrite(
                'results/full_RGBD/val/depth_pred/{}_pred.png'.format(i_iter),
                pred[0, depth_channel, :, :].detach().cpu().numpy())

            reduced_loss = reduce_tensor(losses.mean())
            reduced_loss_joints = reduce_tensor(losses_joints.mean())
            reduced_loss_inp = reduce_tensor(losses_inp.mean())

            ave_loss.update(reduced_loss.item())
            ave_loss_joints.update(reduced_loss_joints.item())
            ave_loss_inp.update(reduced_loss_inp.item())
            ave_acc.update(acc)
            ave_accs.update(accs)

    print_loss = ave_loss.average() / world_size
    print_loss_joints = ave_loss_joints.average() / world_size
    print_loss_inp = ave_loss_inp.average() / world_size
    # NOTE(review): acc/accs are not passed through reduce_tensor, yet they
    # are divided by world_size like the reduced losses; this looks wrong for
    # world_size > 1 -- confirm the intended semantics.
    print_acc = ave_acc.average() / world_size
    print_accs = ave_accs.average() / world_size

    if rank == 0:
        writer = writer_dict['writer']
        global_steps = writer_dict['valid_global_steps']
        writer.add_scalar('valid_loss', print_loss, global_steps)
        writer.add_scalar('valid_loss_joint', print_loss_joints, global_steps)
        writer.add_scalar('valid_loss_depth', print_loss_inp, global_steps)
        writer.add_scalar('valid_accuracy', print_acc, global_steps)
        for i in range(num_joints):
            writer.add_scalar('valid_each_accuracy_' + str(i),
                              print_accs[i], global_steps)
        writer_dict['valid_global_steps'] = global_steps + 1

    return print_loss, print_loss_joints, print_loss_inp, print_acc