def train(config, train_loader, model, criterion, optimizer, epoch, output_dir, tb_log_dir, writer_dict):
    """Run one training epoch over `train_loader`.

    Logs loss/accuracy every `config.PRINT_FREQ` steps, writes scalars to the
    TensorBoard writer held in `writer_dict`, and dumps debug images.
    `tb_log_dir` is accepted for signature compatibility but unused here.
    """
    batch_timer = AverageMeter()
    data_timer = AverageMeter()
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()

    # switch to train mode
    model.train()

    tic = time.time()
    for step, (images, heatmaps, heatmap_weights, meta) in enumerate(train_loader):
        # time spent waiting on the data pipeline
        data_timer.update(time.time() - tic)

        # forward pass; targets are moved to GPU only after the forward starts
        preds_hm = model(images)
        heatmaps = heatmaps.cuda(non_blocking=True)
        heatmap_weights = heatmap_weights.cuda(non_blocking=True)
        loss = criterion(preds_hm, heatmaps, heatmap_weights)

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # bookkeeping: loss is weighted by batch size, accuracy by joint count
        loss_meter.update(loss.item(), images.size(0))
        _, avg_acc, cnt, pred = accuracy(preds_hm.detach().cpu().numpy(),
                                         heatmaps.detach().cpu().numpy())
        acc_meter.update(avg_acc, cnt)

        batch_timer.update(time.time() - tic)
        tic = time.time()

        if step % config.PRINT_FREQ != 0:
            continue

        logger.info(
            'Epoch: [{0}][{1}/{2}]\t'
            'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t'
            'Speed {speed:.1f} samples/s\t'
            'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t'
            'Loss {loss.val:.5f} ({loss.avg:.5f})\t'
            'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                epoch, step, len(train_loader),
                batch_time=batch_timer,
                speed=images.size(0) / batch_timer.val,
                data_time=data_timer,
                loss=loss_meter,
                acc=acc_meter))

        tb = writer_dict['writer']
        global_steps = writer_dict['train_global_steps']
        tb.add_scalar('train_loss', loss_meter.val, global_steps)
        tb.add_scalar('train_acc', acc_meter.val, global_steps)
        writer_dict['train_global_steps'] = global_steps + 1

        # heatmap predictions are at 1/4 input resolution, hence pred * 4
        prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), step)
        save_debug_images(config, images, meta, heatmaps, pred * 4, preds_hm, prefix)
def main():
    """Benchmark an OpenVINO IR pose model on the validation set.

    Loads the model into the Inference Engine, runs (at most 101) single-image
    inferences, decodes heatmaps to coordinates, saves debug images, and logs
    the average per-image processing time.
    """
    args = parse_args()
    update_config(cfg, args)
    logger, final_output_dir, tb_log_dir = create_logger(cfg, args.cfg, 'valid')

    exec_net, net_input_shape = load_to_IE(args.model)
    # We need a dynamically generated key for fetching the output tensor,
    # since the output node name depends on the exported model.
    output_key = list(exec_net.outputs.keys())[0]

    # Data loading code
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
    )
    # NOTE(review): eval() on a config-driven string is the codebase's dataset
    # dispatch idiom; safe only while the config is trusted input.
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=1, shuffle=False,
        num_workers=cfg.WORKERS, pin_memory=True
    )

    process_time = AverageMeter()
    with torch.no_grad():
        for i, (input, target, target_weight, meta) in enumerate(valid_loader):
            start_time = time.time()
            # compute output (batch dimension re-added for the IE input blob)
            output = sync_inference(exec_net,
                                    image=np.expand_dims(input[0].numpy(), 0))
            batch_heatmaps = output[output_key]
            coords, maxvals = get_max_preds(batch_heatmaps)
            # measure elapsed time (inference + decoding)
            process_time.update(time.time() - start_time)

            # heatmaps are at 1/4 input resolution, hence coords * 4
            prefix = '{}_{}'.format(os.path.join(final_output_dir, 'val'), i)
            save_debug_images(cfg, input, meta, target, coords * 4,
                              torch.from_numpy(batch_heatmaps), prefix)
            if i == 100:
                break
    # Fixed: original message read "Inference EngineAverage processing time
    # of model:" — words fused and "Inference Engine" duplicated after "IE".
    logger.info(f'OpenVINO IE: average processing time of model: {process_time.avg}')
def validate(config, val_loader, val_dataset, model, criterion, output_dir):
    """Evaluate `model` on `val_loader` and return the average accuracy.

    Logs loss/accuracy every 100 batches and saves debug images alongside.
    Returns acc.avg (float).
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)  # NOTE(review): assigned but unused here
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # compute output; multi-stage models return a list — use the
            # final stage's heatmaps
            heatmap = model(input)
            if isinstance(heatmap, list):
                output = heatmap[-1]
            else:
                output = heatmap

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            idx += num_images

            if i % 100 == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, acc=acc)
                logger.info(msg)

                # heatmaps are at 1/4 input resolution, hence pred * 4
                prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)
                save_debug_images(config, input, meta, target, pred * 4,
                                  output, prefix)

    return acc.avg
def main():
    """Benchmark an OpenVINO IR model that embeds pre- and post-processing.

    Unlike the sibling `main`, this model outputs decoded coordinates directly
    (no transforms beyond ToTensor, no heatmap argmax on the host side).
    """
    args = parse_args()
    update_config(cfg, args)
    logger, final_output_dir, tb_log_dir = create_logger(cfg, args.cfg, 'valid')

    exec_net = load_to_IE(args.model)

    # NOTE(review): eval() on a config-driven string is the codebase's dataset
    # dispatch idiom; safe only while the config is trusted input.
    valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
        transforms.Compose([transforms.ToTensor()]))
    valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=1,
                                               shuffle=False,
                                               num_workers=cfg.WORKERS,
                                               pin_memory=True)

    process_time = AverageMeter()
    with torch.no_grad():
        for i, (input, target, target_weight, meta) in enumerate(valid_loader):
            start_time = time.time()
            # compute output (batch dimension re-added for the IE input blob)
            output = sync_inference(exec_net,
                                    image=np.expand_dims(input[0].numpy(), 0))
            # measure elapsed time
            process_time.update(time.time() - start_time)
            # NOTE(review): output node names are hard-coded for one specific
            # exported graph ('Conv_746' = heatmaps, 'Mul_772' = coordinates);
            # re-exporting the model will change them — verify on new exports.
            batch_heatmaps = output['Conv_746']
            coords = output['Mul_772']

            # coordinates are at 1/4 input resolution, hence coords * 4
            prefix = '{}_{}'.format(os.path.join(final_output_dir, 'val'), i)
            save_debug_images(cfg, input, meta, target, coords * 4,
                              torch.from_numpy(batch_heatmaps), prefix)
            if i == 100:
                break
    logger.info(
        f'OpenVINO IE: Average processing time of model with merged pre- and post-processing:{process_time.avg}'
    )
def train_with_alpha(config, train_loader, mini_loader, model, criterion,
                     optimizer, a_optimizer, epoch, output_dir, tb_log_dir,
                     writer_dict):
    """One epoch of alternating optimization: model weights on `train_loader`
    batches (via `optimizer`), then a second parameter group on `mini_loader`
    batches (via `a_optimizer`).

    # NOTE(review): presumably `a_optimizer` holds architecture/alpha
    # parameters (DARTS-style bilevel search) — confirm against the caller.
    Iteration stops when the shorter of the two loaders is exhausted (zip).
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, ((input, target, target_weight, meta),
            (input1, target1, target_weight1, meta1)) in enumerate(
                zip(train_loader, mini_loader)):
        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        outputs = model(input)

        target = target.cuda(non_blocking=True)
        target_weight = target_weight.cuda(non_blocking=True)

        # multi-stage models return a list; sum the loss over all stages
        if isinstance(outputs, list):
            loss = criterion(outputs[0], target, target_weight)
            for output in outputs[1:]:
                loss += criterion(output, target, target_weight)
        else:
            output = outputs
            loss = criterion(output, target, target_weight)

        # loss = criterion(output, target, target_weight)

        # compute gradient and do update step (model weights)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        losses.update(loss.item(), input.size(0))

        # `output` is the last list element (or the sole tensor) from above
        _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(),
                                         target.detach().cpu().numpy())
        acc.update(avg_acc, cnt)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # optim alpha: second forward/backward on a mini-loader batch,
        # stepped with a_optimizer only
        outputs1 = model(input1)
        target1 = target1.cuda(non_blocking=True)
        target_weight1 = target_weight1.cuda(non_blocking=True)
        if isinstance(outputs1, list):
            loss1 = criterion(outputs1[0], target1, target_weight1)
            for output1 in outputs1[1:]:
                loss1 += criterion(output1, target1, target_weight1)
        else:
            output1 = outputs1
            loss1 = criterion(output1, target1, target_weight1)
        a_optimizer.zero_grad()
        loss1.backward()
        a_optimizer.step()

        if i % config.PRINT_FREQ == 0:
            msg = 'Epoch: [{0}][{1}/{2}]\t' \
                  'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                  'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
                  'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      loss=losses,
                      acc=acc)
            logger.info(msg)

            writer = writer_dict['writer']
            global_steps = writer_dict['train_global_steps']
            writer.add_scalar('train_loss', losses.val, global_steps)
            writer.add_scalar('train_acc', acc.val, global_steps)
            writer_dict['train_global_steps'] = global_steps + 1

            # heatmaps are at 1/4 input resolution, hence pred * 4
            prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i)
            save_debug_images(config, input, meta, target, pred * 4, output,
                              prefix)
def validate(config, device, val_loader, val_dataset, model, criterion,
             output_dir, tb_log_dir, writer_dict=None):
    """Evaluate the model on the validation data.

    Parameters
    ----------
    config : yacs.config.CfgNode
        Experiment configuration.
    device : torch.device
        Device onto which input tensors are moved (GPU when available).
    val_loader : torch.utils.data.dataloader.DataLoader
        Validation data loader.
    val_dataset : dataset.dataset
        Validation dataset.
    model : model
        The model being evaluated.
    criterion : torch.nn.modules.loss
        Torch loss object (heatmap loss).
    output_dir : str
        Directory where results are written.
    tb_log_dir : str
        Location of the log files.
    writer_dict : dict, optional
        Experiment-logging dict. The default is None.

    Returns
    -------
    losses.avg : float
        Average loss of the predicted heatmaps.
    f_losses.avg : float
        Average loss of the predicted keypoints.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    f_losses = AverageMeter()

    # switch to evaluate mode
    model.eval()

    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # Move input and target onto the device and cast to float.
            input = input.to(device)
            input = input.float()
            target = target.to(device)
            target = target.float()

            outputs = model(input)
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs

            # Optional test-time flip augmentation; FLIP_TEST defaults to
            # False, in which case this branch is skipped.
            if config.TEST.FLIP_TEST:
                input_flipped = input.flip(3)
                outputs_flipped = model(input_flipped)

                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped

                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # Center/scale from meta are needed to map heatmap coordinates
            # back to the original image space.
            c = meta['center'].numpy()
            s = meta['scale'].numpy()

            # Convert the predicted heatmaps into keypoint coordinates.
            preds, maxvals = get_final_preds(
                config, output.clone().cpu().numpy(), c, s)

            criterion2 = torch.nn.MSELoss()
            trues = meta['origin'][:, :, :2]
            trues = trues.reshape(trues.shape[0], -1)
            # Compare predicted keypoints against the ground-truth keypoints.
            # NOTE(review): torch.from_numpy(preds...) yields float64; assumes
            # `trues` has a compatible dtype for MSELoss — verify.
            f_loss = criterion2(
                torch.from_numpy(preds.reshape(preds.shape[0], -1)), trues)
            f_losses.update(f_loss.item(), num_images)

            idx += num_images

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, acc=acc)
                logger.info(msg)

                # heatmaps are at 1/4 input resolution, hence pred*4
                prefix = '{}_{}'.format(
                    os.path.join(output_dir, 'val'), i
                )
                save_debug_images(config, input, meta, target, pred*4, output,
                                  prefix)

    if writer_dict:
        writer = writer_dict['writer']
        global_steps = writer_dict['valid_global_steps']
        writer.add_scalar(
            'valid_loss',
            losses.avg,
            global_steps
        )
        writer_dict['valid_global_steps'] = global_steps + 1

    # Return the average heatmap loss and the average keypoint loss.
    return losses.avg, f_losses.avg
def run_model(config, dataset, loader, model, criterion_mse, criterion_mpjpe,
              final_output_dir, tb_writer=None, optimizer=None, epoch=None,
              is_train=True, **kwargs):
    """Run one epoch of multi-view pose training or evaluation.

    In train mode: forward, 2D joint loss (optionally + heatmap MSE), backward,
    optimizer step. In test mode (`is_train=False`, requires
    kwargs['save_heatmaps']): additionally accumulates 2D/3D predictions,
    per-joint MPJPEs and view weights, then writes CSV/H5 result files.
    Returns 0.

    Fix over the original: `np.int` (removed in NumPy 1.24) replaced with the
    builtin `int`, which it was an alias for.
    """
    # preparing meters
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    avg_acc = AverageMeter()
    mpjpe_meters = None      # lazily initialized from the model's j3d keys
    detail_mpjpes = None
    detail_preds = None
    detail_preds2d = None
    detail_weights = None
    nviews = len(dataset.selected_cam)
    nsamples = len(dataset) * nviews
    njoints = config.NETWORK.NUM_JOINTS
    n_used_joints = config.DATASET.NUM_USED_JOINTS
    height = int(config.NETWORK.HEATMAP_SIZE[0])
    width = int(config.NETWORK.HEATMAP_SIZE[1])
    all_view_weights = []
    all_maxvs = []
    # np.int was removed in NumPy 1.24; it was an alias for builtin int
    all_nview_vis_gt = np.zeros((len(dataset), n_used_joints), dtype=int)

    if not is_train:
        do_save_heatmaps = kwargs['save_heatmaps']
        all_preds = np.zeros((nsamples, njoints, 3), dtype=np.float32)
        all_preds_3d = np.zeros((len(dataset), n_used_joints, 3), dtype=np.float32)
        if do_save_heatmaps:
            all_heatmaps = np.zeros((nsamples, njoints, height, width), dtype=np.float32)
        idx_sample = 0

    if is_train:
        phase = 'train'
        model.train()
        frozen_backbone_bn(model, backbone_name='resnet')  # do not change backbone bn params
    else:
        phase = 'test'
        model.eval()

    with dummy_context_mgr() if is_train else torch.no_grad():  # if eval then use no_grad context manager
        end = time.time()
        for i, (input_, target_, weight_, meta_) in enumerate(loader):
            data_time.update(time.time() - end)
            debug_bit = False
            batch = input_.shape[0]
            train_2d_backbone = False
            run_view_weight = True

            # collapse (batch, view, ...) into (batch*view, ...) for 2D metrics
            input = collate_first_two_dims(input_)
            target = collate_first_two_dims(target_)
            weight = collate_first_two_dims(weight_)
            meta = dict()
            for kk in meta_:
                meta[kk] = collate_first_two_dims(meta_[kk])

            extra_params = dict()
            extra_params['run_view_weight'] = run_view_weight
            extra_params['joint_vis'] = weight
            extra_params['run_phase'] = phase

            hms, extra = model(input_, **meta_, **extra_params)  # todo
            output = hms
            origin_hms = extra['origin_hms']
            fused_hms_smax = extra['fused_hms_smax']
            target_cuda = target.cuda(non_blocking=True)
            weight_cuda = weight.cuda(non_blocking=True)
            pose3d_gt = meta_['joints_gt'][:, 0, :, :].contiguous().cuda(non_blocking=True)  # (batch, njoint, 3)
            num_total_joints = batch * n_used_joints
            # --- --- forward end here

            joint_2d_loss = extra['joint_2d_loss'].mean()

            # obtain all j3d predictions
            final_preds_name = 'j3d_AdaFuse'
            pred3d = extra[final_preds_name]
            j3d_keys = []
            j2d_keys = []
            for k in extra.keys():
                if 'j3d' in k:
                    j3d_keys.append(k)
                if 'j2d' in k:
                    j2d_keys.append(k)

            # initialize only once
            if mpjpe_meters is None:
                logger.info(j3d_keys)
                mpjpe_meters = dict()
                for k in j3d_keys:
                    mpjpe_meters[k] = AverageMeter()
            if detail_mpjpes is None:
                detail_mpjpes = dict()
                for k in j3d_keys:
                    detail_mpjpes[k] = list()
            if detail_preds is None:
                detail_preds = dict()
                for k in j3d_keys:
                    detail_preds[k] = list()
                detail_preds['joints_gt'] = list()
            if detail_preds2d is None:
                detail_preds2d = dict()
                for k in j2d_keys:
                    detail_preds2d[k] = list()
            if detail_weights is None:
                detail_weights = dict()
                detail_weights['maxv'] = list()
                detail_weights['learn'] = list()

            # save all weights as formatted strings for later inspection
            maxvs = extra['maxv']  # batch njoint, nview
            for b in range(batch):
                maxvs_tmp = []
                for j in range(n_used_joints):
                    maxv_str = ''.join(['{:.2f}, '.format(v) for v in maxvs[b, j]])
                    maxvs_tmp.append(maxv_str)
                all_maxvs.append(maxvs_tmp)
            view_weight = extra['pred_view_weight']
            for b in range(batch):
                maxvs_tmp = []
                for j in range(n_used_joints):
                    maxv_str = ''.join(['{:.2f}, '.format(v) for v in view_weight[b, j]])
                    maxvs_tmp.append(maxv_str)
                all_view_weights.append(maxvs_tmp)

            nviews_vis = extra['nviews_vis']
            all_nview_vis_gt[i * batch:(i + 1) * batch] = \
                nviews_vis.view(batch, n_used_joints).detach().cpu().numpy().astype(int)
            # a 3D joint counts as visible if seen in at least two views
            joints_vis_3d = torch.as_tensor(nviews_vis >= 2, dtype=torch.float32).cuda()

            for k in j3d_keys:
                preds = extra[k]
                if config.DATASET.TRAIN_DATASET in ['multiview_h36m']:
                    preds = align_to_pelvis(preds, pose3d_gt, 0)
                avg_mpjpe, detail_mpjpe, n_valid_joints = criterion_mpjpe(
                    preds, pose3d_gt, joints_vis_3d=joints_vis_3d, output_batch_mpjpe=True)
                mpjpe_meters[k].update(avg_mpjpe, n=n_valid_joints)
                detail_mpjpes[k].extend(detail_mpjpe.detach().cpu().numpy().tolist())
                detail_preds[k].extend(preds.detach().cpu().numpy())
            # ground truth saved once per batch, alongside the per-key preds
            detail_preds['joints_gt'].extend(pose3d_gt.detach().cpu().numpy())

            for k in j2d_keys:
                p2d = extra[k]
                p2d = p2d.permute(0, 1, 3, 2).contiguous()
                p2d = p2d.detach().cpu().numpy()
                detail_preds2d[k].extend(p2d)

            maxv_weight = extra['maxv'].detach().cpu().numpy()
            detail_weights['maxv'].extend(maxv_weight)
            learn_weight = extra['pred_view_weight'].detach().cpu().numpy()
            detail_weights['learn'].extend(learn_weight)

            if is_train:
                loss = 0
                if train_2d_backbone:  # always False here; heatmap MSE disabled
                    loss_mse = criterion_mse(hms, target_cuda, weight_cuda)
                    loss += loss_mse
                loss += joint_2d_loss
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                losses.update(loss.item(), len(input))
            else:
                # validation: monitor heatmap MSE only, no optimization
                loss = 0
                loss_mse = criterion_mse(hms, target_cuda, weight_cuda)
                loss += loss_mse
                losses.update(loss.item(), len(input))

            nimgs = input.shape[0]
            _, acc, cnt, pre = accuracy(output.detach().cpu().numpy(),
                                        target.detach().cpu().numpy(), thr=0.083)
            avg_acc.update(acc, cnt)

            batch_time.update(time.time() - end)
            end = time.time()

            # ---- print logs
            if i % config.PRINT_FREQ == 0 or i == len(loader) - 1 or debug_bit:
                gpu_memory_usage = torch.cuda.max_memory_allocated(0)  # bytes
                gpu_memory_usage_gb = gpu_memory_usage / 1.074e9
                mpjpe_log_string = ''
                for k in mpjpe_meters:
                    mpjpe_log_string += '{:.1f}|'.format(mpjpe_meters[k].avg)
                msg = 'Ep:{0}[{1}/{2}]\t' \
                      'Speed {speed:.1f} samples/s\t' \
                      'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                      'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
                      'Acc {acc.val:.3f} ({acc.avg:.3f})\t' \
                      'Memory {memory:.2f}G\t' \
                      'MPJPEs {mpjpe_str}'.format(
                          epoch, i, len(loader), batch_time=batch_time,
                          speed=input.shape[0] / batch_time.val,
                          data_time=data_time, loss=losses, acc=avg_acc,
                          memory=gpu_memory_usage_gb,
                          mpjpe_str=mpjpe_log_string)
                logger.info(msg)

                # ---- save debug images
                view_name = 'view_{}'.format(0)
                prefix = '{}_{}_{:08}'.format(
                    os.path.join(final_output_dir, phase), view_name, i)
                meta_for_debug_imgs = dict()
                meta_for_debug_imgs['joints_vis'] = meta['joints_vis']
                meta_for_debug_imgs['joints_2d_transformed'] = meta['joints_2d_transformed']
                save_debug_images(config, input, meta_for_debug_imgs, target,
                                  pre * 4, origin_hms, prefix)
                # save_debug_images_2(config, input, meta_for_debug_imgs, target,
                #                     pre * 4, output, prefix, suffix='fuse')
                save_debug_images_2(config, input, meta_for_debug_imgs, target,
                                    pre * 0, fused_hms_smax, prefix,
                                    suffix='smax', normalize=True, IMG=False)

            if not is_train:
                # accumulate final 2D/3D predictions for this batch
                pred, maxval = get_final_preds(config,
                                               output.clone().cpu().numpy(),
                                               meta['center'], meta['scale'])
                pred = pred[:, :, 0:2]
                pred = np.concatenate((pred, maxval), axis=2)
                all_preds[idx_sample:idx_sample + nimgs] = pred
                all_preds_3d[i * batch:(i + 1) * batch] = pred3d.detach().cpu().numpy()
                if do_save_heatmaps:
                    all_heatmaps[idx_sample:idx_sample + nimgs] = output.cpu().numpy()
                idx_sample += nimgs

    # -- End epoch: persist evaluation results (test mode only)
    if not is_train:
        cur_time = time.strftime("%Y-%m-%d-%H-%M", time.localtime())
        # save mpjpes
        for k in detail_mpjpes:
            detail_mpjpe = detail_mpjpes[k]
            out_path = os.path.join(final_output_dir,
                                    '{}_ep_{}_mpjpes_{}.csv'.format(cur_time, epoch, k,))
            np.savetxt(out_path, detail_mpjpe, delimiter=',')
            logger.info('MPJPE summary: {} {:.2f}'.format(k, np.array(detail_mpjpe).mean()))
        # save preds pose detail into h5
        pred_path = os.path.join(final_output_dir,
                                 '{}_ep_{}_3dpreds.h5'.format(cur_time, epoch))
        pred_file = h5py.File(pred_path, 'w')
        for k in detail_preds:
            pred_file[k] = np.array(detail_preds[k])
        for k in detail_preds2d:
            pred_file[k] = np.array(detail_preds2d[k])
        for k in detail_weights:
            pred_file[k] = np.array(detail_weights[k])
        pred_file.close()

        if do_save_heatmaps:
            # save heatmaps and joint locations (used-joint subset only)
            u2a = dataset.u2a_mapping
            a2u = {v: k for k, v in u2a.items() if v != '*'}
            a = list(a2u.keys())
            u = np.array(list(a2u.values()))
            save_file = config.TEST.HEATMAP_LOCATION_FILE
            file_name = os.path.join(final_output_dir, save_file)
            file = h5py.File(file_name, 'w')
            file['heatmaps'] = all_heatmaps[:, u, :, :]
            file['locations'] = all_preds[:, u, :]
            file['joint_names_order'] = a
            file.close()

    return 0
def validate(config, val_loader, val_dataset, model, criterion, output_dir,
             tb_log_dir, epoch, writer_dict=None):
    """Evaluate a dynconv (dynamic convolution) model on the validation set.

    Besides the usual loss/accuracy/dataset metrics, this collects per-layer
    conditional-FLOPS statistics from `dynconv_meta` and logs them.
    Returns the dataset's `perf_indicator`.

    Fix over the original: the dunder-call iteration pattern
    (`val_loader.__iter__()` + `range(len(...))` + `next()`) is replaced with
    the equivalent idiomatic `enumerate(val_loader)`.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0

    logger.info(f'# VALIDATE: EPOCH {epoch}')

    # instrument the model with FLOPS counters
    model = add_flops_counting_methods(model)
    model.start_flops_count()
    model.eval()

    flops_per_layer = []
    total_per_layer = []

    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            input = input.to('cuda', non_blocking=True)
            dynconv_meta = make_dynconv_meta(config, epoch, i)
            outputs, dynconv_meta = model(input, dynconv_meta)
            if 'masks' in dynconv_meta:
                percs, cost, total = dynconv.cost_per_layer(dynconv_meta)
                flops_per_layer.append(cost)
                total_per_layer.append(total)
            output = outputs[-1] if isinstance(outputs, list) else outputs

            # NOTE: TEST.FLIP_TEST is not supported for dynconv models and is
            # intentionally omitted here (the sparsity masks of the flipped
            # pass would not match the original pass).

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()

            output_np = output.clone().cpu().numpy()
            preds_rel, maxvals_rel = get_max_preds(output_np)
            preds, maxvals = get_final_preds(config, output_np, c, s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(meta['image'])

            idx += num_images

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, acc=acc)
                logger.info(msg)

                # heatmaps are at 1/4 input resolution, hence pred * 4
                prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)
                save_debug_images(config, input, meta, target, pred * 4,
                                  output, prefix)

                if config.DEBUG.PONDER:
                    # visualize the per-pixel ponder (compute) cost map
                    img = viz.frame2mpl(input[0], denormalize=True)
                    img = viz.add_skeleton(img, preds_rel[0] * 4,
                                           maxvals_rel[0], thres=0.2)
                    plt.figure()
                    plt.title('input')
                    plt.imshow(img)
                    ponder_cost = dynconv.ponder_cost_map(dynconv_meta['masks'])
                    if ponder_cost is not None:
                        plt.figure()
                        plt.title('ponder cost map')
                        plt.imshow(ponder_cost, vmin=2,
                                   vmax=len(dynconv_meta['masks']) - 2)
                        plt.colorbar()
                    else:
                        logger.info('Not a sparse model - no ponder cost')
                    viz.showKey()

        name_values, perf_indicator = val_dataset.evaluate(
            config, all_preds, output_dir, all_boxes, image_path, filenames,
            imgnums)

        model_name = config.MODEL.NAME
        if isinstance(name_values, list):
            for name_value in name_values:
                _print_name_value(name_value, model_name)
        else:
            _print_name_value(name_values, model_name)

        if writer_dict:
            writer = writer_dict['writer']
            global_steps = writer_dict['valid_global_steps']
            writer.add_scalar('valid_loss', losses.avg, global_steps)
            writer.add_scalar('valid_acc', acc.avg, global_steps)
            if isinstance(name_values, list):
                for name_value in name_values:
                    writer.add_scalars('valid', dict(name_value), global_steps)
            else:
                writer.add_scalars('valid', dict(name_values), global_steps)
            writer_dict['valid_global_steps'] = global_steps + 1

        avg_flops, total_flops, batch_count = model.compute_average_flops_cost()
        logger.info(
            f'# PARAMS: {get_model_parameters_number(model, as_string=False)/1e6} M'
        )
        logger.info(
            f'# FLOPS (multiply-accumulates, MACs): {(total_flops/idx)/1e9} GMacs on {idx} images'
        )

        # some conditional execution statistics
        if len(flops_per_layer) > 0:
            flops_per_layer = torch.cat(flops_per_layer, dim=0)
            total_per_layer = torch.cat(total_per_layer, dim=0)

            perc_per_layer = flops_per_layer / total_per_layer

            perc_per_layer_avg = perc_per_layer.mean(dim=0)
            perc_per_layer_std = perc_per_layer.std(dim=0)

            s = ''
            for perc in perc_per_layer_avg:
                s += f'{round(float(perc), 2)}, '
            logger.info(
                f'# FLOPS (multiply-accumulates MACs) used percentage per layer (average): {s}'
            )

            s = ''
            for std in perc_per_layer_std:
                s += f'{round(float(std), 2)}, '
            logger.info(
                f'# FLOPS (multiply-accumulates MACs) used percentage per layer (standard deviation): {s}'
            )

            exec_cond_flops = int(torch.sum(flops_per_layer)) / idx
            total_cond_flops = int(torch.sum(total_per_layer)) / idx
            logger.info(
                f'# Conditional FLOPS (multiply-accumulates MACs) over all layers (average per image): {exec_cond_flops/1e9} GMac out of {total_cond_flops/1e9} GMac ({round(100*exec_cond_flops/total_cond_flops,1)}%)'
            )

    return perf_indicator
def fpd_train(config, train_loader, model, tmodel, pose_criterion,
              kd_pose_criterion, optimizer, epoch, output_dir, tb_log_dir,
              writer_dict):
    """One epoch of Fast Pose Distillation training.

    The student `model` is trained against a blend of the ground-truth pose
    loss and a knowledge-distillation loss toward the teacher `tmodel`:
    loss = (1 - ALPHA) * pose_loss + ALPHA * kd_pose_loss.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    pose_losses = AverageMeter()
    kd_pose_losses = AverageMeter()
    acc = AverageMeter()

    kd_weight_alpha = config.KD.ALPHA

    # s_model switch to train mode and t_model switch to evaluate mode
    model.train()
    tmodel.eval()

    end = time.time()
    for i, (input, target, target_weight, meta) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        outputs = model(input)
        # NOTE(review): teacher forward is not wrapped in torch.no_grad();
        # gradients through tmodel are unused but this costs extra memory —
        # verify before changing, eval() alone does not disable autograd.
        toutput = tmodel(input)
        if isinstance(toutput, list):
            toutput = toutput[-1]

        target = target.cuda(non_blocking=True)
        target_weight = target_weight.cuda(non_blocking=True)

        # multi-stage students: sum both losses over all stages, supervise
        # every stage against the teacher's final-stage output
        if isinstance(outputs, list):
            pose_loss = pose_criterion(outputs[0], target, target_weight)
            kd_pose_loss = kd_pose_criterion(outputs[0], toutput, target_weight)
            for output in outputs[1:]:
                pose_loss += pose_criterion(output, target, target_weight)
                kd_pose_loss += kd_pose_criterion(output, toutput, target_weight)
            loss = (1 - kd_weight_alpha
                    ) * pose_loss + kd_weight_alpha * kd_pose_loss
            output = outputs[-1]
        else:
            output = outputs
            pose_loss = pose_criterion(output, target, target_weight)
            kd_pose_loss = kd_pose_criterion(output, toutput, target_weight)
            loss = (1 - kd_weight_alpha
                    ) * pose_loss + kd_weight_alpha * kd_pose_loss
        # loss = criterion(output, target, target_weight)

        # compute gradient and do update step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        pose_losses.update(pose_loss.item(), input.size(0))
        kd_pose_losses.update(kd_pose_loss.item(), input.size(0))
        losses.update(loss.item(), input.size(0))

        _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(),
                                         target.detach().cpu().numpy())
        acc.update(avg_acc, cnt)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % config.PRINT_FREQ == 0:
            msg = 'Epoch: [{0}][{1}/{2}]\t' \
                  'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                  'Speed {speed:.1f} samples/s\t' \
                  'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                  'POSE_Loss {pose_loss.val:.5f} ({pose_loss.avg:.5f})\t' \
                  'KD_POSE_Loss {kd_pose_loss.val:.5f} ({kd_pose_loss.avg:.5f})\t' \
                  'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
                  'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      speed=input.size(0)/batch_time.val, data_time=data_time,
                      pose_loss=pose_losses, kd_pose_loss=kd_pose_losses,
                      loss=losses, acc=acc)
            logger.info(msg)

            writer = writer_dict['writer']
            global_steps = writer_dict['train_global_steps']
            writer.add_scalar('train_pose_loss', pose_losses.val, global_steps)
            writer.add_scalar('train_kd_pose_loss', kd_pose_losses.val, global_steps)
            writer.add_scalar('train_loss', losses.val, global_steps)
            writer.add_scalar('train_acc', acc.val, global_steps)
            writer_dict['train_global_steps'] = global_steps + 1

            # heatmaps are at 1/4 input resolution, hence pred * 4
            prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i)
            save_debug_images(config, input, meta, target, pred * 4, output,
                              prefix)
def validate(config, loader, dataset, model, criterion, output_dir,
             writer_dict=None, **kwargs):
    """Evaluate a multi-view fusion model.

    Collects predictions and heatmaps over all selected views, evaluates joint
    detection rate at several distance thresholds, writes per-grouping CSVs and
    an H5 file with heatmaps/locations, and returns the perf indicator for the
    strictest threshold (0.0125, i.e. 25 mm).
    """
    model.eval()
    batch_time = AverageMeter()
    losses = AverageMeter()
    avg_acc = AverageMeter()

    nview = len(config.SELECTED_VIEWS)
    nsamples = len(dataset) * nview
    njoints = config.NETWORK.NUM_JOINTS
    height = int(config.NETWORK.HEATMAP_SIZE[0])
    width = int(config.NETWORK.HEATMAP_SIZE[1])
    all_preds = np.zeros((nsamples, njoints, 3), dtype=np.float32)
    all_heatmaps = np.zeros(
        (nsamples, njoints, height, width), dtype=np.float32)

    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input_, target_, weight_, meta_) in enumerate(loader):
            batch = input_.shape[0]
            # model consumes the (batch, view, ...) tensors directly
            output, extra = model(input_, **meta_)
            # collapse (batch, view, ...) into (batch*view, ...) for metrics
            input = merge_first_two_dims(input_)
            target = merge_first_two_dims(target_)
            weight = merge_first_two_dims(weight_)
            meta = dict()
            for kk in meta_:
                meta[kk] = merge_first_two_dims(meta_[kk])

            target_cuda = target.cuda()
            weight_cuda = weight.cuda()
            loss = criterion(output, target_cuda, weight_cuda)

            nimgs = input.size()[0]
            losses.update(loss.item(), nimgs)

            _, acc, cnt, pre = accuracy(output.detach().cpu().numpy(),
                                        target.detach().cpu().numpy(),
                                        thr=0.083)
            avg_acc.update(acc, cnt)

            batch_time.update(time.time() - end)
            end = time.time()

            # decode heatmaps to image-space joints with confidence column
            pred, maxval = get_final_preds(config,
                                           output.clone().cpu().numpy(),
                                           meta['center'], meta['scale'])
            pred = pred[:, :, 0:2]
            pred = np.concatenate((pred, maxval), axis=2)
            all_preds[idx:idx + nimgs] = pred
            all_heatmaps[idx:idx + nimgs] = output.cpu().numpy()
            # image_only_heatmaps[idx:idx + nimgs] = img_detected.cpu().numpy()
            idx += nimgs

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(loader), batch_time=batch_time,
                          loss=losses, acc=avg_acc)
                logger.info(msg)

                view_name = 'view_{}'.format(0)
                prefix = '{}_{}_{:08}'.format(
                    os.path.join(output_dir, 'validation'), view_name, i)
                meta_for_debug_imgs = dict()
                meta_for_debug_imgs['joints_vis'] = meta['joints_vis']
                meta_for_debug_imgs['joints_2d_transformed'] = meta['joints_2d_transformed']
                save_debug_images(config, input, meta_for_debug_imgs, target,
                                  pre * 4, extra['origin_hms'], prefix)
                if 'fused_hms' in extra:
                    fused_hms = extra['fused_hms']
                    prefix = '{}_{}_{:08}'.format(
                        os.path.join(output_dir, 'fused_hms'), view_name, i)
                    save_debug_heatmaps(config, input, meta_for_debug_imgs,
                                        target, pre * 4, fused_hms, prefix)

    detection_thresholds = [0.075, 0.05, 0.025, 0.0125]  # 150,100,50,25 mm
    perf_indicators = []
    cur_time = time.strftime("%Y-%m-%d-%H-%M", time.gmtime())
    for thresh in detection_thresholds:
        name_value, perf_indicator, per_grouping_detected = dataset.evaluate(
            all_preds, threshold=thresh)
        perf_indicators.append(perf_indicator)
        names = name_value.keys()
        values = name_value.values()
        num_values = len(name_value)
        _, full_arch_name = get_model_name(config)
        # markdown-style table of per-joint detection rates
        logger.info('Detection Threshold set to {} aka {}mm'.format(
            thresh, thresh * 2000.0))
        logger.info('| Arch ' +
                    ' '.join(['| {: <5}'.format(name) for name in names]) +
                    ' |')
        logger.info('|--------' * (num_values + 1) + '|')
        logger.info('| ' + '------ ' +
                    ' '.join(['| {:.4f}'.format(value) for value in values]) +
                    ' |')
        logger.info('| ' + full_arch_name)
        logger.info('Overall Perf on threshold {} is {}\n'.format(
            thresh, perf_indicator))
        logger.info('\n')
        if per_grouping_detected is not None:
            df = pd.DataFrame(per_grouping_detected)
            save_path = os.path.join(
                output_dir,
                'grouping_detec_rate_{}_{}.csv'.format(thresh, cur_time))
            df.to_csv(save_path)

    # save heatmaps and joint locations (used-joint subset only)
    u2a = dataset.u2a_mapping
    a2u = {v: k for k, v in u2a.items() if v != '*'}
    a = list(a2u.keys())
    u = np.array(list(a2u.values()))
    save_file = config.TEST.HEATMAP_LOCATION_FILE
    file_name = os.path.join(output_dir, save_file)
    file = h5py.File(file_name, 'w')
    file['heatmaps'] = all_heatmaps[:, u, :, :]
    file['locations'] = all_preds[:, u, :]
    file['joint_names_order'] = a
    file.close()

    return perf_indicators[3]  # 25mm as indicator
def rl_train(config, train_loader, model, criterion, optimizer, epoch, output_dir, tb_log_dir, writer_dict):
    """Train one epoch of a relation-learning pose model.

    The model returns three heads: target-person heatmaps, interference
    (multi-instance) heatmaps, and association maps.  The total loss is the
    sum of a heatmap loss on each of the first two heads plus an
    association-map loss (criterion term + per-sample MSE against the
    ground-truth association maps, averaged over the batch).
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target, target_weight, inter_target, inter_target_weight, meta) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # NOTE(review): inter_scores and cat_maps are moved to GPU but never
        # used anywhere below — candidates for removal (confirm no hooks rely
        # on the transfer side effect).
        inter_scores = meta['interference_maps'].cuda(non_blocking=True)
        cat_maps = meta['kpt_cat_maps'].cuda(non_blocking=True)

        # Build association-map targets from the GT heatmaps; no gradients
        # are needed for target construction.
        with torch.no_grad():
            amaps_target, amaps_target_weight = generate_association_map_from_gt_heatmaps(
                target, inter_target)

        # compute output
        target = target.cuda(non_blocking=True)
        target_weight = target_weight.cuda(non_blocking=True)
        inter_target = inter_target.cuda(non_blocking=True)
        inter_target_weight = inter_target_weight.cuda(non_blocking=True)
        amaps_target = amaps_target.cuda(non_blocking=True)
        amaps_target_weight = amaps_target_weight.cuda(non_blocking=True)

        outputs = model(input)
        output, inter_output, amap_output = outputs

        # target heatmap loss
        st_loss = criterion(output, target, target_weight)
        # multi-instances heatmap loss
        mt_loss = criterion(inter_output, inter_target, inter_target_weight)
        # association loss: criterion term plus per-sample MSE against the
        # first n x n sub-block of the GT association map (n = #keypoints
        # actually present in that sample), then averaged.
        rel_loss_all = criterion(amap_output, amaps_target, amaps_target_weight)
        for amap, gt_amap, n in zip(amap_output, meta['association_maps'], meta['num_points']):
            gt_map = gt_amap[:n, :n].to(amap.device)
            rel_loss = F.mse_loss(amap, gt_map)
            rel_loss_all += rel_loss
        rel_loss_all /= len(amap_output)

        loss = st_loss + mt_loss + rel_loss_all

        # compute gradient and do update step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        losses.update(loss.item(), input.size(0))
        _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(),
                                         target.detach().cpu().numpy())
        acc.update(avg_acc, cnt)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % config.PRINT_FREQ == 0:
            msg = 'Epoch: [{0}][{1}/{2}]\t' \
                  'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                  'Speed {speed:.1f} samples/s\t' \
                  'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                  'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
                  'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      speed=input.size(0)/batch_time.val,
                      data_time=data_time, loss=losses, acc=acc)
            logger.info(msg)
            # NOTE(review): "st_pose loss loss" looks like a typo in the
            # printed label (runtime string — left unchanged here).
            print('st_pose loss loss:%.6f' % (st_loss.cpu().detach().numpy()),
                  'mt_pose loss:%.6f' % (mt_loss.cpu().detach().numpy()))
            print('*' * 100)
            # 'relation loss:%.6f' % (rel_loss_all.cpu().detach().numpy()))

            writer = writer_dict['writer']
            global_steps = writer_dict['train_global_steps']
            writer.add_scalar('train_loss', losses.val, global_steps)
            writer.add_scalar('train_acc', acc.val, global_steps)
            writer_dict['train_global_steps'] = global_steps + 1

            prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i)
            save_debug_images(config, input, meta, target, pred * 4, output, prefix)
def train(config, train_loader, model, criterion, optimizer, epoch, output_dir, tb_log_dir, writer_dict):
    """Train one epoch, optionally in PoseWarper mode.

    When ``MODEL.USE_WARPING_TRAIN`` is set, the loader yields an extra
    support frame ``input_sup`` that is channel-concatenated with the input
    (and, when ``MODEL.USE_GT_INPUT_TRAIN``, with the 4x-upsampled GT
    heatmaps) before the forward pass.  Otherwise this is a plain
    single-frame training loop.

    Fix vs. original: removed the unused local
    ``N = min(len(train_loader), config['MODEL']['ITER'])``.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()

    use_warping = config['MODEL']['USE_WARPING_TRAIN']
    use_gt_input = config['MODEL']['USE_GT_INPUT_TRAIN']

    if use_warping:
        for i, (input, input_sup, target, target_weight, meta) in enumerate(train_loader):
            # measure data loading time
            data_time.update(time.time() - end)

            # Concatenate current frame with the support frame (and the
            # upsampled GT heatmaps when training with GT input).
            if use_gt_input:
                target_up_op = nn.Upsample(scale_factor=4, mode='nearest')
                target_up = target_up_op(target)
                concat_input = torch.cat((input, input_sup, target_up), 1)
            else:
                concat_input = torch.cat((input, input_sup), 1)

            # compute output
            outputs = model(concat_input)

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            # Multi-stage models return a list; sum the loss over stages and
            # use the last stage's output for accuracy/debug below.
            if isinstance(outputs, list):
                loss = criterion(outputs[0], target, target_weight)
                for output in outputs[1:]:
                    loss += criterion(output, target, target_weight)
            else:
                output = outputs
                loss = criterion(output, target, target_weight)

            # compute gradient and do update step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # measure accuracy and record loss
            losses.update(loss.item(), input.size(0))
            _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(),
                                             target.detach().cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % config.PRINT_FREQ == 0:
                msg = 'Epoch: [{0}][{1}/{2}]\t' \
                      'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                      'Speed {speed:.1f} samples/s\t' \
                      'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                      'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          epoch, i, len(train_loader), batch_time=batch_time,
                          speed=input.size(0)/batch_time.val,
                          data_time=data_time, loss=losses, acc=acc)
                logger.info(msg)

                writer = writer_dict['writer']
                global_steps = writer_dict['train_global_steps']
                writer.add_scalar('train_loss', losses.val, global_steps)
                writer.add_scalar('train_acc', acc.val, global_steps)
                writer_dict['train_global_steps'] = global_steps + 1

                prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i)
                save_debug_images(config, input, meta, target, pred * 4, output, prefix)
    else:
        for i, (input, target, target_weight, meta) in enumerate(train_loader):
            # measure data loading time
            data_time.update(time.time() - end)

            # compute output
            outputs = model(input)

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            if isinstance(outputs, list):
                loss = criterion(outputs[0], target, target_weight)
                for output in outputs[1:]:
                    loss += criterion(output, target, target_weight)
            else:
                output = outputs
                loss = criterion(output, target, target_weight)

            # compute gradient and do update step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # measure accuracy and record loss
            losses.update(loss.item(), input.size(0))
            _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(),
                                             target.detach().cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % config.PRINT_FREQ == 0:
                msg = 'Epoch: [{0}][{1}/{2}]\t' \
                      'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                      'Speed {speed:.1f} samples/s\t' \
                      'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                      'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          epoch, i, len(train_loader), batch_time=batch_time,
                          speed=input.size(0)/batch_time.val,
                          data_time=data_time, loss=losses, acc=acc)
                logger.info(msg)

                writer = writer_dict['writer']
                global_steps = writer_dict['train_global_steps']
                writer.add_scalar('train_loss', losses.val, global_steps)
                writer.add_scalar('train_acc', acc.val, global_steps)
                writer_dict['train_global_steps'] = global_steps + 1

                prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i)
                save_debug_images(config, input, meta, target, pred * 4, output, prefix)
def validate(config, val_loader, val_dataset, model, criterion, output_dir,
             tb_log_dir, writer_dict=None):
    """Evaluate on `val_loader`, optionally in PoseWarper (warping) mode.

    Collects final keypoint predictions/boxes for the whole dataset, can
    save or re-load predictions as HDF5, runs `val_dataset.evaluate`, and
    returns the performance indicator (or None when evaluation is disabled
    in warping mode).

    Fixes vs. original:
      * ``dtype=np.float`` -> ``np.float32`` (the ``np.float`` alias was
        removed in NumPy 1.24 and raised a deprecation warning before that);
      * ``sfx`` in the LOAD_PREDS branch was previously undefined
        (NameError); it is now ``''`` so the load paths match the names
        written by the SAVE_PREDS branch;
      * removed dead commented-out code and an unused local.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    filenames_map = {}      # image name -> list of row indices in all_preds
    filenames_counter = 0
    imgnums = []
    idx = 0

    preds_output_dir = config.OUTPUT_DIR + 'keypoint_preds/'
    if config.SAVE_PREDS:
        if not os.path.exists(preds_output_dir):
            os.makedirs(preds_output_dir)

    use_warping = config['MODEL']['USE_WARPING_TEST']
    use_gt_input = config['MODEL']['USE_GT_INPUT_TEST']
    warping_reverse = config['MODEL']['WARPING_REVERSE']

    # Shortcut path: evaluate previously propagated GT predictions and
    # return without running the model at all.
    if config.LOAD_PROPAGATED_GT_PREDS:
        output_path = preds_output_dir + 'propagated_gt_preds.h5'
        hf = h5py.File(output_path, 'r')
        all_preds = np.array(hf.get('data'))
        hf.close()
        output_path = preds_output_dir + 'propagated_gt_boxes.h5'
        hf = h5py.File(output_path, 'r')
        all_boxes = np.array(hf.get('data'))
        hf.close()
        output_path = preds_output_dir + 'filenames_map.npy'
        D = np.load(output_path, allow_pickle=True)
        filenames_map = D.item()

        track_preds = None
        logger.info('########################################')
        logger.info('{}'.format(config.EXPERIMENT_NAME))
        name_values, perf_indicator = val_dataset.evaluate(
            config, all_preds, output_dir, all_boxes, filenames_map,
            track_preds, filenames, imgnums)
        model_name = config.MODEL.NAME
        if isinstance(name_values, list):
            for name_value in name_values:
                _print_name_value(name_value, model_name)
        else:
            _print_name_value(name_values, model_name)

        if writer_dict:
            writer = writer_dict['writer']
            global_steps = writer_dict['valid_global_steps']
            writer.add_scalar('valid_loss', losses.avg, global_steps)
            writer.add_scalar('valid_acc', acc.avg, global_steps)
            if isinstance(name_values, list):
                for name_value in name_values:
                    writer.add_scalars('valid', dict(name_value), global_steps)
            else:
                writer.add_scalars('valid', dict(name_values), global_steps)
            writer_dict['valid_global_steps'] = global_steps + 1
        return perf_indicator

    with torch.no_grad():
        end = time.time()
        if not use_warping:
            for i, (input, target, target_weight, meta) in enumerate(val_loader):
                # Track which rows of all_preds belong to each image.
                for ff in range(len(meta['image'])):
                    cur_nm = meta['image'][ff]
                    if not cur_nm in filenames_map:
                        filenames_map[cur_nm] = [filenames_counter]
                    else:
                        filenames_map[cur_nm].append(filenames_counter)
                    filenames_counter += 1

                # compute output
                outputs = model(input)
                if isinstance(outputs, list):
                    output = outputs[-1]
                else:
                    output = outputs

                target = target.cuda(non_blocking=True)
                target_weight = target_weight.cuda(non_blocking=True)

                loss = criterion(output, target, target_weight)

                num_images = input.size(0)
                # measure accuracy and record loss
                losses.update(loss.item(), num_images)
                _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                                 target.cpu().numpy())
                acc.update(avg_acc, cnt)

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                c = meta['center'].numpy()
                s = meta['scale'].numpy()
                score = meta['score'].numpy()

                preds, maxvals = get_final_preds(config,
                                                 output.clone().cpu().numpy(),
                                                 c, s)

                all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
                all_preds[idx:idx + num_images, :, 2:3] = maxvals
                # double check this all_boxes parts
                all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
                all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
                all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
                all_boxes[idx:idx + num_images, 5] = score
                image_path.extend(meta['image'])

                idx += num_images

                if i % config.PRINT_FREQ == 0:
                    msg = 'Test: [{0}/{1}]\t' \
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                          'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                              i, len(val_loader), batch_time=batch_time,
                              loss=losses, acc=acc)
                    logger.info(msg)

                    prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)
                    save_debug_images(config, input, meta, target, pred * 4,
                                      output, prefix)

            track_preds = None
            logger.info('########################################')
            logger.info('{}'.format(config.EXPERIMENT_NAME))
            name_values, perf_indicator = val_dataset.evaluate(
                config, all_preds, output_dir, all_boxes, filenames_map,
                track_preds, filenames, imgnums)
            model_name = config.MODEL.NAME
            if isinstance(name_values, list):
                for name_value in name_values:
                    _print_name_value(name_value, model_name)
            else:
                _print_name_value(name_values, model_name)

            if writer_dict:
                writer = writer_dict['writer']
                global_steps = writer_dict['valid_global_steps']
                writer.add_scalar('valid_loss', losses.avg, global_steps)
                writer.add_scalar('valid_acc', acc.avg, global_steps)
                if isinstance(name_values, list):
                    for name_value in name_values:
                        writer.add_scalars('valid', dict(name_value),
                                           global_steps)
                else:
                    writer.add_scalars('valid', dict(name_values),
                                       global_steps)
                writer_dict['valid_global_steps'] = global_steps + 1
        else:  ### PoseWarper
            for i, (input, input_sup, target, target_weight, meta) in enumerate(val_loader):
                for ff in range(len(meta['image'])):
                    cur_nm = meta['image'][ff]
                    if not cur_nm in filenames_map:
                        filenames_map[cur_nm] = [filenames_counter]
                    else:
                        filenames_map[cur_nm].append(filenames_counter)
                    filenames_counter += 1

                # Concatenate current + support frame (and upsampled GT
                # heatmaps when testing with GT input or reverse warping).
                if use_gt_input:
                    target_up_op = nn.Upsample(scale_factor=4, mode='nearest')
                    target_up = target_up_op(target)
                    concat_input = torch.cat((input, input_sup, target_up), 1)
                else:
                    if warping_reverse:
                        target_up_op = nn.Upsample(scale_factor=4,
                                                   mode='nearest')
                        target_up = target_up_op(target)
                        concat_input = torch.cat((input, input_sup, target_up), 1)
                    else:
                        concat_input = torch.cat((input, input_sup), 1)

                # Skip the forward pass entirely when predictions are loaded
                # from disk below.
                if not config.LOAD_PREDS:
                    outputs = model(concat_input)
                    if isinstance(outputs, list):
                        output = outputs[-1]
                    else:
                        output = outputs

                target = target.cuda(non_blocking=True)
                target_weight = target_weight.cuda(non_blocking=True)

                num_images = input.size(0)
                if config.LOAD_PREDS:
                    # Placeholders so the meters below stay consistent.
                    loss = 0.0
                    avg_acc = 0.0
                    cnt = 1
                else:
                    loss = criterion(output, target, target_weight)
                    losses.update(loss.item(), num_images)
                    # measure accuracy and record loss
                    _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                                     target.cpu().numpy())
                acc.update(avg_acc, cnt)

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                c = meta['center'].numpy()
                s = meta['scale'].numpy()
                score = meta['score'].numpy()

                if not config.LOAD_PREDS:
                    preds, maxvals = get_final_preds(
                        config, output.clone().cpu().numpy(), c, s)
                    all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
                    all_preds[idx:idx + num_images, :, 2:3] = maxvals
                    # double check this all_boxes parts
                    all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
                    all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
                    all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
                    all_boxes[idx:idx + num_images, 5] = score

                image_path.extend(meta['image'])
                idx += num_images

                if i % config.PRINT_FREQ == 0:
                    msg = 'Test: [{0}/{1}]\t' \
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                          'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                              i, len(val_loader), batch_time=batch_time,
                              loss=losses, acc=acc)
                    logger.info(msg)

                    prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)
                    if not config.LOAD_HEATMAPS and not config.LOAD_PREDS:
                        save_debug_images(config, input, meta, target,
                                          pred * 4, output, prefix)

            if config.SAVE_PREDS:
                print('Saving preds...')
                output_path = preds_output_dir + 'delta' + str(
                    config.MODEL.TIMESTEP_DELTA) + '_keypoints.h5'
                if config.MODEL.WARPING_REVERSE:
                    output_path = output_path.replace('.h5', '_reverse.h5')
                if config.DATASET.TEST_ON_TRAIN:
                    output_path = output_path.replace('.h5', '_train.h5')
                print(output_path)
                hf = h5py.File(output_path, 'w')
                hf.create_dataset('data', data=all_preds)
                hf.close()

                output_path = preds_output_dir + 'delta' + str(
                    config.MODEL.TIMESTEP_DELTA) + '_boxes.h5'
                if config.MODEL.WARPING_REVERSE:
                    output_path = output_path.replace('.h5', '_reverse.h5')
                if config.DATASET.TEST_ON_TRAIN:
                    output_path = output_path.replace('.h5', '_train.h5')
                hf = h5py.File(output_path, 'w')
                hf.create_dataset('data', data=all_boxes)
                hf.close()

            if config.LOAD_PREDS:
                # Filename suffix for prediction variants; was undefined in
                # the original (NameError). Empty string matches the plain
                # names written by the SAVE_PREDS branch above.
                sfx = ''
                output_path = preds_output_dir + 'delta' + str(
                    config.MODEL.TIMESTEP_DELTA) + '_keypoints' + sfx + '.h5'
                hf = h5py.File(output_path, 'r')
                all_preds = np.array(hf.get('data'))
                hf.close()
                output_path = preds_output_dir + 'delta' + str(
                    config.MODEL.TIMESTEP_DELTA) + '_boxes' + sfx + '.h5'
                hf = h5py.File(output_path, 'r')
                all_boxes = np.array(hf.get('data'))
                hf.close()

            if config.MODEL.EVALUATE:
                track_preds = None
                logger.info('########################################')
                logger.info('{}'.format(config.EXPERIMENT_NAME))
                name_values, perf_indicator = val_dataset.evaluate(
                    config, all_preds, output_dir, all_boxes, filenames_map,
                    track_preds, filenames, imgnums)
                model_name = config.MODEL.NAME
                if isinstance(name_values, list):
                    for name_value in name_values:
                        _print_name_value(name_value, model_name)
                else:
                    _print_name_value(name_values, model_name)

                if writer_dict:
                    writer = writer_dict['writer']
                    global_steps = writer_dict['valid_global_steps']
                    writer.add_scalar('valid_loss', losses.avg, global_steps)
                    writer.add_scalar('valid_acc', acc.avg, global_steps)
                    if isinstance(name_values, list):
                        for name_value in name_values:
                            writer.add_scalars('valid', dict(name_value),
                                               global_steps)
                    else:
                        writer.add_scalars('valid', dict(name_values),
                                           global_steps)
                    writer_dict['valid_global_steps'] = global_steps + 1
            else:
                perf_indicator = None

    return perf_indicator
def train(config, train_loader, model, criterion, optimizer, epoch, output_dir, tb_log_dir, writer_dict):
    """Train one epoch and periodically dump Grad-CAM style heatmap overlays.

    NOTE(review): the Grad-CAM section at the bottom reads ``gradcam_pred``
    and ``gradcam_gt`` before they are ever assigned, and ``joint_index`` is
    not defined anywhere in view — as written this raises NameError the
    first time ``i % config.PRINT_FREQ == 0``.  The missing assignments
    presumably live in code that was removed or lives elsewhere; confirm
    before relying on this function.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target, target_weight, meta) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        input = input.cuda()
        output = model(input)

        target = target.cuda(non_blocking=True)
        target_weight = target_weight.cuda(non_blocking=True)

        loss = criterion(output, target, target_weight)

        # compute gradient and do update step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        losses.update(loss.item(), input.size(0))
        _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(),
                                         target.detach().cpu().numpy())
        acc.update(avg_acc, cnt)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # log
        if i % config.PRINT_FREQ == 0:
            msg = 'Epoch: [{0}][{1}/{2}]\t' \
                  'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                  'Speed {speed:.1f} samples/s\t' \
                  'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                  'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
                  'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      speed=input.size(0)/batch_time.val,
                      data_time=data_time, loss=losses, acc=acc)
            logger.info(msg)

            writer = writer_dict['writer']
            global_steps = writer_dict['train_global_steps']
            writer.add_scalar('train_loss', losses.val, global_steps)
            writer.add_scalar('train_acc', acc.val, global_steps)
            writer_dict['train_global_steps'] = global_steps + 1

            prefix = '{}_{}_{}'.format(
                os.path.join(output_dir, 'train'), epoch+1, i)
            save_debug_images(config, input, meta, target, pred*4, output,
                              prefix, target_weight)

            # to save grad cam
            img = denormalize(input[0])
            img_h, img_w = img.shape[-2:]
            # NOTE(review): gradcam_pred / gradcam_gt are undefined at this
            # point — see docstring.
            gradcam_pred = gradcam_pred.detach().mul(
                255).permute(0, 2, 3, 1).byte().cpu().numpy()
            gradcam_gt = gradcam_gt.detach().mul(255).permute(0, 2, 3, 1).byte().cpu().numpy()
            img = denormalize(input[0])
            # Overlay each of the 17 joint activation maps on the input image.
            for j in range(17):
                img_numpy = img.mul(255).permute(1, 2, 0).byte().cpu().numpy()
                superimposed_img_pred = cv2.applyColorMap(cv2.resize(np.expand_dims(
                    gradcam_pred[0, :, :, j], axis=2), (img_w, img_h)), cv2.COLORMAP_JET) * 0.4 + img_numpy
                superimposed_img_gt = cv2.applyColorMap(cv2.resize(np.expand_dims(
                    gradcam_gt[0, :, :, j], axis=2), (img_w, img_h)), cv2.COLORMAP_JET) *0.4 + img_numpy
                # imwrite gradcam
                cv2.imwrite(
                    f'{prefix}_{i}_th_epoch_{joint_index[j]}_pred.jpg',
                    superimposed_img_pred)
                cv2.imwrite(
                    f'{prefix}_{i}_th_epoch_{joint_index[j]}_gt.jpg',
                    superimposed_img_gt)
def train(config, train_loader, model, criterion, optimizer, epoch, output_dir, tb_log_dir, writer_dict):
    """Train one epoch with coarse-to-fine deep supervision.

    For list outputs (multi-stage models), each stage is supervised on a
    progressively larger subset of the 16 joints — easy joints first, all
    joints on the final stage — and the per-stage losses are summed.

    Fix vs. original: the per-stage loss loop used ``for i in range(...)``,
    shadowing the batch index ``i`` and corrupting the PRINT_FREQ logging
    condition and the debug-image prefix.  The loop variable no longer
    shadows the batch index.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target, target_weight, meta) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        outputs = model(input)

        target = target.cuda(non_blocking=True)
        target_weight = target_weight.cuda(non_blocking=True)

        if isinstance(outputs, list):
            # Joint subsets per supervision stage: from easy to all joints.
            # NOTE(review): target_weight is passed un-subset, so weight
            # columns do not line up with the subset joints — confirm the
            # criterion's intended behavior here.
            stage_joint_ids = (
                (6, 7, 8, 9, 12, 13),
                (2, 3, 6, 7, 8, 9, 11, 12, 13, 14),
                (1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
                (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
            )
            loss = None
            for stage_out, ids in zip(outputs, stage_joint_ids):
                stage_loss = criterion(stage_out[:, ids], target[:, ids],
                                       target_weight)
                loss = stage_loss if loss is None else loss + stage_loss
            # Use the final (full-joint) stage for accuracy/debug below.
            output = outputs[-1]
        else:
            output = outputs
            loss = criterion(output, target, target_weight)

        # compute gradient and do update step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        losses.update(loss.item(), input.size(0))
        _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(),
                                         target.detach().cpu().numpy())
        acc.update(avg_acc, cnt)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % config.PRINT_FREQ == 0:
            msg = 'Epoch: [{0}][{1}/{2}]\t' \
                  'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                  'Speed {speed:.1f} samples/s\t' \
                  'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                  'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
                  'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      speed=input.size(0)/batch_time.val,
                      data_time=data_time, loss=losses, acc=acc)
            logger.info(msg)

            writer = writer_dict['writer']
            global_steps = writer_dict['train_global_steps']
            writer.add_scalar('train_loss', losses.val, global_steps)
            writer.add_scalar('train_acc', acc.val, global_steps)
            writer_dict['train_global_steps'] = global_steps + 1

            prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i)
            save_debug_images(config, input, meta, target, pred * 4, output, prefix)
def validate(config, val_loader, val_dataset, model, criterion, output_dir,
             tb_log_dir, writer_dict=None):
    """Evaluate on `val_loader`; in warping mode, use a 5-frame PoseWarper
    input (current frame plus two previous and two next frames concatenated
    along the channel dim).

    Accumulates dataset-wide predictions/boxes, optionally saves them to
    HDF5, runs `val_dataset.evaluate`, and returns the perf indicator.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    filenames_map = {}      # image name -> list of row indices in all_preds
    filenames_counter = 0
    imgnums = []
    idx = 0

    use_warping = config['MODEL']['USE_WARPING_TEST']

    preds_output_dir = config.OUTPUT_DIR + 'keypoint_preds/'
    if config.SAVE_PREDS:
        output_filenames_map_file = preds_output_dir + 'filenames_map.npy'
        if not os.path.exists(preds_output_dir):
            os.makedirs(preds_output_dir)

    with torch.no_grad():
        end = time.time()
        if not use_warping:
            for i, (input, target, target_weight, meta) in enumerate(val_loader):
                # Track which rows of all_preds belong to each image.
                for ff in range(len(meta['image'])):
                    cur_nm = meta['image'][ff]
                    if not cur_nm in filenames_map:
                        filenames_map[cur_nm] = [filenames_counter]
                    else:
                        filenames_map[cur_nm].append(filenames_counter)
                    filenames_counter += 1

                # compute output
                outputs = model(input)
                if isinstance(outputs, list):
                    output = outputs[-1]
                else:
                    output = outputs

                target = target.cuda(non_blocking=True)
                target_weight = target_weight.cuda(non_blocking=True)

                loss = criterion(output, target, target_weight)

                num_images = input.size(0)
                # measure accuracy and record loss
                losses.update(loss.item(), num_images)
                _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                                 target.cpu().numpy())
                acc.update(avg_acc, cnt)

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                c = meta['center'].numpy()
                s = meta['scale'].numpy()
                score = meta['score'].numpy()

                preds, maxvals = get_final_preds(config,
                                                 output.clone().cpu().numpy(),
                                                 c, s)

                all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
                all_preds[idx:idx + num_images, :, 2:3] = maxvals
                # double check this all_boxes parts
                all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
                all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
                all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
                all_boxes[idx:idx + num_images, 5] = score
                image_path.extend(meta['image'])

                idx += num_images

                if i % config.PRINT_FREQ == 0:
                    msg = 'Test: [{0}/{1}]\t' \
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                          'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                              i, len(val_loader), batch_time=batch_time,
                              loss=losses, acc=acc)
                    logger.info(msg)

                    prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)
                    save_debug_images(config, input, meta, target, pred * 4,
                                      output, prefix)

            logger.info('### Method: {} ###'.format(config.EXPERIMENT_NAME))
            name_values, perf_indicator = val_dataset.evaluate(
                config, all_preds, output_dir, all_boxes, filenames_map,
                filenames, imgnums)
            model_name = config.MODEL.NAME
            if isinstance(name_values, list):
                for name_value in name_values:
                    _print_name_value(name_value, model_name)
            else:
                _print_name_value(name_values, model_name)

            if writer_dict:
                writer = writer_dict['writer']
                global_steps = writer_dict['valid_global_steps']
                writer.add_scalar('valid_loss', losses.avg, global_steps)
                writer.add_scalar('valid_acc', acc.avg, global_steps)
                if isinstance(name_values, list):
                    for name_value in name_values:
                        writer.add_scalars('valid', dict(name_value),
                                           global_steps)
                else:
                    writer.add_scalars('valid', dict(name_values),
                                       global_steps)
                writer_dict['valid_global_steps'] = global_steps + 1
        else:  ### PoseWarper
            for i, (input, input_prev1, input_prev2, input_next1, input_next2, target, target_weight, meta) in enumerate(val_loader):
                for ff in range(len(meta['image'])):
                    cur_nm = meta['image'][ff]
                    if not cur_nm in filenames_map:
                        filenames_map[cur_nm] = [filenames_counter]
                    else:
                        filenames_map[cur_nm].append(filenames_counter)
                    filenames_counter += 1

                # Channel-concatenate current frame with the 4 support frames.
                concat_input = torch.cat(
                    (input, input_prev1, input_prev2, input_next1, input_next2), 1)

                outputs = model(concat_input)
                if isinstance(outputs, list):
                    output = outputs[-1]
                else:
                    output = outputs

                target = target.cuda(non_blocking=True)
                target_weight = target_weight.cuda(non_blocking=True)

                num_images = input.size(0)
                loss = criterion(output, target, target_weight)

                # measure accuracy and record loss
                losses.update(loss.item(), num_images)
                _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                                 target.cpu().numpy())
                acc.update(avg_acc, cnt)

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                c = meta['center'].numpy()
                s = meta['scale'].numpy()
                score = meta['score'].numpy()

                preds, maxvals = get_final_preds(config,
                                                 output.clone().cpu().numpy(),
                                                 c, s)

                all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
                all_preds[idx:idx + num_images, :, 2:3] = maxvals
                # double check this all_boxes parts
                all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
                all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
                all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
                all_boxes[idx:idx + num_images, 5] = score
                image_path.extend(meta['image'])

                idx += num_images

                if i % config.PRINT_FREQ == 0:
                    msg = 'Test: [{0}/{1}]\t' \
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                          'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                              i, len(val_loader), batch_time=batch_time,
                              loss=losses, acc=acc)
                    logger.info(msg)

                    prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)
                    save_debug_images(config, input, meta, target, pred * 4,
                                      output, prefix)

            # Optionally persist raw predictions/boxes for later reuse.
            if config.SAVE_PREDS:
                print('Saving preds...')
                output_path = preds_output_dir + 'delta' + str(
                    config.MODEL.TIMESTEP_DELTA) + '_keypoints.h5'
                print(output_path)
                hf = h5py.File(output_path, 'w')
                hf.create_dataset('data', data=all_preds)
                hf.close()

                output_path = preds_output_dir + 'delta' + str(
                    config.MODEL.TIMESTEP_DELTA) + '_boxes.h5'
                hf = h5py.File(output_path, 'w')
                hf.create_dataset('data', data=all_boxes)
                hf.close()

                np.save(output_filenames_map_file, filenames_map)

            logger.info('### Method: {} ###'.format(config.EXPERIMENT_NAME))
            name_values, perf_indicator = val_dataset.evaluate(
                config, all_preds, output_dir, all_boxes, filenames_map,
                filenames, imgnums)
            model_name = config.MODEL.NAME
            if isinstance(name_values, list):
                for name_value in name_values:
                    _print_name_value(name_value, model_name)
            else:
                _print_name_value(name_values, model_name)

            if writer_dict:
                writer = writer_dict['writer']
                global_steps = writer_dict['valid_global_steps']
                writer.add_scalar('valid_loss', losses.avg, global_steps)
                writer.add_scalar('valid_acc', acc.avg, global_steps)
                if isinstance(name_values, list):
                    for name_value in name_values:
                        writer.add_scalars('valid', dict(name_value),
                                           global_steps)
                else:
                    writer.add_scalars('valid', dict(name_values),
                                       global_steps)
                writer_dict['valid_global_steps'] = global_steps + 1

    return perf_indicator
def validate(config, val_loader, val_dataset, model, criterion, output_dir,
             tb_log_dir, writer_dict=None):
    """Evaluate on `val_loader`, optionally with a horizontal flip test.

    Runs the model over the validation set, converts the final-stage
    heatmaps into image-space keypoints, accumulates dataset-wide
    predictions and box metadata, calls `val_dataset.evaluate`, logs the
    results, and returns the overall performance indicator.

    Changes vs. original: removed an unused blend weight (`l = 0`) and dead
    commented-out code; renamed the stage blend weights from `c/j/k` to
    `w0/w1/w2` so they no longer shadow the bbox center array `c` used
    below; translated comments to English.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    all_preds = np.zeros(
        (num_samples, config.MODEL.NUM_JOINTS, 3), dtype=np.float32
    )
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # compute output heatmaps
            outputs = model(input)
            if isinstance(outputs, list):
                # Weighted blend of the multi-stage outputs; with the
                # current weights this selects outputs[0] only.
                w0, w1, w2 = 1, 0, 0
                output = (w0*outputs[0] + w1*outputs[1] + w2*outputs[2])
            else:
                output = outputs

            if config.TEST.FLIP_TEST:
                # this part is ugly, because pytorch has not supported negative index
                # input_flipped = model(input[:, :, :, ::-1])
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped)

                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped

                # Map the flipped-input heatmaps back to the original
                # (unflipped) joint layout.
                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    # shift every column one pixel to the right
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                # average original and flipped predictions
                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            # target_weight encodes per-joint visibility
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()

            preds, maxvals = get_final_preds(
                config, output.clone().cpu().numpy(), c, s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            # box area; scale is expressed in units of 200 pixels
            all_boxes[idx:idx + num_images, 4] = np.prod(s*200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(meta['image'])

            idx += num_images

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, acc=acc)
                logger.info(msg)

                # e.g. 'output/.../val_0'
                prefix = '{}_{}'.format(
                    os.path.join(output_dir, 'val'), i
                )
                save_debug_images(config, input, meta, target, pred*4,
                                  output, prefix)

        name_values, perf_indicator = val_dataset.evaluate(
            config, all_preds, output_dir, all_boxes, image_path,
            filenames, imgnums
        )

        model_name = config.MODEL.NAME
        if isinstance(name_values, list):
            for name_value in name_values:
                _print_name_value(name_value, model_name)
        else:
            _print_name_value(name_values, model_name)

        if writer_dict:
            writer = writer_dict['writer']
            global_steps = writer_dict['valid_global_steps']
            writer.add_scalar(
                'valid_loss',
                losses.avg,
                global_steps
            )
            writer.add_scalar(
                'valid_acc',
                acc.avg,
                global_steps
            )
            if isinstance(name_values, list):
                for name_value in name_values:
                    writer.add_scalars(
                        'valid',
                        dict(name_value),
                        global_steps
                    )
            else:
                writer.add_scalars(
                    'valid',
                    dict(name_values),
                    global_steps
                )
            writer_dict['valid_global_steps'] = global_steps + 1

    return perf_indicator
def inference(config, image_loader, image_dataset, model, output_dir):
    """Run pose inference over `image_loader` and dump results to JSON.

    For every batch: forward pass (optionally averaged with a horizontally
    flipped pass), decode heatmaps to image-space keypoints, and accumulate
    per-image predictions and boxes. Finally groups results by image id and
    writes `<output_dir>/box_keypoints.json`.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()  # NOTE(review): never updated in this function
    acc = AverageMeter()     # NOTE(review): never updated in this function

    # switch to evaluate mode
    model.eval()

    num_samples = len(image_dataset)
    # (N, J, 3): x, y, confidence per joint
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    # (N, 5): tlwh box + detection score
    all_boxes = np.zeros((num_samples, 5))
    all_image_pathes = []
    all_image_ids = []
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(image_loader):
            num_images = input.size(0)
            # compute output; multi-stage models return a list — keep the last stage
            outputs = model(input)
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs

            if config.TEST.FLIP_TEST:
                # this part is ugly, because pytorch has not supported negative index
                # input_flipped = model(input[:, :, :, ::-1])
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped)
                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped
                # un-flip the heatmaps and swap left/right joint channels
                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           image_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]
                    # output_flipped[:, :, :, 0] = 0
                # average original and flipped predictions
                output = (output + output_flipped) * 0.5

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()
            tlwhs = meta['bbox_tlwh'].numpy()
            output = output.data.cpu()

            # decode heatmaps back to original-image coordinates
            preds, maxvals = get_final_preds(config, output.numpy(), c, s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:4] = tlwhs
            all_boxes[idx:idx + num_images, 4] = score
            all_image_pathes.extend(meta['image'])
            if config.DATASET.DATASET == 'mot':
                # image id is a (sequence name, frame id) pair
                seq_names, frame_ids = meta['image_id']
                frame_ids = frame_ids.numpy().astype(int)
                all_image_ids.extend(list(zip(seq_names, frame_ids)))
            elif config.DATASET.DATASET == 'aifi':
                all_image_ids.extend(meta['image_id'])
            idx += num_images

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(
                          i, len(image_loader), batch_time=batch_time)
                logger.info(msg)

                prefix = '{}_{}'.format(os.path.join(output_dir, 'inference'), i)
                # debug images use raw heatmap-space maxima (×4 = input scale)
                pred, _ = get_max_preds(output.numpy())
                save_debug_images(config, input, meta, target, pred * 4, output,
                                  prefix)

    # write output: group (keypoints, box) pairs per image id
    frame_results = defaultdict(list)
    for image_id, pred, box in zip(all_image_ids, all_preds, all_boxes):
        frame_results[image_id].append(
            (pred.astype(float).tolist(), box.astype(float).tolist()))

    final_results = {}
    for image_id, results in frame_results.items():
        keypoints, boxes = zip(*results)
        final_results[image_id] = {'keypoints': keypoints, 'boxes': boxes}

    # NOTE(review): for the 'mot' branch image ids are tuples; json.dump cannot
    # serialize tuple dict keys — presumably only string ids reach this point.
    # TODO confirm against the callers.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    with open(os.path.join(output_dir, 'box_keypoints.json'), 'w') as f:
        json.dump(final_results, f)
    logger.info('Save results to {}'.format(
        os.path.join(output_dir, 'box_keypoints.json')))
def train(config, data, model, criterion, optim, epoch, output_dir,
          writer_dict, **kwargs):
    """One training epoch for the multiview model with optional IMU fusion.

    The loader yields per-view stacked tensors; the first two dims
    (presumably batch x view) are merged before loss/accuracy computation.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    avg_acc = AverageMeter()

    model.train()

    end = time.time()
    for i, (input_, target_, weight_, meta_) in enumerate(data):
        data_time.update(time.time() - end)
        # model consumes the un-merged multiview tensors plus meta kwargs
        output, extra = model(input_, **meta_)

        # flatten (batch, view, ...) -> (batch*view, ...) for loss/metrics
        input = merge_first_two_dims(input_)
        target = merge_first_two_dims(target_)
        weight = merge_first_two_dims(weight_)
        meta = dict()
        for kk in meta_:
            meta[kk] = merge_first_two_dims(meta_[kk])

        target_cuda = target.cuda()
        weight_cuda = weight.cuda()
        loss = 0
        b_imu_fuse = extra['imu_fuse']
        if b_imu_fuse:
            # half weight on the plain heatmaps, half on the IMU-fused ones
            loss += 0.5 * criterion(extra['origin_hms'], target_cuda, weight_cuda)
            # mask restricts the fused loss to pixels with non-trivial target
            target_mask = torch.as_tensor(target_cuda > 0.001,
                                          dtype=torch.float32).cuda()
            # NOTE(review): `heatmaps` is not defined anywhere in this function —
            # this branch raises NameError when extra['imu_fuse'] is truthy.
            # Presumably extra['fused_hms'] was intended; confirm and fix.
            imu_masked = heatmaps * target_mask
            target_imu_joint = target_cuda * extra['joint_channel_mask'][0]
            loss += 0.5 * criterion(imu_masked, target_imu_joint, weight_cuda)
        else:
            loss += criterion(extra['origin_hms'], target_cuda, weight_cuda)

        optim.zero_grad()
        loss.backward()
        optim.step()

        # weight = dim0 * dim1 of the merged batch — presumably views x batch;
        # TODO confirm this matches the intended sample count
        losses.update(loss.item(), len(input) * input[0].size(0))

        _, acc, cnt, pre = accuracy(output.detach().cpu().numpy(),
                                    target.detach().cpu().numpy())
        avg_acc.update(acc, cnt)

        batch_time.update(time.time() - end)
        end = time.time()

        if i % config.PRINT_FREQ == 0:
            gpu_memory_usage = torch.cuda.memory_allocated(0)
            msg = 'Epoch: [{0}][{1}/{2}]\t' \
                  'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                  'Speed {speed:.1f} samples/s\t' \
                  'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                  'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
                  'Accuracy {acc.val:.3f} ({acc.avg:.3f})\t' \
                  'Memory {memory:.1f}'.format(
                      epoch, i, len(data), batch_time=batch_time,
                      speed=input.shape[0] / batch_time.val,
                      data_time=data_time, loss=losses, acc=avg_acc,
                      memory=gpu_memory_usage)
            logger.info(msg)

            writer = writer_dict['writer']
            global_steps = writer_dict['train_global_steps']
            writer.add_scalar('train_loss', losses.val, global_steps)
            writer.add_scalar('train_acc', avg_acc.val, global_steps)
            writer_dict['train_global_steps'] = global_steps + 1

            # for k in range(len(input)):
            # debug images are dumped for view 0 only
            view_name = 'view_{}'.format(0)
            prefix = '{}_{}_{:08}'.format(
                os.path.join(output_dir, 'train'), view_name, i)
            meta_for_debug_imgs = dict()
            meta_for_debug_imgs['joints_vis'] = meta['joints_vis']
            meta_for_debug_imgs['joints_2d_transformed'] = \
                meta['joints_2d_transformed']
            save_debug_images(config, input, meta_for_debug_imgs, target,
                              pre * 4, extra['origin_hms'], prefix)
            if extra is not None and 'fused_hms' in extra:
                fuse_hm = extra['fused_hms']
                prefix = '{}_{}_{:08}'.format(
                    os.path.join(output_dir, 'fused_hms'), view_name, i)
                save_debug_heatmaps(config, input, meta_for_debug_imgs, target,
                                    pre * 4, fuse_hm, prefix)
def train(config, train_loader, model, criterion, optimizer, epoch,
          output_dir, tb_log_dir, writer_dict):
    """Train for one epoch with four supervision branches.

    The model emits four outputs (keypoint, skeletal, half-body, full-body);
    each is matched against its own target using the corresponding slice of
    `target_weight`. Progress is logged every `config.PRINT_FREQ` batches.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    model.train()

    tic = time.time()
    for step, (input, target, skeletal, half, full, target_weight, meta) \
            in enumerate(train_loader):
        data_time.update(time.time() - tic)

        outputs = model(input)
        output = outputs[0]

        # Move every supervision tensor onto the GPU.
        target = target.cuda(non_blocking=True)
        target_weight = target_weight.cuda(non_blocking=True)
        skeletal = skeletal.cuda(non_blocking=True)
        half = half.cuda(non_blocking=True)
        full = full.cuda(non_blocking=True)

        # Channel boundaries of the stacked per-branch weights.
        n_joints = config.MODEL.NUM_JOINTS
        n_skeletal = n_joints + config.MODEL.NUM_SKELETON
        n_half = n_skeletal + config.MODEL.NUM_HALF

        kp_loss = criterion(outputs[0], target, target_weight[:, :n_joints])
        sk_loss = criterion(outputs[1], skeletal,
                            target_weight[:, n_joints:n_skeletal])
        half_loss = criterion(outputs[2], half,
                              target_weight[:, n_skeletal:n_half])
        full_loss = criterion(outputs[3], full,
                              target_weight[:, -1].unsqueeze(2))
        loss = kp_loss + sk_loss + half_loss + full_loss

        # Gradient step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Bookkeeping: loss and keypoint accuracy on the first branch.
        losses.update(loss.item(), input.size(0))
        _, step_acc, cnt, preds = accuracy(output.detach().cpu().numpy(),
                                           target.detach().cpu().numpy())
        acc.update(step_acc, cnt)

        batch_time.update(time.time() - tic)
        tic = time.time()

        if step % config.PRINT_FREQ == 0:
            msg = (
                "Epoch: [{0}][{1}/{2}]\t"
                "Keypoint {keypoint:.3f} Skeletal {skeletal:.3f} Half {half:.3f} Full {full:.3f}\t"
                "Loss {loss.val:.5f} ({loss.avg:.5f})\t"
                "Accuracy {acc.val:.3f} ({acc.avg:.3f})".format(
                    epoch, step, len(train_loader), loss=losses,
                    keypoint=kp_loss.item(), skeletal=sk_loss.item(),
                    half=half_loss.item(), full=full_loss.item(), acc=acc,
                )
            )
            logger.info(msg)

            writer = writer_dict['writer']
            global_steps = writer_dict['train_global_steps']
            writer.add_scalar('train_loss', losses.val, global_steps)
            writer.add_scalar('train_acc', acc.val, global_steps)
            writer_dict['train_global_steps'] = global_steps + 1

            prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), step)
            save_debug_images(config, input, meta, target, preds * 4, output,
                              prefix)
def rsgnet_train(config, train_loader, model, criterion, optimizer, epoch,
                 output_dir, tb_log_dir, writer_dict):
    """One training epoch for RSGNet: keypoint loss + multi-instance loss +
    limb (skeleton) BCE loss + relation-map regularizer, with optional UDP
    heatmap/offset losses.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to train mode
    model.train()
    end = time.time()
    # BCE for limb-presence predictions
    crite = torch.nn.BCELoss().cuda()
    # rel_crite = torch.nn.MSELoss().cuda()
    if config.MODEL.UDP_POSE_ON:
        udp_criterion = UDPLosses(
            use_target_weight=config.LOSS.USE_TARGET_WEIGHT).cuda()

    for i, (input, target, target_weight, all_ins_target,
            all_ins_target_weight, target_limbs, meta) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # Build the relation target: collapse joints to a single person map,
        # downsample by 2, flatten to a vector, and take its outer product so
        # entry (p, q) pairs pixel p with pixel q.
        person_target, _ = torch.max(target, dim=1)
        b, h, w = person_target.size()
        person_target = person_target.reshape(b, 1, h, w)
        person_target = torch.nn.functional.interpolate(person_target,
                                                        scale_factor=1 / 2,
                                                        mode='bilinear',
                                                        align_corners=True)
        person_target = torch.squeeze(person_target)
        b, h, w = person_target.size()
        person_target = person_target.reshape(b, 1, h * w)
        relation_target = torch.matmul(person_target.permute(0, 2, 1),
                                       person_target)
        # relation_target = relation_target.cuda(non_blocking=True)

        multi_outputs, outputs, limbs_ouptuts, relation_scores = model(
            input, relation_target)

        target = target.cuda(non_blocking=True)  # ([64,17,64,48])
        target_weight = target_weight.cuda(non_blocking=True)
        all_ins_target = all_ins_target.cuda(non_blocking=True)
        all_ins_target_weight = all_ins_target_weight.cuda(non_blocking=True)
        target_limbs = target_limbs.cuda(non_blocking=True)
        if config.MODEL.UDP_POSE_ON:
            udp_target = meta['udp_target'].cuda(
                non_blocking=True)  # ([64,17,64,48])
            udp_target_weight = meta['udp_target_weight'].cuda(
                non_blocking=True)
            # with UDP enabled, `outputs` packs (regular outputs, udp outputs)
            udp_outputs = outputs[1]
            outputs = outputs[0]

        # only target
        if isinstance(outputs, list):
            target_loss = criterion(outputs[0], target, target_weight)
            # NOTE(review): `output` below is the leaked loop variable, i.e. the
            # last stage; for a single-element list it stays undefined and the
            # accuracy call further down would raise NameError — confirm the
            # model always returns either a tensor or a multi-element list.
            for output in outputs[1:]:
                target_loss += criterion(output, target, target_weight)
        else:
            output = outputs
            target_loss = criterion(output, target, target_weight)

        # 0.5*interference + target
        if multi_outputs is not None:
            if isinstance(multi_outputs, list):
                multi_loss = criterion(multi_outputs[0], all_ins_target,
                                       all_ins_target_weight)
                for multi_output in multi_outputs[1:]:
                    multi_loss += criterion(multi_output, all_ins_target,
                                            all_ins_target_weight)
            else:
                multi_output = multi_outputs
                multi_loss = criterion(multi_output, all_ins_target,
                                       all_ins_target_weight)
        else:
            # zero with a graph connection so .backward() stays valid
            multi_loss = 0. * target.mean()

        if limbs_ouptuts is not None:
            skelton_loss = 0.01 * crite(limbs_ouptuts, target_limbs)
        else:
            skelton_loss = 0. * target.mean()

        # relation loss
        # diffs = (relation_target - relation_scores)**2
        relation_loss = 0.001 * torch.mean(relation_scores)

        loss = multi_loss + target_loss + skelton_loss + relation_loss
        if config.MODEL.UDP_POSE_ON:
            loss_udp_hm, loss_udp_os = udp_criterion(udp_outputs, udp_target,
                                                     udp_target_weight)
            loss = loss + loss_udp_hm + loss_udp_os

        # compute gradient and do update step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        losses.update(loss.item(), input.size(0))
        _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(),
                                         target.detach().cpu().numpy())
        acc.update(avg_acc, cnt)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % config.PRINT_FREQ == 0:
            msg = 'Epoch: [{0}][{1}/{2}]\t' \
                  'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                  'Speed {speed:.1f} samples/s\t' \
                  'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                  'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
                  'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      speed=input.size(0) / batch_time.val,
                      data_time=data_time, loss=losses, acc=acc)
            logger.info(msg)
            # per-term loss breakdown, printed (not logged) for quick inspection
            print('multi_kpt_loss:', multi_loss.clone().detach().cpu().numpy(),
                  'kpt_loss:', target_loss.clone().detach().cpu().numpy(),
                  ' limbs_loss:', skelton_loss.clone().detach().cpu().numpy(),
                  'relation loss:', relation_loss.clone().detach().cpu().numpy())
            if config.MODEL.UDP_POSE_ON:
                print('udp_hm_loss:', loss_udp_hm.clone().detach().cpu().numpy(),
                      'udp_os_loss:', loss_udp_os.clone().detach().cpu().numpy())

            writer = writer_dict['writer']
            global_steps = writer_dict['train_global_steps']
            writer.add_scalar('train_loss', losses.val, global_steps)
            writer.add_scalar('train_acc', acc.val, global_steps)
            writer_dict['train_global_steps'] = global_steps + 1

            prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i)
            save_debug_images(config, input, meta, target, pred * 4, output,
                              prefix)
def validate(config, loader, dataset, model, criterion, output_dir,
             writer_dict=None):
    """Validate a multiview model, save heatmaps/locations to HDF5, and return
    the dataset's performance indicator.

    Predictions from the views of one sample are interleaved (`preds[k::nviews]`)
    so `all_preds` is ordered sample-major, view-minor.
    """
    model.eval()
    batch_time = AverageMeter()
    losses = AverageMeter()
    avg_acc = AverageMeter()

    # assumes 4 views per sample — TODO confirm against the dataset config
    nsamples = len(dataset) * 4
    is_aggre = config.NETWORK.AGGRE
    njoints = config.NETWORK.NUM_JOINTS
    height = int(config.NETWORK.HEATMAP_SIZE[0])
    width = int(config.NETWORK.HEATMAP_SIZE[1])
    all_preds = np.zeros((nsamples, njoints, 3), dtype=np.float32)
    all_heatmaps = np.zeros(
        (nsamples, njoints, height, width), dtype=np.float32)

    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, weight, meta) in enumerate(loader):
            raw_features, aggre_features = model(input)
            # pick raw or aggregated features per-view depending on is_aggre
            output = routing(raw_features, aggre_features, is_aggre, meta)

            loss = 0
            target_cuda = []
            for t, w, o in zip(target, weight, output):
                t = t.cuda(non_blocking=True)
                w = w.cuda(non_blocking=True)
                target_cuda.append(t)
                loss += criterion(o, t, w)

            if is_aggre:
                # additional supervision on the pre-aggregation features
                for t, w, r in zip(target, weight, raw_features):
                    t = t.cuda(non_blocking=True)
                    w = w.cuda(non_blocking=True)
                    loss += criterion(r, t, w)
            target = target_cuda

            # total images in this batch = views * per-view batch size
            nimgs = len(input) * input[0].size(0)
            losses.update(loss.item(), nimgs)

            nviews = len(output)
            acc = [None] * nviews
            cnt = [None] * nviews
            pre = [None] * nviews
            for j in range(nviews):
                _, acc[j], cnt[j], pre[j] = accuracy(
                    output[j].detach().cpu().numpy(),
                    target[j].detach().cpu().numpy())
            acc = np.mean(acc)
            cnt = np.mean(cnt)
            avg_acc.update(acc, cnt)

            batch_time.update(time.time() - end)
            end = time.time()

            # interleave per-view predictions: view k fills rows k, k+nviews, ...
            preds = np.zeros((nimgs, njoints, 3), dtype=np.float32)
            heatmaps = np.zeros(
                (nimgs, njoints, height, width), dtype=np.float32)
            for k, o, m in zip(range(nviews), output, meta):
                pred, maxval = get_final_preds(config,
                                               o.clone().cpu().numpy(),
                                               m['center'].numpy(),
                                               m['scale'].numpy())
                pred = pred[:, :, 0:2]
                pred = np.concatenate((pred, maxval), axis=2)
                preds[k::nviews] = pred
                heatmaps[k::nviews] = o.clone().cpu().numpy()

            all_preds[idx:idx + nimgs] = preds
            all_heatmaps[idx:idx + nimgs] = heatmaps
            idx += nimgs

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(loader), batch_time=batch_time,
                          loss=losses, acc=avg_acc)
                logger.info(msg)

                for k in range(len(input)):
                    view_name = 'view_{}'.format(k + 1)
                    prefix = '{}_{}_{:08}'.format(
                        os.path.join(output_dir, 'validation'), view_name, i)
                    save_debug_images(config, input[k], meta[k], target[k],
                                      pre[k] * 4, output[k], prefix)

        # save heatmaps and joint locations, restricted to joints present in
        # the union->actual joint mapping of this dataset
        u2a = dataset.u2a_mapping
        a2u = {v: k for k, v in u2a.items() if v != '*'}
        a = list(a2u.keys())
        u = np.array(list(a2u.values()))

        save_file = config.TEST.HEATMAP_LOCATION_FILE
        file_name = os.path.join(output_dir, save_file)
        file = h5py.File(file_name, 'w')
        file['heatmaps'] = all_heatmaps[:, u, :, :]
        file['locations'] = all_preds[:, u, :]
        file['joint_names_order'] = a
        file.close()

        name_value, perf_indicator = dataset.evaluate(all_preds)
        names = name_value.keys()
        values = name_value.values()
        num_values = len(name_value)
        _, full_arch_name = get_model_name(config)
        # markdown-style results table in the log
        logger.info('| Arch ' +
                    ' '.join(['| {}'.format(name) for name in names]) + ' |')
        logger.info('|---' * (num_values + 1) + '|')
        logger.info('| ' + full_arch_name + ' ' +
                    ' '.join(['| {:.3f}'.format(value) for value in values]) +
                    ' |')

    return perf_indicator
def train(config, train_loader, model, criterion, sparsity_criterion,
          optimizer, epoch, output_dir, tb_log_dir, writer_dict):
    """One training epoch with optional dynamic-convolution sparsity loss.

    Data loading is wrapped in a retry loop: transient loader failures are
    tolerated up to a budget of ~20 outstanding errors before aborting.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to train mode
    model.train()

    NUM_ERRORS = 0
    end = time.time()

    train_iter = train_loader.__iter__()
    num_step = len(train_iter)
    for i in range(num_step):
        try:  # dataloading in try/except for file server is overload
            input, target, target_weight, meta = next(train_iter)
            # each success pays one error back off the budget
            NUM_ERRORS = max(0, NUM_ERRORS - 1)
        except Exception as e:
            NUM_ERRORS += 1
            print('Exception at dataloading for train iteration ' + str(i) +
                  ': ' + str(e), end="", flush=True)
            time.sleep(5)
            if NUM_ERRORS > 20:
                raise RuntimeError('Too many dataloader errors')
            continue  # skip this iteration entirely

        # measure data loading time
        data_time.update(time.time() - end)

        # compute output; dynconv_meta threads sparsity state through the model
        dynconv_meta = make_dynconv_meta(config, epoch, i)
        outputs, dynconv_meta = model(input, dynconv_meta)

        target = target.cuda(non_blocking=True)
        target_weight = target_weight.cuda(non_blocking=True)

        # sum the task loss over all intermediate outputs
        assert isinstance(outputs, list)
        loss = criterion(outputs[0], target, target_weight)
        for output in outputs[1:]:
            loss += criterion(output, target, target_weight)
        # the last stage is used for accuracy/debug images below
        output = outputs[-1]

        if config.DYNCONV.ENABLED:
            assert sparsity_criterion is not None
            loss_sparsity, dynconv_meta = sparsity_criterion(dynconv_meta)
            loss = loss + loss_sparsity

            if i % config.PRINT_FREQ == 0:
                sparsity_meta = dynconv_meta['sparsity_meta']
                logger.info(
                    f'train/sparsity_upper_bound: {float(sparsity_meta["upper_bound"])}'
                )
                logger.info(
                    f'train/sparsity_lower_bound: {float(sparsity_meta["lower_bound"])}'
                )
                logger.info(
                    f'train/loss_sparsity_block: {float(sparsity_meta["loss_sp_block"])}'
                )
                logger.info(
                    f'train/loss_sparsity_network: {float(sparsity_meta["loss_sp_network"])}'
                )
                logger.info(f'train/cost: {float(sparsity_meta["cost_perc"])}')
                logger.info(f'train/loss_sparsity: {float(loss_sparsity)}')
                logger.info(f'train/loss: {float(loss)}')

        # compute gradient and do update step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        losses.update(loss.item(), input.size(0))

        _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(),
                                         target.detach().cpu().numpy())
        acc.update(avg_acc, cnt)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % config.PRINT_FREQ == 0:
            msg = 'Epoch: [{0}][{1}/{2}]\t' \
                  'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                  'Speed {speed:.1f} samples/s\t' \
                  'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                  'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
                  'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      speed=input.size(0)/batch_time.val,
                      data_time=data_time, loss=losses, acc=acc)
            logger.info(msg)

            writer = writer_dict['writer']
            global_steps = writer_dict['train_global_steps']
            writer.add_scalar('train_loss', losses.val, global_steps)
            writer.add_scalar('train_acc', acc.val, global_steps)
            writer_dict['train_global_steps'] = global_steps + 1

            prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i)
            save_debug_images(config, input, meta, target, pred * 4, output,
                              prefix)
def do_train(cfg, model, data_loader, loss_factory, optimizer, epoch,
             output_dir, tb_log_dir, writer_dict):
    """Train `model` for one epoch with per-stage heatmap and offset losses.

    Per-stage losses are tracked in separate meters; progress is logged and
    written to tensorboard every `cfg.PRINT_FREQ` batches on rank 0.
    """
    logger = logging.getLogger("Training")

    batch_time = AverageMeter()
    data_time = AverageMeter()
    heatmaps_loss_meter = [AverageMeter() for _ in range(cfg.LOSS.NUM_STAGES)]
    offset_loss_meter = [AverageMeter() for _ in range(cfg.LOSS.NUM_STAGES)]

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, heatmaps, masks, offsets, weights) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # move per-stage supervision tensors to the GPU
        heatmaps = [
            list(map(lambda x: x.cuda(non_blocking=True), heatmap))
            for heatmap in heatmaps
        ]
        masks = [
            list(map(lambda x: x.cuda(non_blocking=True), mask))
            for mask in masks
        ]
        offsets = [
            list(map(lambda x: x.cuda(non_blocking=True), offset))
            for offset in offsets
        ]
        offset_weights = [
            list(map(lambda x: x.cuda(non_blocking=True), weight))
            for weight in weights
        ]

        ####################################################################
        if cfg.LOSS.HEATMAP_MIDDLE_LOSS:
            # extra intermediate-supervision output from mid-network
            outputs, poffsets, middle_output = model(images)
            heatmaps_losses, offset_losses, middle_losses = \
                loss_factory(outputs, poffsets, heatmaps, masks,
                             offsets, offset_weights, middle_output)
        else:
            outputs, poffsets = model(images)
            heatmaps_losses, offset_losses = \
                loss_factory(outputs, poffsets, heatmaps, masks,
                             offsets, offset_weights)
        ####################################################################

        # accumulate per-stage losses into the total
        loss = 0
        for idx in range(cfg.LOSS.NUM_STAGES):
            if heatmaps_losses[idx] is not None:
                heatmaps_loss = heatmaps_losses[idx].mean(dim=0)
                heatmaps_loss_meter[idx].update(heatmaps_loss.item(),
                                                images.size(0))
                loss = loss + heatmaps_loss
            if offset_losses[idx] is not None:
                offset_loss = offset_losses[idx]
                offset_loss_meter[idx].update(offset_loss.item(),
                                              images.size(0))
                loss = loss + offset_loss

        ########################################################################
        if cfg.LOSS.HEATMAP_MIDDLE_LOSS:
            if middle_losses is not None:
                loss = loss + middle_losses.mean(dim=0)
        ########################################################################

        # compute gradient and do update step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % cfg.PRINT_FREQ == 0 and cfg.RANK == 0:
            msg = 'Epoch: [{0}][{1}/{2}]\t' \
                  'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                  'Speed: {speed:.1f} samples/s\t' \
                  'Data: {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                  '{heatmaps_loss}{offset_loss}'.format(
                      epoch, i, len(data_loader),
                      batch_time=batch_time,
                      speed=images.size(0)/batch_time.val,
                      data_time=data_time,
                      heatmaps_loss=_get_loss_info(
                          heatmaps_loss_meter, 'heatmaps'),
                      offset_loss=_get_loss_info(offset_loss_meter, 'offset')
                  )
            logger.info(msg)

            writer = writer_dict['writer']
            global_steps = writer_dict['train_global_steps']
            for idx in range(cfg.LOSS.NUM_STAGES):
                # FIX: the heatmaps tag previously used the batch index `i`
                # instead of the stage index `idx`, which scattered one curve
                # over ever-changing tensorboard tags.
                writer.add_scalar('train_stage{}_heatmaps_loss'.format(idx),
                                  heatmaps_loss_meter[idx].val,
                                  global_steps)
                writer.add_scalar('train_stage{}_offset_loss'.format(idx),
                                  offset_loss_meter[idx].val,
                                  global_steps)
            # train_global_steps advances once per PRINT_FREQ batches
            writer_dict['train_global_steps'] = global_steps + 1

            prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i)
            for scale_idx in range(len(cfg.DATASET.OUTPUT_SIZE)):
                prefix_scale = prefix + '_output_{}'.format(
                    cfg.DATASET.OUTPUT_SIZE[scale_idx])
                save_debug_images(cfg, images, heatmaps[scale_idx],
                                  masks[scale_idx], outputs[scale_idx],
                                  prefix_scale)
def train_adaptive(config, train_loader, model_p, model_d, criterion_p,
                   criterion_d, optimizer_p, optimizer_d, epoch, output_dir,
                   tb_log_dir, writer_dict, losses_P_list, losses_D_list,
                   acces_P_list, acces_D_list, acc_num_total, num, losses_p,
                   acc_p, losses_d):
    """Adversarial domain-adaptation epoch: alternately update the domain
    classifier (step I) and the pose network (step II).

    The *_list / meter arguments are caller-owned accumulators that persist
    across epochs; this function appends to them and returns the lists.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses_d_2 = AverageMeter()

    # switch to train mode
    model_d.train()
    model_p.train()

    end = time.time()
    print(len(train_loader))
    num_p = 0
    for i, (input, target, target_weight, meta) in enumerate(train_loader):
        # mixed images to train
        # measure data loading time
        data_time.update(time.time() - end)

        # compute output for pose network
        feature_outputs, outputs = model_p(input)

        ######## Step I: Domain Classifier Update ########
        # compute for domain classifier; features are detached so the pose
        # network receives no gradient from this step
        domain_logits = model_d(feature_outputs.detach())
        domain_label = (meta['synthetic'].unsqueeze(-1)*1.0).cuda()
        loss_d = criterion_d(domain_logits, domain_label)
        loss_d.backward(retain_graph = True)
        optimizer_d.step()
        optimizer_d.zero_grad()

        ######## Step II: Pose_Net Update ########
        # pose loss minus the (weighted) domain loss: the pose net is pushed
        # to make features the domain classifier cannot separate
        domain_logits_p = model_d(feature_outputs)
        target = target.cuda(non_blocking=True)
        target_weight = target_weight.cuda(non_blocking=True)
        loss_p = criterion_p(outputs, target, target_weight) - \
            config.TRAIN.LAMBDA * criterion_d(domain_logits_p, domain_label)
        loss_d_2 = criterion_d(domain_logits_p, domain_label)

        # compute gradient and do update step
        # NOTE(review): num_p is bumped then reset inside the branch, so the
        # condition is true on every iteration — looks like a leftover
        # "update every k-th step" gate; confirm intent.
        num_p += 1
        if num_p == 1:
            loss_p.backward(retain_graph = True)
            optimizer_p.step()
            optimizer_p.zero_grad()
            num_p = 0

        # measure accuracy and record loss
        losses_p.update(loss_p.item(), input.size(0))
        losses_d.update(loss_d.item(), input.size(0))
        losses_d_2.update(loss_d_2.item(), input.size(0))

        _, avg_acc, cnt, pred = accuracy(outputs.detach().cpu().numpy(),
                                         target.detach().cpu().numpy())
        acc_p.update(avg_acc, cnt)
        acces_P_list.append(acc_p.val)

        # domain-classifier accuracy: sign of the logit vs the binary label
        acc_num = 0
        for j in range(len(domain_label)):
            if (domain_logits[j] > 0 and domain_label[j] == 1.0) or \
               (domain_logits[j] < 0 and domain_label[j] == 0.0):
                acc_num += 1
                acc_num_total += 1
            num += 1
        acc_d = acc_num * 1.0 / input.size(0)
        acces_D_list.append(acc_d)
        losses_P_list.append(losses_p.val)
        losses_D_list.append(losses_d.val)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % config.PRINT_FREQ == 0:
            msg = 'Epoch: [{0}][{1}/{2}]\t' \
                  'Accuracy_d {3} ({4})\t' \
                  'Loss_d_2 {5}\t' \
                  'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t'\
                  'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                  'Loss_p {loss.val:.5f} ({loss.avg:.5f})\t' \
                  'Loss_d {loss_d.val:.5f} ({loss_d.avg:.5f})\t' \
                  'Accuracy_p {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch, i, len(train_loader), acc_d,
                      acc_num_total * 1.0 / num, losses_d_2.val,
                      batch_time=batch_time, data_time=data_time,
                      loss=losses_p, loss_d = losses_d, acc=acc_p)
            logger.info(msg)

            writer = writer_dict['writer']
            global_steps = writer_dict['train_global_steps']
            writer.add_scalar('train_loss_P', losses_p.val, global_steps)
            writer.add_scalar('train_acc_P', acc_p.val, global_steps)
            writer.add_scalar('train_loss_D', losses_d.val, global_steps)
            writer.add_scalar('train_loss_D_2', losses_d_2.val, global_steps)
            writer.add_scalar('train_acc_D', acc_d, global_steps)
            writer_dict['train_global_steps'] = global_steps + 1

            prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i)
            save_debug_images(config, input, meta, target, pred*4, outputs,
                              prefix)
    return losses_P_list,losses_D_list, acces_P_list, acces_D_list
def do_train(cfg, model, data_loader, loss_factory, optimizer, epoch,
             output_dir, tb_log_dir, writer_dict, fp16=False):
    """Train an associative-embedding model for one epoch.

    Tracks per-stage heatmap/push/pull losses; logs and writes tensorboard
    scalars every `cfg.PRINT_FREQ` batches on rank 0. With `fp16=True` the
    backward pass is delegated to the fp16 optimizer wrapper.
    """
    logger = logging.getLogger("Training")

    batch_time = AverageMeter()
    data_time = AverageMeter()

    heatmaps_loss_meter = [AverageMeter() for _ in range(cfg.LOSS.NUM_STAGES)]
    push_loss_meter = [AverageMeter() for _ in range(cfg.LOSS.NUM_STAGES)]
    pull_loss_meter = [AverageMeter() for _ in range(cfg.LOSS.NUM_STAGES)]

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, heatmaps, masks, joints) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        outputs = model(images)

        heatmaps = list(map(lambda x: x.cuda(non_blocking=True), heatmaps))
        masks = list(map(lambda x: x.cuda(non_blocking=True), masks))
        joints = list(map(lambda x: x.cuda(non_blocking=True), joints))

        # loss = loss_factory(outputs, heatmaps, masks)
        heatmaps_losses, push_losses, pull_losses = \
            loss_factory(outputs, heatmaps, masks, joints)

        # accumulate per-stage heatmap/push/pull losses into the total
        loss = 0
        for idx in range(cfg.LOSS.NUM_STAGES):
            if heatmaps_losses[idx] is not None:
                heatmaps_loss = heatmaps_losses[idx].mean(dim=0)
                heatmaps_loss_meter[idx].update(
                    heatmaps_loss.item(), images.size(0)
                )
                loss = loss + heatmaps_loss
                if push_losses[idx] is not None:
                    push_loss = push_losses[idx].mean(dim=0)
                    push_loss_meter[idx].update(
                        push_loss.item(), images.size(0)
                    )
                    loss = loss + push_loss
                if pull_losses[idx] is not None:
                    pull_loss = pull_losses[idx].mean(dim=0)
                    pull_loss_meter[idx].update(
                        pull_loss.item(), images.size(0)
                    )
                    loss = loss + pull_loss

        # compute gradient and do update step
        optimizer.zero_grad()
        if fp16:
            # fp16 wrapper handles loss scaling internally
            optimizer.backward(loss)
        else:
            loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % cfg.PRINT_FREQ == 0 and cfg.RANK == 0:
            msg = 'Epoch: [{0}][{1}/{2}]\t' \
                  'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                  'Speed: {speed:.1f} samples/s\t' \
                  'Data: {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                  '{heatmaps_loss}{push_loss}{pull_loss}'.format(
                      epoch, i, len(data_loader),
                      batch_time=batch_time,
                      speed=images.size(0)/batch_time.val,
                      data_time=data_time,
                      heatmaps_loss=_get_loss_info(heatmaps_loss_meter, 'heatmaps'),
                      push_loss=_get_loss_info(push_loss_meter, 'push'),
                      pull_loss=_get_loss_info(pull_loss_meter, 'pull')
                  )
            logger.info(msg)

            writer = writer_dict['writer']
            global_steps = writer_dict['train_global_steps']
            for idx in range(cfg.LOSS.NUM_STAGES):
                # FIX: the heatmaps tag previously used the batch index `i`
                # instead of the stage index `idx`, which scattered one curve
                # over ever-changing tensorboard tags.
                writer.add_scalar(
                    'train_stage{}_heatmaps_loss'.format(idx),
                    heatmaps_loss_meter[idx].val,
                    global_steps
                )
                writer.add_scalar(
                    'train_stage{}_push_loss'.format(idx),
                    push_loss_meter[idx].val,
                    global_steps
                )
                writer.add_scalar(
                    'train_stage{}_pull_loss'.format(idx),
                    pull_loss_meter[idx].val,
                    global_steps
                )
            writer_dict['train_global_steps'] = global_steps + 1

            prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i)
            for scale_idx in range(len(outputs)):
                prefix_scale = prefix + '_output_{}'.format(
                    cfg.DATASET.OUTPUT_SIZE[scale_idx]
                )
                save_debug_images(
                    cfg, images, heatmaps[scale_idx], masks[scale_idx],
                    outputs[scale_idx], prefix_scale
                )
def train(config, device, train_loader, model, criterion, optimizer, epoch,
          output_dir, tb_log_dir, writer_dict):
    """Train the model for one epoch and return the average loss.

    Parameters
    ----------
    config : yacs.config.CfgNode
        Experiment configuration.
    device : torch.device
        Device the input/target tensors are moved to.
    train_loader : torch.utils.data.dataloader.DataLoader
        Training data loader.
    model : model
        Model being trained.
    criterion : torch.nn.modules.loss
        Loss object.
    optimizer : torch.optim
        Optimizer object.
    epoch : int
        Current epoch index.
    output_dir : str
        Directory for outputs (debug images).
    tb_log_dir : str
        Tensorboard log directory.
    writer_dict : dict
        Experiment-logging dict holding the writer and global step.

    Returns
    -------
    losses.avg : float
        Mean training loss over the epoch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    model.train()

    tick = time.time()
    for step, (input, target, target_weight, meta) in enumerate(train_loader):
        data_time.update(time.time() - tick)

        # Move input and target to the training device as float tensors.
        input = input.to(device).float()
        target = target.to(device).float()

        outputs = model(input)

        target = target.cuda(non_blocking=True)
        # target_weight applies per-joint weighting; defaults leave it neutral.
        target_weight = target_weight.cuda(non_blocking=True)

        if isinstance(outputs, list):
            # Multi-stage output: sum criterion over every stage. The loop
            # variable deliberately leaks so `output` is the last stage below.
            loss = criterion(outputs[0], target, target_weight)
            for output in outputs[1:]:
                loss += criterion(output, target, target_weight)
        else:
            output = outputs
            loss = criterion(output, target, target_weight)

        # Gradient step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Record loss and keypoint accuracy for this batch.
        losses.update(loss.item(), input.size(0))
        _, step_acc, cnt, preds = accuracy(output.detach().cpu().numpy(),
                                           target.detach().cpu().numpy())
        acc.update(step_acc, cnt)

        batch_time.update(time.time() - tick)
        tick = time.time()

        if step % config.PRINT_FREQ == 0:
            msg = 'Epoch: [{0}][{1}/{2}]\t' \
                  'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                  'Speed {speed:.1f} samples/s\t' \
                  'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                  'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
                  'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch, step, len(train_loader),
                      batch_time=batch_time,
                      speed=input.size(0)/batch_time.val,
                      data_time=data_time, loss=losses, acc=acc)
            logger.info(msg)

            writer = writer_dict['writer']
            global_steps = writer_dict['train_global_steps']
            writer.add_scalar('train_loss', losses.val, global_steps)
            writer.add_scalar('train_acc', acc.val, global_steps)
            writer_dict['train_global_steps'] = global_steps + 1

            prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), step)
            save_debug_images(config, input, meta, target, preds * 4, output,
                              prefix)

    return losses.avg
def validate(config, loader, dataset, model, criterion, output_dir,
             writer_dict=None):
    """Evaluate a multi-view model on ``dataset`` and report detection rates.

    Runs the model over ``loader``, accumulates per-view 2D predictions in
    image coordinates, then calls ``dataset.evaluate`` at several detection
    thresholds and logs a results table for each.

    Parameters
    ----------
    config : config node with DATASET/NETWORK/MULTI_CAMS/PRINT_FREQ fields.
    loader : data loader yielding per-view lists (input, target, weight, meta).
    dataset : dataset object providing ``evaluate(all_preds, threshold=...)``.
    model : multi-view model returning (raw_features, aggre_features).
    criterion : loss taking (output, target, weight).
    output_dir : str, directory for debug images and CSV detection tables.
    writer_dict : dict or None, unused here; kept for interface compatibility.

    Returns
    -------
    float
        The perf indicator at the third threshold (0.025, i.e. 50 mm).
    """
    model.eval()
    batch_time = AverageMeter()
    losses = AverageMeter()
    avg_acc = AverageMeter()

    if config.DATASET.TEST_DATASET == 'multiview_h36m':
        nviews = 4
    elif config.DATASET.TEST_DATASET in [
            'totalcapture', 'panoptic', 'unrealcv'
    ]:
        nviews = len(config.MULTI_CAMS.SELECTED_CAMS)
    else:
        # Was `assert 'Not defined dataset'`, which asserts a truthy string
        # and can never fire; fail loudly instead of a later NameError.
        raise ValueError('Not defined dataset')

    nsamples = len(dataset) * nviews
    is_aggre = config.NETWORK.AGGRE
    njoints = config.NETWORK.NUM_JOINTS
    height = int(config.NETWORK.HEATMAP_SIZE[0])
    width = int(config.NETWORK.HEATMAP_SIZE[1])
    all_preds = np.zeros((nsamples, njoints, 3), dtype=np.float32)

    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, weight, meta) in enumerate(loader):
            raw_features, aggre_features = model(input)
            output = routing(raw_features, aggre_features, is_aggre, meta)

            # Per-view loss; targets are moved to GPU once and reused below.
            loss = 0
            target_cuda = []
            for t, w, o in zip(target, weight, output):
                t = t.cuda(non_blocking=True)
                w = w.cuda(non_blocking=True)
                target_cuda.append(t)
                loss += criterion(o, t, w)

            # Extra supervision on the raw (pre-aggregation) features.
            if is_aggre:
                for t, w, r in zip(target, weight, raw_features):
                    t = t.cuda(non_blocking=True)
                    w = w.cuda(non_blocking=True)
                    loss += criterion(r, t, w)
            target = target_cuda

            nimgs = len(input) * input[0].size(0)
            losses.update(loss.item(), nimgs)

            nviews = len(output)
            acc = [None] * nviews
            cnt = [None] * nviews
            pre = [None] * nviews
            for j in range(nviews):
                _, acc[j], cnt[j], pre[j] = accuracy(
                    output[j].detach().cpu().numpy(),
                    target[j].detach().cpu().numpy(),
                    thr=0.083)
            acc = np.mean(acc)
            cnt = np.mean(cnt)
            avg_acc.update(acc, cnt)

            batch_time.update(time.time() - end)
            end = time.time()

            # Interleave views so preds[k::nviews] holds view k's samples.
            preds = np.zeros((nimgs, njoints, 3), dtype=np.float32)
            for k, o, m in zip(range(nviews), output, meta):
                pred, maxval = get_final_preds(config,
                                               o.clone().cpu().numpy(),
                                               m['center'].numpy(),
                                               m['scale'].numpy())
                pred = pred[:, :, 0:2]
                pred = np.concatenate((pred, maxval), axis=2)
                preds[k::nviews] = pred
                # NOTE: a per-batch heatmap buffer was previously filled here
                # but only consumed by commented-out code; removed as dead work.

            all_preds[idx:idx + nimgs] = preds
            idx += nimgs

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(loader), batch_time=batch_time,
                          loss=losses, acc=avg_acc)
                logger.info(msg)

                for k in range(len(input)):
                    view_name = 'view_{}'.format(k + 1)
                    prefix = '{}_{}_{:08}'.format(
                        os.path.join(output_dir, 'validation'), view_name, i)
                    save_debug_images(config, input[k], meta[k], target[k],
                                      pre[k] * 4, output[k], prefix)

    # Thresholds are fractions of a normalization length; per the original
    # comment they correspond to 150/100/50/25/12.5 mm.
    detection_thresholds = [0.075, 0.05, 0.025, 0.0125, 6.25e-3]
    perf_indicators = []
    cur_time = time.strftime("%Y-%m-%d-%H-%M", time.gmtime())
    for thresh in detection_thresholds:
        name_value, perf_indicator, per_grouping_detected = dataset.evaluate(
            all_preds, threshold=thresh)
        perf_indicators.append(perf_indicator)
        names = name_value.keys()
        values = name_value.values()
        num_values = len(name_value)
        _, full_arch_name = get_model_name(config)
        logger.info('Detection Threshold set to {} aka {}mm'.format(
            thresh, thresh * 2000.0))
        logger.info('| Arch ' +
                    ' '.join(['| {: <5}'.format(name) for name in names]) +
                    ' |')
        logger.info('|--------' * (num_values + 1) + '|')
        logger.info(
            '| ' + '------ ' +
            ' '.join(['| {:.4f}'.format(value) for value in values]) + ' |')
        logger.info('| ' + full_arch_name)
        logger.info('Overall Perf on threshold {} is {}\n'.format(
            thresh, perf_indicator))
        logger.info('\n')
        if per_grouping_detected is not None:
            df = pd.DataFrame(per_grouping_detected)
            save_path = os.path.join(
                output_dir,
                'grouping_detec_rate_{}_{}.csv'.format(thresh, cur_time))
            df.to_csv(save_path)

    # Index 2 == threshold 0.025 (50 mm), the headline metric.
    return perf_indicators[2]
def validate(config, val_loader, val_dataset, model, criterion, output_dir, tb_log_dir, writer_dict=None):
    """Evaluate `model` on `val_loader`, optionally with horizontal-flip test-time
    augmentation, and return the dataset's performance indicator.

    Accumulates final keypoint predictions (image-space coords + max heatmap
    values) and per-sample box info, then delegates scoring to
    `val_dataset.evaluate`.  Logs per-batch progress and writes TensorBoard
    scalars when `writer_dict` is given.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    # (N, J, 3): x, y in image space plus the heatmap confidence per joint.
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    # (N, 6): center x/y, scale x/y, area (scale*200 product), detection score.
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []   # kept empty; passed through to evaluate()
    imgnums = []     # kept empty; passed through to evaluate()
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # compute output; multi-stage models return a list — use last stage
            outputs = model(input)
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs

            if config.TEST.FLIP_TEST:
                # this part is ugly, because pytorch has not supported negative index
                # input_flipped = model(input[:, :, :, ::-1])
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped)

                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped

                # un-flip heatmaps and swap left/right joint channels
                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                # average original and flipped predictions
                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())

            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()

            # heatmap-space -> original-image-space coordinates
            preds, maxvals = get_final_preds(
                config, output.clone().cpu().numpy(), c, s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(meta['image'])

            idx += num_images

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, acc=acc)
                logger.info(msg)

                # pred is in heatmap coords; *4 rescales to input-image space
                prefix = '{}_{}'.format(
                    os.path.join(output_dir, 'val'), i
                )
                save_debug_images(config, input, meta, target, pred * 4,
                                  output, prefix)

        name_values, perf_indicator = val_dataset.evaluate(
            config, all_preds, output_dir, all_boxes, image_path,
            filenames, imgnums
        )

        model_name = config.MODEL.NAME
        if isinstance(name_values, list):
            for name_value in name_values:
                _print_name_value(name_value, model_name)
        else:
            _print_name_value(name_values, model_name)

        if writer_dict:
            writer = writer_dict['writer']
            global_steps = writer_dict['valid_global_steps']
            writer.add_scalar(
                'valid_loss',
                losses.avg,
                global_steps
            )
            writer.add_scalar(
                'valid_acc',
                acc.avg,
                global_steps
            )
            if isinstance(name_values, list):
                for name_value in name_values:
                    writer.add_scalars(
                        'valid',
                        dict(name_value),
                        global_steps
                    )
            else:
                writer.add_scalars(
                    'valid',
                    dict(name_values),
                    global_steps
                )
            writer_dict['valid_global_steps'] = global_steps + 1

    return perf_indicator
def train(config, data, model, criterion, optim, epoch, output_dir, writer_dict):
    """Train a multi-view model for one epoch.

    Each batch yields per-view lists of (input, target, weight, meta).  The
    model returns raw per-view features plus cross-view aggregated features;
    `routing` selects the outputs to supervise.  When `config.NETWORK.AGGRE`
    is set, an extra loss term supervises the raw features as well.
    Logs progress / TensorBoard scalars and saves debug images every
    `config.PRINT_FREQ` batches.
    """
    is_aggre = config.NETWORK.AGGRE
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    avg_acc = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target, weight, meta) in enumerate(data):
        data_time.update(time.time() - end)
        raw_features, aggre_features = model(input)
        output = routing(raw_features, aggre_features, is_aggre, meta)

        # Per-view loss; targets are moved to GPU once and cached for reuse.
        loss = 0
        target_cuda = []
        for t, w, o in zip(target, weight, output):
            t = t.cuda(non_blocking=True)
            w = w.cuda(non_blocking=True)
            target_cuda.append(t)
            loss += criterion(o, t, w)
        target = target_cuda

        # Extra supervision on the raw (pre-aggregation) features.
        # target now holds CUDA tensors, so the .cuda() below is a no-op move.
        if is_aggre:
            for t, w, r in zip(target, weight, raw_features):
                t = t.cuda(non_blocking=True)
                w = w.cuda(non_blocking=True)
                loss += criterion(r, t, w)

        optim.zero_grad()
        loss.backward()
        optim.step()

        # batch size = number of views * samples per view
        losses.update(loss.item(), len(input) * input[0].size(0))

        # per-view accuracy, then averaged across views
        nviews = len(output)
        acc = [None] * nviews
        cnt = [None] * nviews
        pre = [None] * nviews
        for j in range(nviews):
            _, acc[j], cnt[j], pre[j] = accuracy(
                output[j].detach().cpu().numpy(),
                target[j].detach().cpu().numpy(),
                thr=0.083)
        acc = np.mean(acc)
        cnt = np.mean(cnt)
        avg_acc.update(acc, cnt)

        batch_time.update(time.time() - end)
        end = time.time()

        if i % config.PRINT_FREQ == 0:
            # memory reported in raw bytes for GPU 0
            gpu_memory_usage = torch.cuda.memory_allocated(0)
            msg = 'Epoch: [{0}][{1}/{2}]\t' \
                  'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
                  'Speed {speed:.1f} samples/s\t' \
                  'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
                  'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
                  'Accuracy {acc.val:.3f} ({acc.avg:.3f})\t' \
                  'Memory {memory:.1f}'.format(
                      epoch, i, len(data), batch_time=batch_time,
                      speed=len(input) * input[0].size(0) / batch_time.val,
                      data_time=data_time, loss=losses, acc=avg_acc,
                      memory=gpu_memory_usage)
            logger.info(msg)

            writer = writer_dict['writer']
            global_steps = writer_dict['train_global_steps']
            writer.add_scalar('train_loss', losses.val, global_steps)
            writer.add_scalar('train_acc', avg_acc.val, global_steps)
            writer_dict['train_global_steps'] = global_steps + 1

            # pre[k] is in heatmap coords; *4 rescales to input-image space
            for k in range(len(input)):
                view_name = 'view_{}'.format(k + 1)
                prefix = '{}_{}_{:08}'.format(
                    os.path.join(output_dir, 'train'), view_name, i)
                save_debug_images(config, input[k], meta[k], target[k],
                                  pre[k] * 4, output[k], prefix)