def __getitem__(self, index):
    """Build one sample (network input, target heatmaps, metadata).

    The annotation selected by ``self.idxs[index]`` is cropped from its
    image with an affine warp derived from ``self.clean_bbox[index]``,
    normalised with ``self.mean`` / ``self.std`` and paired with one
    heatmap per joint whose third keypoint value is positive. When
    ``self.split == 'train'`` the crop is randomly scaled and rotated and
    the sample may be horizontally flipped.

    Args:
        index: Position into ``self.idxs`` and ``self.clean_bbox``.

    Returns:
        dict with keys
        ``'input'``  - float32 image after ``transpose(2, 0, 1)``,
        ``'target'`` - float32 array of shape
        ``(num_joints, output_h, output_w)``,
        ``'meta'``   - dict holding the index, annotation id, crop
        center / scale / rotation, image id, the per-joint third keypoint
        column (``'vis'``) and a constant ``'score'`` of 1.

    Raises:
        IOError: If the image file cannot be read by ``cv2.imread``.
    """
    ann = self.coco.loadAnns(ids=[self.idxs[index]])[0]
    bbox = self.clean_bbox[index]
    img_info = self.coco.loadImgs(ids=[ann['image_id']])[0]
    img_path = os.path.join(self.img_dir, img_info['file_name'])

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None; fail here with the
        # offending path instead of an opaque error inside warpAffine.
        raise IOError('Failed to read image: {}'.format(img_path))

    # Keypoints are stored flat; each joint occupies three values.
    pts = np.array(ann['keypoints']).reshape(
        self.num_joints, 3).astype(np.float32)

    c, s = self._box2cs(bbox)
    r = 0
    is_train = self.split == 'train'
    if is_train:
        sf = self.opt.scale
        rf = self.opt.rotate
        # The order of the random draws below (scale, rotation gate,
        # rotation amount) is kept so seeded runs stay reproducible.
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() <= 0.6:
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

    trans_input = get_affine_transform(
        c, s, r, [self.opt.input_w, self.opt.input_h])
    inp = cv2.warpAffine(img, trans_input,
                         (self.opt.input_w, self.opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 256. - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    # Same crop parameters, but mapped onto the heatmap resolution.
    trans_output = get_affine_transform(
        c, s, r, [self.opt.output_w, self.opt.output_h])
    out = np.zeros(
        (self.num_joints, self.opt.output_h, self.opt.output_w),
        dtype=np.float32)
    for i in range(self.num_joints):
        if pts[i, 2] > 0:
            pt = affine_transform(pts[i], trans_output)
            out[i] = draw_gaussian(out[i], pt, self.opt.hm_gauss)

    if is_train and np.random.random() < self.opt.flip:
        inp = flip(inp)
        # After mirroring, joint channels are reordered via shuffle_ref.
        out = shuffle_lr(flip(out), self.shuffle_ref)

    meta = {
        'index': index,
        'id': self.idxs[index],
        'center': c,
        'scale': s,
        'rotate': r,
        'image_id': ann['image_id'],
        'vis': pts[:, 2],
        'score': 1,
    }
    return {'input': inp, 'target': out, 'meta': meta}
def step(split, epoch, opt, data_loader, model, optimizer=None):
    """Run one pass over ``data_loader`` for the heatmap + depth model.

    Args:
        split: ``'train'`` puts the model in training mode and applies
            optimizer updates; any other value runs evaluation, where the
            prediction is averaged with that of a flipped input.
        epoch: Epoch number; only used in the progress text.
        opt: Options object. Reads ``device``, ``weight_3d``,
            ``weight_var``, ``exp_id``, ``num_stacks``, ``num_output``,
            ``input_w``, ``input_h``, ``output_w``, ``output_h``,
            ``hide_data_time``, ``print_iter`` and ``debug``.
        data_loader: Iterable of batch dicts. Every entry except
            ``'meta'`` is moved to ``opt.device``. Its ``dataset`` supplies
            ``acc_idxs``, ``edges``, ``edges_3d``, ``shuffle_ref``,
            ``mean``, ``std`` and ``convert_eval_format``.
        model: Callable returning a sequence whose last element is a dict
            with ``'hm'`` and ``'depth'`` entries.
        optimizer: Required when ``split == 'train'``.

    Returns:
        Tuple ``(stats, preds)``. ``stats`` holds the running averages
        ``'loss'``, ``'acc'``, ``'mpjpe'`` and the elapsed ``'time'`` in
        minutes. ``preds`` is never filled here, so it is an empty list.
    """
    is_train = split == 'train'
    if is_train:
        model.train()
    else:
        model.eval()

    crit = torch.nn.MSELoss()
    crit_3d = FusionLoss(opt.device, opt.weight_3d, opt.weight_var)

    dataset = data_loader.dataset
    acc_idxs = dataset.acc_idxs
    edges = dataset.edges
    edges_3d = dataset.edges_3d
    shuffle_ref = dataset.shuffle_ref
    mean = dataset.mean
    std = dataset.std
    convert_eval_format = dataset.convert_eval_format

    Loss, Loss3D = AverageMeter(), AverageMeter()
    Acc, MPJPE = AverageMeter(), AverageMeter()
    data_time, batch_time = AverageMeter(), AverageMeter()
    preds = []

    num_iters = len(data_loader)
    bar = Bar('{}'.format(opt.exp_id), max=num_iters)
    end = time.time()
    for i, batch in enumerate(data_loader):
        data_time.update(time.time() - end)

        # 'meta' stays on the host; every other entry goes to the device.
        for k in batch:
            if k != 'meta':
                batch[k] = batch[k].cuda(
                    device=opt.device, non_blocking=True)
        gt_2d = batch['meta']['pts_crop'].cuda(
            device=opt.device, non_blocking=True).float() / opt.output_h

        # Autograd bookkeeping is only needed when we back-propagate, so
        # evaluation runs with gradient tracking switched off.
        with torch.set_grad_enabled(is_train):
            output = model(batch['input'])

            loss = crit(output[-1]['hm'], batch['target'])
            # NOTE(review): the depth loss only ever looks at the last
            # stack. It used to be recomputed with identical arguments for
            # every intermediate stack; it is now evaluated once.
            loss_3d = crit_3d(
                output[-1]['depth'], batch['reg_mask'], batch['reg_ind'],
                batch['reg_target'], gt_2d)
            for k in range(opt.num_stacks - 1):
                stack_out = output[k]
                # The final stack is indexed as a dict; a dict cannot be
                # fed to MSELoss, so pick its heatmap. Raw tensors are
                # passed through unchanged.
                if isinstance(stack_out, dict):
                    stack_out = stack_out['hm']
                loss += crit(stack_out, batch['target'])
            loss += loss_3d

            if is_train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            else:
                # Flip augmentation at test time.
                # NOTE(review): only sample 0 is flipped and the flipped
                # output is reshaped to a leading dimension of 1, which
                # appears to assume an evaluation batch size of 1.
                input_ = batch['input'].cpu().numpy().copy()
                input_[0] = flip(input_[0]).copy()[np.newaxis, ...]
                input_flip_var = torch.from_numpy(input_).cuda(
                    device=opt.device, non_blocking=True)
                output_flip_ = model(input_flip_var)

                output_flip = shuffle_lr(
                    flip(output_flip_[-1]['hm'].detach().cpu().numpy()[0]),
                    shuffle_ref)
                output_flip = output_flip.reshape(
                    1, opt.num_output, opt.output_h, opt.output_w)
                output_depth_flip = shuffle_lr(
                    flip(output_flip_[-1]['depth']
                         .detach().cpu().numpy()[0]),
                    shuffle_ref)
                output_depth_flip = output_depth_flip.reshape(
                    1, opt.num_output, opt.output_h, opt.output_w)

                output_flip = torch.from_numpy(output_flip).cuda(
                    device=opt.device, non_blocking=True)
                output_depth_flip = torch.from_numpy(
                    output_depth_flip).cuda(
                        device=opt.device, non_blocking=True)

                output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2
                output[-1]['depth'] = (
                    output[-1]['depth'] + output_depth_flip) / 2

        batch_size = batch['input'].size(0)
        Loss.update(loss.item(), batch_size)
        Loss3D.update(loss_3d.item(), batch_size)
        Acc.update(accuracy(output[-1]['hm'].detach().cpu().numpy(),
                            batch['target'].detach().cpu().numpy(),
                            acc_idxs))
        mpjpe_batch, mpjpe_cnt = mpjpe(
            output[-1]['hm'].detach().cpu().numpy(),
            output[-1]['depth'].detach().cpu().numpy(),
            batch['meta']['gt_3d'].detach().numpy(),
            convert_func=convert_eval_format)
        MPJPE.update(mpjpe_batch, mpjpe_cnt)

        batch_time.update(time.time() - end)
        end = time.time()

        if not opt.hide_data_time:
            time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                       ' |Net {bt.avg:.3f}s'.format(
                           dt=data_time, bt=batch_time)
        else:
            time_str = ''

        # NOTE: the suffix is assigned on the Bar class rather than on the
        # `bar` instance, exactly as before.
        Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:} '\
                     '|Loss {loss.avg:.5f} |Loss3D {loss_3d.avg:.5f}'\
                     '|Acc {Acc.avg:.4f} |MPJPE {MPJPE.avg:.2f}'\
                     '{time_str}'.format(
                         epoch, i, num_iters, total=bar.elapsed_td,
                         eta=bar.eta_td, loss=Loss, Acc=Acc, split=split,
                         time_str=time_str, MPJPE=MPJPE, loss_3d=Loss3D)
        if opt.print_iter > 0:
            if i % opt.print_iter == 0:
                print('{}| {}'.format(opt.exp_id, Bar.suffix))
        else:
            bar.next()

        if opt.debug >= 2:
            # Heatmap coordinates are multiplied by 4 before being drawn
            # on the input-sized image.
            gt = get_preds(batch['target'].cpu().numpy()) * 4
            pred = get_preds(output[-1]['hm'].detach().cpu().numpy()) * 4
            debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)

            # Undo the input normalisation to recover a displayable image.
            img = (batch['input'][0].cpu().numpy().transpose(1, 2, 0)
                   * std + mean) * 256
            img = img.astype(np.uint8).copy()
            debugger.add_img(img)
            debugger.add_mask(
                cv2.resize(batch['target'][0].cpu().numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'target')
            debugger.add_mask(
                cv2.resize(
                    output[-1]['hm'][0].detach().cpu().numpy().max(axis=0),
                    (opt.input_w, opt.input_h)), img, 'pred')
            debugger.add_point_2d(gt[0], (0, 0, 255))
            debugger.add_point_2d(pred[0], (255, 0, 0))
            debugger.add_point_3d(
                batch['meta']['gt_3d'].detach().numpy()[0], 'r',
                edges=edges_3d)
            pred_3d = get_preds_3d(
                output[-1]['hm'].detach().cpu().numpy(),
                output[-1]['depth'].detach().cpu().numpy())
            debugger.add_point_3d(
                convert_eval_format(pred_3d[0]), 'b', edges=edges_3d)
            debugger.show_all_imgs(pause=False)
            debugger.show_3d()

    bar.finish()
    return {'loss': Loss.avg,
            'acc': Acc.avg,
            'mpjpe': MPJPE.avg,
            'time': bar.elapsed_td.total_seconds() / 60.}, preds
def step(split, epoch, opt, data_loader, model, optimizer=None):
    """Run one pass over ``data_loader`` for the heatmap-only model.

    Args:
        split: ``'train'`` puts the model in training mode and applies
            optimizer updates; any other value runs evaluation, where the
            heatmap is averaged with that of a flipped input and the
            converted prediction is collected.
        epoch: Epoch number; only used in the progress text.
        opt: Options object. Reads ``device``, ``exp_id``, ``num_stacks``,
            ``num_output``, ``input_w``, ``input_h``, ``output_w``,
            ``output_h``, ``hide_data_time``, ``print_iter`` and ``debug``.
        data_loader: Iterable of batch dicts with ``'input'``,
            ``'target'`` and ``'meta'``. Its ``dataset`` supplies
            ``acc_idxs``, ``edges``, ``shuffle_ref``, ``mean``, ``std`` and
            ``convert_eval_format``.
        model: Callable returning a sequence whose last element is a dict
            with an ``'hm'`` entry.
        optimizer: Required when ``split == 'train'``.

    Returns:
        Tuple ``(stats, preds)``. ``stats`` holds the running averages
        ``'loss'`` and ``'acc'`` plus the elapsed ``'time'`` in minutes.
        ``preds`` has one converted prediction per evaluation batch and is
        empty when training.
    """
    is_train = split == 'train'
    if is_train:
        model.train()
    else:
        model.eval()

    crit = torch.nn.MSELoss()

    dataset = data_loader.dataset
    acc_idxs = dataset.acc_idxs
    edges = dataset.edges
    shuffle_ref = dataset.shuffle_ref
    mean = dataset.mean
    std = dataset.std
    convert_eval_format = dataset.convert_eval_format

    Loss, Acc = AverageMeter(), AverageMeter()
    data_time, batch_time = AverageMeter(), AverageMeter()
    preds = []

    num_iters = len(data_loader)
    bar = Bar('{}'.format(opt.exp_id), max=num_iters)
    end = time.time()
    for i, batch in enumerate(data_loader):
        data_time.update(time.time() - end)

        # `inp` rather than `input`, to avoid shadowing the builtin.
        inp, target, meta = batch['input'], batch['target'], batch['meta']
        input_var = inp.cuda(device=opt.device, non_blocking=True)
        target_var = target.cuda(device=opt.device, non_blocking=True)

        # Autograd bookkeeping is only needed when we back-propagate, so
        # evaluation runs with gradient tracking switched off.
        with torch.set_grad_enabled(is_train):
            output = model(input_var)

            loss = crit(output[-1]['hm'], target_var)
            for k in range(opt.num_stacks - 1):
                stack_out = output[k]
                # The final stack is indexed as a dict; a dict cannot be
                # fed to MSELoss, so pick its heatmap. Raw tensors are
                # passed through unchanged.
                if isinstance(stack_out, dict):
                    stack_out = stack_out['hm']
                loss += crit(stack_out, target_var)

            if is_train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            else:
                # Flip augmentation at test time.
                # NOTE(review): only sample 0 is flipped and the flipped
                # output is reshaped to a leading dimension of 1, which
                # appears to assume an evaluation batch size of 1.
                input_ = inp.cpu().numpy().copy()
                input_[0] = flip(input_[0]).copy()[np.newaxis, ...]
                input_flip_var = torch.from_numpy(input_).cuda(
                    device=opt.device, non_blocking=True)
                output_flip = model(input_flip_var)
                output_flip = shuffle_lr(
                    flip(output_flip[-1]['hm'].detach().cpu().numpy()[0]),
                    shuffle_ref)
                output_flip = output_flip.reshape(
                    1, opt.num_output, opt.output_h, opt.output_w)
                output_flip = torch.from_numpy(output_flip).cuda(
                    device=opt.device, non_blocking=True)
                output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2

                pred, conf = get_preds(
                    output[-1]['hm'].detach().cpu().numpy(), True)
                preds.append(convert_eval_format(pred, conf, meta)[0])

        Loss.update(loss.detach().item(), inp.size(0))
        Acc.update(accuracy(output[-1]['hm'].detach().cpu().numpy(),
                            target_var.detach().cpu().numpy(), acc_idxs))

        batch_time.update(time.time() - end)
        end = time.time()

        if not opt.hide_data_time:
            time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                       ' |Net {bt.avg:.3f}s'.format(
                           dt=data_time, bt=batch_time)
        else:
            time_str = ''

        # NOTE: the suffix is assigned on the Bar class rather than on the
        # `bar` instance, exactly as before.
        Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:}' \
                     '|Loss {loss.avg:.5f} |Acc {Acc.avg:.4f}'\
                     '{time_str}'.format(
                         epoch, i, num_iters, total=bar.elapsed_td,
                         eta=bar.eta_td, loss=Loss, Acc=Acc, split=split,
                         time_str=time_str)
        if opt.print_iter > 0:
            if i % opt.print_iter == 0:
                print('{}| {}'.format(opt.exp_id, Bar.suffix))
        else:
            bar.next()

        if opt.debug >= 2:
            # Use the two-value call form already relied on in the eval
            # branch above; only the coordinates are needed here.
            # Coordinates are multiplied by 4 before being drawn on the
            # input-sized image.
            gt, _ = get_preds(target.cpu().numpy(), True)
            gt = gt * 4
            pred, _ = get_preds(
                output[-1]['hm'].detach().cpu().numpy(), True)
            pred = pred * 4
            debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)

            # Undo the input normalisation to recover a displayable image.
            img = (inp[0].numpy().transpose(1, 2, 0) * std + mean) * 256
            img = img.astype(np.uint8).copy()
            debugger.add_img(img)
            debugger.add_mask(
                cv2.resize(target[0].numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'target')
            debugger.add_mask(
                cv2.resize(
                    output[-1]['hm'][0].detach().cpu().numpy().max(axis=0),
                    (opt.input_w, opt.input_h)), img, 'pred')
            debugger.add_point_2d(pred[0], (255, 0, 0))
            debugger.add_point_2d(gt[0], (0, 0, 255))
            debugger.show_all_imgs(pause=True)

    bar.finish()
    return {'loss': Loss.avg,
            'acc': Acc.avg,
            'time': bar.elapsed_td.total_seconds() / 60.}, preds