示例#1
0
    def __getitem__(self, index):
        """Build the input crop, target heatmaps and metadata for one sample.

        Args:
            index: Position into ``self.idxs`` (annotation ids) and
                ``self.clean_bbox`` (one box per annotation).

        Returns:
            A dict with three entries:

            * ``'input'``: the image warped to ``opt.input_w`` x
              ``opt.input_h``, normalised as ``(pixel / 256 - mean) / std``
              and transposed to channel-first order.
            * ``'target'``: float32 array of shape
              ``(num_joints, opt.output_h, opt.output_w)``; a channel is
              filled by ``draw_gaussian`` only when the joint's third
              keypoint value is > 0, otherwise it stays all zeros.
            * ``'meta'``: index, annotation id, centre, scale, rotation,
              image id, the per-joint third keypoint value (``'vis'``) and a
              constant ``'score'`` of 1.

        Raises:
            IOError: If the image file cannot be read by ``cv2.imread``.
        """
        ann = self.coco.loadAnns(ids=[self.idxs[index]])[0]
        clean_bbox = self.clean_bbox[index]
        img_info = self.coco.loadImgs(ids=[ann['image_id']])[0]
        img_path = os.path.join(self.img_dir, img_info['file_name'])
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None; fail here with
            # the offending path instead of a cryptic warpAffine error later.
            raise IOError('Failed to read image: {}'.format(img_path))

        # Keypoints are stored flat; view them as one (x, y, v) row per joint.
        pts = np.array(ann['keypoints']).reshape(self.num_joints,
                                                 3).astype(np.float32)

        c, s = self._box2cs(clean_bbox)
        r = 0

        is_train = self.split == 'train'
        if is_train:
            # Random scale / rotation augmentation. The order of the random
            # draws is kept stable so seeded runs stay reproducible.
            sf = self.opt.scale
            rf = self.opt.rotate
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            # Rotation is applied with probability 0.6, clipped to +-2*rf.
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if np.random.random() <= 0.6 else 0

        trans_input = get_affine_transform(
            c, s, r, [self.opt.input_w, self.opt.input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (self.opt.input_w, self.opt.input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 256. - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)  # HWC -> CHW

        # Same centre/scale/rotation, but mapped to the heatmap resolution.
        trans_output = get_affine_transform(
            c, s, r, [self.opt.output_w, self.opt.output_h])
        out = np.zeros((self.num_joints, self.opt.output_h, self.opt.output_w),
                       dtype=np.float32)
        for i in range(self.num_joints):
            if pts[i, 2] > 0:
                pt = affine_transform(pts[i], trans_output)
                out[i] = draw_gaussian(out[i], pt, self.opt.hm_gauss)

        # Flip augmentation: input and heatmaps go through ``flip``, and the
        # heatmap channels are additionally remapped via ``shuffle_lr`` with
        # ``self.shuffle_ref`` (presumably swapping left/right joints).
        if is_train and np.random.random() < self.opt.flip:
            inp = flip(inp)
            out = shuffle_lr(flip(out), self.shuffle_ref)

        meta = {
            'index': index,
            'id': self.idxs[index],
            'center': c,
            'scale': s,
            'rotate': r,
            'image_id': ann['image_id'],
            'vis': pts[:, 2],
            'score': 1
        }

        return {'input': inp, 'target': out, 'meta': meta}
示例#2
0
def step(split, epoch, opt, data_loader, model, optimizer=None):
  """Run one epoch of training or evaluation for the 2D-heatmap + depth model.

  Args:
    split: ``'train'`` enables training mode and optimizer updates; any other
      value runs evaluation with flip test-time augmentation.
    epoch: Epoch number, used only in the progress text.
    opt: Options object; this function reads ``device``, ``weight_3d``,
      ``weight_var``, ``exp_id``, ``output_h``, ``output_w``, ``input_h``,
      ``input_w``, ``num_stacks``, ``num_output``, ``hide_data_time``,
      ``print_iter`` and ``debug``.
    data_loader: Iterable of batch dicts. Every key except ``'meta'`` is moved
      to ``opt.device``; ``batch['meta']`` must provide ``'pts_crop'`` and
      ``'gt_3d'``. Its ``dataset`` supplies ``acc_idxs``, ``edges``,
      ``edges_3d``, ``shuffle_ref``, ``mean``, ``std`` and
      ``convert_eval_format``.
    model: Network returning a sequence of per-stack dicts with ``'hm'`` and
      ``'depth'`` entries.
    optimizer: Required when ``split == 'train'``.

  Returns:
    A tuple ``(stats, preds)`` where ``stats`` holds the average ``'loss'``,
    ``'acc'``, ``'mpjpe'`` and elapsed ``'time'`` in minutes. ``preds`` is
    always an empty list in this function (prediction export is disabled).
  """
  is_train = split == 'train'
  if is_train:
    model.train()
  else:
    model.eval()
  
  crit = torch.nn.MSELoss()
  crit_3d = FusionLoss(opt.device, opt.weight_3d, opt.weight_var)

  dataset = data_loader.dataset
  acc_idxs = dataset.acc_idxs
  edges = dataset.edges
  edges_3d = dataset.edges_3d
  shuffle_ref = dataset.shuffle_ref
  mean = dataset.mean
  std = dataset.std
  convert_eval_format = dataset.convert_eval_format

  Loss, Loss3D = AverageMeter(), AverageMeter()
  Acc, MPJPE = AverageMeter(), AverageMeter()
  data_time, batch_time = AverageMeter(), AverageMeter()
  preds = []
  time_str = ''

  nIters = len(data_loader)
  bar = Bar('{}'.format(opt.exp_id), max=nIters)
  
  end = time.time()
  for i, batch in enumerate(data_loader):
    data_time.update(time.time() - end)
    for k in batch:
      if k != 'meta':
        batch[k] = batch[k].cuda(device=opt.device, non_blocking=True)
    # 2D ground truth in crop coordinates, scaled by the heatmap height.
    gt_2d = batch['meta']['pts_crop'].cuda(
      device=opt.device, non_blocking=True).float() / opt.output_h

    # Only build the autograd graph when we are going to backpropagate.
    with torch.set_grad_enabled(is_train):
      output = model(batch['input'])

      loss = crit(output[-1]['hm'], batch['target'])
      # NOTE(review): the 3D loss is taken from the final stack only. The
      # previous code recomputed this identical value once per intermediate
      # stack; that redundant work is dropped (assumes FusionLoss is
      # stateless - confirm).
      loss_3d = crit_3d(
        output[-1]['depth'], batch['reg_mask'], batch['reg_ind'], 
        batch['reg_target'], gt_2d)
      # Intermediate supervision: each stack output is a dict like the last
      # one, so the heatmap must be selected before it is fed to MSELoss.
      for k in range(opt.num_stacks - 1):
        loss += crit(output[k]['hm'], batch['target'])
      loss += loss_3d

    if is_train:
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    else:
      # Flip test: only sample 0 is flipped and the flipped outputs are
      # reshaped to a leading dimension of 1, so evaluation appears to assume
      # a batch size of 1.
      with torch.no_grad():
        input_ = batch['input'].cpu().numpy().copy()
        input_[0] = flip(input_[0]).copy()[np.newaxis, ...]
        input_flip_var = torch.from_numpy(input_).cuda(
          device=opt.device, non_blocking=True)
        output_flip_ = model(input_flip_var)
        output_flip = shuffle_lr(
          flip(output_flip_[-1]['hm'].detach().cpu().numpy()[0]), shuffle_ref)
        output_flip = output_flip.reshape(
          1, opt.num_output, opt.output_h, opt.output_w)
        output_depth_flip = shuffle_lr(
          flip(output_flip_[-1]['depth'].detach().cpu().numpy()[0]),
          shuffle_ref)
        output_depth_flip = output_depth_flip.reshape(
          1, opt.num_output, opt.output_h, opt.output_w)
        output_flip = torch.from_numpy(output_flip).cuda(
          device=opt.device, non_blocking=True)
        output_depth_flip = torch.from_numpy(output_depth_flip).cuda(
          device=opt.device, non_blocking=True)
        # Average the original and flipped predictions.
        output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2
        output[-1]['depth'] = (output[-1]['depth'] + output_depth_flip) / 2
      # pred = get_preds(output[-1]['hm'].detach().cpu().numpy())
      # preds.append(convert_eval_format(pred, conf, meta)[0])
    
    batch_size = batch['input'].size(0)
    Loss.update(loss.item(), batch_size)
    Loss3D.update(loss_3d.item(), batch_size)
    hm_np = output[-1]['hm'].detach().cpu().numpy()
    Acc.update(accuracy(hm_np, 
                        batch['target'].detach().cpu().numpy(), acc_idxs))
    mpjpe_batch, mpjpe_cnt = mpjpe(hm_np,
                                   output[-1]['depth'].detach().cpu().numpy(),
                                   batch['meta']['gt_3d'].detach().numpy(),
                                   convert_func=convert_eval_format)
    MPJPE.update(mpjpe_batch, mpjpe_cnt)
   
    batch_time.update(time.time() - end)
    end = time.time()
    if not opt.hide_data_time:
      time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                 ' |Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
      
    # The suffix is assigned on the Bar class (not the instance), exactly as
    # before; the print below reads it back from the class.
    Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:} '\
                 '|Loss {loss.avg:.5f} |Loss3D {loss_3d.avg:.5f}'\
                 '|Acc {Acc.avg:.4f} |MPJPE {MPJPE.avg:.2f}'\
                 '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td, 
                                     eta=bar.eta_td, loss=Loss, Acc=Acc, 
                                     split=split, time_str=time_str,
                                     MPJPE=MPJPE, loss_3d=Loss3D)
    if opt.print_iter > 0:
      if i % opt.print_iter == 0:
        print('{}| {}'.format(opt.exp_id, Bar.suffix))
    else:
      bar.next()
    if opt.debug >= 2:
      # Heatmap coordinates are scaled by 4 for display (presumably the
      # input/output resolution ratio - confirm).
      gt = get_preds(batch['target'].cpu().numpy()) * 4
      pred = get_preds(output[-1]['hm'].detach().cpu().numpy()) * 4
      debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)
      # Invert a (x / 256 - mean) / std normalisation to get a displayable
      # image.
      img = (
        batch['input'][0].cpu().numpy().transpose(1, 2, 0) * std + mean) * 256
      img = img.astype(np.uint8).copy()
      debugger.add_img(img)
      debugger.add_mask(
        cv2.resize(batch['target'][0].cpu().numpy().max(axis=0), 
                   (opt.input_w, opt.input_h)), img, 'target')
      debugger.add_mask(
        cv2.resize(output[-1]['hm'][0].detach().cpu().numpy().max(axis=0), 
                   (opt.input_w, opt.input_h)), img, 'pred')
      debugger.add_point_2d(gt[0], (0, 0, 255))
      debugger.add_point_2d(pred[0], (255, 0, 0))
      debugger.add_point_3d(
        batch['meta']['gt_3d'].detach().numpy()[0], 'r', edges=edges_3d)
      pred_3d = get_preds_3d(output[-1]['hm'].detach().cpu().numpy(), 
                             output[-1]['depth'].detach().cpu().numpy())
      debugger.add_point_3d(convert_eval_format(pred_3d[0]), 'b',edges=edges_3d)
      debugger.show_all_imgs(pause=False)
      debugger.show_3d()

  bar.finish()
  return {'loss': Loss.avg, 
          'acc': Acc.avg, 
          'mpjpe': MPJPE.avg,
          'time': bar.elapsed_td.total_seconds() / 60.}, preds
示例#3
0
def step(split, epoch, opt, data_loader, model, optimizer=None):
    """Run one epoch of training or evaluation for the 2D heatmap model.

    Args:
        split: ``'train'`` enables training mode and optimizer updates; any
            other value runs evaluation with flip test-time augmentation and
            collects predictions.
        epoch: Epoch number, used only in the progress text.
        opt: Options object; this function reads ``device``, ``exp_id``,
            ``num_stacks``, ``num_output``, ``output_h``, ``output_w``,
            ``input_h``, ``input_w``, ``hide_data_time``, ``print_iter`` and
            ``debug``.
        data_loader: Iterable of dicts with ``'input'``, ``'target'`` and
            ``'meta'``. Its ``dataset`` supplies ``acc_idxs``, ``edges``,
            ``shuffle_ref``, ``mean``, ``std`` and ``convert_eval_format``.
        model: Network returning a sequence of per-stack dicts with an
            ``'hm'`` entry.
        optimizer: Required when ``split == 'train'``.

    Returns:
        A tuple ``(stats, preds)``: ``stats`` holds the average ``'loss'``,
        ``'acc'`` and elapsed ``'time'`` in minutes; ``preds`` holds one
        converted prediction per evaluation batch (empty when training).
    """
    is_train = split == 'train'
    if is_train:
        model.train()
    else:
        model.eval()

    crit = torch.nn.MSELoss()

    dataset = data_loader.dataset
    acc_idxs = dataset.acc_idxs
    edges = dataset.edges
    shuffle_ref = dataset.shuffle_ref
    mean = dataset.mean
    std = dataset.std
    convert_eval_format = dataset.convert_eval_format

    Loss, Acc = AverageMeter(), AverageMeter()
    data_time, batch_time = AverageMeter(), AverageMeter()
    preds = []

    nIters = len(data_loader)
    bar = Bar('{}'.format(opt.exp_id), max=nIters)

    end = time.time()
    for i, batch in enumerate(data_loader):
        data_time.update(time.time() - end)
        # ``images`` rather than ``input`` so the builtin is not shadowed.
        images, target, meta = batch['input'], batch['target'], batch['meta']
        input_var = images.cuda(device=opt.device, non_blocking=True)
        target_var = target.cuda(device=opt.device, non_blocking=True)

        # Only build the autograd graph when we are going to backpropagate.
        with torch.set_grad_enabled(is_train):
            output = model(input_var)

            loss = crit(output[-1]['hm'], target_var)
            # Intermediate supervision: each stack output is a dict like the
            # last one, so select the heatmap before feeding it to MSELoss.
            for k in range(opt.num_stacks - 1):
                loss += crit(output[k]['hm'], target_var)

        if is_train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            # Flip test: only sample 0 is flipped and the flipped heatmap is
            # reshaped to a leading dimension of 1, so evaluation appears to
            # assume a batch size of 1.
            with torch.no_grad():
                input_ = images.cpu().numpy().copy()
                input_[0] = flip(input_[0]).copy()[np.newaxis, ...]
                input_flip_var = torch.from_numpy(input_).cuda(
                    device=opt.device, non_blocking=True)
                output_flip = model(input_flip_var)
                output_flip = shuffle_lr(
                    flip(output_flip[-1]['hm'].detach().cpu().numpy()[0]),
                    shuffle_ref)
                output_flip = output_flip.reshape(
                    1, opt.num_output, opt.output_h, opt.output_w)
                output_flip = torch.from_numpy(output_flip).cuda(
                    device=opt.device, non_blocking=True)
                # Average the original and flipped heatmaps.
                output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2
            pred, conf = get_preds(
                output[-1]['hm'].detach().cpu().numpy(), True)
            preds.append(convert_eval_format(pred, conf, meta)[0])

        Loss.update(loss.detach().item(), images.size(0))
        Acc.update(accuracy(output[-1]['hm'].detach().cpu().numpy(),
                            target_var.detach().cpu().numpy(), acc_idxs))

        batch_time.update(time.time() - end)
        end = time.time()
        if not opt.hide_data_time:
            time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                       ' |Net {bt.avg:.3f}s'.format(dt=data_time,
                                                    bt=batch_time)
        else:
            time_str = ''
        # The suffix is assigned on the Bar class (not the instance), exactly
        # as before; the print below reads it back from the class.
        Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:}' \
                     '|Loss {loss.avg:.5f} |Acc {Acc.avg:.4f}'\
                     '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td,
                                         eta=bar.eta_td, loss=Loss, Acc=Acc,
                                         split=split, time_str=time_str)
        if opt.print_iter > 0:
            if i % opt.print_iter == 0:
                print('{}| {}'.format(opt.exp_id, Bar.suffix))
        else:
            bar.next()
        if opt.debug >= 2:
            # NOTE(review): without the extra flag get_preds is unpacked into
            # two values here, while the evaluation path above passes True to
            # get (pred, conf) - confirm the helper's return contract.
            gt, _ = get_preds(target.cpu().numpy())
            gt *= 4
            pred, _ = get_preds(output[-1]['hm'].detach().cpu().numpy())
            pred *= 4
            debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)
            # Invert a (x / 256 - mean) / std normalisation for display.
            img = (images[0].numpy().transpose(1, 2, 0) * std + mean) * 256
            img = img.astype(np.uint8).copy()
            debugger.add_img(img)
            debugger.add_mask(
                cv2.resize(target[0].numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'target')
            debugger.add_mask(
                cv2.resize(output[-1]['hm'][0].detach().cpu().numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'pred')
            debugger.add_point_2d(pred[0], (255, 0, 0))
            debugger.add_point_2d(gt[0], (0, 0, 255))
            debugger.show_all_imgs(pause=True)

    bar.finish()
    return {'loss': Loss.avg,
            'acc': Acc.avg,
            'time': bar.elapsed_td.total_seconds() / 60.}, preds