def demo_image(image, model, opt, save_path=None):
  s = max(image.shape[0], image.shape[1]) * 1.0
  c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
  trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
  inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                       flags=cv2.INTER_LINEAR)
  inp = (inp / 255. - mean) / std
  inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
  inp = torch.from_numpy(inp).to(opt.device)
  out = model(inp)[-1]
  pred = get_preds(out['hm'].detach().cpu().numpy())[0]
  pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
  pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                         out['depth'].detach().cpu().numpy())[0]
  debugger = Debugger()
  debugger.add_img(image)
  debugger.add_point_2d(pred, (255, 0, 0))
  debugger.add_point_3d(pred_3d, 'b')
  # import pdb; pdb.set_trace()
  debugger.show_all_imgs(pause=True)
  debugger.show_3d()
  if save_path:
    debugger.save_3d(save_path)
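# The demo_image/predict/estimate variants in this section all assume module-level
# `mean` and `std` normalization arrays plus the repo's geometry and decoding
# helpers. A minimal preamble sketch: the import paths and the ImageNet-style
# normalization constants below are assumptions taken from common versions of
# pytorch-pose-hg-3d, not guaranteed to match this exact fork.
import cv2
import numpy as np
import torch

from utils.image import get_affine_transform, transform_preds  # assumed path
from utils.eval import get_preds, get_preds_3d                  # assumed path
from utils.debugger import Debugger                             # assumed path

mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3)
std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3)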
def demo_image(image, model, opt, timestep):
  inps = []
  s = None
  c = None
  hidden = None
  for t in range(timestep):
    s = max(image[t].shape[0], image[t].shape[1]) * 1.0
    c = np.array([image[t].shape[1] / 2., image[t].shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image[t], trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    inps.append(inp)
  if opt.task == "conv3d":
    outs = model(inps)
  else:
    outs, hidden = model(inps, hidden)
  out = outs[-1]
  preds, amb_idx = get_preds(out[-1]['hm'].detach().cpu().numpy())
  pred = preds[0]
  pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
  pred_3d, ignore_idx = get_preds_3d(out[-1]['hm'].detach().cpu().numpy(),
                                     out[-1]['depth'].detach().cpu().numpy(),
                                     amb_idx)
  pred_3d = pred_3d[0]
  ignore_idx = ignore_idx[0]
  return image[-1], pred, pred_3d, ignore_idx
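# A minimal usage sketch for the sequence variant above: slide a window of
# `timestep` frames over a video and run the model on each window. The video
# path is hypothetical, and `model`/`opt` are assumed to be loaded elsewhere.
cap = cv2.VideoCapture('input_video.mp4')
timestep = 4
frames = []
while True:
  ok, frame = cap.read()
  if not ok:
    break
  frames.append(frame)
  if len(frames) == timestep:
    last_frame, pred_2d, pred_3d, ignore_idx = demo_image(frames, model, opt, timestep)
    frames.pop(0)  # advance the window by one frame
cap.release()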
def demo_image(image, image_name, model, opt):
  s = max(image.shape[0], image.shape[1]) * 1.0
  c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
  trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
  inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                       flags=cv2.INTER_LINEAR)
  inp = (inp / 255. - mean) / std
  inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
  inp = torch.from_numpy(inp).to(opt.device)
  out = model(inp)[-1]
  pred = get_preds(out['hm'].detach().cpu().numpy())[0]
  pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
  pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                         out['depth'].detach().cpu().numpy())[0]
  path = "D:\\CV-Project\\pytorch-pose-hg-3d\\images\\last_save\\"
  _, image_name = os.path.split(image_name)
  image_name = image_name[:-4]  # strip the file extension
  debugger = Debugger()
  debugger.add_img(image, image_name)
  debugger.add_point_2d(pred, (255, 0, 0), image_name)
  debugger.add_point_3d(pred_3d, 'b')
  debugger.show_all_imgs(pause=False)
  debugger.show_3d(image_name, path)
  debugger.save_img(image_name, path)
def demo_image(image, model, opt):
  s = max(image.shape[0], image.shape[1]) * 1.0
  c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
  trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
  inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                       flags=cv2.INTER_LINEAR)
  inp = (inp / 255. - mean) / std
  inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
  inp = torch.from_numpy(inp).to(opt.device)
  out = model(inp)[-1]  # 'hm': (1, 16, 64, 64), 'depth': (1, 16, 64, 64)
  preds, amb_idx = get_preds(out['hm'].detach().cpu().numpy())
  pred = preds[0]
  pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
  pred_3d, ignore_idx = get_preds_3d(out['hm'].detach().cpu().numpy(),
                                     out['depth'].detach().cpu().numpy(),
                                     amb_idx)
  pred_3d = pred_3d[0]
  ignore_idx = ignore_idx[0]
  debugger = Debugger()
  debugger.add_img(image)  # copy the image into the debugger class
  debugger.add_point_2d(pred, (255, 0, 0))
  debugger.add_point_3d(pred_3d, 'b', ignore_idx=ignore_idx)
  debugger.show_all_imgs(pause=False)
  debugger.show_3d()
  print("Done")
def demo_image(image, model, opt):
  # An image name could be added as an input so that the individual output
  # files can be ascribed to the respective image.
  s = max(image.shape[0], image.shape[1]) * 1.0
  c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
  trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
  inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                       flags=cv2.INTER_LINEAR)
  inp = (inp / 255. - mean) / std
  inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
  inp = torch.from_numpy(inp).to(opt.device)
  out = model(inp)[-1]
  pred = get_preds(out['hm'].detach().cpu().numpy())[0]
  # Readjust the 2D skeleton to the input image using the center and scale.
  pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
  # Use the heatmap and depth map to build the 3D pose.
  pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                         out['depth'].detach().cpu().numpy())[0]
  # This step would readjust the 3D skeleton to the input image with center and scale:
  # pred_3d = transform_preds(pred_3d, c, s, (opt.output_w, opt.output_h, 10))
  # Visualization (disabled):
  # debugger = Debugger()
  # debugger.add_img(image)                   # add the image
  # debugger.add_point_2d(pred, (255, 0, 0))  # draw the 2D joints on the image
  # debugger.add_point_3d(pred_3d, 'b')       # plot the 3D joint locations
  # debugger.show_all_imgs(pause=False)       # show the images
  # debugger.show_3d()                        # show the 3D figure
  return pred_3d.flatten()
def step(phase, epoch, opt, dataloader, model, criterion, optimizer=None):
  # Choose the phase (in the evaluation phase, Dropout and BatchNorm are disabled)
  if phase == 'train':
    model.train()
  else:
    model.eval()
  # Set up the meters
  Loss, Err, Acc = AverageMeter(), AverageMeter(), AverageMeter()
  Acc_tot = AverageMeter()
  seqlen = set_sequence_length(opt.MinSeqLenIndex, opt.MaxSeqLenIndex, epoch)
  # Show iteration progress using Bar
  nIters = len(dataloader)
  bar = Bar(f'{opt.expID}', max=nIters)
  # Loop over the dataloader
  for i, gt in enumerate(dataloader):
    # Wrap the tensors and record the operations applied to them
    input, label = gt['input'], gt['label']
    gtpts, center, scale = gt['gtpts'], gt['center'], gt['scale']
    input_var = input[:, 0, ].float().cuda(device=opt.device, non_blocking=True)
    label_var = label.float().cuda(device=opt.device, non_blocking=True)
    Loss.reset()
    Err.reset()
    Acc.reset()
    # If it is 3D, nOutput may be needed to get a different target, not only the heatmap.
    # Forward propagation
    output = model(input_var)
    # Get the model outputs and calculate the loss
    loss = criterion(output, label_var)
    # Backward + optimize only in the training phase
    if phase == 'train':
      # Zero the parameter gradients
      optimizer.zero_grad()
      loss.mean().backward()
      optimizer.step()
    Loss.update(loss.sum())
    # Compute the accuracy
    # acc = Accuracy(opt, output.data.cpu().numpy(), labels_var.data.cpu().numpy())
    ref = get_ref(opt.dataset, scale)
    for j in range(opt.preSeqLen):
      if j <= seqlen:
        pred_hm = get_preds(output[:, j, ].float())
        pred_pts = original_coordinate(pred_hm, center[:, ], scale, opt.outputRes)
        err, ne = error(pred_pts, gtpts[:, j, ], ref)
        acc, na = accuracy(pred_pts, gtpts[:, j, ], ref)
        # assert ne == na, "ne must be the same as na"
        Err.update(err)
        Acc.update(acc)
        Acc_tot.update(acc)
    Bar.suffix = f'{phase}[{epoch}][{i}/{nIters}]|Total:{bar.elapsed_td}' \
                 f'|ETA:{bar.eta_td}|Loss:{Loss.val:.6f}|Err:{Err.avg:.6f}|Acc:{Acc.avg:.6f}'
    bar.next()
  bar.finish()
  return Loss.val, Acc_tot.avg
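# For reference, a minimal AverageMeter sketch in the style used by the
# training loops here (the repo ships its own implementation; this only
# illustrates the reset/update/val/avg interface the code above relies on).
class AverageMeter(object):
  def __init__(self):
    self.reset()

  def reset(self):
    self.val, self.sum, self.count, self.avg = 0, 0, 0, 0

  def update(self, val, n=1):
    self.val = val
    self.sum += val * n
    self.count += n
    self.avg = self.sum / self.count if self.count > 0 else 0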
def estimate(self, image):
  if isinstance(image, str):
    image = cv2.imread(image)
  inp, c, s = self.processImage(image)
  inp = torch.from_numpy(inp).to(self.device)
  out = self.model(inp)[-1]
  pred = get_preds(out['hm'].detach().cpu().numpy())[0]
  pred = transform_preds(pred, c, s, (self.output_w, self.output_h))
  pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                         out['depth'].detach().cpu().numpy())[0]
  return pred, pred_3d
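# A minimal usage sketch for the estimator method above. The wrapper class name
# and its constructor are hypothetical; only the estimate() call mirrors the
# method defined above, returning 2D joints in image coordinates and a 3D pose.
# estimator = PoseEstimator(model_path='hgreg-3d.pth')    # hypothetical class
# pred_2d, pred_3d = estimator.estimate('test_image.png')
# print(pred_2d.shape, pred_3d.shape)                     # e.g. (16, 2), (16, 3)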
def demo_image(image, model, opt, name):
  s = max(image.shape[0], image.shape[1]) * 1.0
  c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
  trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
  inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                       flags=cv2.INTER_LINEAR)
  inp = (inp / 255. - mean) / std
  inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
  inp = torch.from_numpy(inp).to(opt.device)
  out = model(inp)[-1]
  pred = get_preds(out['hm'].detach().cpu().numpy())[0]
  pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))  # pred 2d range (176, 256)
  pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                         out['depth'].detach().cpu().numpy())[0]
  pred_3d_real_size = pred_3d * 4
  pred_3d_real_size[:, 0] = pred_3d_real_size[:, 0] - 40
  # print(pred_3d)
  # pdb.set_trace()
  pred_3d_ordered = np.zeros([15, 3])  # the last entry is the mid-hip, used for the spline computation
  for i in range(16):
    pred_3d_ordered[corres[i]] = pred_3d_real_size[i]
  pred_3d_ordered[1] = (pred_3d_ordered[2] + pred_3d_ordered[5]) / 2
  pred_3d_ordered[14] = (pred_3d_ordered[8] + pred_3d_ordered[11]) / 2
  pred_3d_ordered[0] = -1
  pred_3d_ordered[9:11] = -1
  pred_3d_ordered[12:14] = -1
  from good_order_cood_angle_convert import absolute_angles, anglelimbtoxyz2
  # bias: use the neck as the offset
  # if pred_3d[8, :][0] != 0 or pred_3d[8, :][1] != 0:
  #   bias = np.array([pred[8, 0], pred[8, 1]])
  absolute_angles, limbs, offset = absolute_angles(pred_3d_ordered)
  # pdb.set_trace()
  # rev = anglelimbtoxyz2(offset, absolute_angles, limbs)
  pred_2d = pred_3d_ordered[:, :2]
  dic = {
      'absolute_angles': absolute_angles,
      'limbs': limbs,
      'offset': offset
  }
  # pdb.set_trace()
  np.save(name, dic)
def demo_image(image, model, opt):
  s = max(image.shape[0], image.shape[1]) * 1.0
  c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
  trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
  inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                       flags=cv2.INTER_LINEAR)
  inp = (inp / 255. - mean) / std
  inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
  inp = torch.from_numpy(inp).to(opt.device)
  out = model(inp)[-1]
  pred = get_preds(out['hm'].detach().cpu().numpy())[0]
  pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
  pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                         out['depth'].detach().cpu().numpy())[0]
  return image, pred, pred_3d
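# A minimal usage sketch for the variant above: run the network on one image
# and draw the returned 2D joints with OpenCV. The image path is hypothetical;
# `model` and `opt` are assumed to be created by the repo's loaders.
img = cv2.imread('images/example.png')
img, pred_2d, pred_3d = demo_image(img, model, opt)
for x, y in pred_2d:
  cv2.circle(img, (int(x), int(y)), 3, (255, 0, 0), -1)  # mark each joint
cv2.imwrite('pose_2d.png', img)
print('3D prediction shape:', pred_3d.shape)  # one (x, y, z) row per joint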
def predict(self, image):
  s = max(image.shape[0], image.shape[1]) * 1.0
  c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
  trans_input = get_affine_transform(c, s, 0, [self.opt.input_w, self.opt.input_h])
  inp = cv2.warpAffine(image, trans_input, (self.opt.input_w, self.opt.input_h),
                       flags=cv2.INTER_LINEAR)
  inp = (inp / 255. - mean) / std
  inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
  inp = torch.from_numpy(inp).to(self.opt.device)
  out = self.model(inp)[-1]
  pred = get_preds(out['hm'].detach().cpu().numpy())[0]
  pred = transform_preds(pred, c, s, (self.opt.output_w, self.opt.output_h))
  # pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
  #                        out['depth'].detach().cpu().numpy())[0]
  # Overlay the points on top of the image:
  # return show_2d(image, pred, (255, 0, 0), mpii_edges)
  return image, pred, mpii_edges
def demo_image(image, model, opt, name):
  s = max(image.shape[0], image.shape[1]) * 1.0
  c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
  trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
  inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                       flags=cv2.INTER_LINEAR)
  inp = (inp / 255. - mean) / std
  inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
  inp = torch.from_numpy(inp).to(opt.device)
  out = model(inp)[-1]
  pred = get_preds(out['hm'].detach().cpu().numpy())[0]
  pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
  pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                         out['depth'].detach().cpu().numpy())[0]
  if pred_3d[6, :][0] != 0 or pred_3d[6, :][1] != 0:
    print("A different bias!!")
    pdb.set_trace()
  bias = np.array([pred[6, 0], pred[6, 1]])
  dic = {'pred_3d': pred_3d, 'bias': bias}
  np.save(name, dic)
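# Reading back the files written by the np.save-based variants above: np.save
# pickles the dict into a 0-d object array, so it must be loaded with
# allow_pickle=True and unwrapped via .item(). The file name is hypothetical.
data = np.load('pose_000001.npy', allow_pickle=True).item()
pred_3d, bias = data['pred_3d'], data['bias']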
def step(split, epoch, opt, data_loader, model, optimizer=None):
  if split == 'train':
    model.train()
  else:
    model.eval()
  # crit = torch.nn.MSELoss()
  # crit_3d = FusionLoss(opt.device, opt.weight_3d, opt.weight_var)
  # crit_ocv = nn.BCEWithLogitsLoss()
  crit_ocv = nn.CrossEntropyLoss()

  # acc_idxs = data_loader.dataset.acc_idxs
  # edges = data_loader.dataset.edges
  # edges_3d = data_loader.dataset.edges_3d
  # shuffle_ref = data_loader.dataset.shuffle_ref
  # mean = data_loader.dataset.mean
  # std = data_loader.dataset.std
  # convert_eval_format = data_loader.dataset.convert_eval_format

  # Loss, Loss3D = AverageMeter(), AverageMeter()
  # Acc, MPJPE = AverageMeter(), AverageMeter()
  Loss_ocv, Acc_ocv = AverageMeter(), AverageMeter()
  data_time, batch_time = AverageMeter(), AverageMeter()
  preds = []
  time_str = ''

  nIters = len(data_loader)
  if opt.train_half:
    nIters = nIters / 2
  bar = Bar('{}'.format(opt.exp_id), max=nIters)

  end = time.time()
  for i, batch in enumerate(data_loader):
    if i >= nIters:
      break
    data_time.update(time.time() - end)
    # for k in batch:
    #   if k != 'meta':
    #     batch[k] = batch[k].cuda(device=opt.device, non_blocking=True)
    # gt_2d = batch['meta']['pts_crop'].cuda(
    #   device=opt.device, non_blocking=True).float() / opt.output_h
    img, ocv_gt, info = batch
    if i == 0:
      np.savez(split + '_debug.npz', img=img.numpy(), ocv_gt=ocv_gt.numpy(), info=info)
    img = img.cuda(device=opt.device, non_blocking=True)
    ocv_gt = ocv_gt.cuda(device=opt.device, non_blocking=True)
    output = model(img)

    # loss = crit(output[-1]['hm'], batch['target'])
    # loss_3d = crit_3d(
    #   output[-1]['depth'], batch['reg_mask'], batch['reg_ind'],
    #   batch['reg_target'], gt_2d)
    # for k in range(opt.num_stacks - 1):
    #   loss += crit(output[k], batch['target'])
    #   loss_3d = crit_3d(
    #     output[-1]['depth'], batch['reg_mask'], batch['reg_ind'],
    #     batch['reg_target'], gt_2d)
    # loss += loss_3d
    # loss = crit_ocv(output, ocv_gt)
    loss = crit_ocv(output, torch.argmax(ocv_gt, 1))
    preds = torch.argmax(output, 1)

    if split == 'train':
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    # else:
    #   input_ = batch['input'].cpu().numpy().copy()
    #   input_[0] = flip(input_[0]).copy()[np.newaxis, ...]
    #   input_flip_var = torch.from_numpy(input_).cuda(
    #     device=opt.device, non_blocking=True)
    #   output_flip_ = model(input_flip_var)
    #   output_flip = shuffle_lr(
    #     flip(output_flip_[-1]['hm'].detach().cpu().numpy()[0]), shuffle_ref)
    #   output_flip = output_flip.reshape(
    #     1, opt.num_output, opt.output_h, opt.output_w)
    #   output_depth_flip = shuffle_lr(
    #     flip(output_flip_[-1]['depth'].detach().cpu().numpy()[0]), shuffle_ref)
    #   output_depth_flip = output_depth_flip.reshape(
    #     1, opt.num_output, opt.output_h, opt.output_w)
    #   output_flip = torch.from_numpy(output_flip).cuda(
    #     device=opt.device, non_blocking=True)
    #   output_depth_flip = torch.from_numpy(output_depth_flip).cuda(
    #     device=opt.device, non_blocking=True)
    #   output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2
    #   output[-1]['depth'] = (output[-1]['depth'] + output_depth_flip) / 2
    #   # pred = get_preds(output[-1]['hm'].detach().cpu().numpy())
    #   # preds.append(convert_eval_format(pred, conf, meta)[0])

    acc = accuracy_ocv(preds, torch.argmax(ocv_gt, 1))
    Loss_ocv.update(loss.item(), img.size(0))
    Acc_ocv.update(acc, img.size(0))
    # Loss.update(loss.item(), batch['input'].size(0))
    # Loss3D.update(loss_3d.item(), batch['input'].size(0))
    # Acc.update(accuracy(output[-1]['hm'].detach().cpu().numpy(),
    #                     batch['target'].detach().cpu().numpy(), acc_idxs))
    # mpjpe_batch, mpjpe_cnt = mpjpe(output[-1]['hm'].detach().cpu().numpy(),
    #                                output[-1]['depth'].detach().cpu().numpy(),
    #                                batch['meta']['gt_3d'].detach().numpy(),
    #                                convert_func=convert_eval_format)
    # MPJPE.update(mpjpe_batch, mpjpe_cnt)
    batch_time.update(time.time() - end)
    end = time.time()
    if not opt.hide_data_time:
      time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                 ' |Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
    # Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:} '
    #              '|Loss {loss.avg:.5f} |Loss3D {loss_3d.avg:.5f}'
    #              '|Acc {Acc.avg:.4f} |MPJPE {MPJPE.avg:.2f}'
    #              '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td,
    #                                  eta=bar.eta_td, loss=Loss, Acc=Acc,
    #                                  split=split, time_str=time_str,
    #                                  MPJPE=MPJPE, loss_3d=Loss3D)
    Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:} '\
                 '|Loss_ocv {loss.avg:.5f}'\
                 '|Acc_ocv {Acc.avg:.4f}'\
                 '|loss_batch {loss_batch:.4f}'\
                 '|acc_batch {acc_batch:.4f}'\
                 '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td,
                                     eta=bar.eta_td, loss=Loss_ocv, Acc=Acc_ocv,
                                     loss_batch=loss.item(), acc_batch=acc,
                                     split=split, time_str=time_str)
    if opt.print_iter > 0:
      if i % opt.print_iter == 0:
        print('{}| {}'.format(opt.exp_id, Bar.suffix))
    else:
      bar.next()
    if opt.debug >= 2:
      # NOTE: this debug branch still expects the original dict-style batch and
      # dataset attributes (edges, edges_3d, mean, std, convert_eval_format).
      gt = get_preds(batch['target'].cpu().numpy()) * 4
      pred = get_preds(output[-1]['hm'].detach().cpu().numpy()) * 4
      debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)
      img = (batch['input'][0].cpu().numpy().transpose(1, 2, 0) * std + mean) * 256
      img = img.astype(np.uint8).copy()
      debugger.add_img(img)
      debugger.add_mask(
        cv2.resize(batch['target'][0].cpu().numpy().max(axis=0),
                   (opt.input_w, opt.input_h)), img, 'target')
      debugger.add_mask(
        cv2.resize(output[-1]['hm'][0].detach().cpu().numpy().max(axis=0),
                   (opt.input_w, opt.input_h)), img, 'pred')
      debugger.add_point_2d(gt[0], (0, 0, 255))
      debugger.add_point_2d(pred[0], (255, 0, 0))
      debugger.add_point_3d(batch['meta']['gt_3d'].detach().numpy()[0], 'r',
                            edges=edges_3d)
      pred_3d = get_preds_3d(output[-1]['hm'].detach().cpu().numpy(),
                             output[-1]['depth'].detach().cpu().numpy())
      debugger.add_point_3d(convert_eval_format(pred_3d[0]), 'b', edges=edges_3d)
      debugger.show_all_imgs(pause=False)
      debugger.show_3d()
      # pdb.set_trace()

  bar.finish()
  # return {'loss': Loss.avg,
  #         'acc': Acc.avg,
  #         'mpjpe': MPJPE.avg,
  #         'time': bar.elapsed_td.total_seconds() / 60.}, preds
  return {
    'loss': Loss_ocv.avg,
    'acc': Acc_ocv.avg,
    'time': bar.elapsed_td.total_seconds() / 60.
  }, preds
def step(split, epoch, opt, data_loader, model, optimizer=None):
  if split == 'train':
    model.train()
  else:
    model.eval()

  crit = torch.nn.MSELoss()
  crit_3d = FusionLoss(opt.device, opt.weight_3d, opt.weight_var)

  acc_idxs = data_loader.dataset.acc_idxs
  edges = data_loader.dataset.edges
  edges_3d = data_loader.dataset.edges_3d
  shuffle_ref = data_loader.dataset.shuffle_ref
  mean = data_loader.dataset.mean
  std = data_loader.dataset.std
  convert_eval_format = data_loader.dataset.convert_eval_format

  Loss, Loss3D = AverageMeter(), AverageMeter()
  Acc, MPJPE = AverageMeter(), AverageMeter()
  data_time, batch_time = AverageMeter(), AverageMeter()
  preds = []
  time_str = ''

  nIters = len(data_loader)
  bar = Bar('{}'.format(opt.exp_id), max=nIters)

  end = time.time()
  for i, batch in enumerate(data_loader):
    data_time.update(time.time() - end)
    for k in batch:
      if k != 'meta':
        batch[k] = batch[k].cuda(device=opt.device, non_blocking=True)
    gt_2d = batch['meta']['pts_crop'].cuda(
      device=opt.device, non_blocking=True).float() / opt.output_h
    output = model(batch['input'])

    loss = crit(output[-1]['hm'], batch['target'])
    loss_3d = crit_3d(
      output[-1]['depth'], batch['reg_mask'], batch['reg_ind'],
      batch['reg_target'], gt_2d)
    for k in range(opt.num_stacks - 1):
      loss += crit(output[k], batch['target'])
      loss_3d = crit_3d(
        output[-1]['depth'], batch['reg_mask'], batch['reg_ind'],
        batch['reg_target'], gt_2d)
    loss += loss_3d

    if split == 'train':
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    else:
      # Flip test: average the heatmap and depth predictions of the original
      # and the horizontally flipped input.
      input_ = batch['input'].cpu().numpy().copy()
      input_[0] = flip(input_[0]).copy()[np.newaxis, ...]
      input_flip_var = torch.from_numpy(input_).cuda(
        device=opt.device, non_blocking=True)
      output_flip_ = model(input_flip_var)
      output_flip = shuffle_lr(
        flip(output_flip_[-1]['hm'].detach().cpu().numpy()[0]), shuffle_ref)
      output_flip = output_flip.reshape(
        1, opt.num_output, opt.output_h, opt.output_w)
      output_depth_flip = shuffle_lr(
        flip(output_flip_[-1]['depth'].detach().cpu().numpy()[0]), shuffle_ref)
      output_depth_flip = output_depth_flip.reshape(
        1, opt.num_output, opt.output_h, opt.output_w)
      output_flip = torch.from_numpy(output_flip).cuda(
        device=opt.device, non_blocking=True)
      output_depth_flip = torch.from_numpy(output_depth_flip).cuda(
        device=opt.device, non_blocking=True)
      output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2
      output[-1]['depth'] = (output[-1]['depth'] + output_depth_flip) / 2
      # pred = get_preds(output[-1]['hm'].detach().cpu().numpy())
      # preds.append(convert_eval_format(pred, conf, meta)[0])

    Loss.update(loss.item(), batch['input'].size(0))
    Loss3D.update(loss_3d.item(), batch['input'].size(0))
    Acc.update(accuracy(output[-1]['hm'].detach().cpu().numpy(),
                        batch['target'].detach().cpu().numpy(), acc_idxs))
    mpjpe_batch, mpjpe_cnt = mpjpe(output[-1]['hm'].detach().cpu().numpy(),
                                   output[-1]['depth'].detach().cpu().numpy(),
                                   batch['meta']['gt_3d'].detach().numpy(),
                                   convert_func=convert_eval_format)
    MPJPE.update(mpjpe_batch, mpjpe_cnt)

    batch_time.update(time.time() - end)
    end = time.time()
    if not opt.hide_data_time:
      time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                 ' |Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
    Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:} '\
                 '|Loss {loss.avg:.5f} |Loss3D {loss_3d.avg:.5f}'\
                 '|Acc {Acc.avg:.4f} |MPJPE {MPJPE.avg:.2f}'\
                 '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td,
                                     eta=bar.eta_td, loss=Loss, Acc=Acc,
                                     split=split, time_str=time_str,
                                     MPJPE=MPJPE, loss_3d=Loss3D)
    if opt.print_iter > 0:
      if i % opt.print_iter == 0:
        print('{}| {}'.format(opt.exp_id, Bar.suffix))
    else:
      bar.next()
    if opt.debug >= 2:
      gt = get_preds(batch['target'].cpu().numpy()) * 4
      pred = get_preds(output[-1]['hm'].detach().cpu().numpy()) * 4
      debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)
      img = (batch['input'][0].cpu().numpy().transpose(1, 2, 0) * std + mean) * 256
      img = img.astype(np.uint8).copy()
      debugger.add_img(img)
      debugger.add_mask(
        cv2.resize(batch['target'][0].cpu().numpy().max(axis=0),
                   (opt.input_w, opt.input_h)), img, 'target')
      debugger.add_mask(
        cv2.resize(output[-1]['hm'][0].detach().cpu().numpy().max(axis=0),
                   (opt.input_w, opt.input_h)), img, 'pred')
      debugger.add_point_2d(gt[0], (0, 0, 255))
      debugger.add_point_2d(pred[0], (255, 0, 0))
      debugger.add_point_3d(
        batch['meta']['gt_3d'].detach().numpy()[0], 'r', edges=edges_3d)
      pred_3d = get_preds_3d(output[-1]['hm'].detach().cpu().numpy(),
                             output[-1]['depth'].detach().cpu().numpy())
      debugger.add_point_3d(convert_eval_format(pred_3d[0]), 'b', edges=edges_3d)
      debugger.show_all_imgs(pause=False)
      debugger.show_3d()

  bar.finish()
  return {'loss': Loss.avg, 'acc': Acc.avg, 'mpjpe': MPJPE.avg,
          'time': bar.elapsed_td.total_seconds() / 60.}, preds
def step(split, epoch, opt, data_loader, model, optimizer=None):
  if split == 'train':
    model.train()
  else:
    model.eval()

  crit = torch.nn.MSELoss()

  acc_idxs = data_loader.dataset.acc_idxs
  edges = data_loader.dataset.edges
  shuffle_ref = data_loader.dataset.shuffle_ref
  mean = data_loader.dataset.mean
  std = data_loader.dataset.std
  convert_eval_format = data_loader.dataset.convert_eval_format

  Loss, Acc = AverageMeter(), AverageMeter()
  data_time, batch_time = AverageMeter(), AverageMeter()
  preds = []

  nIters = len(data_loader)
  bar = Bar('{}'.format(opt.exp_id), max=nIters)

  end = time.time()
  for i, batch in enumerate(data_loader):
    data_time.update(time.time() - end)
    input, target, meta = batch['input'], batch['target'], batch['meta']
    input_var = input.cuda(device=opt.device, non_blocking=True)
    target_var = target.cuda(device=opt.device, non_blocking=True)
    output = model(input_var)

    loss = crit(output[-1]['hm'], target_var)
    for k in range(opt.num_stacks - 1):
      loss += crit(output[k], target_var)

    if split == 'train':
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    else:
      input_ = input.cpu().numpy().copy()
      input_[0] = flip(input_[0]).copy()[np.newaxis, ...]
      input_flip_var = torch.from_numpy(input_).cuda(
        device=opt.device, non_blocking=True)
      output_flip = model(input_flip_var)
      output_flip = shuffle_lr(
        flip(output_flip[-1]['hm'].detach().cpu().numpy()[0]), shuffle_ref)
      output_flip = output_flip.reshape(
        1, opt.num_output, opt.output_h, opt.output_w)
      # output_ = (output[-1].detach().cpu().numpy() + output_flip) / 2
      output_flip = torch.from_numpy(output_flip).cuda(
        device=opt.device, non_blocking=True)
      output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2
      pred, conf = get_preds(output[-1]['hm'].detach().cpu().numpy(), True)
      preds.append(convert_eval_format(pred, conf, meta)[0])

    Loss.update(loss.detach().item(), input.size(0))
    Acc.update(accuracy(output[-1]['hm'].detach().cpu().numpy(),
                        target_var.detach().cpu().numpy(), acc_idxs))

    batch_time.update(time.time() - end)
    end = time.time()
    if not opt.hide_data_time:
      time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                 ' |Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
    else:
      time_str = ''
    Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:}' \
                 '|Loss {loss.avg:.5f} |Acc {Acc.avg:.4f}'\
                 '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td,
                                     eta=bar.eta_td, loss=Loss, Acc=Acc,
                                     split=split, time_str=time_str)
    if opt.print_iter > 0:
      if i % opt.print_iter == 0:
        print('{}| {}'.format(opt.exp_id, Bar.suffix))
    else:
      bar.next()
    if opt.debug >= 2:
      gt, amb_idx = get_preds(target.cpu().numpy())
      gt *= 4
      pred, amb_idx = get_preds(output[-1]['hm'].detach().cpu().numpy())
      pred *= 4
      debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)
      img = (input[0].numpy().transpose(1, 2, 0) * std + mean) * 256
      img = img.astype(np.uint8).copy()
      debugger.add_img(img)
      debugger.add_mask(
        cv2.resize(target[0].numpy().max(axis=0),
                   (opt.input_w, opt.input_h)), img, 'target')
      debugger.add_mask(
        cv2.resize(output[-1]['hm'][0].detach().cpu().numpy().max(axis=0),
                   (opt.input_w, opt.input_h)), img, 'pred')
      debugger.add_point_2d(pred[0], (255, 0, 0))
      debugger.add_point_2d(gt[0], (0, 0, 255))
      debugger.show_all_imgs(pause=True)

  bar.finish()
  return {'loss': Loss.avg, 'acc': Acc.avg,
          'time': bar.elapsed_td.total_seconds() / 60.}, preds
def main():
  # Parse the options from parameters
  opts = Opts().parse()
  # For PyTorch 0.4.1, cuda(device)
  opts.device = torch.device(f'cuda:{opts.gpu[0]}')
  print(opts.expID, opts.task, os.path.dirname(os.path.realpath(__file__)))
  # Load the trained model for testing
  if opts.loadModel != 'none':
    model_path = os.path.join(opts.root_dir, opts.loadModel)
    model = torch.load(model_path).cuda(device=opts.device)
    model.eval()
  else:
    print('ERROR: No model is loaded!')
    return
  # Read the input image and pass the input to the GPU
  if opts.img == 'None':
    val_dataset = PENN_CROP(opts, 'val')
    val_loader = tud.DataLoader(val_dataset, batch_size=1, shuffle=False,
                                num_workers=int(opts.num_workers))
    opts.nJoints = val_dataset.nJoints
    opts.skeleton = val_dataset.skeleton
    for i, gt in enumerate(val_loader):
      # Test Visualizer, Input and get_preds
      if i == 0:
        input, label = gt['input'], gt['label']
        gtpts, center, scale, proj = gt['gtpts'], gt['center'], gt['scale'], gt['proj']
        input_var = input[:, 0, ].float().cuda(device=opts.device, non_blocking=True)
        # output = label
        output = model(input_var)
        # Test Loss, Err and Acc (PCK)
        Loss, Err, Acc = AverageMeter(), AverageMeter(), AverageMeter()
        ref = get_ref(opts.dataset, scale)
        for j in range(opts.preSeqLen):
          pred = get_preds(output[:, j, ].cpu().float())
          pred = original_coordinate(pred, center[:, ], scale, opts.outputRes)
          err, ne = error(pred, gtpts[:, j, ], ref)
          acc, na = accuracy(pred, gtpts[:, j, ], ref)
          # assert ne == na, "ne must be the same as na"
          Err.update(err)
          Acc.update(acc)
          print(j, f"{Err.val:.6f}", Acc.val)
        print('all', f"{Err.avg:.6f}", Acc.avg)
        # Visualizer object
        ## Initialize
        v = Visualizer(opts.nJoints, opts.skeleton, opts.outputRes)
        # ## Add the input image
        # v.add_img(input[0, 0, ].transpose(2, 0).numpy().astype(np.uint8))
        # ## Get the predicted joints
        # predJoints = get_preds(output[:, 0, ])
        # ## Add joints and skeleton to the figure
        # v.add_2d_joints_skeleton(predJoints, (0, 0, 255))
        # Transform the heatmap for display
        hm_img = output[0, 0, ].cpu().detach().numpy()
        v.add_hm(hm_img)
        ## Show the image
        v.show_img(pause=True)
        break
  else:
    print('NOT ready for the raw input outside the dataset')
    img = cv2.imread(opts.img)
    input = torch.from_numpy(img.transpose(2, 0, 1)).float() / 256.
    input = input.view(1, input.size(0), input.size(1), input.size(2))
    input_var = torch.autograd.Variable(input).float().cuda(device=opts.device)
    output = model(input_var)
    predJoints = get_preds(output[-2].data.cpu().numpy())[0] * 4