def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()
    Loss, Acc, Mpjpe, Loss3D = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    nIters = len(dataLoader)
    bar = Bar('==>', max=nIters)
    for i, (input, target2D, target3D, meta) in enumerate(dataLoader):
        input_var = torch.autograd.Variable(input).float().cuda()
        target2D_var = torch.autograd.Variable(target2D).float().cuda()
        target3D_var = torch.autograd.Variable(target3D).float().cuda()
        depMap, depth = model(input_var, target2D_var)
        depthPredict = depth[opt.nStack - 1]
        if opt.DEBUG >= 2:
            gt = getPreds(target2D.cpu().numpy()) * 4
            pred = getPreds((depMap[opt.nStack - 1].data).cpu().numpy()) * 4
            debugger = Debugger()
            debugger.addImg((input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8))
            debugger.addPoint2D(pred[0], (255, 0, 0))
            debugger.addPoint2D(gt[0], (0, 0, 255))
            debugger.showImg()
            debugger.saveImg('debug/{}.png'.format(i))
        # Supervise the depth prediction of every hourglass stack against
        # the z-coordinate of the 3D target.
        loss = 0
        for k in range(opt.nStack):
            loss += criterion(depth[k], target3D_var[:, :, 2])
        Loss.update(loss.data[0], input.size(0))
        # Acc.update(Accuracy((output[opt.nStack - 1].data).cpu().numpy(), (target2D_var.data).cpu().numpy()))
        mpjpe, num3D = MPJPE2(target3D.cpu().numpy(), (depthPredict.data).cpu().numpy(), meta)
        Mpjpe.update(mpjpe, num3D)
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        Bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Mpjpe {Mpjpe.avg:.6f} ({Mpjpe.val:.6f})'.format(
            epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td, loss=Loss, split=split, Mpjpe=Mpjpe)
        bar.next()
    bar.finish()
    return Loss.avg, Acc.avg, Mpjpe.avg, Loss3D.avg
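# A minimal sketch (an assumption, not this repo's exact code) of the
# AverageMeter helper used throughout these step() functions: the standard
# running-average utility from the PyTorch ImageNet example. update(val, n)
# accumulates a weighted sum; .val is the last value, .avg the running mean.
class AverageMeter(object):
    def __init__(self):
        self.val, self.avg, self.sum, self.count = 0, 0, 0, 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count if self.count > 0 else 0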
def initLatent(loader, model, Y, nViews, S, AVG=False):
    model.eval()
    nIters = len(loader)
    N = loader.dataset.nImages
    M = np.zeros((N, ref.J, 3))
    bar = Bar('==>', max=nIters)
    sum_sigma2 = 0
    cnt_sigma2 = 1
    for i, (input, target, meta) in enumerate(loader):
        output = (model(torch.autograd.Variable(input)).data).cpu().numpy()
        G = output.shape[0] // nViews
        output = output.reshape(G, nViews, ref.J, 3)
        if AVG:
            # Average all views after rigidly aligning each one to view 0.
            for g in range(G):
                id = int(meta[g * nViews, 1])
                for j in range(nViews):
                    RR, tt = horn87(output[g, j].transpose(), output[g, 0].transpose())
                    MM = (np.dot(RR, output[g, j].transpose())).transpose().copy()
                    M[id] += MM.copy() / nViews
        else:
            # Otherwise pick the single view whose prediction best matches
            # the prior set Y (subsampled by stride S) under a Gaussian kernel.
            for g in range(G):
                # assert meta[g * nViews, 0] > 1 + ref.eps
                p = np.zeros(nViews)
                sigma2 = 0.1
                for j in range(nViews):
                    for kk in range(Y.shape[0] // S):
                        k = kk * S
                        d = Dis(Y[k], output[g, j])
                        sum_sigma2 += d
                        cnt_sigma2 += 1
                        p[j] += np.exp(-d / 2 / sigma2)
                id = int(meta[g * nViews, 1])
                M[id] = output[g, p.argmax()]
                if DEBUG and g == 0:
                    print('M[id]', id, M[id], p.argmax())
                    debugger = Debugger()
                    for j in range(nViews):
                        RR, tt = horn87(output[g, j].transpose(), output[g, p.argmax()].transpose())
                        MM = (np.dot(RR, output[g, j].transpose())).transpose().copy()
                        debugger.addPoint3D(MM, 'b')
                        debugger.addImg(input[g * nViews + j].numpy().transpose(1, 2, 0), j)
                    debugger.showAllImg()
                    debugger.addPoint3D(M[id], 'r')
                    debugger.show3D()
        Bar.suffix = 'Init : [{0:3}/{1:3}] | Total: {total:} | ETA: {eta:} | Dis: {dis:.6f}'.format(
            i, nIters, total=bar.elapsed_td, eta=bar.eta_td, dis=sum_sigma2 / cnt_sigma2)
        bar.next()
    bar.finish()
    # print('mean sigma2', sum_sigma2 / cnt_sigma2)
    return M
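# A minimal sketch, assuming horn87(src, dst) returns the closed-form
# least-squares rotation (and translation) aligning two 3xN point sets
# (Horn 1987; the SVD form below is the equivalent Kabsch solution).
# Names here are illustrative, not the repo's exact implementation.
import numpy as np

def horn87_sketch(src, dst):
    # src, dst: 3xN arrays of corresponding 3D points
    src_c = src - src.mean(axis=1, keepdims=True)
    dst_c = dst - dst.mean(axis=1, keepdims=True)
    U, _, Vt = np.linalg.svd(dst_c.dot(src_c.T))    # SVD of the correlation matrix
    D = np.eye(3)
    D[2, 2] = np.sign(np.linalg.det(U.dot(Vt)))     # guard against reflections
    R = U.dot(D).dot(Vt)                            # rotation with R @ src ~= dst
    t = dst.mean(axis=1) - R.dot(src.mean(axis=1))  # translation
    return R, t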
def main():
    opt = opts().parse()
    # if opt.loadModel != 'none':
    model = AlexNet(ref.nJoints).cuda()
    model.load_state_dict(torch.load("save.model"))
    for i, filename in enumerate(os.listdir("./testimages/")):
        img = cv2.imread("./testimages/" + filename)
        c = np.ones(2) * ref.h36mImgSize / 2
        s = ref.h36mImgSize * 1.0
        img2 = Crop(img, c, s, 0, ref.inputRes) / 256.
        input = torch.from_numpy(img2)
        input = input.contiguous().view(1, input.size(0), input.size(1), input.size(2))
        print(input.size())
        input_var = torch.autograd.Variable(input).float().cuda()
        output = model(input_var)
        print(output.size())
        reg = (output.data).cpu().numpy()  # .reshape(pred.shape[0], 1)
        reg = reg * 4.57  # elementwise rescale back to input resolution
        print(reg)
        debugger = Debugger()
        debugger.addImg((input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8))
        debugger.addPoint2D(reg, (255, 0, 0))
        # debugger.addPoint3D(np.concatenate([pred, (reg + 1) / 2. * 256], axis=1))
        debugger.saveImg(path="./result/" + filename)
        np.set_printoptions(threshold=np.inf, linewidth=np.inf)
        with open("./result/" + filename[:-4] + ".out", 'w') as f:
            f.write(np.array2string(reg, separator=', '))
def main():
    opt = opts().parse()
    if opt.loadModel != 'none':
        model = torch.load(opt.loadModel).cuda()
    else:
        model = torch.load('hgreg-3d.pth').cuda()
    img = cv2.imread(opt.demo)
    input = torch.from_numpy(img.transpose(2, 0, 1)).float() / 256.
    input = input.view(1, input.size(0), input.size(1), input.size(2))
    input_var = torch.autograd.Variable(input).float().cuda()
    output = model(input_var)
    pred = getPreds((output[-2].data).cpu().numpy())[0] * 4
    reg = (output[-1].data).cpu().numpy().reshape(pred.shape[0], 1)
    debugger = Debugger()
    debugger.addImg((input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8))
    debugger.addPoint2D(pred, (255, 0, 0))
    debugger.addPoint3D(np.concatenate([pred, (reg + 1) / 2. * 256], axis=1))
    debugger.showImg(pause=True)
    debugger.show3D()
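# A minimal sketch, assuming getPreds decodes heatmaps by per-joint argmax:
# it takes an N x J x H x W array and returns N x J x 2 (x, y) coordinates
# in heatmap space, which the callers above scale by 4 to input resolution.
import numpy as np

def getPreds_sketch(hm):
    N, J, H, W = hm.shape
    idx = hm.reshape(N, J, -1).argmax(axis=2)  # flat argmax per joint
    return np.stack([idx % W, idx // W], axis=2).astype(np.float32)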
def check_logic():
    # Re-run the first hourglass stage module by module and visualize the
    # resulting 2D predictions against ground truth.
    preproc = nn.ModuleList([model.conv1_, model.bn1, model.relu, model.r1,
                             model.maxpool, model.r4, model.r5])
    hg = model.hourglass[0]
    lower_hg = getEncoder(hg)
    data_loader = torch.utils.data.DataLoader(
        H36M(opts, 'val'), batch_size=1, shuffle=False,
        num_workers=int(ref.nThreads))
    for k, (input, target) in enumerate(data_loader):
        if k > nSamples:
            break
        input_var = torch.autograd.Variable(input).float().cuda()
        for mod in preproc:
            input_var = mod(input_var)
        for mod in lower_hg:
            input_var = mod(input_var)
        # decode
        ups = input_var
        upper_hg = nn.ModuleList(getDecoder(hg))
        for mod in upper_hg:
            ups = mod(ups)
        Residual = model.Residual
        for j in range(nModules):
            ups = Residual[j](ups)
        lin_ = model.lin_
        ups = lin_[0](ups)
        tmpOut = model.tmpOut
        ups = tmpOut[0](ups)
        pred = eval.getPreds(ups.data.cpu().numpy()) * 4
        gt = eval.getPreds(target.cpu().numpy()) * 4
        # init = getPreds(input.numpy()[:, 3:])
        debugger = Debugger()
        img = (input[0].numpy()[:3].transpose(1, 2, 0) * 256).astype(np.uint8).copy()
        print(img.shape)
        debugger.addImg(img)
        debugger.addPoint2D(pred[0], (255, 0, 0))
        debugger.addPoint2D(gt[0], (0, 0, 255))
        # debugger.addPoint2D(init[0], (0, 255, 0))
        debugger.showAllImg(pause=True)
def generate(imageName):
    process_image(imageName)
    model = torch.load('../model/Stage3/model_10.pth',
                       map_location=lambda storage, loc: storage)
    img = cv2.imread(imageName)
    input = torch.from_numpy(img.transpose(2, 0, 1)).float() / 256.
    input = input.view(1, input.size(0), input.size(1), input.size(2))
    input_var = torch.autograd.Variable(input).float()
    output = model(input_var)
    print(output[-2].data[0][-2].shape)
    pred = getPreds((output[-2].data).cpu().numpy())[0] * 4
    reg = (output[-1].data).cpu().numpy().reshape(pred.shape[0], 1)
    print(pred, (reg + 1) / 2. * 256)
    debugger = Debugger()
    debugger.addImg((input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8))
    debugger.addPoint2D(pred, (255, 0, 0))
    debugger.addPoint3D(np.concatenate([pred, (reg + 1) / 2. * 256], axis=1))
    debugger.showImg(pause=True)
    debugger.show3D()
def main():
    # Unpickle Python 2 checkpoints under Python 3.
    pickle.load = partial(pickle.load, encoding="latin1")
    pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
    opt = opts().parse()
    if opt.loadModel != 'none':
        model = torch.load(opt.loadModel).cuda()
    else:
        model = torch.load('../../tr_models/hgreg-3d.pth').cuda()
    # opt.demo is the path to a directory containing the demo video frames.
    all_frames = os.listdir(opt.demo)
    n_frames = len(all_frames)
    # Output directory named after the input directory.
    dir_name = opt.demo.split('/')[-1]
    save_path = '../../output/demo/' + dir_name
    try:
        os.makedirs(save_path)
    except OSError:
        pass
    for idx, frame in enumerate(all_frames):
        print('processing frame {}'.format(idx))
        img = cv2.imread(opt.demo + '/' + frame)
        input = torch.from_numpy(img.transpose(2, 0, 1)).float() / 256.
        input = input.view(1, input.size(0), input.size(1), input.size(2))
        input_var = torch.autograd.Variable(input).float().cuda()
        output = model(input_var)
        pred = getPreds((output[-2].data).cpu().numpy())[0] * 4
        reg = (output[-1].data).cpu().numpy().reshape(pred.shape[0], 1)
        debugger = Debugger()
        debugger.addImg((input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8))
        debugger.addPoint2D(pred, (255, 0, 0))
        debugger.addPoint3D(np.concatenate([pred, (reg + 1) / 2. * 256], axis=1))
        # debugger.showImg(pause=True)
        debugger.saveImg(path=save_path + '/frame{}.jpg'.format(idx))
        debugger.save3D(path=save_path + '/frame_p3d{}.jpg'.format(idx))
        print('frame {} done'.format(idx))
class PoseExtractor:
    def __init__(self, flag_save_pose_image, flag_save_pose_file):
        rospy.loginfo("Initializing 3D Pose Extractor")
        self.frameInfo = FrameInfo()
        self.bridge = CvBridge()
        self.lock = Lock()
        self.image_shape = (256, 256, 3)
        self.debugger = Debugger()
        self.tracking_info_topic = rospy.get_param(
            '~tracking_info_topic', '/person_tracker/tracking_info')
        self.model_name = rospy.get_param('~pose_model', 'hgreg-3d.pth')
        self.model = {}
        self.save_pose_image = flag_save_pose_image
        self.save_pose_file = flag_save_pose_file
        self.publish_person = False
        self.initModel()
        self.frame_info_pub = rospy.Publisher('~frame_info', FrameInfo, queue_size=1)
        self.person_pub = rospy.Publisher('~person', Person, queue_size=1)
        self.tracking_info_sub = rospy.Subscriber(
            self.tracking_info_topic, FrameInfo, self.trackingInfoCallback,
            queue_size=1)

    def trackingInfoCallback(self, tracking_info_msg):
        begin = time.time()
        self.frameInfo.frame_id = tracking_info_msg.frame_id
        self.frameInfo.image_frame = tracking_info_msg.image_frame
        self.frameInfo.last_frame = tracking_info_msg.last_frame
        rospy.loginfo("Frame ID: {}".format(self.frameInfo.frame_id))
        persons = tracking_info_msg.persons
        numPersons = len(persons)
        if numPersons != 0:
            for person in persons:
                rospy.loginfo("Person {} is detected".format(person.person_id))
            try:
                # multi-threading for publishing single person
                # p = ThreadPool(numPersons)
                # p.map(self.poseEstimation, persons)
                # p.close()
                for person in persons:
                    self.poseEstimation(person)
                self.frame_info_pub.publish(self.frameInfo)
                self.frameInfo = FrameInfo()
                if tracking_info_msg.last_frame:
                    rospy.loginfo('Last frame in the video!')
            except BaseException as e:
                rospy.logerr(e)
        else:
            rospy.logwarn("No person is detected!")
            self.frame_info_pub.publish(self.frameInfo)
            if tracking_info_msg.last_frame:
                rospy.loginfo('Last frame in the video!')
        rospy.loginfo("FPS: {}".format(1 / (time.time() - begin)))

    def poseEstimation(self, tracked_person):
        person_id = tracked_person.person_id
        try:
            curImage = self.bridge.imgmsg_to_cv2(self.frameInfo.image_frame)
            person_image = curImage[
                int(tracked_person.bbox.top):int(tracked_person.bbox.top + tracked_person.bbox.height),
                int(tracked_person.bbox.left):int(tracked_person.bbox.left + tracked_person.bbox.width)]
        except CvBridgeError as e:
            rospy.logerr(e)
        # Resize input image
        rospy.logdebug("person image shape: {}".format(person_image.shape))
        if person_image.shape != self.image_shape:
            h, w = person_image.shape[0], person_image.shape[1]
            center = torch.FloatTensor((w / 2, h / 2))
            scale = 1.0 * max(h, w)
            res = 256
            input_image = Crop(person_image, center, scale, 0, res)
        else:
            input_image = person_image
        # Feed input image to model
        rospy.loginfo("feeding image to model")
        input = torch.from_numpy(input_image.transpose(2, 0, 1)).float() / 256.
        input = input.view(1, input.size(0), input.size(1), input.size(2))
        input_var = torch.autograd.Variable(input).float().cuda()
        # Lock while the shared model runs inference.
        self.lock.acquire()
        try:
            output = self.model(input_var)
        finally:
            self.lock.release()
        rospy.logdebug("got output from model")
        # Get 2D pose
        rospy.logdebug("Rendering 2D pose")
        pose2D = getPreds((output[-2].data).cpu().numpy())[0] * 4
        # Get 3D pose
        rospy.logdebug("Rendering 3D pose")
        reg = (output[-1].data).cpu().numpy().reshape(pose2D.shape[0], 1)
        pose3D = np.concatenate([pose2D, (reg + 1) / 2. * 256], axis=1)
        rospy.logdebug("pose 3d shape: {}".format(pose3D.shape))
        for pose in pose3D:
            joint = Point()
            joint.x = pose[0]
            joint.y = pose[1]
            joint.z = pose[2]
            tracked_person.person_pose.append(joint)
        # publish person
        if self.publish_person:
            self.person_pub.publish(tracked_person)
        self.lock.acquire()
        try:
            self.frameInfo.persons.append(tracked_person)
        finally:
            self.lock.release()
        rospy.logdebug("pose3D: \n {}".format(pose3D))
        # Save pose image
        if self.save_pose_image:
            cv2.imwrite(pkg_path + '/scripts/debug/original/ogImg_' +
                        str(self.frame_id) + '.png', self.cv_image)
            cv2.imwrite(pkg_path + '/scripts/debug/input/inputImg_' +
                        str(self.frame_id) + '.png', input_image)
            self.debugger.addImg(input_image, imgId=self.frame_id)
            self.debugger.addPoint2D(pose2D, (255, 0, 0), imgId=self.frame_id)
            self.debugger.saveImg(pkg_path + '/scripts/debug/pose/poseImg_' +
                                  str(self.frame_id) + '.png', imgId=self.frame_id)
        if self.save_pose_file:
            file_name = pkg_path + '/pose_file/pose_{:04d}.txt'.format(self.frame_id)
            with open(file_name, 'w') as outfile:
                np.savetxt(outfile, pose3D, fmt='%-7.2f')
        rospy.loginfo("Person {} processing finished".format(person_id))

    # Initialize model
    def initModel(self):
        rospy.loginfo("=====> Loading and Initializing Model")
        model_path = rospkg.RosPack().get_path('pose_3d_ros') + '/models/' + self.model_name
        self.model = torch.load(model_path).cuda()
        # Warm up with a dummy forward pass.
        img = np.zeros((256, 256, 3))
        input = torch.from_numpy(img.transpose(2, 0, 1)).float() / 256.
        input = input.view(1, input.size(0), input.size(1), input.size(2))
        input_var = torch.autograd.Variable(input).float().cuda()
        output = self.model(input_var)
        rospy.loginfo("Model Initialization Done")
img = (input[:3].transpose(1, 2, 0) * 256).astype(np.uint8).copy()
star = cv2.resize(hm[0, 0], (ref.inputRes, ref.inputRes)) * 255
star[star > 255] = 255
star[star < 0] = 0
star = star.astype(np.uint8)
for k in range(len(ps[0])):
    x, y, z = ((hm[0, 1:4, ps[0][k], ps[1][k]] + 0.5) * ref.outputRes).astype(np.int32)
    dep = ((hm[0, 4, ps[0][k], ps[1][k]] + 0.5) * ref.outputRes).astype(np.int32)
    color.append((1.0 * x / ref.outputRes, 1.0 * y / ref.outputRes, 1.0 * z / ref.outputRes))
    cv2.circle(img, (ps[1][k] * 4, ps[0][k] * 4), 6,
               (int(x * 4), int(y * 4), int(z * 4)), -1)
debugger.addImg(img)
debugger.addImg(star, 'star')
debugger.addPoint3D(np.array(pred) / 64. - 0.5, c=color, marker='x')
rotated = Rotate(canonical, gt_view)
rotated[:, 2], rotated[:, 1] = -rotated[:, 1].copy(), -rotated[:, 2].copy()
debugger.addPoint3D(np.array(rotated) / 64. - 0.5, c=color, marker='^')
debugger.showAllImg(pause=False)
debugger.show3D()
bar.finish()
accAll10 = 0.
accAll30 = 0.
numAll = 0.
mid = {}
def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()
    Loss, Acc = AverageMeter(), AverageMeter()
    nIters = len(dataLoader)
    # bar = Bar('==>', max=nIters)
    start_time = time.time()
    for i, (input, target) in enumerate(dataLoader):
        input_var = torch.autograd.Variable(input).float().cuda()
        target_var = torch.autograd.Variable(target.cuda(async=True)).float().cuda()
        if model.hgType == 'vae':
            output, latentspace = model(input_var)
        else:
            output = model(input_var)
        if opt.DEBUG >= 2:
            gt = getPreds(target.cpu().numpy()) * 4
            pred = getPreds((output[opt.nStack - 1].data).cpu().numpy()) * 4
            # init = getPreds(input.numpy()[:, 3:])
            debugger = Debugger()
            img = (input[0].numpy()[:3].transpose(1, 2, 0) * 256).astype(np.uint8).copy()
            debugger.addImg(img)
            debugger.addPoint2D(pred[0], (255, 0, 0))
            debugger.addPoint2D(gt[0], (0, 0, 255))
            # debugger.addPoint2D(init[0], (0, 255, 0))
            debugger.showAllImg(pause=True)
            # debugger.saveImg('debug/{}.png'.format(i))
        loss = criterion(output[0], target_var)
        for k in range(1, opt.nStack):
            loss += criterion(output[k], target_var)
        if model.hgType == 'vae':
            # Add the weighted KL regularizer for each stack's latent space.
            for k in range(0, opt.nStack):
                loss += ref.vaeloss_wt * _compute_kl(latentspace[k])
        Loss.update(loss.data[0], input.size(0))
        Acc.update(Accuracy((output[opt.nStack - 1].data).cpu().numpy(),
                            (target_var.data).cpu().numpy()))
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Acc {Acc.avg:.6f} ({Acc.val:.6f})'.format(epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td, loss=Loss, Acc=Acc, split=split)
        # bar.next()
        curr_time = time.time()
        print('{split} Epoch: [{0}][{1}/{2}]| Total: {total:f} | ETA: {eta:f} | Loss {loss.avg:.6f} | Acc {Acc.avg:.6f} ({Acc.val:.6f})'
              .format(epoch, i, nIters, total=curr_time - start_time,
                      eta=(curr_time - start_time) * (nIters - i + 1) / (i + 1),
                      loss=Loss, Acc=Acc, split=split))
    # bar.finish()
    return Loss.avg, Acc.avg
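# A hedged sketch of the _compute_kl term used above, assuming each
# latentspace[k] packs the (mu, logvar) of a diagonal Gaussian posterior;
# the repo's actual packing may differ. This is the analytic KL divergence
# against a standard normal prior: -0.5 * sum(1 + logvar - mu^2 - exp(logvar)).
import torch

def compute_kl_sketch(latent):
    mu, logvar = latent  # assumed packing of the stack's latent space
    kl = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=1)
    return kl.mean()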
scale = 1.0 * h / mheight
new_im = image.resize((int(w / scale), int(h / scale)), Image.ANTIALIAS)
new_im.save(filename)
new_im.close()


# opt = opts().parse()
imageName = './images/test3.jpg'
# process_image(imageName)
model = torch.load('../model/Stage3/model_10.pth',
                   map_location=lambda storage, loc: storage)
img = cv2.imread(imageName)
print(type(np.array(img)))
input = torch.from_numpy(img.transpose(2, 0, 1)).float() / 256.
input = input.view(1, input.size(0), input.size(1), input.size(2))
input_var = torch.autograd.Variable(input).float()
output = model(input_var)
pred = getPreds((output[-2].data).cpu().numpy())[0] * 4
reg = (output[-1].data).cpu().numpy().reshape(pred.shape[0], 1)
print(pred, (reg + 1) / 2. * 256)
debugger = Debugger()
debugger.addImg((input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8))
debugger.addPoint2D(pred, (255, 0, 0))
debugger.addPoint3D(np.concatenate([pred, (reg + 1) / 2. * 256], axis=1))
debugger.showImg(pause=True)
debugger.show3D()
def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()
    Loss, Acc = AverageMeter(), AverageMeter()
    preds = []
    nIters = len(dataLoader)
    bar = Bar('{}'.format(opt.expID), max=nIters)
    for i, (input, targets, action, meta) in enumerate(dataLoader):
        input_var = torch.autograd.Variable(input).float().cuda(opt.GPU)
        target_var = []
        for t in range(len(targets)):
            target_var.append(torch.autograd.Variable(targets[t]).float().cuda(opt.GPU))
        # Draw numNoise noise maps to condition the sampler on.
        z = []
        for k in range(opt.numNoise):
            noise = torch.autograd.Variable(
                torch.randn((input_var.shape[0], 1, 64, 64))).cuda(opt.GPU)
            z.append(noise)
        output, samples = model(input_var, z, action)
        pred_sample = maximumExpectedUtility(samples, criterion)
        target = maximumExpectedUtility(target_var, criterion)
        if opt.DEBUG >= 2:
            gt = getPreds(target.cpu().numpy()) * 4
            pred = getPreds((pred_sample.data).cpu().numpy()) * 4
            debugger = Debugger()
            img = (input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8).copy()
            debugger.addImg(img)
            debugger.addPoint2D(pred[0], (255, 0, 0))
            debugger.addPoint2D(gt[0], (0, 0, 255))
            debugger.showAllImg(pause=True)
        loss = DiscoLoss(output, samples, target_var, criterion)
        Loss.update(loss.item(), input.size(0))
        Acc.update(Accuracy((pred_sample.data).cpu().numpy(),
                            (target.data).cpu().numpy()))
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            # Flip test: average the prediction with the prediction on the
            # horizontally flipped input.
            input_ = input.cpu().numpy()
            input_[0] = Flip(input_[0]).copy()
            inputFlip_var = torch.autograd.Variable(
                torch.from_numpy(input_).view(1, input_.shape[1], ref.inputRes,
                                              ref.inputRes)).float().cuda(opt.GPU)
            _, samplesFlip = model(inputFlip_var, z, action)
            pred_sample_flip = maximumExpectedUtility(samplesFlip, criterion)
            outputFlip = ShuffleLR(
                Flip((pred_sample_flip.data).cpu().numpy()[0])).reshape(
                    1, ref.nJoints, ref.outputRes, ref.outputRes)
            output_ = old_div(((pred_sample.data).cpu().numpy() + outputFlip), 2)
            preds.append(finalPreds(output_, meta['center'], meta['scale'],
                                    meta['rotate'])[0])
        Bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Acc {Acc.avg:.6f} ({Acc.val:.6f})'.format(
            epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td, loss=Loss,
            Acc=Acc, split=split)
        bar.next()
    bar.finish()
    return {'Loss': Loss.avg, 'Acc': Acc.avg}, preds
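# A hedged sketch of maximumExpectedUtility as used above, assuming
# DISCO-Nets-style decoding: among the drawn samples, return the one whose
# average loss against all the others is smallest (the minimum-expected-loss
# candidate). This is an illustration of the idea, not the repo's code.
def meu_sketch(samples, criterion):
    best, best_cost = None, float('inf')
    for s in samples:
        cost = sum(criterion(s, other).item() for other in samples)
        if cost < best_cost:
            best, best_cost = s, cost
    return best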
def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()
    Loss, Acc, Mpjpe, Loss3D = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    nIters = len(dataLoader)
    bar = Bar('==>', max=nIters)
    for i, (input, target2D, target3D, meta) in enumerate(dataLoader):
        input_var = torch.autograd.Variable(input).float().cuda()
        target2D_var = torch.autograd.Variable(target2D).float().cuda()
        target3D_var = torch.autograd.Variable(target3D).float().cuda()
        output = model(input_var)
        reg = output[opt.nStack]
        if opt.DEBUG >= 2:
            gt = getPreds(target2D.cpu().numpy()) * 4
            pred = getPreds((output[opt.nStack - 1].data).cpu().numpy()) * 4
            debugger = Debugger()
            debugger.addImg((input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8))
            debugger.addPoint2D(pred[0], (255, 0, 0))
            debugger.addPoint2D(gt[0], (0, 0, 255))
            debugger.showImg()
            debugger.saveImg('debug/{}.png'.format(i))
        # FusionCriterion is an autograd Function that can be called only
        # once per forward pass, so a fresh instance is created every
        # iteration. Don't ask why.
        loss = FusionCriterion(opt.regWeight, opt.varWeight)(reg, target3D_var)
        Loss3D.update(loss.data[0], input.size(0))
        for k in range(opt.nStack):
            loss += criterion(output[k], target2D_var)
        Loss.update(loss.data[0], input.size(0))
        Acc.update(Accuracy((output[opt.nStack - 1].data).cpu().numpy(),
                            (target2D_var.data).cpu().numpy()))
        mpjpe, num3D = MPJPE((output[opt.nStack - 1].data).cpu().numpy(),
                             (reg.data).cpu().numpy(), meta)
        if num3D > 0:
            Mpjpe.update(mpjpe, num3D)
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        Bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Loss3D {loss3d.avg:.6f} | Acc {Acc.avg:.6f} | Mpjpe {Mpjpe.avg:.6f} ({Mpjpe.val:.6f})'.format(
            epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td, loss=Loss,
            Acc=Acc, split=split, Mpjpe=Mpjpe, loss3d=Loss3D)
        bar.next()
    bar.finish()
    return Loss.avg, Acc.avg, Mpjpe.avg, Loss3D.avg
def main():
    opt = opts().parse()
    model = torch.load(opt.loadModel)
    img = cv2.imread(opt.demo)
    s = max(img.shape[0], img.shape[1]) * 1.0
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
    img = Crop(img, c, s, 0, ref.inputRes) / 256.
    input = torch.from_numpy(img.copy()).float()
    input = input.view(1, input.size(0), input.size(1), input.size(2))
    input_var = torch.autograd.Variable(input).float()
    if opt.GPU > -1:
        model = model.cuda(opt.GPU)
        input_var = input_var.cuda(opt.GPU)
    output = model(input_var)
    hm = output[-1].data.cpu().numpy()
    debugger = Debugger()
    img = (input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8).copy()
    inp = img.copy()
    star = cv2.resize(hm[0, 0], (ref.inputRes, ref.inputRes)) * 255
    star[star > 255] = 255
    star[star < 0] = 0
    star = np.tile(star, (3, 1, 1)).transpose(1, 2, 0)
    trans = 0.8
    star = (trans * star + (1. - trans) * img).astype(np.uint8)
    ps = parseHeatmap(hm[0], thresh=0.1)
    canonical, pred, color, score = [], [], [], []
    for k in range(len(ps[0])):
        # Channels 1-3 hold the canonical (x, y, z); channel 4 holds depth.
        x, y, z = ((hm[0, 1:4, ps[0][k], ps[1][k]] + 0.5) * ref.outputRes).astype(np.int32)
        dep = ((hm[0, 4, ps[0][k], ps[1][k]] + 0.5) * ref.outputRes).astype(np.int32)
        canonical.append([x, y, z])
        pred.append([ps[1][k], ref.outputRes - dep, ref.outputRes - ps[0][k]])
        score.append(hm[0, 0, ps[0][k], ps[1][k]])
        color.append((1.0 * x / ref.outputRes, 1.0 * y / ref.outputRes, 1.0 * z / ref.outputRes))
        cv2.circle(img, (ps[1][k] * 4, ps[0][k] * 4), 4, (255, 255, 255), -1)
        cv2.circle(img, (ps[1][k] * 4, ps[0][k] * 4), 2,
                   (int(z * 4), int(y * 4), int(x * 4)), -1)
    pred = np.array(pred).astype(np.float32)
    canonical = np.array(canonical).astype(np.float32)
    # Score-weighted similarity alignment of the canonical points onto the
    # predicted points.
    pointS = canonical * 1.0 / ref.outputRes
    pointT = pred * 1.0 / ref.outputRes
    R, t, s = horn87(pointS.transpose(), pointT.transpose(), score)
    rotated_pred = s * np.dot(R, canonical.transpose()).transpose() + t * ref.outputRes
    debugger.addImg(inp, 'inp')
    debugger.addImg(star, 'star')
    debugger.addImg(img, 'nms')
    debugger.addPoint3D(canonical / ref.outputRes - 0.5, c=color, marker='^')
    debugger.addPoint3D(pred / ref.outputRes - 0.5, c=color, marker='x')
    debugger.addPoint3D(rotated_pred / ref.outputRes - 0.5, c=color, marker='*')
    debugger.showAllImg(pause=True)
    debugger.show3D()
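# A hedged sketch of parseHeatmap as called above, assuming it thresholds
# the confidence channel and returns np.where-style (row, col) indices of
# surviving peaks; the real implementation may also apply local-maximum
# suppression before thresholding.
import numpy as np

def parseHeatmap_sketch(hm, thresh=0.1):
    # hm: C x H x W with channel 0 the keypoint confidence map
    return np.where(hm[0] > thresh)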
bestR = angle2dcm(pred)
R_gt = angle2dcm(gt_view)
# Geodesic distance between the two rotations, in degrees:
# ||log(R1^T R2)||_F / sqrt(2) * 180 / pi.
err_ = ((logm(np.dot(np.transpose(bestR), R_gt))**2).sum())**0.5 / (2.**0.5) * 180 / PI
num[class_name] += 1
acc[class_name] += 1 if err_ <= 30. else 0
err[class_name].append(err_)
if DEBUG:
    input, target, mask, view = dataset[index]
    debugger = Debugger()
    img = (input[:3].transpose(1, 2, 0) * 256).astype(np.uint8).copy()
    debugger.addImg(img)
    debugger.showAllImg(pause=False)

accAll = 0.
numAll = 0.
mid = {}
err_all = []
for k, v in ref.pascalClassName.items():
    accAll += acc[v]
    numAll += num[v]
    acc[v] = 1.0 * acc[v] / num[v]
    mid[v] = np.sort(np.array(err[v]))[len(err[v]) // 2]
    err_all = err_all + err[v]
print('Acc', acc)
print('num', num)
print('mid', mid)
def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()
    Loss, Acc = AverageMeter(), AverageMeter()
    preds = []
    nIters = len(dataLoader)
    bar = Bar('{}'.format(opt.expID), max=nIters)
    for i, (input, target, target2, meta) in enumerate(dataLoader):
        input_var = torch.autograd.Variable(input).float().cuda()
        target_var = torch.autograd.Variable(target).float().cuda()
        target_var2 = torch.autograd.Variable(target2).float().cuda()
        output = model(input_var)
        if opt.DEBUG >= 2:
            gt = getPreds(target.cpu().numpy()) * 4
            pred = getPreds((output[opt.nStack - 1].data).cpu().numpy()) * 4
            debugger = Debugger()
            img = (input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8).copy()
            debugger.addImg(img)
            debugger.addPoint2D(pred[0], (255, 0, 0))
            debugger.addPoint2D(gt[0], (0, 0, 255))
            debugger.showAllImg(pause=True)
        loss = criterion(output[0], target_var)
        for k in range(1, opt.nStack):
            loss += criterion(output[k], target_var)
        Loss.update(loss.data[0], input.size(0))
        Acc.update(Accuracy((output[opt.nStack - 1].data).cpu().numpy(),
                            (target_var.data).cpu().numpy()))
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            # Flip test: run the horizontally flipped input, flip the
            # heatmaps back, swap symmetric joints, and average.
            input_ = input.cpu().numpy()
            input_[0] = Flip(input_[0]).copy()
            inputFlip_var = torch.autograd.Variable(
                torch.from_numpy(input_).view(1, input_.shape[1], ref.inputRes,
                                              ref.inputRes)).float().cuda(opt.GPU)
            outputFlip = model(inputFlip_var)
            outputFlip = ShuffleLR(
                Flip((outputFlip[opt.nStack - 1].data).cpu().numpy()[0])).reshape(
                    1, ref.nJoints, 64, 64)
            output_ = ((output[opt.nStack - 1].data).cpu().numpy() + outputFlip) / 2
            preds.append(finalPreds(output_, meta['center'], meta['scale'],
                                    meta['rotate'])[0])
        Bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Acc {Acc.avg:.6f} ({Acc.val:.6f})'.format(
            epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td, loss=Loss,
            Acc=Acc, split=split)
        bar.next()
    bar.finish()
    return {'Loss': Loss.avg, 'Acc': Acc.avg}, preds
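# A hedged sketch of the ShuffleLR step in the flip test above: after the
# heatmaps are flipped horizontally, the channels of left/right symmetric
# joints must be swapped. The pair list below is illustrative (MPII-style)
# and hypothetical, not the repo's actual flip-reference table.
import numpy as np

FLIP_PAIRS = [(0, 5), (1, 4), (2, 3), (10, 15), (11, 14), (12, 13)]  # hypothetical

def shuffleLR_sketch(hm):
    # hm: J x H x W joint heatmaps
    out = hm.copy()
    for a, b in FLIP_PAIRS:
        out[a], out[b] = hm[b].copy(), hm[a].copy()
    return out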
def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()
    preds = []
    Loss, LossStar = AverageMeter(), AverageMeter()
    nIters = len(dataLoader)
    bar = Bar('{}'.format(opt.expID), max=nIters)
    for i, (input, target, mask) in enumerate(dataLoader):
        if mask.size(1) > 1:
            # Re-weight the embedding channels relative to the heatmap channel.
            mask[:, 1:, :, :] *= ref.outputRes * (opt.regWeight**0.5)
        if opt.GPU > -1:
            input_var = torch.autograd.Variable(
                input.cuda(opt.GPU, async=True)).float().cuda(opt.GPU)
            target_var = torch.autograd.Variable(
                target.cuda(opt.GPU, async=True)).float().cuda(opt.GPU)
            mask_var = torch.autograd.Variable(
                mask.cuda(opt.GPU, async=True)).float().cuda(opt.GPU)
        else:
            input_var = torch.autograd.Variable(input).float()
            target_var = torch.autograd.Variable(target).float()
            mask_var = torch.autograd.Variable(mask).float()
        output = model(input_var)
        output_pred = output[opt.nStack - 1].data.cpu().numpy().copy()
        for k in range(opt.nStack):
            output[k] = mask_var * output[k]
        target_var = mask_var * target_var
        loss = 0
        for k in range(opt.nStack):
            loss += criterion(output[k], target_var)
        LossStar.update(((target.float()[:, 0, :, :] -
                          output[opt.nStack - 1].cpu().data.float()[:, 0, :, :])**2).mean())
        Loss.update(loss.data[0], input.size(0))
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            if opt.test:
                out = {}
                input_ = input.cpu().numpy()
                input_[0] = Flip(input_[0]).copy()
                inputFlip_var = torch.autograd.Variable(
                    torch.from_numpy(input_).view(1, input_.shape[1], ref.inputRes,
                                                  ref.inputRes)).float().cuda(opt.GPU)
                outputFlip = model(inputFlip_var)
                output_flip = outputFlip[opt.nStack - 1].data.cpu().numpy()
                output_flip[0] = Flip(output_flip[0])
                if not (opt.task == 'star'):
                    # The x-embedding channel changes sign under a horizontal flip.
                    output_flip[0, 1, :, :] = -output_flip[0, 1, :, :]
                output_pred = (output_pred + output_flip) / 2.0
                out['map'] = output_pred
                preds.append(out)
        Bar.suffix = '{split:5} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | LossStar {lossStar.avg:.6f}'.format(
            epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td, loss=Loss,
            lossStar=LossStar, split=split)
        bar.next()
        if opt.DEBUG > 1 or (opt.DEBUG == 1 and i % (nIters // 200) == 0):
            for j in range(input.size(0)):
                debugger = Debugger()
                img = (input[j].numpy()[:3].transpose(1, 2, 0) * 256).astype(np.uint8).copy()
                img2 = img.copy().astype(np.float32)
                img3 = img.copy().astype(np.float32)
                imgMNS = img.copy()
                out = cv2.resize(((output[opt.nStack - 1][j, 0].data).cpu().numpy()).copy(),
                                 (ref.inputRes, ref.inputRes)) * 256
                gtmap = cv2.resize((target[j, 0].cpu().numpy()).copy(),
                                   (ref.inputRes, ref.inputRes)) * 256
                out[out < 0] = 0
                out[out > 255] = 255
                img2[:, :, 0] = img2[:, :, 0] + out
                img2[img2 > 255] = 255
                img3[:, :, 2] = img3[:, :, 2] + gtmap
                img3[img3 > 255] = 255
                gtmap[gtmap > 255] = 255
                idx = i * input.size(0) + j if opt.DEBUG == 1 else 0
                img2, out, gtmap, img3 = (img2.astype(np.uint8), out.astype(np.uint8),
                                          gtmap.astype(np.uint8), img3.astype(np.uint8))
                if 'emb' in opt.task:
                    gt, pred = [], []
                    ps = parseHeatmap(target[j].numpy())
                    print('ps', ps)
                    for k in range(len(ps[0])):
                        print('target', k, target[j, 1:4, ps[0][k], ps[1][k]].numpy())
                        x, y, z = ((target[j, 1:4, ps[0][k], ps[1][k]].numpy() + 0.5) * 255).astype(np.int32)
                        gt.append(target[j, 1:4, ps[0][k], ps[1][k]].numpy())
                        cv2.circle(imgMNS, (ps[1][k] * 4, ps[0][k] * 4), 6,
                                   (int(x), int(y), int(z)), -1)
                    ps = parseHeatmap(output_pred[j])
                    for k in range(len(ps[0])):
                        print('pred', k, output_pred[j, 1:4, ps[0][k], ps[1][k]])
                        x, y, z = ((output_pred[j, 1:4, ps[0][k], ps[1][k]] + 0.5) * 255).astype(np.int32)
                        pred.append(output_pred[j, 1:4, ps[0][k], ps[1][k]])
                        cv2.circle(imgMNS, (ps[1][k] * 4, ps[0][k] * 4), 4,
                                   (255, 255, 255), -1)
                        cv2.circle(imgMNS, (ps[1][k] * 4, ps[0][k] * 4), 2,
                                   (int(x), int(y), int(z)), -1)
                    debugger.addPoint3D(np.array(gt), c='auto', marker='o')
                    # debugger.addPoint3D(np.array(pred), c='auto', marker='x')
                debugger.addImg(imgMNS, '{}_mns'.format(idx))
                debugger.addImg(out, '{}_out'.format(idx))
                debugger.addImg(gtmap, '{}_gt'.format(idx))
                debugger.addImg(img, '{}_img'.format(idx))
                debugger.addImg(img2, '{}_img2'.format(idx))
                debugger.addImg(img3, '{}_img3'.format(idx))
                if opt.DEBUG == 1:
                    debugger.saveAllImg(path=opt.debugPath)
                else:
                    debugger.showAllImg(pause=not ('emb' in opt.task))
                if 'emb' in opt.task:
                    debugger.show3D()
    bar.finish()
    return {'Loss': Loss.avg, 'LossStar': LossStar.avg}, preds