def uncertainty_test(model, input_var, heat_thresh, ax):
    """Estimate keypoint uncertainty with MC dropout and plot a GMM fit.

    Runs ``T`` stochastic forward passes with dropout kept active, collects
    the detected keypoint image coordinates from every pass, fits a
    Dirichlet-process Gaussian mixture to the pooled detections, and draws
    the mixture on ``ax`` via ``plot_gmm``.

    Args:
        model: network whose dropout layers are forced into train mode.
        input_var: input batch, passed unchanged to ``model`` each pass.
        heat_thresh: peak threshold forwarded to ``parseHeatmap``.
        ax: matplotlib axes handed to ``plot_gmm``.

    Returns:
        None.  Returns early (plotting nothing) if no keypoint is detected
        in any pass.
    """
    # MC dropout: keep dropout stochastic at inference time.
    model.train()
    model.apply(set_dropout_to_train)

    T = 100                # number of stochastic forward passes
    all_kps = []           # one [x, y] row per detected keypoint, all passes
    gmm_component_num = 0  # max keypoints seen in any single pass

    # Sampling loop.
    for _ in range(T):
        output = model(input_var)
        hm = output[-1].data.cpu().numpy()
        ps = parseHeatmap(hm[0], heat_thresh)
        kp_num = len(ps[0])
        gmm_component_num = max(gmm_component_num, kp_num)
        for k in range(kp_num):
            # Heatmap is 4x downsampled relative to the input image, so
            # scale peak coordinates back to image space.
            all_kps.append([ps[1][k] * 4, ps[0][k] * 4])

    # Guard: with no detections there is nothing to fit -- the original
    # code would crash on fit(None) / n_components=0 here.
    if not all_kps:
        return

    # Single conversion instead of repeated np.vstack (which was O(n^2)
    # and left a 1-D array when only one keypoint was ever found).
    all_kps = np.asarray(all_kps)

    # Dirichlet-process mixture prunes unused components on its own, so
    # gmm_component_num only needs to be an upper bound.
    dpgmm = mixture.BayesianGaussianMixture(
        n_components=gmm_component_num,
        covariance_type='full',
        weight_concentration_prior_type="dirichlet_process",
        init_params='kmeans',
        mean_precision_prior=1,
        weight_concentration_prior=None).fit(all_kps)
    plot_gmm(dpgmm, all_kps, ax)
def main():
    """Demo entry point: run StarMap on one image and visualize keypoints.

    Loads a trained model and an image from command-line options, runs the
    network, draws detected keypoints in 2D, aligns canonical 3D points to
    predictions with a similarity transform, and shows debug views.
    """
    opt = opts().parse()
    model = torch.load(opt.loadModel)
    img = cv2.imread(opt.demo)
    # Square crop around the image center; side = longest image dimension.
    s = max(img.shape[0], img.shape[1]) * 1.0
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
    # HWC uint8 -> CHW float in [0, 1).  NOTE(review): the transpose was
    # missing in this copy; added for consistency with the identical
    # pipeline in the dropout demo -- downstream code (input.view(1, C, H,
    # W) and input[0].transpose(1, 2, 0)) requires CHW.  Confirm Crop
    # returns HWC.
    img = Crop(img, c, s, 0, ref.inputRes).astype(np.float32).transpose(2, 0, 1) / 256.
    input = torch.from_numpy(img.copy()).float()
    # Add batch dimension: (C, H, W) -> (1, C, H, W).
    input = input.view(1, input.size(0), input.size(1), input.size(2))
    input_var = torch.autograd.Variable(input).float()
    if opt.GPU > -1:
        model = model.cuda(opt.GPU)
        input_var = input_var.cuda(opt.GPU)
    output = model(input_var)
    hm = output[-1].data.cpu().numpy()
    debugger = Debugger()
    # Back to HWC uint8 for OpenCV drawing.
    img = (input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8).copy()
    inp = img.copy()
    # Heatmap channel 0 blended over the input as the "star" visualization.
    star = (cv2.resize(hm[0, 0], (ref.inputRes, ref.inputRes)) * 255)
    star[star > 255] = 255
    star[star < 0] = 0
    star = np.tile(star, (3, 1, 1)).transpose(1, 2, 0)
    trans = 0.8
    star = (trans * star + (1. - trans) * img).astype(np.uint8)
    ps = parseHeatmap(hm[0], thresh=0.1)
    canonical, pred, color, score = [], [], [], []
    for k in range(len(ps[0])):
        # Channels 1:4 encode the canonical 3D coordinate of the keypoint;
        # channel 4 encodes depth (values in [-0.5, 0.5), rescaled).
        x, y, z = ((hm[0, 1:4, ps[0][k], ps[1][k]] + 0.5) *
                   ref.outputRes).astype(np.int32)
        dep = ((hm[0, 4, ps[0][k], ps[1][k]] + 0.5) * ref.outputRes).astype(
            np.int32)
        canonical.append([x, y, z])
        pred.append([ps[1][k], ref.outputRes - dep, ref.outputRes - ps[0][k]])
        score.append(hm[0, 0, ps[0][k], ps[1][k]])
        color.append((1.0 * x / ref.outputRes, 1.0 * y / ref.outputRes,
                      1.0 * z / ref.outputRes))
        # White outline + canonical-coordinate color fill; *4 maps output
        # resolution back to input resolution.
        cv2.circle(img, (ps[1][k] * 4, ps[0][k] * 4), 4, (255, 255, 255), -1)
        cv2.circle(img, (ps[1][k] * 4, ps[0][k] * 4), 2,
                   (int(z * 4), int(y * 4), int(x * 4)), -1)
    pred = np.array(pred).astype(np.float32)
    canonical = np.array(canonical).astype(np.float32)
    pointS = canonical * 1.0 / ref.outputRes
    pointT = pred * 1.0 / ref.outputRes
    # Weighted similarity transform (R, t, s) aligning canonical points to
    # the predicted configuration.
    R, t, s = horn87(pointS.transpose(), pointT.transpose(), score)
    rotated_pred = s * np.dot(
        R, canonical.transpose()).transpose() + t * ref.outputRes
    debugger.addImg(inp, 'inp')
    debugger.addImg(star, 'star')
    debugger.addImg(img, 'nms')
    debugger.addPoint3D(canonical / ref.outputRes - 0.5, c=color, marker='^')
    debugger.addPoint3D(pred / ref.outputRes - 0.5, c=color, marker='x')
    debugger.addPoint3D(rotated_pred / ref.outputRes - 0.5, c=color,
                        marker='*')
    debugger.showAllImg(pause=True)
    debugger.show3D()
# Fragment of a per-sample evaluation loop (the enclosing `for`/`def` is
# outside this view; note the bare `continue` below, which requires a loop).
# NOTE(review): `class_id`, `index`, `dataset`, `preds`, `num`, `PI`, and
# `Rotate` are defined by the enclosing scope -- verify against the caller.
class_name = ref.ObjectNet3DClassName[class_id]
# Ground-truth viewpoint (azimuth, elevation, theta), degrees scaled by 180.
v = np.array([
    dataset.annot['viewpoint_azimuth'][index],
    dataset.annot['viewpoint_elevation'][index],
    dataset.annot['viewpoint_theta'][index]
]) / 180.
valid = dataset.annot['valid'][index]
# Keep only the valid ground-truth embedding points / anchors.
gt_model = np.array(dataset.annot['space_embedding'][index])[valid > 0]
gt_view = v * PI  # presumably converts the /180-scaled angles to radians -- TODO confirm PI
anchors = np.array(dataset.annot['anchors_3d'][index][valid > 0])
gt_point = Rotate(gt_model, gt_view)
hm = preds[index]['map']
ps = parseHeatmap(hm[0], thresh=0.05)
# No detected peaks: count this sample as a miss for its class and skip it.
if len(ps[0]) == 0:
    num[class_name] += 1
    continue
canonical = []
pred = []
color = []
score = []
for k in range(len(ps[0])):
    # Channels 1:4 hold the canonical 3D coordinate, channel 4 the depth;
    # values are shifted by 0.5 and scaled to output resolution.
    x, y, z = ((hm[0, 1:4, ps[0][k], ps[1][k]] + 0.5) *
               ref.outputRes).astype(np.int32)
    dep = ((hm[0, 4, ps[0][k], ps[1][k]] + 0.5) * ref.outputRes).astype(
        np.int32)
    # Channel 0 is the peak confidence.
    score.append(hm[0, 0, ps[0][k], ps[1][k]])
    canonical.append([x, y, z])
def main():
    """MC-dropout keypoint demo: run StarMap on one image, overlay detected
    keypoints, and visualize their uncertainty via `uncertainty_test`.

    Loads a dropout-enabled model from a hard-coded path, processes a single
    hard-coded image, and saves the annotated figure to 'kp_cov.png'.
    """
    # Use the model trained with dropout enabled (required for MC dropout).
    model_path = '/home/erl/moshan/orcvio_gamma/orcvio_gamma/pytorch_models/starmap/trained_models/with_dropout/model_cpu.pth'
    img_path = './images/car.png'
    det_name = './det/car.png'  # NOTE(review): unused in this function -- confirm intent
    # By default the network input size is 256, heatmap output size is 64.
    inputRes = 256
    outputRes = 64
    CUDA = torch.cuda.is_available()
    model = torch.load(model_path)
    img = cv2.imread(img_path)
    # Square crop around the image center; side = longest image dimension.
    s = max(img.shape[0], img.shape[1]) * 1.0
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
    # img = cv2.resize(img, (320, 240))
    # print(img.shape)
    # Crop to inputRes, then HWC uint8 -> CHW float in [0, 1).
    img = Crop(img, c, s, 0, inputRes).astype(np.float32).transpose(2, 0, 1) / 256.
    input = torch.from_numpy(img.copy()).float()
    # Add batch dimension: (C, H, W) -> (1, C, H, W).
    input = input.view(1, input.size(0), input.size(1), input.size(2))
    input_var = torch.autograd.Variable(input).float()
    if CUDA:
        model.cuda()
        input_var = input_var.cuda()
    output = model(input_var)
    hm = output[-1].data.cpu().numpy()
    # Convert back to HWC uint8 for display/drawing.
    img = (input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8).copy()
    inp = img.copy()
    # hm[0, 0] is the star (keypoint) heatmap; dim 0 is the batch index.
    star = (cv2.resize(hm[0, 0], (inputRes, inputRes)) * 255)
    # Clip values to the displayable 0-255 range.
    star[star > 255] = 255
    star[star < 0] = 0
    # Replicate the single channel to 3 channels (BGR) for blending.
    star = np.tile(star, (3, 1, 1)).transpose(1, 2, 0)
    trans = 0.8
    star = (trans * star + (1. - trans) * img).astype(np.uint8)
    # Select peaks (NMS) above the heat threshold.
    heat_thresh = 0.25
    ps = parseHeatmap(hm[0], heat_thresh)
    canonical, pred, color, score = [], [], [], []
    # MC dropout: fit and draw a GMM over keypoints sampled across passes.
    f1 = plt.figure()
    ax1 = f1.add_subplot(111)
    # NOTE(review): circles are drawn into `img` in-place after imshow;
    # whether they appear in the figure depends on matplotlib's deferred
    # rendering of the shared array -- confirm visually.
    ax1.imshow(img)
    uncertainty_test(model, input_var, heat_thresh, ax1)
    for k in range(len(ps[0])):
        # Channels 1:4: canonical 3D coordinate (camera-view feature).
        x, y, z = ((hm[0, 1:4, ps[0][k], ps[1][k]] + 0.5) * outputRes).astype(
            np.int32)
        # Channel 4: depth.
        dep = ((hm[0, 4, ps[0][k], ps[1][k]] + 0.5) * outputRes).astype(
            np.int32)
        canonical.append([x, y, z])
        pred.append([ps[1][k], outputRes - dep, outputRes - ps[0][k]])
        # Keypoint confidence score from the star heatmap.
        score.append(hm[0, 0, ps[0][k], ps[1][k]])
        color.append(
            (1.0 * x / outputRes, 1.0 * y / outputRes, 1.0 * z / outputRes))
        # cv2.circle(img, center, radius, color, thickness); thickness=-1
        # draws a filled circle.  *4 maps heatmap coords to image coords.
        cv2.circle(img, (ps[1][k] * 4, ps[0][k] * 4), 6, (0, 0, 255), -1)
        cv2.circle(img, (ps[1][k] * 4, ps[0][k] * 4), 2,
                   (int(z * 4), int(y * 4), int(x * 4)), -1)
        # plot cov
        # pos = kps_mean[k]
        # covar = kps_cov[k]
        # draw_ellipse(pos, covar, ax1)
    # Strip axes so only the image is saved.
    plt.axis('off')
    ax1.get_xaxis().set_visible(False)
    ax1.get_yaxis().set_visible(False)
    plt.show()
    f1.savefig('kp_cov.png', bbox_inches='tight', pad_inches=0)
    # plt.pause(5)
    pred = np.array(pred).astype(np.float32)
    canonical = np.array(canonical).astype(np.float32)
    pointS = canonical * 1.0 / outputRes
    pointT = pred * 1.0 / outputRes
    # Calculate viewpoint: weighted similarity transform (R, t, s) aligning
    # canonical points to predictions.
    R, t, s = horn87(pointS.transpose(), pointT.transpose(), score)
    rotated_pred = s * np.dot(
        R, canonical.transpose()).transpose() + t * outputRes
def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    """Run one epoch of training or evaluation over `dataLoader`.

    Args:
        split: 'train' enables backprop; anything else runs evaluation.
        epoch: epoch number, used only for the progress-bar text.
        opt: options object (expects GPU, nStack, regWeight, test, task,
             expID, DEBUG, debugPath attributes).
        dataLoader: yields (input, target, mask) batches.
        model: stacked-hourglass-style network returning one output per stack.
        criterion: per-stack loss function.
        optimizer: required when split == 'train'; unused otherwise.

    Returns:
        ({'Loss': avg_loss, 'LossStar': avg_star_loss}, preds) where preds
        collects flip-averaged heatmaps when opt.test is set.

    NOTE(review): uses legacy PyTorch idioms (`async=True`, `loss.data[0]`,
    autograd.Variable).  `async` is a reserved word in Python >= 3.7, so this
    code targets an older Python/PyTorch -- do not modernize piecemeal.
    """
    if split == 'train':
        model.train()
    else:
        model.eval()
    preds = []
    Loss, LossStar = AverageMeter(), AverageMeter()
    nIters = len(dataLoader)
    bar = Bar('{}'.format(opt.expID), max=nIters)
    for i, (input, target, mask) in enumerate(dataLoader):
        # Scale the regression-channel mask so regression terms are weighted
        # by outputRes * sqrt(regWeight) relative to the heatmap channel.
        if mask.size(1) > 1:
            mask[:, 1:, :, :] *= ref.outputRes * (opt.regWeight**0.5)
        if opt.GPU > -1:
            input_var = torch.autograd.Variable(input.cuda(
                opt.GPU, async=True)).float().cuda(opt.GPU)
            target_var = torch.autograd.Variable(
                target.cuda(opt.GPU, async=True)).float().cuda(opt.GPU)
            mask_var = torch.autograd.Variable(mask.cuda(
                opt.GPU, async=True)).float().cuda(opt.GPU)
        else:
            input_var = torch.autograd.Variable(input).float()
            target_var = torch.autograd.Variable(target).float()
            mask_var = torch.autograd.Variable(mask).float()
        output = model(input_var)
        # Keep an unmasked copy of the last stack's prediction for debugging
        # and test-time flip averaging.
        output_pred = output[opt.nStack - 1].data.cpu().numpy().copy()
        # Apply the mask to every stack's output and to the target, so the
        # loss ignores invalid pixels/channels.
        for k in range(opt.nStack):
            output[k] = mask_var * output[k]
        target_var = mask_var * target_var
        # Intermediate supervision: sum the criterion over all stacks.
        loss = 0
        for k in range(opt.nStack):
            loss += criterion(output[k], target_var)
        # Track the unmasked MSE of the star (channel 0) heatmap separately.
        LossStar.update(((target.float()[:, 0, :, :] -
                          output[opt.nStack - 1].cpu().data.float()
                          [:, 0, :, :])**2).mean())
        Loss.update(loss.data[0], input.size(0))
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            if opt.test:
                # Test-time augmentation: average predictions over the
                # horizontally flipped input.
                out = {}
                input_ = input.cpu().numpy()
                input_[0] = Flip(input_[0]).copy()
                inputFlip_var = torch.autograd.Variable(
                    torch.from_numpy(input_).view(
                        1, input_.shape[1], ref.inputRes,
                        ref.inputRes)).float().cuda(opt.GPU)
                outputFlip = model(inputFlip_var)
                output_flip = outputFlip[opt.nStack - 1].data.cpu().numpy()
                output_flip[0] = Flip(output_flip[0])
                # For non-star tasks channel 1 is an x-coordinate, whose
                # sign flips under horizontal mirroring.
                if not (opt.task == 'star'):
                    output_flip[0, 1, :, :] = -output_flip[0, 1, :, :]
                output_pred = (output_pred + output_flip) / 2.0
                out['map'] = output_pred
                preds.append(out)
        Bar.suffix = '{split:5} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | LossStar {lossStar.avg:.6f}'.format(
            epoch,
            i,
            nIters,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=Loss,
            lossStar=LossStar,
            split=split)
        bar.next()
        # Debug visualization: DEBUG > 1 shows every batch; DEBUG == 1
        # samples roughly 200 batches per epoch.
        # NOTE(review): `nIters / 200` divides to zero (Py2) or a float
        # (Py3) when nIters < 200 -- confirm nIters >= 200 in practice.
        if opt.DEBUG > 1 or (opt.DEBUG == 1 and i % (nIters / 200) == 0):
            for j in range(input.size(0)):
                debugger = Debugger()
                img = (input[j].numpy()[:3].transpose(1, 2, 0) * 256).astype(
                    np.uint8).copy()
                img2 = img.copy().astype(np.float32)
                img3 = img.copy().astype(np.float32)
                imgMNS = img.copy()
                # Last-stack star prediction and ground truth, upsampled to
                # input resolution for overlay.
                out = (cv2.resize(
                    ((output[opt.nStack - 1][j, 0].data).cpu().numpy()).copy(),
                    (ref.inputRes, ref.inputRes)) * 256)
                gtmap = (cv2.resize((target[j, 0].cpu().numpy()).copy(),
                                    (ref.inputRes, ref.inputRes)) * 256)
                out[out < 0] = 0
                out[out > 255] = 255
                # Prediction overlaid on the blue channel, GT on the red.
                img2[:, :, 0] = (img2[:, :, 0] + out)
                img2[img2 > 255] = 255
                img3[:, :, 2] = (img3[:, :, 2] + gtmap)
                img3[img3 > 255] = 255
                gtmap[gtmap > 255] = 255
                idx = i * input.size(0) + j if opt.DEBUG == 1 else 0
                img2, out, gtmap, img3 = img2.astype(np.uint8), out.astype(
                    np.uint8), gtmap.astype(np.uint8), img3.astype(np.uint8)
                if 'emb' in opt.task:
                    # Draw GT and predicted keypoints colored by their
                    # embedding coordinates.
                    gt, pred = [], []
                    ps = parseHeatmap(target[j].numpy())
                    print('ps', ps)
                    for k in range(len(ps[0])):
                        print('target', k, target[j, 1:4, ps[0][k],
                                                  ps[1][k]].numpy())
                        x, y, z = (
                            (target[j, 1:4, ps[0][k], ps[1][k]].numpy() + 0.5)
                            * 255).astype(np.int32)
                        gt.append(target[j, 1:4, ps[0][k], ps[1][k]].numpy())
                        cv2.circle(imgMNS, (ps[1][k] * 4, ps[0][k] * 4), 6,
                                   (int(x), int(y), int(z)), -1)
                    ps = parseHeatmap(output_pred[j])
                    for k in range(len(ps[0])):
                        print('pred', k, output_pred[j, 1:4, ps[0][k],
                                                     ps[1][k]])
                        x, y, z = (
                            (output_pred[j, 1:4, ps[0][k], ps[1][k]] + 0.5) *
                            255).astype(np.int32)
                        pred.append(output_pred[j, 1:4, ps[0][k], ps[1][k]])
                        cv2.circle(imgMNS, (ps[1][k] * 4, ps[0][k] * 4), 4,
                                   (255, 255, 255), -1)
                        cv2.circle(imgMNS, (ps[1][k] * 4, ps[0][k] * 4), 2,
                                   (int(x), int(y), int(z)), -1)
                    debugger.addPoint3D(np.array(gt), c='auto', marker='o')
                    #debugger.addPoint3D(np.array(pred), c = 'auto', marker = 'x')
                debugger.addImg(imgMNS, '{}_mns'.format(idx))
                debugger.addImg(out, '{}_out'.format(idx))
                debugger.addImg(gtmap, '{}_gt'.format(idx))
                debugger.addImg(img, '{}_img'.format(idx))
                debugger.addImg(img2, '{}_img2'.format(idx))
                debugger.addImg(img3, '{}_img3'.format(idx))
                if opt.DEBUG == 1:
                    debugger.saveAllImg(path=opt.debugPath)
                else:
                    debugger.showAllImg(pause=not ('emb' in opt.task))
                if 'emb' in opt.task:
                    debugger.show3D()
    bar.finish()
    return {'Loss': Loss.avg, 'LossStar': LossStar.avg}, preds