def objective(para):
    """Evaluate a parameter set over the global ``primitives`` pair list.

    For every source/target primitive pair, a relative pose is estimated with
    ``RelativePoseEstimation_helper`` and compared against the stored ground
    truth: the squared Frobenius distance between the 3x3 rotations and their
    angular distance are accumulated.

    Args:
        para: parameter object forwarded to ``RelativePoseEstimation_helper``.

    Returns:
        Tuple ``(loss, ad)``: mean squared rotation residual and mean angular
        distance over all pairs.
    """
    loss, ad, count = 0, 0, 0
    for idx, prim in enumerate(primitives):
        # Assemble the per-view inputs expected by the pose estimator.
        src = {key: prim[key + '_src'] for key in ('pc', 'normal', 'feat', 'weight')}
        tgt = {key: prim[key + '_tgt'] for key in ('pc', 'normal', 'feat', 'weight')}
        R_gt = prim['R_gt']
        R_hat = RelativePoseEstimation_helper(src, tgt, para)
        loss += np.power(R_hat[:3, :3] - R_gt[:3, :3], 2).sum()
        ad += angular_distance_np(R_hat[:3, :3].reshape(1, 3, 3),
                                  R_gt[:3, :3].reshape(1, 3, 3))[0]
        count += 1
        print(idx)
        print(ad / count)
    loss /= count
    ad /= count
    print(f"{loss} {ad}")
    return loss, ad
def edge_quality(n, edges, Tstar, R):
    """Report weighted counts of good/bad edges and their mean rotation error.

    An edge is "bad" when its input relative rotation deviates from the
    ground-truth relative rotation (derived from ``Tstar``) by more than 30
    (units per ``angular_distance_np``); otherwise it is "good".  For each
    class the weighted mean of the squared Frobenius distance between the
    estimated relative rotation ``R[tid] R[sid]^T`` and the input rotation is
    printed.

    Args:
        n: number of nodes (unused; kept for interface compatibility).
        edges: iterable of dicts with keys 'src', 'tgt', 'R', 't', 'weight'.
        Tstar: ground-truth 4x4 poses indexed by node id.
        R: estimated 3x3 rotations indexed by node id.
    """
    good_edges = bad_edges = 0.0
    err_good = err_bad = 0.0
    for edge in edges:
        if edge['weight'] < 0.01:
            continue  # negligible weight: ignore this edge entirely
        sid = edge['src']
        tid = edge['tgt']
        # Ground-truth relative transform j <- i.
        Tij_gt = Tstar[tid].dot(inverse(Tstar[sid]))
        Tij_in = pack(edge['R'], edge['t'])
        aerr_gt = angular_distance_np(Tij_in[np.newaxis, :3, :3],
                                      Tij_gt[np.newaxis, :3, :3]).sum()
        # Squared Frobenius residual of the current rotation estimate.
        Rij = R[tid].dot(R[sid].T)
        aerr = np.linalg.norm(Rij - edge['R'], 'fro') ** 2
        if aerr_gt > 30.0:
            bad_edges += edge['weight']
            err_bad += aerr * edge['weight']
        else:
            good_edges += edge['weight']
            err_good += aerr * edge['weight']
    # Guard against an empty class to avoid ZeroDivisionError (the original
    # crashed when all edges fell on one side of the threshold).
    mean_good = err_good / good_edges if good_edges > 0 else 0.0
    mean_bad = err_bad / bad_edges if bad_edges > 0 else 0.0
    print('Edge Quality: #good=%f, #bad=%f, mean aerr=(%f, %f)' %
          (good_edges, bad_edges, mean_good, mean_bad))
def eval_Spectral():
    """Evaluate Spectral_Matching on source/target .mat scan pairs.

    Loads paired scans from ./source.mat and ./target.mat, runs spectral
    matching per pair, and prints a running / final average angular distance
    between the estimated and ground-truth relative rotations.
    """
    para = opts()
    dataS = sio.loadmat('./source.mat')
    dataT = sio.loadmat('./target.mat')
    num_data = dataS['R'].shape[0]
    ads = 0
    count = 0
    for j in range(num_data):
        # Ground-truth relative rotation: target pose composed with the
        # inverse source pose.
        R_gt = np.matmul(dataT['R'][j], np.linalg.inv(dataS['R'][j]))
        dataS_tmp = {}
        dataS_tmp['pc'] = dataS['pc'][0][j]
        dataS_tmp['normal'] = dataS['normal'][0][j]
        dataS_tmp['feat'] = dataS['feat'][0][j]
        dataT_tmp = {}
        dataT_tmp['pc'] = dataT['pc'][0][j]
        dataT_tmp['normal'] = dataT['normal'][0][j]
        dataT_tmp['feat'] = dataT['feat'][0][j]
        # NOTE(review): pc_src, pc_tgt and R_gt_44 are not defined anywhere in
        # this function — this call raises NameError as written. Presumably
        # dataS_tmp['pc'], dataT_tmp['pc'] and a 4x4 version of R_gt were
        # intended; confirm against the original repository.
        overlap_val, cam_dist_this, pc_dist_this, pc_nn = util.point_cloud_overlap(
            pc_src, pc_tgt, R_gt_44)
        # NOTE(review): `overlap` is computed but never used below.
        overlap = '0-0.1' if overlap_val <= 0.1 else '0.1-0.5' if overlap_val <= 0.5 else '0.5-1.0'
        #import pdb;pdb.set_trace()
        R = Spectral_Matching(dataS_tmp, dataT_tmp, 'k_means', para)
        count += 1
        if isinstance(R, list):
            # Multiple candidate rotations: keep the best one.
            ad_min = 360
            for i in range(len(R)):
                ad_tmp = angular_distance_np(R[i][:3, :3].reshape(1, 3, 3),
                                             R_gt[:3, :3].reshape(1, 3, 3))[0]
                if ad_tmp < ad_min:
                    print("min rp module:", i, ad_tmp)
                    ad_min = ad_tmp
            ads += ad_min
            print(j, ads / count)
        else:
            #import pdb;pdb.set_trace()
            ad_tmp = angular_distance_np(R[:3, :3].reshape(1, 3, 3),
                                         R_gt[:3, :3].reshape(1, 3, 3))[0]
            print(ad_tmp)
            ads += ad_tmp
            print(j, ads / count)
    print("average angular distance:", ads / count)
def __get_label__(Rij, tij, Ti, Tj):
    """Score an edge against ground truth: 1.0 if consistent, else 0.0.

    The rotation residual is the angular distance between the input relative
    rotation ``Rij`` and the ground-truth relative rotation derived from the
    decomposed poses; the translation residual is the L2 norm of
    ``Rij @ ti + tij - tj``.  Thresholds: 30.0 (rotation) and 0.2
    (translation).
    """
    Ristar, tistar = decompose(Ti)
    Rjstar, tjstar = decompose(Tj)
    Rij_gt = Rjstar.dot(Ristar.T)
    err_R = angular_distance_np(Rij[np.newaxis, :, :],
                                Rij_gt[np.newaxis, :, :]).sum()
    err_T = np.linalg.norm(Rij.dot(tistar) + tij - tjstar, 2)
    return 1.0 if (err_R < 30.0 and err_T < 0.2) else 0.0
def __get_label__(Rij, tij, Ti, Tj):
    """Score an edge against ground truth: 1.0 if consistent, else 0.0.

    Variant that compares translations via the packed 4x4 transforms: the
    translation residual is the L2 distance between the translation columns
    of the ground-truth relative transform ``Tj @ inverse(Ti)`` and the input
    transform ``pack(Rij, tij)``.  Thresholds: 30.0 (rotation) and 0.2
    (translation).
    """
    Ristar, tistar = decompose(Ti)
    Rjstar, tjstar = decompose(Tj)
    Tij_gt = Tj.dot(inverse(Ti))
    Tij_in = pack(Rij, tij)
    err_R = angular_distance_np(Rij[np.newaxis, :, :],
                                Rjstar.dot(Ristar.T)[np.newaxis, :, :]).sum()
    err_T = np.linalg.norm(Tij_gt[:3, 3] - Tij_in[:3, 3], 2)
    if err_R < 30.0 and err_T < 0.2:
        return 1.0
    return 0.0
def error(T, G):
    """Mean pairwise relative-pose error between two pose sets.

    For every unordered pair (i, j), compares the relative transforms
    ``T[j] @ inverse(T[i])`` and ``G[j] @ inverse(G[i])``.

    Args:
        T: (n, 4, 4) estimated poses.
        G: (n, 4, 4) ground-truth poses.

    Returns:
        Tuple ``(mean angular error, mean translation error)`` over all
        pairs (angular units per ``angular_distance_np``; presumably degrees
        given the 30.0 thresholds used elsewhere — confirm).
    """
    n = T.shape[0]
    aerrs = []
    terrs = []
    for i in range(n):
        for j in range(i + 1, n):
            # Relative transforms j <- i for estimate and ground truth.
            Tij = T[j, :, :].dot(inverse(T[i, :, :]))
            Gij = G[j, :, :].dot(inverse(G[i, :, :]))
            # (The original also computed an unused Frobenius norm here;
            # that dead computation has been removed.)
            aerr = angular_distance_np(Tij[np.newaxis, :3, :3],
                                       Gij[np.newaxis, :3, :3]).sum()
            terr = np.linalg.norm(Tij[:3, 3] - Gij[:3, 3], 2)
            aerrs.append(aerr)
            terrs.append(terr)
    # NOTE: with n < 2 this returns (nan, nan), matching np.mean([]).
    return np.mean(aerrs), np.mean(terrs)
def compute_sigma(mat_file1, mat_file2, txt, output_mat):
    """Estimate registration noise for a scan pair and save the statistics.

    Applies the Super4PCS transform from *txt* to the source vertices, finds
    each target vertex's nearest transformed source vertex with a k-d tree,
    and records the median of the distances below 0.2 as 'sigma', together
    with the rotation/translation error of the transform against the
    ground-truth relative pose.  Everything is written to *output_mat*.
    """
    src = sio.loadmat(mat_file1)
    tgt = sio.loadmat(mat_file2)
    verts_src = src['vertex']  # [3, n]
    verts_tgt = tgt['vertex']  # [3, n]
    Tij = read_super4pcs(txt)
    # Ground-truth relative pose: target pose composed with inverse source.
    Tij_gt = tgt['pose'].dot(inverse(src['pose']))
    # Transform the source vertices by the estimated pose.
    verts_src = Tij[:3, :3].dot(verts_src) + Tij[:3, 3][:, np.newaxis]
    # Nearest transformed-source neighbour for every target vertex.
    tree = NN(n_neighbors=1, algorithm='kd_tree').fit(verts_src.T)
    distances, _ = tree.kneighbors(verts_tgt.T)
    distances = distances[distances < 0.2]  # keep plausible matches only
    result = {
        'sigma': np.median(distances),
        'Tij': Tij,
        'aerr': angular_distance_np(Tij[np.newaxis, :3, :3],
                                    Tij_gt[np.newaxis, :3, :3]).sum(),
        'terr': np.linalg.norm(Tij[:3, 3] - Tij_gt[:3, 3], 2),
        'src': mat_file1,
        'tgt': mat_file2,
    }
    sio.savemat(output_mat, mdict=result, do_compression=True)
def IterativeTransfSync(n, edges, eps0=-1, decay=0.8, Tstar=None, max_iter=10000,
                        cheat=False, scheme='reweight'):
    """Iterative transformation synchronization over a pose graph.

    Repeatedly solves a spectral synchronization step (``TransfSync``) and
    re-estimates per-edge weights, either by continuous reweighting
    (``scheme='reweight'``) or by truncating edges whose residual exceeds a
    decaying threshold ``eps0``.

    Args:
        n: number of nodes.
        edges: list of edge dicts ('src', 'tgt', 'R', 't', weights...);
            mutated in place with updated weights.
        eps0: initial truncation threshold; -1 means "derive from data".
        decay: geometric decay factor applied to eps0 between iterations.
        Tstar: optional (n, 4, 4) ground-truth poses, used for progress
            reporting and for the oracle 'cheat' mode.
        max_iter: iteration budget.
        cheat: if True, set edge weights from ground truth (requires Tstar).
        scheme: 'reweight' for continuous reweighting, anything else for
            threshold truncation.

    Returns:
        (n, 4, 4) array of synchronized poses from the last iteration.
    """
    reweight = (scheme == 'reweight')
    if cheat:
        # Oracle weighting: an edge gets weight 1.0 iff its input relative
        # transform agrees with the ground truth (30.0 angular / 0.2
        # translation thresholds).
        for edge in edges:
            sid = edge['src']
            tid = edge['tgt']
            Tij_gt = Tstar[tid].dot(inverse(Tstar[sid]))
            Tij = pack(edge['R'], edge['t'])
            aerr = angular_distance_np(Tij[np.newaxis, :3, :3],
                                       Tij_gt[np.newaxis, :3, :3]).sum()
            terr = np.linalg.norm(Tij[:3, 3] - Tij_gt[:3, 3], 2)
            weight = 0.0 if (aerr > 30.0 or terr > 0.2) else 1.0
            edge['predicted_weight'] = weight
            edge['translation_weight'] = weight
            edge['rotation_weight'] = weight
        # Edge-quality report over the edges kept by the oracle.
        good_edges = 0.0
        bad_edges = 0.0
        err_bad = 0.0
        err_good = 0.0
        for edge in edges:
            if edge['predicted_weight'] < 0.5:
                continue
            sid = edge['src']
            tid = edge['tgt']
            Tij_gt = Tstar[tid].dot(inverse(Tstar[sid]))
            Tij = pack(edge['R'], edge['t'])
            aerr = angular_distance_np(Tij[np.newaxis, :3, :3],
                                       Tij_gt[np.newaxis, :3, :3]).sum()
            terr = np.linalg.norm(Tij[:3, 3] - Tij_gt[:3, 3], 2)
            if aerr > 30.0 or terr > 0.2:
                bad_edges += 1
                err_bad += aerr * edge['predicted_weight']
            else:
                good_edges += 1
                err_good += aerr * edge['predicted_weight']
        print('Edge Quality: #good=%f, #bad=%f' % (good_edges, bad_edges))
    itr = 0
    while itr < max_iter:
        R, t, eigengap = TransfSync(n, edges)
        T = np.array([pack(R[i], t[i]) for i in range(n)])
        # Initialize to sentinels so the progress print below is well-defined
        # even without ground truth (the original raised NameError when
        # Tstar was None).
        aerr_gt = terr_gt = -1.0
        if Tstar is not None:
            aerr_gt, terr_gt = error(T, Tstar)
        if not reweight:
            if eps0 < -0.5:
                # Derive the initial truncation threshold from the data.
                eps0 = max_existing_err(n, edges, R, t)
        if reweight:
            reweightEdges(n, edges, R, t, sigma_r=0.01, sigma_t=0.01)
        else:
            truncatedWeightPredict(n, edges, R, t, eps0)
        mindeg, numedges, err_sum = computeStats(n, edges, R, t)
        print('iter=%d, avg(err^2)=%f, eigengap=%f, #edges=%d, min_deg=%f, eps0=%f, aerr_gt=%f, terr_gt=%f'
              % (itr, err_sum / numedges, eigengap, numedges, mindeg, eps0,
                 aerr_gt, terr_gt))
        # Skip idle iterations: decay eps0 until it can actually remove an
        # edge, charging one iteration per decay step.
        if reweight:
            itr += 1
        else:
            max_err = max_existing_err(n, edges, R, t)
            while (itr < max_iter) and (eps0 > max_err):
                eps0 = eps0 * decay
                itr += 1
        if mindeg == 0:
            break  # graph fell apart: a node lost all its edges
        if err_sum <= 1e-2:
            break  # converged
    return T
# Fragment of a per-pair evaluation loop (enclosing function not visible in
# this chunk; `args`, `i`, `dataS`, `pred_pose`, `pc_s`, `pc_t`, `gt_pose`
# and the accumulators are defined by the surrounding code).
pred_num = args.top  # number of top pose candidates to refine with ICP
if args.global_method == 'ours':
    overlap_val = dataS['overlap'][0][i]
    if overlap_val > 0.1:
        large_overlap += 1
    min_ad = 1000  # best (smallest) angular distance seen so far
    for j in range(pred_num):
        current_pose = pred_pose[j]
        # Refine the candidate pose with ICP, then score it against GT.
        icp_pose = general_icp(pc_s, pc_t, current_pose)
        ad_tmp = util.angular_distance_np(
            icp_pose[:3, :3].reshape(1, 3, 3),
            gt_pose[:3, :3].reshape(1, 3, 3))[0]
        if ad_tmp < min_ad:
            min_ad = ad_tmp
        if ad_tmp < args.tolerate_error:
            # First candidate within tolerance counts as correct; stop early.
            correct_num += 1
            break
    # NOTE(review): uses `icp_pose` from the last loop iteration — raises
    # NameError if pred_num == 0; also measures translation error only for
    # the last candidate tried, not necessarily the best one. Confirm intent.
    trans_error = np.linalg.norm(icp_pose[:3, 3] - gt_pose[:3, 3])
    average_trans += trans_error
    average_error += min_ad
    trans_list.append(trans_error)
def __getitem__helper(self, index):
    """Assemble one sample (dict of numpy arrays) for a two-view image pair.

    Loads RGB / depth / normal / semantic images and 4x4 camera poses for a
    view pair from the LMDB transaction, then — depending on the enabled
    flags (``fullsize_rgbdn``, ``denseCorres``, ``pointcloud``, ``plane_r``,
    ``local``, ``eval_local``, ``topdown``) — derives point clouds, dense
    correspondences, plane relations and top-down projections.

    Returns:
        Tuple ``(rets, True)`` where ``rets`` maps output names to arrays
        (all with a leading batch axis added via ``[None, ...]``).

    NOTE(review): under ``if self.local:`` several names (rel_cls, rel_dst,
    rel_ndot, pair, plane_params1/2, plane_center1/2, plane_idx1/2) are read
    before ever being assigned in this method — that path raises NameError as
    written; presumably the plane-pair extraction code was removed or lives
    elsewhere. Confirm before relying on that branch.
    """
    #import ipdb;ipdb.set_trace()
    rets = {}
    index = index % self.__len__()
    # Per-view buffers (nViews is asserted to be 2 below).
    imgs_depth = np.zeros((self.nViews, self.Inputheight, self.Inputwidth), dtype = np.float32)
    imgs_s = np.zeros((self.nViews, self.Inputheight, self.Inputwidth), dtype = np.float32)
    imgs_rgb = np.zeros((self.nViews, self.Inputheight, self.Inputwidth,3), dtype = np.float32)
    imgs_normal = np.zeros((self.nViews, self.Inputheight, self.Inputwidth,3), dtype = np.float32)
    # Point-cloud channels: xyz(3) + normal(3) + rgb(3) + semantic(1).
    pointcloud = np.zeros((self.nViews, 3+3+3+1, self.num_points), dtype = np.float32)
    R = np.zeros((self.nViews, 4, 4))
    Q = np.zeros((7))
    assert(self.nViews == 2)
    imgsPath = []
    ct0,ct1 = self.__getpair__(index)
    if 'scannet_test_scenes' not in self.list:
        rets['overlap'] = float(self.dataList[index]['overlap'])
    room_id = self.base_this.split('/')[-1]
    basePath = os.path.join(self.base, room_id)
    # ---- RGB (decoded from LMDB-stored encoded buffers) ----
    imageKey = '%s-%06d-rgb' % (room_id, ct0)
    imageBin = self.txn.get(imageKey.encode())
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    imgs_rgb[0] = cv2.imdecode(imageBuf, cv2.IMREAD_COLOR).astype('float')/255.0
    imageKey = '%s-%06d-rgb' % (room_id, ct1)
    imageBin = self.txn.get(imageKey.encode())
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    imgs_rgb[1] = cv2.imdecode(imageBuf, cv2.IMREAD_COLOR).astype('float')/255.0
    # ---- depth (flag 2 = IMREAD_ANYDEPTH; stored value / 1000) ----
    imageKey = '%s-%06d-depth' % (room_id, ct0)
    imageBin = self.txn.get(imageKey.encode())
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    imgs_depth[0] = cv2.imdecode(imageBuf, 2).astype('float')/1000.0
    imageKey = '%s-%06d-depth' % (room_id, ct1)
    imageBin = self.txn.get(imageKey.encode())
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    imgs_depth[1] = cv2.imdecode(imageBuf, 2).astype('float')/1000.0
    #cv2.imwrite('test.png',imgs_rgb[0]*255)
    # ---- normals (8-bit image mapped to [-1, 1]) ----
    imageKey = '%s-%06d-normal' % (room_id, ct0)
    imageBin = self.txn.get(imageKey.encode())
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    imgs_normal[0] = cv2.imdecode(imageBuf, cv2.IMREAD_COLOR).astype('float')/255.0*2-1
    imageKey = '%s-%06d-normal' % (room_id, ct1)
    imageBin = self.txn.get(imageKey.encode())
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    imgs_normal[1] = cv2.imdecode(imageBuf, cv2.IMREAD_COLOR).astype('float')/255.0*2-1
    # ---- semantic labels (first channel only) ----
    imageKey = '%s-%06d-semantic' % (room_id, ct0)
    imageBin = self.txn.get(imageKey.encode())
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    imgs_s[0] = cv2.imdecode(imageBuf, cv2.IMREAD_UNCHANGED).astype('uint8')[:,:,0]
    imageKey = '%s-%06d-semantic' % (room_id, ct1)
    imageBin = self.txn.get(imageKey.encode())
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    imgs_s[1] = cv2.imdecode(imageBuf, cv2.IMREAD_UNCHANGED).astype('uint8')[:,:,0]
    # Pixels with zero depth are invalid.
    PerspectiveValidMask = (imgs_depth!=0)
    rets['PerspectiveValidMask'] = PerspectiveValidMask[None,:,None,:,:]
    rets['dataMask'] = rets['PerspectiveValidMask']
    # ---- camera poses (4x4 matrices stored as raw float buffers) ----
    RKey = '%s-%06d-R' % (room_id, ct0)
    R[0] = np.frombuffer(self.txn.get(RKey.encode()), np.float).reshape(4,4)
    RKey = '%s-%06d-R' % (room_id, ct1)
    R[1] = np.frombuffer(self.txn.get(RKey.encode()), np.float).reshape(4,4)
    # convert from 3rd view to 4th view
    #R[0] = np.matmul(np.linalg.inv(self.Rs[3]),R[0])
    #R[1] = np.matmul(np.linalg.inv(self.Rs[3]),R[1])
    R_inv = np.linalg.inv(R)
    img2ind = np.zeros([2, self.num_points, 3])
    imgPCid = np.zeros([2, self.num_points, 2])
    if self.fullsize_rgbdn:
        # Full-resolution (480x640) depth / rgb / normal variants from disk.
        imgs_rgb_full = np.zeros((self.nViews, 480,640, 3), dtype = np.float32)
        imgs_norm_full = np.zeros((self.nViews, 480,640, 3), dtype = np.float32)
        imgs_full = np.zeros((self.nViews, 480,640), dtype = np.float32)
        imgs_full[0] = self.LoadImage(os.path.join(basePath.replace('ScanNet','ScanNet_360'),'obs_depth','%06d.png'%(ct0))).copy()
        imgs_full[1] = self.LoadImage(os.path.join(basePath.replace('ScanNet','ScanNet_360'),'obs_depth','%06d.png'%(ct1))).copy()
        imgs_rgb_full[0] = self.LoadImage(os.path.join(basePath.replace('ScanNet','ScanNet_360'),'obs_rgb','%06d.png'%(ct0)),depth=False).copy()/255.
        imgs_rgb_full[1] = self.LoadImage(os.path.join(basePath.replace('ScanNet','ScanNet_360'),'obs_rgb','%06d.png'%(ct1)),depth=False).copy()/255.
        imgs_norm_full[0] = self.LoadImage(os.path.join(basePath.replace('ScanNet','ScanNet_360'),'obs_normal','%06d.png'%(ct0)),depth=False).copy()/255*2-1.
        imgs_norm_full[1] = self.LoadImage(os.path.join(basePath.replace('ScanNet','ScanNet_360'),'obs_normal','%06d.png'%(ct1)),depth=False).copy()/255*2-1.
        rets['rgb_full'] = imgs_rgb_full[np.newaxis,:]
        rets['norm_full'] = imgs_norm_full[np.newaxis,:]
        rets['depth_full'] = imgs_full[np.newaxis,:]
    if self.denseCorres:
        # get 3d point cloud for each pano
        pcs,masks = self.depth2pc(imgs_depth[0],needmask=True) # be aware of the order of returned pc!!!
        pct,maskt = self.depth2pc(imgs_depth[1],needmask=True)
        # Bring both clouds into a common (world) frame via the inverse poses.
        pct = (np.matmul(R_inv[1][:3,:3], pct.T) + R_inv[1][:3,3:4]).T
        pcs = (np.matmul(R_inv[0][:3,:3], pcs.T) + R_inv[0][:3,3:4]).T
        # Flat pixel indices of the valid depth pixels in each view.
        inds = np.arange(imgs_depth[0].shape[0]*imgs_depth[0].shape[1])[masks]
        indt = np.arange(imgs_depth[0].shape[0]*imgs_depth[0].shape[1])[maskt]
        # find correspondence using kdtree
        tree = KDTree(pct)
        IdxQuery=np.random.choice(range(pcs.shape[0]),5000) # sample 5000 query points
        pcsQuery = pcs[IdxQuery,:]
        pcsQueryid = inds[IdxQuery]
        nearest_dist, nearest_ind = tree.query(pcsQuery, k=1)
        hasCorres=(nearest_dist < 0.08)  # matches closer than 8 cm
        idxTgtNeg=[]
        # Convert flat indices back to (x, y) pixel coordinates.
        idxSrc= np.stack((pcsQueryid[hasCorres[:,0]] % self.Inputwidth, pcsQueryid[hasCorres[:,0]]// self.Inputwidth),1)
        idxTgt= np.stack((indt[nearest_ind[hasCorres]] % self.Inputwidth, indt[nearest_ind[hasCorres]] // self.Inputwidth),1)
        if hasCorres.sum() < 200:
            # Too few correspondences: emit an all-zero, invalid placeholder.
            rets['denseCorres']={'idxSrc':np.zeros([1,500,2]).astype('int'),'idxTgt':np.zeros([1,500,2]).astype('int'),'valid':np.array([0]),'idxTgtNeg':idxTgtNeg}
        else:
            # Subsample a fixed budget of 500 correspondences.
            idx2000 = np.random.choice(range(idxSrc.shape[0]),500)
            idxSrc=idxSrc[idx2000][np.newaxis,:]
            idxTgt=idxTgt[idx2000][np.newaxis,:]
            rets['denseCorres']={'idxSrc':idxSrc.astype('int'),'idxTgt':idxTgt.astype('int'),'valid':np.array([1]),'idxTgtNeg':idxTgtNeg}
    if self.pointcloud or self.local:
        # Sample num_points points (with normals / colors / semantics) from a
        # 96x128 center crop of each view.
        #pc = self.depth2pc(imgs_depth[0][:,160:160*2]).T
        pc, mask = self.depth2pc(imgs_depth[0][100-48:100+48,200-64:200+64], needmask=True)
        # util.write_ply('test.ply',np.concatenate((pc,pc1)))
        idx_s = np.random.choice(range(len(pc)),self.num_points)
        mask_s = np.where(mask)[0][idx_s]
        imgPCid[0] = np.stack((idx_s % 128, idx_s // 128)).T
        pointcloud[0,:3,:] = pc[idx_s,:].T
        pc_n = imgs_normal[0][100-48:100+48,200-64:200+64].reshape(-1, 3)[mask]
        pointcloud[0,3:6,:] = pc_n[idx_s,:].T
        pc_c = imgs_rgb[0][100-48:100+48,200-64:200+64].reshape(-1,3)[mask]
        pointcloud[0,6:9,:] = pc_c[idx_s,::-1].T  # reverse channel order (BGR -> RGB)
        pc_s = imgs_s[0][100-48:100+48,200-64:200+64].reshape(-1)[mask]
        pointcloud[0,9:10,:] = pc_s[idx_s]
        # Same sampling for view 1.
        pc, mask = self.depth2pc(imgs_depth[1][100-48:100+48,200-64:200+64], needmask=True)
        idx_s = np.random.choice(range(len(pc)),self.num_points)
        mask_t = np.where(mask)[0][idx_s]
        imgPCid[1] = np.stack((idx_s % 128, idx_s // 128)).T
        pointcloud[1,:3,:] = pc[idx_s,:].T
        pc_n = imgs_normal[1][100-48:100+48,200-64:200+64].reshape(-1, 3)[mask]
        pointcloud[1,3:6,:] = pc_n[idx_s,:].T
        pc_c = imgs_rgb[1][100-48:100+48,200-64:200+64].reshape(-1,3)[mask]
        pointcloud[1,6:9,:] = pc_c[idx_s,::-1].T
        pc_s = imgs_s[1][100-48:100+48,200-64:200+64].reshape(-1)[mask]
        pointcloud[1,9:10,:] = pc_s[idx_s]
        rets['pointcloud']=pointcloud[None,...]
    if self.plane_r:
        # Per-room plane table: 9 floats per plane — center(3) then eq(4)...
        # NOTE(review): columns 7:9 unused here; confirm layout.
        Key = '%s-plane' % (room_id)
        plane_eq_raw = np.frombuffer(self.txn.get(Key.encode()), np.float).reshape(-1,9)
        Key = '%s-plane-validnum' % (room_id)
        valid_plane = np.frombuffer(self.txn.get(Key.encode()),np.uint8)[0]
        plane_eq = plane_eq_raw[:,3:7]
        plane_eq = np.matmul(plane_eq, np.linalg.inv(R[0]))  # into view-0 frame
        plane_center = plane_eq_raw[:,:3]
        plane_center = (np.matmul(R[0][:3,:3], plane_center.T) + R[0][:3,3:4]).T
        rets['plane']=plane_eq[np.newaxis,:]
        rets['plane_raw']=plane_eq_raw[np.newaxis,:]
        rets['plane_c']=plane_center[np.newaxis,:]
        rets['valid_plane']=valid_plane
    if self.local:
        # sample point-level relation from plane relation
        try:
            # Move the view-0 cloud into the view-1 camera frame.
            R_s2t = np.matmul(R[1], R_inv[0])
            pointcloud[0,:3,:] = np.matmul(R_s2t[:3,:3], pointcloud[0,:3,:]) + R_s2t[:3,3:4]
            pointcloud[0,3:6,:] = np.matmul(R_s2t[:3,:3], pointcloud[0,3:6,:])
            if self.eval_local:
                N_PAIR_PTS = 6000
            else:
                N_PAIR_PTS = 1000
            N_PAIR_EXCEED_PTS = N_PAIR_PTS*10  # over-sample, then balance
            ANGLE_THRESH = 5.0
            PERP_THRESH = np.cos(np.deg2rad(90-ANGLE_THRESH))
            PARALLEL_THRESH = np.cos(np.deg2rad(ANGLE_THRESH))
            COPLANE_THRESH = 0.05
            # Pair relation classes: 0 none, 1 perpendicular, 2 parallel,
            # 3 coplanar (parallel + small symmetric plane distance).
            rel_cls_pts = np.zeros([N_PAIR_EXCEED_PTS])
            ind_s = np.random.choice(pointcloud.shape[-1], N_PAIR_EXCEED_PTS)
            ind_t = np.random.choice(pointcloud.shape[-1], N_PAIR_EXCEED_PTS)
            pair_pts = np.stack((ind_s, ind_t), -1)
            normdot = (pointcloud[0, 3:6, pair_pts[:,0]] * pointcloud[1, 3:6, pair_pts[:,1]]).sum(1)
            # Symmetrized point-to-plane distance between the two samples.
            dst = (np.abs(((pointcloud[0, 0:3, pair_pts[:,0]] - pointcloud[1, 0:3, pair_pts[:,1]]) * pointcloud[1, 3:6, pair_pts[:,1]]).sum(1)) + np.abs(((pointcloud[0, 0:3, pair_pts[:,0]] - pointcloud[1, 0:3, pair_pts[:,1]]) * pointcloud[0, 3:6, pair_pts[:,0]]).sum(1)))/2
            rel_cls_pts[(np.abs(normdot) < PERP_THRESH)] = 1
            rel_cls_pts[(np.abs(normdot) > PARALLEL_THRESH) & (dst > COPLANE_THRESH)] = 2
            rel_cls_pts[(np.abs(normdot) > PARALLEL_THRESH) & (dst <= COPLANE_THRESH)] = 3
            if self.split == 'train':
                # balance each class
                N_CLASS = 4
                pair_pts_select=[]
                for j in range(N_CLASS):
                    ind = np.where(rel_cls_pts == j)[0]
                    if len(ind):
                        pair_pts_select.append(ind[np.random.choice(len(ind), N_PAIR_PTS//N_CLASS)])
                pair_pts_select = np.concatenate(pair_pts_select)
                pair_pts_select =pair_pts_select[np.random.choice(len(pair_pts_select), N_PAIR_PTS)]
                pair_pts = pair_pts[pair_pts_select]
                normdot = normdot[pair_pts_select]
                dst = dst[pair_pts_select]
                rel_cls_pts = rel_cls_pts[pair_pts_select]
            else:
                pair_pts_select = np.random.choice(len(pair_pts), N_PAIR_PTS)
                pair_pts = pair_pts[pair_pts_select]
                normdot = normdot[pair_pts_select]
                dst = dst[pair_pts_select]
                rel_cls_pts = rel_cls_pts[pair_pts_select]
            rets['normdot2'] = np.power(normdot,2)[None,:]
            rets['dst2'] = np.power(dst,2)[None,:]
            # convert to image coordinate
            if 1:
                R_s2t = np.matmul(R[1], R_inv[0])
                R_t2s = np.linalg.inv(R_s2t)
                tp = (np.matmul(R_t2s[:3,:3], pointcloud[0, :3, pair_pts[:,0]].T)+R_t2s[:3,3:4]).T
                # NOTE(review): 120-deg hfov and the 96x128 / 200x400
                # constants appear camera/crop-specific — confirm.
                hfov = 120.0
                vfov = 2*np.arctan(np.tan(hfov/2/180*np.pi)*200/400)/np.pi*180
                zs = -tp[:,2]
                ys = (0.5 - (tp[:, 1]/96*200/zs/(np.tan(np.deg2rad(vfov/2))))/2)*96
                xs = (0.5 + (tp[:, 0]/128*400/zs/(np.tan(np.deg2rad(hfov/2))))/2)*128
                uv_s = np.stack((xs, ys), -1)
                tp = pointcloud[1, :3, pair_pts[:,1]]
                zs = -tp[:,2]
                ys = (0.5 - (tp[:, 1]/96*200/zs/(np.tan(np.deg2rad(vfov/2))))/2)*96
                xs = (0.5 + (tp[:, 0]/128*400/zs/(np.tan(np.deg2rad(hfov/2))))/2)*128
                uv_t = np.stack((xs, ys), -1)
                rets['uv_pts'] = np.stack((uv_s, uv_t))[None, :]
                rets['uv_pts'][:, :, :, 0] = rets['uv_pts'][:, :, :, 0].clip(0, 128-1)
                rets['uv_pts'][:, :, :, 1] = rets['uv_pts'][:, :, :, 1].clip(0, 96-1)
                rets['uv_pts'] = rets['uv_pts'].astype('int')
        except:
            # NOTE(review): bare except dropping into a debugger — debug
            # leftover; swallows every error in the block above.
            import ipdb;ipdb.set_trace()
        # NOTE(review): rel_cls / rel_dst / rel_ndot / pair and the
        # plane_params* / plane_center* / plane_idx* names below are never
        # assigned in this method — NameError as written (see docstring).
        rel_cls = np.array(rel_cls)
        rel_dst = np.array(rel_dst)
        rel_ndot = np.array(rel_ndot)
        pair = np.concatenate(pair).reshape(-1, 2)
        # padding f
        MAX_PAIR = 100
        MAX_PLANE = 20
        plane_params1 = np.array(plane_params1)
        plane_params2 = np.array(plane_params2)
        if len(plane_params1) <= MAX_PLANE:
            # Pad plane tables up to MAX_PLANE rows.
            plane_params1 = np.concatenate((plane_params1, np.zeros([MAX_PLANE - len(plane_params1), 5])))
            plane_center1 = np.concatenate((plane_center1, np.zeros([MAX_PLANE - len(plane_center1), 6])))
        else:
            # Truncate, and drop pair relations referencing removed planes.
            plane_params1 = plane_params1[:MAX_PLANE]
            plane_center1 = plane_center1[:MAX_PLANE]
            select = (pair[:, 0] < MAX_PLANE)
            pair = pair[select]
            rel_cls = rel_cls[select]
            rel_dst = rel_dst[select]
            rel_ndot = rel_ndot[select]
        if len(plane_params2) <= MAX_PLANE:
            plane_params2 = np.concatenate((plane_params2, np.zeros([MAX_PLANE - len(plane_params2), 5])))
            plane_center2 = np.concatenate((plane_center2, np.zeros([MAX_PLANE - len(plane_center2), 6])))
        else:
            plane_params2 = plane_params2[:MAX_PLANE]
            plane_center2 = plane_center2[:MAX_PLANE]
            select = (pair[:, 1] < MAX_PLANE)
            pair = pair[select]
            rel_cls = rel_cls[select]
            rel_dst = rel_dst[select]
            rel_ndot = rel_ndot[select]
        # Pad / truncate the pair relations to MAX_PAIR, tracking validity.
        rel_valid = np.zeros([MAX_PAIR])
        if len(rel_cls) < MAX_PAIR:
            rel_valid[:len(rel_cls)] = 1
            rel_cls = np.concatenate((rel_cls, np.zeros([MAX_PAIR - len(rel_cls)])))
            rel_dst = np.concatenate((rel_dst, np.zeros([MAX_PAIR - len(rel_dst)])))
            rel_ndot = np.concatenate((rel_ndot, np.zeros([MAX_PAIR - len(rel_ndot)])))
            pair = np.concatenate((pair, np.zeros([MAX_PAIR - len(pair), 2])))
        else:
            pair = pair[:MAX_PAIR]
            rel_cls = rel_cls[:MAX_PAIR]
            rel_dst = rel_dst[:MAX_PAIR]
            rel_ndot = rel_ndot[:MAX_PAIR]
            rel_valid[:] = 1
        rets['plane_center'] = np.stack((plane_center1,plane_center2))[None,...]
        rets['pair'] = pair[None,...].astype('int')
        rets['rel_cls'] = rel_cls[None,...].astype('int')
        rets['rel_dst'] = rel_dst[None,...]
        rets['rel_ndot'] = rel_ndot[None,...]
        rets['rel_valid'] = rel_valid[None,...]
        rets['plane_idx'] = np.stack((plane_idx1,plane_idx2))[None,...].astype('int')
        rets['rel_cls_pts'] = rel_cls_pts[None, :]
        rets['pair_pts'] = pair_pts[None, :]
        if self.eval_local:
            # convert back into local coordinate
            R_t2s = np.matmul(R[0], R_inv[1])
            Kth = self.dataList[index % self.__len__()]['Kth']
            pointcloud[0,:3,:] = np.matmul(R_t2s[:3,:3], pointcloud[0,:3,:]) + R_t2s[:3,3:4]
            pointcloud[0,3:6,:] = np.matmul(R_t2s[:3,:3], pointcloud[0,3:6,:])
            # Precomputed predictions keyed by (room, frame0, frame1, Kth).
            R_pred = self.eval_gt_dict['%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)]['pred_pose']
            gt_pose = self.eval_gt_dict['%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)]['gt_pose']
            err_r = util.angular_distance_np(R_pred[:3,:3],gt_pose[:3,:3])[0]
            rets['err_r'] = err_r
            rets['eval_key'] = '%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)
            pos_s_360 = self.eval_gt_dict['%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)]['pos_s_360']
            pos_t_360 = self.eval_gt_dict['%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)]['pos_t_360']
            nor_s_360 = self.eval_gt_dict['%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)]['nor_s_360']
            nor_t_360 = self.eval_gt_dict['%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)]['nor_t_360']
            feat_s_360 = self.eval_gt_dict['%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)]['feat_s_360']
            feat_t_360 = self.eval_gt_dict['%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)]['feat_t_360']
            # transform source
            pos_s_360 = (np.matmul(R_pred[:3,:3], pos_s_360.T) + R_pred[:3,3:4]).T
            nor_s_360 = np.matmul(R_pred[:3,:3], nor_s_360.T).T
            # find top correspondence
            if 0:
                # Disabled mutual-nearest-neighbour matching variant.
                tree = KDTree(pos_s_360)
                nearest_dist1, nearest_ind1 = tree.query(pos_t_360, k=1)
                nearest_ind1 = nearest_ind1.squeeze()
                tree = KDTree(pos_t_360)
                nearest_dist2, nearest_ind2 = tree.query(pos_s_360, k=1)
                nearest_ind2 = nearest_ind2.squeeze()
                # if nearest_ind1[nearest_ind2] == np.range(len(feat_s_360))
                rets['pos_s_360'] = (pos_s_360[nearest_ind1][None,:])
                rets['pos_t_360'] = (pos_t_360[None,:])
                rets['nor_s_360'] = (nor_s_360[nearest_ind1][None,:])
                rets['nor_t_360'] = (nor_t_360[None,:])
            if 1:
                rets['pos_s_360'] = (pos_s_360[None,:])
                rets['pos_t_360'] = (pos_t_360[None,:])
                rets['nor_s_360'] = (nor_s_360[None,:])
                rets['nor_t_360'] = (nor_t_360[None,:])
            pointcloud[0,:3,:] = np.matmul(R_pred[:3,:3], pointcloud[0,:3,:]) + R_pred[:3,3:4]
            pointcloud[0,3:6,:] = np.matmul(R_pred[:3,:3], pointcloud[0,3:6,:])
            color_t_360 = np.tile(np.array([0,1,0])[None,:], [len(pos_t_360),1])
            # Residual transform between prediction and the src->tgt GT.
            igt = np.matmul(R_s2t, np.linalg.inv(R_pred))
            rets['igt'] = igt[None,:]
            rets['pred_pose'] = R_pred[None,:]
            rets['gt_pose'] = gt_pose[None,:]
            R_gt = igt[:3,:3]
            t_gt = igt[:3,3:4]
        else:
            # Training: apply a random rigid perturbation about the source
            # centroid and record its inverse as the GT alignment.
            delta_R = util.randomRotation(epsilon=0.1*3)
            delta_t = np.random.randn(3)*0.1
            pointcloud_s_perturb = np.matmul(delta_R, pointcloud[0,:3,:] - pointcloud[0,:3,:].mean(1)[:,None]) + delta_t[:, None] + pointcloud[0,:3,:].mean(1)[:,None]
            tp_R = delta_R
            tp_t = np.matmul(np.eye(3) - delta_R, pointcloud[0,:3,:].mean(1)[:,None]) + delta_t[:, None]
            t_gt = np.matmul(np.eye(3) - delta_R.T, pointcloud[0,:3,:].mean(1)[:,None]) - np.matmul(delta_R.T, delta_t[:, None])
            R_gt = delta_R.T
            igt = np.eye(4)
            igt[:3,:3] = R_gt
            igt[:3,3] = t_gt.squeeze()
            rets['igt'] = igt[None,:]
            pointcloud_s_n_perturb = np.matmul(delta_R, pointcloud[0,3:6,:]) # np.matmul(R_gt, pointcloud_s_perturb) + t_gt
            if self.local_method == 'patch':
                plane_params1[:,:4] = np.matmul(plane_params1[:,:4], igt)
            Q = np.concatenate((util.rot2Quaternion(R_gt),t_gt.squeeze()))
            R_ = np.eye(4)
            R_[:3, :3] = R_gt
            R_[:3, 3] = t_gt.squeeze()
            R_inv = np.linalg.inv(R_)  # NOTE: shadows the per-view R_inv above
            pointcloud[0,:3,:] = pointcloud_s_perturb
            pointcloud[0,3:6,:] = pointcloud_s_n_perturb
        rets['pointcloud']=pointcloud[None,...]
    if self.topdown:
        # Room point cloud, subsampled to a fixed 20k points.
        Key = '%s-pc' % (room_id)
        roompc = np.frombuffer(self.txn.get(Key.encode()), np.float).reshape(-1,3)
        roompc = roompc[np.random.choice(roompc.shape[0],20000)]
        rets['roompc'] = roompc[None,:]
        # Floor plane equation, normalized and expressed in each view frame.
        Key = '%s-floor' % (room_id)
        plane_eq = np.frombuffer(self.txn.get(Key.encode()), np.float).reshape(4)
        plane_eqs = np.zeros([2, 4])
        plane_eq_0 = np.matmul(plane_eq, np.linalg.inv(R[0]))
        plane_eq_0 /= (np.linalg.norm(plane_eq_0[:3])+1e-16)
        plane_eqs[0, :] = plane_eq_0.copy()
        plane_eq_1 = np.matmul(plane_eq, np.linalg.inv(R[1]))
        plane_eq_1 /= (np.linalg.norm(plane_eq_1[:3])+1e-16)
        plane_eqs[1, :] = plane_eq_1.copy()
        colors = np.random.rand(21,3)
        resolution = 0.03  # metres per top-down pixel
        height = 224
        width = 224
        pc0 = pointcloud[0,0:3,:].T
        pc2ind = np.zeros([2, len(pc0), 3])
        npts = np.zeros([2])
        pc2ind_mask = np.zeros([2, pointcloud.shape[2]])
        # the floor plane
        # (0, 1, 0)'x + d = 0
        # remove partial view's ceiling
        dst = np.abs(((plane_eq_0[:3][None,:] * pc0).sum(1) + plane_eq_0[3]))
        mask = dst < 1.5  # keep points within 1.5 m of the floor
        # reorder
        validind = np.where(mask)[0]
        invalidind = np.where(~mask)[0]
        #pointcloud[0] = np.concatenate((pointcloud[0,:,validind].T,pointcloud[0,:,invalidind].T), -1)
        npts[0] = len(validind)
        pc0 = pc0[mask]
        pc2ind_mask[0] = mask
        # project camera position(0,0,0) to floor plane
        origin_0 = -plane_eq_0[:3] * plane_eq_0[3]
        # axis [0,0,-1], []
        # Build an in-plane orthonormal frame (x, y) plus the plane normal z.
        axis_base = np.array([0,0,-1])
        axis_y_0 = axis_base - np.dot(axis_base,plane_eq_0[:3]) * plane_eq_0[:3]
        axis_y_0 /= (np.linalg.norm(axis_y_0)+1e-16)
        axis_x_0 = np.cross(axis_y_0, plane_eq_0[:3])
        axis_x_0 /= (np.linalg.norm(axis_x_0)+1e-16)
        axis_z_0 = plane_eq_0[:3]
        # Cached top-down renderings (partial/complete, color/semantic).
        imageKey = '%s-%06d-topdown_c_partial' % (room_id, ct0)
        imageBin = self.txn.get(imageKey.encode())
        imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
        topdown_c_partial_0 = cv2.imdecode(imageBuf, cv2.IMREAD_COLOR).astype('float')/255.
        imageKey = '%s-%06d-topdown_c_partial' % (room_id, ct1)
        imageBin = self.txn.get(imageKey.encode())
        imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
        topdown_c_partial_1 = cv2.imdecode(imageBuf, cv2.IMREAD_COLOR).astype('float')/255.
        imageKey = '%s-%06d-topdown_c_complete' % (room_id, ct0)
        imageBin = self.txn.get(imageKey.encode())
        imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
        topdown_c_complete_0 = cv2.imdecode(imageBuf, cv2.IMREAD_COLOR).astype('float')/255.
        imageKey = '%s-%06d-topdown_c_complete' % (room_id, ct1)
        imageBin = self.txn.get(imageKey.encode())
        imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
        topdown_c_complete_1 = cv2.imdecode(imageBuf, cv2.IMREAD_COLOR).astype('float')/255.
        imageKey = '%s-%06d-topdown_s_complete' % (room_id, ct0)
        imageBin = self.txn.get(imageKey.encode())
        imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
        topdown_s_complete_0 = cv2.imdecode(imageBuf, cv2.IMREAD_UNCHANGED).astype('uint8')
        imageKey = '%s-%06d-topdown_s_complete' % (room_id, ct1)
        imageBin = self.txn.get(imageKey.encode())
        imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
        topdown_s_complete_1 = cv2.imdecode(imageBuf, cv2.IMREAD_UNCHANGED).astype('uint8')
        # Distance-transform based weights decaying away from observed pixels.
        tp = ~topdown_c_partial_0.sum(2).astype('bool')
        edt_0 = ndimage.distance_transform_edt(tp, return_indices=False)
        edt_0 = np.maximum(0.1, np.power(0.98, edt_0))
        tp = ~topdown_c_partial_1.sum(2).astype('bool')
        edt_1 = ndimage.distance_transform_edt(tp, return_indices=False)
        edt_1 = np.maximum(0.1, np.power(0.98, edt_1))
        rets['edt_w'] = np.stack((edt_0, edt_1))[None, ...]
        # Project the masked view-0 cloud into top-down grid coordinates.
        u = ((pc0 - origin_0[None,:]) * axis_x_0[None,:]).sum(1)
        v = ((pc0 - origin_0[None,:]) * axis_y_0[None,:]).sum(1)
        z = ((pc0 - origin_0[None,:]) * axis_z_0[None,:]).sum(1)
        u = width//2 + (u / resolution).astype('int')
        v = height//2 - (v / resolution).astype('int')
        ind_z = np.digitize(z, [-0.1, 0.7, 1.5])  # 3 height bands
        topdown_ind_0 = np.stack((u, v, ind_z), -1)
        # Same projection for the full (unmasked) view-0 cloud.
        u = ((pointcloud[0,0:3,:].T - origin_0[None,:]) * axis_x_0[None,:]).sum(1)
        v = ((pointcloud[0,0:3,:].T - origin_0[None,:]) * axis_y_0[None,:]).sum(1)
        z = ((pointcloud[0,0:3,:].T - origin_0[None,:]) * axis_z_0[None,:]).sum(1)
        u = width//2 + (u / resolution).astype('int')
        v = height//2 - (v / resolution).astype('int')
        ind_z = np.digitize(z, [-0.1, 0.7, 1.5])
        topdown_ind_img_0 = np.stack((u, v, ind_z), -1)
        pc2ind[0,mask] = topdown_ind_0
        # ---- repeat everything for view 1 ----
        pc1 = pointcloud[1,0:3,:].T
        plane_eq_1 = np.matmul(plane_eq, np.linalg.inv(R[1]))
        plane_eq_1 /= (np.linalg.norm(plane_eq_1[:3])+1e-16)
        plane_eqs[1, :] = plane_eq_1.copy()
        dst = np.abs(((plane_eq_1[:3][None,:] * pc1).sum(1) + plane_eq_1[3]))
        mask = dst < 1.5
        validind = np.where(mask)[0]
        invalidind = np.where(~mask)[0]
        #pointcloud[1] = np.concatenate((pointcloud[1,:,validind].T,pointcloud[1,:,invalidind].T), -1)
        npts[1] = len(validind)
        pc1 = pc1[mask]
        pc2ind_mask[1] = mask
        origin_1 = -plane_eq_1[:3] * plane_eq_1[3]
        # axis [0,0,-1], []
        axis_base = np.array([0,0,-1])
        axis_y_1 = axis_base - np.dot(axis_base,plane_eq_1[:3]) * plane_eq_1[:3]
        axis_y_1 /= (np.linalg.norm(axis_y_1)+1e-16)
        axis_x_1 = np.cross(axis_y_1, plane_eq_1[:3])
        axis_x_1 /= (np.linalg.norm(axis_x_1)+1e-16)
        axis_z_1 = plane_eq_1[:3]
        u = ((pc1 - origin_1[None,:]) * axis_x_1[None,:]).sum(1)
        v = ((pc1 - origin_1[None,:]) * axis_y_1[None,:]).sum(1)
        z = ((pc1 - origin_1[None,:]) * axis_z_1[None,:]).sum(1)
        # write_ply('test.ply',np.stack((u,v,z),-1), color=colors[pc_s])
        u = width//2 + (u / resolution).astype('int')
        v = height//2 - (v / resolution).astype('int')
        ind_z = np.digitize(z, [-0.1, 0.7, 1.5])
        topdown_ind_1 = np.stack((u, v, ind_z), -1)
        u = ((pointcloud[1,0:3,:].T - origin_1[None,:]) * axis_x_1[None,:]).sum(1)
        v = ((pointcloud[1,0:3,:].T - origin_1[None,:]) * axis_y_1[None,:]).sum(1)
        z = ((pointcloud[1,0:3,:].T - origin_1[None,:]) * axis_z_1[None,:]).sum(1)
        u = width//2 + (u / resolution).astype('int')
        v = height//2 - (v / resolution).astype('int')
        ind_z = np.digitize(z, [-0.1, 0.7, 1.5])
        topdown_ind_img_1 = np.stack((u, v, ind_z), -1)
        img2ind[0] = topdown_ind_img_0
        img2ind[1] = topdown_ind_img_1
        pc2ind[1,mask] = topdown_ind_1
        rets['img2ind'] = img2ind[None,...]
        rets['imgPCid'] = imgPCid[None,...]
        rets['axis_x'] = np.zeros([2,3])
        rets['axis_y'] = np.zeros([2,3])
        rets['origin'] = np.zeros([2,3])
        rets['axis_x'][0] = axis_x_0
        rets['axis_y'][0] = axis_y_0
        rets['axis_x'][1] = axis_x_1
        rets['axis_y'][1] = axis_y_1
        rets['origin'][0] = origin_0
        rets['origin'][1] = origin_1
        rets['axis_x'] = rets['axis_x'][None,:]
        rets['axis_y'] = rets['axis_y'][None,:]
        rets['origin'] = rets['origin'][None,:]
        # sample points on source floor plane:
        if 1:
            #mask = ~((topdown_c_complete_0==0).sum(2)==3)
            mask = ~((topdown_c_partial_0==0).sum(2)==3)  # observed pixels
            vs, us = np.where(mask)
            if not len(vs):
                # Degenerate fallback when no pixel is observed.
                vs = np.array([0,0])
                us = np.array([0,0])
            ind = np.random.choice(len(vs), 100)
            u_0 = us[ind]
            v_0 = vs[ind]
            kp_uv_0 = np.stack((u_0,v_0),-1)
            u_0 -= width//2
            v_0 -= height//2
            # Lift the sampled keypoints to 3D on the floor plane.
            kp_3d_0 = origin_0[None,:] + axis_x_0[None,:] * u_0[:,None] * resolution - axis_y_0[None,:] * v_0[:,None] * resolution
            R01 = np.matmul(R[1], R_inv[0])
            kp_3d_1 = (np.matmul(R01[:3,:3], kp_3d_0.T) + R01[:3,3:4]).T
            # random sample a set of points as negative correspondencs
            if 1:
                mask = ~((topdown_c_partial_1==0).sum(2)==3)
                vs_neg, us_neg = np.where(mask)
                if not len(vs_neg):
                    vs_neg = np.array([0,0])
                    us_neg = np.array([0,0])
                ind = np.random.choice(len(vs_neg), 100*100)
                u_neg_1 = us_neg[ind]
                v_neg_1 = vs_neg[ind]
                kp_uv_neg_1 = np.stack((u_neg_1,v_neg_1),-1)
                u_neg_1 -= width//2
                v_neg_1 -= height//2
                kp_3d_neg_1 = origin_1[None,:] + axis_x_1[None,:] * u_neg_1[:,None] * resolution - axis_y_1[None,:] * v_neg_1[:,None] * resolution
                R10 = np.matmul(R[0], R_inv[1])
                kp_3d_neg_0 = (np.matmul(R10[:3,:3], kp_3d_neg_1.T) + R10[:3,3:4]).T
                u_neg_0 = ((kp_3d_neg_0 - origin_0[None,:]) * axis_x_0[None,:]).sum(1)
                v_neg_0 = ((kp_3d_neg_0 - origin_0[None,:]) * axis_y_0[None,:]).sum(1)
                u_neg_0 = width//2 + (u_neg_0 / resolution).astype('int')
                v_neg_0 = height//2 - (v_neg_0 / resolution).astype('int')
                kp_uv_neg_0 = np.stack((u_neg_0,v_neg_0),-1)
                kp_uv_neg_0[:,0] = kp_uv_neg_0[:,0].clip(0, width-1)
                kp_uv_neg_0[:,1] = kp_uv_neg_0[:,1].clip(0, height-1)
                kp_uv_neg_1 = kp_uv_neg_1.reshape(100, 100, 2)
                kp_uv_neg_0 = kp_uv_neg_0.reshape(100, 100, 2)
                # Weight negatives by pixel distance to the positive keypoint.
                w_uv_neg_1 = 1 - np.maximum(0.1, np.power(0.98, np.linalg.norm(kp_uv_neg_0 - kp_uv_0[:, None, :], axis=2)))
            u_1 = ((kp_3d_1 - origin_1[None,:]) * axis_x_1[None,:]).sum(1)
            v_1 = ((kp_3d_1 - origin_1[None,:]) * axis_y_1[None,:]).sum(1)
            u_1 = width//2 + (u_1 / resolution).astype('int')
            v_1 = height//2 - (v_1 / resolution).astype('int')
            kp_uv_1 = np.stack((u_1,v_1),-1)
            # visualize correspondence
            if 0:
                img_vis = (np.concatenate((topdown_c_complete_0,topdown_c_complete_1))*255).astype('uint8')
                for j in range(10):
                    ind = np.random.choice(len(kp_uv_0),1)[0]
                    img_vis = cv2.line(img_vis, (kp_uv_0[ind][0], kp_uv_0[ind][1]), (kp_uv_1[ind][0], kp_uv_1[ind][1]+topdown_c_complete_0.shape[0]), (255,255,0))
                cv2.imwrite('test.png',img_vis)
        topdown_c_complete = np.stack((topdown_c_complete_0, topdown_c_complete_1)).transpose(0,3,1,2)
        topdown_s_complete = np.stack((topdown_s_complete_0, topdown_s_complete_1))
        topdown_c_partial = np.stack((topdown_c_partial_0, topdown_c_partial_1))
        kp_uv_0[:,0] = kp_uv_0[:,0].clip(0, width-1)
        kp_uv_0[:,1] = kp_uv_0[:,1].clip(0, height-1)
        kp_uv_1[:,0] = kp_uv_1[:,0].clip(0, width-1)
        kp_uv_1[:,1] = kp_uv_1[:,1].clip(0, height-1)
        rets['kp_uv'] = np.stack((kp_uv_0,kp_uv_1))[None,...]
        rets['kp_uv_neg'] = kp_uv_neg_1[None,...]
        rets['w_uv_neg'] = w_uv_neg_1[None,...]
        rets['plane_eq'] = plane_eqs[None,...]
        rets['pc2ind'] = pc2ind[None,...]
        rets['pc2ind_mask'] = pc2ind_mask[None,...]
        rets['topdown'] = topdown_c_complete[None,...]
        rets['topdown_s'] = topdown_s_complete[None,...]
        rets['topdown_partial'] = topdown_c_partial.transpose(0,3,1,2)[None,...]
        # Valid where not all three color channels are zero.
        TopDownValidMask = ((topdown_c_complete==0).sum(1,keepdims=True)!=3)
        rets['TopDownValidMask'] = TopDownValidMask[None,...]
        rets['npts'] = npts[None,...]
    # ---- common outputs for every configuration ----
    imgsPath.append(f"{basePath}/{ct0:06d}")
    imgsPath.append(f"{basePath}/{ct1:06d}")
    rets['norm']=imgs_normal.transpose(0,3,1,2)[None,...]
    rets['rgb']=imgs_rgb.transpose(0,3,1,2)[None,...]
    rets['semantic']=imgs_s[None,...]
    rets['depth']=imgs_depth[None,:,None,:,:]
    rets['Q']=Q[None,...]
    rets['R']=R[None,...]
    rets['R_inv'] = R_inv[None,...]
    rets['imgsPath']=imgsPath
    return rets, True
def __getitem__helper(self, index):
    """Assemble one training sample: a pair of views (ct0, ct1) from one room.

    Decodes RGB / depth / normal / semantic images and 4x4 camera poses from
    the LMDB transaction ``self.txn``, then (depending on the dataset flags
    ``self.pointcloud`` / ``self.local`` / ``self.plane_r`` / ``self.eval_local``
    / ``self.topdown``) builds sampled point clouds, plane parameters and
    top-down (floor-plane) projections.

    Returns:
        (rets, True) where ``rets`` maps output names to arrays that already
        carry a leading batch axis (``[None, ...]``).
    """
    rets = {}
    index = index % self.__len__()
    # Per-view image buffers; this loader is hard-wired to view pairs (see assert).
    imgs_depth = np.zeros((self.nViews, self.Inputheight, self.Inputwidth), dtype=np.float32)
    imgs_s = np.zeros((self.nViews, self.Inputheight, self.Inputwidth), dtype=np.float32)
    imgs_rgb = np.zeros((self.nViews, self.Inputheight, self.Inputwidth, 3), dtype=np.float32)
    imgs_normal = np.zeros((self.nViews, self.Inputheight, self.Inputwidth, 3), dtype=np.float32)
    # Point-cloud channel layout: rows 0:3 xyz, 3:6 normal, 6:9 rgb, 9 semantic label.
    pointcloud = np.zeros((self.nViews, 3 + 3 + 3 + 1, self.num_points), dtype=np.float32)
    R = np.zeros((self.nViews, 4, 4))
    Q = np.zeros((7))
    assert (self.nViews == 2)
    imgsPath = []
    ct0, ct1 = self.__getpair__(index)
    rets['overlap'] = float(self.dataList[index]['overlap'])
    basePath = self.base_this
    scene_id = basePath.split('/')[-2]
    room_id = scene_id + '-' + basePath.split('/')[-1]
    # --- decode the two RGB frames from LMDB ---
    imageKey = '%s-%06d-rgb' % (room_id, ct0)
    imageBin = self.txn.get(imageKey.encode())
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    imgs_rgb[0] = cv2.imdecode(imageBuf, cv2.IMREAD_COLOR).astype('float') / 255.0
    imageKey = '%s-%06d-rgb' % (room_id, ct1)
    imageBin = self.txn.get(imageKey.encode())
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    imgs_rgb[1] = cv2.imdecode(imageBuf, cv2.IMREAD_COLOR).astype('float') / 255.0
    # --- depth frames; stored in millimeters, converted to meters ---
    imageKey = '%s-%06d-depth' % (room_id, ct0)
    imageBin = self.txn.get(imageKey.encode())
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    imgs_depth[0] = cv2.imdecode(imageBuf, 2).astype('float') / 1000.0
    imageKey = '%s-%06d-depth' % (room_id, ct1)
    imageBin = self.txn.get(imageKey.encode())
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    imgs_depth[1] = cv2.imdecode(imageBuf, 2).astype('float') / 1000.0
    # --- normal maps; [0,1] image range mapped back to [-1,1] vectors ---
    imageKey = '%s-%06d-normal' % (room_id, ct0)
    imageBin = self.txn.get(imageKey.encode())
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    imgs_normal[0] = cv2.imdecode(imageBuf, cv2.IMREAD_COLOR).astype('float') / 255.0 * 2 - 1
    imageKey = '%s-%06d-normal' % (room_id, ct1)
    imageBin = self.txn.get(imageKey.encode())
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    imgs_normal[1] = cv2.imdecode(imageBuf, cv2.IMREAD_COLOR).astype('float') / 255.0 * 2 - 1
    # --- semantic label maps; +1 so that 0 can mean "invalid/background" ---
    imageKey = '%s-%06d-semantic' % (room_id, ct0)
    imageBin = self.txn.get(imageKey.encode())
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    imgs_s[0] = cv2.imdecode(imageBuf, cv2.IMREAD_UNCHANGED).astype('uint8')[:, :, 0] + 1
    imageKey = '%s-%06d-semantic' % (room_id, ct1)
    imageBin = self.txn.get(imageKey.encode())
    imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
    imgs_s[1] = cv2.imdecode(imageBuf, cv2.IMREAD_UNCHANGED).astype('uint8')[:, :, 0] + 1
    # Zero depth marks missing measurements.
    PerspectiveValidMask = (imgs_depth != 0)
    rets['PerspectiveValidMask'] = PerspectiveValidMask[None, :, None, :, :]
    rets['dataMask'] = rets['PerspectiveValidMask']
    # --- camera poses ---
    # FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24; the alias
    # meant the builtin float (64-bit), so np.float64 is the exact equivalent.
    RKey = '%s-%06d-R' % (room_id, ct0)
    R[0] = np.frombuffer(self.txn.get(RKey.encode()), np.float64).reshape(4, 4)
    RKey = '%s-%06d-R' % (room_id, ct1)
    R[1] = np.frombuffer(self.txn.get(RKey.encode()), np.float64).reshape(4, 4)
    # convert from 3rd view to 4th view
    R[0] = np.matmul(np.linalg.inv(self.Rs[3]), R[0])
    R[1] = np.matmul(np.linalg.inv(self.Rs[3]), R[1])
    R_inv = np.linalg.inv(R)
    img2ind = np.zeros([2, self.num_points, 3])
    imgPCid = np.zeros([2, self.num_points, 2])
    if self.pointcloud or self.local:
        # Sample num_points pixels from the central 160-column crop of each
        # view and gather xyz / normal / color / semantic per point.
        pc = self.depth2pc(imgs_depth[0][:, 160:160 * 2]).T
        idx_s = np.random.choice(range(len(pc)), self.num_points)
        imgPCid[0] = np.stack((idx_s % 160, idx_s // 160)).T
        pointcloud[0, :3, :] = pc[idx_s, :].T
        pc_n = imgs_normal[0][:, 160:160 * 2].reshape(-1, 3)
        pc_n = np.matmul(self.Rs[3][:3, :3].T, pc_n.T).T
        pointcloud[0, 3:6, :] = pc_n[idx_s, :].T
        pc_c = imgs_rgb[0, :, 160:160 * 2, :].reshape(-1, 3)
        pointcloud[0, 6:9, :] = pc_c[idx_s, ::-1].T  # BGR -> RGB
        pc_s = imgs_s[0, :, 160:160 * 2].reshape(-1)
        pointcloud[0, 9:10, :] = pc_s[idx_s]
        pc = self.depth2pc(imgs_depth[1][:, 160:160 * 2]).T
        idx_s = np.random.choice(range(len(pc)), self.num_points)
        imgPCid[1] = np.stack((idx_s % 160, idx_s // 160)).T
        pointcloud[1, :3, :] = pc[idx_s, :].T
        pc_n = imgs_normal[1][:, 160:160 * 2].reshape(-1, 3)
        pc_n = np.matmul(self.Rs[3][:3, :3].T, pc_n.T).T
        pointcloud[1, 3:6, :] = pc_n[idx_s, :].T
        pc_c = imgs_rgb[1, :, 160:160 * 2, :].reshape(-1, 3)
        pointcloud[1, 6:9, :] = pc_c[idx_s, ::-1].T
        pc_s = imgs_s[1, :, 160:160 * 2].reshape(-1)
        pointcloud[1, 9:10, :] = pc_s[idx_s]
        rets['pointcloud'] = pointcloud[None, ...]
    if self.plane_r:
        # Precomputed plane parameters for the room: rows are
        # [center(3), plane-eq(4), ...] -- 9 columns total.
        Key = '%s-plane' % (room_id)
        plane_eq_raw = np.frombuffer(self.txn.get(Key.encode()), np.float64).reshape(-1, 9)
        Key = '%s-plane-validnum' % (room_id)
        valid_plane = np.frombuffer(self.txn.get(Key.encode()), np.uint8)[0]
        plane_eq = plane_eq_raw[:, 3:7]
        plane_eq = np.matmul(plane_eq, np.linalg.inv(R[0]))  # into view-0 frame
        plane_center = plane_eq_raw[:, :3]
        plane_center = (np.matmul(R[0][:3, :3], plane_center.T) + R[0][:3, 3:4]).T
        rets['plane'] = plane_eq[np.newaxis, :]
        rets['plane_raw'] = plane_eq_raw[np.newaxis, :]
        rets['plane_c'] = plane_center[np.newaxis, :]
        rets['valid_plane'] = valid_plane
    if self.local:
        # Bring the source cloud into the target frame.
        R_s2t = np.matmul(R[1], R_inv[0])
        pointcloud[0, :3, :] = np.matmul(R_s2t[:3, :3], pointcloud[0, :3, :]) + R_s2t[:3, 3:4]
        pointcloud[0, 3:6, :] = np.matmul(R_s2t[:3, :3], pointcloud[0, 3:6, :])
        if 1:
            # Label random cross-view point pairs by geometric relation:
            # 0 none, 1 perpendicular, 2 parallel, 3 coplanar.
            N_PAIR_PTS = 1000
            N_PAIR_EXCEED_PTS = N_PAIR_PTS * 10
            ANGLE_THRESH = 5.0
            PERP_THRESH = np.cos(np.deg2rad(90 - ANGLE_THRESH))
            PARALLEL_THRESH = np.cos(np.deg2rad(ANGLE_THRESH))
            COPLANE_THRESH = 0.05
            rel_cls_pts = np.zeros([N_PAIR_EXCEED_PTS])
            ind_s = np.random.choice(pointcloud.shape[-1], N_PAIR_EXCEED_PTS)
            ind_t = np.random.choice(pointcloud.shape[-1], N_PAIR_EXCEED_PTS)
            pair_pts = np.stack((ind_s, ind_t), -1)
            normdot = (pointcloud[0, 3:6, pair_pts[:, 0]] * pointcloud[1, 3:6, pair_pts[:, 1]]).sum(1)
            # Symmetrized point-to-plane distance between the paired points.
            dst = (np.abs(((pointcloud[0, 0:3, pair_pts[:, 0]] - pointcloud[1, 0:3, pair_pts[:, 1]]) * pointcloud[1, 3:6, pair_pts[:, 1]]).sum(1)) +
                   np.abs(((pointcloud[0, 0:3, pair_pts[:, 0]] - pointcloud[1, 0:3, pair_pts[:, 1]]) * pointcloud[0, 3:6, pair_pts[:, 0]]).sum(1))) / 2
            rel_cls_pts[(np.abs(normdot) < PERP_THRESH)] = 1
            rel_cls_pts[(np.abs(normdot) > PARALLEL_THRESH) & (dst > COPLANE_THRESH)] = 2
            rel_cls_pts[(np.abs(normdot) > PARALLEL_THRESH) & (dst <= COPLANE_THRESH)] = 3
            if self.split == 'train':
                # balance each class
                N_CLASS = 4
                pair_pts_select = []
                for j in range(N_CLASS):
                    ind = np.where(rel_cls_pts == j)[0]
                    if len(ind):
                        pair_pts_select.append(ind[np.random.choice(len(ind), N_PAIR_PTS // N_CLASS)])
                pair_pts_select = np.concatenate(pair_pts_select)
                pair_pts_select = pair_pts_select[np.random.choice(len(pair_pts_select), N_PAIR_PTS)]
                pair_pts = pair_pts[pair_pts_select]
                normdot = normdot[pair_pts_select]
                dst = dst[pair_pts_select]
                rel_cls_pts = rel_cls_pts[pair_pts_select]
            else:
                pair_pts_select = np.random.choice(len(pair_pts), N_PAIR_PTS)
                pair_pts = pair_pts[pair_pts_select]
                normdot = normdot[pair_pts_select]
                dst = dst[pair_pts_select]
                rel_cls_pts = rel_cls_pts[pair_pts_select]
            rets['normdot2'] = np.power(normdot, 2)[None, :]
            rets['dst2'] = np.power(dst, 2)[None, :]
            # convert to image coordinate (pinhole projection, 160px crop)
            R_t2s = np.linalg.inv(R_s2t)
            tp = (np.matmul(R_t2s[:3, :3], pointcloud[0, :3, pair_pts[:, 0]].T) + R_t2s[:3, 3:4]).T
            hfov = 90.0
            vfov = 2 * np.arctan(np.tan(hfov / 2 / 180 * np.pi)) / np.pi * 180
            zs = -tp[:, 2]
            ys = (0.5 - (tp[:, 1] / zs / (np.tan(np.deg2rad(vfov / 2)))) / 2) * 160
            xs = (0.5 + (tp[:, 0] / zs / (np.tan(np.deg2rad(hfov / 2)))) / 2) * 160
            uv_s = np.stack((xs, ys), -1)
            tp = pointcloud[1, :3, pair_pts[:, 1]]
            zs = -tp[:, 2]
            ys = (0.5 - (tp[:, 1] / zs / (np.tan(np.deg2rad(vfov / 2)))) / 2) * 160
            xs = (0.5 + (tp[:, 0] / zs / (np.tan(np.deg2rad(hfov / 2)))) / 2) * 160
            uv_t = np.stack((xs, ys), -1)
            rets['uv_pts'] = np.stack((uv_s, uv_t))[None, :]
            rets['uv_pts'][:, :, :, 0] = rets['uv_pts'][:, :, :, 0].clip(0, 160 - 1)
            rets['uv_pts'][:, :, :, 1] = rets['uv_pts'][:, :, :, 1].clip(0, 160 - 1)
            rets['uv_pts'] = rets['uv_pts'].astype('int')
            rets['rel_cls_pts'] = rel_cls_pts[None, :]
            rets['pair_pts'] = pair_pts[None, :]
        if self.eval_local:
            # convert back into local coordinate and apply the stored
            # predicted pose; igt is the residual transform to recover.
            R_t2s = np.matmul(R[0], R_inv[1])
            Kth = self.dataList[index % self.__len__()]['Kth']
            pointcloud[0, :3, :] = np.matmul(R_t2s[:3, :3], pointcloud[0, :3, :]) + R_t2s[:3, 3:4]
            pointcloud[0, 3:6, :] = np.matmul(R_t2s[:3, :3], pointcloud[0, 3:6, :])
            R_pred = self.eval_gt_dict['%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)]['pred_pose']
            gt_pose = self.eval_gt_dict['%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)]['gt_pose']
            err_r = util.angular_distance_np(R_pred[:3, :3], gt_pose[:3, :3])[0]
            rets['err_r'] = err_r
            rets['eval_key'] = '%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)
            pos_s_360 = self.eval_gt_dict['%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)]['pos_s_360']
            pos_t_360 = self.eval_gt_dict['%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)]['pos_t_360']
            nor_s_360 = self.eval_gt_dict['%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)]['nor_s_360']
            nor_t_360 = self.eval_gt_dict['%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)]['nor_t_360']
            feat_s_360 = self.eval_gt_dict['%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)]['feat_s_360']
            feat_t_360 = self.eval_gt_dict['%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)]['feat_t_360']
            rets['pos_s_360'] = (pos_s_360[None, :])
            rets['pos_t_360'] = (pos_t_360[None, :])
            rets['nor_s_360'] = (nor_s_360[None, :])
            rets['nor_t_360'] = (nor_t_360[None, :])
            pointcloud[0, :3, :] = np.matmul(R_pred[:3, :3], pointcloud[0, :3, :]) + R_pred[:3, 3:4]
            pointcloud[0, 3:6, :] = np.matmul(R_pred[:3, :3], pointcloud[0, 3:6, :])
            igt = np.matmul(R_s2t, np.linalg.inv(R_pred))
            rets['igt'] = igt[None, :]
            rets['pred_pose'] = R_pred[None, :]
            rets['gt_pose'] = gt_pose[None, :]
            R_gt = igt[:3, :3]
            t_gt = igt[:3, 3:4]
        else:
            # Training path: perturb the source cloud about its centroid by a
            # small random rotation/translation; (R_gt, t_gt) undo it.
            delta_R = util.randomRotation(epsilon=0.1)
            delta_t = np.random.randn(3) * 0.1
            pointcloud_s_perturb = np.matmul(delta_R, pointcloud[0, :3, :] - pointcloud[0, :3, :].mean(1)[:, None]) + delta_t[:, None] + pointcloud[0, :3, :].mean(1)[:, None]
            tp_R = delta_R
            tp_t = np.matmul(np.eye(3) - delta_R, pointcloud[0, :3, :].mean(1)[:, None]) + delta_t[:, None]
            t_gt = np.matmul(np.eye(3) - delta_R.T, pointcloud[0, :3, :].mean(1)[:, None]) - np.matmul(delta_R.T, delta_t[:, None])
            R_gt = delta_R.T
            igt = np.eye(4)
            igt[:3, :3] = R_gt
            igt[:3, 3] = t_gt.squeeze()
            rets['igt'] = igt[None, :]
            pointcloud_s_n_perturb = np.matmul(delta_R, pointcloud[0, 3:6, :])
            pointcloud[0, :3, :] = pointcloud_s_perturb
            pointcloud[0, 3:6, :] = pointcloud_s_n_perturb
        Q = np.concatenate((util.rot2Quaternion(R_gt), t_gt.squeeze()))
        R_ = np.eye(4)
        R_[:3, :3] = R_gt
        R_[:3, 3] = t_gt.squeeze()
        R_inv = np.linalg.inv(R_)
        rets['pointcloud'] = pointcloud[None, ...]
    if self.topdown:
        # Build top-down (floor-plane) projections of both views.
        Key = '%s-pc' % (room_id)
        roompc = np.frombuffer(self.txn.get(Key.encode()), np.float64).reshape(-1, 3)
        roompc = roompc[np.random.choice(roompc.shape[0], 20000)]
        rets['roompc'] = roompc[None, :]
        Key = '%s-floor' % (room_id)
        plane_eq = np.frombuffer(self.txn.get(Key.encode()), np.float64).reshape(4)
        plane_eqs = np.zeros([2, 4])
        plane_eq_0 = np.matmul(plane_eq, np.linalg.inv(R[0]))
        plane_eq_0 /= (np.linalg.norm(plane_eq_0[:3]) + 1e-16)
        plane_eqs[0, :] = plane_eq_0.copy()
        plane_eq_1 = np.matmul(plane_eq, np.linalg.inv(R[1]))
        plane_eq_1 /= (np.linalg.norm(plane_eq_1[:3]) + 1e-16)
        plane_eqs[1, :] = plane_eq_1.copy()
        # Only used by debug dumps, but kept: removing it would shift the
        # global NumPy RNG stream and change every later random draw.
        colors = np.random.rand(15 + 1, 3)
        resolution = 0.04  # meters per top-down pixel
        height = 224
        width = 224
        pc0 = pointcloud[0, 0:3, :].T
        pc2ind = np.zeros([2, len(pc0), 3])
        npts = np.zeros([2])
        pc2ind_mask = np.zeros([2, pointcloud.shape[2]])
        # Remove the partial view's ceiling: keep points within 1.5m of floor.
        dst = np.abs(((plane_eq_0[:3][None, :] * pc0).sum(1) + plane_eq_0[3]))
        mask = dst < 1.5
        validind = np.where(mask)[0]
        invalidind = np.where(~mask)[0]
        npts[0] = len(validind)
        pc0 = pc0[mask]
        pc2ind_mask[0] = mask
        # project camera position (0,0,0) to the floor plane
        origin_0 = -plane_eq_0[:3] * plane_eq_0[3]
        # Build an orthonormal in-plane frame (x,y) plus plane normal (z).
        axis_base = np.array([0, 0, -1])
        axis_y_0 = axis_base - np.dot(axis_base, plane_eq_0[:3]) * plane_eq_0[:3]
        axis_y_0 /= (np.linalg.norm(axis_y_0) + 1e-16)
        axis_x_0 = np.cross(axis_y_0, plane_eq_0[:3])
        axis_x_0 /= (np.linalg.norm(axis_x_0) + 1e-16)
        axis_z_0 = plane_eq_0[:3]
        # --- top-down images for both views ---
        imageKey = '%s-%06d-topdown_c_partial' % (room_id, ct0)
        imageBin = self.txn.get(imageKey.encode())
        imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
        topdown_c_partial_0 = cv2.imdecode(imageBuf, cv2.IMREAD_COLOR).astype('float') / 255.
        imageKey = '%s-%06d-topdown_c_partial' % (room_id, ct1)
        imageBin = self.txn.get(imageKey.encode())
        imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
        topdown_c_partial_1 = cv2.imdecode(imageBuf, cv2.IMREAD_COLOR).astype('float') / 255.
        imageKey = '%s-%06d-topdown_c_complete' % (room_id, ct0)
        imageBin = self.txn.get(imageKey.encode())
        imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
        topdown_c_complete_0 = cv2.imdecode(imageBuf, cv2.IMREAD_COLOR).astype('float') / 255.
        imageKey = '%s-%06d-topdown_c_complete' % (room_id, ct1)
        imageBin = self.txn.get(imageKey.encode())
        imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
        topdown_c_complete_1 = cv2.imdecode(imageBuf, cv2.IMREAD_COLOR).astype('float') / 255.
        imageKey = '%s-%06d-topdown_s_complete' % (room_id, ct0)
        imageBin = self.txn.get(imageKey.encode())
        imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
        topdown_s_complete_0 = cv2.imdecode(imageBuf, cv2.IMREAD_UNCHANGED).astype('uint8')
        imageKey = '%s-%06d-topdown_s_complete' % (room_id, ct1)
        imageBin = self.txn.get(imageKey.encode())
        imageBuf = np.frombuffer(imageBin, dtype=np.uint8)
        topdown_s_complete_1 = cv2.imdecode(imageBuf, cv2.IMREAD_UNCHANGED).astype('uint8')
        # Distance-transform based weights decaying away from observed pixels.
        tp = ~topdown_c_partial_0.sum(2).astype('bool')
        edt_0 = ndimage.distance_transform_edt(tp, return_indices=False)
        edt_0 = np.maximum(0.1, np.power(0.98, edt_0))
        tp = ~topdown_c_partial_1.sum(2).astype('bool')
        edt_1 = ndimage.distance_transform_edt(tp, return_indices=False)
        edt_1 = np.maximum(0.1, np.power(0.98, edt_1))
        rets['edt_w'] = np.stack((edt_0, edt_1))[None, ...]
        # Project the masked view-0 points into top-down pixel/height bins.
        u = ((pc0 - origin_0[None, :]) * axis_x_0[None, :]).sum(1)
        v = ((pc0 - origin_0[None, :]) * axis_y_0[None, :]).sum(1)
        z = ((pc0 - origin_0[None, :]) * axis_z_0[None, :]).sum(1)
        u = width // 2 + (u / resolution).astype('int')
        v = height // 2 - (v / resolution).astype('int')
        ind_z = np.digitize(z, [-0.1, 0.7, 1.5])
        topdown_ind_0 = np.stack((u, v, ind_z), -1)
        u = ((pointcloud[0, 0:3, :].T - origin_0[None, :]) * axis_x_0[None, :]).sum(1)
        v = ((pointcloud[0, 0:3, :].T - origin_0[None, :]) * axis_y_0[None, :]).sum(1)
        z = ((pointcloud[0, 0:3, :].T - origin_0[None, :]) * axis_z_0[None, :]).sum(1)
        u = width // 2 + (u / resolution).astype('int')
        v = height // 2 - (v / resolution).astype('int')
        ind_z = np.digitize(z, [-0.1, 0.7, 1.5])
        topdown_ind_img_0 = np.stack((u, v, ind_z), -1)
        pc2ind[0, mask] = topdown_ind_0
        # --- same construction for view 1 ---
        pc1 = pointcloud[1, 0:3, :].T
        plane_eq_1 = np.matmul(plane_eq, np.linalg.inv(R[1]))
        plane_eq_1 /= (np.linalg.norm(plane_eq_1[:3]) + 1e-16)
        plane_eqs[1, :] = plane_eq_1.copy()
        dst = np.abs(((plane_eq_1[:3][None, :] * pc1).sum(1) + plane_eq_1[3]))
        mask = dst < 1.5
        validind = np.where(mask)[0]
        invalidind = np.where(~mask)[0]
        npts[1] = len(validind)
        pc1 = pc1[mask]
        pc2ind_mask[1] = mask
        origin_1 = -plane_eq_1[:3] * plane_eq_1[3]
        axis_base = np.array([0, 0, -1])
        axis_y_1 = axis_base - np.dot(axis_base, plane_eq_1[:3]) * plane_eq_1[:3]
        axis_y_1 /= (np.linalg.norm(axis_y_1) + 1e-16)
        axis_x_1 = np.cross(axis_y_1, plane_eq_1[:3])
        axis_x_1 /= (np.linalg.norm(axis_x_1) + 1e-16)
        axis_z_1 = plane_eq_1[:3]
        u = ((pc1 - origin_1[None, :]) * axis_x_1[None, :]).sum(1)
        v = ((pc1 - origin_1[None, :]) * axis_y_1[None, :]).sum(1)
        z = ((pc1 - origin_1[None, :]) * axis_z_1[None, :]).sum(1)
        u = width // 2 + (u / resolution).astype('int')
        v = height // 2 - (v / resolution).astype('int')
        ind_z = np.digitize(z, [-0.1, 0.7, 1.5])
        topdown_ind_1 = np.stack((u, v, ind_z), -1)
        u = ((pointcloud[1, 0:3, :].T - origin_1[None, :]) * axis_x_1[None, :]).sum(1)
        v = ((pointcloud[1, 0:3, :].T - origin_1[None, :]) * axis_y_1[None, :]).sum(1)
        z = ((pointcloud[1, 0:3, :].T - origin_1[None, :]) * axis_z_1[None, :]).sum(1)
        u = width // 2 + (u / resolution).astype('int')
        v = height // 2 - (v / resolution).astype('int')
        ind_z = np.digitize(z, [-0.1, 0.7, 1.5])
        topdown_ind_img_1 = np.stack((u, v, ind_z), -1)
        img2ind[0] = topdown_ind_img_0
        img2ind[1] = topdown_ind_img_1
        pc2ind[1, mask] = topdown_ind_1
        rets['img2ind'] = img2ind[None, ...]
        rets['imgPCid'] = imgPCid[None, ...]
        rets['axis_x'] = np.zeros([2, 3])
        rets['axis_y'] = np.zeros([2, 3])
        rets['origin'] = np.zeros([2, 3])
        rets['axis_x'][0] = axis_x_0
        rets['axis_y'][0] = axis_y_0
        rets['axis_x'][1] = axis_x_1
        rets['axis_y'][1] = axis_y_1
        rets['origin'][0] = origin_0
        rets['origin'][1] = origin_1
        rets['axis_x'] = rets['axis_x'][None, :]
        rets['axis_y'] = rets['axis_y'][None, :]
        rets['origin'] = rets['origin'][None, :]
        # sample points on source floor plane:
        mask = ~((topdown_c_partial_0 == 0).sum(2) == 3)
        vs, us = np.where(mask)
        if not len(vs):
            vs = np.array([0, 0])
            us = np.array([0, 0])
        ind = np.random.choice(len(vs), 100)
        u_0 = us[ind]
        v_0 = vs[ind]
        kp_uv_0 = np.stack((u_0, v_0), -1)
        u_0 -= width // 2
        v_0 -= height // 2
        kp_3d_0 = origin_0[None, :] + axis_x_0[None, :] * u_0[:, None] * resolution - axis_y_0[None, :] * v_0[:, None] * resolution
        R01 = np.matmul(R[1], R_inv[0])
        kp_3d_1 = (np.matmul(R01[:3, :3], kp_3d_0.T) + R01[:3, 3:4]).T
        # random sample a set of points as negative correspondencs
        mask = ~((topdown_c_partial_1 == 0).sum(2) == 3)
        vs_neg, us_neg = np.where(mask)
        if not len(vs_neg):
            vs_neg = np.array([0, 0])
            us_neg = np.array([0, 0])
        ind = np.random.choice(len(vs_neg), 100 * 100)
        u_neg_1 = us_neg[ind]
        v_neg_1 = vs_neg[ind]
        kp_uv_neg_1 = np.stack((u_neg_1, v_neg_1), -1)
        u_neg_1 -= width // 2
        v_neg_1 -= height // 2
        kp_3d_neg_1 = origin_1[None, :] + axis_x_1[None, :] * u_neg_1[:, None] * resolution - axis_y_1[None, :] * v_neg_1[:, None] * resolution
        R10 = np.matmul(R[0], R_inv[1])
        kp_3d_neg_0 = (np.matmul(R10[:3, :3], kp_3d_neg_1.T) + R10[:3, 3:4]).T
        u_neg_0 = ((kp_3d_neg_0 - origin_0[None, :]) * axis_x_0[None, :]).sum(1)
        v_neg_0 = ((kp_3d_neg_0 - origin_0[None, :]) * axis_y_0[None, :]).sum(1)
        u_neg_0 = width // 2 + (u_neg_0 / resolution).astype('int')
        v_neg_0 = height // 2 - (v_neg_0 / resolution).astype('int')
        kp_uv_neg_0 = np.stack((u_neg_0, v_neg_0), -1)
        kp_uv_neg_0[:, 0] = kp_uv_neg_0[:, 0].clip(0, width - 1)
        kp_uv_neg_0[:, 1] = kp_uv_neg_0[:, 1].clip(0, height - 1)
        kp_uv_neg_1 = kp_uv_neg_1.reshape(100, 100, 2)
        kp_uv_neg_0 = kp_uv_neg_0.reshape(100, 100, 2)
        # Negative-pair weights: small when a "negative" lands near a positive.
        w_uv_neg_1 = 1 - np.maximum(0.1, np.power(0.98, np.linalg.norm(kp_uv_neg_0 - kp_uv_0[:, None, :], axis=2)))
        u_1 = ((kp_3d_1 - origin_1[None, :]) * axis_x_1[None, :]).sum(1)
        v_1 = ((kp_3d_1 - origin_1[None, :]) * axis_y_1[None, :]).sum(1)
        u_1 = width // 2 + (u_1 / resolution).astype('int')
        v_1 = height // 2 - (v_1 / resolution).astype('int')
        kp_uv_1 = np.stack((u_1, v_1), -1)
        topdown_c_complete = np.stack((topdown_c_complete_0, topdown_c_complete_1)).transpose(0, 3, 1, 2)
        topdown_s_complete = np.stack((topdown_s_complete_0, topdown_s_complete_1))
        topdown_c_partial = np.stack((topdown_c_partial_0, topdown_c_partial_1))
        kp_uv_0[:, 0] = kp_uv_0[:, 0].clip(0, width - 1)
        kp_uv_0[:, 1] = kp_uv_0[:, 1].clip(0, height - 1)
        kp_uv_1[:, 0] = kp_uv_1[:, 0].clip(0, width - 1)
        kp_uv_1[:, 1] = kp_uv_1[:, 1].clip(0, height - 1)
        rets['kp_uv'] = np.stack((kp_uv_0, kp_uv_1))[None, ...]
        rets['kp_uv_neg'] = kp_uv_neg_1[None, ...]
        rets['w_uv_neg'] = w_uv_neg_1[None, ...]
        rets['plane_eq'] = plane_eqs[None, ...]
        rets['pc2ind'] = pc2ind[None, ...]
        rets['pc2ind_mask'] = pc2ind_mask[None, ...]
        rets['topdown'] = topdown_c_complete[None, ...]
        rets['topdown_s'] = topdown_s_complete[None, ...]
        rets['topdown_partial'] = topdown_c_partial.transpose(0, 3, 1, 2)[None, ...]
        # Valid where not all three channels are zero.
        TopDownValidMask = ((topdown_c_complete == 0).sum(1, keepdims=True) != 3)
        rets['TopDownValidMask'] = TopDownValidMask[None, ...]
        rets['npts'] = npts[None, ...]
    imgsPath.append(f"{basePath}/{ct0:06d}")
    imgsPath.append(f"{basePath}/{ct1:06d}")
    rets['norm'] = imgs_normal.transpose(0, 3, 1, 2)[None, ...]
    rets['rgb'] = imgs_rgb.transpose(0, 3, 1, 2)[None, ...]
    rets['semantic'] = imgs_s[None, ...]
    rets['depth'] = imgs_depth[None, :, None, :, :]
    rets['Q'] = Q[None, ...]
    rets['R'] = R[None, ...]
    rets['R_inv'] = R_inv[None, ...]
    rets['imgsPath'] = imgsPath
    return rets, True
# average speed time_this = time.time() - st speedBenchmark.append(time_this) # compute rotation error and translation error if isinstance(R_hat, list): if 1: ad_min = 360 t_tmp = R_hat[0][:3, 3] tmp_idx = 0 for k in range(len(R_hat)): ad_tmp = util.angular_distance_np( R_hat[k][:3, :3].reshape(1, 3, 3), R_gt[np.newaxis, :, :])[0] if ad_tmp < ad_min: ad_min = ad_tmp t_tmp = R_hat[k][:3, 3] tmp_idx = k else: tmp_idx = np.where(overlap_val == np.max(overlap_val))[0][0] #import pdb; pdb.set_trace() ad_min = util.angular_distance_np( R_hat[tmp_idx][:3, :3].reshape(1, 3, 3), R_gt[np.newaxis, :, :])[0] t_tmp = R_hat[tmp_idx][:3, 3] if args.numMatches == 5: best_distribution[tmp_idx] += 1
def __getitem__(self, index):
    """Load one view pair (depth/RGB/normal/semantic + poses) and derived data.

    Depending on dataset flags (``rgbd``, ``segm``, ``normal``, ``pointcloud``,
    ``denseCorres``, ``plane_r``, ``plane_m``, ``reproj``), also builds dense
    correspondences, plane parameters and cross-view reprojections.

    Returns:
        dict mapping output names to arrays with a leading batch axis.
    """
    rets = {}
    imgs = np.zeros((self.nViews, *self.OutputSize[::-1]), dtype=np.float32)
    if self.rgbd:
        imgs_rgb = np.zeros((self.nViews, *self.OutputSize[::-1], 3), dtype=np.float32)
    if self.segm:
        segm = np.zeros((self.nViews, 1, *self.OutputSize[::-1]), dtype=np.float32)
    if self.dynamicWeighting:
        dynamicW = np.zeros((self.nViews, 1, *self.OutputSize[::-1]), dtype=np.float32)
    if self.normal:
        normal = np.zeros((self.nViews, *self.OutputSize[::-1], 3), dtype=np.float32)
    if self.pointcloud:
        # Channel layout: 0:3 xyz, 3:6 normal, 6:9 rgb, 9 semantic.
        pointcloud = np.zeros((self.nViews, 3 + 3 + 3 + 1, self.num_points), dtype=np.float32)
        pointcloud_flow = np.zeros((self.nViews, 3, self.num_points), dtype=np.float32)
    R = np.zeros((self.nViews, 4, 4))
    Q = np.zeros((self.nViews, 7))  # per-view [quaternion(4), translation(3)]
    assert (self.nViews == 2)
    ct0, ct1 = self.__getpair__(index)
    imgsPath = []
    basePath = self.base_this
    frameid0 = f"{ct0:06d}"
    frameid1 = f"{ct1:06d}"
    if self.fullsize_rgbdn:
        # Full-resolution observations alongside the resized ones.
        imgs_rgb_full = np.zeros((self.nViews, 480, 640, 3), dtype=np.float32)
        imgs_full = np.zeros((self.nViews, 480, 640), dtype=np.float32)
        imgs_full[0] = self.LoadImage(os.path.join(basePath, 'obs_depth', '{}.png'.format(frameid0))).copy()
        imgs_full[1] = self.LoadImage(os.path.join(basePath, 'obs_depth', '{}.png'.format(frameid1))).copy()
        imgs_rgb_full[0] = self.LoadImage(os.path.join(basePath, 'obs_rgb', '{}.png'.format(frameid0)), depth=False).copy() / 255.
        imgs_rgb_full[1] = self.LoadImage(os.path.join(basePath, 'obs_rgb', '{}.png'.format(frameid1)), depth=False).copy() / 255.
        rets['rgb_full'] = imgs_rgb_full[np.newaxis, :]
        rets['depth_full'] = imgs_full[np.newaxis, :]
    imgs[0] = self.LoadImage(os.path.join(basePath, 'depth', '{}.png'.format(frameid0))).copy()
    imgs[1] = self.LoadImage(os.path.join(basePath, 'depth', '{}.png'.format(frameid1))).copy()
    # Zero depth marks missing measurements.
    dataMask = np.zeros((self.nViews, 1, *self.OutputSize[::-1]), dtype=np.float32)
    dataMask[0, 0, :, :] = (imgs[0] != 0)
    dataMask[1, 0, :, :] = (imgs[1] != 0)
    rets['dataMask'] = dataMask[np.newaxis, :]
    if self.rgbd:
        imgs_rgb[0] = self.LoadImage(os.path.join(basePath, 'rgb', '{}.png'.format(frameid0)), depth=False).copy() / 255.
        imgs_rgb[1] = self.LoadImage(os.path.join(basePath, 'rgb', '{}.png'.format(frameid1)), depth=False).copy() / 255.
    # Poses live under the original ScanNet directory name.
    if self.scannet_new_name:
        tmp_basePath = basePath.replace('ScanNet_360', 'ScanNet')
    else:
        tmp_basePath = basePath
    R[0] = np.loadtxt(os.path.join(tmp_basePath, 'pose', frameid0 + '.pose.txt'))
    R[1] = np.loadtxt(os.path.join(tmp_basePath, 'pose', frameid1 + '.pose.txt'))
    Q[0, :4] = rot2Quaternion(R[0][:3, :3])
    Q[0, 4:] = R[0][:3, 3]
    Q[1, :4] = rot2Quaternion(R[1][:3, :3])
    Q[1, 4:] = R[1][:3, 3]
    imgsPath.append(f"{basePath}/{ct0:06d}")
    imgsPath.append(f"{basePath}/{ct1:06d}")
    if self.normal:
        # Map [0,255] normal images to [-1,1]; all-zero pixels stay invalid.
        tp = self.LoadImage(os.path.join(basePath, 'normal', '{}.png'.format(frameid0)), depth=False).copy().astype('float')
        mask = (tp == 0).sum(2) < 3
        tp[mask] = tp[mask] / 255. * 2 - 1
        normal[0] = tp
        tp = self.LoadImage(os.path.join(basePath, 'normal', '{}.png'.format(frameid1)), depth=False).copy().astype('float')
        mask = (tp == 0).sum(2) < 3
        tp[mask] = tp[mask] / 255. * 2 - 1
        normal[1] = tp
    if self.segm:
        tp = (self.LoadImage(os.path.join(basePath, 'semantic_idx', '{}.png'.format(frameid0)), depth=False).copy())[:, :, 1]
        segm[0] = tp.reshape(segm[0].shape)
        tp = (self.LoadImage(os.path.join(basePath, 'semantic_idx', '{}.png'.format(frameid1)), depth=False).copy())[:, :, 1]
        segm[1] = tp.reshape(segm[1].shape)
        segm_ = np.zeros((self.nViews, 1, *self.OutputSize[::-1]), dtype=np.float32)
        segm_[0] = segm[0]
        segm_[1] = segm[1]
        segm_ = segm_[np.newaxis, :]
    if self.denseCorres:
        # get 3d point cloud for each pano
        pcs, masks = self.Pano2PointCloud(imgs[0], self.representation)
        # be aware of the order of returned pc!!!
        pct, maskt = self.Pano2PointCloud(imgs[1], self.representation)
        # Move both clouds into world coordinates.
        pct = np.matmul(np.linalg.inv(R[1]), np.concatenate((pct, np.ones([1, pct.shape[1]]))))[:3, :]
        pcs = np.matmul(np.linalg.inv(R[0]), np.concatenate((pcs, np.ones([1, pcs.shape[1]]))))[:3, :]
        # find correspondence using kdtree
        tree = KDTree(pct.T)
        IdxQuery = np.random.choice(range(pcs.shape[1]), 5000)  # sample 5000 query points
        pcsQuery = pcs[:, IdxQuery]
        nearest_dist, nearest_ind = tree.query(pcsQuery.T, k=1)
        hasCorres = (nearest_dist < 0.08)  # 8 cm match threshold
        idxTgtNeg = []
        idxSrc = self.PanoIdx(masks[IdxQuery[np.where(hasCorres)[0]]], imgs.shape[1], imgs.shape[2], self.representation)
        idxTgt = self.PanoIdx(maskt[nearest_ind[hasCorres]], imgs.shape[1], imgs.shape[2], self.representation)
        if hasCorres.sum() < 200:
            # Too few matches: emit an empty, invalid correspondence set.
            rets['denseCorres'] = {
                'idxSrc': np.zeros([1, 500, 2]),
                'idxTgt': np.zeros([1, 500, 2]),
                'valid': np.array([0]),
                'idxTgtNeg': idxTgtNeg
            }
        else:
            # only pick 500 correspondences per pair
            idx500 = np.random.choice(range(idxSrc.shape[0]), 500)
            idxSrc = idxSrc[idx500][np.newaxis, :]
            idxTgt = idxTgt[idx500][np.newaxis, :]
            rets['denseCorres'] = {
                'idxSrc': idxSrc,
                'idxTgt': idxTgt,
                'valid': np.array([1]),
                'idxTgtNeg': idxTgtNeg
            }
    imgPCid = np.zeros([2, self.num_points, 2])
    if self.pointcloud:
        try:
            pc = self.depth2pc(imgs[0][:, 160:160 * 2])
            idx_s = np.random.choice(range(len(pc)), self.num_points)
            imgPCid[0] = np.stack((idx_s % 160, idx_s // 160)).T
            pointcloud[0, :3, :] = pc[idx_s, :].T
            pc_n = normal[0][:, 160:160 * 2, :].reshape(-1, 3)
            pointcloud[0, 3:6, :] = pc_n[idx_s, :].T
            pc_c = imgs_rgb[0, :, 160:160 * 2, :].reshape(-1, 3)
            pointcloud[0, 6:9, :] = pc_c[idx_s, ::-1].T  # BGR -> RGB
            pc = self.depth2pc(imgs[1][:, 160:160 * 2])
            idx_s = np.random.choice(range(len(pc)), self.num_points)
            imgPCid[1] = np.stack((idx_s % 160, idx_s // 160)).T
            pointcloud[1, :3, :] = pc[idx_s, :].T
            pc_n = normal[1][:, 160:160 * 2, :].reshape(-1, 3)
            pointcloud[1, 3:6, :] = pc_n[idx_s, :].T
            pc_c = imgs_rgb[1, :, 160:160 * 2, :].reshape(-1, 3)
            # FIX: pc_c was computed but never written for view 1, leaving its
            # color channels all-zero while view 0 got colors; mirror view 0.
            pointcloud[1, 6:9, :] = pc_c[idx_s, ::-1].T
        except Exception:  # FIX: was bare `except:` (swallowed KeyboardInterrupt too)
            pointcloud = np.zeros((self.nViews, 3 + 3 + 3 + 1, self.num_points), dtype=np.float32)
            pointcloud_flow = np.zeros((self.nViews, 3, self.num_points), dtype=np.float32)
            print("this pair does not contain point cloud!")
    if self.plane_r:
        scene_id = basePath.split('/')[-1]
        plane_file = '/media/yzp12/wdblue/2020_CVPR_Hybrid/data/ScanNet_plane/train/' + scene_id + '.npy'
        if os.path.exists(plane_file):
            plane_eq_raw = np.load(plane_file)
            if plane_eq_raw.shape[0] < 6:
                plane_eq_raw = np.concatenate([plane_eq_raw, plane_eq_raw], axis=0)
            # Keep the (up to) 10 largest planes; column 7 is the size score.
            MAX_PLANE = 10
            plane_idx = np.argsort(plane_eq_raw[:, 7])
            plane_eq_raw = plane_eq_raw[plane_idx[-MAX_PLANE:]]
            truncate_num = plane_eq_raw[-6, 7] / 2
            plane_eq_raw = plane_eq_raw[plane_eq_raw[:, 7] > truncate_num]
            if plane_eq_raw.shape[0] < MAX_PLANE:
                valid_plane = plane_eq_raw.shape[0]
                # Zero-pad to a fixed MAX_PLANE rows for batching.
                plane_eq_raw = np.concatenate(
                    (plane_eq_raw,
                     np.zeros([MAX_PLANE - plane_eq_raw.shape[0], plane_eq_raw.shape[-1]])))
            else:
                valid_plane = MAX_PLANE
            plane_eq = plane_eq_raw[:, 3:7]
            plane_eq = np.matmul(plane_eq, np.linalg.inv(R[0]))  # into view-0 frame
            plane_center = plane_eq_raw[:, :3]
            plane_center = (np.matmul(R[0][:3, :3], plane_center.T) + R[0][:3, 3:4]).T
        else:
            print("Missing plane data")
            import pdb
            pdb.set_trace()
    if self.plane_m:
        scene_id = basePath.split('/')[-1]
        plane_file = '/media/yzp12/wdblue/2020_CVPR_Hybrid/data/ScanNet_manual_plane/%s/' % self.split + scene_id + '.npy'
        plane_raw = np.load(plane_file, allow_pickle=True)
        plane_center = plane_raw[:, :3]
        plane_center = (np.matmul(R[0][:3, :3], plane_center.T) + R[0][:3, 3:4]).T
        plane_normal = plane_raw[:, 3:6]
        plane_normal = np.matmul(plane_normal, np.linalg.inv(R[0][:3, :3]))
        rets['plane_c'] = plane_center[np.newaxis, :]
        rets['plane_n'] = plane_normal[np.newaxis, :]
        rets['plane_raw'] = plane_raw[np.newaxis, :]
    # reprojct the second image into the first image plane
    if self.reproj:
        assert (imgs.shape[1] == 160 and imgs.shape[2] == 640)
        h = imgs.shape[1]  # NOTE(review): unused; kept for parity with original
        # Central crop of view 1 lifted to 3D.
        pct, mask = util.depth2pc(imgs[1, 80 - 33:80 + 33, 160 + 80 - 44:160 + 80 + 44], 'scannet')
        # be aware of the order of returned pc!!!
        colorpct = imgs_rgb[1, 80 - 33:80 + 33, 160 + 80 - 44:160 + 80 + 44, :].reshape(-1, 3)[mask]
        normalpct = normal[1, 80 - 33:80 + 33, 160 + 80 - 44:160 + 80 + 44, :].reshape(-1, 3)[mask]
        depthpct = imgs[1, 80 - 33:80 + 33, 160 + 80 - 44:160 + 80 + 44].reshape(-1)[mask]
        # Perturb the relative pose; t2s_dr is the residual to recover.
        R_this = np.matmul(R[0], np.linalg.inv(R[1]))
        R_this_p = R_this.copy()
        dR = util.randomRotation(epsilon=0.1)
        dRangle = angular_distance_np(dR[np.newaxis, :], np.eye(3)[np.newaxis, :])[0]  # unused; kept
        R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
        R_this_p[:3, 3] += np.random.randn(3) * 0.1
        t2s_dr = np.matmul(R_this, np.linalg.inv(R_this_p))
        pct_reproj = np.matmul(R_this_p, np.concatenate((pct.T, np.ones([1, pct.shape[0]]))))[:3, :]
        pct_reproj_org = np.matmul(R_this, np.concatenate((pct.T, np.ones([1, pct.shape[0]]))))[:3, :]
        flow = pct_reproj_org - pct_reproj
        normalpct = np.matmul(R_this_p[:3, :3], normalpct.T).T
        flow = flow.T
        t2s_rgb = self.reproj_helper(pct_reproj_org, colorpct, imgs_rgb[0].shape, 'color')
        t2s_rgb_p = self.reproj_helper(pct_reproj, colorpct, imgs_rgb[0].shape, 'color')
        t2s_n_p = self.reproj_helper(pct_reproj, normalpct, imgs_rgb[0].shape, 'normal')
        t2s_d_p = self.reproj_helper(pct_reproj, depthpct, imgs_rgb[0].shape[:2], 'depth')
        t2s_flow_p = self.reproj_helper(pct_reproj, flow, imgs_rgb[0].shape, 'color')
        t2s_mask_p = (t2s_d_p != 0).astype('int')
        # --- symmetric direction: view 0 into view 1 ---
        pct, mask = util.depth2pc(imgs[0, 80 - 33:80 + 33, 160 + 80 - 44:160 + 80 + 44], 'scannet')
        # be aware of the order of returned pc!!!
        colorpct = imgs_rgb[0, 80 - 33:80 + 33, 160 + 80 - 44:160 + 80 + 44, :].reshape(-1, 3)[mask]
        normalpct = normal[0, 80 - 33:80 + 33, 160 + 80 - 44:160 + 80 + 44, :].reshape(-1, 3)[mask]
        depthpct = imgs[0, 80 - 33:80 + 33, 160 + 80 - 44:160 + 80 + 44].reshape(-1)[mask]
        R_this = np.matmul(R[1], np.linalg.inv(R[0]))
        R_this_p = R_this.copy()
        dR = util.randomRotation(epsilon=0.1)
        dRangle = angular_distance_np(dR[np.newaxis, :], np.eye(3)[np.newaxis, :])[0]  # unused; kept
        R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
        R_this_p[:3, 3] += np.random.randn(3) * 0.1
        s2t_dr = np.matmul(R_this, np.linalg.inv(R_this_p))
        pct_reproj = np.matmul(R_this_p, np.concatenate((pct.T, np.ones([1, pct.shape[0]]))))[:3, :]
        pct_reproj_org = np.matmul(R_this, np.concatenate((pct.T, np.ones([1, pct.shape[0]]))))[:3, :]
        flow = pct_reproj_org - pct_reproj
        # assume always observe the second view(right view)
        normalpct = np.matmul(R_this_p[:3, :3], normalpct.T).T
        flow = flow.T
        s2t_rgb = self.reproj_helper(pct_reproj_org, colorpct, imgs_rgb[0].shape, 'color')
        s2t_rgb_p = self.reproj_helper(pct_reproj, colorpct, imgs_rgb[0].shape, 'color')
        s2t_n_p = self.reproj_helper(pct_reproj, normalpct, imgs_rgb[0].shape, 'normal')
        s2t_d_p = self.reproj_helper(pct_reproj, depthpct, imgs_rgb[0].shape[:2], 'depth')
        s2t_flow_p = self.reproj_helper(pct_reproj, flow, imgs_rgb[0].shape, 'color')
        s2t_mask_p = (s2t_d_p != 0).astype('int')
        # compute an envelop box around the reprojected pixels
        try:
            tp = np.where(t2s_d_p.sum(0))[0]
            w0, w1 = tp[0], tp[-1]
            tp = np.where(t2s_d_p.sum(1))[0]
            h0, h1 = tp[0], tp[-1]
        except Exception:  # FIX: was bare `except:`; empty projection -> full-image box
            w0, h0 = 0, 0
            w1, h1 = t2s_d_p.shape[1] - 1, t2s_d_p.shape[0] - 1
        t2s_box_p = np.zeros(t2s_d_p.shape)
        t2s_box_p[h0:h1, w0:w1] = 1
        try:
            tp = np.where(s2t_d_p.sum(0))[0]
            w0, w1 = tp[0], tp[-1]
            tp = np.where(s2t_d_p.sum(1))[0]
            h0, h1 = tp[0], tp[-1]
        except Exception:  # FIX: was bare `except:`
            w0, h0 = 0, 0
            w1, h1 = s2t_d_p.shape[1] - 1, s2t_d_p.shape[0] - 1
        s2t_box_p = np.zeros(s2t_d_p.shape)
        s2t_box_p[h0:h1, w0:w1] = 1
        rets['proj_dr'] = np.stack((t2s_dr, s2t_dr), 0)[np.newaxis, :]
        rets['proj_flow'] = np.stack((t2s_flow_p, s2t_flow_p), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_rgb'] = np.stack((t2s_rgb, s2t_rgb), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_rgb_p'] = np.stack((t2s_rgb_p, s2t_rgb_p), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_n_p'] = np.stack((t2s_n_p, s2t_n_p), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_d_p'] = np.stack((t2s_d_p, s2t_d_p), 0).reshape(1, 2, 1, t2s_d_p.shape[0], t2s_d_p.shape[1])
        rets['proj_mask_p'] = np.stack((t2s_mask_p, s2t_mask_p), 0).reshape(1, 2, 1, t2s_mask_p.shape[0], t2s_mask_p.shape[1])
        rets['proj_box_p'] = np.stack((t2s_box_p, s2t_box_p), 0).reshape(1, 2, 1, t2s_box_p.shape[0], t2s_box_p.shape[1])
    # --- add batch axes and pack outputs ---
    imgs = imgs[np.newaxis, :]
    if self.rgbd:
        imgs_rgb = imgs_rgb[np.newaxis, :].transpose(0, 1, 4, 2, 3)
    if self.normal:
        normal = normal[np.newaxis, :].transpose(0, 1, 4, 2, 3)
    R = R[np.newaxis, :]
    Q = Q[np.newaxis, :]
    if self.segm:
        rets['segm'] = segm_
    if self.dynamicWeighting:
        rets['dynamicW'] = dynamicW[np.newaxis, :]
    if self.pointcloud:
        pointcloud = pointcloud[np.newaxis, :]
        pointcloud_flow = pointcloud_flow[np.newaxis, :]
        rets['pointcloud'] = pointcloud
        rets['pointcloud_flow'] = pointcloud_flow
    if self.plane_r:
        rets['plane'] = plane_eq[np.newaxis, :]
        rets['plane_raw'] = plane_eq_raw[np.newaxis, :]
        rets['plane_c'] = plane_center[np.newaxis, :]
        rets['valid_plane'] = valid_plane
    rets['interval'] = self.interval_this
    # NOTE(review): 'norm'/'rgb' assume self.normal/self.rgbd are enabled;
    # with them disabled these names are unbound (pre-existing behavior).
    rets['norm'] = normal
    rets['rgb'] = imgs_rgb
    rets['depth'] = imgs
    rets['Q'] = Q
    rets['R'] = R
    rets['imgsPath'] = imgsPath
    return rets
def eval_fn(eval_dict, fp):
    """Summarize pose-estimation errors and write a text report to *fp*.

    ``eval_dict`` maps keys of the form ``'...-<Kth>'`` to dicts holding
    'global' and 'local' predicted 4x4 poses, a 'gt' ground-truth pose and a
    scalar 'overlap'.  For each entry the rotation (angular) and translation
    (L2) errors of both modules are computed, then mean errors are logged
    overall and split by overlap (< 0.1 vs >= 0.1).

    Fixes vs. original: the original opened/closed the file manually (leaked
    on exception), looped ``for k in range(5)`` but broke immediately after
    k == 0 (dead loop), and repeated the same stats/logging code four times.
    Output written through ``log_string`` is byte-identical.
    """

    def _log_module(results_this, suffix, module_name, fh):
        # Log mean errors for one module ('g' -> global, 'l' -> local):
        # overall first, then restricted to small/large overlap subsets.
        r_key, t_key = 'err_r_' + suffix, 'err_t_' + suffix
        err_r = [res[r_key] for res in results_this]
        err_t = [res[t_key] for res in results_this]
        log_string(
            'total # %d, %s module error rotation: %.5f, error translation: %.5f'
            % (len(err_r), module_name, np.mean(err_r), np.mean(err_t)), fh)
        for label, keep in (('small', lambda o: o < 0.1),
                            ('large', lambda o: o >= 0.1)):
            err_r = [res[r_key] for res in results_this if keep(res['overlap'])]
            err_t = [res[t_key] for res in results_this if keep(res['overlap'])]
            log_string(
                '\ttotal # %d, %s overlap: rotation: %.5f, error translation: %.5f'
                % (len(err_r), label, np.mean(err_r), np.mean(err_t)), fh)

    results = []
    for key in eval_dict:
        Kth = int(key.split('-')[-1])  # hypothesis index encoded in the key
        R_g = eval_dict[key]['global']
        R_l = eval_dict[key]['local']
        R_gt = eval_dict[key]['gt']
        results.append({
            'err_r_g': util.angular_distance_np(R_g[:3, :3], R_gt[:3, :3]),
            'err_r_l': util.angular_distance_np(R_l[:3, :3], R_gt[:3, :3]),
            'err_t_g': np.linalg.norm(R_g[:3, 3] - R_gt[:3, 3]),
            'err_t_l': np.linalg.norm(R_l[:3, 3] - R_gt[:3, 3]),
            'overlap': eval_dict[key]['overlap'],
            'Kth': Kth,
        })

    with open(fp, 'w') as fh:
        # The original looped over range(5) but broke after the first
        # iteration, so only hypothesis k == 0 is ever reported; keep that
        # behavior, stated explicitly.
        for k in [0]:
            results_this = [res for res in results if res['Kth'] == k]
            _log_module(results_this, 'g', 'global', fh)
            _log_module(results_this, 'l', 'local', fh)
if args.dataset == 'suncg' or args.dataset == 'matterport': sceneID = tp.split('/')[-3] + '-' + tp.split('/')[-2] else: sceneID = tp.split('/')[-2] id_src = int(tp.split('/')[-1]) id_tgt = int(tp1.split('/')[-1]) key = '%s-%06d-%06d' % (sceneID, id_src, id_tgt) newList.append([ sceneID, tp.split('/')[-1], tp1.split('/')[-1], dataS['pred_pose'][i], dataS['gt_pose'][i], dataS['overlap'][0, i] ]) err_r.append( util.angular_distance_np(dataS['pred_pose'][i][0, :3, :3], dataS['gt_pose'][i][:3, :3])) out_dir = './data/dataList/%s_local/' % args.dataset if not os.path.exists(out_dir): os.makedirs(out_dir) np.save('%s/release_eval.npy' % out_dir, newList) print(np.mean(err_r)) netG = MODEL(input_chal=10, num_s=num_s).cuda() # resume checkpoint resume_checkpoint(netG, args.model) # build data loader val_loader = buildDataset(args) # iterate through all data
def __getitem__(self, index):
    """Load one panorama pair (depth / rgb / normal / segmentation + poses)
    and assemble the training dict.

    Fix: a leftover ``import ipdb; ipdb.set_trace()`` debugger breakpoint at
    the top of this method was removed — it halted execution on every single
    sample fetched by the data loader.  All other behavior is unchanged.
    """
    rets = {}
    # Resized (OutputSize) and raw-resolution (Inputheight x Inputwidth) depth.
    imgs_ = np.zeros((self.nViews, *self.OutputSize[::-1]), dtype=np.float32)
    imgs = np.zeros((self.nViews, self.Inputheight, self.Inputwidth),
                    dtype=np.float32)
    if self.rgbd:
        imgs_rgb = np.zeros(
            (self.nViews, self.Inputheight, self.Inputwidth, 3),
            dtype=np.float32)
        imgs_rgb_ = np.zeros((self.nViews, 3, *self.OutputSize[::-1]),
                             dtype=np.float32)
    if self.segm:
        segm = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                        dtype=np.float32)
    if self.normal:
        normal = np.zeros(
            (self.nViews, 3, self.Inputheight, self.Inputwidth),
            dtype=np.float32)
    R = np.zeros((self.nViews, 4, 4))
    Q = np.zeros((self.nViews, 7))  # quaternion (4) + translation (3)
    assert (self.nViews == 2)
    ct0, ct1 = self.__getpair__(index)
    imgsPath = []
    basePath = self.base_this
    frameid0 = f"{ct0:06d}"
    frameid1 = f"{ct1:06d}"
    imgs[0] = self.LoadImage(
        os.path.join(basePath, 'depth', '{}.png'.format(frameid0))).copy()
    imgs[1] = self.LoadImage(
        os.path.join(basePath, 'depth', '{}.png'.format(frameid1))).copy()
    # Valid-depth mask: zero depth means no measurement.
    dataMask = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                        dtype=np.float32)
    dataMask[0, 0, :, :] = (imgs[0] != 0)
    dataMask[1, 0, :, :] = (imgs[1] != 0)
    rets['dataMask'] = dataMask[np.newaxis, :]
    if self.rgbd:
        imgs_rgb[0] = self.LoadImage(os.path.join(
            basePath, 'rgb', '{}.png'.format(frameid0)),
                                     depth=False).copy() / 255.
        imgs_rgb[1] = self.LoadImage(os.path.join(
            basePath, 'rgb', '{}.png'.format(frameid1)),
                                     depth=False).copy() / 255.
    R[0] = np.loadtxt(
        os.path.join(basePath, 'pose', frameid0 + '.pose.txt'))
    R[1] = np.loadtxt(
        os.path.join(basePath, 'pose', frameid1 + '.pose.txt'))
    Q[0, :4] = rot2Quaternion(R[0][:3, :3])
    Q[0, 4:] = R[0][:3, 3]
    Q[1, :4] = rot2Quaternion(R[1][:3, :3])
    Q[1, 4:] = R[1][:3, 3]
    imgsPath.append(f"{basePath}/{ct0:06d}")
    imgsPath.append(f"{basePath}/{ct1:06d}")
    if self.normal:
        # Decode normals from PNG: valid pixels map [0, 255] -> [-1, 1];
        # pixels that are zero in all 3 channels are left untouched (invalid).
        tp = self.LoadImage(os.path.join(basePath, 'normal',
                                         '{}.png'.format(frameid0)),
                            depth=False).copy().astype('float')
        mask = (tp == 0).sum(2) < 3
        tp[mask] = tp[mask] / 255. * 2 - 1
        normal[0] = tp.transpose(2, 0, 1)
        tp = self.LoadImage(os.path.join(basePath, 'normal',
                                         '{}.png'.format(frameid1)),
                            depth=False).copy().astype('float')
        mask = (tp == 0).sum(2) < 3
        tp[mask] = tp[mask] / 255. * 2 - 1
        normal[1] = tp.transpose(2, 0, 1)
        normal_ = np.zeros((self.nViews, 3, *self.OutputSize[::-1]),
                           dtype=np.float32)
        normal_[0] = cv2.resize(normal[0].transpose(1, 2, 0),
                                self.OutputSize,
                                interpolation=cv2.INTER_NEAREST).transpose(
                                    2, 0, 1)
        normal_[1] = cv2.resize(normal[1].transpose(1, 2, 0),
                                self.OutputSize,
                                interpolation=cv2.INTER_NEAREST).transpose(
                                    2, 0, 1)
        normal_ = normal_[np.newaxis, :]
    if self.segm:
        segm_ = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                         dtype=np.float32)
        tp = (self.LoadImage(os.path.join(basePath, 'semanticLabel',
                                          '{}.png'.format(frameid0)),
                             depth=False)[:, :, 0].copy())
        segm[0] = tp.reshape(segm[0].shape)
        tp = (self.LoadImage(os.path.join(basePath, 'semanticLabel',
                                          '{}.png'.format(frameid1)),
                             depth=False)[:, :, 0].copy())
        segm[1] = tp.reshape(segm[1].shape)
        segm_[0] = segm[0]
        segm_[1] = segm[1]
        # truncate semantic class
        segm_[segm_ >= self.snumclass] = 0
        segm_ = segm_[np.newaxis, :]
    if self.denseCorres:
        # get 3d point cloud for each pano
        pcs, masks = self.Pano2PointCloud(
            imgs[0])  # be aware of the order of returned pc!!!
        pct, maskt = self.Pano2PointCloud(imgs[1])
        # Bring both clouds into world coordinates before matching.
        pct = np.matmul(np.linalg.inv(R[1]),
                        np.concatenate(
                            (pct, np.ones([1, pct.shape[1]]))))[:3, :]
        pcs = np.matmul(np.linalg.inv(R[0]),
                        np.concatenate(
                            (pcs, np.ones([1, pcs.shape[1]]))))[:3, :]
        # find correspondence using kdtree
        tree = KDTree(pct.T)
        IdxQuery = np.random.choice(range(pcs.shape[1]),
                                    5000)  # sample 5000 query points
        pcsQuery = pcs[:, IdxQuery]
        nearest_dist, nearest_ind = tree.query(pcsQuery.T, k=1)
        hasCorres = (nearest_dist < 0.08)
        idxTgtNeg = []
        idxSrc = self.PanoIdx(masks[IdxQuery[np.where(hasCorres)[0]]], 160,
                              640)
        idxTgt = self.PanoIdx(maskt[nearest_ind[hasCorres]], 160, 640)
        if hasCorres.sum() < 500:
            # Too few matches: emit an empty, invalid correspondence entry.
            rets['denseCorres'] = {
                'idxSrc': np.zeros([1, 2000, 2]),
                'idxTgt': np.zeros([1, 2000, 2]),
                'valid': np.array([0]),
                'idxTgtNeg': idxTgtNeg
            }
        else:
            # only pick 2000 correspondence per pair
            idx2000 = np.random.choice(range(idxSrc.shape[0]), 2000)
            idxSrc = idxSrc[idx2000][np.newaxis, :]
            idxTgt = idxTgt[idx2000][np.newaxis, :]
            rets['denseCorres'] = {
                'idxSrc': idxSrc,
                'idxTgt': idxTgt,
                'valid': np.array([1]),
                'idxTgtNeg': idxTgtNeg
            }
    if self.reproj:
        h = imgs.shape[1]
        # --- view 1 reprojected into view 0's frame (t2s) ---
        pct, mask = util.depth2pc(
            imgs[1, :, 160:160 * 2],
            'matterport')  # be aware of the order of returned pc!!!
        ii = 1
        colorpct = imgs_rgb[1, :, ii * h:(ii + 1) * h, :].reshape(
            -1, 3)[mask, :]
        normalpct = normal_[0, 1, :, :,
                            ii * h:(ii + 1) * h].reshape(3, -1).T[mask, :]
        depthpct = imgs[1, :, ii * h:(ii + 1) * h].reshape(-1)[mask]
        # get the coordinates of each point in the first coordinate system
        R_this = np.matmul(R[0], np.linalg.inv(R[1]))
        # Perturb the relative pose by a small random rotation/translation.
        R_this_p = R_this.copy()
        dR = util.randomRotation(epsilon=0.1)
        dRangle = angular_distance_np(dR[np.newaxis, :],
                                      np.eye(3)[np.newaxis, :])[0]
        R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
        R_this_p[:3, 3] += np.random.randn(3) * 0.1
        t2s_dr = np.matmul(R_this, np.linalg.inv(R_this_p))
        pct_reproj = np.matmul(
            R_this_p,
            np.concatenate((pct.T, np.ones([1, pct.shape[0]]))))[:3, :]
        pct_reproj_org = np.matmul(
            R_this,
            np.concatenate((pct.T, np.ones([1, pct.shape[0]]))))[:3, :]
        flow = pct_reproj_org - pct_reproj
        normalpct = np.matmul(R_this_p[:3, :3], normalpct.T).T
        flow = flow.T
        t2s_rgb = self.reproj_helper(pct_reproj_org, colorpct,
                                     imgs_rgb[0].shape, 'color')
        t2s_rgb_p = self.reproj_helper(pct_reproj, colorpct,
                                       imgs_rgb[0].shape, 'color')
        t2s_n_p = self.reproj_helper(pct_reproj, normalpct,
                                     imgs_rgb[0].shape, 'normal')
        t2s_d_p = self.reproj_helper(pct_reproj, depthpct,
                                     imgs_rgb[0].shape[:2], 'depth')
        t2s_flow_p = self.reproj_helper(pct_reproj, flow, imgs_rgb[0].shape,
                                        'color')
        t2s_mask_p = (t2s_d_p != 0).astype('int')
        # --- view 0 reprojected into view 1's frame (s2t), mirror of above ---
        pct, mask = util.depth2pc(
            imgs[0, :, 160:160 * 2],
            'matterport')  # be aware of the order of returned pc!!!
        colorpct = imgs_rgb[0, :, ii * h:(ii + 1) * h, :].reshape(-1, 3)[mask]
        normalpct = normal_[0, 0, :, :,
                            ii * h:(ii + 1) * h].reshape(3, -1).T[mask]
        depthpct = imgs[0, :, ii * h:(ii + 1) * h].reshape(-1)[mask]
        R_this = np.matmul(R[1], np.linalg.inv(R[0]))
        R_this_p = R_this.copy()
        dR = util.randomRotation(epsilon=0.1)
        dRangle = angular_distance_np(dR[np.newaxis, :],
                                      np.eye(3)[np.newaxis, :])[0]
        R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
        R_this_p[:3, 3] += np.random.randn(3) * 0.1
        s2t_dr = np.matmul(R_this, np.linalg.inv(R_this_p))
        pct_reproj = np.matmul(
            R_this_p,
            np.concatenate((pct.T, np.ones([1, pct.shape[0]]))))[:3, :]
        pct_reproj_org = np.matmul(
            R_this,
            np.concatenate((pct.T, np.ones([1, pct.shape[0]]))))[:3, :]
        flow = pct_reproj_org - pct_reproj
        # assume always observe the second view(right view)
        normalpct = np.matmul(R_this_p[:3, :3], normalpct.T).T
        flow = flow.T
        s2t_rgb = self.reproj_helper(pct_reproj_org, colorpct,
                                     imgs_rgb[0].shape, 'color')
        s2t_rgb_p = self.reproj_helper(pct_reproj, colorpct,
                                       imgs_rgb[0].shape, 'color')
        s2t_n_p = self.reproj_helper(pct_reproj, normalpct,
                                     imgs_rgb[0].shape, 'normal')
        s2t_d_p = self.reproj_helper(pct_reproj, depthpct,
                                     imgs_rgb[0].shape[:2], 'depth')
        s2t_flow_p = self.reproj_helper(pct_reproj, flow, imgs_rgb[0].shape,
                                        'color')
        s2t_mask_p = (s2t_d_p != 0).astype('int')
        # compute an envelop box: bounding box of nonzero reprojected depth;
        # an entirely empty projection falls back to the full image extent
        try:
            tp = np.where(t2s_d_p.sum(0))[0]
            w0, w1 = tp[0], tp[-1]
            tp = np.where(t2s_d_p.sum(1))[0]
            h0, h1 = tp[0], tp[-1]
        except:
            w0, h0 = 0, 0
            w1, h1 = t2s_d_p.shape[1] - 1, t2s_d_p.shape[0] - 1
        t2s_box_p = np.zeros(t2s_d_p.shape)
        t2s_box_p[h0:h1, w0:w1] = 1
        try:
            tp = np.where(s2t_d_p.sum(0))[0]
            w0, w1 = tp[0], tp[-1]
            tp = np.where(s2t_d_p.sum(1))[0]
            h0, h1 = tp[0], tp[-1]
        except:
            w0, h0 = 0, 0
            w1, h1 = s2t_d_p.shape[1] - 1, s2t_d_p.shape[0] - 1
        s2t_box_p = np.zeros(s2t_d_p.shape)
        s2t_box_p[h0:h1, w0:w1] = 1
        rets['proj_dr'] = np.stack((t2s_dr, s2t_dr), 0)[np.newaxis, :]
        rets['proj_flow'] = np.stack((t2s_flow_p, s2t_flow_p),
                                     0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_rgb'] = np.stack((t2s_rgb, s2t_rgb),
                                    0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_rgb_p'] = np.stack(
            (t2s_rgb_p, s2t_rgb_p), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_n_p'] = np.stack((t2s_n_p, s2t_n_p),
                                    0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_d_p'] = np.stack((t2s_d_p, s2t_d_p),
                                    0).reshape(1, 2, 1, t2s_d_p.shape[0],
                                               t2s_d_p.shape[1])
        rets['proj_mask_p'] = np.stack(
            (t2s_mask_p, s2t_mask_p), 0).reshape(1, 2, 1,
                                                 t2s_mask_p.shape[0],
                                                 t2s_mask_p.shape[1])
        rets['proj_box_p'] = np.stack(
            (t2s_box_p, s2t_box_p), 0).reshape(1, 2, 1, t2s_box_p.shape[0],
                                               t2s_box_p.shape[1])
    # Resize raw-resolution images down to OutputSize for the network.
    for v in range(self.nViews):
        imgs_[v] = cv2.resize(imgs[v], self.OutputSize,
                              interpolation=cv2.INTER_NEAREST)
        if self.rgbd:
            imgs_rgb_[v] = cv2.resize(imgs_rgb[v],
                                      self.OutputSize).transpose(2, 0, 1)
    imgs_ = imgs_[np.newaxis, :]
    if self.rgbd:
        imgs_rgb_ = imgs_rgb_[np.newaxis, :]
    R = R[np.newaxis, :]
    Q = Q[np.newaxis, :]
    if self.segm:
        rets['segm'] = segm_
    rets['interval'] = self.interval_this
    rets['norm'] = normal_
    rets['rgb'] = imgs_rgb_
    rets['depth'] = imgs_
    rets['Q'] = Q
    rets['R'] = R
    rets['imgsPath'] = imgsPath
    return rets
def __getitem__(self, index):
    """Load one panorama pair and assemble the training dict.

    Variant supporting heatmaps, bird's-eye views, point clouds and a
    normal pyramid.  Returns a dict with (at least) 'dataMask', 'interval',
    'norm', 'rgb', 'depth', 'Q', 'R' and 'imgsPath'; optional entries
    ('denseCorres', 'proj_*', 'segm') depend on the dataset flags.
    NOTE(review): code is unchanged; comments only.
    """
    rets = {}
    # Buffers: *_ suffixed arrays hold the resized (OutputSize) versions.
    imgs_ = np.zeros((self.nViews, *self.OutputSize[::-1]), dtype=np.float32)
    imgs = np.zeros((self.nViews, self.Inputheight, self.Inputwidth),
                    dtype=np.float32)
    if self.rgbd:
        imgs_rgb = np.zeros(
            (self.nViews, self.Inputheight, self.Inputwidth, 3),
            dtype=np.float32)
        imgs_rgb_ = np.zeros((self.nViews, 3, *self.OutputSize[::-1]),
                             dtype=np.float32)
    if self.hmap:
        hmap = np.zeros((self.nViews, 3, 64, 64), dtype=np.float32)
    if self.birdview:
        imgs_bv = np.zeros(
            (self.nViews, self.Inputheight, self.Inputwidth, 3),
            dtype=np.float32)
        imgs_bv_ = np.zeros((self.nViews, 3, *self.OutputSize[::-1]),
                            dtype=np.float32)
    if self.pointcloud:
        pointcloud = np.zeros((self.nViews, 3, self.num_points),
                              dtype=np.float32)
    R = np.zeros((self.nViews, 4, 4))
    Q = np.zeros((self.nViews, 7))  # quaternion (4) + translation (3)
    assert (self.nViews == 2)
    imgsPath = []
    if self.AuthenticdepthMap:
        # NOTE(review): allocated but never filled or returned in this method.
        AuthenticdepthMap = np.zeros((self.nViews, *self.OutputSize[::-1]),
                                     dtype=np.float32)
    ct0, ct1 = self.__getpair__(index)
    if self.segm:
        segm = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                        dtype=np.float32)
    if self.normal:
        normal = np.zeros(
            (self.nViews, 3, self.Inputheight, self.Inputwidth),
            dtype=np.float32)
    basePath = self.base_this
    frameid0 = f"{ct0:06d}"
    frameid1 = f"{ct1:06d}"
    imgs[0] = self.LoadImage(
        os.path.join(basePath, 'depth', '{}.png'.format(frameid0))).copy()
    imgs[1] = self.LoadImage(
        os.path.join(basePath, 'depth', '{}.png'.format(frameid1))).copy()
    # Valid-depth mask: zero depth means no measurement.
    dataMask = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                        dtype=np.float32)
    dataMask[0, 0, :, :] = (imgs[0] != 0)
    dataMask[1, 0, :, :] = (imgs[1] != 0)
    rets['dataMask'] = dataMask[np.newaxis, :]
    if self.pointcloud:
        # Randomly subsample each back-projected depth map to num_points.
        pc = util.DepthToPointCloud(imgs[0], self.intrinsicUnNorm)
        pointcloud[0] = pc[
            np.random.choice(range(len(pc)), self.num_points), :].T
        pc = util.DepthToPointCloud(imgs[1], self.intrinsicUnNorm)
        pointcloud[1] = pc[
            np.random.choice(range(len(pc)), self.num_points), :].T
    if self.birdview:
        imgs_bv[0] = self.LoadImage(os.path.join(
            basePath, 'BirdView', '{}.birdview.png'.format(frameid0)),
                                    depth=False).copy() / 255.
        imgs_bv[1] = self.LoadImage(os.path.join(
            basePath, 'BirdView', '{}.birdview.png'.format(frameid1)),
                                    depth=False).copy() / 255.
    if self.rgbd:
        imgs_rgb[0] = self.LoadImage(os.path.join(
            basePath, 'rgb', '{}.png'.format(frameid0)),
                                     depth=False).copy() / 255.
        imgs_rgb[1] = self.LoadImage(os.path.join(
            basePath, 'rgb', '{}.png'.format(frameid1)),
                                     depth=False).copy() / 255.
    R[0] = np.loadtxt(
        os.path.join(basePath, 'pose', frameid0 + '.pose.txt'))
    R[1] = np.loadtxt(
        os.path.join(basePath, 'pose', frameid1 + '.pose.txt'))
    #R[1] = R[0] = np.eye(4)
    Q[0, :4] = rot2Quaternion(R[0][:3, :3])
    Q[0, 4:] = R[0][:3, 3]
    Q[1, :4] = rot2Quaternion(R[1][:3, :3])
    Q[1, 4:] = R[1][:3, 3]
    if self.normal:
        # Normal maps stored as PNG in [0, 255], remapped to [-1, 1].
        normal[0] = self.LoadImage(
            os.path.join(basePath, 'normal', '{}.png'.format(frameid0)),
            depth=False).copy().transpose(2, 0, 1) / 255. * 2 - 1
        normal[1] = self.LoadImage(
            os.path.join(basePath, 'normal', '{}.png'.format(frameid1)),
            depth=False).copy().transpose(2, 0, 1) / 255. * 2 - 1
        #print(f"normalmean:{np.mean(np.power(normal[0],2).sum(0))},{np.mean(np.power(normal[1],2).sum(0))}\n")
        if self.normal_pyramid:
            # Multi-resolution pyramid of normal maps.
            a = int(outS(self.height))  #41
            b = int(outS(self.height * 0.5 + 1))  #21
            normal_ = [
                resize_label_batch(normal.transpose(2, 3, 1, 0),
                                   i).transpose(3, 2, 0, 1)
                for i in [a, a, b, a]
            ]
            normal_ = [
                m.reshape(1, self.nViews, 3, m.shape[2], m.shape[3])
                for m in normal_
            ]
        else:
            normal_ = np.zeros((self.nViews, 3, *self.OutputSize[::-1]),
                               dtype=np.float32)
            normal_[0] = cv2.resize(
                normal[0].transpose(1, 2, 0), self.OutputSize,
                interpolation=cv2.INTER_NEAREST).transpose(2, 0, 1)
            normal_[1] = cv2.resize(
                normal[1].transpose(1, 2, 0), self.OutputSize,
                interpolation=cv2.INTER_NEAREST).transpose(2, 0, 1)
            normal_ = normal_[np.newaxis, :]
    if self.denseCorres:
        # get 3d point cloud for each pano
        pcs = self.Pano2PointCloud(
            imgs[0])  # be aware of the order of returned pc!!!
        pct = self.Pano2PointCloud(imgs[1])
        #pct = np.matmul(R[0],np.matmul(np.linalg.inv(R[1]),np.concatenate((pct,np.ones([1,pct.shape[1]])))))[:3,:]
        # Bring both clouds into world coordinates before matching.
        pct = np.matmul(np.linalg.inv(R[1]),
                        np.concatenate(
                            (pct, np.ones([1, pct.shape[1]]))))[:3, :]
        pcs = np.matmul(np.linalg.inv(R[0]),
                        np.concatenate(
                            (pcs, np.ones([1, pcs.shape[1]]))))[:3, :]
        # find correspondence using kdtree
        tree = KDTree(pct.T)
        IdxQuery = np.random.choice(range(pcs.shape[1]),
                                    5000)  # sample 5000 query points
        pcsQuery = pcs[:, IdxQuery]
        nearest_dist, nearest_ind = tree.query(pcsQuery.T, k=1)
        hasCorres = (nearest_dist < 0.08)
        idxTgtNeg = []
        idxSrc = self.PanoIdx(IdxQuery[np.where(hasCorres)[0]], 160, 640)
        idxTgt = self.PanoIdx(nearest_ind[hasCorres], 160, 640)
        if hasCorres.sum() < 500:
            # Too few matches: emit an empty, invalid correspondence entry.
            rets['denseCorres'] = {
                'idxSrc': np.zeros([1, 2000, 2]),
                'idxTgt': np.zeros([1, 2000, 2]),
                'valid': np.array([0]),
                'idxTgtNeg': idxTgtNeg
            }
        else:
            # only pick 2000 correspondence per pair
            idx2000 = np.random.choice(range(idxSrc.shape[0]), 2000)
            idxSrc = idxSrc[idx2000][np.newaxis, :]
            idxTgt = idxTgt[idx2000][np.newaxis, :]
            rets['denseCorres'] = {
                'idxSrc': idxSrc,
                'idxTgt': idxTgt,
                'valid': np.array([1]),
                'idxTgtNeg': idxTgtNeg
            }
    # reprojct the second image into the first image plane
    if self.reproj:
        h = imgs.shape[1]
        # Gather color/normal/depth per pano face (4 faces of width h).
        colorpct = []
        normalpct = []
        depthpct = []
        for ii in range(4):
            colorpct.append(imgs_rgb[1, :, ii * h:(ii + 1) * h, :].reshape(
                -1, 3))
            normalpct.append(normal_[0, 1, :, :,
                                     ii * h:(ii + 1) * h].reshape(3, -1))
            depthpct.append(imgs[1, :, ii * h:(ii + 1) * h].reshape(-1))
        colorpct = np.concatenate(colorpct, 0)
        normalpct = np.concatenate(normalpct, 1)
        depthpct = np.concatenate(depthpct)
        # get the coordinates of each point in the first coordinate system
        pct = self.Pano2PointCloud(
            imgs[1])  # be aware of the order of returned pc!!!
        R_this = np.matmul(R[0], np.linalg.inv(R[1]))
        # Perturb the relative pose by a small random rotation/translation.
        R_this_p = R_this.copy()
        dR = util.randomRotation(epsilon=0.1)
        dRangle = angular_distance_np(dR[np.newaxis, :],
                                      np.eye(3)[np.newaxis, :])[0]
        R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
        R_this_p[:3, 3] += np.random.randn(3) * 0.1
        t2s_dr = np.matmul(R_this, np.linalg.inv(R_this_p))
        pct_reproj = np.matmul(
            R_this_p,
            np.concatenate((pct, np.ones([1, pct.shape[1]]))))[:3, :]
        pct_reproj_org = np.matmul(
            R_this,
            np.concatenate((pct, np.ones([1, pct.shape[1]]))))[:3, :]
        flow = pct_reproj_org - pct_reproj
        #if np.abs(pct).min()==0:
        #  import ipdb;ipdb.set_trace()
        # assume always observe the second view(right view)
        colorpct = colorpct[h * h:h * h * 2, :]
        depthpct = depthpct[h * h:h * h * 2]
        normalpct = normalpct[:, h * h:h * h * 2]
        #normalpct=np.matmul(R_this[:3,:3], normalpct).T # used to be a mistake!
        normalpct = np.matmul(R_this_p[:3, :3], normalpct).T
        pct_reproj = pct_reproj[:, h * h:h * h * 2]
        pct_reproj_org = pct_reproj_org[:, h * h:h * h * 2]
        flow = flow[:, h * h:h * h * 2].T
        t2s_rgb = self.reproj_helper(pct_reproj_org, colorpct,
                                     imgs_rgb[0].shape, 'color')
        t2s_rgb_p = self.reproj_helper(pct_reproj, colorpct,
                                       imgs_rgb[0].shape, 'color')
        t2s_n_p = self.reproj_helper(pct_reproj, normalpct,
                                     imgs_rgb[0].shape, 'normal')
        t2s_d_p = self.reproj_helper(pct_reproj, depthpct,
                                     imgs_rgb[0].shape[:2], 'depth')
        t2s_flow_p = self.reproj_helper(pct_reproj, flow, imgs_rgb[0].shape,
                                        'color')
        t2s_mask_p = (t2s_d_p != 0).astype('int')
        #import ipdb;ipdb.set_trace()
        # Mirror pass: reproject view 0 into view 1's frame (s2t).
        colorpct = []
        normalpct = []
        depthpct = []
        for ii in range(4):
            colorpct.append(imgs_rgb[0, :, ii * h:(ii + 1) * h, :].reshape(
                -1, 3))
            normalpct.append(normal_[0, 0, :, :,
                                     ii * h:(ii + 1) * h].reshape(3, -1))
            depthpct.append(imgs[0, :, ii * h:(ii + 1) * h].reshape(-1))
        colorpct = np.concatenate(colorpct, 0)
        normalpct = np.concatenate(normalpct, 1)
        depthpct = np.concatenate(depthpct)
        # get the coordinates of each point in the first coordinate system
        pct = self.Pano2PointCloud(
            imgs[0])  # be aware of the order of returned pc!!!
        R_this = np.matmul(R[1], np.linalg.inv(R[0]))
        R_this_p = R_this.copy()
        dR = util.randomRotation(epsilon=0.1)
        dRangle = angular_distance_np(dR[np.newaxis, :],
                                      np.eye(3)[np.newaxis, :])[0]
        R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
        R_this_p[:3, 3] += np.random.randn(3) * 0.1
        s2t_dr = np.matmul(R_this, np.linalg.inv(R_this_p))
        pct_reproj = np.matmul(
            R_this_p,
            np.concatenate((pct, np.ones([1, pct.shape[1]]))))[:3, :]
        pct_reproj_org = np.matmul(
            R_this,
            np.concatenate((pct, np.ones([1, pct.shape[1]]))))[:3, :]
        flow = pct_reproj_org - pct_reproj
        # assume always observe the second view(right view)
        colorpct = colorpct[h * h:h * h * 2, :]
        depthpct = depthpct[h * h:h * h * 2]
        normalpct = normalpct[:, h * h:h * h * 2]
        normalpct = np.matmul(R_this_p[:3, :3], normalpct).T
        pct_reproj = pct_reproj[:, h * h:h * h * 2]
        pct_reproj_org = pct_reproj_org[:, h * h:h * h * 2]
        flow = flow[:, h * h:h * h * 2].T
        s2t_rgb = self.reproj_helper(pct_reproj_org, colorpct,
                                     imgs_rgb[0].shape, 'color')
        s2t_rgb_p = self.reproj_helper(pct_reproj, colorpct,
                                       imgs_rgb[0].shape, 'color')
        s2t_n_p = self.reproj_helper(pct_reproj, normalpct,
                                     imgs_rgb[0].shape, 'normal')
        s2t_d_p = self.reproj_helper(pct_reproj, depthpct,
                                     imgs_rgb[0].shape[:2], 'depth')
        s2t_flow_p = self.reproj_helper(pct_reproj, flow, imgs_rgb[0].shape,
                                        'color')
        s2t_mask_p = (s2t_d_p != 0).astype('int')
        # compute an envelop box: bounding box of nonzero reprojected depth;
        # an entirely empty projection falls back to the full image extent
        try:
            tp = np.where(t2s_d_p.sum(0))[0]
            w0, w1 = tp[0], tp[-1]
            tp = np.where(t2s_d_p.sum(1))[0]
            h0, h1 = tp[0], tp[-1]
        except:
            w0, h0 = 0, 0
            w1, h1 = t2s_d_p.shape[1] - 1, t2s_d_p.shape[0] - 1
        t2s_box_p = np.zeros(t2s_d_p.shape)
        t2s_box_p[h0:h1, w0:w1] = 1
        try:
            tp = np.where(s2t_d_p.sum(0))[0]
            w0, w1 = tp[0], tp[-1]
            tp = np.where(s2t_d_p.sum(1))[0]
            h0, h1 = tp[0], tp[-1]
        except:
            w0, h0 = 0, 0
            w1, h1 = s2t_d_p.shape[1] - 1, s2t_d_p.shape[0] - 1
        s2t_box_p = np.zeros(s2t_d_p.shape)
        s2t_box_p[h0:h1, w0:w1] = 1
        rets['proj_dr'] = np.stack((t2s_dr, s2t_dr), 0)[np.newaxis, :]
        rets['proj_flow'] = np.stack((t2s_flow_p, s2t_flow_p),
                                     0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_rgb'] = np.stack((t2s_rgb, s2t_rgb),
                                    0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_rgb_p'] = np.stack(
            (t2s_rgb_p, s2t_rgb_p), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_n_p'] = np.stack((t2s_n_p, s2t_n_p),
                                    0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_d_p'] = np.stack((t2s_d_p, s2t_d_p),
                                    0).reshape(1, 2, 1, t2s_d_p.shape[0],
                                               t2s_d_p.shape[1])
        rets['proj_mask_p'] = np.stack(
            (t2s_mask_p, s2t_mask_p), 0).reshape(1, 2, 1,
                                                 t2s_mask_p.shape[0],
                                                 t2s_mask_p.shape[1])
        rets['proj_box_p'] = np.stack(
            (t2s_box_p, s2t_box_p), 0).reshape(1, 2, 1, t2s_box_p.shape[0],
                                               t2s_box_p.shape[1])
    if self.segm:
        # Semantic labels: channel 0 of the label PNG.
        segm[0] = (self.LoadImage(os.path.join(basePath, 'semanticLabel',
                                               '{}.png'.format(frameid0)),
                                  depth=False)[:, :, 0:1].copy()).transpose(
                                      2, 0, 1)
        segm[1] = (self.LoadImage(os.path.join(basePath, 'semanticLabel',
                                               '{}.png'.format(frameid1)),
                                  depth=False)[:, :, 0:1].copy()).transpose(
                                      2, 0, 1)
        segm_ = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                         dtype=np.float32)
        segm_[0] = segm[0]
        segm_[1] = segm[1]
        segm_ = segm_[np.newaxis, :]
    imgsPath.append(f"{basePath}/{ct0:06d}")
    imgsPath.append(f"{basePath}/{ct1:06d}")
    # Resize raw-resolution images down to OutputSize for the network.
    for v in range(self.nViews):
        imgs_[v] = cv2.resize(imgs[v], self.OutputSize,
                              interpolation=cv2.INTER_NEAREST)
        if self.rgbd:
            imgs_rgb_[v] = cv2.resize(imgs_rgb[v],
                                      self.OutputSize).transpose(2, 0, 1)
    imgs_ = imgs_[np.newaxis, :]
    if self.hmap:
        hmap = hmap[np.newaxis, :]
    if self.rgbd:
        imgs_rgb_ = imgs_rgb_[np.newaxis, :]
    if self.birdview:
        imgs_bv_ = imgs_bv_[np.newaxis, :]
    if self.pointcloud:
        pointcloud = pointcloud[np.newaxis, :]
    R = R[np.newaxis, :]
    Q = Q[np.newaxis, :]
    if self.segm:
        rets['segm'] = segm_
    rets['interval'] = self.interval_this
    rets['norm'] = normal_
    rets['rgb'] = imgs_rgb_
    rets['depth'] = imgs_
    rets['Q'] = Q
    rets['R'] = R
    rets['imgsPath'] = imgsPath
    return rets
summary_mat = sio.loadmat(summary_mat) T = summary_mat['T'] Tstar = summary_mat['Tstar'] aerr = summary_mat['aerr'] terr = summary_mat['terr'] sigma = summary_mat['sigma'] n = Tstar.shape[0] n = 30 for i in range(n): for j in range(i+1, n): Tij = T[i*4:(i+1)*4, j*4:(j+1)*4] Tij_gt = Tstar[j, :, :].dot(inverse(Tstar[i, :, :])) terr_ij = np.linalg.norm((Tij_gt - Tij)[:3, 3], 2) assert abs(terr_ij - terr[i, j]) < 1e-4 terrs.append(terr_ij) aerr_ij = angular_distance_np(Tij_gt[np.newaxis, :3, :3], Tij[np.newaxis, :3, :3]).sum() assert abs(aerr_ij - aerr[i, j]) < 1e-4 aerrs.append(aerr_ij) sigmas.append(sigma[i, j]) aerrs = np.array(aerrs) terrs = np.array(terrs) sigmas = np.array(sigmas) for sigma_threshold in [0.1, 0.2]: valid_indices = np.where(sigmas < sigma_threshold)[0] terrs_temp = terrs[valid_indices] aerrs_temp = aerrs[valid_indices] for a in [3.0, 5.0, 10.0, 30.0, 45.0]:
def __getitem__(self, index):
    """Load one view pair (scannet-crop variant) and assemble the training
    dict.

    Images here are kept at OutputSize throughout (no separate resize pass);
    the reprojection branch works on a fixed central crop of the 160x640
    panorama.  NOTE(review): code is unchanged; comments only.
    """
    rets = {}
    imgs = np.zeros((self.nViews, *self.OutputSize[::-1]), dtype=np.float32)
    if self.rgbd:
        imgs_rgb = np.zeros((self.nViews, *self.OutputSize[::-1], 3),
                            dtype=np.float32)
    if self.segm:
        segm = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                        dtype=np.float32)
    if self.dynamicWeighting:
        dynamicW = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                            dtype=np.float32)
    if self.normal:
        # Channel-last normals (transposed to channel-first at the end).
        normal = np.zeros((self.nViews, *self.OutputSize[::-1], 3),
                          dtype=np.float32)
    R = np.zeros((self.nViews, 4, 4))
    Q = np.zeros((self.nViews, 7))  # quaternion (4) + translation (3)
    assert (self.nViews == 2)
    ct0, ct1 = self.__getpair__(index)
    imgsPath = []
    basePath = self.base_this
    frameid0 = f"{ct0:06d}"
    frameid1 = f"{ct1:06d}"
    if self.fullsize_rgbdn:
        # Also return the full-resolution (480x640) observed rgb/depth.
        imgs_rgb_full = np.zeros((self.nViews, 480, 640, 3),
                                 dtype=np.float32)
        imgs_full = np.zeros((self.nViews, 480, 640), dtype=np.float32)
        imgs_full[0] = self.LoadImage(
            os.path.join(basePath, 'obs_depth',
                         '{}.png'.format(frameid0))).copy()
        imgs_full[1] = self.LoadImage(
            os.path.join(basePath, 'obs_depth',
                         '{}.png'.format(frameid1))).copy()
        imgs_rgb_full[0] = self.LoadImage(os.path.join(
            basePath, 'obs_rgb', '{}.png'.format(frameid0)),
                                          depth=False).copy() / 255.
        imgs_rgb_full[1] = self.LoadImage(os.path.join(
            basePath, 'obs_rgb', '{}.png'.format(frameid1)),
                                          depth=False).copy() / 255.
        rets['rgb_full'] = imgs_rgb_full[np.newaxis, :]
        rets['depth_full'] = imgs_full[np.newaxis, :]
    imgs[0] = self.LoadImage(
        os.path.join(basePath, 'depth', '{}.png'.format(frameid0))).copy()
    imgs[1] = self.LoadImage(
        os.path.join(basePath, 'depth', '{}.png'.format(frameid1))).copy()
    # Valid-depth mask: zero depth means no measurement.
    dataMask = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                        dtype=np.float32)
    dataMask[0, 0, :, :] = (imgs[0] != 0)
    dataMask[1, 0, :, :] = (imgs[1] != 0)
    rets['dataMask'] = dataMask[np.newaxis, :]
    if self.rgbd:
        imgs_rgb[0] = self.LoadImage(os.path.join(
            basePath, 'rgb', '{}.png'.format(frameid0)),
                                     depth=False).copy() / 255.
        imgs_rgb[1] = self.LoadImage(os.path.join(
            basePath, 'rgb', '{}.png'.format(frameid1)),
                                     depth=False).copy() / 255.
    R[0] = np.loadtxt(
        os.path.join(basePath, 'pose', frameid0 + '.pose.txt'))
    R[1] = np.loadtxt(
        os.path.join(basePath, 'pose', frameid1 + '.pose.txt'))
    Q[0, :4] = rot2Quaternion(R[0][:3, :3])
    Q[0, 4:] = R[0][:3, 3]
    Q[1, :4] = rot2Quaternion(R[1][:3, :3])
    Q[1, 4:] = R[1][:3, 3]
    imgsPath.append(f"{basePath}/{ct0:06d}")
    imgsPath.append(f"{basePath}/{ct1:06d}")
    if self.normal:
        # Decode normals from PNG: valid pixels map [0, 255] -> [-1, 1];
        # pixels that are zero in all 3 channels are left untouched (invalid).
        tp = self.LoadImage(os.path.join(basePath, 'normal',
                                         '{}.png'.format(frameid0)),
                            depth=False).copy().astype('float')
        mask = (tp == 0).sum(2) < 3
        tp[mask] = tp[mask] / 255. * 2 - 1
        normal[0] = tp
        tp = self.LoadImage(os.path.join(basePath, 'normal',
                                         '{}.png'.format(frameid1)),
                            depth=False).copy().astype('float')
        mask = (tp == 0).sum(2) < 3
        tp[mask] = tp[mask] / 255. * 2 - 1
        normal[1] = tp
    if self.segm:
        # Semantic indices live in channel 1 of the label PNG here.
        tp = (self.LoadImage(os.path.join(basePath, 'semantic_idx',
                                          '{}.png'.format(frameid0)),
                             depth=False).copy())[:, :, 1]
        segm[0] = tp.reshape(segm[0].shape)
        tp = (self.LoadImage(os.path.join(basePath, 'semantic_idx',
                                          '{}.png'.format(frameid1)),
                             depth=False).copy())[:, :, 1]
        segm[1] = tp.reshape(segm[1].shape)
        segm_ = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                         dtype=np.float32)
        segm_[0] = segm[0]
        segm_[1] = segm[1]
        segm_ = segm_[np.newaxis, :]
    if self.denseCorres:
        # get 3d point cloud for each pano
        pcs, masks = self.Pano2PointCloud(
            imgs[0],
            self.representation)  # be aware of the order of returned pc!!!
        pct, maskt = self.Pano2PointCloud(imgs[1], self.representation)
        #pct = np.matmul(R[0],np.matmul(np.linalg.inv(R[1]),np.concatenate((pct,np.ones([1,pct.shape[1]])))))[:3,:]
        # Bring both clouds into world coordinates before matching.
        pct = np.matmul(np.linalg.inv(R[1]),
                        np.concatenate(
                            (pct, np.ones([1, pct.shape[1]]))))[:3, :]
        pcs = np.matmul(np.linalg.inv(R[0]),
                        np.concatenate(
                            (pcs, np.ones([1, pcs.shape[1]]))))[:3, :]
        # find correspondence using kdtree
        tree = KDTree(pct.T)
        IdxQuery = np.random.choice(range(pcs.shape[1]),
                                    5000)  # sample 5000 query points
        pcsQuery = pcs[:, IdxQuery]
        nearest_dist, nearest_ind = tree.query(pcsQuery.T, k=1)
        hasCorres = (nearest_dist < 0.08)
        idxTgtNeg = []
        idxSrc = self.PanoIdx(masks[IdxQuery[np.where(hasCorres)[0]]],
                              imgs.shape[1], imgs.shape[2],
                              self.representation)
        idxTgt = self.PanoIdx(maskt[nearest_ind[hasCorres]], imgs.shape[1],
                              imgs.shape[2], self.representation)
        if hasCorres.sum() < 200:
            # Too few matches: emit an empty, invalid correspondence entry.
            rets['denseCorres'] = {
                'idxSrc': np.zeros([1, 500, 2]),
                'idxTgt': np.zeros([1, 500, 2]),
                'valid': np.array([0]),
                'idxTgtNeg': idxTgtNeg
            }
        else:
            # only pick 2000 correspondence per pair
            idx500 = np.random.choice(range(idxSrc.shape[0]), 500)
            idxSrc = idxSrc[idx500][np.newaxis, :]
            idxTgt = idxTgt[idx500][np.newaxis, :]
            rets['denseCorres'] = {
                'idxSrc': idxSrc,
                'idxTgt': idxTgt,
                'valid': np.array([1]),
                'idxTgtNeg': idxTgtNeg
            }
    # reprojct the second image into the first image plane
    if self.reproj:
        assert (imgs.shape[1] == 160 and imgs.shape[2] == 640)
        h = imgs.shape[1]
        # Fixed central crop of the pano (rows 47:113, cols 196:284).
        pct, mask = util.depth2pc(
            imgs[1, 80 - 33:80 + 33, 160 + 80 - 44:160 + 80 + 44],
            'scannet')  # be aware of the order of returned pc!!!
        colorpct = imgs_rgb[1, 80 - 33:80 + 33,
                            160 + 80 - 44:160 + 80 + 44, :].reshape(
                                -1, 3)[mask]
        normalpct = normal[1, 80 - 33:80 + 33,
                           160 + 80 - 44:160 + 80 + 44, :].reshape(
                               -1, 3)[mask]
        depthpct = imgs[1, 80 - 33:80 + 33,
                        160 + 80 - 44:160 + 80 + 44].reshape(-1)[mask]
        R_this = np.matmul(R[0], np.linalg.inv(R[1]))
        # Perturb the relative pose by a small random rotation/translation.
        R_this_p = R_this.copy()
        dR = util.randomRotation(epsilon=0.1)
        dRangle = angular_distance_np(dR[np.newaxis, :],
                                      np.eye(3)[np.newaxis, :])[0]
        R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
        R_this_p[:3, 3] += np.random.randn(3) * 0.1
        t2s_dr = np.matmul(R_this, np.linalg.inv(R_this_p))
        pct_reproj = np.matmul(
            R_this_p,
            np.concatenate((pct.T, np.ones([1, pct.shape[0]]))))[:3, :]
        pct_reproj_org = np.matmul(
            R_this,
            np.concatenate((pct.T, np.ones([1, pct.shape[0]]))))[:3, :]
        flow = pct_reproj_org - pct_reproj
        normalpct = np.matmul(R_this_p[:3, :3], normalpct.T).T
        flow = flow.T
        t2s_rgb = self.reproj_helper(pct_reproj_org, colorpct,
                                     imgs_rgb[0].shape, 'color')
        t2s_rgb_p = self.reproj_helper(pct_reproj, colorpct,
                                       imgs_rgb[0].shape, 'color')
        t2s_n_p = self.reproj_helper(pct_reproj, normalpct,
                                     imgs_rgb[0].shape, 'normal')
        t2s_d_p = self.reproj_helper(pct_reproj, depthpct,
                                     imgs_rgb[0].shape[:2], 'depth')
        t2s_flow_p = self.reproj_helper(pct_reproj, flow, imgs_rgb[0].shape,
                                        'color')
        t2s_mask_p = (t2s_d_p != 0).astype('int')
        # Mirror pass: reproject view 0 into view 1's frame (s2t).
        pct, mask = util.depth2pc(
            imgs[0, 80 - 33:80 + 33, 160 + 80 - 44:160 + 80 + 44],
            'scannet')  # be aware of the order of returned pc!!!
        colorpct = imgs_rgb[0, 80 - 33:80 + 33,
                            160 + 80 - 44:160 + 80 + 44, :].reshape(
                                -1, 3)[mask]
        normalpct = normal[0, 80 - 33:80 + 33,
                           160 + 80 - 44:160 + 80 + 44, :].reshape(
                               -1, 3)[mask]
        depthpct = imgs[0, 80 - 33:80 + 33,
                        160 + 80 - 44:160 + 80 + 44].reshape(-1)[mask]
        R_this = np.matmul(R[1], np.linalg.inv(R[0]))
        R_this_p = R_this.copy()
        dR = util.randomRotation(epsilon=0.1)
        dRangle = angular_distance_np(dR[np.newaxis, :],
                                      np.eye(3)[np.newaxis, :])[0]
        R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
        R_this_p[:3, 3] += np.random.randn(3) * 0.1
        s2t_dr = np.matmul(R_this, np.linalg.inv(R_this_p))
        pct_reproj = np.matmul(
            R_this_p,
            np.concatenate((pct.T, np.ones([1, pct.shape[0]]))))[:3, :]
        pct_reproj_org = np.matmul(
            R_this,
            np.concatenate((pct.T, np.ones([1, pct.shape[0]]))))[:3, :]
        flow = pct_reproj_org - pct_reproj
        # assume always observe the second view(right view)
        normalpct = np.matmul(R_this_p[:3, :3], normalpct.T).T
        flow = flow.T
        s2t_rgb = self.reproj_helper(pct_reproj_org, colorpct,
                                     imgs_rgb[0].shape, 'color')
        s2t_rgb_p = self.reproj_helper(pct_reproj, colorpct,
                                       imgs_rgb[0].shape, 'color')
        s2t_n_p = self.reproj_helper(pct_reproj, normalpct,
                                     imgs_rgb[0].shape, 'normal')
        s2t_d_p = self.reproj_helper(pct_reproj, depthpct,
                                     imgs_rgb[0].shape[:2], 'depth')
        s2t_flow_p = self.reproj_helper(pct_reproj, flow, imgs_rgb[0].shape,
                                        'color')
        s2t_mask_p = (s2t_d_p != 0).astype('int')
        # compute an envelop box: bounding box of nonzero reprojected depth;
        # an entirely empty projection falls back to the full image extent
        try:
            tp = np.where(t2s_d_p.sum(0))[0]
            w0, w1 = tp[0], tp[-1]
            tp = np.where(t2s_d_p.sum(1))[0]
            h0, h1 = tp[0], tp[-1]
        except:
            w0, h0 = 0, 0
            w1, h1 = t2s_d_p.shape[1] - 1, t2s_d_p.shape[0] - 1
        t2s_box_p = np.zeros(t2s_d_p.shape)
        t2s_box_p[h0:h1, w0:w1] = 1
        try:
            tp = np.where(s2t_d_p.sum(0))[0]
            w0, w1 = tp[0], tp[-1]
            tp = np.where(s2t_d_p.sum(1))[0]
            h0, h1 = tp[0], tp[-1]
        except:
            w0, h0 = 0, 0
            w1, h1 = s2t_d_p.shape[1] - 1, s2t_d_p.shape[0] - 1
        s2t_box_p = np.zeros(s2t_d_p.shape)
        s2t_box_p[h0:h1, w0:w1] = 1
        rets['proj_dr'] = np.stack((t2s_dr, s2t_dr), 0)[np.newaxis, :]
        rets['proj_flow'] = np.stack((t2s_flow_p, s2t_flow_p),
                                     0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_rgb'] = np.stack((t2s_rgb, s2t_rgb),
                                    0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_rgb_p'] = np.stack(
            (t2s_rgb_p, s2t_rgb_p), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_n_p'] = np.stack((t2s_n_p, s2t_n_p),
                                    0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_d_p'] = np.stack((t2s_d_p, s2t_d_p),
                                    0).reshape(1, 2, 1, t2s_d_p.shape[0],
                                               t2s_d_p.shape[1])
        rets['proj_mask_p'] = np.stack(
            (t2s_mask_p, s2t_mask_p), 0).reshape(1, 2, 1,
                                                 t2s_mask_p.shape[0],
                                                 t2s_mask_p.shape[1])
        rets['proj_box_p'] = np.stack(
            (t2s_box_p, s2t_box_p), 0).reshape(1, 2, 1, t2s_box_p.shape[0],
                                               t2s_box_p.shape[1])
    # Add batch dim; move channel-last buffers to channel-first layout.
    imgs = imgs[np.newaxis, :]
    if self.rgbd:
        imgs_rgb = imgs_rgb[np.newaxis, :].transpose(0, 1, 4, 2, 3)
    if self.normal:
        normal = normal[np.newaxis, :].transpose(0, 1, 4, 2, 3)
    R = R[np.newaxis, :]
    Q = Q[np.newaxis, :]
    if self.segm:
        rets['segm'] = segm_
    if self.dynamicWeighting:
        rets['dynamicW'] = dynamicW[np.newaxis, :]
    rets['interval'] = self.interval_this
    rets['norm'] = normal
    rets['rgb'] = imgs_rgb
    rets['depth'] = imgs
    rets['Q'] = Q
    rets['R'] = R
    rets['imgsPath'] = imgsPath
    return rets
}, { 'pc': ptt3d.T, 'normal': ptsnt, 'feat': dest, 'weight': pttW }, para_this) # average speed time_this = time.time() - st speedBenchmark.append(time_this) # compute rotation error and translation error t_hat = R_hat[:3, 3] R_hat = R_hat[:3, :3] ad_this = util.angular_distance_np(R_hat, R_gt[np.newaxis, :, :])[0] ad_blind_this = util.angular_distance_np( R_gt[np.newaxis, :, :], np.eye(3)[np.newaxis, :, :])[0] translation_this = np.linalg.norm( np.matmul((R_hat - R_gt_44[:3, :3]), pc_src.mean(0).reshape(3)) + t_hat - R_gt_44[:3, 3]) translation_blind_this = np.linalg.norm(t_hat - R_gt_44[:3, 3]) # save result for this pair R_pred_44 = np.eye(4) R_pred_44[:3, :3] = R_hat R_pred_44[:3, 3] = t_hat error_stats.append({ 'img_src': imgPath[0][0], 'img_tgt': imgPath[1][0],