def __getitem__helper(self, index):
    """Assemble one two-view sample from the LMDB store.

    The frame pair is chosen by ``self.__getpair__(index)``.  RGB, depth,
    normal and semantic images plus the 4x4 pose of each frame are read from
    ``self.txn``; the remaining entries depend on the ``self.fullsize_rgbdn``,
    ``self.denseCorres``, ``self.pointcloud``, ``self.local``,
    ``self.plane_r`` and ``self.topdown`` switches.

    Args:
        index: sample index; it is wrapped with ``index % self.__len__()``.

    Returns:
        A tuple ``(rets, True)``.  ``rets`` is a dict whose array entries
        carry a leading axis of length 1.  Always present: ``'rgb'``,
        ``'norm'``, ``'semantic'``, ``'depth'``, ``'PerspectiveValidMask'``,
        ``'dataMask'``, ``'R'``, ``'R_inv'``, ``'Q'`` and ``'imgsPath'``.

    Raises:
        NotImplementedError: if ``self.local`` is set, ``self.eval_local`` is
            not, and ``self.local_method == 'patch'`` (the plane parameters
            that mode needs are not computed by this loader).
    """
    rets = {}
    index = index % self.__len__()
    imgs_depth = np.zeros((self.nViews, self.Inputheight, self.Inputwidth), dtype=np.float32)
    imgs_s = np.zeros((self.nViews, self.Inputheight, self.Inputwidth), dtype=np.float32)
    imgs_rgb = np.zeros((self.nViews, self.Inputheight, self.Inputwidth, 3), dtype=np.float32)
    imgs_normal = np.zeros((self.nViews, self.Inputheight, self.Inputwidth, 3), dtype=np.float32)
    # Per view: xyz (3) + normal (3) + colour (3) + semantic label (1).
    pointcloud = np.zeros((self.nViews, 3 + 3 + 3 + 1, self.num_points), dtype=np.float32)
    R = np.zeros((self.nViews, 4, 4))
    Q = np.zeros((7))
    assert (self.nViews == 2)
    imgsPath = []
    ct0, ct1 = self.__getpair__(index)
    frames = (ct0, ct1)
    if 'scannet_test_scenes' not in self.list:
        rets['overlap'] = float(self.dataList[index]['overlap'])
    room_id = self.base_this.split('/')[-1]
    basePath = os.path.join(self.base, room_id)

    def read_blob(key):
        # Raw bytes stored under a string key in the LMDB transaction.
        return self.txn.get(key.encode())

    def decode_image(frame, suffix, flags):
        # Decode the image stored under '<room>-<frame>-<suffix>'.
        key = '%s-%06d-%s' % (room_id, frame, suffix)
        return cv2.imdecode(np.frombuffer(read_blob(key), dtype=np.uint8), flags)

    def read_float64(key):
        # np.float (an alias of the builtin float, i.e. float64) was removed
        # in NumPy 1.24; np.float64 is the same dtype.
        return np.frombuffer(read_blob(key), np.float64)

    for view, frame in enumerate(frames):
        imgs_rgb[view] = decode_image(frame, 'rgb', cv2.IMREAD_COLOR).astype('float') / 255.0
        # Depth is scaled by 1/1000 (presumably millimetres -> metres; confirm
        # against the LMDB writer).
        imgs_depth[view] = decode_image(frame, 'depth', cv2.IMREAD_ANYDEPTH).astype('float') / 1000.0
        # Normals are mapped from [0, 255] to [-1, 1].
        imgs_normal[view] = decode_image(frame, 'normal', cv2.IMREAD_COLOR).astype('float') / 255.0 * 2 - 1
        imgs_s[view] = decode_image(frame, 'semantic', cv2.IMREAD_UNCHANGED).astype('uint8')[:, :, 0]

    # A pixel is valid when its depth is non-zero.
    PerspectiveValidMask = (imgs_depth != 0)
    rets['PerspectiveValidMask'] = PerspectiveValidMask[None, :, None, :, :]
    rets['dataMask'] = rets['PerspectiveValidMask']

    for view, frame in enumerate(frames):
        R[view] = read_float64('%s-%06d-R' % (room_id, frame)).reshape(4, 4)
    R_inv = np.linalg.inv(R)

    img2ind = np.zeros([2, self.num_points, 3])
    imgPCid = np.zeros([2, self.num_points, 2])

    if self.fullsize_rgbdn:
        imgs_rgb_full = np.zeros((self.nViews, 480, 640, 3), dtype=np.float32)
        imgs_norm_full = np.zeros((self.nViews, 480, 640, 3), dtype=np.float32)
        imgs_full = np.zeros((self.nViews, 480, 640), dtype=np.float32)
        fullBase = basePath.replace('ScanNet', 'ScanNet_360')
        for view, frame in enumerate(frames):
            fname = '%06d.png' % (frame)
            imgs_full[view] = self.LoadImage(os.path.join(fullBase, 'obs_depth', fname)).copy()
            imgs_rgb_full[view] = self.LoadImage(os.path.join(fullBase, 'obs_rgb', fname), depth=False).copy() / 255.
            imgs_norm_full[view] = self.LoadImage(os.path.join(fullBase, 'obs_normal', fname), depth=False).copy() / 255 * 2 - 1.
        rets['rgb_full'] = imgs_rgb_full[np.newaxis, :]
        rets['norm_full'] = imgs_norm_full[np.newaxis, :]
        rets['depth_full'] = imgs_full[np.newaxis, :]

    if self.denseCorres:
        # Get the 3d point cloud of each view; be aware of the order of the
        # returned points.
        pcs, masks = self.depth2pc(imgs_depth[0], needmask=True)
        pct, maskt = self.depth2pc(imgs_depth[1], needmask=True)
        pct = (np.matmul(R_inv[1][:3, :3], pct.T) + R_inv[1][:3, 3:4]).T
        pcs = (np.matmul(R_inv[0][:3, :3], pcs.T) + R_inv[0][:3, 3:4]).T
        n_pixels = imgs_depth[0].shape[0] * imgs_depth[0].shape[1]
        inds = np.arange(n_pixels)[masks]
        indt = np.arange(n_pixels)[maskt]
        # Find correspondences with a kd-tree over the target points.
        tree = KDTree(pct)
        IdxQuery = np.random.choice(range(pcs.shape[0]), 5000)  # sample 5000 query points
        pcsQuery = pcs[IdxQuery, :]
        pcsQueryid = inds[IdxQuery]
        nearest_dist, nearest_ind = tree.query(pcsQuery, k=1)
        hasCorres = (nearest_dist < 0.08)
        idxTgtNeg = []
        # Flat pixel ids are converted to (x, y) = (id % width, id // width).
        idxSrc = np.stack((pcsQueryid[hasCorres[:, 0]] % self.Inputwidth,
                           pcsQueryid[hasCorres[:, 0]] // self.Inputwidth), 1)
        idxTgt = np.stack((indt[nearest_ind[hasCorres]] % self.Inputwidth,
                           indt[nearest_ind[hasCorres]] // self.Inputwidth), 1)
        if hasCorres.sum() < 200:
            # Too few matches: emit a zero placeholder flagged as invalid.
            rets['denseCorres'] = {'idxSrc': np.zeros([1, 500, 2]).astype('int'),
                                   'idxTgt': np.zeros([1, 500, 2]).astype('int'),
                                   'valid': np.array([0]),
                                   'idxTgtNeg': idxTgtNeg}
        else:
            idx2000 = np.random.choice(range(idxSrc.shape[0]), 500)
            idxSrc = idxSrc[idx2000][np.newaxis, :]
            idxTgt = idxTgt[idx2000][np.newaxis, :]
            rets['denseCorres'] = {'idxSrc': idxSrc.astype('int'),
                                   'idxTgt': idxTgt.astype('int'),
                                   'valid': np.array([1]),
                                   'idxTgtNeg': idxTgtNeg}

    if self.pointcloud or self.local:
        # Points are sampled from a fixed 96 x 128 window of each image.
        crop = (slice(100 - 48, 100 + 48), slice(200 - 64, 200 + 64))
        for view in range(2):
            pc, mask = self.depth2pc(imgs_depth[view][crop], needmask=True)
            idx_s = np.random.choice(range(len(pc)), self.num_points)
            imgPCid[view] = np.stack((idx_s % 128, idx_s // 128)).T
            pointcloud[view, :3, :] = pc[idx_s, :].T
            pc_n = imgs_normal[view][crop].reshape(-1, 3)[mask]
            pointcloud[view, 3:6, :] = pc_n[idx_s, :].T
            pc_c = imgs_rgb[view][crop].reshape(-1, 3)[mask]
            # Channel order is reversed (cv2 decodes colour images as BGR).
            pointcloud[view, 6:9, :] = pc_c[idx_s, ::-1].T
            pc_s = imgs_s[view][crop].reshape(-1)[mask]
            pointcloud[view, 9:10, :] = pc_s[idx_s]
        rets['pointcloud'] = pointcloud[None, ...]

    if self.plane_r:
        plane_eq_raw = read_float64('%s-plane' % (room_id)).reshape(-1, 9)
        valid_plane = np.frombuffer(read_blob('%s-plane-validnum' % (room_id)), np.uint8)[0]
        # Columns 0:3 hold the plane centre and 3:7 the plane equation.
        plane_eq = plane_eq_raw[:, 3:7]
        plane_eq = np.matmul(plane_eq, np.linalg.inv(R[0]))
        plane_center = plane_eq_raw[:, :3]
        plane_center = (np.matmul(R[0][:3, :3], plane_center.T) + R[0][:3, 3:4]).T
        rets['plane'] = plane_eq[np.newaxis, :]
        rets['plane_raw'] = plane_eq_raw[np.newaxis, :]
        rets['plane_c'] = plane_center[np.newaxis, :]
        rets['valid_plane'] = valid_plane

    if self.local:
        # Sample point-level relations between the two views.  Failures here
        # now propagate to the caller instead of dropping into a debugger.
        R_s2t = np.matmul(R[1], R_inv[0])
        pointcloud[0, :3, :] = np.matmul(R_s2t[:3, :3], pointcloud[0, :3, :]) + R_s2t[:3, 3:4]
        pointcloud[0, 3:6, :] = np.matmul(R_s2t[:3, :3], pointcloud[0, 3:6, :])
        N_PAIR_PTS = 6000 if self.eval_local else 1000
        N_PAIR_EXCEED_PTS = N_PAIR_PTS * 10
        ANGLE_THRESH = 5.0
        PERP_THRESH = np.cos(np.deg2rad(90 - ANGLE_THRESH))
        PARALLEL_THRESH = np.cos(np.deg2rad(ANGLE_THRESH))
        COPLANE_THRESH = 0.05
        rel_cls_pts = np.zeros([N_PAIR_EXCEED_PTS])
        ind_s = np.random.choice(pointcloud.shape[-1], N_PAIR_EXCEED_PTS)
        ind_t = np.random.choice(pointcloud.shape[-1], N_PAIR_EXCEED_PTS)
        pair_pts = np.stack((ind_s, ind_t), -1)
        # With an integer, a slice and an index array, NumPy puts the
        # index-array axis first, so each of these is (N, 3).
        pos_s = pointcloud[0, 0:3, pair_pts[:, 0]]
        pos_t = pointcloud[1, 0:3, pair_pts[:, 1]]
        nor_s = pointcloud[0, 3:6, pair_pts[:, 0]]
        nor_t = pointcloud[1, 3:6, pair_pts[:, 1]]
        normdot = (nor_s * nor_t).sum(1)
        # Mean of the two point-to-plane offsets along either normal.
        dst = (np.abs(((pos_s - pos_t) * nor_t).sum(1)) + np.abs(((pos_s - pos_t) * nor_s).sum(1))) / 2
        # 0: none, 1: perpendicular, 2: parallel, 3: parallel and co-planar.
        rel_cls_pts[(np.abs(normdot) < PERP_THRESH)] = 1
        rel_cls_pts[(np.abs(normdot) > PARALLEL_THRESH) & (dst > COPLANE_THRESH)] = 2
        rel_cls_pts[(np.abs(normdot) > PARALLEL_THRESH) & (dst <= COPLANE_THRESH)] = 3

        if self.split == 'train':
            # Balance the classes: draw the same number from each non-empty
            # class, then resample to exactly N_PAIR_PTS pairs.
            N_CLASS = 4
            pair_pts_select = []
            for j in range(N_CLASS):
                ind = np.where(rel_cls_pts == j)[0]
                if len(ind):
                    pair_pts_select.append(ind[np.random.choice(len(ind), N_PAIR_PTS // N_CLASS)])
            pair_pts_select = np.concatenate(pair_pts_select)
            pair_pts_select = pair_pts_select[np.random.choice(len(pair_pts_select), N_PAIR_PTS)]
        else:
            pair_pts_select = np.random.choice(len(pair_pts), N_PAIR_PTS)
        pair_pts = pair_pts[pair_pts_select]
        normdot = normdot[pair_pts_select]
        dst = dst[pair_pts_select]
        rel_cls_pts = rel_cls_pts[pair_pts_select]
        rets['normdot2'] = np.power(normdot, 2)[None, :]
        rets['dst2'] = np.power(dst, 2)[None, :]

        # Convert the selected points to image coordinates of the 96 x 128 crop.
        R_t2s = np.linalg.inv(R_s2t)
        hfov = 120.0
        vfov = 2 * np.arctan(np.tan(hfov / 2 / 180 * np.pi) * 200 / 400) / np.pi * 180

        def to_pixel(pts):
            # Pinhole projection of (N, 3) camera-frame points.
            zs = -pts[:, 2]
            ys = (0.5 - (pts[:, 1] / 96 * 200 / zs / (np.tan(np.deg2rad(vfov / 2)))) / 2) * 96
            xs = (0.5 + (pts[:, 0] / 128 * 400 / zs / (np.tan(np.deg2rad(hfov / 2)))) / 2) * 128
            return np.stack((xs, ys), -1)

        # Source points were moved into the target frame above; move them
        # back before projecting into the source image.
        tp = (np.matmul(R_t2s[:3, :3], pointcloud[0, :3, pair_pts[:, 0]].T) + R_t2s[:3, 3:4]).T
        uv_s = to_pixel(tp)
        uv_t = to_pixel(pointcloud[1, :3, pair_pts[:, 1]])
        uv_pts = np.stack((uv_s, uv_t))[None, :]
        uv_pts[:, :, :, 0] = uv_pts[:, :, :, 0].clip(0, 128 - 1)
        uv_pts[:, :, :, 1] = uv_pts[:, :, :, 1].clip(0, 96 - 1)
        rets['uv_pts'] = uv_pts.astype('int')

        # NOTE: the legacy plane-pair padding that used to sit here read
        # locals (rel_cls, rel_dst, rel_ndot, pair, plane_params1/2,
        # plane_center1/2, plane_idx1/2) that this function never assigns,
        # so it could only raise UnboundLocalError; it has been removed.
        rets['rel_cls_pts'] = rel_cls_pts[None, :]
        rets['pair_pts'] = pair_pts[None, :]

        if self.eval_local:
            # Convert the source points back into their local frame.
            R_t2s = np.matmul(R[0], R_inv[1])
            Kth = self.dataList[index]['Kth']
            pointcloud[0, :3, :] = np.matmul(R_t2s[:3, :3], pointcloud[0, :3, :]) + R_t2s[:3, 3:4]
            pointcloud[0, 3:6, :] = np.matmul(R_t2s[:3, :3], pointcloud[0, 3:6, :])
            eval_key = '%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)
            entry = self.eval_gt_dict[eval_key]
            R_pred = entry['pred_pose']
            gt_pose = entry['gt_pose']
            err_r = util.angular_distance_np(R_pred[:3, :3], gt_pose[:3, :3])[0]
            rets['err_r'] = err_r
            rets['eval_key'] = eval_key
            pos_s_360 = entry['pos_s_360']
            pos_t_360 = entry['pos_t_360']
            nor_s_360 = entry['nor_s_360']
            nor_t_360 = entry['nor_t_360']
            # Transform the source set by the predicted pose.
            pos_s_360 = (np.matmul(R_pred[:3, :3], pos_s_360.T) + R_pred[:3, 3:4]).T
            nor_s_360 = np.matmul(R_pred[:3, :3], nor_s_360.T).T
            rets['pos_s_360'] = (pos_s_360[None, :])
            rets['pos_t_360'] = (pos_t_360[None, :])
            rets['nor_s_360'] = (nor_s_360[None, :])
            rets['nor_t_360'] = (nor_t_360[None, :])
            pointcloud[0, :3, :] = np.matmul(R_pred[:3, :3], pointcloud[0, :3, :]) + R_pred[:3, 3:4]
            pointcloud[0, 3:6, :] = np.matmul(R_pred[:3, :3], pointcloud[0, 3:6, :])
            # Residual transform between the predicted pose and R_s2t.
            igt = np.matmul(R_s2t, np.linalg.inv(R_pred))
            rets['igt'] = igt[None, :]
            rets['pred_pose'] = R_pred[None, :]
            rets['gt_pose'] = gt_pose[None, :]
            R_gt = igt[:3, :3]
            t_gt = igt[:3, 3:4]
        else:
            # Perturb the (already aligned) source points by a random rigid
            # motion about their centroid; igt is the inverse of that motion.
            delta_R = util.randomRotation(epsilon=0.1 * 3)
            delta_t = np.random.randn(3) * 0.1
            centroid = pointcloud[0, :3, :].mean(1)[:, None]
            pointcloud_s_perturb = np.matmul(delta_R, pointcloud[0, :3, :] - centroid) + delta_t[:, None] + centroid
            t_gt = np.matmul(np.eye(3) - delta_R.T, centroid) - np.matmul(delta_R.T, delta_t[:, None])
            R_gt = delta_R.T
            igt = np.eye(4)
            igt[:3, :3] = R_gt
            igt[:3, 3] = t_gt.squeeze()
            rets['igt'] = igt[None, :]
            pointcloud_s_n_perturb = np.matmul(delta_R, pointcloud[0, 3:6, :])
            if self.local_method == 'patch':
                # This mode transformed plane_params1, which is never
                # computed here; fail with a clear message instead of an
                # accidental UnboundLocalError.
                raise NotImplementedError(
                    "local_method == 'patch' needs plane parameters that "
                    "this loader does not compute")
            Q = np.concatenate((util.rot2Quaternion(R_gt), t_gt.squeeze()))
            R_ = np.eye(4)
            R_[:3, :3] = R_gt
            R_[:3, 3] = t_gt.squeeze()
            # From here on R_inv is a single 4x4 matrix, not a (2, 4, 4) stack.
            R_inv = np.linalg.inv(R_)
            pointcloud[0, :3, :] = pointcloud_s_perturb
            pointcloud[0, 3:6, :] = pointcloud_s_n_perturb
        rets['pointcloud'] = pointcloud[None, ...]

    if self.topdown:
        roompc = read_float64('%s-pc' % (room_id)).reshape(-1, 3)
        roompc = roompc[np.random.choice(roompc.shape[0], 20000)]
        rets['roompc'] = roompc[None, :]
        plane_eq = read_float64('%s-floor' % (room_id)).reshape(4)

        # Floor plane expressed in each view's frame, with a unit normal.
        plane_eqs = np.zeros([2, 4])
        for view in range(2):
            eq = np.matmul(plane_eq, np.linalg.inv(R[view]))
            eq /= (np.linalg.norm(eq[:3]) + 1e-16)
            plane_eqs[view, :] = eq

        # The original drew an unused colour table here; the draw is kept so
        # the random stream (and thus seeded samples) stays unchanged.
        np.random.rand(21, 3)
        resolution = 0.03
        height = 224
        width = 224

        def grid_uv(points, origin, axis_x, axis_y):
            # Map 3d points to integer top-down pixel coordinates.
            u = ((points - origin[None, :]) * axis_x[None, :]).sum(1)
            v = ((points - origin[None, :]) * axis_y[None, :]).sum(1)
            u = width // 2 + (u / resolution).astype('int')
            v = height // 2 - (v / resolution).astype('int')
            return u, v

        def topdown_index(points, origin, axis_x, axis_y, axis_z):
            # (u, v, height-bin) triple for every point.
            u, v = grid_uv(points, origin, axis_x, axis_y)
            z = ((points - origin[None, :]) * axis_z[None, :]).sum(1)
            ind_z = np.digitize(z, [-0.1, 0.7, 1.5])
            return np.stack((u, v, ind_z), -1)

        pc2ind = np.zeros([2, pointcloud.shape[2], 3])
        npts = np.zeros([2])
        pc2ind_mask = np.zeros([2, pointcloud.shape[2]])
        origins = np.zeros([2, 3])
        axes_x = np.zeros([2, 3])
        axes_y = np.zeros([2, 3])
        for view in range(2):
            eq = plane_eqs[view].copy()
            pts = pointcloud[view, 0:3, :].T
            # Keep points within 1.5 of the floor plane (drops the ceiling of
            # the partial view).
            dst = np.abs(((eq[:3][None, :] * pts).sum(1) + eq[3]))
            mask = dst < 1.5
            npts[view] = np.count_nonzero(mask)
            pc2ind_mask[view] = mask
            # Project the camera position (0, 0, 0) onto the floor plane.
            origin = -eq[:3] * eq[3]
            # In-plane y axis: [0, 0, -1] with its normal component removed.
            axis_base = np.array([0, 0, -1])
            axis_y = axis_base - np.dot(axis_base, eq[:3]) * eq[:3]
            axis_y /= (np.linalg.norm(axis_y) + 1e-16)
            axis_x = np.cross(axis_y, eq[:3])
            axis_x /= (np.linalg.norm(axis_x) + 1e-16)
            axis_z = eq[:3]
            origins[view] = origin
            axes_x[view] = axis_x
            axes_y[view] = axis_y
            pc2ind[view, mask] = topdown_index(pts[mask], origin, axis_x, axis_y, axis_z)
            img2ind[view] = topdown_index(pts, origin, axis_x, axis_y, axis_z)
        origin_0, origin_1 = origins[0], origins[1]
        axis_x_0, axis_x_1 = axes_x[0], axes_x[1]
        axis_y_0, axis_y_1 = axes_y[0], axes_y[1]

        topdown_c_partial_0, topdown_c_partial_1 = [
            decode_image(frame, 'topdown_c_partial', cv2.IMREAD_COLOR).astype('float') / 255.
            for frame in frames]
        topdown_c_complete_0, topdown_c_complete_1 = [
            decode_image(frame, 'topdown_c_complete', cv2.IMREAD_COLOR).astype('float') / 255.
            for frame in frames]
        topdown_s_complete_0, topdown_s_complete_1 = [
            decode_image(frame, 'topdown_s_complete', cv2.IMREAD_UNCHANGED).astype('uint8')
            for frame in frames]

        # Weight map decaying with distance from the observed (non-black)
        # pixels of each partial top-down view, floored at 0.1.
        edt_w = []
        for partial in (topdown_c_partial_0, topdown_c_partial_1):
            tp = ~partial.sum(2).astype('bool')
            edt = ndimage.distance_transform_edt(tp, return_indices=False)
            edt_w.append(np.maximum(0.1, np.power(0.98, edt)))
        rets['edt_w'] = np.stack((edt_w[0], edt_w[1]))[None, ...]

        rets['img2ind'] = img2ind[None, ...]
        rets['imgPCid'] = imgPCid[None, ...]
        rets['axis_x'] = axes_x[None, :]
        rets['axis_y'] = axes_y[None, :]
        rets['origin'] = origins[None, :]

        # Sample keypoints on the observed part of the source top-down view.
        mask = ~((topdown_c_partial_0 == 0).sum(2) == 3)
        vs, us = np.where(mask)
        if not len(vs):
            vs = np.array([0, 0])
            us = np.array([0, 0])
        ind = np.random.choice(len(vs), 100)
        u_0 = us[ind]
        v_0 = vs[ind]
        kp_uv_0 = np.stack((u_0, v_0), -1)
        u_0 -= width // 2
        v_0 -= height // 2
        kp_3d_0 = origin_0[None, :] + axis_x_0[None, :] * u_0[:, None] * resolution - axis_y_0[None, :] * v_0[:, None] * resolution
        R01 = np.matmul(R[1], R_inv[0])
        kp_3d_1 = (np.matmul(R01[:3, :3], kp_3d_0.T) + R01[:3, 3:4]).T

        # Randomly sample target-view points as negative correspondences.
        mask = ~((topdown_c_partial_1 == 0).sum(2) == 3)
        vs_neg, us_neg = np.where(mask)
        if not len(vs_neg):
            vs_neg = np.array([0, 0])
            us_neg = np.array([0, 0])
        ind = np.random.choice(len(vs_neg), 100 * 100)
        u_neg_1 = us_neg[ind]
        v_neg_1 = vs_neg[ind]
        kp_uv_neg_1 = np.stack((u_neg_1, v_neg_1), -1)
        u_neg_1 -= width // 2
        v_neg_1 -= height // 2
        kp_3d_neg_1 = origin_1[None, :] + axis_x_1[None, :] * u_neg_1[:, None] * resolution - axis_y_1[None, :] * v_neg_1[:, None] * resolution
        R10 = np.matmul(R[0], R_inv[1])
        kp_3d_neg_0 = (np.matmul(R10[:3, :3], kp_3d_neg_1.T) + R10[:3, 3:4]).T
        u_neg_0, v_neg_0 = grid_uv(kp_3d_neg_0, origin_0, axis_x_0, axis_y_0)
        kp_uv_neg_0 = np.stack((u_neg_0, v_neg_0), -1)
        kp_uv_neg_0[:, 0] = kp_uv_neg_0[:, 0].clip(0, width - 1)
        kp_uv_neg_0[:, 1] = kp_uv_neg_0[:, 1].clip(0, height - 1)
        kp_uv_neg_1 = kp_uv_neg_1.reshape(100, 100, 2)
        kp_uv_neg_0 = kp_uv_neg_0.reshape(100, 100, 2)
        # Negatives that land close to the keypoint in the source view get a
        # small weight.
        w_uv_neg_1 = 1 - np.maximum(0.1, np.power(0.98, np.linalg.norm(kp_uv_neg_0 - kp_uv_0[:, None, :], axis=2)))

        u_1, v_1 = grid_uv(kp_3d_1, origin_1, axis_x_1, axis_y_1)
        kp_uv_1 = np.stack((u_1, v_1), -1)

        topdown_c_complete = np.stack((topdown_c_complete_0, topdown_c_complete_1)).transpose(0, 3, 1, 2)
        topdown_s_complete = np.stack((topdown_s_complete_0, topdown_s_complete_1))
        topdown_c_partial = np.stack((topdown_c_partial_0, topdown_c_partial_1))
        kp_uv_0[:, 0] = kp_uv_0[:, 0].clip(0, width - 1)
        kp_uv_0[:, 1] = kp_uv_0[:, 1].clip(0, height - 1)
        kp_uv_1[:, 0] = kp_uv_1[:, 0].clip(0, width - 1)
        kp_uv_1[:, 1] = kp_uv_1[:, 1].clip(0, height - 1)
        rets['kp_uv'] = np.stack((kp_uv_0, kp_uv_1))[None, ...]
        rets['kp_uv_neg'] = kp_uv_neg_1[None, ...]
        rets['w_uv_neg'] = w_uv_neg_1[None, ...]
        rets['plane_eq'] = plane_eqs[None, ...]
        rets['pc2ind'] = pc2ind[None, ...]
        rets['pc2ind_mask'] = pc2ind_mask[None, ...]
        rets['topdown'] = topdown_c_complete[None, ...]
        rets['topdown_s'] = topdown_s_complete[None, ...]
        rets['topdown_partial'] = topdown_c_partial.transpose(0, 3, 1, 2)[None, ...]
        # A top-down pixel is valid unless all three channels are zero.
        TopDownValidMask = ((topdown_c_complete == 0).sum(1, keepdims=True) != 3)
        rets['TopDownValidMask'] = TopDownValidMask[None, ...]
        rets['npts'] = npts[None, ...]

    imgsPath.append(f"{basePath}/{ct0:06d}")
    imgsPath.append(f"{basePath}/{ct1:06d}")
    rets['norm'] = imgs_normal.transpose(0, 3, 1, 2)[None, ...]
    rets['rgb'] = imgs_rgb.transpose(0, 3, 1, 2)[None, ...]
    rets['semantic'] = imgs_s[None, ...]
    rets['depth'] = imgs_depth[None, :, None, :, :]
    rets['Q'] = Q[None, ...]
    rets['R'] = R[None, ...]
    rets['R_inv'] = R_inv[None, ...]
    rets['imgsPath'] = imgsPath
    return rets, True
def __getitem__(self, index):
    """Load one two-view sample and return it as a dict of batch-of-one arrays.

    The frame pair comes from ``self.__getpair__(index)`` and every file is
    read below ``self.base_this``.  Depth and poses are always loaded; RGB,
    normals, semantic labels, dense correspondences and the perturbed
    re-projections are produced only when the matching ``self.rgbd`` /
    ``self.normal`` / ``self.segm`` / ``self.denseCorres`` / ``self.reproj``
    switch is set.  ``self.reproj`` indexes the RGB images and the
    non-pyramid resized normals, so it needs ``rgbd`` and ``normal`` enabled
    and ``normal_pyramid`` disabled.

    Args:
        index: Sample index, forwarded to ``self.__getpair__``.

    Returns:
        dict: ``dataMask``, ``interval``, ``depth``, ``Q``, ``R`` and
        ``imgsPath`` are always present.  ``rgb``, ``norm``, ``segm``,
        ``denseCorres`` and the ``proj_*`` entries appear only when their
        switch is enabled.  ``Q[..., :4]`` is ``rot2Quaternion`` of the pose
        rotation block and ``Q[..., 4:]`` is the pose translation column.

    Raises:
        AssertionError: If ``self.nViews`` is not 2.
    """
    assert (self.nViews == 2)
    rets = {}
    # OutputSize is handed to cv2.resize unchanged; array shapes use it reversed.
    out_hw = self.OutputSize[::-1]

    imgs = np.zeros((self.nViews, self.Inputheight, self.Inputwidth),
                    dtype=np.float32)
    imgs_ = np.zeros((self.nViews, *out_hw), dtype=np.float32)
    if self.rgbd:
        imgs_rgb = np.zeros(
            (self.nViews, self.Inputheight, self.Inputwidth, 3),
            dtype=np.float32)
        imgs_rgb_ = np.zeros((self.nViews, 3, *out_hw), dtype=np.float32)
    R = np.zeros((self.nViews, 4, 4))
    Q = np.zeros((self.nViews, 7))

    ct0, ct1 = self.__getpair__(index)
    basePath = self.base_this
    frame_ids = (f"{ct0:06d}", f"{ct1:06d}")
    imgsPath = [f"{basePath}/{ct0:06d}", f"{basePath}/{ct1:06d}"]

    # Depth maps; a zero depth marks an invalid pixel in the data mask.
    for v, fid in enumerate(frame_ids):
        imgs[v] = self.LoadImage(
            os.path.join(basePath, 'depth', '{}.png'.format(fid))).copy()
    dataMask = np.zeros((self.nViews, 1, *out_hw), dtype=np.float32)
    for v in range(self.nViews):
        dataMask[v, 0, :, :] = (imgs[v] != 0)
    rets['dataMask'] = dataMask[np.newaxis, :]

    if self.pointcloud:
        # NOTE(review): the sampled cloud is never added to ``rets``.  The
        # sampling is kept so the NumPy RNG stream consumed per item (and
        # therefore every later random draw) stays unchanged.
        pointcloud = np.zeros((self.nViews, 3, self.num_points),
                              dtype=np.float32)
        for v in range(self.nViews):
            pc = util.DepthToPointCloud(imgs[v], self.intrinsicUnNorm)
            pointcloud[v] = pc[
                np.random.choice(range(len(pc)), self.num_points), :].T

    if self.birdview:
        # NOTE(review): bird-view images are loaded but not returned.
        imgs_bv = np.zeros(
            (self.nViews, self.Inputheight, self.Inputwidth, 3),
            dtype=np.float32)
        for v, fid in enumerate(frame_ids):
            imgs_bv[v] = self.LoadImage(os.path.join(
                basePath, 'BirdView', '{}.birdview.png'.format(fid)),
                depth=False).copy() / 255.

    if self.rgbd:
        for v, fid in enumerate(frame_ids):
            imgs_rgb[v] = self.LoadImage(os.path.join(
                basePath, 'rgb', '{}.png'.format(fid)),
                depth=False).copy() / 255.

    # 4x4 poses and their quaternion + translation encoding.
    for v, fid in enumerate(frame_ids):
        R[v] = np.loadtxt(os.path.join(basePath, 'pose', fid + '.pose.txt'))
    for v in range(self.nViews):
        Q[v, :4] = rot2Quaternion(R[v][:3, :3])
        Q[v, 4:] = R[v][:3, 3]

    if self.normal:
        normal = np.zeros(
            (self.nViews, 3, self.Inputheight, self.Inputwidth),
            dtype=np.float32)
        for v, fid in enumerate(frame_ids):
            # Channels-first, rescaled with x / 255 * 2 - 1.
            normal[v] = self.LoadImage(
                os.path.join(basePath, 'normal', '{}.png'.format(fid)),
                depth=False).copy().transpose(2, 0, 1) / 255. * 2 - 1
        if self.normal_pyramid:
            a = int(outS(self.height))  # 41
            b = int(outS(self.height * 0.5 + 1))  # 21
            normal_ = [
                resize_label_batch(normal.transpose(2, 3, 1, 0),
                                   i).transpose(3, 2, 0, 1)
                for i in [a, a, b, a]
            ]
            normal_ = [
                m.reshape(1, self.nViews, 3, m.shape[2], m.shape[3])
                for m in normal_
            ]
        else:
            normal_ = np.zeros((self.nViews, 3, *out_hw), dtype=np.float32)
            for v in range(self.nViews):
                normal_[v] = cv2.resize(
                    normal[v].transpose(1, 2, 0), self.OutputSize,
                    interpolation=cv2.INTER_NEAREST).transpose(2, 0, 1)
            normal_ = normal_[np.newaxis, :]

    if self.denseCorres:
        # get 3d point cloud for each pano
        pcs = self.Pano2PointCloud(
            imgs[0])  # be aware of the order of returned pc!!!
        pct = self.Pano2PointCloud(imgs[1])
        # Transform both clouds with the inverse of their own pose so they
        # can be matched in one common frame.
        pct = np.matmul(np.linalg.inv(R[1]),
                        np.concatenate(
                            (pct, np.ones([1, pct.shape[1]]))))[:3, :]
        pcs = np.matmul(np.linalg.inv(R[0]),
                        np.concatenate(
                            (pcs, np.ones([1, pcs.shape[1]]))))[:3, :]
        # find correspondence using kdtree
        tree = KDTree(pct.T)
        # sample 5000 query points
        IdxQuery = np.random.choice(range(pcs.shape[1]), 5000)
        pcsQuery = pcs[:, IdxQuery]
        nearest_dist, nearest_ind = tree.query(pcsQuery.T, k=1)
        hasCorres = (nearest_dist < 0.08)
        idxTgtNeg = []
        idxSrc = self.PanoIdx(IdxQuery[np.where(hasCorres)[0]], 160, 640)
        idxTgt = self.PanoIdx(nearest_ind[hasCorres], 160, 640)
        if hasCorres.sum() < 500:
            # Too few matches: emit a zero placeholder flagged as invalid.
            rets['denseCorres'] = {
                'idxSrc': np.zeros([1, 2000, 2]),
                'idxTgt': np.zeros([1, 2000, 2]),
                'valid': np.array([0]),
                'idxTgtNeg': idxTgtNeg
            }
        else:
            # only pick 2000 correspondence per pair
            idx2000 = np.random.choice(range(idxSrc.shape[0]), 2000)
            rets['denseCorres'] = {
                'idxSrc': idxSrc[idx2000][np.newaxis, :],
                'idxTgt': idxTgt[idx2000][np.newaxis, :],
                'valid': np.array([1]),
                'idxTgtNeg': idxTgtNeg
            }

    # reproject each image into the other image plane
    if self.reproj:
        h = imgs.shape[1]
        out_shape = imgs_rgb[0].shape

        def reproject(src, dst):
            """Warp view ``src`` into view ``dst`` with the exact and a jittered pose."""
            # Flatten the four h-wide column strips of the source panorama
            # one after another.
            strips = [slice(k * h, (k + 1) * h) for k in range(4)]
            colorpct = np.concatenate(
                [imgs_rgb[src, :, s, :].reshape(-1, 3) for s in strips], 0)
            normalpct = np.concatenate(
                [normal_[0, src, :, :, s].reshape(3, -1) for s in strips], 1)
            depthpct = np.concatenate(
                [imgs[src, :, s].reshape(-1) for s in strips])
            pct = self.Pano2PointCloud(
                imgs[src])  # be aware of the order of returned pc!!!
            pct_h = np.concatenate((pct, np.ones([1, pct.shape[1]])))

            # Exact relative pose and a randomly perturbed copy of it.
            R_this = np.matmul(R[dst], np.linalg.inv(R[src]))
            R_this_p = R_this.copy()
            dR = util.randomRotation(epsilon=0.1)
            R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
            R_this_p[:3, 3] += np.random.randn(3) * 0.1
            dr = np.matmul(R_this, np.linalg.inv(R_this_p))

            pct_reproj = np.matmul(R_this_p, pct_h)[:3, :]
            pct_reproj_org = np.matmul(R_this, pct_h)[:3, :]
            flow = pct_reproj_org - pct_reproj

            # assume always observe the second view(right view)
            second = slice(h * h, h * h * 2)
            colorpct = colorpct[second, :]
            depthpct = depthpct[second]
            # Normals are rotated with the perturbed pose, not the exact one.
            normalpct = np.matmul(R_this_p[:3, :3], normalpct[:, second]).T
            pct_reproj = pct_reproj[:, second]
            pct_reproj_org = pct_reproj_org[:, second]
            flow = flow[:, second].T

            rgb = self.reproj_helper(pct_reproj_org, colorpct, out_shape,
                                     'color')
            rgb_p = self.reproj_helper(pct_reproj, colorpct, out_shape,
                                       'color')
            n_p = self.reproj_helper(pct_reproj, normalpct, out_shape,
                                     'normal')
            d_p = self.reproj_helper(pct_reproj, depthpct, out_shape[:2],
                                     'depth')
            flow_p = self.reproj_helper(pct_reproj, flow, out_shape, 'color')
            return {
                'dr': dr, 'rgb': rgb, 'rgb_p': rgb_p, 'n_p': n_p,
                'd_p': d_p, 'flow_p': flow_p,
                'mask_p': (d_p != 0).astype('int'),
            }

        def envelope_box(depth):
            """Return a 0/1 mask of the box spanned by the non-zero rows/columns."""
            box = np.zeros(depth.shape)
            try:
                cols = np.where(depth.sum(0))[0]
                w0, w1 = cols[0], cols[-1]
                rows = np.where(depth.sum(1))[0]
                h0, h1 = rows[0], rows[-1]
            except IndexError:
                # Nothing was projected: fall back to the whole image.
                w0, h0 = 0, 0
                w1, h1 = depth.shape[1] - 1, depth.shape[0] - 1
            # NOTE(review): the slice excludes row h1 and column w1.
            box[h0:h1, w0:w1] = 1
            return box

        t2s = reproject(1, 0)
        s2t = reproject(0, 1)
        t2s_box_p = envelope_box(t2s['d_p'])
        s2t_box_p = envelope_box(s2t['d_p'])
        d_shape = t2s['d_p'].shape

        rets['proj_dr'] = np.stack((t2s['dr'], s2t['dr']), 0)[np.newaxis, :]
        rets['proj_flow'] = np.stack(
            (t2s['flow_p'], s2t['flow_p']),
            0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_rgb'] = np.stack(
            (t2s['rgb'], s2t['rgb']), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_rgb_p'] = np.stack(
            (t2s['rgb_p'], s2t['rgb_p']),
            0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_n_p'] = np.stack(
            (t2s['n_p'], s2t['n_p']), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_d_p'] = np.stack(
            (t2s['d_p'], s2t['d_p']),
            0).reshape(1, 2, 1, d_shape[0], d_shape[1])
        rets['proj_mask_p'] = np.stack(
            (t2s['mask_p'], s2t['mask_p']),
            0).reshape(1, 2, 1, d_shape[0], d_shape[1])
        rets['proj_box_p'] = np.stack(
            (t2s_box_p, s2t_box_p),
            0).reshape(1, 2, 1, d_shape[0], d_shape[1])

    if self.segm:
        segm = np.zeros((self.nViews, 1, *out_hw), dtype=np.float32)
        for v, fid in enumerate(frame_ids):
            # Only the first channel of the label image is used.
            segm[v] = (self.LoadImage(
                os.path.join(basePath, 'semanticLabel', '{}.png'.format(fid)),
                depth=False)[:, :, 0:1].copy()).transpose(2, 0, 1)

    # Resize depth (nearest neighbour) and RGB to the output resolution.
    for v in range(self.nViews):
        imgs_[v] = cv2.resize(imgs[v], self.OutputSize,
                              interpolation=cv2.INTER_NEAREST)
        if self.rgbd:
            imgs_rgb_[v] = cv2.resize(imgs_rgb[v],
                                      self.OutputSize).transpose(2, 0, 1)

    if self.segm:
        rets['segm'] = segm[np.newaxis, :]
    rets['interval'] = self.interval_this
    # Optional modalities are reported only when they were loaded, like
    # 'segm'; assigning them unconditionally raised UnboundLocalError
    # whenever the switch was off.
    if self.normal:
        rets['norm'] = normal_
    if self.rgbd:
        rets['rgb'] = imgs_rgb_[np.newaxis, :]
    rets['depth'] = imgs_[np.newaxis, :]
    rets['Q'] = Q[np.newaxis, :]
    rets['R'] = R[np.newaxis, :]
    rets['imgsPath'] = imgsPath
    return rets
# Silence console output while the shape is imported: keep a duplicate of the
# current stdout descriptor in `old`, close descriptor 1 and open the log
# file.  Presumably the freshly opened file takes over descriptor 1 so that
# output written to stdout lands in `logfile` -- TODO confirm.
# NOTE(review): `old` is not restored anywhere in this fragment.
old = os.dup(1)
sys.stdout.flush()
os.close(1)
os.open(logfile, os.O_WRONLY)

# Path of the model to render, built from module-level settings.
shape_file = "{2}/{0}/{1}/models/model_normalized.obj".format(
    CATEGORY, MODEL, SHAPENETPATH)
bpy.ops.import_scene.obj(filepath=shape_file)
# Switch every material to shadeless.
for m in bpy.data.materials:
    m.use_shadeless = True

N = 100  # number of camera poses sampled for this shape
for i in range(N):
    # uniformly sample rotation angle
    rho, azim, elev, theta = util.randomRotation()
    camPos = util.objectCenteredCamPos(rho, azim, elev)
    # Compose the two camera quaternions and apply the result to the camera.
    q1 = util.camPosToQuaternion(camPos)
    q2 = util.camRotQuaternion(camPos, theta)
    q = util.quaternionProduct(q2, q1)
    util.setCameraExtrinsics(camera, camPos, q)
    # NOTE(review): q_extr / t_extr are not used in the visible fragment.
    q_extr, t_extr = util.cameraExtrinsicMatrix(q, camPos)
    # for ShapeNetCore.v2 all the objects are rotated 90 degrees
    # comment out this block if ShapeNetCore.v1 is used
    if i == 0:
        # Select everything except the camera, then rotate the selection
        # once (first iteration only) by -pi/2 about the z axis.
        for o in bpy.data.objects:
            if o == camera:
                o.select = False
            else:
                o.select = True
        bpy.ops.transform.rotate(value=-np.pi / 2, axis=(0, 0, 1))
def __getitem__(self, index):
    """Load one two-view sample and return it as a dict of batch-of-one arrays.

    The frame pair comes from ``self.__getpair__(index)`` and every file is
    read below ``self.base_this``.  Depth and poses are always loaded; the
    other modalities depend on ``self.fullsize_rgbdn``, ``self.rgbd``,
    ``self.normal``, ``self.segm``, ``self.dynamicWeighting``,
    ``self.denseCorres`` and ``self.reproj``.  ``self.reproj`` indexes the
    RGB and normal images, so it needs ``rgbd`` and ``normal`` enabled.

    Args:
        index: Sample index, forwarded to ``self.__getpair__``.

    Returns:
        dict: ``dataMask``, ``interval``, ``depth``, ``Q``, ``R`` and
        ``imgsPath`` are always present.  ``rgb_full`` / ``depth_full``,
        ``rgb``, ``norm``, ``segm``, ``dynamicW``, ``denseCorres`` and the
        ``proj_*`` entries appear only when their switch is enabled.
        ``Q[..., :4]`` is ``rot2Quaternion`` of the pose rotation block and
        ``Q[..., 4:]`` is the pose translation column.

    Raises:
        AssertionError: If ``self.nViews`` is not 2, or if ``self.reproj``
            is set and the depth images are not 160 x 640.
    """
    assert (self.nViews == 2)
    rets = {}
    out_hw = self.OutputSize[::-1]

    imgs = np.zeros((self.nViews, *out_hw), dtype=np.float32)
    if self.rgbd:
        imgs_rgb = np.zeros((self.nViews, *out_hw, 3), dtype=np.float32)
    if self.segm:
        segm = np.zeros((self.nViews, 1, *out_hw), dtype=np.float32)
    if self.dynamicWeighting:
        dynamicW = np.zeros((self.nViews, 1, *out_hw), dtype=np.float32)
    if self.normal:
        normal = np.zeros((self.nViews, *out_hw, 3), dtype=np.float32)
    R = np.zeros((self.nViews, 4, 4))
    Q = np.zeros((self.nViews, 7))

    ct0, ct1 = self.__getpair__(index)
    basePath = self.base_this
    frame_ids = (f"{ct0:06d}", f"{ct1:06d}")
    imgsPath = [f"{basePath}/{ct0:06d}", f"{basePath}/{ct1:06d}"]

    if self.fullsize_rgbdn:
        # Full-resolution (480 x 640) observations.
        imgs_rgb_full = np.zeros((self.nViews, 480, 640, 3),
                                 dtype=np.float32)
        imgs_full = np.zeros((self.nViews, 480, 640), dtype=np.float32)
        for v, fid in enumerate(frame_ids):
            imgs_full[v] = self.LoadImage(
                os.path.join(basePath, 'obs_depth',
                             '{}.png'.format(fid))).copy()
        for v, fid in enumerate(frame_ids):
            imgs_rgb_full[v] = self.LoadImage(os.path.join(
                basePath, 'obs_rgb', '{}.png'.format(fid)),
                depth=False).copy() / 255.
        rets['rgb_full'] = imgs_rgb_full[np.newaxis, :]
        rets['depth_full'] = imgs_full[np.newaxis, :]

    # Depth maps; a zero depth marks an invalid pixel in the data mask.
    for v, fid in enumerate(frame_ids):
        imgs[v] = self.LoadImage(
            os.path.join(basePath, 'depth', '{}.png'.format(fid))).copy()
    dataMask = np.zeros((self.nViews, 1, *out_hw), dtype=np.float32)
    for v in range(self.nViews):
        dataMask[v, 0, :, :] = (imgs[v] != 0)
    rets['dataMask'] = dataMask[np.newaxis, :]

    if self.rgbd:
        for v, fid in enumerate(frame_ids):
            imgs_rgb[v] = self.LoadImage(os.path.join(
                basePath, 'rgb', '{}.png'.format(fid)),
                depth=False).copy() / 255.

    # 4x4 poses and their quaternion + translation encoding.
    for v, fid in enumerate(frame_ids):
        R[v] = np.loadtxt(os.path.join(basePath, 'pose', fid + '.pose.txt'))
    for v in range(self.nViews):
        Q[v, :4] = rot2Quaternion(R[v][:3, :3])
        Q[v, 4:] = R[v][:3, 3]

    if self.normal:
        for v, fid in enumerate(frame_ids):
            tp = self.LoadImage(
                os.path.join(basePath, 'normal', '{}.png'.format(fid)),
                depth=False).copy().astype('float')
            # Pixels whose three channels are all zero stay zero; every
            # other pixel is rescaled with x / 255 * 2 - 1.
            mask = (tp == 0).sum(2) < 3
            tp[mask] = tp[mask] / 255. * 2 - 1
            normal[v] = tp

    if self.segm:
        for v, fid in enumerate(frame_ids):
            # Only channel 1 of the label image is used.
            tp = (self.LoadImage(
                os.path.join(basePath, 'semantic_idx', '{}.png'.format(fid)),
                depth=False).copy())[:, :, 1]
            segm[v] = tp.reshape(segm[v].shape)

    if self.denseCorres:
        # get 3d point cloud for each pano
        pcs, masks = self.Pano2PointCloud(
            imgs[0],
            self.representation)  # be aware of the order of returned pc!!!
        pct, maskt = self.Pano2PointCloud(imgs[1], self.representation)
        # Transform both clouds with the inverse of their own pose so they
        # can be matched in one common frame.
        pct = np.matmul(np.linalg.inv(R[1]),
                        np.concatenate(
                            (pct, np.ones([1, pct.shape[1]]))))[:3, :]
        pcs = np.matmul(np.linalg.inv(R[0]),
                        np.concatenate(
                            (pcs, np.ones([1, pcs.shape[1]]))))[:3, :]
        # find correspondence using kdtree
        tree = KDTree(pct.T)
        # sample 5000 query points
        IdxQuery = np.random.choice(range(pcs.shape[1]), 5000)
        pcsQuery = pcs[:, IdxQuery]
        nearest_dist, nearest_ind = tree.query(pcsQuery.T, k=1)
        hasCorres = (nearest_dist < 0.08)
        idxTgtNeg = []
        idxSrc = self.PanoIdx(masks[IdxQuery[np.where(hasCorres)[0]]],
                              imgs.shape[1], imgs.shape[2],
                              self.representation)
        idxTgt = self.PanoIdx(maskt[nearest_ind[hasCorres]], imgs.shape[1],
                              imgs.shape[2], self.representation)
        if hasCorres.sum() < 200:
            # Too few matches: emit a zero placeholder flagged as invalid.
            rets['denseCorres'] = {
                'idxSrc': np.zeros([1, 500, 2]),
                'idxTgt': np.zeros([1, 500, 2]),
                'valid': np.array([0]),
                'idxTgtNeg': idxTgtNeg
            }
        else:
            # only pick 500 correspondences per pair
            idx500 = np.random.choice(range(idxSrc.shape[0]), 500)
            rets['denseCorres'] = {
                'idxSrc': idxSrc[idx500][np.newaxis, :],
                'idxTgt': idxTgt[idx500][np.newaxis, :],
                'valid': np.array([1]),
                'idxTgtNeg': idxTgtNeg
            }

    # reproject each image into the other image plane
    if self.reproj:
        assert (imgs.shape[1] == 160 and imgs.shape[2] == 640)
        # Fixed 66 x 88 pixel window of the image that is re-projected.
        rows = slice(80 - 33, 80 + 33)
        cols = slice(160 + 80 - 44, 160 + 80 + 44)
        out_shape = imgs_rgb[0].shape

        def reproject(src, dst):
            """Warp view ``src`` into view ``dst`` with the exact and a jittered pose."""
            pct, mask = util.depth2pc(
                imgs[src, rows, cols],
                'scannet')  # be aware of the order of returned pc!!!
            colorpct = imgs_rgb[src, rows, cols, :].reshape(-1, 3)[mask]
            normalpct = normal[src, rows, cols, :].reshape(-1, 3)[mask]
            depthpct = imgs[src, rows, cols].reshape(-1)[mask]
            pct_h = np.concatenate((pct.T, np.ones([1, pct.shape[0]])))

            # Exact relative pose and a randomly perturbed copy of it.
            R_this = np.matmul(R[dst], np.linalg.inv(R[src]))
            R_this_p = R_this.copy()
            dR = util.randomRotation(epsilon=0.1)
            R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
            R_this_p[:3, 3] += np.random.randn(3) * 0.1
            dr = np.matmul(R_this, np.linalg.inv(R_this_p))

            pct_reproj = np.matmul(R_this_p, pct_h)[:3, :]
            pct_reproj_org = np.matmul(R_this, pct_h)[:3, :]
            flow = (pct_reproj_org - pct_reproj).T
            # Normals are rotated with the perturbed pose, not the exact one.
            normalpct = np.matmul(R_this_p[:3, :3], normalpct.T).T

            rgb = self.reproj_helper(pct_reproj_org, colorpct, out_shape,
                                     'color')
            rgb_p = self.reproj_helper(pct_reproj, colorpct, out_shape,
                                       'color')
            n_p = self.reproj_helper(pct_reproj, normalpct, out_shape,
                                     'normal')
            d_p = self.reproj_helper(pct_reproj, depthpct, out_shape[:2],
                                     'depth')
            flow_p = self.reproj_helper(pct_reproj, flow, out_shape, 'color')
            return {
                'dr': dr, 'rgb': rgb, 'rgb_p': rgb_p, 'n_p': n_p,
                'd_p': d_p, 'flow_p': flow_p,
                'mask_p': (d_p != 0).astype('int'),
            }

        def envelope_box(depth):
            """Return a 0/1 mask of the box spanned by the non-zero rows/columns."""
            box = np.zeros(depth.shape)
            try:
                nz_cols = np.where(depth.sum(0))[0]
                w0, w1 = nz_cols[0], nz_cols[-1]
                nz_rows = np.where(depth.sum(1))[0]
                h0, h1 = nz_rows[0], nz_rows[-1]
            except IndexError:
                # Nothing was projected: fall back to the whole image.
                w0, h0 = 0, 0
                w1, h1 = depth.shape[1] - 1, depth.shape[0] - 1
            # NOTE(review): the slice excludes row h1 and column w1.
            box[h0:h1, w0:w1] = 1
            return box

        t2s = reproject(1, 0)
        s2t = reproject(0, 1)
        t2s_box_p = envelope_box(t2s['d_p'])
        s2t_box_p = envelope_box(s2t['d_p'])
        d_shape = t2s['d_p'].shape

        rets['proj_dr'] = np.stack((t2s['dr'], s2t['dr']), 0)[np.newaxis, :]
        rets['proj_flow'] = np.stack(
            (t2s['flow_p'], s2t['flow_p']),
            0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_rgb'] = np.stack(
            (t2s['rgb'], s2t['rgb']), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_rgb_p'] = np.stack(
            (t2s['rgb_p'], s2t['rgb_p']),
            0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_n_p'] = np.stack(
            (t2s['n_p'], s2t['n_p']), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_d_p'] = np.stack(
            (t2s['d_p'], s2t['d_p']),
            0).reshape(1, 2, 1, d_shape[0], d_shape[1])
        rets['proj_mask_p'] = np.stack(
            (t2s['mask_p'], s2t['mask_p']),
            0).reshape(1, 2, 1, d_shape[0], d_shape[1])
        rets['proj_box_p'] = np.stack(
            (t2s_box_p, s2t_box_p),
            0).reshape(1, 2, 1, d_shape[0], d_shape[1])

    if self.segm:
        rets['segm'] = segm[np.newaxis, :]
    if self.dynamicWeighting:
        rets['dynamicW'] = dynamicW[np.newaxis, :]
    rets['interval'] = self.interval_this
    # Optional modalities are reported only when they were loaded, like
    # 'segm'; assigning them unconditionally raised UnboundLocalError
    # whenever the switch was off.
    if self.normal:
        rets['norm'] = normal[np.newaxis, :].transpose(0, 1, 4, 2, 3)
    if self.rgbd:
        rets['rgb'] = imgs_rgb[np.newaxis, :].transpose(0, 1, 4, 2, 3)
    rets['depth'] = imgs[np.newaxis, :]
    rets['Q'] = Q[np.newaxis, :]
    rets['R'] = R[np.newaxis, :]
    rets['imgsPath'] = imgsPath
    return rets
def __getitem__(self, index):
    """Load one two-view sample and return it as a dict of batch-of-one arrays.

    The frame pair comes from ``self.__getpair__(index)`` and images are read
    below ``self.base_this``.  Depth and poses are always loaded; the other
    modalities depend on ``self.fullsize_rgbdn``, ``self.rgbd``,
    ``self.normal``, ``self.segm``, ``self.dynamicWeighting``,
    ``self.pointcloud``, ``self.plane_r``, ``self.plane_m``,
    ``self.denseCorres`` and ``self.reproj``.  ``self.reproj`` and
    ``self.pointcloud`` index the RGB and normal images, so they need
    ``rgbd`` and ``normal`` enabled.

    Args:
        index: Sample index, forwarded to ``self.__getpair__``.

    Returns:
        dict: ``dataMask``, ``interval``, ``depth``, ``Q``, ``R`` and
        ``imgsPath`` are always present.  ``rgb_full`` / ``depth_full``,
        ``rgb``, ``norm``, ``segm``, ``dynamicW``, ``pointcloud`` /
        ``pointcloud_flow``, ``plane*`` / ``valid_plane``, ``denseCorres``
        and the ``proj_*`` entries appear only when their switch is enabled.
        ``Q[..., :4]`` is ``rot2Quaternion`` of the pose rotation block and
        ``Q[..., 4:]`` is the pose translation column.

    Raises:
        AssertionError: If ``self.nViews`` is not 2, or if ``self.reproj``
            is set and the depth images are not 160 x 640.
        FileNotFoundError: If ``self.plane_r`` is set and the plane file of
            the scene does not exist.
    """
    assert (self.nViews == 2)
    rets = {}
    out_hw = self.OutputSize[::-1]

    imgs = np.zeros((self.nViews, *out_hw), dtype=np.float32)
    if self.rgbd:
        imgs_rgb = np.zeros((self.nViews, *out_hw, 3), dtype=np.float32)
    if self.segm:
        segm = np.zeros((self.nViews, 1, *out_hw), dtype=np.float32)
    if self.dynamicWeighting:
        dynamicW = np.zeros((self.nViews, 1, *out_hw), dtype=np.float32)
    if self.normal:
        normal = np.zeros((self.nViews, *out_hw, 3), dtype=np.float32)
    if self.pointcloud:
        # Rows per point: 0-2 xyz, 3-5 normal, 6-8 colour, 9 left at zero.
        pointcloud = np.zeros(
            (self.nViews, 3 + 3 + 3 + 1, self.num_points), dtype=np.float32)
        pointcloud_flow = np.zeros((self.nViews, 3, self.num_points),
                                   dtype=np.float32)
    R = np.zeros((self.nViews, 4, 4))
    Q = np.zeros((self.nViews, 7))

    ct0, ct1 = self.__getpair__(index)
    basePath = self.base_this
    frame_ids = (f"{ct0:06d}", f"{ct1:06d}")
    imgsPath = [f"{basePath}/{ct0:06d}", f"{basePath}/{ct1:06d}"]

    if self.fullsize_rgbdn:
        # Full-resolution (480 x 640) observations.
        imgs_rgb_full = np.zeros((self.nViews, 480, 640, 3),
                                 dtype=np.float32)
        imgs_full = np.zeros((self.nViews, 480, 640), dtype=np.float32)
        for v, fid in enumerate(frame_ids):
            imgs_full[v] = self.LoadImage(
                os.path.join(basePath, 'obs_depth',
                             '{}.png'.format(fid))).copy()
        for v, fid in enumerate(frame_ids):
            imgs_rgb_full[v] = self.LoadImage(os.path.join(
                basePath, 'obs_rgb', '{}.png'.format(fid)),
                depth=False).copy() / 255.
        rets['rgb_full'] = imgs_rgb_full[np.newaxis, :]
        rets['depth_full'] = imgs_full[np.newaxis, :]

    # Depth maps; a zero depth marks an invalid pixel in the data mask.
    for v, fid in enumerate(frame_ids):
        imgs[v] = self.LoadImage(
            os.path.join(basePath, 'depth', '{}.png'.format(fid))).copy()
    dataMask = np.zeros((self.nViews, 1, *out_hw), dtype=np.float32)
    for v in range(self.nViews):
        dataMask[v, 0, :, :] = (imgs[v] != 0)
    rets['dataMask'] = dataMask[np.newaxis, :]

    if self.rgbd:
        for v, fid in enumerate(frame_ids):
            imgs_rgb[v] = self.LoadImage(os.path.join(
                basePath, 'rgb', '{}.png'.format(fid)),
                depth=False).copy() / 255.

    # With the new naming scheme the poses are read from the 'ScanNet' tree
    # instead of 'ScanNet_360'.
    if self.scannet_new_name:
        tmp_basePath = basePath.replace('ScanNet_360', 'ScanNet')
    else:
        tmp_basePath = basePath
    for v, fid in enumerate(frame_ids):
        R[v] = np.loadtxt(
            os.path.join(tmp_basePath, 'pose', fid + '.pose.txt'))
    for v in range(self.nViews):
        Q[v, :4] = rot2Quaternion(R[v][:3, :3])
        Q[v, 4:] = R[v][:3, 3]

    if self.normal:
        for v, fid in enumerate(frame_ids):
            tp = self.LoadImage(
                os.path.join(basePath, 'normal', '{}.png'.format(fid)),
                depth=False).copy().astype('float')
            # Pixels whose three channels are all zero stay zero; every
            # other pixel is rescaled with x / 255 * 2 - 1.
            mask = (tp == 0).sum(2) < 3
            tp[mask] = tp[mask] / 255. * 2 - 1
            normal[v] = tp

    if self.segm:
        for v, fid in enumerate(frame_ids):
            # Only channel 1 of the label image is used.
            tp = (self.LoadImage(
                os.path.join(basePath, 'semantic_idx', '{}.png'.format(fid)),
                depth=False).copy())[:, :, 1]
            segm[v] = tp.reshape(segm[v].shape)

    if self.denseCorres:
        # get 3d point cloud for each pano
        pcs, masks = self.Pano2PointCloud(
            imgs[0],
            self.representation)  # be aware of the order of returned pc!!!
        pct, maskt = self.Pano2PointCloud(imgs[1], self.representation)
        # Transform both clouds with the inverse of their own pose so they
        # can be matched in one common frame.
        pct = np.matmul(np.linalg.inv(R[1]),
                        np.concatenate(
                            (pct, np.ones([1, pct.shape[1]]))))[:3, :]
        pcs = np.matmul(np.linalg.inv(R[0]),
                        np.concatenate(
                            (pcs, np.ones([1, pcs.shape[1]]))))[:3, :]
        # find correspondence using kdtree
        tree = KDTree(pct.T)
        # sample 5000 query points
        IdxQuery = np.random.choice(range(pcs.shape[1]), 5000)
        pcsQuery = pcs[:, IdxQuery]
        nearest_dist, nearest_ind = tree.query(pcsQuery.T, k=1)
        hasCorres = (nearest_dist < 0.08)
        idxTgtNeg = []
        idxSrc = self.PanoIdx(masks[IdxQuery[np.where(hasCorres)[0]]],
                              imgs.shape[1], imgs.shape[2],
                              self.representation)
        idxTgt = self.PanoIdx(maskt[nearest_ind[hasCorres]], imgs.shape[1],
                              imgs.shape[2], self.representation)
        if hasCorres.sum() < 200:
            # Too few matches: emit a zero placeholder flagged as invalid.
            rets['denseCorres'] = {
                'idxSrc': np.zeros([1, 500, 2]),
                'idxTgt': np.zeros([1, 500, 2]),
                'valid': np.array([0]),
                'idxTgtNeg': idxTgtNeg
            }
        else:
            # only pick 500 correspondences per pair
            idx500 = np.random.choice(range(idxSrc.shape[0]), 500)
            rets['denseCorres'] = {
                'idxSrc': idxSrc[idx500][np.newaxis, :],
                'idxTgt': idxTgt[idx500][np.newaxis, :],
                'valid': np.array([1]),
                'idxTgtNeg': idxTgtNeg
            }

    if self.pointcloud:
        try:
            for v in range(self.nViews):
                # Sample points from columns 160:320 of each view.
                pc = self.depth2pc(imgs[v][:, 160:160 * 2])
                idx_s = np.random.choice(range(len(pc)), self.num_points)
                pointcloud[v, :3, :] = pc[idx_s, :].T
                pc_n = normal[v][:, 160:160 * 2, :].reshape(-1, 3)
                pointcloud[v, 3:6, :] = pc_n[idx_s, :].T
                pc_c = imgs_rgb[v, :, 160:160 * 2, :].reshape(-1, 3)
                # Colour channels are stored in reversed order.  This write
                # used to be missing for the second view, leaving its colour
                # rows at zero.
                pointcloud[v, 6:9, :] = pc_c[idx_s, ::-1].T
        except Exception:
            # Best effort: any failure while sampling (e.g. an empty point
            # set) yields all-zero clouds.  KeyboardInterrupt / SystemExit
            # are no longer swallowed.
            pointcloud = np.zeros(
                (self.nViews, 3 + 3 + 3 + 1, self.num_points),
                dtype=np.float32)
            pointcloud_flow = np.zeros((self.nViews, 3, self.num_points),
                                       dtype=np.float32)
            print("this pair does not contain point cloud!")

    if self.plane_r:
        scene_id = basePath.split('/')[-1]
        # NOTE(review): absolute, machine-specific dataset location.
        plane_file = '/media/yzp12/wdblue/2020_CVPR_Hybrid/data/ScanNet_plane/train/' + scene_id + '.npy'
        if not os.path.exists(plane_file):
            # Fail loudly instead of dropping into pdb; without the file the
            # plane outputs below cannot be built.
            raise FileNotFoundError("Missing plane data: " + plane_file)
        plane_eq_raw = np.load(plane_file)
        # NOTE(review): with fewer than 3 planes the duplicated array still
        # has fewer than 6 rows and the [-6] lookup below raises IndexError.
        if plane_eq_raw.shape[0] < 6:
            plane_eq_raw = np.concatenate([plane_eq_raw, plane_eq_raw],
                                          axis=0)
        MAX_PLANE = 10
        # Keep the MAX_PLANE rows with the largest column-7 value, then drop
        # rows whose value is not above half of the 6th largest one.
        plane_idx = np.argsort(plane_eq_raw[:, 7])
        plane_eq_raw = plane_eq_raw[plane_idx[-MAX_PLANE:]]
        truncate_num = plane_eq_raw[-6, 7] / 2
        plane_eq_raw = plane_eq_raw[plane_eq_raw[:, 7] > truncate_num]
        if plane_eq_raw.shape[0] < MAX_PLANE:
            valid_plane = plane_eq_raw.shape[0]
            # Zero-pad to a fixed MAX_PLANE rows.
            plane_eq_raw = np.concatenate(
                (plane_eq_raw,
                 np.zeros([
                     MAX_PLANE - plane_eq_raw.shape[0],
                     plane_eq_raw.shape[-1]
                 ])))
        else:
            valid_plane = MAX_PLANE
        # Columns 3:7 hold the plane equation and columns 0:3 the centre;
        # both are moved with the pose of the first view.
        plane_eq = plane_eq_raw[:, 3:7]
        plane_eq = np.matmul(plane_eq, np.linalg.inv(R[0]))
        plane_center = plane_eq_raw[:, :3]
        plane_center = (np.matmul(R[0][:3, :3], plane_center.T) +
                        R[0][:3, 3:4]).T

    if self.plane_m:
        scene_id = basePath.split('/')[-1]
        # NOTE(review): absolute, machine-specific dataset location.
        plane_file = '/media/yzp12/wdblue/2020_CVPR_Hybrid/data/ScanNet_manual_plane/%s/' % self.split + scene_id + '.npy'
        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only
        # safe for trusted dataset files.
        plane_raw = np.load(plane_file, allow_pickle=True)
        plane_center = plane_raw[:, :3]
        plane_center = (np.matmul(R[0][:3, :3], plane_center.T) +
                        R[0][:3, 3:4]).T
        plane_normal = plane_raw[:, 3:6]
        plane_normal = np.matmul(plane_normal,
                                 np.linalg.inv(R[0][:3, :3]))
        rets['plane_c'] = plane_center[np.newaxis, :]
        rets['plane_n'] = plane_normal[np.newaxis, :]
        rets['plane_raw'] = plane_raw[np.newaxis, :]

    # reproject each image into the other image plane
    if self.reproj:
        assert (imgs.shape[1] == 160 and imgs.shape[2] == 640)
        # Fixed 66 x 88 pixel window of the image that is re-projected.
        rows = slice(80 - 33, 80 + 33)
        cols = slice(160 + 80 - 44, 160 + 80 + 44)
        out_shape = imgs_rgb[0].shape

        def reproject(src, dst):
            """Warp view ``src`` into view ``dst`` with the exact and a jittered pose."""
            pct, mask = util.depth2pc(
                imgs[src, rows, cols],
                'scannet')  # be aware of the order of returned pc!!!
            colorpct = imgs_rgb[src, rows, cols, :].reshape(-1, 3)[mask]
            normalpct = normal[src, rows, cols, :].reshape(-1, 3)[mask]
            depthpct = imgs[src, rows, cols].reshape(-1)[mask]
            pct_h = np.concatenate((pct.T, np.ones([1, pct.shape[0]])))

            # Exact relative pose and a randomly perturbed copy of it.
            R_this = np.matmul(R[dst], np.linalg.inv(R[src]))
            R_this_p = R_this.copy()
            dR = util.randomRotation(epsilon=0.1)
            R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
            R_this_p[:3, 3] += np.random.randn(3) * 0.1
            dr = np.matmul(R_this, np.linalg.inv(R_this_p))

            pct_reproj = np.matmul(R_this_p, pct_h)[:3, :]
            pct_reproj_org = np.matmul(R_this, pct_h)[:3, :]
            flow = (pct_reproj_org - pct_reproj).T
            # Normals are rotated with the perturbed pose, not the exact one.
            normalpct = np.matmul(R_this_p[:3, :3], normalpct.T).T

            rgb = self.reproj_helper(pct_reproj_org, colorpct, out_shape,
                                     'color')
            rgb_p = self.reproj_helper(pct_reproj, colorpct, out_shape,
                                       'color')
            n_p = self.reproj_helper(pct_reproj, normalpct, out_shape,
                                     'normal')
            d_p = self.reproj_helper(pct_reproj, depthpct, out_shape[:2],
                                     'depth')
            flow_p = self.reproj_helper(pct_reproj, flow, out_shape, 'color')
            return {
                'dr': dr, 'rgb': rgb, 'rgb_p': rgb_p, 'n_p': n_p,
                'd_p': d_p, 'flow_p': flow_p,
                'mask_p': (d_p != 0).astype('int'),
            }

        def envelope_box(depth):
            """Return a 0/1 mask of the box spanned by the non-zero rows/columns."""
            box = np.zeros(depth.shape)
            try:
                nz_cols = np.where(depth.sum(0))[0]
                w0, w1 = nz_cols[0], nz_cols[-1]
                nz_rows = np.where(depth.sum(1))[0]
                h0, h1 = nz_rows[0], nz_rows[-1]
            except IndexError:
                # Nothing was projected: fall back to the whole image.
                w0, h0 = 0, 0
                w1, h1 = depth.shape[1] - 1, depth.shape[0] - 1
            # NOTE(review): the slice excludes row h1 and column w1.
            box[h0:h1, w0:w1] = 1
            return box

        t2s = reproject(1, 0)
        s2t = reproject(0, 1)
        t2s_box_p = envelope_box(t2s['d_p'])
        s2t_box_p = envelope_box(s2t['d_p'])
        d_shape = t2s['d_p'].shape

        rets['proj_dr'] = np.stack((t2s['dr'], s2t['dr']), 0)[np.newaxis, :]
        rets['proj_flow'] = np.stack(
            (t2s['flow_p'], s2t['flow_p']),
            0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_rgb'] = np.stack(
            (t2s['rgb'], s2t['rgb']), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_rgb_p'] = np.stack(
            (t2s['rgb_p'], s2t['rgb_p']),
            0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_n_p'] = np.stack(
            (t2s['n_p'], s2t['n_p']), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_d_p'] = np.stack(
            (t2s['d_p'], s2t['d_p']),
            0).reshape(1, 2, 1, d_shape[0], d_shape[1])
        rets['proj_mask_p'] = np.stack(
            (t2s['mask_p'], s2t['mask_p']),
            0).reshape(1, 2, 1, d_shape[0], d_shape[1])
        rets['proj_box_p'] = np.stack(
            (t2s_box_p, s2t_box_p),
            0).reshape(1, 2, 1, d_shape[0], d_shape[1])

    if self.segm:
        rets['segm'] = segm[np.newaxis, :]
    if self.dynamicWeighting:
        rets['dynamicW'] = dynamicW[np.newaxis, :]
    if self.pointcloud:
        rets['pointcloud'] = pointcloud[np.newaxis, :]
        rets['pointcloud_flow'] = pointcloud_flow[np.newaxis, :]
    if self.plane_r:
        rets['plane'] = plane_eq[np.newaxis, :]
        rets['plane_raw'] = plane_eq_raw[np.newaxis, :]
        rets['plane_c'] = plane_center[np.newaxis, :]
        rets['valid_plane'] = valid_plane
    rets['interval'] = self.interval_this
    # Optional modalities are reported only when they were loaded, like
    # 'segm'; assigning them unconditionally raised UnboundLocalError
    # whenever the switch was off.
    if self.normal:
        rets['norm'] = normal[np.newaxis, :].transpose(0, 1, 4, 2, 3)
    if self.rgbd:
        rets['rgb'] = imgs_rgb[np.newaxis, :].transpose(0, 1, 4, 2, 3)
    rets['depth'] = imgs[np.newaxis, :]
    rets['Q'] = Q[np.newaxis, :]
    rets['R'] = R[np.newaxis, :]
    rets['imgsPath'] = imgsPath
    return rets
def __getitem__(self, index):
    """Load one pair of views from disk and assemble the sample dictionary.

    Depth, RGB, normal, semantic and pose files for the two frames chosen by
    ``self.__getpair__(index)`` are read from ``self.base_this``.  Optional
    dense-correspondence and reprojection supervision is added when the
    corresponding flags are set.

    Args:
        index: sample index; it is only forwarded to ``self.__getpair__``.

    Returns:
        dict with (every array carries a leading axis of size 1):
            'dataMask'     non-zero-depth mask of both views.
            'denseCorres'  (if ``self.denseCorres``) dict with 'idxSrc',
                           'idxTgt', 'valid' and 'idxTgtNeg'.
            'proj_*'       (if ``self.reproj``) 'proj_dr', 'proj_flow',
                           'proj_rgb', 'proj_rgb_p', 'proj_n_p', 'proj_d_p',
                           'proj_mask_p', 'proj_box_p'; in each, entry 0 is
                           view 1 warped into view 0 and entry 1 the reverse.
            'segm'         (if ``self.segm``) truncated semantic labels.
            'interval'     ``self.interval_this``.
            'norm', 'rgb', 'depth'  images resized to ``self.OutputSize``.
            'Q'            per view: quaternion (first 4) + translation (last 3).
            'R'            per view: 4x4 pose read from the ``pose`` folder.
            'imgsPath'     list of the two ``"<basePath>/<frame>"`` strings.

    NOTE(review): 'norm' is returned unconditionally although the normal
    buffer is only built under ``self.normal``; 'rgb' is likewise returned
    unconditionally and appears to need ``self.rgbd``.  The method
    presumably expects both flags to be set -- confirm against the
    constructor.
    """

    def _reproject(depths, colors, normals, poses, src, dst):
        """Warp view ``src`` into view ``dst`` with the true and a perturbed pose.

        Returns ``(dr, rgb, rgb_p, n_p, d_p, flow_p, mask_p)`` where the
        ``_p`` entries were rendered with the randomly perturbed pose and
        ``dr`` is the transform ``true_pose @ inv(perturbed_pose)``.
        """
        h = depths.shape[1]
        ii = 1
        cols = slice(ii * h, (ii + 1) * h)
        # be aware of the order of returned pc!!!
        pct, mask = util.depth2pc(depths[src, :, 160:160 * 2], 'matterport')
        colorpct = colors[src, :, cols, :].reshape(-1, 3)[mask, :]
        normalpct = normals[0, src, :, :, cols].reshape(3, -1).T[mask, :]
        depthpct = depths[src, :, cols].reshape(-1)[mask]

        # relative pose src -> dst, plus a randomly perturbed copy of it
        R_this = np.matmul(poses[dst], np.linalg.inv(poses[src]))
        R_this_p = R_this.copy()
        dR = util.randomRotation(epsilon=0.1)
        R_this_p[:3, :3] = np.matmul(dR, R_this_p[:3, :3])
        R_this_p[:3, 3] += np.random.randn(3) * 0.1
        dr = np.matmul(R_this, np.linalg.inv(R_this_p))

        # transform the points (in homogeneous coordinates) with both poses
        pct_h = np.concatenate((pct.T, np.ones([1, pct.shape[0]])))
        pct_reproj = np.matmul(R_this_p, pct_h)[:3, :]
        pct_reproj_org = np.matmul(R_this, pct_h)[:3, :]
        flow = (pct_reproj_org - pct_reproj).T
        normalpct = np.matmul(R_this_p[:3, :3], normalpct.T).T

        shape = colors[0].shape
        rgb = self.reproj_helper(pct_reproj_org, colorpct, shape, 'color')
        rgb_p = self.reproj_helper(pct_reproj, colorpct, shape, 'color')
        n_p = self.reproj_helper(pct_reproj, normalpct, shape, 'normal')
        d_p = self.reproj_helper(pct_reproj, depthpct, shape[:2], 'depth')
        flow_p = self.reproj_helper(pct_reproj, flow, shape, 'color')
        mask_p = (d_p != 0).astype('int')
        return dr, rgb, rgb_p, n_p, d_p, flow_p, mask_p

    def _envelope_box(depth):
        """Return a 0/1 image marking the bounding box of non-zero columns/rows."""
        try:
            cols = np.where(depth.sum(0))[0]
            w0, w1 = cols[0], cols[-1]
            rows = np.where(depth.sum(1))[0]
            h0, h1 = rows[0], rows[-1]
        except IndexError:
            # nothing was projected: fall back to the full image extent
            w0, h0 = 0, 0
            w1, h1 = depth.shape[1] - 1, depth.shape[0] - 1
        box = np.zeros(depth.shape)
        box[h0:h1, w0:w1] = 1
        return box

    rets = {}

    # ---- buffers: "<name>" is input resolution, "<name>_" is OutputSize ----
    imgs_ = np.zeros((self.nViews, *self.OutputSize[::-1]), dtype=np.float32)
    imgs = np.zeros((self.nViews, self.Inputheight, self.Inputwidth),
                    dtype=np.float32)
    if self.rgbd:
        imgs_rgb = np.zeros(
            (self.nViews, self.Inputheight, self.Inputwidth, 3),
            dtype=np.float32)
        imgs_rgb_ = np.zeros((self.nViews, 3, *self.OutputSize[::-1]),
                             dtype=np.float32)
    if self.segm:
        segm = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                        dtype=np.float32)
    if self.normal:
        normal = np.zeros(
            (self.nViews, 3, self.Inputheight, self.Inputwidth),
            dtype=np.float32)
    R = np.zeros((self.nViews, 4, 4))
    Q = np.zeros((self.nViews, 7))
    assert (self.nViews == 2)

    ct0, ct1 = self.__getpair__(index)
    imgsPath = []
    basePath = self.base_this
    frameid0 = f"{ct0:06d}"
    frameid1 = f"{ct1:06d}"

    # ---- depth and validity mask ----
    imgs[0] = self.LoadImage(
        os.path.join(basePath, 'depth', '{}.png'.format(frameid0))).copy()
    imgs[1] = self.LoadImage(
        os.path.join(basePath, 'depth', '{}.png'.format(frameid1))).copy()
    dataMask = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                        dtype=np.float32)
    dataMask[0, 0, :, :] = (imgs[0] != 0)
    dataMask[1, 0, :, :] = (imgs[1] != 0)
    rets['dataMask'] = dataMask[np.newaxis, :]

    # ---- colour ----
    if self.rgbd:
        imgs_rgb[0] = self.LoadImage(os.path.join(
            basePath, 'rgb', '{}.png'.format(frameid0)),
            depth=False).copy() / 255.
        imgs_rgb[1] = self.LoadImage(os.path.join(
            basePath, 'rgb', '{}.png'.format(frameid1)),
            depth=False).copy() / 255.

    # ---- poses: 4x4 matrix plus quaternion/translation encoding ----
    R[0] = np.loadtxt(
        os.path.join(basePath, 'pose', frameid0 + '.pose.txt'))
    R[1] = np.loadtxt(
        os.path.join(basePath, 'pose', frameid1 + '.pose.txt'))
    Q[0, :4] = rot2Quaternion(R[0][:3, :3])
    Q[0, 4:] = R[0][:3, 3]
    Q[1, :4] = rot2Quaternion(R[1][:3, :3])
    Q[1, 4:] = R[1][:3, 3]
    imgsPath.append(f"{basePath}/{ct0:06d}")
    imgsPath.append(f"{basePath}/{ct1:06d}")

    # ---- normals ----
    if self.normal:
        for v, frameid in enumerate((frameid0, frameid1)):
            tp = self.LoadImage(os.path.join(basePath, 'normal',
                                             '{}.png'.format(frameid)),
                                depth=False).copy().astype('float')
            # pixels whose three channels are all 0 are left untouched;
            # every other pixel is mapped from [0, 255] to [-1, 1]
            mask = (tp == 0).sum(2) < 3
            tp[mask] = tp[mask] / 255. * 2 - 1
            normal[v] = tp.transpose(2, 0, 1)
        normal_ = np.zeros((self.nViews, 3, *self.OutputSize[::-1]),
                           dtype=np.float32)
        normal_[0] = cv2.resize(normal[0].transpose(1, 2, 0),
                                self.OutputSize,
                                interpolation=cv2.INTER_NEAREST).transpose(
                                    2, 0, 1)
        normal_[1] = cv2.resize(normal[1].transpose(1, 2, 0),
                                self.OutputSize,
                                interpolation=cv2.INTER_NEAREST).transpose(
                                    2, 0, 1)
        normal_ = normal_[np.newaxis, :]

    # ---- semantic labels ----
    if self.segm:
        segm_ = np.zeros((self.nViews, 1, *self.OutputSize[::-1]),
                         dtype=np.float32)
        tp = (self.LoadImage(os.path.join(basePath, 'semanticLabel',
                                          '{}.png'.format(frameid0)),
                             depth=False)[:, :, 0].copy())
        segm[0] = tp.reshape(segm[0].shape)
        tp = (self.LoadImage(os.path.join(basePath, 'semanticLabel',
                                          '{}.png'.format(frameid1)),
                             depth=False)[:, :, 0].copy())
        segm[1] = tp.reshape(segm[1].shape)
        segm_[0] = segm[0]
        segm_[1] = segm[1]
        # truncate semantic class
        segm_[segm_ >= self.snumclass] = 0
        segm_ = segm_[np.newaxis, :]

    # ---- dense correspondences between the two panoramas ----
    if self.denseCorres:
        # get 3d point cloud for each pano
        # be aware of the order of returned pc!!!
        pcs, masks = self.Pano2PointCloud(imgs[0])
        pct, maskt = self.Pano2PointCloud(imgs[1])
        # apply the inverse of each view's pose so both clouds share a frame
        pct = np.matmul(np.linalg.inv(R[1]),
                        np.concatenate(
                            (pct, np.ones([1, pct.shape[1]]))))[:3, :]
        pcs = np.matmul(np.linalg.inv(R[0]),
                        np.concatenate(
                            (pcs, np.ones([1, pcs.shape[1]]))))[:3, :]
        # find correspondence using kdtree
        tree = KDTree(pct.T)
        # sample 5000 query points
        IdxQuery = np.random.choice(range(pcs.shape[1]), 5000)
        pcsQuery = pcs[:, IdxQuery]
        nearest_dist, nearest_ind = tree.query(pcsQuery.T, k=1)
        hasCorres = (nearest_dist < 0.08)
        idxTgtNeg = []
        idxSrc = self.PanoIdx(masks[IdxQuery[np.where(hasCorres)[0]]], 160,
                              640)
        idxTgt = self.PanoIdx(maskt[nearest_ind[hasCorres]], 160, 640)
        if hasCorres.sum() < 500:
            # too few matches: emit a zero placeholder flagged as invalid
            rets['denseCorres'] = {
                'idxSrc': np.zeros([1, 2000, 2]),
                'idxTgt': np.zeros([1, 2000, 2]),
                'valid': np.array([0]),
                'idxTgtNeg': idxTgtNeg
            }
        else:
            # only pick 2000 correspondence per pair
            idx2000 = np.random.choice(range(idxSrc.shape[0]), 2000)
            idxSrc = idxSrc[idx2000][np.newaxis, :]
            idxTgt = idxTgt[idx2000][np.newaxis, :]
            rets['denseCorres'] = {
                'idxSrc': idxSrc,
                'idxTgt': idxTgt,
                'valid': np.array([1]),
                'idxTgtNeg': idxTgtNeg
            }

    # ---- reprojection supervision (both directions) ----
    if self.reproj:
        # view 1 -> view 0 first, then view 0 -> view 1; the order fixes the
        # sequence in which random perturbations are drawn
        (t2s_dr, t2s_rgb, t2s_rgb_p, t2s_n_p, t2s_d_p, t2s_flow_p,
         t2s_mask_p) = _reproject(imgs, imgs_rgb, normal_, R, 1, 0)
        (s2t_dr, s2t_rgb, s2t_rgb_p, s2t_n_p, s2t_d_p, s2t_flow_p,
         s2t_mask_p) = _reproject(imgs, imgs_rgb, normal_, R, 0, 1)

        # compute an envelop box
        t2s_box_p = _envelope_box(t2s_d_p)
        s2t_box_p = _envelope_box(s2t_d_p)

        rets['proj_dr'] = np.stack((t2s_dr, s2t_dr), 0)[np.newaxis, :]
        rets['proj_flow'] = np.stack((t2s_flow_p, s2t_flow_p),
                                     0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_rgb'] = np.stack((t2s_rgb, s2t_rgb),
                                    0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_rgb_p'] = np.stack(
            (t2s_rgb_p, s2t_rgb_p), 0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_n_p'] = np.stack((t2s_n_p, s2t_n_p),
                                    0).transpose(0, 3, 1, 2)[np.newaxis, :]
        rets['proj_d_p'] = np.stack((t2s_d_p, s2t_d_p),
                                    0).reshape(1, 2, 1, t2s_d_p.shape[0],
                                               t2s_d_p.shape[1])
        rets['proj_mask_p'] = np.stack(
            (t2s_mask_p, s2t_mask_p),
            0).reshape(1, 2, 1, t2s_mask_p.shape[0], t2s_mask_p.shape[1])
        rets['proj_box_p'] = np.stack(
            (t2s_box_p, s2t_box_p),
            0).reshape(1, 2, 1, t2s_box_p.shape[0], t2s_box_p.shape[1])

    # ---- resize to the output resolution ----
    for v in range(self.nViews):
        imgs_[v] = cv2.resize(imgs[v],
                              self.OutputSize,
                              interpolation=cv2.INTER_NEAREST)
        if self.rgbd:
            imgs_rgb_[v] = cv2.resize(imgs_rgb[v],
                                      self.OutputSize).transpose(2, 0, 1)
    imgs_ = imgs_[np.newaxis, :]
    if self.rgbd:
        imgs_rgb_ = imgs_rgb_[np.newaxis, :]
    R = R[np.newaxis, :]
    Q = Q[np.newaxis, :]

    if self.segm:
        rets['segm'] = segm_
    rets['interval'] = self.interval_this
    rets['norm'] = normal_
    rets['rgb'] = imgs_rgb_
    rets['depth'] = imgs_
    rets['Q'] = Q
    rets['R'] = R
    rets['imgsPath'] = imgsPath
    return rets
def __getitem__helper(self, index):
    """Build one two-view sample from the key/value store behind ``self.txn``.

    All images, poses and auxiliary arrays are fetched with
    ``self.txn.get(<key>.encode())`` (presumably an LMDB read transaction --
    confirm) using keys of the form ``"<scene>-<room>-<frame:06d>-<kind>"``.

    Args:
        index: sample index; reduced modulo ``len(self)`` before use.

    Returns:
        ``(rets, True)`` where ``rets`` is a dict.  Always present:
        'overlap', 'PerspectiveValidMask', 'dataMask', 'norm', 'rgb',
        'semantic', 'depth', 'Q', 'R', 'R_inv', 'imgsPath'.  Optional groups:

        * ``self.pointcloud or self.local``: 'pointcloud' -- per view a
          10 x num_points array (xyz, normal, colour with reversed channel
          order, semantic label).
        * ``self.plane_r``: 'plane', 'plane_raw', 'plane_c', 'valid_plane'.
        * ``self.local``: the source cloud is moved by ``R[1] @ inv(R[0])``,
          point pairs are labelled ('normdot2', 'dst2', 'uv_pts',
          'rel_cls_pts', 'pair_pts'), and the source cloud is then either
          re-posed with a stored prediction (``self.eval_local``) or randomly
          perturbed; 'igt' holds the transform that undoes it.  'Q' and
          'R_inv' then describe that transform instead of the camera poses.
        * ``self.topdown``: top-down supervision ('roompc', 'edt_w',
          'img2ind', 'imgPCid', 'axis_x', 'axis_y', 'origin', 'kp_uv',
          'kp_uv_neg', 'w_uv_neg', 'plane_eq', 'pc2ind', 'pc2ind_mask',
          'topdown', 'topdown_s', 'topdown_partial', 'TopDownValidMask',
          'npts').

    NOTE(review): the ``self.topdown`` branch indexes ``R_inv`` per view,
    which only works while ``R_inv`` is still the stacked per-view inverse;
    ``self.local`` replaces it with a single 4x4 matrix.  The two flags look
    mutually exclusive -- confirm with the caller.
    """

    def _decode(kind, frame, flag):
        """Fetch ``<room>-<frame>-<kind>`` and decode it as an image."""
        key = '%s-%06d-%s' % (room_id, frame, kind)
        buf = np.frombuffer(self.txn.get(key.encode()), dtype=np.uint8)
        return cv2.imdecode(buf, flag)

    def _read_f64(key):
        """Fetch ``key`` and view the raw bytes as float64 values.

        ``np.float64`` replaces the removed ``np.float`` alias (same dtype).
        """
        return np.frombuffer(self.txn.get(key.encode()), np.float64)

    rets = {}
    index = index % self.__len__()
    imgs_depth = np.zeros((self.nViews, self.Inputheight, self.Inputwidth),
                          dtype=np.float32)
    imgs_s = np.zeros((self.nViews, self.Inputheight, self.Inputwidth),
                      dtype=np.float32)
    imgs_rgb = np.zeros(
        (self.nViews, self.Inputheight, self.Inputwidth, 3),
        dtype=np.float32)
    imgs_normal = np.zeros(
        (self.nViews, self.Inputheight, self.Inputwidth, 3),
        dtype=np.float32)
    # per point: xyz (3) + normal (3) + colour (3) + semantic label (1)
    pointcloud = np.zeros((self.nViews, 3 + 3 + 3 + 1, self.num_points),
                          dtype=np.float32)
    R = np.zeros((self.nViews, 4, 4))
    Q = np.zeros((7))
    assert (self.nViews == 2)
    imgsPath = []

    ct0, ct1 = self.__getpair__(index)
    rets['overlap'] = float(self.dataList[index]['overlap'])
    basePath = self.base_this
    scene_id = basePath.split('/')[-2]
    room_id = scene_id + '-' + basePath.split('/')[-1]
    frames = (ct0, ct1)

    # ---- per-view images ----
    for v, ct in enumerate(frames):
        imgs_rgb[v] = _decode('rgb', ct,
                              cv2.IMREAD_COLOR).astype('float') / 255.0
    for v, ct in enumerate(frames):
        # raw depth values are divided by 1000
        imgs_depth[v] = _decode('depth', ct, 2).astype('float') / 1000.0
    for v, ct in enumerate(frames):
        # map [0, 255] to [-1, 1]
        imgs_normal[v] = _decode(
            'normal', ct, cv2.IMREAD_COLOR).astype('float') / 255.0 * 2 - 1
    for v, ct in enumerate(frames):
        # labels are shifted by one (computed in uint8)
        imgs_s[v] = _decode(
            'semantic', ct,
            cv2.IMREAD_UNCHANGED).astype('uint8')[:, :, 0] + 1

    PerspectiveValidMask = (imgs_depth != 0)
    rets['PerspectiveValidMask'] = PerspectiveValidMask[None, :, None, :, :]
    rets['dataMask'] = rets['PerspectiveValidMask']

    # ---- poses ----
    for v, ct in enumerate(frames):
        R[v] = _read_f64('%s-%06d-R' % (room_id, ct)).reshape(4, 4)
    # convert from 3rd view to 4th view
    R[0] = np.matmul(np.linalg.inv(self.Rs[3]), R[0])
    R[1] = np.matmul(np.linalg.inv(self.Rs[3]), R[1])
    R_inv = np.linalg.inv(R)

    img2ind = np.zeros([2, self.num_points, 3])
    imgPCid = np.zeros([2, self.num_points, 2])

    # ---- sampled point clouds from columns 160:320 of each view ----
    if self.pointcloud or self.local:
        for v in range(2):
            pc = self.depth2pc(imgs_depth[v][:, 160:160 * 2]).T
            idx_s = np.random.choice(range(len(pc)), self.num_points)
            # (column, row) of every sampled point inside the 160-wide crop
            imgPCid[v] = np.stack((idx_s % 160, idx_s // 160)).T
            pointcloud[v, :3, :] = pc[idx_s, :].T
            pc_n = imgs_normal[v][:, 160:160 * 2].reshape(-1, 3)
            pc_n = np.matmul(self.Rs[3][:3, :3].T, pc_n.T).T
            pointcloud[v, 3:6, :] = pc_n[idx_s, :].T
            pc_c = imgs_rgb[v, :, 160:160 * 2, :].reshape(-1, 3)
            # channel order is reversed when stored in the cloud
            pointcloud[v, 6:9, :] = pc_c[idx_s, ::-1].T
            pc_s = imgs_s[v, :, 160:160 * 2].reshape(-1)
            pointcloud[v, 9:10, :] = pc_s[idx_s]
        rets['pointcloud'] = pointcloud[None, ...]

    # ---- room planes, expressed in the frame of view 0 ----
    if self.plane_r:
        plane_eq_raw = _read_f64('%s-plane' % (room_id)).reshape(-1, 9)
        Key = '%s-plane-validnum' % (room_id)
        valid_plane = np.frombuffer(self.txn.get(Key.encode()), np.uint8)[0]
        # columns 0:3 hold the plane centre, columns 3:7 the plane equation
        plane_eq = plane_eq_raw[:, 3:7]
        plane_eq = np.matmul(plane_eq, np.linalg.inv(R[0]))
        plane_center = plane_eq_raw[:, :3]
        plane_center = (np.matmul(R[0][:3, :3], plane_center.T) +
                        R[0][:3, 3:4]).T
        rets['plane'] = plane_eq[np.newaxis, :]
        rets['plane_raw'] = plane_eq_raw[np.newaxis, :]
        rets['plane_c'] = plane_center[np.newaxis, :]
        rets['valid_plane'] = valid_plane

    # ---- local refinement data ----
    if self.local:
        # move the source cloud with the relative pose R[1] @ inv(R[0])
        R_s2t = np.matmul(R[1], R_inv[0])
        pointcloud[0, :3, :] = np.matmul(
            R_s2t[:3, :3], pointcloud[0, :3, :]) + R_s2t[:3, 3:4]
        pointcloud[0, 3:6, :] = np.matmul(R_s2t[:3, :3],
                                          pointcloud[0, 3:6, :])

        # -- label random source/target point pairs by their geometry --
        N_PAIR_PTS = 1000
        N_PAIR_EXCEED_PTS = N_PAIR_PTS * 10
        ANGLE_THRESH = 5.0
        PERP_THRESH = np.cos(np.deg2rad(90 - ANGLE_THRESH))
        PARALLEL_THRESH = np.cos(np.deg2rad(ANGLE_THRESH))
        COPLANE_THRESH = 0.05
        rel_cls_pts = np.zeros([N_PAIR_EXCEED_PTS])
        ind_s = np.random.choice(pointcloud.shape[-1], N_PAIR_EXCEED_PTS)
        ind_t = np.random.choice(pointcloud.shape[-1], N_PAIR_EXCEED_PTS)
        pair_pts = np.stack((ind_s, ind_t), -1)
        n_s = pointcloud[0, 3:6, pair_pts[:, 0]]
        n_t = pointcloud[1, 3:6, pair_pts[:, 1]]
        offset = (pointcloud[0, 0:3, pair_pts[:, 0]] -
                  pointcloud[1, 0:3, pair_pts[:, 1]])
        normdot = (n_s * n_t).sum(1)
        # mean of the offset's absolute projection onto the two normals
        dst = (np.abs((offset * n_t).sum(1)) +
               np.abs((offset * n_s).sum(1))) / 2
        # 0: none of the below, 1: perpendicular normals,
        # 2: parallel normals but offset, 3: parallel normals and coplanar
        rel_cls_pts[(np.abs(normdot) < PERP_THRESH)] = 1
        rel_cls_pts[(np.abs(normdot) > PARALLEL_THRESH)
                    & (dst > COPLANE_THRESH)] = 2
        rel_cls_pts[(np.abs(normdot) > PARALLEL_THRESH)
                    & (dst <= COPLANE_THRESH)] = 3

        if self.split == 'train':
            # balance each class
            N_CLASS = 4
            pair_pts_select = []
            for j in range(N_CLASS):
                ind = np.where(rel_cls_pts == j)[0]
                if len(ind):
                    pair_pts_select.append(ind[np.random.choice(
                        len(ind), N_PAIR_PTS // N_CLASS)])
            pair_pts_select = np.concatenate(pair_pts_select)
            pair_pts_select = pair_pts_select[np.random.choice(
                len(pair_pts_select), N_PAIR_PTS)]
        else:
            pair_pts_select = np.random.choice(len(pair_pts), N_PAIR_PTS)
        pair_pts = pair_pts[pair_pts_select]
        normdot = normdot[pair_pts_select]
        dst = dst[pair_pts_select]
        rel_cls_pts = rel_cls_pts[pair_pts_select]
        rets['normdot2'] = np.power(normdot, 2)[None, :]
        rets['dst2'] = np.power(dst, 2)[None, :]

        # convert to image coordinate
        R_t2s = np.linalg.inv(R_s2t)
        tp = (np.matmul(R_t2s[:3, :3], pointcloud[0, :3, pair_pts[:, 0]].T)
              + R_t2s[:3, 3:4]).T
        hfov = 90.0
        vfov = 2 * np.arctan(np.tan(hfov / 2 / 180 * np.pi)) / np.pi * 180
        zs = -tp[:, 2]
        ys = (0.5 - (tp[:, 1] / zs /
                     (np.tan(np.deg2rad(vfov / 2)))) / 2) * 160
        xs = (0.5 + (tp[:, 0] / zs /
                     (np.tan(np.deg2rad(hfov / 2)))) / 2) * 160
        uv_s = np.stack((xs, ys), -1)
        tp = pointcloud[1, :3, pair_pts[:, 1]]
        zs = -tp[:, 2]
        ys = (0.5 - (tp[:, 1] / zs /
                     (np.tan(np.deg2rad(vfov / 2)))) / 2) * 160
        xs = (0.5 + (tp[:, 0] / zs /
                     (np.tan(np.deg2rad(hfov / 2)))) / 2) * 160
        uv_t = np.stack((xs, ys), -1)
        rets['uv_pts'] = np.stack((uv_s, uv_t))[None, :]
        rets['uv_pts'][:, :, :, 0] = rets['uv_pts'][:, :, :, 0].clip(
            0, 160 - 1)
        rets['uv_pts'][:, :, :, 1] = rets['uv_pts'][:, :, :, 1].clip(
            0, 160 - 1)
        rets['uv_pts'] = rets['uv_pts'].astype('int')
        rets['rel_cls_pts'] = rel_cls_pts[None, :]
        rets['pair_pts'] = pair_pts[None, :]

        if self.eval_local:
            # convert back into local coordinate
            R_t2s = np.matmul(R[0], R_inv[1])
            Kth = self.dataList[index % self.__len__()]['Kth']
            pointcloud[0, :3, :] = np.matmul(
                R_t2s[:3, :3], pointcloud[0, :3, :]) + R_t2s[:3, 3:4]
            pointcloud[0, 3:6, :] = np.matmul(R_t2s[:3, :3],
                                              pointcloud[0, 3:6, :])
            eval_key = '%s-%06d-%06d-%d' % (room_id, ct0, ct1, Kth)
            entry = self.eval_gt_dict[eval_key]
            R_pred = entry['pred_pose']
            gt_pose = entry['gt_pose']
            err_r = util.angular_distance_np(R_pred[:3, :3],
                                             gt_pose[:3, :3])[0]
            rets['err_r'] = err_r
            rets['eval_key'] = eval_key
            rets['pos_s_360'] = (entry['pos_s_360'][None, :])
            rets['pos_t_360'] = (entry['pos_t_360'][None, :])
            rets['nor_s_360'] = (entry['nor_s_360'][None, :])
            rets['nor_t_360'] = (entry['nor_t_360'][None, :])
            # place the source cloud with the stored predicted pose
            pointcloud[0, :3, :] = np.matmul(
                R_pred[:3, :3], pointcloud[0, :3, :]) + R_pred[:3, 3:4]
            pointcloud[0, 3:6, :] = np.matmul(R_pred[:3, :3],
                                              pointcloud[0, 3:6, :])
            # residual transform from the prediction to the true pose
            igt = np.matmul(R_s2t, np.linalg.inv(R_pred))
            rets['igt'] = igt[None, :]
            rets['pred_pose'] = R_pred[None, :]
            rets['gt_pose'] = gt_pose[None, :]
            R_gt = igt[:3, :3]
            t_gt = igt[:3, 3:4]
        else:
            # perturb the source cloud about its centroid with a random
            # rotation and translation; (R_gt, t_gt) is the inverse motion
            delta_R = util.randomRotation(epsilon=0.1)
            delta_t = np.random.randn(3) * 0.1
            centroid = pointcloud[0, :3, :].mean(1)[:, None]
            pointcloud_s_perturb = np.matmul(
                delta_R,
                pointcloud[0, :3, :] - centroid) + delta_t[:, None] + centroid
            t_gt = np.matmul(np.eye(3) - delta_R.T, centroid) - np.matmul(
                delta_R.T, delta_t[:, None])
            R_gt = delta_R.T
            igt = np.eye(4)
            igt[:3, :3] = R_gt
            igt[:3, 3] = t_gt.squeeze()
            rets['igt'] = igt[None, :]
            pointcloud_s_n_perturb = np.matmul(delta_R,
                                               pointcloud[0, 3:6, :])
            pointcloud[0, :3, :] = pointcloud_s_perturb
            pointcloud[0, 3:6, :] = pointcloud_s_n_perturb

        Q = np.concatenate((util.rot2Quaternion(R_gt), t_gt.squeeze()))
        R_ = np.eye(4)
        R_[:3, :3] = R_gt
        R_[:3, 3] = t_gt.squeeze()
        # from here on R_inv is the inverse of the residual transform only
        R_inv = np.linalg.inv(R_)
        rets['pointcloud'] = pointcloud[None, ...]

    # ---- top-down (floor plan) supervision ----
    if self.topdown:
        roompc = _read_f64('%s-pc' % (room_id)).reshape(-1, 3)
        roompc = roompc[np.random.choice(roompc.shape[0], 20000)]
        rets['roompc'] = roompc[None, :]

        # floor plane, expressed in each view's frame and normalised
        plane_eq = _read_f64('%s-floor' % (room_id)).reshape(4)
        plane_eqs = np.zeros([2, 4])
        plane_eq_0 = np.matmul(plane_eq, np.linalg.inv(R[0]))
        plane_eq_0 /= (np.linalg.norm(plane_eq_0[:3]) + 1e-16)
        plane_eqs[0, :] = plane_eq_0.copy()
        plane_eq_1 = np.matmul(plane_eq, np.linalg.inv(R[1]))
        plane_eq_1 /= (np.linalg.norm(plane_eq_1[:3]) + 1e-16)
        plane_eqs[1, :] = plane_eq_1.copy()
        # unused, but the draw is kept so the random stream is unchanged
        colors = np.random.rand(15 + 1, 3)
        # resolution = 0.02 # 0.2m
        resolution = 0.04
        height = 224
        width = 224

        def _floor_frame(plane):
            """Return (origin, axis_x, axis_y, axis_z) of the floor grid."""
            # project camera position(0,0,0) to floor plane
            origin = -plane[:3] * plane[3]
            # axis [0,0,-1] with its component along the plane normal removed
            axis_base = np.array([0, 0, -1])
            axis_y = axis_base - np.dot(axis_base, plane[:3]) * plane[:3]
            axis_y /= (np.linalg.norm(axis_y) + 1e-16)
            axis_x = np.cross(axis_y, plane[:3])
            axis_x /= (np.linalg.norm(axis_x) + 1e-16)
            return origin, axis_x, axis_y, plane[:3]

        def _grid_index(points, origin, axis_x, axis_y, axis_z):
            """Map N x 3 points to (u, v, height-bin) grid indices."""
            u = ((points - origin[None, :]) * axis_x[None, :]).sum(1)
            v = ((points - origin[None, :]) * axis_y[None, :]).sum(1)
            z = ((points - origin[None, :]) * axis_z[None, :]).sum(1)
            u = width // 2 + (u / resolution).astype('int')
            v = height // 2 - (v / resolution).astype('int')
            ind_z = np.digitize(z, [-0.1, 0.7, 1.5])
            return np.stack((u, v, ind_z), -1)

        pc0 = pointcloud[0, 0:3, :].T
        pc2ind = np.zeros([2, len(pc0), 3])
        npts = np.zeros([2])
        pc2ind_mask = np.zeros([2, pointcloud.shape[2]])

        # the floor plane
        # (0, 1, 0)'x + d = 0
        # remove partial view's ceiling
        dst = np.abs(
            ((plane_eq_0[:3][None, :] * pc0).sum(1) + plane_eq_0[3]))
        mask = dst < 1.5
        validind = np.where(mask)[0]
        npts[0] = len(validind)
        pc0 = pc0[mask]
        pc2ind_mask[0] = mask
        origin_0, axis_x_0, axis_y_0, axis_z_0 = _floor_frame(plane_eq_0)

        topdown_c_partial_0 = _decode(
            'topdown_c_partial', ct0,
            cv2.IMREAD_COLOR).astype('float') / 255.
        topdown_c_partial_1 = _decode(
            'topdown_c_partial', ct1,
            cv2.IMREAD_COLOR).astype('float') / 255.
        topdown_c_complete_0 = _decode(
            'topdown_c_complete', ct0,
            cv2.IMREAD_COLOR).astype('float') / 255.
        topdown_c_complete_1 = _decode(
            'topdown_c_complete', ct1,
            cv2.IMREAD_COLOR).astype('float') / 255.
        topdown_s_complete_0 = _decode(
            'topdown_s_complete', ct0, cv2.IMREAD_UNCHANGED).astype('uint8')
        topdown_s_complete_1 = _decode(
            'topdown_s_complete', ct1, cv2.IMREAD_UNCHANGED).astype('uint8')

        # weight that decays with the distance to the nearest pixel whose
        # partial-view colour sum is non-zero, floored at 0.1
        tp = ~topdown_c_partial_0.sum(2).astype('bool')
        edt_0 = ndimage.distance_transform_edt(tp, return_indices=False)
        edt_0 = np.maximum(0.1, np.power(0.98, edt_0))
        tp = ~topdown_c_partial_1.sum(2).astype('bool')
        edt_1 = ndimage.distance_transform_edt(tp, return_indices=False)
        edt_1 = np.maximum(0.1, np.power(0.98, edt_1))
        rets['edt_w'] = np.stack((edt_0, edt_1))[None, ...]

        # grid indices: masked points (pc2ind) and all points (img2ind)
        topdown_ind_0 = _grid_index(pc0, origin_0, axis_x_0, axis_y_0,
                                    axis_z_0)
        topdown_ind_img_0 = _grid_index(pointcloud[0, 0:3, :].T, origin_0,
                                        axis_x_0, axis_y_0, axis_z_0)
        pc2ind[0, mask] = topdown_ind_0

        pc1 = pointcloud[1, 0:3, :].T
        dst = np.abs(
            ((plane_eq_1[:3][None, :] * pc1).sum(1) + plane_eq_1[3]))
        mask = dst < 1.5
        validind = np.where(mask)[0]
        npts[1] = len(validind)
        pc1 = pc1[mask]
        pc2ind_mask[1] = mask
        origin_1, axis_x_1, axis_y_1, axis_z_1 = _floor_frame(plane_eq_1)
        topdown_ind_1 = _grid_index(pc1, origin_1, axis_x_1, axis_y_1,
                                    axis_z_1)
        topdown_ind_img_1 = _grid_index(pointcloud[1, 0:3, :].T, origin_1,
                                        axis_x_1, axis_y_1, axis_z_1)
        img2ind[0] = topdown_ind_img_0
        img2ind[1] = topdown_ind_img_1
        pc2ind[1, mask] = topdown_ind_1
        rets['img2ind'] = img2ind[None, ...]
        rets['imgPCid'] = imgPCid[None, ...]

        rets['axis_x'] = np.zeros([2, 3])
        rets['axis_y'] = np.zeros([2, 3])
        rets['origin'] = np.zeros([2, 3])
        rets['axis_x'][0] = axis_x_0
        rets['axis_y'][0] = axis_y_0
        rets['axis_x'][1] = axis_x_1
        rets['axis_y'][1] = axis_y_1
        rets['origin'][0] = origin_0
        rets['origin'][1] = origin_1
        rets['axis_x'] = rets['axis_x'][None, :]
        rets['axis_y'] = rets['axis_y'][None, :]
        rets['origin'] = rets['origin'][None, :]

        # sample points on source floor plane:
        mask = ~((topdown_c_partial_0 == 0).sum(2) == 3)
        vs, us = np.where(mask)
        if not len(vs):
            # empty partial view: fall back to pixel (0, 0)
            vs = np.array([0, 0])
            us = np.array([0, 0])
        ind = np.random.choice(len(vs), 100)
        u_0 = us[ind]
        v_0 = vs[ind]
        kp_uv_0 = np.stack((u_0, v_0), -1)
        u_0 -= width // 2
        v_0 -= height // 2
        # lift the grid cells back to 3D and move them into view 1
        kp_3d_0 = origin_0[None, :] + axis_x_0[
            None, :] * u_0[:, None] * resolution - axis_y_0[
                None, :] * v_0[:, None] * resolution
        R01 = np.matmul(R[1], R_inv[0])
        kp_3d_1 = (np.matmul(R01[:3, :3], kp_3d_0.T) + R01[:3, 3:4]).T

        # random sample a set of points as negative correspondences
        mask = ~((topdown_c_partial_1 == 0).sum(2) == 3)
        vs_neg, us_neg = np.where(mask)
        if not len(vs_neg):
            vs_neg = np.array([0, 0])
            us_neg = np.array([0, 0])
        ind = np.random.choice(len(vs_neg), 100 * 100)
        u_neg_1 = us_neg[ind]
        v_neg_1 = vs_neg[ind]
        kp_uv_neg_1 = np.stack((u_neg_1, v_neg_1), -1)
        u_neg_1 -= width // 2
        v_neg_1 -= height // 2
        kp_3d_neg_1 = origin_1[None, :] + axis_x_1[
            None, :] * u_neg_1[:, None] * resolution - axis_y_1[
                None, :] * v_neg_1[:, None] * resolution
        R10 = np.matmul(R[0], R_inv[1])
        kp_3d_neg_0 = (np.matmul(R10[:3, :3], kp_3d_neg_1.T) +
                       R10[:3, 3:4]).T
        u_neg_0 = ((kp_3d_neg_0 - origin_0[None, :]) *
                   axis_x_0[None, :]).sum(1)
        v_neg_0 = ((kp_3d_neg_0 - origin_0[None, :]) *
                   axis_y_0[None, :]).sum(1)
        u_neg_0 = width // 2 + (u_neg_0 / resolution).astype('int')
        v_neg_0 = height // 2 - (v_neg_0 / resolution).astype('int')
        kp_uv_neg_0 = np.stack((u_neg_0, v_neg_0), -1)
        kp_uv_neg_0[:, 0] = kp_uv_neg_0[:, 0].clip(0, width - 1)
        kp_uv_neg_0[:, 1] = kp_uv_neg_0[:, 1].clip(0, height - 1)
        kp_uv_neg_1 = kp_uv_neg_1.reshape(100, 100, 2)
        kp_uv_neg_0 = kp_uv_neg_0.reshape(100, 100, 2)
        # negatives that land close to the positive keypoint get less weight
        w_uv_neg_1 = 1 - np.maximum(
            0.1,
            np.power(
                0.98,
                np.linalg.norm(kp_uv_neg_0 - kp_uv_0[:, None, :], axis=2)))

        u_1 = ((kp_3d_1 - origin_1[None, :]) * axis_x_1[None, :]).sum(1)
        v_1 = ((kp_3d_1 - origin_1[None, :]) * axis_y_1[None, :]).sum(1)
        u_1 = width // 2 + (u_1 / resolution).astype('int')
        v_1 = height // 2 - (v_1 / resolution).astype('int')
        kp_uv_1 = np.stack((u_1, v_1), -1)

        topdown_c_complete = np.stack(
            (topdown_c_complete_0,
             topdown_c_complete_1)).transpose(0, 3, 1, 2)
        topdown_s_complete = np.stack(
            (topdown_s_complete_0, topdown_s_complete_1))
        topdown_c_partial = np.stack(
            (topdown_c_partial_0, topdown_c_partial_1))
        kp_uv_0[:, 0] = kp_uv_0[:, 0].clip(0, width - 1)
        kp_uv_0[:, 1] = kp_uv_0[:, 1].clip(0, height - 1)
        kp_uv_1[:, 0] = kp_uv_1[:, 0].clip(0, width - 1)
        kp_uv_1[:, 1] = kp_uv_1[:, 1].clip(0, height - 1)
        rets['kp_uv'] = np.stack((kp_uv_0, kp_uv_1))[None, ...]
        rets['kp_uv_neg'] = kp_uv_neg_1[None, ...]
        rets['w_uv_neg'] = w_uv_neg_1[None, ...]
        rets['plane_eq'] = plane_eqs[None, ...]
        rets['pc2ind'] = pc2ind[None, ...]
        rets['pc2ind_mask'] = pc2ind_mask[None, ...]
        rets['topdown'] = topdown_c_complete[None, ...]
        rets['topdown_s'] = topdown_s_complete[None, ...]
        rets['topdown_partial'] = topdown_c_partial.transpose(0, 3, 1,
                                                              2)[None, ...]
        TopDownValidMask = ((topdown_c_complete == 0).sum(1, keepdims=True)
                            != 3)
        rets['TopDownValidMask'] = TopDownValidMask[None, ...]
        rets['npts'] = npts[None, ...]

    imgsPath.append(f"{basePath}/{ct0:06d}")
    imgsPath.append(f"{basePath}/{ct1:06d}")
    rets['norm'] = imgs_normal.transpose(0, 3, 1, 2)[None, ...]
    rets['rgb'] = imgs_rgb.transpose(0, 3, 1, 2)[None, ...]
    rets['semantic'] = imgs_s[None, ...]
    rets['depth'] = imgs_depth[None, :, None, :, :]
    rets['Q'] = Q[None, ...]
    rets['R'] = R[None, ...]
    rets['R_inv'] = R_inv[None, ...]
    rets['imgsPath'] = imgsPath
    return rets, True