def transform_val(self, raw, gt):
    raw = TF.center_crop(raw, (self.height, self.width))
    gt = TF.center_crop(gt, (self.height, self.width))
    return raw, gt
def __call__(self, image, target):
    image = F.center_crop(image, self.size)
    target = F.center_crop(target, self.size)
    return image, target
def images_to_psnrs_crop(img1, img2, bsize=10):
    # Compare only a centered region, trimming a bsize border on each axis.
    h, w = img1.shape[-2:]
    csize = [h - bsize, w - bsize]
    crop1 = tvF.center_crop(img1, csize)
    crop2 = tvF.center_crop(img2, csize)
    return images_to_psnrs(crop1, crop2)
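# For reference, a self-contained variant of the border-trimmed comparison above;
# images_to_psnrs is an external helper, so this sketch computes the PSNR inline and
# assumes 4D float tensors with values in [0, 1].
import torch
import torchvision.transforms.functional as tvF

def psnr_center_crop(img1, img2, bsize=10, data_range=1.0):
    # Trim a bsize border before comparing, mirroring images_to_psnrs_crop.
    h, w = img1.shape[-2:]
    crop1 = tvF.center_crop(img1, [h - bsize, w - bsize])
    crop2 = tvF.center_crop(img2, [h - bsize, w - bsize])
    mse = torch.mean((crop1 - crop2) ** 2, dim=(-3, -2, -1))
    return 10.0 * torch.log10(data_range ** 2 / mse)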
def center_crop_frames(frames, csize=30):
    # Crop every burst in the dict to a centered csize x csize window.
    # (The original re-assigned csize = 30 here, silently overriding the argument.)
    cc_frames = edict()
    for name, burst in frames.items():
        cc_frames[name] = tvF.center_crop(burst, (csize, csize))
    return cc_frames
def run_multiscale_nnf(cfg, noisy, clean, nlevels=3, verbose=False):
    T, C, H, W = noisy.shape
    nframes = T
    noisy = noisy[:, None]
    clean = clean[:, None]
    isize = edict({'h': H, 'w': W})
    isize_l = [H, W]
    pad3 = cfg.nblocks // 2 + 3 // 2
    psize3 = edict({'h': H - pad3, 'w': W - pad3})
    pad = cfg.nblocks // 2 + cfg.patchsize // 2
    psize = edict({'h': H - pad, 'w': W - pad})
    cfg_patchsize = cfg.patchsize

    # -- downsample once and recompute all sizes --
    factor = 2
    noisy = tvF.resize(noisy[:, 0], [H // factor, W // factor],
                       interpolation=InterpMode.BILINEAR)[:, None]
    clean = tvF.resize(clean[:, 0], [H // factor, W // factor],
                       interpolation=InterpMode.BILINEAR)[:, None]
    T, _, C, H, W = noisy.shape
    isize = edict({'h': H, 'w': W})
    isize_l = [H, W]
    pad3 = cfg.nblocks // 2 + 3 // 2
    psize3 = edict({'h': H - pad3, 'w': W - pad3})
    pad = cfg.nblocks // 2 + cfg.patchsize // 2
    psize = edict({'h': H - pad, 'w': W - pad})
    cfg_patchsize = cfg.patchsize

    # -- looks good --
    cfg.patchsize = 3
    align_fxn = get_align_method(cfg, "l2_global")
    _, flow = align_fxn(clean.to(0))
    aclean = align_from_flow(clean, flow, cfg.nblocks, isize=isize)
    save_image("aclean.png", aclean)
    apsnr = align_psnr(aclean, psize3)
    print("[global] clean: ", apsnr)

    # -- does not look good --
    _, flow = align_fxn(noisy.to(0))
    isize = edict({'h': H, 'w': W})
    aclean = align_from_flow(clean, flow, cfg.nblocks, isize=isize)
    save_image("aclean_rs1.png", aclean)
    apsnr = align_psnr(aclean, psize3)
    print("noisy: ", apsnr)

    # -- fix it --
    cfg.nblocks = 5
    align_fxn = get_align_method(cfg, "pair_l2_local")
    _, flow = align_fxn(aclean.to(0))
    isize = edict({'h': H, 'w': W})
    aclean = align_from_flow(aclean, flow, cfg.nblocks, isize=isize)
    save_image("aclean_rs1.png", aclean)
    apsnr = align_psnr(aclean, psize3)
    print("[fixed] noisy: ", apsnr)

    #
    # -- [Tiled] try it again and fix it --
    #

    img_ps = 3
    cfg.patchsize = img_ps
    cfg.nblocks = 50
    tnoisy = padAndTileBatch(noisy, cfg.patchsize, cfg.nblocks)
    tclean = padAndTileBatch(clean, cfg.patchsize, cfg.nblocks)

    # sanity check: tiling followed by un-tiling must be the identity
    t2i_clean = tvF.center_crop(tiled_to_img(tclean, img_ps), isize_l)
    print(t2i_clean.shape, clean.shape)
    save_image("atiled_to_img.png", t2i_clean)
    delta = torch.sum(torch.abs(clean - t2i_clean)).item()
    assert delta < 1e-8, "tiled to image must work!"

    cfg.patchsize = 3
    align_fxn = get_align_method(cfg, "pair_l2_local")
    _, flow = align_fxn(tnoisy.to(0))
    print(flow.shape, tclean.shape, clean.shape, np.sqrt(flow.shape[1]))
    nbHalf = cfg.nblocks // 2
    pisize = edict({'h': H + 2 * nbHalf, 'w': W + 2 * nbHalf})
    aclean = align_from_flow(tclean, flow, cfg.nblocks, isize=pisize)
    aclean_img = tvF.center_crop(tiled_to_img(aclean, img_ps), isize_l)
    save_image("aclean_rs1_tiled.png", aclean_img)
    apsnr = align_psnr(aclean_img, psize3)
    print("[tiled] noisy: ", apsnr)
    # I want to use a different block size, but I need to correct the image padding first...?

    # def shrink_search_space(tclean, flow, nblocks_prev, nblocks_curr):
    #     print("tclean.shape: ", tclean.shape)
    #     print("flow.shape: ", flow.shape)
    #     T, _, C, H, W = tclean.shape
    #     flow = rearrange(flow, 'i (h w) t two -> t i two h w', h=H)
    #     tclean = tvF.center_crop(tclean, new_size)
    #     flow = tvF.center_crop(flow, new_size)

    nblocks_prev = cfg.nblocks
    cfg.nblocks = 5
    # tclean, flow = shrink_search_space(tclean, flow, nblocks_prev, cfg.nblocks)
    align_fxn = get_align_method(cfg, "pair_l2_local")
    at_clean = align_from_flow(tclean, flow, cfg.nblocks, isize=pisize)
    _, flow_at = align_fxn(at_clean.to(0))
    aaclean = align_from_flow(at_clean, flow_at, cfg.nblocks, isize=pisize)
    aaclean_img = tvF.center_crop(tiled_to_img(aaclean, img_ps), isize_l)
    save_image("aclean_rs1_fixed.png", aaclean_img)
    apsnr = align_psnr(aaclean_img, psize3)
    print("[fixed] noisy: ", apsnr)
    exit()

    # -- everything below is unreachable; kept as disabled exploration --

    cfg.patchsize = 1  # cfg_patchsize
    align_fxn = get_align_method(cfg, "pair_l2_local")
    # clusters = cluster_flow(flow, H, nclusters=4)
    cflow = flow  # replace_flow_median(flow, clusters, H, cfg.nblocks)
    # save_image("clusters.png", clusters.type(torch.float))
    cflow_img = flow2img(cflow, H, T)
    save_image("cflow.png", cflow_img)
    aclean = align_from_flow(clean, cflow, cfg.nblocks, isize=isize)
    save_image("aclean_rs1_cf.png", aclean)
    print(cflow[:, 64 * 64 + 64])
    apsnr = align_psnr(aclean, psize)
    print("noisy_cf: ", apsnr)

    print(flow.shape)
    # flow = rearrange(flow, 'i (h w) t two -> t i two h w', h=H)
    # print_stats(flow)
    flow_img = flow2img(flow, H, T)
    save_image("flow.png", flow_img)
    print(torch.histc(flow.type(torch.float)))

    # -- half resolution --
    factor = 2
    cfg.nblocks = max(cfg.nblocks // 2, 3)
    cfg.patchsize = 1
    # cfg.patchsize = max(cfg.patchsize // 2, 3)
    noisy_rs = tvF.resize(noisy[:, 0], [H // factor, W // factor],
                          interpolation=InterpMode.BILINEAR)[:, None]
    _, flow_rs = align_fxn(noisy_rs.to(0))
    clean_rs = tvF.resize(clean[:, 0], [H // factor, W // factor],
                          interpolation=InterpMode.BILINEAR)[:, None]
    isize = edict({'h': H // factor, 'w': W // factor})
    aclean = align_from_flow(clean_rs, flow_rs, cfg.nblocks, isize=isize)
    save_image("aclean_rs2.png", aclean)
    apsnr = align_psnr(aclean, psize)
    print("rs2", apsnr, cfg.nblocks, cfg.patchsize)

    clusters = cluster_flow(flow_rs, H // factor, nclusters=3)
    save_image("clusters_rs.png", clusters.type(torch.float))
    cflow_rs = cluster_flow(flow_rs, H // factor, nclusters=5)
    # print(cflow_rs)
    aclean = align_from_flow(clean_rs, cflow_rs, cfg.nblocks, isize=isize)
    save_image("aclean_rs2_cl.png", aclean)
    apsnr = align_psnr(aclean, psize)
    print("rs2_cl", apsnr, cfg.nblocks, cfg.patchsize)
    exit()

    print(flow_rs.shape)
    # flow_rs = rearrange(flow_rs, 'i (h w) t two -> t i two h w', h=H // factor)
    print(flow_rs.shape)
    flow_img = flow2img(flow_rs, H // factor, T)
    save_image("flow_rs2.png", flow_img)
    fmin, fmax, fmean = print_stats(flow_rs)
    print(torch.histc(flow_rs.type(torch.float), max=50, min=-50))

    # -- quarter resolution --
    factor = 4
    cfg.nblocks = max(cfg.nblocks // 2, 3)
    # cfg.patchsize = max(cfg.patchsize // 2, 3)
    noisy_rs = tvF.resize(noisy[:, 0], [H // factor, W // factor],
                          interpolation=InterpMode.BILINEAR)[:, None]
    _, flow_rs = align_fxn(noisy_rs.to(0))
    clean_rs = tvF.resize(clean[:, 0], [H // factor, W // factor],
                          interpolation=InterpMode.BILINEAR)[:, None]
    isize = edict({'h': H // factor, 'w': W // factor})
    aclean = align_from_flow(clean_rs, flow_rs, cfg.nblocks, isize=isize)
    save_image("aclean_rs4.png", aclean)
    apsnr = align_psnr(aclean, psize)
    print(apsnr, cfg.nblocks, cfg.patchsize)
    print(flow_rs.shape)
    # flow_rs = rearrange(flow_rs, 'i (h w) t two -> t i two h w', h=H // factor)
    print(flow_rs.shape)
    flow_img = flow2img(flow_rs, H // factor, T)
    save_image("flow_rs4.png", flow_img)
    fmin, fmax, fmean = print_stats(flow_rs)
    print(torch.histc(flow_rs.type(torch.float), max=50, min=-50))
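# Distilled from the exploration above: a minimal sketch (not the final method) of the
# coarse-to-fine step run_multiscale_nnf keeps probing: estimate flow on the downsampled
# noisy burst, warp the clean burst with it, and score the alignment by PSNR. It assumes
# the same helpers and signatures used above (get_align_method, align_from_flow,
# align_psnr, edict, tvF, InterpMode), so treat it as a sketch under those assumptions.
def coarse_align_sketch(cfg, noisy, clean, factor=2):
    # Downsample both bursts; flow is estimated on the noisy frames only.
    T, _, C, H, W = noisy.shape
    size = [H // factor, W // factor]
    noisy_rs = tvF.resize(noisy[:, 0], size, interpolation=InterpMode.BILINEAR)[:, None]
    clean_rs = tvF.resize(clean[:, 0], size, interpolation=InterpMode.BILINEAR)[:, None]

    # Estimate flow at the coarse scale and warp the clean burst with it.
    align_fxn = get_align_method(cfg, "pair_l2_local")
    _, flow_rs = align_fxn(noisy_rs.to(0))
    isize = edict({'h': size[0], 'w': size[1]})
    aligned = align_from_flow(clean_rs, flow_rs, cfg.nblocks, isize=isize)

    # Score on a border-trimmed region so padding artifacts do not dominate the PSNR.
    pad = cfg.nblocks // 2 + cfg.patchsize // 2
    psize = edict({'h': size[0] - pad, 'w': size[1] - pad})
    return align_psnr(aligned, psize)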
def center_crop_all(imgs, size):
    imgs_cropped = []
    for img in imgs:
        imgs_cropped.append(TF.center_crop(img, size))
    return imgs_cropped
def __call__(self, input_img, target_img):
    return F.center_crop(input_img, self.size), F.center_crop(target_img, self.size)
def center_crop(data, label, crop_size):
    data = ff.center_crop(data, crop_size)
    label = ff.center_crop(label, crop_size)
    return data, label
def __call__(self, image, target):
    """Center crop and return."""
    image = F.center_crop(image, self.size)
    target = F.center_crop(target, self.size)
    return image, target
def __call__(self, sample):
    return F.center_crop(sample, self.size)
def __call__(self, img_rgb, img_depth):
    return f.center_crop(img_rgb, self.size), f.center_crop(img_depth, self.size)
def im_resize(img, size=256):
    img = trans_fn.resize(img, size, Image.LANCZOS)
    img = trans_fn.center_crop(img, size)
    return img
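# A possible usage, assuming a PIL input ("example.jpg" is a placeholder path): resizing
# the shorter side to `size` and then center-cropping yields a square output regardless
# of the original aspect ratio.
from PIL import Image

img = Image.open("example.jpg")        # e.g. 640x480
square = im_resize(img, size=256)      # shorter side -> 256, then a 256x256 center crop
assert square.size == (256, 256)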
def crop_center(size, img):
    """Crop a central size*size window out of a PIL image."""
    return torchvision_util.center_crop(img, size)
def __call__(self, img):
    assert TVF._is_pil_image(img)
    return TVF.center_crop(img, min(img.size))
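# TVF._is_pil_image is a private torchvision helper that can move between releases; a
# sketch of the same largest-centered-square crop using only public API:
import PIL.Image
import torchvision.transforms.functional as TVF

def square_center_crop(img):
    # Crop a PIL image to its largest centered square.
    if not isinstance(img, PIL.Image.Image):
        raise TypeError("expected a PIL image, got %s" % type(img).__name__)
    return TVF.center_crop(img, min(img.size))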
def __call__(self, img, target):
    hor_trans = random.choice((True, False))
    return (F.center_crop(ImageTransform(img, hor_trans), (300, 500)),
            F.center_crop(horiz_flip(target, hor_trans), (300, 500)))
def get_local_map(**kwargs):
    """Build a local point-cloud map around camera pose T from the num_pc nearest keyframes."""
    T = kwargs.pop('T', None).detach()
    dataset = kwargs.pop('dataset', 'SEVENSCENES')
    scene = kwargs.pop('scene', 'heads/')
    sequences = kwargs.pop('sequences', 'TrainSplit.txt')
    num_pc = kwargs.pop('num_pc', 8)
    resize_fact = kwargs.pop('resize', 1/16)
    reduce_fact = kwargs.pop('reduce_fact', 2)
    K = kwargs.pop('K', [[585, 0.0, 240], [0.0, 585, 240], [0.0, 0.0, 1.0]])
    frame_spacing = kwargs.pop('frame_spacing', 20)
    output_size = kwargs.pop('output_size', 5000)
    cnn_descriptor = kwargs.pop('cnn_descriptor', False)
    cnn_depth = kwargs.pop('cnn_depth', False)
    cnn_enc = kwargs.pop('cnn_enc', None)
    cnn_dec = kwargs.pop('cnn_dec', None)
    no_grad = kwargs.pop('no_grad', True)
    if kwargs:
        raise TypeError('Unexpected **kwargs: %r' % kwargs)

    timing = False
    if timing:
        tt = time.time()

    # Loading files...
    if hasattr(get_local_map, 'data') is False:
        env_var = os.environ[dataset]
        get_local_map.folders = list()
        with open(env_var + scene + sequences, 'r') as f:
            for line in f:
                fold = 'seq-{:02d}/'.format(int(re.search(r'(?<=sequence)\d', line).group(0)))
                get_local_map.folders.append(env_var + scene + fold)
        get_local_map.data = list()
        for i, folder in enumerate(get_local_map.folders):
            p = path.Path(folder)
            get_local_map.data += [(i, re.search(r'(?<=-)\d+', file.name).group(0))
                                   for file in p.iterdir()
                                   if file.is_file() and '.txt' in file.name]
        get_local_map.poses = list()
        for fold, seq_num in get_local_map.data:
            pose_file = get_local_map.folders[fold] + 'frame-' + seq_num + '.pose.txt'
            pose = np.ndarray((4, 4), dtype=np.float32)
            with open(pose_file, 'r') as pose_file_pt:
                for i, line in enumerate(pose_file_pt):
                    for j, c in enumerate(line.split('\t')):
                        try:
                            pose[i, j] = float(c)
                        except ValueError:
                            pass
            get_local_map.poses.append(pose)

    if timing:
        print('Files loading in {}s'.format(time.time() - tt))
        t = time.time()

    # Nearest pose search
    '''
    eye_mat = T.new_zeros(4, 4)
    eye_mat[0, 0] = eye_mat[1, 1] = eye_mat[2, 2] = eye_mat[3, 3] = 1
    d_poses = [torch.norm(eye_mat - pose.matmul(T.inverse())).item() for pose in poses]
    '''
    InvnpT = np.linalg.inv(T.cpu().numpy())
    eye_mat = np.eye(4, 4)
    d_poses = [np.linalg.norm(eye_mat - np.matmul(pose, InvnpT)) for pose in get_local_map.poses]
    nearest_idx = sorted(range(len(d_poses)), key=lambda k: d_poses[k])

    if timing:
        print('NN search in {}s'.format(time.time() - t))
        t = time.time()

    # Computing local pc
    K = T.new_tensor(K)
    K[:2, :] *= resize_fact
    pcs = list()
    if cnn_descriptor:
        descs = list()
    for i in range(0, num_pc * frame_spacing, frame_spacing):
        fold, num = get_local_map.data[nearest_idx[i]]
        if cnn_descriptor or cnn_depth:
            file_name = get_local_map.folders[fold] + 'frame-' + num + '.color.png'
            im = PIL.Image.open(file_name)
            new_h = int(min(im.size) * resize_fact * reduce_fact)  # 2x the depth-map size by default
            im = func.to_tensor(
                func.center_crop(
                    func.resize(im, new_h, interpolation=PIL.Image.BILINEAR),
                    new_h)).float()
            im = im.to(T.device)  # move to GPU if necessary
            if no_grad:
                with torch.no_grad():
                    out_enc = cnn_enc(im.unsqueeze(0))
            else:
                out_enc = cnn_enc(im.unsqueeze(0))
            if cnn_descriptor:
                desc = out_enc[cnn_descriptor].squeeze()
                desc = desc.view(desc.size(0), -1)
        if cnn_depth:
            if no_grad:
                with torch.no_grad():
                    if isinstance(cnn_dec, Resnet.Deconv):
                        depth = cnn_dec(out_enc['feat'], out_enc['res_1'], out_enc['res_2']).squeeze(0)
                    else:
                        depth = cnn_dec(out_enc).squeeze(0)
            else:
                if isinstance(cnn_dec, Resnet.Deconv):
                    depth = cnn_dec(out_enc['feat'], out_enc['res_1'], out_enc['res_2']).squeeze(0)
                else:
                    depth = cnn_dec(out_enc).squeeze(0)
            depth = torch.reciprocal(depth.clamp(min=1e-8)) - 1  # invert the predicted depth
            pcs.append(
                toSceneCoord(depth,
                             torch.from_numpy(get_local_map.poses[nearest_idx[i]]).to(T.device),
                             K, remove_zeros=False))
            if cnn_descriptor:
                descs.append(desc)
        else:
            file_name = get_local_map.folders[fold] + 'frame-' + num + '.depth.png'
            depth = PIL.Image.open(file_name)
            new_h = int(min(depth.size) * resize_fact)
            if new_h / 2 != min(K[0, 2].item(), K[1, 2].item()):
                logger.warn('Resize factor is modifying the 3D geometry!! (fact={})'.format(resize_fact))
            depth = func.to_tensor(
                func.center_crop(
                    func.resize(func.resize(depth, new_h * 2, interpolation=PIL.Image.NEAREST),
                                new_h, interpolation=PIL.Image.NEAREST),
                    new_h)).float()
            depth[depth == 65535] = 0  # invalid depth sentinel
            depth *= 1e-3  # millimeters to meters
            depth = depth.to(T.device)  # move to GPU if necessary
            if cnn_descriptor:
                desc = desc[:, depth.view(1, -1).squeeze() != 0]
                descs.append(desc)
            pcs.append(toSceneCoord(depth,
                                    torch.from_numpy(get_local_map.poses[nearest_idx[i]]).to(T.device),
                                    K, remove_zeros=True))

    if timing:
        print('PC creation in {}s'.format(time.time() - t))
        t = time.time()

    # Pruning step
    final_pc = torch.cat(pcs, 1)
    if cnn_descriptor:
        cnn_desc_out = torch.cat(descs, 1)
    logger.debug('Final point cloud before pruning has {} points'.format(final_pc.size(1)))
    if not isinstance(output_size, bool):
        torch.manual_seed(42)
        indexor = torch.randperm(final_pc.size(1))
        final_pc = final_pc[:, indexor]
        final_pc = final_pc[:, :output_size]
        if cnn_descriptor:
            cnn_desc_out = cnn_desc_out[:, indexor]
            cnn_desc_out = cnn_desc_out[:, :output_size]

    if timing:
        print('Pruning in {}s'.format(time.time() - t))

    if cnn_descriptor:
        return final_pc, cnn_desc_out
    else:
        return final_pc
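# A hedged usage sketch: get_local_map() rejects unknown keywords, so everything is
# passed by name. T_query and the encoder/decoder below are hypothetical placeholders,
# and the call assumes the SEVENSCENES environment variable points at the dataset root.
T_query = torch.eye(4)  # 4x4 camera pose of the query frame (placeholder)

# Depth maps read from disk, no CNN involved:
pc = get_local_map(T=T_query, dataset='SEVENSCENES', scene='heads/',
                   sequences='TrainSplit.txt', num_pc=8, output_size=5000)

# With a learned encoder/decoder, descriptors are returned alongside the point cloud:
# pc, desc = get_local_map(T=T_query, cnn_descriptor='feat', cnn_depth=True,
#                          cnn_enc=encoder, cnn_dec=decoder)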
ims = list()  # restored: ims is appended to below
ims_nn = list()
depths = list()
depthsquares = list()
poses = list()
pcs = list()
pcssquare = list()
for id in ids:
    rgb_im = root + id + '.color.png'
    depth_im = root + id + '.depth.png'
    pose_im = root + id + '.pose.txt'
    ims.append(
        func.normalize(
            func.to_tensor(
                func.center_crop(
                    func.resize(PIL.Image.open(rgb_im), int(480 * scale)),
                    int(480 * scale))).float(),
            mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)))
    ims_nn.append(
        func.to_tensor(
            func.resize(PIL.Image.open(rgb_im), int(480 * scale))).float())
    depth = func.to_tensor(
        func.center_crop(
            func.resize(PIL.Image.open(depth_im), int(480 * scale), interpolation=0),  # 0 = NEAREST
            int(480 * scale))).float()
    depth[depth == 65535] = 0  # invalid depth sentinel
    depth *= 1e-3  # millimeters to meters
def __call__(self, sample):
    img = F.center_crop(sample['image'], self.img_size)
    lbl = F.center_crop(sample['label'], self.lbl_size)
    return {'image': img, 'label': lbl}
def center_crop(self, img, label, crop_size):
    img = ff.center_crop(img, crop_size)
    label = ff.center_crop(label, crop_size)
    return img, label
def __call__(self, sample):
    sample['input'] = F.center_crop(sample['input'], self.size)
    sample['gt'] = [F.center_crop(gt, self.size) for gt in sample['gt']]
    return sample
def center_crop(self, data, label, crop_size):
    """Crop the input image and label to crop_size."""
    data = ff.center_crop(data, crop_size)
    label = ff.center_crop(label, crop_size)
    return data, label
def _pad_rotate(self, img, angle):
    w, h = img.size
    img = F.pad(img, w // 2, 0, self.pad_mode)
    img = F.rotate(img, angle, False, self.expand, self.center)
    img = F.center_crop(img, (h, w))  # center_crop expects (height, width); PIL size is (w, h)
    return img
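# The pad-rotate-crop sequence above avoids the empty corners a bare rotation leaves:
# padding by half the width first means the crop back to the input size is filled with
# padded content rather than fill color. A minimal standalone sketch of the same trick
# for a PIL image, assuming reflective padding:
import torchvision.transforms.functional as F

def rotate_keep_size(img, angle, pad_mode="reflect"):
    # Pad, rotate, then crop back so the output keeps the input size.
    w, h = img.size
    img = F.pad(img, w // 2, 0, pad_mode)
    img = F.rotate(img, angle)
    return F.center_crop(img, (h, w))  # center_crop takes (height, width)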
def cc(image):
    return tvF.center_crop(image, (psize.h, psize.w))
def __call__(self, imgRGB, imgDepth):
    return F.center_crop(imgRGB, self.size), F.center_crop(imgDepth, self.size)
def center_crop(self, data, label, crop_size):
    """Crop the input image & label to crop_size."""
    data = ttf.center_crop(data, crop_size)
    label = ttf.center_crop(label, crop_size)
    return data, label
def __call__(self, img, target):
    hor_trans = random.choice((True, False))
    return (F.normalize(F.to_tensor(F.center_crop(ImageTransform(img, hor_trans), (300, 500))),
                        [0.45679754, 0.44313163, 0.4082983],
                        [0.23698017, 0.23328756, 0.23898676], False),
            torch.from_numpy(np.array(F.center_crop(horiz_flip(target, hor_trans), (300, 500)),
                                      dtype=np.int32)).long())
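# The key detail above is that hor_trans is drawn once per call, so the image and its
# label receive the same flip decision; sampling twice would silently decorrelate them.
# A minimal sketch of the pattern in isolation, with F.hflip standing in for the
# snippet's ImageTransform/horiz_flip helpers:
import random
import torchvision.transforms.functional as F

def paired_random_hflip(img, target):
    # One shared coin flip keeps image and label aligned.
    if random.random() < 0.5:
        img, target = F.hflip(img), F.hflip(target)
    return img, target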
def forward(self, img):
    return (F.center_crop(img[0], self.size),
            F.center_crop(img[1], self.size),
            F.center_crop(img[2], self.size))
def __call__(self, img, target):
    return (F.normalize(F.to_tensor(F.center_crop(img, (300, 500))),
                        [0.45679754, 0.44313163, 0.4082983],
                        [0.23698017, 0.23328756, 0.23898676], False),
            torch.from_numpy(np.array(F.center_crop(target, (300, 500)),
                                      dtype=np.int32)).long())
def __call__(self, sample):
    return {k: TF.center_crop(v, self.size) for k, v in sample.items()}
def __call__(self, image):
    return [F.center_crop(c, self.size) for c in image]