def train_transform(rgb, sparse, target, rgb_near, args):
    """Jointly augment an (rgb, sparse depth, target depth, nearby rgb) sample.

    All inputs share one geometric transform (bottom crop + optional
    horizontal flip) so they stay pixel-aligned; only the RGB images
    additionally receive color jitter, sampled once and shared by ``rgb``
    and ``rgb_near`` so the pair stays photometrically consistent.

    NOTE(review): relies on module-level ``oheight``/``owidth`` for the
    crop size — confirm they are set before this is called.
    """
    # s = np.random.uniform(1.0, 1.5) # random scaling
    # angle = np.random.uniform(-5.0, 5.0) # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    transform_geometric = transforms.Compose([
        # transforms.Rotate(angle),
        # transforms.Resize(s),
        transforms.BottomCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip)
    ])
    if sparse is not None:
        sparse = transform_geometric(sparse)
    target = transform_geometric(target)
    if rgb is not None:
        # Jitter strengths are drawn per-sample from [1 - jitter, 1 + jitter].
        brightness = np.random.uniform(max(0, 1 - args.jitter),
                                       1 + args.jitter)
        contrast = np.random.uniform(max(0, 1 - args.jitter), 1 + args.jitter)
        saturation = np.random.uniform(max(0, 1 - args.jitter),
                                       1 + args.jitter)
        transform_rgb = transforms.Compose([
            transforms.ColorJitter(brightness, contrast, saturation, 0),
            transform_geometric
        ])
        rgb = transform_rgb(rgb)
        if rgb_near is not None:
            rgb_near = transform_rgb(rgb_near)
    # sparse = drop_depth_measurements(sparse, 0.9)
    return rgb, sparse, target, rgb_near
def train_transform(self, im, gt):
    """Augment one (image, ground-truth depth) training pair.

    Both inputs go through the same geometric pipeline (fixed crop,
    upscale, random rotation, random scale, center crop, random flip) so
    they stay aligned; only the image receives color jitter.
    """
    im = np.array(im).astype(np.float32)
    gt = np.array(gt).astype(np.float32)
    s = np.random.uniform(1.0, 1.5)  # random scaling
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
    color_jitter = my_transforms.ColorJitter(0.4, 0.4, 0.4)
    transform = my_transforms.Compose([
        # Fixed crop to the region of interest, then upscale before
        # rotating. NOTE(review): 130/10/240/1200 look dataset-specific
        # (KITTI-like aspect) — confirm against the data source.
        my_transforms.Crop(130, 10, 240, 1200),
        my_transforms.Resize(460 / 240, interpolation='bilinear'),
        my_transforms.Rotate(angle),
        my_transforms.Resize(s),
        my_transforms.CenterCrop(self.size),
        my_transforms.HorizontalFlip(do_flip)
    ])
    im_ = transform(im)
    im_ = color_jitter(im_)  # photometric jitter on the image only
    gt_ = transform(gt)
    im_ = np.array(im_).astype(np.float32)
    gt_ = np.array(gt_).astype(np.float32)
    im_ /= 255.0
    # Dividing by s keeps depth consistent with the random spatial rescale;
    # the extra factor of 100 presumably converts stored units — TODO confirm.
    gt_ /= 100.0 * s
    im_ = to_tensor(im_)
    gt_ = to_tensor(gt_)
    gt_ = gt_.unsqueeze(0)  # add channel dimension to the depth tensor
    return im_, gt_
def __init__(self, directory, dims, output_size, train=True):
    """Set up the dataset wrapper.

    Args:
        directory: dataset root handed to the parent class.
        dims: input dimensions, stored as-is.
        output_size: target output size, stored as-is.
        train: selects the training pipeline (with color jitter) when
            True, otherwise the validation pipeline.
    """
    super().__init__(directory)
    self.dims = dims
    self.output_size = output_size
    if not train:
        self.transform = self.validate_transform
    else:
        self.transform = self.train_transform
        # Color jitter is only needed for training-time augmentation.
        self.color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)
def train_transform(self, im, gt, mask):
    """Augment an (image, ground-truth depth, validity mask) training triple.

    All three are first resized to 512x256, then pushed through the same
    random geometric pipeline so they stay aligned; only the image gets
    color jitter.
    """
    im = np.array(im).astype(np.float32)
    im = cv2.resize(im, (512, 256), interpolation=cv2.INTER_AREA)
    gt = cv2.resize(gt, (512, 256), interpolation=cv2.INTER_AREA)
    mask = cv2.resize(mask, (512, 256), interpolation=cv2.INTER_AREA)
    # h,w,c = im.shape
    # th, tw = 256,512
    # x1 = random.randint(0, w - tw)
    # y1 = random.randint(0, h - th)
    # img = im[y1:y1 + th, x1:x1 + tw, :]
    # gt = gt[y1:y1 + th, x1:x1 + tw]
    # mask = mask[y1:y1 + th, x1:x1 + tw]
    s = np.random.uniform(1.0, 1.5)  # random scaling
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
    color_jitter = my_transforms.ColorJitter(0.4, 0.4, 0.4)
    transform = my_transforms.Compose([
        my_transforms.Rotate(angle),
        my_transforms.Resize(s),
        my_transforms.CenterCrop(self.size),
        my_transforms.HorizontalFlip(do_flip)
    ])
    im_ = transform(im)
    im_ = color_jitter(im_)  # photometric jitter on the image only
    gt_ = transform(gt)
    mask_ = transform(mask)
    im_ = np.array(im_).astype(np.float32)
    gt_ = np.array(gt_).astype(np.float32)
    mask_ = np.array(mask_).astype(np.float32)
    im_ /= 255.0
    # Keep depth metrically consistent with the random spatial rescale.
    gt_ /= s
    im_ = to_tensor(im_)
    gt_ = to_tensor(gt_)
    mask_ = to_tensor(mask_)
    gt_ = gt_.unsqueeze(0)  # add channel dims to depth and mask
    mask_ = mask_.unsqueeze(0)
    return im_, gt_, mask_
class MyDataloader(data.Dataset):
    """Base dataset for (rgb, depth) samples discovered under a class-folder root.

    Subclasses must implement ``train_transform`` and ``val_transform``.
    This class handles file discovery, depth sparsification, and modality
    assembly/tensorization in ``__getitem__``.
    """

    modality_names = ['rgb', 'rgbd', 'd']  # , 'g', 'gd'
    # Shared photometric augmentation used by subclasses.
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, type, sparsifier=None, modality='rgb', augArgs=None, loader=h5_loader):
        """Index all samples under ``root`` and select the transform pipeline.

        Args:
            root: dataset root directory, one subfolder per class.
            type: 'train' or 'val'; selects the augmentation pipeline.
            sparsifier: optional dense-to-sparse depth sampler.
            modality: one of ``modality_names``.
            augArgs: extra augmentation parameters consumed by
                ``getFocalScale`` / ``getDepthGroup``.
            loader: callable mapping a file path to (rgb, depth).

        Raises:
            RuntimeError: if ``type`` is neither 'train' nor 'val'.
        """
        classes, class_to_idx = find_classes(root)
        imgs = make_dataset(root, class_to_idx)
        assert len(imgs) > 0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(imgs), type))
        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        self.augArgs = augArgs
        if type == 'train':
            self.transform = self.train_transform
        elif type == 'val':
            self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))
        self.loader = loader
        self.sparsifier = sparsifier
        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
            "Supported dataset types are: " + ''.join(self.modality_names)
        self.modality = modality

    def train_transform(self, rgb, depth):
        """Training augmentation hook; must be overridden by subclasses."""
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(self, rgb, depth):
        """Validation transform hook; must be overridden by subclasses.

        BUGFIX: the original signature was missing ``self``, so calling
        the bound method raised a TypeError instead of this RuntimeError.
        """
        raise (RuntimeError("val_transform() is not implemented."))

    def create_sparse_depth(self, rgb, depth):
        """Return depth sampled down to sparse points, or dense depth if no
        sparsifier is configured."""
        if self.sparsifier is None:
            return depth
        mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
        sparse_depth = np.zeros(depth.shape)
        sparse_depth[mask_keep] = depth[mask_keep]
        return sparse_depth

    def create_rgbd(self, rgb, depth):
        """Stack rgb with a sparse-depth channel into one (H, W, 4) array."""
        sparse_depth = self.create_sparse_depth(rgb, depth)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        return rgbd

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth) the raw data.
        """
        path, target = self.imgs[index]
        rgb, depth = self.loader(path)
        return rgb, depth

    def __getitem__(self, index):
        """Load, transform and tensorize the sample at ``index``."""
        rgb, depth = self.__getraw__(index)
        if self.transform is None:
            raise (RuntimeError("transform not defined"))
        # (cleanup: dropped the unused local `augArgs = self.augArgs`)
        rgb_np, depth_np = self.transform(rgb, depth)
        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)
        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = self.create_rgbd(rgb_np, depth_np)
        elif self.modality == 'd':
            input_np = self.create_sparse_depth(rgb_np, depth_np)
        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:  # depth-only inputs come back 2-D
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)
        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)

    def getFocalScale(self):
        """Return a random focal-length scale factor drawn uniformly from
        [augArgs.scale_min, augArgs.scale_max]."""
        scaleMin = self.augArgs.scale_min
        scaleMax = self.augArgs.scale_max
        s = np.random.uniform(scaleMin, scaleMax)  # random scaling factor
        return s

    def getDepthGroup(self):
        """Return a depth-group scale factor, sampled from a gaussian
        centered on a (possibly randomly selected) mean from augArgs.

        NOTE(review): ``np.random.normal`` expects a standard deviation;
        ``scaleVariances`` is passed through directly — confirm units.
        """
        if isinstance(self.augArgs.scaleMeans, float):  # no tuple provided
            mean = self.augArgs.scaleMeans
            variance = self.augArgs.scaleVariances
        else:
            idx = np.random.randint(0, len(self.augArgs.scaleMeans))
            mean = self.augArgs.scaleMeans[idx]
            variance = self.augArgs.scaleVariances[idx]
        scale = np.random.normal(mean, variance, 1)
        return scale
class DepthNoiseDataset(Dataset):
    """Dataset pairing RGB + (possibly noisy) sparse depth with dense depth.

    Samples are described by ``build_descriptor`` as
    (folder_idx, sample_idx, aug_seed) triples; sparse depth is either
    simulated on the fly via the sparsifier or loaded precomputed.
    """

    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root_dir, type, sparsifier=None, num_augmented=0,
                 sample_cap=-1, scene_cap=-1, sim_offline=False):
        """Build the sample descriptor and select the transform pipeline.

        Args:
            root_dir: dataset root containing one folder per scene.
            type: 'train' selects the augmenting pipeline; anything else
                uses the deterministic validation pipeline.
            sparsifier: optional dense-to-sparse depth sampler.
            num_augmented / sample_cap / scene_cap: forwarded to
                ``build_descriptor`` to restrict the sample set.
            sim_offline: when True, on-the-fly sparsification reuses the
                descriptor's stored augmentation seed.
        """
        self.root_dir = root_dir
        self.sim_offline = sim_offline
        self.sparsifier = sparsifier
        self.transform = self.val_transform
        if type == "train":
            # BUGFIX: the original read `self.train_transform` without
            # assigning it, so training silently used val_transform.
            self.transform = self.train_transform
        self.folders, self.descriptor = build_descriptor(
            root_dir, sample_cap, scene_cap, num_augmented)
        self.output_size = (228, 304)
        self.type = type
        self.sample_cap = sample_cap

    def __len__(self):
        return len(self.descriptor)

    def create_sparse_depth(self, rgb, depth, seed=None):
        """Sample depth down to sparse points; if the sparsifier returns a
        (mask, residual) tuple, apply the residual as depth noise first."""
        if self.sparsifier is None:
            return depth
        mask_keep = self.sparsifier.dense_to_sparse(rgb, depth, seed)
        sparse_depth = np.zeros(depth.shape)
        # if a residual map has been passed
        if isinstance(mask_keep, tuple):
            mask_keep, noise_offset = mask_keep
            noise_offset *= 10.
            depth = np.clip(depth + noise_offset, 0., 10.)
        sparse_depth[mask_keep] = depth[mask_keep]
        return sparse_depth

    def __getitem__(self, idx):
        """Return (rgbd input tensor, dense depth tensor) for sample ``idx``."""
        # restrict the amount of possible input
        folder_idx, sample_idx, aug_seed = self.descriptor[idx]
        # get scene directory name
        scene_dir = os.path.join(self.root_dir, self.folders[folder_idx])
        if self.type == "train":
            if self.sample_cap > 0:
                rgb_name = os.path.join(scene_dir, "med_image_%03d.png") % self.sample_cap
                gt_d_name = os.path.join(scene_dir, "med_depth_%03d.png") % self.sample_cap
            else:
                rgb_name = os.path.join(scene_dir, "med_image.png")
                gt_d_name = os.path.join(scene_dir, "med_depth_filled.png")
            # the sparse to dense method labels pure white depths as dropout
            depth = np.clip(cv2.imread(gt_d_name, 0), 0, 254)
            depth = (depth.astype(np.float64) / 255.) * 10.
        else:
            rgb_name = os.path.join(scene_dir, "med_image.png")
            gt_d_name = os.path.join(scene_dir, "med_depth_filled.npy")
            depth = np.load(gt_d_name) * 10.
        # load images
        rgb = cv2.imread(rgb_name, 1)
        sp_d_name = os.path.join(scene_dir, "depths", "depth_%04d.npy") % sample_idx
        # get sparse depth: simulate when sample_idx < 0, otherwise load
        # the precomputed millimeter-unit map and convert to meters.
        if sample_idx < 0:
            seed = aug_seed if self.sim_offline else None
            sparse_depth = self.create_sparse_depth(rgb, depth, seed)
        else:
            sparse_depth = np.clip(np.load(sp_d_name), 0, 9999).astype('float')
            sparse_depth = sparse_depth / 1000.
        # One shared seed forces identical geometric augmentation for the
        # dense and sparse depth passes.
        seed = int(time())
        rgb_np, depth_np = self.transform(rgb, depth, seed)
        _, sparse_depth_np = self.transform(rgb, sparse_depth, seed)
        # Stack rgb + sparse depth into a 4-channel input.
        input_np = np.append(rgb_np, np.expand_dims(sparse_depth_np, axis=2), axis=2)
        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)
        return input_tensor, depth_tensor

    def train_transform(self, rgb, depth, random_seed):
        """Random scale/rotate/flip augmentation; depth is divided by the
        scale factor to stay metrically consistent."""
        np.random.seed(random_seed)
        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_np = depth / s
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
        # perform 1st step of data augmentation
        transform = transforms.Compose([
            # this is for computational efficiency, since rotation can be slow
            transforms.Resize(250.0 / iheight),
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np)  # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)
        return rgb_np, depth_np

    def val_transform(self, rgb, depth, random_seed):
        """Deterministic resize + center crop (seed kept for interface parity
        with train_transform)."""
        np.random.seed(random_seed)
        depth_np = depth
        transform = transforms.Compose([
            transforms.Resize(240.0 / iheight),
            transforms.CenterCrop(self.output_size),
        ])
        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)
        return rgb_np, depth_np
class MyDataloader(data.Dataset):
    """RGB-only base dataset over .h5 files laid out one folder per class.

    Subclasses must implement ``train_transform`` and ``val_transform``.
    """

    modality_names = ['rgb']

    def is_image_file(self, filename):
        """Return True when ``filename`` has a supported extension (.h5 only)."""
        IMG_EXTENSIONS = ['.h5']
        return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)

    def find_classes(self, dir):
        """Return (sorted class names, name->index map) from subdirectories of ``dir``."""
        classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))]
        classes.sort()
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        return classes, class_to_idx

    def make_dataset(self, dir, class_to_idx):
        """Walk ``dir`` and collect (path, class index) pairs for every .h5 file."""
        images = []
        dir = os.path.expanduser(dir)
        for target in sorted(os.listdir(dir)):
            d = os.path.join(dir, target)
            if not os.path.isdir(d):
                continue
            for root, _, fnames in sorted(os.walk(d)):
                for fname in sorted(fnames):
                    if self.is_image_file(fname):
                        path = os.path.join(root, fname)
                        item = (path, class_to_idx[target])
                        images.append(item)
        return images

    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, split, modality='rgb', loader=h5_loader):
        """Index samples under ``root``; ``split`` selects the transform.

        Raises:
            RuntimeError: if ``split`` is not 'train', 'holdout' or 'val'.
        """
        classes, class_to_idx = self.find_classes(root)
        imgs = self.make_dataset(root, class_to_idx)
        assert len(imgs) > 0, "Found 0 images in subfolders of: " + root + "\n"
        # print("Found {} images in {} folder.".format(len(imgs), split))
        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        if split == 'train':
            self.transform = self.train_transform
        elif split == 'holdout':
            self.transform = self.val_transform
        elif split == 'val':
            self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset split: " + split + "\n"
                                "Supported dataset splits are: train, val"))
        self.loader = loader
        assert (modality in self.modality_names), "Invalid modality split: " + modality + "\n" + \
            "Supported dataset splits are: " + ''.join(self.modality_names)
        self.modality = modality

    def train_transform(self, rgb, depth):
        """Training augmentation hook; must be overridden by subclasses."""
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(self, rgb, depth):
        """Validation transform hook; must be overridden by subclasses.

        BUGFIX: the original signature was missing ``self``, so calling
        the bound method raised a TypeError instead of this RuntimeError.
        """
        raise (RuntimeError("val_transform() is not implemented."))

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth) the raw data.
        """
        path, target = self.imgs[index]
        rgb, depth = self.loader(path)
        return rgb, depth

    def __getitem__(self, index):
        """Load, transform and tensorize the sample at ``index``."""
        rgb, depth = self.__getraw__(index)
        if self.transform is None:
            raise (RuntimeError("transform not defined"))
        rgb_np, depth_np = self.transform(rgb, depth)
        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)
        if self.modality == 'rgb':
            input_np = rgb_np
        to_tensor = transforms.ToTensor()
        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)
        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
class MyDataloader(data.Dataset):
    """Base (rgb, depth) dataset whose sparsifier keeps depth on a fixed 8x8 grid.

    Subclasses must implement ``train_transform`` and ``val_transform``.
    (Original comments were in Chinese; translated to English.)
    """

    modality_names = ['rgb', 'rgbd', 'd']  # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)  # HSV-style photometric jitter

    def __init__(self, root, type, sparsifier=None, modality='rgb', loader=h5_loader):
        """Index all samples under ``root`` and select the transform pipeline.

        Train and val splits use different transform pipelines.

        Raises:
            RuntimeError: if ``type`` is neither 'train' nor 'val'.
        """
        classes, class_to_idx = find_classes(root)
        imgs = make_dataset(root, class_to_idx)
        assert len(imgs) > 0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(imgs), type))
        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        if type == 'train':
            self.transform = self.train_transform
        elif type == 'val':
            self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))
        self.loader = loader
        self.sparsifier = sparsifier
        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
            "Supported dataset types are: " + ''.join(self.modality_names)
        self.modality = modality

    def train_transform(self, rgb, depth):
        """Training augmentation hook; must be overridden by subclasses."""
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(self, rgb, depth):
        """Validation transform hook; must be overridden by subclasses.

        BUGFIX: the original signature was missing ``self``, so calling
        the bound method raised a TypeError instead of this RuntimeError.
        """
        raise (RuntimeError("val_transform() is not implemented."))

    def create_sparse_depth(self, rgb, depth):
        """Return depth kept only on a regular 8x8 pixel grid (zeros elsewhere).

        Cleanup: the grid mask is built with strided slicing instead of the
        original per-pixel double loop (identical result, no Python loop),
        and a dead resize-down/up experiment was removed.
        """
        if self.sparsifier is None:
            return depth
        # The sparsifier is still invoked so any randomness it consumes
        # matches the original pipeline, but its mask is unused: the
        # original code had `sparse_depth[mask_keep] = depth[mask_keep]`
        # commented out in favor of the grid mask below.
        self.sparsifier.dense_to_sparse(rgb, depth)
        mask_Leon = np.zeros(depth.shape)
        mask_Leon[0::8, 0::8] = 1  # keep every 8th pixel along both axes
        return depth * mask_Leon

    def create_rgbd(self, rgb, depth):
        """Stack rgb with a sparse-depth channel into one (H, W, 4) array."""
        sparse_depth = self.create_sparse_depth(rgb, depth)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        return rgbd

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth) the raw data.
        """
        path, target = self.imgs[index]
        rgb, depth = self.loader(path)
        return rgb, depth

    def __getitem__(self, index):
        """Load, transform and tensorize the sample at ``index``."""
        rgb, depth = self.__getraw__(index)
        if self.transform is None:
            raise (RuntimeError("transform not defined"))
        rgb_np, depth_np = self.transform(rgb, depth)
        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)
        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = self.create_rgbd(rgb_np, depth_np)
        elif self.modality == 'd':
            input_np = self.create_sparse_depth(rgb_np, depth_np)
        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)
        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
class MyDataloader(data.Dataset):
    """(rgb, depth) dataset base supporting png-pair and .h5 sample layouts.

    Subclasses must implement ``train_transform`` and ``val_transform``.
    """

    modality_names = ['rgb', 'rgbd', 'd']
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, type, sparsifier=None, modality='rgb', loader=h5_loader):
        """Index samples under ``root`` using the file layout implied by ``loader``.

        Raises:
            RuntimeError: for an unsupported loader or dataset type.
        """
        if loader == png_loader:
            imgs = make_dataset_png(root)
        elif loader == h5_loader:
            imgs = make_dataset_h5(root)
        else:
            # ROBUSTNESS: the original left `imgs` unbound for an unknown
            # loader, which surfaced later as a confusing NameError.
            raise RuntimeError("Unsupported loader; expected png_loader or h5_loader")
        # if(type == val and len(imgs) > 3200):
        #     np.random.shuffle(imgs)
        #     imgs = imgs[:3200]
        assert len(imgs) > 0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(imgs), type))
        self.root = root
        self.imgs = imgs
        if type == 'train':
            self.transform = self.train_transform
        elif type == 'val':
            self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))
        self.loader = loader
        self.sparsifier = sparsifier
        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
            "Supported dataset types are: " + ''.join(self.modality_names)
        self.modality = modality

    def train_transform(self, rgb, depth):
        """Training augmentation hook; must be overridden by subclasses."""
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(self, rgb, depth):
        """Validation transform hook; must be overridden by subclasses."""
        raise (RuntimeError("val_transform() is not implemented."))

    def create_sparse_depth(self, rgb, depth):
        """Return depth sampled down to sparse points (dense if no sparsifier)."""
        if self.sparsifier is None:
            return depth
        mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
        sparse_depth = np.zeros(depth.shape)
        sparse_depth[mask_keep] = depth[mask_keep]
        return sparse_depth

    def create_rgbd(self, rgb, depth):
        """Stack rgb with a sparse-depth channel into one (H, W, 4) array."""
        sparse_depth = self.create_sparse_depth(rgb, depth)
        # Use the tensor version of expand_dims...
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        return rgbd

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth) the raw data.
        """
        if self.loader == png_loader:
            # png layout stores (rgb path, depth path) pairs
            rgb_path, depth_path = self.imgs[index]
            rgb, depth = self.loader(rgb_path, depth_path)
        else:
            # h5 layout stores one path per sample
            path = self.imgs[index]
            rgb, depth = self.loader(path)
        return rgb, depth

    def __getitem__(self, index):
        """Load, transform and tensorize the sample at ``index``.

        Cleanup: removed the dead pre-assignments ``rgb_np = rgb`` /
        ``depth_np = depth`` that the transform call overwrote immediately.
        """
        rgb, depth = self.__getraw__(index)
        if self.transform is None:
            raise (RuntimeError("transform not defined"))
        rgb_np, depth_np = self.transform(rgb, depth)
        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)
        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = self.create_rgbd(rgb_np, depth_np)
        elif self.modality == 'd':
            input_np = self.create_sparse_depth(rgb_np, depth_np)
        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)
        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
class Floorplan3DDataset(data.Dataset):
    """Floorplan3D (rgb, depth) dataset read from a ``<type>_<split>.list`` file."""

    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, dataset_type, split):
        """Read the image list for (dataset_type, split) and pick a transform.

        Raises:
            RuntimeError: if ``split`` is neither 'train' nor 'val'.
        """
        self.output_size = (257, 353)
        self.root = root
        file_list = "{}/{}_{}.list".format(root, dataset_type, split)
        with open(file_list, "r") as f:
            self.imgs = f.readlines()
        self.depth_loader = DepthLoader
        self.color_loader = PILLoader
        if split == 'train':
            self.transform = self.train_transform
        elif split == 'val':
            self.transform = self.val_transform
        else:
            # ROBUSTNESS: fail fast instead of leaving self.transform
            # unset (the original surfaced this later as AttributeError).
            raise RuntimeError("Invalid dataset split: " + split + "\n"
                               "Supported dataset splits are: train, val")

    def train_transform(self, rgb, depth):
        """Random scale/rotate/flip augmentation; depth is divided by the
        scale factor to stay metrically consistent."""
        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_np = depth / s
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
        # perform 1st step of data augmentation
        transform = transforms.Compose([
            transforms.Resize(288.0 / iheight),  # for efficiency: rotation is slow on full-size images
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np)  # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)
        return rgb_np, depth_np

    def val_transform(self, rgb, depth):
        """Deterministic resize + center crop for validation."""
        depth_np = depth
        transform = transforms.Compose([
            transforms.Resize(288.0 / iheight),
            transforms.CenterCrop(self.output_size),
        ])
        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)
        return rgb_np, depth_np

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth) the raw data.
        """
        (path, target) = self.imgs[index].strip().split(" ")
        path = os.path.join(self.root, path)
        target = os.path.join(self.root, target)
        rgb = self.color_loader(path)
        depth = self.depth_loader(target)
        return rgb, depth

    def __getitem__(self, index):
        """Load, transform and tensorize the sample at ``index``."""
        rgb, depth = self.__getraw__(index)
        if self.transform is None:
            raise (RuntimeError("transform not defined"))
        rgb_np, depth_np = self.transform(rgb, depth)
        input_tensor = to_tensor(rgb_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)
        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
class MyDataloader(data.Dataset):
    """(rgb, depth, nearby-rgb) dataset base for pose-supervised depth completion.

    Subclasses must implement the transforms and are expected to set
    ``self.K`` (camera intrinsics matrix) and ``self.output_size``:
    ``__getitem__`` dereferences both.
    """

    modality_names = ['rgb', 'rgbd', 'd']  # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, type, sparsifier=None, modality='rgb', loader=h5_loader):
        """Index all samples under ``root`` and select the transform pipeline.

        Raises:
            RuntimeError: if ``type`` is neither 'train' nor 'val'.
        """
        classes, class_to_idx = find_classes(root)
        imgs = make_dataset(root, class_to_idx)
        assert len(imgs) > 0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(imgs), type))
        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        self.mode = type
        if type == 'train':
            self.transform = self.train_transform
        elif type == 'val':
            self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))
        self.loader = loader
        self.sparsifier = sparsifier
        # Placeholders; subclasses must overwrite before __getitem__ runs.
        self.K = None
        self.output_size = None
        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
            "Supported dataset types are: " + ''.join(self.modality_names)
        self.modality = modality

    def train_transform(self, rgb, depth):
        """Training augmentation hook; must be overridden by subclasses."""
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(self, rgb, depth):
        """Validation transform hook; must be overridden by subclasses.

        BUGFIX: the original signature was missing ``self``, so calling
        the bound method raised a TypeError instead of this RuntimeError.
        """
        raise (RuntimeError("val_transform() is not implemented."))

    def create_sparse_depth(self, rgb, depth):
        """Return depth sampled down to sparse points (dense if no sparsifier)."""
        if self.sparsifier is None:
            return depth
        mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
        sparse_depth = np.zeros(depth.shape)
        sparse_depth[mask_keep] = depth[mask_keep]
        return sparse_depth

    def create_rgbd(self, rgb, depth):
        """Stack rgb with a sparse-depth channel into one (H, W, 4) array."""
        sparse_depth = self.create_sparse_depth(rgb, depth)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        return rgbd

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth, rgb_near) the raw data.
        """
        path, target = self.imgs[index]
        rgb, depth = self.loader(path)
        rgb_near = near_rgb(path, self.loader)
        return rgb, depth, rgb_near

    def __getitem__(self, index):
        """Return ({rgb, gt, d, r_mat, t_vec, rgb_near} tensors, intrinsics dict)."""
        rgb, depth, rgb_near = self.__getraw__(index)
        if self.transform is None:
            raise (RuntimeError("transform not defined"))
        rgb_np, depth_np, rgb_near_np = self.transform(rgb, depth, rgb_near)
        # In train mode, estimate the relative pose to the nearby frame.
        rot_mat, t_vec = None, None
        if self.mode == "train":
            rgb_cv = (rgb_np * 255).astype(np.uint8)
            rgb_near_cv = (rgb_near_np * 255).astype(np.uint8)
            succ, rot_vec, t_vec = get_pose(rgb_cv, depth_np, rgb_near_cv, self.K)
            if succ:
                rot_mat, _ = cv2.Rodrigues(rot_vec)
            else:
                # Pose estimation failed: fall back to an identity pose
                # paired with the current frame itself.
                rgb_near_np = rgb_np
                t_vec = np.zeros((3, 1))
                rot_mat = np.eye(3)
        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)
        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = self.create_rgbd(rgb_np, depth_np)
        elif self.modality == 'd':
            input_np = self.create_sparse_depth(rgb_np, depth_np)
        # NOTE(review): the "d" entry slices channels 3+, which assumes a
        # 4-channel 'rgbd' input — confirm behavior for 'rgb'/'d' modes.
        candidates = {
            "rgb": rgb_np,
            "gt": np.expand_dims(depth_np, -1),
            "d": input_np[:, :, 3:],
            "r_mat": rot_mat,
            "t_vec": t_vec,
            "rgb_near": rgb_near_np,
        }
        intrinsics = {
            "fx": self.K[0, 0],
            "fy": self.K[1, 1],
            "cx": self.K[0, 2],
            "cy": self.K[1, 2],
            "output_size": self.output_size
        }
        items = {
            key: to_float_tensor(val)
            for key, val in candidates.items() if val is not None
        }
        return items, intrinsics

    def __len__(self):
        return len(self.imgs)
class UWTestDataset(Dataset):
    """Real underwater RGB-D test set listed in ``real_uw_test_list.txt``."""

    modality_names = ['rgb', 'rgbd', 'd']  # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, modality='rgb'):
        """Read the (rgb path, depth path) list file under ``root``."""
        imgs = []
        val = 'real_uw_test_list.txt'
        self.root = root
        self.output_size = (228, 304)
        data_path = os.path.join(self.root, val)
        self.transform = self.val_transform
        # BUGFIX: context manager closes the list file; the original
        # leaked the open handle.
        with open(data_path, 'r') as fh:
            for line in fh:
                words = line.rstrip().split()
                rgb_path = self.root + words[0]
                depth_path = self.root + words[1]
                imgs.append((rgb_path, depth_path))
        self.imgs = imgs
        # self.imgs = imgs[:64]  # debug
        self.modality = modality

    def val_transform(self, rgb, depth):
        """Resize + center crop both inputs; depth additionally goes
        through ``depth_data_transforms`` for comparison with Eigen's paper."""
        depth_np = depth
        transform = transforms.Compose([
            transforms.Resize(240.0 / iheight),
            transforms.CenterCrop(self.output_size),
        ])
        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)
        # for compare with Eigen's paper
        depth_np = depth_data_transforms(depth_np)
        return rgb_np, depth_np

    def __getitem__(self, index):
        """Load, transform and tensorize the sample at ``index``."""
        rgb_path, depth_path = self.imgs[index]
        depth = read_depth(depth_path)
        rgb = cv2.imread(rgb_path)
        rgb = cv2.resize(rgb, (640, 480))
        rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
        # water style
        # rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB).astype(np.float)
        # rgb /= rgb.max()  # normalize to [0, 1]
        # rgb = uw_style(rgb, depth)
        # rgb /= rgb.max() / 255  # normalize to uint8
        # rgb = rgb.astype(np.uint8)
        # rgb = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
        if self.transform is None:
            raise (RuntimeError("transform not defined"))
        rgb_np, depth_np = self.transform(rgb, depth)
        if self.modality == 'rgb':
            input_np = rgb_np
        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)
        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
class OmniDataset(Dataset):
    """Omnidirectional RGB-D dataset with synthetic underwater styling applied."""

    modality_names = ['rgb', 'rgbd', 'd']  # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, type, sparsifier=None, modality='rgbd'):
        """Read the split list under ``root`` and select the transform.

        Raises:
            RuntimeError: if ``type`` is neither 'train' nor 'val'.
        """
        imgs = []
        train = 'original_train_split.txt'
        # train = 'original_train_debug.txt'
        val = 'original_test_split.txt'
        # val = 'original_test_debug.txt'
        self.root = root
        # self.output_size = (243, 486)
        self.output_size = (256, 512)
        if type == 'train':
            data_path = os.path.join(self.root, train)
            self.transform = self.train_transform
        elif type == 'val':
            data_path = os.path.join(self.root, val)
            self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))
        # BUGFIX: context manager closes the split file; the original
        # leaked the open handle.
        with open(data_path, 'r') as fh:
            for line in fh:
                words = line.rstrip().split()
                rgb_path = self.root + words[0]
                depth_path = self.root + words[1]
                imgs.append((rgb_path, depth_path))
        self.imgs = imgs
        # self.imgs = imgs[:64]  # debug
        self.sparsifier = sparsifier
        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
            "Supported dataset types are: " + ''.join(self.modality_names)
        self.modality = modality

    def train_transform(self, rgb, depth):
        """Underwater-style the image, then flip/resize both inputs.

        Note: s and angle are still sampled (keeping the RNG sequence
        stable) even though the rotate/scale steps are disabled below.
        """
        # for create fake underwater images
        rgb = uw_style(rgb, depth)
        rgb /= rgb.max() / 255
        rgb = rgb.astype(np.uint8)
        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_np = depth / s
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
        # perform 1st step of data augmentation
        transform = transforms.Compose([
            # transforms.Rotate(angle),
            # transforms.Resize(s),
            # transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip),
            transforms.Resize(size=self.output_size)
        ])
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np)  # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)
        return rgb_np, depth_np

    def val_transform(self, rgb, depth):
        """Underwater-style the image, then resize both inputs (no augmentation)."""
        # for create fake underwater images
        rgb = uw_style(rgb, depth)
        rgb /= rgb.max() / 255
        rgb = rgb.astype(np.uint8)
        depth_np = depth
        transform = transforms.Compose([
            # transforms.CenterCrop(self.output_size),
            transforms.Resize(size=self.output_size)
        ])
        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)
        return rgb_np, depth_np

    def create_sparse_depth(self, rgb, depth):
        """Return depth sampled down to sparse points (dense if no sparsifier)."""
        if self.sparsifier is None:
            return depth
        mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
        sparse_depth = np.zeros(depth.shape)
        sparse_depth[mask_keep] = depth[mask_keep]
        return sparse_depth

    def create_rgbd(self, rgb, depth):
        """Stack rgb with a sparse-depth channel into one (H, W, 4) array."""
        sparse_depth = self.create_sparse_depth(rgb, depth)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        return rgbd

    def __getitem__(self, index):
        """Load, transform and tensorize the sample at ``index``."""
        rgb_path, depth_path = self.imgs[index]
        # rgb = cv2.imread(rgb_path)
        # rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
        rgb = plt.imread(rgb_path)
        depth = read_depth(depth_path)
        # depth = fill_depth_colorization(rgb, depth)  # fills depth holes but is very slow
        if self.transform is None:
            raise (RuntimeError("transform not defined"))
        rgb_np, depth_np = self.transform(rgb, depth)
        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = self.create_rgbd(rgb_np, depth_np)
        elif self.modality == 'd':
            input_np = self.create_sparse_depth(rgb_np, depth_np)
        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)
        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
def train_transform(rgb, sparse, target, position, args):
    """Jointly augment one depth-completion training sample.

    All non-None inputs receive the same geometric augmentation
    (bottom crop to ``args.val_h`` x ``args.val_w`` plus a shared random
    horizontal flip); rgb additionally gets color jitter, and ``position``
    is only bottom-cropped (never flipped, so the encoding stays valid).
    Optionally a shared random crop of ``args.random_crop_height`` x
    ``args.random_crop_width`` is applied to all of them.

    Args:
        rgb, sparse, target, position: HxW or HxWxC arrays, any may be None.
        args: namespace with val_h, val_w, jitter, not_random_crop,
            random_crop_height, random_crop_width.

    Returns:
        (rgb, sparse, target, position) after augmentation.
    """
    # s = np.random.uniform(1.0, 1.5)  # random scaling
    # angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    oheight = args.val_h
    owidth = args.val_w
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
    transform_geometric = transforms.Compose([
        # transforms.Rotate(angle),
        # transforms.Resize(s),
        transforms.BottomCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip)
    ])
    if sparse is not None:
        sparse = transform_geometric(sparse)
    target = transform_geometric(target)
    if rgb is not None:
        # Photometric jitter sampled symmetrically around 1 (hue fixed at 0).
        brightness = np.random.uniform(max(0, 1 - args.jitter),
                                       1 + args.jitter)
        contrast = np.random.uniform(max(0, 1 - args.jitter), 1 + args.jitter)
        saturation = np.random.uniform(max(0, 1 - args.jitter),
                                       1 + args.jitter)
        transform_rgb = transforms.Compose([
            transforms.ColorJitter(brightness, contrast, saturation, 0),
            transform_geometric
        ])
        rgb = transform_rgb(rgb)
    # sparse = drop_depth_measurements(sparse, 0.9)
    if position is not None:
        # Was wrapped in a single-element Compose; call the crop directly.
        position = transforms.BottomCrop((oheight, owidth))(position)

    # random crop (idiom fix: was `if args.not_random_crop == False`)
    if not args.not_random_crop:
        rheight = args.random_crop_height
        rwidth = args.random_crop_width
        # One shared random origin so all maps stay pixel-aligned.
        i = np.random.randint(0, oheight - rheight + 1)
        j = np.random.randint(0, owidth - rwidth + 1)

        def _crop(img):
            # Slicing the two leading axes handles both HxW and HxWxC,
            # replacing the four duplicated ndim-branched blocks.
            return None if img is None else img[i:i + rheight, j:j + rwidth]

        rgb = _crop(rgb)
        sparse = _crop(sparse)
        target = _crop(target)
        position = _crop(position)
    return rgb, sparse, target, position
class MHKDataset(Dataset): modality_names = ['rgb', 'rgbd', 'd'] # , 'g', 'gd' color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4) def __init__(self, root, modality='rgb'): imgs = [] data_path = None val = 'MHL-15k.txt' self.root = root self.output_size = (228, 304) data_path = os.path.join(self.root, val) self.transform = self.val_transform fh = open(data_path, 'r') for line in fh: line = line.rstrip() rgb_path = self.root + line imgs.append(rgb_path) self.imgs = imgs # self.imgs = imgs[:64] # debug self.modality = modality def val_transform(self, rgb): transform = transforms.Compose([ transforms.Resize(240.0 / iheight), transforms.CenterCrop(self.output_size), ]) rgb_np = transform(rgb) rgb_np = np.asfarray(rgb_np, dtype='float') / 255 return rgb_np def __getitem__(self, index): rgb_path = self.imgs[index] rgb = cv2.imread(rgb_path) rgb = cv2.resize(rgb, (640, 480)) # rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB) if self.transform is not None: rgb_np = self.transform(rgb) else: raise (RuntimeError("transform not defined")) if self.modality == 'rgb': input_np = rgb_np input_tensor = to_tensor(input_np) while input_tensor.dim() < 3: input_tensor = input_tensor.unsqueeze(0) return input_tensor def __len__(self): return len(self.imgs)
class RawDataloader(data.Dataset):
    """Base dataset over h5 samples that also carry a raw (sensor) depth map.

    Subclasses must override :meth:`train_transform` / :meth:`val_transform`.
    The loader callable must return ``(rgb, depth, raw_depth)``.
    """

    modality_names = ['rgb', 'rgbd', 'd']  # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, type, sparsifier=None, modality='rgb',
                 loader=h5_loader, aug_limit=-1):
        """Index the dataset under ``root``.

        Args:
            root: folder scanned by ``find_classes`` / ``make_dataset``.
            type: ``'train'`` or ``'val'``.
            sparsifier: optional; when set, sparse input depth is synthesized
                from the ground-truth depth instead of using the raw depth.
            modality: one of ``modality_names``.
            loader: callable path -> (rgb, depth, raw_depth).
            aug_limit: passed through to ``make_dataset``.
        """
        classes, class_to_idx = find_classes(root)
        imgs = make_dataset(root, class_to_idx, aug_limit)
        assert len(imgs) > 0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(imgs), type))
        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        if type == 'train':
            self.transform = self.train_transform
        elif type == 'val':
            self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))
        self.loader = loader
        self.sparsifier = sparsifier
        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
            "Supported dataset types are: " + ''.join(self.modality_names)
        self.modality = modality

    def train_transform(self, rgb, depth, seed=None):
        """Subclass hook. Fixed: now accepts the ``seed`` that __getitem__ passes."""
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(self, rgb, depth, seed=None):
        """Subclass hook. Fixed: added missing ``self`` (was ``def val_transform(rgb, depth)``)."""
        raise (RuntimeError("val_transform() is not implemented."))

    def create_sparse_depth(self, rgb, depth):
        """Sample depth at sparsifier locations; optionally add a noise residual."""
        if self.sparsifier is None:
            return depth
        mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
        sparse_depth = np.zeros(depth.shape)
        # if a residual map has been passed
        if isinstance(mask_keep, tuple):
            mask_keep, noise_offset = mask_keep
            noise_offset *= 10.
            # Clamp to [0, 10]; presumably a 10 m max-depth assumption — TODO confirm.
            depth = np.clip(depth + noise_offset, 0., 10.)
        sparse_depth[mask_keep] = depth[mask_keep]
        return sparse_depth

    def create_rgbd(self, rgb, depth):
        """Stack rgb (HxWx3) with sparse depth as a 4th channel."""
        sparse_depth = self.create_sparse_depth(rgb, depth)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        return rgbd

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth, raw_depth) the raw data.
        """
        path, target = self.imgs[index]
        rgb, depth, raw_depth = self.loader(path)
        return rgb, depth, raw_depth

    def __getitem__(self, index):
        """Return (input_tensor, depth_tensor) for sample ``index``."""
        rgb, depth, raw_depth = self.__getraw__(index)
        sparse_depth = raw_depth if self.sparsifier is None else self.create_sparse_depth(
            rgb, depth)
        if self.transform is not None:
            # Same seed for both calls so the gt depth and the sparse depth
            # receive identical random augmentation.
            seed = int(time.time())
            rgb_np, depth_np = self.transform(rgb, depth, seed)
            _, sparse_depth_np = self.transform(rgb, sparse_depth, seed)
        else:
            raise (RuntimeError("transform not defined"))
        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = np.append(rgb_np,
                                 np.expand_dims(sparse_depth_np, axis=2),
                                 axis=2)
        elif self.modality == 'd':
            input_np = sparse_depth_np
        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)
        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
class MyDataloader(data.Dataset):
    """Base RGB-D dataset with optional label ('rgbl') and edge ('rgbde') modalities.

    Subclasses must override the four ``*_transform`` hooks and set
    ``label_type`` when using the 'rgbl' modality.
    """

    modality_names = ['rgb', 'rgbd', 'd', 'rgbl', 'rgbde']  # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, type, sparsifier=None, modality='rgb',
                 make_dataset=make_dataset, loader=h5_loader):
        """Index samples under ``root`` and pick the transform for the split.

        Args:
            root: dataset root folder.
            type: ``'train'`` or ``'val'``.
            sparsifier: optional object with ``dense_to_sparse(rgb, depth)``.
            modality: one of ``modality_names``.
            make_dataset: callable building the (path, class) index.
            loader: callable path -> (rgb, depth).
        """
        classes, class_to_idx = find_classes(root)
        imgs = make_dataset(root, class_to_idx)
        assert len(imgs)>0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(imgs), type))
        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        if type == 'train':
            if modality == 'rgbl':
                self.transform = self.train_transform_label
            else:
                self.transform = self.train_transform
        elif type == 'val':
            if modality == 'rgbl':
                self.transform = self.val_transform_label
            else:
                self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))
        self.loader = loader
        self.sparsifier = sparsifier
        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
            "Supported dataset types are: " + ''.join(self.modality_names)
        self.modality = modality

    def train_transform_label(self, rgb, depth, label):
        # Subclass hook for the 'rgbl' modality (3-array transform).
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform_label(self, rgb, depth, label):
        # Subclass hook for the 'rgbl' modality.
        raise (RuntimeError("val_transform() is not implemented."))

    def train_transform(self, rgb, depth):
        # Subclass hook for all other modalities.
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(self, rgb, depth):
        # Subclass hook for all other modalities.
        raise (RuntimeError("val_transform() is not implemented."))

    def create_sparse_depth(self, rgb, depth):
        """Sample depth at sparsifier mask locations (dense depth if no sparsifier)."""
        if self.sparsifier is None:
            return depth
        else:
            mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
            sparse_depth = np.zeros(depth.shape)
            sparse_depth[mask_keep] = depth[mask_keep]
            return sparse_depth

    def create_rgbd(self, rgb, depth):
        """Stack rgb (HxWx3) with sparse depth -> HxWx4."""
        sparse_depth = self.create_sparse_depth(rgb, depth)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        return rgbd

    def create_rgbde(self, rgb, depth):
        """Stack rgb, sparse depth, and a Canny edge map -> HxWx5."""
        sparse_depth = self.create_sparse_depth(rgb, depth)
        edge = cv2.Canny(rgb, 100, 200)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        rgbde = np.append(rgbd, np.expand_dims(edge, axis=2), axis=2)
        return rgbde

    def create_rgbdl(self, rgb, depth):
        """Stack rgb with (already sparse/dense) depth -> HxWx4; no sparsifier applied."""
        rgbd = np.append(rgb, np.expand_dims(depth, axis=2), axis=2)
        return rgbd

    def get_label(self, index):
        """Load the per-sample label image as float32 scaled by 1000/255.

        NOTE(review): ``self.label_type`` is never set in __init__ — it must
        be assigned by a subclass before 'rgbl' samples are fetched; verify.
        """
        path, target = self.imgs[index]
        filename = path + '_' + self.label_type + '_label.png'
        label = imageio.imread(filename).astype('float32') / 255.0 * 1000.0
        return label

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth) the raw data.
        """
        path, target = self.imgs[index]
        rgb, depth = self.loader(path)
        return rgb, depth

    def __getitem__(self, index):
        """Return (input_tensor, label_tensor); label is depth unless modality is 'rgbl'."""
        rgb, depth = self.__getraw__(index)
        # 'rgbl' is transformed later, together with its label.
        if self.modality != 'rgbl':
            if self.transform is not None:
                rgb_np, depth_np = self.transform(rgb, depth)
            else:
                raise(RuntimeError("transform not defined"))

        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)
        label_np = None
        debug_figure = False  # flip to visually inspect augmentation
        if debug_figure:
            fig = plt.figure(1)
            a1 = fig.add_subplot(2, 3, 1)
            a2 = fig.add_subplot(2, 3, 2)
            a3 = fig.add_subplot(2, 3, 3)
            a4 = fig.add_subplot(2, 3, 4)
            a5 = fig.add_subplot(2, 3, 5)
            a6 = fig.add_subplot(2, 3, 6)

        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = self.create_rgbd(rgb_np, depth_np)
        elif self.modality == 'd':
            input_np = self.create_sparse_depth(rgb_np, depth_np)
        elif self.modality == 'rgbde':
            input_np = self.create_rgbde(rgb_np, depth_np)
        elif self.modality == 'rgbl':
            label_np = self.get_label(index)
            if debug_figure:
                a1.imshow(rgb)
                a2.imshow(depth)
                a3.imshow(label_np)
            rgb_np, depth_np, label_np = self.transform(rgb, depth, label_np)
            input_np = self.create_rgbdl(rgb_np, depth_np)
            if debug_figure:
                a4.imshow(rgb_np)
                a5.imshow(depth_np)
                a6.imshow(label_np)
                plt.show()

        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        # Target is the label when present, otherwise the dense depth.
        if label_np is not None:
            label_tensor = to_tensor(label_np)
        else:
            label_tensor = to_tensor(depth_np)
        label_tensor = label_tensor.unsqueeze(0)
        return input_tensor, label_tensor

    def __len__(self):
        return len(self.imgs)
class MyDataloader(data.Dataset):
    """Base RGB-D dataset over an h5 folder tree.

    Subclasses must override :meth:`train_transform` / :meth:`val_transform`.

    NOTE(review): this redefines ``MyDataloader`` and shadows any earlier
    definition with the same name in this module.
    """

    modality_names = ['rgb', 'rgbd', 'd']  # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, type, sparsifier=None, modality='rgb',
                 loader=h5_loader):
        """Index samples under ``root`` and select the split's transform.

        Args:
            root: dataset root folder.
            type: ``'train'`` or ``'val'``.
            sparsifier: optional object with ``dense_to_sparse(rgb, depth)``.
            modality: one of ``modality_names``.
            loader: callable path -> (rgb, depth).
        """
        classes, class_to_idx = find_classes(root)
        imgs = make_dataset(root, class_to_idx)
        assert len(imgs) > 0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(imgs), type))
        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        if type == 'train':
            self.transform = self.train_transform
        elif type == 'val':
            self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))
        self.loader = loader
        self.sparsifier = sparsifier
        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
            "Supported dataset types are: " + ''.join(self.modality_names)
        self.modality = modality

    def train_transform(self, rgb, depth):
        """Subclass hook."""
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(self, rgb, depth):
        """Subclass hook. Fixed: added missing ``self`` (was ``def val_transform(rgb, depth)``)."""
        raise (RuntimeError("val_transform() is not implemented."))

    def create_sparse_depth(self, rgb, depth):
        """Sample depth at sparsifier mask locations (dense depth if no sparsifier)."""
        if self.sparsifier is None:
            return depth
        mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
        sparse_depth = np.zeros(depth.shape)
        sparse_depth[mask_keep] = depth[mask_keep]
        return sparse_depth

    def create_rgbd(self, rgb, depth):
        """Stack rgb (HxWx3) with sparse depth -> HxWx4."""
        sparse_depth = self.create_sparse_depth(rgb, depth)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        return rgbd

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth) the raw data.
        """
        path, target = self.imgs[index]
        rgb, depth = self.loader(path)
        return rgb, depth

    def __getitem__(self, index):
        """Return (input_tensor, depth_tensor) for sample ``index``."""
        rgb, depth = self.__getraw__(index)
        if self.transform is not None:
            rgb_np, depth_np = self.transform(rgb, depth)
        else:
            raise (RuntimeError("transform not defined"))
        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)
        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = self.create_rgbd(rgb_np, depth_np)
        elif self.modality == 'd':
            input_np = self.create_sparse_depth(rgb_np, depth_np)
        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)
        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
class CarlaDataset(data.Dataset):
    """CARLA-simulator depth-completion dataset (lidar-projected sparse depth
    as input, rendered depth camera as ground truth)."""

    seed = 42
    output_size = (450, 1600)  # (height, width) after the road crop
    _modality_names = ['rgb', 'rgbd']
    _color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)
    # as the crop is hardcoded we assert a given image size to prevent errors
    _input_width = 1600
    _input_height = 900
    _crop_upper_x = 0
    _crop_upper_y = 450
    _crop_width = 1600
    _crop_height = 450
    # (top, left, height, width) — keeps only the lower (road) half of the frame.
    _road_crop = (_crop_upper_y, _crop_upper_x, _crop_height, _crop_width)

    def __init__(self,
                 root,
                 type,
                 modality="rgbd",
                 max_depth=500,
                 camera_rgb_name="cam_front",
                 camera_depth_name="cam_front_depth",
                 lidar_name="lidar_top",
                 ego_pose_sensor_name="imu_perfect"):
        """Set up loaders/projector and index all samples.

        Args:
            root: dataset root directory.
            type: ``'train'`` or ``'val'``.
            modality: 'rgb' or 'rgbd'.
            max_depth: clip distance in meters; np.inf disables clipping.
            camera_*/lidar_*/ego_pose_sensor_name: sensor identifiers.
        """
        # NOTE(review): seeds the *global* random module — affects other users.
        random.seed(self.seed)
        assert type == "val" or type == "train", "unsupported dataset type {}".format(
            type)
        root = pathlib.Path(root)
        self._loader = dataset_loader.DatasetLoader(root)
        self._loader.setup()
        self._projector = LidarToCameraProjector(self._loader,
                                                 camera_depth_name, lidar_name,
                                                 ego_pose_sensor_name)
        self._camera_rgb_sensor, _ = loading_utils.load_sensor_with_calib(
            self._loader, camera_rgb_name)
        self._camera_depth_sensor, _ = loading_utils.load_sensor_with_calib(
            self._loader, camera_depth_name)
        # check image sizes
        assert self._camera_rgb_sensor.meta[
            'image_size_x'] == self._input_width, "crop does not match input images, pls adapt."
        assert self._camera_rgb_sensor.meta[
            'image_size_y'] == self._input_height, "crop does not match input images, pls adapt."
        assert self._camera_depth_sensor.meta[
            'image_size_x'] == self._input_width, "crop does not match input images, pls adapt."
        assert self._camera_depth_sensor.meta[
            'image_size_y'] == self._input_height, "crop does not match input images, pls adapt."
        self._data_entries = self.prepare_dataset()
        if type == 'train':
            self._transform = self._train_transform
        elif type == 'val':
            self._transform = self._val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))
        # NOTE(review): message references self.modality_names but the class
        # defines _modality_names — a failing assert raises AttributeError.
        assert (modality in self._modality_names), "Invalid modality type: " + modality + "\n" + \
            "Supported dataset types are: " + ''.join(self.modality_names)
        self.modality = modality
        self._max_depth = max_depth

    def _sensor_data_to_full_filepath(self, s_data):
        # Resolve a sensor-data-relative file against the dataset root.
        path = s_data.file
        path = str(self._loader.dataset_root.joinpath(path))
        return path

    def prepare_dataset(self, ):
        """ We simply store all samples of the dataset in a random order. """
        data_entries = []
        for scene_token in tqdm.tqdm(self._loader.scene_tokens):
            scene = self._loader.get_scene(scene_token)
            sample_token = scene.first_sample_token
            # next token is none if scene ends
            while sample_token != None:
                sample = self._loader.get_sample(sample_token)
                data_entries.append(sample)
                sample_token = sample.next_token
        # shuffle
        # random.shuffle(data_entries)
        return data_entries

    def load_data(self, sample):
        """Return (rgb, lidar depth map, gt depth image) for one sample, clipped to max depth."""
        depth_map_lidar = self._projector.lidar2depth_map(sample)
        # non set values are nan -> set to 0.0 for training
        depth_map_lidar = np.nan_to_num(depth_map_lidar, nan=0)
        cam_bgr_img = loading_utils.load_camera_image(self._loader, sample,
                                                      self._camera_rgb_sensor)
        # bgr -> rgb
        cam_rgb_img = cam_bgr_img[..., ::-1]
        cam_depth_img = loading_utils.load_camera_image(
            self._loader, sample, self._camera_depth_sensor)
        # convert to single channel float img
        cam_depth_img = loading_utils.rgb_encoded_depth_to_float(cam_depth_img)
        if self._max_depth != np.inf:
            depth_map_lidar = np.clip(depth_map_lidar,
                                      a_min=0.0,
                                      a_max=self._max_depth)
            cam_depth_img = np.clip(cam_depth_img,
                                    a_min=0.0,
                                    a_max=self._max_depth)
        return cam_rgb_img, depth_map_lidar, cam_depth_img

    def visualize(self, sample, vis_dir="/workspace/visualization", i=0):
        """Dump lidar depth, rgb, and gt depth of ``sample`` as jpgs for inspection."""
        # TODO
        vis_dir = pathlib.Path(vis_dir)
        depth_map_lidar = self._projector.lidar2depth_map(sample)
        # for vis replace nan with 0.0
        depth_map_lidar = np.nan_to_num(depth_map_lidar)
        depth_map_lidar = visualization_utils.depth_to_img(depth_map_lidar)
        depth__lidar_path = str(
            vis_dir.joinpath("depth_lidar_{}.jpg".format(i)))
        cv2.imwrite(depth__lidar_path, depth_map_lidar)
        cam_rgb_img = loading_utils.load_camera_image(self._loader, sample,
                                                      self._camera_rgb_sensor)
        rgb_path = str(vis_dir.joinpath("rgb_{}.jpg".format(i)))
        cv2.imwrite(rgb_path, cam_rgb_img)
        cam_depth_img = loading_utils.load_camera_image(
            self._loader, sample, self._camera_depth_sensor)
        cam_depth_img = loading_utils.rgb_encoded_depth_to_float(cam_depth_img)
        cam_depth_img = visualization_utils.depth_to_img(cam_depth_img)
        depth__gt_path = str(vis_dir.joinpath("depth_gt_{}.jpg".format(i)))
        cv2.imwrite(depth__gt_path, cam_depth_img)

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth_lidar, depth_gt) the raw data.
        """
        sample = self._data_entries[index]
        return self.load_data(sample)

    def __getitem__(self, index):
        """Return (input_img, depth_gt) tensors; input is rgb or rgb+lidar depth."""
        rgb, depth_lidar, depth_gt = self.__getraw__(index)
        # apply transforms (for data augmentation)
        if self._transform is not None:
            rgb, depth_lidar, depth_gt = self._transform(
                rgb, depth_lidar, depth_gt)
        else:
            raise (RuntimeError("transform not defined"))
        input_img = None
        if self.modality == "rgb":
            input_img = rgb
        elif self.modality == "rgbd":
            # depth is h x w -> h x w x 1
            depth_lidar = np.expand_dims(depth_lidar, axis=-1)
            input_img = np.concatenate([rgb, depth_lidar], axis=-1)
        # convert to torch and flip channels in front
        input_img = to_tensor(input_img)
        depth_gt = to_tensor(depth_gt)
        # make 1 x h x w
        depth_gt = depth_gt.unsqueeze(0)
        return input_img, depth_gt

    def __len__(self):
        return len(self._data_entries)

    def _train_transform(self, rgb, sparse_depth, depth_gt):
        """Random scale/rotate/flip augmentation applied jointly to all maps."""
        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_gt = depth_gt / s
        # TODO critical why is the input not scaled in original implementation?
        sparse_depth = sparse_depth / s
        # TODO adapt and refactor
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
        # perform 1st step of data augmentation
        # TODO critical adjust sizes
        transform = transforms.Compose([
            transforms.Crop(*self._road_crop),
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb = transform(rgb)
        sparse_depth = transform(sparse_depth)
        # TODO needed?
        # Scipy affine_transform produced RuntimeError when the depth map was
        # given as a 'numpy.ndarray'
        depth_gt = np.asfarray(depth_gt, dtype='float32')
        depth_gt = transform(depth_gt)
        rgb = self._color_jitter(rgb)  # random color jittering
        # convert color [0,255] -> [0.0, 1.0] floats
        rgb = np.asfarray(rgb, dtype='float') / 255
        return rgb, sparse_depth, depth_gt

    def _val_transform(self, rgb, sparse_depth, depth_gt):
        """Deterministic validation transform: road crop + center crop only."""
        transform = transforms.Compose([
            transforms.Crop(*self._road_crop),
            transforms.CenterCrop(self.output_size),
        ])
        rgb = transform(rgb)
        rgb = np.asfarray(rgb, dtype='float') / 255
        sparse_depth = np.asfarray(sparse_depth, dtype='float32')
        sparse_depth = transform(sparse_depth)
        depth_gt = np.asfarray(depth_gt, dtype='float32')
        depth_gt = transform(depth_gt)
        return rgb, sparse_depth, depth_gt
import os import h5py import numpy as np import torch import torch.utils.data as data from dataloaders import make_dataset, transforms RAW_HEIGHT, RAW_WIDTH = 480, 640 # raw image size to_tensor = transforms.ToTensor() color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4) def h5_loader(path): """Image/Depth extractor from h5 format file. Args: path (str): Path to h5 format file Returns: rgb (np.array): RGB image (shape=[H,W,3]) depth (np.array): Depth image (shape=[H,W]) """ h5f = h5py.File(path, "r") rgb = np.array(h5f['rgb']) rgb = np.transpose(rgb, (1, 2, 0)) depth = np.array(h5f['depth']) return rgb, depth class NYUCamMat: """Calculate Resized Camera Intrinsic Matrix of NYUDepth dataset.
class MyDataloaderExt(data.Dataset):
    """Multi-channel h5 dataset: decodes a modality string into a stack of
    input channels (rgb/grey, one depth channel, one confidence channel)."""

    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self,
                 root,
                 type,
                 sparsifier=None,
                 max_gt_depth=math.inf,
                 modality='rgb'):
        """Index all (class, image) pairs under ``root/type``.

        Args:
            root: dataset root folder.
            type: ``'train'`` or ``'val'`` (also the subfolder name).
            sparsifier: used by the 'fd'/'kfd' fake-sparse channels.
            max_gt_depth: depths above this are zeroed out (inf = keep all).
            modality: channel spec string parsed by ``Modality``.
        """
        if type == 'train':
            self.transform = self.train_transform
        elif type == 'val':
            self.transform = self.val_transform
        else:
            raise RuntimeError('invalid type of dataset')
        dataset_folder = os.path.join(root, type)
        general_img_index = []
        self.beginning_offset = 0  # skip this many leading images per class
        classes, class_to_idx = find_classes(
            dataset_folder, ('ds' if '-k' in modality else None))
        general_class_data = [None] * len(classes)
        for i_class, curr_class in enumerate(classes):
            class_images = load_class_dataset(dataset_folder, curr_class)
            class_extras = None
            # 'dsx' classes carry a side-car file with extra (e.g. SLAM) data.
            if 'dsx' in curr_class:
                class_extras, class_images = load_class_extras(
                    root, type, class_images)
            general_class_data[i_class] = dict(name=curr_class,
                                               images=class_images,
                                               extras=class_extras)
            for i_img in range(self.beginning_offset, len(class_images)):
                general_img_index.append((i_class, i_img))
        assert len(general_img_index
                   ) > 0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(general_img_index),
                                                     type))
        self.root = root
        self.general_img_index = general_img_index
        self.general_class_data = general_class_data
        self.classes = classes
        self.class_to_idx = class_to_idx
        self.sparsifier = sparsifier
        self.modality = Modality(modality)
        self.max_gt_depth = max_gt_depth

    def train_transform(self, channels):
        # Subclass hook: transforms the whole channel dict at once.
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(rgb, channels):
        # NOTE(review): missing ``self`` — calling self.val_transform(channels)
        # binds the instance to ``rgb``; works only because it raises anyway.
        raise (RuntimeError("val_transform() is not implemented."))

    def create_sparse_depth(self, rgb, targe_depth):
        """Sample ``targe_depth`` at the sparsifier's mask locations."""
        if self.sparsifier is None:
            raise (RuntimeError("please select a sparsifier "))
        else:
            mask_keep = self.sparsifier.dense_to_sparse(rgb, targe_depth)
            sparse_depth = np.zeros(targe_depth.shape)
            sparse_depth[mask_keep] = targe_depth[mask_keep]
            return sparse_depth

    # gt_depth - gt depth
    # rgb -color channel
    # grey - disable
    # fd - fake slam uing the given sparsifier from gt depth
    # kor - slam keypoint + slam depth
    # kde - slam keypoint + mesh-based denoise depth
    # kgt - slam keypoint + gt depth
    # kw - sparse confidence measurements
    # dor - mesh+interpolation of slam points
    # dde - mesh+interpolation of denoised slam points
    # kvor - slam keypoint expanded to voronoi diagram cell around (dense)
    # d2dwor - 2d image distance transformantion using slam keypoints as seeds
    # d3dwde - 3d euclidian distance to closest the denoised slam keypoint
    # d3dwor - 3d euclidian distance to closest the slam keypoint
    def calc_from_sparse_input(self, in_sparse_map, voronoi=True, edt=True):
        """From a sparse map, compute a Voronoi fill and/or a distance map.

        Returns (res_voronoi, res_edt); each is None when not requested.

        NOTE(review): when ``voronoi`` is False, distance_transform_edt
        returns a plain array, so ``edt_result[0]`` takes its first *row*
        rather than the distance map — confirm callers never hit that path.
        NOTE(review): np.sqrt of the EDT output assumes squared distances,
        but distance_transform_edt returns Euclidean distances — verify.
        """
        res_voronoi = None
        res_edt = None
        if voronoi or edt:
            mask = (in_sparse_map < epsilon)  # True where no measurement
            edt_result = ndimage.distance_transform_edt(
                mask, return_indices=voronoi)
            res_edt = np.sqrt(edt_result[0])
            if voronoi:
                # Propagate each pixel's nearest seed value across its cell.
                res_voronoi = np.zeros_like(in_sparse_map)
                it = np.nditer(res_voronoi,
                               flags=['multi_index'],
                               op_flags=['writeonly'])
                with it:
                    while not it.finished:
                        xp = edt_result[1][0, it.multi_index[0],
                                           it.multi_index[1]]
                        yp = edt_result[1][1, it.multi_index[0],
                                           it.multi_index[1]]
                        it[0] = in_sparse_map[xp, yp]
                        it.iternext()
        return res_voronoi, res_edt

    #pose = none | gt | slam
    def h5_loader_general(self, img_path, extra_path, type, pose='none'):
        """Decode one h5 sample into a dict of named channel arrays.

        Args:
            img_path: main h5 file.
            extra_path: optional side-car h5 with pose/landmark data.
            type: modality spec; substring tests select which channels to build.
            pose: 'none' | 'gt' | 'slam' — which 4x4 t_wc to load, if any.

        Returns:
            dict of channels, or None when required pose data is missing.
        """
        result = dict()
        #path, target = self.imgs[index]
        h5f = h5py.File(img_path, "r")
        h5fextra = None
        if extra_path is not None:
            h5fextra = h5py.File(extra_path, "r")
        #target depth
        if 'dense_image_data' in h5f:
            dense_data = h5f['dense_image_data']
            depth = np.array(dense_data[0, :, :])
            mask_array = depth > 10000  # in this software inf distance is zero.
            depth[mask_array] = 0
            result['gt_depth'] = depth
            if 'normal_data' in h5f:
                # uint8 normals rescaled to [-1, 1]
                normal_rescaled = (
                    (np.array(h5f['normal_data'], dtype='float32') / 127.5) -
                    1.0)
                result['normal_x'] = normal_rescaled[0, :, :]
                result['normal_y'] = normal_rescaled[1, :, :]
                result['normal_z'] = normal_rescaled[2, :, :]
        elif 'depth' in h5f:
            depth = np.array(h5f['depth'])
            if not math.isinf(self.max_gt_depth) and self.max_gt_depth > 0:
                mask_max = depth > self.max_gt_depth
                depth[mask_max] = 0
            result['gt_depth'] = depth
        if pose == 'gt':
            if h5fextra is not None:
                result['t_wc'] = np.array(h5fextra['gt_twc_data'])
            else:
                if 'gt_twc_data' not in h5f:
                    return None
                result['t_wc'] = np.array(h5f['gt_twc_data'])
            # NOTE(review): ``path`` is undefined here (should be img_path);
            # a failing assert raises NameError instead of AssertionError.
            assert result['t_wc'].shape == (
                4, 4), 'file {} - the t_wc is not 4x4'.format(path)
        if pose == 'slam':
            if h5fextra is not None:
                result['t_wc'] = np.array(h5fextra['slam_twc_data'])
            else:
                if 'slam_twc_data' not in h5f:
                    return None
                result['t_wc'] = np.array(h5f['slam_twc_data'])
            # NOTE(review): same undefined ``path`` issue as above.
            assert result['t_wc'].shape == (
                4, 4), 'file {} - the t_wc is not 4x4'.format(path)
        # color data
        if 'rgb_image_data' in h5f:
            rgb = np.array(h5f['rgb_image_data'])
        elif 'rgb' in h5f:
            rgb = np.array(h5f['rgb'])
        else:
            rgb = None
        if 'grey' in type:
            grey_img = rgb2grayscale(rgb)
            result['grey'] = grey_img
        # CHW -> HWC
        rgb = np.transpose(rgb, (1, 2, 0))
        if 'rgb' in type:
            result['rgb'] = rgb
        #fake sparse data using the spasificator and ground-truth depth
        if 'fd' in type:
            result['fd'] = self.create_sparse_depth(rgb, depth)
        if 'kfd' in type:
            result['kfd'] = self.create_sparse_depth(rgb, depth)
        #using real keypoints from slam
        # if 'landmark_2d_data' in h5f:
        # Landmark rows are (x, y, slam_depth, denoised_depth, confidence).
        # assumes that layout — TODO confirm against the h5 writer.
        if h5fextra is not None:
            data_2d = np.array(h5fextra['landmark_2d_data'])
        else:
            if 'landmark_2d_data' not in h5f:
                data_2d = None
            else:
                data_2d = np.array(h5f['landmark_2d_data'])
        # else:
        #     data_2d = None
        if 'kor' in type:
            kor_input = np.zeros_like(depth)
            for row in data_2d:
                xp = int(math.floor(row[1]))
                yp = int(math.floor(row[0]))
                if (row[2] > 0):
                    kor_input[xp, yp] = row[2]
            #res_voronoi,res_edt = self.calc_from_sparse_input(kor_input,'dvor' in type,'d2dwor' in type)
            if 'kor' in type:
                result['kor'] = kor_input
            # if 'dvor' in type:
            #     result['dvor'] = res_voronoi
            # if 'd2dwor' in type:
            #     result['d2dwor'] = res_edt
        if 'kgt' in type or 'dvgt' in type or 'd2dwgt' in type:
            # Keypoint positions with ground-truth depth values.
            kgt_input = np.zeros_like(depth)
            for row in data_2d:
                xp = int(math.floor(row[1]))
                yp = int(math.floor(row[0]))
                if (depth[xp, yp] > 0):
                    kgt_input[xp, yp] = depth[xp, yp]
            res_voronoi, res_edt = self.calc_from_sparse_input(
                kgt_input, 'dvgt' in type, 'd2dwgt' in type)
            if 'kgt' in type:
                result['kgt'] = kgt_input
            if 'dvgt' in type:
                result['dvgt'] = res_voronoi
            if 'd2dwgt' in type:
                result['d2dwgt'] = res_edt
        if 'kde' in type or 'dvde' in type or 'd2dwde' in type:
            # Keypoint positions with denoised depth values.
            kde_input = np.zeros_like(depth)
            for row in data_2d:
                xp = int(math.floor(row[1]))
                yp = int(math.floor(row[0]))
                if (row[3] > 0):
                    kde_input[xp, yp] = row[3]
            res_voronoi, res_edt = self.calc_from_sparse_input(
                kde_input, 'dvde' in type, 'd2dwde' in type)
            if 'kde' in type:
                result['kde'] = kde_input
            if 'dvde' in type:
                result['dvde'] = res_voronoi
            if 'd2dwde' in type:
                result['d2dwde'] = res_edt
        if 'wkde' in type:
            kde_input = np.zeros_like(depth)
            for row in data_2d:
                xp = int(math.floor(row[1]))
                yp = int(math.floor(row[0]))
                if (row[3] > 0):
                    kde_input[xp, yp] = row[3]
            result['wkde'] = kde_input
        if 'kw' in type:
            # Sparse confidence/weight measurements.
            kw_input = np.zeros_like(depth)
            for row in data_2d:
                xp = int(math.floor(row[1]))
                yp = int(math.floor(row[0]))
                if (row[4] > 0):
                    kw_input[xp, yp] = row[4]
            result['kw'] = kw_input
        # Dense channels precomputed into planes of dense_image_data.
        if 'dor' in type:
            result['dor'] = np.array(dense_data[1, :, :])
        if 'dore' in type:
            result['dore'] = np.array(dense_data[1, :, :])
            # Fill holes from the fallback plane 2.
            dore_mask = result['dore'] < epsilon
            result['dore'][dore_mask] = np.array(
                dense_data[2, :, :])[dore_mask]
        if 'd3dwor' in type:
            result['d3dwor'] = np.array(dense_data[3, :, :])
        if 'dvor' in type:
            result['dvor'] = np.array(dense_data[2, :, :])
        if 'd2dwor' in type:
            result['d2dwor'] = np.array(dense_data[5, :, :])
        if 'dde' in type:
            result['dde'] = np.array(dense_data[4, :, :])
        if 'ddee' in type:
            result['ddee'] = np.array(dense_data[4, :, :])
            dore_mask = result['ddee'] < epsilon
            result['ddee'][dore_mask] = np.array(
                dense_data[2, :, :])[dore_mask]
        if 'd3dwde' in type:
            result['d3dwde'] = np.array(dense_data[6, :, :])
        if 'wdde' in type:
            result['wdde'] = np.array(dense_data[4, :, :])
        return result

    def to_tensor(self, img):
        """Convert a 2D/3D ndarray to a float torch tensor (no transpose)."""
        if not isinstance(img, np.ndarray):
            raise TypeError('img should be ndarray. Got {}'.format(type(img)))
        # handle numpy array
        if img.ndim == 3 or img.ndim == 2:
            img = torch.from_numpy(img.copy())
        else:
            raise RuntimeError(
                'img should be ndarray with 2 or 3 dimensions. Got {}'.format(
                    img.ndim))
        return img.float()

    def append_tensor3d(self, input_np, value):
        """Append ``value`` (HxW or CxHxW) to the channel-first stack ``input_np``.

        Passing anything that is not an ndarray (e.g. None) as ``input_np``
        starts a new stack.
        """
        if not isinstance(input_np, np.ndarray):
            # first element
            if value.ndim == 2:
                input_np = np.expand_dims(value, axis=0)
            elif value.ndim == 3:
                input_np = value
        else:
            # 2nd ,3rd ...
            if value.ndim == 2:
                input_np = np.append(input_np,
                                     np.expand_dims(value, axis=0),
                                     axis=0)
            elif value.ndim == 3:
                input_np = np.append(input_np, value, axis=0)
            else:
                raise RuntimeError(
                    'value should be ndarray with 2 or 3 dimensions. Got {}'.
                    format(value.ndim))
        return input_np

    def __getitem__(self, index):
        """Assemble (input_tensor, gt_depth_tensor, scale) for sample ``index``.

        Input layout: image channel(s), then one depth channel (zeros when
        absent), then one confidence channel (binary validity of channel 3
        when no explicit weight channel is requested).
        """
        class_idx, img_idx = self.general_img_index[index]
        class_entry = self.general_class_data[class_idx]
        img_path = class_entry['images'][img_idx]
        extra_path = (class_entry['extras'][img_idx]
                      if class_entry['extras'] is not None else None)
        channels_np = self.h5_loader_general(img_path, extra_path,
                                             self.modality)
        input_np = None
        if channels_np is None:
            # Required data missing from the h5 — caller must filter Nones.
            return None, None, None
        if self.transform is not None:
            channels_transformed_np = self.transform(channels_np)
        else:
            raise (RuntimeError("transform not defined"))
        target_data = None
        target_data = channels_transformed_np['gt_depth']
        num_image_channel, image_channel = self.modality.get_input_image_channel(
        )
        if num_image_channel > 0:
            input_np = self.append_tensor3d(
                input_np, channels_transformed_np[image_channel])
        else:
            raise RuntimeError('rgb channel expected')
        num_depth_channel, depth_channel = self.modality.get_input_depth_channel(
        )
        if num_depth_channel > 0:
            input_np = self.append_tensor3d(
                input_np, channels_transformed_np[depth_channel])
        else:
            # No depth requested: pad with a zero plane to keep layout fixed.
            zeros = np.zeros_like(input_np[0, :, :])
            input_np = self.append_tensor3d(input_np, zeros)
        num_weight_channel, weight_channel = self.modality.get_input_weight_channel(
        )
        if num_weight_channel > 0 and weight_channel != 'bin':
            input_np = self.append_tensor3d(
                input_np, channels_transformed_np[weight_channel])
        else:
            # Binary confidence: 1 where the depth plane (index 3) is valid.
            confidence = np.zeros_like(input_np[0, :, :])
            valid_mask = ((input_np[3, :, :] > 0))
            confidence[valid_mask] = 1.0
            input_np = self.append_tensor3d(input_np, confidence)
        input_tensor = self.to_tensor(input_np)
        target_depth_tensor = self.to_tensor(target_data).unsqueeze(0)
        # 'scale' is presumably added by the transform — TODO confirm.
        return input_tensor, target_depth_tensor, channels_transformed_np[
            'scale']

    def __len__(self):
        return len(self.general_img_index)