def train_transform(rgb, sparse, target, rgb_near, args):
    # s = np.random.uniform(1.0, 1.5) # random scaling
    # angle = np.random.uniform(-5.0, 5.0) # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    transform_geometric = transforms.Compose([
        # transforms.Rotate(angle),
        # transforms.Resize(s),
        transforms.BottomCrop((oheight, owidth)),  # oheight/owidth are module-level crop dimensions
        transforms.HorizontalFlip(do_flip)
    ])
    if sparse is not None:
        sparse = transform_geometric(sparse)
    target = transform_geometric(target)
    if rgb is not None:
        brightness = np.random.uniform(max(0, 1 - args.jitter),
                                       1 + args.jitter)
        contrast = np.random.uniform(max(0, 1 - args.jitter), 1 + args.jitter)
        saturation = np.random.uniform(max(0, 1 - args.jitter),
                                       1 + args.jitter)
        transform_rgb = transforms.Compose([
            transforms.ColorJitter(brightness, contrast, saturation, 0),
            transform_geometric
        ])
        rgb = transform_rgb(rgb)
        if rgb_near is not None:
            rgb_near = transform_rgb(rgb_near)
    # sparse = drop_depth_measurements(sparse, 0.9)

    return rgb, sparse, target, rgb_near
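A minimal sketch of the pattern above (hypothetical shapes; the custom transforms module is not required): the flip decision is sampled once per sample and baked into the composed transform so that RGB, sparse depth, and target all receive the identical geometric augmentation, while photometric jitter is applied to the RGB image only.

import numpy as np

do_flip = np.random.uniform(0.0, 1.0) < 0.5  # drawn once per sample

def horizontal_flip(img, flip):
    # flip along the width axis; .copy() avoids negative strides downstream
    return img[:, ::-1].copy() if flip else img

rgb = np.random.rand(8, 8, 3)
target = np.random.rand(8, 8)
rgb = horizontal_flip(rgb, do_flip)
target = horizontal_flip(target, do_flip)  # same decision keeps the pair aligned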
Example #2
    def train_transform(self, im, gt):
        im = np.array(im).astype(np.float32)
        gt = np.array(gt).astype(np.float32)

        s = np.random.uniform(1.0, 1.5)  # random scaling
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
        color_jitter = my_transforms.ColorJitter(0.4, 0.4, 0.4)

        transform = my_transforms.Compose([
            my_transforms.Crop(130, 10, 240, 1200),
            my_transforms.Resize(460 / 240, interpolation='bilinear'),
            my_transforms.Rotate(angle),
            my_transforms.Resize(s),
            my_transforms.CenterCrop(self.size),
            my_transforms.HorizontalFlip(do_flip)
        ])

        im_ = transform(im)
        im_ = color_jitter(im_)

        gt_ = transform(gt)

        im_ = np.array(im_).astype(np.float32)
        gt_ = np.array(gt_).astype(np.float32)

        im_ /= 255.0
        gt_ /= 100.0 * s
        im_ = to_tensor(im_)
        gt_ = to_tensor(gt_)

        gt_ = gt_.unsqueeze(0)

        return im_, gt_
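A note on the gt_ /= 100.0 * s line above: spatially enlarging an image by a factor s makes the scene appear s times closer, so depth labels are divided by s to stay geometrically consistent (the 100.0 looks like a unit conversion for this dataset). A tiny sketch with made-up values:

import numpy as np

s = 1.25  # random scale, drawn as above
gt = np.full((4, 4), 500.0, dtype=np.float32)  # hypothetical raw depth values
gt /= 100.0 * s  # convert units, then compensate for the spatial upscale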
Example #3
    def __init__(self, directory, dims, output_size, train=True):
        super().__init__(directory)
        self.dims = dims
        self.output_size = output_size
        if train:
            self.transform = self.train_transform
            self.color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)
        else:
            self.transform = self.validate_transform
Example #4
    def train_transform(self, im, gt, mask):
        im = np.array(im).astype(np.float32)
        im = cv2.resize(im, (512, 256), interpolation=cv2.INTER_AREA)
        gt = cv2.resize(gt, (512, 256), interpolation=cv2.INTER_AREA)
        mask = cv2.resize(mask, (512, 256), interpolation=cv2.INTER_AREA)

        # h,w,c = im.shape
        # th, tw = 256,512
        # x1 = random.randint(0, w - tw)
        # y1 = random.randint(0, h - th)
        # img = im[y1:y1 + th, x1:x1 + tw, :]
        # gt = gt[y1:y1 + th, x1:x1 + tw]
        # mask = mask[y1:y1 + th, x1:x1 + tw]
        s = np.random.uniform(1.0, 1.5)  # random scaling
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
        color_jitter = my_transforms.ColorJitter(0.4, 0.4, 0.4)

        transform = my_transforms.Compose([
            my_transforms.Rotate(angle),
            my_transforms.Resize(s),
            my_transforms.CenterCrop(self.size),
            my_transforms.HorizontalFlip(do_flip)
        ])

        im_ = transform(im)
        im_ = color_jitter(im_)

        gt_ = transform(gt)
        mask_ = transform(mask)
        im_ = np.array(im_).astype(np.float32)
        gt_ = np.array(gt_).astype(np.float32)
        mask_ = np.array(mask_).astype(np.float32)

        im_ /= 255.0
        gt_ /= s
        im_ = to_tensor(im_)
        gt_ = to_tensor(gt_)
        mask_ = to_tensor(mask_)

        gt_ = gt_.unsqueeze(0)
        mask_ = mask_.unsqueeze(0)

        return im_, gt_, mask_
Example #5
class MyDataloader(data.Dataset):
    modality_names = ['rgb', 'rgbd', 'd']  # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self,
                 root,
                 type,
                 sparsifier=None,
                 modality='rgb',
                 augArgs=None,
                 loader=h5_loader):
        classes, class_to_idx = find_classes(root)
        imgs = make_dataset(root, class_to_idx)
        assert len(imgs) > 0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(imgs), type))
        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        self.augArgs = augArgs
        if type == 'train':
            self.transform = self.train_transform
        elif type == 'val':
            self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))
        self.loader = loader
        self.sparsifier = sparsifier

        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
                                "Supported modalities are: " + ', '.join(self.modality_names)
        self.modality = modality

    def train_transform(self, rgb, depth):
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(self, rgb, depth):
        raise (RuntimeError("val_transform() is not implemented."))

    def create_sparse_depth(self, rgb, depth):
        if self.sparsifier is None:
            return depth
        else:
            mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
            sparse_depth = np.zeros(depth.shape)
            sparse_depth[mask_keep] = depth[mask_keep]
            return sparse_depth

    def create_rgbd(self, rgb, depth):
        sparse_depth = self.create_sparse_depth(rgb, depth)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        return rgbd

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth) the raw data.
        """
        path, target = self.imgs[index]
        rgb, depth = self.loader(path)
        return rgb, depth

    def __getitem__(self, index):
        rgb, depth = self.__getraw__(index)
        augArgs = self.augArgs
        if self.transform is not None:
            rgb_np, depth_np = self.transform(rgb, depth)
        else:
            raise (RuntimeError("transform not defined"))

        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)

        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = self.create_rgbd(rgb_np, depth_np)
        elif self.modality == 'd':
            input_np = self.create_sparse_depth(rgb_np, depth_np)

        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)

        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)

    def getFocalScale(self):
        #Returns a float which is the focal length scale factor
        scaleMin = self.augArgs.scale_min
        scaleMax = self.augArgs.scale_max
        s = np.random.uniform(scaleMin, scaleMax)  # random scaling factor
        return s

    def getDepthGroup(self):
        #Returns a float which is the depth group scale factor, sampled from a gaussian
        # centered on a randomly selected element of the global-scale-variances list
        if (isinstance(self.augArgs.scaleMeans,
                       float)):  #If no tuple is provided
            mean = self.augArgs.scaleMeans
            variance = self.augArgs.scaleVariances
            scale = np.random.normal(mean, variance, 1)
        else:
            idx = np.random.randint(0, len(self.augArgs.scaleMeans))
            mean = self.augArgs.scaleMeans[idx]
            variance = self.augArgs.scaleVariances[idx]
            scale = np.random.normal(mean, variance, 1)
        return scale
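A self-contained sketch of the masking idea behind create_sparse_depth above, using a hypothetical uniform-random sparsifier (the real dense_to_sparse implementation is not shown in this example):

import numpy as np

class UniformRandomSparsifier:
    """Hypothetical sparsifier: keep roughly 5% of depth pixels at random."""
    def __init__(self, keep_prob=0.05, seed=0):
        self.keep_prob = keep_prob
        self.rng = np.random.default_rng(seed)

    def dense_to_sparse(self, rgb, depth):
        return self.rng.random(depth.shape) < self.keep_prob  # boolean mask

depth = np.random.rand(228, 304).astype(np.float32) * 10.0  # dense depth in meters
rgb = np.zeros((228, 304, 3), dtype=np.float32)

mask_keep = UniformRandomSparsifier().dense_to_sparse(rgb, depth)
sparse_depth = np.zeros(depth.shape, dtype=depth.dtype)
sparse_depth[mask_keep] = depth[mask_keep]  # unsampled pixels stay zero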
Example #6
class DepthNoiseDataset(Dataset):
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self,
                 root_dir,
                 type,
                 sparsifier=None,
                 num_augmented=0,
                 sample_cap=-1,
                 scene_cap=-1,
                 sim_offline=False):

        self.root_dir = root_dir
        self.sim_offline = sim_offline
        self.sparsifier = sparsifier

        self.transform = self.val_transform
        if type == "train":
            self.train_transform

        self.folders, self.descriptor = build_descriptor(
            root_dir, sample_cap, scene_cap, num_augmented)
        self.output_size = (228, 304)

        self.type = type
        self.sample_cap = sample_cap

    def __len__(self):
        return len(self.descriptor)

    def create_sparse_depth(self, rgb, depth, seed=None):
        if self.sparsifier is None:
            return depth
        else:
            mask_keep = self.sparsifier.dense_to_sparse(rgb, depth, seed)
            sparse_depth = np.zeros(depth.shape)

            # if a residual map has been passed
            if isinstance(mask_keep, tuple):
                mask_keep, noise_offset = mask_keep
                noise_offset *= 10.
                depth = np.clip(depth + noise_offset, 0., 10.)

            sparse_depth[mask_keep] = depth[mask_keep]
            return sparse_depth

    def __getitem__(self, idx):
        # restrict the amount of possible input
        folder_idx, sample_idx, aug_seed = self.descriptor[idx]

        # get scene directory name
        scene_dir = os.path.join(self.root_dir, self.folders[folder_idx])

        if self.type == "train":
            if self.sample_cap > 0:
                rgb_name = os.path.join(scene_dir, "med_image_%03d.png")
                rgb_name = rgb_name % self.sample_cap
                gt_d_name = os.path.join(scene_dir, "med_depth_%03d.png")
                gt_d_name = gt_d_name % self.sample_cap
            else:
                rgb_name = os.path.join(scene_dir, "med_image.png")
                gt_d_name = os.path.join(scene_dir, "med_depth_filled.png")

            # the sparse to dense method labels pure white depths as dropout
            depth = np.clip(cv2.imread(gt_d_name, 0), 0, 254)
            depth = (depth.astype(np.float64) / 255.) * 10.
        else:
            rgb_name = os.path.join(scene_dir, "med_image.png")
            gt_d_name = os.path.join(scene_dir, "med_depth_filled.npy")
            depth = np.load(gt_d_name) * 10.

        # load images
        rgb = cv2.imread(rgb_name, 1)
        sp_d_name = os.path.join(scene_dir, "depths", "depth_%04d.npy")
        sp_d_name = sp_d_name % sample_idx

        # get sparse depth
        if sample_idx < 0:
            seed = aug_seed if self.sim_offline else None
            sparse_depth = self.create_sparse_depth(rgb, depth, seed)
        else:
            sparse_depth = np.clip(np.load(sp_d_name), 0, 9999).astype('float')
            sparse_depth = sparse_depth / 1000.

        # Applying augmentation
        seed = int(time())
        rgb_np, depth_np = self.transform(rgb, depth, seed)
        _, sparse_depth_np = self.transform(rgb, sparse_depth, seed)

        input_np = np.expand_dims(sparse_depth_np, axis=2)
        input_np = np.append(rgb_np, input_np, axis=2)

        # ensure the input tensor has a leading channel dimension (CxHxW)
        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)

        return input_tensor, depth_tensor

    def train_transform(self, rgb, depth, random_seed):
        np.random.seed(random_seed)

        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_np = depth / s
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # perform 1st step of data augmentation
        transform = transforms.Compose([
            # this is for computational efficiency, since rotation can be slow
            transforms.Resize(250.0 / iheight),
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np)  # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)

        return rgb_np, depth_np

    def val_transform(self, rgb, depth, random_seed):
        np.random.seed(random_seed)

        depth_np = depth
        transform = transforms.Compose([
            transforms.Resize(240.0 / iheight),
            transforms.CenterCrop(self.output_size),
        ])
        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)

        return rgb_np, depth_np
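The transforms above take a random_seed so that __getitem__ can call the same transform twice, once for the RGB/ground-truth pair and once for the sparse depth, and still draw identical augmentation parameters. A minimal sketch of that property:

import numpy as np
from time import time

def draw_params(seed):
    # re-seeding before each call reproduces the same s / angle / do_flip
    np.random.seed(seed)
    s = np.random.uniform(1.0, 1.5)
    angle = np.random.uniform(-5.0, 5.0)
    do_flip = np.random.uniform(0.0, 1.0) < 0.5
    return s, angle, do_flip

seed = int(time())
assert draw_params(seed) == draw_params(seed)  # paired images stay aligned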
Example #7
class MyDataloader(data.Dataset):
    modality_names = ['rgb']

    def is_image_file(self, filename):
        IMG_EXTENSIONS = ['.h5']
        return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)

    def find_classes(self, dir):
        classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))]
        classes.sort()
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        return classes, class_to_idx

    def make_dataset(self, dir, class_to_idx):
        images = []
        dir = os.path.expanduser(dir)
        for target in sorted(os.listdir(dir)):
            d = os.path.join(dir, target)
            if not os.path.isdir(d):
                continue
            for root, _, fnames in sorted(os.walk(d)):
                for fname in sorted(fnames):
                    if self.is_image_file(fname):
                        path = os.path.join(root, fname)
                        item = (path, class_to_idx[target])
                        images.append(item)
        return images

    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, split, modality='rgb', loader=h5_loader):
        classes, class_to_idx = self.find_classes(root)
        imgs = self.make_dataset(root, class_to_idx)
        assert len(imgs)>0, "Found 0 images in subfolders of: " + root + "\n"
        # print("Found {} images in {} folder.".format(len(imgs), split))
        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        if split == 'train':
            self.transform = self.train_transform
        elif split == 'holdout':
            self.transform = self.val_transform
        elif split == 'val':
            self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset split: " + split + "\n"
                                "Supported dataset splits are: train, val"))
        self.loader = loader

        assert (modality in self.modality_names), "Invalid modality split: " + modality + "\n" + \
                                "Supported dataset splits are: " + ''.join(self.modality_names)
        self.modality = modality

    def train_transform(self, rgb, depth):
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(self, rgb, depth):
        raise (RuntimeError("val_transform() is not implemented."))

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth) the raw data.
        """
        path, target = self.imgs[index]
        rgb, depth = self.loader(path)
        return rgb, depth

    def __getitem__(self, index):
        rgb, depth = self.__getraw__(index)
        if self.transform is not None:
            rgb_np, depth_np = self.transform(rgb, depth)
        else:
            raise(RuntimeError("transform not defined"))

        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)

        if self.modality == 'rgb':
            input_np = rgb_np

        to_tensor = transforms.ToTensor()
        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)

        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
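The while input_tensor.dim() < 3 loop that recurs in these loaders exists because a single-channel array (e.g. depth-only input) converts to a 2-D tensor; unsqueezing prepends channel dimensions until the tensor is CxHxW. A small sketch, assuming PyTorch:

import torch

input_tensor = torch.rand(228, 304)  # an HxW map, e.g. sparse depth alone
while input_tensor.dim() < 3:
    input_tensor = input_tensor.unsqueeze(0)
print(input_tensor.shape)  # torch.Size([1, 228, 304])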
Example #8
class MyDataloader(data.Dataset):
    modality_names = ['rgb', 'rgbd', 'd']  # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)  # HSV-space color variation

    def __init__(self,
                 root,
                 type,
                 sparsifier=None,
                 modality='rgb',
                 loader=h5_loader):
        classes, class_to_idx = find_classes(root)
        imgs = make_dataset(root, class_to_idx)
        assert len(imgs) > 0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(imgs), type))
        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        # the training and validation sets use different data-processing modes
        if type == 'train':
            self.transform = self.train_transform
        elif type == 'val':
            self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))
        self.loader = loader
        self.sparsifier = sparsifier

        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
                                "Supported modalities are: " + ', '.join(self.modality_names)
        self.modality = modality

    def train_transform(self, rgb, depth):
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(self, rgb, depth):
        raise (RuntimeError("val_transform() is not implemented."))

    def create_sparse_depth(self, rgb, depth):  # generate sparse depth samples
        if self.sparsifier is None:
            return depth
        else:
            mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
            sparse_depth = np.zeros(depth.shape)  # initialized to all zeros
            mask_Leon = np.zeros(depth.shape)
            # keep every 8th pixel in each direction (a regular sampling grid)
            for x in range(mask_Leon.shape[1]):
                for y in range(depth.shape[0]):
                    if (x % 8 == 0) and (y % 8 == 0):
                        mask_Leon[y, x] = 1
            # sparse_depth[mask_keep] = depth[mask_keep]  # generate sparse points; with this commented out, zeros remain instead
            # return sparse_depth
            # shrink the depth map, then enlarge it again (note: depth_rescale is unused below)
            depth_rescale = transform.resize(depth, (28, 38))
            depth_rescale = transform.resize(depth_rescale, (228, 304))

            return depth * mask_Leon
            #return sparse_depth
    def create_rgbd(self, rgb, depth):
        sparse_depth = self.create_sparse_depth(rgb, depth)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        return rgbd

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth) the raw data.
        """
        path, target = self.imgs[index]
        rgb, depth = self.loader(path)
        return rgb, depth

    def __getitem__(self, index):
        rgb, depth = self.__getraw__(index)
        if self.transform is not None:
            rgb_np, depth_np = self.transform(rgb, depth)
        else:
            raise (RuntimeError("transform not defined"))

        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)

        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = self.create_rgbd(rgb_np, depth_np)
        elif self.modality == 'd':
            input_np = self.create_sparse_depth(rgb_np, depth_np)

        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)

        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
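The double loop in create_sparse_depth above builds a regular grid that keeps every 8th pixel in each direction; the same mask can be produced with one vectorized slice assignment, sketched here:

import numpy as np

depth = np.random.rand(228, 304).astype(np.float32)
mask = np.zeros(depth.shape, dtype=depth.dtype)
mask[::8, ::8] = 1.0  # equivalent to the (x % 8 == 0) and (y % 8 == 0) loop
sparse_depth = depth * mask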
Example #9
class MyDataloader(data.Dataset):
    modality_names = ['rgb', 'rgbd', 'd']
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, type, sparsifier=None, modality='rgb', loader=h5_loader):
        
        if loader == png_loader:
            imgs = make_dataset_png(root)
        elif loader == h5_loader:
            imgs = make_dataset_h5(root)
            
#         if(type == val and len(imgs) > 3200):
#             np.random.shuffle(imgs)
#             imgs = imgs[:3200]
            
        assert len(imgs)>0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(imgs), type))
        self.root = root
        self.imgs = imgs

        if type == 'train':
            self.transform = self.train_transform
        elif type == 'val':
            self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))
        self.loader = loader
        self.sparsifier = sparsifier

        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
                                "Supported modalities are: " + ', '.join(self.modality_names)
        self.modality = modality

    def train_transform(self, rgb, depth):
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(self, rgb, depth):
        raise (RuntimeError("val_transform() is not implemented."))

    def create_sparse_depth(self, rgb, depth):
        if self.sparsifier is None:
            return depth
        else:
            mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
            sparse_depth = np.zeros(depth.shape)
            sparse_depth[mask_keep] = depth[mask_keep]
            return sparse_depth

    def create_rgbd(self, rgb, depth):
        sparse_depth = self.create_sparse_depth(rgb, depth)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2) #Use the tensor version of expand_dims...
        return rgbd

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth) the raw data.
        """
        if self.loader == png_loader:
            rgb_path, depth_path = self.imgs[index]
            rgb, depth = self.loader(rgb_path, depth_path)
        if self.loader == h5_loader:
            path = self.imgs[index]
            rgb, depth = self.loader(path)
            
        return rgb, depth

    def __getitem__(self, index):
        rgb, depth = self.__getraw__(index)

        if self.transform is not None:
            rgb_np, depth_np = self.transform(rgb, depth)
        else:
            raise(RuntimeError("transform not defined"))


        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)

        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = self.create_rgbd(rgb_np, depth_np)
        elif self.modality == 'd':
            input_np = self.create_sparse_depth(rgb_np, depth_np)

        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)

        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
Example #10
class Floorplan3DDataset(data.Dataset):
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)
    def __init__(self, root, dataset_type, split):
        self.output_size = (257, 353)
        self.root = root
        file_list = "{}/{}_{}.list".format(root, dataset_type, split)
        with open(file_list, "r") as f:
            self.imgs = f.readlines()

        self.depth_loader = DepthLoader
        self.color_loader = PILLoader

        if split == 'train':
            self.transform = self.train_transform
        elif split == 'val':
            self.transform = self.val_transform
        

    def train_transform(self, rgb, depth):       
        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_np = depth / s
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # perform 1st step of data augmentation
        transform = transforms.Compose([
            transforms.Resize(288.0 / iheight),  # this is for computational efficiency, since rotation can be slow
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np)  # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)

        return rgb_np, depth_np

    def val_transform(self, rgb, depth):
        depth_np = depth
        transform = transforms.Compose([
            transforms.Resize(288.0 / iheight),
            transforms.CenterCrop(self.output_size),
        ])
        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)

        return rgb_np, depth_np

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth) the raw data.
        """
        (path, target) = self.imgs[index].strip().split("  ")
        path = os.path.join(self.root, path)
        target = os.path.join(self.root, target)
        rgb = self.color_loader(path)
        depth = self.depth_loader(target)
        return rgb, depth

    def __getitem__(self, index):
        rgb, depth = self.__getraw__(index)
        if self.transform is not None:
            rgb_np, depth_np = self.transform(rgb, depth)
        else:
            raise (RuntimeError("transform not defined"))

        input_tensor = to_tensor(rgb_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)

        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
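For reference, a sketch of the list-file convention that __getraw__ above assumes: each line of the "{dataset_type}_{split}.list" file holds a relative RGB path and a relative depth path separated by two spaces (the paths below are hypothetical):

import os

root = "/data/floorplan3d"  # hypothetical dataset root
line = "scene0/rgb_0001.png  scene0/depth_0001.png\n"  # hypothetical list entry
path, target = line.strip().split("  ")
path = os.path.join(root, path)
target = os.path.join(root, target)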
Example #11
class MyDataloader(data.Dataset):
    modality_names = ['rgb', 'rgbd', 'd']  # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self,
                 root,
                 type,
                 sparsifier=None,
                 modality='rgb',
                 loader=h5_loader):
        classes, class_to_idx = find_classes(root)
        imgs = make_dataset(root, class_to_idx)
        assert len(imgs) > 0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(imgs), type))
        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        self.mode = type
        if type == 'train':
            self.transform = self.train_transform
        elif type == 'val':
            self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))
        self.loader = loader
        self.sparsifier = sparsifier

        self.K = None
        self.output_size = None

        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
                                "Supported modalities are: " + ', '.join(self.modality_names)
        self.modality = modality

    def train_transform(self, rgb, depth):
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(self, rgb, depth):
        raise (RuntimeError("val_transform() is not implemented."))

    def create_sparse_depth(self, rgb, depth):
        if self.sparsifier is None:
            return depth
        else:
            mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
            sparse_depth = np.zeros(depth.shape)
            sparse_depth[mask_keep] = depth[mask_keep]
            return sparse_depth

    def create_rgbd(self, rgb, depth):
        sparse_depth = self.create_sparse_depth(rgb, depth)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        return rgbd

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth) the raw data.
        """
        path, target = self.imgs[index]
        rgb, depth = self.loader(path)
        rgb_near = near_rgb(path, self.loader)
        return rgb, depth, rgb_near

    def __getitem__(self, index):
        rgb, depth, rgb_near = self.__getraw__(index)
        if self.transform is not None:
            rgb_np, depth_np, rgb_near_np = self.transform(
                rgb, depth, rgb_near)
        else:
            raise (RuntimeError("transform not defined"))

        # If in train mode, compute pose for near image
        #print("K = ", self.K)
        rot_mat, t_vec = None, None
        if self.mode == "train":
            rgb_cv = (rgb_np * 255).astype(np.uint8)
            rgb_near_cv = (rgb_near_np * 255).astype(np.uint8)
            succ, rot_vec, t_vec = get_pose(rgb_cv, depth_np, rgb_near_cv,
                                            self.K)
            if succ:
                rot_mat, _ = cv2.Rodrigues(rot_vec)
            else:
                rgb_near_np = rgb_np
                t_vec = np.zeros((3, 1))
                rot_mat = np.eye(3)

        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)

        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = self.create_rgbd(rgb_np, depth_np)
        elif self.modality == 'd':
            input_np = self.create_sparse_depth(rgb_np, depth_np)

        #input_tensor = to_tensor(input_np)
        #while input_tensor.dim() < 3:
        #    input_tensor = input_tensor.unsqueeze(0)
        #depth_tensor = to_tensor(depth_np)
        #depth_tensor = depth_tensor.unsqueeze(0)
        #rgb_near_tensor = to_tensor(rgb_near)
        #print(input_np.shape)
        candidates = {"rgb":rgb_np, "gt":np.expand_dims(depth_np,-1), "d":input_np[:,:,3:], \
                      "r_mat":rot_mat, "t_vec":t_vec, "rgb_near":rgb_near_np}

        #print(self.K)
        intrinsics = {
            "fx": self.K[0, 0],
            "fy": self.K[1, 1],
            "cx": self.K[0, 2],
            "cy": self.K[1, 2],
            "output_size": self.output_size
        }
        items = {
            key: to_float_tensor(val)
            for key, val in candidates.items() if val is not None
        }

        return items, intrinsics

        #return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
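In the training branch above, get_pose returns a rotation vector that cv2.Rodrigues converts to the 3x3 rotation matrix stored under "r_mat". A minimal sketch of that conversion, plus the identity-pose fallback used when pose estimation fails:

import numpy as np
import cv2

rot_vec = np.array([[0.0], [0.0], [np.pi / 2]])  # 90 degrees about the z axis
rot_mat, _ = cv2.Rodrigues(rot_vec)  # (3, 3) rotation matrix

# fallback when get_pose does not succeed
t_vec = np.zeros((3, 1))
rot_mat_identity = np.eye(3)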
Example #12
class UWTestDataset(Dataset):
    modality_names = ['rgb', 'rgbd', 'd']  # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, modality='rgb'):
        imgs = []
        data_path = None

        val = 'real_uw_test_list.txt'

        self.root = root
        self.output_size = (228, 304)

        data_path = os.path.join(self.root, val)
        self.transform = self.val_transform
        fh = open(data_path, 'r')

        for line in fh:
            line = line.rstrip()
            words = line.split()

            rgb_path = self.root + words[0]
            depth_path = self.root + words[1]

            imgs.append((rgb_path, depth_path))

        self.imgs = imgs
        # self.imgs = imgs[:64]       # debug

        self.modality = modality

    def val_transform(self, rgb, depth):
        depth_np = depth
        transform = transforms.Compose([
            transforms.Resize(240.0 / iheight),
            transforms.CenterCrop(self.output_size),
        ])
        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)

        # for compare with Eigen's paper
        depth_np = depth_data_transforms(depth_np)

        return rgb_np, depth_np

    def __getitem__(self, index):
        rgb_path, depth_path = self.imgs[index]
        depth = read_depth(depth_path)
        rgb = cv2.imread(rgb_path)
        rgb = cv2.resize(rgb, (640, 480))
        rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)

        # water style
        # rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB).astype(np.float)
        # rgb /= rgb.max()  # normalize to [0, 1]
        # rgb = uw_style(rgb, depth)
        # rgb /= rgb.max() / 255  # normalize to uint8
        # rgb = rgb.astype(np.uint8)
        # rgb = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

        if self.transform is not None:
            rgb_np, depth_np = self.transform(rgb, depth)
        else:
            raise (RuntimeError("transform not defined"))

        if self.modality == 'rgb':
            input_np = rgb_np

        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)

        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
Example #13
class OmniDataset(Dataset):
    modality_names = ['rgb', 'rgbd', 'd']  # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, type, sparsifier=None, modality='rgbd'):
        imgs = []
        data_path = None

        train = 'original_train_split.txt'
        # train = 'original_train_debug.txt'
        val = 'original_test_split.txt'
        # val = 'original_test_debug.txt'

        self.root = root
        # self.output_size = (243, 486)
        self.output_size = (256, 512)

        if type == 'train':
            data_path = os.path.join(self.root, train)
            self.transform = self.train_transform
        elif type == 'val':
            data_path = os.path.join(self.root, val)
            self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))

        fh = open(data_path, 'r')

        for line in fh:
            line = line.rstrip()
            words = line.split()

            rgb_path = self.root + words[0]
            depth_path = self.root + words[1]

            imgs.append((rgb_path, depth_path))

        self.imgs = imgs
        # self.imgs = imgs[:64]       # debug
        self.sparsifier = sparsifier

        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
                                "Supported modalities are: " + ', '.join(self.modality_names)
        self.modality = modality

    def train_transform(self, rgb, depth):
        # for create fake underwater images
        rgb = uw_style(rgb, depth)
        rgb /= rgb.max() / 255
        rgb = rgb.astype(np.uint8)

        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_np = depth / s
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # perform 1st step of data augmentation
        transform = transforms.Compose([
            # transforms.Rotate(angle),
            # transforms.Resize(s),
            # transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip),
            transforms.Resize(size=self.output_size)
        ])
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np)  # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)

        return rgb_np, depth_np

    def val_transform(self, rgb, depth):
        # for create fake underwater images
        rgb = uw_style(rgb, depth)
        rgb /= rgb.max() / 255
        rgb = rgb.astype(np.uint8)

        depth_np = depth
        transform = transforms.Compose([
            # transforms.CenterCrop(self.output_size),
            transforms.Resize(size=self.output_size)
        ])
        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)

        return rgb_np, depth_np

    def create_sparse_depth(self, rgb, depth):
        if self.sparsifier is None:
            return depth
        else:
            mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
            sparse_depth = np.zeros(depth.shape)
            sparse_depth[mask_keep] = depth[mask_keep]
            return sparse_depth

    def create_rgbd(self, rgb, depth):
        sparse_depth = self.create_sparse_depth(rgb, depth)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        return rgbd

    def __getitem__(self, index):
        rgb_path, depth_path = self.imgs[index]
        # rgb = cv2.imread(rgb_path)
        # rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
        rgb = plt.imread(rgb_path)
        depth = read_depth(depth_path)
        # depth = fill_depth_colorization(rgb, depth)   # fill the empty hole of the depth image, it will spend a lot of time

        if self.transform is not None:
            rgb_np, depth_np = self.transform(rgb, depth)
        else:
            raise (RuntimeError("transform not defined"))

        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = self.create_rgbd(rgb_np, depth_np)
        elif self.modality == 'd':
            input_np = self.create_sparse_depth(rgb_np, depth_np)

        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)

        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
Example #14
def train_transform(rgb, sparse, target, position, args):
    # s = np.random.uniform(1.0, 1.5) # random scaling
    # angle = np.random.uniform(-5.0, 5.0) # random rotation degrees
    oheight = args.val_h
    owidth = args.val_w

    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    transforms_list = [
        # transforms.Rotate(angle),
        # transforms.Resize(s),
        transforms.BottomCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip)
    ]

    # if small_training == True:
    # transforms_list.append(transforms.RandomCrop((rheight, rwidth)))

    transform_geometric = transforms.Compose(transforms_list)

    if sparse is not None:
        sparse = transform_geometric(sparse)
    target = transform_geometric(target)
    if rgb is not None:
        brightness = np.random.uniform(max(0, 1 - args.jitter),
                                       1 + args.jitter)
        contrast = np.random.uniform(max(0, 1 - args.jitter), 1 + args.jitter)
        saturation = np.random.uniform(max(0, 1 - args.jitter),
                                       1 + args.jitter)
        transform_rgb = transforms.Compose([
            transforms.ColorJitter(brightness, contrast, saturation, 0),
            transform_geometric
        ])
        rgb = transform_rgb(rgb)
    # sparse = drop_depth_measurements(sparse, 0.9)

    if position is not None:
        bottom_crop_only = transforms.Compose(
            [transforms.BottomCrop((oheight, owidth))])
        position = bottom_crop_only(position)

    # random crop
    #if small_training == True:
    if not args.not_random_crop:
        h = oheight
        w = owidth
        rheight = args.random_crop_height
        rwidth = args.random_crop_width
        # randomlize
        i = np.random.randint(0, h - rheight + 1)
        j = np.random.randint(0, w - rwidth + 1)

        if rgb is not None:
            if rgb.ndim == 3:
                rgb = rgb[i:i + rheight, j:j + rwidth, :]
            elif rgb.ndim == 2:
                rgb = rgb[i:i + rheight, j:j + rwidth]

        if sparse is not None:
            if sparse.ndim == 3:
                sparse = sparse[i:i + rheight, j:j + rwidth, :]
            elif sparse.ndim == 2:
                sparse = sparse[i:i + rheight, j:j + rwidth]

        if target is not None:
            if target.ndim == 3:
                target = target[i:i + rheight, j:j + rwidth, :]
            elif target.ndim == 2:
                target = target[i:i + rheight, j:j + rwidth]

        if position is not None:
            if position.ndim == 3:
                position = position[i:i + rheight, j:j + rwidth, :]
            elif position.ndim == 2:
                position = position[i:i + rheight, j:j + rwidth]

    return rgb, sparse, target, position
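The random-crop block above draws a single (i, j) offset and reuses it for every modality so that rgb, sparse, target, and position stay pixel-aligned. A compact sketch with hypothetical sizes:

import numpy as np

oheight, owidth = 352, 1216  # hypothetical full size after BottomCrop
rheight, rwidth = 320, 1216  # hypothetical random-crop size
i = np.random.randint(0, oheight - rheight + 1)
j = np.random.randint(0, owidth - rwidth + 1)

rgb = np.random.rand(oheight, owidth, 3)
target = np.random.rand(oheight, owidth)
rgb = rgb[i:i + rheight, j:j + rwidth, :]  # same offsets for every modality
target = target[i:i + rheight, j:j + rwidth]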
Example #15
class MHKDataset(Dataset):
    modality_names = ['rgb', 'rgbd', 'd']  # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, modality='rgb'):
        imgs = []
        data_path = None

        val = 'MHL-15k.txt'

        self.root = root
        self.output_size = (228, 304)

        data_path = os.path.join(self.root, val)
        self.transform = self.val_transform
        fh = open(data_path, 'r')

        for line in fh:
            line = line.rstrip()

            rgb_path = self.root + line

            imgs.append(rgb_path)

        self.imgs = imgs
        # self.imgs = imgs[:64]       # debug

        self.modality = modality

    def val_transform(self, rgb):
        transform = transforms.Compose([
            transforms.Resize(240.0 / iheight),
            transforms.CenterCrop(self.output_size),
        ])
        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255

        return rgb_np

    def __getitem__(self, index):
        rgb_path = self.imgs[index]

        rgb = cv2.imread(rgb_path)
        rgb = cv2.resize(rgb, (640, 480))
        # rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            rgb_np = self.transform(rgb)
        else:
            raise (RuntimeError("transform not defined"))

        if self.modality == 'rgb':
            input_np = rgb_np

        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)

        return input_tensor

    def __len__(self):
        return len(self.imgs)
Example #16
class RawDataloader(data.Dataset):
    modality_names = ['rgb', 'rgbd', 'd']  # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self,
                 root,
                 type,
                 sparsifier=None,
                 modality='rgb',
                 loader=h5_loader,
                 aug_limit=-1):
        classes, class_to_idx = find_classes(root)
        # raw_classes, raw_class_to_idx = find_classes(raw_root)

        imgs = make_dataset(root, class_to_idx, aug_limit)
        # raw_imgs = make_dataset(raw_root, class_to_idx)

        assert len(imgs) > 0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(imgs), type))
        # assert len(raw_imgs)>0, "Found 0 images in subfolders of: " + raw_root + "\n"
        # print("Found {} images in {} folder.".format(len(raw_imgs), type))

        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx

        # self.raw_root = raw_root
        # self.raw_imgs = raw_imgs
        # self.raw_classes = raw_classes
        # self.raw_class_to_idx = raw_class_to_idx
        if type == 'train':
            self.transform = self.train_transform
        elif type == 'val':
            self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))
        self.loader = loader
        # self.loader = TempLoader(type, loader)
        self.sparsifier = sparsifier

        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
              "Supported modalities are: " + ', '.join(self.modality_names)
        self.modality = modality

    def train_transform(self, rgb, depth):
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(self, rgb, depth):
        raise (RuntimeError("val_transform() is not implemented."))

    def create_sparse_depth(self, rgb, depth):
        if self.sparsifier is None:
            return depth
        else:
            mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
            sparse_depth = np.zeros(depth.shape)

            # if a residual map has been passed
            if isinstance(mask_keep, tuple):
                mask_keep, noise_offset = mask_keep
                noise_offset *= 10.
                depth = np.clip(depth + noise_offset, 0., 10.)

            sparse_depth[mask_keep] = depth[mask_keep]
            return sparse_depth

    def create_rgbd(self, rgb, depth):
        sparse_depth = self.create_sparse_depth(rgb, depth)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        return rgbd

    def __getraw__(self, index):
        """
		Args:
			index (int): Index

		Returns:
			tuple: (rgb, depth) the raw data.
		"""
        path, target = self.imgs[index]
        rgb, depth, raw_depth = self.loader(path)

        # raw_path, raw_target = self.raw_imgs[index]
        # _, raw_depth, _ = self.loader(raw_path)

        return rgb, depth, raw_depth

    def __getitem__(self, index):
        rgb, depth, raw_depth = self.__getraw__(index)
        # pick_real = random.randint(0, 10) == 0
        # sparse_depth = self.create_sparse_depth(rgb, depth) if not pick_real else raw_depth
        sparse_depth = raw_depth if self.sparsifier is None else self.create_sparse_depth(
            rgb, depth)

        if self.transform is not None:
            seed = int(time.time())
            rgb_np, depth_np = self.transform(rgb, depth, seed)
            _, sparse_depth_np = self.transform(rgb, sparse_depth, seed)
        else:
            raise (RuntimeError("transform not defined"))

        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            # input_np = self.create_rgbd(rgb_np, raw_depth_np)
            input_np = np.append(rgb_np,
                                 np.expand_dims(sparse_depth_np, axis=2),
                                 axis=2)
        elif self.modality == 'd':
            # input_np = self.create_sparse_depth(rgb_np, raw_depth_np)
            input_np = sparse_depth_np

        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)

        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
Example #17
class MyDataloader(data.Dataset):
    modality_names = ['rgb', 'rgbd', 'd', 'rgbl', 'rgbde'] # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, type, sparsifier=None, modality='rgb', make_dataset=make_dataset, loader=h5_loader):
        classes, class_to_idx = find_classes(root)
        imgs = make_dataset(root, class_to_idx)
        assert len(imgs)>0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(imgs), type))
        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        if type == 'train':
            if modality == 'rgbl':
                self.transform = self.train_transform_label
            else:
                self.transform = self.train_transform
        elif type == 'val':
            if modality == 'rgbl':
                self.transform = self.val_transform_label
            else:
                self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))
        self.loader = loader
        self.sparsifier = sparsifier

        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
                                "Supported modalities are: " + ', '.join(self.modality_names)
        self.modality = modality

    def train_transform_label(self, rgb, depth, label):
        raise (RuntimeError("train_transform_label() is not implemented."))

    def val_transform_label(self, rgb, depth, label):
        raise (RuntimeError("val_transform_label() is not implemented."))

    def train_transform(self, rgb, depth):
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(self, rgb, depth):
        raise (RuntimeError("val_transform() is not implemented."))

    def create_sparse_depth(self, rgb, depth):
        if self.sparsifier is None:
            return depth
        else:
            mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
            sparse_depth = np.zeros(depth.shape)
            sparse_depth[mask_keep] = depth[mask_keep]
            return sparse_depth

    def create_rgbd(self, rgb, depth):
        sparse_depth = self.create_sparse_depth(rgb, depth)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        return rgbd

    def create_rgbde(self, rgb, depth):
        sparse_depth = self.create_sparse_depth(rgb, depth)
        edge = cv2.Canny(rgb, 100, 200)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        rgbde = np.append(rgbd, np.expand_dims(edge, axis=2), axis=2)
        return rgbde

    def create_rgbdl(self, rgb, depth):
        rgbd = np.append(rgb, np.expand_dims(depth, axis=2), axis=2)
        return rgbd

    def get_label(self, index):
        path, target = self.imgs[index]
        # self.label_type is expected to be set by the subclass that uses 'rgbl'
        filename = path + '_' + self.label_type + '_label.png'
        label = imageio.imread(filename).astype('float32') / 255.0 * 1000.0
        return label

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth) the raw data.
        """
        path, target = self.imgs[index]
        rgb, depth = self.loader(path)
        return rgb, depth

    def __getitem__(self, index):
        rgb, depth = self.__getraw__(index)
        if self.modality != 'rgbl':
            if self.transform is not None:
                rgb_np, depth_np = self.transform(rgb, depth)
            else:
                raise(RuntimeError("transform not defined"))

        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)
        label_np = None
        debug_figure = False
        if debug_figure:
            fig = plt.figure(1)
            a1 = fig.add_subplot(2, 3, 1)
            a2 = fig.add_subplot(2, 3, 2)
            a3 = fig.add_subplot(2, 3, 3)
            a4 = fig.add_subplot(2, 3, 4)
            a5 = fig.add_subplot(2, 3, 5)
            a6 = fig.add_subplot(2, 3, 6)

        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = self.create_rgbd(rgb_np, depth_np)
        elif self.modality == 'd':
            input_np = self.create_sparse_depth(rgb_np, depth_np)
        elif self.modality == 'rgbde':
            input_np = self.create_rgbde(rgb_np, depth_np)
        elif self.modality == 'rgbl':
            label_np = self.get_label(index)
            if debug_figure:
                a1.imshow(rgb)
                a2.imshow(depth)
                a3.imshow(label_np)
            rgb_np, depth_np, label_np = self.transform(rgb, depth, label_np)
            input_np = self.create_rgbdl(rgb_np, depth_np)

        if debug_figure:
            a4.imshow(rgb_np)
            a5.imshow(depth_np)
            a6.imshow(label_np)
            plt.show()

        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        if label_np is not None:
            label_tensor = to_tensor(label_np)
        else:
            label_tensor = to_tensor(depth_np)
        label_tensor = label_tensor.unsqueeze(0)

        return input_tensor, label_tensor

    def __len__(self):
        return len(self.imgs)
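create_rgbde above appends a Canny edge map as an extra input channel. A minimal sketch (converting to grayscale first, which Canny handles unambiguously):

import numpy as np
import cv2

rgb = (np.random.rand(228, 304, 3) * 255).astype(np.uint8)
gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
edge = cv2.Canny(gray, 100, 200)  # (228, 304) uint8 edge map
edge_channel = np.expand_dims(edge, axis=2)  # appended as a fifth channel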
Example #18
class MyDataloader(data.Dataset):
    modality_names = ['rgb', 'rgbd', 'd']  # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self,
                 root,
                 type,
                 sparsifier=None,
                 modality='rgb',
                 loader=h5_loader):
        classes, class_to_idx = find_classes(root)
        imgs = make_dataset(root, class_to_idx)
        assert len(imgs) > 0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(imgs), type))
        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        if type == 'train':
            self.transform = self.train_transform
        elif type == 'val':
            self.transform = self.val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))
        self.loader = loader
        self.sparsifier = sparsifier

        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
                                "Supported modalities are: " + ', '.join(self.modality_names)
        self.modality = modality

    def train_transform(self, rgb, depth):
        raise (RuntimeError("train_transform() is not implemented. "))

    def val_transform(self, rgb, depth):
        raise (RuntimeError("val_transform() is not implemented."))

    def create_sparse_depth(self, rgb, depth):
        if self.sparsifier is None:
            return depth
        else:
            mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
            sparse_depth = np.zeros(depth.shape)
            sparse_depth[mask_keep] = depth[mask_keep]
            return sparse_depth

    def create_rgbd(self, rgb, depth):
        sparse_depth = self.create_sparse_depth(rgb, depth)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        return rgbd

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth) the raw data.
        """
        path, target = self.imgs[index]
        rgb, depth = self.loader(path)
        return rgb, depth

    def __getitem__(self, index):
        rgb, depth = self.__getraw__(index)

        #print('{:04d}  min={:f}  max={:f}  shape='.format(index, np.amin(depth), np.amax(depth)) + str(depth.shape))

        if self.transform is not None:
            rgb_np, depth_np = self.transform(rgb, depth)
        else:
            raise (RuntimeError("transform not defined"))

        #print('{:04d}  min={:f}  max={:f}  shape='.format(index, np.amin(depth_np), np.amax(depth_np)) + str(depth_np.shape))

        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)

        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = self.create_rgbd(rgb_np, depth_np)
        elif self.modality == 'd':
            input_np = self.create_sparse_depth(rgb_np, depth_np)

        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        #print('{:04d} '.format(index) + str(depth_tensor.shape))
        depth_tensor = depth_tensor.unsqueeze(0)
        #print('{:04d} '.format(index) + str(depth_tensor.shape))

        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
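

# MyDataloader is a template: subclasses supply the concrete transforms.
# A minimal hypothetical subclass sketch (crop size and augmentation values
# are illustrative assumptions, not from this codebase):
class ToyDataloader(MyDataloader):
    output_size = (228, 304)

    def train_transform(self, rgb, depth):
        s = np.random.uniform(1.0, 1.5)  # random scaling
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
        transform = transforms.Compose([
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb_np = self.color_jitter(transform(rgb))  # random color jittering
        rgb_np = np.asarray(rgb_np, dtype='float') / 255.0
        depth_np = transform(depth) / s  # keep depth metrically consistent
        return rgb_np, depth_np

    def val_transform(self, rgb, depth):
        transform = transforms.Compose(
            [transforms.CenterCrop(self.output_size)])
        rgb_np = np.asarray(transform(rgb), dtype='float') / 255.0
        return rgb_np, transform(depth)

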
class CarlaDataset(data.Dataset):
    seed = 42

    output_size = (450, 1600)
    _modality_names = ['rgb', 'rgbd']
    _color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    # as the crop is hardcoded we assert a given image size to prevent errors
    _input_width = 1600
    _input_height = 900

    _crop_upper_x = 0
    _crop_upper_y = 450
    _crop_width = 1600
    _crop_height = 450

    _road_crop = (_crop_upper_y, _crop_upper_x, _crop_height, _crop_width)

    def __init__(self,
                 root,
                 type,
                 modality="rgbd",
                 max_depth=500,
                 camera_rgb_name="cam_front",
                 camera_depth_name="cam_front_depth",
                 lidar_name="lidar_top",
                 ego_pose_sensor_name="imu_perfect"):

        random.seed(self.seed)

        assert type == "val" or type == "train", "unsupported dataset type {}".format(
            type)

        root = pathlib.Path(root)

        self._loader = dataset_loader.DatasetLoader(root)
        self._loader.setup()

        self._projector = LidarToCameraProjector(self._loader,
                                                 camera_depth_name, lidar_name,
                                                 ego_pose_sensor_name)

        self._camera_rgb_sensor, _ = loading_utils.load_sensor_with_calib(
            self._loader, camera_rgb_name)

        self._camera_depth_sensor, _ = loading_utils.load_sensor_with_calib(
            self._loader, camera_depth_name)

        # check image sizes
        assert self._camera_rgb_sensor.meta[
            'image_size_x'] == self._input_width, "crop does not match input images, please adapt."
        assert self._camera_rgb_sensor.meta[
            'image_size_y'] == self._input_height, "crop does not match input images, please adapt."

        assert self._camera_depth_sensor.meta[
            'image_size_x'] == self._input_width, "crop does not match input images, please adapt."
        assert self._camera_depth_sensor.meta[
            'image_size_y'] == self._input_height, "crop does not match input images, please adapt."

        self._data_entries = self.prepare_dataset()

        if type == 'train':
            self._transform = self._train_transform
        elif type == 'val':
            self._transform = self._val_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val"))

        assert (modality in self._modality_names), "Invalid modality type: " + modality + "\n" + \
            "Supported modalities are: " + ', '.join(self._modality_names)
        self.modality = modality
        self._max_depth = max_depth

    def _sensor_data_to_full_filepath(self, s_data):
        path = s_data.file
        path = str(self._loader.dataset_root.joinpath(path))
        return path

    def prepare_dataset(self):
        """
        Store all samples of the dataset (the shuffle below is currently
        disabled, so scene order is preserved).
        """
        data_entries = []
        for scene_token in tqdm.tqdm(self._loader.scene_tokens):
            scene = self._loader.get_scene(scene_token)
            sample_token = scene.first_sample_token
            # next_token is None when the scene ends
            while sample_token is not None:

                sample = self._loader.get_sample(sample_token)
                data_entries.append(sample)
                sample_token = sample.next_token

        # shuffle
        # random.shuffle(data_entries)
        return data_entries

    def load_data(self, sample):

        depth_map_lidar = self._projector.lidar2depth_map(sample)

        # unset values are NaN -> set to 0.0 for training
        depth_map_lidar = np.nan_to_num(depth_map_lidar, nan=0)

        cam_bgr_img = loading_utils.load_camera_image(self._loader, sample,
                                                      self._camera_rgb_sensor)

        # bgr -> rgb
        cam_rgb_img = cam_bgr_img[..., ::-1]

        cam_depth_img = loading_utils.load_camera_image(
            self._loader, sample, self._camera_depth_sensor)
        # convert to single channel float img
        cam_depth_img = loading_utils.rgb_encoded_depth_to_float(cam_depth_img)

        if self._max_depth != np.inf:
            depth_map_lidar = np.clip(depth_map_lidar,
                                      a_min=0.0,
                                      a_max=self._max_depth)
            cam_depth_img = np.clip(cam_depth_img,
                                    a_min=0.0,
                                    a_max=self._max_depth)

        return cam_rgb_img, depth_map_lidar, cam_depth_img

    def visualize(self, sample, vis_dir="/workspace/visualization", i=0):
        # TODO
        vis_dir = pathlib.Path(vis_dir)
        depth_map_lidar = self._projector.lidar2depth_map(sample)

        # for vis replace nan with 0.0
        depth_map_lidar = np.nan_to_num(depth_map_lidar)

        depth_map_lidar = visualization_utils.depth_to_img(depth_map_lidar)

        depth_lidar_path = str(
            vis_dir.joinpath("depth_lidar_{}.jpg".format(i)))
        cv2.imwrite(depth_lidar_path, depth_map_lidar)

        cam_rgb_img = loading_utils.load_camera_image(self._loader, sample,
                                                      self._camera_rgb_sensor)

        rgb_path = str(vis_dir.joinpath("rgb_{}.jpg".format(i)))
        cv2.imwrite(rgb_path, cam_rgb_img)

        cam_depth_img = loading_utils.load_camera_image(
            self._loader, sample, self._camera_depth_sensor)

        cam_depth_img = loading_utils.rgb_encoded_depth_to_float(cam_depth_img)
        cam_depth_img = visualization_utils.depth_to_img(cam_depth_img)

        depth_gt_path = str(vis_dir.joinpath("depth_gt_{}.jpg".format(i)))
        cv2.imwrite(depth_gt_path, cam_depth_img)

    def __getraw__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (rgb, depth_lidar, depth_gt) the raw data.
        """
        sample = self._data_entries[index]
        return self.load_data(sample)

    def __getitem__(self, index):
        rgb, depth_lidar, depth_gt = self.__getraw__(index)

        # apply transforms (for data augmentation)
        if self._transform is not None:
            rgb, depth_lidar, depth_gt = self._transform(
                rgb, depth_lidar, depth_gt)
        else:
            raise (RuntimeError("transform not defined"))

        input_img = None
        if self.modality == "rgb":
            input_img = rgb
        elif self.modality == "rgbd":
            # depth is h x w -> h x w x 1
            depth_lidar = np.expand_dims(depth_lidar, axis=-1)
            input_img = np.concatenate([rgb, depth_lidar], axis=-1)

        # convert to torch and flip channels in front
        input_img = to_tensor(input_img)
        depth_gt = to_tensor(depth_gt)
        # make 1 x h x w
        depth_gt = depth_gt.unsqueeze(0)

        return input_img, depth_gt

    def __len__(self):
        return len(self._data_entries)

    def _train_transform(self, rgb, sparse_depth, depth_gt):
        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_gt = depth_gt / s

        # TODO critical why is the input not scaled in original implementation?
        sparse_depth = sparse_depth / s

        # TODO adapt and refactor
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # perform 1st step of data augmentation
        # TODO critical adjust sizes
        transform = transforms.Compose([
            transforms.Crop(*self._road_crop),
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])

        rgb = transform(rgb)
        sparse_depth = transform(sparse_depth)

        # TODO needed?
        # Scipy affine_transform produced RuntimeError when the depth map was
        # given as a 'numpy.ndarray'
        depth_gt = np.asfarray(depth_gt, dtype='float32')
        depth_gt = transform(depth_gt)

        rgb = self._color_jitter(rgb)  # random color jittering

        # convert color [0,255] -> [0.0, 1.0] floats
        rgb = np.asfarray(rgb, dtype='float') / 255

        return rgb, sparse_depth, depth_gt
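
    # Why depth is divided by the random scale s: under a pinhole model
    # u = fx * X / Z, resizing the image by s is equivalent to keeping the
    # original camera and moving the scene to depth Z / s. A quick numeric
    # check with illustrative values (a sketch, not code from this dataset):
    #
    #     fx, X, Z, s = 500.0, 1.0, 10.0, 1.25
    #     u_resized = (s * fx) * X / Z   # pixel coordinate after resizing by s
    #     u_closer = fx * X / (Z / s)    # original camera, depth divided by s
    #     assert abs(u_resized - u_closer) < 1e-9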

    def _val_transform(self, rgb, sparse_depth, depth_gt):
        transform = transforms.Compose([
            transforms.Crop(*self._road_crop),
            transforms.CenterCrop(self.output_size),
        ])
        rgb = transform(rgb)
        rgb = np.asfarray(rgb, dtype='float') / 255

        sparse_depth = np.asfarray(sparse_depth, dtype='float32')
        sparse_depth = transform(sparse_depth)

        depth_gt = np.asfarray(depth_gt, dtype='float32')
        depth_gt = transform(depth_gt)

        return rgb, sparse_depth, depth_gt
Example #20
import os
import h5py
import numpy as np
import torch
import torch.utils.data as data

from dataloaders import make_dataset, transforms

RAW_HEIGHT, RAW_WIDTH = 480, 640  # raw image size
to_tensor = transforms.ToTensor()
color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)


def h5_loader(path):
    """Image/Depth extractor from h5 format file.
    Args:
        path (str): Path to h5 format file
    Returns:
        rgb (np.array): RGB image (shape=[H,W,3])
        depth (np.array): Depth image (shape=[H,W])
    """
    h5f = h5py.File(path, "r")
    rgb = np.array(h5f['rgb'])
    rgb = np.transpose(rgb, (1, 2, 0))
    depth = np.array(h5f['depth'])
    return rgb, depth
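
# Hypothetical usage of h5_loader (the path is illustrative, not from this
# codebase); the shapes follow RAW_HEIGHT/RAW_WIDTH above:
#
#     rgb, depth = h5_loader('nyudepthv2/train/bedroom_0001/00001.h5')
#     assert rgb.shape == (480, 640, 3) and depth.shape == (480, 640)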


class NYUCamMat:
    """Calculate Resized Camera Intrinsic Matrix of NYUDepth dataset."""
Example #21
class MyDataloaderExt(data.Dataset):

    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self,
                 root,
                 type,
                 sparsifier=None,
                 max_gt_depth=math.inf,
                 modality='rgb'):

        if type == 'train':
            self.transform = self.train_transform
        elif type == 'val':
            self.transform = self.val_transform
        else:
            raise RuntimeError('invalid type of dataset')

        dataset_folder = os.path.join(root, type)

        general_img_index = []
        self.beginning_offset = 0

        classes, class_to_idx = find_classes(
            dataset_folder, ('ds' if '-k' in modality else None))
        general_class_data = [None] * len(classes)
        for i_class, curr_class in enumerate(classes):
            class_images = load_class_dataset(dataset_folder, curr_class)
            class_extras = None
            if 'dsx' in curr_class:
                class_extras, class_images = load_class_extras(
                    root, type, class_images)
            general_class_data[i_class] = dict(name=curr_class,
                                               images=class_images,
                                               extras=class_extras)

            for i_img in range(self.beginning_offset, len(class_images)):
                general_img_index.append((i_class, i_img))

        assert len(general_img_index
                   ) > 0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(general_img_index),
                                                     type))
        self.root = root
        self.general_img_index = general_img_index
        self.general_class_data = general_class_data
        self.classes = classes
        self.class_to_idx = class_to_idx

        self.sparsifier = sparsifier
        self.modality = Modality(modality)
        self.max_gt_depth = max_gt_depth

    def train_transform(self, channels):
        raise (RuntimeError("train_transform() is not implemented."))

    def val_transform(self, channels):
        raise (RuntimeError("val_transform() is not implemented."))

    def create_sparse_depth(self, rgb, target_depth):
        if self.sparsifier is None:
            raise (RuntimeError("please select a sparsifier"))
        else:
            mask_keep = self.sparsifier.dense_to_sparse(rgb, target_depth)
            sparse_depth = np.zeros(target_depth.shape)
            sparse_depth[mask_keep] = target_depth[mask_keep]
            return sparse_depth
            return sparse_depth

    # gt_depth - ground-truth depth
    # rgb - color channel
    # grey - greyscale version of the color channel
    # fd - fake slam using the given sparsifier on the gt depth
    # kor - slam keypoints + slam depth
    # kde - slam keypoints + mesh-based denoised depth
    # kgt - slam keypoints + gt depth
    # kw - sparse confidence measurements
    # dor - mesh + interpolation of slam points
    # dde - mesh + interpolation of denoised slam points
    # kvor - slam keypoints expanded to the voronoi diagram cell around them (dense)

    # d2dwor - 2d image distance transformation using slam keypoints as seeds
    # d3dwde - 3d euclidean distance to the closest denoised slam keypoint
    # d3dwor - 3d euclidean distance to the closest slam keypoint
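    #
    # e.g. modality='rgb-kor-kw' would request the color image plus raw slam
    # keypoint depth and confidence channels (the '-' separator is an
    # assumption, inferred from the "'-k' in modality" check in __init__)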

    def calc_from_sparse_input(self, in_sparse_map, voronoi=True, edt=True):

        res_voronoi = None
        res_edt = None

        if voronoi or edt:
            mask = (in_sparse_map < epsilon)
            if voronoi:
                # return_indices=True yields a (distances, indices) pair
                distances, indices = ndimage.distance_transform_edt(
                    mask, return_indices=True)
            else:
                # return_indices=False yields only the distance map
                distances = ndimage.distance_transform_edt(mask)
            res_edt = np.sqrt(distances)

            if voronoi:
                res_voronoi = np.zeros_like(in_sparse_map)
                it = np.nditer(res_voronoi,
                               flags=['multi_index'],
                               op_flags=['writeonly'])

                with it:
                    while not it.finished:
                        xp = indices[0, it.multi_index[0], it.multi_index[1]]
                        yp = indices[1, it.multi_index[0], it.multi_index[1]]

                        it[0] = in_sparse_map[xp, yp]
                        it.iternext()

        return res_voronoi, res_edt
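
    # Note: the nditer loop above can also be expressed with fancy indexing,
    # a minimal equivalent sketch (same semantics, vectorized):
    #
    #     res_voronoi = in_sparse_map[indices[0], indices[1]]
    #
    # since indices[k] stores, for every pixel, the k-th coordinate of its
    # nearest seed pixel.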


    # pose = 'none' | 'gt' | 'slam'
    def h5_loader_general(self, img_path, extra_path, type, pose='none'):
        result = dict()
        #path, target = self.imgs[index]
        h5f = h5py.File(img_path, "r")
        h5fextra = None
        if extra_path is not None:
            h5fextra = h5py.File(extra_path, "r")

        # target depth
        if 'dense_image_data' in h5f:
            dense_data = h5f['dense_image_data']
            depth = np.array(dense_data[0, :, :])
            mask_array = depth > 10000  # values above 10000 encode infinite distance
            depth[mask_array] = 0  # represented as zero in this software
            result['gt_depth'] = depth
            if 'normal_data' in h5f:
                normal_rescaled = (
                    (np.array(h5f['normal_data'], dtype='float32') / 127.5) -
                    1.0)
                result['normal_x'] = normal_rescaled[0, :, :]
                result['normal_y'] = normal_rescaled[1, :, :]
                result['normal_z'] = normal_rescaled[2, :, :]
        elif 'depth' in h5f:
            depth = np.array(h5f['depth'])
            if not math.isinf(self.max_gt_depth) and self.max_gt_depth > 0:
                mask_max = depth > self.max_gt_depth
                depth[mask_max] = 0
            result['gt_depth'] = depth

        if pose == 'gt':
            if h5fextra is not None:
                result['t_wc'] = np.array(h5fextra['gt_twc_data'])
            else:
                if 'gt_twc_data' not in h5f:
                    return None
                result['t_wc'] = np.array(h5f['gt_twc_data'])
                assert result['t_wc'].shape == (
                    4, 4), 'file {} - the t_wc is not 4x4'.format(img_path)

        if pose == 'slam':
            if h5fextra is not None:
                result['t_wc'] = np.array(h5fextra['slam_twc_data'])
            else:
                if 'slam_twc_data' not in h5f:
                    return None
                result['t_wc'] = np.array(h5f['slam_twc_data'])
                assert result['t_wc'].shape == (
                    4, 4), 'file {} - the t_wc is not 4x4'.format(img_path)

        # color data
        if 'rgb_image_data' in h5f:
            rgb = np.array(h5f['rgb_image_data'])
        elif 'rgb' in h5f:
            rgb = np.array(h5f['rgb'])
        else:
            rgb = None

        if 'grey' in type:
            grey_img = rgb2grayscale(rgb)
            result['grey'] = grey_img

        rgb = np.transpose(rgb, (1, 2, 0))

        if 'rgb' in type:
            result['rgb'] = rgb

        # fake sparse data using the sparsifier and the ground-truth depth
        if 'fd' in type:
            result['fd'] = self.create_sparse_depth(rgb, depth)
        if 'kfd' in type:
            result['kfd'] = self.create_sparse_depth(rgb, depth)

        # using real keypoints from slam
        # if 'landmark_2d_data' in h5f:
        if h5fextra is not None:
            data_2d = np.array(h5fextra['landmark_2d_data'])
        else:
            if 'landmark_2d_data' not in h5f:
                data_2d = None
            else:
                data_2d = np.array(h5f['landmark_2d_data'])
        # else:
        #     data_2d = None

        if 'kor' in type:
            kor_input = np.zeros_like(depth)
            for row in data_2d:
                xp = int(math.floor(row[1]))
                yp = int(math.floor(row[0]))
                if (row[2] > 0):
                    kor_input[xp, yp] = row[2]

            #res_voronoi,res_edt = self.calc_from_sparse_input(kor_input,'dvor' in type,'d2dwor' in type)

            if 'kor' in type:
                result['kor'] = kor_input
            # if 'dvor' in type:
            #     result['dvor'] = res_voronoi
            # if 'd2dwor' in type:
            #     result['d2dwor'] = res_edt

        if 'kgt' in type or 'dvgt' in type or 'd2dwgt' in type:
            kgt_input = np.zeros_like(depth)
            for row in data_2d:
                xp = int(math.floor(row[1]))
                yp = int(math.floor(row[0]))
                if (depth[xp, yp] > 0):
                    kgt_input[xp, yp] = depth[xp, yp]
            res_voronoi, res_edt = self.calc_from_sparse_input(
                kgt_input, 'dvgt' in type, 'd2dwgt' in type)

            if 'kgt' in type:
                result['kgt'] = kgt_input
            if 'dvgt' in type:
                result['dvgt'] = res_voronoi
            if 'd2dwgt' in type:
                result['d2dwgt'] = res_edt

        if 'kde' in type or 'dvde' in type or 'd2dwde' in type:
            kde_input = np.zeros_like(depth)
            for row in data_2d:
                xp = int(math.floor(row[1]))
                yp = int(math.floor(row[0]))
                if (row[3] > 0):
                    kde_input[xp, yp] = row[3]

            res_voronoi, res_edt = self.calc_from_sparse_input(
                kde_input, 'dvde' in type, 'd2dwde' in type)

            if 'kde' in type:
                result['kde'] = kde_input
            if 'dvde' in type:
                result['dvde'] = res_voronoi
            if 'd2dwde' in type:
                result['d2dwde'] = res_edt

        if 'wkde' in type:
            kde_input = np.zeros_like(depth)
            for row in data_2d:
                xp = int(math.floor(row[1]))
                yp = int(math.floor(row[0]))
                if (row[3] > 0):
                    kde_input[xp, yp] = row[3]
            result['wkde'] = kde_input

        if 'kw' in type:
            kw_input = np.zeros_like(depth)
            for row in data_2d:
                xp = int(math.floor(row[1]))
                yp = int(math.floor(row[0]))
                if (row[4] > 0):
                    kw_input[xp, yp] = row[4]
            result['kw'] = kw_input

        if 'dor' in type:
            result['dor'] = np.array(dense_data[1, :, :])

        if 'dore' in type:
            result['dore'] = np.array(dense_data[1, :, :])
            dore_mask = result['dore'] < epsilon
            result['dore'][dore_mask] = np.array(
                dense_data[2, :, :])[dore_mask]

        if 'd3dwor' in type:
            result['d3dwor'] = np.array(dense_data[3, :, :])

        if 'dvor' in type:
            result['dvor'] = np.array(dense_data[2, :, :])

        if 'd2dwor' in type:
            result['d2dwor'] = np.array(dense_data[5, :, :])

        if 'dde' in type:
            result['dde'] = np.array(dense_data[4, :, :])

        if 'ddee' in type:
            result['ddee'] = np.array(dense_data[4, :, :])
            dore_mask = result['ddee'] < epsilon
            result['ddee'][dore_mask] = np.array(
                dense_data[2, :, :])[dore_mask]

        if 'd3dwde' in type:
            result['d3dwde'] = np.array(dense_data[6, :, :])

        if 'wdde' in type:
            result['wdde'] = np.array(dense_data[4, :, :])

        # all channels above are np.array copies, so the files can be closed
        h5f.close()
        if h5fextra is not None:
            h5fextra.close()

        return result

    def to_tensor(self, img):

        if not isinstance(img, np.ndarray):
            raise TypeError('img should be ndarray. Got {}'.format(type(img)))

        # handle numpy array
        if img.ndim == 3 or img.ndim == 2:
            img = torch.from_numpy(img.copy())
        else:
            raise RuntimeError(
                'img should be ndarray with 2 or 3 dimensions. Got {}'.format(
                    img.ndim))

        return img.float()

    def append_tensor3d(self, input_np, value):
        if not isinstance(input_np, np.ndarray):  # first element
            if value.ndim == 2:
                input_np = np.expand_dims(value, axis=0)
            elif value.ndim == 3:
                input_np = value
        else:  # 2nd ,3rd ...
            if value.ndim == 2:
                input_np = np.append(input_np,
                                     np.expand_dims(value, axis=0),
                                     axis=0)
            elif value.ndim == 3:
                input_np = np.append(input_np, value, axis=0)
            else:
                raise RuntimeError(
                    'value should be ndarray with 2 or 3 dimensions. Got {}'.
                    format(value.ndim))
        return input_np
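
    # Illustrative behaviour of append_tensor3d (hypothetical shapes): it
    # grows a channels-first stack starting from None.
    #
    #     stack = self.append_tensor3d(None, np.zeros((4, 5)))     # (1, 4, 5)
    #     stack = self.append_tensor3d(stack, np.ones((4, 5)))     # (2, 4, 5)
    #     stack = self.append_tensor3d(stack, np.ones((2, 4, 5)))  # (4, 4, 5)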

    def __getitem__(self, index):

        class_idx, img_idx = self.general_img_index[index]
        class_entry = self.general_class_data[class_idx]
        img_path = class_entry['images'][img_idx]
        extra_path = (class_entry['extras'][img_idx]
                      if class_entry['extras'] is not None else None)
        channels_np = self.h5_loader_general(img_path, extra_path,
                                             self.modality)

        input_np = None

        if channels_np is None:
            return None, None, None

        if self.transform is not None:
            channels_transformed_np = self.transform(channels_np)
        else:
            raise (RuntimeError("transform not defined"))

        target_data = channels_transformed_np['gt_depth']

        num_image_channel, image_channel = self.modality.get_input_image_channel(
        )
        if num_image_channel > 0:
            input_np = self.append_tensor3d(
                input_np, channels_transformed_np[image_channel])
        else:
            raise RuntimeError('rgb channel expected')

        num_depth_channel, depth_channel = self.modality.get_input_depth_channel(
        )
        if num_depth_channel > 0:
            input_np = self.append_tensor3d(
                input_np, channels_transformed_np[depth_channel])
        else:
            zeros = np.zeros_like(input_np[0, :, :])
            input_np = self.append_tensor3d(input_np, zeros)

        num_weight_channel, weight_channel = self.modality.get_input_weight_channel(
        )
        if num_weight_channel > 0 and weight_channel != 'bin':
            input_np = self.append_tensor3d(
                input_np, channels_transformed_np[weight_channel])
        else:
            confidence = np.zeros_like(input_np[0, :, :])
            valid_mask = input_np[3, :, :] > 0
            confidence[valid_mask] = 1.0
            input_np = self.append_tensor3d(input_np, confidence)

        input_tensor = self.to_tensor(input_np)

        target_depth_tensor = self.to_tensor(target_data).unsqueeze(0)

        return input_tensor, target_depth_tensor, channels_transformed_np[
            'scale']

    def __len__(self):
        return len(self.general_img_index)