Example #1
    def train_transform(self, rgb, depth):
        scale = np.random.uniform(low=1, high=1.5)  # random scaling
        depth = depth / scale

        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        should_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # random crop offsets (computed but unused; CenterCrop is applied below)
        h_offset = int((768 - 228) * np.random.uniform(0.0, 1.0))
        v_offset = int((1024 - 304) * np.random.uniform(0.0, 1.0))

        base_transform = transforms.Compose([
            transforms.Resize(250 / iheight),  # shrink first, since rotation is slow on full-size images
            transforms.Rotate(angle),
            transforms.Resize(scale),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(should_flip),
        ])

        rgb = base_transform(rgb)
        rgb = self.color_jitter(rgb)  # random color jittering
        rgb = rgb / 255.0

        depth = base_transform(depth)

        return (rgb, depth)
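
A note on the pattern used throughout these examples: they rely on a project-local `transforms` module (not torchvision) whose ops take pre-sampled parameters (rotation angle, flip flag), so the exact same geometric transform can be applied to both the RGB image and the depth map. That module is not shown here; a minimal sketch of such deterministic transforms, assuming H x W (x C) NumPy arrays and scipy.ndimage (names here are illustrative, not the actual implementation):

    import numpy as np
    import scipy.ndimage

    class Compose:
        def __init__(self, transforms):
            self.transforms = transforms
        def __call__(self, img):
            for t in self.transforms:
                img = t(img)
            return img

    class Rotate:
        def __init__(self, angle):
            self.angle = angle  # degrees, sampled once by the caller
        def __call__(self, img):
            # order=0 (nearest) avoids blending depth values across object edges
            return scipy.ndimage.rotate(img, self.angle, reshape=False, order=0)

    class HorizontalFlip:
        def __init__(self, do_flip):
            self.do_flip = do_flip  # bool, sampled once by the caller
        def __call__(self, img):
            return np.ascontiguousarray(img[:, ::-1]) if self.do_flip else img

    class CenterCrop:
        def __init__(self, size):
            self.th, self.tw = size
        def __call__(self, img):
            h, w = img.shape[0], img.shape[1]
            i, j = (h - self.th) // 2, (w - self.tw) // 2
            return img[i:i + self.th, j:j + self.tw]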
Example #2
    def train_transform(self, im, gt):
        im = np.array(im).astype(np.float32)
        gt = np.array(gt).astype(np.float32)

        s = np.random.uniform(1.0, 1.5)  # random scaling
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
        color_jitter = my_transforms.ColorJitter(0.4, 0.4, 0.4)

        transform = my_transforms.Compose([
            my_transforms.Crop(130, 10, 240, 1200),
            my_transforms.Resize(460 / 240, interpolation='bilinear'),
            my_transforms.Rotate(angle),
            my_transforms.Resize(s),
            my_transforms.CenterCrop(self.size),
            my_transforms.HorizontalFlip(do_flip)
        ])

        im_ = transform(im)
        im_ = color_jitter(im_)

        gt_ = transform(gt)

        im_ = np.array(im_).astype(np.float32)
        gt_ = np.array(gt_).astype(np.float32)

        im_ /= 255.0
        gt_ /= 100.0 * s  # the augmented division binds the whole product: gt_ = gt_ / (100.0 * s)
        im_ = to_tensor(im_)
        gt_ = to_tensor(gt_)

        gt_ = gt_.unsqueeze(0)

        return im_, gt_
Example #3
    def train_transform(self, rgb, depth):
        #s = np.random.uniform(1.0, 1.5)  # random scaling
        #depth_np = depth / s
        s = self.getFocalScale()

        if self.augArgs.varFocus:  # variable focal length simulation
            depth_np = depth
        else:
            depth_np = depth / s  # correct for focal length

        if self.augArgs.varScale:  # variable global scale simulation
            scale = self.getDepthGroup()
            depth_np = depth_np * scale

        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # perform 1st step of data augmentation
        transform = transforms.Compose([
            transforms.Crop(130, 10, 240, 1200),
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np)  # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        # Scipy affine_transform produced RuntimeError when the depth map was
        # given as a 'numpy.ndarray'
        depth_np = np.asfarray(depth_np, dtype='float32')
        depth_np = transform(depth_np)

        return rgb_np, depth_np
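
Why `depth / s` pairs with `Resize(s)`: under a pinhole model u = f * X / Z, enlarging the image by s multiplies every pixel coordinate by s, which is exactly the view of a scene whose depths are divided by s. A quick numeric check of that identity (the values are arbitrary):

    import numpy as np

    f, X, Z, s = 500.0, 0.2, 3.0, 1.5   # focal length, lateral offset, depth, scale
    u = f * X / Z                        # original pixel coordinate
    u_scaled = f * X / (Z / s)           # same point after dividing depth by s
    assert np.isclose(u_scaled, s * u)   # matches the image resized by s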
Example #4
    def train_transform(self, rgb, depth):
        s = self.getFocalScale()

        if self.augArgs.varFocus:  # variable focal length simulation
            depth_np = depth
        else:
            depth_np = depth / s  # correct for focal length

        if self.augArgs.varScale:  # variable global scale simulation
            scale = self.getDepthGroup()
            depth_np = depth_np * scale

        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # perform 1st step of data augmentation
        transform = transforms.Compose([
            transforms.Resize(250.0 / iheight),  # for computational efficiency, since rotation can be slow
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np)  # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)

        return rgb_np, depth_np
Example #5
    def train_transform(self, rgb, depth):
        s = np.random.uniform(1.0, 1.5)  # random scaling
        random_size = (int(s * 224), int(s * 224))  # only used by the commented-out torchvision variant below
        depth_np = depth / s
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # perform 1st step of data augmentation
        # transform = torchvision.transforms.Compose([
        #     torchvision.transforms.Resize(self.output_size), # this is for computational efficiency, since rotation can be slow
        #    torchvision.transforms.RandomRotation(angle),
        #    torchvision.transforms.Resize(random_size),
        #    torchvision.transforms.CenterCrop(self.output_size),
        #    torchvision.transforms.RandomHorizontalFlip(do_flip)
        #])
        transform2 = transforms.Compose([
            transforms.Resize(250.0 / iheight),  # for computational efficiency, since rotation can be slow
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb_np = transform2(rgb)
        #rgb_n = Image.fromarray(np.uint8(rgb_np * 255))
        #rgb_np = self.color_jitter(rgb_n) # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform2(depth_np)
        #depth_np = np.asfarray(depth_np, dtype='float') / 255

        return rgb_np, depth_np
Example #6
    def train_transform(self, rgb, depth, rgb_near):
        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_np = depth / s
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
        # perform 1st step of data augmentation
        transform = transforms.Compose([
            transforms.Resize(250.0 / iheight),  # for computational efficiency, since rotation can be slow
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np)  # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        rgb_near_np = None
        if rgb_near is not None:
            rgb_near_np = transform(rgb_near)
            rgb_near_np = np.asfarray(rgb_near_np, dtype='float') / 255
        depth_np = transform(depth_np)

        self.K = TransfromIntrinsics(self.K, (250.0 / iheight) * s, self.output_size)
        return rgb_np, depth_np, rgb_near_np
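
Example #6 also updates the camera intrinsics to match the resize-then-center-crop pipeline via `TransfromIntrinsics`, whose implementation is not shown. A plausible sketch of that adjustment, assuming a 3x3 K matrix (the helper name and `raw_size` parameter are hypothetical additions):

    import numpy as np

    def transform_intrinsics(K, scale, output_size, raw_size):
        # Hypothetical sketch of what TransfromIntrinsics might do; the actual
        # implementation is not shown. Rescale K for a resize by `scale`, then
        # shift the principal point for a center crop to `output_size` (h, w).
        K = K.copy()
        K[0, 0] *= scale  # fx
        K[1, 1] *= scale  # fy
        K[0, 2] *= scale  # cx
        K[1, 2] *= scale  # cy
        th, tw = output_size
        h, w = int(raw_size[0] * scale), int(raw_size[1] * scale)  # size before crop
        K[0, 2] -= (w - tw) / 2.0  # the crop removes (w - tw) / 2 columns on the left
        K[1, 2] -= (h - th) / 2.0  # and (h - th) / 2 rows on top
        return K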
Example #7
    def train_transform(self, im, gt, mask):
        im = np.array(im).astype(np.float32)
        im = cv2.resize(im, (512, 256), interpolation=cv2.INTER_AREA)
        gt = cv2.resize(gt, (512, 256), interpolation=cv2.INTER_AREA)
        mask = cv2.resize(mask, (512, 256), interpolation=cv2.INTER_AREA)

        # h,w,c = im.shape
        # th, tw = 256,512
        # x1 = random.randint(0, w - tw)
        # y1 = random.randint(0, h - th)
        # img = im[y1:y1 + th, x1:x1 + tw, :]
        # gt = gt[y1:y1 + th, x1:x1 + tw]
        # mask = mask[y1:y1 + th, x1:x1 + tw]
        s = np.random.uniform(1.0, 1.5)  # random scaling
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
        color_jitter = my_transforms.ColorJitter(0.4, 0.4, 0.4)

        transform = my_transforms.Compose([
            my_transforms.Rotate(angle),
            my_transforms.Resize(s),
            my_transforms.CenterCrop(self.size),
            my_transforms.HorizontalFlip(do_flip)
        ])

        im_ = transform(im)
        im_ = color_jitter(im_)

        gt_ = transform(gt)
        mask_ = transform(mask)
        im_ = np.array(im_).astype(np.float32)
        gt_ = np.array(gt_).astype(np.float32)
        mask_ = np.array(mask_).astype(np.float32)

        im_ /= 255.0
        gt_ /= s
        im_ = to_tensor(im_)
        gt_ = to_tensor(gt_)
        mask_ = to_tensor(mask_)

        gt_ = gt_.unsqueeze(0)
        mask_ = mask_.unsqueeze(0)

        return im_, gt_, mask_
Example #8
    def train_transform(self, rgb, depth):
        """
        [Reference]
        https://github.com/fangchangma/sparse-to-dense.pytorch/blob/master/dataloaders/nyu_dataloader.py

        Args:
            rgb (np.array): RGB image (shape=[H,W,3])
            depth (np.array): Depth image (shape=[H,W])

        Returns:
            torch.Tensor: Transformed RGB image
            torch.Tensor: Transformed Depth image
            np.array: Transformed RGB image without color jitter (for 2D mesh creation)
        """
        # Parameters for each augmentation
        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_np = depth / s
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # Perform 1st step of data augmentation
        transform = transforms.Compose([
            transforms.Resize(250.0 / RAW_HEIGHT),
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.img_size),
            transforms.HorizontalFlip(do_flip)
        ])

        # Apply this transform to rgb/depth
        rgb_np_orig = transform(rgb)
        rgb_np_for_edge = np.asfarray(rgb_np_orig)  # kept un-jittered for Canny edge detection
        rgb_np = color_jitter(rgb_np_orig)  # random color jittering
        rgb_np = np.asfarray(rgb_np) / 255
        depth_np = transform(depth_np)

        return rgb_np, depth_np, rgb_np_for_edge
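
The extra `rgb_np_for_edge` return value in Example #8 is the un-jittered image kept for Canny edge detection. A hedged sketch of that downstream step with OpenCV (the thresholds are illustrative, not taken from the source):

    import cv2
    import numpy as np

    # rgb_np_for_edge is the third value returned by train_transform above
    gray = cv2.cvtColor(rgb_np_for_edge.astype(np.uint8), cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, threshold1=100, threshold2=200)  # uint8 edge map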
Example #9
    def train_transform(self, rgb, depth):
        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_np = depth / (s * self.depth_divider)
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # perform 1st step of data augmentation
        transform = transforms.Compose([
            transforms.Crop(130, 10, 240, 1200),
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np)  # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        # Scipy affine_transform produced RuntimeError when the depth map was
        # given as a 'numpy.ndarray'
        depth_np = np.asfarray(depth_np, dtype='float32')
        depth_np = transform(depth_np)

        return rgb_np, depth_np
Example #10
    def train_transform(self, rgb, depth):
        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_np = depth / s
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # perform 1st step of data augmentation
        transform = transforms.Compose([
            # Why not crop like in KITTI? Also, if resizing is done, why not reflect it in depth_np as well?
            transforms.Resize(250.0 / iheight),  # for computational efficiency, since rotation can be slow
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np)  # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)

        return rgb_np, depth_np
Example #11
    def _train_transform(self, rgb, sparse_depth, depth_gt):
        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_gt = depth_gt / s

        # TODO (critical): why is the input not scaled in the original implementation?
        sparse_depth = sparse_depth / s

        # TODO adapt and refactor
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # perform 1st step of data augmentation
        # TODO (critical): adjust sizes
        transform = transforms.Compose([
            transforms.Crop(*self._road_crop),
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])

        rgb = transform(rgb)
        sparse_depth = transform(sparse_depth)

        # TODO needed?
        # Scipy affine_transform produced RuntimeError when the depth map was
        # given as a 'numpy.ndarray'
        depth_gt = np.asfarray(depth_gt, dtype='float32')
        depth_gt = transform(depth_gt)

        rgb = self._color_jitter(rgb)  # random color jittering

        # convert color [0,255] -> [0.0, 1.0] floats
        rgb = np.asfarray(rgb, dtype='float') / 255

        return rgb, sparse_depth, depth_gt
Example #12
    def train_transform(self, rgb, depth):
        s = np.random.uniform(1.0, 1.5)  # random scaling
        # s = 1.5
        depth_np = depth / s
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # perform 1st step of data augmentation
        transform = transforms.Compose([
            transforms.Resize(250.0 / iheight),  # for computational efficiency, since rotation can be slow
            transforms.Rotate(angle),
            transforms.Resize(s),  # TODO (Katie): figure out how to resize properly
            transforms.RandomCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np)  # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)

        return rgb_np, depth_np
Example #13
    def train_transform(self, rgb, depth):
        s = np.random.uniform(1.0, 1.5) # random scaling
        depth_np = depth / s
        angle = np.random.uniform(-5.0, 5.0) # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5 # random horizontal flip
        iheight = rgb.shape[0]

        # perform 1st step of data augmentation
        transform = transforms.Compose([
            transforms.Resize(250.0 / iheight), # this is for computational efficiency, since rotation can be slow
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop((228, 304)),
            transforms.HorizontalFlip(do_flip),
            transforms.Resize(self.output_size),
        ])
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np) # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        if depth_np.ndim != 2:
            print("Wrong depth shape:", depth_np.shape)
        depth_np = transform(depth_np)

        return rgb_np, depth_np
Example #14
    def train_transform(self, attrib_list):

        iheight = attrib_list['gt_depth'].shape[0]
        iwidth = attrib_list['gt_depth'].shape[1]

        s = np.random.uniform(1.0, 1.5)  # random scaling

        angle = np.random.uniform(-15.0, 15.0)  # random rotation degrees
        hdo_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
        vdo_flip = np.random.uniform(0.0, 1.0) < 0.5  # random vertical flip

        # perform 1st step of data augmentation
        transform = transforms.Compose([
            transforms.Resize(270.0 / iheight),  # for computational efficiency, since rotation can be slow
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(hdo_flip),
            transforms.VerticalFlip(vdo_flip)
        ])

        attrib_np = dict()

        if self.depth_divider == 0:
            if 'fd' in attrib_list:
                minmax_image = transform(attrib_list['fd'])
                max_depth = max(minmax_image.max(), 1.0)
            elif 'kor' in attrib_list:  # was a second `if`, whose else branch clobbered the 'fd' result
                minmax_image = transform(attrib_list['kor'])
                max_depth = max(minmax_image.max(), 1.0)
            else:
                max_depth = 50

            scale = 10.0 / max_depth  # 10 is arbitrary; the network only converges in a specific range
        else:
            scale = 1.0 / self.depth_divider

        attrib_np['scale'] = 1.0 / scale

        for key, value in attrib_list.items():
            attrib_np[key] = transform(value)
            if key in Modality.need_divider:  # ['gt_depth','fd','kor','kde','kgt','dor','dde','d3dwde','d3dwor','dvor','dvde','dvgt']
                attrib_np[key] = scale * attrib_np[key]  # alt: (attrib_np[key] - min_depth + 0.01) / (max_depth - min_depth)
            elif key in Modality.image_size_weight_names:  # ['d2dwor', 'd2dwde', 'd2dwgt']
                attrib_np[key] = attrib_np[key] / (iwidth * 1.5)  # 1.5 ~ sqrt(2), a square's diagonal

        if 'rgb' in attrib_np:
            attrib_np['rgb'] = self.color_jitter(attrib_np['rgb'])  # random color jittering
            # all channels need to be C x H x W
            attrib_np['rgb'] = (np.asfarray(attrib_np['rgb'], dtype='float') / 255).transpose((2, 0, 1))

        if 'grey' in attrib_np:
            attrib_np['grey'] = np.expand_dims(
                np.asfarray(attrib_np['grey'], dtype='float') / 255, axis=0)

        return attrib_np
Example #15
    def __getitem__(self, idx):
        # read input image
        filename = self.filenames[idx]
        filenameGt = self.filenamesGt[idx]
        rgb_image = Image.open(filename).convert('RGB')
        depth_image = cv2.imread(filenameGt,
                                 flags=(cv2.IMREAD_GRAYSCALE
                                        | cv2.IMREAD_ANYDEPTH))

        if depth_image.ndim < 2:
            print(depth_image.shape)
            print(filenameGt)
        _s = np.random.uniform(1.0, 1.5)
        depth_image = depth_image / _s
        s = (int(365 * _s), int(547 * _s))  # np.int was removed from NumPy; use the builtin int
        depth_image = np.asarray(cv2.resize(depth_image,
                                            dsize=(s[1], s[0]),
                                            interpolation=cv2.INTER_NEAREST),
                                 dtype=np.float32)
        # s = (912,608)
        degree = np.random.uniform(-5.0, 5.0)
        do_flip = np.random.uniform(0.0, 1.0)  # note: never thresholded (e.g. < 0.5) or used below
        if self.split == 'train':
            tRgb = data_transform.Compose(
                [  # transforms.functional.crop(130,10,1368,912),
                    transforms.Resize(s),
                    data_transform.Rotation(degree),
                    transforms.ColorJitter(brightness=0.4,
                                           contrast=0.4,
                                           saturation=0.4),
                    transforms.CenterCrop((352, 512)),
                    transforms.ToTensor(),
                ])

            tDepth = data_transform.Compose(
                [  # transforms.functional.crop(130,10,1368,912),
                    cfctransforms.Rotate(degree),
                    cfctransforms.CenterCrop((352, 512)),
                ])

            rgb_image = tRgb(rgb_image)

            depth_image = tDepth(depth_image)
            depth_image = np.asarray(depth_image, dtype=np.float32)

            ### exclude points with depth > 500m ####
            sparse_depth = np.zeros(depth_image.shape)
            mask_l = depth_image > 0
            mask_keep = np.bitwise_and(mask_l, depth_image <= 500)
            sparse_depth[mask_keep] = depth_image[mask_keep]
            depth_image = sparse_depth

            # depth_image = scale(depth_image, out_range=(0.01, 1))
            #scaling of depth maps
            max_depth = max(depth_image.max(), 1.0)
            depth_image = (10 / max_depth) * depth_image

        if self.split == 'val' or self.split == 'test':
            s = (365, 547)
            depth_image = np.asarray(cv2.resize(
                depth_image,
                dsize=(s[1], s[0]),
                interpolation=cv2.INTER_NEAREST),
                                     dtype=np.float32)

            tRgb = data_transform.Compose(
                [  # transforms.functional.crop(130,10,1368,912),
                    # data_transform.Rotation(degree),
                    transforms.Resize(s),
                    # transforms.CenterCrop((228*1, 304*1)),
                    transforms.CenterCrop((352, 512)),
                    #transforms.CenterCrop((365,547)),
                    transforms.ToTensor(),
                    # transforms.Normalize((0.0115, 0.0124, 0.0111), (0.0085, 0.0086, 0.0084)),
                    # transforms.ToPILImage()
                ])

            tDepth = data_transform.Compose(
                [  # transforms.functional.crop(130,10,1368,912),
                    # data_transform.Rotation(degree),
                    cfctransforms.Resize(1.0),
                    cfctransforms.CenterCrop((352, 512)),
                    # transforms.CenterCrop((228*1, 304*1)),
                    #cfctransforms.CenterCrop((365,547)),
                    # transforms.ToTensor()
                ])

            rgb_image = tRgb(rgb_image)
            # depth_image = transforms.functional.crop(depth_image, 130, 10, 548, 821)
            depth_image = tDepth(depth_image)
            # print(depth_image.shape)
            depth_image = np.asarray(depth_image, dtype=np.float32)

            ### exclude points with depth > 500m ####
            sparse_depth = np.zeros(depth_image.shape)
            mask_l = depth_image > 0
            mask_keep = np.bitwise_and(mask_l, depth_image <= 500)
            sparse_depth[mask_keep] = depth_image[mask_keep]
            depth_image = sparse_depth

            #print("max", depth_image.max())
            # depth_image = scale(depth_image, out_range=(0.01, 1))
            max_depth = max(depth_image.max(), 1.0)
            depth_image = (10 / max_depth) * depth_image
            scale = max_depth / 10

        # NOTE: rgb_np and depth_np are never defined in this method, so this
        # modality branch is dead code as written (input_np is also unused)
        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = self.create_rgbd(rgb_np, depth_np)
        elif self.modality == 'd':
            input_np = self.create_sparse_depth(rgb_np, depth_np)

        depth_image = transforms.ToTensor()(depth_image)

        return transforms.ToTensor()(self.create_rgbdm(
            rgb_image.squeeze(0).numpy().transpose(1, 2, 0),
            depth_image.squeeze(0).numpy())), depth_image
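
Example #15's `create_sparse_depth` / `create_rgbdm` helpers are not shown. In sparse-to-dense style loaders, the sparse input is typically produced by keeping a random subset of valid depth pixels; a minimal sketch under that assumption (the body is illustrative, not the repo's actual code):

    import numpy as np

    def create_sparse_depth(depth, num_samples=200):
        # keep roughly num_samples valid depth pixels, zero out the rest
        n_valid = max(int((depth > 0).sum()), 1)
        prob = float(num_samples) / n_valid
        mask = (np.random.uniform(size=depth.shape) < prob) & (depth > 0)
        return depth * mask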