Example #1
    def _tr_preprocess(self, image, depth, image_path):
        crop_h, crop_w = self.config["tr_crop_size"]
        # resize
        W, H = image.size
        dH, dW = depth.shape

        ### Minghan: filled depth maps all share one size, which may not match the image size
        minW = W if W < dW else dW
        minH = H if H < dH else dH
        # assert W == dW and H == dH, \
        #     "image shape should be same with depth, but image shape is {}, depth shape is {}".format((H, W), (dH, dW))

        # scale_h, scale_w = max(crop_h/H, 1.0), max(crop_w/W, 1.0)
        # scale = max(scale_h, scale_w)
        # H, W = math.ceil(scale*H), math.ceil(scale*W)
        # H, W = max(int(scale*H), crop_h), max(int(scale*W), crop_w)

        # print("w={}, h={}".format(W, H))
        scale = max(crop_h / H, 1.0)
        H, W = max(crop_h, H), math.ceil(scale * W)
        image = image.resize((W, H), Image.BILINEAR)
        # depth = cv2.resize(depth, (W, H), cv2.INTER_LINEAR)
        # print("image shape:", image.size, " depth shape:", depth.shape)

        crop_dh, crop_dw = int(crop_h / scale), int(crop_w / scale)

        # random crop size
        ### Minghan: in case image and depth are not of the same size
        # x = random.randint(0, W - crop_w)
        # y = random.randint(0, H - crop_h)
        x = random.randint(0, minW - crop_w)
        y = random.randint(0, minH - crop_h)
        
        dx, dy = math.floor(x/scale), math.floor(y/scale)
        # print("crop dh = {}, crop dw = {}".format(crop_dh, crop_dw))

        image = image.crop((x, y, x + crop_w, y + crop_h))
        depth = depth[dy:dy + crop_dh, dx:dx + crop_dw]
        # print("depth shape: ", depth.shape)

        # normalize
        image = np.asarray(image).astype(np.float32) / 255.0
        image = nomalize(image, type=self.config['norm_type'])
        image = image.transpose(2, 0, 1)

        return image, depth, None

    def _te_preprocess(self, image, depth):
        crop_h, crop_w = self.config["te_crop_size"]
        # resize
        W, H = image.size
        dH, dW = depth.shape

        assert W == dW and H == dH, \
            "image shape should match depth shape, but image shape is {} and depth shape is {}".format((H, W), (dH, dW))

        # scale_h, scale_w = max(crop_h/H, 1.0), max(crop_w/W, 1.0)
        # scale = max(scale_h, scale_w)
        # # H, W = math.ceil(scale*H), math.ceil(scale*W)
        scale = max(crop_h / H, 1.0)
        H, W = max(crop_h, H), math.ceil(scale * W)
        # H, W = max(int(scale*H), crop_h), max(int(scale*W), crop_w)

        image_n = image.copy()
        image = image.resize((W, H), Image.BILINEAR)
        crop_dh, crop_dw = int(crop_h / scale), int(crop_w / scale)
        # print("crop dh = {}, crop dw = {}".format(crop_dh, crop_dw))
        # depth = cv2.resize(depth, (W, H), cv2.INTER_LINEAR)

        # center crop
        x = (W - crop_w) // 2
        y = (H - crop_h) // 2
        dx = (dW - crop_dw) // 2
        dy = (dH - crop_dh) // 2

        image = image.crop((x, y, x + crop_w, y + crop_h))
        depth = depth[dy:dy + crop_dh, dx:dx + crop_dw]
        image_n = image_n.crop((dx, dy, dx + crop_dw, dy + crop_dh))

        # normalize
        image_n = np.array(image_n).astype(np.float32)
        image = np.asarray(image).astype(np.float32) / 255.0
        image = nomalize(image, type=self.config['norm_type'])
        image = image.transpose(2, 0, 1)

        output_dict = {"image_n": image_n}

        return image, depth, output_dict
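
A note on the resize step shared by both methods: scale = max(crop_h / H, 1.0) only upscales when the image is shorter than the crop, so for full-resolution KITTI frames the scale stays at 1 and the crop windows on the image and on the depth map coincide. The self-contained sketch below reproduces just that arithmetic with illustrative sizes; it is not part of the original class.

    import math

    def compute_resize_and_crop(H, W, crop_h, crop_w):
        # Reproduces the resize/crop-size arithmetic above (illustration only).
        scale = max(crop_h / H, 1.0)                 # upscale only if the image is too short
        new_H, new_W = max(crop_h, H), math.ceil(scale * W)
        crop_dh, crop_dw = int(crop_h / scale), int(crop_w / scale)  # crop size in depth coordinates
        return scale, (new_H, new_W), (crop_dh, crop_dw)

    # Full-resolution KITTI-like frame: nothing is resized, image and depth crops match.
    print(compute_resize_and_crop(H=375, W=1242, crop_h=352, crop_w=704))   # (1.0, (375, 1242), (352, 704))

    # Image shorter than the crop: scaled up by exactly 2, and the depth crop window shrinks accordingly.
    print(compute_resize_and_crop(H=176, W=1242, crop_h=352, crop_w=704))   # (2.0, (352, 2484), (176, 352))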
Example #3
    def _tr_preprocess(self, image, depth, image_path):
        ### Minghan: load cam_info, which will be adjusted according to the preprocessing ops logged in cam_ops
        ntp = self.datareader.ffinder.ntp_from_fname(image_path, 'rgb')
        if 'calib' in self.datareader.ffinder.preload_ftypes:
            ntp_cam = self.cam_proj.dataset_reader.ffinder.ntp_ftype_convert(
                ntp, ftype='calib')
            cam_info = self.cam_proj.prepare_cam_info(key=ntp_cam)
        else:
            inex = self.datareader.read_from_ntp(ntp, ftype='calib')
            cam_info = self.cam_proj.prepare_cam_info(intr=inex)

        cam_ops = []

        crop_h, crop_w = self.config["tr_crop_size"]
        # resize
        W, H = image.size
        dH, dW = depth.shape

        ### Minghan: filled depth maps all share one size, which may not match the image size
        minW = W if W < dW else dW
        minH = H if H < dH else dH
        # assert W == dW and H == dH, \
        #     "image shape should be same with depth, but image shape is {}, depth shape is {}".format((H, W), (dH, dW))

        # scale_h, scale_w = max(crop_h/H, 1.0), max(crop_w/W, 1.0)
        # scale = max(scale_h, scale_w)
        # H, W = math.ceil(scale*H), math.ceil(scale*W)
        # H, W = max(int(scale*H), crop_h), max(int(scale*W), crop_w)

        # print("w={}, h={}".format(W, H))
        scale = max(crop_h / H, 1.0)
        H, W = max(crop_h, H), math.ceil(scale * W)
        image = image.resize((W, H), Image.BILINEAR)
        # depth = cv2.resize(depth, (W, H), cv2.INTER_LINEAR)
        # print("image shape:", image.size, " depth shape:", depth.shape)

        crop_dh, crop_dw = int(crop_h / scale), int(crop_w / scale)

        # random crop size
        ### Minghan: in case image and depth are not of the same size
        # x = random.randint(0, W - crop_w)
        # y = random.randint(0, H - crop_h)
        x = random.randint(0, minW - crop_w)
        y = random.randint(0, minH - crop_h)

        dx, dy = math.floor(x / scale), math.floor(y / scale)
        # print("crop dh = {}, crop dw = {}".format(crop_dh, crop_dw))

        image = image.crop((x, y, x + crop_w, y + crop_h))
        depth = depth[dy:dy + crop_dh, dx:dx + crop_dw]
        # print("depth shape: ", depth.shape)

        ### Minghan: log the cropping operation in cam_ops
        cam_ops.append(CamCrop(x, y, crop_w, crop_h))
        ### Minghan: we assume scale is always 1
        assert crop_h == crop_dh
        assert crop_w == crop_dw
        assert scale == 1
        assert x == dx
        assert y == dy

        # normalize
        image = np.asarray(image).astype(np.float32) / 255.0
        image = nomalize(image, type=self.config['norm_type'])
        image = image.transpose(2, 0, 1)

        ### mask used by c3d loss
        mask_gt = depth > 0
        mask = binary_closing(mask_gt, self.dilate_struct)
        ## add a channel dimension (axis 0) to the masks (depth is not expanded here, to stay consistent with the original DORN)
        mask_gt = np.expand_dims(mask_gt, axis=0)
        mask = np.expand_dims(mask, axis=0)
        ### Minghan: update cam_info according to cam_ops
        cam_info = seq_ops_on_cam_info(cam_info, cam_ops)
        extra_dict = {"cam_info": cam_info, "mask": mask, "mask_gt": mask_gt}

        return image, depth, extra_dict
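
The CamCrop entries appended to cam_ops let seq_ops_on_cam_info keep the camera intrinsics consistent with the cropped image; the implementation of those helpers is not shown in this listing. The sketch below only illustrates the standard pinhole adjustment a crop implies (the principal point shifts by the crop offset), using a hypothetical apply_crop_to_intrinsics function rather than the repository's own classes.

    import numpy as np

    def apply_crop_to_intrinsics(K, x, y):
        # Illustration only: a crop with top-left offset (x, y) shifts the principal point;
        # focal lengths are unchanged by a pure crop.
        K = K.copy()
        K[0, 2] -= x   # cx
        K[1, 2] -= y   # cy
        return K

    # Roughly KITTI-like intrinsics, adjusted for a (13, 23) crop offset.
    K = np.array([[721.5,   0.0, 609.6],
                  [  0.0, 721.5, 172.9],
                  [  0.0,   0.0,   1.0]])
    print(apply_crop_to_intrinsics(K, x=13, y=23))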
Example #4
    def _te_preprocess(self, image, depth):
        crop_h, crop_w = self.config["te_crop_size"]
        # resize
        W, H = image.size
        dH, dW = depth.shape

        W_img, H_img = W, H

        assert W == dW and H == dH, \
            "image shape should match depth shape, but image shape is {} and depth shape is {}".format((H, W), (dH, dW))

        # scale_h, scale_w = max(crop_h/H, 1.0), max(crop_w/W, 1.0)
        # scale = max(scale_h, scale_w)
        # # H, W = math.ceil(scale*H), math.ceil(scale*W)
        scale = max(crop_h / H, 1.0)
        H, W = max(crop_h, H), math.ceil(scale * W)
        # H, W = max(int(scale*H), crop_h), max(int(scale*W), crop_w)

        image_n = image.copy()
        image = image.resize((W, H), Image.BILINEAR)
        crop_dh, crop_dw = int(crop_h / scale), int(crop_w / scale)
        # print("crop dh = {}, crop dw = {}".format(crop_dh, crop_dw))
        # depth = cv2.resize(depth, (W, H), cv2.INTER_LINEAR)

        images = []
        depths = []
        image_ns = []
        x0s = []
        x1s = []
        y0s = []
        y1s = []
        for i in range(4):
            y0 = 0
            y1 = H
            x0 = int(0 + i * 256)
            x1 = x0 + crop_w
            if x1 > W:
                x0 = W - crop_w
                x1 = W
            image_ = image.crop((x0, y0, x1, y1))

            x0s.append(x0)
            x1s.append(x1)
            y0s.append(y0)
            y1s.append(y1)

            y0 = 0
            y1 = dH
            x0 = int(0 + i * 256)
            x1 = x0 + crop_dw
            if x1 > dW:
                x0 = dW - crop_dw
                x1 = dW

            depth_ = depth[y0:y1, x0:x1]
            image_n_ = image_n.crop((x0, y0, x1, y1))

            # normalize
            image_n_ = np.array(image_n_).astype(np.float32)
            image_ = np.asarray(image_).astype(np.float32) / 255.0
            image_ = nomalize(image_, type=self.config['norm_type'])
            image_ = image_.transpose(2, 0, 1)

            images.append(image_)
            depths.append(depth_)
            image_ns.append(image_n_)

        image = np.stack(images)
        depth = np.stack(depths)
        image_n = np.stack(image_ns)
        output_dict = {
            "image_n": image_n,
            "x0": x0s,
            "x1": x1s,
            "y0": y0s,
            "y1": y1s,
            "H": H,
            "W": W,
            "W_img": W_img,
            "H_img": H_img
        }
        return image, depth, output_dict
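
This test-time variant slides four crop_w-wide windows across the image at a 256 px stride (clamping the last window to the right edge) and records each window's x0/x1/y0/y1 so the per-window predictions can later be stitched back together. The stitching step is not part of this snippet; below is a minimal sketch that averages the overlapping regions, assuming each prediction has shape (crop_h, crop_w).

    import numpy as np

    def merge_window_predictions(preds, x0s, x1s, full_w):
        # Illustration only: average overlapping horizontal windows back into
        # a single (crop_h, full_w) map.
        crop_h = preds[0].shape[0]
        acc = np.zeros((crop_h, full_w), dtype=np.float32)
        count = np.zeros((crop_h, full_w), dtype=np.float32)
        for pred, x0, x1 in zip(preds, x0s, x1s):
            acc[:, x0:x1] += pred
            count[:, x0:x1] += 1.0
        return acc / np.maximum(count, 1.0)   # columns outside every window stay zero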
Example #5
    def _te_preprocess(self, image, depth, image_path):
        ### Minghan: load cam_info, which will be adjusted according to the preprocessing ops logged in cam_ops
        ntp = self.datareader.ffinder.ntp_from_fname(image_path, 'rgb')
        if 'calib' in self.datareader.ffinder.preload_ftypes:
            ntp_cam = self.cam_proj.dataset_reader.ffinder.ntp_ftype_convert(ntp, ftype='calib')
            cam_info = self.cam_proj.prepare_cam_info(key=ntp_cam)
        else:
            inex = self.datareader.read_from_ntp(ntp, ftype='calib')
            cam_info = self.cam_proj.prepare_cam_info(intr=inex)
        cam_ops = []

        ### for evaluating on full image
        ### Minghan: this is only applicable if batch_size=1 for evaluation, because raw full images in KITTI are not of exactly the same size
        depth_full = depth.copy() if depth is not None else None
        image_full = image.copy()
        image_full = np.array(image_full).astype(np.float32)
        image_full = image_full.transpose(2, 0, 1)

        crop_h, crop_w = self.config["te_crop_size"]
        # resize
        W, H = image.size
        if depth is not None:
            dH, dW = depth.shape
        else:
            dH = H
            dW = W

        assert W == dW and H == dH, \
            "image shape should match depth shape, but image shape is {} and depth shape is {}".format((H, W), (dH, dW))

        # scale_h, scale_w = max(crop_h/H, 1.0), max(crop_w/W, 1.0)
        # scale = max(scale_h, scale_w)
        # # H, W = math.ceil(scale*H), math.ceil(scale*W)
        scale = max(crop_h / H, 1.0)
        H, W = max(crop_h, H), math.ceil(scale * W)
        # H, W = max(int(scale*H), crop_h), max(int(scale*W), crop_w)

        image_n = image.copy()
        image = image.resize((W, H), Image.BILINEAR)
        crop_dh, crop_dw = int(crop_h/scale), int(crop_w/scale)
        # print("crop dh = {}, crop dw = {}".format(crop_dh, crop_dw))
        # depth = cv2.resize(depth, (W, H), cv2.INTER_LINEAR)

        assert crop_dh == crop_h, "{} {}".format(crop_dh, crop_h)
        assert crop_dw == crop_w, "{} {}".format(crop_dw, crop_w)
        crop_mode = self.config["te_crop_mode"]
        if crop_mode == "center":
            # center crop
            x = (W - crop_w) // 2
            y = (H - crop_h) // 2
            dx = (dW - crop_dw) // 2
            dy = (dH - crop_dh) // 2
        elif crop_mode == "kb_crop":
            ### this mode actually cannot be used because DORN requires a fixed-size input due to its fully connected layers
            assert crop_w == 1216, crop_w
            assert crop_h == 352, crop_h
            
            x = (W - crop_w) // 2
            y = H - crop_h
            dx = (dW - crop_dw) // 2
            dy = dH - crop_dh
        elif crop_mode == "bottom_left":
            x = 0
            y = H - crop_h
            dx = 0
            dy = dH - crop_dh
        elif crop_mode == "bottom_right":
            x = W - crop_w
            y = H - crop_h
            dx = dW - crop_dw
            dy = dH - crop_dh
        elif crop_mode == "random":
            x = random.randint(0, W - crop_w)
            y = random.randint(0, H - crop_h)
            dx = random.randint(0, dW - crop_dw)
            dy = random.randint(0, dH - crop_dh)
        else: 
            raise ValueError("crop_mode {} not recognized".format(crop_mode))


        image = image.crop((x, y, x + crop_w, y + crop_h))
        if depth is not None:
            depth = depth[dy:dy + crop_dh, dx:dx + crop_dw]
        image_n = image_n.crop((dx, dy, dx + crop_dw, dy + crop_dh))

        # normalize
        image_n = np.array(image_n).astype(np.float32)
        image = np.asarray(image).astype(np.float32) / 255.0
        image = nomalize(image, type=self.config['norm_type'])
        image = image.transpose(2, 0, 1)

        output_dict = {"image_n": image_n}

        ### Minghan: log the cropping operation in cam_ops
        cam_ops.append(CamCrop(x, y, crop_w, crop_h))
        ### Minghan: we assume scale is always 1
        assert crop_h == crop_dh
        assert crop_w == crop_dw
        assert scale == 1
        assert x == dx
        assert y == dy

        cam_info_img = seq_ops_on_cam_info(cam_info, cam_ops)
        x_kb = (W - 1216) // 2
        y_kb = H - 352
        cam_info_kb_crop = seq_ops_on_cam_info(cam_info, [CamCrop(x_kb, y_kb, 1216, 352)] )

        ### Minghan: save full image for future reference
        ### Minghan: note that "cam_info_full" likely cannot be batched, because full images do not all have the same shape.
        ###          However, the test dataloader typically uses batch size 1, so this is okay.
        output_dict.update({
            "cam_info": cam_info_img,
            "cam_info_kb_crop": cam_info_kb_crop,
            "cam_info_full": cam_info,
            "depth_full": depth_full,
            "image_full": image_full,
        })

        return image, depth, output_dict
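
Both the kb_crop branch and the cam_info_kb_crop entry use a fixed 1216x352 window anchored at the bottom center of the frame, which matches the crop commonly used for KITTI depth evaluation. A short worked example of the offsets for a typical 1242x375 KITTI frame:

    W, H = 1242, 375                    # a typical raw KITTI frame size
    crop_w, crop_h = 1216, 352
    x_kb = (W - crop_w) // 2            # 13: centered horizontally
    y_kb = H - crop_h                   # 23: anchored to the bottom edge
    print((x_kb, y_kb, x_kb + crop_w, y_kb + crop_h))   # crop box (13, 23, 1229, 375)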