Пример #1
0
    def postprocess(self, pred, lbl):
        '''Returns prediction and label restored towards the original dimensions

        Both arrays are first passed through ``topcrop(..., reverse=True)``
        (per the original comment, this adds back the top half of the image
        filled with 0 values). If the task is not defect detection
        (``self.defects`` is falsy), both are additionally upscaled with
        nearest-neighbour interpolation to the GSV panorama size, 6656 pixels
        wide by 3328 pixels high.

        pred (np.array): prediction array
        lbl (np.array): label array

        Returns a ``(pred, lbl)`` tuple: np.arrays when ``self.defects`` is
        truthy, PIL images (as returned by ``Image.resize``) otherwise.
        '''
        assert isinstance(pred, np.ndarray) and isinstance(lbl, np.ndarray), \
            'pred and lbl must both be numpy arrays'
        # if GSV (6656*3328), add top half of image with 0 values and upscale to
        # original dimensions if task is not defect detection
        pred = topcrop(pred, reverse=True)
        lbl = topcrop(lbl, reverse=True)
        if not self.defects:
            # PIL's resize() expects (width, height). A GSV panorama is
            # 6656 wide and 3328 high, so the width has to come first;
            # passing (3328, 6656) produced a transposed, portrait-shaped output.
            gsv_size = (6656, 3328)
            pred_pil = Image.fromarray(pred.astype(np.uint8))
            lbl_pil = Image.fromarray(lbl.astype(np.uint8))
            # NEAREST keeps class indices intact (no interpolated values)
            pred = pred_pil.resize(gsv_size, Image.NEAREST)
            lbl = lbl_pil.resize(gsv_size, Image.NEAREST)

        return pred, lbl
Пример #2
0
    def seg_mask(self, image, model, top_crop=True, downscale=True):
        '''
        Returns prediction mask as np array for sidewalk on image with value = 1 for sidewalk
        :param image: image as np array
        :param model: pytorch model to use for semantic segmentation; must provide eval() and tta()
        :param top_crop: forwarded to img_preproc; if True, the prediction is also passed through
            topcrop(reverse=True) to restore the part removed during pre-processing
        :param downscale: forwarded to img_preproc; if True, the prediction is resized back to the
            image's width and height (nearest neighbour) when both share the same aspect ratio
        :return: per-pixel mask as np array; it has the same height and width as image when the
            resize step succeeds, otherwise it is returned at prediction size and a message is printed
        '''
        model.eval()  # set model mode to eval()
        device = torch.device('cuda:0' if torch.cuda.is_available() else
                              'cpu')  # work on GPU if available

        image_torch = self.img_preproc(image,
                                       top_crop=top_crop,
                                       downscale=downscale)
        # mimic dataloader by adding a leading batch dimension, then send to device
        image_torch = image_torch[None].to(device)

        # output: predictions (segmentation maps with horizontal flip, i.e. test time augmentation)
        # TODO try without tta. don't think it will have a significant effect on iou
        # Inference only: no_grad avoids building an autograd graph.
        with torch.no_grad():
            pred = model.tta(image_torch, net_type='deeplab')
        # take channel with highest pixel value as winning class
        pred = pred.argmax(dim=1)
        # send back to cpu, convert to numpy
        pred = pred.detach().cpu().numpy()

        # take first (and only) prediction of the batch
        pred = pred[0]

        # Test the *parameter* top_crop here. The original tested the
        # function object `topcrop`, which is always truthy, so the padding
        # was applied even when the caller asked for top_crop=False.
        if top_crop:
            # restore top half of prediction mask by filling with black values
            pred = topcrop(pred, reverse=True)
        if downscale:
            if pred.shape[1] / pred.shape[0] != image.shape[1] / image.shape[0]:
                print(
                    f'Unable to rescale two arrays with different aspect ratios.\n'
                    f'Prediction shape is {pred.shape[1]}x{pred.shape[0]} '
                    f'whereas image shape is {image.shape[1]}x{image.shape[0]}.'
                )
            else:
                # resize pred to image dimensions; cv2 expects dsize as (width, height)
                # NOTE(review): argmax yields an int64 array - confirm the deployed
                # OpenCV build accepts that dtype in cv2.resize.
                pred = cv2.resize(src=pred,
                                  dsize=(image.shape[1], image.shape[0]),
                                  interpolation=cv2.INTER_NEAREST)

        return pred
Пример #3
0
    def img_preproc(self, image, top_crop=True, downscale=True):
        '''Turns an image array into a normalized, channel-first float tensor

        :param image: image array; it is indexed via ``shape[0]``/``shape[1]`` and
            transposed with ``(2, 0, 1)``, so three axes are expected
            (presumably height, width, channels - confirm against callers)
        :param top_crop: if True, apply ``topcrop(image, topcrop_prop=0.5)`` first
            (pre-processing intended for original GSV panoramas, 6656*3328)
        :param downscale: if True, rescale large images: longest side >= 6656
            goes to max_size=4096, longest side > 2048 goes to max_size=1536
        :return: torch.FloatTensor of the image normalized to the range (-1, 1)
        '''
        if top_crop:
            image = topcrop(image, topcrop_prop=0.5)

        if downscale:
            # measured after the optional crop, as the crop changes the shape
            longest_side = max(image.shape[0], image.shape[1])
            if longest_side >= 6656:
                target_size = 4096  # most probably a 360 panorama
            elif longest_side > 2048:
                target_size = 1536
            else:
                target_size = None  # small enough, keep as is
            if target_size is not None:
                image = rescale(image, max_size=target_size)

        normalized = minmax_normalize(image, norm_range=(-1, 1))
        channel_first = normalized.transpose(2, 0, 1)
        return torch.FloatTensor(channel_first)
Пример #4
0
    def __getitem__(self, index):
        '''Returns preprocessed image and label for the sample at ``index``

        Steps, in order:
          1. load image and label from ``self.img_paths`` / ``self.lbl_paths``
          2. size handling: GSV-sized inputs (at least 3328 rows and 6656
             columns) go through ``topcrop`` and, unless ``self.defects`` is
             set, ``rescale(max_size=4096)``; other inputs whose longest side
             exceeds 2048 are rescaled to ``max_size=2048``
          3. ``self.encode_mask`` on the label
          4. optional image-only augmentation, normalization (depends on
             ``self.net_type``), optional resizer, optional affine augmentation

        Returns ``(img, lbl)``. In debug mode these are left as arrays and
        diagnostics are printed; otherwise img becomes a channel-first
        ``torch.FloatTensor`` and lbl a ``torch.LongTensor``.
        '''
        img_path = self.img_paths[index]
        img = np.array(Image.open(img_path))

        lbl_path = self.lbl_paths[index]
        lbl = np.array(Image.open(lbl_path))

        n_rows, n_cols = img.shape[0], img.shape[1]
        if n_rows >= 3328 and n_cols >= 6656:
            # GSV (6656*3328): crop, then rescale unless task is defect detection
            img, lbl = topcrop(img), topcrop(lbl)
            if not self.defects:
                img, lbl = rescale(img, max_size=4096), rescale(lbl, max_size=4096)
        elif max(n_rows, n_cols) > 2048:
            # not GSV: cap the largest side at 2048
            img, lbl = rescale(img, max_size=2048), rescale(lbl, max_size=2048)

        if self.debug:
            print(f'Label path: {lbl_path}')
            print(f'Unique values in label {np.unique(lbl)}')
            # quality control: flag any raw label value outside the expected set
            expected_values = {0, 8, 34, 35, 36, 255}
            unexpected = set(np.unique(lbl)) - expected_values
            if unexpected:
                print('Oups. There are stranger values in your label...')

        # per the original comment: overwrite values to get a 2 class task
        # (sidewalk/background or defect/background)
        lbl = self.encode_mask(lbl)

        # ImageAugment (RandomBrightness, AddNoise...) - image only
        if self.image_augmenter:
            img = self.image_augmenter(image=img)['image']

        # Normalization
        if self.net_type == 'unet':
            img = meanstd_normalize(minmax_normalize(img),
                                    mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225])
        else:
            img = minmax_normalize(img, norm_range=(-1, 1))

        # Resize (Scale & Pad & Crop) - image and mask together
        if self.resizer:
            out = self.resizer(image=img, mask=lbl)
            img, lbl = out['image'], out['mask']

        # AffineAugment (Horizontal Flip, Rotate...) - image and mask together
        if self.affine_augmenter:
            out = self.affine_augmenter(image=img, mask=lbl)
            img, lbl = out['image'], out['mask']

        if self.debug:
            # debug mode: report label stats and hand back the raw arrays
            print(np.unique(lbl))
            print(lbl.shape)
            return img, lbl

        return torch.FloatTensor(img.transpose(2, 0, 1)), torch.LongTensor(lbl)