def postprocess(self, pred, lbl):
    '''Returns prediction and label restored towards original dimensions

    pred (np.array): prediction array
    lbl (np.array): label array

    The top crop is undone on both arrays via topcrop(..., reverse=True).
    If the task is not defect detection (self.defects is falsy), both are
    then upscaled to the GSV panorama size (6656 wide x 3328 high) with
    nearest-neighbour interpolation and returned as PIL images; otherwise
    the un-cropped arrays are returned as they come out of topcrop.
    '''
    assert isinstance(pred, np.ndarray) and isinstance(lbl, np.ndarray)

    # add the top half back (filled with 0 values, per the helper's usage here)
    pred = topcrop(pred, reverse=True)
    lbl = topcrop(lbl, reverse=True)

    if not self.defects:
        # PIL's resize expects (width, height). GSV panoramas are 6656 px
        # wide and 3328 px high, so the size must be (6656, 3328) and not
        # the transposed (3328, 6656).
        gsv_size = (6656, 3328)
        pred_pil = Image.fromarray(pred.astype(np.uint8))
        lbl_pil = Image.fromarray(lbl.astype(np.uint8))
        # NEAREST keeps class ids intact (no blended in-between values)
        pred = pred_pil.resize(gsv_size, Image.NEAREST)
        lbl = lbl_pil.resize(gsv_size, Image.NEAREST)

    return pred, lbl
def seg_mask(self, image, model, top_crop=True, downscale=True):
    '''
    Returns prediction mask as np array for sidewalk on image with value = 1 for sidewalk
    :param image: image as np array
    :param model: pytorch model to use for semantic segmentation; must expose
        a tta(images, net_type=...) method
    :param top_crop: passed to img_preproc; if True, the cropped top half is
        added back to the prediction mask afterwards
    :param downscale: passed to img_preproc; if True, the prediction mask is
        resized back to the dimensions of image, provided both share the
        same aspect ratio
    :return: per-pixel mask as np array. Has the same height/width as image
        when the resize step ran, otherwise the resolution of the prediction
    '''
    model.eval()  # set model mode to eval()
    # work on GPU if available
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    image_torch = self.img_preproc(image, top_crop=top_crop, downscale=downscale)
    image_torch = image_torch[None]  # mimic dataloader by adding a leading batch axis
    image_torch = image_torch.to(device)  # send image to device

    # inference only: no autograd graph is needed
    with torch.no_grad():
        # output: predictions (segmentation maps with horizontal flip, i.e. test time augmentation)
        # TODO try without tta. don't think it will have a significant effect on iou
        pred = model.tta(image_torch, net_type='deeplab')

    pred = pred.argmax(dim=1)  # take channel with highest pixel value as winning class
    pred = pred.detach().cpu().numpy()  # send back to cpu, convert to numpy
    pred = pred[0]  # single-item batch: keep the only prediction

    # Test the top_crop *argument*. The previous check used the module-level
    # `topcrop` function, which is always truthy, so the top half was added
    # back even when the image had never been cropped.
    if top_crop:
        # restore top half of prediction mask by filling with black values
        pred = topcrop(pred, reverse=True)

    if downscale:
        # NOTE(review): exact float comparison of aspect ratios; a rounding
        # difference introduced while rescaling would take the warning branch.
        if pred.shape[1] / pred.shape[0] != image.shape[1] / image.shape[0]:
            print(
                'Unable to rescale two arrays with different aspect ratios.\n'
                f'Prediction shape is {pred.shape[1]}x{pred.shape[0]} '
                f'whereas image shape is {image.shape[1]}x{image.shape[0]}.'
            )
        else:
            # resize pred to image dimensions; cv2 expects dsize=(width, height)
            # NOTE(review): assumes pred has a dtype cv2.resize accepts - TODO confirm
            pred = cv2.resize(src=pred,
                              dsize=(image.shape[1], image.shape[0]),
                              interpolation=cv2.INTER_NEAREST)

    return pred
def img_preproc(self, image, top_crop=True, downscale=True):
    '''Turns an image array into a float tensor ready for the network

    :param image: image as np array; the last axis is moved to the front, so
        an (H, W, C) layout is presumably expected - TODO confirm
    :param top_crop: if True, apply topcrop with topcrop_prop=0.5
        (pre-processing for original GSV panoramas, 6656*3328)
    :param downscale: if True, rescale to max_size=4096 when the longest side
        is >= 6656, or to max_size=1536 when it is > 2048
    :return: torch.FloatTensor of the image, min-max normalized with
        norm_range=(-1, 1) and with the last axis moved to the front
    '''
    if top_crop:
        image = topcrop(image, topcrop_prop=0.5)

    if downscale:
        longest_side = max(image.shape[0], image.shape[1])
        if longest_side >= 6656:
            # most probably a 360 panorama
            target_size = 4096
        elif longest_side > 2048:
            target_size = 1536
        else:
            target_size = None  # small enough, leave untouched

        if target_size is not None:
            image = rescale(image, max_size=target_size)

    normalized = minmax_normalize(image, norm_range=(-1, 1))
    channels_first = normalized.transpose(2, 0, 1)
    return torch.FloatTensor(channels_first)
def __getitem__(self, index):
    '''Returns the preprocessed image and label stored at position `index`

    Steps, in order: load both files, crop/rescale large inputs, sanity-check
    the raw label values, encode the label with self.encode_mask, apply the
    optional image augmenter, normalize the image, then apply the optional
    resizer and affine augmenter.

    Returns (img, lbl). With self.debug set, the pair is returned without
    the final tensor conversion and the label's unique values and shape are
    printed; otherwise img is a torch.FloatTensor with its last axis moved
    to the front and lbl is a torch.LongTensor.
    '''
    img_path = self.img_paths[index]
    img = np.array(Image.open(img_path))
    lbl_path = self.lbl_paths[index]
    lbl = np.array(Image.open(lbl_path))

    # GSV panorama (6656*3328): crop the top half, and rescale unless the
    # task is defect detection
    is_gsv = img.shape[0] >= 3328 and img.shape[1] >= 6656
    if is_gsv:
        img, lbl = topcrop(img), topcrop(lbl)
        if not self.defects:
            img, lbl = rescale(img, max_size=4096), rescale(lbl, max_size=4096)
    elif max(img.shape[0], img.shape[1]) > 2048:
        # not GSV: rescale to 2048 if the largest side is > 2048
        img, lbl = rescale(img, max_size=2048), rescale(lbl, max_size=2048)

    if self.debug:
        print(f'Label path: {lbl_path}')
        print(f'Unique values in label {np.unique(lbl)}')

    # quality control: warn about raw label values outside the known set
    allowed_values = {0, 8, 34, 35, 36, 255}
    if set(np.unique(lbl)).difference(allowed_values):
        print('Oups. There are stranger values in your label...')

    # overwrite values to get a 2 class task
    # (sidewalk/background or defect/background)
    lbl = self.encode_mask(lbl)

    # ImageAugment (RandomBrightness, AddNoise...)
    if self.image_augmenter:
        img = self.image_augmenter(image=img)['image']

    # Normalization depends on the network type
    if self.net_type == 'unet':
        img = meanstd_normalize(minmax_normalize(img),
                                mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])
    else:
        # ex.: pixel value 144 --> 144/255 if norm range (0,1)
        img = minmax_normalize(img, norm_range=(-1, 1))

    # Resize (Scale & Pad & Crop)
    if self.resizer:
        resizer_out = self.resizer(image=img, mask=lbl)
        img, lbl = resizer_out['image'], resizer_out['mask']

    # AffineAugment (Horizontal Flip, Rotate...)
    if self.affine_augmenter:
        affine_out = self.affine_augmenter(image=img, mask=lbl)
        img, lbl = affine_out['image'], affine_out['mask']

    if self.debug:
        # debug mode: report on the label and skip the tensor conversion
        print(np.unique(lbl))
        print(lbl.shape)
        return img, lbl

    img = torch.FloatTensor(img.transpose(2, 0, 1))
    lbl = torch.LongTensor(lbl)
    return img, lbl