def generate_label(self):
    _image_dir = os.path.join(self.args.ori_root_dir, 'JPEGImages')
    annFile = os.path.join(self.args.ori_root_dir, 'trainval_merged.json')
    _mapping = np.sort(np.array([
        0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25,
        284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46,
        308, 59, 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34,
        207, 80, 355, 85, 347, 220, 349, 360, 98, 187, 104, 105, 366, 189,
        368, 113, 115]))
    _key = np.array(range(len(_mapping))).astype('uint8')

    from detail import Detail
    train_detail = Detail(annFile, _image_dir, 'train')
    train_ids = train_detail.getImgs()
    for img_id in train_ids:
        mask = Image.fromarray(self._class_to_index(
            train_detail.getMask(img_id), _mapping=_mapping, _key=_key))
        filename = img_id['file_name']
        basename, _ = os.path.splitext(filename)
        if filename.endswith(".jpg"):
            imgpath = os.path.join(_image_dir, filename)
            shutil.copy(imgpath,
                        os.path.join(self.train_image_dir, filename))
            mask_png_name = basename + '.png'
            mask.save(os.path.join(self.train_label_dir, mask_png_name))

    val_detail = Detail(annFile, _image_dir, 'val')
    val_ids = val_detail.getImgs()
    for img_id in val_ids:
        mask = Image.fromarray(self._class_to_index(
            val_detail.getMask(img_id), _mapping=_mapping, _key=_key))
        filename = img_id['file_name']
        basename, _ = os.path.splitext(filename)
        if filename.endswith(".jpg"):
            imgpath = os.path.join(_image_dir, filename)
            shutil.copy(imgpath, os.path.join(self.val_image_dir, filename))
            mask_png_name = basename + '.png'
            mask.save(os.path.join(self.val_label_dir, mask_png_name))
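# A minimal, self-contained sketch (illustration only, not part of the
# original code) of the remapping used by `_class_to_index` above:
# `np.digitize(..., right=True)` finds each raw pixel value's position in
# the sorted `_mapping` array, and `_key` turns that position into a
# contiguous label in [0, 59]. The tiny mapping and mask below are made up
# for the demo.
import numpy as np

demo_mapping = np.sort(np.array([0, 2, 9, 18, 259]))    # small subset of the real table
demo_key = np.arange(len(demo_mapping)).astype('uint8')  # contiguous 0..N-1

raw_mask = np.array([[0, 2], [259, 9]])                  # raw Detail category IDs
index = np.digitize(raw_mask.ravel(), demo_mapping, right=True)
print(demo_key[index].reshape(raw_mask.shape))           # [[0 1] [4 2]]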
class ContextSegmentation(BaseDataset):
    BASE_DIR = 'VOCdevkit/VOC2010'
    NUM_CLASS = 59

    def __init__(self,
                 root=os.path.expanduser(
                     '/media/zilong/b368236f-2592-49f3-9dcc-0d4da674fd26/.encoding/data'),
                 split='train',
                 mode=None,
                 transform=None,
                 target_transform=None,
                 **kwargs):
        super(ContextSegmentation, self).__init__(root, split, mode, transform,
                                                  target_transform, **kwargs)
        from detail import Detail
        # from detail import mask
        root = os.path.join(root, self.BASE_DIR)
        annFile = os.path.join(root, 'trainval_merged.json')
        imgDir = os.path.join(root, 'JPEGImages')
        # training mode
        self.detail = Detail(annFile, imgDir, split)
        self.transform = transform
        self.target_transform = target_transform
        self.ids = self.detail.getImgs()
        # generate masks
        self._mapping = np.sort(np.array([
            0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25,
            284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46,
            308, 59, 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34,
            207, 80, 355, 85, 347, 220, 349, 360, 98, 187, 104, 105, 366,
            189, 368, 113, 115]))
        self._key = np.array(range(len(self._mapping))).astype('uint8')
        mask_file = os.path.join(root, self.split + '.pth')
        print('mask_file:', mask_file)
        if os.path.exists(mask_file):
            self.masks = torch.load(mask_file)
        else:
            self.masks = self._preprocess(mask_file)

    def _class_to_index(self, mask):
        # assert the values
        values = np.unique(mask)
        for i in range(len(values)):
            assert values[i] in self._mapping
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        return self._key[index].reshape(mask.shape)

    def _preprocess(self, mask_file):
        masks = {}
        tbar = trange(len(self.ids))
        print("Preprocessing masks, this will take a while. "
              "But don't worry, it only runs once for each split.")
        for i in tbar:
            img_id = self.ids[i]
            mask = Image.fromarray(
                self._class_to_index(self.detail.getMask(img_id)))
            masks[img_id['image_id']] = mask
            tbar.set_description("Preprocessing masks {}".format(
                img_id['image_id']))
        torch.save(masks, mask_file)
        return masks

    def __getitem__(self, index):
        img_id = self.ids[index]
        path = img_id['file_name']
        iid = img_id['image_id']
        img = Image.open(os.path.join(self.detail.img_folder,
                                      path)).convert('RGB')
        if self.mode == 'test':
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(path)
        # convert mask to 60 categories
        mask = self.masks[iid]
        # synchronized transform
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            mask = self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            mask = self.target_transform(mask)
        return img, mask

    def _mask_transform(self, mask):
        target = np.array(mask).astype('int32') - 1
        return torch.from_numpy(target).long()

    def __len__(self):
        return len(self.ids)

    @property
    def pred_offset(self):
        return 1
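# A hedged usage sketch (not from the original source) for the dataset
# above; the transform pipeline and loader settings are assumptions for
# illustration. The class caches the remapped masks in `<root>/<split>.pth`,
# so only the first run pays the preprocessing cost.
import torch
import torchvision.transforms as transforms

input_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
trainset = ContextSegmentation(split='train', mode='train',
                               transform=input_transform)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=8,
                                           shuffle=True, num_workers=4)
images, masks = next(iter(train_loader))  # images: (B, 3, H, W); masks: (B, H, W)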
class PascalContext(BaseDataset):
    NUM_CLASS = 59

    def __init__(self,
                 root="./data",
                 split="train",
                 mode=None,
                 transform=None,
                 target_transform=None,
                 **kwargs):
        super(PascalContext, self).__init__(root, split, mode, transform,
                                            target_transform, **kwargs)
        from detail import Detail
        # from detail import mask
        root = os.path.join(root, "PascalContext")
        annFile = os.path.join(root, "trainval_merged.json")
        imgDir = os.path.join(root, "JPEGImages")
        # training mode
        self.detail = Detail(annFile, imgDir, split)
        self.transform = transform
        self.target_transform = target_transform
        self.ids = self.detail.getImgs()
        # generate masks
        self._mapping = np.sort(np.array([
            0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25,
            284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46,
            308, 59, 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34,
            207, 80, 355, 85, 347, 220, 349, 360, 98, 187, 104, 105, 366,
            189, 368, 113, 115,
        ]))
        self.classes = [
            "background", "aeroplane", "mountain", "mouse", "track", "road",
            "bag", "motorbike", "fence", "bed", "bedclothes", "bench",
            "bicycle", "diningtable", "bird", "person", "floor", "boat",
            "train", "book", "bottle", "tree", "window", "plate", "platform",
            "tvmonitor", "building", "bus", "cabinet", "shelves", "light",
            "pottedplant", "wall", "car", "ground", "cat", "sidewalk",
            "truck", "ceiling", "rock", "chair", "wood", "food", "horse",
            "cloth", "sign", "computer", "sheep", "keyboard", "flower",
            "sky", "cow", "grass", "cup", "curtain", "snow", "water",
            "sofa", "dog", "door",
        ]
        self._key = np.array(range(len(self._mapping))).astype("uint8")
        mask_file = os.path.join(root, self.split + ".pth")
        print("mask_file:", mask_file)
        if os.path.exists(mask_file):
            self.masks = torch.load(mask_file)
        else:
            self.masks = self._preprocess(mask_file)

    def _class_to_index(self, mask):
        # assert the values
        values = np.unique(mask)
        for i in range(len(values)):
            assert values[i] in self._mapping
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        return self._key[index].reshape(mask.shape)

    def _preprocess(self, mask_file):
        masks = {}
        tbar = trange(len(self.ids))
        print("Preprocessing masks, this will take a while. "
              "But don't worry, it only runs once for each split.")
        for i in tbar:
            img_id = self.ids[i]
            mask = Image.fromarray(
                self._class_to_index(self.detail.getMask(img_id)))
            masks[img_id["image_id"]] = mask
            tbar.set_description("Preprocessing masks {}".format(
                img_id["image_id"]))
        torch.save(masks, mask_file)
        return masks

    def __getitem__(self, index):
        img_id = self.ids[index]
        path = img_id["file_name"]
        iid = img_id["image_id"]
        img = Image.open(os.path.join(self.detail.img_folder,
                                      path)).convert("RGB")
        if self.mode == "test":
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(path)
        # convert mask to 60 categories
        mask = self.masks[iid]
        # synchronized transform
        if self.mode == "train":
            img, mask = self._sync_transform(img, mask)
        elif self.mode == "val":
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == "testval"
            mask = self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            mask = self.target_transform(mask)
        return img, mask

    def _mask_transform(self, mask):
        target = np.array(mask).astype("int32") - 1
        return torch.from_numpy(target).long()

    def __len__(self):
        return len(self.ids)

    @property
    def pred_offset(self):
        return 1
class PascalContextGenerator(object):
    def __init__(self, voc_path, annotation_path):
        self.voc_path = voc_path
        self.annotation_path = annotation_path
        self.label_dir = os.path.join(self.voc_path, 'Context')
        self._image_dir = os.path.join(self.voc_path, 'JPEGImages')
        self.annFile = os.path.join(self.annotation_path,
                                    'trainval_merged.json')

        self._mapping = np.sort(np.array([
            0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25,
            284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46,
            308, 59, 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34,
            207, 80, 355, 85, 347, 220, 349, 360, 98, 187, 104, 105, 366,
            189, 368, 113, 115]))
        self._key = np.array(range(len(self._mapping))).astype('uint8')

        self.train_detail = Detail(self.annFile, self._image_dir, 'train')
        self.train_ids = self.train_detail.getImgs()
        self.val_detail = Detail(self.annFile, self._image_dir, 'val')
        self.val_ids = self.val_detail.getImgs()

        if not os.path.exists(self.label_dir):
            os.makedirs(self.label_dir)

    def _class_to_index(self, mask, _mapping, _key):
        # assert the values
        values = np.unique(mask)
        for i in range(len(values)):
            assert values[i] in _mapping
        index = np.digitize(mask.ravel(), _mapping, right=True)
        return _key[index].reshape(mask.shape)

    def save_mask(self, img_id, mode):
        if mode == 'train':
            mask = Image.fromarray(
                self._class_to_index(self.train_detail.getMask(img_id),
                                     _mapping=self._mapping, _key=self._key))
        elif mode == 'val':
            mask = Image.fromarray(
                self._class_to_index(self.val_detail.getMask(img_id),
                                     _mapping=self._mapping, _key=self._key))
        filename = img_id['file_name']
        basename, _ = os.path.splitext(filename)
        if filename.endswith(".jpg"):
            mask_png_name = basename + '.png'
            mask.save(os.path.join(self.label_dir, mask_png_name))
        return basename

    def generate_label(self):
        with open(os.path.join(self.voc_path,
                               'ImageSets/Segmentation/train_context.txt'),
                  'w') as f:
            for img_id in tqdm.tqdm(self.train_ids, desc='train'):
                basename = self.save_mask(img_id, 'train')
                f.write(basename + '\n')

        with open(os.path.join(self.voc_path,
                               'ImageSets/Segmentation/val_context.txt'),
                  'w') as f:
            for img_id in tqdm.tqdm(self.val_ids, desc='val'):
                basename = self.save_mask(img_id, 'val')
                f.write(basename + '\n')

        with open(os.path.join(self.voc_path,
                               'ImageSets/Segmentation/trainval_context.txt'),
                  'w') as f:
            for img in tqdm.tqdm(os.listdir(self.label_dir), desc='trainval'):
                if img.endswith('.png'):
                    basename = img.split('.', 1)[0]
                    f.write(basename + '\n')
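# Illustrative driver for the generator above; the concrete paths are
# assumptions. `voc_path` should contain JPEGImages/ and ImageSets/, and
# `annotation_path` should contain trainval_merged.json.
if __name__ == '__main__':
    generator = PascalContextGenerator(
        voc_path='./data/VOCdevkit/VOC2010',
        annotation_path='./data/VOCdevkit/VOC2010')
    generator.generate_label()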
# img = imgs[random.randrange(len(imgs))]
img = '2008_007573'
pt = [261, 119]
cat_idx = 8

details.showImg(img, ax=axarr[0], wait=True)
# cats = details.getCats(imgs=img)
# print("")
# print(['%s (%d) : %s' % (cat['name'], cat['category_id'], cat['supercategory']) for cat in cats])
print("****")
print("{} --> ".format(cat_idx) + CLASS_NAMES[cat_idx])

# mask
# mask = details.getMask(img, cat='person', instance='#0', show=False)
mask = details.getMask(img, show=False)
mycmap = get_rand_color_map()
mycmap.set_under(alpha=0.0)
nonzero = np.unique(mask[np.nonzero(mask)])
# axarr[0].imshow(mask, cmap=mycmap, vmin=np.min(nonzero), vmax=np.max(nonzero) + 1)

# plot point
axarr[0].plot(pt[0], pt[1], 'ro')

plt.show()
# print(type(img))
# print(type(mask))
class PASCALContext(BaseDataset):
    def __init__(self,
                 root,
                 list_path,
                 num_samples=None,
                 num_classes=59,
                 multi_scale=True,
                 flip=True,
                 ignore_label=-1,
                 base_size=520,
                 crop_size=(480, 480),
                 downsample_rate=1,
                 scale_factor=16,
                 center_crop_test=False,
                 mean=[0.485, 0.456, 0.406],
                 std=[0.229, 0.224, 0.225]):
        super(PASCALContext, self).__init__(ignore_label, base_size,
                                            crop_size, downsample_rate,
                                            scale_factor, mean, std)
        self.root = os.path.join(root, 'pascal_ctx/VOCdevkit/VOC2010')
        self.split = list_path
        self.num_classes = num_classes
        self.class_weights = None
        self.multi_scale = multi_scale
        self.flip = flip
        self.crop_size = crop_size

        # prepare data
        annots = os.path.join(self.root, 'trainval_merged.json')
        img_path = os.path.join(self.root, 'JPEGImages')
        from detail import Detail
        if 'val' in self.split:
            self.detail = Detail(annots, img_path, 'val')
            mask_file = os.path.join(self.root, 'val.pth')
        elif 'train' in self.split:
            self.mode = 'train'
            self.detail = Detail(annots, img_path, 'train')
            mask_file = os.path.join(self.root, 'train.pth')
        else:
            raise NotImplementedError('only supporting train and val set.')
        self.files = self.detail.getImgs()

        # generate masks
        self._mapping = np.sort(np.array([
            0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25,
            284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46,
            308, 59, 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34,
            207, 80, 355, 85, 347, 220, 349, 360, 98, 187, 104, 105, 366,
            189, 368, 113, 115]))
        self._key = np.array(range(len(self._mapping))).astype('uint8')

        print('mask_file:', mask_file)
        if os.path.exists(mask_file):
            self.masks = torch.load(mask_file)
        else:
            self.masks = self._preprocess(mask_file)

    def _class_to_index(self, mask):
        # assert the values
        values = np.unique(mask)
        for i in range(len(values)):
            assert values[i] in self._mapping
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        return self._key[index].reshape(mask.shape)

    def _preprocess(self, mask_file):
        masks = {}
        print("Preprocessing masks, this will take a while. "
              "But don't worry, it only runs once for each split.")
        for i in range(len(self.files)):
            img_id = self.files[i]
            mask = Image.fromarray(
                self._class_to_index(self.detail.getMask(img_id)))
            masks[img_id['image_id']] = mask
        torch.save(masks, mask_file)
        return masks

    def __getitem__(self, index):
        item = self.files[index]
        name = item['file_name']
        img_id = item['image_id']

        image = cv2.imread(os.path.join(self.detail.img_folder, name),
                           cv2.IMREAD_COLOR)
        label = np.asarray(self.masks[img_id], dtype=int)
        size = image.shape

        if self.split == 'val':
            image = cv2.resize(image, self.crop_size,
                               interpolation=cv2.INTER_LINEAR)
            image = self.input_transform(image)
            image = image.transpose((2, 0, 1))
            label = cv2.resize(label, self.crop_size,
                               interpolation=cv2.INTER_NEAREST)
            label = self.label_transform(label)
        elif self.split == 'testval':
            # evaluate model on val dataset
            image = self.input_transform(image)
            image = image.transpose((2, 0, 1))
            label = self.label_transform(label)
        else:
            image, label = self.gen_sample(image, label, self.multi_scale,
                                           self.flip)
        return image.copy(), label.copy(), np.array(size), name

    def label_transform(self, label):
        if self.num_classes == 59:
            # background is ignored
            label = np.array(label).astype('int32') - 1
            label[label == -2] = -1
        else:
            label = np.array(label).astype('int32')
        return label
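# Worked toy example (not from the original source) of the 59-class branch
# of `label_transform` above: cached labels lie in [0, 59] with 0 =
# background, and subtracting 1 maps the 59 object classes to [0, 58] while
# background becomes the ignore label -1.
import numpy as np

raw = np.array([0, 1, 5, 59], dtype='int32')  # 0 is background
shifted = raw - 1                             # -> [-1, 0, 4, 58]
shifted[shifted == -2] = -1                   # guard for pixels already marked -1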
class PContextDataset(SegmentationDataset):
    NUM_CLASS = 59

    def __init__(self, cfg, stage, transform=None):
        # VOC2010
        super(PContextDataset, self).__init__(cfg, stage, transform)
        annFile = os.path.join(self.root, 'trainval_merged.json')
        imgDir = os.path.join(self.root, 'JPEGImages')
        self.detail = Detail(annFile, imgDir, self.stage)
        self.ids = self.detail.getImgs()
        self._mapping = np.sort(np.array([
            0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25,
            284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46,
            308, 59, 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34,
            207, 80, 355, 85, 347, 220, 349, 360, 98, 187, 104, 105, 366,
            189, 368, 113, 115]))
        self._key = np.array(range(len(self._mapping))).astype('uint8')
        mask_file = os.path.join(self.root, self.stage + '.pth')
        if os.path.exists(mask_file):
            self.masks = torch.load(mask_file)
        else:
            self.masks = self._preprocess(mask_file)

    def _class_to_index(self, mask):
        values = np.unique(mask)
        for i in range(len(values)):
            assert values[i] in self._mapping
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        return self._key[index].reshape(mask.shape)

    def _preprocess(self, mask_file):
        masks = {}
        tbar = trange(len(self.ids))
        for i in tbar:
            img_id = self.ids[i]
            mask = Image.fromarray(
                self._class_to_index(self.detail.getMask(img_id)))
            masks[img_id['image_id']] = mask
            tbar.set_description("Preprocessing masks {}".format(
                img_id['image_id']))
        torch.save(masks, mask_file)
        return masks

    def _mask_transform(self, mask):
        mask = np.array(mask).astype('int32') - 1
        return torch.from_numpy(mask).long()

    def __getitem__(self, index):
        img_id = self.ids[index]
        path = img_id['file_name']
        iid = img_id['image_id']
        img = Image.open(os.path.join(self.detail.img_folder,
                                      path)).convert('RGB')
        if self.stage == 'test':
            if self.transform is not None:
                img, _ = self.transform(img, None)
            return img, None
        mask = self.masks[iid]
        if self.stage == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.stage == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.stage == 'testval'
            mask = self._mask_transform(mask)
        if self.transform is not None:
            img, mask = self.transform(img, mask)
        return img, mask

    def __len__(self):
        return len(self.ids)

    @property
    def pred_offset(self):
        return 1
class PASCALContext(data.Dataset):
    def __init__(self, cfg, root, image_set, num_classes=59, transform=None,
                 augmentations=None):
        self.cfg = cfg
        self.root = os.path.join(root, 'VOCdevkit/VOC2010')
        # self.root = root
        self.image_set = image_set
        if 'xception' in cfg.MODEL.NAME:
            mean = np.array([0.5, 0.5, 0.5], dtype=np.float32)
            std = np.array([0.5, 0.5, 0.5], dtype=np.float32)
        else:
            mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
            std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
        self.mean = np.expand_dims(
            np.expand_dims(np.expand_dims(mean, axis=0), axis=-1), axis=-1)
        self.std = np.expand_dims(
            np.expand_dims(np.expand_dims(std, axis=0), axis=-1), axis=-1)

        self.patch_width = cfg.MODEL.IMAGE_SIZE[0]
        self.patch_height = cfg.MODEL.IMAGE_SIZE[1]
        self.n_classes = num_classes
        self.output_stride = cfg.MODEL.OUTPUT_STRIDE

        self.tf = transform
        self.augmentations = augmentations

        self._setup_db()
        self.db_length = len(self.files)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, index):
        item = self.files[index]
        im_name = item['file_name']
        im_path = os.path.join(self.root, "JPEGImages", im_name)
        img_id = item['image_id']

        im = Image.open(im_path)
        lbl = np.asarray(self.masks[img_id], dtype=int)
        lbl = self.label_transform(lbl)
        # Map all ignored pixels to class index 255
        lbl[np.logical_or(lbl >= self.n_classes, lbl < 0)] = 255
        lbl = Image.fromarray(lbl)

        if self.augmentations is not None:
            im, lbl = self.augmentations(im, lbl)
        im, lbl, lbl_os = self.transform(im, lbl)

        if self.cfg.MODEL.LEARN_PAIRWISE_TERMS:
            # if self.cfg.MODEL.NUM_PAIRWISE_TERMS > 1:
            lbl_hs = []
            lbl_vs = []
            stride = 1
            for _ in range(self.cfg.MODEL.NUM_PAIRWISE_TERMS):
                for i in range(stride):
                    for j in range(stride):
                        lbl_os_ = lbl_os[i::stride, j::stride]
                        lbl_h = lbl_os_[:, :-1] * self.n_classes + lbl_os_[:, 1:]
                        lbl_v = lbl_os_[:-1, :] * self.n_classes + lbl_os_[1:, :]
                        lbl_h[(lbl_os_[:, :-1] >= self.n_classes)
                              | (lbl_os_[:, 1:] >= self.n_classes)] = 255
                        lbl_v[(lbl_os_[:-1, :] >= self.n_classes)
                              | (lbl_os_[1:, :] >= self.n_classes)] = 255
                        lbl_hs.append(lbl_h)
                        lbl_vs.append(lbl_v)
                stride += self.cfg.MODEL.PAIRWISE_STEP_SIZE
            # else:
            #     lbl_h = lbl_os[:, :-1] * self.n_classes + lbl_os[:, 1:]
            #     lbl_v = lbl_os[:-1, :] * self.n_classes + lbl_os[1:, :]
            #     lbl_h[(lbl_os[:, :-1] >= self.n_classes) | (lbl_os[:, 1:] >= self.n_classes)] = 255
            #     lbl_v[(lbl_os[:-1, :] >= self.n_classes) | (lbl_os[1:, :] >= self.n_classes)] = 255
            return im, lbl, lbl_hs, lbl_vs, dict()
        else:
            return im, lbl, dict(), dict(), dict()

    def transform(self, img, lbl):
        if self.tf is not None:
            img = self.tf(img)
        # if self.is_train:
        w, h = lbl.size
        lbl_os = lbl.resize((math.ceil(w / self.output_stride),
                             math.ceil(h / self.output_stride)),
                            Image.NEAREST)
        lbl = torch.from_numpy(np.array(lbl)).long()
        lbl[lbl >= self.n_classes] = 255  # ignore pixels
        lbl_os = torch.from_numpy(np.array(lbl_os)).long()
        lbl_os[lbl_os >= self.n_classes] = 255  # ignore pixels
        return img, lbl, lbl_os

    def _setup_db(self):
        # prepare data
        annots = os.path.join(self.root, 'trainval_merged.json')
        img_path = os.path.join(self.root, 'JPEGImages')
        from detail import Detail
        if 'val' in self.image_set:
            self.detail = Detail(annots, img_path, 'val')
            mask_file = os.path.join(self.root, 'val.pth')
        elif 'train' in self.image_set:
            self.mode = 'train'
            self.detail = Detail(annots, img_path, 'train')
            mask_file = os.path.join(self.root, 'train.pth')
        else:
            raise NotImplementedError('only supporting train and val set.')
        self.files = self.detail.getImgs()

        # generate masks
        self._mapping = np.sort(np.array([
            0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25,
            284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46,
            308, 59, 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34,
            207, 80, 355, 85, 347, 220, 349, 360, 98, 187, 104, 105, 366,
            189, 368, 113, 115]))
        self._key = np.array(range(len(self._mapping))).astype('uint8')

        print('mask_file:', mask_file)
        if os.path.exists(mask_file):
            self.masks = torch.load(mask_file)
        else:
            self.masks = self._preprocess(mask_file)

    def decode_segmap(self, label_mask, plot=False):
        """Decode segmentation class labels into a color image

        Args:
            label_mask (np.ndarray): an (M, N) array of integer values
                denoting the class label at each spatial location.
            plot (bool, optional): whether to show the resulting color
                image in a figure.

        Returns:
            (np.ndarray, optional): the resulting decoded color image.
        """
        label_colours = self.get_pascal_labels()
        r = label_mask.copy()
        g = label_mask.copy()
        b = label_mask.copy()
        for ll in range(0, self.n_classes):
            r[label_mask == ll] = label_colours[ll, 0]
            g[label_mask == ll] = label_colours[ll, 1]
            b[label_mask == ll] = label_colours[ll, 2]
        rgb = np.zeros((label_mask.shape[0], label_mask.shape[1], 3))
        rgb[:, :, 0] = r / 255.0
        rgb[:, :, 1] = g / 255.0
        rgb[:, :, 2] = b / 255.0
        if plot:
            plt.imshow(rgb)
            plt.show()
        else:
            return rgb

    def _class_to_index(self, mask):
        # assert the values
        values = np.unique(mask)
        for i in range(len(values)):
            assert values[i] in self._mapping
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        return self._key[index].reshape(mask.shape)

    def _preprocess(self, mask_file):
        masks = {}
        print("Preprocessing masks, this will take a while. "
              "But don't worry, it only runs once for each split.")
        for i in range(len(self.files)):
            img_id = self.files[i]
            mask = Image.fromarray(
                self._class_to_index(self.detail.getMask(img_id)))
            masks[img_id['image_id']] = mask
        torch.save(masks, mask_file)
        return masks

    def label_transform(self, label):
        if self.n_classes == 59:
            # background is ignored
            label = np.array(label).astype('int32') - 1
            label[label == -2] = -1
        else:
            label = np.array(label).astype('int32')
        return label

    def get_pascal_labels(self):
        """Load the mapping that associates pascal-context classes with
        label colors

        Returns:
            np.ndarray with dimensions (n_classes, 3)
        """
        cmap = plt.get_cmap('rainbow')
        colors = [cmap(i) for i in np.linspace(0, 1, self.n_classes - 1)]
        colors = np.array([[c[2] * 255, c[1] * 255, c[0] * 255]
                           for c in colors])
        colors = np.vstack(([[0, 0, 0]], colors))
        return colors
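# Hedged sketch (illustration only) of visualizing a prediction with
# `decode_segmap` above: the random class map stands in for real model
# output, and `dataset` is assumed to be an already-constructed
# PASCALContext instance.
import numpy as np

pred = np.random.randint(0, 59, size=(480, 480))  # fake (H, W) class map
rgb = dataset.decode_segmap(pred)                 # (H, W, 3) floats in [0, 1]
dataset.decode_segmap(pred, plot=True)            # or display it directly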
class PascalContext(BaseDataset):
    NUM_CLASS = 59

    def __init__(self,
                 root='./data',
                 split='train',
                 mode=None,
                 transform=None,
                 target_transform=None,
                 **kwargs):
        super(PascalContext, self).__init__(root, split, mode, transform,
                                            target_transform, **kwargs)
        from detail import Detail
        # from detail import mask
        root = os.path.join(root, 'PascalContext')
        annFile = os.path.join(root, 'trainval_merged.json')
        imgDir = os.path.join(root, 'JPEGImages')
        # training mode
        self.detail = Detail(annFile, imgDir, split)
        self.transform = transform
        self.target_transform = target_transform
        self.ids = self.detail.getImgs()
        # generate masks
        self._mapping = np.sort(np.array([
            0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25,
            284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46,
            308, 59, 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34,
            207, 80, 355, 85, 347, 220, 349, 360, 98, 187, 104, 105, 366,
            189, 368, 113, 115]))
        self.classes = [
            'background', 'aeroplane', 'mountain', 'mouse', 'track', 'road',
            'bag', 'motorbike', 'fence', 'bed', 'bedclothes', 'bench',
            'bicycle', 'diningtable', 'bird', 'person', 'floor', 'boat',
            'train', 'book', 'bottle', 'tree', 'window', 'plate', 'platform',
            'tvmonitor', 'building', 'bus', 'cabinet', 'shelves', 'light',
            'pottedplant', 'wall', 'car', 'ground', 'cat', 'sidewalk',
            'truck', 'ceiling', 'rock', 'chair', 'wood', 'food', 'horse',
            'cloth', 'sign', 'computer', 'sheep', 'keyboard', 'flower',
            'sky', 'cow', 'grass', 'cup', 'curtain', 'snow', 'water',
            'sofa', 'dog', 'door'
        ]
        self._key = np.array(range(len(self._mapping))).astype('uint8')
        mask_file = os.path.join(root, self.split + '.pth')
        print('mask_file:', mask_file)
        if os.path.exists(mask_file):
            self.masks = torch.load(mask_file)
        else:
            self.masks = self._preprocess(mask_file)

    def _class_to_index(self, mask):
        # assert the values
        values = np.unique(mask)
        for i in range(len(values)):
            assert values[i] in self._mapping
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        return self._key[index].reshape(mask.shape)

    def _preprocess(self, mask_file):
        masks = {}
        tbar = trange(len(self.ids))
        print("Preprocessing masks, this will take a while. "
              "But don't worry, it only runs once for each split.")
        for i in tbar:
            img_id = self.ids[i]
            mask = Image.fromarray(
                self._class_to_index(self.detail.getMask(img_id)))
            masks[img_id['image_id']] = mask
            tbar.set_description("Preprocessing masks {}".format(
                img_id['image_id']))
        torch.save(masks, mask_file)
        return masks

    def __getitem__(self, index):
        img_id = self.ids[index]
        path = img_id['file_name']
        iid = img_id['image_id']
        img = Image.open(os.path.join(self.detail.img_folder,
                                      path)).convert('RGB')
        if self.mode == 'test':
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(path)
        # convert mask to 60 categories
        mask = self.masks[iid]
        # synchronized transform
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            mask = self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            mask = self.target_transform(mask)
        return img, mask

    def _mask_transform(self, mask):
        target = np.array(mask).astype('int32') - 1
        return torch.from_numpy(target).long()

    def __len__(self):
        return len(self.ids)

    @property
    def pred_offset(self):
        return 1
class DetailDataset(torch.utils.data.Dataset):
    CLASSES = PascalVOCDataset.CLASSES
    # TODO probably not all of them; Detail.getCats() returns more

    def __init__(self, img_dir, ann_file, split, minimal=False,
                 transforms=None):
        self.img_dir = img_dir
        self.image_set = split
        self.transforms = transforms
        self.anno = ann_file
        self.detail = Detail(ann_file, img_dir, split, minimal, divider=10)
        # TODO proper class format: self.CLASSES = self.detail.getCats()
        imgs = self.detail.getImgs()
        idxs = range(len(imgs))
        self.idx_to_img = dict(zip(idxs, imgs))
        # TODO might come in handy; do it properly, watch out for the underscore
        # self.img_to_idx = dict(zip([x.image_id for x in imgs], idxs))
        self.class_to_ind = dict(zip(self.CLASSES, range(len(self.CLASSES))))

    def __len__(self):
        return len(self.idx_to_img)

    def _img_size(self, img):
        return (img['width'], img['height'])

    def get_groundtruth(self, idx):
        img = self.idx_to_img[idx]
        boxes = self.detail.getBboxes(img)
        # example of 'boxes':
        # [{'bbox': [250, 209, 241, 149], 'category': 'motorbike'},
        #  {'bbox': [312, 139, 109, 191], 'category': 'person'}]
        boxes = [box['bbox'] for box in boxes]  # TODO we drop the category of the boxed object
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, self._img_size(img),
                         mode="xywh").convert("xyxy")
        target = target.clip_to_image(remove_empty=True)

        img_keypoints = self.detail.getKpts(img)
        keypoints = [skeleton['keypoints'] for skeleton in img_keypoints]
        # TODO keypoints: we lose the bbox information
        target.add_field("kpts", Keypoints(keypoints, self._img_size(img)))

        # target.add_field("mask", SegmentationMask(self.detail.getMask(img).tolist(), size=self._img_size(img)))
        # TODO getMask returns a matrix of shape (img.height, img.width) in which
        # TODO each pixel holds the ID of the class it belongs to. SegmentationMask
        # from getMask() doc:
        # If semantic segmentation of an image is requested (cat=instance=superpart=part=None),
        # the result is an image whose pixel values are the class IDs for that image.
        # If instance-level segmentation for one category of an image is requested (img and cat provided),
        # the result is an image whose pixel values are the instance IDs for that class and 0 everywhere else.
        target.add_field("class_mask", self.detail.getMask(img))
        target.add_field("instance_mask", self.detail.getMask(img, cat='person'))

        target.add_field("bounds", self.detail.getBounds(img))
        target.add_field("occl", self.detail.getOccl(img))
        # TODO human parts?
        return target

    def __getitem__(self, idx):
        img = self.idx_to_img[idx]
        # example img object:
        # {'file_name': '2008_000002.jpg', 'phase': 'val', 'height': 375, 'width': 500,
        #  'date_captured': '31-May-2015 17:44:04', 'image_id': 2008000002, 'annotations': [1, 62295],
        #  'categories': [454, 427], 'parts': [16], 'keypoints': []}
        img = Image.open(os.path.join(self.img_dir,
                                      img['file_name'])).convert('RGB')
        target = self.get_groundtruth(idx)
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target, idx

    def get_img_info(self, idx):
        img = self.idx_to_img[idx]
        return {"height": img['height'], "width": img['width']}
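# Hedged usage sketch for DetailDataset; the paths are placeholders and the
# class depends on the PASCAL in Detail API (`detail`) plus
# maskrcnn-benchmark's BoxList/Keypoints structures being importable.
dataset = DetailDataset(
    img_dir='./data/VOCdevkit/VOC2010/JPEGImages',
    ann_file='./data/VOCdevkit/VOC2010/trainval_merged.json',
    split='train')
img, target, idx = dataset[0]
print(dataset.get_img_info(0))  # {'height': ..., 'width': ...}
print(target.fields())          # kpts, class_mask, instance_mask, bounds, occl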