import os
import tarfile

# `Path` below is the repo's path-configuration helper (it exposes
# db_root_dir), not pathlib.Path; `mkdir_if_missing` and
# `download_file_from_google_drive` are repo utilities as well.


def __init__(self, root=Path.db_root_dir('VOCSegmentation'), split='val',
             transform=None, download=True, ignore_classes=None):
    # Set paths
    self.root = root
    valid_splits = ['trainaug', 'train', 'val']
    assert split in valid_splits
    self.split = split

    if split == 'trainaug':
        _semseg_dir = os.path.join(self.root, 'SegmentationClassAug')
    else:
        _semseg_dir = os.path.join(self.root, 'SegmentationClass')
    _image_dir = os.path.join(self.root, 'images')

    # Download
    if download:
        self._download()

    # Transform
    self.transform = transform

    # Splits are pre-cut
    print("Initializing dataloader for PASCAL VOC12 {} set".format(self.split))
    split_file = os.path.join(self.root, 'sets', self.split + '.txt')
    self.images = []
    self.semsegs = []

    with open(split_file, "r") as f:
        lines = f.read().splitlines()

    for line in lines:
        # Images
        _image = os.path.join(_image_dir, line + ".jpg")
        assert os.path.isfile(_image)
        self.images.append(_image)

        # Semantic segmentation maps
        _semseg = os.path.join(_semseg_dir, line + '.png')
        assert os.path.isfile(_semseg)
        self.semsegs.append(_semseg)

    assert len(self.images) == len(self.semsegs)

    # Display stats
    print('Number of dataset images: {:d}'.format(len(self.images)))

    # List of classes which are remapped to the ignore index. This option is
    # used for comparing with other works that consider only a subset of the
    # PASCAL classes. Use None as the default to avoid a mutable default
    # argument.
    ignore_classes = ignore_classes if ignore_classes is not None else []
    self.ignore_classes = [self.VOC_CATEGORY_NAMES.index(class_name)
                           for class_name in ignore_classes]
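# A minimal sketch (not from the repo) of how the precomputed
# self.ignore_classes indices could be consumed when loading a label map,
# assuming numpy masks and the usual PASCAL VOC ignore index of 255. The
# helper name `remap_ignored_classes` is hypothetical.
import numpy as np


def remap_ignored_classes(semseg, ignore_classes, ignore_index=255):
    """Map every class id listed in ignore_classes to ignore_index."""
    semseg = semseg.copy()
    for class_id in ignore_classes:
        semseg[semseg == class_id] = ignore_index
    return semseg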
def get_train_dataset(p, transform=None):
    if p['train_db_name'] == 'VOCSegmentation':
        from data.dataloaders.pascal_voc import VOCSegmentation
        return VOCSegmentation(root=Path.db_root_dir(p['train_db_name']),
                               saliency=p['train_db_kwargs']['saliency'],
                               transform=transform)
    else:
        raise ValueError('Invalid train db name {}'.format(p['train_db_name']))
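# Illustrative usage sketch: a minimal config dict `p` carrying exactly the
# keys get_train_dataset reads above. Other config entries and the transform
# are omitted, and this must run from the repo root so the
# data.dataloaders.pascal_voc import resolves.
p = {
    'train_db_name': 'VOCSegmentation',
    'train_db_kwargs': {'saliency': 'supervised_model'},
}
train_dataset = get_train_dataset(p, transform=None)
print('Train samples: {:d}'.format(len(train_dataset.images)))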
def _download(self):
    _fpath = os.path.join(Path.db_root_dir(), self.FILE)

    if os.path.isfile(_fpath):
        print('Files already downloaded')
        return
    else:
        print('Downloading dataset from google drive')
        mkdir_if_missing(os.path.dirname(_fpath))
        download_file_from_google_drive(self.GOOGLE_DRIVE_ID, _fpath)

    # Extract the downloaded archive into the dataset root, then restore
    # the original working directory.
    cwd = os.getcwd()
    print('\nExtracting tar file')
    with tarfile.open(_fpath) as tar:
        os.chdir(Path.db_root_dir())
        tar.extractall()
    os.chdir(cwd)
    print('Done!')
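# _download relies on two class-level constants defined elsewhere in the
# repo; the sketch below only shows their shape. Both values are
# placeholders, not the real archive name or Google Drive id.
from torch.utils.data import Dataset


class VOCSegmentation(Dataset):
    FILE = 'dataset-archive.tar.gz'      # placeholder archive filename
    GOOGLE_DRIVE_ID = '<drive-file-id>'  # placeholder Google Drive id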
def __init__(self, root=Path.db_root_dir('VOCSegmentation'), split='val',
             transform=None, download=True):
    # Set paths
    self.root = root
    valid_splits = ['trainaug', 'train', 'val']
    assert split in valid_splits
    self.split = split

    if split == 'trainaug':
        _semseg_dir = os.path.join(self.root, 'SegmentationClassAug')
    else:
        _semseg_dir = os.path.join(self.root, 'SegmentationClass')
    _image_dir = os.path.join(self.root, 'images')

    # Download
    if download:
        self._download()

    # Transform
    self.transform = transform

    # Splits are pre-cut
    print("Initializing dataloader for PASCAL VOC12 {} set".format(self.split))
    split_file = os.path.join(self.root, 'sets', self.split + '.txt')
    self.images = []
    self.semsegs = []

    with open(split_file, "r") as f:
        lines = f.read().splitlines()

    for line in lines:
        # Images
        _image = os.path.join(_image_dir, line + ".jpg")
        assert os.path.isfile(_image)
        self.images.append(_image)

        # Semantic segmentation maps
        _semseg = os.path.join(_semseg_dir, line + '.png')
        assert os.path.isfile(_semseg)
        self.semsegs.append(_semseg)

    assert len(self.images) == len(self.semsegs)

    # Display stats
    print('Number of dataset images: {:d}'.format(len(self.images)))
def __init__(self, root=Path.db_root_dir('VOCSegmentation'),
             saliency='supervised_model', download=True, transform=None,
             overfit=False):
    super(VOCSegmentation, self).__init__()

    self.root = root
    self.transform = transform

    if download:
        self._download()

    self.images_dir = os.path.join(self.root, 'images')
    valid_saliency = ['supervised_model', 'unsupervised_model']
    assert saliency in valid_saliency
    self.saliency = saliency
    self.sal_dir = os.path.join(self.root, 'saliency_' + self.saliency)

    self.images = []
    self.sal = []

    with open(os.path.join(self.root, 'sets/trainaug.txt'), 'r') as f:
        names = f.read().splitlines()

    # Keep only samples for which both the image and its saliency mask exist.
    for name in names:
        _image = os.path.join(self.images_dir, name + ".jpg")
        _sal = os.path.join(self.sal_dir, name + ".png")
        if os.path.isfile(_image) and os.path.isfile(_sal):
            self.images.append(_image)
            self.sal.append(_sal)

    assert len(self.images) == len(self.sal)

    if overfit:
        n_of = 32
        self.images = self.images[:n_of]
        self.sal = self.sal[:n_of]

    # Display stats
    print('Number of images: {:d}'.format(len(self.images)))
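# Illustrative usage sketch: wrapping the saliency dataset in a PyTorch
# DataLoader. This assumes the class also defines __getitem__/__len__
# (not shown in the snippets above); overfit=True restricts the dataset
# to its first 32 samples.
from torch.utils.data import DataLoader

dataset = VOCSegmentation(saliency='supervised_model', download=False,
                          overfit=True)
loader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=2,
                    drop_last=True)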