def __init__(self, opt):
    """Initialize this dataset class.

    Parameters:
        opt (Option class) -- stores all the experiment flags; needs to be a
            subclass of BaseOptions
    """
    BaseDataset.__init__(self, opt)
    # Domain image directories, e.g. '/path/to/data/trainA' and '.../trainB'.
    self.dir_A = os.path.join(opt.dataroot, opt.phase + 'A')
    self.dir_B = os.path.join(opt.dataroot, opt.phase + 'B')
    # Discover the class sub-folders of each domain.
    self.classes_A, self.class_to_idx_A = self._find_classes(self.dir_A)
    self.classes_B, self.class_to_idx_B = self._find_classes(self.dir_B)
    # Each samples list holds (sample path, class index) tuples.
    samples_A = make_dataset(self.dir_A, self.class_to_idx_A,
                             extensions=self.img_extension, is_valid_file=None)
    samples_B = make_dataset(self.dir_B, self.class_to_idx_B,
                             extensions=self.img_extension, is_valid_file=None)
    self.A_paths = [path for path, _ in samples_A]
    self.B_paths = [path for path, _ in samples_B]
    self.A_targets = [target for _, target in samples_A]
    self.B_targets = [target for _, target in samples_B]
    self.A_size = len(self.A_paths)  # size of dataset A
    self.B_size = len(self.B_paths)  # size of dataset B
    # When direction is 'BtoA', input/output channel counts swap roles.
    btoA = self.opt.direction == 'BtoA'
    input_nc = self.opt.output_nc if btoA else self.opt.input_nc
    output_nc = self.opt.input_nc if btoA else self.opt.output_nc
    self.transform_A = get_transform(self.opt, grayscale=(input_nc == 1))
    self.transform_B = get_transform(self.opt, grayscale=(output_nc == 1))
def __init__(self, root, transform=None, target_transform=None,
             loader=default_loader, retun_idx=False):
    """Index an ImageFolder-style directory tree (one sub-folder per class).

    Parameters:
        root -- dataset root directory
        transform -- optional transform applied to each loaded image
        target_transform -- optional transform applied to each target
        loader -- callable that loads an image given its path
        retun_idx -- when True, samples are expected to also carry their index
            (parameter name kept, typo and all, for backward compatibility)
    """
    classes, class_to_idx = find_classes(root)
    IMG_EXTENSIONS = [
        '.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif'
    ]
    # Older torchvision make_dataset signatures do not accept an extensions
    # list; fall back on TypeError only, instead of a bare `except:` that
    # would also hide real indexing errors (e.g. permission problems).
    try:
        imgs = make_dataset(root, class_to_idx, IMG_EXTENSIONS)
    except TypeError:
        imgs = make_dataset(root, class_to_idx)
    if len(imgs) == 0:
        raise RuntimeError("Found 0 images in subfolders of: " + root + "\n"
                           "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))
    self.root = root
    self.imgs = imgs
    self.classes = classes
    self.class_to_idx = class_to_idx
    self.transform = transform
    self.target_transform = target_transform
    self.loader = loader
    self.retun_idx = retun_idx
def imagenet(root: str, img_size: int = 224, batch_size: int = 32,
             augment: bool = True, shuffle: bool = True, workers: int = 6,
             splits: Union[str, Tuple[str, str], Tuple[str, str, str]] = ('train', 'val'),
             seed: int = 42):
    """Build ImageNet DataLoaders for the requested splits.

    The official validation directory is shuffled with ``seed`` and split in
    half into 'val' and 'test'. Returns a single DataLoader when exactly one
    split is requested, otherwise a list ordered train, val, test.

    Bug fix: ``if "val" or "test" in splits`` was always true because the
    non-empty literal ``"val"`` is truthy; it now tests membership of both.
    """
    train_dir = os.path.join(root, 'train')
    val_test_dir = os.path.join(root, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    val_transform = transforms.Compose([
        transforms.Resize(int(img_size * 8 / 7)),
        transforms.CenterCrop(img_size),
        transforms.ToTensor(),
        normalize,
    ])
    val_mapper = train_mapper = DatasetMapper(val_transform)
    if augment:
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(img_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        train_mapper = DatasetMapper(train_transform)
    loader_list = list()
    if "train" in splits:
        classes, class_to_idx = find_classes(train_dir)
        dataset = make_dataset(train_dir, class_to_idx, IMG_EXTENSIONS)
        dataset = MapDataset(DatasetFromList(dataset), train_mapper)
        loader_list.append(data.DataLoader(dataset, batch_size, shuffle=shuffle,
                                           num_workers=workers, pin_memory=True,
                                           worker_init_fn=worker_init_reset_seed))
    if "val" in splits or "test" in splits:  # was: if "val" or "test" in splits
        classes, class_to_idx = find_classes(val_test_dir)
        val_test_set = make_dataset(val_test_dir, class_to_idx, IMG_EXTENSIONS)
        # Deterministic half/half split of the official validation set.
        random.seed(seed)
        random.shuffle(val_test_set)
        half = int(round(len(val_test_set) / 2))
        val_set = val_test_set[:half]
        test_set = val_test_set[half:]
        if "val" in splits:
            val_ds = MapDataset(DatasetFromList(val_set), val_mapper)
            loader_list.append(data.DataLoader(val_ds, batch_size,
                                               num_workers=workers, pin_memory=True))
        if "test" in splits:
            test_ds = MapDataset(DatasetFromList(test_set), val_mapper)
            loader_list.append(data.DataLoader(test_ds, batch_size,
                                               num_workers=workers, pin_memory=True))
    if len(loader_list) == 1:
        return loader_list[0]
    return loader_list
def __init__(self, root_path, train_dir, valid_dir):
    """Index the 'train' and 'valid' sub-trees of ``root_path`` as one dataset.

    Class indices come from the 'train' tree and are reused for 'valid'.
    NOTE(review): ``train_dir`` and ``valid_dir`` are accepted but unused;
    the hard-coded 'train'/'valid' sub-folders are indexed instead — confirm
    against callers.
    """
    self.classes, self.class_to_idx = find_classes(root_path / 'train')
    self.samples = (
        make_dataset(root_path / 'train', self.class_to_idx, extensions=IMG_EXTENSIONS)
        + make_dataset(root_path / 'valid', self.class_to_idx, extensions=IMG_EXTENSIONS)
    )
def __init__(self, image_path, label_path, transform, image_extensions):
    """Index paired image/label directory trees sharing one class layout.

    Parameters:
        image_path -- root of the input images
        label_path -- root of the label images (same class sub-folders)
        transform -- transform applied to loaded images
        image_extensions -- allowed file extensions for both trees
    """
    super(LoadDataset, self).__init__(image_path, transform=transform,
                                      target_transform=None)
    self.image_path = image_path
    self.label_path = label_path
    self.transform = transform
    self.image_extensions = image_extensions
    ##
    # Class indices are derived from the image tree and applied to both.
    classes, class_to_idx = self._find_classes(self.image_path)
    self.image_files = make_dataset(self.image_path, class_to_idx,
                                    extensions=self.image_extensions,
                                    is_valid_file=None)
    self.label_files = make_dataset(self.label_path, class_to_idx,
                                    extensions=self.image_extensions,
                                    is_valid_file=None)
def test_make_dataset_no_valid_files(tmpdir, kwargs, expected_error_msg):
    """make_dataset must raise FileNotFoundError when no file matches ``kwargs``."""
    tmpdir = pathlib.Path(tmpdir)
    # One file per class folder: two valid image types and one unknown type.
    for folder, filename in (("a", "a.png"), ("b", "b.jpeg"), ("c", "c.unknown")):
        (tmpdir / folder).mkdir()
        (tmpdir / folder / filename).touch()
    with pytest.raises(FileNotFoundError, match=expected_error_msg):
        make_dataset(str(tmpdir), **kwargs)
def __init__(self, root, frames_per_clip, step_between_clips=1, frame_rate=None,
             extensions=('mp4', ), transform=None, cached=None,
             _precomputed_metadata=None):
    """Kinetics-400 style video dataset rooted at ``root``.

    One sub-folder per action class; videos matching ``extensions`` are
    indexed and cut into clips of ``frames_per_clip`` frames.

    Fix: removed the no-op ``extensions = extensions`` self-assignment.
    NOTE(review): ``cached`` is accepted but never used in this body — kept
    only for backward compatibility with existing callers.
    """
    super(Kinetics400, self).__init__(root)
    classes = list(sorted(list_dir(root)))
    class_to_idx = {classes[i]: i for i in range(len(classes))}
    self.samples = make_dataset(self.root, class_to_idx, extensions,
                                is_valid_file=None)
    self.classes = classes
    video_list = [x[0] for x in self.samples]
    self.video_clips = VideoClips(
        video_list,
        frames_per_clip,
        step_between_clips,
        frame_rate,
        _precomputed_metadata,
    )
    self.transform = transform
def __init__(self, roots, transform=None, target_transform=None,
             loader=default_loader):
    """Dataset over several parallel ImageFolder roots.

    Each root is indexed independently; all roots must contribute the same
    number of images (asserted below).
    """
    assert isinstance(roots, (tuple, list))
    classes_list, class_to_idx_list, imgs_list = [], [], []
    for root in roots:
        classes, class_to_idx = find_classes(root)
        imgs = make_dataset(root, class_to_idx)
        if len(imgs) == 0:
            raise RuntimeError("Found 0 images in subfolders of: " + root + "\n"
                               "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))
        classes_list.append(classes)
        class_to_idx_list.append(class_to_idx)
        imgs_list.append(imgs)
    # Sanity check: every root must yield the same number of samples.
    expected = len(imgs_list[0])
    for entry in imgs_list:
        assert len(entry) == expected
    self.classes_list = classes_list
    self.class_to_idx_list = class_to_idx_list
    self.imgs_list = imgs_list
    self.roots = roots
    self.transform = transform
    self.target_transform = target_transform
    self.loader = loader
def __init__(self, root_dir, extensions, transforms=None, target_transforms=None,
             test=False):
    """Index ``root_dir`` in ImageFolder layout.

    Parameters:
        root_dir -- dataset root; one sub-folder per class
        extensions -- allowed file extensions
        transforms / target_transforms -- optional sample/target transforms
        test -- accepted for interface compatibility (unused here)

    Bug fix: removed a leftover debug ``print(samples)`` that dumped the
    entire sample list to stdout on every construction.
    """
    super(CheckerboardDataset, self).__init__(root_dir, transforms,
                                              target_transforms)
    # Map each top-level entry name to a label in [0, 1000).
    file_name_list = os.listdir(root_dir)
    self.dataset_labels = dict(zip(file_name_list, range(1000)))
    self.root_dir = root_dir
    self.transforms = transforms
    self.target_transforms = target_transforms
    self.extensions = extensions
    classes, class_to_idx = self._find_classes(self.root_dir)
    samples = make_dataset(root_dir, class_to_idx, self.extensions,
                           is_valid_file=None)
    self.classes = classes
    self.class_to_idx = class_to_idx
    self.samples = samples
    self.targets = [s[1] for s in samples]
def __init__(self, root, loader, extensions=None, transform=None,
             target_transform=None, is_valid_file=None):
    """Generic folder dataset: one sub-folder per class under ``root``.

    Parameters:
        root -- dataset root directory
        loader -- callable that loads a sample given its path
        extensions -- allowed file extensions, or None when is_valid_file is given
        transform / target_transform -- optional sample/target transforms
        is_valid_file -- optional predicate replacing extension filtering

    Raises:
        RuntimeError -- if no file under ``root`` passes the filter.

    Bug fix: when ``extensions`` is None (caller supplied ``is_valid_file``)
    and no files were found, the old message built ``",".join(None)`` and
    raised TypeError instead of the intended RuntimeError.
    """
    super(DatasetFolder, self).__init__(root, transform=transform,
                                        target_transform=target_transform)
    classes, class_to_idx = self._find_classes(self.root)
    samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file)
    if len(samples) == 0:
        msg = "Found 0 files in subfolders of: {}\n".format(self.root)
        if extensions is not None:
            msg += "Supported extensions are: {}".format(",".join(extensions))
        raise RuntimeError(msg)
    self.loader = loader
    self.extensions = extensions
    self.classes = classes
    self.class_to_idx = class_to_idx
    self.samples = samples
    self.targets = [s[1] for s in samples]
def __init__(self, root, train=True, transform=None, download=True):
    """Dataset with a deterministic per-class 80/20 train/eval split.

    The first 80% of each class's paths (in make_dataset order) form the
    training set; the remainder form the evaluation set.
    """
    self.root = root
    root = os.path.join(root, self.foldername)
    if download:
        self.download()
    classes, class_to_idx = self._find_classes(root)
    samples = make_dataset(root, class_to_idx, IMG_EXTENSIONS)
    # Group sample paths by class index.
    datapaths = defaultdict(list)
    for path, target in samples:
        datapaths[target].append(path)
    # Keep the first 80% (train) or the last 20% (eval) of each class.
    for target, paths in datapaths.items():
        split_at = int(0.8 * len(paths))
        datapaths[target] = paths[:split_at] if train else paths[split_at:]
    # Flatten back into parallel path/label lists.
    newdatapaths = [p for paths in datapaths.values() for p in paths]
    labels = [t for t, paths in datapaths.items() for _ in paths]
    self.train = train
    self.transform = transform
    self.labels = labels
    self.datapaths = newdatapaths
    self.cache = {}
def __init__(self, root_list, transform=None, target_transform=None,
             loader=default_loader):
    """ImageFolder over a list of roots with concatenated class spaces.

    The class indices of the 2nd..nth roots are shifted past the classes
    already registered, so every (root, class) pair gets a distinct target.

    Bug fix: the type-check error message formatted the undefined name
    ``dataset_list`` (raising NameError instead of the intended
    RuntimeError); it now reports ``root_list``.
    """
    if not isinstance(root_list, (list, tuple)):
        raise RuntimeError(
            "root_list should be a list of strings, got {}".format(root_list))
    super(ImageFolderList, self).__init__(root_list[0], loader, IMG_EXTENSIONS,
                                          transform=transform,
                                          target_transform=target_transform)
    for root in root_list[1:]:
        classes, class_to_idx = self._find_classes(root)
        # Shift this root's class indices past those already registered.
        for k in class_to_idx.keys():
            class_to_idx[k] += len(self.classes)
        samples = make_dataset(root, class_to_idx, IMG_EXTENSIONS)
        self.classes += classes
        self.class_to_idx.update(class_to_idx)
        self.samples += samples
    self.targets = [s[1] for s in self.samples]
    self.imgs = self.samples
def train_test_split_for_dir(root_path: Path, test_size: float,
                             random_state: int = 42):
    """Split an ImageFolder-style dataset directory into train/val copies.

    The dataset rooted at ``root_path`` (torchvision.datasets.ImageFolder
    layout) is split into train and validation subsets, which are copied into
    sibling ``train/`` and ``val/`` directories next to ``root_path``.

    TODO: consider building the train/test ImageFolder objects inside this
    function instead.

    Raises:
        FileNotFoundError -- if ``root_path`` does not exist
        ValueError -- if ``test_size`` is outside [0, 1]
    """
    # Fail fast with explicit messages (previously raised bare exceptions).
    if not root_path.exists():
        raise FileNotFoundError(f"dataset root does not exist: {root_path}")
    elif not (0 <= test_size <= 1.0):
        raise ValueError(f"test_size must be in [0, 1], got {test_size}")
    classes, class_to_idx = find_classes(root_path)
    dataset = make_dataset(root_path, class_to_idx, IMG_EXTENSIONS)
    train, val = train_test_split(dataset, test_size=test_size, shuffle=True,
                                  random_state=random_state)
    split_dataset = {'train': train, 'val': val}
    dst_path_root = root_path.parent
    for set_ in ['train', 'val']:
        for file_path, class_ in tqdm(split_dataset[set_], desc=set_):
            file_path = Path(file_path)
            dst_dir = dst_path_root / set_ / str(class_)
            dst_dir.mkdir(exist_ok=True, parents=True)
            shutil.copy(file_path, dst_dir / file_path.name)
def read_all_images(root, num_workers=4):
    """Decode every image under ``root`` and return {path: image}.

    Uses ``parallel_process`` when more than one worker is requested,
    otherwise reads sequentially with a progress bar.
    """
    classes, class_to_idx = find_classes(root)
    dataset = make_dataset(root, class_to_idx)
    if not dataset:
        raise RuntimeError("Found 0 images in subfolders of: " + root + "\n" +
                           "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))
    paths = [path for path, _ in dataset]
    print("Reading {0} images with {1} workers".format(len(dataset), num_workers))
    if num_workers > 1:
        images = parallel_process(paths, read_image_for_pytorch,
                                  n_jobs=num_workers)
    else:
        images = [read_image_for_pytorch(p) for p in tqdm(paths)]
    # Map each sample path to its decoded image (same order as `dataset`).
    return {path: image for (path, _), image in zip(dataset, images)}
def __init__(
    self, *roots, transforms=None, target_transforms=None, loader=default_loader
):
    """Dataset over several parallel ImageFolder trees.

    Every root must contain the same number of classes and samples; sample i
    of each root is assumed to correspond to sample i of the others.

    Bug fix: the per-root class_to_idx mapping is now appended to
    ``class_to_idx_`` — previously the list appended *itself*
    (``class_to_idx_.append(class_to_idx_)``), so ``self.class_to_idx``
    held self-referential garbage instead of the mappings.
    """
    classes_ = []
    class_to_idx_ = []
    samples_ = []
    for root in roots:
        classes, class_to_idx = find_classes(root)
        samples = make_dataset(root, class_to_idx, IMG_EXTENSIONS)
        if len(samples) == 0:
            raise RuntimeError(
                "Found 0 files in subfolders of: " + root + "\n"
                "Supported extensions are: " + ",".join(IMG_EXTENSIONS)
            )
        classes_.append(classes)
        class_to_idx_.append(class_to_idx)  # was: append(class_to_idx_)
        samples_.append(samples)
        # Every root must match the first root's sample and class counts.
        if len(samples_[0]) != len(samples):
            raise ValueError(
                "Dataset folders must have the same number of samples."
            )
        if len(classes_[0]) != len(classes):
            raise ValueError(
                "Dataset folders must have the same number of classes."
            )
    super().__init__(roots, samples_, transforms, target_transforms)
    self.loader = loader
    self.extensions = IMG_EXTENSIONS
    self.classes = classes_
    self.class_to_idx = class_to_idx_
def __init__(self, root, transform=None, target_transform=None,
             loader=default_loader, is_valid_file=None, valid_classes=None):
    """DatasetFolder variant that can restrict indexing to ``valid_classes``."""
    super(DatasetFolder, self).__init__(root, transform=transform,
                                        target_transform=target_transform)
    classes, class_to_idx = self._find_classes(self.root,
                                               valid_classes=valid_classes)
    # A custom validity predicate disables extension-based filtering.
    extensions = (torchvision.datasets.folder.IMG_EXTENSIONS
                  if is_valid_file is None else None)
    samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file)
    if not samples:
        raise RuntimeError("Found 0 files in subfolders of: " + self.root + "\n"
                           "Supported extensions are: " + ",".join(extensions))
    self.loader = loader
    self.extensions = extensions
    self.classes = classes
    self.class_to_idx = class_to_idx
    self.samples = samples
    self.targets = [target for _, target in samples]
    self.imgs = self.samples
def __init__(self, roots, loader, extensions, transform=None,
             target_transform=None):
    """DatasetFolder-like init over several roots; targets are ROOT indices.

    ``self.targets[j]`` is the index into ``roots`` of the root that sample j
    came from (not the image's class index).

    Bug fix: ``root_lengths`` previously stored the *cumulative* sample count
    (``len(samples)`` after extending), so ``self.targets`` contained more
    entries than ``self.samples``; it now stores the per-root count so the
    two lists stay aligned.
    """
    samples = []
    root_lengths = []
    for root in roots:
        classes, class_to_idx = self._find_classes(root)
        temp_samples = make_dataset(root, class_to_idx, extensions)
        if len(temp_samples) == 0:
            raise RuntimeError("Found 0 files in subfolders of: " + root + "\n"
                               "Supported extensions are: " + ",".join(extensions))
        samples = samples + temp_samples
        root_lengths.append(len(temp_samples))  # per-root, not cumulative
    # NOTE(review): keeps only the *last* root / classes / class_to_idx,
    # as the original did — confirm callers rely on this.
    self.root = root
    self.loader = loader
    self.extensions = extensions
    self.classes = classes
    self.class_to_idx = class_to_idx
    self.samples = samples
    # Repeat root index i once per sample drawn from roots[i].
    self.targets = [i for i, n in enumerate(root_lengths) for _ in range(n)]
    self.transform = transform
    self.target_transform = target_transform
def from_directory(cls, dir_path: str) -> LabeledVideoPaths:
    """
    Factory function that creates a LabeledVideoPaths object by parsing the
    structure of the given directory's subdirectories into the classification
    labels. It expects the directory format to be:

        dir_path/<class_name>/<video_name>.mp4

    Classes are indexed from 0 to the number of classes, alphabetically.

    Bug fix: ``class_to_idx`` was previously keyed by the full Path objects
    returned by ``iterdir()``; ``make_dataset`` joins those keys back onto
    ``dir_path``, producing doubled paths and an empty dataset. The mapping
    is now keyed by the directory *names*.

    Args:
        dir_path (str): Root directory to the video class directories.
    """
    assert g_pathmgr.exists(dir_path), f"{dir_path} not found."
    # Sorted sub-directory names become the class labels, indexed from 0.
    classes = sorted(
        f.name for f in pathlib.Path(dir_path).iterdir() if f.is_dir())
    class_to_idx = {name: i for i, name in enumerate(classes)}
    video_paths_and_label = make_dataset(dir_path, class_to_idx,
                                         extensions=("mp4", "avi"))
    assert (len(video_paths_and_label) >
            0), f"Failed to load dataset from {dir_path}."
    return cls(video_paths_and_label)
def load_data(
        self,
        data: Union[str, Tuple[List[str], List[Any]]],
        dataset: Optional[Any] = None) -> Sequence[Mapping[str, Any]]:
    """Load (input, target) samples from a class-folder directory or a
    (paths, targets) pair, filtering paths by the allowed extensions."""
    if self.isdir(data):
        classes, class_to_idx = self.find_classes(data)
        if not classes:
            # No class sub-folders: fall back to unlabeled prediction loading.
            return self.predict_load_data(data)
        self.set_state(LabelsState(classes))
        if dataset is not None:
            dataset.num_classes = len(classes)
        samples = make_dataset(data, class_to_idx, extensions=self.extensions)
        return [{DefaultDataKeys.INPUT: input, DefaultDataKeys.TARGET: target}
                for input, target in samples]
    # Path/target pairs: keep only samples with an allowed file extension.
    return list(
        filter(
            lambda sample: has_file_allowed_extension(
                sample[DefaultDataKeys.INPUT], self.extensions),
            super().load_data(data, dataset),
        ))
def __init__(self, root, loader, extensions, transform=None,
             target_transform=None):
    """DatasetFolder-style init that caches the sample index on disk.

    The (path, class_index) list is pickled to ``<root>/samples.pickle`` on
    the first scan; later constructions load the pickle instead of walking
    the directory tree again.
    """
    classes, class_to_idx = find_classes(root)
    samples_cache_path = os.path.join(root, 'samples.pickle')
    if os.path.exists(samples_cache_path):
        with open(samples_cache_path, 'rb') as fh:
            samples = pickle.load(fh)
        print('=> read {} samples from cache: {}'.format(len(samples),
                                                         samples_cache_path))
    else:
        samples = make_dataset(root, class_to_idx, extensions)
        # Only write the cache when the dataset directory is writable.
        if os.access(root, os.W_OK):
            print('=> caching {} samples to: {}'.format(len(samples),
                                                        samples_cache_path))
            with open(samples_cache_path, 'wb') as fh:
                pickle.dump(samples, fh)
    if len(samples) == 0:
        raise RuntimeError("Found 0 files in subfolders of: " + root + "\n"
                           "Supported extensions are: " + ",".join(extensions))
    self.root = root
    self.loader = loader
    self.extensions = extensions
    self.classes = classes
    self.class_to_idx = class_to_idx
    self.samples = samples
    self.transform = transform
    self.target_transform = target_transform
def __init__(self, root, annotation_path, frames_per_clip,
             step_between_clips=1, fold=1, train=True,
             framewiseTransform=False, transform=None):
    """HMDB51 video dataset restricted to one of the three official folds."""
    super(HMDB51, self).__init__(root)
    if not 1 <= fold <= 3:
        raise ValueError(
            "fold should be between 1 and 3, got {}".format(fold))
    self.fold = fold
    self.train = train
    classes = list(sorted(list_dir(root)))
    class_to_idx = {name: idx for idx, name in enumerate(classes)}
    # Only .avi files are indexed.
    self.samples = make_dataset(self.root, class_to_idx, ('avi', ),
                                is_valid_file=None)
    self.classes = classes
    video_list = [path for path, _ in self.samples]
    clips = VideoClips(video_list, frames_per_clip, step_between_clips)
    # Keep only the videos belonging to the requested fold/split.
    self.indices = self._select_fold(video_list, annotation_path, fold, train)
    self.video_clips = clips.subset(self.indices)
    self.video_list = [video_list[i] for i in self.indices]
    self.framewiseTransform = framewiseTransform
    self.transform = transform
def __init__(self, root, loader, extensions, transform=None,
             target_transform=None, label_probability=False):
    """DatasetFolder-style init with optional one-hot probability targets.

    When ``label_probability`` is True, ``self.targets`` is a float32 matrix
    of shape (num_samples, num_classes) with a 1 in each sample's class
    column; otherwise it is the usual list of class indices.
    """
    classes, class_to_idx = self._find_classes(root)
    samples = make_dataset(root, class_to_idx, extensions)
    if not samples:
        raise RuntimeError("Found 0 files in subfolders of: " + root + "\n"
                           "Supported extensions are: " + ",".join(extensions))
    self.root = root
    self.loader = loader
    self.extensions = extensions
    self.label_probability = label_probability
    self.classes = classes
    self.nclasses = len(self.classes)
    self.class_to_idx = class_to_idx
    self.samples = samples
    if self.label_probability:
        one_hot = numpy.zeros((len(samples), self.nclasses), dtype='f')
        for row, (_, target) in enumerate(samples):
            one_hot[row, target] = 1
        self.targets = one_hot
    else:
        self.targets = [target for _, target in samples]
    self.transform = transform
    self.target_transform = target_transform
def __init__(
    self,
    root: str,
    loader: Callable[[str], Any],
    extensions: Optional[Tuple[str, ...]] = None,
    input_transform: Optional[Callable] = None,
    reconstruction_transform: Optional[Callable] = None,
    target_transform: Optional[Callable] = None,
    is_valid_file: Optional[Callable[[str], bool]] = None,
) -> None:
    """DatasetFolder variant with an extra reconstruction-target transform."""
    super(GenerativeDatasetFolder, self).__init__(
        root, transform=input_transform, target_transform=target_transform)
    classes, class_to_idx = self._find_classes(self.root)
    samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file)
    if not samples:
        # Build the message incrementally; `extensions` may be None.
        msg = "Found 0 files in subfolders of: {}\n".format(self.root)
        if extensions is not None:
            msg += "Supported extensions are: {}".format(",".join(extensions))
        raise RuntimeError(msg)
    self.r_transform = reconstruction_transform
    self.loader = loader
    self.extensions = extensions
    self.classes = classes
    self.class_to_idx = class_to_idx
    self.samples = samples
    self.targets = [target for _, target in samples]
def load_format_paths(folder_path, extension):
    """Return parallel numpy arrays (paths, class indices) for every file
    under ``folder_path`` with the given ``extension``."""
    classes, class_to_idx = find_classes(folder_path)
    samples = make_dataset(folder_path, class_to_idx, [extension])
    paths = np.array([path for path, _ in samples])
    classes = np.array([int(target) for _, target in samples])
    return paths, classes
def __init__(self, root, annotation_path, frames_per_clip,
             step_between_clips=1, frame_rate=None, fold=1, train=True,
             transform=None, _precomputed_metadata=None, num_workers=1,
             _video_width=0, _video_height=0, _video_min_dimension=0,
             _audio_samples=0):
    """UCF101 video dataset with clip metadata cached on disk.

    Clip metadata is loaded from ``<root>/ucf101_metadata.pt`` when present
    and written there after the first (expensive) scan.
    NOTE(review): ``_precomputed_metadata`` is accepted but not forwarded;
    the on-disk cache is used instead — confirm that is intended.
    """
    super(UCF101, self).__init__(root)
    if not 1 <= fold <= 3:
        raise ValueError(
            "fold should be between 1 and 3, got {}".format(fold))
    self.fold = fold
    self.train = train
    classes = list(sorted(list_dir(root)))
    class_to_idx = {name: idx for idx, name in enumerate(classes)}
    # Only .avi files are indexed.
    self.samples = make_dataset(self.root, class_to_idx, ('avi', ),
                                is_valid_file=None)
    self.classes = classes
    video_list = [path for path, _ in self.samples]
    # Reuse cached clip metadata when available to avoid re-scanning videos.
    metadata_filepath = os.path.join(root, 'ucf101_metadata.pt')
    if os.path.exists(metadata_filepath):
        metadata = torch.load(metadata_filepath)
    else:
        metadata = None
    video_clips = VideoClips(
        video_list,
        frames_per_clip,
        step_between_clips,
        frame_rate,
        metadata,
        num_workers=num_workers,
        _video_width=_video_width,
        _video_height=_video_height,
        _video_min_dimension=_video_min_dimension,
        _audio_samples=_audio_samples,
    )
    if not os.path.exists(metadata_filepath):
        torch.save(video_clips.metadata, metadata_filepath)
    self.video_clips_metadata = video_clips.metadata
    self.indices = self._select_fold(video_list, annotation_path, fold, train)
    self.video_clips = video_clips.subset(self.indices)
    self.transform = transform
def __init__(self, root, annotation_path, frames_per_clip,
             step_between_clips=1, frame_rate=None, fold=1, train=True,
             transform=None, _precomputed_metadata=None, num_workers=1,
             _video_width=0, _video_height=0, _video_min_dimension=0,
             _audio_samples=0):
    """UCF101 variant that pickles clip metadata per configuration.

    The metadata pickle filename encodes split, fold, clip length and stride,
    so each configuration caches independently.
    """
    super(MYUCF101, self).__init__(root)
    if not 1 <= fold <= 3:
        raise ValueError(
            "fold should be between 1 and 3, got {}".format(fold))
    self.fold = fold
    self.train = train
    classes = list(sorted(list_dir(root)))
    class_to_idx = {name: idx for idx, name in enumerate(classes)}
    # Only .avi files are indexed.
    self.samples = make_dataset(self.root, class_to_idx, ('avi', ),
                                is_valid_file=None)
    self.classes = classes
    video_list = [path for path, _ in self.samples]
    video_clips = VideoClips(
        video_list,
        frames_per_clip,
        step_between_clips,
        frame_rate,
        _precomputed_metadata,
        num_workers=num_workers,
        _video_width=_video_width,
        _video_height=_video_height,
        _video_min_dimension=_video_min_dimension,
        _audio_samples=_audio_samples,
    )
    # Persist the computed clip metadata once per configuration.
    meta_data_str_ = os.path.join(
        root,
        f"meta_data_train_{train}_fold_{fold}_frames_{frames_per_clip}_skip_"
        f"{step_between_clips}.pickle")
    if not os.path.exists(meta_data_str_):
        with open(meta_data_str_, 'wb') as ff:
            pickle.dump(video_clips.metadata, ff)
    self.video_clips_metadata = video_clips.metadata
    self.indices = self._select_fold(video_list, annotation_path, fold, train)
    self.video_clips = video_clips.subset(self.indices)
    self.transform = transform
def __init__(self, video_root='/DB/VCDB/core_dataset', fps=1,
             extensions=('mp4', 'flv')):
    """Index the VCDB core dataset and sample frames at ``fps``.

    Fix: ``extensions`` previously defaulted to a mutable list
    (``['mp4', 'flv']``) shared across all calls — a classic mutable-default
    pitfall; an equivalent tuple default is safe and backward compatible.
    """
    self.video_root = video_root
    self.classes, self.class_to_idx = self._find_classes(self.video_root)
    self.videos = make_dataset(self.video_root, self.class_to_idx, extensions)
    self.video_list, self.meta = self.__read_video_meta()
    # NOTE(review): both prints say 'sampling'; the second was likely meant
    # as a completion message — kept as-is to preserve existing output.
    print('sampling')
    self.frames, self.frames_cnt = self.__sampling_frames(fps=fps)
    print('sampling')
def get_data(self):
    """Index the folder for the configured split and return
    (data, targets, classes) with paths and targets as parallel tuples."""
    split_dir = os.path.join(self.root, self.split_folder[self.split])
    classes, class_to_idx = self._find_classes(split_dir)
    # Filter by the image predicate instead of an extension list.
    samples = make_dataset(split_dir, class_to_idx, is_valid_file=is_image_file)
    data, targets = zip(*samples)
    logger.info(
        f"Dataset summary: #examples={len(data)}; #classes={len(classes)}")
    return data, targets, classes
def make_dataset(
    directory: str,
    class_to_idx,
    extensions=IMG_EXTENSIONS,
    is_valid_file=None,
):
    """Thin wrapper that defaults ``extensions`` to IMG_EXTENSIONS.

    Bug fix: the body previously called ``make_dataset`` — i.e. itself —
    causing infinite recursion, because this def shadows the torchvision
    helper of the same name. Delegate explicitly to torchvision's
    implementation instead.
    """
    # Local import avoids shadowing by this module-level function.
    from torchvision.datasets.folder import make_dataset as _tv_make_dataset
    return _tv_make_dataset(directory, class_to_idx, extensions=extensions,
                            is_valid_file=is_valid_file)
def create_image_to_label(directory, batch_size=16, ahead=4):
    """Build and persist a boolean image-to-label matrix for an experiment.

    Supports Broden segmentation datasets (labels gathered from batched
    segmentation records) and ImageNet/ILSVRC folder datasets (one-hot from
    folder class indices). The matrix is written as the 'image_to_label'
    mmap of the experiment directory.

    Bug fix: ``print info.dataset`` was Python-2 print-statement syntax — a
    SyntaxError under Python 3, which the rest of this function already
    targets (it uses ``print(...)`` calls elsewhere).
    """
    ed = expdir.ExperimentDirectory(directory)
    info = ed.load_info()
    print(info.dataset)
    if 'broden' in info.dataset:
        ds = loadseg.SegmentationData(info.dataset)
        categories = ds.category_names()
        shape = (ds.size(), len(ds.label))
        pf = loadseg.SegmentationPrefetcher(ds, categories=categories,
                                            once=True, batch_size=batch_size,
                                            ahead=ahead, thread=False)
        image_to_label = np.zeros(shape, dtype='int32')
        batch_count = 0
        for batch in pf.batches():
            if batch_count % 100 == 0:
                print('Processing batch %d ...' % batch_count)
            for rec in batch:
                image_index = rec['i']
                for cat in categories:
                    # A category applies if its record holds any label data.
                    if ((type(rec[cat]) is np.ndarray and rec[cat].size > 0)
                            or type(rec[cat]) is list and len(rec[cat]) > 0):
                        image_to_label[image_index][np.unique(rec[cat])] = True
            batch_count += 1
    elif 'imagenet' in info.dataset or 'ILSVRC' in info.dataset:
        classes, class_to_idx = find_classes(info.dataset)
        imgs = make_dataset(info.dataset, class_to_idx)
        _, labels = zip(*imgs)
        labels = np.array(labels)
        L = 1000
        shape = (len(labels), L)
        # One-hot encode each image's class index.
        image_to_label = np.zeros(shape)
        for i in range(L):
            image_to_label[labels == i, i] = 1
    else:
        assert (False)
    mmap = ed.open_mmap(part='image_to_label', mode='w+', dtype=bool,
                        shape=shape)
    mmap[:] = image_to_label[:]
    ed.finish_mmap(mmap)
    f = ed.mmap_filename(part='image_to_label')
    print('Finished and saved index_to_label at %s' % f)
def __init__(self, data_path, image_cache, do_random_flips=False,
             normalization=transforms.Normalize((0.5, 0.5, 0.5),
                                                (0.5, 0.5, 0.5))):
    """ImageFolder-style dataset that reads decoded images from a cache.

    NOTE(review): the default ``normalization`` is evaluated once at def
    time and shared across instances — harmless if the transform is
    stateless, but worth confirming.
    """
    classes, class_to_idx = find_classes(data_path)
    imgs = make_dataset(data_path, class_to_idx)
    if not imgs:
        raise RuntimeError("Found 0 images in subfolders of: " + data_path + "\n"
                           "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))
    self.root = data_path
    self.imgs = imgs
    self.classes = classes
    self.class_to_idx = class_to_idx
    self.normalization = normalization
    self.do_random_flips = do_random_flips
    self.image_cache = image_cache
def read_all_images(root, num_workers=4):
    """Load every image under ``root`` and return a {path: image} cache.

    With more than one worker the paths are decoded via ``parallel_process``;
    otherwise they are read sequentially with a tqdm progress bar.
    """
    classes, class_to_idx = find_classes(root)
    dataset = make_dataset(root, class_to_idx)
    if len(dataset) == 0:
        raise RuntimeError("Found 0 images in subfolders of: " + root + "\n" +
                           "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))
    num_images = len(dataset)
    paths = [sample[0] for sample in dataset]
    print("Reading {0} images with {1} workers".format(num_images, num_workers))
    if num_workers > 1:
        images = parallel_process(paths, read_image_for_pytorch,
                                  n_jobs=num_workers)
    else:
        images = []
        for p in tqdm(paths):
            images.append(read_image_for_pytorch(p))
    image_cache = {}
    # Key each decoded image by its sample path (same order as `dataset`).
    for (path, _), image in zip(dataset, images):
        image_cache[path] = image
    return image_cache