def __init__(self, root, split='train', download=False, **kwargs):
    """Use as torchvision.datasets.ImageNet.

    Args:
        root: dataset root directory containing ``meta.bin`` and the
            split folders.
        split: one of ``"train"`` or ``"val"``.
        download: accepted for interface compatibility but ignored;
            the devkit archives are parsed on demand instead.
        **kwargs: forwarded to ``torchvision.datasets.ImageFolder``.
    """
    root = self.root = os.path.expanduser(root)
    self.split = verify_str_arg(split, "split", ("train", "val"))
    try:
        wnid_to_classes = load_meta_file(self.root)[0]
    except RuntimeError:
        # meta.bin not found under root: fall back to ~/data/meta.bin.
        # NOTE: this rebinds the module-level META_FILE for the whole
        # process, so later ImageNet instances see the same fallback.
        torchvision.datasets.imagenet.META_FILE = os.path.join(
            os.path.expanduser('~/data/'), 'meta.bin')
        try:
            wnid_to_classes = load_meta_file(self.root)[0]
        except RuntimeError:
            # Still missing: build meta.bin from the devkit archives.
            self.parse_archives()
            wnid_to_classes = load_meta_file(self.root)[0]

    # ImageFolder discovers WNID directory names as "classes".
    torchvision.datasets.ImageFolder.__init__(self, self.split_folder, **kwargs)
    self.root = root

    # Keep the WNID view, then remap classes to human-readable names.
    self.wnids = self.classes
    self.wnid_to_idx = self.class_to_idx
    self.classes = [wnid_to_classes[wnid] for wnid in self.wnids]
    self.class_to_idx = {
        cls: idx for idx, clss in enumerate(self.classes) for cls in clss
    }

    # Scrub class names to a single string: meta.bin maps each WNID to a
    # tuple of synonyms; keep only the first. (The original used a no-op
    # bare-string statement as a comment here and a manual append loop.)
    self.classes = [
        name[0] if isinstance(name, tuple) else name for name in self.classes
    ]
def __init__(self, root, split='train', **kwargs):
    """Initialize the dataset from *root* for the given *split*.

    Parses the devkit archives if needed, then delegates directory
    discovery to ``ImageFolder`` and remaps the discovered WNIDs to
    human-readable class names from ``meta.bin``.
    """
    root = self.root = os.path.expanduser(root)
    self.split = verify_str_arg(split, "split", ("train", "val"))

    # Ensure meta.bin and the split folder exist before scanning.
    self.parse_archives()
    wnid_names = load_meta_file(self.root)[0]

    super().__init__(self.split_folder, **kwargs)
    self.root = root  # restore: ImageFolder set root to the split folder

    # ImageFolder's "classes" are the WNID directory names; preserve
    # that view before overwriting with readable names.
    self.wnids = self.classes
    self.wnid_to_idx = self.class_to_idx
    self.classes = [wnid_names[wnid] for wnid in self.wnids]

    # Each WNID maps to a tuple of synonyms; index every synonym.
    name_to_idx = {}
    for idx, synonyms in enumerate(self.classes):
        for name in synonyms:
            name_to_idx[name] = idx
    self.class_to_idx = name_to_idx
def __init__(
    self,
    root,
    split: str = 'train',
    num_classes: int = -1,
    num_imgs_per_class: int = -1,
    num_imgs_per_class_val_split: int = 50,
    meta_dir=None,
    **kwargs,
):
    """
    Args:
        root: path of dataset
        split: 'train', 'val' (carved out of the train archive) or
            'test' (the official validation archive)
        num_classes: Sets the limit of classes (-1 keeps all)
        num_imgs_per_class: Limits the number of images per class
            (-1 keeps all)
        num_imgs_per_class_val_split: How many images per class to
            generate the val split
        meta_dir: directory holding meta.bin; defaults to the split folder
        kwargs: forwarded to the parent ImageFolder/ImageNet constructor
    """
    root = self.root = os.path.expanduser(root)

    # Remap user-facing splits onto the on-disk archives:
    # [train], [val] --> [train, val] (both from the train archive),
    # [test] --> [val archive].
    original_split = split
    if split == 'train' or split == 'val':
        split = 'train'
    if split == 'test':
        split = 'val'

    self.split = split
    split_root = os.path.join(root, split)
    meta_dir = meta_dir if meta_dir is not None else split_root
    wnid_to_classes = load_meta_file(meta_dir)[0]

    super(ImageNet, self).__init__(self.split_folder, **kwargs)
    self.root = root

    # Shuffle images first (deterministic) so all later slices are random.
    self.imgs = shuffle(self.imgs, random_state=1234)

    # Partition the train archive into [train, val].
    if split == 'train':
        train, val = self.partition_train_set(self.imgs, num_imgs_per_class_val_split)
        if original_split == 'train':
            self.imgs = train
        if original_split == 'val':
            self.imgs = val

    # Limit the number of images per class; the val split was already
    # limited above. NOTE(review): after the remap, split can only be
    # 'train' or 'val' here, so the 'test' branch never fires — the
    # official test archive is never capped; confirm intended.
    if split in ['train', 'test']:
        if num_imgs_per_class != -1:
            clean_imgs = []
            cts = {x: 0 for x in range(len(self.classes))}
            for img_name, idx in self.imgs:
                if cts[idx] < num_imgs_per_class:
                    clean_imgs.append((img_name, idx))
                    cts[idx] += 1
            self.imgs = clean_imgs

        # Limit the number of classes.
        if num_classes != -1:
            # Choose the classes at random (but deterministic).
            # BUG FIX: the original shuffled range(num_classes) and then
            # took the first num_classes, which always selects classes
            # 0..num_classes-1 and is not random at all. Shuffle over ALL
            # classes, then keep the first num_classes of them.
            ok_classes = shuffle(list(range(len(self.classes))), random_state=1234)
            ok_classes = set(ok_classes[:num_classes])

            self.imgs = [
                (img_name, idx) for img_name, idx in self.imgs if idx in ok_classes
            ]

    # Shuffle again for final exit.
    self.imgs = shuffle(self.imgs, random_state=1234)

    # List of class indices, one per image.
    idcs = [idx for _, idx in self.imgs]

    self.wnids = self.classes
    # NOTE(review): zips per-image indices against per-class wnids, so the
    # mapping depends on image order — looks suspect but is preserved as-is.
    self.wnid_to_idx = {wnid: idx for idx, wnid in zip(idcs, self.wnids)}
    self.classes = [wnid_to_classes[wnid] for wnid in self.wnids]
    self.class_to_idx = {
        cls: idx for clss, idx in zip(self.classes, idcs) for cls in clss
    }

    # Update the root data so samples/targets reflect the filtered images.
    self.samples = self.imgs
    self.targets = [s[1] for s in self.imgs]
def __init__(
    self,
    root,
    split: str = "train",
    num_classes: int = -1,
    num_imgs_per_class: int = -1,
    num_imgs_per_class_val_split: int = 50,
    meta_dir=None,
    **kwargs,
):
    """
    Args:
        root: path of dataset
        split: 'train', 'val' (carved out of the train archive) or
            'test' (the official validation archive)
        num_classes: Sets the limit of classes (-1 keeps all)
        num_imgs_per_class: Limits the number of images per class
            (-1 keeps all)
        num_imgs_per_class_val_split: How many images per class to
            generate the val split
        meta_dir: directory holding meta.bin; defaults to the split folder
        kwargs: forwarded to the parent ImageFolder/ImageNet constructor
    """
    if not _TORCHVISION_AVAILABLE:  # pragma: no cover
        raise ModuleNotFoundError(
            "You want to use `torchvision` which is not installed yet, install it with `pip install torchvision`."
        )

    root = self.root = os.path.expanduser(root)

    # Remap user-facing splits onto the on-disk archives:
    # [train], [val] --> [train, val] (both from the train archive),
    # [test] --> [val archive].
    original_split = split
    if split == "train" or split == "val":
        split = "train"
    if split == "test":
        split = "val"

    self.split = split
    split_root = os.path.join(root, split)
    meta_dir = meta_dir if meta_dir is not None else split_root
    wnid_to_classes = load_meta_file(meta_dir)[0]

    super(ImageNet, self).__init__(self.split_folder, **kwargs)
    self.root = root

    # Shuffle images first (deterministic) so all later slices are random.
    np.random.seed(1234)
    np.random.shuffle(self.imgs)

    # Partition the train archive into [train, val].
    if split == "train":
        train, val = self.partition_train_set(self.imgs, num_imgs_per_class_val_split)
        if original_split == "train":
            self.imgs = train
        if original_split == "val":
            self.imgs = val

    # Limit the number of images per class; the val split was already
    # limited above. NOTE(review): after the remap, split can only be
    # 'train' or 'val' here, so the 'test' branch never fires — the
    # official test archive is never capped; confirm intended.
    if split in ["train", "test"]:
        if num_imgs_per_class != -1:
            clean_imgs = []
            cts = {x: 0 for x in range(len(self.classes))}
            for img_name, idx in self.imgs:
                if cts[idx] < num_imgs_per_class:
                    clean_imgs.append((img_name, idx))
                    cts[idx] += 1
            self.imgs = clean_imgs

        # Limit the number of classes.
        if num_classes != -1:
            # Choose the classes at random (but deterministic).
            # BUG FIX: the original shuffled range(num_classes) and then
            # took the first num_classes, which always selects classes
            # 0..num_classes-1 and is not random at all. Shuffle over ALL
            # classes, then keep the first num_classes of them.
            ok_classes = list(range(len(self.classes)))
            np.random.seed(1234)
            np.random.shuffle(ok_classes)
            ok_classes = set(ok_classes[:num_classes])

            self.imgs = [
                (img_name, idx) for img_name, idx in self.imgs if idx in ok_classes
            ]

    # Shuffle again for final exit.
    np.random.seed(1234)
    np.random.shuffle(self.imgs)

    # List of class indices, one per image.
    idcs = [idx for _, idx in self.imgs]

    self.wnids = self.classes
    # NOTE(review): zips per-image indices against per-class wnids, so the
    # mapping depends on image order — looks suspect but is preserved as-is.
    self.wnid_to_idx = {wnid: idx for idx, wnid in zip(idcs, self.wnids)}
    self.classes = [wnid_to_classes[wnid] for wnid in self.wnids]
    self.class_to_idx = {
        cls: idx for clss, idx in zip(self.classes, idcs) for cls in clss
    }

    # Update the root data so samples/targets reflect the filtered images.
    self.samples = self.imgs
    self.targets = [s[1] for s in self.imgs]