def __init__(self, root, split='train', download=False, **kwargs):
        """Set up the dataset; use as torchvision.datasets.ImageNet.

        Args:
            root: dataset directory; a leading ``~`` is expanded.
            split: either "train" or "val" (checked with ``verify_str_arg``).
            download: accepted for signature compatibility; this constructor
                never reads it.
            **kwargs: forwarded to ``torchvision.datasets.ImageFolder.__init__``.
        """
        self.root = os.path.expanduser(root)
        root = self.root
        self.split = verify_str_arg(split, "split", ("train", "val"))

        # Three attempts at loading the meta file: with the current META_FILE
        # setting, then with META_FILE redirected to ~/data/meta.bin, and
        # finally after parse_archives() has been run.
        try:
            wnid_to_classes = load_meta_file(self.root)[0]
        except RuntimeError:
            fallback_dir = os.path.expanduser('~/data/')
            # NOTE: this rebinds a module-level torchvision attribute, so the
            # change is visible outside this instance.
            torchvision.datasets.imagenet.META_FILE = os.path.join(
                fallback_dir, 'meta.bin')
            try:
                wnid_to_classes = load_meta_file(self.root)[0]
            except RuntimeError:
                self.parse_archives()
                wnid_to_classes = load_meta_file(self.root)[0]

        torchvision.datasets.ImageFolder.__init__(
            self, self.split_folder, **kwargs)
        # Presumably the parent constructor stored split_folder as self.root;
        # put the dataset root back.
        self.root = root

        # Keep the folder-level names (wnids) and their index mapping around
        # before replacing them with the names from the meta file.
        self.wnids = self.classes
        self.wnid_to_idx = self.class_to_idx
        self.classes = [wnid_to_classes[wnid] for wnid in self.wnids]

        name_to_idx = {}
        for position, names in enumerate(self.classes):
            for name in names:
                name_to_idx[name] = position
        self.class_to_idx = name_to_idx

        # Scrub class names to be a single string: tuple entries are reduced
        # to their first element, anything else is kept unchanged.
        self.classes = [
            entry[0] if isinstance(entry, tuple) else entry
            for entry in self.classes
        ]
Exemplo n.º 2
0
    def __init__(self, root, split='train', **kwargs):
        """Parse the archives, load the meta file and index the chosen split.

        Args:
            root: dataset directory; a leading ``~`` is expanded.
            split: either "train" or "val" (checked with ``verify_str_arg``).
            **kwargs: forwarded to the parent constructor.
        """
        expanded_root = os.path.expanduser(root)
        self.root = expanded_root
        self.split = verify_str_arg(split, "split", ("train", "val"))

        # The archives are parsed on every construction, before the meta file
        # is read.
        self.parse_archives()
        wnid_to_classes = load_meta_file(self.root)[0]

        super().__init__(self.split_folder, **kwargs)
        # Presumably the parent constructor stored split_folder as self.root;
        # put the dataset root back.
        self.root = expanded_root

        # Keep the folder-level names (wnids) and their index mapping around
        # before replacing them with the names from the meta file.
        self.wnids = self.classes
        self.wnid_to_idx = self.class_to_idx
        self.classes = [wnid_to_classes[wnid] for wnid in self.wnids]

        # Every name listed for a class maps to that class's position.
        name_to_idx = {}
        for position, names in enumerate(self.classes):
            for name in names:
                name_to_idx[name] = position
        self.class_to_idx = name_to_idx
Exemplo n.º 3
0
    def __init__(
            self,
            root,
            split: str = 'train',
            num_classes: int = -1,
            num_imgs_per_class: int = -1,
            num_imgs_per_class_val_split: int = 50,
            meta_dir=None,
            **kwargs,
    ):
        """Load an ImageNet split and optionally subsample it.

        The requested split is remapped before anything is read: ``'train'``
        and ``'val'`` both use the ``'train'`` split (which is then divided by
        ``partition_train_set``), and ``'test'`` uses the ``'val'`` split.

        Args:
            root: path of dataset; a leading ``~`` is expanded.
            split: ``'train'``, ``'val'`` or ``'test'`` (remapped as above).
            num_classes: Sets the limit of classes; ``-1`` disables the limit.
            num_imgs_per_class: Limits the number of images per class; ``-1``
                disables the limit.
            num_imgs_per_class_val_split: How many images per class to generate the val split
            meta_dir: directory passed to ``load_meta_file``; defaults to
                ``os.path.join(root, <remapped split>)``.
            kwargs: forwarded to the parent constructor.
        """
        root = self.root = os.path.expanduser(root)

        # [train], [val] --> [train, val], [test]
        original_split = split
        if split in ('train', 'val'):
            split = 'train'
        elif split == 'test':
            split = 'val'

        self.split = split
        if meta_dir is None:
            meta_dir = os.path.join(root, split)
        wnid_to_classes = load_meta_file(meta_dir)[0]

        super(ImageNet, self).__init__(self.split_folder, **kwargs)
        self.root = root

        # shuffle images first
        self.imgs = shuffle(self.imgs, random_state=1234)

        # partition train set into [train, val]
        if split == 'train':
            train, val = self.partition_train_set(self.imgs, num_imgs_per_class_val_split)
            # split == 'train' implies original_split is 'train' or 'val'
            self.imgs = train if original_split == 'train' else val

        # limit the number of images per class
        # NOTE(review): 'test' has already been remapped to 'val' above, so the
        # former `split in ['train', 'test']` check could only ever match
        # 'train'. The cap therefore applies to the train and val partitions
        # and never to the test split. Behaviour kept as-is; confirm intent.
        if split == 'train' and num_imgs_per_class != -1:
            kept_per_class = [0] * len(self.classes)
            clean_imgs = []
            for img_name, idx in self.imgs:
                if kept_per_class[idx] < num_imgs_per_class:
                    clean_imgs.append((img_name, idx))
                    kept_per_class[idx] += 1

            self.imgs = clean_imgs

        # limit the number of classes
        if num_classes != -1:
            # NOTE(review): the previous code shuffled range(num_classes) and
            # then kept all num_classes entries, so despite the "random"
            # wording the selection has always been class indices
            # 0 .. num_classes - 1. That selection is kept, written explicitly.
            ok_classes = set(range(num_classes))
            self.imgs = [(img_name, idx) for img_name, idx in self.imgs if idx in ok_classes]

        # shuffle again for final exit
        self.imgs = shuffle(self.imgs, random_state=1234)

        # Class-level lookups are derived from the class order, not from the
        # (shuffled, filtered) per-image labels: zipping the class list with
        # per-image labels paired each class with the label of an arbitrary
        # image and truncated the mapping when there were fewer images than
        # classes.
        self.wnids = self.classes
        self.wnid_to_idx = self.class_to_idx
        self.classes = [wnid_to_classes[wnid] for wnid in self.wnids]
        self.class_to_idx = {cls: idx
                             for idx, clss in enumerate(self.classes)
                             for cls in clss}

        # update the root data
        self.samples = self.imgs
        self.targets = [s[1] for s in self.imgs]
Exemplo n.º 4
0
    def __init__(
        self,
        root,
        split: str = "train",
        num_classes: int = -1,
        num_imgs_per_class: int = -1,
        num_imgs_per_class_val_split: int = 50,
        meta_dir=None,
        **kwargs,
    ):
        """Load an ImageNet split and optionally subsample it.

        The requested split is remapped before anything is read: ``"train"``
        and ``"val"`` both use the ``"train"`` split (which is then divided by
        ``partition_train_set``), and ``"test"`` uses the ``"val"`` split.

        Args:
            root: path of dataset; a leading ``~`` is expanded.
            split: ``"train"``, ``"val"`` or ``"test"`` (remapped as above).
            num_classes: Sets the limit of classes; ``-1`` disables the limit.
            num_imgs_per_class: Limits the number of images per class; ``-1``
                disables the limit.
            num_imgs_per_class_val_split: How many images per class to generate the val split
            meta_dir: directory passed to ``load_meta_file``; defaults to
                ``os.path.join(root, <remapped split>)``.
            kwargs: forwarded to the parent constructor.

        Raises:
            ModuleNotFoundError: if ``_TORCHVISION_AVAILABLE`` is false.
        """
        if not _TORCHVISION_AVAILABLE:  # pragma: no cover
            raise ModuleNotFoundError(
                "You want to use `torchvision` which is not installed yet, install it with `pip install torchvision`."
            )

        root = self.root = os.path.expanduser(root)

        # [train], [val] --> [train, val], [test]
        original_split = split
        if split in ("train", "val"):
            split = "train"
        elif split == "test":
            split = "val"

        self.split = split
        if meta_dir is None:
            meta_dir = os.path.join(root, split)
        wnid_to_classes = load_meta_file(meta_dir)[0]

        super(ImageNet, self).__init__(self.split_folder, **kwargs)
        self.root = root

        # shuffle images first
        # NOTE: np.random.seed resets NumPy's global RNG, which is visible to
        # any other code using np.random after this constructor runs.
        np.random.seed(1234)
        np.random.shuffle(self.imgs)

        # partition train set into [train, val]
        if split == "train":
            train, val = self.partition_train_set(
                self.imgs, num_imgs_per_class_val_split)
            # split == "train" implies original_split is "train" or "val"
            self.imgs = train if original_split == "train" else val

        # limit the number of images per class
        # NOTE(review): "test" has already been remapped to "val" above, so the
        # former `split in ["train", "test"]` check could only ever match
        # "train". The cap therefore applies to the train and val partitions
        # and never to the test split. Behaviour kept as-is; confirm intent.
        if split == "train" and num_imgs_per_class != -1:
            kept_per_class = [0] * len(self.classes)
            clean_imgs = []
            for img_name, idx in self.imgs:
                if kept_per_class[idx] < num_imgs_per_class:
                    clean_imgs.append((img_name, idx))
                    kept_per_class[idx] += 1

            self.imgs = clean_imgs

        # limit the number of classes
        if num_classes != -1:
            # NOTE(review): the previous code shuffled range(num_classes) and
            # then kept all num_classes entries, so despite the "random"
            # wording the selection has always been class indices
            # 0 .. num_classes - 1. That selection is kept, written explicitly.
            # The global RNG is reseeded right below, so dropping the no-op
            # shuffle does not change the final image order.
            ok_classes = set(range(num_classes))
            self.imgs = [(img_name, idx) for img_name, idx in self.imgs
                         if idx in ok_classes]

        # shuffle again for final exit
        np.random.seed(1234)
        np.random.shuffle(self.imgs)

        # Class-level lookups are derived from the class order, not from the
        # (shuffled, filtered) per-image labels: zipping the class list with
        # per-image labels paired each class with the label of an arbitrary
        # image and truncated the mapping when there were fewer images than
        # classes.
        self.wnids = self.classes
        self.wnid_to_idx = self.class_to_idx
        self.classes = [wnid_to_classes[wnid] for wnid in self.wnids]
        self.class_to_idx = {
            cls: idx
            for idx, clss in enumerate(self.classes) for cls in clss
        }

        # update the root data
        self.samples = self.imgs
        self.targets = [s[1] for s in self.imgs]