Пример #1
0
    def test_resolve_to_gdrive(self, mocker):
        """Check that resolving an ``HttpResource`` can yield a ``GDriveResource``.

        ``_get_redirect_url`` is patched to return a Google Drive file URL. The
        resolved resource must be a ``GDriveResource`` carrying the id embedded
        in that URL, while the file name, checksum and preprocess callable of
        the original resource are kept.
        """
        file_name = "data.tar"
        id_sentinel = "id-sentinel"
        sha256_sentinel = "sha256_sentinel"

        def preprocess_sentinel(path):
            # Identity function; only its object identity is asserted below.
            return path

        source_url = f"http://downloads.pytorch.org/{file_name}"
        gdrive_url = f"https://drive.google.com/file/d/{id_sentinel}/view"

        http_resource = HttpResource(
            source_url,
            sha256=sha256_sentinel,
            preprocess=preprocess_sentinel,
        )

        # Patch the redirect lookup to return the Google Drive URL before resolving.
        mocker.patch(
            "torchvision.prototype.datasets.utils._resource._get_redirect_url",
            return_value=gdrive_url,
        )
        resolved = http_resource.resolve()

        assert isinstance(resolved, GDriveResource)
        assert resolved.id == id_sentinel
        assert resolved.file_name == file_name
        assert resolved.sha256 == sha256_sentinel
        assert resolved._preprocess is preprocess_sentinel
Пример #2
0
 def resources(self, config: DatasetConfig) -> List[OnlineResource]:
     """Return the archives for the dataset year given by ``config.year``.

     ``"2011"`` yields the main archive followed by the segmentations archive.
     Any other value is handled as ``"2010"`` and yields the split lists, the
     images and the annotations, in that order. Every archive is created with
     ``preprocess="decompress"``.
     """
     if config.year == "2011":
         files = [
             (
                 "http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/CUB_200_2011.tgz",
                 "0c685df5597a8b24909f6a7c9db6d11e008733779a671760afef78feb49bf081",
             ),
             (
                 "http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/segmentations.tgz",
                 "dc77f6cffea0cbe2e41d4201115c8f29a6320ecb04fffd2444f51b8066e4b84f",
             ),
         ]
     else:  # config.year == "2010"
         files = [
             (
                 "http://www.vision.caltech.edu/visipedia-data/CUB-200/lists.tgz",
                 "aeacbd5e3539ae84ea726e8a266a9a119c18f055cd80f3836d5eb4500b005428",
             ),
             (
                 "http://www.vision.caltech.edu/visipedia-data/CUB-200/images.tgz",
                 "2a6d2246bbb9778ca03aa94e2e683ccb4f8821a36b7f235c0822e659d60a803e",
             ),
             (
                 "http://www.vision.caltech.edu/visipedia-data/CUB-200/annotations.tgz",
                 "c17b7841c21a66aa44ba8fe92369cc95dfc998946081828b1d7b8a4b716805c1",
             ),
         ]

     # Resources are built in table order, which is also the returned order.
     return [
         HttpResource(url, sha256=checksum, preprocess="decompress")
         for url, checksum in files
     ]
Пример #3
0
    def test_resolve_to_http(self, mocker):
        """Check that resolving an ``HttpResource`` can yield another ``HttpResource``.

        ``_get_redirect_url`` is patched to return the ``https`` variant of the
        original URL. The resolved resource must be an ``HttpResource`` pointing
        at that URL, with the original file name, checksum and preprocess
        callable kept.
        """
        file_name = "data.tar"
        sha256_sentinel = "sha256_sentinel"

        def preprocess_sentinel(path):
            # Identity function; only its object identity is asserted below.
            return path

        source_url = f"http://downloads.pytorch.org/{file_name}"
        https_url = source_url.replace("http", "https")

        http_resource = HttpResource(
            source_url,
            sha256=sha256_sentinel,
            preprocess=preprocess_sentinel,
        )

        # Patch the redirect lookup to return the https URL before resolving.
        mocker.patch(
            "torchvision.prototype.datasets.utils._resource._get_redirect_url",
            return_value=https_url,
        )
        resolved = http_resource.resolve()

        assert isinstance(resolved, HttpResource)
        assert resolved.url == https_url
        assert resolved.file_name == file_name
        assert resolved.sha256 == sha256_sentinel
        assert resolved._preprocess is preprocess_sentinel
Пример #4
0
class USPS(Dataset):
    """USPS dataset.

    Homepage: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html#usps

    Each split is a single bz2 file that is decompressed, read line by line
    and turned into dictionaries with an ``image`` and a ``label`` entry.
    """

    def __init__(
        self,
        root: Union[str, pathlib.Path],
        *,
        split: str = "train",
        skip_integrity_check: bool = False,
    ) -> None:
        """Validate ``split`` and initialize the base dataset.

        Args:
            root: Forwarded to the base class initializer.
            split: Either ``"train"`` or ``"test"``.
            skip_integrity_check: Forwarded to the base class initializer.
        """
        # NOTE(review): both attributes are assigned before super().__init__,
        # presumably because the base initializer relies on them - confirm
        # before reordering.
        self._split = self._verify_str_arg(split, "split", {"train", "test"})
        self._categories = _info()["categories"]

        super().__init__(root, skip_integrity_check=skip_integrity_check)

    _URL = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass"

    # Keep this a plain dict literal: the f-strings read ``_URL`` straight from
    # the class body, which would not be visible inside a comprehension.
    _RESOURCES = {
        "train": HttpResource(
            f"{_URL}/usps.bz2",
            sha256="3771e9dd6ba685185f89867b6e249233dd74652389f263963b3b741e994b034f",
        ),
        "test": HttpResource(
            f"{_URL}/usps.t.bz2",
            sha256="a9c0164e797d60142a50604917f0baa604f326e9a689698763793fa5d12ffc4e",
        ),
    }

    def _resources(self) -> List[OnlineResource]:
        """Return the single resource registered for the selected split."""
        selected = USPS._RESOURCES[self._split]
        return [selected]

    def _prepare_sample(self, line: str) -> Dict[str, Any]:
        """Parse one text line into a sample dictionary.

        The first space-separated token is the label. Every remaining token is
        split on ``":"`` and its second part is parsed as a float.
        """
        raw_label, *raw_features = line.strip().split(" ")
        features = [float(item.split(":")[1]) for item in raw_features]

        # In-place shift and scale: (x + 1) / 2.
        pixels = torch.tensor(features)
        pixels.add_(1)
        pixels.div_(2)

        return {
            "image": Image(pixels.reshape(16, 16)),
            "label": Label(int(raw_label) - 1, categories=self._categories),
        }

    def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        """Chain the first resource datapipe into a datapipe of samples."""
        decompressed = Decompressor(resource_dps[0])
        lines = LineReader(decompressed, decode=True, return_path=False)
        lines = hint_shuffling(lines)
        lines = hint_sharding(lines)
        return Mapper(lines, self._prepare_sample)

    def __len__(self) -> int:
        """Return the hard-coded number of samples of the selected split."""
        if self._split == "train":
            return 7_291
        return 2_007
Пример #5
0
 def resources(self, config: DatasetConfig) -> List[OnlineResource]:
     """Return the images archive and the annotations archive for ``config``.

     The images URL depends on ``config.split`` and ``config.year``; the
     annotations URL depends on ``config.year`` only. Checksums are looked up
     in ``_IMAGES_CHECKSUMS`` and ``_META_CHECKSUMS``.
     """
     images_url = f"{self._IMAGE_URL_BASE}/{config.split}{config.year}.zip"
     images_sha256 = self._IMAGES_CHECKSUMS[(config.year, config.split)]

     meta_url = f"{self._META_URL_BASE}/annotations_trainval{config.year}.zip"
     meta_sha256 = self._META_CHECKSUMS[config.year]

     return [
         HttpResource(images_url, sha256=images_sha256),
         HttpResource(meta_url, sha256=meta_sha256),
     ]
Пример #6
0
 def _resources(self) -> List[OnlineResource]:
     """Return the images archive followed by the annotations archive.

     The images entry is selected by ``self._year`` and ``self._split``; the
     annotations entry is selected by ``self._year`` only.
     """
     return [
         # Images for the selected split and year.
         HttpResource(
             f"{self._IMAGE_URL_BASE}/{self._split}{self._year}.zip",
             sha256=self._IMAGES_CHECKSUMS[(self._year, self._split)],
         ),
         # Annotations for the selected year.
         HttpResource(
             f"{self._META_URL_BASE}/annotations_trainval{self._year}.zip",
             sha256=self._META_CHECKSUMS[self._year],
         ),
     ]
Пример #7
0
 def _resources(self) -> List[OnlineResource]:
     """Return the benchmark archive followed by the ``train_noval.txt`` file."""
     return [
         HttpResource(
             "https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz",
             sha256="6a5a2918d5c73ce032fdeba876574d150d9d04113ab87540a1304cbcc715be53",
         ),
         HttpResource(
             "http://home.bharathh.info/pubs/codes/SBD/train_noval.txt",
             sha256="0b2068f7a359d2907431803e1cd63bf6162da37d7d503b589d3b08c6fd0c2432",
         ),
     ]
Пример #8
0
 def resources(self, config: DatasetConfig) -> List[OnlineResource]:
     """Return the images archive followed by the annotations archive.

     ``config`` is not used; the same two resources are returned for every
     configuration.
     """
     images_url = "http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz"
     images_sha256 = "af6ece2f339791ca20f855943d8b55dd60892c0a25105fcd631ee3d6430f9926"
     images = HttpResource(images_url, sha256=images_sha256)

     anns_url = "http://www.vision.caltech.edu/Image_Datasets/Caltech101/Annotations.tar"
     anns_sha256 = "1717f4e10aa837b05956e3f4c94456527b143eec0d95e935028b30aff40663d8"
     anns = HttpResource(anns_url, sha256=anns_sha256)

     return [images, anns]
Пример #9
0
    def resources(self, config: DatasetConfig) -> List[OnlineResource]:
        """Return the split resource plus its companion resource.

        The first entry is looked up by ``config.split``. The second entry is
        the ``"car_devkit"`` resource for the ``"train"`` split and the
        ``"cars_test_annos_withlabels"`` resource otherwise.
        """
        split_resource = HttpResource(self._URLS[config.split], sha256=self._CHECKSUM[config.split])

        if config.split == "train":
            companion = HttpResource(url=self._URLS["car_devkit"], sha256=self._CHECKSUM["car_devkit"])
        else:
            companion = HttpResource(
                self._URLS["cars_test_annos_withlabels"],
                sha256=self._CHECKSUM["cars_test_annos_withlabels"],
            )

        return [split_resource, companion]
Пример #10
0
 def _resources(self) -> List[OnlineResource]:
     """Return the images archive followed by the annotations archive.

     Both resources are created with ``preprocess="decompress"``.
     """
     files = (
         (
             "https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz",
             "67195c5e1c01f1ab5f9b6a5d22b8c27a580d896ece458917e61d459337fa318d",
         ),
         (
             "https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz",
             "52425fb6de5c424942b7626b428656fcbd798db970a937df61750c0f1d358e91",
         ),
     )
     return [
         HttpResource(url, sha256=checksum, preprocess="decompress")
         for url, checksum in files
     ]
Пример #11
0
    def _resources(self) -> List[OnlineResource]:
        """Return the split resource plus its companion resource.

        The first entry is looked up by ``self._split``. The second entry is
        the ``"car_devkit"`` resource for the ``"train"`` split and the
        ``"cars_test_annos_withlabels"`` resource otherwise.
        """
        split_resource = HttpResource(self._URLS[self._split], sha256=self._CHECKSUM[self._split])

        if self._split == "train":
            companion = HttpResource(url=self._URLS["car_devkit"], sha256=self._CHECKSUM["car_devkit"])
        else:
            companion = HttpResource(
                self._URLS["cars_test_annos_withlabels"],
                sha256=self._CHECKSUM["cars_test_annos_withlabels"],
            )

        return [split_resource, companion]
Пример #12
0
    def _resources(self) -> List[OnlineResource]:
        """Return the resource of the selected split.

        For the ``"test"`` split the ``"test_ground_truth"`` resource is
        returned as a second entry.
        """
        split_resource = HttpResource(self._URLS[self._split], sha256=self._CHECKSUMS[self._split])

        if self._split != "test":
            return [split_resource]

        ground_truth = HttpResource(
            self._URLS["test_ground_truth"],
            sha256=self._CHECKSUMS["test_ground_truth"],
        )
        return [split_resource, ground_truth]
Пример #13
0
    def resources(self, config: DatasetConfig) -> List[OnlineResource]:
        """Return the resource of the split given by ``config.split``.

        For the ``"test"`` split the ``"test_ground_truth"`` resource is
        returned as a second entry.
        """
        split_resource = HttpResource(self._URLS[config.split], sha256=self._CHECKSUMS[config.split])

        if config.split != "test":
            return [split_resource]

        ground_truth = HttpResource(
            self._URLS["test_ground_truth"],
            sha256=self._CHECKSUMS["test_ground_truth"],
        )
        return [split_resource, ground_truth]
Пример #14
0
class USPS(Dataset):
    """USPS dataset.

    Each split is a single bz2 file that is decompressed, read line by line
    and turned into dictionaries with an ``image`` and a ``label`` entry.
    """

    def _make_info(self) -> DatasetInfo:
        """Describe the dataset: name, homepage, valid splits and category count."""
        homepage = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html#usps"
        valid_options = {"split": ("train", "test")}
        return DatasetInfo(
            "usps",
            homepage=homepage,
            valid_options=valid_options,
            categories=10,
        )

    _URL = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass"

    # Keep this a plain dict literal: the f-strings read ``_URL`` straight from
    # the class body, which would not be visible inside a comprehension.
    _RESOURCES = {
        "train": HttpResource(
            f"{_URL}/usps.bz2",
            sha256="3771e9dd6ba685185f89867b6e249233dd74652389f263963b3b741e994b034f",
        ),
        "test": HttpResource(
            f"{_URL}/usps.t.bz2",
            sha256="a9c0164e797d60142a50604917f0baa604f326e9a689698763793fa5d12ffc4e",
        ),
    }

    def resources(self, config: DatasetConfig) -> List[OnlineResource]:
        """Return the single resource registered for ``config.split``."""
        selected = USPS._RESOURCES[config.split]
        return [selected]

    def _prepare_sample(self, line: str) -> Dict[str, Any]:
        """Parse one text line into a sample dictionary.

        The first space-separated token is the label. Every remaining token is
        split on ``":"`` and its second part is parsed as a float.
        """
        raw_label, *raw_features = line.strip().split(" ")
        features = [float(item.split(":")[1]) for item in raw_features]

        # In-place shift and scale: (x + 1) / 2.
        pixels = torch.tensor(features)
        pixels.add_(1)
        pixels.div_(2)

        return {
            "image": Image(pixels.reshape(16, 16)),
            "label": Label(int(raw_label) - 1, categories=self.categories),
        }

    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
    ) -> IterDataPipe[Dict[str, Any]]:
        """Chain the first resource datapipe into a datapipe of samples.

        ``config`` is accepted but not used here.
        """
        decompressed = Decompressor(resource_dps[0])
        lines = LineReader(decompressed, decode=True, return_path=False)
        lines = hint_shuffling(lines)
        lines = hint_sharding(lines)
        return Mapper(lines, self._prepare_sample)
Пример #15
0
    def _resources(self) -> List[OnlineResource]:
        """Return the single ``.mat`` resource of the selected split."""
        url = f"http://ufldl.stanford.edu/housenumbers/{self._split}_32x32.mat"
        return [HttpResource(url, sha256=self._CHECKSUMS[self._split])]
Пример #16
0
 def resources(self, config: DatasetConfig) -> List[OnlineResource]:
     """Return the single dataset archive, created with ``decompress=True``.

     ``config`` is not used; the same resource is returned for every
     configuration.
     """
     return [
         HttpResource(
             "https://www.robots.ox.ac.uk/~vgg/data/dtd/download/dtd-r1.0.1.tar.gz",
             sha256="e42855a52a4950a3b59612834602aa253914755c95b0cff9ead6d07395f8e205",
             decompress=True,
         )
     ]
Пример #17
0
 def resources(self, config: DatasetConfig) -> List[OnlineResource]:
     """Return the single dataset archive; ``config`` is not used."""
     archive = HttpResource(
         "https://openaipublic.azureedge.net/clip/data/country211.tgz",
         sha256="c011343cdc1296a8c31ff1d7129cf0b5e5b8605462cffd24f89266d6e6f4da3c",
     )
     return [archive]
Пример #18
0
 def resources(self, config: DatasetConfig) -> List[OnlineResource]:
     """Return the single dataset archive; ``config`` is not used."""
     return [
         HttpResource(
             "https://dl.fbaipublicfiles.com/clevr/CLEVR_v1.0.zip",
             sha256="5cd61cf1096ed20944df93c9adb31e74d189b8459a94f54ba00090e5c59936d1",
         )
     ]
Пример #19
0
 def resources(self, config: DatasetConfig) -> List[OnlineResource]:
     """Return the single data file; ``config`` is not used."""
     return [
         HttpResource(
             "http://archive.ics.uci.edu/ml/machine-learning-databases/semeion/semeion.data",
             sha256="f43228ae3da5ea6a3c95069d53450b86166770e3b719dcc333182128fe08d4b1",
         )
     ]
Пример #20
0
 def _resources(self) -> List[OnlineResource]:
     """Return the single archive named by ``self._FILE_NAME``.

     The checksum is taken from ``self._SHA256``.
     """
     archive = HttpResource(
         f"https://www.cs.toronto.edu/~kriz/{self._FILE_NAME}",
         sha256=self._SHA256,
     )
     return [archive]
Пример #21
0
 def resources(self, config: DatasetConfig) -> List[OnlineResource]:
     """Return the single archive for the split and year in ``config``.

     The ``"test"`` split reads its file name and checksum from
     ``_TEST_ARCHIVES``; every other split reads them from
     ``_TRAIN_VAL_ARCHIVES``. Both tables are indexed by ``config.year``.
     """
     if config.split == "test":
         archives = self._TEST_ARCHIVES
     else:
         archives = self._TRAIN_VAL_ARCHIVES
     file_name, sha256 = archives[config.year]

     return [
         HttpResource(
             f"http://host.robots.ox.ac.uk/pascal/VOC/voc{config.year}/{file_name}",
             sha256=sha256,
         )
     ]
Пример #22
0
 def resources(self, config: Optional[DatasetConfig] = None) -> List[OnlineResource]:
     """Return the single ``emnist-gzip.zip`` archive; ``config`` is not used."""
     archive = HttpResource(
         f"{self._URL_BASE}/emnist-gzip.zip",
         sha256="909a2a39c5e86bdd7662425e9b9c4a49bb582bf8d0edad427f3c3a9d0c6f7259",
     )
     return [archive]
Пример #23
0
    def resources(self, config: DatasetConfig) -> List[OnlineResource]:
        """Return the single ``.mat`` resource of the split in ``config.split``."""
        url = f"http://ufldl.stanford.edu/housenumbers/{config.split}_32x32.mat"
        return [HttpResource(url, sha256=self._CHECKSUMS[config.split])]
Пример #24
0
 def resources(self, config: DatasetConfig) -> List[OnlineResource]:
     """Return the single dataset archive; ``config`` is not used."""
     archive = HttpResource(
         "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz",
         sha256="85cd44d02ba6437773c5bbd22e183051d648de2e7d6b014e1ef29b855ba677a7",
     )
     return [archive]
Пример #25
0
 def resources(self, config: DatasetConfig) -> List[OnlineResource]:
     """Return the single dataset archive; ``config`` is not used."""
     archive = HttpResource(
         "https://madm.dfki.de/files/sentinel/EuroSAT.zip",
         sha256="8ebea626349354c5328b142b96d0430e647051f26efc2dc974c843f25ecf70bd",
     )
     return [archive]
Пример #26
0
 def _resources(self) -> List[OnlineResource]:
     """Return the single ``emnist-gzip.zip`` archive under ``self._URL_BASE``."""
     archive = HttpResource(
         f"{self._URL_BASE}/emnist-gzip.zip",
         sha256="909a2a39c5e86bdd7662425e9b9c4a49bb582bf8d0edad427f3c3a9d0c6f7259",
     )
     return [archive]
Пример #27
0
 def resources(self, config: DatasetConfig) -> List[OnlineResource]:
     """Return the single dataset archive; ``config`` is not used."""
     archive = HttpResource(
         "http://www.vision.caltech.edu/Image_Datasets/Caltech256/256_ObjectCategories.tar",
         sha256="08ff01b03c65566014ae88eb0490dbe4419fc7ac4de726ee1163e39fd809543e",
     )
     return [archive]
Пример #28
0
    def resources(self, config: DatasetConfig) -> List[OnlineResource]:
        """Return the images resource followed by the labels resource.

        File names and checksums come from ``self._files_and_checksums(config)``.
        ``self._URL_BASE`` may be a single string or a sequence of base URLs:
        the first base gives the primary URL and the remaining ones are passed
        as ``mirrors``.
        """
        images_entry, labels_entry = self._files_and_checksums(config)
        images_file, images_sha256 = images_entry
        labels_file, labels_sha256 = labels_entry

        # Normalize a single base URL to a one-element tuple.
        url_bases = self._URL_BASE
        url_bases = (url_bases,) if isinstance(url_bases, str) else url_bases

        images_urls = [f"{url_base}/{images_file}" for url_base in url_bases]
        images = HttpResource(
            images_urls[0],
            sha256=images_sha256,
            mirrors=images_urls[1:],
        )

        labels_urls = [f"{url_base}/{labels_file}" for url_base in url_bases]
        labels = HttpResource(
            labels_urls[0],
            sha256=labels_sha256,
            mirrors=labels_urls[1:],
        )

        return [images, labels]
Пример #29
0
 def resources(self, config: DatasetConfig) -> List[OnlineResource]:
     """Return the single dataset archive; ``config`` is not used."""
     archive = HttpResource(
         "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz",
         sha256="6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce",
     )
     return [archive]
Пример #30
0
 def _resources(self) -> List[OnlineResource]:
     """Return the single dataset archive, created with ``preprocess="decompress"``."""
     archive = HttpResource(
         url="http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz",
         sha256="d97d15e438b7f4498f96086a4f7e2fa42a32f2712e87d3295441b2b6314053a4",
         preprocess="decompress",
     )
     return [archive]