Exemplo n.º 1
0
 def _create_annotations(self, image_ids, num_annotations_per_image):
     """Create caption annotations for every given image id.

     One caption per index in ``range(num_annotations_per_image)`` is
     generated (the index rendered as a string) and handed, together with
     ``image_ids``, to ``datasets_utils.combinations_grid``. Every resulting
     annotation additionally receives a consecutive ``"id"`` entry.

     Returns:
         A tuple ``(annotations, info)`` where ``info`` is a dict holding
         the generated captions under the key ``"captions"``.
     """
     captions = list(map(str, range(num_annotations_per_image)))
     annotations = datasets_utils.combinations_grid(
         image_id=image_ids,
         caption=captions,
     )
     # Number the annotations in the order the grid returned them.
     for annotation_id, annotation in enumerate(annotations):
         annotation["id"] = annotation_id
     return annotations, {"captions": captions}
Exemplo n.º 2
0
 def _create_annotations(self, image_ids, num_annotations_per_image):
     """Create bounding-box annotations for every given image id.

     The same fixed box ``[1.0, 2.0, 3.0, 4.0]`` is repeated
     ``num_annotations_per_image`` times and handed, together with
     ``image_ids``, to ``datasets_utils.combinations_grid``. Every resulting
     annotation additionally receives a consecutive ``"id"`` entry.

     Returns:
         A tuple ``(annotations, info)``; ``info`` is an empty dict.
     """
     fixed_bbox = [1.0, 2.0, 3.0, 4.0]
     annotations = datasets_utils.combinations_grid(
         image_id=image_ids,
         # The very same list object is repeated, not copied.
         bbox=(fixed_bbox,) * num_annotations_per_image,
     )
     # Number the annotations in the order the grid returned them.
     for annotation_id, annotation in enumerate(annotations):
         annotation["id"] = annotation_id
     return annotations, {}
Exemplo n.º 3
0
class UCF101TestCase(datasets_utils.VideoDatasetTestCase):
    """Fake-data test case for ``datasets.UCF101``.

    Every combination of ``fold`` (1, 2, 3) and ``train`` (True, False) is
    configured. The fake data consists of a folder of generated videos and a
    folder holding one annotation list file per fold/train combination.
    """

    DATASET_CLASS = datasets.UCF101

    CONFIGS = datasets_utils.combinations_grid(
        fold=(1, 2, 3),
        train=(True, False),
    )

    def inject_fake_data(self, tmpdir, config):
        """Create the video and annotation folders below ``tmpdir``.

        Returns:
            ``((video_folder, annotations_folder), num_examples)`` with both
            folders given as strings and ``num_examples`` being the number
            of videos listed in the annotation file selected by ``config``.
        """
        base = pathlib.Path(tmpdir)

        video_folder = base / "videos"
        os.makedirs(video_folder)
        video_files = self._create_videos(video_folder)

        annotations_folder = base / "annotations"
        os.makedirs(annotations_folder)
        num_examples = self._create_annotation_files(
            annotations_folder,
            video_files,
            config["fold"],
            config["train"],
        )

        return (str(video_folder), str(annotations_folder)), num_examples

    def _create_videos(self, root, num_examples_per_class=3):
        """Create videos for two classes and return their paths relative to ``root``."""
        clips_per_group = 2

        def file_name_fn(cls, idx):
            # Consecutive indices fill up one group before the next one starts.
            group, clip = divmod(idx, clips_per_group)
            return f"v_{cls}_g{group + 1:02d}_c{clip + 1:02d}.avi"

        created_per_class = []
        for cls in ("ApplyEyeMakeup", "YoYo"):
            created_per_class.append(
                datasets_utils.create_video_folder(
                    root,
                    cls,
                    lambda idx, cls=cls: file_name_fn(cls, idx),
                    num_examples_per_class,
                )
            )

        return [
            path.relative_to(root)
            for path in itertools.chain.from_iterable(created_per_class)
        ]

    def _create_annotation_files(self, root, video_files, fold, train):
        """Write the annotation lists for all fold/train combinations.

        A random, non-empty subset of ``video_files`` goes into the file that
        belongs to ``fold`` and ``train``; all remaining videos are written
        to each of the other files.

        Returns:
            The number of videos listed in the selected annotation file.
        """
        num_selected = random.randrange(1, len(video_files) - 1)
        selected_videos = random.sample(video_files, num_selected)
        selected_name = self._annotation_file_name(fold, train)
        self._create_annotation_file(root, selected_name, selected_videos)

        remaining_videos = set(video_files).difference(selected_videos)
        other_names = [
            self._annotation_file_name(other_fold, other_train)
            for other_fold, other_train in itertools.product(
                (1, 2, 3), (True, False)
            )
        ]
        other_names.remove(selected_name)
        for other_name in other_names:
            self._create_annotation_file(root, other_name, remaining_videos)

        return len(selected_videos)

    def _annotation_file_name(self, fold, train):
        """Return the list file name, e.g. ``trainlist01.txt`` or ``testlist03.txt``."""
        prefix = "train" if train else "test"
        return f"{prefix}list{fold:02d}.txt"

    def _create_annotation_file(self, root, name, video_files):
        """Write the sorted ``video_files`` to ``root / name``, one per line."""
        target = pathlib.Path(root) / name
        with open(target, "w") as fh:
            fh.writelines([f"{video}\n" for video in sorted(video_files)])
Exemplo n.º 4
0
class VOCSegmentationTestCase(datasets_utils.ImageDatasetTestCase):
    """Fake-data test case for ``datasets.VOCSegmentation``.

    The configurations cover the years 2007 to 2012 with the image sets
    ``train``, ``val`` and ``trainval`` plus two spellings of the 2007 test
    set (``year="2007"`` and ``year="2007-test"``).
    """

    DATASET_CLASS = datasets.VOCSegmentation
    FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image)

    CONFIGS = (
        *datasets_utils.combinations_grid(
            year=[f"20{yy:02d}" for yy in range(7, 13)],
            image_set=("train", "val", "trainval"),
        ),
        {"year": "2007", "image_set": "test"},
        {"year": "2007-test", "image_set": "test"},
    )

    def inject_fake_data(self, tmpdir, config):
        """Create a fake ``VOCdevkit/VOC<year>`` tree below ``tmpdir``.

        Returns:
            A dict with ``num_examples`` (number of images in the configured
            image set) and ``annotation`` (the data written to the
            annotation files).
        """
        image_set = config["image_set"]
        # Both spellings of the test configuration map onto the year 2007.
        is_test_set = config["year"] == "2007-test" or image_set == "test"
        year = "2007" if is_test_set else config["year"]

        devkit_parent = pathlib.Path(tmpdir)
        if year == "2011":
            # Only for 2011 the devkit lives inside an extra folder.
            devkit_parent = devkit_parent / "TrainVal"
        base_dir = devkit_parent / "VOCdevkit" / f"VOC{year}"
        os.makedirs(base_dir)

        num_images, num_images_per_image_set = self._create_image_set_files(
            base_dir, "ImageSets", is_test_set
        )

        datasets_utils.create_image_folder(
            base_dir, "JPEGImages", lambda idx: f"{idx:06d}.jpg", num_images
        )
        datasets_utils.create_image_folder(
            base_dir,
            "SegmentationClass",
            lambda idx: f"{idx:06d}.png",
            num_images,
        )
        annotation = self._create_annotation_files(
            base_dir, "Annotations", num_images
        )

        return {
            "num_examples": num_images_per_image_set[image_set],
            "annotation": annotation,
        }

    def _create_image_set_files(self, root, name, is_test_set):
        """Write the image set files into ``Main`` and copy them to ``Segmentation``.

        Returns:
            ``(num_images, num_images_per_image_set)``. The second item maps
            all four image set names to their sizes, regardless of which
            files were actually written.
        """
        image_sets_dir = pathlib.Path(root) / name
        main_dir = image_sets_dir / "Main"
        os.makedirs(main_dir, exist_ok=True)

        idcs = {"train": (0, 1, 2), "val": (3, 4), "test": (5,)}
        idcs["trainval"] = idcs["train"] + idcs["val"]

        if is_test_set:
            image_sets = ("test",)
        else:
            image_sets = ("train", "val", "trainval")
        for image_set in image_sets:
            self._create_image_set_file(main_dir, image_set, idcs[image_set])

        shutil.copytree(main_dir, image_sets_dir / "Segmentation")

        # The indices are zero based, hence the highest one plus one.
        num_images = max(itertools.chain.from_iterable(idcs.values())) + 1
        num_images_per_image_set = {
            image_set: len(members) for image_set, members in idcs.items()
        }
        return num_images, num_images_per_image_set

    def _create_image_set_file(self, root, image_set, idcs):
        """Write the zero-padded indices to ``root / "<image_set>.txt"``, one per line."""
        target = pathlib.Path(root) / f"{image_set}.txt"
        with open(target, "w") as fh:
            for idx in idcs:
                fh.write(f"{idx:06d}\n")

    def _create_annotation_files(self, root, name, num_images):
        """Create one XML annotation per image and return the annotation data."""
        folder = pathlib.Path(root) / name
        os.makedirs(folder)

        for idx in range(num_images):
            annotation = self._create_annotation_file(folder, f"{idx:06d}.xml")

        # All files carry identical content, so the last result stands for all.
        return annotation

    def _create_annotation_file(self, root, name):
        """Write a single-object annotation XML and return its data as a dict.

        Returns:
            A dict with the object ``name`` and its ``bndbox`` mapping.
        """
        object_name = "dog"
        bndbox = {"xmin": "1", "xmax": "2", "ymin": "3", "ymax": "4"}

        annotation = ET.Element("annotation")
        obj = ET.SubElement(annotation, "object")
        ET.SubElement(obj, "name").text = object_name
        bndbox_element = ET.SubElement(obj, "bndbox")
        for tag, text in bndbox.items():
            ET.SubElement(bndbox_element, tag).text = text

        (pathlib.Path(root) / name).write_bytes(ET.tostring(annotation))

        return {"name": object_name, "bndbox": bndbox}
Exemplo n.º 5
0
class CelebATestCase(datasets_utils.ImageDatasetTestCase):
    """Fake-data test case for ``datasets.CelebA``.

    The configurations combine every split (``train``, ``valid``, ``test``,
    ``all``) with every single target type as well as one combined target
    type (``["attr", "identity"]``).
    """

    DATASET_CLASS = datasets.CelebA
    FEATURE_TYPES = (PIL.Image.Image, (torch.Tensor, int, tuple, type(None)))

    CONFIGS = datasets_utils.combinations_grid(
        split=("train", "valid", "test", "all"),
        target_type=(
            "attr",
            "identity",
            "bbox",
            "landmarks",
            ["attr", "identity"],
        ),
    )
    REQUIRED_PACKAGES = ("pandas", )

    # Integer written to the partition file for each split.
    _SPLIT_TO_IDX = dict(train=0, valid=1, test=2)

    def inject_fake_data(self, tmpdir, config):
        """Create the ``celeba`` folder with images and all annotation files.

        Returns:
            A dict with ``num_examples`` (number of images in the configured
            split) and ``attr_names`` (header of the attribute file).
        """
        base_folder = pathlib.Path(tmpdir) / "celeba"
        os.makedirs(base_folder)

        num_images, num_images_per_split = self._create_split_txt(base_folder)

        # Image file names are one based, matching the ids in the text files.
        datasets_utils.create_image_folder(
            base_folder,
            "img_align_celeba",
            lambda idx: f"{idx + 1:06d}.jpg",
            num_images,
        )
        attr_names = self._create_attr_txt(base_folder, num_images)
        self._create_identity_txt(base_folder, num_images)
        self._create_bbox_txt(base_folder, num_images)
        self._create_landmarks_txt(base_folder, num_images)

        return dict(
            num_examples=num_images_per_split[config["split"]],
            attr_names=attr_names,
        )

    def _create_split_txt(self, root):
        """Write the partition file and return the image counts.

        Returns:
            ``(num_images, num_images_per_split)`` where the mapping also
            contains the key ``"all"`` with the total number of images.
        """
        num_images_per_split = dict(train=3, valid=2, test=1)

        # One single-column row per image, holding the index of its split.
        data = [
            [self._SPLIT_TO_IDX[split]]
            for split, num_images in num_images_per_split.items()
            for _ in range(num_images)
        ]
        self._create_txt(root, "list_eval_partition.txt", data)

        num_images = sum(num_images_per_split.values())
        num_images_per_split["all"] = num_images
        return num_images, num_images_per_split

    def _create_attr_txt(self, root, num_images):
        """Write random attributes (each either -1 or 1) and return the header."""
        header = ("5_o_Clock_Shadow", "Young")
        # A uniform draw thresholded at 0.5 gives 0/1, which is mapped to -1/1.
        data = (
            torch.rand((num_images, len(header)))
            .ge(0.5)
            .int()
            .mul(2)
            .sub(1)
            .tolist()
        )
        self._create_txt(
            root,
            "list_attr_celeba.txt",
            data,
            header=header,
            add_num_examples=True,
        )
        return header

    def _create_identity_txt(self, root, num_images):
        """Write one random identity in ``[1, 4)`` per image."""
        data = torch.randint(1, 4, size=(num_images, 1)).tolist()
        self._create_txt(root, "identity_CelebA.txt", data)

    def _create_bbox_txt(self, root, num_images):
        """Write one random bounding box per image."""
        header = ("x_1", "y_1", "width", "height")
        data = torch.randint(10, size=(num_images, len(header))).tolist()
        self._create_txt(
            root,
            "list_bbox_celeba.txt",
            data,
            header=header,
            add_num_examples=True,
            add_image_id_to_header=True,
        )

    def _create_landmarks_txt(self, root, num_images):
        """Write random landmark coordinates for every image."""
        header = ("lefteye_x", "rightmouth_y")
        data = torch.randint(10, size=(num_images, len(header))).tolist()
        self._create_txt(
            root,
            "list_landmarks_align_celeba.txt",
            data,
            header=header,
            add_num_examples=True,
        )

    def _create_txt(
        self,
        root,
        name,
        data,
        header=None,
        add_num_examples=False,
        add_image_id_to_header=False,
    ):
        """Write a space separated annotation file to ``root / name``.

        Args:
            root: Folder the file is created in.
            name: File name.
            data: Rows of values; every row is prefixed with the one based
                image file name ``<idx>.jpg`` (zero padded to six digits).
            header: Optional column names written before the rows.
            add_num_examples: If true, the first line holds ``len(data)``.
            add_image_id_to_header: If true, ``image_id`` is prepended to
                the header. Has no effect without a header.
        """
        with open(pathlib.Path(root) / name, "w") as fh:
            if add_num_examples:
                fh.write(f"{len(data)}\n")

            if header:
                if add_image_id_to_header:
                    header = ("image_id", *header)
                fh.write(f"{' '.join(header)}\n")

            for idx, line in enumerate(data, 1):
                fields = (f"{idx:06d}.jpg", *[str(value) for value in line])
                fh.write(f"{' '.join(fields)}\n")

    def test_combined_targets(self):
        """Check that a combined target matches the individual ones in number and type."""
        target_types = ["attr", "identity", "bbox", "landmarks"]

        individual_targets = []
        for target_type in target_types:
            with self.create_dataset(target_type=target_type) as (dataset, _):
                _, target = dataset[0]
                individual_targets.append(target)

        with self.create_dataset(target_type=target_types) as (dataset, _):
            _, combined_targets = dataset[0]

        # The combined target is the value under test; the individually
        # requested targets serve as the reference.
        actual = len(combined_targets)
        expected = len(individual_targets)
        self.assertEqual(
            actual,
            expected,
            "The number of the returned combined targets does not match the number of targets if requested "
            f"individually: {actual} != {expected}",
        )

        for target_type, combined_target, individual_target in zip(
                target_types, combined_targets, individual_targets):
            with self.subTest(target_type=target_type):
                actual = type(combined_target)
                expected = type(individual_target)
                self.assertIs(
                    actual,
                    expected,
                    "Type of the combined target does not match the type of the corresponding individual target: "
                    f"{actual} is not {expected}",
                )

    def test_no_target(self):
        """An empty ``target_type`` list must produce ``None`` as target."""
        with self.create_dataset(target_type=[]) as (dataset, _):
            _, target = dataset[0]

        self.assertIsNone(target)

    def test_attr_names(self):
        """The dataset must expose the attribute names written to the attribute file."""
        with self.create_dataset() as (dataset, info):
            self.assertEqual(tuple(dataset.attr_names), info["attr_names"])
Exemplo n.º 6
0
class CIFAR10TestCase(datasets_utils.ImageDatasetTestCase):
    """Fake-data test case for ``datasets.CIFAR10``.

    All file names and pickle keys are read from ``_VERSION_CONFIG`` through
    ``self``, so the helpers pick up whatever mapping the instance exposes.
    """

    DATASET_CLASS = datasets.CIFAR10
    CONFIGS = datasets_utils.combinations_grid(train=(True, False))

    _VERSION_CONFIG = {
        "base_folder": "cifar-10-batches-py",
        "train_files": tuple(f"data_batch_{idx}" for idx in range(1, 6)),
        "test_files": ("test_batch",),
        "labels_key": "labels",
        "meta_file": "batches.meta",
        "num_categories": 10,
        "categories_key": "label_names",
    }

    def inject_fake_data(self, tmpdir, config):
        """Create all batch files and the meta file below ``tmpdir``.

        Returns:
            A dict with ``num_examples`` (images in the train or test files,
            depending on ``config["train"]``) and ``categories``.
        """
        version = self._VERSION_CONFIG
        base_folder = pathlib.Path(tmpdir) / version["base_folder"]
        os.makedirs(base_folder)

        num_images_per_file = 1
        train_files = version["train_files"]
        test_files = version["test_files"]
        for name in (*train_files, *test_files):
            self._create_batch_file(base_folder, name, num_images_per_file)

        categories = self._create_meta_file(base_folder)

        selected_files = train_files if config["train"] else test_files
        return {
            "num_examples": num_images_per_file * len(selected_files),
            "categories": categories,
        }

    def _create_batch_file(self, root, name, num_images):
        """Pickle ``num_images`` flattened images together with random labels."""
        version = self._VERSION_CONFIG
        # Every image is stored as a flat row of 32 * 32 * 3 values.
        data = datasets_utils.create_image_or_video_tensor(
            (num_images, 32 * 32 * 3)
        )
        labels = np.random.randint(
            0, version["num_categories"], size=num_images
        ).tolist()
        content = {"data": data, version["labels_key"]: labels}
        self._create_binary_file(root, name, content)

    def _create_meta_file(self, root):
        """Pickle the category names and return them.

        The names are the category indices, zero padded to the width of the
        largest index.
        """
        version = self._VERSION_CONFIG
        num_categories = version["num_categories"]
        width = len(str(num_categories - 1))
        categories = [f"{idx:0{width}d}" for idx in range(num_categories)]
        self._create_binary_file(
            root,
            version["meta_file"],
            {version["categories_key"]: categories},
        )
        return categories

    def _create_binary_file(self, root, name, content):
        """Pickle ``content`` into ``root / name``."""
        target = pathlib.Path(root) / name
        with open(target, "wb") as fh:
            pickle.dump(content, fh)

    def test_class_to_idx(self):
        """``class_to_idx`` must map every category to its position."""
        with self.create_dataset() as (dataset, info):
            categories = info["categories"]
            expected = dict(zip(categories, range(len(categories))))
            self.assertEqual(dataset.class_to_idx, expected)
Exemplo n.º 7
0
class Caltech101TestCase(datasets_utils.ImageDatasetTestCase):
    """Fake-data test case for ``datasets.Caltech101``.

    The configurations cover the target types ``category``, ``annotation``
    and the combination of both.
    """

    DATASET_CLASS = datasets.Caltech101
    FEATURE_TYPES = (PIL.Image.Image, (int, np.ndarray, tuple))

    CONFIGS = datasets_utils.combinations_grid(
        target_type=("category", "annotation", ["category", "annotation"]))
    REQUIRED_PACKAGES = ("scipy", )

    def inject_fake_data(self, tmpdir, config):
        """Create image and annotation folders for three categories.

        Returns:
            The total number of images created.
        """
        root = pathlib.Path(tmpdir) / "caltech101"
        images = root / "101_ObjectCategories"
        annotations = root / "Annotations"

        # Pairs of (image folder name, annotation folder name). For "Faces"
        # the annotation folder carries a different name than the image one.
        categories = (
            ("Faces", "Faces_2"),
            ("helicopter", "helicopter"),
            ("ying_yang", "ying_yang"),
        )
        num_images_per_category = 2

        for image_category, annotation_category in categories:
            datasets_utils.create_image_folder(
                root=images,
                name=image_category,
                file_name_fn=lambda idx: f"image_{idx + 1:04d}.jpg",
                num_examples=num_images_per_category,
            )
            self._create_annotation_folder(
                root=annotations,
                name=annotation_category,
                file_name_fn=lambda idx: f"annotation_{idx + 1:04d}.mat",
                num_examples=num_images_per_category,
            )

        # This is included in the original archive, but is removed by the dataset. Thus, an empty directory suffices.
        os.makedirs(images / "BACKGROUND_Google")

        return num_images_per_category * len(categories)

    def _create_annotation_folder(self, root, name, file_name_fn,
                                  num_examples):
        """Create ``root / name`` and fill it with ``num_examples`` annotation files."""
        root = pathlib.Path(root) / name
        os.makedirs(root)

        for idx in range(num_examples):
            self._create_annotation_file(root, file_name_fn(idx))

    def _create_annotation_file(self, root, name):
        """Save a random ``obj_contour`` array as a MATLAB file.

        The contour is a float64 array with two rows and a random number of
        columns drawn from ``[3, 6)``.
        """
        num_points = torch.randint(3, 6, size=())
        contour = torch.rand((2, num_points), dtype=torch.float64)
        mdict = dict(obj_contour=contour.numpy())
        datasets_utils.lazy_importer.scipy.io.savemat(
            str(pathlib.Path(root) / name), mdict)

    def test_combined_targets(self):
        """Check that a combined target matches the individual ones in number and type."""
        target_types = ["category", "annotation"]

        individual_targets = []
        for target_type in target_types:
            with self.create_dataset(target_type=target_type) as (dataset, _):
                _, target = dataset[0]
                individual_targets.append(target)

        with self.create_dataset(target_type=target_types) as (dataset, _):
            _, combined_targets = dataset[0]

        # The combined target is the value under test; the individually
        # requested targets serve as the reference.
        actual = len(combined_targets)
        expected = len(individual_targets)
        self.assertEqual(
            actual,
            expected,
            "The number of the returned combined targets does not match the number of targets if requested "
            f"individually: {actual} != {expected}",
        )

        for target_type, combined_target, individual_target in zip(
                target_types, combined_targets, individual_targets):
            with self.subTest(target_type=target_type):
                actual = type(combined_target)
                expected = type(individual_target)
                self.assertIs(
                    actual,
                    expected,
                    "Type of the combined target does not match the type of the corresponding individual target: "
                    f"{actual} is not {expected}",
                )
Exemplo n.º 8
0
class LSUNTestCase(datasets_utils.ImageDatasetTestCase):
    """Fake-data test case for ``datasets.LSUN``.

    The configurations cover the three split names as well as an explicit
    list of class names. For every requested class an LMDB database with a
    few random images is created.
    """

    DATASET_CLASS = datasets.LSUN

    REQUIRED_PACKAGES = ("lmdb", )
    CONFIGS = datasets_utils.combinations_grid(
        classes=("train", "test", "val",
                 ["bedroom_train", "church_outdoor_train"]))

    _CATEGORIES = (
        "bedroom",
        "bridge",
        "church_outdoor",
        "classroom",
        "conference_room",
        "dining_room",
        "kitchen",
        "living_room",
        "restaurant",
        "tower",
    )

    def inject_fake_data(self, tmpdir, config):
        """Create one LMDB per requested class and return the total image count."""
        root = pathlib.Path(tmpdir)

        num_images = 0
        for cls in self._parse_classes(config["classes"]):
            num_images += self._create_lmdb(root, cls)

        return num_images

    @contextlib.contextmanager
    def create_dataset(self, *args, **kwargs):
        """Wrap the parent's ``create_dataset`` and remove LSUN's cache files afterwards.

        The cleanup sits in a ``finally`` clause so that it also runs when
        the body of the ``with`` statement raises, e.g. on a failing
        assertion.
        """
        with super().create_dataset(*args, **kwargs) as output:
            try:
                yield output
            finally:
                # Currently datasets.LSUN caches the keys in the current directory rather than in the root directory.
                # Thus, this creates a number of unique _cache_* files in the current directory that will not be
                # removed together with the temporary directory
                for file in os.listdir(os.getcwd()):
                    if file.startswith("_cache_"):
                        os.remove(file)

    def _parse_classes(self, classes):
        """Expand a split name into class names; anything but a string is returned unchanged.

        ``"test"`` maps to the single class ``"test"``. Every other string
        is treated as a split and combined with all entries of
        ``_CATEGORIES`` as ``"<category>_<split>"``.
        """
        if not isinstance(classes, str):
            return classes

        split = classes
        if split == "test":
            return [split]

        return [f"{category}_{split}" for category in self._CATEGORIES]

    def _create_lmdb(self, root, cls):
        """Create the database ``<cls>_lmdb`` below ``root`` with one to three images.

        The images are first written as files into the database folder, then
        stored in the database under random keys of 40 lowercase hex digits,
        and finally the files are deleted again.

        Returns:
            The number of images stored.
        """
        lmdb = datasets_utils.lazy_importer.lmdb
        hexdigits_lowercase = string.digits + string.ascii_lowercase[:6]

        folder = f"{cls}_lmdb"

        num_images = torch.randint(1, 4, size=()).item()
        image_format = "webp"
        files = datasets_utils.create_image_folder(
            root, folder, lambda idx: f"{idx}.{image_format}", num_images)

        with lmdb.open(str(root /
                           folder)) as env, env.begin(write=True) as txn:
            for file in files:
                key = "".join(
                    random.choice(hexdigits_lowercase)
                    for _ in range(40)).encode()

                buffer = io.BytesIO()
                # Close the image explicitly so that no handle to the file is
                # left open when it is removed below.
                with Image.open(file) as image:
                    image.save(buffer, image_format)
                value = buffer.getvalue()

                txn.put(key, value)

                os.remove(file)

        return num_images