def generate(cls, root, *, year, num_samples):
        """Produce mock archives for the "train" and "val" splits of a given year.

        For each split the config name ``f"{split}{year}"`` is formed,
        ``cls._make_images_archive`` is called for it, and the value it returns
        is forwarded to ``cls._make_annotations`` as ``images_meta``. Once both
        splits are done, the ``annotations`` directory is zipped.

        Args:
            root: Base directory; must support the ``/`` path operator.
            year: Value appended to the split name and used in the name of the
                annotations archive.
            num_samples: Number of samples requested for each split.

        Returns:
            ``num_samples``, unchanged.
        """
        annotations_dir = root / "annotations"
        annotations_dir.mkdir()

        for config_name in (f"{split}{year}" for split in ("train", "val")):
            images_meta = cls._make_images_archive(root, config_name, num_samples=num_samples)
            cls._make_annotations(annotations_dir, config_name, images_meta=images_meta)

        # Both splits share a single annotations archive.
        archive_name = f"annotations_trainval{year}.zip"
        make_zip(root, archive_name, annotations_dir)

        return num_samples
def fer2013(info, root, config):
    """Write a mock FER2013 text file for ``config.split`` and zip it.

    The file ``{split}.txt`` holds a header line followed by one row per
    sample. Every row has a ``pixels`` column containing 48 * 48
    space-separated random values below 256; the "train" split additionally
    has an ``emotion`` column with a random integer below 7.

    Args:
        info: Unused.
        root: Base directory; must support the ``/`` path operator.
        config: Object exposing a ``split`` attribute.

    Returns:
        The number of rows written: 5 for "train", 3 otherwise.
    """
    is_train = config.split == "train"
    num_samples = 5 if is_train else 3
    columns = ["emotion", "pixels"] if is_train else ["pixels"]

    path = root / f"{config.split}.txt"
    with open(path, "w", newline="") as file:
        # The header bypasses the writer: under QUOTE_NONNUMERIC the writer
        # would wrap the column names in quotes.
        file.write(",".join(columns) + "\n")

        writer = csv.DictWriter(
            file,
            fieldnames=columns,
            quotechar='"',
            quoting=csv.QUOTE_NONNUMERIC,
        )
        for _ in range(num_samples):
            pixels = torch.randint(256, (48 * 48, ), dtype=torch.uint8)
            row = {"pixels": " ".join(str(value) for value in pixels.tolist())}
            if is_train:
                row["emotion"] = int(torch.randint(7, ()))
            writer.writerow(row)

    make_zip(root, f"{path.name}.zip", path)

    return num_samples
def emnist(info, root, _):
    """Generate mock EMNIST image/label files for every config and zip them.

    For each config in ``info._configs`` a pair of ``idx3``/``idx1`` gzip file
    names is derived from the config's image set and split, and
    ``MNISTMockData.generate`` is called with those names. All file names are
    then passed to ``make_zip`` for the ``emnist-gzip.zip`` archive.

    Args:
        info: Object exposing ``categories`` and ``_configs``.
        root: Base directory handed to the generator and to ``make_zip``.
        _: Unused.

    Returns:
        A dict mapping each config to ``{"num_samples": ...}``, where the value
        is whatever ``MNISTMockData.generate`` returned for that config.
    """
    # Image sets that fold some lower-case letters into their upper-case
    # counterpart keep dense labels in the data files, so for them the number
    # of label values is not len(info.categories).
    merged_image_sets = ("Balanced", "By_Merge")
    num_categories = defaultdict(
        lambda: len(info.categories),
        dict.fromkeys(merged_image_sets, 47),
    )

    mock_infos = {}
    file_names = set()
    for config in info._configs:
        image_set_slug = config.image_set.replace("_", "").lower()
        prefix = f"emnist-{image_set_slug}-{config.split}"
        images_file = f"{prefix}-images-idx3-ubyte.gz"
        labels_file = f"{prefix}-labels-idx1-ubyte.gz"
        file_names.update({images_file, labels_file})

        num_samples = MNISTMockData.generate(
            root,
            num_categories=num_categories[config.image_set],
            images_file=images_file,
            labels_file=labels_file,
        )
        mock_infos[config] = {"num_samples": num_samples}

    make_zip(root, "emnist-gzip.zip", *file_names)

    return mock_infos
# Example #4
0
    def _make_images_archive(cls, root, name, *, num_samples):
        """Create a folder of mock images, collect their metadata, and zip the folder.

        Args:
            root: Base directory passed to ``create_image_folder`` and ``make_zip``.
            name: Folder name; the archive is named ``{name}.zip``.
            num_samples: Number of images requested.

        Returns:
            A list with one dict per image path returned by
            ``create_image_folder``, holding ``file_name``, ``id`` (the file
            stem parsed as an integer), ``width`` and ``height``.
        """

        def describe(path):
            # Open the image only long enough to read its dimensions.
            with PIL.Image.open(path) as image:
                width, height = image.size
            return {
                "file_name": path.name,
                "id": int(path.stem),
                "width": width,
                "height": height,
            }

        image_paths = create_image_folder(
            root,
            name,
            # Zero-padded 12-digit file names.
            file_name_fn=lambda idx: f"{idx:012d}.jpg",
            num_examples=num_samples,
        )
        images_meta = [describe(path) for path in image_paths]

        make_zip(root, f"{name}.zip")

        return images_meta
# Example #5
0
def eurosat(info, root, config):
    """Create mock EuroSAT class folders under ``eurosat/2750`` and zip them.

    One image folder is requested from ``create_image_folder`` for each of the
    two classes, with file names of the form ``{class}_{idx}.jpg``.

    Args:
        info: Unused.
        root: Base directory for the data folder and the archive.
        config: Unused.

    Returns:
        The total number of images requested (classes * images per class).
    """
    class_names = ("AnnualCrop", "Forest")
    images_per_class = 3

    data_folder = pathlib.Path(root, "eurosat", "2750")
    data_folder.mkdir(parents=True)

    for class_name in class_names:
        create_image_folder(
            root=data_folder,
            name=class_name,
            file_name_fn=lambda idx: f"{class_name}_{idx}.jpg",
            num_examples=images_per_class,
        )

    make_zip(root, "EuroSAT.zip", data_folder)

    return len(class_names) * images_per_class
# Example #6
0
    def generate(cls, root):
        """Create the mock image folder, its archive, and the annotation files.

        The image file names and the value returned to the caller both come
        from ``cls._make_split_file``. The image folder ``img_align_celeba`` is
        zipped under its own name, after which each annotation helper is called
        with ``root`` and the list of image file names.

        Args:
            root: Base directory handed to every helper.

        Returns:
            The second element returned by ``cls._make_split_file``.
        """
        image_file_names, num_samples_map = cls._make_split_file(root)

        image_files = create_image_folder(
            root,
            "img_align_celeba",
            file_name_fn=lambda idx: image_file_names[idx],
            num_examples=len(image_file_names),
        )

        # The archive is named after the folder that holds the images.
        images_folder = image_files[0].parent
        make_zip(root, images_folder.with_suffix(".zip").name)

        annotation_writers = (
            cls._make_identity_file,
            cls._make_attributes_file,
            cls._make_bounding_boxes_file,
            cls._make_landmarks_file,
        )
        for write_annotations in annotation_writers:
            write_annotations(root, image_file_names)

        return num_samples_map
def clevr(info, root, config):
    """Create mock CLEVR image folders and scene files, then zip the data folder.

    Image folders are requested for "train" (3), "val" (2) and "test" (1).
    Scene JSON files are written for "train" and "val" only, with one entry
    per image file returned for that split.

    Args:
        info: Object exposing ``_configs``; each config has a ``split`` attribute.
        root: Base directory; must support the ``/`` path operator.
        config: Unused.

    Returns:
        A dict mapping every config in ``info._configs`` to the number of
        samples of its split.
    """
    data_folder = root / "CLEVR_v1.0"
    images_folder = data_folder / "images"
    scenes_folder = data_folder / "scenes"

    num_samples_map = dict(train=3, val=2, test=1)

    image_files = {}
    for split_name, count in num_samples_map.items():
        image_files[split_name] = create_image_folder(
            images_folder,
            split_name,
            file_name_fn=lambda idx: f"CLEVR_{split_name}_{idx:06d}.jpg",
            num_examples=count,
        )

    def mock_scene(image_file):
        # Only the number of objects in a scene is returned at the moment, so
        # a list of placeholders of random length is sufficient.
        return {
            "image_filename": image_file.name,
            "objects": [None] * int(torch.randint(1, 5, ())),
        }

    scenes_folder.mkdir()
    for split_name in ["train", "val"]:
        scenes_path = scenes_folder / f"CLEVR_{split_name}_scenes.json"
        with open(scenes_path, "w") as file:
            scenes = [mock_scene(image_file) for image_file in image_files[split_name]]
            json.dump({"scenes": scenes}, file)

    make_zip(root, f"{data_folder.name}.zip")

    result = {}
    for known_config in info._configs:
        result[known_config] = num_samples_map[known_config.split]
    return result
# Example #8
0
def gtsrb(info, root, config):
    """Generate mock GTSRB data for the split selected by ``config.split``.

    For the "train" split, one image folder per class is created under
    ``GTSRB/Training``, each with a ``GT-<class>.csv`` annotation file, and the
    training folder is zipped into ``GTSRB-Training_fixed.zip``.

    For any other split, a single ``Images`` folder is created under
    ``GTSRB/Final_Test`` and zipped into ``GTSRB_Final_Test_Images.zip``; the
    annotation file ``GT-final_test.csv`` is written to ``root`` and zipped
    into ``GTSRB_Final_Test_GT.zip``.

    Args:
        info: Unused.
        root: Base directory; must support the ``/`` path operator.
        config: Object exposing a ``split`` attribute.

    Returns:
        The number of examples: examples per class (5 for "train", 3
        otherwise) multiplied by the number of classes.
    """
    # The split is read through attribute access everywhere in this function.
    is_train = config.split == "train"

    num_examples_per_class = 5 if is_train else 3
    classes = ("00000", "00042", "00012")
    num_examples = num_examples_per_class * len(classes)

    csv_columns = [
        "Filename", "Width", "Height", "Roi.X1", "Roi.Y1", "Roi.X2", "Roi.Y2",
        "ClassId"
    ]

    def _make_ann_file(path, num_examples, class_idx):
        """Write a ';'-delimited annotation file with ``num_examples`` random rows."""
        if class_idx == "random":
            # Drawn once, so every row of the file carries the same class id.
            class_idx = torch.randint(1, len(classes) + 1, size=(1, )).item()

        # The csv module requires newline="" so that the writer's own line
        # terminator is not translated a second time by the text layer.
        with open(path, "w", newline="") as csv_file:
            writer = csv.DictWriter(csv_file,
                                    fieldnames=csv_columns,
                                    delimiter=";")
            writer.writeheader()
            for image_idx in range(num_examples):
                writer.writerow({
                    "Filename": f"{image_idx:05d}.ppm",
                    "Width": torch.randint(1, 100, size=()).item(),
                    "Height": torch.randint(1, 100, size=()).item(),
                    "Roi.X1": torch.randint(1, 100, size=()).item(),
                    "Roi.Y1": torch.randint(1, 100, size=()).item(),
                    "Roi.X2": torch.randint(1, 100, size=()).item(),
                    "Roi.Y2": torch.randint(1, 100, size=()).item(),
                    "ClassId": class_idx,
                })

    if is_train:
        train_folder = root / "GTSRB" / "Training"
        train_folder.mkdir(parents=True)

        for class_idx in classes:
            create_image_folder(
                train_folder,
                name=class_idx,
                file_name_fn=lambda image_idx:
                f"{class_idx}_{image_idx:05d}.ppm",
                num_examples=num_examples_per_class,
            )
            _make_ann_file(
                path=train_folder / class_idx / f"GT-{class_idx}.csv",
                num_examples=num_examples_per_class,
                class_idx=int(class_idx),
            )
        make_zip(root, "GTSRB-Training_fixed.zip", train_folder)
    else:
        test_folder = root / "GTSRB" / "Final_Test"
        test_folder.mkdir(parents=True)

        create_image_folder(
            test_folder,
            name="Images",
            file_name_fn=lambda image_idx: f"{image_idx:05d}.ppm",
            num_examples=num_examples,
        )

        make_zip(root, "GTSRB_Final_Test_Images.zip", test_folder)

        _make_ann_file(
            path=root / "GT-final_test.csv",
            num_examples=num_examples,
            class_idx="random",
        )

        make_zip(root, "GTSRB_Final_Test_GT.zip", "GT-final_test.csv")

    return num_examples