Example #1
def test_detection_dataset(mock_image_folder, mock_detection_label):

    input_size = (1024, 1024)

    ds = datasets.DetectionDataset(
        img_folder=mock_image_folder,
        label_path=mock_detection_label,
        img_transforms=Resize(input_size),
    )

    assert len(ds) == 5
    img, target = ds[0]
    assert isinstance(img, tf.Tensor)
    assert img.shape[:2] == input_size
    assert img.dtype == tf.float32
    # Bounding boxes
    assert isinstance(target, np.ndarray) and target.dtype == np.float32
    assert np.all(np.logical_and(target[:, :4] >= 0, target[:, :4] <= 1))
    assert target.shape[1] == 4

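    # Batched loading: images are stacked into one tensor, targets remain a list of per-image arrays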
    loader = DataLoader(ds, batch_size=2)
    images, targets = next(iter(loader))
    assert isinstance(images, tf.Tensor) and images.shape == (2, *input_size, 3)
    assert isinstance(targets, list) and all(isinstance(elt, np.ndarray) for elt in targets)

    # Rotated DS
    rotated_ds = datasets.DetectionDataset(
        img_folder=mock_image_folder,
        label_path=mock_detection_label,
        img_transforms=Resize(input_size),
        use_polygons=True,
    )
    _, r_target = rotated_ds[0]
    assert r_target.shape[1:] == (4, 2)

    # File existence check
    img_name, _ = ds.data[0]
    move(os.path.join(ds.root, img_name), os.path.join(ds.root, "tmp_file"))
    with pytest.raises(FileNotFoundError):
        datasets.DetectionDataset(mock_image_folder, mock_detection_label)
    move(os.path.join(ds.root, "tmp_file"), os.path.join(ds.root, img_name))
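As a minimal sketch (an assumption, not part of the original test file), the TensorFlow-backend snippets here would need imports roughly like the following; the PyTorch-backend ones (e.g. the CharacterGenerator example) would use torch and a torch-based DataLoader instead:

import os
from shutil import move

import numpy as np
import pytest
import tensorflow as tf

from doctr import datasets
from doctr.datasets import DataLoader  # assumption: doctr's TF data loader
from doctr.transforms import Resize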
Example #2
def test_mjsynth_dataset(mock_mjsynth_dataset):
    input_size = (32, 128)
    ds = datasets.MJSynth(
        *mock_mjsynth_dataset,
        img_transforms=Resize(input_size, preserve_aspect_ratio=True),
    )

    assert len(ds) == 4  # Actual set has 7581382 train and 1337891 test samples
    assert repr(ds) == f"MJSynth(train={True})"
    _validate_dataset_recognition_part(ds, input_size)
Example #3
def test_ic03(input_size, num_samples, rotate, mock_ic03_dataset):
    # monkeypatch the path to temporary dataset
    datasets.IC03.TRAIN = (mock_ic03_dataset, None, "ic03_train.zip")

    ds = datasets.IC03(
        train=True, download=True, img_transforms=Resize(input_size), use_polygons=rotate,
        cache_dir="/".join(mock_ic03_dataset.split("/")[:-2]), cache_subdir=mock_ic03_dataset.split("/")[-2],
    )

    assert len(ds) == num_samples
    assert repr(ds) == f"IC03(train={True})"
    _validate_dataset(ds, input_size, is_polygons=rotate)
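The cache_dir / cache_subdir arguments above simply peel the last two components off the mock archive path; a worked illustration with a hypothetical path:

# Hypothetical path, for illustration only
archive_path = "/tmp/pytest-0/datasets/ic03_train.zip"
assert "/".join(archive_path.split("/")[:-2]) == "/tmp/pytest-0"  # cache_dir
assert archive_path.split("/")[-2] == "datasets"  # cache_subdir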
Example #4
def test_imgur5k_dataset(num_samples, rotate, mock_imgur5k):
    input_size = (512, 512)
    ds = datasets.IMGUR5K(
        *mock_imgur5k,
        train=True,
        img_transforms=Resize(input_size),
        use_polygons=rotate,
    )

    assert len(ds) == num_samples - 1  # -1 because of the 90/10 train/test split
    assert repr(ds) == f"IMGUR5K(train={True})"
    _validate_dataset(ds, input_size, is_polygons=rotate)
Example #5
def test_svt(input_size, num_samples, rotate, mock_svt_dataset):
    # monkeypatch the path to temporary dataset
    datasets.SVT.URL = mock_svt_dataset
    datasets.SVT.SHA256 = None

    ds = datasets.SVT(
        train=True, download=True, img_transforms=Resize(input_size), use_polygons=rotate,
        cache_dir="/".join(mock_svt_dataset.split("/")[:-2]), cache_subdir=mock_svt_dataset.split("/")[-2],
    )

    assert len(ds) == num_samples
    assert repr(ds) == f"SVT(train={True})"
    _validate_dataset(ds, input_size, is_polygons=rotate)
Example #6
def test_artefact_detection(input_size, num_samples, rotate, mock_doc_artefacts):
    # monkeypatch the path to temporary dataset
    datasets.DocArtefacts.URL = mock_doc_artefacts
    datasets.DocArtefacts.SHA256 = None

    ds = datasets.DocArtefacts(
        train=True, download=True, img_transforms=Resize(input_size), use_polygons=rotate,
        cache_dir="/".join(mock_doc_artefacts.split("/")[:-2]), cache_subdir=mock_doc_artefacts.split("/")[-2],
    )

    assert len(ds) == num_samples
    assert repr(ds) == f"DocArtefacts(train={True})"
    _validate_dataset(ds, input_size, class_indices=True, is_polygons=rotate)
Example #7
def test_ic13_dataset(input_size, num_samples, rotate, recognition, mock_ic13):
    ds = datasets.IC13(
        *mock_ic13,
        img_transforms=Resize(input_size),
        use_polygons=rotate,
        recognition_task=recognition,
    )

    assert len(ds) == num_samples
    if recognition:
        _validate_dataset_recognition_part(ds, input_size)
    else:
        _validate_dataset(ds, input_size, is_polygons=rotate)
Example #8
    def __init__(
        self,
        output_size: Tuple[int, int],
        batch_size: int,
        mean: Tuple[float, float, float] = (0.5, 0.5, 0.5),
        std: Tuple[float, float, float] = (1.0, 1.0, 1.0),
        fp16: bool = False,
        **kwargs: Any,
    ) -> None:

        self.batch_size = batch_size
        self.resize = Resize(output_size, **kwargs)
        # Perform the division by 255 at the same time
        self.normalize = Normalize(mean, std)
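This constructor looks like doctr's PreProcessor; assuming that (the class name and parameter values below are illustrative, not from the original snippet), instantiation would be along these lines:

# Hypothetical usage -- assumes the __init__ above belongs to doctr's PreProcessor
processor = PreProcessor(output_size=(1024, 1024), batch_size=2, fp16=False)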
Example #9
def test_imgur5k_dataset(input_size, num_samples, rotate, recognition, mock_imgur5k):
    ds = datasets.IMGUR5K(
        *mock_imgur5k,
        train=True,
        img_transforms=Resize(input_size),
        use_polygons=rotate,
        recognition_task=recognition,
    )

    assert len(ds) == num_samples - 1  # -1 because of the 90/10 train/test split
    assert repr(ds) == f"IMGUR5K(train={True})"
    if recognition:
        _validate_dataset_recognition_part(ds, input_size)
    else:
        _validate_dataset(ds, input_size, is_polygons=rotate)
Example #10
def test_iiit5k(input_size, num_samples, rotate, mock_iiit5k_dataset):
    # monkeypatch the path to temporary dataset
    datasets.IIIT5K.URL = mock_iiit5k_dataset
    datasets.IIIT5K.SHA256 = None

    ds = datasets.IIIT5K(
        train=True,
        download=True,
        img_transforms=Resize(input_size),
        use_polygons=rotate,
        cache_dir="/".join(mock_iiit5k_dataset.split("/")[:-2]),
        cache_subdir=mock_iiit5k_dataset.split("/")[-2],
    )

    assert len(ds) == num_samples
    assert repr(ds) == f"IIIT5K(train={True})"
    img, target = ds[0]
    _validate_dataset(ds, input_size, batch_size=1, is_polygons=rotate)
Example #11
def test_ocrdataset(mock_ocrdataset, use_polygons):

    input_size = (512, 512)

    ds = datasets.OCRDataset(
        *mock_ocrdataset,
        img_transforms=Resize(input_size),
        use_polygons=use_polygons,
    )
    assert len(ds) == 3
    _validate_dataset(ds, input_size, is_polygons=use_polygons)

    # File existence check
    img_name, _ = ds.data[0]
    move(os.path.join(ds.root, img_name), os.path.join(ds.root, "tmp_file"))
    with pytest.raises(FileNotFoundError):
        datasets.OCRDataset(*mock_ocrdataset)
    move(os.path.join(ds.root, "tmp_file"), os.path.join(ds.root, img_name))
Example #12
def test_cord(input_size, num_samples, rotate, recognition, mock_cord_dataset):
    # monkeypatch the path to temporary dataset
    datasets.CORD.TRAIN = (mock_cord_dataset, None)

    ds = datasets.CORD(
        train=True,
        download=True,
        img_transforms=Resize(input_size),
        use_polygons=rotate,
        recognition_task=recognition,
        cache_dir="/".join(mock_cord_dataset.split("/")[:-2]),
        cache_subdir=mock_cord_dataset.split("/")[-2],
    )

    assert len(ds) == num_samples
    assert repr(ds) == f"CORD(train={True})"
    if recognition:
        _validate_dataset_recognition_part(ds, input_size)
    else:
        _validate_dataset(ds, input_size, is_polygons=rotate)
Example #13
def test_funsd(input_size, num_samples, rotate, recognition, mock_funsd_dataset):
    # monkeypatch the path to temporary dataset
    datasets.FUNSD.URL = mock_funsd_dataset
    datasets.FUNSD.SHA256 = None
    datasets.FUNSD.FILE_NAME = "funsd.zip"

    ds = datasets.FUNSD(
        train=True,
        download=True,
        img_transforms=Resize(input_size),
        use_polygons=rotate,
        recognition_task=recognition,
        cache_dir="/".join(mock_funsd_dataset.split("/")[:-2]),
        cache_subdir=mock_funsd_dataset.split("/")[-2],
    )

    assert len(ds) == num_samples
    assert repr(ds) == f"FUNSD(train={True})"
    if recognition:
        _validate_dataset_recognition_part(ds, input_size)
    else:
        _validate_dataset(ds, input_size, is_polygons=rotate)
Example #14
def test_charactergenerator():

    input_size = (32, 32)
    vocab = 'abcdef'

    ds = datasets.CharacterGenerator(
        vocab=vocab,
        num_samples=10,
        cache_samples=True,
        img_transforms=Resize(input_size),
    )

    assert len(ds) == 10
    image, label = ds[0]
    assert isinstance(image, torch.Tensor)
    assert image.shape[-2:] == input_size
    assert image.dtype == torch.float32
    assert isinstance(label, int) and label < len(vocab)

    loader = DataLoader(ds, batch_size=2, collate_fn=ds.collate_fn)
    images, targets = next(iter(loader))
    assert isinstance(images, torch.Tensor) and images.shape == (2, 3, *input_size)
    assert isinstance(targets, torch.Tensor) and targets.shape == (2,)
    assert targets.dtype == torch.int64
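The collate_fn above is supplied by the dataset itself; a minimal sketch of what such a function may do, assuming the dataset yields (image tensor, integer label) pairs:

import torch

def collate_fn(samples):
    # Stack images into an (N, C, H, W) batch and gather labels into an int64 tensor
    images, labels = zip(*samples)
    return torch.stack(images, dim=0), torch.tensor(labels, dtype=torch.int64)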