Пример #1
0
def create_realistic_folder_structure() -> Tuple[Path, List[str], List[str]]:
    """Create a temporary train/val/test folder tree filled with fixture files.

    Each of the ``train``, ``val`` and ``test`` folders receives an ``image``
    and a ``mask`` sub-folder, and every sub-folder gets ten ``.jpg`` files
    written through ``write_image_in_file``. In addition, JSON files are
    written (through ``write_json_in_file``) at the root and in each of the
    three top-level folders.

    Returns:
        A ``(base_dir, base_folders, additional_files)`` tuple: the root of
        the created tree, the names of the top-level folders, and the JSON
        file paths relative to the root.
    """
    root = get_temp_path(prefix="create_realistic_folder_structure")

    base_folders = ["train", "val", "test"]
    sub_folders = ["image", "mask"]
    image_names = [f"0{i}.jpg" for i in range(10)]

    additional_files = [
        "meta.json",
        "train/train_meta.json",
        "val/val_meta.json",
        "test/test_meta.json",
    ]

    # Enumerate every image location up front, in the same order the files
    # are written: top-level folder, then sub-folder, then file name.
    image_paths = [
        root / base_folder / sub_folder / image_name
        for base_folder in base_folders
        for sub_folder in sub_folders
        for image_name in image_names
    ]

    for image_path in image_paths:
        image_path.parent.mkdir(parents=True, exist_ok=True)
        write_image_in_file(image_path)

    # The parent folders of the JSON files exist by now (created above).
    for relative_json_path in additional_files:
        write_json_in_file(root / relative_json_path)

    return root, base_folders, additional_files
Пример #2
0
def test_extension_bins():
    """Check that ``files_by_extension`` bins files per extension and honours ``limit``.

    Ten empty files are created for each of the ``jpg``, ``png`` and ``exif``
    extensions. The bins are then inspected without a limit, with a limit
    above the per-extension count, and with a limit below it.
    """
    temp_path = get_temp_path(prefix="test_extension_bins")

    jpg_names = [f"{i}.jpg" for i in range(10)]
    png_names = [f"{i}.png" for i in range(10)]
    exif_names = [f"{i}.exif" for i in range(10)]

    for name in jpg_names + png_names + exif_names:
        (temp_path / name).touch()

    extensions = ("jpg", "png", "exif")

    # No limit: every created file must be reported.
    unlimited_bins = files_by_extension(temp_path)
    for extension in extensions:
        assert len(unlimited_bins[extension]) == 10

    expected_jpg_paths = {temp_path / name for name in jpg_names}
    assert set(unlimited_bins["jpg"]) == expected_jpg_paths

    # A limit larger than the number of files per extension changes nothing.
    generous_bins = files_by_extension(temp_path, limit=20)
    for extension in extensions:
        assert len(generous_bins[extension]) == 10

    # A limit smaller than the number of files caps each bin at that limit.
    limit = 2
    capped_bins = files_by_extension(temp_path, limit=limit)
    for extension in extensions:
        assert len(capped_bins[extension]) == limit
Пример #3
0
def test_interface_creation(dummy_config):
    """Check that ``PythonPackage.init`` creates the expected package files.

    After initialisation the base directory must contain ``setup.py`` and
    ``datasets/<dummy_config.name>/__init__.py``.
    """
    package_root = get_temp_path(prefix="test_interface_creation")

    python_package = PythonPackage(
        base_dir=package_root,
        collection_config=dummy_config,
    )
    python_package.init()

    setup_file = package_root / "setup.py"
    assert setup_file.exists()

    dataset_init_file = package_root / "datasets" / dummy_config.name / "__init__.py"
    assert dataset_init_file.exists()
Пример #4
0
def test_python_package():
    """End-to-end check that an installed dataset package reproduces its files.

    Steps:
      1. Build the test package via ``setup_test_package``.
      2. Create a virtual environment and upgrade ``setuptools``/``pip`` in it.
      3. Install the project found three directory levels above this file,
         then the generated package, into that environment.
      4. Run the ``download_to_folder.py_test`` script with the environment's
         interpreter, passing a fresh target folder.
      5. Compare every original file byte-for-byte with its downloaded copy.
    """
    # Local import so this block does not depend on a file-level ``import sys``.
    import sys

    base_dir, _, original_files = setup_test_package()
    print(base_dir)

    venv_dir = get_temp_path(prefix="venv_dir") / "venv"
    # NOTE: ``bin/python`` is the POSIX virtual environment layout.
    python_loc = venv_dir / "bin" / "python"
    poif_setup_location = Path(__file__).parents[3]

    # Create the environment with the interpreter that is running this test
    # instead of whatever "python3" happens to resolve to on PATH (it may be
    # absent or a different Python version).
    subprocess.run([sys.executable, "-m", "venv", str(venv_dir)], check=True)

    pip_install = [str(python_loc), "-m", "pip", "install"]
    subprocess.run(pip_install + ["--upgrade", "setuptools", "pip"], check=True)
    subprocess.run(pip_install + [str(poif_setup_location)], check=True)
    subprocess.run(pip_install + [str(base_dir)], check=True)

    download_script = (
        Path(__file__).parent / "python_package" / "download_to_folder.py_test"
    )
    download_to_folder = get_temp_path(prefix="download_to_folder")
    print(download_to_folder)

    subprocess.run(
        [str(python_loc), str(download_script), str(download_to_folder)],
        check=True,
    )

    # Guard against a vacuous pass: the comparison loop must see some files.
    assert len(original_files) > 0
    for original_file in original_files:
        rel_path = get_relative_path(base_dir=base_dir, file=original_file)

        original_bytes = Path(original_file).read_bytes()
        downloaded_bytes = (download_to_folder / rel_path).read_bytes()

        assert original_bytes == downloaded_bytes, (
            f"Downloaded content differs from original for {rel_path}"
        )
Пример #5
0
def test_is_more_populated():
    """Check ``is_more_populated`` around the boundary of the actual file count.

    The folder holds exactly ten files, so thresholds of 11 and 10 must give
    a falsy result and a threshold of 9 a truthy one.
    """
    temp_path = get_temp_path(prefix="test_is_more_populated")

    for jpg_name in [f"{i}.jpg" for i in range(10)]:
        (temp_path / jpg_name).touch()

    # (threshold, expected truthiness) pairs, checked in the original order.
    expectations = ((11, False), (10, False), (9, True))
    for threshold, expected in expectations:
        assert bool(is_more_populated(temp_path, threshold)) == expected
Пример #6
0
def test_png_img_writer():
    """Check that fetching a dataset object stores its bytes in the cache manager.

    The cache must report nothing for the tag before the object is fetched,
    and must return the fetched bytes afterwards.
    """
    cache_dir = get_temp_path(prefix="test_png_img_writer")
    manager = CacheManager(cache_dir)

    tagged_data = ImageMockTaggedData(relative_path="img.png")

    # Nothing has been fetched yet, so the cache has no entry for this tag.
    cached_before_fetch = manager.get(tagged_data.tag)
    assert cached_before_fetch is None

    ds_object = DataSetObject(tagged_data)
    ds_object.add_cache_manager(manager)

    # Fetching through the dataset object loads the object into the cache.
    object_bytes = ds_object.get()
    cached_after_fetch = manager.get(ds_object.tag)
    assert cached_after_fetch == object_bytes
Пример #7
0
def test_repo_read_write():
    """Check that a repo config round-trips through a file and through a package.

    The config is first written to a temporary file and read back, then
    written into a ``PythonPackage`` and read back via
    ``DataRepoConfig.read_from_package``; both results must equal the config.
    """
    config_file = get_temp_file()
    _, config = get_repo_sequence()

    # Round trip through a plain file.
    config.write(config_file)
    reloaded_from_file = config.read(config_file)
    assert reloaded_from_file == config

    # Round trip through a package rooted in a temporary directory.
    package_dir = get_temp_path(prefix="test_repo_read_write")
    package = PythonPackage(
        base_dir=package_dir,
        collection_config=config.collection,
    )
    config.write_to_package(package)

    reloaded_from_package = DataRepoConfig.read_from_package(package_dir)
    assert reloaded_from_package == config
Пример #8
0
def test_filesystem_creation():
    """Check that every template file is visible once the directory is set up as a filesystem.

    A ``Directory`` is populated with one ``MockBinary`` per path from
    ``get_standard_folder_template``, exposed at a temporary location through
    ``setup_as_filesystem``, and each path is then expected to be a regular
    file under that location.
    """
    files = get_standard_folder_template()

    disk_loc = get_temp_path()
    print(disk_loc)

    root_dir = Directory()

    for file in files:
        root_dir.add_data(file, MockBinary(file))

    p = setup_as_filesystem(root_dir, disk_loc, daemon=True)

    # Always terminate the handle returned by ``setup_as_filesystem``, even
    # when an assertion fails; otherwise a failing run leaves it running.
    try:
        for file in files:
            assert (disk_loc / file).is_file()
    finally:
        p.terminate()
Пример #9
0
def test_init(monkeypatch):
    """Smoke-test the ``init`` command with user input and repositories mocked out.

    ``builtins.input`` is replaced by a ``MonkeyPatchSequence`` built from the
    repo answer sequence plus a trailing ``"git_url"`` entry, the data folders
    and files named in the collection config are created on disk, and the
    remote-repo factory and ``GitRepo`` used by the command are swapped for
    mocks. The test passes when ``init`` completes without raising.
    """
    workspace = get_temp_path(prefix="test_init_no_integration")

    sequence, config = get_repo_sequence()
    scripted_answers = sequence + ["git_url"]
    monkeypatch.setattr("builtins.input", MonkeyPatchSequence(scripted_answers))

    # Materialise the folders and files the collection config refers to.
    for data_folder in config.collection.folders:
        create_data_folder(workspace / data_folder)

    for data_file in config.collection.files:
        write_image_in_file(workspace / data_file)

    mock_repo = MockTaggedRepo()

    # Whatever config is passed in, hand back the single mock repository.
    monkeypatch.setattr(
        "poif.cli.commands.init.get_remote_repo_from_config",
        lambda x: mock_repo,
    )
    monkeypatch.setattr("poif.cli.commands.init.GitRepo", MockGitRepo)

    cli_args = [str(workspace)]
    init(cli_args)
Пример #10
0
def test_without_fuse():
    """Check ``DataSetFileSystem.readdir`` against the expected listing of every folder.

    The expected contents per folder are derived from the template paths: the
    first path component belongs to the root (``""``), and every later
    component belongs to the folder formed by the components before it. The
    root is additionally expected to list ``__test_file``. Each listing must
    match once ``.`` and ``..`` are added to the expected names.
    """
    files = get_standard_folder_template()

    expected_listing = defaultdict(set)
    expected_listing[""].add("__test_file")
    for file in files:
        parts = file.split("/")
        expected_listing[""].add(parts[0])

        # parts[depth] lives in the folder made of parts[:depth].
        for depth, entry in enumerate(parts[1:], start=1):
            parent_folder = "/".join(parts[:depth])
            expected_listing[parent_folder].add(entry)

    disk_loc = get_temp_path()
    print(disk_loc)

    root_dir = Directory()
    for file in files:
        root_dir.add_data(file, MockBinary(file))

    file_system = DataSetFileSystem(root_dir)

    for folder, entries in expected_listing.items():
        listed = set(file_system.readdir(folder, None))
        assert entries | {".", ".."} == listed
Пример #11
0
 def get_clone_location(self) -> Path:
     """Return the path obtained from ``get_temp_path`` with the ``git_get_clone_location`` prefix."""
     clone_location = get_temp_path(prefix="git_get_clone_location")
     return clone_location
Пример #12
0
def test_has_remote():
    """Check that ``GitRepo.has_remote`` is falsy before and truthy after ``add_remote``."""
    repo = GitRepo(base_dir=get_temp_path(), init=True)

    # A repository initialised here starts without any remote.
    remote_present_before = repo.has_remote()
    assert not remote_present_before

    repo.add_remote("hello.world")

    remote_present_after = repo.has_remote()
    assert remote_present_after