def create_realistic_folder_structure() -> Tuple[Path, List[str], List[str]]:
    """Build a train/val/test folder layout on disk for use in tests.

    Each split folder gets an ``image`` and a ``mask`` sub folder, and the
    paths ``00.jpg`` .. ``09.jpg`` inside each of them are handed to
    ``write_image_in_file``. Afterwards four JSON paths (one at the root and
    one per split folder) are handed to ``write_json_in_file``.

    Returns:
        A tuple ``(base_dir, base_folders, additional_files)``: the root of
        the generated tree, the split folder names, and the relative paths
        of the JSON files.
    """
    root = get_temp_path(prefix="create_realistic_folder_structure")

    split_names = ["train", "val", "test"]
    kind_names = ["image", "mask"]
    image_names = [f"0{index}.jpg" for index in range(10)]
    metadata_files = [
        "meta.json",
        "train/train_meta.json",
        "val/val_meta.json",
        "test/test_meta.json",
    ]

    for split_name in split_names:
        for kind_name in kind_names:
            target_dir = root / split_name / kind_name
            for image_name in image_names:
                image_path = target_dir / image_name
                # exist_ok=True makes the repeated mkdir per file harmless.
                image_path.parent.mkdir(parents=True, exist_ok=True)
                write_image_in_file(image_path)

    # The split folders already exist at this point, so the nested JSON
    # paths need no extra mkdir.
    for metadata_file in metadata_files:
        write_json_in_file(root / metadata_file)

    return root, split_names, metadata_files
def test_extension_bins():
    """Check that ``files_by_extension`` bins files per extension and honours ``limit``."""
    temp_path = get_temp_path(prefix="test_extension_bins")

    # Ten empty files for each of the three extensions under test.
    names_by_extension = {
        extension: [f"{i}.{extension}" for i in range(10)]
        for extension in ("jpg", "png", "exif")
    }
    for names in names_by_extension.values():
        for name in names:
            (temp_path / name).touch()

    # Without a limit every file is expected in its bin.
    bins = files_by_extension(temp_path)
    for extension in names_by_extension:
        assert len(bins[extension]) == 10
    assert set(bins["jpg"]) == {
        temp_path / name for name in names_by_extension["jpg"]
    }

    # A limit above the per-extension file count is expected to change nothing.
    bins = files_by_extension(temp_path, limit=20)
    for extension in names_by_extension:
        assert len(bins[extension]) == 10

    # A smaller limit is expected to cap every bin at that size.
    limit = 2
    bins = files_by_extension(temp_path, limit=limit)
    for extension in names_by_extension:
        assert len(bins[extension]) == limit
def test_interface_creation(dummy_config):
    """Check that ``PythonPackage.init`` creates the expected package files.

    After ``init()`` the test expects a ``setup.py`` in the package root and
    an ``__init__.py`` under ``datasets/<dummy_config.name>``.
    """
    package_root = get_temp_path(prefix="test_interface_creation")
    PythonPackage(base_dir=package_root, collection_config=dummy_config).init()

    setup_file = package_root / "setup.py"
    assert setup_file.exists()

    dataset_init_file = package_root / "datasets" / dummy_config.name / "__init__.py"
    assert dataset_init_file.exists()
def test_python_package():
    """Install the generated package into a fresh venv and verify a download.

    Steps:
        1. Create a virtual environment in a temporary directory.
        2. Upgrade ``setuptools``/``pip`` in it, then pip-install the
           directory at ``Path(__file__).parents[3]`` (presumably the poif
           project root -- confirm) and the package from
           ``setup_test_package``.
        3. Run the ``download_to_folder.py_test`` script with the venv's
           interpreter, passing a temporary target folder.
        4. Compare every original file byte-for-byte with the file at the
           same relative path in the target folder.
    """
    base_dir, _git_url, original_files = setup_test_package()
    print(base_dir)

    venv_dir = get_temp_path(prefix="venv_dir") / "venv"
    venv_python = str(venv_dir / "bin" / "python")
    poif_root = Path(__file__).parents[3]

    subprocess.run(["python3", "-m", "venv", str(venv_dir)], check=True)

    # All installs go through the venv's own interpreter.
    pip_install = [venv_python, "-m", "pip", "install"]
    install_targets = (
        ["--upgrade", "setuptools", "pip"],
        [str(poif_root)],
        [str(base_dir)],
    )
    for pip_args in install_targets:
        subprocess.run(pip_install + pip_args, check=True)

    script = Path(__file__).parent / "python_package" / "download_to_folder.py_test"
    target_dir = get_temp_path(prefix="download_to_folder")
    print(target_dir)
    subprocess.run([venv_python, str(script), str(target_dir)], check=True)

    # Guard against a vacuous pass when the fixture produced no files.
    assert len(original_files) > 0
    for original_file in original_files:
        rel_path = get_relative_path(base_dir=base_dir, file=original_file)
        expected_bytes = Path(original_file).read_bytes()
        actual_bytes = (target_dir / rel_path).read_bytes()
        assert expected_bytes == actual_bytes
def test_is_more_populated():
    """Check ``is_more_populated`` against a folder holding exactly ten files."""
    temp_path = get_temp_path(prefix="test_is_more_populated")
    for index in range(10):
        (temp_path / f"{index}.jpg").touch()

    # With ten files, thresholds of eleven and ten must both be rejected ...
    assert not is_more_populated(temp_path, 11)
    assert not is_more_populated(temp_path, 10)
    # ... and only a threshold below the file count is accepted.
    assert is_more_populated(temp_path, 9)
def test_png_img_writer():
    """Check that getting a ``DataSetObject`` stores its bytes in the cache manager."""
    cache_dir = get_temp_path(prefix="test_png_img_writer")
    cache = CacheManager(cache_dir)
    mock_data = ImageMockTaggedData(relative_path="img.png")

    # Nothing has been fetched yet, so the lookup must miss.
    assert cache.get(mock_data.tag) is None

    dataset_object = DataSetObject(mock_data)
    dataset_object.add_cache_manager(cache)

    # This loads the object into the cache.
    fetched_bytes = dataset_object.get()
    assert cache.get(dataset_object.tag) == fetched_bytes
def test_repo_read_write():
    """Check that a repo config round-trips through a file and through a package."""
    config_path = get_temp_file()
    _sequence, repo_config = get_repo_sequence()

    # Round trip 1: plain file on disk.
    repo_config.write(config_path)
    loaded_from_file = repo_config.read(config_path)
    assert loaded_from_file == repo_config

    # Round trip 2: config stored inside a python package directory.
    package_dir = get_temp_path(prefix="test_repo_read_write")
    python_package = PythonPackage(
        base_dir=package_dir, collection_config=repo_config.collection
    )
    repo_config.write_to_package(python_package)
    loaded_from_package = DataRepoConfig.read_from_package(package_dir)
    assert loaded_from_package == repo_config
def test_filesystem_creation():
    """Check that every template file shows up as a regular file on disk.

    A ``Directory`` tree is filled with one ``MockBinary`` per path from
    ``get_standard_folder_template`` and exposed at a temporary location via
    ``setup_as_filesystem``. Each path must then exist as a file below that
    location.
    """
    files = get_standard_folder_template()
    disk_loc = get_temp_path()
    print(disk_loc)

    root_dir = Directory()
    for file in files:
        root_dir.add_data(file, MockBinary(file))

    p = setup_as_filesystem(root_dir, disk_loc, daemon=True)
    try:
        for file in files:
            assert (disk_loc / file).is_file()
    finally:
        # Always stop the handle returned by setup_as_filesystem, even when
        # an assertion fails, so a failing run does not leave it running.
        p.terminate()
def test_init(monkeypatch):
    """Run the ``init`` command with input and repositories replaced by mocks.

    ``builtins.input`` is replaced by a ``MonkeyPatchSequence`` built from
    the sequence returned by ``get_repo_sequence`` plus a final ``"git_url"``
    entry. The remote repo factory and ``GitRepo`` inside
    ``poif.cli.commands.init`` are replaced by mocks as well.
    """
    temp_dir = get_temp_path(prefix="test_init_no_integration")
    sequence, config = get_repo_sequence()

    scripted_input = MonkeyPatchSequence(sequence + ["git_url"])
    monkeypatch.setattr("builtins.input", scripted_input)

    # Put the folders and files named in the collection config on disk.
    for data_folder in config.collection.folders:
        create_data_folder(temp_dir / data_folder)
    for file in config.collection.files:
        write_image_in_file(temp_dir / file)

    mock_repo = MockTaggedRepo()

    def fake_get_remote_repo(x):
        # Hand out the same mock repo regardless of the argument.
        return mock_repo

    monkeypatch.setattr(
        "poif.cli.commands.init.get_remote_repo_from_config",
        fake_get_remote_repo,
    )
    monkeypatch.setattr("poif.cli.commands.init.GitRepo", MockGitRepo)

    init([str(temp_dir)])
def test_without_fuse():
    """Check ``DataSetFileSystem.readdir`` directly for every folder.

    The expected listing of each folder is derived from the template paths;
    the root additionally expects an entry named ``__test_file``. Every
    listing is compared with ``readdir`` output after adding ``.`` and ``..``.
    """
    template_paths = get_standard_folder_template()

    expected_entries = defaultdict(set)
    expected_entries[""].add("__test_file")
    for template_path in template_paths:
        parts = template_path.split("/")
        # Each path component is an entry of the folder named by the
        # components before it; the empty prefix joins to "" (the root).
        for depth, part in enumerate(parts):
            expected_entries["/".join(parts[:depth])].add(part)

    disk_loc = get_temp_path()
    print(disk_loc)

    root_dir = Directory()
    for template_path in template_paths:
        root_dir.add_data(template_path, MockBinary(template_path))

    file_system = DataSetFileSystem(root_dir)
    for folder, entries in expected_entries.items():
        listed = set(file_system.readdir(folder, None))
        assert entries | {".", ".."} == listed
def get_clone_location(self) -> Path:
    """Return the path from ``get_temp_path`` with prefix ``git_get_clone_location``."""
    clone_location = get_temp_path(prefix="git_get_clone_location")
    return clone_location
def test_has_remote():
    """Check that ``GitRepo.has_remote`` turns true once a remote is added."""
    repo_dir = get_temp_path()
    repo = GitRepo(base_dir=repo_dir, init=True)

    # A repo created with init=True must not report a remote yet.
    assert not repo.has_remote()

    repo.add_remote("hello.world")
    assert repo.has_remote()