def test_api_fluid(with_adapter: str, bucket: str) -> None:
    path: FluidPath = Pathy.fluid(f"gs://{bucket}/fake-key")
    assert isinstance(path, Pathy)
    path = Pathy.fluid("foo/bar.txt")
    assert isinstance(path, BasePath)
    path = Pathy.fluid("/dev/null")
    assert isinstance(path, BasePath)

def test_cli_ls_local_files(with_adapter: str, bucket: str) -> None:
    root = Pathy.fluid(tempfile.mkdtemp()) / "ls"
    root.mkdir(parents=True, exist_ok=True)
    for i in range(3):
        (root / f"file_{i}").write_text("NICE")
    files = list(root.ls())
    assert len(files) == 3
    for i, blob_stat in enumerate(files):
        assert blob_stat.name == f"file_{i}"
        assert blob_stat.size == 4
        assert blob_stat.last_modified is not None

    root = Pathy.from_bucket(bucket) / "cli_ls"
    one = str(root / "file.txt")
    two = str(root / "other.txt")
    three = str(root / "folder/file.txt")
    Pathy(one).write_text("---")
    Pathy(two).write_text("---")
    Pathy(three).write_text("---")

    result = runner.invoke(app, ["ls", str(root)])
    assert result.exit_code == 0
    assert one in result.output
    assert two in result.output
    assert str(root / "folder") in result.output

    result = runner.invoke(app, ["ls", "-l", str(root)])
    assert result.exit_code == 0
    assert one in result.output
    assert two in result.output
    assert str(root / "folder") in result.output

def test_cli_cp_file(with_adapter, bucket: str):
    source = f"gs://{bucket}/cli_cp_file/file.txt"
    destination = f"gs://{bucket}/cli_cp_file/other.txt"
    Pathy(source).write_text("---")
    assert runner.invoke(app, ["cp", source, destination]).exit_code == 0
    assert Pathy(source).exists()
    assert Pathy(destination).is_file()

def test_api_replace_files_in_bucket(with_adapter: str, bucket: str) -> None:
    # Replace a single file
    Pathy(f"gs://{bucket}/replace/file.txt").write_text("---")
    Pathy(f"gs://{bucket}/replace/file.txt").replace(
        f"gs://{bucket}/replace/other.txt"
    )
    assert not Pathy(f"gs://{bucket}/replace/file.txt").exists()
    assert Pathy(f"gs://{bucket}/replace/other.txt").is_file()

def with_adapter(adapter: str, bucket: str, other_bucket: str):
    tmp_dir = None
    if adapter == "gcs":
        # Use GCS (with system credentials)
        use_fs(False)
    elif adapter == "fs":
        # Use local file-system in a temp folder
        tmp_dir = tempfile.mkdtemp()
        use_fs(tmp_dir)
        bucket_one = Pathy.from_bucket(bucket)
        if not bucket_one.exists():
            bucket_one.mkdir()
        bucket_two = Pathy.from_bucket(other_bucket)
        if not bucket_two.exists():
            bucket_two.mkdir()
    else:
        raise ValueError("invalid adapter, nothing is configured")

    # execute the test
    yield

    if adapter == "fs" and tmp_dir is not None:
        # Cleanup fs temp folder
        shutil.rmtree(tmp_dir)
    use_fs(False)
    use_fs_cache(False)

def _load_object_detection_api(self, model_spec: ObjectDetectionAPI_ModelSpec):
    import tensorflow as tf
    from object_detection.utils import config_util
    from object_detection.builders import model_builder

    # Copy the pipeline config and checkpoint files to a local temp directory
    temp_dir = tempfile.TemporaryDirectory()
    temp_dir_path = Path(temp_dir.name)
    model_config_path = temp_dir_path / Pathy(model_spec.config_path).name
    with open(model_config_path, 'wb') as out:
        with fsspec.open(model_spec.config_path, 'rb') as src:
            out.write(src.read())
    src_checkpoint_path = Pathy(model_spec.checkpoint_path)
    checkpoint_path = temp_dir_path / src_checkpoint_path.name
    for src_file in fsspec.open_files(f"{src_checkpoint_path}*", 'rb'):
        out_file = temp_dir_path / Pathy(src_file.path).name
        with open(out_file, 'wb') as out:
            with src_file as src:
                out.write(src.read())

    configs = config_util.get_configs_from_pipeline_file(
        pipeline_config_path=str(model_config_path)
    )
    model_config = configs['model']
    self.model = model_builder.build(
        model_config=model_config, is_training=False
    )
    ckpt = tf.compat.v2.train.Checkpoint(model=self.model)
    ckpt.restore(str(checkpoint_path)).expect_partial()
    self.input_dtype = np.float32

    # Run model through a dummy image so that variables are created
    zeros = np.zeros([640, 640, 3])
    self._raw_predict_single_image_default(zeros)
    temp_dir.cleanup()

def with_adapter(
    adapter: str, bucket: str, other_bucket: str
) -> Generator[str, None, None]:
    tmp_dir = None
    scheme = "gs"
    if adapter == "gcs":
        # Use GCS
        use_fs(False)
        credentials = gcs_credentials_from_env()
        if credentials is not None:
            set_client_params("gs", credentials=credentials)
    elif adapter == "fs":
        # Use local file-system in a temp folder
        tmp_dir = tempfile.mkdtemp()
        use_fs(tmp_dir)
        bucket_one = Pathy.from_bucket(bucket)
        if not bucket_one.exists():
            bucket_one.mkdir()
        bucket_two = Pathy.from_bucket(other_bucket)
        if not bucket_two.exists():
            bucket_two.mkdir()
    else:
        raise ValueError("invalid adapter, nothing is configured")

    # execute the test
    yield scheme

    if adapter == "fs" and tmp_dir is not None:
        # Cleanup fs temp folder
        shutil.rmtree(tmp_dir)
    use_fs(False)
    use_fs_cache(False)

def test_api_rename_files_in_bucket(with_adapter, bucket: str):
    # Rename a single file
    Pathy(f"gs://{bucket}/rename/file.txt").write_text("---")
    Pathy(f"gs://{bucket}/rename/file.txt").rename(
        f"gs://{bucket}/rename/other.txt"
    )
    assert not Pathy(f"gs://{bucket}/rename/file.txt").exists()
    assert Pathy(f"gs://{bucket}/rename/other.txt").is_file()

def test_gcs_scandir_list_buckets(
    with_adapter: str, bucket: str, other_bucket: str
) -> None:
    from pathy.gcs import ScanDirGCS

    root = Pathy("gs://foo/bar")
    client = root._accessor.client(root)  # type:ignore
    scandir = ScanDirGCS(client=client, path=Pathy())
    assert sorted([s.name for s in scandir]) == sorted([bucket, other_bucket])

def test_cli_mv_file_across_buckets(with_adapter, bucket: str, other_bucket: str):
    source = f"gs://{bucket}/cli_mv_file_across_buckets/file.txt"
    destination = f"gs://{other_bucket}/cli_mv_file_across_buckets/other.txt"
    Pathy(source).write_text("---")
    assert Pathy(source).exists()
    assert runner.invoke(app, ["mv", source, destination]).exit_code == 0
    assert not Pathy(source).exists()
    assert Pathy(destination).is_file()

def test_api_readwrite_lines(with_adapter: str, bucket: str) -> None:
    path = Pathy(f"gs://{bucket}/write_text/file.txt")
    with path.open("w") as file_obj:
        file_obj.writelines(["---"])
    with path.open("r") as file_obj:
        assert file_obj.readlines() == ["---"]
    with path.open("rt") as file_obj:
        assert file_obj.readline() == "---"

def test_api_is_file(with_adapter: str, bucket: str) -> None:
    path = Pathy(f"gs://{bucket}/is_file/subfolder/another/my.file")
    path.write_text("---")
    # The full file is a file
    assert path.is_file() is True
    # Each parent node in the path is only a directory
    for parent in path.parents:
        assert parent.is_file() is False

def test_api_replace_files_across_buckets(
    with_adapter: str, bucket: str, other_bucket: str
) -> None:
    # Replace a single file across buckets
    Pathy(f"gs://{bucket}/replace/file.txt").write_text("---")
    Pathy(f"gs://{bucket}/replace/file.txt").replace(
        f"gs://{other_bucket}/replace/other.txt"
    )
    assert not Pathy(f"gs://{bucket}/replace/file.txt").exists()
    assert Pathy(f"gs://{other_bucket}/replace/other.txt").is_file()

def test_file_get_blob_owner_key_error_protection(with_adapter: str) -> None:
    gs_bucket = Pathy("gs://my_bucket")
    gs_bucket.mkdir()
    path = gs_bucket / "blob.txt"
    path.write_text("hello world!")
    gcs_client: BucketClientFS = get_client("gs")
    bucket: BucketFS = gcs_client.get_bucket(gs_bucket)
    blob: Optional[BlobFS] = bucket.get_blob("blob.txt")
    assert blob is not None and blob.owner is None

def test_s3_scandir_list_buckets(
    with_adapter: str, bucket: str, other_bucket: str
) -> None:
    from pathy.s3 import ScanDirS3

    root = Pathy("s3://foo/bar")
    client = root._accessor.client(root)  # type:ignore
    scandir = ScanDirS3(client=client, path=Pathy())
    buckets = [s.name for s in scandir]
    assert bucket in buckets
    assert other_bucket in buckets

def test_api_ls_blobs_with_stat(with_adapter: str, bucket: str) -> None:
    root = Pathy(f"gs://{bucket}/ls")
    for i in range(3):
        (root / f"file_{i}").write_text("NICE")
    files = list(root.ls())
    assert len(files) == 3
    for i, blob_stat in enumerate(files):
        assert blob_stat.name == f"file_{i}"
        assert blob_stat.size == 4
        assert blob_stat.last_modified is not None

def test_file_bucket_client_fs_make_uri(with_adapter: str) -> None:
    client: BucketClientFS = get_client("gs")
    blob = Pathy("gs://foo/bar")
    actual = client.make_uri(blob)
    expected = f"file://{client.root}/foo/bar"
    assert actual == expected

    # Invalid root
    other = Pathy("")
    with pytest.raises(ValueError):
        client.make_uri(other)

def test_api_rmdir(with_adapter: str, bucket: str) -> None:
    Pathy(f"gs://{bucket}/rmdir/one.txt").write_text("---")
    Pathy(f"gs://{bucket}/rmdir/folder/two.txt").write_text("---")
    path = Pathy(f"gs://{bucket}/rmdir/")
    path.rmdir()
    assert not Pathy(f"gs://{bucket}/rmdir/one.txt").is_file()
    assert not Pathy(f"gs://{bucket}/rmdir/folder/two.txt").is_file()
    assert not path.exists()

def test_cli_ls(with_adapter, bucket: str):
    root = Pathy.from_bucket(bucket) / "cli_ls"
    one = str(root / "file.txt")
    two = str(root / "other.txt")
    three = str(root / "folder/file.txt")
    Pathy(one).write_text("---")
    Pathy(two).write_text("---")
    Pathy(three).write_text("---")
    result = runner.invoke(app, ["ls", str(root)])
    assert result.exit_code == 0
    assert one in result.output
    assert two in result.output
    assert str(root / "folder") in result.output

def test_cli_cp_folder(with_adapter, bucket: str):
    root = Pathy.from_bucket(bucket)
    source = root / "cli_cp_folder"
    destination = root / "cli_cp_folder_other"
    for i in range(2):
        for j in range(2):
            (source / f"{i}" / f"{j}").write_text("---")
    assert runner.invoke(app, ["cp", str(source), str(destination)]).exit_code == 0
    assert Pathy(source).exists()
    assert Pathy(destination).is_dir()
    for i in range(2):
        for j in range(2):
            assert (destination / f"{i}" / f"{j}").is_file()

def test_file_bucket_client_fs_create_bucket(with_adapter: str) -> None:
    client: BucketClientFS = get_client("gs")

    # Invalid root
    invalid = Pathy("")
    with pytest.raises(ValueError):
        client.create_bucket(invalid)

    # Can create a bucket with a valid path
    root = Pathy("gs://bucket_name")
    assert client.create_bucket(root) is not None

    # Bucket already exists error
    with pytest.raises(FileExistsError):
        client.create_bucket(root)

def pathy_fixture():
    import tempfile
    import shutil
    from pathy import use_fs, Pathy

    temp_folder = tempfile.mkdtemp(prefix="thinc-pathy")
    use_fs(temp_folder)

    root = Pathy("gs://test-bucket")
    root.mkdir(exist_ok=True)
    yield root

    use_fs(False)
    shutil.rmtree(temp_folder)

def _load_tensorflow_KeypointsRegressor_model_spec(
    self, model_spec: TensorFlow_KeypointsRegressorModelSpec
):
    import tensorflow as tf
    if model_spec.saved_model_type in [
        "tf.keras", "tf.saved_model", "tflite", "tflite_one_image_per_batch"
    ]:
        # Materialize the (possibly remote) model into a local temp file/folder
        model_openfile = fsspec.open(model_spec.model_path, 'rb')
        if model_openfile.fs.isdir(model_openfile.path):
            temp_folder = copy_files_from_directory_to_temp_directory(
                directory=model_spec.model_path
            )
            model_path = Pathy(temp_folder.name)
            temp_files_cleanup = temp_folder.cleanup
        else:
            temp_file = tempfile.NamedTemporaryFile()
            with model_openfile as src:
                temp_file.write(src.read())
            model_path = Pathy(temp_file.name)
            temp_files_cleanup = temp_file.close

        if model_spec.saved_model_type == "tf.keras":
            self.model = tf.keras.models.load_model(str(model_path), compile=False)
            self.input_dtype = np.float32
        elif model_spec.saved_model_type == "tf.saved_model":
            self.loaded_model = tf.saved_model.load(
                str(model_path)
            )  # to protect from gc
            self.model = self.loaded_model.signatures["serving_default"]
            self.input_dtype = np.float32
        elif model_spec.saved_model_type in [
            'tflite', 'tflite_one_image_per_batch'
        ]:
            self.model = tf.lite.Interpreter(str(model_path))
            # self.model.allocate_tensors()
            input_details = self.model.get_input_details()[0]
            self.input_index = input_details['index']
            self.input_dtype = input_details['dtype']
            self.output_index = self.model.get_output_details()[0]['index']

        temp_files_cleanup()
    elif model_spec.saved_model_type == "tf.keras.Model":
        self.model = model_spec.model_path
        self.input_dtype = np.float32
    else:
        raise ValueError(
            "Tensorflow_KeypointsRegressorModel got unknown saved_model_type "
            f"in TensorFlow_KeypointsRegressorModelSpec: {model_spec.saved_model_type}"
        )

def test_cli_rm_folder(with_adapter, bucket: str):
    root = Pathy.from_bucket(bucket)
    source = root / "cli_rm_folder"
    for i in range(2):
        for j in range(2):
            (source / f"{i}" / f"{j}").write_text("---")

    # Returns exit code 1 without recursive flag when given a folder
    assert runner.invoke(app, ["rm", str(source)]).exit_code == 1
    assert runner.invoke(app, ["rm", "-r", str(source)]).exit_code == 0
    assert not Pathy(source).exists()
    # Ensure source files are gone
    for i in range(2):
        for j in range(2):
            assert not (source / f"{i}" / f"{j}").is_file()

def test_scandir_custom_class(bucket: str) -> None:
    use_fs(True)
    client = BucketClientFS()
    root = Pathy(f"gs://{bucket}/")
    scandir = MockScanDir(client=client, path=root)
    blobs = [b for b in scandir]
    assert len(blobs) == 1

def test_cli_cp_file_name_from_source(with_adapter: str, bucket: str) -> None:
    source = pathlib.Path("./file.txt")
    source.touch()
    destination = f"gs://{bucket}/{ENV_ID}/cli_cp_file/"
    assert runner.invoke(app, ["cp", str(source), destination]).exit_code == 0
    assert Pathy(f"{destination}file.txt").is_file()
    source.unlink()

def test_cli_rm_verbose(with_adapter, bucket: str):
    root = Pathy.from_bucket(bucket) / "cli_rm_folder"
    source = str(root / "file.txt")
    other = str(root / "folder/other")
    Pathy(source).write_text("---")
    Pathy(other).write_text("---")
    result = runner.invoke(app, ["rm", "-v", source])
    assert result.exit_code == 0
    assert source in result.output
    assert other not in result.output

    Pathy(source).write_text("---")
    result = runner.invoke(app, ["rm", "-rv", str(root)])
    assert result.exit_code == 0
    assert source in result.output
    assert other in result.output

def test_s3_bucket_client_list_blobs(with_adapter: str, bucket: str) -> None:
    """Test corner-case in S3 client that isn't easily reachable from Pathy"""
    from pathy.s3 import BucketClientS3

    client: BucketClientS3 = get_client("s3")
    root = Pathy("s3://invalid_h3gE_ds5daEf_Sdf15487t2n4")
    assert len(list(client.list_blobs(root))) == 0

def test_s3_scandir_invalid_bucket_name(with_adapter: str) -> None:
    from pathy.s3 import ScanDirS3

    root = Pathy(f"{with_adapter}://invalid_h3gE_ds5daEf_Sdf15487t2n4/bar")
    client = root._accessor.client(root)  # type:ignore
    scandir = ScanDirS3(client=client, path=root)
    assert len(list(scandir)) == 0

def test_file_scandir_list_buckets(
    with_adapter: str, bucket: str, other_bucket: str
) -> None:
    root = Pathy()
    client = root._accessor.client(root)  # type:ignore
    scandir = ScanDirFS(client=client, path=root)
    assert sorted([s.name for s in scandir]) == sorted([bucket, other_bucket])