def test_rmdir(s3_mock):
    """Check that rmdir() removes a sub-directory and then its parent directory."""
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')

    # Populate the bucket with a small documentation-like tree.
    keys = (
        'docs/conf.py',
        'docs/make.bat',
        'docs/index.rst',
        'docs/Makefile',
        'docs/_templates/11conf.py',
        'docs/_build/22conf.py',
        'docs/_static/conf.py',
    )
    for key in keys:
        resource.ObjectSummary('test-bucket', key).put(Body=b'test data')

    # Removing one sub-directory first...
    templates_dir = S3Path('/test-bucket/docs/_templates')
    assert templates_dir.is_dir()
    templates_dir.rmdir()
    assert not templates_dir.exists()

    # ...then the directory that still holds files and other sub-directories.
    docs_dir = S3Path('/test-bucket/docs/')
    docs_dir.rmdir()
    assert not docs_dir.exists()
def test_rglob(s3_mock):
    """Exercise rglob() with a single nested key and then with a mixed tree."""
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')

    def put(key):
        # Store a small object under ``key`` in the test bucket.
        resource.ObjectSummary('test-bucket', key).put(Body=b'test data')

    put('directory/Test.test')
    nested_match = [S3Path('/test-bucket/directory/Test.test')]
    # The bare pattern and the explicit '**/' form must both find the nested key.
    assert list(S3Path('/test-bucket/').rglob('*.test')) == nested_match
    assert list(S3Path('/test-bucket/').rglob('**/*.test')) == nested_match

    for key in (
        'pathlib.py',
        'setup.py',
        'test_pathlib.py',
        'docs/conf.py',
        'build/lib/pathlib.py',
    ):
        put(key)

    python_files = sorted(S3Path.from_uri('s3://test-bucket/').rglob('*.py'))
    assert python_files == [
        S3Path('/test-bucket/build/lib/pathlib.py'),
        S3Path('/test-bucket/docs/conf.py'),
        S3Path('/test-bucket/pathlib.py'),
        S3Path('/test-bucket/setup.py'),
        S3Path('/test-bucket/test_pathlib.py'),
    ]
def test_is_file(s3_mock):
    """Check is_file() against stored keys, key prefixes and missing keys."""
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')
    stored_keys = (
        'directory/Test.test',
        'pathlib.py',
        'setup.py',
        'test_pathlib.py',
        'docs/conf.py',
        'build/lib/pathlib.py',
    )
    for key in stored_keys:
        resource.ObjectSummary('test-bucket', key).put(Body=b'test data')

    # (location, expected truthiness of is_file()), in the order they are checked.
    expectations = (
        ('/test-bucket/fake.test', False),
        ('/test-bucket/fake/', False),
        ('/test-bucket/directory', False),
        ('/test-bucket/directory/Test.test', True),
        ('/test-bucket/pathlib.py', True),
        ('/test-bucket/docs/conf.py', True),
        ('/test-bucket/docs/', False),
        ('/test-bucket/build/', False),
        ('/test-bucket/build/lib', False),
        ('/test-bucket/build/lib/pathlib.py', True),
    )
    for location, expected in expectations:
        assert bool(S3Path(location).is_file()) == expected
def test_empty_directory(s3_mock):
    """iterdir() yields nothing for an empty bucket and for a lone 'dir/' key."""
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')

    bucket_entries = list(S3Path('/test-bucket').iterdir())
    assert bucket_entries == []

    # Create a key that ends with '/' and has no body, then list beneath it.
    resource.meta.client.put_object(Bucket='test-bucket', Key='to/empty/dir/')
    directory_entries = list(S3Path('/test-bucket/to/empty/dir/').iterdir())
    assert directory_entries == []
def test_iterdir_on_buckets(s3_mock):
    """Iterating the root path lists every created bucket."""
    resource = boto3.resource('s3')
    bucket_names = ['test-bucket{}'.format(index) for index in range(4)]
    for name in bucket_names:
        resource.create_bucket(Bucket=name)

    listed = sorted(S3Path('/').iterdir())
    assert listed == [S3Path('/' + name) for name in bucket_names]
def test_hierarchical_configuration(reset_configuration_cache):
    """Registering only parameters records arguments but no resource for the path."""
    path = S3Path('/foo/')
    register_configuration_parameter(path, parameters={'ContentType': 'text/html'})

    config_map = _s3_accessor.configuration_map
    assert path in config_map.arguments
    assert path not in config_map.resources

    # The lookup pairs the default resource with the registered parameters.
    expected = (config_map.default_resource, {'ContentType': 'text/html'})
    assert config_map.get_configuration(path) == expected

    # Concrete and pure paths must resolve to the same configuration.
    concrete = config_map.get_configuration(S3Path('/foo/'))
    pure = config_map.get_configuration(PureS3Path('/foo/'))
    assert concrete == pure
def browseS3Dir(path):
    """List the entries found under a "/s3buckets"-prefixed browse path.

    Args:
        path: Browse path as a string. A leading "/s3buckets" marker is
            dropped and the remainder is used as the S3 path.

    Returns:
        A list of the paths yielded by ``S3Path.iterdir()``. When the
        remainder is the bucket root (empty or "/"), only entries whose
        ``str()`` is in the module-level ``WHITELIST`` are returned.
    """
    prefix = "/s3buckets"
    # Strip only the leading marker: str.replace() would also remove any
    # later "/s3buckets" occurrence inside a key name.
    location = path[len(prefix):] if path.startswith(prefix) else path

    # Treat "/s3buckets/" like "/s3buckets" so that a trailing slash cannot
    # be used to list buckets outside the whitelist.
    if location in ("", "/"):
        return [entry for entry in S3Path('/').iterdir() if str(entry) in WHITELIST]

    return list(S3Path(location).iterdir())
def test_read_lines_hint(s3_mock):
    """readlines(hint) on a two-line object, in text mode and in binary mode."""
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')
    summary = resource.ObjectSummary('test-bucket', 'directory/Test.test')
    summary.put(Body=b'test data\ntest data')

    # In text mode the expected count depends on the interpreter version.
    expected_text_lines = 1 if sys.version_info >= (3, 6) else 2
    with S3Path('/test-bucket/directory/Test.test').open() as text_file:
        assert len(text_file.readlines(1)) == expected_text_lines

    # Original author's note: the hint works only in binary mode.
    with S3Path('/test-bucket/directory/Test.test').open('br') as binary_file:
        assert len(binary_file.readlines(1)) == 1
def test_register_configuration_exceptions(reset_configuration_cache):
    """register_configuration_parameter() rejects bad paths and bad parameters."""
    html_parameters = {'ContentType': 'text/html'}

    # A plain pathlib.Path is expected to raise TypeError.
    with pytest.raises(TypeError):
        register_configuration_parameter(Path('/'), parameters=html_parameters)

    # Parameters given as a tuple are expected to raise TypeError.
    with pytest.raises(TypeError):
        register_configuration_parameter(
            S3Path('/foo/'), parameters=('ContentType', 'text/html'))

    # Passing neither parameters nor a resource is expected to raise ValueError.
    with pytest.raises(ValueError):
        register_configuration_parameter(S3Path('/foo/'))
def _route_path(*args, endpoint_url: str) -> Union[Path, S3Path]:
    """Build an S3Path or a local Path from ``args``, chosen by the first argument.

    Args:
        *args: Path segments; the first one decides the path type.
        endpoint_url: Endpoint used for the boto3 S3 resource that is
            installed on the s3path accessor when an S3 path is returned.

    Returns:
        ``S3Path.from_uri(*args)`` when the first argument's string form
        starts with "s3://", ``S3Path(*args)`` when it is an ``S3Path``
        instance, otherwise ``Path(*args)``.
    """
    first = args[0]
    is_uri = str(first).startswith("s3://")
    if not is_uri and not isinstance(first, S3Path):
        return Path(*args)

    # Both S3 branches point the shared accessor at the requested endpoint.
    s3path._s3_accessor.s3 = boto3.resource("s3", endpoint_url=endpoint_url)
    if is_uri:
        return S3Path.from_uri(*args)
    return S3Path(*args)
def test_basic_configuration(reset_configuration_cache):
    """With nothing registered, a path resolves to the default resource and no arguments."""
    path = S3Path('/foo/')
    config_map = _s3_accessor.configuration_map

    # Clear both registries before querying.
    config_map.arguments = None
    config_map.resources = None
    assert path not in (config_map.arguments or ())
    assert path not in (config_map.resources or ())

    expected = (config_map.default_resource, {})
    assert config_map.get_configuration(path) == expected

    # Concrete and pure paths must resolve to the same configuration.
    concrete = config_map.get_configuration(S3Path('/foo/'))
    pure = config_map.get_configuration(PureS3Path('/foo/'))
    assert concrete == pure
def test_s3path(self):
    """smart_path() returns an S3Path for an s3:// string and for S3Path inputs."""
    from_string = smart_path("s3://foo/bar")
    self.assertIsInstance(from_string, S3Path)
    self.assertEqual(str(from_string), "/foo/bar")

    # An S3Path built before any bucket has been created.
    routed = smart_path(S3Path("s3://hahah"))
    self.assertIsInstance(routed, S3Path)

    # An S3Path naming a bucket that has just been created.
    resource = boto3.resource("s3")
    resource.create_bucket(Bucket="tmp")
    routed = smart_path(S3Path("s3://tmp"))
    self.assertIsInstance(routed, S3Path)
def test_owner(s3_mock):
    """owner() of a freshly stored key is expected to be 'webfile'."""
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')
    resource.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')

    owner_name = S3Path('/test-bucket/directory/Test.test').owner()
    assert owner_name == 'webfile'
def test_boto_methods_with_configuration(s3_mock, reset_configuration_cache):
    """Write a key under a bucket with registered parameters.

    The test makes no assertions; it passes when the write raises nothing.
    """
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')

    bucket_path = S3Path('/test-bucket/')
    register_configuration_parameter(
        bucket_path, parameters={'ContentType': 'text/html'})

    html_key = bucket_path.joinpath('bar.html')
    html_key.write_text('hello')
def test_read_lines_hint(s3_mock):
    """readlines(1) on a two-line object opened in text mode returns one line."""
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')
    summary = resource.ObjectSummary('test-bucket', 'directory/Test.test')
    summary.put(Body=b'test data\ntest data')

    target = S3Path('/test-bucket/directory/Test.test')
    with target.open("r") as text_file:
        hinted_lines = text_file.readlines(1)
        assert len(hinted_lines) == 1
def get_path():
    """Resolve ``g.path`` to an S3Path or a local Path.

    Returns:
        ``S3Path`` built from ``g.path`` with its leading "/s3buckets"
        marker removed when the path starts with that marker, otherwise
        ``Path(g.path)``.
    """
    prefix = "/s3buckets"
    path = g.path
    if path.startswith(prefix):
        # Drop only the leading marker: str.replace() would also strip any
        # later "/s3buckets" occurrence and so corrupt the key.
        return S3Path(path[len(prefix):])
    return Path(path)
def test_iter_lines(s3_mock):
    """Iterating a text-mode file object yields 'test data' for every line."""
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')
    summary = resource.ObjectSummary('test-bucket', 'directory/Test.test')
    summary.put(Body=b'test data\ntest data')

    target = S3Path('/test-bucket/directory/Test.test')
    with target.open("r") as text_file:
        for text_line in text_file:
            assert text_line == "test data"
def test_open_for_reading(s3_mock):
    """open() with default arguments returns the stored body as text."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    object_summary = s3.ObjectSummary('test-bucket', 'directory/Test.test')
    object_summary.put(Body=b'test data')

    path = S3Path('/test-bucket/directory/Test.test')
    # Use a context manager so the file object is closed even when the
    # assertion fails, instead of being left open.
    with path.open() as file_obj:
        assert file_obj.read() == 'test data'
def test_fix_url_encoding_issue(s3_mock):
    """A key containing '=' characters can be read back unchanged."""
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')
    summary = resource.ObjectSummary('test-bucket', 'paramA=valueA/paramB=valueB/name')
    summary.put(Body=b'test data\ntest data')

    content = S3Path('/test-bucket/paramA=valueA/paramB=valueB/name').read_bytes()
    assert content == b'test data\ntest data'
def test_mkdir(s3_mock):
    """mkdir() creates buckets and honours exist_ok and parents."""
    resource = boto3.resource('s3')

    bucket_path = S3Path('/test-bucket/')
    bucket_path.mkdir()
    assert resource.Bucket('test-bucket') in resource.buckets.all()

    # Creating it again is fine with exist_ok=True and an error without it.
    bucket_path.mkdir(exist_ok=True)
    with pytest.raises(FileExistsError):
        bucket_path.mkdir(exist_ok=False)

    # A key under a bucket that does not exist needs parents=True.
    nested_path = S3Path('/test-second-bucket/test-directory/file.name')
    with pytest.raises(FileNotFoundError):
        nested_path.mkdir()
    nested_path.mkdir(parents=True)
    assert resource.Bucket('test-second-bucket') in resource.buckets.all()
def test_write_lines(s3_mock):
    """Two lines written with writelines() are read back as two lines."""
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')

    target = S3Path('/test-bucket/directory/Test.test')
    with target.open("w") as writer:
        writer.writelines(["line 1\n", "line 2\n"])

    read_back = target.read_text().splitlines()
    assert len(read_back) == 2
def test_unlink(s3_mock):
    """unlink() removes a key and raises for missing keys, prefixes and buckets."""
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')
    resource.ObjectSummary('test-bucket', 'temp_key').put(Body=b'test data')

    top_level_key = S3Path('/test-bucket/temp_key')
    nested_key = S3Path('/test-bucket/fake_folder/some_key')
    nested_key.write_text("some text")
    assert top_level_key.exists() is True
    assert nested_key.exists() is True

    top_level_key.unlink()
    assert top_level_key.exists() is False

    # A key that was never written.
    missing_key = S3Path("/test-bucket/fake_subfolder/fake_subkey")
    with pytest.raises(FileNotFoundError):
        missing_key.unlink()

    # A prefix that still holds a key, and a bucket-level path.
    for directory_like in ("/test-bucket/fake_folder", "/fake-bucket/"):
        with pytest.raises(IsADirectoryError):
            S3Path(directory_like).unlink()
def test_exists(s3_mock):
    """exists() raises for unusable paths and reports keys and their parents."""
    relative_path = S3Path('./fake-key')
    with pytest.raises(ValueError):
        relative_path.exists()

    # No bucket has been created yet at this point.
    unknown_bucket_path = S3Path('/fake-bucket/fake-key')
    with pytest.raises(ClientError):
        unknown_bucket_path.exists()

    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')
    resource.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')

    assert not S3Path('/test-bucket/Test.test').exists()

    stored_path = S3Path('/test-bucket/directory/Test.test')
    assert stored_path.exists()
    # Every ancestor of an existing key must exist as well.
    for ancestor in stored_path.parents:
        assert ancestor.exists()
def test_open_method_with_custom_endpoint_url():
    """A resource registered for a path prefix is the one used by open()."""
    endpoint = 'http://localhost'
    register_configuration_parameter(
        PureS3Path('/local/'),
        parameters={},
        resource=boto3.resource('s3', endpoint_url=endpoint))

    file_object = S3Path('/local/directory/Test.test').open('br')

    # The boto client is reached through different attributes depending on
    # whether smart_open is at most version 3.0.0 or newer.
    legacy_layout = StrictVersion(smart_open.__version__) <= StrictVersion('3.0.0')
    if legacy_layout:
        boto_client = file_object._object.meta.client
    else:
        boto_client = file_object._client.client
    assert boto_client._endpoint.host == endpoint
def test_stat(s3_mock):
    """Cover stat() errors, StatResult fields, os-style aliases and bucket paths."""
    relative_path = S3Path('fake-bucket/fake-key')
    with pytest.raises(ValueError):
        relative_path.stat()

    # No bucket has been created yet at this point.
    unknown_bucket_path = S3Path('/fake-bucket/fake-key')
    with pytest.raises(ClientError):
        unknown_bucket_path.stat()

    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')
    summary = resource.ObjectSummary('test-bucket', 'Test.test')
    summary.put(Body=b'test data')

    key_path = S3Path('/test-bucket/Test.test')
    key_stat = key_path.stat()
    assert isinstance(key_stat, StatResult)
    expected_stat = StatResult(
        size=summary.size,
        last_modified=summary.last_modified,
    )
    assert key_stat == expected_stat

    # Copy the object into a local temporary file and compare both stats.
    with NamedTemporaryFile() as local_file:
        local_file.write(key_path.read_bytes())
        local_file.flush()
        local_stat = Path(local_file.name).stat()
        s3_stat = key_path.stat()

        assert s3_stat.st_size == local_stat.st_size == s3_stat.size
        assert s3_stat.last_modified.timestamp() == s3_stat.st_mtime
        # The local copy was written after the S3 object was stored.
        assert s3_stat.st_mtime < local_stat.st_mtime

    # Access time is not available on the S3 stat result.
    with pytest.raises(UnsupportedOperation):
        key_path.stat().st_atime

    bucket_path = S3Path('/test-bucket')
    assert bucket_path.stat() is None
def resolve_data_dir(self) -> Path:
    """Pick the data directory and record on ``self.is_bucket`` whether it is on S3.

    Candidates are tried in this order:

    1. The value of the ``SIL_NLP_DATA_PATH`` environment variable, first as
       a local directory and then as an S3 directory.
    2. The local directory ``G:/Shared drives/Gutenberg``.
    3. The S3 path ``/aqua-ml-data``.

    Returns:
        The first candidate for which ``is_dir()`` is true (a ``Path`` or an
        ``S3Path``).

    Raises:
        Exception: ``SIL_NLP_DATA_PATH`` is set but is neither a local nor
            an S3 directory.
        FileExistsError: No candidate is a directory. The exception type is
            kept as-is for callers that already catch it.
    """
    self.is_bucket = False

    sil_nlp_data_path = get_env_path("SIL_NLP_DATA_PATH", default="")
    if sil_nlp_data_path != "":
        # An explicit setting is authoritative: use it or fail, never fall
        # through to the built-in defaults.
        local_path = Path(sil_nlp_data_path)
        if local_path.is_dir():
            LOGGER.info(
                f"Using workspace: {sil_nlp_data_path} as per environment variable SIL_NLP_DATA_PATH."
            )
            return local_path

        s3_path = S3Path(sil_nlp_data_path)
        if s3_path.is_dir():
            LOGGER.info(
                f"Using s3 workspace: {sil_nlp_data_path} as per environment variable SIL_NLP_DATA_PATH."
            )
            self.is_bucket = True
            return s3_path

        raise Exception(
            f"The path defined by environment variable SIL_NLP_DATA_PATH ({sil_nlp_data_path}) is not a real or s3 directory."
        )

    gutenberg_path = Path("G:/Shared drives/Gutenberg")
    if gutenberg_path.is_dir():
        LOGGER.info(
            f"Using workspace: {gutenberg_path}. To change the workspace, set the environment variable SIL_NLP_DATA_PATH."
        )
        return gutenberg_path

    s3root = S3Path("/aqua-ml-data")
    if s3root.is_dir():
        # Message fixed: it previously read "s3 workspace workspace".
        LOGGER.info(
            f"Using s3 workspace: {s3root}. To change the workspace, set the environment variable SIL_NLP_DATA_PATH."
        )
        self.is_bucket = True
        return s3root

    raise FileExistsError("No valid path exists")
def test_write_bytes(s3_mock):
    """Bytes read from a key can be written back and read again unchanged."""
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')
    resource.ObjectSummary('test-bucket', 'temp_key').put(Body=b'test data')

    key_path = S3Path('/test-bucket/temp_key')
    original = key_path.read_bytes()
    assert isinstance(original, bytes)

    key_path.write_bytes(original)
    assert key_path.read_bytes() == original
def test_open_text_read(s3_mock):
    """Text-mode open() supports readlines() and repeated readline() at EOF."""
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')
    resource.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')

    target = S3Path('/test-bucket/directory/Test.test')
    with target.open(mode='r') as text_file:
        assert text_file.readlines() == ['test data']

    with target.open(mode='rt') as text_file:
        # One line of content, then empty strings once the end is reached.
        for expected_line in ('test data', '', ''):
            assert text_file.readline() == expected_line
def load_image_from_oss(path: s3path.S3Path, mode='rb', format=None):
    """Read an image from an S3 path and pass it through ``utils.convert_PIL_to_numpy``.

    Args:
        path: ``s3path.S3Path`` of the image object to read.
        mode: Mode handed to ``path.open``; defaults to ``'rb'``.
        format: Forwarded unchanged as the second argument of
            ``utils.convert_PIL_to_numpy``.

    Returns:
        The value returned by ``utils.convert_PIL_to_numpy`` for the opened image.
    """
    assert isinstance(path, s3path.S3Path)
    # Read inside a context manager so the stream is closed instead of
    # leaked; the bytes are buffered in memory, so closing it is safe.
    with path.open(mode=mode) as stream:
        raw_bytes = stream.read()
    image = Image.open(io.BytesIO(raw_bytes))
    return utils.convert_PIL_to_numpy(image, format)
def test_stat(s3_mock):
    """Cover stat() errors, the StatResult of a key, and stat() on a bucket."""
    relative_path = S3Path('fake-bucket/fake-key')
    with pytest.raises(ValueError):
        relative_path.stat()

    # No bucket has been created yet at this point.
    unknown_bucket_path = S3Path('/fake-bucket/fake-key')
    with pytest.raises(ClientError):
        unknown_bucket_path.stat()

    resource = boto3.resource('s3')
    resource.create_bucket(Bucket='test-bucket')
    summary = resource.ObjectSummary('test-bucket', 'Test.test')
    summary.put(Body=b'test data')

    key_stat = S3Path('/test-bucket/Test.test').stat()
    assert isinstance(key_stat, StatResult)
    expected_stat = StatResult(
        size=summary.size,
        last_modified=summary.last_modified,
    )
    assert key_stat == expected_stat

    bucket_path = S3Path('/test-bucket')
    assert bucket_path.stat() is None