Пример #1
0
def test_rmdir(s3_mock):
    """Check ``rmdir`` on a nested directory and then on its parent tree."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')

    # Seed the bucket with a small documentation-like tree.
    seeded_keys = (
        'docs/conf.py',
        'docs/make.bat',
        'docs/index.rst',
        'docs/Makefile',
        'docs/_templates/11conf.py',
        'docs/_build/22conf.py',
        'docs/_static/conf.py',
    )
    for key in seeded_keys:
        s3.ObjectSummary('test-bucket', key).put(Body=b'test data')

    # Removing a sub-directory makes it disappear.
    templates_dir = S3Path('/test-bucket/docs/_templates')
    assert templates_dir.is_dir()
    templates_dir.rmdir()
    assert not templates_dir.exists()

    # Removing the parent directory makes the parent disappear as well.
    docs_dir = S3Path('/test-bucket/docs/')
    docs_dir.rmdir()
    assert not docs_dir.exists()
Пример #2
0
def test_rglob(s3_mock):
    """Exercise recursive globbing starting from the bucket root."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')

    # Both pattern spellings must find the single nested file.
    for pattern in ('*.test', '**/*.test'):
        matches = list(S3Path('/test-bucket/').rglob(pattern))
        assert matches == [S3Path('/test-bucket/directory/Test.test')]

    python_keys = (
        'pathlib.py',
        'setup.py',
        'test_pathlib.py',
        'docs/conf.py',
        'build/lib/pathlib.py',
    )
    for key in python_keys:
        s3.ObjectSummary('test-bucket', key).put(Body=b'test data')

    expected_python_files = [
        S3Path('/test-bucket/build/lib/pathlib.py'),
        S3Path('/test-bucket/docs/conf.py'),
        S3Path('/test-bucket/pathlib.py'),
        S3Path('/test-bucket/setup.py'),
        S3Path('/test-bucket/test_pathlib.py')
    ]
    found_python_files = sorted(S3Path.from_uri('s3://test-bucket/').rglob('*.py'))
    assert found_python_files == expected_python_files
Пример #3
0
def test_is_file(s3_mock):
    """Check ``is_file`` for existing keys, directories and missing paths."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    seeded_keys = (
        'directory/Test.test',
        'pathlib.py',
        'setup.py',
        'test_pathlib.py',
        'docs/conf.py',
        'build/lib/pathlib.py',
    )
    for key in seeded_keys:
        s3.ObjectSummary('test-bucket', key).put(Body=b'test data')

    # Missing paths and directory prefixes are not files.
    non_files = (
        '/test-bucket/fake.test',
        '/test-bucket/fake/',
        '/test-bucket/directory',
        '/test-bucket/docs/',
        '/test-bucket/build/',
        '/test-bucket/build/lib',
    )
    for candidate in non_files:
        assert not S3Path(candidate).is_file()

    # Keys that were actually written are files.
    real_files = (
        '/test-bucket/directory/Test.test',
        '/test-bucket/pathlib.py',
        '/test-bucket/docs/conf.py',
        '/test-bucket/build/lib/pathlib.py',
    )
    for candidate in real_files:
        assert S3Path(candidate).is_file()
Пример #4
0
def test_empty_directory(s3_mock):
    """Iterating an empty bucket or an empty directory key yields nothing."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')

    bucket_listing = list(S3Path('/test-bucket').iterdir())
    assert bucket_listing == []

    # Put a key ending in '/' (no Body given) and list it as a directory.
    s3.meta.client.put_object(Bucket='test-bucket', Key='to/empty/dir/')
    directory_listing = list(S3Path('/test-bucket/to/empty/dir/').iterdir())
    assert directory_listing == []
Пример #5
0
def test_iterdir_on_buckets(s3_mock):
    """Iterating the S3 root lists every created bucket."""
    s3 = boto3.resource('s3')
    bucket_names = ['test-bucket{}'.format(index) for index in range(4)]
    for name in bucket_names:
        s3.create_bucket(Bucket=name)

    listed_buckets = sorted(S3Path('/').iterdir())
    expected_buckets = [S3Path('/{}'.format(name)) for name in bucket_names]
    assert listed_buckets == expected_buckets
Пример #6
0
def test_hierarchical_configuration(reset_configuration_cache):
    """Registering parameters for a path records arguments but no resource."""
    path = S3Path('/foo/')
    register_configuration_parameter(path, parameters={'ContentType': 'text/html'})

    config_map = _s3_accessor.configuration_map
    assert path in config_map.arguments
    assert path not in config_map.resources

    resolved = config_map.get_configuration(path)
    assert resolved == (config_map.default_resource, {'ContentType': 'text/html'})

    # Concrete and pure paths must resolve to the same configuration.
    from_concrete = config_map.get_configuration(S3Path('/foo/'))
    from_pure = config_map.get_configuration(PureS3Path('/foo/'))
    assert from_concrete == from_pure
Пример #7
0
def browseS3Dir(path):
    """List the entries under a "/s3buckets"-prefixed virtual path.

    Args:
        path: Virtual path string. Exactly "/s3buckets" lists the buckets at
            the S3 root, filtered by the module-level ``WHITELIST``; any other
            value has its leading "/s3buckets" removed and is listed as-is.

    Returns:
        A list of the paths yielded by ``S3Path.iterdir()``.
    """
    prefix = "/s3buckets"

    if path == prefix:
        # Root view: only expose buckets whose string form is whitelisted.
        return [bucket for bucket in S3Path('/').iterdir()
                if str(bucket) in WHITELIST]

    # Strip only the leading mount prefix. ``str.replace`` would also delete
    # any later "/s3buckets" occurrence inside the bucket name or key.
    relative = path[len(prefix):] if path.startswith(prefix) else path
    return list(S3Path(relative).iterdir())
Пример #8
0
def test_read_lines_hint(s3_mock):
    """Check ``readlines`` with a size hint in text and binary modes."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(
        Body=b'test data\ntest data')

    # The expected text-mode line count depends on the interpreter version.
    if sys.version_info >= (3, 6):
        expected_text_lines = 1
    else:
        expected_text_lines = 2

    with S3Path('/test-bucket/directory/Test.test').open() as text_fp:
        assert len(text_fp.readlines(1)) == expected_text_lines

    with S3Path('/test-bucket/directory/Test.test').open('br') as binary_fp:
        # Original author's note: works only in binary mode.
        assert len(binary_fp.readlines(1)) == 1
Пример #9
0
def test_register_configuration_exceptions(reset_configuration_cache):
    """Invalid arguments to ``register_configuration_parameter`` must raise."""
    html_parameters = {'ContentType': 'text/html'}

    # A ``Path`` that is not an S3 path is rejected.
    with pytest.raises(TypeError):
        register_configuration_parameter(Path('/'), parameters=html_parameters)

    # Parameters passed as a tuple instead of a dict are rejected.
    with pytest.raises(TypeError):
        register_configuration_parameter(
            S3Path('/foo/'), parameters=('ContentType', 'text/html'))

    # Passing neither parameters nor a resource is rejected.
    with pytest.raises(ValueError):
        register_configuration_parameter(S3Path('/foo/'))
Пример #10
0
def _route_path(*args, endpoint_url: str) -> Union[Path, S3Path]:
    """Build an ``S3Path`` or a local ``Path`` based on the first argument.

    A first argument whose string form starts with "s3://" is parsed with
    ``S3Path.from_uri``; an existing ``S3Path`` is passed to ``S3Path``;
    anything else becomes a ``Path``. For both S3 cases the accessor's
    resource is replaced by one bound to ``endpoint_url`` first.
    """
    first = args[0]
    is_uri = str(first).startswith("s3://")

    if not is_uri and not isinstance(first, S3Path):
        return Path(*args)

    # Point the shared accessor at the requested endpoint before building.
    s3path._s3_accessor.s3 = boto3.resource("s3", endpoint_url=endpoint_url)
    if is_uri:
        return S3Path.from_uri(*args)
    return S3Path(*args)
Пример #11
0
def test_basic_configuration(reset_configuration_cache):
    """With nothing registered, a path resolves to the default configuration."""
    path = S3Path('/foo/')
    config_map = _s3_accessor.configuration_map

    # Clear both maps so the lookups below start from an unset state.
    config_map.arguments = None
    config_map.resources = None

    assert path not in (config_map.arguments or ())
    assert path not in (config_map.resources or ())

    resolved = config_map.get_configuration(path)
    assert resolved == (config_map.default_resource, {})

    # Concrete and pure paths must resolve to the same configuration.
    from_concrete = config_map.get_configuration(S3Path('/foo/'))
    from_pure = config_map.get_configuration(PureS3Path('/foo/'))
    assert from_concrete == from_pure
Пример #12
0
    def test_s3path(self):
        """``smart_path`` returns ``S3Path`` for s3 URI strings and S3Path inputs."""
        from_string = smart_path("s3://foo/bar")
        self.assertIsInstance(from_string, S3Path)
        self.assertEqual(str(from_string), "/foo/bar")

        from_s3path = smart_path(S3Path("s3://hahah"))
        self.assertIsInstance(from_s3path, S3Path)

        # Same check once the bucket has been created.
        conn = boto3.resource("s3")
        conn.create_bucket(Bucket="tmp")
        from_existing_bucket = smart_path(S3Path("s3://tmp"))
        self.assertIsInstance(from_existing_bucket, S3Path)
Пример #13
0
def test_owner(s3_mock):
    """``owner()`` of a stored key reports the expected owner name."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')

    reported_owner = S3Path('/test-bucket/directory/Test.test').owner()
    assert reported_owner == 'webfile'
Пример #14
0
def test_boto_methods_with_configuration(s3_mock, reset_configuration_cache):
    """Writing under a bucket with registered parameters must not raise."""
    boto3.resource('s3').create_bucket(Bucket='test-bucket')

    bucket = S3Path('/test-bucket/')
    register_configuration_parameter(bucket, parameters={'ContentType': 'text/html'})

    bucket.joinpath('bar.html').write_text('hello')
Пример #15
0
def test_read_lines_hint(s3_mock):
    """``readlines`` with a hint of 1 returns a single line in text mode."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(
        Body=b'test data\ntest data')

    with S3Path('/test-bucket/directory/Test.test').open("r") as text_fp:
        hinted_lines = text_fp.readlines(1)
        assert len(hinted_lines) == 1
Пример #16
0
def get_path():
    """Turn the request's virtual path (``g.path``) into a path object.

    Returns:
        An ``S3Path`` built from the remainder when ``g.path`` starts with
        "/s3buckets", otherwise a ``Path`` built from ``g.path`` unchanged.
    """
    prefix = "/s3buckets"
    path = g.path

    if path.startswith(prefix):
        # Strip only the leading mount prefix. ``str.replace`` would also
        # delete any later "/s3buckets" occurrence inside the key.
        return S3Path(path[len(prefix):])
    return Path(path)
Пример #17
0
def test_iter_lines(s3_mock):
    """Iterating an opened text file yields the expected line content."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(
        Body=b'test data\ntest data')

    with S3Path('/test-bucket/directory/Test.test').open("r") as text_fp:
        assert all(line == "test data" for line in text_fp)
Пример #18
0
def test_open_for_reading(s3_mock):
    """``open()`` with default arguments reads the object back as text."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    object_summary = s3.ObjectSummary('test-bucket', 'directory/Test.test')
    object_summary.put(Body=b'test data')

    path = S3Path('/test-bucket/directory/Test.test')
    # Use a context manager so the handle is closed even if the assert fails.
    with path.open() as file_obj:
        assert file_obj.read() == 'test data'
Пример #19
0
def test_fix_url_encoding_issue(s3_mock):
    """Keys containing '=' characters can be read back unchanged."""
    payload = b'test data\ntest data'

    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary(
        'test-bucket', 'paramA=valueA/paramB=valueB/name').put(Body=payload)

    stored = S3Path('/test-bucket/paramA=valueA/paramB=valueB/name').read_bytes()
    assert stored == payload
Пример #20
0
def test_mkdir(s3_mock):
    """Check ``mkdir`` bucket creation with ``exist_ok`` and ``parents``."""
    s3 = boto3.resource('s3')

    # First call creates the bucket.
    first_bucket = S3Path('/test-bucket/')
    first_bucket.mkdir()
    assert s3.Bucket('test-bucket') in s3.buckets.all()

    # Repeating is fine with exist_ok=True and an error with exist_ok=False.
    first_bucket.mkdir(exist_ok=True)
    with pytest.raises(FileExistsError):
        first_bucket.mkdir(exist_ok=False)

    # A nested path in a missing bucket needs parents=True.
    nested = S3Path('/test-second-bucket/test-directory/file.name')
    with pytest.raises(FileNotFoundError):
        nested.mkdir()

    nested.mkdir(parents=True)
    assert s3.Bucket('test-second-bucket') in s3.buckets.all()
Пример #21
0
def test_write_lines(s3_mock):
    """Lines written with ``writelines`` can be read back as two lines."""
    boto3.resource('s3').create_bucket(Bucket='test-bucket')

    target = S3Path('/test-bucket/directory/Test.test')
    outgoing_lines = ["line 1\n", "line 2\n"]
    with target.open("w") as writer:
        writer.writelines(outgoing_lines)

    read_back = target.read_text().splitlines()
    assert len(read_back) == 2
Пример #22
0
def test_unlink(s3_mock):
    """Check ``unlink`` on a key, a missing key, a directory and a bucket."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'temp_key').put(Body=b'test data')

    top_level_key = S3Path('/test-bucket/temp_key')
    nested_key = S3Path('/test-bucket/fake_folder/some_key')
    nested_key.write_text("some text")
    assert top_level_key.exists() is True
    assert nested_key.exists() is True

    # Unlinking an existing key removes it.
    top_level_key.unlink()
    assert top_level_key.exists() is False

    # Each remaining target must fail with the matching exception type.
    failing_targets = (
        (FileNotFoundError, "/test-bucket/fake_subfolder/fake_subkey"),
        (IsADirectoryError, "/test-bucket/fake_folder"),
        (IsADirectoryError, "/fake-bucket/"),
    )
    for expected_error, target in failing_targets:
        with pytest.raises(expected_error):
            S3Path(target).unlink()
Пример #23
0
def test_exists(s3_mock):
    """Check ``exists`` for relative, missing-bucket, missing and real paths."""
    # A relative path cannot be checked.
    relative_path = S3Path('./fake-key')
    with pytest.raises(ValueError):
        relative_path.exists()

    # A key in a bucket that does not exist raises a client error.
    missing_bucket_key = S3Path('/fake-bucket/fake-key')
    with pytest.raises(ClientError):
        missing_bucket_key.exists()

    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')

    assert not S3Path('/test-bucket/Test.test').exists()

    stored_key = S3Path('/test-bucket/directory/Test.test')
    assert stored_key.exists()
    # Every ancestor of an existing key must exist as well.
    assert all(ancestor.exists() for ancestor in stored_key.parents)
Пример #24
0
def test_open_method_with_custom_endpoint_url():
    """A resource registered for a prefix is the one used by ``open``."""
    custom_resource = boto3.resource('s3', endpoint_url='http://localhost')
    register_configuration_parameter(
        PureS3Path('/local/'),
        parameters={},
        resource=custom_resource)

    file_object = S3Path('/local/directory/Test.test').open('br')

    # The attribute holding the client depends on the smart_open version.
    uses_legacy_layout = (
        StrictVersion(smart_open.__version__) <= StrictVersion('3.0.0'))
    if uses_legacy_layout:
        endpoint = file_object._object.meta.client._endpoint
    else:
        endpoint = file_object._client.client._endpoint
    assert endpoint.host == 'http://localhost'
Пример #25
0
def test_stat(s3_mock):
    """Check ``stat`` for relative, missing-bucket, key and bucket paths."""
    # A relative path cannot be stat-ed.
    relative_path = S3Path('fake-bucket/fake-key')
    with pytest.raises(ValueError):
        relative_path.stat()

    # A key in a bucket that does not exist raises a client error.
    missing_bucket_key = S3Path('/fake-bucket/fake-key')
    with pytest.raises(ClientError):
        missing_bucket_key.stat()

    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    object_summary = s3.ObjectSummary('test-bucket', 'Test.test')
    object_summary.put(Body=b'test data')

    key_path = S3Path('/test-bucket/Test.test')
    key_stat = key_path.stat()

    assert isinstance(key_stat, StatResult)
    expected_stat = StatResult(
        size=object_summary.size,
        last_modified=object_summary.last_modified,
    )
    assert key_stat == expected_stat

    # Compare against a local copy of the same bytes written afterwards.
    with NamedTemporaryFile() as local_file:
        local_file.write(key_path.read_bytes())
        local_file.flush()

        local_stat = Path(local_file.name).stat()
        s3_stat = key_path.stat()

        assert s3_stat.st_size == local_stat.st_size == s3_stat.size
        assert s3_stat.last_modified.timestamp() == s3_stat.st_mtime
        assert s3_stat.st_mtime < local_stat.st_mtime

    # Access time is not available on the S3 stat result.
    with pytest.raises(UnsupportedOperation):
        key_path.stat().st_atime

    # A bucket has no stat result.
    bucket_path = S3Path('/test-bucket')
    assert bucket_path.stat() is None
Пример #26
0
    def resolve_data_dir(self) -> Path:
        """Choose the data directory and record whether it is an S3 location.

        Candidates are tried in order: the SIL_NLP_DATA_PATH environment
        value (as a local directory, then as an S3 directory), the Gutenberg
        shared drive, and finally the "/aqua-ml-data" S3 path.
        ``self.is_bucket`` is set to True only when an S3 path is returned.

        Raises:
            Exception: SIL_NLP_DATA_PATH is set but is neither a local nor an
                S3 directory.
            FileExistsError: none of the candidates is a directory.
        """
        self.is_bucket = False

        sil_nlp_data_path = get_env_path("SIL_NLP_DATA_PATH", default="")
        if sil_nlp_data_path != "":
            # An explicit setting is authoritative: it must resolve or fail.
            local_candidate = Path(sil_nlp_data_path)
            if local_candidate.is_dir():
                LOGGER.info(
                    f"Using workspace: {sil_nlp_data_path} as per environment variable SIL_NLP_DATA_PATH."
                )
                return local_candidate

            s3_candidate = S3Path(sil_nlp_data_path)
            if not s3_candidate.is_dir():
                raise Exception(
                    f"The path defined by environment variable SIL_NLP_DATA_PATH ({sil_nlp_data_path}) is not a real or s3 directory."
                )
            LOGGER.info(
                f"Using s3 workspace: {sil_nlp_data_path} as per environment variable SIL_NLP_DATA_PATH."
            )
            self.is_bucket = True
            return s3_candidate

        # No explicit setting: fall back to the shared drive if it is present.
        gutenberg_path = Path("G:/Shared drives/Gutenberg")
        if gutenberg_path.is_dir():
            LOGGER.info(
                f"Using workspace: {gutenberg_path}.  To change the workspace, set the environment variable SIL_NLP_DATA_PATH."
            )
            return gutenberg_path

        # Last resort: the default S3 path.
        s3root = S3Path("/aqua-ml-data")
        if not s3root.is_dir():
            raise FileExistsError("No valid path exists")

        LOGGER.info(
            f"Using s3 workspace workspace: {s3root}.  To change the workspace, set the environment variable SIL_NLP_DATA_PATH."
        )
        self.is_bucket = True
        return s3root
Пример #27
0
def test_write_bytes(s3_mock):
    """Bytes read from a key can be written back and read again unchanged."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'temp_key').put(Body=b'test data')

    key_path = S3Path('/test-bucket/temp_key')
    original_bytes = key_path.read_bytes()
    assert isinstance(original_bytes, bytes)

    key_path.write_bytes(original_bytes)
    round_tripped = key_path.read_bytes()
    assert round_tripped == original_bytes
Пример #28
0
def test_open_text_read(s3_mock):
    """Check ``readlines`` and repeated ``readline`` in text read modes."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')

    path = S3Path('/test-bucket/directory/Test.test')

    with path.open(mode='r') as reader:
        all_lines = reader.readlines()
        assert all_lines == ['test data']

    with path.open(mode='rt') as reader:
        # After the only line is consumed, further reads return ''.
        for expected_line in ('test data', '', ''):
            assert reader.readline() == expected_line
Пример #29
0
def load_image_from_oss(path: s3path.S3Path, mode='rb', format=None):
    """Load an image from an S3 path and convert it with ``utils``.

    Args:
        path: ``s3path.S3Path`` pointing at the image object. Any other type
            fails the ``isinstance`` assertion.
        mode: Mode passed to ``path.open``. The content is wrapped in
            ``io.BytesIO``, so a binary mode is expected.
        format: Passed through unchanged to ``utils.convert_PIL_to_numpy``.

    Returns:
        The value returned by ``utils.convert_PIL_to_numpy`` (presumably a
        numpy array -- confirm against ``utils``).
    """
    assert isinstance(path, s3path.S3Path)
    # Read the whole object inside ``with`` so the handle is always closed;
    # the original left the file object open.
    with path.open(mode=mode) as stream:
        payload = stream.read()
    image = Image.open(io.BytesIO(payload))
    image = utils.convert_PIL_to_numpy(image, format)
    return image
Пример #30
0
def test_stat(s3_mock):
    """Check ``stat`` for relative, missing-bucket, key and bucket paths."""
    # A relative path cannot be stat-ed.
    relative_path = S3Path('fake-bucket/fake-key')
    with pytest.raises(ValueError):
        relative_path.stat()

    # A key in a bucket that does not exist raises a client error.
    missing_bucket_key = S3Path('/fake-bucket/fake-key')
    with pytest.raises(ClientError):
        missing_bucket_key.stat()

    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    object_summary = s3.ObjectSummary('test-bucket', 'Test.test')
    object_summary.put(Body=b'test data')

    key_stat = S3Path('/test-bucket/Test.test').stat()
    assert isinstance(key_stat, StatResult)
    expected_stat = StatResult(
        size=object_summary.size,
        last_modified=object_summary.last_modified,
    )
    assert key_stat == expected_stat

    # A bucket has no stat result.
    bucket_path = S3Path('/test-bucket')
    assert bucket_path.stat() is None