import textwrap

import boto3
import pytest
from moto import mock_s3

# NOTE: import paths below assume the DVC 1.x source layout; fixtures such as
# `tmp_dir`, `dvc`, `scm`, `s3`, and `remote` come from the test suite's
# conftest and are not defined in this excerpt.
from dvc.cache import CloudCache
from dvc.config import ConfigError
from dvc.exceptions import DvcException
from dvc.remote.base import Remote
from dvc.tree.s3 import S3Tree
from tests.remotes import S3

# Module-level test constants assumed by the tests below.
bucket_name = "bucket"
prefix = "some/prefix"
url = f"s3://{bucket_name}/{prefix}"
key_id = "key-id"
key_secret = "key-secret"
session_token = "session-token"

GB = 2 ** 30  # bytes; used by the transfer-config assertion below


@mock_s3
def test_copy_multipart_preserve_etag():
    from_info, to_info = _get_src_dst()

    s3 = boto3.client("s3")
    s3.create_bucket(Bucket=from_info.bucket)
    _upload_multipart(s3, from_info.bucket, from_info.path)
    S3Tree._copy(s3, from_info, to_info, {})

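# `test_copy_multipart_preserve_etag` above calls an `_upload_multipart`
# helper that does not appear in this excerpt. A minimal sketch of such a
# helper, assuming the standard boto3 client multipart-upload calls
# (create_multipart_upload / upload_part / complete_multipart_upload); the
# part count and sizes are illustrative, not taken from the original suite.
def _upload_multipart(s3, bucket, key):
    mpu = s3.create_multipart_upload(Bucket=bucket, Key=key)
    mpu_id = mpu["UploadId"]

    parts = []
    for i in range(1, 3):
        # Every part except the last must be at least 5 MiB, otherwise
        # complete_multipart_upload rejects the upload.
        body = str(i) * (5 * 1024 * 1024 + i)
        part = s3.upload_part(
            Bucket=bucket,
            Key=key,
            PartNumber=i,
            UploadId=mpu_id,
            Body=body,
        )
        parts.append({"PartNumber": i, "ETag": part["ETag"]})

    s3.complete_multipart_upload(
        Bucket=bucket,
        Key=key,
        UploadId=mpu_id,
        MultipartUpload={"Parts": parts},
    )
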
@mock_s3
def test_copy_singlepart_preserve_etag():
    from_info, to_info = _get_src_dst()

    s3 = boto3.client("s3")
    s3.create_bucket(Bucket=from_info.bucket)
    s3.put_object(Bucket=from_info.bucket, Key=from_info.path, Body="data")
    S3Tree._copy(s3, from_info, to_info, {})

def test_get_s3_no_credentials(mocker):
    from botocore.exceptions import NoCredentialsError

    tree = S3Tree(None, {})
    with pytest.raises(DvcException, match="Unable to find AWS credentials"):
        with tree._get_s3():
            raise NoCredentialsError

def test_s3_aws_config_different_profile(tmp_dir, dvc, s3, monkeypatch):
    config_file = tmp_dir / "aws_config.ini"
    config_file.write_text(
        textwrap.dedent(
            """\
            [default]
            extra = keys
            s3 =
              addressing_style = auto
              use_accelerate_endpoint = true
              multipart_threshold = ThisIsNotGoingToBeCasted!
            [profile dev]
            some_extra = keys
            s3 =
              addressing_style = virtual
              multipart_threshold = 2GiB
            """
        )
    )
    monkeypatch.setenv("AWS_CONFIG_FILE", str(config_file))

    tree = S3Tree(dvc, {**s3.config, "profile": "dev"})
    assert tree._transfer_config is None

    # Renamed from `s3` to avoid shadowing the `s3` fixture above.
    with tree._get_s3() as s3_resource:
        s3_config = s3_resource.meta.client.meta.config.s3
        assert s3_config["addressing_style"] == "virtual"
        assert "use_accelerate_endpoint" not in s3_config

    transfer_config = tree._transfer_config
    assert transfer_config.multipart_threshold == 2 * GB

def test_get_s3_connection_error_endpoint(mocker):
    from botocore.exceptions import EndpointConnectionError

    tree = S3Tree(None, {"endpointurl": "https://example.com"})
    msg = "Unable to connect to 'https://example.com'."
    with pytest.raises(DvcException, match=msg):
        with tree._get_s3():
            raise EndpointConnectionError(endpoint_url="url")

def test_get_s3_connection_error(mocker):
    from botocore.exceptions import EndpointConnectionError

    tree = S3Tree(None, {})
    msg = "Unable to connect to 'AWS S3'."
    with pytest.raises(DvcException, match=msg):
        with tree._get_s3():
            raise EndpointConnectionError(endpoint_url="url")

def test_s3_upload_fobj(tmp_dir, dvc, s3):
    s3.gen({"data": {"foo": "foo"}})
    tree = S3Tree(dvc, s3.config)

    to_info = s3 / "data" / "bar"
    with tree.open(s3 / "data" / "foo", "rb") as stream:
        tree.upload_fobj(stream, to_info, 1)

    assert to_info.read_text() == "foo"

@mock_s3
@pytest.mark.parametrize(
    "base_info",
    [S3Tree.PATH_CLS("s3://bucket/"), S3Tree.PATH_CLS("s3://bucket/ns/")],
)
def test_link_created_on_non_nested_path(base_info, tmp_dir, dvc, scm):
    tree = S3Tree(dvc, {"url": str(base_info.parent)})
    cache = CloudCache(tree)
    s3 = cache.tree.s3.meta.client
    s3.create_bucket(Bucket=base_info.bucket)
    s3.put_object(
        Bucket=base_info.bucket, Key=(base_info / "from").path, Body="data"
    )
    cache.link(base_info / "from", base_info / "to")

    assert cache.tree.exists(base_info / "from")
    assert cache.tree.exists(base_info / "to")

def test_key_id_and_secret(dvc):
    tree = S3Tree(
        dvc,
        {
            "url": url,
            "access_key_id": key_id,
            "secret_access_key": key_secret,
            "session_token": session_token,
        },
    )
    assert tree.access_key_id == key_id
    assert tree.secret_access_key == key_secret
    assert tree.session_token == session_token

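# `test_checkout_for_external_outputs` below compares checkout stats against
# an `empty_checkout` baseline that is not defined in this excerpt. Assuming
# it has the shape `dvc.checkout()` returns when nothing changes:
empty_checkout = {"added": [], "deleted": [], "modified": []}
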
def test_checkout_for_external_outputs(tmp_dir, dvc):
    dvc.cache.s3 = CloudCache(S3Tree(dvc, {"url": S3.get_url()}))

    remote = Remote(S3Tree(dvc, {"url": S3.get_url()}))
    file_path = remote.tree.path_info / "foo"
    # `tree.s3` is a boto3 resource (see `s3.Bucket(...)` and `s3.meta.client`
    # elsewhere in this file), so `put_object` lives on `.meta.client`.
    remote.tree.s3.meta.client.put_object(
        Bucket=remote.tree.path_info.bucket, Key=file_path.path, Body="foo"
    )

    dvc.add(str(remote.tree.path_info / "foo"), external=True)

    remote.tree.remove(file_path)
    stats = dvc.checkout(force=True)
    assert stats == {**empty_checkout, "added": [str(file_path)]}
    assert remote.tree.exists(file_path)

    remote.tree.s3.meta.client.put_object(
        Bucket=remote.tree.path_info.bucket,
        Key=file_path.path,
        Body="foo\nfoo",
    )
    stats = dvc.checkout(force=True)
    assert stats == {**empty_checkout, "modified": [str(file_path)]}

def test_copy_preserve_etag_across_buckets(remote, dvc):
    s3 = remote.tree.s3
    s3.Bucket("another").create()

    another = S3Tree(dvc, {"url": "s3://another", "region": "us-east-1"})

    from_info = remote.tree.path_info / "foo"
    to_info = another.path_info / "foo"

    remote.tree.copy(from_info, to_info)

    from_hash = remote.tree.get_hash(from_info)
    to_hash = another.get_hash(to_info)

    assert from_hash == to_hash

def test_grants(dvc):
    config = {
        "url": url,
        "grant_read": "id=read-permission-id,id=other-read-permission-id",
        "grant_read_acp": "id=read-acp-permission-id",
        "grant_write_acp": "id=write-acp-permission-id",
        "grant_full_control": "id=full-control-permission-id",
    }
    tree = S3Tree(dvc, config)

    assert (
        tree.extra_args["GrantRead"]
        == "id=read-permission-id,id=other-read-permission-id"
    )
    assert tree.extra_args["GrantReadACP"] == "id=read-acp-permission-id"
    assert tree.extra_args["GrantWriteACP"] == "id=write-acp-permission-id"
    assert (
        tree.extra_args["GrantFullControl"] == "id=full-control-permission-id"
    )

def test_s3_isdir(tmp_dir, dvc, s3):
    s3.gen({"data": {"foo": "foo"}})
    tree = S3Tree(dvc, s3.config)

    assert not tree.isdir(s3 / "data" / "foo")
    assert tree.isdir(s3 / "data")

def test_sse_kms_key_id(dvc):
    tree = S3Tree(dvc, {"url": url, "sse_kms_key_id": "key"})
    assert tree.extra_args["SSEKMSKeyId"] == "key"

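# `test_grants_mutually_exclusive_acl_error` below takes a `grants` fixture
# that is not defined in this excerpt. A minimal sketch of such a fixture,
# assuming it mirrors the grant options exercised in `test_grants` above:
@pytest.fixture
def grants():
    return {
        "grant_read": "id=read-permission-id,id=other-read-permission-id",
        "grant_read_acp": "id=read-acp-permission-id",
        "grant_write_acp": "id=write-acp-permission-id",
        "grant_full_control": "id=full-control-permission-id",
    }
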
def test_grants_mutually_exclusive_acl_error(dvc, grants):
    for grant_option, grant_value in grants.items():
        config = {"url": url, "acl": "public-read", grant_option: grant_value}

        with pytest.raises(ConfigError):
            S3Tree(dvc, config)

def test_init(dvc):
    config = {"url": url}
    tree = S3Tree(dvc, config)

    assert tree.path_info == url

def test_makedirs_doesnot_try_on_top_level_paths(tmp_dir, dvc, scm):
    base_info = S3Tree.PATH_CLS("s3://bucket/")
    tree = S3Tree(dvc, {"url": str(base_info)})
    tree.makedirs(base_info)

def test_get_bucket():
    tree = S3Tree(None, {"url": "s3://mybucket/path"})

    with pytest.raises(DvcException, match="Bucket 'mybucket' does not exist"):
        with tree._get_bucket("mybucket") as bucket:
            raise bucket.meta.client.exceptions.NoSuchBucket({}, None)

def _get_src_dst():
    base_info = S3Tree.PATH_CLS(S3.get_url())
    return base_info / "from", base_info / "to"