def test_get_one_wo_meta(s3root, prefixes, expected):
    """Fetch single objects (optionally byte-ranged) without requesting metadata.

    For every expected URL/range pair:
      * a missing object (expected size is None) must raise
        MetaflowS3NotFound by default, and be returned as a missing
        result when return_missing=True;
      * a present object must come back with the expected range and
        with no info attached (return_info=False).
    """
    with S3() as s3:
        for url, item in expected.items():
            for _, expected_result in item.items():
                range_info = expected_result.range
                if expected_result.size is None:
                    # ensure that the default return_missing=False works
                    with pytest.raises(MetaflowS3NotFound):
                        s3obj = s3.get(s3_get_object_from_url_range(url, range_info))
                    s3obj = s3.get(
                        s3_get_object_from_url_range(url, range_info),
                        return_missing=True,
                        return_info=False,
                    )
                else:
                    s3obj = s3.get(
                        s3_get_object_from_url_range(url, range_info),
                        return_info=False,
                    )
                # Both branches verify the result identically; keep the
                # assertion in one place instead of duplicating it.
                assert_results(
                    [s3obj],
                    {url: item},
                    info_should_be_empty=True,
                    ranges_fetched=[range_info],
                )
def test_get_exceptions(s3root, prefixes, expected):
    # get_many() goes via s3op, get() is a method - test both the code paths
    denied_url = "s3://foobar/foo"
    missing_key = "this_file_does_not_exist"

    # A bucket we have no access to must raise MetaflowS3AccessDenied.
    with S3() as s3:
        with pytest.raises(MetaflowS3AccessDenied):
            s3.get_many([denied_url])
        with pytest.raises(MetaflowS3AccessDenied):
            s3.get(denied_url)

    # A nonexistent key under a valid root must raise MetaflowS3NotFound.
    with S3(s3root=s3root) as s3:
        with pytest.raises(MetaflowS3NotFound):
            s3.get_many([missing_key])
        with pytest.raises(MetaflowS3NotFound):
            s3.get(missing_key)
def test_put_one(s3root, objs, expected):
    """Upload objects one at a time and verify each round-trips intact.

    Also checks that a second put with overwrite=False leaves the
    originally stored payload untouched.
    """

    def _verify(s3, key, s3url, obj):
        # The returned URL must be one we expect, and the stored object
        # must round-trip back with the same key and payload.
        assert s3url in expected
        s3obj = s3.get(key)
        assert s3obj.key == key
        assert_results([s3obj], {s3url: expected[s3url]})
        assert s3obj.blob == to_bytes(obj)

    with S3(s3root=s3root) as s3:
        for key, obj in objs:
            s3url = s3.put(key, obj)
            _verify(s3, key, s3url, obj)
            # put with overwrite disabled: the original payload must survive
            s3url = s3.put(key, "random_value", overwrite=False)
            _verify(s3, key, s3url, obj)
def _do():
    # Use return_missing as this is the most expensive path.
    # `expected` is a free variable captured from the enclosing scope.
    with S3() as s3:
        return [s3.get(url, return_missing=True) for url in expected]
def test_init_options(s3root, pathspecs, expected):
    """Exercise every way of initializing the S3 client.

    Covers five init styles: (1) s3root as a key prefix, (2) a full URL
    as s3root, (3) no root with full URLs, (4) bucket/prefix plus a
    fake 'current' flow environment, and (5) a real Run object (only
    when DO_TEST_RUN is enabled). Each style must resolve the same
    expected objects and reject malformed URLs/keys.
    """
    [pathspec] = pathspecs
    flow_name, run_id = pathspec.split("/")
    plen = len(s3root)
    # option 1) s3root as prefix
    with S3(s3root=s3root) as s3:
        for url, exp in expected.items():
            # s3root should work as a prefix
            s3obj = s3.get(url[plen:])
            assert s3obj.key == url[plen:]
            assert_results([s3obj], {url: exp})
        # a full s3:// URL is not a valid key when a root is set
        with pytest.raises(MetaflowS3URLException):
            s3.get("s3://some/fake/address")
    # option 2) full url as s3root
    for url, exp in expected.items():
        with S3(s3root=url) as s3:
            # no key needed: the root itself is the object
            s3obj = s3.get()
            assert_results([s3obj], {url: exp})
    # option 3) full urls
    with S3() as s3:
        for url, exp in expected.items():
            # s3root should work as a prefix
            s3obj = s3.get(url)
            assert s3obj.key == url
            assert_results([s3obj], {url: exp})
        # without a root, bare suffixes and incomplete URLs must be rejected
        with pytest.raises(MetaflowS3URLException):
            s3.get("suffix")
        with pytest.raises(MetaflowS3URLException):
            s3.get("s3://nopath")
        with pytest.raises(MetaflowS3URLException):
            s3.get_many(["suffixes"])
        with pytest.raises(MetaflowS3URLException):
            s3.get_recursive(["suffixes"])
        with pytest.raises(MetaflowS3URLException):
            s3.get_all()
    # option 4) 'current' environment (fake a running flow)
    flow = FakeFlow(use_cli=False)
    parsed = urlparse(s3root)
    with pytest.raises(MetaflowS3URLException):
        # current not set yet, so this should fail
        with S3(run=flow):
            pass
    current._set_env(
        FakeFlow(name=flow_name),
        run_id,
        "no_step",
        "no_task",
        "no_origin_run_id",
        "no_ns",
        "no_user",
    )
    with S3(bucket=parsed.netloc, prefix=parsed.path, run=flow) as s3:
        for url, exp in expected.items():
            # keys are addressed by the last URL component under the run prefix
            name = url.split("/")[-1]
            s3obj = s3.get(name)
            assert s3obj.key == name
            assert_results([s3obj], {url: exp})
        names = [url.split("/")[-1] for url in expected]
        s3objs = s3.get_many(names)
        assert {e.key for e in s3objs} == set(names)
        assert_results(s3objs, expected)
        assert_results(s3.get_all(), expected, info_should_be_empty=True)
    # option 5) run object
    if DO_TEST_RUN:
        # Only works if a metadata service exists with the run in question.
        namespace(None)
        with S3(bucket=parsed.netloc, prefix=parsed.path, run=Run(pathspec)) as s3:
            names = [url.split("/")[-1] for url in expected]
            assert_results(s3.get_many(names), expected)