Example #1
def test_get_one_wo_meta(s3root, prefixes, expected):
    with S3() as s3:
        for url, item in expected.items():
            for _, expected_result in item.items():
                range_info = expected_result.range
                if expected_result.size is None:
                    # ensure that the default return_missing=False works
                    with pytest.raises(MetaflowS3NotFound):
                        s3obj = s3.get(s3_get_object_from_url_range(url, range_info))
                    s3obj = s3.get(
                        s3_get_object_from_url_range(url, range_info),
                        return_missing=True,
                        return_info=False,
                    )
                    assert_results(
                        [s3obj],
                        {url: item},
                        info_should_be_empty=True,
                        ranges_fetched=[range_info],
                    )
                else:
                    s3obj = s3.get(
                        s3_get_object_from_url_range(url, range_info),
                        return_info=False,
                    )
                    assert_results(
                        [s3obj],
                        {url: item},
                        info_should_be_empty=True,
                        ranges_fetched=[range_info],
                    )
Example #2
def test_get_exceptions(s3root, prefixes, expected):
    # get_many() goes via s3op, get() is an in-process method - test both code paths
    with S3() as s3:
        with pytest.raises(MetaflowS3AccessDenied):
            s3.get_many(["s3://foobar/foo"])
        with pytest.raises(MetaflowS3AccessDenied):
            s3.get("s3://foobar/foo")
    with S3(s3root=s3root) as s3:
        with pytest.raises(MetaflowS3NotFound):
            s3.get_many(["this_file_does_not_exist"])
        with pytest.raises(MetaflowS3NotFound):
            s3.get("this_file_does_not_exist")
Example #3
def test_put_one(s3root, objs, expected):
    with S3(s3root=s3root) as s3:
        for key, obj in objs:
            s3url = s3.put(key, obj)
            assert s3url in expected
            s3obj = s3.get(key)
            assert s3obj.key == key
            assert_results([s3obj], {s3url: expected[s3url]})
            assert s3obj.blob == to_bytes(obj)
            # a second put with overwrite=False must not replace the existing object
            s3url = s3.put(key, "random_value", overwrite=False)
            assert s3url in expected
            s3obj = s3.get(key)
            assert s3obj.key == key
            assert_results([s3obj], {s3url: expected[s3url]})
            assert s3obj.blob == to_bytes(obj)
Example #4
def _do():
    # nested helper: `expected` is captured from the enclosing test's scope
    with S3() as s3:
        res = []
        for url in expected:
            # Use return_missing as this is the most expensive path
            res.append(s3.get(url, return_missing=True))
        return res
Example #5
def test_init_options(s3root, pathspecs, expected):
    [pathspec] = pathspecs
    flow_name, run_id = pathspec.split("/")
    plen = len(s3root)

    # option 1) s3root as prefix
    with S3(s3root=s3root) as s3:
        for url, exp in expected.items():
            # s3root should work as a prefix
            s3obj = s3.get(url[plen:])
            assert s3obj.key == url[plen:]
            assert_results([s3obj], {url: exp})
        with pytest.raises(MetaflowS3URLException):
            s3.get("s3://some/fake/address")

    # option 2) full url as s3root
    for url, exp in expected.items():
        with S3(s3root=url) as s3:
            s3obj = s3.get()
            assert_results([s3obj], {url: exp})

    # option 3) full urls
    with S3() as s3:
        for url, exp in expected.items():
            # full urls should work as-is
            s3obj = s3.get(url)
            assert s3obj.key == url
            assert_results([s3obj], {url: exp})
        with pytest.raises(MetaflowS3URLException):
            s3.get("suffix")
        with pytest.raises(MetaflowS3URLException):
            s3.get("s3://nopath")
        with pytest.raises(MetaflowS3URLException):
            s3.get_many(["suffixes"])
        with pytest.raises(MetaflowS3URLException):
            s3.get_recursive(["suffixes"])
        with pytest.raises(MetaflowS3URLException):
            s3.get_all()

    # option 4) 'current' environment (fake a running flow)
    flow = FakeFlow(use_cli=False)

    parsed = urlparse(s3root)
    with pytest.raises(MetaflowS3URLException):
        # current not set yet, so this should fail
        with S3(run=flow):
            pass

    current._set_env(
        FakeFlow(name=flow_name),
        run_id,
        "no_step",
        "no_task",
        "no_origin_run_id",
        "no_ns",
        "no_user",
    )

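    # with the 'current' run context set, constructing S3 with run=flow now succeeds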
    with S3(bucket=parsed.netloc, prefix=parsed.path, run=flow) as s3:
        for url, exp in expected.items():
            name = url.split("/")[-1]
            s3obj = s3.get(name)
            assert s3obj.key == name
            assert_results([s3obj], {url: exp})
        names = [url.split("/")[-1] for url in expected]
        s3objs = s3.get_many(names)
        assert {e.key for e in s3objs} == set(names)
        assert_results(s3objs, expected)
        assert_results(s3.get_all(), expected, info_should_be_empty=True)

    # option 5) run object
    if DO_TEST_RUN:
        # Only works if a metadata service exists with the run in question.
        namespace(None)
        with S3(bucket=parsed.netloc, prefix=parsed.path, run=Run(pathspec)) as s3:
            names = [url.split("/")[-1] for url in expected]
            assert_results(s3.get_many(names), expected)