示例#1
0
def test_simple(token_restore):
    """Listing works with an explicit token and again without passing one.

    The test first checks that ``GCSFileSystem.tokens`` is empty, then lists
    the root with a filesystem built from ``GOOGLE_TOKEN`` and once more with
    a filesystem built from the project alone.
    """
    assert not GCSFileSystem.tokens

    with_token = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN)
    listing = with_token.ls('')
    assert listing

    # token is now cached
    without_token = GCSFileSystem(TEST_PROJECT)
    listing = without_token.ls('')
    assert listing
示例#2
0
def test_raise_on_project_mismatch(mock_auth):
    """An explicit project differing from the google default raises ValueError.

    ``mock_auth.default`` is stubbed to report ``"my_other_project"`` as the
    default project; when no project is passed, that default is what the
    filesystem exposes as ``project``.
    """
    mock_auth.default.return_value = (requests.Session(), "my_other_project")

    expected_error = (
        "'my_project' does not match the google default project 'my_other_project'"
    )
    with pytest.raises(ValueError, match=expected_error):
        GCSFileSystem(project="my_project", token="google_default")

    # No explicit project: the stubbed default project is picked up.
    fs = GCSFileSystem(token="google_default")
    assert fs.project == "my_other_project"
示例#3
0
def test_current():
    """Check ``current()``, session sharing, and the non-secure token type.

    Asserts that ``GCSFileSystem.current()`` is the instance yielded by
    ``gcs_maker``, that a second instance built with the same arguments has
    the same ``session`` object, and that with ``secure_serialize=False`` the
    ``token`` attribute is a ``google.auth`` ``Credentials`` instance.
    """
    from google.auth import credentials

    with gcs_maker() as gcs:
        assert GCSFileSystem.current() is gcs

        same_args = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN)
        assert same_args.session is gcs.session

        insecure = GCSFileSystem(
            TEST_PROJECT, token=GOOGLE_TOKEN, secure_serialize=False
        )
        assert isinstance(insecure.token, credentials.Credentials)
示例#4
0
def test_validate_response():
    """Exercise ``validate_response`` for success and several error statuses.

    Covers: 200 (no exception), 503 with and without a JSON error body
    (``HttpError`` whose message has ``", 503"`` appended), 403 (``IOError``),
    404 (``FileNotFoundError``), 502 (``ProxyError``) and an md5 mismatch
    reported via the ``X-Goog-Hash`` header (``ChecksumError``).
    """
    path = "/path"
    fs = GCSFileSystem(token="anon")

    # a 200 status passes through without raising
    fs.validate_response(200, None, None, path)

    # HttpError with no JSON body
    with pytest.raises(HttpError) as excinfo:
        fs.validate_response(503, b"", None, path)
    error = excinfo.value
    assert error.code == 503
    assert error.message == ", 503"

    # HttpError with JSON body
    body = {"error": {"code": 503, "message": b"Service Unavailable"}}
    with pytest.raises(HttpError) as excinfo:
        fs.validate_response(503, None, body, path)
    error = excinfo.value
    assert error.code == 503
    assert error.message == b"Service Unavailable, 503"

    # 403
    body = {"error": {"message": "Not ok"}}
    with pytest.raises(IOError, match="Forbidden: /path\nNot ok"):
        fs.validate_response(403, None, body, path)

    # 404
    with pytest.raises(FileNotFoundError):
        fs.validate_response(404, b"", None, path)

    # 502
    with pytest.raises(ProxyError):
        fs.validate_response(502, b"", None, path)

    # ChecksumError: header advertises the md5 of b"foo" but the body is b"f"
    digest = hashlib.md5(b"foo").digest()
    md5 = base64.b64encode(digest).decode("ascii")
    headers = {"X-Goog-Hash": f"md5={md5}"}
    with pytest.raises(ChecksumError):
        fs.validate_response(0, b"f", None, path, headers)
示例#5
0
def test_validate_response():
    """Exercise ``validate_response`` for success and several error statuses.

    Covers: 200 (no exception), 503 with and without a JSON error body
    (``HttpError`` carrying the code and the body's message unchanged),
    403 (``IOError``), 404 (``FileNotFoundError``) and 502 (``ProxyError``).
    """
    path = "/path"
    fs = GCSFileSystem(token="anon")

    # a 200 status passes through without raising
    fs.validate_response(200, None, None, path)

    # HttpError with no JSON body
    with pytest.raises(HttpError) as excinfo:
        fs.validate_response(503, b"", None, path)
    error = excinfo.value
    assert error.code == 503
    assert error.message == ""

    # HttpError with JSON body
    body = {"error": {"code": 503, "message": b"Service Unavailable"}}
    with pytest.raises(HttpError) as excinfo:
        fs.validate_response(503, None, body, path)
    error = excinfo.value
    assert error.code == 503
    assert error.message == b"Service Unavailable"

    # 403
    body = {"error": {"message": "Not ok"}}
    with pytest.raises(IOError, match="Forbidden: /path\nNot ok"):
        fs.validate_response(403, None, body, path)

    # 404
    with pytest.raises(FileNotFoundError):
        fs.validate_response(404, b"", None, path)

    # 502
    with pytest.raises(ProxyError):
        fs.validate_response(502, b"", None, path)
示例#6
0
文件: utils.py 项目: yokomotod/gcsfs
def gcs_maker(populate=False):
    """Yield a ``GCSFileSystem`` whose test bucket has been prepared.

    Setup is best effort: the bucket is created (failures ignored) and the
    objects ``a``, ``b``, ``c`` and ``d`` are removed (failures ignored).
    When ``populate`` is true, every entry of ``files``, ``csv_files`` and
    ``text_files`` is written under ``TEST_BUCKET``.

    On exit, each path found under ``TEST_BUCKET`` is removed, again ignoring
    individual failures.

    Parameters
    ----------
    populate : bool
        Whether to upload the fixture files before yielding.

    Yields
    ------
    GCSFileSystem
        The filesystem connected with ``TEST_PROJECT`` / ``GOOGLE_TOKEN``.
    """
    gcs = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN)
    gcs.invalidate_cache()
    try:
        try:
            gcs.mkdir(TEST_BUCKET, default_acl="authenticatedread",
                      acl="publicReadWrite")
        except Exception:
            # Best effort (presumably the bucket may already exist).
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            pass
        for k in [a, b, c, d]:
            try:
                gcs.rm(k)
            except Exception:
                # Object could not be removed; nothing to clean up.
                pass
        if populate:
            for flist in [files, csv_files, text_files]:
                for fname, data in flist.items():
                    with gcs.open(TEST_BUCKET+'/'+fname, 'wb') as f:
                        f.write(data)
        yield gcs
    finally:
        for f in gcs.find(TEST_BUCKET):
            try:
                gcs.rm(f)
            except Exception:
                # Keep cleaning the remaining paths even if one removal fails.
                pass
示例#7
0
文件: utils.py 项目: oliverwm1/gcsfs
def gcs_maker(populate=False, **kwargs):
    """Yield a ``GCSFileSystem`` with an emptied, re-created test bucket.

    Extra keyword arguments are forwarded to the ``GCSFileSystem``
    constructor. When ``populate`` is true the contents of ``allfiles`` are
    written under ``TEST_BUCKET`` in a single ``pipe`` call. On exit,
    everything found under ``TEST_BUCKET`` is removed; any error during that
    cleanup is ignored.
    """
    gcs = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN, **kwargs)
    gcs.invalidate_cache()
    try:
        # Start from an empty bucket; a missing bucket is not an error.
        try:
            gcs.rm(TEST_BUCKET, recursive=True)
        except FileNotFoundError:
            pass

        # (Re)create the bucket, tolerating failure.
        try:
            gcs.mkdir(
                TEST_BUCKET,
                default_acl="authenticatedread",
                acl="publicReadWrite",
            )
        except Exception:
            pass

        if populate:
            payload = {}
            for name, data in allfiles.items():
                payload[TEST_BUCKET + "/" + name] = data
            gcs.pipe(payload)

        gcs.invalidate_cache()
        yield gcs
    finally:
        try:
            leftovers = gcs.find(TEST_BUCKET)
            gcs.rm(leftovers)
        except:  # noqa: E722
            pass
示例#8
0
def gcs_maker(populate=False):
    """Yield a ``GCSFileSystem`` with a created-and-emptied test bucket.

    The bucket is created (an ``HttpError`` from ``mkdir`` is ignored),
    emptied recursively, and the objects ``a``..``d`` are removed if present.
    When ``populate`` is true, every entry of ``files``, ``csv_files`` and
    ``text_files`` is written under ``TEST_BUCKET``. On exit each path found
    under ``TEST_BUCKET`` is removed, ignoring individual failures.
    """
    gcs = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN)
    gcs.invalidate_cache()
    try:
        try:
            gcs.mkdir(
                TEST_BUCKET, default_acl="authenticatedread", acl="publicReadWrite"
            )
        except gcsfs.utils.HttpError:
            pass

        # ensure we're empty.
        gcs.rm(TEST_BUCKET, recursive=True)

        for key in (a, b, c, d):
            try:
                gcs.rm(key)
            except FileNotFoundError:
                pass

        if populate:
            for group in (files, csv_files, text_files):
                for fname, data in group.items():
                    target = TEST_BUCKET + "/" + fname
                    with gcs.open(target, "wb") as out:
                        out.write(data)

        gcs.invalidate_cache()
        yield gcs
    finally:
        for leftover in gcs.find(TEST_BUCKET):
            try:
                gcs.rm(leftover)
            except:  # noqa: E722
                pass
示例#9
0
 def __init__(self, project_id: str, bucket_name: str,
              service_account_file: str):
     """Store the bucket name and build a storage client and a filesystem.

     Args:
         project_id: Project passed to ``storage.Client``.
         bucket_name: Kept on the instance as ``_bucket_name``.
         service_account_file: Path used both to load the client's
             credentials and as the ``token`` of the ``GCSFileSystem``.
     """
     self._bucket_name = bucket_name

     # Credentials for the storage client come from the key file.
     creds = service_account.Credentials.from_service_account_file(
         service_account_file)
     self._client = storage.Client(project=project_id, credentials=creds)

     # The filesystem is handed the same file as its token.
     self._fs = GCSFileSystem(token=service_account_file,
                              check_connection=True)
示例#10
0
文件: core.py 项目: x213212/Hub
def _load_fs_and_path(path, creds=None, session_creds=True, google_cloud_project=""):
    """Return ``(filesystem, path_within_filesystem)`` for a url-like path.

    * Paths starting with ``./``, ``/``, ``../`` or ``~/`` use the local
      ``file`` filesystem; ``"fs://"`` is stripped and ``~`` is expanded.
    * If ``session_creds`` is true, ``creds`` is None and the path is neither
      ``s3://`` nor ``gcs://``, ``_connect(path)`` supplies a new path and
      credentials.
    * ``s3://`` paths use an ``s3`` filesystem configured from ``creds``
      (session-style keys when ``session_creds`` is true, plain key/secret
      otherwise, or defaults when ``creds`` is None).
    * ``gcs://`` paths use a ``GCSFileSystem`` for ``google_cloud_project``.

    Any other path falls through the branches visible here, which yields an
    implicit ``None``.
    """
    local_prefixes = ("./", "/", "../", "~/")
    if path.startswith(local_prefixes):
        local_path = os.path.expanduser(path.replace("fs://", ""))
        return fsspec.filesystem("file"), local_path

    is_cloud_url = path.startswith(("s3://", "gcs://"))
    if session_creds and creds is None and not is_cloud_url:
        path, creds = _connect(path)

    if path.startswith("s3://"):
        s3_path = path[5:]

        if creds is None:
            return fsspec.filesystem("s3"), s3_path

        if session_creds:
            # Session credentials carry a token plus endpoint/region details.
            fs = fsspec.filesystem(
                "s3",
                key=creds["access_key"],
                secret=creds["secret_key"],
                token=creds["session_token"],
                client_kwargs={
                    "endpoint_url": creds["endpoint"],
                    "region_name": creds["region"],
                },
            )
            return fs, s3_path

        # Plain credentials: missing keys become None via ``dict.get``.
        fs = fsspec.filesystem(
            "s3",
            key=creds.get("access_key"),
            secret=creds.get("secret_key"),
        )
        return fs, s3_path

    if path.startswith("gcs://"):
        fs = GCSFileSystem(project=google_cloud_project, token=creds)
        return fs, path[6:]
示例#11
0
def test_many_connect():
    """Create filesystems and list the root from many threads at once.

    One instance is created up front in the calling thread; then 40 tasks
    are mapped over a 20-thread pool, each building its own instance and
    listing ``""``. Every task must return ``True``.
    """
    from multiprocessing.pool import ThreadPool

    GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN)

    def task(i):
        # ``i`` is only the work-item index supplied by ``pool.map``.
        GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN).ls("")
        return True

    pool = ThreadPool(processes=20)
    try:
        out = pool.map(task, range(40))
    finally:
        # Always shut the pool down, even when a task raised, so worker
        # threads are not leaked into later tests.
        pool.close()
        pool.join()
    assert all(out)
示例#12
0
def test_request_header():
    """The response headers report ``User-Agent`` as ``python-gcsfs/<version>``.

    Uses a requester-pays filesystem and lists objects under the ``test``
    prefix of ``TEST_REQUESTER_PAYS_BUCKET``.
    """
    with gcs_maker():
        fs = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN, requester_pays=True)

        # go through `call` with info_out=True so the result can be inspected
        listing_options = {
            "delimiter": "/",
            "prefix": "test",
            "maxResults": 100,
            "info_out": True,
        }
        response = fs.call(
            "GET", "b/{}/o/", TEST_REQUESTER_PAYS_BUCKET, **listing_options
        )

        user_agent = response.headers["User-Agent"]
        assert user_agent == "python-gcsfs/" + version
示例#13
0
def test_request_user_project():
    """A requester-pays request carries ``userProject=<TEST_PROJECT>``.

    The query string of the issued request URL is parsed and its
    ``userProject`` entry must be exactly ``[TEST_PROJECT]``.
    """
    with gcs_maker():
        fs = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN, requester_pays=True)

        # test directly against `_call` to inspect the result
        listing_options = {"delimiter": "/", "prefix": "test", "maxResults": 100}
        response = fs._call(
            "GET", "b/{}/o/", TEST_REQUESTER_PAYS_BUCKET, **listing_options
        )

        query = urlparse(response.request.url).query
        params = parse_qs(query)
        assert params["userProject"] == [TEST_PROJECT]
示例#14
0
文件: utils.py 项目: mrocklin/gcsfs
def gcs_maker(populate=False):
    """Yield a ``GCSFileSystem`` whose test bucket exists and is prepared.

    The bucket is created if ``exists`` reports it missing, and the objects
    ``a``, ``b``, ``c`` and ``d`` are removed on a best-effort basis. When
    ``populate`` is true, every entry of ``files``, ``csv_files`` and
    ``text_files`` is written under ``TEST_BUCKET``.

    On exit, every path returned by ``gcs.walk(TEST_BUCKET)`` is removed;
    a failure there propagates to the caller.

    Parameters
    ----------
    populate : bool
        Whether to upload the fixture files before yielding.

    Yields
    ------
    GCSFileSystem
        The filesystem connected with ``TEST_PROJECT`` / ``GOOGLE_TOKEN``.
    """
    gcs = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN)
    try:
        if not gcs.exists(TEST_BUCKET):
            gcs.mkdir(TEST_BUCKET)
        for k in [a, b, c, d]:
            try:
                gcs.rm(k)
            except Exception:
                # Best effort; ``Exception`` (not a bare ``except``) so that
                # KeyboardInterrupt / SystemExit still propagate.
                pass
        if populate:
            for flist in [files, csv_files, text_files]:
                for fname, data in flist.items():
                    with gcs.open(TEST_BUCKET + '/' + fname, 'wb') as f:
                        f.write(data)
        yield gcs
    finally:
        # Plain loop: the removals are side effects, no list is needed.
        for f in gcs.walk(TEST_BUCKET):
            gcs.rm(f)
示例#15
0
# NOTE: `parser` is created above this excerpt; only its last two flags are added here.
parser.add_argument('--firecloud', action='store_true', help="Use logic to generate primary keys for Terra upload via Firecloud")
parser.add_argument('--debug', action='store_true', help="Write additional logs for debugging")
args = parser.parse_args()

# --debug raises logging verbosity from INFO to DEBUG
log_level = logging.DEBUG if args.debug else logging.INFO
logging.basicConfig(level=log_level)

log = logging.getLogger(__name__)

# NOTE(review): presumably the maximum number of columns Terra accepts per table - confirm
TERRA_COLUMN_LIMIT = 1000

# Use the tables given via args.table; fall back to this default list when it is empty or unset
table_names = args.table or ['cslb', 'hles_cancer_condition', 'hles_dog', 'hles_health_condition', 'hles_owner',
                             'environment', 'sample', 'eols']
# NOTE(review): looks like a prefix for generated primary-key names; its use is outside this excerpt
PRIMARY_KEY_PREFIX = 'entity'

# Filesystem handle constructed with default arguments
gcs = GCSFileSystem()

# create a service object to handle all aspects of generating a primary key
@dataclass
class PrimaryKeyGenerator:
    table_name: str
    pk_name: str = field(init=False)
    firecloud: bool

    # this will calculate pk_name during init
    def __post_init__(self):
        # most tables should have "dog_id" as a key
        if self.table_name in {"hles_dog", "hles_cancer_condition", "hles_health_condition", "environment", "cslb", "eols"}:
            self.pk_name = 'dog_id'
        # owner table is linked to hles_dog via "owner_id"
        elif self.table_name == 'hles_owner':
示例#16
0
 def task(i):
     """Build a filesystem, list the root path, and return ``True``."""
     # first instance is made within thread - creating loop
     fs = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN)
     fs.ls("")
     return True
示例#17
0
def test_user_project_cat():
    """``cat`` on a requester-pays bucket object returns non-empty content."""
    fs = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN, requester_pays=True)
    target = TEST_REQUESTER_PAYS_BUCKET + "/foo.csv"
    content = fs.cat(target)
    size = len(content)
    assert size
示例#18
0
def test_user_project_fallback_google_default(mock_auth):
    """With no project given, ``project`` is the stubbed google default."""
    default_credentials = (requests.Session(), "my_default_project")
    mock_auth.default.return_value = default_credentials

    filesystem = GCSFileSystem(token="google_default")

    assert filesystem.project == "my_default_project"
示例#19
0
def test_current():
    """``current()`` is the ``gcs_maker`` instance; a new one shares its session."""
    with gcs_maker() as primary:
        current = GCSFileSystem.current()
        assert current is primary

        secondary = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN)
        assert secondary.session is primary.session
示例#20
0
 def task(i):
     """Build a filesystem, list the root path, and return ``True``."""
     fs = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN)
     fs.ls("")
     return True
示例#21
0
def test_simple():
    """Listing the test bucket works with and without a leading slash."""
    assert not GCSFileSystem.tokens

    fs = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN)

    # Neither form may raise; a leading '/' is accepted as well.
    for bucket_path in (TEST_BUCKET, "/" + TEST_BUCKET):
        fs.ls(bucket_path)