Example #1
async def gs_filesystem(request):
    token = secret_alnum_string()

    with ThreadPoolExecutor() as thread_pool:
        if request.param.startswith('router/'):
            fs = RouterAsyncFS('file',
                               filesystems=[
                                   LocalAsyncFS(thread_pool),
                                   GoogleStorageAsyncFS()
                               ])
        else:
            assert request.param.endswith('gs')
            fs = GoogleStorageAsyncFS()
        async with fs:
            test_storage_uri = os.environ['HAIL_TEST_STORAGE_URI']
            protocol = 'gs://'
            assert test_storage_uri[:len(protocol)] == protocol
            base = f'{test_storage_uri}/tmp/{token}/'

            await fs.mkdir(base)
            sema = asyncio.Semaphore(50)
            async with sema:
                yield (sema, fs, base)
                await fs.rmtree(sema, base)
            assert not await fs.isdir(base)
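The fixture above is an async-generator pytest fixture (its @pytest.fixture decorator is not shown in the excerpt). A minimal sketch of a test consuming it, assuming pytest-asyncio drives the event loop; the test name and object name are illustrative:

import pytest

@pytest.mark.asyncio
async def test_roundtrip(gs_filesystem):
    sema, fs, base = gs_filesystem  # semaphore, AsyncFS, gs:// base path from the fixture

    url = f'{base}roundtrip'  # illustrative object under the fixture's temporary directory
    async with await fs.create(url) as f:
        await f.write(b'hello')  # write through the async filesystem

    async with await fs.open(url) as f:
        assert await f.read() == b'hello'  # read the object back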
Example #2
    def setUp(self):
        bucket_name = get_user_config().get('batch', 'bucket')
        token = uuid.uuid4()
        self.test_path = f'gs://{bucket_name}/memory-tests/{token}'

        self.fs = GoogleStorageAsyncFS(project=PROJECT)
        self.client = BlockingMemoryClient(fs=self.fs)
        self.temp_files = set()
Example #3
    def __init__(self,
                 gcs_project=None,
                 fs=None,
                 deploy_config=None,
                 session=None,
                 headers=None,
                 _token=None):
        if not deploy_config:
            self._deploy_config = get_deploy_config()
        else:
            self._deploy_config = deploy_config

        self.url = self._deploy_config.base_url('memory')
        self.objects_url = f'{self.url}/api/v1alpha/objects'
        self._session = session

        if fs is None:
            fs = GoogleStorageAsyncFS(project=gcs_project)
        self._fs = fs

        self._headers = {}
        if headers:
            self._headers.update(headers)
        if _token:
            self._headers['Authorization'] = f'Bearer {_token}'
Example #4
    def _fs(self) -> AsyncFS:
        if self._DEPRECATED_project is not None:
            if self._DEPRECATED_fs is None:
                self._DEPRECATED_fs = RouterAsyncFS('file', [
                    LocalAsyncFS(ThreadPoolExecutor()),
                    GoogleStorageAsyncFS(project=self._DEPRECATED_project)
                ])
            return self._DEPRECATED_fs
        return self._backend._fs
Example #5
async def filesystem(
        request) -> AsyncIterator[Tuple[asyncio.Semaphore, AsyncFS, str]]:
    token = secret_alnum_string()

    with ThreadPoolExecutor() as thread_pool:
        fs: AsyncFS
        if request.param.startswith('router/'):
            fs = RouterAsyncFS('file',
                               filesystems=[
                                   LocalAsyncFS(thread_pool),
                                   GoogleStorageAsyncFS(),
                                   S3AsyncFS(thread_pool),
                                   AzureAsyncFS()
                               ])
        elif request.param == 'file':
            fs = LocalAsyncFS(thread_pool)
        elif request.param.endswith('gs'):
            fs = GoogleStorageAsyncFS()
        elif request.param.endswith('s3'):
            fs = S3AsyncFS(thread_pool)
        else:
            assert request.param.endswith('hail-az')
            fs = AzureAsyncFS()
        async with fs:
            if request.param.endswith('file'):
                base = f'/tmp/{token}/'
            elif request.param.endswith('gs'):
                bucket = os.environ['HAIL_TEST_GCS_BUCKET']
                base = f'gs://{bucket}/tmp/{token}/'
            elif request.param.endswith('s3'):
                bucket = os.environ['HAIL_TEST_S3_BUCKET']
                base = f's3://{bucket}/tmp/{token}/'
            else:
                assert request.param.endswith('hail-az')
                account = os.environ['HAIL_TEST_AZURE_ACCOUNT']
                container = os.environ['HAIL_TEST_AZURE_CONTAINER']
                base = f'hail-az://{account}/{container}/tmp/{token}/'

            await fs.mkdir(base)
            sema = asyncio.Semaphore(50)
            async with sema:
                yield (sema, fs, base)
                await fs.rmtree(sema, base)
            assert not await fs.isdir(base)
Example #6
class Tests(unittest.TestCase):
    def setUp(self):
        bucket_name = get_user_config().get('batch', 'bucket')
        token = uuid.uuid4()
        self.test_path = f'gs://{bucket_name}/memory-tests/{token}'

        self.fs = GoogleStorageAsyncFS(project=PROJECT)
        self.client = BlockingMemoryClient(fs=self.fs)
        self.temp_files = set()

    def tearDown(self):
        async_to_blocking(self.fs.rmtree(None, self.test_path))
        self.client.close()

    async def add_temp_file_from_string(self, name: str, str_value: bytes):
        handle = f'{self.test_path}/{name}'

        async with await self.fs.create(handle) as f:
            await f.write(str_value)

        return handle

    def test_non_existent(self):
        for _ in range(3):
            self.assertIsNone(
                self.client._get_file_if_exists(
                    f'{self.test_path}/nonexistent'))

    def test_small_write_around(self):
        async def read(url):
            async with await self.fs.open(url) as f:
                return await f.read()

        cases = [('empty_file', b''), ('null', b'\0'),
                 ('small', b'hello world')]
        for file, data in cases:
            handle = async_to_blocking(
                self.add_temp_file_from_string(file, data))
            expected = async_to_blocking(read(handle))
            self.assertEqual(expected, data)
            i = 0
            cached = self.client._get_file_if_exists(handle)
            while cached is None and i < 10:
                cached = self.client._get_file_if_exists(handle)
                i += 1
            self.assertEqual(cached, expected)

    def test_small_write_through(self):
        cases = [('empty_file2', b''), ('null2', b'\0'),
                 ('small2', b'hello world')]
        for file, data in cases:
            filename = f'{self.test_path}/{file}'
            self.client.write_file(filename, data)
            cached = self.client._get_file_if_exists(filename)
            self.assertEqual(cached, data)
Example #7
async def gs_filesystem(request):
    token = secret_alnum_string()

    with ThreadPoolExecutor() as thread_pool:
        if request.param.startswith('router/'):
            fs = RouterAsyncFS(
                'file', [LocalAsyncFS(thread_pool),
                         GoogleStorageAsyncFS()])
        else:
            assert request.param.endswith('gs')
            fs = GoogleStorageAsyncFS()
        async with fs:
            bucket = os.environ['HAIL_TEST_GCS_BUCKET']
            base = f'gs://{bucket}/tmp/{token}/'

            await fs.mkdir(base)
            sema = asyncio.Semaphore(50)
            async with sema:
                yield (sema, fs, base)
                await fs.rmtree(sema, base)
            assert not await fs.isdir(base)
Example #8
def filesystem_from_scheme(scheme, thread_pool=None, gcs_params=None):
    if scheme == 'file':
        assert thread_pool is not None
        return LocalAsyncFS(thread_pool)
    if scheme == 'gs':
        return GoogleStorageAsyncFS(params=gcs_params)
    if scheme == 's3':
        assert thread_pool is not None
        return S3AsyncFS(thread_pool)
    if scheme == 'hail-az':
        return AzureAsyncFS()
    raise ValueError(f'Unsupported scheme: {scheme}')
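A hedged sketch of how such a scheme-dispatch helper might be used; the read_url coroutine and the urlparse-based scheme extraction are illustrative additions, not part of the original snippet:

from concurrent.futures import ThreadPoolExecutor
from urllib.parse import urlparse

async def read_url(url: str) -> bytes:
    # Choose the filesystem implementation from the URL scheme, defaulting to 'file'.
    with ThreadPoolExecutor() as thread_pool:
        scheme = urlparse(url).scheme or 'file'
        async with filesystem_from_scheme(scheme, thread_pool=thread_pool) as fs:
            async with await fs.open(url) as f:
                return await f.read()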
Example #9
async def get_or_add_user(app, userdata):
    users = app['users']
    username = userdata['username']
    if username not in users:
        k8s_client = app['k8s_client']
        hail_identity_secret = await retry_transient_errors(
            k8s_client.read_namespaced_secret,
            userdata['hail_credentials_secret_name'],
            DEFAULT_NAMESPACE,
            _request_timeout=5.0)
        gsa_key = json.loads(
            base64.b64decode(hail_identity_secret.data['key.json']).decode())
        credentials = GoogleCredentials.from_credentials_data(gsa_key)
        users[username] = {'fs': GoogleStorageAsyncFS(credentials=credentials)}
    return users[username]
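A sketch of how the cached per-user filesystem might be consumed by a request handler; the handler name, the 'url' query parameter, and the aiohttp-style request object are assumptions made for illustration:

async def read_object(request, userdata):
    # Reuse (or lazily create) the user's GoogleStorageAsyncFS from the app-level cache.
    user = await get_or_add_user(request.app, userdata)
    fs = user['fs']
    async with await fs.open(request.query['url']) as f:
        return await f.read()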
Example #10
async def router_filesystem(
    request
) -> AsyncIterator[Tuple[asyncio.Semaphore, AsyncFS, Dict[str, str]]]:
    token = secrets.token_hex(16)

    with ThreadPoolExecutor() as thread_pool:
        async with RouterAsyncFS('file',
                                 filesystems=[
                                     LocalAsyncFS(thread_pool),
                                     GoogleStorageAsyncFS(),
                                     S3AsyncFS(thread_pool),
                                     AzureAsyncFS()
                                 ]) as fs:
            file_base = f'/tmp/{token}/'
            await fs.mkdir(file_base)

            gs_bucket = os.environ['HAIL_TEST_GCS_BUCKET']
            gs_base = f'gs://{gs_bucket}/tmp/{token}/'

            s3_bucket = os.environ['HAIL_TEST_S3_BUCKET']
            s3_base = f's3://{s3_bucket}/tmp/{token}/'

            azure_account = os.environ['HAIL_TEST_AZURE_ACCOUNT']
            azure_container = os.environ['HAIL_TEST_AZURE_CONTAINER']
            azure_base = f'hail-az://{azure_account}/{azure_container}/tmp/{token}/'

            bases = {
                'file': file_base,
                'gs': gs_base,
                's3': s3_base,
                'hail-az': azure_base
            }

            sema = asyncio.Semaphore(50)
            async with sema:
                yield (sema, fs, bases)
                await bounded_gather2(
                    sema, functools.partial(fs.rmtree, sema, file_base),
                    functools.partial(fs.rmtree, sema, gs_base),
                    functools.partial(fs.rmtree, sema, s3_base),
                    functools.partial(fs.rmtree, sema, azure_base))

            assert not await fs.isdir(file_base)
            assert not await fs.isdir(gs_base)
            assert not await fs.isdir(s3_base)
            assert not await fs.isdir(azure_base)
Example #11
def bucket_and_temporary_file():
    bucket, prefix = GoogleStorageAsyncFS.get_bucket_name(
        os.environ['HAIL_TEST_STORAGE_URI'])
    return bucket, prefix + '/' + secrets.token_hex(16)
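An illustrative pairing of this helper with GoogleStorageAsyncFS; the write_probe coroutine and its payload are assumptions, not part of the original:

async def write_probe(fs: GoogleStorageAsyncFS) -> str:
    # Build a fresh gs:// URL from the bucket and unique object name returned above.
    bucket, name = bucket_and_temporary_file()
    url = f'gs://{bucket}/{name}'
    async with await fs.create(url) as f:
        await f.write(b'probe data')
    return url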
Example #12
    def __init__(self,
                 *args,
                 billing_project: Optional[str] = None,
                 bucket: Optional[str] = None,
                 remote_tmpdir: Optional[str] = None,
                 google_project: Optional[str] = None,
                 token: Optional[str] = None):
        if len(args) > 2:
            raise TypeError(
                f'ServiceBackend() takes 2 positional arguments but {len(args)} were given'
            )
        if len(args) >= 1:
            if billing_project is not None:
                raise TypeError(
                    'ServiceBackend() got multiple values for argument \'billing_project\''
                )
            warnings.warn(
                'Use of deprecated positional argument \'billing_project\' in ServiceBackend(). Specify \'billing_project\' as a keyword argument instead.'
            )
            billing_project = args[0]
        if len(args) >= 2:
            if bucket is not None:
                raise TypeError(
                    'ServiceBackend() got multiple values for argument \'bucket\''
                )
            warnings.warn(
                'Use of deprecated positional argument \'bucket\' in ServiceBackend(). Specify \'bucket\' as a keyword argument instead.'
            )
            bucket = args[1]

        if remote_tmpdir is not None and bucket is not None:
            raise ValueError(
                'Cannot specify both remote_tmpdir and bucket in ServiceBackend()'
            )

        if billing_project is None:
            billing_project = get_user_config().get('batch',
                                                    'billing_project',
                                                    fallback=None)
        if billing_project is None:
            raise ValueError(
                'the billing_project parameter of ServiceBackend must be set '
                'or run `hailctl config set batch/billing_project '
                'MY_BILLING_PROJECT`')
        self._batch_client = BatchClient(billing_project, _token=token)
        self.__fs: AsyncFS = RouterAsyncFS('file', [
            LocalAsyncFS(ThreadPoolExecutor()),
            GoogleStorageAsyncFS(project=google_project)
        ])
        if remote_tmpdir is None:
            if bucket is None:
                bucket = get_user_config().get('batch',
                                               'bucket',
                                               fallback=None)
            if bucket is None:
                raise ValueError(
                    'either the bucket or remote_tmpdir parameter of ServiceBackend '
                    'must be set or run `hailctl config set batch/bucket MY_BUCKET`'
                )
            if 'gs://' in bucket:
                raise ValueError(
                    'The bucket parameter to ServiceBackend() should be a bucket name, not a path. '
                    'Use the remote_tmpdir parameter to specify a path.')
            remote_tmpdir = f'gs://{bucket}/batch'
        else:
            if not remote_tmpdir.startswith('gs://'):
                raise ValueError(
                    'remote_tmpdir must be a google storage path like gs://bucket/folder'
                )
        if remote_tmpdir[-1] != '/':
            remote_tmpdir += '/'
        self.remote_tmpdir = remote_tmpdir
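Illustrative instantiations based on the constructor above; the billing project and bucket names are placeholders:

# Explicit remote temporary directory (mutually exclusive with 'bucket').
backend = ServiceBackend(billing_project='my-billing-project',
                         remote_tmpdir='gs://my-bucket/batch/tmp')

# Bucket name only; the constructor derives remote_tmpdir as f'gs://{bucket}/batch/'.
backend = ServiceBackend(billing_project='my-billing-project',
                         bucket='my-bucket')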