Exemplo n.º 1
0
class CatTestCase(MockGoogleTestCase):
    def setUp(self):
        super(CatTestCase, self).setUp()
        self.fs = GCSFilesystem()

    def test_cat_uncompressed(self):
        self.put_gcs_multi({'gs://walrus/data/foo': b'foo\nfoo\n'})

        self.assertEqual(b''.join(self.fs._cat_file('gs://walrus/data/foo')),
                         b'foo\nfoo\n')

    def test_cat_bz2(self):
        self.put_gcs_multi(
            {'gs://walrus/data/foo.bz2': bz2.compress(b'foo\n' * 1000)})

        self.assertEqual(
            b''.join(self.fs._cat_file('gs://walrus/data/foo.bz2')),
            b'foo\n' * 1000)

    def test_cat_gz(self):
        self.put_gcs_multi(
            {'gs://walrus/data/foo.gz': gzip_compress(b'foo\n' * 10000)})

        self.assertEqual(
            b''.join(self.fs._cat_file('gs://walrus/data/foo.gz')),
            b'foo\n' * 10000)

    def test_chunks_file(self):
        self.put_gcs_multi({'gs://walrus/data/foo': b'foo\nfoo\n' * 1000})

        self.assertGreater(
            len(list(self.fs._cat_file('gs://walrus/data/foo'))), 1)
Exemplo n.º 2
0
    def test_override_location_set_at_init(self):
        fs = GCSFilesystem(location='us-central1')

        fs.create_bucket('walrus', location='us-east1')

        bucket = fs.get_bucket('walrus')

        self.assertEqual(bucket.location, 'US-EAST1')
Exemplo n.º 3
0
    def test_blank_out_location_set_at_init(self):
        fs = GCSFilesystem(location='us-central1')

        fs.create_bucket('walrus', location='')

        bucket = fs.get_bucket('walrus')

        self.assertEqual(bucket.location, 'US')
Exemplo n.º 4
0
    def test_override_location_set_at_init(self):
        fs = GCSFilesystem(location='us-central1')

        fs.create_bucket('walrus', location='us-east1')

        bucket = fs.get_bucket('walrus')

        self.assertEqual(bucket.location, 'US-EAST1')
Exemplo n.º 5
0
    def test_location(self):
        fs = GCSFilesystem()

        fs.create_bucket('walrus', location='us-central1')

        bucket = fs.get_bucket('walrus')

        self.assertEqual(bucket.location, 'US-CENTRAL1')
Exemplo n.º 6
0
    def test_blank_out_object_ttl_days_set_at_init(self):
        fs = GCSFilesystem(object_ttl_days=234)

        fs.create_bucket('walrus', object_ttl_days=0)

        bucket = fs.get_bucket('walrus')

        self.assertEqual(list(bucket.lifecycle_rules), [])
Exemplo n.º 7
0
    def test_location_set_at_init(self):
        fs = GCSFilesystem(location='us-central1')

        fs.create_bucket('walrus')

        bucket = fs.get_bucket('walrus')

        self.assertEqual(bucket.location, 'US-CENTRAL1')
Exemplo n.º 8
0
    def test_blank_out_object_ttl_days_set_at_init(self):
        fs = GCSFilesystem(object_ttl_days=234)

        fs.create_bucket('walrus', object_ttl_days=0)

        bucket = fs.get_bucket('walrus')

        self.assertEqual(list(bucket.lifecycle_rules), [])
Exemplo n.º 9
0
    def test_default(self):
        fs = GCSFilesystem()

        fs.create_bucket('walrus')

        bucket = fs.get_bucket('walrus')

        self.assertEqual(bucket.location, 'US')
        self.assertEqual(list(bucket.lifecycle_rules), [])
Exemplo n.º 10
0
    def test_default(self):
        fs = GCSFilesystem()

        fs.create_bucket('walrus')

        bucket = fs.get_bucket('walrus')

        self.assertEqual(bucket.location, 'US')
        self.assertEqual(list(bucket.lifecycle_rules), [])
Exemplo n.º 11
0
    def test_override_object_ttl_days_set_at_init(self):
        fs = GCSFilesystem(object_ttl_days=234)

        fs.create_bucket('walrus', object_ttl_days=123)

        bucket = fs.get_bucket('walrus')

        self.assertEqual(
            list(bucket.lifecycle_rules),
            [dict(action=dict(type='Delete'), condition=dict(age=123))])
Exemplo n.º 12
0
    def test_put_with_part_size(self):
        local_path = self.makefile('foo', contents=b'bar')
        dest = 'gs://bar-files/foo'
        self.storage_client().bucket('bar-files').create()

        fs = GCSFilesystem(part_size=12345)

        with patch.object(GCSFilesystem, '_blob') as blob_meth:
            fs.put(local_path, dest)
            blob_meth.assert_called_once_with(dest, chunk_size=12345)
Exemplo n.º 13
0
    def test_put_with_part_size(self):
        local_path = self.makefile('foo', contents=b'bar')
        dest = 'gs://bar-files/foo'
        self.storage_client().bucket('bar-files').create()

        fs = GCSFilesystem(part_size=12345)

        with patch.object(GCSFilesystem, '_blob') as blob_meth:
            fs.put(local_path, dest)
            blob_meth.assert_called_once_with(dest, chunk_size=12345)
Exemplo n.º 14
0
    def __init__(self, test_case):
        assert isinstance(test_case, MockGoogleAPITestCase)
        self._test_case = test_case
        self._fs = GCSFilesystem()

        self._cache_objects = dict()
        self._cache_buckets = dict()

        self._client_objects = MockGCSClientObjects(self)
        self._client_buckets = MockGCSClientBuckets(self)
Exemplo n.º 15
0
    def test_override_object_ttl_days_set_at_init(self):
        fs = GCSFilesystem(object_ttl_days=234)

        fs.create_bucket('walrus', object_ttl_days=123)

        bucket = fs.get_bucket('walrus')

        self.assertEqual(
            list(bucket.lifecycle_rules),
            [dict(action=dict(type='Delete'), condition=dict(age=123))])
Exemplo n.º 16
0
    def test_mkdir_bucket(self):
        fs = GCSFilesystem(location='us-central1', object_ttl_days=123)

        fs.mkdir('gs://walrus/data')

        bucket = fs.get_bucket('walrus')

        self.assertEqual(bucket.location, 'US-CENTRAL1')

        self.assertEqual(
            list(bucket.lifecycle_rules),
            [dict(action=dict(type='Delete'), condition=dict(age=123))])
Exemplo n.º 17
0
class CatTestCase(MockGoogleTestCase):

    def setUp(self):
        super(CatTestCase, self).setUp()
        self.fs = GCSFilesystem()

    def test_cat_uncompressed(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'foo\nfoo\n'
        })

        self.assertEqual(
            b''.join(self.fs._cat_file('gs://walrus/data/foo')),
            b'foo\nfoo\n')

    def test_cat_bz2(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo.bz2': bz2.compress(b'foo\n' * 1000)
        })

        self.assertEqual(
            b''.join(self.fs._cat_file('gs://walrus/data/foo.bz2')),
            b'foo\n' * 1000)

    def test_cat_gz(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo.gz': gzip_compress(b'foo\n' * 10000)
        })

        self.assertEqual(
            b''.join(self.fs._cat_file('gs://walrus/data/foo.gz')),
            b'foo\n' * 10000)

    def test_chunks_file(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'foo\nfoo\n' * 10000
        })

        self.assertGreater(
            len(list(self.fs._cat_file('gs://walrus/data/foo'))),
            1)

    def test_chunk_boundary(self):
        # trying to read from end of file raises an exception, which we catch
        data = b'a' * _CAT_CHUNK_SIZE + b'b' * _CAT_CHUNK_SIZE

        self.put_gcs_multi({
            'gs://walrus/data/foo': data,
        })

        self.assertEqual(
            list(self.fs._cat_file('gs://walrus/data/foo')),
            [b'a' * _CAT_CHUNK_SIZE, b'b' * _CAT_CHUNK_SIZE])
Exemplo n.º 18
0
class CatTestCase(MockGoogleTestCase):

    def setUp(self):
        super(CatTestCase, self).setUp()
        self.fs = GCSFilesystem()

    def test_cat_uncompressed(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'foo\nfoo\n'
        })

        self.assertEqual(
            b''.join(self.fs._cat_file('gs://walrus/data/foo')),
            b'foo\nfoo\n')

    def test_cat_bz2(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo.bz2': bz2.compress(b'foo\n' * 1000)
        })

        self.assertEqual(
            b''.join(self.fs._cat_file('gs://walrus/data/foo.bz2')),
            b'foo\n' * 1000)

    def test_cat_gz(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo.gz': gzip_compress(b'foo\n' * 10000)
        })

        self.assertEqual(
            b''.join(self.fs._cat_file('gs://walrus/data/foo.gz')),
            b'foo\n' * 10000)

    def test_chunks_file(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'foo\nfoo\n' * 10000
        })

        self.assertGreater(
            len(list(self.fs._cat_file('gs://walrus/data/foo'))),
            1)

    def test_chunk_boundary(self):
        # trying to read from end of file raises an exception, which we catch
        data = b'a' * _CAT_CHUNK_SIZE + b'b' * _CAT_CHUNK_SIZE

        self.put_gcs_multi({
            'gs://walrus/data/foo': data,
        })

        self.assertEqual(
            list(self.fs._cat_file('gs://walrus/data/foo')),
            [b'a' * _CAT_CHUNK_SIZE, b'b' * _CAT_CHUNK_SIZE])
Exemplo n.º 19
0
    def test_mkdir_bucket(self):
        fs = GCSFilesystem(location='us-central1', object_ttl_days=123)

        fs.mkdir('gs://walrus/data')

        bucket = fs.get_bucket('walrus')

        self.assertEqual(bucket.location, 'US-CENTRAL1')

        self.assertEqual(
            list(bucket.lifecycle_rules),
            [dict(action=dict(type='Delete'), condition=dict(age=123))])
Exemplo n.º 20
0
    def fs(self):
        # Spark supports basically every filesystem there is

        if not self._fs:
            self._fs = CompositeFilesystem()

            if boto3_installed:
                self._fs.add_fs('s3', S3Filesystem(
                    aws_access_key_id=self._opts['aws_access_key_id'],
                    aws_secret_access_key=self._opts['aws_secret_access_key'],
                    aws_session_token=self._opts['aws_session_token'],
                    s3_endpoint=self._opts['s3_endpoint'],
                    s3_region=self._opts['s3_region'],
                ), disable_if=_is_permanent_boto3_error)

            if google_libs_installed:
                self._fs.add_fs('gcs', GCSFilesystem(
                    project_id=self._opts['project_id'],
                    location=self._opts['gcs_region'],
                    object_ttl_days=_DEFAULT_CLOUD_TMP_DIR_OBJECT_TTL_DAYS,
                ), disable_if=_is_permanent_google_error)

            # Hadoop FS is responsible for all URIs that fall through to it
            self._fs.add_fs('hadoop', HadoopFilesystem(
                self._opts['hadoop_bin']))

            self._fs.add_fs('local', LocalFilesystem())

        return self._fs
Exemplo n.º 21
0
    def fs(self):
        # Spark supports basically every filesystem there is

        if not self._fs:
            self._fs = CompositeFilesystem()

            if boto3_installed:
                self._fs.add_fs('s3', S3Filesystem(
                    aws_access_key_id=self._opts['aws_access_key_id'],
                    aws_secret_access_key=self._opts['aws_secret_access_key'],
                    aws_session_token=self._opts['aws_session_token'],
                    s3_endpoint=self._opts['s3_endpoint'],
                    s3_region=self._opts['s3_region'],
                ), disable_if=_is_permanent_boto3_error)

            if google_libs_installed:
                self._fs.add_fs('gcs', GCSFilesystem(
                    project_id=self._opts['google_project_id']
                ), disable_if=_is_permanent_google_error)

            self._fs.add_fs('hadoop', HadoopFilesystem(
                self._opts['hadoop_bin']))

            self._fs.add_fs('local', LocalFilesystem())

        return self._fs
Exemplo n.º 22
0
    def test_local_tmp_dir_is_deprecated_and_does_nothing(self):
        fs = GCSFilesystem(local_tmp_dir=self.tmp_dir)
        self.assertTrue(self.log.warning.called)

        self.assertEqual(fs.client, self.Client(project=None,
                                                credentials=None))
        self.assertFalse(hasattr(fs, '_local_tmp_dir'))
Exemplo n.º 23
0
    def setUp(self):
        self.fs = GCSFilesystem()
        self.gcs_path = 'gs://walrus/data'

        self.list_req_mock = mock.MagicMock()

        objects_ret = mock.MagicMock()
        objects_ret.list.return_value = self.list_req_mock
        objects_ret.get_media.return_value = google_http.HttpRequest(
            None, None, self.gcs_path)

        api_client = mock.MagicMock()
        api_client.objects.return_value = objects_ret

        self.fs._api_client = api_client
        self.next_chunk_patch = patch.object(
            google_http.MediaIoBaseDownload, 'next_chunk')
Exemplo n.º 24
0
    def test_set_credentials_and_project_id(self):
        creds = Mock()
        project_id = 'alan-parsons'

        fs = GCSFilesystem(credentials=creds, project_id=project_id)
        self.assertFalse(self.log.warning.called)

        self.assertEqual(fs.client,
                         self.Client(project=project_id, credentials=creds))
Exemplo n.º 25
0
    def __init__(self, test_case):
        assert isinstance(test_case, MockGoogleAPITestCase)
        self._test_case = test_case
        self._fs = GCSFilesystem()

        self._cache_objects = dict()
        self._cache_buckets = dict()

        self._client_objects = MockGCSClientObjects(self)
        self._client_buckets = MockGCSClientBuckets(self)
Exemplo n.º 26
0
    def fs(self):
        """:py:class:`~mrjob.fs.base.Filesystem` object for SSH, S3, GCS, and
        the local filesystem.
        """
        if self._fs is not None:
            return self._fs

        self._gcs_fs = GCSFilesystem()

        self._fs = CompositeFilesystem(self._gcs_fs, LocalFilesystem())
        return self._fs
Exemplo n.º 27
0
class CatTestCase(MockGoogleAPITestCase):

    def setUp(self):
        super(CatTestCase, self).setUp()
        self.fs = GCSFilesystem()

    def test_cat_uncompressed(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'foo\nfoo\n'
        })

        self.assertEqual(
            b''.join(self.fs._cat_file('gs://walrus/data/foo')),
            b'foo\nfoo\n')

    def test_cat_bz2(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo.bz2': bz2.compress(b'foo\n' * 1000)
        })

        self.assertEqual(
            b''.join(self.fs._cat_file('gs://walrus/data/foo.bz2')),
            b'foo\n' * 1000)

    def test_cat_gz(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo.gz': gzip_compress(b'foo\n' * 10000)
        })

        self.assertEqual(
            b''.join(self.fs._cat_file('gs://walrus/data/foo.gz')),
            b'foo\n' * 10000)

    def test_chunks_file(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'foo\nfoo\n' * 1000
        })

        self.assertGreater(
            len(list(self.fs._cat_file('gs://walrus/data/foo'))),
            1)
Exemplo n.º 28
0
    def setUp(self):
        self.fs = GCSFilesystem()
        self.gcs_path = "gs://walrus/data"

        self.list_req_mock = mock.MagicMock()

        objects_ret = mock.MagicMock()
        objects_ret.list.return_value = self.list_req_mock
        objects_ret.get_media.return_value = google_http.HttpRequest(None, None, self.gcs_path)

        api_client = mock.MagicMock()
        api_client.objects.return_value = objects_ret

        self.fs._api_client = api_client
        self.next_chunk_patch = patch.object(google_http.MediaIoBaseDownload, "next_chunk")
Exemplo n.º 29
0
    def fs(self):
        """:py:class:`~mrjob.fs.base.Filesystem` object for SSH, S3, GCS, and
        the local filesystem.
        """
        if self._fs is not None:
            return self._fs

        self._gcs_fs = GCSFilesystem(
            credentials=self._credentials,
            local_tmp_dir=self._get_local_tmp_dir(),
            project_id=self._project_id,
        )

        self._fs = CompositeFilesystem(self._gcs_fs, LocalFilesystem())
        return self._fs
Exemplo n.º 30
0
    def fs(self):
        """:py:class:`~mrjob.fs.base.Filesystem` object for SSH, S3, GCS, and
        the local filesystem.
        """
        if self._fs is None:
            self._fs = CompositeFilesystem()

            location = self._opts['region'] or _zone_to_region(
                self._opts['zone'])

            self._fs.add_fs('gcs', GCSFilesystem(
                credentials=self._credentials,
                project_id=self._project_id,
                part_size=self._upload_part_size(),
                location=location,
                object_ttl_days=_DEFAULT_CLOUD_TMP_DIR_OBJECT_TTL_DAYS,
            ))

            self._fs.add_fs('local', LocalFilesystem())

        return self._fs
Exemplo n.º 31
0
class GCSFSHTTPErrorTestCase(PatcherTestCase):

    def setUp(self):
        self.fs = GCSFilesystem()
        self.gcs_path = 'gs://walrus/data'

        self.list_req_mock = mock.MagicMock()

        objects_ret = mock.MagicMock()
        objects_ret.list.return_value = self.list_req_mock
        objects_ret.get_media.return_value = google_http.HttpRequest(
            None, None, self.gcs_path)

        api_client = mock.MagicMock()
        api_client.objects.return_value = objects_ret

        self.fs._api_client = api_client
        self.next_chunk_patch = patch.object(
            google_http.MediaIoBaseDownload, 'next_chunk')

    def test_list_missing(self):
        self.list_req_mock.execute.side_effect = _http_exception(404)

        list(self.fs._ls_detailed(self.gcs_path))

    def test_list_actual_error(self):
        self.list_req_mock.execute.side_effect = _http_exception(500)

        with self.assertRaises(google_http.HttpError):
            list(self.fs._ls_detailed(self.gcs_path))

    def test_download_io_empty_file(self):
        io_obj = io.BytesIO()

        with self.next_chunk_patch as media_io_next_chunk:
            media_io_next_chunk.side_effect = _http_exception(416)

            self.fs._download_io(self.gcs_path, io_obj)
            self.assertEqual(len(io_obj.getvalue()), 0)

    def test_download_io_actual_error(self):
        io_obj = io.BytesIO()

        with self.next_chunk_patch as media_io_next_chunk:
            media_io_next_chunk.side_effect = _http_exception(500)

            with self.assertRaises(google_http.HttpError):
                self.fs._download_io(self.gcs_path, io_obj)
Exemplo n.º 32
0
class GCSFSTestCase(MockGoogleTestCase):

    def setUp(self):
        super(GCSFSTestCase, self).setUp()
        self.fs = GCSFilesystem()

    def test_ls_blob(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b''
        })

        self.assertEqual(list(self.fs.ls('gs://walrus/data/foo')),
                         ['gs://walrus/data/foo'])

    def test_ls_missing(self):
        self.assertEqual(list(self.fs.ls('gs://nope/not/here')), [])

    def test_ls_ignores_dirs(self):
        # Dataproc (i.e. Hadoop) will create empty blobs whose names end
        # in '/'
        self.put_gcs_multi({
            'gs://walrus/data/foo/': b'',
            'gs://walrus/data/foo/bar': b'baz',
        })

        self.assertEqual(list(self.fs.ls('gs://walrus/data')),
                         ['gs://walrus/data/foo/bar'])

    def test_ls_recursively(self):
        self.put_gcs_multi({
            'gs://walrus/data/bar': b'',
            'gs://walrus/data/bar/baz': b'',
            'gs://walrus/data/foo': b'',
            'gs://walrus/qux': b'',
        })

        uris = [
            'gs://walrus/data/bar',
            'gs://walrus/data/bar/baz',
            'gs://walrus/data/foo',
            'gs://walrus/qux',
        ]

        self.assertEqual(set(self.fs.ls('gs://walrus/')), set(uris))
        self.assertEqual(set(self.fs.ls('gs://walrus/*')), set(uris))

        self.assertEqual(set(self.fs.ls('gs://walrus/data')), set(uris[:-1]))
        self.assertEqual(set(self.fs.ls('gs://walrus/data/')), set(uris[:-1]))
        self.assertEqual(set(self.fs.ls('gs://walrus/data/*')), set(uris[:-1]))

    def test_ls_globs(self):
        self.put_gcs_multi({
            'gs://w/a': b'',
            'gs://w/a/b': b'',
            'gs://w/ab': b'',
            'gs://w/b': b'',
        })

        self.assertEqual(set(self.fs.ls('gs://w/')),
                         set(['gs://w/a', 'gs://w/a/b',
                              'gs://w/ab', 'gs://w/b']))
        self.assertEqual(set(self.fs.ls('gs://w/*')),
                         set(['gs://w/a', 'gs://w/a/b',
                              'gs://w/ab', 'gs://w/b']))
        self.assertEqual(list(self.fs.ls('gs://w/*/')),
                         ['gs://w/a/b'])
        self.assertEqual(list(self.fs.ls('gs://w/*/*')),
                         ['gs://w/a/b'])
        self.assertEqual(list(self.fs.ls('gs://w/a?')),
                         ['gs://w/ab'])
        # * can match /
        self.assertEqual(set(self.fs.ls('gs://w/a*')),
                         set(['gs://w/a', 'gs://w/a/b', 'gs://w/ab']))
        self.assertEqual(set(self.fs.ls('gs://w/*b')),
                         set(['gs://w/a/b', 'gs://w/ab', 'gs://w/b']))

    def test_du(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'abcde',
            'gs://walrus/data/bar/baz': b'fgh'
        })

        self.assertEqual(self.fs.du('gs://walrus/'), 8)
        self.assertEqual(self.fs.du('gs://walrus/data/foo'), 5)
        self.assertEqual(self.fs.du('gs://walrus/data/bar/baz'), 3)

    def test_exists(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'abcd'
        })
        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), True)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar'), False)

    def test_md5sum(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'abcd'
        })

        self.assertEqual(self.fs.md5sum('gs://walrus/data/foo'),
                         md5(b'abcd').hexdigest())

    def test_md5sum_of_missing_blob(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'abcd'
        })

        self.assertRaises(IOError, self.fs.md5sum, 'gs://walrus/data/bar')

    def test_mkdir_creates_buckets(self):
        self.assertNotIn('walrus', self.mock_gcs_fs)

        self.fs.mkdir('gs://walrus/data')

        self.assertIn('walrus', self.mock_gcs_fs)

    def test_mkdir_does_not_create_directories(self):
        self.fs.create_bucket('walrus')

        self.assertEqual(list(self.fs.ls('gs://walrus/')), [])

        self.fs.mkdir('gs://walrus/data')

        self.assertEqual(list(self.fs.ls('gs://walrus/')), [])

    def test_put(self):
        local_path = self.makefile('foo', contents=b'bar')
        dest = 'gs://bar-files/foo'
        self.storage_client().bucket('bar-files').create()

        self.fs.put(local_path, dest)
        self.assertEqual(b''.join(self.fs.cat(dest)), b'bar')

    def test_put_with_part_size(self):
        local_path = self.makefile('foo', contents=b'bar')
        dest = 'gs://bar-files/foo'
        self.storage_client().bucket('bar-files').create()

        fs = GCSFilesystem(part_size=12345)

        with patch.object(GCSFilesystem, '_blob') as blob_meth:
            fs.put(local_path, dest)
            blob_meth.assert_called_once_with(dest, chunk_size=12345)

    def test_put_chunk_size(self):
        local_path = self.makefile('foo', contents=b'bar')
        dest = 'gs://bar-files/foo'
        self.storage_client().bucket('bar-files').create()

        with patch.object(GCSFilesystem, '_blob') as blob_meth:
            with patch('mrjob.fs.gcs.log') as log:

                self.fs.put(local_path, dest, chunk_size=99999)
                blob_meth.assert_called_once_with(dest, chunk_size=99999)

                self.assertTrue(log.warning.called)

    def test_rm(self):
        self.put_gcs_multi({
            'gs://walrus/foo': b''
        })

        self.assertEqual(self.fs.exists('gs://walrus/foo'), True)
        self.fs.rm('gs://walrus/foo')
        self.assertEqual(self.fs.exists('gs://walrus/foo'), False)

    def test_rm_dir(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'',
            'gs://walrus/data/bar/baz': b'',
        })

        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), True)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar/baz'), True)
        self.fs.rm('gs://walrus/data')
        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), False)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar/baz'), False)
Exemplo n.º 33
0
class MockGCSClient(object):
    """Mock out GCSClient...

    TARGET API VERSION - Storage API v1

    Emulates GCS metadata and stores raw bytes
    Contains convenience functions for initializing items in GCS
    """
    def __init__(self, test_case):
        assert isinstance(test_case, MockGoogleAPITestCase)
        self._test_case = test_case
        self._fs = GCSFilesystem()

        self._cache_objects = dict()
        self._cache_buckets = dict()

        self._client_objects = MockGCSClientObjects(self)
        self._client_buckets = MockGCSClientBuckets(self)

    def objects(self):
        return self._client_objects

    def buckets(self):
        return self._client_buckets

    def put_gcs(self, gcs_uri, data):
        """Put data at gcs_uri, creating a bucket if necessary"""
        bucket, name = parse_gcs_uri(gcs_uri)

        try:
            self._fs.get_bucket(bucket)
        except google_errors.HttpError:
            self._fs.create_bucket(project=_TEST_PROJECT, name=bucket)

        bytes_io_obj = BytesIO(data)
        self.upload_io(bytes_io_obj, gcs_uri)

    def put_gcs_multi(self, gcs_uri_to_data_map):
        """Bulk put data at gcs_uris"""
        for gcs_uri, data in gcs_uri_to_data_map.items():
            self.put_gcs(gcs_uri, data)

    def download_io(self, src_uri, io_obj):
        """
        Clobber GCSFilesystem._download_io
        """
        bucket, name = parse_gcs_uri(src_uri)

        object_dict = _get_deep(self._cache_objects, [bucket, name])

        if not object_dict:
            raise Exception

        object_data = object_dict['_data']
        io_obj.write(object_data)
        return io_obj

    def upload_io(self, io_obj, dest_uri):
        """
        Clobber GCSFilesystem._upload_io
        """
        bucket, name = parse_gcs_uri(dest_uri)

        assert bucket in self._cache_buckets

        io_obj.seek(0)

        data = io_obj.read()

        # TODO - io_obj.close() ?  Not sure if callers of this function would
        # expect their io_objs to be closed

        object_resp = _insert_object_resp(bucket=bucket, name=name, data=data)

        _set_deep(self._cache_objects, [bucket, name], object_resp)

        return object_resp
Exemplo n.º 34
0
class MockGCSClient(object):
    """Mock out GCSClient...

    TARGET API VERSION - Storage API v1

    Emulates GCS metadata and stores raw bytes
    Contains convenience functions for initializing items in GCS
    """

    def __init__(self, test_case):
        assert isinstance(test_case, MockGoogleAPITestCase)
        self._test_case = test_case
        self._fs = GCSFilesystem()

        self._cache_objects = dict()
        self._cache_buckets = dict()

        self._client_objects = MockGCSClientObjects(self)
        self._client_buckets = MockGCSClientBuckets(self)

    def objects(self):
        return self._client_objects

    def buckets(self):
        return self._client_buckets

    def put_gcs(self, gcs_uri, data):
        """Put data at gcs_uri, creating a bucket if necessary"""
        bucket, name = parse_gcs_uri(gcs_uri)

        try:
            self._fs.get_bucket(bucket)
        except google_errors.HttpError:
            self._fs.create_bucket(project=_TEST_PROJECT, name=bucket)

        bytes_io_obj = BytesIO(data)
        self.upload_io(bytes_io_obj, gcs_uri)

    def put_gcs_multi(self, gcs_uri_to_data_map):
        """Bulk put data at gcs_uris"""
        for gcs_uri, data in gcs_uri_to_data_map.items():
            self.put_gcs(gcs_uri, data)

    def download_io(self, src_uri, io_obj):
        """
        Clobber GCSFilesystem._download_io
        """
        bucket, name = parse_gcs_uri(src_uri)

        object_dict = _get_deep(self._cache_objects, [bucket, name])

        if not object_dict:
            raise Exception

        object_data = object_dict['_data']
        io_obj.write(object_data)
        return io_obj

    def upload_io(self, io_obj, dest_uri):
        """
        Clobber GCSFilesystem._upload_io
        """
        bucket, name = parse_gcs_uri(dest_uri)

        assert bucket in self._cache_buckets

        io_obj.seek(0)

        data = io_obj.read()

        # TODO - io_obj.close() ?  Not sure if callers of this function would
        # expect their io_objs to be closed

        object_resp = _insert_object_resp(bucket=bucket, name=name, data=data)

        _set_deep(self._cache_objects, [bucket, name], object_resp)

        return object_resp
Exemplo n.º 35
0
class GCSFSTestCase(MockGoogleTestCase):
    def setUp(self):
        super(GCSFSTestCase, self).setUp()
        self.fs = GCSFilesystem()

    def test_ls_blob(self):
        self.put_gcs_multi({'gs://walrus/data/foo': b''})

        self.assertEqual(list(self.fs.ls('gs://walrus/data/foo')),
                         ['gs://walrus/data/foo'])

    def test_ls_missing(self):
        self.assertEqual(list(self.fs.ls('gs://nope/not/here')), [])

    def test_ls_ignores_dirs(self):
        # Dataproc (i.e. Hadoop) will create empty blobs whose names end
        # in '/'
        self.put_gcs_multi({
            'gs://walrus/data/foo/': b'',
            'gs://walrus/data/foo/bar': b'baz',
        })

        self.assertEqual(list(self.fs.ls('gs://walrus/data')),
                         ['gs://walrus/data/foo/bar'])

    def test_ls_recursively(self):
        self.put_gcs_multi({
            'gs://walrus/data/bar': b'',
            'gs://walrus/data/bar/baz': b'',
            'gs://walrus/data/foo': b'',
            'gs://walrus/qux': b'',
        })

        uris = [
            'gs://walrus/data/bar',
            'gs://walrus/data/bar/baz',
            'gs://walrus/data/foo',
            'gs://walrus/qux',
        ]

        self.assertEqual(set(self.fs.ls('gs://walrus/')), set(uris))
        self.assertEqual(set(self.fs.ls('gs://walrus/*')), set(uris))

        self.assertEqual(set(self.fs.ls('gs://walrus/data')), set(uris[:-1]))
        self.assertEqual(set(self.fs.ls('gs://walrus/data/')), set(uris[:-1]))
        self.assertEqual(set(self.fs.ls('gs://walrus/data/*')), set(uris[:-1]))

    def test_ls_globs(self):
        self.put_gcs_multi({
            'gs://w/a': b'',
            'gs://w/a/b': b'',
            'gs://w/ab': b'',
            'gs://w/b': b'',
        })

        self.assertEqual(
            set(self.fs.ls('gs://w/')),
            set(['gs://w/a', 'gs://w/a/b', 'gs://w/ab', 'gs://w/b']))
        self.assertEqual(
            set(self.fs.ls('gs://w/*')),
            set(['gs://w/a', 'gs://w/a/b', 'gs://w/ab', 'gs://w/b']))
        self.assertEqual(list(self.fs.ls('gs://w/*/')), ['gs://w/a/b'])
        self.assertEqual(list(self.fs.ls('gs://w/*/*')), ['gs://w/a/b'])
        self.assertEqual(list(self.fs.ls('gs://w/a?')), ['gs://w/ab'])
        # * can match /
        self.assertEqual(set(self.fs.ls('gs://w/a*')),
                         set(['gs://w/a', 'gs://w/a/b', 'gs://w/ab']))
        self.assertEqual(set(self.fs.ls('gs://w/*b')),
                         set(['gs://w/a/b', 'gs://w/ab', 'gs://w/b']))

    def test_du(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'abcde',
            'gs://walrus/data/bar/baz': b'fgh'
        })

        self.assertEqual(self.fs.du('gs://walrus/'), 8)
        self.assertEqual(self.fs.du('gs://walrus/data/foo'), 5)
        self.assertEqual(self.fs.du('gs://walrus/data/bar/baz'), 3)

    def test_exists(self):
        self.put_gcs_multi({'gs://walrus/data/foo': b'abcd'})
        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), True)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar'), False)

    def test_md5sum(self):
        self.put_gcs_multi({'gs://walrus/data/foo': b'abcd'})

        self.assertEqual(self.fs.md5sum('gs://walrus/data/foo'),
                         md5(b'abcd').hexdigest())

    def test_md5sum_of_missing_blob(self):
        self.put_gcs_multi({'gs://walrus/data/foo': b'abcd'})

        self.assertRaises(IOError, self.fs.md5sum, 'gs://walrus/data/bar')

    def test_rm(self):
        self.put_gcs_multi({'gs://walrus/foo': b''})

        self.assertEqual(self.fs.exists('gs://walrus/foo'), True)
        self.fs.rm('gs://walrus/foo')
        self.assertEqual(self.fs.exists('gs://walrus/foo'), False)

    def test_rm_dir(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'',
            'gs://walrus/data/bar/baz': b'',
        })

        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), True)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar/baz'), True)
        self.fs.rm('gs://walrus/data')
        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), False)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar/baz'), False)
Exemplo n.º 36
0
    def test_default(self):
        fs = GCSFilesystem()
        self.assertFalse(self.log.warning.called)

        self.assertEqual(fs.client, self.Client(project=None,
                                                credentials=None))
Exemplo n.º 37
0
 def _make_bucket(self, name, location=None):
     fs = GCSFilesystem()
     fs.create_bucket(name, location=location)
Exemplo n.º 38
0
 def setUp(self):
     super(CatTestCase, self).setUp()
     self.fs = GCSFilesystem()
Exemplo n.º 39
0
class GCSFSTestCase(MockGoogleTestCase):
    def setUp(self):
        super(GCSFSTestCase, self).setUp()
        self.fs = GCSFilesystem()

    def test_ls_blob(self):
        self.put_gcs_multi({'gs://walrus/data/foo': b''})

        self.assertEqual(list(self.fs.ls('gs://walrus/data/foo')),
                         ['gs://walrus/data/foo'])

    def test_ls_missing(self):
        self.assertEqual(list(self.fs.ls('gs://nope/not/here')), [])

    def test_ls_ignores_dirs(self):
        # Dataproc (i.e. Hadoop) will create empty blobs whose names end
        # in '/'
        self.put_gcs_multi({
            'gs://walrus/data/foo/': b'',
            'gs://walrus/data/foo/bar': b'baz',
        })

        self.assertEqual(list(self.fs.ls('gs://walrus/data')),
                         ['gs://walrus/data/foo/bar'])

    def test_ls_recursively(self):
        self.put_gcs_multi({
            'gs://walrus/data/bar': b'',
            'gs://walrus/data/bar/baz': b'',
            'gs://walrus/data/foo': b'',
            'gs://walrus/qux': b'',
        })

        uris = [
            'gs://walrus/data/bar',
            'gs://walrus/data/bar/baz',
            'gs://walrus/data/foo',
            'gs://walrus/qux',
        ]

        self.assertEqual(set(self.fs.ls('gs://walrus/')), set(uris))
        self.assertEqual(set(self.fs.ls('gs://walrus/*')), set(uris))

        self.assertEqual(set(self.fs.ls('gs://walrus/data')), set(uris[:-1]))
        self.assertEqual(set(self.fs.ls('gs://walrus/data/')), set(uris[:-1]))
        self.assertEqual(set(self.fs.ls('gs://walrus/data/*')), set(uris[:-1]))

    def test_ls_globs(self):
        self.put_gcs_multi({
            'gs://w/a': b'',
            'gs://w/a/b': b'',
            'gs://w/ab': b'',
            'gs://w/b': b'',
        })

        self.assertEqual(
            set(self.fs.ls('gs://w/')),
            set(['gs://w/a', 'gs://w/a/b', 'gs://w/ab', 'gs://w/b']))
        self.assertEqual(
            set(self.fs.ls('gs://w/*')),
            set(['gs://w/a', 'gs://w/a/b', 'gs://w/ab', 'gs://w/b']))
        self.assertEqual(list(self.fs.ls('gs://w/*/')), ['gs://w/a/b'])
        self.assertEqual(list(self.fs.ls('gs://w/*/*')), ['gs://w/a/b'])
        self.assertEqual(list(self.fs.ls('gs://w/a?')), ['gs://w/ab'])
        # * can match /
        self.assertEqual(set(self.fs.ls('gs://w/a*')),
                         set(['gs://w/a', 'gs://w/a/b', 'gs://w/ab']))
        self.assertEqual(set(self.fs.ls('gs://w/*b')),
                         set(['gs://w/a/b', 'gs://w/ab', 'gs://w/b']))

    def test_du(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'abcde',
            'gs://walrus/data/bar/baz': b'fgh'
        })

        self.assertEqual(self.fs.du('gs://walrus/'), 8)
        self.assertEqual(self.fs.du('gs://walrus/data/foo'), 5)
        self.assertEqual(self.fs.du('gs://walrus/data/bar/baz'), 3)

    def test_exists(self):
        self.put_gcs_multi({'gs://walrus/data/foo': b'abcd'})
        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), True)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar'), False)

    def test_md5sum(self):
        self.put_gcs_multi({'gs://walrus/data/foo': b'abcd'})

        self.assertEqual(self.fs.md5sum('gs://walrus/data/foo'),
                         md5(b'abcd').hexdigest())

    def test_md5sum_of_missing_blob(self):
        self.put_gcs_multi({'gs://walrus/data/foo': b'abcd'})

        self.assertRaises(IOError, self.fs.md5sum, 'gs://walrus/data/bar')

    def test_mkdir_creates_buckets(self):
        self.assertNotIn('walrus', self.mock_gcs_fs)

        self.fs.mkdir('gs://walrus/data')

        self.assertIn('walrus', self.mock_gcs_fs)

    def test_mkdir_does_not_create_directories(self):
        self.fs.create_bucket('walrus')

        self.assertEqual(list(self.fs.ls('gs://walrus/')), [])

        self.fs.mkdir('gs://walrus/data')

        self.assertEqual(list(self.fs.ls('gs://walrus/')), [])

    def test_put(self):
        local_path = self.makefile('foo', contents=b'bar')
        dest = 'gs://bar-files/foo'
        self.storage_client().bucket('bar-files').create()

        self.fs.put(local_path, dest)
        self.assertEqual(b''.join(self.fs.cat(dest)), b'bar')

    def test_put_with_part_size(self):
        local_path = self.makefile('foo', contents=b'bar')
        dest = 'gs://bar-files/foo'
        self.storage_client().bucket('bar-files').create()

        fs = GCSFilesystem(part_size=12345)

        with patch.object(GCSFilesystem, '_blob') as blob_meth:
            fs.put(local_path, dest)
            blob_meth.assert_called_once_with(dest, chunk_size=12345)

    def test_put_chunk_size(self):
        local_path = self.makefile('foo', contents=b'bar')
        dest = 'gs://bar-files/foo'
        self.storage_client().bucket('bar-files').create()

        with patch.object(GCSFilesystem, '_blob') as blob_meth:
            with patch('mrjob.fs.gcs.log') as log:

                self.fs.put(local_path, dest, chunk_size=99999)
                blob_meth.assert_called_once_with(dest, chunk_size=99999)

                self.assertTrue(log.warning.called)

    def test_rm(self):
        self.put_gcs_multi({'gs://walrus/foo': b''})

        self.assertEqual(self.fs.exists('gs://walrus/foo'), True)
        self.fs.rm('gs://walrus/foo')
        self.assertEqual(self.fs.exists('gs://walrus/foo'), False)

    def test_rm_dir(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'',
            'gs://walrus/data/bar/baz': b'',
        })

        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), True)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar/baz'), True)
        self.fs.rm('gs://walrus/data')
        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), False)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar/baz'), False)
Exemplo n.º 40
0
class GCSFSTestCase(MockGoogleAPITestCase):
    def setUp(self):
        super(GCSFSTestCase, self).setUp()
        self.fs = GCSFilesystem()

    def test_cat_uncompressed(self):
        self.put_gcs_multi({"gs://walrus/data/foo": b"foo\nfoo\n"})

        self.assertEqual(list(self.fs._cat_file("gs://walrus/data/foo")), [b"foo\n", b"foo\n"])

    def test_cat_bz2(self):
        self.put_gcs_multi({"gs://walrus/data/foo.bz2": bz2.compress(b"foo\n" * 1000)})

        self.assertEqual(list(self.fs._cat_file("gs://walrus/data/foo.bz2")), [b"foo\n"] * 1000)

    def test_cat_gz(self):
        self.put_gcs_multi({"gs://walrus/data/foo.gz": gzip_compress(b"foo\n" * 10000)})

        self.assertEqual(list(self.fs._cat_file("gs://walrus/data/foo.gz")), [b"foo\n"] * 10000)

    def test_ls_key(self):
        self.put_gcs_multi({"gs://walrus/data/foo": b""})

        self.assertEqual(list(self.fs.ls("gs://walrus/data/foo")), ["gs://walrus/data/foo"])

    def test_ls_recursively(self):
        self.put_gcs_multi(
            {
                "gs://walrus/data/bar": b"",
                "gs://walrus/data/bar/baz": b"",
                "gs://walrus/data/foo": b"",
                "gs://walrus/qux": b"",
            }
        )

        uris = ["gs://walrus/data/bar", "gs://walrus/data/bar/baz", "gs://walrus/data/foo", "gs://walrus/qux"]

        self.assertEqual(set(self.fs.ls("gs://walrus/")), set(uris))
        self.assertEqual(set(self.fs.ls("gs://walrus/*")), set(uris))

        self.assertEqual(set(self.fs.ls("gs://walrus/data")), set(uris[:-1]))
        self.assertEqual(set(self.fs.ls("gs://walrus/data/")), set(uris[:-1]))
        self.assertEqual(set(self.fs.ls("gs://walrus/data/*")), set(uris[:-1]))

    def test_ls_globs(self):
        self.put_gcs_multi({"gs://w/a": b"", "gs://w/a/b": b"", "gs://w/ab": b"", "gs://w/b": b""})

        self.assertEqual(set(self.fs.ls("gs://w/")), set(["gs://w/a", "gs://w/a/b", "gs://w/ab", "gs://w/b"]))
        self.assertEqual(set(self.fs.ls("gs://w/*")), set(["gs://w/a", "gs://w/a/b", "gs://w/ab", "gs://w/b"]))
        self.assertEqual(list(self.fs.ls("gs://w/*/")), ["gs://w/a/b"])
        self.assertEqual(list(self.fs.ls("gs://w/*/*")), ["gs://w/a/b"])
        self.assertEqual(list(self.fs.ls("gs://w/a?")), ["gs://w/ab"])
        # * can match /
        self.assertEqual(set(self.fs.ls("gs://w/a*")), set(["gs://w/a", "gs://w/a/b", "gs://w/ab"]))
        self.assertEqual(set(self.fs.ls("gs://w/*b")), set(["gs://w/a/b", "gs://w/ab", "gs://w/b"]))

    def test_du(self):
        self.put_gcs_multi({"gs://walrus/data/foo": b"abcde", "gs://walrus/data/bar/baz": b"fgh"})

        self.assertEqual(self.fs.du("gs://walrus/"), 8)
        self.assertEqual(self.fs.du("gs://walrus/data/foo"), 5)
        self.assertEqual(self.fs.du("gs://walrus/data/bar/baz"), 3)

    def test_exists(self):
        self.put_gcs_multi({"gs://walrus/data/foo": b"abcd"})
        self.assertEqual(self.fs.exists("gs://walrus/data/foo"), True)
        self.assertEqual(self.fs.exists("gs://walrus/data/bar"), False)

    def test_rm(self):
        self.put_gcs_multi({"gs://walrus/foo": b""})

        self.assertEqual(self.fs.exists("gs://walrus/foo"), True)
        self.fs.rm("gs://walrus/foo")
        self.assertEqual(self.fs.exists("gs://walrus/foo"), False)

    def test_rm_dir(self):
        self.put_gcs_multi({"gs://walrus/data/foo": b"", "gs://walrus/data/bar/baz": b""})

        self.assertEqual(self.fs.exists("gs://walrus/data/foo"), True)
        self.assertEqual(self.fs.exists("gs://walrus/data/bar/baz"), True)
        self.fs.rm("gs://walrus/data")
        self.assertEqual(self.fs.exists("gs://walrus/data/foo"), False)
        self.assertEqual(self.fs.exists("gs://walrus/data/bar/baz"), False)
Exemplo n.º 41
0
 def setUp(self):
     super(GCSFSTestCase, self).setUp()
     self.fs = GCSFilesystem()
Exemplo n.º 42
0
class GCSFSTestCase(MockGoogleAPITestCase):

    def setUp(self):
        super(GCSFSTestCase, self).setUp()
        self.fs = GCSFilesystem()

    def test_ls_key(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b''
        })

        self.assertEqual(list(self.fs.ls('gs://walrus/data/foo')),
                         ['gs://walrus/data/foo'])

    def test_ls_recursively(self):
        self.put_gcs_multi({
            'gs://walrus/data/bar': b'',
            'gs://walrus/data/bar/baz': b'',
            'gs://walrus/data/foo': b'',
            'gs://walrus/qux': b'',
        })

        uris = [
            'gs://walrus/data/bar',
            'gs://walrus/data/bar/baz',
            'gs://walrus/data/foo',
            'gs://walrus/qux',
        ]

        self.assertEqual(set(self.fs.ls('gs://walrus/')), set(uris))
        self.assertEqual(set(self.fs.ls('gs://walrus/*')), set(uris))

        self.assertEqual(set(self.fs.ls('gs://walrus/data')), set(uris[:-1]))
        self.assertEqual(set(self.fs.ls('gs://walrus/data/')), set(uris[:-1]))
        self.assertEqual(set(self.fs.ls('gs://walrus/data/*')), set(uris[:-1]))

    def test_ls_globs(self):
        self.put_gcs_multi({
            'gs://w/a': b'',
            'gs://w/a/b': b'',
            'gs://w/ab': b'',
            'gs://w/b': b'',
        })

        self.assertEqual(set(self.fs.ls('gs://w/')),
                         set(['gs://w/a', 'gs://w/a/b',
                              'gs://w/ab', 'gs://w/b']))
        self.assertEqual(set(self.fs.ls('gs://w/*')),
                         set(['gs://w/a', 'gs://w/a/b',
                              'gs://w/ab', 'gs://w/b']))
        self.assertEqual(list(self.fs.ls('gs://w/*/')),
                         ['gs://w/a/b'])
        self.assertEqual(list(self.fs.ls('gs://w/*/*')),
                         ['gs://w/a/b'])
        self.assertEqual(list(self.fs.ls('gs://w/a?')),
                         ['gs://w/ab'])
        # * can match /
        self.assertEqual(set(self.fs.ls('gs://w/a*')),
                         set(['gs://w/a', 'gs://w/a/b', 'gs://w/ab']))
        self.assertEqual(set(self.fs.ls('gs://w/*b')),
                         set(['gs://w/a/b', 'gs://w/ab', 'gs://w/b']))

    def test_du(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'abcde',
            'gs://walrus/data/bar/baz': b'fgh'
        })

        self.assertEqual(self.fs.du('gs://walrus/'), 8)
        self.assertEqual(self.fs.du('gs://walrus/data/foo'), 5)
        self.assertEqual(self.fs.du('gs://walrus/data/bar/baz'), 3)

    def test_exists(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'abcd'
        })
        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), True)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar'), False)

    def test_rm(self):
        self.put_gcs_multi({
            'gs://walrus/foo': b''
        })

        self.assertEqual(self.fs.exists('gs://walrus/foo'), True)
        self.fs.rm('gs://walrus/foo')
        self.assertEqual(self.fs.exists('gs://walrus/foo'), False)

    def test_rm_dir(self):
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'',
            'gs://walrus/data/bar/baz': b'',
        })

        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), True)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar/baz'), True)
        self.fs.rm('gs://walrus/data')
        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), False)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar/baz'), False)