def test_current(token_restore):
    from google.oauth2.credentials import Credentials

    with gcs_maker() as gcs:
        assert GCSFileSystem.current() is gcs
        gcs2 = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN)
        assert gcs2.session is gcs.session
        gcs2 = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN, secure_serialize=False)
        assert isinstance(gcs2.token, Credentials)


def test_array(token_restore):
    with gcs_maker() as gcs:
        from array import array

        data = array('B', [65] * 1000)
        with gcs.open(a, 'wb') as f:
            f.write(data)
        with gcs.open(a, 'rb') as f:
            out = f.read()
            assert out == b'A' * 1000


def test_rm(token_restore):
    with gcs_maker() as gcs:
        assert not gcs.exists(a)
        gcs.touch(a)
        assert gcs.exists(a)
        gcs.rm(a)
        assert not gcs.exists(a)
        with pytest.raises((OSError, IOError)):
            gcs.rm(TEST_BUCKET + '/nonexistent')
        with pytest.raises((OSError, IOError)):
            gcs.rm('nonexistent')


def test_bigger_than_block_read():
    with gcs_maker(True) as gcs:
        with gcs.open(TEST_BUCKET + "/2014-01-01.csv", "rb", block_size=3) as f:
            out = []
            while True:
                data = f.read(20)
                out.append(data)
                if len(data) == 0:
                    break
        assert b"".join(out) == csv_files["2014-01-01.csv"]


def test_read_keys_from_bucket():
    with gcs_maker(True) as gcs:
        for k, data in files.items():
            file_contents = gcs.cat("/".join([TEST_BUCKET, k]))
            assert file_contents == data

        assert all(
            gcs.cat("/".join([TEST_BUCKET, k]))
            == gcs.cat("gcs://" + "/".join([TEST_BUCKET, k]))
            for k in files
        )


def test_readline(token_restore):
    with gcs_maker(True) as gcs:
        all_items = chain.from_iterable([
            files.items(), csv_files.items(), text_files.items()
        ])
        for k, data in all_items:
            with gcs.open('/'.join([TEST_BUCKET, k]), 'rb') as f:
                result = f.readline()
                expected = data.split(b'\n')[0] + (b'\n' if data.count(b'\n') else b'')
                assert result == expected


def test_map_pickle(): with gcs_maker() as gcs: d = gcs.get_mapper(root) d["x"] = b"1" assert d["x"] == b"1" import pickle d2 = pickle.loads(pickle.dumps(d)) assert d2["x"] == b"1"
def test_bigger_than_block_read(token_restore):
    with gcs_maker(True) as gcs:
        with gcs.open(TEST_BUCKET + '/2014-01-01.csv', 'rb', block_size=3) as f:
            out = []
            while True:
                data = f.read(20)
                out.append(data)
                if len(data) == 0:
                    break
        assert b''.join(out) == csv_files['2014-01-01.csv']


def test_rm():
    with gcs_maker() as gcs:
        assert not gcs.exists(a)
        gcs.touch(a)
        assert gcs.exists(a)
        gcs.rm(a)
        assert not gcs.exists(a)
        with pytest.raises((OSError, IOError)):
            gcs.rm(TEST_BUCKET + "/nonexistent")
        with pytest.raises((OSError, IOError)):
            gcs.rm("nonexistent")


def test_gcs_glob(token_restore):
    with gcs_maker(True) as gcs:
        fn = TEST_BUCKET + '/nested/file1'
        assert fn not in gcs.glob(TEST_BUCKET + '/')
        assert fn not in gcs.glob(TEST_BUCKET + '/*')
        assert fn in gcs.glob(TEST_BUCKET + '/nested/')
        assert fn in gcs.glob(TEST_BUCKET + '/nested/*')
        assert fn in gcs.glob(TEST_BUCKET + '/nested/file*')
        assert fn in gcs.glob(TEST_BUCKET + '/*/*')
        assert fn in gcs.glob(TEST_BUCKET + '/**')
        assert all(
            f in gcs.find(TEST_BUCKET)
            for f in gcs.glob(TEST_BUCKET + '/nested/*')
            if gcs.isfile(f)
        )


def test_array():
    with gcs_maker() as gcs:
        from array import array

        data = array("B", [65] * 1000)
        with gcs.open(a, "wb") as f:
            f.write(data)
        with gcs.open(a, "rb") as f:
            out = f.read()
            assert out == b"A" * 1000


def test_readline():
    with gcs_maker(True) as gcs:
        all_items = chain.from_iterable(
            [files.items(), csv_files.items(), text_files.items()])
        for k, data in all_items:
            with gcs.open("/".join([TEST_BUCKET, k]), "rb") as f:
                result = f.readline()
                expected = data.split(b"\n")[0] + (b"\n" if data.count(b"\n") else b"")
                assert result == expected


def test_file_info():
    with gcs_maker() as gcs:
        fn = TEST_BUCKET + "/nested/file1"
        data = b"hello\n"
        with gcs.open(fn, "wb") as f:
            f.write(data)
        assert fn in gcs.find(TEST_BUCKET)
        assert gcs.exists(fn)
        assert not gcs.exists(fn + "another")
        assert gcs.info(fn)["size"] == len(data)
        with pytest.raises((OSError, IOError)):
            gcs.info(fn + "another")


def test_get_put(consistency):
    if consistency == "crc32c" and gcsfs.checkers.crcmod is None:
        pytest.skip("No CRC")
    with gcs_maker(True) as gcs:
        gcs.consistency = consistency
        with tmpfile() as fn:
            gcs.get(TEST_BUCKET + "/test/accounts.1.json", fn)
            data = files["test/accounts.1.json"]
            assert open(fn, "rb").read() == data
            gcs.put(fn, TEST_BUCKET + "/temp")
            assert gcs.du(TEST_BUCKET + "/temp") == len(data)
            assert gcs.cat(TEST_BUCKET + "/temp") == data


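# The `consistency` argument to test_get_put above is assumed to be supplied by a
# parametrized pytest fixture defined outside this section (e.g. in conftest.py);
# the fixture name and parameter values below are illustrative only, not taken
# from this file. A minimal sketch of such a fixture:
#
#     @pytest.fixture(params=["size", "md5", "crc32c"])
#     def consistency(request):
#         # run each test that requests this fixture once per consistency strategy
#         return request.param

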
def test_ls_touch(token_restore):
    with gcs_maker() as gcs:
        assert not gcs.exists(TEST_BUCKET + '/tmp/test')
        gcs.touch(a)
        gcs.touch(b)
        L = gcs.ls(TEST_BUCKET + '/tmp/test', False)
        assert set(L) == set([a, b])
        L_d = gcs.ls(TEST_BUCKET + '/tmp/test', True)
        assert set(d['path'] for d in L_d) == set([a, b])


def test_map_pickle(token_restore):
    import pickle

    with gcs_maker() as gcs:
        d = gcs.get_mapper(root)
        d['x'] = b'1234567890'
        b = pickle.dumps(d)
        assert b'1234567890' not in b
        e = pickle.loads(b)
        assert dict(e) == {'x': b'1234567890'}


def test_file_info(token_restore):
    with gcs_maker() as gcs:
        fn = TEST_BUCKET + '/nested/file1'
        data = b'hello\n'
        with gcs.open(fn, 'wb') as f:
            f.write(data)
        assert fn in gcs.walk(TEST_BUCKET)
        assert gcs.exists(fn)
        assert not gcs.exists(fn + 'another')
        assert gcs.info(fn)['size'] == len(data)
        with pytest.raises((OSError, IOError)):
            gcs.info(fn + 'another')


def test_ls_touch():
    with gcs_maker() as gcs:
        assert not gcs.exists(TEST_BUCKET + "/tmp/test")
        gcs.touch(a)
        gcs.touch(b)
        L = gcs.ls(TEST_BUCKET + "/tmp/test", False)
        assert set(L) == set([a, b])
        L_d = gcs.ls(TEST_BUCKET + "/tmp/test", True)
        assert set(d["name"] for d in L_d) == set([a, b])


def test_large_upload():
    orig = gcsfs.core.GCS_MAX_BLOCK_SIZE
    gcsfs.core.GCS_MAX_BLOCK_SIZE = 262144  # minimum block size
    try:
        with gcs_maker() as gcs:
            fn = TEST_BUCKET + "/test"
            d = b"7123" * 262144
            with gcs.open(fn, "wb", content_type="application/octet-stream") as f:
                f.write(d)
            assert gcs.cat(fn) == d
    finally:
        gcsfs.core.GCS_MAX_BLOCK_SIZE = orig


def test_gcs_glob():
    with gcs_maker(True) as gcs:
        fn = TEST_BUCKET + "/nested/file1"
        assert fn not in gcs.glob(TEST_BUCKET + "/")
        assert fn not in gcs.glob(TEST_BUCKET + "/*")
        assert fn in gcs.glob(TEST_BUCKET + "/nested/")
        assert fn in gcs.glob(TEST_BUCKET + "/nested/*")
        assert fn in gcs.glob(TEST_BUCKET + "/nested/file*")
        assert fn in gcs.glob(TEST_BUCKET + "/*/*")
        assert fn in gcs.glob(TEST_BUCKET + "/**")
        assert all(
            f in gcs.find(TEST_BUCKET)
            for f in gcs.glob(TEST_BUCKET + "/nested/*")
            if gcs.isfile(f)
        )


def test_map_pickle():
    import pickle

    with gcs_maker() as gcs:
        d = gcs.get_mapper(root)
        d["x"] = b"1234567890"
        b = pickle.dumps(d)
        assert b"1234567890" not in b
        e = pickle.loads(b)
        assert dict(e) == {"x": b"1234567890"}


def test_gcs_glob(token_restore):
    with gcs_maker(True) as gcs:
        fn = TEST_BUCKET + '/nested/file1'
        assert fn not in gcs.glob(TEST_BUCKET + '/')
        assert fn not in gcs.glob(TEST_BUCKET + '/*')
        assert fn in gcs.glob(TEST_BUCKET + '/nested')
        assert fn in gcs.glob(TEST_BUCKET + '/nested/*')
        assert fn in gcs.glob(TEST_BUCKET + '/nested/file*')
        assert fn in gcs.glob(TEST_BUCKET + '/*/*')
        assert all(f in gcs.walk(TEST_BUCKET)
                   for f in gcs.glob(TEST_BUCKET + '/nested/*'))
        with pytest.raises(ValueError):
            gcs.glob('*')


def test_read_small(token_restore):
    with gcs_maker(True) as gcs:
        fn = TEST_BUCKET + '/2014-01-01.csv'
        with gcs.open(fn, 'rb', block_size=10) as f:
            out = []
            while True:
                data = f.read(3)
                if data == b'':
                    break
                out.append(data)
            assert gcs.cat(fn) == b''.join(out)
            # cache drop
            assert len(f.cache) < len(out)


def test_pseudo_dir_find():
    with gcs_maker(False) as fs:
        fs.touch(f"{TEST_BUCKET}/a/b/file")

        b = set(fs.glob(f"{TEST_BUCKET}/a/*"))
        assert f"{TEST_BUCKET}/a/b" in b

        a = set(fs.glob(f"{TEST_BUCKET}/*"))
        assert f"{TEST_BUCKET}/a" in a

        assert fs.find(TEST_BUCKET) == [f"{TEST_BUCKET}/a/b/file"]
        assert fs.find(f"{TEST_BUCKET}/a", withdirs=True) == [
            f"{TEST_BUCKET}/a",
            f"{TEST_BUCKET}/a/b",
            f"{TEST_BUCKET}/a/b/file",
        ]


def test_multi_upload(token_restore):
    with gcs_maker() as gcs:
        fn = TEST_BUCKET + '/test'
        d = b'01234567' * 2**15

        # something to write on close
        with gcs.open(fn, 'wb', content_type='text/plain', block_size=2**18) as f:
            f.write(d)
            f.write(b'xx')
        assert gcs.cat(fn) == d + b'xx'
        assert gcs.info(fn)['contentType'] == 'text/plain'

        # empty buffer on close
        with gcs.open(fn, 'wb', content_type='text/plain', block_size=2**19) as f:
            f.write(d)
            f.write(b'xx')
            f.write(d)
        assert gcs.cat(fn) == d + b'xx' + d
        assert gcs.info(fn)['contentType'] == 'text/plain'

    # if content-type is not provided then default should be application/octet-stream
    with gcs_maker() as gcs:
        fn = TEST_BUCKET + '/test'
        d = b'01234567' * 2**15

        # something to write on close
        with gcs.open(fn, 'wb', block_size=2**18) as f:
            f.write(d)
            f.write(b'xx')
        assert gcs.cat(fn) == d + b'xx'
        assert gcs.info(fn)['contentType'] == 'application/octet-stream'

        # empty buffer on close
        with gcs.open(fn, 'wb', block_size=2**19) as f:
            f.write(d)
            f.write(b'xx')
            f.write(d)
        assert gcs.cat(fn) == d + b'xx' + d
        assert gcs.info(fn)['contentType'] == 'application/octet-stream'


def test_multi_upload():
    with gcs_maker() as gcs:
        fn = TEST_BUCKET + "/test"
        d = b"01234567" * 2**15

        # something to write on close
        with gcs.open(fn, "wb", content_type="text/plain", block_size=2**18) as f:
            f.write(d)
            f.write(b"xx")
        assert gcs.cat(fn) == d + b"xx"
        assert gcs.info(fn)["contentType"] == "text/plain"

        # empty buffer on close
        with gcs.open(fn, "wb", content_type="text/plain", block_size=2**19) as f:
            f.write(d)
            f.write(b"xx")
            f.write(d)
        assert gcs.cat(fn) == d + b"xx" + d
        assert gcs.info(fn)["contentType"] == "text/plain"

    # if content-type is not provided then default should be application/octet-stream
    with gcs_maker() as gcs:
        fn = TEST_BUCKET + "/test"
        d = b"01234567" * 2**15

        # something to write on close
        with gcs.open(fn, "wb", block_size=2**18) as f:
            f.write(d)
            f.write(b"xx")
        assert gcs.cat(fn) == d + b"xx"
        assert gcs.info(fn)["contentType"] == "application/octet-stream"

        # empty buffer on close
        with gcs.open(fn, "wb", block_size=2**19) as f:
            f.write(d)
            f.write(b"xx")
            f.write(d)
        assert gcs.cat(fn) == d + b"xx" + d
        assert gcs.info(fn)["contentType"] == "application/octet-stream"


def test_read_small():
    with gcs_maker(True) as gcs:
        fn = TEST_BUCKET + "/2014-01-01.csv"
        with gcs.open(fn, "rb", block_size=10) as f:
            out = []
            while True:
                data = f.read(3)
                if data == b"":
                    break
                out.append(data)
            assert gcs.cat(fn) == b"".join(out)
            # cache drop
            assert len(f.cache.cache) < len(out)


def test_readline_partial(token_restore):
    with gcs_maker() as gcs:
        data = b'aaaaa,bbbbb\n12345,6789\n'
        with gcs.open(a, 'wb') as f:
            f.write(data)
        with gcs.open(a, 'rb') as f:
            result = f.readline(5)
            assert result == b'aaaaa'
            result = f.readline(5)
            assert result == b',bbbb'
            result = f.readline(5)
            assert result == b'b\n'
            result = f.readline()
            assert result == b'12345,6789\n'


def test_request_header():
    with gcs_maker():
        gcs = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN, requester_pays=True)
        # test directly against `_call` to inspect the result
        r = gcs.call(
            "GET",
            "b/{}/o/",
            TEST_REQUESTER_PAYS_BUCKET,
            delimiter="/",
            prefix="test",
            maxResults=100,
            info_out=True,
        )
        assert r.headers["User-Agent"] == "python-gcsfs/" + version


def test_zero_cache_timeout():
    with gcs_maker(True, cache_timeout=0) as gcs:
        gcs.touch(f"gs://{TEST_BUCKET}/a/file")
        gcs.find(f"gs://{TEST_BUCKET}/a/")
        gcs.info(f"gs://{TEST_BUCKET}/a/file")
        gcs.ls(f"gs://{TEST_BUCKET}/a/")

        # The _times entry and exception below should only be present after
        # https://github.com/intake/filesystem_spec/pull/513.
        if f"{TEST_BUCKET}/a" not in gcs.dircache._times:
            pytest.skip("fsspec version too early")

        with pytest.raises(KeyError):
            gcs.dircache[f"{TEST_BUCKET}/a"]