def __init__(self, bucket=None):
    """Initialize the archive backed by a GCS bucket, creating the bucket if absent."""
    super(GoogleStorageArchive, self).__init__(bucket)
    self.client = Client()
    log.info("Archive: gs://%s", bucket)
    # lookup_bucket returns None when the bucket does not exist yet.
    self.bucket = self.client.lookup_bucket(bucket)
    if self.bucket is None:
        self.bucket = self.client.create_bucket(bucket)
    # Run any pending archive migrations/upgrades after the bucket is ready.
    self.upgrade()
def setup_bucket(project_id, service_account_key, bucket_name):
    """Ensure *bucket_name* exists, authenticating with a service-account key file.

    Args:
        project_id: GCP project to bill/own the bucket.
        service_account_key: Path to the service-account JSON key file.
        bucket_name: Name of the bucket to create if it does not exist.
    """
    credentials = service_account.Credentials.from_service_account_file(
        service_account_key, scopes=SCOPES)
    storage_client = GSClient(project_id, credentials)
    bucket = storage_client.bucket(bucket_name)
    # Create only when missing; an existing bucket is left untouched.
    if not bucket.exists():
        bucket.create()
def get_bucket(service_account_b64_key, bucket_name, google_project):
    """Return the configured GCS bucket, or None when no key is configured.

    Args:
        service_account_b64_key: Base64-encoded service-account JSON key.
        bucket_name: Name of the bucket to use.
        google_project: GCP project id.

    Returns:
        A Bucket handle, or None when *service_account_b64_key* is falsy.

    Raises:
        ValueError: If a key is configured but bucket/project are missing.
    """
    if not service_account_b64_key:
        return None
    # ``assert`` is stripped under ``python -O``; validate explicitly instead.
    if not (bucket_name and google_project):
        raise ValueError("missing required GCS configurations")
    credentials_info = json.loads(
        base64.decodebytes(service_account_b64_key.encode()))
    credentials = service_account.Credentials.from_service_account_info(
        credentials_info)
    gcs = Client(project=google_project, credentials=credentials)
    return gcs.bucket(bucket_name)
def retrieve_bucket(self, bucket_name="cs4teachers-static"):
    """Return bucket used for storing files.

    Args:
        bucket_name: Name of the bucket to return. Defaults to the
            site's static-files bucket, preserving prior behavior.

    Returns:
        Bucket object.
    """
    project = self.env("GOOGLE_PROJECT")
    # GOOGLE_APPLICATION_CREDENTIALS holds the JSON key *content* here,
    # not a file path (it is parsed, not opened).
    credentials_info = json.loads(
        self.env("GOOGLE_APPLICATION_CREDENTIALS"))
    credentials = service_account.Credentials.from_service_account_info(
        credentials_info)
    client = Client(project=project, credentials=credentials)
    return client.bucket(bucket_name)
def get_bucket():
    """Return the module-level bucket handle, creating the client lazily."""
    global _bucket
    if _bucket is not None:
        return _bucket
    from google.cloud.storage.client import Client
    # A `None` project behaves differently with the client, so
    # we need to call it differently
    project = os.environ.get("GOOGLE_CLOUD_PROJECT")
    client = Client() if project is None else Client(project=project)
    _bucket = client.bucket(settings.GOOGLE_STORAGE_BUCKET)
    return _bucket
def get_food_recommender(request):
    """Get existing food recommender list.

    Reads from a tempdir mirror when ENV == 'local', otherwise from GCS.
    Returns a (payload dict, HTTP status code) tuple.
    """
    list_id = request.args.get('listId')
    if not list_id:
        raise ValueError('Query parameter "listId" must be provided')
    if os.getenv('ENV', 'production') == 'local':
        # Local development reads from a tempdir mirror of the bucket layout.
        expected_path = (Path(tempfile.gettempdir()) /
                         'jmyrberg-food-recommender' /
                         'data' / f'{list_id}.json')
        if expected_path.exists():
            with open(expected_path, 'r') as f:
                data = json.load(f)
            return {
                'status': 'success',
                'message': 'Food recommender list fetched successfully',
                'data': data
            }, 200
        else:
            return {
                'status': 'error',
                'message': (f'Food recommender list "{list_id}"'
                            ' could not be found'),
                'data': None
            }, 404
    else:
        # Reuse the module-level client across warm invocations.
        global storage_client
        if not storage_client:
            storage_client = Client()
        bucket_name = os.getenv('FOOD_RECOMMENDER_BUCKET_NAME',
                                'jmyrberg-food-recommender')
        # NOTE(review): the leading '/' puts a literal slash at the start of
        # the object name, unlike the local 'data/...' layout — confirm
        # objects are actually stored under '/data/' in the bucket.
        blob_name = f'/data/{list_id}.json'
        blob = storage_client.bucket(bucket_name).get_blob(blob_name)
        if blob:
            data = json.loads(blob.download_as_string())
            return {
                'status': 'success',
                'message': 'Food recommender list fetched successfully',
                'data': data
            }, 200
        else:
            return {
                'status': 'error',
                'message': (f'Food recommender list "{list_id}"'
                            ' could not be found'),
                'data': None
            }, 404
def __init__(self, config=None):
    """Initialize bucket handles from config (GCS, despite the s3 comment)."""
    self.config = config if config else newhive.config
    # initialize s3 connection
    if self.config.buckets:
        try:
            from google.cloud.storage.client import Client
            self.con = Client()
            self.buckets = {
                k: self.con.get_bucket(name)
                for k, name in self.config.buckets.items()
            }
        except Exception as exc:
            # Bare ``except:`` also swallowed KeyboardInterrupt/SystemExit;
            # catch Exception and surface the cause in the message.
            print('google.cloud.storage.client failure: %s' % exc)
def test_current(self):
    """Batch.current() should reflect the top of the client's batch stack."""
    from google.cloud.storage.client import Client
    project = 'PROJECT'
    credentials = _Credentials()
    client = Client(project=project, credentials=credentials)
    batch1 = self._makeOne(client)
    # Nothing pushed yet: no current batch.
    self.assertIsNone(batch1.current())
    client._push_batch(batch1)
    self.assertIs(batch1.current(), batch1)
    batch2 = self._makeOne(client)
    client._push_batch(batch2)
    # current() is resolved through the client, so batch1 now sees batch2.
    self.assertIs(batch1.current(), batch2)
def test_create_w_extra_properties(self):
    """create_bucket should POST every configured bucket property verbatim."""
    from google.cloud.storage.client import Client
    from google.cloud.storage.bucket import Bucket
    BUCKET_NAME = "bucket-name"
    PROJECT = "PROJECT"
    CORS = [
        {
            "maxAgeSeconds": 60,
            "methods": ["*"],
            "origin": ["https://example.com/frontend"],
            "responseHeader": ["X-Custom-Header"],
        }
    ]
    LIFECYCLE_RULES = [{"action": {"type": "Delete"}, "condition": {"age": 365}}]
    LOCATION = "eu"
    LABELS = {"color": "red", "flavor": "cherry"}
    STORAGE_CLASS = "NEARLINE"
    # Expected JSON payload of the bucket-insert API request.
    DATA = {
        "name": BUCKET_NAME,
        "cors": CORS,
        "lifecycle": {"rule": LIFECYCLE_RULES},
        "location": LOCATION,
        "storageClass": STORAGE_CLASS,
        "versioning": {"enabled": True},
        "billing": {"requesterPays": True},
        "labels": LABELS,
    }
    connection = _make_connection(DATA)
    client = Client(project=PROJECT)
    client._base_connection = connection
    bucket = Bucket(client=client, name=BUCKET_NAME)
    bucket.cors = CORS
    bucket.lifecycle_rules = LIFECYCLE_RULES
    bucket.storage_class = STORAGE_CLASS
    bucket.versioning_enabled = True
    bucket.requester_pays = True
    bucket.labels = LABELS
    client.create_bucket(bucket, location=LOCATION)
    # The mocked connection must record exactly one insert call with DATA.
    connection.api_request.assert_called_once_with(
        method="POST",
        path="/b",
        query_params={"project": PROJECT},
        data=DATA,
        _target_object=bucket,
        timeout=self._get_default_timeout(),
    )
class GoogleCloudStorage(FileStoreBase):
    """File store backed by Google Cloud Storage."""

    def __init__(self, *args, **kwargs):
        # kwargs are forwarded verbatim to the storage Client
        # (project, credentials, ...).
        self.client = Client(**kwargs)

    def create_bucket(self, bucket_name, **kwargs):
        """Create *bucket_name*, forwarding extra kwargs to Client.create_bucket.

        Logs and re-raises any client error with full context.
        """
        try:
            create_bucket_request = {'bucket_name': bucket_name, **kwargs}
            self.client.create_bucket(**create_bucket_request)
        except Exception:
            # Lazy %-args keep formatting out of the non-error path;
            # logging.exception records the traceback.
            logging.exception(
                'Exception in [GoogleCloudStorage.create_bucket] '
                'with bucket_name %s and kwargs %s', bucket_name, kwargs)
            # Bare ``raise`` preserves the original traceback
            # (``raise e`` re-raised from this frame).
            raise
def save_json(self, data, blob_name):
    """Persist *data* as JSON under *blob_name* (tempdir locally, GCS otherwise).

    Args:
        data: JSON-serializable object to store.
        blob_name: Object path relative to the bucket root.
    """
    if os.getenv('ENV', 'production') == 'local':
        expected_path = (Path(tempfile.gettempdir()) /
                         HOME_DATA_BUCKET_NAME / blob_name)
        expected_path.parent.mkdir(exist_ok=True, parents=True)
        with open(expected_path, 'w') as f:
            # json.dump returns None; the old ``data = json.dump(...)``
            # pointlessly clobbered the argument.
            json.dump(data, f)
        print(f'Saved into {str(expected_path)}!')
    else:
        # Reuse the module-level client across warm invocations.
        global storage_client
        if not storage_client:
            storage_client = Client()
        blob = storage_client.bucket(HOME_DATA_BUCKET_NAME).blob(blob_name)
        blob.upload_from_string(json.dumps(data),
                                content_type='application/json')
        print(f'Saved into {blob_name}!')
def download(country_code, svg, pdf):
    """Download SVG and/or PDF blobs for *country_code* via anonymous access.

    Args:
        country_code: Country code used in blob-name matching.
        svg: When truthy, download matching SVG files.
        pdf: When truthy, download matching PDF files.
    """
    client = Client.create_anonymous_client()

    def _download(blobs, svg):
        # Derive the extension up front so the "not found" message below
        # works even when ``blobs`` is empty (previously raised NameError
        # because ``extension`` was only bound inside the loop).
        extension = "svg" if svg else "pdf"
        if len(blobs):
            for blob in blobs:
                fname = f"{extension}s/{get_country(blob)}.{extension}"
                with open(fname, "wb+") as fileobj:
                    client.download_blob_to_file(blob, fileobj)
                print(
                    f"Download {country_code} {extension} complete. Saved to /{extension}s"
                )
        else:
            print(f"Could not find a {extension} file for code {country_code}")

    # Raw strings avoid invalid-escape warnings for ``\d``.
    if svg:
        regex = rf"\d{{4}}-\d{{2}}-\d{{2}}_{country_code}_.+"
        blobs = get(filetype="SVG", regex=regex)
        _download(blobs, True)
    if pdf:
        regex = rf"\d{{4}}-\d{{2}}-\d{{2}}_{country_code}_.+"
        blobs = get(filetype="PDF", regex=regex)
        _download(blobs, False)
def get_dataset(
    max_bytes=1024 * 1024,
    dir='data',
):
    """Download a byte-limited prefix of each Quick Draw 'simplified' blob.

    Each blob is truncated at *max_bytes*, so the final line is usually a
    partial record; _remove_last_line trims it to keep the file valid
    newline-delimited data.

    Args:
        max_bytes: Maximum number of bytes to download per blob.
        dir: Local directory where files are written (shadows the builtin).
    """

    def _remove_last_line(file):
        # Scan backwards from EOF until the previous newline, then truncate
        # everything after it. Each read(1) advances the cursor, so we seek
        # back to ``pos`` on every step.
        file.seek(0, os.SEEK_END)
        pos = file.tell() - 1
        while pos > 0 and file.read(1) != "\n":
            pos -= 1
            file.seek(pos, os.SEEK_SET)
        if pos > 0:
            file.seek(pos, os.SEEK_SET)
            file.truncate()

    from google.cloud.storage.client import Client
    # Anonymous client: quickdraw_dataset is a public bucket.
    client = Client.create_anonymous_client()
    bucket = client.get_bucket('quickdraw_dataset')
    blob_iterator = bucket.list_blobs(prefix='full/simplified')
    for blob in blob_iterator:
        file_name = '{}/{}'.format(dir, path.basename(blob.name))
        with open(file_name, 'wb+') as f:
            # Ranged download: only the first ``max_bytes`` of the blob.
            blob.download_to_file(
                f,
                start=0,
                end=max_bytes,
            )
        with open(file_name, 'r+') as f:
            _remove_last_line(f)
def test_as_context_mgr_wo_error(self):
    """Exiting the batch context cleanly should send all queued requests."""
    from google.cloud.storage.client import Client
    URL = 'http://example.com/api'
    expected = _Response()
    expected['content-type'] = 'multipart/mixed; boundary="DEADBEEF="'
    http = _HTTP((expected, _THREE_PART_MIME_RESPONSE))
    project = 'PROJECT'
    credentials = _Credentials()
    client = Client(project=project, credentials=credentials)
    client._connection._http = http
    self.assertEqual(list(client._batch_stack), [])
    target1 = _MockObject()
    target2 = _MockObject()
    target3 = _MockObject()
    with self._makeOne(client) as batch:
        # While active, the batch sits on the client's batch stack.
        self.assertEqual(list(client._batch_stack), [batch])
        batch._make_request('POST', URL, {'foo': 1, 'bar': 2},
                            target_object=target1)
        batch._make_request('PATCH', URL, {'bar': 3}, target_object=target2)
        batch._make_request('DELETE', URL, target_object=target3)
    # On clean exit the batch is popped and its three requests were sent.
    self.assertEqual(list(client._batch_stack), [])
    self.assertEqual(len(batch._requests), 3)
    self.assertEqual(batch._requests[0][0], 'POST')
    self.assertEqual(batch._requests[1][0], 'PATCH')
    self.assertEqual(batch._requests[2][0], 'DELETE')
    self.assertEqual(batch._target_objects, [target1, target2, target3])
    # Targets get the parsed per-part responses from the multipart reply.
    self.assertEqual(target1._properties, {'foo': 1, 'bar': 2})
    self.assertEqual(target2._properties, {'foo': 1, 'bar': 3})
    self.assertEqual(target3._properties, '')
def get(filetype="SVG", regex=r"\d{4}-\d{2}-\d{2}_.+"):
    """List blobs in BUCKET whose names match ``{filetype}/{regex}``.

    Args:
        filetype: Top-level "folder" prefix, e.g. "SVG" or "PDF".
        regex: Pattern for the rest of the blob name. Now a raw string —
            ``"\\d"`` in a plain literal is an invalid escape sequence
            (SyntaxWarning on modern Python).

    Returns:
        List of matching blob objects.
    """
    client = Client.create_anonymous_client()
    blobs = filter(
        lambda b: re.match(f"{filetype}/{regex}", b.name),
        client.list_blobs(BUCKET),
    )
    return list(blobs)
def client(self):
    """Return the cached storage client, constructing it on first access."""
    cached = self._client
    if cached is None:
        cached = Client(
            project=self.project_id, credentials=self.credentials
        )
        self._client = cached
    return cached
def download(country_code):
    """Download today's SVG and PDF map blobs for *country_code*.

    Only blobs whose filename date prefix equals the module-level DATE are
    saved; a message is printed when nothing matched.
    """
    client = Client.create_anonymous_client()

    def _download(blobs, extension):
        download_count = 0
        if len(blobs):
            for blob in blobs:
                # Blob names look like "<date>_<code>_...": keep today's only.
                if blob.name.split("/")[-1].split('_')[0] == DATE:
                    fname = f"{extension}s/{get_country(blob)}.{extension}"
                    with open(fname, "wb+") as fileobj:
                        client.download_blob_to_file(blob, fileobj)
                    print(
                        f"Download {country_code} {extension} complete. Saved to /{extension}s"
                    )
                    download_count += 1
        if download_count == 0:
            print(f"Could not find a {extension} file for code {country_code}")

    regex = f"\d{{4}}-\d{{2}}-\d{{2}}_{country_code}_M.+"
    blobs = get(filetype="SVG", regex=regex)
    _download(blobs, "svg")
    blobs = get(filetype="PDF", regex=regex)
    _download(blobs, "pdf")
def __init__(self, project, cas_url_prefix, credentials=None):
    """Initialize the CAS-backed store.

    Args:
        project: GCP project id (required).
        cas_url_prefix: Base URL for CAS objects; one trailing "/" is stripped.
        credentials: Optional credentials forwarded to the storage client.
    """
    assert project is not None
    self.buckets = {}
    self.client = GSClient(project, credentials=credentials)
    # endswith() is safe for an empty prefix, unlike the old
    # ``cas_url_prefix[-1]`` which raised IndexError on "".
    if cas_url_prefix.endswith("/"):
        cas_url_prefix = cas_url_prefix[:-1]
    self.cas_url_prefix = cas_url_prefix
def url(self, name):
    """Return a URL for *name*: signed when expiry is configured, public otherwise."""
    # Preserve the trailing slash after normalizing the path.
    name = self._normalize_name(clean_name(name))
    blob = self._get_blob(self._encode_name(name))
    if not self.expiry_time:
        return blob.public_url
    # Use a dedicated signing client when a keyfile path is configured.
    signing_client = None
    if self.keyfile_path:
        signing_client = Client.from_service_account_json(self.keyfile_path)
    return blob.generate_signed_url(
        datetime.timedelta(seconds=self.expiry_time), client=signing_client
    )
class GoogleStorage(object): def __init__(self, config=None): self.config = config if config else newhive.config # initialize s3 connection if self.config.buckets: try: from google.cloud.storage.client import Client self.con = Client() self.buckets = { k: self.con.get_bucket(name) for k, name in self.config.buckets.items() } except: print('google.cloud.storage.client failure') def upload_file(self, file, bucket_name, path, name, mimetype, md5=None): bucket = self.buckets[bucket_name] remote = bucket.blob(path) if mimetype: remote.content_type = mimetype remote.cache_control = 'max-age=' + str(86400 * 3650) if md5: remote.md5_hash = b64encode(b16decode(md5.upper())) if isinstance(file, basestring): remote.upload_from_filename(file) else: file.seek(0) remote.upload_from_file(file, num_retries=3) return self.url(bucket_name, path) def delete_file(self, bucket, path): bucket = self.buckets[bucket] remote = bucket.blob(path) if remote.exists(): remote.delete() return True return False def file_exists(self, bucket, path): bucket = self.buckets[bucket] remote = bucket.blob(path) return remote.exists() def bucket_url(self, bucket='media'): return '//' + self.config.buckets[bucket] + '/' def url(self, bucket='media', key='', bucket_name=None, http=False, secure=False): url = self.bucket_url(bucket) + key if http: url = 'http' + url if secure: url = 'https' + url return url
def get_gc_client(project_id=None, key_path=None, keyfile_dict=None, credentials=None, scopes=None):
    """Return a storage Client, deriving credentials and project when not supplied."""
    resolved_credentials = credentials or get_gc_credentials(
        key_path=key_path,
        keyfile_dict=keyfile_dict,
        scopes=scopes,
    )
    resolved_project = project_id or get_project_id()
    return Client(project=resolved_project, credentials=resolved_credentials)
def _download_gcs_json(cls, file_url):
    """Download a JSON document from GCS and return it as a str.

    Args:
        file_url: Either a gs:// URI or an https URL on the GCS host.

    Returns:
        The decoded UTF-8 JSON text.

    Raises:
        Exception: If the URL is on a host other than GCS.
    """
    parsed_url = urlparse(file_url)
    if parsed_url.scheme == 'gs':
        file_uri = file_url
    else:
        if parsed_url.netloc == GCS_HOST:
            # parsed_url.path begins with "/", so this yields gs://bucket/object.
            # (Was ``file_path`` — an undefined name, i.e. a NameError.)
            file_uri = "gs:/{}".format(parsed_url.path)
        else:
            raise Exception(
                "Host URL is not a Google Cloud Storage URI: {}".format(
                    file_url))
    gcs_client = GCSClient()
    with BytesIO() as json_in:
        gcs_client.download_blob_to_file(file_uri, json_in)
        json_in.seek(0)
        json_string = json_in.read().decode('utf-8')
    return json_string
def get_client(project_id, credentials):
    """Return a cached storage client with timeout adapters mounted.

    The cached client is rebuilt whenever the (project_id, credentials)
    identity changes.
    """
    global _client
    cached_project, cached_credentials, cached_client = _client
    if cached_client is None or (project_id, credentials) != (cached_project, cached_credentials):
        cached_client = Client(project=project_id, credentials=credentials)
        # Mount a per-request timeout on both schemes of the client session.
        http_session = cached_client._http
        adapter = TimeoutAdapter(timeout=GCS_TIMEOUT)
        http_session.mount("http://", adapter)
        http_session.mount("https://", adapter)
        _client = (project_id, credentials, cached_client)
    return _client[2]
def test_anonymous_client_access_to_public_bucket():
    """An anonymous client can list and download from a public bucket."""
    from google.cloud.storage.client import Client
    anonymous_client = Client.create_anonymous_client()
    bucket = anonymous_client.bucket(public_bucket)
    # Retry listing to ride out transient 429/503 rate-limit responses.
    (blob,) = _helpers.retry_429_503(anonymous_client.list_blobs)(
        bucket, max_results=1,
    )
    # Download into a throwaway file; success (no exception) is the assertion.
    with tempfile.TemporaryFile() as stream:
        _helpers.retry_429_503(blob.download_to_file)(stream)
def test_as_context_mgr_w_error(self):
    """If the batch context raises, nothing is sent and targets stay unresolved."""
    from google.cloud.storage.batch import _FutureDict
    from google.cloud.storage.client import Client
    URL = 'http://example.com/api'
    http = _make_requests_session([])
    connection = _Connection(http=http)
    project = 'PROJECT'
    credentials = _make_credentials()
    client = Client(project=project, credentials=credentials)
    client._base_connection = connection
    self.assertEqual(list(client._batch_stack), [])
    target1 = _MockObject()
    target2 = _MockObject()
    target3 = _MockObject()
    try:
        with self._make_one(client) as batch:
            self.assertEqual(list(client._batch_stack), [batch])
            batch._make_request('POST', URL, {
                'foo': 1,
                'bar': 2
            }, target_object=target1)
            batch._make_request('PATCH', URL, {'bar': 3},
                                target_object=target2)
            batch._make_request('DELETE', URL, target_object=target3)
            # Simulate a failure inside the context manager body.
            raise ValueError()
    except ValueError:
        pass
    # The aborted batch never performed any HTTP request.
    http.request.assert_not_called()
    self.assertEqual(list(client._batch_stack), [])
    self.assertEqual(len(batch._requests), 3)
    self.assertEqual(batch._target_objects, [target1, target2, target3])
    # Since the context manager fails, finish will not get called and
    # the _properties will still be futures.
    self.assertIsInstance(target1._properties, _FutureDict)
    self.assertIsInstance(target2._properties, _FutureDict)
    self.assertIsInstance(target3._properties, _FutureDict)
def get_client(project_id, credentials):
    """Return a cached storage client, rebuilding it when the identity changes."""
    global _client
    cached_project, cached_credentials, cached_client = _client
    stale = (
        cached_client is None
        or (project_id, credentials) != (cached_project, cached_credentials)
    )
    if stale:
        _client = (
            project_id,
            credentials,
            Client(project=project_id, credentials=credentials),
        )
    return _client[2]
def test_as_context_mgr_wo_error(self):
    """Exiting the batch context cleanly sends all requests as one multipart call."""
    from google.cloud.storage.client import Client
    url = "http://example.com/api"
    expected_response = _make_response(
        content=_THREE_PART_MIME_RESPONSE,
        headers={"content-type": 'multipart/mixed; boundary="DEADBEEF="'},
    )
    http = _make_requests_session([expected_response])
    project = "PROJECT"
    credentials = _make_credentials()
    client = Client(project=project, credentials=credentials)
    client._http_internal = http
    self.assertEqual(list(client._batch_stack), [])
    target1 = _MockObject()
    target2 = _MockObject()
    target3 = _MockObject()
    with self._make_one(client) as batch:
        # While active, the batch sits on the client's batch stack.
        self.assertEqual(list(client._batch_stack), [batch])
        batch._make_request("POST", url, {
            "foo": 1,
            "bar": 2
        }, target_object=target1)
        batch._make_request("PATCH", url, {"bar": 3}, target_object=target2)
        batch._make_request("DELETE", url, target_object=target3)
    # On clean exit the batch is popped and its requests were finished.
    self.assertEqual(list(client._batch_stack), [])
    self.assertEqual(len(batch._requests), 3)
    self.assertEqual(batch._requests[0][0], "POST")
    self.assertEqual(batch._requests[1][0], "PATCH")
    self.assertEqual(batch._requests[2][0], "DELETE")
    self.assertEqual(batch._target_objects, [target1, target2, target3])
    # Targets get the parsed per-part responses from the multipart reply.
    self.assertEqual(target1._properties, {"foo": 1, "bar": 2})
    self.assertEqual(target2._properties, {"foo": 1, "bar": 3})
    self.assertEqual(target3._properties, b"")
def get_previous_history_timestamp(self):
    """Return the previously saved Nibe history timestamp, or None when absent."""
    if os.getenv('ENV', 'production') == 'local':
        # Local mode reads from the tempdir mirror of the bucket layout.
        local_path = (Path(tempfile.gettempdir()) / HOME_DATA_BUCKET_NAME /
                      'nibe/history/previous_timestamp.txt')
        if not local_path.exists():
            return None
        with open(local_path, 'r') as f:
            return int(json.load(f)[0])
    # Reuse the module-level client across warm invocations.
    global storage_client
    if not storage_client:
        storage_client = Client()
    blob_name = 'nibe/history/previous_timestamp.txt'
    blob = storage_client.bucket(HOME_DATA_BUCKET_NAME).get_blob(blob_name)
    if not blob:
        return None
    return int(json.loads(blob.download_as_string())[0])
def test_create_hit(self):
    """create_bucket with only a name should POST the minimal payload."""
    from google.cloud.storage.client import Client
    PROJECT = "PROJECT"
    BUCKET_NAME = "bucket-name"
    # Minimal insert payload: just the bucket name.
    DATA = {"name": BUCKET_NAME}
    connection = _make_connection(DATA)
    client = Client(project=PROJECT)
    client._base_connection = connection
    bucket = client.create_bucket(BUCKET_NAME)
    # Exactly one insert call with the minimal payload.
    connection.api_request.assert_called_once_with(
        method="POST",
        path="/b",
        query_params={"project": PROJECT},
        data=DATA,
        _target_object=bucket,
        timeout=self._get_default_timeout(),
    )
def test_upload_html_gcs(client: Client):
    """upload() should store HTML in GCS with the text/html content type."""
    id_ = str(uuid.uuid4())
    html = "hello"
    mime = "text/html"
    bucket_name = "vcm-ml-scratch"
    blob_name = f"testing/{id_}/index.html"
    url = f"gs://{bucket_name}/{blob_name}"
    upload(html, url, mime)
    bucket = client.bucket(bucket_name)
    blob = bucket.get_blob(blob_name)
    # The original final line was a bare ``==`` comparison with no assert,
    # so the test could never fail on a wrong content type.
    assert blob.content_type == "text/html"
def _get_client(cls, project_id=None, key_path=None, keyfile_dict=None, credentials=None, scopes=None):
    """Build a storage Client for *cls*, resolving credentials and project id."""
    if not credentials:
        credentials = cls.get_credentials(
            key_path=key_path, keyfile_dict=keyfile_dict, scopes=scopes)
    if not project_id:
        # Fall back to the configured project when none was passed in.
        project_id = config("GCP_PROJECT_ID", default=None)
    return Client(project=project_id, credentials=credentials)
def test_as_context_mgr_w_error(self):
    """If the batch context raises, nothing is sent and targets stay unresolved."""
    from google.cloud.storage.batch import _FutureDict
    from google.cloud.storage.client import Client
    URL = 'http://example.com/api'
    http = _HTTP()
    connection = _Connection(http=http)
    project = 'PROJECT'
    credentials = _make_credentials()
    client = Client(project=project, credentials=credentials)
    client._base_connection = connection
    self.assertEqual(list(client._batch_stack), [])
    target1 = _MockObject()
    target2 = _MockObject()
    target3 = _MockObject()
    try:
        with self._make_one(client) as batch:
            self.assertEqual(list(client._batch_stack), [batch])
            batch._make_request('POST', URL, {'foo': 1, 'bar': 2},
                                target_object=target1)
            batch._make_request('PATCH', URL, {'bar': 3},
                                target_object=target2)
            batch._make_request('DELETE', URL, target_object=target3)
            # Simulate a failure inside the context manager body.
            raise ValueError()
    except ValueError:
        pass
    # The aborted batch never performed any HTTP request.
    self.assertEqual(list(client._batch_stack), [])
    self.assertEqual(len(http._requests), 0)
    self.assertEqual(len(batch._requests), 3)
    self.assertEqual(batch._target_objects, [target1, target2, target3])
    # Since the context manager fails, finish will not get called and
    # the _properties will still be futures.
    self.assertIsInstance(target1._properties, _FutureDict)
    self.assertIsInstance(target2._properties, _FutureDict)
    self.assertIsInstance(target3._properties, _FutureDict)
def get_connection(self):
    """Return the storage client, creating it lazily from the service-account JSON."""
    cached = self._client
    if cached is None:
        cached = Client.from_service_account_json(
            json_credentials_path=self.credentials)
        self._client = cached
    return cached