def cached_get_bucket(
    gcs_client: storage.Client,
    bucket_id: str,
) -> storage.Bucket:
    """Get a storage.Bucket object by bucket_id string if it exists,
    or raise google.cloud.exceptions.NotFound."""
    return gcs_client.get_bucket(bucket_id)
def _store_in_thread(self, file):
    file.seek(0)
    from google.cloud.storage import Client

    client = Client(project=self.project_id)
    bucket = client.get_bucket(self.bucket_name)
    blob = bucket.blob(self.blob_name)
    blob.upload_from_file(file, predefined_acl=self.acl)
def lock(self):
    """
    This is the best we can do. It is impossible to acquire the lock
    reliably without using any additional services; test-and-set is
    impossible to implement.

    :return: None
    """
    log = self._log
    log.info("Locking the bucket...")
    # Client should be imported here because grpc starts threads during import,
    # and if you call fork after that, a child process will hang during exit.
    from google.cloud.storage import Client

    if self.credentials:
        client = Client.from_service_account_json(self.credentials)
    else:
        client = Client()
    bucket = client.get_bucket(self.bucket_name)
    self._bucket = bucket
    sentinel = bucket.blob("index.lock")
    try:
        while sentinel.exists():
            log.warning("Failed to acquire the lock, waiting...")
            time.sleep(1)
        sentinel.upload_from_string(b"")
        # Several agents can get here. No test-and-set, sorry!
        yield None
    finally:
        self._bucket = None
        if sentinel is not None:
            try:
                sentinel.delete()
            except Exception:
                pass
def store_toxicity(gcs_client: storage.Client, bucket_name: Optional[str],
                   file_name: str, file_contents: List[dict]) -> None:
    """Uploads toxicity JSON object to GCS.

    Args:
        gcs_client: google.cloud.storage.Client
        bucket_name: String of the name of the bucket to store the files.
        file_name: String of the audio file name.
        file_contents: List of dicts holding toxicity information.

    Returns:
        None; logs message to Stackdriver.
    """
    logging.info(
        f'Starting store_toxicity with {file_contents} in {bucket_name}')
    try:
        bucket = gcs_client.get_bucket(bucket_name)
        destination = bucket.blob(f'toxicity-files/{file_name}')
        destination.upload_from_string(json.dumps(file_contents),
                                       content_type='application/json')
        logging.info(f'Successfully stored {file_contents} for {file_name} in '
                     f'{bucket_name}')
    except Exception as e:
        logging.error('Storing toxicity results failed.')
        logging.error(e)
def bucket_object(storage_client: storage.Client) -> storage.Bucket:
    """GCS Bucket from .env config."""
    if not storage_client.lookup_bucket(TEST_BUCKET):
        bucket = storage_client.create_bucket(TEST_BUCKET)
    else:
        bucket = storage_client.get_bucket(TEST_BUCKET)
    yield bucket
def _download_from_cloudstorage(self, blob_path: str, local_path: str) -> str:
    client = Client()
    bucket = client.get_bucket(self.BUCKET)
    blob = bucket.blob(blob_path)
    blob.download_to_filename(local_path)
    return local_path
def get_bucket(gcs: storage.Client, url_tokens: dict) -> storage.Bucket:
    try:
        return gcs.get_bucket(url_tokens["bucket"])
    except Exception as e:
        LOG.error("Error accessing bucket: {}\n\t{}".format(
            url_tokens["bucket"], e))
        exit(1)
def _create_connection(self):
    client = Client(
        credentials=self.credentials,
        project=self.project,
    )
    return client.get_bucket(self.bucket)
def get_gcp_service_account_credentials(gcp_project_id):
    # Retrieve service account information corresponding to the GCP Project ID provided
    #
    bucket, blob_name = get_gcp_service_account_infos(gcp_project_id)

    if (bucket is None) or (blob_name is None):
        return None

    try:
        # Read the credentials from GCS
        #
        gcs_client = Client()
        bucket = gcs_client.get_bucket(bucket)
        blob = Blob(blob_name, bucket)
        json_credentials = json.loads(blob.download_as_string())

        # Build and return GCP Credentials
        #
        return service_account.Credentials.from_service_account_info(
            json_credentials)

    except Exception as ex:
        print("Cannot retrieve Service Account credentials.")
        print(ex)
        return None
def __init__(
        self,
        bucketname: str,  # Should be bucket and/or blob name
        filename: str,
        store_user_data: bool = True,
        store_chat_data: bool = True,
        store_bot_data: bool = True,
        single_file: bool = True,  # If False, stores in chatID_user_data.json, chatID_chat_data.json, chatID_bot_data.json
        on_flush: bool = False,
        storage_client: storage.Client = storage.Client()):
    super().__init__(
        store_user_data=store_user_data,
        store_chat_data=store_chat_data,
        store_bot_data=store_bot_data,
    )
    self.bucketname = bucketname
    self.filename = filename
    try:
        self.bucket = storage_client.get_bucket(bucketname)
    except Exception:
        # Bucket does not exist yet: create it and seed the file with an empty JSON object.
        self.bucket = storage_client.create_bucket(bucketname)
        blob = self.bucket.blob(filename)
        blob.upload_from_string(json.dumps({}))
    self.storage_client = storage_client
    self.single_file = single_file
    self.on_flush = on_flush
    self.user_data: Optional[DefaultDict[int, Dict]] = None
    self.chat_data: Optional[DefaultDict[int, Dict]] = None
    self.bot_data: Optional[Dict] = None
    self.conversations: Optional[Dict[str, Dict[Tuple, object]]] = None
def upload_to_bucket(
        client: storage.Client, src_path: str, dest_bucket_name: str, dest_path: str):
    """Upload a file or a directory (recursively) from the local file system to the specified bucket.

    Args:
        client (storage.Client): Google Cloud Storage client object used to access resources.
        src_path (str): Path to the local file or directory you want to send.
        dest_bucket_name (str): Destination bucket name.
        dest_path (str): Path where you want to store the data inside the bucket.
    """
    bucket = client.get_bucket(dest_bucket_name)

    if os.path.isfile(src_path):
        blob = bucket.blob(os.path.join(dest_path, os.path.basename(src_path)))
        blob.upload_from_filename(src_path)
        return

    for item in glob.glob(src_path + '/*'):
        if os.path.isfile(item):
            blob = bucket.blob(os.path.join(dest_path, os.path.basename(item)))
            blob.upload_from_filename(item)
        else:
            upload_to_bucket(client, item, dest_bucket_name,
                             os.path.join(dest_path, os.path.basename(item)))
async def upload_picture(file: bytes = File(None, media_type="image/jpeg")):
    """Uploads an image from the phone to the server and saves it to the bucket.

    Args:
        file (bytes): the image taken from the camera app, encoded in bytes

    Returns:
        response (str): JSON response with the uuid of the uploaded file
    """
    try:
        if file is None:
            raise HTTPException(status_code=422, detail="Empty image sent")
        else:
            # Initializes the Storage client
            storage_client = Client(project=creds.project_id)
            bucket = storage_client.get_bucket(creds.bucket_id)

            # Generates a unique identifier for storage
            img_uuid = str(uuid.uuid4())[0:6]
            blob = bucket.blob(img_uuid)

            # Decodes the base64-encoded bytearray of the incoming image
            content = base64.b64decode(file)

            # Uploads the decoded image as image/jpeg
            blob.upload_from_string(data=content, content_type="image/jpeg")
            return {"detail": img_uuid}
    except GoogleCloudError as e:
        raise HTTPException(detail=str(e), status_code=500)
def upload_to_gcs(training_records: Iterable[str],
                  validation_records: Iterable[str],
                  gcs_output_path: str,
                  gcs_project: str,
                  client: storage.Client = None):
    """Uploads TF-Record files to GCS, at the provided path."""
    # Find the GCS bucket_name and key_prefix for dataset files
    path_parts = gcs_output_path[5:].split('/', 1)
    bucket_name = path_parts[0]
    if len(path_parts) == 1:
        key_prefix = ''
    elif path_parts[1].endswith('/'):
        key_prefix = path_parts[1]
    else:
        key_prefix = path_parts[1] + '/'

    client = client if client else storage.Client(project=gcs_project)
    bucket = client.get_bucket(bucket_name)

    def _upload_files(filenames: Iterable[str]):
        """Uploads a list of files into a specific subdirectory."""
        for i, filename in enumerate(sorted(filenames)):
            blob = bucket.blob(key_prefix + os.path.basename(filename))
            blob.upload_from_filename(filename)
            if not i % 20:
                logging.info('Finished uploading file: %s', filename)

    # Upload training dataset
    logging.info('Uploading the training data.')
    _upload_files(training_records)

    # Upload validation dataset
    logging.info('Uploading the validation data.')
    _upload_files(validation_records)
def _upload(self, payload: bytes, filename: str, bucket: str) -> None:
    """Upload a payload to GCS."""
    client = Client(project=self.project_id)
    count = 0
    while count < self.max_retries:
        try:
            bucket_obj = client.get_bucket(bucket)
            if self.use_encryption:
                payload = self._encrypt(payload)
            content = BytesIO(payload)
            blob = Blob(filename, bucket_obj)
            blob.upload_from_file(content)
            break
        except (
            InvalidResponse,
            GoogleAPICallError,
            InternalServerError,
            SSLError,
        ) as e:
            count += 1
            if count >= self.max_retries:
                raise StoqPluginException(
                    f'Failed to upload {bucket}/{filename} to GCS: {str(e)}')
            sleep(randrange(0, 4))
def _get_bucket(self, client: Client) -> Optional[Bucket]:
    try:
        return client.get_bucket(bucket_or_name=self.bucket_name)
    except NotFound:
        self.logger.error(f'Bucket {self.bucket_name} not found')
        return None
    except Exception as err:
        self.logger.error(err)
def delete_photos_from_bucket(storage_client: storage.Client, blob_path, bucket_name):
    bucket = storage_client.get_bucket(bucket_name)
    try:
        bucket.delete_blob(blob_path)
    except NotFound:
        print(f"Deleting photos from bucket, blob {blob_path} not found")
def get_last_position_token(storage_client: storage.Client, bucket, key, logger) -> Optional[str]:
    try:
        bucket = storage_client.get_bucket(bucket)
        blob = bucket.get_blob(key)
        last_position_token = blob.download_as_string().decode("UTF-8")
        return last_position_token
    except Exception as e:
        logger.warning("could not get last position_token: {}".format(e))
        return None
class GCSClient:
    def __init__(self, **kwargs):
        """Initialize a client to Google Cloud Storage (GCS)."""
        self.client = Client(**kwargs)

    def download(self, bucket_name, object_key, localfile):
        """Download a single object from GCS."""
        bucket = self.client.get_bucket(bucket_name)
        blob = bucket.blob(object_key)
        blob.download_to_filename(localfile)

    def upload(self, localfile, bucket_name, object_key):
        """Upload a single object to GCS."""
        bucket = self.client.get_bucket(bucket_name)
        blob = bucket.blob(object_key)
        logger.info(f"filename : {localfile}")
        blob.upload_from_filename(localfile)
def _store_pickle_file(df: pd.DataFrame, name: str, client: storage.Client, bucket: str) -> None:
    logger.info(f'Upload {name}')
    filename = SUBMISSION_FILES.get(name)
    path = '/'.join([SOURCE_PATH, SUBMISSION_PATH, filename])
    bucket = client.get_bucket(bucket)
    blob = bucket.blob(path)
    df_io = io.BytesIO()
    pickle.dump(df, df_io)
    df_io.seek(0)
    blob.upload_from_file(df_io)
    df_io.close()
def get_ssh_client(ipaddress):
    # Instantiate SSH Client
    # Global timeout : 60 seconds
    #
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    date_start = datetime.datetime.now()
    while True:
        # Global timeout check
        if (datetime.datetime.now() - date_start).total_seconds() > 20:
            return None

        try:
            private_key = None

            # Read private key from GCS
            #
            gcs_client = Client()
            bucket = gcs_client.get_bucket("fd-io-jarvis-platform-api")
            blob = Blob("credentials/fd-io-key.private", bucket)
            read_key = str(blob.download_as_string(), "utf-8")
            with io.StringIO() as f:
                f.write(read_key)
                f.seek(0)
                private_key = paramiko.rsakey.RSAKey.from_private_key(f)

            client.connect(hostname=ipaddress,
                           timeout=10,
                           look_for_keys=False,
                           pkey=private_key,
                           passphrase='',
                           username='******')

            # If we are here this means we connected successfully to the instance
            #
            break
        except paramiko.ssh_exception.NoValidConnectionsError as novalid:
            logging.info(novalid)
            logging.info("Error during SSH connection to the instance. Retrying ...")
        except Exception as ex:
            logging.info(ex)
            logging.info("Error during SSH connection to the instance. Retrying ...")

        time.sleep(5)
    # End WHILE

    return client
def read_digester_jsons_from_gcs(
        bucket_name: str,
        base_path: str,
        digester_version: str,
        workflow_ids: List[str],
        storage_client: storage.Client) -> List[Tuple[str, dict]]:
    bucket = storage_client.get_bucket(bucket_name)
    result = []
    for workflow_id in workflow_ids:
        blob = bucket.blob(
            f"{base_path}/{workflow_id}/digests/{digester_version}/digest.json"
        )
        json_string_bytes = blob.download_as_string()
        result.append((workflow_id, json.loads(json_string_bytes)))
    return result
def save_photos_to_bucket(storage_client: storage.Client, blob_path_general, photo_links, bucket_name):
    bucket = storage_client.get_bucket(bucket_name)
    for i, link in enumerate(photo_links):
        r = request_("GET", link)
        with io.BytesIO() as buf:
            buf.write(r.content)
            buf.seek(0)
            blob_path = blob_path_general + f"{i}.jpg"
            blob = bucket.blob(blob_path)
            blob.upload_from_file(buf)
def get_gcs_object(gcs_client: storage.Client, bucket_name: str,
                   file_name: str) -> List[dict]:
    """Downloads object file from GCS.

    Args:
        gcs_client: google.cloud.storage.Client
        bucket_name: String representing bucket name.
        file_name: String representing file name.

    Returns:
        List of dictionaries with transcript metadata.
    """
    bucket = gcs_client.get_bucket(bucket_name)
    blob = bucket.blob(file_name)
    return json.loads(blob.download_as_string().decode('utf-8'))
def get_transcript(gcs_client: storage.Client, bucket_name: str,
                   file_name: str) -> dict:
    """Downloads transcript file from GCS.

    Args:
        gcs_client: google.cloud.storage.Client.
        bucket_name: String representing bucket name of audio file.
        file_name: String representing audio file name.

    Returns:
        List of dicts holding transcript object.
    """
    bucket = gcs_client.get_bucket(bucket_name)
    transcript = bucket.blob(file_name)
    return json.loads(transcript.download_as_string())
def upload_file(bucket_name: str, file_name: str, upload_name: str,
                client: storage.Client):
    """A function to upload a file to a GCP bucket.

    :param bucket_name: name of the destination bucket
    :type bucket_name: str
    :param file_name: name of the blob to create in the bucket
    :type file_name: str
    :param upload_name: path of the local file to upload
    :type upload_name: str
    :param client: Google Cloud Storage client
    :type client: storage.Client
    """
    bucket = client.get_bucket(bucket_name)
    blob = bucket.blob(file_name)
    blob.upload_from_filename(upload_name)
def _get_pickle_file(path: str, client: storage.Client, bucket: str) -> pd.DataFrame:
    """
    Retrieve a pickle file from the target GCS bucket and convert it to a DataFrame.
    """
    logger.info(f'Download {path}')
    path = '/'.join([SOURCE_PATH, path])
    bucket = client.get_bucket(bucket)
    blob = bucket.blob(path)
    df_io = io.BytesIO()
    blob.download_to_file(df_io)
    df_io.seek(0)
    df = pickle.load(df_io)
    df_io.close()
    return df
def get_transcript(gcs_client: storage.Client, bucket_name: str,
                   file_name: str) -> dict:
    """Downloads transcript file from GCS.

    Args:
        gcs_client: google.cloud.storage.Client representing the GCS client object.
        bucket_name: String representing the bucket name of the audio transcription.
        file_name: String of 'transcript.json'.

    Returns:
        JSON holding transcript object.
    """
    logging.info(f'Retrieving transcript for {file_name} from {bucket_name}')
    bucket = gcs_client.get_bucket(bucket_name)
    transcript = bucket.blob(file_name)
    return json.loads(transcript.download_as_string())
def get_firebase_admin_sdk_service_account_credentials():
    # Read the credentials file from GCS
    #
    # Instantiate Google Cloud Storage client
    #
    try:
        gcs_client = Client()
        bucket = gcs_client.get_bucket(FB_ADMIN_SDK_BUCKET)
        blob = Blob(FB_ADMIN_SDK_CREDENTIALS_PATH, bucket)
        json_credentials = json.loads(blob.download_as_string())
        return credentials.Certificate(json_credentials)

    except Exception as ex:
        print("Cannot retrieve FB Admin SDK service account credentials")
        print(ex)
        return None
def upload_json_to_gcs(gcs_client: storage.Client, bucket_name: Optional[str],
                       file_name: str, file_contents: List[dict]) -> None:
    """Uploads toxicity JSON object to GCS.

    Args:
        gcs_client: google.cloud.storage.Client
        bucket_name: String holding the bucket where the results are stored.
        file_name: String holding the name of the audio file.
        file_contents: List of dicts holding toxicity information.

    Returns:
        None
    """
    bucket = gcs_client.get_bucket(bucket_name)
    destination = bucket.blob(f'nlp-files/{file_name}')
    destination.upload_from_string(json.dumps(file_contents),
                                   content_type='application/json')
def get_files(client: storage.Client, bucket: storage.Bucket) -> List[dict]:
    """Retrieves all files in a given GCS bucket.

    Args:
        client: Object representing the Python GCS client.
        bucket: google.cloud.storage.Bucket holding the bucket name.

    Returns:
        List of dicts:
        [{
            'name': String holding the file name,
            'type': String representing the type of file, e.g. 'audio/flac'.
        }]
    """
    bucket = client.get_bucket(bucket)
    return [{
        'name': blob.name,
        'type': blob.content_type
    } for blob in list(bucket.list_blobs())]