def rotate_encryption_key(bucket_name, blob_name, base64_encryption_key, base64_new_encryption_key):
    """Rewrite an encrypted blob onto itself so it ends up under a new key.

    Args:
        bucket_name: Name of the bucket that holds the blob.
        blob_name: Name of the blob whose key is rotated.
        base64_encryption_key: Base64 text of the key currently in use.
        base64_new_encryption_key: Base64 text of the replacement key.
    """
    client = storage.Client()
    target_bucket = client.get_bucket(bucket_name)

    old_key = base64.b64decode(base64_encryption_key)
    new_key = base64.b64decode(base64_new_encryption_key)

    # Two handles onto the same storage object: the reader carries the
    # current key, the writer carries the new one.
    reader = Blob(blob_name, target_bucket, encryption_key=old_key)
    writer = Blob(blob_name, target_bucket, encryption_key=new_key)

    # Keep calling rewrite() until it stops handing back a continuation token.
    resume_token, _, _ = writer.rewrite(reader, token=None)
    while resume_token is not None:
        resume_token, _, _ = writer.rewrite(reader, token=resume_token)

    print('Key rotation complete for Blob {}'.format(blob_name))
def test_load_stage(self):
    """Verify load_stage loads every vocabulary table and rejects an incomplete bucket."""
    # TODO check that extra files are skipped
    vocab_tables = common.VOCABULARY_TABLES

    # Case 1: every vocabulary file is present, so one load call per table
    # is expected, in table order.
    self.gcs_client.list_blobs.return_value = [
        Blob(f'{table}.csv', self.bucket_name) for table in vocab_tables
    ]
    load_vocab.load_stage(self.dst_dataset, self.bq_client, self.bucket_name,
                          self.gcs_client)
    load_mock = self.bq_client.load_table_from_uri

    wanted_calls = []
    for table in vocab_tables:
        wanted_calls.append((f'gs://{self.bucket_name}/{table}.csv',
                             self.dst_dataset.table(table)))

    # Each recorded call unpacks as (positional args, keyword args); only the
    # two positional arguments are compared.
    seen_calls = []
    for (source_uri, destination), _ in load_mock.call_args_list:
        seen_calls.append((source_uri, destination))

    self.assertListEqual(wanted_calls, seen_calls)

    # Case 2: two vocabulary files are absent, so a RuntimeError naming
    # them is expected.
    absent_tables = [common.DOMAIN, common.CONCEPT_SYNONYM]
    self.gcs_client.list_blobs.return_value = [
        Blob(f'{table}.csv', self.bucket_name)
        for table in vocab_tables
        if table not in absent_tables
    ]
    wanted_message = f'Bucket {self.bucket_name} is missing files for tables {absent_tables}'

    with self.assertRaises(RuntimeError) as raised:
        load_vocab.load_stage(self.dst_dataset, self.bq_client,
                              self.bucket_name, self.gcs_client)

    self.assertIsInstance(raised.exception, RuntimeError)
    self.assertEqual(str(raised.exception), wanted_message)
def test_blob():
    """Yield ``(name, content)`` of a freshly uploaded encrypted blob, then delete it."""
    test_bucket = storage.Client().bucket(BUCKET)
    name = "test_blob_{}".format(uuid.uuid4().hex)
    payload = "Hello, is it me you're looking for?"

    original = Blob(name, test_bucket, encryption_key=TEST_ENCRYPTION_KEY_DECODED)
    original.upload_from_string(payload)

    yield original.name, payload

    # Deleting an encrypted blob needs the key it is currently stored under;
    # a wrong key is reported as NotFound.
    try:
        # The blob still uses the first key if no rotation happened.
        original.delete()
    except NotFound as e:
        # The rotation went through, so retry the delete with the second key.
        print("Ignoring 404, detail: {}".format(e))
        rotated = Blob(name, test_bucket, encryption_key=TEST_ENCRYPTION_KEY_2_DECODED)
        rotated.delete()
def test_get_latest_version_from_bucket(mocker):
    """
    Given:
        - A pack id and a bucket.
    When:
        - Asking for the latest version of the pack in the bucket.
        - The bucket also lists a TestPack_with_dependencies.zip file.
    Then:
        - The returned version is the highest one listed.
        - The with_dependencies zip does not affect the result.
    """
    bucket_mock = mocker.MagicMock()
    pack_root = f'{GCPConfig.CONTENT_PACKS_PATH}/TestPack'

    object_names = (
        f'{pack_root}/1.0.0/TestPack.zip',
        f'{pack_root}/1.0.1/TestPack.zip',
        f'{pack_root}/TestPack_with_dependencies.zip',
    )
    bucket_mock.list_blobs.return_value = [
        Blob(object_name, bucket_mock) for object_name in object_names
    ]

    latest = script.get_latest_version_from_bucket('TestPack', bucket_mock)
    assert latest == '1.0.1'
def exists(self, uri_path, bucket_name=None, exact=False, **kwargs):
    """Report whether ``uri_path`` exists in the bucket.

    When the exact key is not found and ``exact`` is ``False``, a second
    lookup is made for the ``<basename>_$folder$`` key under ``uri_path``.

    Args:
        uri_path: Object key to look for.
        bucket_name: Passed to ``self._gcp_bucket`` to resolve the bucket.
        exact: When ``False`` (the default), fall back to the folder key.
        **kwargs: Accepted but not used here.

    Returns:
        The result of the last ``Blob.exists()`` call that was made.
    """
    bucket = self._gcp_bucket(bucket_name)
    found = Blob(uri_path, bucket).exists()

    # Only a literal False for both values triggers the folder fallback.
    if found is not False or exact is not False:
        return found

    marker_name = '{0}_$folder$'.format(path.basename(uri_path))
    marker_key = path.join(uri_path, marker_name)
    return Blob(marker_key, bucket).exists()
def main():
    """Run a download / blur / re-upload loop against the first listed bucket.

    Every bucket visible to the default client is printed and the first one
    is chosen. The function then loops, sleeping one second between passes:
    when the bucket holds at least one object, the current blob is downloaded
    to a hard-coded desktop path, blurred, saved as ``blurred_image.jpg``,
    uploaded under that name and displayed. After the first pass both the
    blob and the local path refer to the blurred image, so later passes
    download and blur that one again.

    Returns early, after printing a notice, when the client lists no buckets
    (this previously failed with a ``NameError``).
    """
    #
    # initial greeting...
    #
    print("Hello Google Cloud Storage!")
    #
    # create a client
    #
    print("creating client...")
    client = storage.Client()

    default_bucket = None
    print("indexing over bucket list...")
    for index, bucket in enumerate(client.list_buckets()):
        print(bucket)
        print("index = " + str(index))
        if index == 0:
            default_bucket = bucket

    if default_bucket is None:
        # Nothing to work with; bail out instead of hitting an unbound name.
        print("no buckets found...")
        return

    print("")
    print("chosen bucket is: " + str(default_bucket))

    blob = Blob("raw_image.jpg", default_bucket)
    # Nothing in this function ever sets the flag, so the loop runs until
    # the process is interrupted.
    should_quit = False
    image_file_path = "/home/shawn/Desktop/raw_image_download.jpg"

    while not should_quit:
        has_blobs = any(True for _ in default_bucket.list_blobs())
        if not has_blobs:
            print("empty...")
        else:
            print("downloading...")
            with open(image_file_path, "wb") as image_file:
                blob.download_to_file(image_file)

            with Image(filename=image_file_path) as img:
                print(img.size)
                print("blurring...")
                img.gaussian_blur(9, 1)
                image_file_path = "/home/shawn/Desktop/blurred_image.jpg"
                print("saving...")
                img.save(filename=image_file_path)

            with Image(filename=image_file_path) as img:
                blob = Blob("blurred_image.jpg", default_bucket)
                print("uploading...")
                with open("/home/shawn/Desktop/blurred_image.jpg", "rb") as image_file:
                    blob.upload_from_file(image_file)
                display(img)
        time.sleep(1.0)
    #
    # final greeting...
    #
    print("Goodbye Google Cloud Storage!")
def test_get_latest_version_from_bucket(mocker):
    """
    Given:
        - A pack id and a bucket.
    When:
        - Asking for the latest version of the pack in the bucket.
    Then:
        - The returned version is the highest one listed.
    """
    bucket_mock = mocker.MagicMock()
    pack_root = f'{GCPConfig.STORAGE_BASE_PATH}/TestPack'

    listed = []
    for version in ('1.0.0', '1.0.1'):
        listed.append(Blob(f'{pack_root}/{version}/TestPack.zip', bucket_mock))
    bucket_mock.list_blobs.return_value = listed

    latest = script.get_latest_version_from_bucket('TestPack', bucket_mock)
    assert latest == '1.0.1'
def upload_image(self, location: str, label: str, image_bytes, metadata: dict) -> typing.Optional[str]:
    """Upload a PNG under ``<location>/<label>``, make it public and return its URL.

    Args:
        location: Prefix of the object name.
        label: Final component of the object name.
        image_bytes: Object handed to ``Blob.upload_from_file``.
        metadata: Assigned to the blob's ``metadata`` before the upload.

    Returns:
        The blob's ``public_url`` after ``make_public()`` has been called.
    """
    object_name = f"{location}/{label}"
    image_blob = Blob(object_name, self.bucket)
    image_blob.metadata = metadata
    image_blob.upload_from_file(image_bytes, content_type="image/png")
    image_blob.make_public()
    public_link = image_blob.public_url
    return public_link
def blob_exists(bucket_name, blob_name):
    """Check whether a blob exists, retrying on unexpected errors.

    Up to ``NUM_TRIES`` attempts are made. Between failed attempts the
    function sleeps 300 seconds; no sleep follows the final attempt.

    Args:
        bucket_name: Name of the bucket to look in.
        blob_name: Name (key) of the blob.

    Returns:
        ``True`` when the blob exists. ``False`` when the bucket does not
        exist, when the service answers ``BadRequest``, or when every
        attempt failed.
    """
    tries = 0
    while tries < NUM_TRIES:
        try:
            if not bucket_exists(bucket_name):
                return False
            client = storage.Client()
            bucket = client.bucket(bucket_name)
            return Blob(blob_name, bucket).exists()
        except BadRequest:
            return False
        except Exception as exc:
            tries += 1
            # Record why the attempt failed instead of dropping it silently.
            logger.warning(
                "Attempt {} to check blob {} failed: {}".format(
                    tries, blob_name, exc
                )
            )
            # Waiting only makes sense when another attempt will follow.
            if tries < NUM_TRIES:
                time.sleep(300)

    logger.error(
        "Can not check the status of the blob {} after multiple attempts".format(
            blob_name
        )
    )
    return False
def download_bhavcopy(event, context):
    """Fetch and store the bhavcopy for the day after the one named in the trigger.

    Args:
        event: Trigger payload. ``event['data']`` is base64 text that is
            decoded and printed; ``event['attributes']['objectId']`` is the
            file name from which the six-digit ``ddmmyy`` date is taken.
        context: Not used.
    """
    holiday_dict = create_holiday_dict()
    logging.info('Dictionary created for ' + str(len(holiday_dict)) + ' holidays')
    base_url = 'https://www.bseindia.com/download/BhavCopy/Equity/'

    decoded_payload = base64.b64decode(event['data']).decode('utf-8')
    print(decoded_payload)
    attributes = event['attributes']
    print(attributes)

    # The date sits right after an "EQ" marker (any letter case) in the name.
    source_name = attributes['objectId']
    date_match = re.search(r'([eE][qQ])(\d\d\d\d\d\d)', source_name)
    named_day = datetime.strptime(date_match.group(2), '%d%m%y')
    new_date = named_day + timedelta(days=1)

    file_downloaded_locally, new_fname = check_and_download(
        new_date, holiday_dict, base_url)

    try:
        needs_upload = file_downloaded_locally and (
            not check_if_already_stored(new_fname))
        if needs_upload:
            client = storage.Client(project='bhavcopy')
            store_bucket = client.get_bucket('bhavcopy-store')
            target = Blob(new_fname, store_bucket)
            with open('/tmp/' + new_fname, 'rb') as local_copy:
                target.upload_from_file(local_copy)
    except Exception as e:
        # Any failure while storing is logged and otherwise ignored.
        logging.info(
            'Not Downloaded: Cloud function exiting without storing file for date: '
            + str(new_date) + '.Received error: ' + str(e))
def write(self, file_path, data, num_retries=10, content_type=None, bucket=None):
    """Upload ``data`` to ``file_path`` in the configured bucket.

    JSON objects and arrays (given either as text or as ``dict``/``list``)
    are re-serialised with ``json.dumps`` before upload. Text that parses to
    a JSON string keeps the previous behaviour of uploading the parsed
    string. Text that parses to a JSON number, boolean or null is uploaded
    unchanged; previously the parsed value reached ``_to_bytes`` and failed.

    Args:
        file_path: Object name to write.
        data: Payload; text, bytes, ``dict`` or ``list``.
        num_retries: Passed to the upload; ``self.num_retries`` is used when
            this is falsy.
        content_type: Key into ``_MEME_TYPES``, used only when
            ``self.content_type`` is falsy.
        bucket: Bucket name, used only when ``self.bucket`` is falsy.

    Returns:
        None.
    """
    bucket = self.client.get_bucket(self.bucket or bucket)
    try:
        blob = Blob(file_path, bucket)
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit pass through.
        blob = bucket.get_blob(file_path)

    try:
        parsed = json.loads(data)
    except (TypeError, ValueError):
        # Not JSON text (or not text at all): work with the value as given.
        parsed = data

    if isinstance(parsed, (dict, list)):
        data = json.dumps(parsed)
    elif isinstance(parsed, (str, bytes)):
        data = parsed
    # Otherwise the text held a JSON scalar; keep the original text so it
    # can still be encoded below.

    payload = _to_bytes(data, encoding="utf-8")
    blob.upload_from_file(
        file_obj=BytesIO(payload),
        size=len(payload),
        client=self.client,
        num_retries=num_retries or self.num_retries,
        content_type=_MEME_TYPES[self.content_type or content_type],
    )
    return
def write_data(self, data: str, bucket_name: str, name: str, content_type: str = None):
    """Upload ``data`` as the object ``name`` in the bucket ``bucket_name``.

    Args:
        data: Content handed to ``Blob.upload_from_string``.
        bucket_name: Resolved to a bucket through ``self.get_bucket``.
        name: Object name inside the bucket.
        content_type: Forwarded to the upload as ``content_type``.
    """
    target_bucket = self.get_bucket(bucket_name)
    target = Blob(name, target_bucket)
    target.upload_from_string(data, content_type=content_type)
def txfr_blob(filename: str, bq: BigQueryer = PostBigQueryer(), cs: CloudStorager = CloudStorager()):
    """
    Stage a local file in cloud storage, load it into BigQuery, then delete the staged object.

    The default ``bq`` and ``cs`` objects are created once, when this
    function is defined, and are shared by every call that omits them.

    :param filename: path of the local file to upload
    :param bq: provides ``get_bigquery_table()`` and ``client``
    :param cs: provides ``get_cloud_storage_bucket()``, ``bucket`` and ``client``
    :return: None
    """
    stamp = current_time_ms()  # millisecond timestamp keeps the names unique
    object_name = 'api-update-blob-{}'.format(stamp)

    staged = Blob(object_name, cs.get_cloud_storage_bucket())
    logger.info("Uploading file (this will take a long time)... ")
    staged.upload_from_filename(filename)

    # change this to change table
    destination_table = bq.get_bigquery_table()
    source_uri = 'gs://' + cs.bucket + "/" + object_name
    logger.info("Loading file to BQ...")

    load_job = LoadTableFromStorageJob(
        'api-job-{}'.format(stamp),
        destination_table,
        [source_uri],
        client=bq.client,
    )
    load_job.write_disposition = 'WRITE_APPEND'
    load_job.source_format = 'AVRO'
    load_job.begin()
    wait_for_job(load_job)

    logger.info("Cleaning up...")
    staged.delete(cs.client)
def write_gcloud_blob(bucket_id: str, gpath: str, fpath: str):
    """
    Upload a local file to Google Cloud Storage.

    Any exception raised during the upload is logged and not re-raised.

    References:
      https://pypi.org/project/google-cloud-storage/

    :param bucket_id: id for google cloud bucket
    :param gpath: destination path of the item within the bucket
    :param fpath: path of the file on disk
    :return: nothing
    :rtype: None
    """
    start_note = "Writing '{}' to '{}' at '{}'".format(fpath, bucket_id, gpath)
    logger.info(start_note)
    try:
        target_bucket = storage.Client().get_bucket(bucket_id)
        target = Blob(gpath, target_bucket)
        with open(fpath, 'rb') as source:
            target.upload_from_file(source)
        done_note = "SUCCESS -- uploaded '{}' to '{}' using '{}'".format(
            fpath, gpath, bucket_id)
        logger.info(done_note)
    except Exception as exc:
        fail_note = "Unable to upload '{}' to '{}' using '{}'".format(
            fpath, gpath, bucket_id)
        logger.error(fail_note)
        logger.exception(exc)
def get_gcp_service_account_credentials(gcp_project_id):
    """Build service-account credentials from a JSON key stored in GCS.

    Args:
        gcp_project_id: Project id passed to
            ``get_gcp_service_account_infos`` to find the bucket and blob
            that hold the key.

    Returns:
        The credentials object, or ``None`` when no bucket/blob is known for
        the project or when reading or parsing the key fails.
    """
    bucket_name, key_blob_name = get_gcp_service_account_infos(gcp_project_id)
    if bucket_name is None or key_blob_name is None:
        return None

    try:
        # Pull the JSON key out of the bucket and parse it.
        key_bucket = Client().get_bucket(bucket_name)
        raw_key = Blob(key_blob_name, key_bucket).download_as_string()
        key_info = json.loads(raw_key)
        credentials = service_account.Credentials.from_service_account_info(
            key_info)
    except Exception as ex:
        print("Cannot retrieve Service Account credentials.")
        print(ex)
        return None

    return credentials
def delete_gcp_file(url):
    """Delete the object that ``url`` maps to (via ``gcp_path``) from the configured bucket."""
    print('###### start removing %s' % url)

    gcs = storage.Client.from_service_account_json(settings.GCP_KEY_PATH)
    target_bucket = gcs.get_bucket(settings.BUCKET_NAME)
    Blob(gcp_path(url), target_bucket).delete()

    print('###### removing success %s' % url)
def _download_from_blobstore(self, blob_to_download_name, blob_download_target_path, chunk_size=None): """Download file from blobstore. :type chunk_size: int :param chunk_size: If file size if greater than 5MB, it is recommended that, chunked downloads should be used. To do so, pass chunk_size param to this function. This must be a multiple of 256 KB per the API specification. """ log_prefix = '[Google Cloud Storage] [DOWNLOAD]' if self.container: self.logger.info('{} Started to download the tarball to target.'.format( log_prefix, blob_download_target_path)) try: blob = Blob(blob_to_download_name, self.container, chunk_size=chunk_size) blob.download_to_filename(blob_download_target_path) self.logger.info('{} SUCCESS: blob_to_download={}, blob_target_name={}, container={}' .format(log_prefix, blob_to_download_name, self.CONTAINER, blob_download_target_path)) return True except Exception as error: message = '{} ERROR: blob_to_download={}, blob_target_name={}, container={}\n{}'.format( log_prefix, blob_to_download_name, blob_download_target_path, self.CONTAINER, error) self.logger.error(message) raise Exception(message) else: message = '{} ERROR: blob_to_download={}, blob_target_name={}, container={}\n{}'.format( log_prefix, blob_to_download_name, blob_download_target_path, self.CONTAINER, "Container not found or accessible") self.logger.error(message) raise Exception(message)
def upload_blob_string(bucket_name, csvString, destination_blob_name):
    """Upload CSV text to ``destination_blob_name`` with content type ``text/csv``.

    Args:
        bucket_name: Name of the destination bucket.
        csvString: The CSV content to upload.
        destination_blob_name: Object name to write.

    Returns:
        Whatever ``Blob.upload_from_string`` returns.
    """
    target_bucket = storage.Client().get_bucket(bucket_name)
    target = Blob(destination_blob_name, target_bucket)
    outcome = target.upload_from_string(data=csvString, content_type='text/csv')
    return outcome
def upload_image_to_gcs(self, image_file_path, bucket_name):
    """Uploads an image to Google Cloud Storage.

    The object name is built from the fourth and fifth ``/``-separated
    components of ``image_file_path``.

    Arguments:
        image_file_path: Path to image to upload on local machine.
        bucket_name: Name of the GCS bucket.

    Raises:
        MotionAlertError: if the bucket cannot be fetched or the upload
            (including building the object name) fails.
    """
    import os

    project = "<GCP_project_id>"
    # "~" is not expanded when a file is opened, so resolve it explicitly;
    # otherwise the key is looked up under a literal "~" directory.
    key_path = os.path.expanduser('~/gcp-service-account.json')
    credentials = service_account.Credentials.from_service_account_file(
        key_path)
    client = storage.Client(project, credentials)

    try:
        bucket = client.get_bucket(bucket_name)
    except Exception as e:
        # Error out if we're unable to locate the GCS bucket.
        raise MotionAlertError("Error connecting to GCS bucket: "
                               "{0}".format(e))

    try:
        temp_image = image_file_path.split('/')
        image_file = temp_image[3] + "/" + temp_image[4]
        blob = Blob(image_file, bucket)
        with open(image_file_path, 'rb') as my_file:
            blob.upload_from_file(my_file)
    except Exception as e:
        # Error out if we're unable to upload the image.
        raise MotionAlertError(
            "Error uploading file to GCS: {0}".format(e))
def get_data():
    """
    Load the data file from GCP Storage and return it parsed, with its blob.

    The bucket is created when fetching it fails, and the data file is
    seeded from the local initial-data file when it is not yet in the bucket.

    Returns a ``(data, blob)`` tuple.
    """
    # Artificial delay before doing any work.
    do_delay()

    gcs = storage.Client()
    config = current_app.config
    bucket_name = config.get('DATA_BUCKET_NAME')
    print('bucket_name=%s' % bucket_name)

    try:
        data_bucket = gcs.get_bucket(bucket_name)
    except Exception:
        data_bucket = gcs.create_bucket(bucket_name)

    data_blob = Blob(config.get('DATA_FILE_NAME'), data_bucket)
    if not data_blob.exists():
        # Seed the bucket from the initial data file on disk.
        seed_path = config.get('INIT_DATA_FILE')
        with open(seed_path) as seed_file:
            seed = json.load(seed_file)
        data_blob.upload_from_string(json.dumps(seed, indent=4))

    parsed = json.loads(data_blob.download_as_string())
    print('GOT BAG DATA:')
    print(json.dumps(parsed, indent=4))
    return parsed, data_blob
def put(self, source, name=None):
    """Upload ``source`` to the bucket under ``base_address[/name]``.

    Parameters
    ----------
    source : str, bytes or io.BytesIO
        Content to upload. Text is encoded as UTF-8 first.
    name : str, optional
        Joined onto ``self.base_address`` to form the object key; when
        omitted the key is ``self.base_address`` itself.

    Returns
    -------
    bool
        ``True`` when the upload succeeded, ``False`` when ``source`` has an
        unsupported type or the upload raised.
    """
    if isinstance(source, io.BytesIO):
        # BytesIO has no len(); measure it by seeking to the end. The upload
        # rewinds the stream, so moving the position here is harmless.
        source.seek(0, io.SEEK_END)
        payload_size = source.tell()
        filebuff = io.BufferedReader(source)
    elif isinstance(source, (str, bytes)):
        if not isinstance(source, bytes):
            # io.BytesIO only accepts bytes-like input.
            source = source.encode('utf-8')
        payload_size = len(source)
        filebuff = io.BufferedReader(io.BytesIO(source))
    else:
        log.error(
            'Source should be either a string, or bytes or io.BytesIO, got {}'
            .format(type(source)))
        return False

    key = self.base_address if name is None else os.path.join(
        self.base_address, name)
    key = key.strip('/')

    try:
        blob = Blob(key, self.bucket)
        blob.upload_from_file(filebuff, rewind=True)
        log.info("Uploaded {} bytes to \tbucket={}\tkey={}".format(
            payload_size, self.bucket_name, key))
        return True
    except Exception as e:
        log.error("{}\tbucket={}\tkey={}".format(e, self.bucket, key))
        return False
def upload_item_as_raw_file(path, client=None):
    """Transcode the file at ``path`` and upload the result to the transcripts bucket.

    Args:
        path: Passed to ``encode_as_linear16`` as the input.
        client: Storage client; ``get_storage_client()`` supplies one when
            this is ``None``.

    Returns:
        A dict with the uploaded blob's ``blob_name`` and ``bucket_name``.

    Raises:
        Forbidden: re-raised, after printing a hint, when fetching the
            bucket is refused.
    """
    if client is None:
        client = get_storage_client()

    # Fetch the bucket, creating it on first use.
    try:
        b = client.get_bucket(TRANSCRIPTS_BUCKET_NAME)
    except Forbidden as e:
        print("Received Forbidden (403) error while getting bucket. This could "
              "mean that you do not have billing set up for this "
              "account/project, or that somebody else has taken this bucket "
              "from the global namespace.")
        raise e
    except NotFound:
        b = client.bucket(TRANSCRIPTS_BUCKET_NAME)
        delete_after_a_week = {
            'action': {'type': 'Delete'},
            'condition': {'age': 7},
        }
        b.lifecycle_rules = [delete_after_a_week]
        b.create()
        b.make_public(future=True)

    # The transcoded copy lives in a temp file that is removed when the
    # context manager exits.
    with NamedTemporaryFile(prefix='transcode_', suffix='.raw') as tmp:
        encode_as_linear16(path, tmp)

        # A SHA-256 of the content serves as the object name, to avoid
        # collisions.
        digest = hashlib.sha256(tmp.read()).hexdigest()
        blob = Blob('transcripts-%s' % digest, b)
        blob.upload_from_file(tmp, rewind=True)

    return {'blob_name': blob.name, 'bucket_name': blob.bucket.name}
def _upload(self, payload: bytes, filename: str, bucket: str) -> None:
    """
    Upload a payload to GCS, retrying on transient errors.

    The payload is encrypted once, before the first attempt, when
    ``self.use_encryption`` is set. Up to ``self.max_retries`` attempts are
    made, with a random pause of 0-3 seconds between them.

    :param payload: Content to upload
    :param filename: Object name to write
    :param bucket: Name of the destination bucket

    :raises StoqPluginException: when every attempt failed
    """
    client = Client(project=self.project_id)

    # Encrypt outside the retry loop: doing it inside re-encrypted the
    # already-encrypted payload on every retry.
    if self.use_encryption:
        payload = self._encrypt(payload)

    attempts = 0
    while attempts < self.max_retries:
        try:
            bucket_obj = client.get_bucket(bucket)
            blob = Blob(filename, bucket_obj)
            blob.upload_from_file(BytesIO(payload))
            return
        except (
            InvalidResponse,
            GoogleAPICallError,
            InternalServerError,
            SSLError,
        ) as e:
            # Count the failure first so the last attempt actually raises;
            # checking before the increment could never be true inside
            # this loop.
            attempts += 1
            if attempts >= self.max_retries:
                raise StoqPluginException(
                    f'Failed to upload {bucket}/{filename} to GCS: {str(e)}'
                ) from e
            sleep(randrange(0, 4))
def google_stt(self):
    """Convert the input audio to mono WAV, upload it and transcribe the bucket's blobs.

    The file at ``self.inputFilePath`` is read as webm, converted to mono
    and written back to the same path as WAV. It is then uploaded to the
    ``celerfama2`` bucket. Every blob listed in that bucket is transcribed;
    the transcript and confidence of each alternative are written to
    ``<blob name minus last four characters>Google.txt`` and the blob is
    deleted afterwards.
    """
    # Instantiates a client
    client = speech.SpeechClient()

    sound = AudioSegment.from_file(self.inputFilePath, format="webm")
    # if(sound.channels != 1):#If it's not mono
    sound = sound.set_channels(1)  # Change it to mono
    sound.export(self.inputFilePath, format="wav")  # Export them as wav files
    print('Conversion complete')

    # Instantiates a client and uploads file
    storage_client = storage.Client()
    # Parameter is the name of the Google Cloud bucket
    bucket = storage_client.lookup_bucket('celerfama2')
    folder = bucket.list_blobs()

    # The name comes straight from the path; the file no longer needs to be
    # opened just to read it.
    upload_name = os.path.basename(self.inputFilePath)
    blob = Blob(upload_name, bucket)
    print("Google: Uploading: " + upload_name)
    blob.upload_from_filename(self.inputFilePath)

    # Transcribes the file in the cloud
    for element in folder:
        print("Google: Transcribing " + element.name)
        audio = types.RecognitionAudio(uri="gs://celerfama2/" + element.name)
        config = types.RecognitionConfig(
            # Option to get word specific info like time stamps
            enable_word_time_offsets=True,
            # Language of the audio
            language_code='en-US')

        # Detects speech in the audio file
        operation = client.long_running_recognize(config, audio)
        print('Google: Waiting for operation to complete...')
        response = operation.result()

        file_name = element.name[:-4]
        # The with-block closes the transcript file even if a write fails.
        with open(file_name + "Google" + ".txt", "w") as output_file:
            for result in response.results:
                for alternative in result.alternatives:
                    # NOTE(review): on Python 3 formatting the encoded bytes
                    # writes a b'...' literal - confirm that is intended.
                    output_file.write('Transcript: {}'.format(
                        alternative.transcript.encode("utf-8")) + '\n')
                    output_file.write("Confidence: " +
                                      str(alternative.confidence) + '\n')
                    # # Below can be commented to get the detailed info for each word.
                    # for word_info in alternative.words:
                    #     word = word_info.word
                    #     start_time = word_info.start_time
                    #     end_time = word_info.end_time
                    #     output_file.write('Word: {}, start_time: {}, end_time: {}'.format(
                    #         word,
                    #         start_time.seconds + start_time.nanos * 1e-9,
                    #         end_time.seconds + end_time.nanos * 1e-9))
                    #     output_file.write("\n")

        print("Google: Operation Complete")
        element.delete()
    return
def main():
    """Entry point: download, upload or delete one object in a GCS bucket.

    The module fails when the storage library is missing, when a download
    or delete targets an object that is not in the bucket, or when an upload
    names a local file that does not exist.
    """
    argument_spec = {
        "action": {"type": "str", "choices": ["download", "upload", "delete"]},
        "src": {"type": "path"},
        "dest": {"type": "path"},
        "bucket": {"type": "str"},
    }
    module = GcpModule(argument_spec=argument_spec)

    if not HAS_GOOGLE_STORAGE_LIBRARY:
        module.fail_json(
            msg="Please install the google-cloud-storage Python library")

    # Fall back to the full-control storage scope when none was supplied.
    if not module.params["scopes"]:
        module.params["scopes"] = [
            "https://www.googleapis.com/auth/devstorage.full_control"
        ]

    creds = GcpSession(module, "storage")._credentials()
    client = storage.Client(
        project=module.params['project'],
        credentials=creds,
        client_info=ClientInfo(user_agent="Google-Ansible-MM-object"))
    bucket = client.get_bucket(module.params['bucket'])

    remote_file_exists = Blob(remote_file_path(module), bucket).exists()
    local_file_exists = os.path.isfile(local_file_path(module))

    action = module.params["action"]
    results = {}

    # Check if files exist.
    if action in ("delete", "download") and not remote_file_exists:
        module.fail_json(msg="File does not exist in bucket")
    if action == "upload" and not local_file_exists:
        module.fail_json(msg="File does not exist on disk")

    source = module.params["src"]
    if action == "delete":
        if remote_file_exists:
            results = delete_file(module, client, source)
            results["changed"] = True
            module.params["changed"] = True
    elif action == "download":
        results = download_file(module, client, source, module.params["dest"])
        results["changed"] = True
    else:
        # Upload
        results = upload_file(module, client, source, module.params["dest"])
        results["changed"] = True

    module.exit_json(**results)
def upload(csvfile, bucketname, blobname):
    """Upload a local file to ``gs://<bucketname>/<blobname>`` and return that URI.

    Args:
        csvfile: Path of the local file to upload.
        bucketname: Name of the destination bucket.
        blobname: Object name to write.

    Returns:
        The ``gs://`` location string of the uploaded object.
    """
    target_bucket = storage.Client().get_bucket(bucketname)
    Blob(blobname, target_bucket).upload_from_filename(csvfile)

    gcslocation = 'gs://{}/{}'.format(bucketname, blobname)
    logging.info('Uploaded {} ...'.format(gcslocation))
    return gcslocation
def download():
    """Return the content of the encrypted blob named in the submitted form.

    Reads ``downloadingfilename`` and ``downbucketname`` from
    ``request.form`` and uses the module-level ``key`` as the encryption key.
    """
    wanted_name = request.form["downloadingfilename"]
    gcs = storage.Client()
    source_bucket = gcs.get_bucket(request.form['downbucketname'])
    encrypted = Blob(wanted_name, source_bucket, encryption_key=key)
    return encrypted.download_as_string()
def _set_data(self, **kwargs) -> dict:
    """Upload a data frame as CSV under a timestamped copy of the blob name.

    The nanosecond timestamp is inserted between the stem and the extension
    of the configured blob name.

    Args:
        **kwargs: ``data_frame`` holds the frame to upload.

    Returns:
        ``{'record_count': <number of rows>}``.
    """
    frame = kwargs.get('data_frame')
    stamp = time.time_ns()

    stem, extension = os.path.splitext(self.__blob_name)
    stamped_name = stem + '_' + str(stamp) + extension

    target = Blob(stamped_name, self.__bucket)
    # The second positional argument carries the configured file format.
    target.upload_from_string(frame.to_csv(), self.__file_format)

    row_count = frame.shape[0]
    return {'record_count': row_count}
def get_object(self, key):
    """
    Download the object stored under ``key``.

    :param key: key of the object
    :return: Data of the object
    :rtype: str/bytes
    """
    # NOTE(review): self.client is passed in the position where other Blob
    # calls pass a bucket - confirm self.client is actually a bucket object.
    target = Blob(key, self.client)
    content = target.download_as_string()
    return content
def delete_file(module, client, name):
    """Delete the object ``name`` from the bucket named in the module parameters.

    Args:
        module: Provides ``params['bucket']`` and ``fail_json``.
        client: Storage client used to fetch the bucket.
        name: Object name to delete.

    Returns:
        An empty dict on success. A ``NotFound`` error is reported through
        ``module.fail_json`` instead.
    """
    try:
        target_bucket = client.get_bucket(module.params['bucket'])
        Blob(name, target_bucket).delete()
    except google.cloud.exceptions.NotFound as e:
        module.fail_json(msg=str(e))
    else:
        return {}