def testGetPathContentsRoot(self):
    """Root listing ('/' or '') must enumerate every bucket as a directory."""
    storage_client = Mock()
    storage_client.list_buckets = MagicMock(return_value=[
        Bucket(client=Mock(), name='dummy_bucket1'),
        Bucket(client=Mock(), name='dummy_bucket2'),
    ])
    expected = {
        'type': 'directory',
        'content': [
            {
                'name': f'{bucket_name}/',
                'path': f'{bucket_name}/',
                'type': 'directory',
                'last_modified': '',
            }
            for bucket_name in ('dummy_bucket1', 'dummy_bucket2')
        ],
    }
    # Both spellings of "root" must produce the same listing.
    for request in ('/', ''):
        self.assertEqual(
            expected, handlers.getPathContents(request, storage_client))
def create(ctx, *args, **kwargs):
    """Create the bucket named in the context, applying location/class options.

    The invoking user must pass the admin check first.
    """
    context = ctx.obj
    admin_check(context["user_id"])
    new_bucket = Bucket(context["client"], name=context["name"])
    # GCS expects upper-case location and storage-class identifiers.
    new_bucket.location = kwargs["location"].upper()
    new_bucket.storage_class = kwargs["class"].upper()
    new_bucket.create()
    return f"Bucket `{new_bucket.name}` created."
def create_bucket(storage_client, project, blob_name, bucket_name, table_file, exists_ok=True):
    """Create a GCS bucket and upload a local file into it.

    :param storage_client: The GCS client
    :param project: The GCS project the bucket is created under
    :param blob_name: The GCS blob/object name to upload to
    :param bucket_name: The bucket name
    :param table_file: Local file to upload to the bucket
    :param exists_ok: Boolean, if `True` ignore "already exists"
    :return: The creation status: False if the bucket already exists and
             exists_ok is set to False, True otherwise
    """
    bucket = Bucket(storage_client, bucket_name)
    blob = bucket.blob(blob_name)
    try:
        if storage_client.lookup_bucket(bucket_name):
            # Bucket already present: outcome depends solely on exists_ok.
            if exists_ok:
                logging.info("Bucket %s already exist.", bucket_name)
                return True
            logging.error("Bucket %s already exist.", bucket_name)
            return False
        # Consistency fix: use logging like the rest of this function
        # instead of a bare print(); also use lazy %-args throughout.
        logging.info("Creating bucket : %s", bucket_name)
        storage_client.create_bucket(bucket, project)
        blob.upload_from_filename(table_file)
        return True
    except Conflict:
        # Bucket appeared between the lookup and the create (race); treat
        # exactly like the "already exists" branch above.
        return exists_ok
    except Exception as error:
        logging.error(error)
        raise
def rename(bucket: str, source: str, destination: str, credentials: Credentials = None):
    """Rename a file within a bucket.

    GCS has no native rename, so this is a server-side copy followed by a
    delete of the source blob.  Because the data never leaves GCS it is
    BLAZING fast, to the extent that there is essentially no limit on the
    maximum size of file we can rename.

    Arguments:
      bucket (str): bucket name holding the file
      source (str): current blob name
      destination (str): new blob name
      credentials (Credentials): authentication, if needed
    """
    client = storage.Client(
        credentials=(credentials.credentials if credentials else None))
    source_bucket = Bucket(client, name=bucket)
    source_blob = source_bucket.blob(blob_name=source)
    destination_bucket = client.get_bucket(bucket)
    source_bucket.copy_blob(source_blob, destination_bucket, destination)
    source_blob.delete()
    # Bug fix: the original passed (bucket, source, destination) so the log
    # read "Renamed file <bucket> as <source> in <destination>"; it also
    # carried a pointless f-prefix on a lazy %-style template.
    logging.info('Renamed file %s as %s in %s.', source, destination, bucket)
def get_report_file(report: dict, credentials: Credentials = None) -> str:
    """Locate and return the report's GCS blob handle.

    Parses the bucket and object name out of the report's ``current_path``
    and returns the blob itself (not its contents), so callers — e.g. the
    DV360 path — can stream the file in pieces and drop the footer.

    Arguments:
        report (dict): report definition; must contain ``current_path``

    Keyword Arguments:
        credentials (Credentials): authentication, if needed (default: None)

    Returns:
        the report's Blob handle.
        NOTE(review): the ``-> str`` annotation is misleading — a Blob
        object is returned here, not a string; confirm and fix the
        annotation where Blob is importable.
    """
    client = storage.Client(
        credentials=(credentials.credentials if credentials else None))
    # current_path is '/'-split: second-to-last segment is the bucket,
    # last segment (query string stripped) is the object name.
    path_segments = report['current_path'].split('/')
    report_bucket = path_segments[-2]
    report_blob_name = path_segments[-1].split('?')[0]
    source_bucket = Bucket(client, report_bucket)
    blob = source_bucket.blob(report_blob_name)
    return blob
def copy_to_gcs(bucket_name: str, report: Dict[str, Any], credentials: Credentials = None):
    """Copy a DV360 report from its source bucket into the monitored bucket.

    DV360 stores its reports in GCS, so this is a pure server-side copy:
    BLAZING fast, with essentially no limit on the maximum size of a DV360
    report we can handle.  The destination object is named after the
    report's id.

    Arguments:
      bucket_name (str): destination bucket name
      report (Dict[str, Any]): report definition
      credentials (Credentials): authentication, if needed
    """
    client = storage.Client(
        credentials=(credentials.credentials if credentials else None))
    segments = report['current_path'].split('/')
    src_bucket_name = segments[-2]
    src_blob_name = segments[-1].split('?')[0]
    dest_object_id = report['id']
    src_bucket = Bucket(client, src_bucket_name)
    src_blob = src_bucket.blob(src_blob_name)
    dest_bucket = client.get_bucket(bucket_name)
    src_bucket.copy_blob(
        src_blob, dest_bucket, '{id}.csv'.format(id=dest_object_id))
    logging.info('File {report} copied from {source} to {bucket}.'.format(
        report=src_blob_name, bucket=bucket_name, source=src_bucket_name))
def read_chunk(report: dict, chunk: int = 4096, credentials: Credentials = None, start: int = 0) -> str:
    """Download a slice of a report file from GCS and return it as text.

    Arguments:
        report (dict): report definition; must contain ``current_path``
        chunk (int): number of bytes to fetch (default: 4096)
        credentials (Credentials): authentication, if needed
        start (int): byte offset to start reading from (default: 0)

    Returns:
        str: the downloaded slice, UTF-8 decoded
    """
    client = storage.Client(
        credentials=(credentials.credentials if credentials else None))
    path_segments = report['current_path'].split('/')
    report_bucket = path_segments[-2]
    report_blob_name = path_segments[-1].split('?')[0]
    source_bucket = Bucket(client, report_bucket)
    blob = source_bucket.blob(report_blob_name)
    # Bug fix: the range end must be relative to `start`.  Previously
    # end=chunk, which made every request with start >= chunk an
    # empty/invalid range — i.e. every chunk after the first.  Behavior at
    # start=0 is unchanged.
    data = blob.download_as_string(start=start, end=start + chunk,
                                   raw_download=True).decode('utf-8')
    return data
def storage(request):
    """Pytest fixture: a DjangoGCloudStorage backed by a throwaway bucket.

    Creates a randomly named bucket before the test and empties/deletes it
    afterwards.
    """
    # Random suffix keeps concurrent test runs from colliding on names.
    bucket_name = "test_bucket_" + get_random_string(6, string.ascii_lowercase)
    storage = DjangoGCloudStorage(
        project=request.config.getoption("--gcs-project-name"),
        bucket=bucket_name,
        credentials_file_path=request.config.getoption(
            "--gcs-credentials-file"))
    # Ensure the bucket actually exists before handing storage to the test.
    Bucket(storage.client, bucket_name).create(
        location=request.config.getoption("--gcs-bucket-location"))
    yield storage
    # Teardown: remove every object, then the bucket itself.
    storage.bucket.delete_blobs(storage.bucket.list_blobs())
    storage.bucket.delete(force=True)
def testGetPathContentsSubDir(self):
    """Listing a bucket sub-directory works with and without a trailing slash."""
    bucket1 = Bucket(client=Mock(), name='dummy_bucket1')
    storage_client = Mock()
    storage_client.list_buckets = MagicMock(return_value=[bucket1])
    storage_client.list_blobs = MagicMock(return_value=[
        Blob(name='subdir/dummy_file', bucket=bucket1),
        Blob(name='subdir/dummy_dir/', bucket=bucket1),
    ])
    expected_content = [
        {
            'name': 'dummy_file',
            'path': 'dummy_bucket1/subdir/dummy_file',
            'type': 'file',
            'last_modified': '',
        },
        {
            'name': 'dummy_dir/',
            'path': 'dummy_bucket1/subdir/dummy_dir/',
            'type': 'directory',
            'last_modified': '',
        },
    ]
    for request in ('dummy_bucket1/subdir/', 'dummy_bucket1/subdir'):
        result = handlers.getPathContents(request, storage_client)
        self.assertEqual(expected_content, result['content'])
    # A partial name that matches no directory must raise FileNotFound.
    with self.assertRaises(handlers.FileNotFound):
        handlers.getPathContents('dummy_bucket1/sub', storage_client)
def testGetPathContentsDirEmpty(self):
    """An existing but empty bucket lists as a directory with no entries."""
    empty_bucket = Bucket(client=Mock(), name='dummy_bucket1')
    storage_client = Mock()
    storage_client.list_buckets = MagicMock(return_value=[empty_bucket])
    storage_client.list_blobs = MagicMock(return_value=[])
    for request in ('dummy_bucket1/', 'dummy_bucket1'):
        self.assertDictEqual(
            {'type': 'directory', 'content': []},
            handlers.getPathContents(request, storage_client))
def list_blobs(
    self,
    bucket_or_name,
    max_results=None,
    page_token=None,
    prefix=None,
    delimiter=None,
    versions=None,
    projection="noAcl",
    fields=None,
) -> Iterator[Blob]:
    """Yield a Blob for each stored object key matching ``prefix``.

    Mirrors the google-cloud-storage client signature; only ``prefix`` is
    honoured here — the paging/filtering parameters are accepted for
    interface compatibility and otherwise ignored.
    """
    bucket = Bucket(self, name=bucket_or_name)
    for key in self.object_keys:
        # Idiom fix: startswith() states "prefix match" directly, where the
        # original `key.find(prefix) != 0` scanned the whole key on a miss.
        if prefix is not None and not key.startswith(prefix):
            continue
        yield Blob(key, bucket)
def testGetPathContentsDir(self):
    """Listing a bucket root returns its files and sub-directories."""
    bucket1 = Bucket(client=Mock(), name='dummy_bucket1')
    storage_client = Mock()
    storage_client.list_buckets = MagicMock(return_value=[bucket1])
    storage_client.list_blobs = MagicMock(return_value=[
        Blob(name='dummy_file', bucket=bucket1),
        Blob(name='dummy_dir/', bucket=bucket1),
    ])
    expected = {
        'type': 'directory',
        'content': [
            {
                'name': 'dummy_file',
                'path': 'dummy_bucket1/dummy_file',
                'type': 'file',
            },
            {
                'name': 'dummy_dir/',
                'path': 'dummy_bucket1/dummy_dir/',
                'type': 'directory',
            },
        ],
    }
    for request in ('dummy_bucket1/', 'dummy_bucket1'):
        self.assertDictEqual(
            expected, handlers.getPathContents(request, storage_client))
def delete(ctx, *args, **kwargs):
    """Delete the bucket named in the context (admin-only command)."""
    context = ctx.obj
    admin_check(context["user_id"])
    doomed = Bucket(context["client"], name=context["name"])
    # force=True also removes any objects still inside the bucket.
    doomed.delete(force=kwargs["force"])
    return f"Bucket `{doomed.name}` deleted."
def init_blob(bucket_name: str, blob_name: str) -> Blob:
    """Build a Blob handle for ``blob_name`` inside ``bucket_name``."""
    logger.debug(
        f"Initializing GCS Blob. bucket={bucket_name}, blob={blob_name}")
    parent_bucket = Bucket(Client(), bucket_name)
    return Blob(blob_name, parent_bucket)
def create_dir(self, bucket_name: str, path: str):
    """Create an empty 'directory' placeholder object at ``path``."""
    # GCS has no real directories; a zero-byte object whose name ends in
    # '/' is the conventional directory marker.
    target_bucket = Bucket(self._client, name=bucket_name)
    marker_name = path.rstrip('/') + '/'
    target_bucket.blob(marker_name).upload_from_string('')
def get_cloud_storage_bucket(self) -> Bucket:
    """Build a Bucket handle from this instance's client and bucket name."""
    bucket_handle = Bucket(self.client, self.bucket)
    return bucket_handle
def __init__(self, ctx, config=None):
    # NOTE(review): super().__init__ presumably sets self.bucket to the
    # configured bucket *name* (a str); it is then replaced below with an
    # actual Bucket handle bound to a fresh StorageClient — confirm against
    # the base class.
    super().__init__(ctx, config)
    self.bucket = Bucket(StorageClient(), self.bucket)
def create_bucket(self, bucket_name: str, region: str) -> Bucket:
    """Create ``bucket_name`` in ``region`` and return its handle."""
    new_bucket = Bucket(self._client, name=bucket_name)
    new_bucket.create(location=region)
    return new_bucket