def download_task_outputs(root_directory: str, bucket: Bucket, job_id: str, task_id: str):
    """
    Fetch every output file produced by a task execution.

    Works for both succeeded and failed tasks. Files are written below
    root_directory under the relative subpath tasks/task-{task_id}/outputs;
    the file names inside that subdirectory come directly from the
    thor.main.runTHOR function.

    Parameters
    ----------
    root_directory : str
        Local destination directory for the downloaded outputs.
    bucket : Bucket
        The bucket hosting the job with outputs.
    job_id : str
        The ID of the job.
    task_id : str
        The ID of the task to get outputs from.
    """
    job_root = _job_path(job_id) + "/"
    output_root = _task_output_path(job_id, task_id) + "/"
    for blob in bucket.list_blobs(prefix=output_root):
        # Strip the job-level prefix so the local tree mirrors the
        # bucket layout relative to the job root.
        relative_name = blob.name[len(job_root):]
        destination = os.path.join(root_directory, relative_name)
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        logger.info("downloading %s", destination)
        blob.download_to_filename(destination)
def get_blobs(bucket: Bucket, blob_path: str = "", recursive: bool = False) -> List[Blob]:
    """
    Gets the blobs list via given blob path.

    If you pass the "recursive" option, the function returns a list with all
    matching blobs along the specified path to the blob. It has one side
    effect: if you have a bucket structure mydir/1.txt, mydir2/2.txt and you
    only provide the "myd" path to the tool with the --recursive parameter,
    this function will load both mydir/ and mydir2/ dirs.

    Parameters
    ----------
    bucket : Bucket
        The bucket to list blobs from.
    blob_path : str
        Blob name (exact match) or name prefix (with recursive=True).
    recursive : bool
        When True, return every blob whose name starts with blob_path;
        when False, return only the blob whose name equals blob_path.

    Returns
    -------
    List[Blob]
        The matching blobs (possibly empty).
    """
    # Perf fix: filter server-side via the `prefix` argument instead of
    # listing the ENTIRE bucket and filtering in Python. Prefix filtering
    # is exactly a startswith() match, so results are unchanged, but only
    # the matching objects are transferred from the API.
    candidates = bucket.list_blobs(prefix=blob_path)
    if recursive:
        return list(candidates)
    # Non-recursive: the prefix listing may include deeper/longer names,
    # so keep only the exact match.
    return [blob for blob in candidates if blob.name == blob_path]
def internal_delete_uls(self, name):
    """
    Deletes the Underlying Storage using the Google API

    Args:
        name (str): The Underlying Storage name to be deleted

    """
    # Todo: Replace with a TimeoutSampler
    attempts_remaining = 10
    while attempts_remaining > 0:
        attempts_remaining -= 1
        try:
            target_bucket = GCPBucket(client=self.client, name=name)
            # Empty the bucket first: a non-empty bucket cannot be deleted.
            target_bucket.delete_blobs(target_bucket.list_blobs())
            target_bucket.delete()
            break
        except GoogleExceptions.NotFound:
            logger.warning("Failed to delete some of the bucket blobs. Retrying...")
            sleep(10)
def internal_delete_uls(self, name):
    """
    Deletes the Underlying Storage using the Google API

    Args:
        name (str): The Underlying Storage name to be deleted

    """
    # Todo: Replace with a TimeoutSampler
    remaining_tries = 10
    while remaining_tries:
        remaining_tries -= 1
        try:
            uls_bucket = GCPBucket(client=self.client, name=name)
            # The bucket must be emptied before it can be removed.
            uls_bucket.delete_blobs(uls_bucket.list_blobs())
            uls_bucket.delete()
            break
        except ClientError:  # TODO: Find relevant exception
            logger.info(
                f"Deletion of Underlying Storage {name} failed. Retrying..."
            )
            sleep(3)
def delete_object_from_bucket(bucket: Bucket):
    """Delete object from bucket"""
    # Remove every object currently stored in the bucket, one by one.
    for stored_object in bucket.list_blobs():
        stored_object.delete()
    print("all objects are deleted from GCS bucket {}".format(bucket.name))