def recursive_copy(
    s3: S3,
    target_bucket: str,
    target_path: str,
    dest_bucket: str,
    dest_path: str,
    exclude: List[str],
    include: List[str],
    preserve: bool,
) -> None:
    """Recursively copy objects to another bucket.

    :param s3: S3 instance
    :type s3: S3
    :param target_bucket: source bucket
    :type target_bucket: str
    :param target_path: source folder path
    :type target_path: str
    :param dest_bucket: destination bucket
    :type dest_bucket: str
    :param dest_path: destination folder path
    :type dest_path: str
    :param exclude: glob patterns to exclude
    :type exclude: List[str]
    :param include: glob patterns to include
    :type include: List[str]
    :param preserve: preserve the previous object's config
    :type preserve: bool
    """
    file_list = walk_s3_folder(
        s3.client,
        target_bucket,
        target_path,
        target_path,
        [],
        exclude,
        include,
        "bucket",
        dest_path,
        dest_bucket,
    )
    if get_confirmation("Confirm?"):
        for s3_key, dest_pathname in file_list:
            print(
                "copy: s3://%s/%s to s3://%s/%s"
                % (target_bucket, s3_key, dest_bucket, dest_pathname)
            )
            copy_source = {"Bucket": target_bucket, "Key": s3_key}
            if not preserve:
                s3transferwrapper = S3TransferWrapper()
                s3.client.copy(
                    copy_source,
                    dest_bucket,
                    dest_pathname,
                    Callback=S3Progress(s3_key, target_bucket, s3.client),
                    Config=s3transferwrapper.transfer_config,
                )
            else:
                s3.bucket_name = target_bucket
                copy_and_preserve(s3, target_bucket, s3_key, dest_bucket, dest_pathname)
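# Usage sketch (illustrative, not part of the original module): a rough idea
# of how recursive_copy might be driven. The bucket names and glob patterns
# below are hypothetical; the real S3 wrapper instance would come from the
# application's own setup.
def _example_recursive_copy(s3: S3) -> None:
    """Illustrative only: mirror one prefix into another bucket."""
    recursive_copy(
        s3,
        target_bucket="source-bucket",  # hypothetical source bucket
        target_path="photos/",
        dest_bucket="backup-bucket",  # hypothetical destination bucket
        dest_path="photos-backup/",
        exclude=["*.tmp"],  # skip temporary files
        include=[],
        preserve=False,  # plain copy; don't carry over ACL/metadata config
    )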
def download_recursive(
    s3: S3, exclude: List[str], include: List[str], local_path: str
) -> None:
    """Recursively download objects from S3.

    :param s3: S3 instance
    :type s3: S3
    :param exclude: glob patterns to exclude
    :type exclude: List[str]
    :param include: glob patterns to include
    :type include: List[str]
    :param local_path: local directory to download into
    :type local_path: str
    """
    download_list = walk_s3_folder(
        s3.client,
        s3.bucket_name,
        s3.path_list[0],
        s3.path_list[0],
        [],
        exclude,
        include,
        "download",
        local_path,
    )
    if get_confirmation("Confirm?"):
        for s3_key, dest_pathname in download_list:
            if not os.path.exists(os.path.dirname(dest_pathname)):
                os.makedirs(os.path.dirname(dest_pathname))
            print(
                "download: s3://%s/%s to %s" % (s3.bucket_name, s3_key, dest_pathname)
            )
            transfer = S3TransferWrapper(s3.client)
            transfer.s3transfer.download_file(
                s3.bucket_name,
                s3_key,
                dest_pathname,
                callback=S3Progress(s3_key, s3.bucket_name, s3.client),
            )
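# Usage sketch (illustrative, not part of the original module): assuming the
# S3 wrapper exposes settable bucket_name/path_list attributes (the code above
# reads both), a recursive download might be kicked off like this. The bucket,
# prefix, and local path are hypothetical.
def _example_download_recursive(s3: S3) -> None:
    """Illustrative only: download a prefix into a local directory."""
    s3.bucket_name = "my-bucket"  # hypothetical bucket
    s3.path_list = ["photos/"]  # prefix to walk
    download_recursive(s3, exclude=["*.log"], include=[], local_path="/tmp/photos")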
def update_object_recursive(
    s3: S3,
    storage: bool = False,
    acl: bool = False,
    metadata: bool = False,
    encryption: bool = False,
    tagging: bool = False,
    exclude: Optional[List[str]] = None,
    include: Optional[List[str]] = None,
) -> None:
    """Recursively update object attributes.

    :param s3: S3 class instance
    :type s3: S3
    :param storage: update storage class
    :type storage: bool, optional
    :param acl: update acl
    :type acl: bool, optional
    :param metadata: update metadata
    :type metadata: bool, optional
    :param encryption: update encryption
    :type encryption: bool, optional
    :param tagging: update tagging
    :type tagging: bool, optional
    :param exclude: glob patterns to exclude
    :type exclude: List[str], optional
    :param include: glob patterns to include
    :type include: List[str], optional
    """
    if exclude is None:
        exclude = []
    if include is None:
        include = []
    s3_args = S3Args(s3)
    s3_args.set_extra_args(storage, acl, metadata, encryption, tagging)
    # check if only tags or acl is being updated;
    # this way it won't create extra versions on the object
    check_result = s3_args.check_tag_acl()

    file_list = walk_s3_folder(
        s3.client,
        s3.bucket_name,
        s3.path_list[0],
        s3.path_list[0],
        [],
        exclude,
        include,
        "object",
        s3.path_list[0],
        s3.bucket_name,
    )
    if get_confirmation("Confirm?"):
        if check_result:
            for original_key, _ in file_list:
                print("update: s3://%s/%s" % (s3.bucket_name, original_key))
                if check_result.get("Tags"):
                    s3.client.put_object_tagging(
                        Bucket=s3.bucket_name,
                        Key=original_key,
                        Tagging={"TagSet": check_result.get("Tags")},
                    )
                if check_result.get("Grants"):
                    grant_args = {"Bucket": s3.bucket_name, "Key": original_key}
                    grant_args.update(check_result.get("Grants", {}))
                    s3.client.put_object_acl(**grant_args)
        else:
            for original_key, _ in file_list:
                print("update: s3://%s/%s" % (s3.bucket_name, original_key))
                # Note: this will create a new version if versioning is enabled
                copy_object_args = get_copy_args(
                    s3, original_key, s3_args, extra_args=True
                )
                copy_source = {"Bucket": s3.bucket_name, "Key": original_key}
                s3transferwrapper = S3TransferWrapper()
                s3.client.copy(
                    copy_source,
                    s3.bucket_name,
                    original_key,
                    Callback=S3Progress(original_key, s3.bucket_name, s3.client),
                    ExtraArgs=copy_object_args,
                    Config=s3transferwrapper.transfer_config,
                )
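# Usage sketch (illustrative, not part of the original module): updating only
# tags, which check_tag_acl() detects, so the loop above goes through
# put_object_tagging instead of a self-copy and avoids creating new object
# versions. The bucket, prefix, and pattern are hypothetical.
def _example_update_tags_recursive(s3: S3) -> None:
    """Illustrative only: re-tag every object under the selected prefix."""
    s3.bucket_name = "my-bucket"  # hypothetical bucket
    s3.path_list = ["reports/"]  # prefix to walk
    update_object_recursive(s3, tagging=True, exclude=["*.tmp"])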
def delete_object_recursive(
    s3: S3,
    exclude: Optional[List[str]] = None,
    include: Optional[List[str]] = None,
    deletemark: bool = False,
    clean: bool = False,
    allversion: bool = False,
) -> None:
    """Recursively delete objects and, if specified, their versions.

    :param s3: S3 instance
    :type s3: S3
    :param exclude: glob patterns to exclude
    :type exclude: List[str], optional
    :param include: glob patterns to include
    :type include: List[str], optional
    :param deletemark: only delete delete markers
    :type deletemark: bool, optional
    :param clean: delete all versions except the current version
    :type clean: bool, optional
    :param allversion: delete all versions; use to nuke the entire bucket or folder
    :type allversion: bool, optional
    """
    if allversion:
        # use a different method than walk_s3_folder, since walk_s3_folder
        # doesn't provide access to deleted-version objects;
        # find_all_version_files lists all files, including deleted versions
        # and delete markers
        file_list = find_all_version_files(
            s3.client,
            s3.bucket_name,
            s3.path_list[0],
            [],
            exclude,
            include,
            deletemark,
        )
        obj_versions: List[Dict[str, str]] = []

        # loop through all files and get their versions
        for file in file_list:
            obj_versions.extend(
                s3.get_object_version(
                    key=file, delete=True, select_all=True, non_current=clean
                )
            )
            print(
                "(dryrun) delete: s3://%s/%s %s"
                % (
                    s3.bucket_name,
                    file,
                    "with all versions" if not clean else "all non-current versions",
                )
            )

        if get_confirmation(
            "Delete %s?"
            % ("all of their versions" if not clean else "all non-current versions")
        ):
            for obj_version in obj_versions:
                print(
                    "delete: s3://%s/%s with version %s"
                    % (
                        s3.bucket_name,
                        obj_version.get("Key"),
                        obj_version.get("VersionId"),
                    )
                )
                s3.client.delete_object(
                    Bucket=s3.bucket_name,
                    Key=obj_version.get("Key"),
                    VersionId=obj_version.get("VersionId"),
                )
    else:
        file_list = walk_s3_folder(
            s3.client,
            s3.bucket_name,
            s3.path_list[0],
            s3.path_list[0],
            [],
            exclude,
            include,
            "delete",
        )
        if get_confirmation("Confirm?"):
            for s3_key, _ in file_list:
                print("delete: s3://%s/%s" % (s3.bucket_name, s3_key))
                s3.client.delete_object(
                    Bucket=s3.bucket_name,
                    Key=s3_key,
                )
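# Usage sketch (illustrative, not part of the original module): with
# allversion=True the version-aware branch above is taken, so every version
# (and delete marker) under the prefix is removed. The bucket and prefix are
# hypothetical; treat this as a destructive operation.
def _example_nuke_prefix(s3: S3) -> None:
    """Illustrative only: delete all versions under the selected prefix."""
    s3.bucket_name = "my-bucket"  # hypothetical bucket
    s3.path_list = ["scratch/"]  # prefix to walk
    delete_object_recursive(s3, allversion=True)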
def test_walk(self, mocked_paginator, mocked_exclude):
    data_path2 = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "../data/s3_object_nested.json"
    )
    with open(data_path2, "r") as file:
        response = json.load(file)
    mocked_paginator.return_value = response
    mocked_exclude.return_value = False
    client = boto3.client("s3")

    result = walk_s3_folder(
        client, "kazhala-file-transfer", "wtf/hello", "", destination_path="tmp"
    )
    self.assertEqual(result, [("wtf/hello/hello.txt", "tmp/wtf/hello/hello.txt")])
    self.assertEqual(
        self.capturedOutput.getvalue(),
        "(dryrun) download: s3://kazhala-file-transfer/wtf/hello/hello.txt to tmp/wtf/hello/hello.txt\n",
    )

    self.capturedOutput.truncate(0)
    self.capturedOutput.seek(0)
    result = walk_s3_folder(
        client,
        "kazhala-file-transfer",
        "wtf/hello/",
        "wtf/hello/",
        [],
        [],
        [],
        "download",
        "/Users/kazhala/tmp",
    )
    self.assertEqual(
        result, [("wtf/hello/hello.txt", "/Users/kazhala/tmp/hello.txt")]
    )
    self.assertEqual(
        self.capturedOutput.getvalue(),
        "(dryrun) download: s3://kazhala-file-transfer/wtf/hello/hello.txt to /Users/kazhala/tmp/hello.txt\n",
    )

    self.capturedOutput.truncate(0)
    self.capturedOutput.seek(0)
    result = walk_s3_folder(
        client,
        "kazhala-file-transfer",
        "wtf/hello/",
        "",
        operation="delete",
        destination_path="/",
    )
    self.assertEqual(result, [("wtf/hello/hello.txt", "/wtf/hello/hello.txt")])
    self.assertEqual(
        self.capturedOutput.getvalue(),
        "(dryrun) delete: s3://kazhala-file-transfer/wtf/hello/hello.txt\n",
    )

    self.capturedOutput.truncate(0)
    self.capturedOutput.seek(0)
    result = walk_s3_folder(
        client,
        "kazhala-file-transfer",
        "wtf/hello/",
        "",
        operation="bucket",
        destination_path="",
        destination_bucket="kazhala-file-transfer2",
    )
    self.assertEqual(result, [("wtf/hello/hello.txt", "wtf/hello/hello.txt")])
    self.assertEqual(
        self.capturedOutput.getvalue(),
        "(dryrun) copy: s3://kazhala-file-transfer/wtf/hello/hello.txt to s3://kazhala-file-transfer2/wtf/hello/hello.txt\n",
    )

    self.capturedOutput.truncate(0)
    self.capturedOutput.seek(0)
    result = walk_s3_folder(
        client,
        "kazhala-file-transfer",
        "wtf/hello/",
        "",
        operation="object",
        destination_path="",
    )
    self.assertEqual(result, [("wtf/hello/hello.txt", "wtf/hello/hello.txt")])
    self.assertEqual(
        self.capturedOutput.getvalue(),
        "(dryrun) update: s3://kazhala-file-transfer/wtf/hello/hello.txt\n",
    )

    mocked_exclude.return_value = True
    result = walk_s3_folder(client, "kazhala-file-transfer", "", "")
    self.assertEqual(result, [])