def copy_s3_folder_contents_to_new_folder(old_s3_folder_path, new_s3_folder_path):
    """
    Copy every file found under old_s3_folder_path to new_s3_folder_path.

    The part of each filepath below old_s3_folder_path is kept as it is, so
    the nested layout of the old folder is reproduced under the new one.
    Both folder paths are passed through add_slash before use.

    The files are listed with get_filepaths_from_s3_folder using its default
    arguments, so zero-byte objects are not copied.
    """
    old_s3_folder_path = add_slash(old_s3_folder_path)
    new_s3_folder_path = add_slash(new_s3_folder_path)

    for old_filepath in get_filepaths_from_s3_folder(old_s3_folder_path):
        # Swap only the first occurrence of the folder path: a key that
        # happens to contain the same text again further down must keep it.
        new_filepath = old_filepath.replace(
            old_s3_folder_path, new_s3_folder_path, 1
        )
        copy_s3_object(old_filepath, new_filepath)
def get_filepaths_from_s3_folder(s3_folder_path, extension=None, exclude_zero_byte_files=True):
    """
    Get a sorted list of the filepaths found under a folder in a bucket.

    s3_folder_path: folder to list. It is passed through add_slash and then
        split into a bucket and a key prefix with s3_path_to_bucket_key.
    extension: if set to a non-empty string, only files whose key ends with
        that extension are returned (a leading "." is added when missing).
        If None (default) or an empty string, all filepaths are returned.
    exclude_zero_byte_files: if True (default), objects with a size of 0 are
        left out of the result.

    Returns the matching keys converted with bucket_key_to_s3_path, sorted.
    """
    if not extension:
        # None and "" both mean "no filter": every key ends with "".
        extension = ""
    elif not extension.startswith("."):
        extension = "." + extension

    s3_folder_path = add_slash(s3_folder_path)
    bucket, key = s3_path_to_bucket_key(s3_folder_path)
    objects = s3_resource.Bucket(bucket).objects.filter(Prefix=key)

    # The size is only looked at when zero-byte files are being excluded.
    matching_keys = [
        o.key
        for o in objects
        if o.key.endswith(extension)
        and not (exclude_zero_byte_files and o.size == 0)
    ]
    return sorted(bucket_key_to_s3_path(bucket, k) for k in matching_keys)
def delete_s3_folder_contents(s3_folder_path):
    """
    Delete every file found under the given s3_folder_path.

    Zero-byte objects are included in the listing, so they are deleted too.
    """
    folder = add_slash(s3_folder_path)
    targets = get_filepaths_from_s3_folder(folder, exclude_zero_byte_files=False)
    for target in targets:
        delete_s3_object(target)
# Read the diamonds CSV using the schema built from its metadata file, pass it
# through drd.init_record_datetimes and write the result out as parquet.
csv_path = 's3://alpha-gluejobutils/testing/data/diamonds_csv/'
meta_path = 's3://alpha-gluejobutils/testing/meta_data/diamonds.json'
meta = datatypes.create_spark_schema_from_metadata_file(meta_path)

df_old = drd.init_record_datetimes(
    spark.read.csv(csv_path, header=True, schema=meta),
    '2018-01-01 01:00:00',
    col_prefix="dea_record_",
)
df_old.write.mode('overwrite').parquet('s3://alpha-gluejobutils/database/table1/')

## =====================> UTILS MODULE TESTING <========================= ##
a = 'test/folder/path/'
b = 'test/folder/path'

# Each row: (utils function name, input, expected output, failure message).
# Rows run in order and the first mismatch aborts the script.
_slash_checks = (
    ('add_slash', a, a, 'add_slash FAILURE'),
    ('remove_slash', a, b, 'remove_slash FAILURE'),
    ('add_slash', b, a, 'add_slash FAILURE'),
    ('remove_slash', b, b, 'remove_slash FAILURE'),
)
for _fn_name, _arg, _expected, _failure in _slash_checks:
    if getattr(utils, _fn_name)(_arg) != _expected:
        raise ValueError(_failure)

print("===> utils ===> OK")

## =====================> S3 MODULE TESTING <========================= ##
bucket = 'alpha-gluejobutils'
diamonds_obj = 'testing/data/diamonds.csv'

### ### ### ### ### ### ### ### ###
### bucket_key_to_s3_path ###
### ### ### ### ### ### ### ### ###