def du(summarize, human_readable, s3_paths):
    """Display disk usage statistics for each given S3 path.

    Totals are accumulated per directory prefix (individual files are not
    listed). With `summarize`, only the grand total per path is reported.
    Sizes go through `magic.human_bytes` when `human_readable` is set.
    """
    fmt = magic.human_bytes if human_readable else lambda s: s
    for s3_path in s3_paths:
        totals = {}
        # Recursive tallying stops once this key has been credited.
        stop_path = '/' + s3_util.bucket_and_key(s3_path)[1].rstrip('/')

        def tally(segment, size):
            # Credit `size` to `segment` and every ancestor up to stop_path.
            if not summarize or segment == stop_path:
                # With the summarize option, only the grand total matters.
                totals[segment] = totals.get(segment, 0) + size
            if segment != stop_path:
                parent = segment.rsplit('/', 1)[0]
                # An empty parent means we reached the bucket root ('/').
                tally(parent or '/', size)

        # usage for all given paths, and recursively for directories
        # (excludes individual files)
        for obj in s3_util.keys(s3_path):
            dbg('adding {}', obj)
            tally('/' + obj['key'].rsplit('/', 1)[0], obj['len'])

        for path, total in sorted(totals.items()):
            out('{}\t{}', fmt(total), path)
def keys(s3_paths):
    """Prefix key scan.

    Outputs each matching key in the format s3://bucket/key. When no
    paths are given, the empty prefix is scanned (i.e. all keys).
    """
    # Idiomatic emptiness test (PEP 8) instead of `if not len(s3_paths)`.
    if not s3_paths:
        s3_paths = ['']
    for s3_path in s3_paths:
        for obj in s3_util.keys(s3_path):
            out('s3://{}/{}', obj['bucket'], obj['key'])