def main(args=None):

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    plain_bucket = get_bucket(options, plain=True)

    if 's3ql_metadata' in plain_bucket:
        if not options.force:
            raise QuietError("Found existing file system! Use --force to overwrite")

        log.info('Purging existing file system data...')
        plain_bucket.clear()
        if not plain_bucket.is_get_consistent():
            log.info('Please note that the new file system may appear inconsistent\n'
                     'for a while until the removals have propagated through the backend.')

    if not options.plain:
        if sys.stdin.isatty():
            wrap_pw = getpass("Enter encryption password: ")
            if wrap_pw != getpass("Confirm encryption password: "):
                raise QuietError("Passwords don't match.")
        else:
            wrap_pw = sys.stdin.readline().rstrip()

        # Generate data encryption passphrase
        log.info('Generating random encryption key...')
        fh = open('/dev/urandom', "rb", 0)  # No buffering
        data_pw = fh.read(32)
        fh.close()

        # Store the data encryption key, encrypted with the wrapping passphrase
        bucket = BetterBucket(wrap_pw, 'bzip2', plain_bucket)
        bucket['s3ql_passphrase'] = data_pw
    else:
        data_pw = None

    bucket = BetterBucket(data_pw, 'bzip2', plain_bucket)

    # Setup database
    cachepath = get_bucket_cachedir(options.storage_url, options.cachedir)

    # There can't be a corresponding bucket, so we can safely delete
    # these files.
    if os.path.exists(cachepath + '.db'):
        os.unlink(cachepath + '.db')
    if os.path.exists(cachepath + '-cache'):
        shutil.rmtree(cachepath + '-cache')

    log.info('Creating metadata tables...')
    db = Connection(cachepath + '.db')
    create_tables(db)
    init_tables(db)

    param = dict()
    param['revision'] = CURRENT_FS_REV
    param['seq_no'] = 1
    param['label'] = options.label
    param['blocksize'] = options.blocksize * 1024
    param['needs_fsck'] = False
    param['last_fsck'] = time.time() - time.timezone
    param['last-modified'] = time.time() - time.timezone

    # This indicates that the convert_legacy_metadata() stuff
    # in BetterBucket is not required for this file system.
    param['bucket_revision'] = 1

    bucket.store('s3ql_seq_no_%d' % param['seq_no'], 'Empty')

    log.info('Uploading metadata...')
    with bucket.open_write('s3ql_metadata', param) as fh:
        dump_metadata(fh, db)
    pickle.dump(param, open(cachepath + '.params', 'wb'), 2)
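# Illustrative sketch, not part of mkfs itself: the parameter file written
# above with pickle protocol 2 can be read back for verification like this.
# The helper name load_params() is an assumption chosen for this example only.
def load_params(cachepath):
    # Returns the dict that main() stored in '<cachepath>.params'.
    with open(cachepath + '.params', 'rb') as fh:
        return pickle.load(fh)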
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    size = 100 * 1024 * 1024  # bytes (100 MiB)

    # Write random data through a temporary, locally backed S3QL mount to
    # measure throughput into the cache.
    log.info('Measuring throughput to cache...')
    bucket_dir = tempfile.mkdtemp()
    mnt_dir = tempfile.mkdtemp()
    atexit.register(shutil.rmtree, bucket_dir)
    atexit.register(shutil.rmtree, mnt_dir)
    subprocess.check_call(['mkfs.s3ql', '--plain', 'local://%s' % bucket_dir,
                           '--quiet', '--cachedir', options.cachedir])
    subprocess.check_call(['mount.s3ql', '--threads', '1', '--quiet',
                           '--cachesize', '%d' % (2 * size / 1024),
                           '--log', '%s/mount.log' % bucket_dir,
                           '--cachedir', options.cachedir,
                           'local://%s' % bucket_dir, mnt_dir])
    with open('/dev/urandom', 'rb', 0) as src:
        with open('%s/bigfile' % mnt_dir, 'wb', 0) as dst:
            stamp = time.time()
            copied = 0
            while copied < size:
                buf = src.read(256 * 1024)
                dst.write(buf)
                copied += len(buf)
            fuse_speed = copied / (time.time() - stamp)
    os.unlink('%s/bigfile' % mnt_dir)
    subprocess.check_call(['umount.s3ql', mnt_dir])
    log.info('Cache throughput: %.2f KB/sec', fuse_speed / 1024)

    # Upload random data to prevent effects of compression
    # on the network layer
    log.info('Measuring raw backend throughput...')
    bucket = get_bucket(options, plain=True)
    with bucket.open_write('s3ql_testdata') as dst:
        with open('/dev/urandom', 'rb', 0) as src:
            stamp = time.time()
            copied = 0
            while copied < size:
                buf = src.read(256 * 1024)
                dst.write(buf)
                copied += len(buf)
            upload_speed = copied / (time.time() - stamp)
    log.info('Backend throughput: %.2f KB/sec', upload_speed / 1024)
    bucket.delete('s3ql_testdata')

    # Compress the user-supplied test file with each algorithm and record
    # elapsed time and compressed output size.
    src = options.file
    size = os.fstat(options.file.fileno()).st_size
    log.info('Test file size: %.2f MB', (size / 1024 ** 2))

    times = dict()
    out_sizes = dict()
    for alg in ('lzma', 'bzip2', 'zlib'):
        log.info('Compressing with %s...', alg)
        bucket = BetterBucket('pass', alg, Bucket(bucket_dir, None, None))
        with bucket.open_write('s3ql_testdata') as dst:
            src.seek(0)
            stamp = time.time()
            while True:
                buf = src.read(256 * 1024)
                if not buf:
                    break
                dst.write(buf)
            times[alg] = time.time() - stamp
            out_sizes[alg] = dst.compr_size
        log.info('%s compression speed: %.2f KB/sec (in)', alg,
                 size / times[alg] / 1024)
        log.info('%s compression speed: %.2f KB/sec (out)', alg,
                 out_sizes[alg] / times[alg] / 1024)

    print('')
    req = dict()
    for alg in ('lzma', 'bzip2', 'zlib'):
        backend_req = math.ceil(upload_speed * times[alg] / out_sizes[alg])
        fuse_req = math.ceil(fuse_speed * times[alg] / size)
        req[alg] = min(backend_req, fuse_req)
        print('When using %s compression, incoming writes can keep up to %d threads\n'
              'busy. The backend can handle data from up to %d threads. Therefore,\n'
              'the maximum achievable throughput is %.2f KB/sec with %d threads.\n'
              % (alg, fuse_req, backend_req,
                 min(upload_speed, fuse_speed) / 1024, req[alg]))

    print('All numbers assume that the test file is representative and that',
          'there are enough processor cores to run all threads in parallel.',
          'To compensate for network latency, you should start about twice as',
          'many upload threads as you need for optimal performance.\n', sep='\n')

    # Pick the algorithm with the best compression ratio whose thread
    # requirement still fits the available cores.
    cores = os.sysconf('SC_NPROCESSORS_ONLN')
    best_size = None
    max_threads = cores
    while best_size is None:
        for alg in out_sizes:
            if req[alg] > max_threads:
                continue
            if best_size is None or out_sizes[alg] < best_size:
                best_size = out_sizes[alg]
                best_alg = alg
                threads = req[alg]

        max_threads = min(req.itervalues())

    print('This system appears to have %d cores, so best performance with maximum\n'
          'compression ratio would be achieved by using %s compression with %d\n'
          'upload threads.' % (cores, best_alg,
                               2 * threads if cores >= threads else 2 * cores))
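# Worked example (assumed sample numbers, not measured values) of the thread
# estimate used in main() above; estimate_backend_threads() is a hypothetical
# helper, not part of the benchmark script.
def estimate_backend_threads(upload_speed, compress_time, compressed_size):
    # upload_speed: raw backend throughput in bytes/sec
    # compress_time: seconds one thread needs to compress the test file
    # compressed_size: size of the compressed output in bytes
    # During one compression cycle the backend could upload
    # upload_speed * compress_time bytes, so this many threads keep it busy.
    return int(math.ceil(upload_speed * compress_time / compressed_size))

# Example: with a 1 MB/sec backend, 10 sec compression time and 2 MB of
# compressed output, ceil(1000000 * 10.0 / 2000000) = 5 threads saturate
# the backend link.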