def backend(request):
    (backend_info, comprenc_kind) = request.param

    if backend_info.classname == 'local':
        gen = yield_local_backend(backend_info)
    elif 'request_handler' in backend_info:
        gen = yield_mock_backend(backend_info)
    else:
        gen = yield_remote_backend(backend_info)

    for raw_backend in gen:
        if comprenc_kind == 'raw':
            backend = raw_backend
        elif comprenc_kind == 'plain':
            backend = ComprencBackend(None, (None, 6), raw_backend)
        elif comprenc_kind == 'aes+zlib':
            backend = ComprencBackend(b'schlurz', ('zlib', 6), raw_backend)
        elif comprenc_kind == 'aes':
            backend = ComprencBackend(b'schlurz', (None, 6), raw_backend)
        else:
            backend = ComprencBackend(None, (comprenc_kind, 6), raw_backend)

        backend.unittest_info = raw_backend.unittest_info
        yield backend
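# Standalone sketch (not part of the fixture above) illustrating what the
# comprenc_kind variants mean in practice: the same payload round-trips
# unchanged through every combination of passphrase and compression, only the
# stored representation differs.  It assumes the imports used by the snippets
# below (tempfile, shutil, argparse.Namespace, s3ql.backends.local,
# ComprencBackend); the key names and payload are made up for illustration.
def _demo_comprenc_variants():
    backend_dir = tempfile.mkdtemp(prefix='s3ql-demo-')
    raw = local.Backend(Namespace(storage_url='local://' + backend_dir))
    variants = {
        'plain': ComprencBackend(None, (None, 6), raw),
        'zlib': ComprencBackend(None, ('zlib', 6), raw),
        'aes': ComprencBackend(b'schlurz', (None, 6), raw),
        'aes+zlib': ComprencBackend(b'schlurz', ('zlib', 6), raw),
    }
    try:
        for name, wrapped in variants.items():
            wrapped.store('demo_' + name, b'some payload', {'kind': name})
            (data, meta) = wrapped.fetch('demo_' + name)
            assert data == b'some payload'
            assert meta['kind'] == name
    finally:
        shutil.rmtree(backend_dir)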
def test_retrieve(backend, db):
    plain_backend = backend
    backend = ComprencBackend(b'schnorz', ('zlib', 6), plain_backend)

    # Create a few objects in db
    obj_ids = (22, 25, 30, 31)
    for id_ in obj_ids:
        db.execute('INSERT INTO objects (id, refcount, size) VALUES(?, ?, ?)',
                   (id_, 1, 27 * id_))

    # Object one will be missing in backend

    # Object two will have a checksum error in the data
    key = 's3ql_data_%d' % obj_ids[1]
    backend[key] = b'some data that will be broken on a data check'
    (raw, meta) = plain_backend.fetch(key)
    raw = bytearray(raw)
    assert len(raw) > 20
    raw[-10:-6] = b'forg'
    plain_backend.store(key, raw, meta)

    # Object three will have a checksum error in the metadata
    key = 's3ql_data_%d' % obj_ids[2]
    backend.store(key, b'some data that will be broken on a metadata check',
                  { 'meta-key1': 'some textual data that just increases',
                    'meta-key2': 'the metadata size so that we can tamper with it' })
    meta = plain_backend.lookup(key)
    raw = bytearray(meta['data'])
    assert len(raw) > 20
    raw[-10:-6] = b'forg'
    meta['data'] = raw
    plain_backend.update_meta(key, meta)

    # Object four will be ok
    backend['s3ql_data_%d' % obj_ids[3]] = b'some data that is well'

    # When using a single thread, we can fake the backend factory
    def backend_factory():
        return backend

    missing_fh = io.StringIO()
    corrupted_fh = io.StringIO()
    with assert_logs('^Backend seems to have lost', count=1, level=logging.WARNING), \
         assert_logs('^Object %d is corrupted', count=1, level=logging.WARNING):
        verify.retrieve_objects(db, backend_factory, corrupted_fh, missing_fh,
                                thread_count=1, full=False)
    assert missing_fh.getvalue() == 's3ql_data_%d\n' % obj_ids[0]
    assert corrupted_fh.getvalue() == 's3ql_data_%d\n' % obj_ids[2]

    missing_fh = io.StringIO()
    corrupted_fh = io.StringIO()
    with assert_logs('^Backend seems to have lost', count=1, level=logging.WARNING), \
         assert_logs('^Object %d is corrupted', count=2, level=logging.WARNING):
        verify.retrieve_objects(db, backend_factory, corrupted_fh, missing_fh,
                                thread_count=1, full=True)
    assert missing_fh.getvalue() == 's3ql_data_%d\n' % obj_ids[0]
    assert corrupted_fh.getvalue() == ('s3ql_data_%d\n' * 2) % obj_ids[1:3]
def test_retrieve(backend, db):
    plain_backend = backend
    backend = ComprencBackend(b'schnorz', ('zlib', 6), plain_backend)

    # Create a few objects in db
    obj_ids = (22, 25, 30, 31)
    for id_ in obj_ids:
        db.execute('INSERT INTO objects (id, refcount, size) VALUES(?, ?, ?)',
                   (id_, 1, 27 * id_))

    # Object one will be missing in backend

    # Object two will have a checksum error in the data
    key = 's3ql_data_%d' % obj_ids[1]
    backend[key] = b'some data that will be broken on a data check'
    (raw, meta) = plain_backend.fetch(key)
    raw = bytearray(raw)
    assert len(raw) > 20
    raw[-10:-6] = b'forg'
    plain_backend.store(key, raw, meta)

    # Object three will have a checksum error in the metadata
    key = 's3ql_data_%d' % obj_ids[2]
    backend.store(key, b'some data that will be broken on a metadata check',
                  { 'meta-key1': 'some textual data that just increases',
                    'meta-key2': 'the metadata size so that we can tamper with it' })
    meta = plain_backend.lookup(key)
    raw = bytearray(meta['data'])
    assert len(raw) > 20
    raw[-10:-6] = b'forg'
    meta['data'] = raw
    plain_backend.update_meta(key, meta)

    # Object four will be ok
    backend['s3ql_data_%d' % obj_ids[3]] = b'some data that is well'

    # When using a single thread, we can fake the backend factory
    def backend_factory():
        return backend

    missing_fh = io.StringIO()
    corrupted_fh = io.StringIO()
    with catch_logmsg('^Backend seems to have lost', count=1, level=logging.WARNING), \
         catch_logmsg('^Object %d is corrupted', count=1, level=logging.WARNING):
        verify.retrieve_objects(db, backend_factory, corrupted_fh, missing_fh,
                                thread_count=1, full=False)
    assert missing_fh.getvalue() == 's3ql_data_%d\n' % obj_ids[0]
    assert corrupted_fh.getvalue() == 's3ql_data_%d\n' % obj_ids[2]

    missing_fh = io.StringIO()
    corrupted_fh = io.StringIO()
    with catch_logmsg('^Backend seems to have lost', count=1, level=logging.WARNING), \
         catch_logmsg('^Object %d is corrupted', count=2, level=logging.WARNING):
        verify.retrieve_objects(db, backend_factory, corrupted_fh, missing_fh,
                                thread_count=1, full=True)
    assert missing_fh.getvalue() == 's3ql_data_%d\n' % obj_ids[0]
    assert corrupted_fh.getvalue() == ('s3ql_data_%d\n' * 2) % obj_ids[1:3]
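# Note on the two retrieve_objects() calls above: with full=False only object
# metadata is verified, so the data-level corruption in object two (obj_ids[1])
# goes undetected and only object three (obj_ids[2]) is reported as corrupted.
# With full=True the object data is downloaded and checked as well, so both
# corrupted objects end up in corrupted_fh.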
def backend():
    backend_dir = tempfile.mkdtemp(prefix='s3ql-backend-')
    plain_backend = local.Backend(
        Namespace(storage_url='local://' + backend_dir))
    backend = ComprencBackend(b'schnorz', ('zlib', 6), plain_backend)
    try:
        yield backend
    finally:
        backend.close()
        shutil.rmtree(backend_dir)
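# Small illustrative test built on the fixture above (not part of the original
# suite): data stored through the encrypting backend comes back unchanged,
# while reading it with a different passphrase is detected as corruption, much
# like the "will fail with wrong pw" checks below.  It assumes ComprencBackend
# exposes the wrapped backend as .backend and that pytest and
# CorruptedObjectError are imported as in the other snippets.
def test_roundtrip_wrong_passphrase(backend):
    backend['s3ql_testdata'] = b'some test payload'
    (data, _meta) = backend.fetch('s3ql_testdata')
    assert data == b'some test payload'

    wrong = ComprencBackend(b'wrong-passphrase', ('zlib', 6), backend.backend)
    with pytest.raises(CorruptedObjectError):
        wrong.fetch('s3ql_testdata')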
def test_passphrase(self):
    self.mkfs()

    passphrase_new = 'sd982jhd'
    proc = subprocess.Popen(
        self.s3ql_cmd_argv('s3qladm')
        + [ '--quiet', '--fatal-warnings', '--log', 'none',
            '--authfile', '/dev/null', 'passphrase', self.storage_url ],
        stdin=subprocess.PIPE, universal_newlines=True)

    print(self.passphrase, file=proc.stdin)
    print(passphrase_new, file=proc.stdin)
    print(passphrase_new, file=proc.stdin)
    proc.stdin.close()
    self.assertEqual(proc.wait(), 0)

    plain_backend = local.Backend(self.storage_url, None, None)
    backend = ComprencBackend(passphrase_new.encode(), ('zlib', 6), plain_backend)
    backend.fetch('s3ql_passphrase')  # will fail with wrong pw
def test_key_recovery(self):
    mkfs_output = self.mkfs()

    hit = re.search(r'^---BEGIN MASTER KEY---\n'
                    r'(.+)\n'
                    r'---END MASTER KEY---$',
                    mkfs_output, re.MULTILINE)
    assert hit
    master_key = hit.group(1)

    plain_backend = local.Backend(Namespace(storage_url=self.storage_url))
    del plain_backend['s3ql_passphrase']  # Oops

    backend = ComprencBackend(self.passphrase.encode(), ('zlib', 6), plain_backend)
    with pytest.raises(CorruptedObjectError):
        backend.fetch('s3ql_metadata')

    passphrase_new = 'sd982jhd'
    proc = subprocess.Popen(
        self.s3ql_cmd_argv('s3qladm')
        + [ '--quiet', '--log', 'none', '--authfile', '/dev/null',
            'recover-key', self.storage_url ],
        stdin=subprocess.PIPE, universal_newlines=True)

    print(master_key, file=proc.stdin)
    print(passphrase_new, file=proc.stdin)
    print(passphrase_new, file=proc.stdin)
    proc.stdin.close()
    self.assertEqual(proc.wait(), 0)

    backend = ComprencBackend(passphrase_new.encode(), ('zlib', 6), plain_backend)
    backend.fetch('s3ql_passphrase')  # will fail with wrong pw
def ctx():
    ctx = Namespace()
    ctx.backend_dir = tempfile.mkdtemp(prefix='s3ql-backend-')
    plain_backend = local.Backend(
        Namespace(storage_url='local://' + ctx.backend_dir))
    ctx.backend_pool = BackendPool(
        lambda: ComprencBackend(b'schwubl', ('zlib', 6), plain_backend))
    ctx.backend = ctx.backend_pool.pop_conn()
    ctx.cachedir = tempfile.mkdtemp(prefix='s3ql-cache-')
    ctx.max_obj_size = 1024

    # Destructors are not guaranteed to run, and we can't unlink
    # the file immediately because apsw refers to it by name.
    # Therefore, we unlink the file manually in tearDown()
    ctx.dbfile = tempfile.NamedTemporaryFile(delete=False)

    ctx.db = Connection(ctx.dbfile.name)
    create_tables(ctx.db)
    init_tables(ctx.db)

    # Tested methods assume that they are called from
    # file system request handler
    llfuse.lock.acquire()

    cache = BlockCache(ctx.backend_pool, ctx.db, ctx.cachedir + "/cache",
                       ctx.max_obj_size * 5)
    ctx.block_cache = cache
    ctx.server = fs.Operations(cache, ctx.db, ctx.max_obj_size,
                               InodeCache(ctx.db, 0))
    ctx.server.init()

    # Monkeypatch around the need for removal and upload threads
    cache.to_remove = DummyQueue(cache)

    class DummyDistributor:
        def put(ctx, arg, timeout=None):
            cache._do_upload(*arg)
            return True
    cache.to_upload = DummyDistributor()

    # Keep track of unused filenames
    ctx.name_cnt = 0

    yield ctx

    ctx.server.inodes.destroy()
    llfuse.lock.release()
    ctx.block_cache.destroy()
    shutil.rmtree(ctx.cachedir)
    shutil.rmtree(ctx.backend_dir)
    os.unlink(ctx.dbfile.name)
    ctx.dbfile.close()
async def ctx():
    ctx = Namespace()
    ctx.backend_dir = tempfile.mkdtemp(prefix='s3ql-backend-')
    plain_backend = local.Backend(
        Namespace(storage_url='local://' + ctx.backend_dir))
    ctx.backend_pool = BackendPool(
        lambda: ComprencBackend(b'schwubl', ('zlib', 6), plain_backend))
    ctx.backend = ctx.backend_pool.pop_conn()
    ctx.cachedir = tempfile.mkdtemp(prefix='s3ql-cache-')
    ctx.max_obj_size = 1024

    # Destructors are not guaranteed to run, and we can't unlink
    # the file immediately because apsw refers to it by name.
    # Therefore, we unlink the file manually in tearDown()
    ctx.dbfile = tempfile.NamedTemporaryFile(delete=False)

    ctx.db = Connection(ctx.dbfile.name)
    create_tables(ctx.db)
    init_tables(ctx.db)

    cache = BlockCache(ctx.backend_pool, ctx.db, ctx.cachedir + "/cache",
                       ctx.max_obj_size * 5)
    cache.portal = trio.BlockingTrioPortal()
    ctx.cache = cache
    ctx.server = fs.Operations(cache, ctx.db, ctx.max_obj_size,
                               InodeCache(ctx.db, 0))
    ctx.server.init()

    # Monkeypatch around the need for removal and upload threads
    cache.to_remove = DummyQueue(cache)

    class DummyChannel:
        async def send(self, arg):
            await trio.run_sync_in_worker_thread(cache._do_upload, *arg)
    cache.to_upload = (DummyChannel(), None)

    # Keep track of unused filenames
    ctx.name_cnt = 0

    yield ctx

    ctx.server.inodes.destroy()
    await ctx.cache.destroy()
    shutil.rmtree(ctx.cachedir)
    shutil.rmtree(ctx.backend_dir)
    os.unlink(ctx.dbfile.name)
    ctx.dbfile.close()
def setUp(self):
    self.backend_dir = tempfile.mkdtemp(prefix='s3ql-backend-')
    plain_backend = local.Backend('local://' + self.backend_dir, None, None)
    self.backend_pool = BackendPool(
        lambda: ComprencBackend(b'schwubl', ('zlib', 6), plain_backend))
    self.backend = self.backend_pool.pop_conn()
    self.cachedir = tempfile.mkdtemp(prefix='s3ql-cache-')
    self.max_obj_size = 1024

    # Destructors are not guaranteed to run, and we can't unlink
    # the file immediately because apsw refers to it by name.
    # Therefore, we unlink the file manually in tearDown()
    self.dbfile = tempfile.NamedTemporaryFile(delete=False)

    self.db = Connection(self.dbfile.name)
    create_tables(self.db)
    init_tables(self.db)

    # Tested methods assume that they are called from
    # file system request handler
    llfuse.lock.acquire()

    cache = BlockCache(self.backend_pool, self.db, self.cachedir + "/cache",
                       self.max_obj_size * 5)
    self.block_cache = cache
    self.server = fs.Operations(cache, self.db, self.max_obj_size,
                                InodeCache(self.db, 0))
    self.server.init()

    # Monkeypatch around the need for removal and upload threads
    cache.to_remove = DummyQueue(cache)

    class DummyDistributor:
        def put(self, arg, timeout=None):
            cache._do_upload(*arg)
            return True
    cache.to_upload = DummyDistributor()

    # Keep track of unused filenames
    self.name_cnt = 0
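# The setUp() above defers cleanup to tearDown(); a matching tearDown sketch,
# mirroring the teardown sequence at the end of the ctx() fixtures above
# (an illustration, not the original method):
def tearDown(self):
    self.server.inodes.destroy()
    llfuse.lock.release()
    self.block_cache.destroy()
    shutil.rmtree(self.cachedir)
    shutil.rmtree(self.backend_dir)
    os.unlink(self.dbfile.name)
    self.dbfile.close()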
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # /dev/urandom may be slow, so we cache the data first
    log.info('Preparing test data...')
    rnd_fh = tempfile.TemporaryFile()
    with open('/dev/urandom', 'rb', 0) as src:
        copied = 0
        while copied < 50 * 1024 * 1024:
            buf = src.read(BUFSIZE)
            rnd_fh.write(buf)
            copied += len(buf)

    log.info('Measuring throughput to cache...')
    backend_dir = tempfile.mkdtemp(prefix='s3ql-benchmark-')
    mnt_dir = tempfile.mkdtemp(prefix='s3ql-mnt')
    atexit.register(shutil.rmtree, backend_dir)
    atexit.register(shutil.rmtree, mnt_dir)

    block_sizes = [ 2**b for b in range(12, 18) ]
    for blocksize in block_sizes:
        write_time = 0
        size = 50 * 1024 * 1024
        while write_time < 3:
            log.debug('Write took %.3g seconds, retrying', write_time)
            subprocess.check_call([exec_prefix + 'mkfs.s3ql', '--plain',
                                   'local://%s' % backend_dir, '--quiet',
                                   '--force', '--cachedir', options.cachedir])
            subprocess.check_call([exec_prefix + 'mount.s3ql', '--threads', '1',
                                   '--quiet', '--cachesize', '%d' % (2 * size / 1024),
                                   '--log', '%s/mount.log' % backend_dir,
                                   '--cachedir', options.cachedir,
                                   'local://%s' % backend_dir, mnt_dir])
            try:
                size *= 2
                with open('%s/bigfile' % mnt_dir, 'wb', 0) as dst:
                    rnd_fh.seek(0)
                    write_time = time.time()
                    copied = 0
                    while copied < size:
                        buf = rnd_fh.read(blocksize)
                        if not buf:
                            rnd_fh.seek(0)
                            continue
                        dst.write(buf)
                        copied += len(buf)

                write_time = time.time() - write_time
                os.unlink('%s/bigfile' % mnt_dir)
            finally:
                subprocess.check_call([exec_prefix + 'umount.s3ql', mnt_dir])

        fuse_speed = copied / write_time
        log.info('Cache throughput with %3d KiB blocks: %d KiB/sec',
                 blocksize / 1024, fuse_speed / 1024)

    # Upload random data to prevent effects of compression
    # on the network layer
    log.info('Measuring raw backend throughput..')
    try:
        backend = get_backend(options, raw=True)
    except DanglingStorageURLError as exc:
        raise QuietError(str(exc)) from None

    upload_time = 0
    size = 512 * 1024
    while upload_time < 10:
        size *= 2

        def do_write(dst):
            rnd_fh.seek(0)
            stamp = time.time()
            copied = 0
            while copied < size:
                buf = rnd_fh.read(BUFSIZE)
                if not buf:
                    rnd_fh.seek(0)
                    continue
                dst.write(buf)
                copied += len(buf)
            return (copied, stamp)

        (upload_size, upload_time) = backend.perform_write(do_write, 's3ql_testdata')
        upload_time = time.time() - upload_time

    backend_speed = upload_size / upload_time
    log.info('Backend throughput: %d KiB/sec', backend_speed / 1024)
    backend.delete('s3ql_testdata')

    src = options.file
    size = os.fstat(options.file.fileno()).st_size
    log.info('Test file size: %.2f MiB', (size / 1024 ** 2))

    in_speed = dict()
    out_speed = dict()
    for alg in ALGS:
        log.info('compressing with %s-6...', alg)
        backend = ComprencBackend(b'pass', (alg, 6),
                                  Backend(argparse.Namespace(storage_url='local://' + backend_dir)))

        def do_write(dst):  # pylint: disable=E0102
            src.seek(0)
            stamp = time.time()
            while True:
                buf = src.read(BUFSIZE)
                if not buf:
                    break
                dst.write(buf)
            return (dst, stamp)

        (dst_fh, stamp) = backend.perform_write(do_write, 's3ql_testdata')
        dt = time.time() - stamp
        in_speed[alg] = size / dt
        out_speed[alg] = dst_fh.get_obj_size() / dt
        log.info('%s compression speed: %d KiB/sec per thread (in)',
                 alg, in_speed[alg] / 1024)
        log.info('%s compression speed: %d KiB/sec per thread (out)',
                 alg, out_speed[alg] / 1024)

    print('')
    print('With %d KiB blocks, maximum performance for different compression'
          % (block_sizes[-1] / 1024),
          'algorithms and thread counts is:', '', sep='\n')

    threads = set([1, 2, 4, 8])
    cores = os.sysconf('SC_NPROCESSORS_ONLN')
    if cores != -1:
        threads.add(cores)
    if options.threads:
        threads.add(options.threads)

    print('%-26s' % 'Threads:',
          ('%12d' * len(threads)) % tuple(sorted(threads)))

    for alg in ALGS:
        speeds = []
        limits = []
        for t in sorted(threads):
            if fuse_speed > t * in_speed[alg]:
                limit = 'CPU'
                speed = t * in_speed[alg]
            else:
                limit = 'S3QL/FUSE'
                speed = fuse_speed

            if speed / in_speed[alg] * out_speed[alg] > backend_speed:
                limit = 'uplink'
                speed = backend_speed * in_speed[alg] / out_speed[alg]

            limits.append(limit)
            speeds.append(speed / 1024)

        print('%-26s' % ('Max FS throughput (%s):' % alg),
              ('%7d KiB/s' * len(threads)) % tuple(speeds))
        print('%-26s' % '..limited by:',
              ('%12s' * len(threads)) % tuple(limits))

    print('')
    print('All numbers assume that the test file is representative and that',
          'there are enough processor cores to run all active threads in parallel.',
          'To compensate for network latency, you should use about twice as',
          'many upload threads as indicated by the above table.\n', sep='\n')
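# main() above relies on a few module-level names that are not shown in this
# excerpt.  Illustrative definitions only -- the exact values in the real
# benchmark script may differ:
import logging

log = logging.getLogger(__name__)
ALGS = ('lzma', 'bzip2', 'zlib')  # compression algorithms to benchmark
BUFSIZE = 64 * 1024               # copy buffer size in bytes
exec_prefix = ''                  # prefix prepended to the s3ql command names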