Example #1
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    pool = ('abcdefghijklmnopqrstuvwxyz',
            'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
            '0123456789')
    steps = [len(x) / (options.processes - 1) for x in pool]
    prefixes = list()
    for i in range(options.processes - 1):
        parts = [x[int(i * y):int((i + 1) * y)] for (x, y) in zip(pool, steps)]
        prefixes.append(''.join(parts))

    filters = ['-! [%s]*' % x for x in prefixes]

    # Catch all
    filters.append('- [%s]*' % ''.join(prefixes))

    rsync_args = ['rsync', '-f', '+ */']
    if not options.quiet:
        rsync_args.append('--out-format')
        rsync_args.append('%n%L')
    if options.a:
        rsync_args.append('-aHAX')

    processes = list()
    for filter_ in filters:
        cmd = rsync_args + ['-f', filter_] + options.pps
        log.debug('Calling %s', cmd)
        processes.append(subprocess.Popen(cmd))

    if all([c.wait() == 0 for c in processes]):
        sys.exit(0)
    else:
        sys.exit(1)
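
The prefix arithmetic above is easier to follow with a concrete value. A minimal sketch, assuming a hypothetical --processes value of 3 and the character pool shown above (the strings in the comments are simply the result of evaluating the slice expressions):

pool = ('abcdefghijklmnopqrstuvwxyz',
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
        '0123456789')
processes = 3                      # hypothetical --processes value

steps = [len(x) / (processes - 1) for x in pool]          # [13.0, 13.0, 5.0]
prefixes = []
for i in range(processes - 1):
    prefixes.append(''.join(x[int(i * y):int((i + 1) * y)]
                            for (x, y) in zip(pool, steps)))

# prefixes == ['abcdefghijklmABCDEFGHIJKLM01234',
#              'nopqrstuvwxyzNOPQRSTUVWXYZ56789']
# Each '-! [...]*' filter makes one rsync worker handle only the names that
# start with its characters; the final '- [...]*' filter is used by one extra
# worker that picks up every remaining name.
filters = ['-! [%s]*' % x for x in prefixes]
filters.append('- [%s]*' % ''.join(prefixes))
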
Example #2
def parse_args(args):
    '''Parse command line'''

    parser = ArgumentParser(description='Clone an S3QL file system.')

    parser.add_quiet()
    parser.add_log()
    parser.add_debug()
    parser.add_backend_options()
    parser.add_version()

    parser.add_argument("--threads",
                        type=int,
                        default=3,
                        help='Number of threads to use')

    # Can't use parser.add_storage_url(), because we need both a source
    # and destination.
    parser.add_argument("--authfile",
                        type=str,
                        metavar='<path>',
                        default=os.path.expanduser("~/.s3ql/authinfo2"),
                        help='Read authentication credentials from this file '
                        '(default: `~/.s3ql/authinfo2`)')
    parser.add_argument(
        "src_storage_url",
        metavar='<source-storage-url>',
        type=storage_url_type,
        help='Storage URL of the source backend that contains the file system')
    parser.add_argument("dst_storage_url",
                        metavar='<destination-storage-url>',
                        type=storage_url_type,
                        help='Storage URL of the destination backend')

    options = parser.parse_args(args)
    setup_logging(options)

    # Print message so that the user has some idea what credentials are
    # wanted (if not specified in authfile).
    log.info('Connecting to source backend...')
    options.storage_url = options.src_storage_url
    parser._init_backend_factory(options)
    src_options = argparse.Namespace()
    src_options.__dict__.update(options.__dict__)
    options.src_backend_factory = lambda: src_options.backend_class(src_options)

    log.info('Connecting to destination backend...')
    options.storage_url = options.dst_storage_url
    parser._init_backend_factory(options)
    dst_options = argparse.Namespace()
    dst_options.__dict__.update(options.__dict__)
    options.dst_backend_factory = lambda: dst_options.backend_class(dst_options)
    del options.storage_url
    del options.backend_class

    return options
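
The src_options/dst_options copies above are what keep the two factories independent: a lambda closes over a variable, not its current value, and `options` itself is mutated again when the destination backend is set up. A minimal illustration of that pitfall with hypothetical names (only argparse is used, nothing S3QL-specific):

import argparse

ns = argparse.Namespace()
factories = []
for url in ('src://a', 'dst://b'):
    ns.storage_url = url
    factories.append(lambda: ns.storage_url)      # both close over the same ns
print([f() for f in factories])                   # ['dst://b', 'dst://b']

snapshots = []
for url in ('src://a', 'dst://b'):
    ns.storage_url = url
    snap = argparse.Namespace()
    snap.__dict__.update(ns.__dict__)             # per-factory copy, as above
    snapshots.append(lambda snap=snap: snap.storage_url)
print([f() for f in snapshots])                   # ['src://a', 'dst://b']
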
Example #3
def main(args=None):

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    fsck = ROFsck(options.path)
    fsck.check()
Example #4
def main(args=None):

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    fsck = ROFsck(options.path)
    fsck.check()
Example #5
def main(args=None):

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # Determine available backups
    backup_list = set(x for x in os.listdir('.')
                      if re.match(r'^\d{4}-\d\d-\d\d_\d\d:\d\d:\d\d$', x))

    if not os.path.exists(options.state) and len(backup_list) > 1:
        if not options.reconstruct_state:
            raise QuietError(
                'Found more than one backup but no state file! Aborting.')

        log.warning('Trying to reconstruct state file..')
        state = upgrade_to_state(backup_list)
        if not options.n:
            log.info('Saving reconstructed state..')
            with open(options.state, 'wb') as fh:
                fh.write(freeze_basic_mapping(state))
    elif not os.path.exists(options.state):
        log.warning('Creating state file..')
        state = dict()
    else:
        log.info('Reading state...')
        # Older versions used pickle to store state...
        with open(options.state, 'rb') as fh:
            proto = fh.read(2)
            fh.seek(0)
            if proto == b'\x80\x02':
                state = pickle.load(fh)
            else:
                state = thaw_basic_mapping(fh.read())

    to_delete = process_backups(backup_list, state, options.cycles)

    for x in to_delete:
        log.info('Backup %s is no longer needed, removing...', x)
        if not options.n:
            if options.use_s3qlrm:
                s3qlrm([x])
            else:
                shutil.rmtree(x)

    if options.n:
        log.info('Dry run, not saving state.')
    else:
        log.info('Saving state..')
        with open(options.state, 'wb') as fh:
            fh.write(freeze_basic_mapping(state))
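
The two-byte check above works because every pickle protocol-2 stream begins with b'\x80\x02' (the PROTO opcode followed by the protocol number); anything else is assumed to be the newer freeze_basic_mapping() format. A self-contained sketch of the same detection, with the thaw function passed in so the snippet does not depend on S3QL internals:

import pickle

def load_state(path, thaw_fn):
    '''Read a state file that may still be in the legacy pickle format.'''
    with open(path, 'rb') as fh:
        magic = fh.read(2)
        fh.seek(0)
        if magic == b'\x80\x02':        # legacy file written by pickle.dump()
            return pickle.load(fh)
        return thaw_fn(fh.read())       # current format, e.g. thaw_basic_mapping
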
Example #6
def main(args=None):

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # Determine available backups
    backup_list = set(x for x in os.listdir('.')
                      if re.match(r'^\d{4}-\d\d-\d\d_\d\d:\d\d:\d\d$', x))

    if not os.path.exists(options.state) and len(backup_list) > 1:
        if not options.reconstruct_state:
            raise QuietError('Found more than one backup but no state file! Aborting.')

        log.warning('Trying to reconstruct state file..')
        state = upgrade_to_state(backup_list)
        if not options.n:
            log.info('Saving reconstructed state..')
            with open(options.state, 'wb') as fh:
                fh.write(freeze_basic_mapping(state))
    elif not os.path.exists(options.state):
        log.warning('Creating state file..')
        state = dict()
    else:
        log.info('Reading state...')
        # Older versions used pickle to store state...
        with open(options.state, 'rb') as fh:
            proto = fh.read(2)
            fh.seek(0)
            if proto == b'\x80\x02':
                state = pickle.load(fh)
            else:
                state = thaw_basic_mapping(fh.read())

    to_delete = process_backups(backup_list, state, options.cycles)

    for x in to_delete:
        log.info('Backup %s is no longer needed, removing...', x)
        if not options.n:
            if options.use_s3qlrm:
                s3qlrm([x])
            else:
                shutil.rmtree(x)

    if options.n:
        log.info('Dry run, not saving state.')
    else:
        log.info('Saving state..')
        with open(options.state, 'wb') as fh:
            fh.write(freeze_basic_mapping(state))
Example #7
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    backend = get_backend(options, raw=True)
    atexit.register(backend.close)

    for line in options.file:
        key = line.rstrip()
        log.info('Deleting %s', key)
        backend.delete(key)
Example #8
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    backend = get_backend(options, raw=True)
    atexit.register(backend.close)

    for line in options.file:
        key = line.rstrip()
        log.info('Deleting %s', key)
        backend.delete(key)
Example #9
def parse_args(args):
    '''Parse command line'''

    parser = ArgumentParser(
                description='Clone an S3QL file system.')

    parser.add_quiet()
    parser.add_debug()
    parser.add_backend_options()
    parser.add_version()

    parser.add_argument("--threads", type=int, default=3,
                        help='Number of threads to use')

    # Can't use parser.add_storage_url(), because we need both a source
    # and destination.
    parser.add_argument("--authfile", type=str, metavar='<path>',
                      default=os.path.expanduser("~/.s3ql/authinfo2"),
                      help='Read authentication credentials from this file '
                      '(default: `~/.s3ql/authinfo2`)')
    parser.add_argument("src_storage_url", metavar='<source-storage-url>',
                        type=storage_url_type,
                        help='Storage URL of the source backend that contains the file system')
    parser.add_argument("dst_storage_url", metavar='<destination-storage-url>',
                        type=storage_url_type,
                        help='Storage URL of the destination backend')


    options = parser.parse_args(args)
    setup_logging(options)

    # Print message so that the user has some idea what credentials are
    # wanted (if not specified in authfile).
    log.info('Connecting to source backend...')
    options.storage_url = options.src_storage_url
    parser._init_backend_factory(options)
    src_options = argparse.Namespace()
    src_options.__dict__.update(options.__dict__)
    options.src_backend_factory = lambda: src_options.backend_class(src_options)

    log.info('Connecting to destination backend...')
    options.storage_url = options.dst_storage_url
    parser._init_backend_factory(options)
    dst_options = argparse.Namespace()
    dst_options.__dict__.update(options.__dict__)
    options.dst_backend_factory = lambda: dst_options.backend_class(dst_options)
    del options.storage_url
    del options.backend_class

    return options
Example #10
def main(args=None):

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # Check for cached metadata
    cachepath = get_backend_cachedir(options.storage_url, options.cachedir)
    if not os.path.exists(cachepath + '.params'):
        raise QuietError("No local metadata found.")

    with open(cachepath + '.params', 'rb') as fh:
        param = pickle.load(fh)

    # Check revision
    if param['revision'] < CURRENT_FS_REV:
        raise QuietError('File system revision too old.')
    elif param['revision'] > CURRENT_FS_REV:
        raise QuietError('File system revision too new.')

    if os.path.exists(DBNAME):
        raise QuietError('%s exists, aborting.' % DBNAME)

    log.info('Copying database...')
    dst = tempfile.NamedTemporaryFile()
    with open(cachepath + '.db', 'rb') as src:
        shutil.copyfileobj(src, dst)
    dst.flush()
    db = Connection(dst.name)

    log.info('Scrambling...')
    md5 = lambda x: hashlib.md5(x).hexdigest()
    for (id_, name) in db.query('SELECT id, name FROM names'):
        db.execute('UPDATE names SET name=? WHERE id=?',
                   (md5(name), id_))

    for (id_, name) in db.query('SELECT inode, target FROM symlink_targets'):
        db.execute('UPDATE symlink_targets SET target=? WHERE inode=?',
                   (md5(name), id_))

    for (id_, name) in db.query('SELECT rowid, value FROM ext_attributes'):
        db.execute('UPDATE ext_attributes SET value=? WHERE rowid=?',
                   (md5(name), id_))

    log.info('Saving...')
    with open(DBNAME, 'wb+') as fh:
        dump_metadata(db, fh)
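
A standalone sketch of the scrambling step above: every name, symlink target and extended-attribute value is replaced by its MD5 hex digest, so the metadata keeps its structure (identical inputs still map to identical outputs) but no longer reveals the original strings:

import hashlib

def scramble(value: bytes) -> str:
    '''Deterministically anonymize a value, as in the UPDATE statements above.'''
    return hashlib.md5(value).hexdigest()

print(scramble(b'confidential-report.pdf'))   # a 32-character hex string
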
Example #11
def main(args=None):

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # Check for cached metadata
    cachepath = options.cachepath
    if not os.path.exists(cachepath + '.params'):
        raise QuietError("No local metadata found.")

    param = load_params(cachepath)

    # Check revision
    if param['revision'] < CURRENT_FS_REV:
        raise QuietError('File system revision too old.')
    elif param['revision'] > CURRENT_FS_REV:
        raise QuietError('File system revision too new.')

    if os.path.exists(DBNAME):
        raise QuietError('%s exists, aborting.' % DBNAME)

    log.info('Copying database...')
    dst = tempfile.NamedTemporaryFile()
    with open(cachepath + '.db', 'rb') as src:
        shutil.copyfileobj(src, dst)
    dst.flush()
    db = Connection(dst.name)

    log.info('Scrambling...')
    md5 = lambda x: hashlib.md5(x).hexdigest()
    for (id_, name) in db.query('SELECT id, name FROM names'):
        db.execute('UPDATE names SET name=? WHERE id=?',
                   (md5(name), id_))

    for (id_, name) in db.query('SELECT inode, target FROM symlink_targets'):
        db.execute('UPDATE symlink_targets SET target=? WHERE inode=?',
                   (md5(name), id_))

    for (id_, name) in db.query('SELECT rowid, value FROM ext_attributes'):
        db.execute('UPDATE ext_attributes SET value=? WHERE rowid=?',
                   (md5(name), id_))

    log.info('Saving...')
    with open(DBNAME, 'wb+') as fh:
        dump_metadata(db, fh)
Example #12
def simulate(args):

    options = parse_args(args)
    setup_logging(options)

    state = dict()
    backup_list = set()
    for i in range(50):
        backup_list.add('backup-%2d' % i)
        delete = process_backups(backup_list, state, options.cycles)
        log.info('Deleting %s', delete)
        backup_list -= delete

        log.info('Available backups on day %d:', i)
        for x in sorted(backup_list):
            log.info(x)
Example #13
def simulate(args):

    options = parse_args(args)
    setup_logging(options)

    state = dict()
    backup_list = set()
    for i in range(50):
        backup_list.add('backup-%2d' % i)
        delete = process_backups(backup_list, state, options.cycles)
        log.info('Deleting %s', delete)
        backup_list -= delete

        log.info('Available backups on day %d:', i)
        for x in sorted(backup_list):
            log.info(x)
Example #14
def main(args=None):

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # Determine available backups
    backup_list = set(x for x in os.listdir('.')
                      if re.match(r'^\d{4}-\d\d-\d\d_\d\d:\d\d:\d\d$', x))

    if not os.path.exists(options.state) and len(backup_list) > 1:
        if not options.reconstruct_state:
            raise QuietError(
                'Found more than one backup but no state file! Aborting.')

        log.warning('Trying to reconstruct state file..')
        state = upgrade_to_state(backup_list)
        if not options.n:
            log.info('Saving reconstructed state..')
            pickle.dump(state, open(options.state, 'wb'), PICKLE_PROTOCOL)
    elif not os.path.exists(options.state):
        log.warning('Creating state file..')
        state = dict()
    else:
        log.info('Reading state...')
        state = pickle.load(open(options.state, 'rb'))

    to_delete = process_backups(backup_list, state, options.cycles)

    for x in to_delete:
        log.info('Backup %s is no longer needed, removing...', x)
        if not options.n:
            if options.use_s3qlrm:
                s3qlrm([x])
            else:
                shutil.rmtree(x)

    if options.n:
        log.info('Dry run, not saving state.')
    else:
        log.info('Saving state..')
        pickle.dump(state, open(options.state, 'wb'), PICKLE_PROTOCOL)
Example #15
def main(args=None):

    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # Determine available backups
    backup_list = set(x for x in os.listdir('.')
                      if re.match(r'^\d{4}-\d\d-\d\d_\d\d:\d\d:\d\d$', x))

    if not os.path.exists(options.state) and len(backup_list) > 1:
        if not options.reconstruct_state:
            raise QuietError('Found more than one backup but no state file! Aborting.')

        log.warning('Trying to reconstruct state file..')
        state = upgrade_to_state(backup_list)
        if not options.n:
            log.info('Saving reconstructed state..')
            pickle.dump(state, open(options.state, 'wb'), PICKLE_PROTOCOL)
    elif not os.path.exists(options.state):
        log.warning('Creating state file..')
        state = dict()
    else:
        log.info('Reading state...')
        state = pickle.load(open(options.state, 'rb'))

    to_delete = process_backups(backup_list, state, options.cycles)

    for x in to_delete:
        log.info('Backup %s is no longer needed, removing...', x)
        if not options.n:
            if options.use_s3qlrm:
                s3qlrm([x])
            else:
                shutil.rmtree(x)

    if options.n:
        log.info('Dry run, not saving state.')
    else:
        log.info('Saving state..')
        pickle.dump(state, open(options.state, 'wb'), PICKLE_PROTOCOL)
Example #16
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    src_backends = []
    dst_backends = []

    try:
        options.storage_url = options.src_storage_url
        for _ in range(options.threads + 1):
            src_backends.append(get_backend(options, plain=True))

        options.storage_url = options.dst_storage_url
        for _ in range(options.threads):
            dst_backends.append(get_backend(options, plain=True))
    except DanglingStorageURLError as exc:
        raise QuietError(str(exc)) from None

    queue = Queue(maxsize=options.threads)
    threads = []
    for (src_backend, dst_backend) in zip(src_backends, dst_backends):
        t = Thread(target=copy_loop, args=(queue, src_backend, dst_backend))
        t.start()
        threads.append(t)

    # The extra source backend (threads + 1 were created above) is used only
    # for listing the keys that need to be copied.
    for (i, key) in enumerate(src_backends[-1]):
        if i % 500 == 0 and sys.stdout.isatty():
            sys.stdout.write('\rCopied %d objects so far...' % i)
            sys.stdout.flush()
        queue.put(key)

    for t in threads:
        queue.put(None)

    for t in threads:
        t.join()

    if sys.stdout.isatty():
        sys.stdout.write('\n')
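
copy_loop() itself is not among the examples on this page. A minimal sketch of what such a worker might look like; the perform_read() call is an assumption modelled on the perform_write() usage in Example #18, not code taken from S3QL:

def copy_loop(queue, src_backend, dst_backend):
    # Pull keys until the None sentinel arrives (main() above puts one
    # None per worker thread after all keys have been queued).
    while True:
        key = queue.get()
        if key is None:
            break
        # Assumed API: perform_read(fn, key) hands fn a readable file-like
        # object, mirroring perform_write(fn, key) from Example #18.
        data = src_backend.perform_read(lambda fh: fh.read(), key)
        dst_backend.perform_write(lambda fh: fh.write(data), key)
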
Example #17
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    pool = ('abcdefghijklmnopqrstuvwxyz',
             'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
             '0123456789')
    steps = [ len(x) / (options.processes - 1) for x in pool ]
    prefixes = list()
    for i in range(options.processes - 1):
        parts = [ x[int(i * y):int((i + 1) * y)] for (x, y) in zip(pool, steps) ]
        prefixes.append(''.join(parts))

    filters = [ '-! [%s]*' % x for x in prefixes ]

    # Catch all
    filters.append('- [%s]*' % ''.join(prefixes))

    rsync_args = [ 'rsync', '-f', '+ */' ]
    if not options.quiet:
        rsync_args.append('--out-format')
        rsync_args.append('%n%L')
    if options.a:
        rsync_args.append('-aHAX')

    processes = list()
    for filter_ in filters:
        cmd = rsync_args + [ '-f', filter_ ] + options.pps
        log.debug('Calling %s', cmd)
        processes.append(subprocess.Popen(cmd))

    if all([ c.wait() == 0 for c in processes]):
        sys.exit(0)
    else:
        sys.exit(1)
Example #18
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # /dev/urandom may be slow, so we cache the data first
    log.info('Preparing test data...')
    rnd_fh = tempfile.TemporaryFile()
    with open('/dev/urandom', 'rb', 0) as src:
        copied = 0
        while copied < 50 * 1024 * 1024:
            buf = src.read(BUFSIZE)
            rnd_fh.write(buf)
            copied += len(buf)

    log.info('Measuring throughput to cache...')
    backend_dir = tempfile.mkdtemp(prefix='s3ql-benchmark-')
    mnt_dir = tempfile.mkdtemp(prefix='s3ql-mnt')
    atexit.register(shutil.rmtree, backend_dir)
    atexit.register(shutil.rmtree, mnt_dir)

    block_sizes = [2**b for b in range(12, 18)]
    for blocksize in block_sizes:
        write_time = 0
        size = 50 * 1024 * 1024
        while write_time < 3:
            log.debug('Write took %.3g seconds, retrying', write_time)
            subprocess.check_call([
                exec_prefix + 'mkfs.s3ql', '--plain',
                'local://%s' % backend_dir, '--quiet', '--force', '--cachedir',
                options.cachedir
            ])
            subprocess.check_call([
                exec_prefix + 'mount.s3ql', '--threads', '1', '--quiet',
                '--cachesize',
                '%d' % (2 * size / 1024), '--log',
                '%s/mount.log' % backend_dir, '--cachedir', options.cachedir,
                'local://%s' % backend_dir, mnt_dir
            ])
            try:
                size *= 2
                with open('%s/bigfile' % mnt_dir, 'wb', 0) as dst:
                    rnd_fh.seek(0)
                    write_time = time.time()
                    copied = 0
                    while copied < size:
                        buf = rnd_fh.read(blocksize)
                        if not buf:
                            rnd_fh.seek(0)
                            continue
                        dst.write(buf)
                        copied += len(buf)

                write_time = time.time() - write_time
                os.unlink('%s/bigfile' % mnt_dir)
            finally:
                subprocess.check_call([exec_prefix + 'umount.s3ql', mnt_dir])

        fuse_speed = copied / write_time
        log.info('Cache throughput with %3d KiB blocks: %d KiB/sec',
                 blocksize / 1024, fuse_speed / 1024)

    # Upload random data to prevent effects of compression
    # on the network layer
    log.info('Measuring raw backend throughput..')
    try:
        backend = get_backend(options, raw=True)
    except DanglingStorageURLError as exc:
        raise QuietError(str(exc)) from None

    upload_time = 0
    size = 512 * 1024
    while upload_time < 10:
        size *= 2

        def do_write(dst):
            rnd_fh.seek(0)
            stamp = time.time()
            copied = 0
            while copied < size:
                buf = rnd_fh.read(BUFSIZE)
                if not buf:
                    rnd_fh.seek(0)
                    continue
                dst.write(buf)
                copied += len(buf)
            return (copied, stamp)

        (upload_size,
         upload_time) = backend.perform_write(do_write, 's3ql_testdata')
        upload_time = time.time() - upload_time
    backend_speed = upload_size / upload_time
    log.info('Backend throughput: %d KiB/sec', backend_speed / 1024)
    backend.delete('s3ql_testdata')

    src = options.file
    size = os.fstat(options.file.fileno()).st_size
    log.info('Test file size: %.2f MiB', (size / 1024**2))

    in_speed = dict()
    out_speed = dict()
    for alg in ALGS:
        log.info('compressing with %s-6...', alg)
        backend = ComprencBackend(
            b'pass', (alg, 6),
            Backend(argparse.Namespace(storage_url='local://' + backend_dir)))

        def do_write(dst):  #pylint: disable=E0102
            src.seek(0)
            stamp = time.time()
            while True:
                buf = src.read(BUFSIZE)
                if not buf:
                    break
                dst.write(buf)
            return (dst, stamp)

        (dst_fh, stamp) = backend.perform_write(do_write, 's3ql_testdata')
        dt = time.time() - stamp
        in_speed[alg] = size / dt
        out_speed[alg] = dst_fh.get_obj_size() / dt
        log.info('%s compression speed: %d KiB/sec per thread (in)', alg,
                 in_speed[alg] / 1024)
        log.info('%s compression speed: %d KiB/sec per thread (out)', alg,
                 out_speed[alg] / 1024)

    print('')
    print('With %d KiB blocks, maximum performance for different compression' %
          (block_sizes[-1] / 1024),
          'algorithms and thread counts is:',
          '',
          sep='\n')

    threads = set([1, 2, 4, 8])
    cores = os.sysconf('SC_NPROCESSORS_ONLN')
    if cores != -1:
        threads.add(cores)
    if options.threads:
        threads.add(options.threads)

    print('%-26s' % 'Threads:',
          ('%12d' * len(threads)) % tuple(sorted(threads)))

    for alg in ALGS:
        speeds = []
        limits = []
        for t in sorted(threads):
            if fuse_speed > t * in_speed[alg]:
                limit = 'CPU'
                speed = t * in_speed[alg]
            else:
                limit = 'S3QL/FUSE'
                speed = fuse_speed

            if speed / in_speed[alg] * out_speed[alg] > backend_speed:
                limit = 'uplink'
                speed = backend_speed * in_speed[alg] / out_speed[alg]

            limits.append(limit)
            speeds.append(speed / 1024)

        print('%-26s' % ('Max FS throughput (%s):' % alg),
              ('%7d KiB/s' * len(threads)) % tuple(speeds))
        print('%-26s' % '..limited by:',
              ('%12s' * len(threads)) % tuple(limits))

    print('')
    print(
        'All numbers assume that the test file is representative and that',
        'there are enough processor cores to run all active threads in parallel.',
        'To compensate for network latency, you should use about twice as',
        'many upload threads as indicated by the above table.\n',
        sep='\n')
Example #19
def main(args=None):
    options = parse_args(args)
    setup_logging(options)

    try:
        options.storage_url = options.src_storage_url
        src_backend_factory = get_backend_factory(options.src_storage_url,
                                                  options.backend_options,
                                                  options.authfile,
                                                  raw=True)

        options.storage_url = options.dst_storage_url
        dst_backend_factory = get_backend_factory(options.dst_storage_url,
                                                  options.backend_options,
                                                  options.authfile,
                                                  raw=True)
    except DanglingStorageURLError as exc:
        raise QuietError(str(exc)) from None

    queue = Queue(maxsize=options.threads)
    threads = []
    for _ in range(options.threads):
        t = AsyncFn(copy_loop, queue, src_backend_factory, dst_backend_factory)
        # Don't wait for worker threads, gives deadlock if main thread
        # terminates with exception
        t.daemon = True
        t.start()
        threads.append(t)

    with src_backend_factory() as backend:
        stamp1 = 0
        for (i, key) in enumerate(backend):
            stamp2 = time.time()
            if stamp2 - stamp1 > 1:
                stamp1 = stamp2
                sys.stdout.write('\rCopied %d objects so far...' % i)
                sys.stdout.flush()

                # Terminate early if any thread failed with an exception
                for t in threads:
                    if not t.is_alive():
                        t.join_and_raise()

            # Avoid blocking if all threads terminated
            while True:
                try:
                    queue.put(key, timeout=1)
                except QueueFull:
                    pass
                else:
                    break
                for t in threads:
                    if not t.is_alive():
                        t.join_and_raise()
    sys.stdout.write('\n')

    # Grow the queue so the None sentinels below can always be enqueued, even
    # if some worker threads have already died and stopped draining the queue.
    queue.maxsize += len(threads)
    for t in threads:
        queue.put(None)

    for t in threads:
        t.join_and_raise()
Example #20
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    try:
        options.storage_url = options.src_storage_url
        src_backend_factory = get_backend_factory(options, plain=True)

        options.storage_url = options.dst_storage_url
        dst_backend_factory = get_backend_factory(options, plain=True)
    except DanglingStorageURLError as exc:
        raise QuietError(str(exc)) from None

    src_backends = [ src_backend_factory() for _ in range(options.threads) ]
    dst_backends = [ dst_backend_factory() for _ in range(options.threads) ]

    queue = Queue(maxsize=options.threads)
    threads = []
    for (src_backend, dst_backend) in zip(src_backends, dst_backends):
        t = AsyncFn(copy_loop, queue, src_backend, dst_backend)
        # Don't wait for worker threads, gives deadlock if main thread
        # terminates with exception
        t.daemon = True
        t.start()
        threads.append(t)

    stamp1 = 0
    for (i, key) in enumerate(src_backends[-1]):
        stamp2 = time.time()
        if stamp2 - stamp1 > 1:
            stamp1 = stamp2
            sys.stdout.write('\rCopied %d objects so far...' % i)
            sys.stdout.flush()

            # Terminate early if any thread failed with an exception
            for t in threads:
                if not t.is_alive():
                    t.join_and_raise()

        # Avoid blocking if all threads terminated
        while True:
            try:
                queue.put(key, timeout=1)
            except QueueFull:
                pass
            else:
                break
            for t in threads:
                if not t.is_alive():
                    t.join_and_raise()
    sys.stdout.write('\n')

    # Grow the queue so the None sentinels below can always be enqueued, even
    # if some worker threads have already died and stopped draining the queue.
    queue.maxsize += len(threads)
    for t in threads:
        queue.put(None)

    for t in threads:
        t.join_and_raise()
Example #21
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    options = parse_args(args)
    setup_logging(options)

    # /dev/urandom may be slow, so we cache the data first
    log.info('Preparing test data...')
    rnd_fh = tempfile.TemporaryFile()
    with open('/dev/urandom', 'rb', 0) as src:
        copied = 0
        while copied < 50 * 1024 * 1024:
            buf = src.read(BUFSIZE)
            rnd_fh.write(buf)
            copied += len(buf)

    log.info('Measuring throughput to cache...')
    backend_dir = tempfile.mkdtemp(prefix='s3ql-benchmark-')
    mnt_dir = tempfile.mkdtemp(prefix='s3ql-mnt')
    atexit.register(shutil.rmtree, backend_dir)
    atexit.register(shutil.rmtree, mnt_dir)

    block_sizes = [ 2**b for b in range(12, 18) ]
    for blocksize in block_sizes:
        write_time = 0
        size = 50 * 1024 * 1024
        while write_time < 3:
            log.debug('Write took %.3g seconds, retrying', write_time)
            subprocess.check_call([exec_prefix + 'mkfs.s3ql', '--plain', 'local://%s' % backend_dir,
                                   '--quiet', '--force', '--cachedir', options.cachedir])
            subprocess.check_call([exec_prefix + 'mount.s3ql', '--threads', '1', '--quiet',
                                   '--cachesize', '%d' % (2 * size / 1024), '--log',
                                   '%s/mount.log' % backend_dir, '--cachedir', options.cachedir,
                                   'local://%s' % backend_dir, mnt_dir])
            try:
                size *= 2
                with open('%s/bigfile' % mnt_dir, 'wb', 0) as dst:
                    rnd_fh.seek(0)
                    write_time = time.time()
                    copied = 0
                    while copied < size:
                        buf = rnd_fh.read(blocksize)
                        if not buf:
                            rnd_fh.seek(0)
                            continue
                        dst.write(buf)
                        copied += len(buf)

                write_time = time.time() - write_time
                os.unlink('%s/bigfile' % mnt_dir)
            finally:
                subprocess.check_call([exec_prefix + 'umount.s3ql', mnt_dir])

        fuse_speed = copied / write_time
        log.info('Cache throughput with %3d KiB blocks: %d KiB/sec',
                 blocksize / 1024, fuse_speed / 1024)

    # Upload random data to prevent effects of compression
    # on the network layer
    log.info('Measuring raw backend throughput..')
    try:
        backend = get_backend(options, raw=True)
    except DanglingStorageURLError as exc:
        raise QuietError(str(exc)) from None

    upload_time = 0
    size = 512 * 1024
    while upload_time < 10:
        size *= 2
        def do_write(dst):
            rnd_fh.seek(0)
            stamp = time.time()
            copied = 0
            while copied < size:
                buf = rnd_fh.read(BUFSIZE)
                if not buf:
                    rnd_fh.seek(0)
                    continue
                dst.write(buf)
                copied += len(buf)
            return (copied, stamp)
        (upload_size, upload_time) = backend.perform_write(do_write, 's3ql_testdata')
        upload_time = time.time() - upload_time
    backend_speed = upload_size / upload_time
    log.info('Backend throughput: %d KiB/sec', backend_speed / 1024)
    backend.delete('s3ql_testdata')

    src = options.file
    size = os.fstat(options.file.fileno()).st_size
    log.info('Test file size: %.2f MiB', (size / 1024 ** 2))

    in_speed = dict()
    out_speed = dict()
    for alg in ALGS:
        log.info('compressing with %s-6...', alg)
        backend = ComprencBackend(
            b'pass', (alg, 6),
            Backend(argparse.Namespace(storage_url='local://' + backend_dir)))
        def do_write(dst): #pylint: disable=E0102
            src.seek(0)
            stamp = time.time()
            while True:
                buf = src.read(BUFSIZE)
                if not buf:
                    break
                dst.write(buf)
            return (dst, stamp)
        (dst_fh, stamp) = backend.perform_write(do_write, 's3ql_testdata')
        dt = time.time() - stamp
        in_speed[alg] = size / dt
        out_speed[alg] = dst_fh.get_obj_size() / dt
        log.info('%s compression speed: %d KiB/sec per thread (in)', alg, in_speed[alg] / 1024)
        log.info('%s compression speed: %d KiB/sec per thread (out)', alg, out_speed[alg] / 1024)

    print('')
    print('With %d KiB blocks, maximum performance for different compression'
          % (block_sizes[-1]/1024), 'algorithms and thread counts is:', '', sep='\n')

    threads = set([1,2,4,8])
    cores = os.sysconf('SC_NPROCESSORS_ONLN')
    if cores != -1:
        threads.add(cores)
    if options.threads:
        threads.add(options.threads)

    print('%-26s' % 'Threads:',
          ('%12d' * len(threads)) % tuple(sorted(threads)))

    for alg in ALGS:
        speeds = []
        limits = []
        for t in sorted(threads):
            if fuse_speed > t * in_speed[alg]:
                limit = 'CPU'
                speed = t * in_speed[alg]
            else:
                limit = 'S3QL/FUSE'
                speed = fuse_speed

            if speed / in_speed[alg] * out_speed[alg] > backend_speed:
                limit = 'uplink'
                speed = backend_speed * in_speed[alg] / out_speed[alg]

            limits.append(limit)
            speeds.append(speed / 1024)

        print('%-26s' % ('Max FS throughput (%s):' % alg),
              ('%7d KiB/s' * len(threads)) % tuple(speeds))
        print('%-26s' % '..limited by:',
              ('%12s' * len(threads)) % tuple(limits))

    print('')
    print('All numbers assume that the test file is representative and that',
          'there are enough processor cores to run all active threads in parallel.',
          'To compensate for network latency, you should use about twice as',
          'many upload threads as indicated by the above table.\n', sep='\n')
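
To make the limit logic in the last loop concrete, here is a small worked example with made-up figures (all numbers are hypothetical and chosen so that each branch is exercised):

# Hypothetical rates: 40 MiB/s compressor input per thread, 15 MiB/s
# compressed output per thread, 120 MiB/s cache (FUSE) throughput,
# 10 MiB/s backend uplink, 4 threads.
in_speed, out_speed = 40.0, 15.0
fuse_speed, backend_speed, t = 120.0, 10.0, 4

if fuse_speed > t * in_speed:                  # 120 > 160 is false
    limit, speed = 'CPU', t * in_speed
else:
    limit, speed = 'S3QL/FUSE', fuse_speed     # FUSE caps throughput at 120

# The compressed stream would then be 120 / 40 * 15 = 45 MiB/s, more than the
# 10 MiB/s uplink can carry, so the uplink is the real bottleneck:
if speed / in_speed * out_speed > backend_speed:
    limit = 'uplink'
    speed = backend_speed * in_speed / out_speed   # 10 * 40 / 15 ≈ 26.7

print(limit, round(speed, 1))                  # uplink 26.7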