Пример #1
0
def ceph_clients(ctx, config):
    cluster_name = config['cluster']

    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
    for remote, roles_for_host in clients.remotes.items():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
                                                     cluster_name):
            name = teuthology.ceph_role(role)
            client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(
                cluster_name, name)
            r = _shell(
                ctx,
                config,
                args=[
                    'ceph',
                    'auth',
                    'get-or-create',
                    name,
                    'mon',
                    'allow *',
                    'osd',
                    'allow *',
                    'mds',
                    'allow *',
                    'mgr',
                    'allow *',
                ],
                stdout=BytesIO(),
            )
            keyring = r.stdout.getvalue()
            remote.write_file(client_keyring, keyring, sudo=True, mode='0644')
    yield
Пример #2
0
def create_keyring(ctx, cluster_name):
    """
    Set up key ring on remote sites
    """
    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    for remote, roles_for_host in clients.remotes.items():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
                                                     cluster_name):
            name = teuthology.ceph_role(role)
            client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(
                cluster_name, name)
            remote.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    # TODO this --name= is not really obeyed, all unknown "types" are munged to "client"
                    '--name={name}'.format(name=name),
                    client_keyring,
                    run.Raw('&&'),
                    'sudo',
                    'chmod',
                    '0644',
                    client_keyring,
                ], )
Пример #3
0
def create_keyring(ctx, cluster_name):
    """
    Set up key ring on remote sites
    """
    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    for remote, roles_for_host in clients.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
                                                     cluster_name):
            name = teuthology.ceph_role(role)
            client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name, name)
            remote.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    # TODO this --name= is not really obeyed, all unknown "types" are munged to "client"
                    '--name={name}'.format(name=name),
                    client_keyring,
                    run.Raw('&&'),
                    'sudo',
                    'chmod',
                    '0644',
                    client_keyring,
                    ],
                )
Пример #4
0
def osd_scrub_pgs(ctx, config):
    """
    Scrub pgs when we exit.

    First make sure all pgs are active and clean.
    Next scrub all osds.
    Then periodically check until all pgs have scrub time stamps that
    indicate the last scrub completed.  Time out if no progess is made
    here after two minutes.
    """
    retries = 12
    delays = 10
    cluster_name = config['cluster']
    manager = ctx.managers[cluster_name]
    all_clean = False
    for _ in range(0, retries):
        stats = manager.get_pg_stats()
        states = [stat['state'] for stat in stats]
        if len(set(states)) == 1 and states[0] == 'active+clean':
            all_clean = True
            break
        log.info("Waiting for all osds to be active and clean.")
        time.sleep(delays)
    if not all_clean:
        log.info("Scrubbing terminated -- not all pgs were active and clean.")
        return
    check_time_now = time.localtime()
    time.sleep(1)
    all_roles = teuthology.all_roles(ctx.cluster)
    for role in teuthology.cluster_roles_of_type(all_roles, 'osd',
                                                 cluster_name):
        log.info("Scrubbing {osd}".format(osd=role))
        _, _, id_ = teuthology.split_role(role)
        manager.raw_cluster_cmd('osd', 'deep-scrub', id_)
    prev_good = 0
    gap_cnt = 0
    loop = True
    while loop:
        stats = manager.get_pg_stats()
        timez = [stat['last_scrub_stamp'] for stat in stats]
        loop = False
        thiscnt = 0
        for tmval in timez:
            pgtm = time.strptime(tmval[0:tmval.find('.')], '%Y-%m-%d %H:%M:%S')
            if pgtm > check_time_now:
                thiscnt += 1
            else:
                loop = True
        if thiscnt > prev_good:
            prev_good = thiscnt
            gap_cnt = 0
        else:
            gap_cnt += 1
            if gap_cnt > retries:
                log.info('Exiting scrub checking -- not all pgs scrubbed.')
                return
        if loop:
            log.info('Still waiting for all pgs to be scrubbed.')
            time.sleep(delays)
Пример #5
0
def osd_scrub_pgs(ctx, config):
    """
    Scrub pgs when we exit.

    First make sure all pgs are active and clean.
    Next scrub all osds.
    Then periodically check until all pgs have scrub time stamps that
    indicate the last scrub completed.  Time out if no progess is made
    here after two minutes.
    """
    retries = 12
    delays = 10
    cluster_name = config["cluster"]
    manager = ctx.managers[cluster_name]
    all_clean = False
    for _ in range(0, retries):
        stats = manager.get_pg_stats()
        states = [stat["state"] for stat in stats]
        if len(set(states)) == 1 and states[0] == "active+clean":
            all_clean = True
            break
        log.info("Waiting for all osds to be active and clean.")
        time.sleep(delays)
    if not all_clean:
        log.info("Scrubbing terminated -- not all pgs were active and clean.")
        return
    check_time_now = time.localtime()
    time.sleep(1)
    all_roles = teuthology.all_roles(ctx.cluster)
    for role in teuthology.cluster_roles_of_type(all_roles, "osd", cluster_name):
        log.info("Scrubbing {osd}".format(osd=role))
        _, _, id_ = teuthology.split_role(role)
        manager.raw_cluster_cmd("osd", "deep-scrub", id_)
    prev_good = 0
    gap_cnt = 0
    loop = True
    while loop:
        stats = manager.get_pg_stats()
        timez = [stat["last_scrub_stamp"] for stat in stats]
        loop = False
        thiscnt = 0
        for tmval in timez:
            pgtm = time.strptime(tmval[0 : tmval.find(".")], "%Y-%m-%d %H:%M:%S")
            if pgtm > check_time_now:
                thiscnt += 1
            else:
                loop = True
        if thiscnt > prev_good:
            prev_good = thiscnt
            gap_cnt = 0
        else:
            gap_cnt += 1
            if gap_cnt > retries:
                log.info("Exiting scrub checking -- not all pgs scrubbed.")
                return
        if loop:
            log.info("Still waiting for all pgs to be scrubbed.")
            time.sleep(delays)
Пример #6
0
 def create_keyring(self):
     """
     Set up key ring on remote sites
     """
     log.info('Setting up client nodes...')
     clients = self.ctx.cluster.only(
         teuthology.is_type('client', self.cluster_name))
     testdir = teuthology.get_testdir(self.ctx)
     coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
     for remote, roles_for_host in clients.remotes.iteritems():
         for role in teuthology.cluster_roles_of_type(
                 roles_for_host, 'client', self.cluster_name):
             name = teuthology.ceph_role(role)
             log.info("Creating keyring for {}".format(name))
             client_keyring = '/etc/ceph/ceph.{}.keyring'.format(name)
             remote.run(
                 args=[
                     'sudo',
                     'adjust-ulimits',
                     'ceph-coverage',
                     coverage_dir,
                     'ceph-authtool',
                     '--create-keyring',
                     '--gen-key',
                     # TODO this --name= is not really obeyed, all unknown
                     # "types" are munged to "client"
                     '--name={name}'.format(name=name),
                     '--cap',
                     'osd',
                     'allow rwx',
                     '--cap',
                     'mon',
                     'allow rwx',
                     client_keyring,
                     run.Raw('&&'),
                     'sudo',
                     'chmod',
                     '0644',
                     client_keyring,
                     run.Raw('&&'),
                     'sudo',
                     'ls',
                     run.Raw('-l'),
                     client_keyring,
                     run.Raw('&&'),
                     'sudo',
                     'ceph',
                     'auth',
                     'import',
                     run.Raw('-i'),
                     client_keyring,
                 ], )
Пример #7
0
def test_cluster_roles_of_type():
    expected = [
        (['client.0', 'osd.0',
          'ceph.osd.1'], 'osd', 'ceph', ['osd.0', 'ceph.osd.1']),
        (['client.0', 'osd.0', 'ceph.osd.1'], 'client', 'ceph', ['client.0']),
        (['foo.client.1', 'bar.client.2.3', 'baz.osd.1'], 'mon', None, []),
        (['foo.client.1', 'bar.client.2.3',
          'baz.osd.1'], 'client', None, ['foo.client.1', 'bar.client.2.3']),
        (['foo.client.1', 'bar.client.2.3',
          'baz.osd.1'], 'client', 'bar', ['bar.client.2.3']),
    ]
    for roles_for_host, type_, cluster_, expected_roles in expected:
        roles = list(
            misc.cluster_roles_of_type(roles_for_host, type_, cluster_))
        assert roles == expected_roles
Пример #8
0
def ceph_clients(ctx, config):
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)

    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    ctx.cluster.run(args=[
        'sudo',
        'mkdir',
        '-p',
        '/etc/ceph',
    ])
    for remote, roles_for_host in clients.remotes.items():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
                                                     cluster_name):
            name = teuthology.ceph_role(role)
            client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(
                cluster_name, name)
            r = _shell(
                ctx=ctx,
                cluster_name=cluster_name,
                remote=remote,
                args=[
                    'ceph',
                    'auth',
                    'get-or-create',
                    name,
                    'mon',
                    'allow *',
                    'osd',
                    'allow *',
                    'mds',
                    'allow *',
                    'mgr',
                    'allow *',
                ],
                stdout=StringIO(),
            )
            keyring = r.stdout.getvalue()
            teuthology.sudo_write_file(remote=remote,
                                       path=client_keyring,
                                       data=keyring,
                                       perms='0644')
    yield
Пример #9
0
def execute(ctx, config):
    """
    Run the blktrace program on remote machines.
    """
    procs = []
    testdir = teuthology.get_testdir(ctx)
    log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=testdir)

    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.items():
        roles_to_devs = ctx.disk_config.remote_to_roles_to_dev[remote]
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd',
                                                     config['cluster']):
            if roles_to_devs.get(role):
                dev = roles_to_devs[role]
                log.info("running blktrace on %s: %s" % (remote.name, dev))

                proc = remote.run(
                    args=[
                        'cd',
                        log_dir,
                        run.Raw(';'),
                        'daemon-helper',
                        daemon_signal,
                        'sudo',
                        blktrace,
                        '-o',
                        dev.rsplit("/", 1)[1],
                        '-d',
                        dev,
                    ],
                    wait=False,
                    stdin=run.PIPE,
                )
                procs.append(proc)
    try:
        yield
    finally:
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        log.info('stopping blktrace processs')
        for proc in procs:
            proc.stdin.close()
Пример #10
0
def execute(ctx, config):
    """
    Run the blktrace program on remote machines.
    """
    procs = []
    testdir = teuthology.get_testdir(ctx)
    log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=testdir)

    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = ctx.disk_config.remote_to_roles_to_dev[remote]
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd',
                                                     config['cluster']):
            if roles_to_devs.get(role):
                dev = roles_to_devs[role]
                log.info("running blktrace on %s: %s" % (remote.name, dev))

                proc = remote.run(
                    args=[
                        'cd',
                        log_dir,
                        run.Raw(';'),
                        'daemon-helper',
                        daemon_signal,
                        'sudo',
                        blktrace,
                        '-o',
                        dev.rsplit("/", 1)[1],
                        '-d',
                        dev,
                        ],
                    wait=False,
                    stdin=run.PIPE,
                    )
                procs.append(proc)
    try:
        yield
    finally:
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        log.info('stopping blktrace processs')
        for proc in procs:
            proc.stdin.close()
Пример #11
0
def cluster(ctx, config):
    """
    Handle the creation and removal of a ceph cluster.

    On startup:
        Create directories needed for the cluster.
        Create remote journals for all osds.
        Create and set keyring.
        Copy the monmap to tht test systems.
        Setup mon nodes.
        Setup mds nodes.
        Mkfs osd nodes.
        Add keyring information to monmaps
        Mkfs mon nodes.

    On exit:
        If errors occured, extract a failure message and store in ctx.summary.
        Unmount all test files and temporary journaling files.
        Save the monitor information and archive all ceph logs.
        Cleanup the keyring setup, and remove all monitor map and data files left over.

    :param ctx: Context
    :param config: Configuration
    """
    if ctx.config.get('use_existing_cluster', False) is True:
        log.info("'use_existing_cluster' is true; skipping cluster creation")
        yield

    testdir = teuthology.get_testdir(ctx)
    cluster_name = config['cluster']
    data_dir = '{tdir}/{cluster}.data'.format(tdir=testdir,
                                              cluster=cluster_name)
    log.info('Creating ceph cluster %s...', cluster_name)
    run.wait(
        ctx.cluster.run(
            args=[
                'install',
                '-d',
                '-m0755',
                '--',
                data_dir,
            ],
            wait=False,
        ))

    run.wait(
        ctx.cluster.run(
            args=[
                'sudo',
                'install',
                '-d',
                '-m0777',
                '--',
                '/var/run/ceph',
            ],
            wait=False,
        ))

    devs_to_clean = {}
    remote_to_roles_to_devs = {}
    remote_to_roles_to_journals = {}
    osds = ctx.cluster.only(teuthology.is_type('osd', cluster_name))
    for remote, roles_for_host in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        roles_to_devs = {}
        roles_to_journals = {}
        if config.get('fs'):
            log.info('fs option selected, checking for scratch devs')
            log.info('found devs: %s' % (str(devs), ))
            devs_id_map = teuthology.get_wwn_id_map(remote, devs)
            iddevs = devs_id_map.values()
            roles_to_devs = assign_devs(
                teuthology.cluster_roles_of_type(roles_for_host, 'osd',
                                                 cluster_name), iddevs)
            if len(roles_to_devs) < len(iddevs):
                iddevs = iddevs[len(roles_to_devs):]
            devs_to_clean[remote] = []

        if config.get('block_journal'):
            log.info('block journal enabled')
            roles_to_journals = assign_devs(
                teuthology.cluster_roles_of_type(roles_for_host, 'osd',
                                                 cluster_name), iddevs)
            log.info('journal map: %s', roles_to_journals)

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled')
            roles_to_journals = {}
            remote.run(args=['sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt'])
            for role in teuthology.cluster_roles_of_type(
                    roles_for_host, 'osd', cluster_name):
                tmpfs = '/mnt/' + role
                roles_to_journals[role] = tmpfs
                remote.run(args=['truncate', '-s', '1500M', tmpfs])
            log.info('journal map: %s', roles_to_journals)

        log.info('dev map: %s' % (str(roles_to_devs), ))
        remote_to_roles_to_devs[remote] = roles_to_devs
        remote_to_roles_to_journals[remote] = roles_to_journals

    log.info('Generating config...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [
        host for (host, port) in (remote.ssh.get_transport().getpeername()
                                  for (remote, role_list) in remotes_and_roles)
    ]
    conf = teuthology.skeleton_config(ctx,
                                      roles=roles,
                                      ips=ips,
                                      cluster=cluster_name)
    for remote, roles_to_journals in remote_to_roles_to_journals.iteritems():
        for role, journal in roles_to_journals.iteritems():
            name = teuthology.ceph_role(role)
            if name not in conf:
                conf[name] = {}
            conf[name]['osd journal'] = journal
    for section, keys in config['conf'].iteritems():
        for key, value in keys.iteritems():
            log.info("[%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    if config.get('tmpfs_journal'):
        conf['journal dio'] = False

    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
    ctx.ceph[cluster_name] = argparse.Namespace()
    ctx.ceph[cluster_name].conf = conf

    default_keyring = '/etc/ceph/{cluster}.keyring'.format(
        cluster=cluster_name)
    keyring_path = config.get('keyring_path', default_keyring)

    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    firstmon = teuthology.get_first_mon(ctx, config, cluster_name)

    log.info('Setting up %s...' % firstmon)
    ctx.cluster.only(firstmon).run(args=[
        'sudo',
        'adjust-ulimits',
        'ceph-coverage',
        coverage_dir,
        'ceph-authtool',
        '--create-keyring',
        keyring_path,
    ], )
    ctx.cluster.only(firstmon).run(args=[
        'sudo',
        'adjust-ulimits',
        'ceph-coverage',
        coverage_dir,
        'ceph-authtool',
        '--gen-key',
        '--name=mon.',
        keyring_path,
    ], )
    ctx.cluster.only(firstmon).run(args=[
        'sudo',
        'chmod',
        '0644',
        keyring_path,
    ], )
    (mon0_remote, ) = ctx.cluster.only(firstmon).remotes.keys()
    monmap_path = '{tdir}/{cluster}.monmap'.format(tdir=testdir,
                                                   cluster=cluster_name)
    fsid = teuthology.create_simple_monmap(
        ctx,
        remote=mon0_remote,
        conf=conf,
        path=monmap_path,
    )
    if not 'global' in conf:
        conf['global'] = {}
    conf['global']['fsid'] = fsid

    default_conf_path = '/etc/ceph/{cluster}.conf'.format(cluster=cluster_name)
    conf_path = config.get('conf_path', default_conf_path)
    log.info('Writing %s for FSID %s...' % (conf_path, fsid))
    write_conf(ctx, conf_path, cluster_name)

    log.info('Creating admin key on %s...' % firstmon)
    ctx.cluster.only(firstmon).run(args=[
        'sudo',
        'adjust-ulimits',
        'ceph-coverage',
        coverage_dir,
        'ceph-authtool',
        '--gen-key',
        '--name=client.admin',
        '--set-uid=0',
        '--cap',
        'mon',
        'allow *',
        '--cap',
        'osd',
        'allow *',
        '--cap',
        'mds',
        'allow *',
        keyring_path,
    ], )

    log.info('Copying monmap to all nodes...')
    keyring = teuthology.get_file(
        remote=mon0_remote,
        path=keyring_path,
    )
    monmap = teuthology.get_file(
        remote=mon0_remote,
        path=monmap_path,
    )

    for rem in ctx.cluster.remotes.iterkeys():
        # copy mon key and initial monmap
        log.info('Sending monmap to node {remote}'.format(remote=rem))
        teuthology.sudo_write_file(remote=rem,
                                   path=keyring_path,
                                   data=keyring,
                                   perms='0644')
        teuthology.write_file(
            remote=rem,
            path=monmap_path,
            data=monmap,
        )

    log.info('Setting up mon nodes...')
    mons = ctx.cluster.only(teuthology.is_type('mon', cluster_name))
    osdmap_path = '{tdir}/{cluster}.osdmap'.format(tdir=testdir,
                                                   cluster=cluster_name)
    run.wait(
        mons.run(
            args=[
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'osdmaptool',
                '-c',
                conf_path,
                '--clobber',
                '--createsimple',
                '{num:d}'.format(num=teuthology.num_instances_of_type(
                    ctx.cluster, 'osd', cluster_name), ),
                osdmap_path,
                '--pg_bits',
                '2',
                '--pgp_bits',
                '4',
            ],
            wait=False,
        ), )

    log.info('Setting up mds nodes...')
    mdss = ctx.cluster.only(teuthology.is_type('mds', cluster_name))
    for remote, roles_for_host in mdss.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'mds',
                                                     cluster_name):
            _, _, id_ = teuthology.split_role(role)
            mds_dir = '/var/lib/ceph/mds/{cluster}-{id}'.format(
                cluster=cluster_name,
                id=id_,
            )
            remote.run(args=[
                'sudo',
                'mkdir',
                '-p',
                mds_dir,
                run.Raw('&&'),
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'ceph-authtool',
                '--create-keyring',
                '--gen-key',
                '--name=mds.{id}'.format(id=id_),
                mds_dir + '/keyring',
            ], )

    cclient.create_keyring(ctx, cluster_name)
    log.info('Running mkfs on osd nodes...')

    if not hasattr(ctx, 'disk_config'):
        ctx.disk_config = argparse.Namespace()
    if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev'):
        ctx.disk_config.remote_to_roles_to_dev = {}
    if not hasattr(ctx.disk_config, 'remote_to_roles_to_journals'):
        ctx.disk_config.remote_to_roles_to_journals = {}
    if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_mount_options'):
        ctx.disk_config.remote_to_roles_to_dev_mount_options = {}
    if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_fstype'):
        ctx.disk_config.remote_to_roles_to_dev_fstype = {}

    teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_dev,
                          remote_to_roles_to_devs)
    teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_journals,
                          remote_to_roles_to_journals)

    log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(
        r=str(ctx.disk_config.remote_to_roles_to_dev)))
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = remote_to_roles_to_devs[remote]
        roles_to_journals = remote_to_roles_to_journals[remote]

        for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd',
                                                     cluster_name):
            _, _, id_ = teuthology.split_role(role)
            mnt_point = '/var/lib/ceph/osd/{cluster}-{id}'.format(
                cluster=cluster_name, id=id_)
            remote.run(args=[
                'sudo',
                'mkdir',
                '-p',
                mnt_point,
            ])
            log.info(str(roles_to_journals))
            log.info(role)
            if roles_to_devs.get(role):
                dev = roles_to_devs[role]
                fs = config.get('fs')
                package = None
                mkfs_options = config.get('mkfs_options')
                mount_options = config.get('mount_options')
                if fs == 'btrfs':
                    # package = 'btrfs-tools'
                    if mount_options is None:
                        mount_options = ['noatime', 'user_subvol_rm_allowed']
                    if mkfs_options is None:
                        mkfs_options = [
                            '-m', 'single', '-l', '32768', '-n', '32768'
                        ]
                if fs == 'xfs':
                    # package = 'xfsprogs'
                    if mount_options is None:
                        mount_options = ['noatime']
                    if mkfs_options is None:
                        mkfs_options = ['-f', '-i', 'size=2048']
                if fs == 'ext4' or fs == 'ext3':
                    if mount_options is None:
                        mount_options = ['noatime', 'user_xattr']

                if mount_options is None:
                    mount_options = []
                if mkfs_options is None:
                    mkfs_options = []
                mkfs = ['mkfs.%s' % fs] + mkfs_options
                log.info('%s on %s on %s' % (mkfs, dev, remote))
                if package is not None:
                    remote.run(
                        args=['sudo', 'apt-get', 'install', '-y', package],
                        stdout=StringIO(),
                    )

                try:
                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs +
                               [dev])
                except run.CommandFailedError:
                    # Newer btfs-tools doesn't prompt for overwrite, use -f
                    if '-f' not in mount_options:
                        mkfs_options.append('-f')
                        mkfs = ['mkfs.%s' % fs] + mkfs_options
                        log.info('%s on %s on %s' % (mkfs, dev, remote))
                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs +
                               [dev])

                log.info('mount %s on %s -o %s' %
                         (dev, remote, ','.join(mount_options)))
                remote.run(args=[
                    'sudo',
                    'mount',
                    '-t',
                    fs,
                    '-o',
                    ','.join(mount_options),
                    dev,
                    mnt_point,
                ])
                if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options:
                    ctx.disk_config.remote_to_roles_to_dev_mount_options[
                        remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][
                    role] = mount_options
                if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype:
                    ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_fstype[remote][
                    role] = fs
                devs_to_clean[remote].append(mnt_point)

        for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd',
                                                     cluster_name):
            _, _, id_ = teuthology.split_role(role)
            remote.run(args=[
                'sudo',
                'MALLOC_CHECK_=3',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'ceph-osd',
                '--cluster',
                cluster_name,
                '--mkfs',
                '--mkkey',
                '-i',
                id_,
                '--monmap',
                monmap_path,
            ], )

    log.info('Reading keys from all nodes...')
    keys_fp = StringIO()
    keys = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['mds', 'osd']:
            for role in teuthology.cluster_roles_of_type(
                    roles_for_host, type_, cluster_name):
                _, _, id_ = teuthology.split_role(role)
                data = teuthology.get_file(
                    remote=remote,
                    path='/var/lib/ceph/{type}/{cluster}-{id}/keyring'.format(
                        type=type_,
                        id=id_,
                        cluster=cluster_name,
                    ),
                    sudo=True,
                )
                keys.append((type_, id_, data))
                keys_fp.write(data)
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
                                                     cluster_name):
            _, _, id_ = teuthology.split_role(role)
            data = teuthology.get_file(
                remote=remote,
                path='/etc/ceph/{cluster}.client.{id}.keyring'.format(
                    id=id_, cluster=cluster_name))
            keys.append(('client', id_, data))
            keys_fp.write(data)

    log.info('Adding keys to all mons...')
    writes = mons.run(
        args=[
            'sudo',
            'tee',
            '-a',
            keyring_path,
        ],
        stdin=run.PIPE,
        wait=False,
        stdout=StringIO(),
    )
    keys_fp.seek(0)
    teuthology.feed_many_stdins_and_close(keys_fp, writes)
    run.wait(writes)
    for type_, id_, data in keys:
        run.wait(
            mons.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    keyring_path,
                    '--name={type}.{id}'.format(
                        type=type_,
                        id=id_,
                    ),
                ] + list(teuthology.generate_caps(type_)),
                wait=False,
            ), )

    log.info('Running mkfs on mon nodes...')
    for remote, roles_for_host in mons.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'mon',
                                                     cluster_name):
            _, _, id_ = teuthology.split_role(role)
            remote.run(args=[
                'sudo',
                'mkdir',
                '-p',
                '/var/lib/ceph/mon/{cluster}-{id}'.format(
                    id=id_, cluster=cluster_name),
            ], )
            remote.run(args=[
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'ceph-mon',
                '--cluster',
                cluster_name,
                '--mkfs',
                '-i',
                id_,
                '--monmap',
                monmap_path,
                '--osdmap',
                osdmap_path,
                '--keyring',
                keyring_path,
            ], )

    run.wait(
        mons.run(
            args=[
                'rm',
                '--',
                monmap_path,
                osdmap_path,
            ],
            wait=False,
        ), )

    try:
        yield
    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise
    finally:
        (mon0_remote, ) = ctx.cluster.only(firstmon).remotes.keys()

        log.info('Checking cluster log for badness...')

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurence of the pattern specified in the Ceph log,
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo',
                'egrep',
                pattern,
                '/var/log/ceph/{cluster}.log'.format(cluster=cluster_name),
            ]
            for exclude in excludes:
                args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'),
                'head',
                '-n',
                '1',
            ])
            r = mon0_remote.run(
                stdout=StringIO(),
                args=args,
            )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
                             config['log_whitelist']) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log_whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        for remote, dirs in devs_to_clean.iteritems():
            for dir_ in dirs:
                log.info('Unmounting %s on %s' % (dir_, remote))
                try:
                    remote.run(args=[
                        'sync',
                        run.Raw('&&'), 'sudo', 'umount', '-f', dir_
                    ])
                except Exception as e:
                    remote.run(args=[
                        'sudo',
                        run.Raw('PATH=/usr/sbin:$PATH'),
                        'lsof',
                        run.Raw(';'),
                        'ps',
                        'auxf',
                    ])
                    raise e

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled - unmounting tmpfs at /mnt')
            for remote, roles_for_host in osds.remotes.iteritems():
                remote.run(
                    args=['sudo', 'umount', '-f', '/mnt'],
                    check_status=False,
                )

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):

            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            try:
                os.makedirs(path)
            except OSError as e:
                if e.errno == errno.EEXIST:
                    pass
                else:
                    raise
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    is_mon = teuthology.is_type('mon', cluster_name)
                    if is_mon(role):
                        _, _, id_ = teuthology.split_role(role)
                        mon_dir = '/var/lib/ceph/mon/' + \
                                  '{0}-{1}'.format(cluster_name, id_)
                        teuthology.pull_directory_tarball(
                            remote, mon_dir, path + '/' + role + '.tgz')

        log.info('Cleaning ceph cluster...')
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo',
                    'rm',
                    '-rf',
                    '--',
                    conf_path,
                    keyring_path,
                    data_dir,
                    monmap_path,
                    osdmap_path,
                    run.Raw('{tdir}/../*.pid'.format(tdir=testdir)),
                ],
                wait=False,
            ), )
Пример #12
0
def cluster(ctx, config):
    """
    Handle the creation and removal of a ceph cluster.

    On startup:
        Create directories needed for the cluster.
        Create remote journals for all osds.
        Create and set keyring.
        Copy the monmap to tht test systems.
        Setup mon nodes.
        Setup mds nodes.
        Mkfs osd nodes.
        Add keyring information to monmaps
        Mkfs mon nodes.

    On exit:
        If errors occured, extract a failure message and store in ctx.summary.
        Unmount all test files and temporary journaling files.
        Save the monitor information and archive all ceph logs.
        Cleanup the keyring setup, and remove all monitor map and data files left over.

    :param ctx: Context
    :param config: Configuration
    """
    if ctx.config.get("use_existing_cluster", False) is True:
        log.info("'use_existing_cluster' is true; skipping cluster creation")
        yield

    testdir = teuthology.get_testdir(ctx)
    cluster_name = config["cluster"]
    data_dir = "{tdir}/{cluster}.data".format(tdir=testdir, cluster=cluster_name)
    log.info("Creating ceph cluster %s...", cluster_name)
    run.wait(ctx.cluster.run(args=["install", "-d", "-m0755", "--", data_dir], wait=False))

    run.wait(ctx.cluster.run(args=["sudo", "install", "-d", "-m0777", "--", "/var/run/ceph"], wait=False))

    devs_to_clean = {}
    remote_to_roles_to_devs = {}
    remote_to_roles_to_journals = {}
    osds = ctx.cluster.only(teuthology.is_type("osd", cluster_name))
    for remote, roles_for_host in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        roles_to_devs = {}
        roles_to_journals = {}
        if config.get("fs"):
            log.info("fs option selected, checking for scratch devs")
            log.info("found devs: %s" % (str(devs),))
            devs_id_map = teuthology.get_wwn_id_map(remote, devs)
            iddevs = devs_id_map.values()
            roles_to_devs = assign_devs(teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name), iddevs)
            if len(roles_to_devs) < len(iddevs):
                iddevs = iddevs[len(roles_to_devs) :]
            devs_to_clean[remote] = []

        if config.get("block_journal"):
            log.info("block journal enabled")
            roles_to_journals = assign_devs(
                teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name), iddevs
            )
            log.info("journal map: %s", roles_to_journals)

        if config.get("tmpfs_journal"):
            log.info("tmpfs journal enabled")
            roles_to_journals = {}
            remote.run(args=["sudo", "mount", "-t", "tmpfs", "tmpfs", "/mnt"])
            for role in teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name):
                tmpfs = "/mnt/" + role
                roles_to_journals[role] = tmpfs
                remote.run(args=["truncate", "-s", "1500M", tmpfs])
            log.info("journal map: %s", roles_to_journals)

        log.info("dev map: %s" % (str(roles_to_devs),))
        remote_to_roles_to_devs[remote] = roles_to_devs
        remote_to_roles_to_journals[remote] = roles_to_journals

    log.info("Generating config...")
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [
        host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)
    ]
    conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips, cluster=cluster_name)
    for remote, roles_to_journals in remote_to_roles_to_journals.iteritems():
        for role, journal in roles_to_journals.iteritems():
            name = teuthology.ceph_role(role)
            if name not in conf:
                conf[name] = {}
            conf[name]["osd journal"] = journal
    for section, keys in config["conf"].iteritems():
        for key, value in keys.iteritems():
            log.info("[%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    if config.get("tmpfs_journal"):
        conf["journal dio"] = False

    if not hasattr(ctx, "ceph"):
        ctx.ceph = {}
    ctx.ceph[cluster_name] = argparse.Namespace()
    ctx.ceph[cluster_name].conf = conf

    default_keyring = "/etc/ceph/{cluster}.keyring".format(cluster=cluster_name)
    keyring_path = config.get("keyring_path", default_keyring)

    coverage_dir = "{tdir}/archive/coverage".format(tdir=testdir)

    firstmon = teuthology.get_first_mon(ctx, config, cluster_name)

    log.info("Setting up %s..." % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            "sudo",
            "adjust-ulimits",
            "ceph-coverage",
            coverage_dir,
            "ceph-authtool",
            "--create-keyring",
            keyring_path,
        ]
    )
    ctx.cluster.only(firstmon).run(
        args=[
            "sudo",
            "adjust-ulimits",
            "ceph-coverage",
            coverage_dir,
            "ceph-authtool",
            "--gen-key",
            "--name=mon.",
            keyring_path,
        ]
    )
    ctx.cluster.only(firstmon).run(args=["sudo", "chmod", "0644", keyring_path])
    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
    monmap_path = "{tdir}/{cluster}.monmap".format(tdir=testdir, cluster=cluster_name)
    fsid = teuthology.create_simple_monmap(ctx, remote=mon0_remote, conf=conf, path=monmap_path)
    if not "global" in conf:
        conf["global"] = {}
    conf["global"]["fsid"] = fsid

    default_conf_path = "/etc/ceph/{cluster}.conf".format(cluster=cluster_name)
    conf_path = config.get("conf_path", default_conf_path)
    log.info("Writing %s for FSID %s..." % (conf_path, fsid))
    write_conf(ctx, conf_path, cluster_name)

    log.info("Creating admin key on %s..." % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            "sudo",
            "adjust-ulimits",
            "ceph-coverage",
            coverage_dir,
            "ceph-authtool",
            "--gen-key",
            "--name=client.admin",
            "--set-uid=0",
            "--cap",
            "mon",
            "allow *",
            "--cap",
            "osd",
            "allow *",
            "--cap",
            "mds",
            "allow *",
            keyring_path,
        ]
    )

    log.info("Copying monmap to all nodes...")
    keyring = teuthology.get_file(remote=mon0_remote, path=keyring_path)
    monmap = teuthology.get_file(remote=mon0_remote, path=monmap_path)

    for rem in ctx.cluster.remotes.iterkeys():
        # copy mon key and initial monmap
        log.info("Sending monmap to node {remote}".format(remote=rem))
        teuthology.sudo_write_file(remote=rem, path=keyring_path, data=keyring, perms="0644")
        teuthology.write_file(remote=rem, path=monmap_path, data=monmap)

    log.info("Setting up mon nodes...")
    mons = ctx.cluster.only(teuthology.is_type("mon", cluster_name))
    osdmap_path = "{tdir}/{cluster}.osdmap".format(tdir=testdir, cluster=cluster_name)
    run.wait(
        mons.run(
            args=[
                "adjust-ulimits",
                "ceph-coverage",
                coverage_dir,
                "osdmaptool",
                "-c",
                conf_path,
                "--clobber",
                "--createsimple",
                "{num:d}".format(num=teuthology.num_instances_of_type(ctx.cluster, "osd", cluster_name)),
                osdmap_path,
                "--pg_bits",
                "2",
                "--pgp_bits",
                "4",
            ],
            wait=False,
        )
    )

    log.info("Setting up mgr nodes...")
    mgrs = ctx.cluster.only(teuthology.is_type("mgr", cluster_name))
    for remote, roles_for_host in mgrs.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, "mgr", cluster_name):
            _, _, id_ = teuthology.split_role(role)
            mgr_dir = "/var/lib/ceph/mgr/{cluster}-{id}".format(cluster=cluster_name, id=id_)
            remote.run(
                args=[
                    "sudo",
                    "mkdir",
                    "-p",
                    mgr_dir,
                    run.Raw("&&"),
                    "sudo",
                    "adjust-ulimits",
                    "ceph-coverage",
                    coverage_dir,
                    "ceph-authtool",
                    "--create-keyring",
                    "--gen-key",
                    "--name=mgr.{id}".format(id=id_),
                    mgr_dir + "/keyring",
                ]
            )

    log.info("Setting up mds nodes...")
    mdss = ctx.cluster.only(teuthology.is_type("mds", cluster_name))
    for remote, roles_for_host in mdss.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, "mds", cluster_name):
            _, _, id_ = teuthology.split_role(role)
            mds_dir = "/var/lib/ceph/mds/{cluster}-{id}".format(cluster=cluster_name, id=id_)
            remote.run(
                args=[
                    "sudo",
                    "mkdir",
                    "-p",
                    mds_dir,
                    run.Raw("&&"),
                    "sudo",
                    "adjust-ulimits",
                    "ceph-coverage",
                    coverage_dir,
                    "ceph-authtool",
                    "--create-keyring",
                    "--gen-key",
                    "--name=mds.{id}".format(id=id_),
                    mds_dir + "/keyring",
                ]
            )

    cclient.create_keyring(ctx, cluster_name)
    log.info("Running mkfs on osd nodes...")

    if not hasattr(ctx, "disk_config"):
        ctx.disk_config = argparse.Namespace()
    if not hasattr(ctx.disk_config, "remote_to_roles_to_dev"):
        ctx.disk_config.remote_to_roles_to_dev = {}
    if not hasattr(ctx.disk_config, "remote_to_roles_to_journals"):
        ctx.disk_config.remote_to_roles_to_journals = {}
    if not hasattr(ctx.disk_config, "remote_to_roles_to_dev_mount_options"):
        ctx.disk_config.remote_to_roles_to_dev_mount_options = {}
    if not hasattr(ctx.disk_config, "remote_to_roles_to_dev_fstype"):
        ctx.disk_config.remote_to_roles_to_dev_fstype = {}

    teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_dev, remote_to_roles_to_devs)
    teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_journals, remote_to_roles_to_journals)

    log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev)))
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = remote_to_roles_to_devs[remote]
        roles_to_journals = remote_to_roles_to_journals[remote]

        for role in teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name):
            _, _, id_ = teuthology.split_role(role)
            mnt_point = "/var/lib/ceph/osd/{cluster}-{id}".format(cluster=cluster_name, id=id_)
            remote.run(args=["sudo", "mkdir", "-p", mnt_point])
            log.info(str(roles_to_journals))
            log.info(role)
            if roles_to_devs.get(role):
                dev = roles_to_devs[role]
                fs = config.get("fs")
                package = None
                mkfs_options = config.get("mkfs_options")
                mount_options = config.get("mount_options")
                if fs == "btrfs":
                    # package = 'btrfs-tools'
                    if mount_options is None:
                        mount_options = ["noatime", "user_subvol_rm_allowed"]
                    if mkfs_options is None:
                        mkfs_options = ["-m", "single", "-l", "32768", "-n", "32768"]
                if fs == "xfs":
                    # package = 'xfsprogs'
                    if mount_options is None:
                        mount_options = ["noatime"]
                    if mkfs_options is None:
                        mkfs_options = ["-f", "-i", "size=2048"]
                if fs == "ext4" or fs == "ext3":
                    if mount_options is None:
                        mount_options = ["noatime", "user_xattr"]

                if mount_options is None:
                    mount_options = []
                if mkfs_options is None:
                    mkfs_options = []
                mkfs = ["mkfs.%s" % fs] + mkfs_options
                log.info("%s on %s on %s" % (mkfs, dev, remote))
                if package is not None:
                    remote.run(args=["sudo", "apt-get", "install", "-y", package], stdout=StringIO())

                try:
                    remote.run(args=["yes", run.Raw("|")] + ["sudo"] + mkfs + [dev])
                except run.CommandFailedError:
                    # Newer btfs-tools doesn't prompt for overwrite, use -f
                    if "-f" not in mount_options:
                        mkfs_options.append("-f")
                        mkfs = ["mkfs.%s" % fs] + mkfs_options
                        log.info("%s on %s on %s" % (mkfs, dev, remote))
                    remote.run(args=["yes", run.Raw("|")] + ["sudo"] + mkfs + [dev])

                log.info("mount %s on %s -o %s" % (dev, remote, ",".join(mount_options)))
                remote.run(args=["sudo", "mount", "-t", fs, "-o", ",".join(mount_options), dev, mnt_point])
                if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options:
                    ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][role] = mount_options
                if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype:
                    ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_fstype[remote][role] = fs
                devs_to_clean[remote].append(mnt_point)

        for role in teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name):
            _, _, id_ = teuthology.split_role(role)
            remote.run(
                args=[
                    "sudo",
                    "MALLOC_CHECK_=3",
                    "adjust-ulimits",
                    "ceph-coverage",
                    coverage_dir,
                    "ceph-osd",
                    "--cluster",
                    cluster_name,
                    "--mkfs",
                    "--mkkey",
                    "-i",
                    id_,
                    "--monmap",
                    monmap_path,
                ]
            )

    log.info("Reading keys from all nodes...")
    keys_fp = StringIO()
    keys = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ["mgr", "mds", "osd"]:
            for role in teuthology.cluster_roles_of_type(roles_for_host, type_, cluster_name):
                _, _, id_ = teuthology.split_role(role)
                data = teuthology.get_file(
                    remote=remote,
                    path="/var/lib/ceph/{type}/{cluster}-{id}/keyring".format(type=type_, id=id_, cluster=cluster_name),
                    sudo=True,
                )
                keys.append((type_, id_, data))
                keys_fp.write(data)
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, "client", cluster_name):
            _, _, id_ = teuthology.split_role(role)
            data = teuthology.get_file(
                remote=remote, path="/etc/ceph/{cluster}.client.{id}.keyring".format(id=id_, cluster=cluster_name)
            )
            keys.append(("client", id_, data))
            keys_fp.write(data)

    log.info("Adding keys to all mons...")
    writes = mons.run(args=["sudo", "tee", "-a", keyring_path], stdin=run.PIPE, wait=False, stdout=StringIO())
    keys_fp.seek(0)
    teuthology.feed_many_stdins_and_close(keys_fp, writes)
    run.wait(writes)
    for type_, id_, data in keys:
        run.wait(
            mons.run(
                args=[
                    "sudo",
                    "adjust-ulimits",
                    "ceph-coverage",
                    coverage_dir,
                    "ceph-authtool",
                    keyring_path,
                    "--name={type}.{id}".format(type=type_, id=id_),
                ]
                + list(generate_caps(type_)),
                wait=False,
            )
        )

    log.info("Running mkfs on mon nodes...")
    for remote, roles_for_host in mons.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, "mon", cluster_name):
            _, _, id_ = teuthology.split_role(role)
            remote.run(
                args=["sudo", "mkdir", "-p", "/var/lib/ceph/mon/{cluster}-{id}".format(id=id_, cluster=cluster_name)]
            )
            remote.run(
                args=[
                    "sudo",
                    "adjust-ulimits",
                    "ceph-coverage",
                    coverage_dir,
                    "ceph-mon",
                    "--cluster",
                    cluster_name,
                    "--mkfs",
                    "-i",
                    id_,
                    "--monmap",
                    monmap_path,
                    "--osdmap",
                    osdmap_path,
                    "--keyring",
                    keyring_path,
                ]
            )

    run.wait(mons.run(args=["rm", "--", monmap_path, osdmap_path], wait=False))

    try:
        yield
    except Exception:
        # we need to know this below
        ctx.summary["success"] = False
        raise
    finally:
        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()

        log.info("Checking cluster log for badness...")

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurence of the pattern specified in the Ceph log,
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = ["sudo", "egrep", pattern, "/var/log/ceph/{cluster}.log".format(cluster=cluster_name)]
            for exclude in excludes:
                args.extend([run.Raw("|"), "egrep", "-v", exclude])
            args.extend([run.Raw("|"), "head", "-n", "1"])
            r = mon0_remote.run(stdout=StringIO(), args=args)
            stdout = r.stdout.getvalue()
            if stdout != "":
                return stdout
            return None

        if first_in_ceph_log("\[ERR\]|\[WRN\]|\[SEC\]", config["log_whitelist"]) is not None:
            log.warning("Found errors (ERR|WRN|SEC) in cluster log")
            ctx.summary["success"] = False
            # use the most severe problem as the failure reason
            if "failure_reason" not in ctx.summary:
                for pattern in ["\[SEC\]", "\[ERR\]", "\[WRN\]"]:
                    match = first_in_ceph_log(pattern, config["log_whitelist"])
                    if match is not None:
                        ctx.summary["failure_reason"] = '"{match}" in cluster log'.format(match=match.rstrip("\n"))
                        break

        for remote, dirs in devs_to_clean.iteritems():
            for dir_ in dirs:
                log.info("Unmounting %s on %s" % (dir_, remote))
                try:
                    remote.run(args=["sync", run.Raw("&&"), "sudo", "umount", "-f", dir_])
                except Exception as e:
                    remote.run(args=["sudo", run.Raw("PATH=/usr/sbin:$PATH"), "lsof", run.Raw(";"), "ps", "auxf"])
                    raise e

        if config.get("tmpfs_journal"):
            log.info("tmpfs journal enabled - unmounting tmpfs at /mnt")
            for remote, roles_for_host in osds.remotes.iteritems():
                remote.run(args=["sudo", "umount", "-f", "/mnt"], check_status=False)

        if ctx.archive is not None and not (ctx.config.get("archive-on-error") and ctx.summary["success"]):

            # archive mon data, too
            log.info("Archiving mon data...")
            path = os.path.join(ctx.archive, "data")
            try:
                os.makedirs(path)
            except OSError as e:
                if e.errno == errno.EEXIST:
                    pass
                else:
                    raise
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    is_mon = teuthology.is_type("mon", cluster_name)
                    if is_mon(role):
                        _, _, id_ = teuthology.split_role(role)
                        mon_dir = "/var/lib/ceph/mon/" + "{0}-{1}".format(cluster_name, id_)
                        teuthology.pull_directory_tarball(remote, mon_dir, path + "/" + role + ".tgz")

        log.info("Cleaning ceph cluster...")
        run.wait(
            ctx.cluster.run(
                args=[
                    "sudo",
                    "rm",
                    "-rf",
                    "--",
                    conf_path,
                    keyring_path,
                    data_dir,
                    monmap_path,
                    osdmap_path,
                    run.Raw("{tdir}/../*.pid".format(tdir=testdir)),
                ],
                wait=False,
            )
        )
Пример #13
0
def cluster(ctx, config):
    """
    Handle the creation and removal of a ceph cluster.

    On startup:
        Create directories needed for the cluster.
        Create remote journals for all osds.
        Create and set keyring.
        Copy the monmap to tht test systems.
        Setup mon nodes.
        Setup mds nodes.
        Mkfs osd nodes.
        Add keyring information to monmaps
        Mkfs mon nodes.

    On exit:
        If errors occured, extract a failure message and store in ctx.summary.
        Unmount all test files and temporary journaling files.
        Save the monitor information and archive all ceph logs.
        Cleanup the keyring setup, and remove all monitor map and data files left over.

    :param ctx: Context
    :param config: Configuration
    """
    if ctx.config.get('use_existing_cluster', False) is True:
        log.info("'use_existing_cluster' is true; skipping cluster creation")
        yield

    testdir = teuthology.get_testdir(ctx)
    cluster_name = config['cluster']
    data_dir = '{tdir}/{cluster}.data'.format(tdir=testdir, cluster=cluster_name)
    log.info('Creating ceph cluster %s...', cluster_name)
    run.wait(
        ctx.cluster.run(
            args=[
                'install', '-d', '-m0755', '--',
                data_dir,
            ],
            wait=False,
        )
    )

    run.wait(
        ctx.cluster.run(
            args=[
                'sudo',
                'install', '-d', '-m0777', '--', '/var/run/ceph',
            ],
            wait=False,
        )
    )

    devs_to_clean = {}
    remote_to_roles_to_devs = {}
    remote_to_roles_to_journals = {}
    osds = ctx.cluster.only(teuthology.is_type('osd', cluster_name))
    for remote, roles_for_host in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        roles_to_devs = {}
        roles_to_journals = {}
        if config.get('fs'):
            log.info('fs option selected, checking for scratch devs')
            log.info('found devs: %s' % (str(devs),))
            devs_id_map = teuthology.get_wwn_id_map(remote, devs)
            iddevs = devs_id_map.values()
            roles_to_devs = assign_devs(
                teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name), iddevs
            )
            if len(roles_to_devs) < len(iddevs):
                iddevs = iddevs[len(roles_to_devs):]
            devs_to_clean[remote] = []

        if config.get('block_journal'):
            log.info('block journal enabled')
            roles_to_journals = assign_devs(
                teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name), iddevs
            )
            log.info('journal map: %s', roles_to_journals)

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled')
            roles_to_journals = {}
            remote.run(args=['sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt'])
            for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name):
                tmpfs = '/mnt/' + role
                roles_to_journals[role] = tmpfs
                remote.run(args=['truncate', '-s', '1500M', tmpfs])
            log.info('journal map: %s', roles_to_journals)

        log.info('dev map: %s' % (str(roles_to_devs),))
        remote_to_roles_to_devs[remote] = roles_to_devs
        remote_to_roles_to_journals[remote] = roles_to_journals

    log.info('Generating config...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [host for (host, port) in
           (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]
    conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips, cluster=cluster_name)
    for remote, roles_to_journals in remote_to_roles_to_journals.iteritems():
        for role, journal in roles_to_journals.iteritems():
            name = teuthology.ceph_role(role)
            if name not in conf:
                conf[name] = {}
            conf[name]['osd journal'] = journal
    for section, keys in config['conf'].iteritems():
        for key, value in keys.iteritems():
            log.info("[%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    if config.get('tmpfs_journal'):
        conf['journal dio'] = False

    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
    ctx.ceph[cluster_name] = argparse.Namespace()
    ctx.ceph[cluster_name].conf = conf

    default_keyring = '/etc/ceph/{cluster}.keyring'.format(cluster=cluster_name)
    keyring_path = config.get('keyring_path', default_keyring)

    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    firstmon = teuthology.get_first_mon(ctx, config, cluster_name)

    log.info('Setting up %s...' % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--create-keyring',
            keyring_path,
        ],
    )
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--gen-key',
            '--name=mon.',
            keyring_path,
        ],
    )
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'chmod',
            '0644',
            keyring_path,
        ],
    )
    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
    monmap_path = '{tdir}/{cluster}.monmap'.format(tdir=testdir,
                                                   cluster=cluster_name)
    fsid = teuthology.create_simple_monmap(
        ctx,
        remote=mon0_remote,
        conf=conf,
        path=monmap_path,
    )
    if not 'global' in conf:
        conf['global'] = {}
    conf['global']['fsid'] = fsid

    default_conf_path = '/etc/ceph/{cluster}.conf'.format(cluster=cluster_name)
    conf_path = config.get('conf_path', default_conf_path)
    log.info('Writing %s for FSID %s...' % (conf_path, fsid))
    write_conf(ctx, conf_path, cluster_name)

    log.info('Creating admin key on %s...' % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--gen-key',
            '--name=client.admin',
            '--set-uid=0',
            '--cap', 'mon', 'allow *',
            '--cap', 'osd', 'allow *',
            '--cap', 'mds', 'allow *',
            keyring_path,
        ],
    )

    log.info('Copying monmap to all nodes...')
    keyring = teuthology.get_file(
        remote=mon0_remote,
        path=keyring_path,
    )
    monmap = teuthology.get_file(
        remote=mon0_remote,
        path=monmap_path,
    )

    for rem in ctx.cluster.remotes.iterkeys():
        # copy mon key and initial monmap
        log.info('Sending monmap to node {remote}'.format(remote=rem))
        teuthology.sudo_write_file(
            remote=rem,
            path=keyring_path,
            data=keyring,
            perms='0644'
        )
        teuthology.write_file(
            remote=rem,
            path=monmap_path,
            data=monmap,
        )

    log.info('Setting up mon nodes...')
    mons = ctx.cluster.only(teuthology.is_type('mon', cluster_name))
    osdmap_path = '{tdir}/{cluster}.osdmap'.format(tdir=testdir,
                                                   cluster=cluster_name)
    run.wait(
        mons.run(
            args=[
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'osdmaptool',
                '-c', conf_path,
                '--clobber',
                '--createsimple', '{num:d}'.format(
                    num=teuthology.num_instances_of_type(ctx.cluster, 'osd',
                                                         cluster_name),
                ),
                osdmap_path,
                '--pg_bits', '2',
                '--pgp_bits', '4',
            ],
            wait=False,
        ),
    )

    log.info('Setting up mds nodes...')
    mdss = ctx.cluster.only(teuthology.is_type('mds', cluster_name))
    for remote, roles_for_host in mdss.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'mds',
                                                     cluster_name):
            _, _, id_ = teuthology.split_role(role)
            mds_dir = '/var/lib/ceph/mds/{cluster}-{id}'.format(
                cluster=cluster_name,
                id=id_,
            )
            remote.run(
                args=[
                    'sudo',
                    'mkdir',
                    '-p',
                    mds_dir,
                    run.Raw('&&'),
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    '--name=mds.{id}'.format(id=id_),
                    mds_dir + '/keyring',
                ],
            )

    cclient.create_keyring(ctx, cluster_name)
    log.info('Running mkfs on osd nodes...')

    if not hasattr(ctx, 'disk_config'):
        ctx.disk_config = argparse.Namespace()
    if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev'):
        ctx.disk_config.remote_to_roles_to_dev = {}
    if not hasattr(ctx.disk_config, 'remote_to_roles_to_journals'):
        ctx.disk_config.remote_to_roles_to_journals = {}
    if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_mount_options'):
        ctx.disk_config.remote_to_roles_to_dev_mount_options = {}
    if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_fstype'):
        ctx.disk_config.remote_to_roles_to_dev_fstype = {}

    teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_dev, remote_to_roles_to_devs)
    teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_journals, remote_to_roles_to_journals)

    log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev)))
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = remote_to_roles_to_devs[remote]
        roles_to_journals = remote_to_roles_to_journals[remote]

        for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name):
            _, _, id_ = teuthology.split_role(role)
            mnt_point = '/var/lib/ceph/osd/{cluster}-{id}'.format(cluster=cluster_name, id=id_)
            remote.run(
                args=[
                    'sudo',
                    'mkdir',
                    '-p',
                    mnt_point,
                ])
            log.info(str(roles_to_journals))
            log.info(role)
            if roles_to_devs.get(role):
                dev = roles_to_devs[role]
                fs = config.get('fs')
                package = None
                mkfs_options = config.get('mkfs_options')
                mount_options = config.get('mount_options')
                if fs == 'btrfs':
                    # package = 'btrfs-tools'
                    if mount_options is None:
                        mount_options = ['noatime', 'user_subvol_rm_allowed']
                    if mkfs_options is None:
                        mkfs_options = ['-m', 'single',
                                        '-l', '32768',
                                        '-n', '32768']
                if fs == 'xfs':
                    # package = 'xfsprogs'
                    if mount_options is None:
                        mount_options = ['noatime']
                    if mkfs_options is None:
                        mkfs_options = ['-f', '-i', 'size=2048']
                if fs == 'ext4' or fs == 'ext3':
                    if mount_options is None:
                        mount_options = ['noatime', 'user_xattr']

                if mount_options is None:
                    mount_options = []
                if mkfs_options is None:
                    mkfs_options = []
                mkfs = ['mkfs.%s' % fs] + mkfs_options
                log.info('%s on %s on %s' % (mkfs, dev, remote))
                if package is not None:
                    remote.run(
                        args=[
                            'sudo',
                            'apt-get', 'install', '-y', package
                        ],
                        stdout=StringIO(),
                    )

                try:
                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])
                except run.CommandFailedError:
                    # Newer btfs-tools doesn't prompt for overwrite, use -f
                    if '-f' not in mount_options:
                        mkfs_options.append('-f')
                        mkfs = ['mkfs.%s' % fs] + mkfs_options
                        log.info('%s on %s on %s' % (mkfs, dev, remote))
                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])

                log.info('mount %s on %s -o %s' % (dev, remote,
                                                   ','.join(mount_options)))
                remote.run(
                    args=[
                        'sudo',
                        'mount',
                        '-t', fs,
                        '-o', ','.join(mount_options),
                        dev,
                        mnt_point,
                    ]
                )
                if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options:
                    ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][role] = mount_options
                if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype:
                    ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_fstype[remote][role] = fs
                devs_to_clean[remote].append(mnt_point)

        for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name):
            _, _, id_ = teuthology.split_role(role)
            remote.run(
                args=[
                    'sudo',
                    'MALLOC_CHECK_=3',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-osd',
                    '--cluster',
                    cluster_name,
                    '--mkfs',
                    '--mkkey',
                    '-i', id_,
                    '--monmap', monmap_path,
                ],
            )

    log.info('Reading keys from all nodes...')
    keys_fp = StringIO()
    keys = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['mds', 'osd']:
            for role in teuthology.cluster_roles_of_type(roles_for_host, type_, cluster_name):
                _, _, id_ = teuthology.split_role(role)
                data = teuthology.get_file(
                    remote=remote,
                    path='/var/lib/ceph/{type}/{cluster}-{id}/keyring'.format(
                        type=type_,
                        id=id_,
                        cluster=cluster_name,
                    ),
                    sudo=True,
                )
                keys.append((type_, id_, data))
                keys_fp.write(data)
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', cluster_name):
            _, _, id_ = teuthology.split_role(role)
            data = teuthology.get_file(
                remote=remote,
                path='/etc/ceph/{cluster}.client.{id}.keyring'.format(id=id_, cluster=cluster_name)
            )
            keys.append(('client', id_, data))
            keys_fp.write(data)

    log.info('Adding keys to all mons...')
    writes = mons.run(
        args=[
            'sudo', 'tee', '-a',
            keyring_path,
        ],
        stdin=run.PIPE,
        wait=False,
        stdout=StringIO(),
    )
    keys_fp.seek(0)
    teuthology.feed_many_stdins_and_close(keys_fp, writes)
    run.wait(writes)
    for type_, id_, data in keys:
        run.wait(
            mons.run(
                args=[
                         'sudo',
                         'adjust-ulimits',
                         'ceph-coverage',
                         coverage_dir,
                         'ceph-authtool',
                         keyring_path,
                         '--name={type}.{id}'.format(
                             type=type_,
                             id=id_,
                         ),
                     ] + list(teuthology.generate_caps(type_)),
                wait=False,
            ),
        )

    log.info('Running mkfs on mon nodes...')
    for remote, roles_for_host in mons.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'mon', cluster_name):
            _, _, id_ = teuthology.split_role(role)
            remote.run(
                args=[
                    'sudo',
                    'mkdir',
                    '-p',
                    '/var/lib/ceph/mon/{cluster}-{id}'.format(id=id_, cluster=cluster_name),
                ],
            )
            remote.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-mon',
                    '--cluster', cluster_name,
                    '--mkfs',
                    '-i', id_,
                    '--monmap', monmap_path,
                    '--osdmap', osdmap_path,
                    '--keyring', keyring_path,
                ],
            )

    run.wait(
        mons.run(
            args=[
                'rm',
                '--',
                monmap_path,
                osdmap_path,
            ],
            wait=False,
        ),
    )

    try:
        yield
    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise
    finally:
        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()

        log.info('Checking cluster log for badness...')

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurence of the pattern specified in the Ceph log,
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo',
                'egrep', pattern,
                '/var/log/ceph/{cluster}.log'.format(cluster=cluster_name),
            ]
            for exclude in excludes:
                args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'), 'head', '-n', '1',
            ])
            r = mon0_remote.run(
                stdout=StringIO(),
                args=args,
            )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
                             config['log_whitelist']) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log_whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        for remote, dirs in devs_to_clean.iteritems():
            for dir_ in dirs:
                log.info('Unmounting %s on %s' % (dir_, remote))
                try:
                    remote.run(
                        args=[
                            'sync',
                            run.Raw('&&'),
                            'sudo',
                            'umount',
                            '-f',
                            dir_
                        ]
                    )
                except Exception as e:
                    remote.run(args=[
                        'sudo',
                        run.Raw('PATH=/usr/sbin:$PATH'),
                        'lsof',
                        run.Raw(';'),
                        'ps', 'auxf',
                    ])
                    raise e

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled - unmounting tmpfs at /mnt')
            for remote, roles_for_host in osds.remotes.iteritems():
                remote.run(
                    args=['sudo', 'umount', '-f', '/mnt'],
                    check_status=False,
                )

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):

            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            try:
                os.makedirs(path)
            except OSError as e:
                if e.errno == errno.EEXIST:
                    pass
                else:
                    raise
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    is_mon = teuthology.is_type('mon', cluster_name)
                    if is_mon(role):
                        _, _, id_ = teuthology.split_role(role)
                        mon_dir = '/var/lib/ceph/mon/' + \
                                  '{0}-{1}'.format(cluster_name, id_)
                        teuthology.pull_directory_tarball(
                            remote,
                            mon_dir,
                            path + '/' + role + '.tgz')

        log.info('Cleaning ceph cluster...')
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo',
                    'rm',
                    '-rf',
                    '--',
                    conf_path,
                    keyring_path,
                    data_dir,
                    monmap_path,
                    osdmap_path,
                    run.Raw('{tdir}/../*.pid'.format(tdir=testdir)),
                ],
                wait=False,
            ),
        )