Example #1
def create_ceph_conf(ctx, config):

    devs_to_clean = {}
    remote_to_roles_to_devs = {}
    remote_to_roles_to_journals = {}
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        roles_to_devs = {}
        roles_to_journals = {}
        if config.get('fs'):
            log.info('fs option selected, checking for scratch devs')
            log.info('found devs: %s' % (str(devs),))
            devs_id_map = teuthology.get_wwn_id_map(remote, devs)
            iddevs = devs_id_map.values()
            roles_to_devs = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs
                )
            if len(roles_to_devs) < len(iddevs):
                iddevs = iddevs[len(roles_to_devs):]
            devs_to_clean[remote] = []

        if config.get('block_journal'):
            log.info('block journal enabled')
            roles_to_journals = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs
                )
            log.info('journal map: %s', roles_to_journals)

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled')
            roles_to_journals = {}
            remote.run( args=[ 'sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt' ] )
            for osd in teuthology.roles_of_type(roles_for_host, 'osd'):
                tmpfs = '/mnt/osd.%s' % osd
                roles_to_journals[osd] = tmpfs
                remote.run( args=[ 'truncate', '-s', '1500M', tmpfs ] )
            log.info('journal map: %s', roles_to_journals)

        log.info('dev map: %s' % (str(roles_to_devs),))
        remote_to_roles_to_devs[remote] = roles_to_devs
        remote_to_roles_to_journals[remote] = roles_to_journals

    log.info('Generating config...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]
    conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips)

    ctx.ceph = argparse.Namespace()
    ctx.ceph.conf = conf

    log.info(ctx)
    
    yield
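
Example #1 calls an assign_devs() helper that the excerpt does not show. Judging from how it is used above, it most likely just pairs osd role ids with scratch devices in order; a minimal sketch under that assumption:

def assign_devs(roles, devs):
    # Pair each osd role id with a scratch device, in order (illustrative sketch).
    return dict(zip(roles, devs))
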
Example #2
 def osd_admin_socket(self, osdnum, command, check_status=True):
     """
     Remotely start up ceph specifying the admin socket
     """
     testdir = teuthology.get_testdir(self.ctx)
     remote = None
     for _remote, roles_for_host in self.ctx.cluster.remotes.iteritems():
         for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
             if int(id_) == int(osdnum):
                 remote = _remote
     assert remote is not None
     args = [
         'sudo',
         'adjust-ulimits',
         'ceph-coverage',
         '{tdir}/archive/coverage'.format(tdir=testdir),
         'ceph',
         '--admin-daemon',
         '/var/run/ceph/ceph-osd.{id}.asok'.format(id=osdnum),
         ]
     args.extend(command)
     return remote.run(
         args=args,
         stdout=StringIO(),
         wait=True,
         check_status=check_status
         )
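
A hypothetical call site for osd_admin_socket(), written as if from inside the same class and assuming the admin-socket command prints JSON on stdout; the command name and osd id are illustrative only:

import json

proc = self.osd_admin_socket(0, ['dump_ops_in_flight'])
ops = json.loads(proc.stdout.getvalue())
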
Example #3
def execute(ctx, config):
    procs = []
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = ctx.disk_config.remote_to_roles_to_dev[remote]
        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            if roles_to_devs.get(id_):
                dev = roles_to_devs[id_]
                log.info("running blktrace on %s: %s" % (remote.name, dev))

                proc = remote.run(
                    args=[
                        'cd',
                        log_dir,
                        run.Raw(';'),
                        '/tmp/cephtest/daemon-helper',
                        daemon_signal,
                        'sudo',
                        blktrace,
                        '-o',
                        dev.rsplit("/", 1)[1],
                        '-d',
                        dev,
                        ],
                    wait=False,   
                    stdin=run.PIPE,
                    )
                procs.append(proc)
    try:
        yield
    finally:
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        log.info('stopping blktrace processes')
        for proc in procs:
            proc.stdin.close()
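
The excerpt above references log_dir, daemon_signal and blktrace without defining them; the full task sets them up elsewhere. Plausible values, stated only as assumptions so the snippet can be read stand-alone:

blktrace = '/usr/sbin/blktrace'                           # path to the blktrace binary (assumed)
daemon_signal = 'term'                                    # signal name handed to daemon-helper (assumed)
log_dir = '/tmp/cephtest/archive/performance/blktrace'    # output directory (assumed)
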
Example #4
def get_dev_for_osd(ctx, config):
    """Get a list of all osd device names."""
    osd_devs = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        host = remote.name.split('@')[-1]
        shortname = host.split('.')[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(
            teuthology.roles_of_type(
                roles_for_host, 'osd'))
        num_osds = len(num_osd_per_host)
        if config.get('separate_journal_disk') is not None:
            num_devs_reqd = 2 * num_osds
            assert num_devs_reqd <= len(
                devs), 'fewer data and journal disks than required ' + shortname
            for dindex in range(0, num_devs_reqd, 2):
                jd_index = dindex + 1
                dev_short = devs[dindex].split('/')[-1]
                jdev_short = devs[jd_index].split('/')[-1]
                osd_devs.append((shortname, dev_short, jdev_short))
        else:
            assert num_osds <= len(devs), 'fewer disks than osds ' + shortname
            for dev in devs[:num_osds]:
                dev_short = dev.split('/')[-1]
                osd_devs.append((shortname, dev_short))
    return osd_devs
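
For reference, this variant returns tuples of short names; with hypothetical hosts and devices the result looks like:

# with 'separate_journal_disk' set:   [('node1', 'sdb', 'sdc'), ('node1', 'sdd', 'sde')]
# without it:                         [('node1', 'sdb'), ('node1', 'sdc')]
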
Example #5
 def osd_admin_socket(self, osdnum, command, check_status=True):
     """
     Remotely start up ceph specifying the admin socket
     """
     testdir = teuthology.get_testdir(self.ctx)
     remote = None
     for _remote, roles_for_host in self.ctx.cluster.remotes.iteritems():
         for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
             if int(id_) == int(osdnum):
                 remote = _remote
     assert remote is not None
     args = [
         'sudo',
         'adjust-ulimits',
         'ceph-coverage',
         '{tdir}/archive/coverage'.format(tdir=testdir),
         'ceph',
         '--admin-daemon',
         '/var/run/ceph/ceph-osd.{id}.asok'.format(id=osdnum),
     ]
     args.extend(command)
     return remote.run(args=args,
                       stdout=StringIO(),
                       wait=True,
                       check_status=check_status)
Example #6
def get_dev_for_osd(ctx, config):
    """Get a list of all osd device names."""
    osd_devs = []
    for remote, roles_for_host in ctx.cluster.remotes.items():
        host = remote.name.split('@')[-1]
        shortname = host.split('.')[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(
            teuthology.roles_of_type(
                roles_for_host, 'osd'))
        num_osds = len(num_osd_per_host)
        if config.get('separate_journal_disk') is not None:
            num_devs_reqd = 2 * num_osds
            assert num_devs_reqd <= len(
                devs), 'fewer data and journal disks than required ' + shortname
            for dindex in range(0, num_devs_reqd, 2):
                jd_index = dindex + 1
                dev_short = devs[dindex].split('/')[-1]
                jdev_short = devs[jd_index].split('/')[-1]
                osd_devs.append((shortname, dev_short, jdev_short))
        else:
            assert num_osds <= len(devs), 'fewer disks than osds ' + shortname
            for dev in devs[:num_osds]:
                dev_short = dev.split('/')[-1]
                osd_devs.append((shortname, dev_short))
    return osd_devs
Example #7
def create_keyring(ctx):
    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client'))
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    for remote, roles_for_host in clients.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
            client_keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
            remote.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    # TODO this --name= is not really obeyed, all unknown "types" are munged to "client"
                    '--name=client.{id}'.format(id=id_),
                    client_keyring,
                    run.Raw('&&'),
                    'sudo',
                    'chmod',
                    '0644',
                    client_keyring,
                    ],
                )
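
Note that run.Raw('&&') keeps the shell operator unquoted, so the ceph-authtool and chmod invocations above run as a single shell command. The same pattern in isolation, with an arbitrary command chosen purely for illustration:

remote.run(args=['mkdir', '-p', '/tmp/demo', run.Raw('&&'), 'ls', '/tmp/demo'])
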
Example #8
def create_keyring(ctx):
    """
    Set up key ring on remote sites
    """
    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client'))
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    for remote, roles_for_host in clients.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
            client_keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(
                id=id_)
            remote.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    # TODO this --name= is not really obeyed, all unknown "types" are munged to "client"
                    '--name=client.{id}'.format(id=id_),
                    client_keyring,
                    run.Raw('&&'),
                    'sudo',
                    'chmod',
                    '0644',
                    client_keyring,
                ], )
Example #9
def test_roles_of_type():
    expected = [
        (['client.0', 'osd.0', 'ceph.osd.1'], 'osd', ['0', '1']),
        (['client.0', 'osd.0', 'ceph.osd.1'], 'client', ['0']),
        (['foo.client.1', 'bar.client.2.3', 'baz.osd.1'], 'mon', []),
        (['foo.client.1', 'bar.client.2.3',
          'baz.osd.1'], 'client', ['1', '2.3']),
    ]
    for roles_for_host, type_, expected_ids in expected:
        ids = list(misc.roles_of_type(roles_for_host, type_))
        assert ids == expected_ids
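
From these expectations, misc.roles_of_type() evidently yields the id portion of every role whose type matches, tolerating an optional cluster prefix such as 'ceph.' or 'foo.'. A rough reimplementation that satisfies the cases above (a sketch, not the library's actual code):

def roles_of_type(roles_for_host, type_):
    prefix = type_ + '.'
    for role in roles_for_host:
        if role.startswith(prefix):
            bare = role
        elif ('.' + prefix) in role:
            # strip a leading cluster prefix such as 'ceph.' or 'bar.'
            bare = role[role.find('.' + prefix) + 1:]
        else:
            continue
        yield bare[len(prefix):]
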
Example #10
def get_nodes_using_roles(ctx, config, role):
    newl = []
    for _remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, role):
            rem = _remote
            if role == 'mon':
                req1 = str(rem).split('@')[-1]
            else:
                req = str(rem).split('.')[0]
                req1 = str(req).split('@')[1]
            newl.append(req1)
    return newl
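
The returned strings are carved out of the remote's name. Assuming a remote that stringifies as 'ubuntu@mira001.front.sepia.ceph.com' (a made-up host), the function collects:

# role == 'mon'  ->  'mira001.front.sepia.ceph.com'   (everything after '@')
# other roles    ->  'mira001'                        (short host name)
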
Example #11
def get_nodes_using_roles(ctx, config, role):
    newl = []
    for _remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, role):
            rem = _remote
            if role == 'mon':
                req1 = str(rem).split('@')[-1]
            else:
                req = str(rem).split('.')[0]
                req1 = str(req).split('@')[1]
            newl.append(req1)
    return newl
Example #12
def get_dev_for_osd(ctx, config):
    osd_devs = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        host = remote.name.split('@')[-1]
        shortname = host.split('.')[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(teuthology.roles_of_type(roles_for_host, 'osd'))
        num_osds = len(num_osd_per_host)
        assert num_osds <= len(devs), 'fewer disks than osds on ' + shortname
        for dev in devs[:num_osds]:
            dev_short = dev.split('/')[-1]
            osd_devs.append('{host}:{dev}'.format(host=shortname, dev=dev_short))
    return osd_devs
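
Unlike Examples #4 and #6, this variant returns flat 'host:dev' strings, which match the ':'-separated form that the ceph-deploy based cluster build (Example #23 below) appends to its osd create command. With hypothetical names the flow is roughly:

# osd_devs == ['node1:sdb', 'node1:sdc']
# which produces commands such as:
#   ./ceph-deploy osd create --zap-disk node1:sdb
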
Example #13
def get_nodes_using_roles(ctx, config, role):
    """Extract the names of nodes that match a given role from a cluster"""
    newl = []
    for _remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, role):
            rem = _remote
            if role == 'mon':
                req1 = str(rem).split('@')[-1]
            else:
                req = str(rem).split('.')[0]
                req1 = str(req).split('@')[1]
            newl.append(req1)
    return newl
Example #14
def get_nodes_using_roles(ctx, config, role):
    """Extract the names of nodes that match a given role from a cluster"""
    newl = []
    for _remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, role):
            rem = _remote
            if role == 'mon':
                req1 = str(rem).split('@')[-1]
            else:
                req = str(rem).split('.')[0]
                req1 = str(req).split('@')[1]
            newl.append(req1)
    return newl
Example #15
def get_dev_for_osd(ctx, config):
    """Get a list of all osd device names."""
    osd_devs = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        host = remote.name.split("@")[-1]
        shortname = host.split(".")[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(teuthology.roles_of_type(roles_for_host, "osd"))
        num_osds = len(num_osd_per_host)
        assert num_osds <= len(devs), "fewer disks than osds on " + shortname
        for dev in devs[:num_osds]:
            dev_short = dev.split("/")[-1]
            osd_devs.append("{host}:{dev}".format(host=shortname, dev=dev_short))
    return osd_devs
Example #16
def get_dev_for_osd(ctx, config):
    osd_devs = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        host = remote.name.split('@')[-1]
        shortname = host.split('.')[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(teuthology.roles_of_type(
            roles_for_host, 'osd'))
        num_osds = len(num_osd_per_host)
        assert num_osds <= len(devs), 'fewer disks than osds on ' + shortname
        for dev in devs[:num_osds]:
            dev_short = dev.split('/')[-1]
            osd_devs.append('{host}:{dev}'.format(host=shortname,
                                                  dev=dev_short))
    return osd_devs
Example #17
 def osd_admin_socket(self, osdnum, command, check_status=True):
     testdir = teuthology.get_testdir(self.ctx)
     remote = None
     for _remote, roles_for_host in self.ctx.cluster.remotes.iteritems():
         for id_ in teuthology.roles_of_type(roles_for_host, "osd"):
             if int(id_) == int(osdnum):
                 remote = _remote
     assert remote is not None
     args = [
         "sudo",
         "adjust-ulimits",
         "ceph-coverage",
         "{tdir}/archive/coverage".format(tdir=testdir),
         "ceph",
         "--admin-daemon",
         "/var/run/ceph/ceph-osd.{id}.asok".format(id=osdnum),
     ]
     args.extend(command)
     return remote.run(args=args, stdout=StringIO(), wait=True, check_status=check_status)
Example #18
def make_deamons_list(ctx, config):

    for type_ in ['mon','mds','osd','client','samba']:
        daemons = ctx.cluster.only(teuthology.is_type(type_))
        if daemons is None: continue
        for remote, roles_for_host in daemons.remotes.iteritems():
            for id_ in teuthology.roles_of_type(roles_for_host, type_):
                name = '%s.%s' % (type_, id_)

                ctx.daemons.add_daemon(remote, type_, id_,
                                      args='no-op',
                                      logger=log.getChild(name),
                                      stdin=run.PIPE,
                                      wait=False,
                                      )
    log.info('ctx daemon lists')
    log.info(ctx.daemons.resolve_role_list(roles=None, types=['mon','mds','osd','client','samba'])) 

    yield
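
The bare yield at the end marks this as a task-style context manager; presumably the full module decorates it with contextlib.contextmanager, along the lines of:

import contextlib

@contextlib.contextmanager
def make_deamons_list(ctx, config):
    # ... body as shown above, ending in the bare yield ...
    yield
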
Example #19
def execute(ctx, config):
    """
    Run the blktrace program on remote machines.
    """
    procs = []
    testdir = teuthology.get_testdir(ctx)
    log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=testdir)

    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = ctx.disk_config.remote_to_roles_to_dev[remote]
        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            if roles_to_devs.get(id_):
                dev = roles_to_devs[id_]
                log.info("running blktrace on %s: %s" % (remote.name, dev))

                proc = remote.run(
                    args=[
                        'cd',
                        log_dir,
                        run.Raw(';'),
                        'daemon-helper',
                        daemon_signal,
                        'sudo',
                        blktrace,
                        '-o',
                        dev.rsplit("/", 1)[1],
                        '-d',
                        dev,
                    ],
                    wait=False,
                    stdin=run.PIPE,
                )
                procs.append(proc)
    try:
        yield
    finally:
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        log.info('stopping blktrace processes')
        for proc in procs:
            proc.stdin.close()
Example #20
 def osd_admin_socket(self, osdnum, command):
     remote = None
     for _remote, roles_for_host in self.ctx.cluster.remotes.iteritems():
         for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
             if int(id_) == osdnum:
                 remote = _remote
     assert remote is not None
     args=[
             'LD_LIBRARY_PRELOAD=/tmp/cephtest/binary/usr/local/lib',
             '/tmp/cephtest/enable-coredump',
             '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
             '/tmp/cephtest/archive/coverage',
             '/tmp/cephtest/binary/usr/local/bin/ceph',
             '-k', '/tmp/cephtest/ceph.keyring',
             '-c', '/tmp/cephtest/ceph.conf',
             '--admin-daemon',
             "/tmp/cephtest/asok.osd.%s"%(str(osdnum),)]
     args.extend(command)
     return remote.run(
         args=args,
         stdout=StringIO(),
         wait=True,
         )
Example #21
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the download task
    # puts it.  Remember this here, because subsequently IDs will change from those in
    # the test config to those that ceph-deploy invents.
    (ceph_admin,) = ctx.cluster.only(teuthology.get_first_mon(ctx, config)).remotes.iterkeys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=["cd", "{tdir}/ceph-deploy".format(tdir=testdir), run.Raw("&&"), run.Raw(cmd)], check_status=False
        ).exitstatus

    try:
        log.info("Building ceph cluster using ceph-deploy...")
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get("branch") is not None:
            cbranch = config.get("branch")
            for var, val in cbranch.iteritems():
                ceph_branch = "--{var}={val}".format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, "mds")
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, "mon")
        mon_nodes = " ".join(mon_node)
        new_mon = "./ceph-deploy new" + " " + mon_nodes
        mon_hostname = mon_nodes.split(" ")[0]
        mon_hostname = str(mon_hostname)
        gather_keys = "./ceph-deploy gatherkeys" + " " + mon_hostname
        deploy_mds = "./ceph-deploy mds create" + " " + mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info("adding config inputs...")
        testdir = teuthology.get_testdir(ctx)
        conf_path = "{tdir}/ceph-deploy/ceph.conf".format(tdir=testdir)

        if config.get("conf") is not None:
            confp = config.get("conf")
            for section, keys in confp.iteritems():
                lines = "[{section}]\n".format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path, lines, sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = "{key} = {value}\n".format(key=key, value=value)
                    teuthology.append_lines_to_file(ceph_admin, conf_path, lines, sudo=True)

        # install ceph
        install_nodes = "./ceph-deploy install " + (ceph_branch if ceph_branch else "--dev=master") + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = (
            "./ceph-deploy install --tests " + (ceph_branch if ceph_branch else "--dev=master") + " " + all_nodes
        )
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = "./ceph-deploy mon create-initial"
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so let's
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        estatus_gather = execute_ceph_deploy(gather_keys)
        max_gather_tries = 90
        gather_tries = 0
        while estatus_gather != 0:
            gather_tries += 1
            if gather_tries >= max_gather_tries:
                msg = "ceph-deploy was not able to gatherkeys after 15 minutes"
                raise RuntimeError(msg)
            estatus_gather = execute_ceph_deploy(gather_keys)
            time.sleep(10)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get("test_mon_destroy") is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = "./ceph-deploy mon destroy" + " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = "./ceph-deploy disk zap " + node + ":" + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = "./ceph-deploy osd create "
            if config.get("dmcrypt") is not None:
                osd_create_cmd += "--dmcrypt "
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info("successfully created osd")
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get("wait-for-healthy", True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info("Setting up client nodes...")
            conf_path = "/etc/ceph/ceph.conf"
            admin_keyring_path = "/etc/ceph/ceph.client.admin.keyring"
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(remote=mon0_remote, path=conf_path, sudo=True)
            admin_keyring = teuthology.get_file(remote=mon0_remote, path=admin_keyring_path, sudo=True)

            clients = ctx.cluster.only(teuthology.is_type("client"))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, "client"):
                    client_keyring = "/etc/ceph/ceph.client.{id}.keyring".format(id=id_)
                    mon0_remote.run(
                        args=[
                            "cd",
                            "{tdir}".format(tdir=testdir),
                            run.Raw("&&"),
                            "sudo",
                            "bash",
                            "-c",
                            run.Raw('"'),
                            "ceph",
                            "auth",
                            "get-or-create",
                            "client.{id}".format(id=id_),
                            "mds",
                            "allow",
                            "mon",
                            "allow *",
                            "osd",
                            "allow *",
                            run.Raw(">"),
                            client_keyring,
                            run.Raw('"'),
                        ]
                    )
                    key_data = teuthology.get_file(remote=mon0_remote, path=client_keyring, sudo=True)
                    teuthology.sudo_write_file(remote=remot, path=client_keyring, data=key_data, perms="0644")
                    teuthology.sudo_write_file(remote=remot, path=admin_keyring_path, data=admin_keyring, perms="0644")
                    teuthology.sudo_write_file(remote=remot, path=conf_path, data=conf_data, perms="0644")

            if mds_nodes:
                log.info("Configuring CephFS...")
                ceph_fs = Filesystem(ctx, admin_remote=clients.remotes.keys()[0])
                if not ceph_fs.legacy_configured():
                    ceph_fs.create()
        elif not config.get("only_mon"):
            raise RuntimeError("The cluster is NOT operational due to insufficient OSDs")
        yield

    except Exception:
        log.info("Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get("keep_running"):
            return
        log.info("Stopping ceph...")
        ctx.cluster.run(
            args=[
                "sudo",
                "stop",
                "ceph-all",
                run.Raw("||"),
                "sudo",
                "service",
                "ceph",
                "stop",
                run.Raw("||"),
                "sudo",
                "systemctl",
                "stop",
                "ceph.target",
            ]
        )

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(
            args=[
                "sudo",
                "status",
                "ceph-all",
                run.Raw("||"),
                "sudo",
                "service",
                "ceph",
                "status",
                run.Raw("||"),
                "sudo",
                "systemctl",
                "status",
                "ceph.target",
            ],
            check_status=False,
        )

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(
            args=["sudo", "ps", "aux", run.Raw("|"), "grep", "-v", "grep", run.Raw("|"), "grep", "ceph"],
            check_status=False,
        )

        if ctx.archive is not None:
            # archive mon data, too
            log.info("Archiving mon data...")
            path = os.path.join(ctx.archive, "data")
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type("mon"))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith("mon."):
                        teuthology.pull_directory_tarball(remote, "/var/lib/ceph/mon", path + "/" + role + ".tgz")

            log.info("Compressing logs...")
            run.wait(
                ctx.cluster.run(
                    args=[
                        "sudo",
                        "find",
                        "/var/log/ceph",
                        "-name",
                        "*.log",
                        "-print0",
                        run.Raw("|"),
                        "sudo",
                        "xargs",
                        "-0",
                        "--no-run-if-empty",
                        "--",
                        "gzip",
                        "--",
                    ],
                    wait=False,
                )
            )

            log.info("Archiving logs...")
            path = os.path.join(ctx.archive, "remote")
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, "/var/log/ceph", os.path.join(sub, "log"))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = "./ceph-deploy purge" + " " + all_nodes
        purgedata_nodes = "./ceph-deploy purgedata" + " " + all_nodes

        log.info("Purging package...")
        execute_ceph_deploy(purge_nodes)
        log.info("Purging data...")
        execute_ceph_deploy(purgedata_nodes)
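
The gatherkeys loop above retries up to 90 times with a 10 second sleep, which is where the '15 minutes' in the error message comes from (90 × 10 s = 900 s). The same idea as a small standalone helper, shown only for reference:

import time

def retry_until_zero(fn, tries=90, delay=10):
    # Call fn() until it returns 0; raise after 'tries' failed attempts (illustrative).
    status = fn()
    attempts = 0
    while status != 0:
        attempts += 1
        if attempts >= tries:
            raise RuntimeError('command did not succeed after %d attempts' % tries)
        time.sleep(delay)
        status = fn()
    return status
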
Example #22
def run_daemon(ctx, config, type_):
    """
    Run daemons for a role type.  Handle the startup and termination of a daemon.
    On startup -- set coverages, cpu_profile, valgrind values for all remotes,
    and a max_mds value for one mds.
    On cleanup -- Stop all existing daemons of this type.

    :param ctx: Context
    :param config: Configuration
    :param type_: Role type
    """
    log.info('Starting %s daemons...' % type_)
    testdir = teuthology.get_testdir(ctx)
    daemons = ctx.cluster.only(teuthology.is_type(type_))

    # check whether any daemons of this type are configured
    if daemons is None:
        return
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    daemon_signal = 'kill'
    if config.get('coverage') or config.get('valgrind') is not None:
        daemon_signal = 'term'

    for remote, roles_for_host in daemons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, type_):
            name = '%s.%s' % (type_, id_)

            run_cmd = [
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'daemon-helper',
                daemon_signal,
                ]
            run_cmd_tail = [
                'ceph-%s' % (type_),
                '-f',
                '-i', id_]

            if type_ in config.get('cpu_profile', []):
                profile_path = '/var/log/ceph/profiling-logger/%s.%s.prof' % (type_, id_)
                run_cmd.extend([ 'env', 'CPUPROFILE=%s' % profile_path ])

            if config.get('valgrind') is not None:
                valgrind_args = None
                if type_ in config['valgrind']:
                    valgrind_args = config['valgrind'][type_]
                if name in config['valgrind']:
                    valgrind_args = config['valgrind'][name]
                run_cmd = teuthology.get_valgrind_args(testdir, name,
                                                       run_cmd,
                                                       valgrind_args)

            run_cmd.extend(run_cmd_tail)

            ctx.daemons.add_daemon(remote, type_, id_,
                                   args=run_cmd,
                                   logger=log.getChild(name),
                                   stdin=run.PIPE,
                                   wait=False,
                                   )

    try:
        yield
    finally:
        teuthology.stop_daemons_of_type(ctx, type_)
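
For context, the config dictionary consumed by run_daemon() would look roughly like the following; the keys are the ones the code reads, the values are invented examples:

config = {
    'coverage': True,
    'cpu_profile': ['osd'],                    # role types started under CPUPROFILE
    'valgrind': {
        'osd': ['--tool=memcheck'],            # arguments applied to every daemon of a type
        'osd.1': ['--tool=memcheck', '-v'],    # per-daemon override keyed by 'type.id'
    },
}
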
Example #23
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.iteritems():
                if var == 'testing':
                    ceph_branch = '--{var}'.format(var=var)
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        node_dev_list = []
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_roles(ctx, config, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_roles(ctx, config, 'mon')
        mon_nodes = " ".join(mon_node)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        install_nodes = './ceph-deploy install ' + ceph_branch + " " + all_nodes
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(ctx, config, new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)
        first_mon = teuthology.get_first_mon(ctx, config)
        (remote, ) = ctx.cluster.only(first_mon).remotes.keys()

        lines = None
        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(remote,
                                                conf_path,
                                                lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(remote,
                                                    conf_path,
                                                    lines,
                                                    sudo=True)

        estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")

        mon_no = None
        mon_no = config.get('mon_initial_members')
        if mon_no is not None:
            i = 0
            mon1 = []
            while (i < mon_no):
                mon1.append(mon_node[i])
                i = i + 1
            initial_mons = " ".join(mon1)
            for k in range(mon_no, len(mon_node)):
                mon_create_nodes = './ceph-deploy mon create' + " " + \
                    initial_mons + " " + mon_node[k]
                estatus_mon = execute_ceph_deploy(ctx, config,
                                                  mon_create_nodes)
                if estatus_mon != 0:
                    raise RuntimeError("ceph-deploy: Failed to create monitor")
        else:
            mon_create_nodes = './ceph-deploy mon create-initial'
            estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
            if estatus_mon != 0:
                raise RuntimeError("ceph-deploy: Failed to create monitors")

        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
        max_gather_tries = 90
        gather_tries = 0
        while (estatus_gather != 0):
            gather_tries += 1
            if gather_tries >= max_gather_tries:
                msg = 'ceph-deploy was not able to gatherkeys after 15 minutes'
                raise RuntimeError(msg)
            estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
            time.sleep(10)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + " " + mon_node[
                    d]
                estatus_mon_d = execute_ceph_deploy(ctx, config,
                                                    mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        for d in node_dev_list:
            osd_create_cmds = './ceph-deploy osd create --zap-disk' + " " + d
            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                zap_disk = './ceph-deploy disk zap' + " " + d
                execute_ceph_deploy(ctx, config, zap_disk)
                estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
                if estatus_osd == 0:
                    log.info('successfully created osd')
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote, ) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(
                remote=mon0_remote,
                path=conf_path,
                sudo=True,
            )
            admin_keyring = teuthology.get_file(
                remote=mon0_remote,
                path=admin_keyring_path,
                sudo=True,
            )

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    mon0_remote.run(args=[
                        'cd',
                        '{tdir}'.format(tdir=testdir),
                        run.Raw('&&'),
                        'sudo',
                        'bash',
                        '-c',
                        run.Raw('"'),
                        'ceph',
                        'auth',
                        'get-or-create',
                        'client.{id}'.format(id=id_),
                        'mds',
                        'allow',
                        'mon',
                        'allow *',
                        'osd',
                        'allow *',
                        run.Raw('>'),
                        client_keyring,
                        run.Raw('"'),
                    ], )
                    key_data = teuthology.get_file(
                        remote=mon0_remote,
                        path=client_keyring,
                        sudo=True,
                    )
                    teuthology.sudo_write_file(remote=remot,
                                               path=client_keyring,
                                               data=key_data,
                                               perms='0644')
                    teuthology.sudo_write_file(remote=remot,
                                               path=admin_keyring_path,
                                               data=admin_keyring,
                                               perms='0644')
                    teuthology.sudo_write_file(remote=remot,
                                               path=conf_path,
                                               data=conf_data,
                                               perms='0644')
        else:
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        yield

    finally:
        log.info('Stopping ceph...')
        ctx.cluster.run(args=[
            'sudo', 'stop', 'ceph-all',
            run.Raw('||'), 'sudo', 'service', 'ceph', 'stop'
        ])

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote, '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ), )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(ctx, config, purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(ctx, config, purgedata_nodes)
Example #24
File: ceph.py  Project: tv42/teuthology
def cluster(ctx, config):
    log.info('Creating ceph cluster...')
    run.wait(
        ctx.cluster.run(
            args=[
                'install', '-d', '-m0755', '--',
                '/tmp/cephtest/data',
                ],
            wait=False,
            )
        )

    log.info('Generating config...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [roles for (remote, roles) in remotes_and_roles]
    ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, roles) in remotes_and_roles)]
    conf = teuthology.skeleton_config(roles=roles, ips=ips)
    for section, keys in config['conf'].iteritems():
        for key, value in keys.iteritems():
            log.info("[%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    ctx.ceph = argparse.Namespace()
    ctx.ceph.conf = conf

    log.info('Writing configs...')
    conf_fp = StringIO()
    conf.write(conf_fp)
    conf_fp.seek(0)
    writes = ctx.cluster.run(
        args=[
            'python',
            '-c',
            'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))',
            '/tmp/cephtest/ceph.conf',
            ],
        stdin=run.PIPE,
        wait=False,
        )
    teuthology.feed_many_stdins_and_close(conf_fp, writes)
    run.wait(writes)

    coverage_dir = '/tmp/cephtest/archive/coverage'

    firstmon = teuthology.get_first_mon(ctx, config)

    log.info('Setting up %s...' % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            '/tmp/cephtest/enable-coredump',
            '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
            coverage_dir,
            '/tmp/cephtest/binary/usr/local/bin/ceph-authtool',
            '--create-keyring',
            '/tmp/cephtest/ceph.keyring',
            ],
        )
    ctx.cluster.only(firstmon).run(
        args=[
            '/tmp/cephtest/enable-coredump',
            '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
            coverage_dir,
            '/tmp/cephtest/binary/usr/local/bin/ceph-authtool',
            '--gen-key',
            '--name=mon.',
            '/tmp/cephtest/ceph.keyring',
            ],
        )
    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
    teuthology.create_simple_monmap(
        remote=mon0_remote,
        conf=conf,
        )

    log.info('Creating admin key on %s...' % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            '/tmp/cephtest/enable-coredump',
            '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
            coverage_dir,
            '/tmp/cephtest/binary/usr/local/bin/ceph-authtool',
            '--gen-key',
            '--name=client.admin',
            '--set-uid=0',
            '--cap', 'mon', 'allow *',
            '--cap', 'osd', 'allow *',
            '--cap', 'mds', 'allow',
            '/tmp/cephtest/ceph.keyring',
            ],
        )

    log.info('Copying monmap to all nodes...')
    keyring = teuthology.get_file(
        remote=mon0_remote,
        path='/tmp/cephtest/ceph.keyring',
        )
    monmap = teuthology.get_file(
        remote=mon0_remote,
        path='/tmp/cephtest/monmap',
        )

    for rem in ctx.cluster.remotes.iterkeys():
        # copy mon key and initial monmap
        log.info('Sending monmap to node {remote}'.format(remote=rem))
        teuthology.write_file(
            remote=rem,
            path='/tmp/cephtest/ceph.keyring',
            data=keyring,
            )
        teuthology.write_file(
            remote=rem,
            path='/tmp/cephtest/monmap',
            data=monmap,
            )

    log.info('Setting up mon nodes...')
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    run.wait(
        mons.run(
            args=[
                '/tmp/cephtest/enable-coredump',
                '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                coverage_dir,
                '/tmp/cephtest/binary/usr/local/bin/osdmaptool',
                '--clobber',
                '--createsimple', '{num:d}'.format(
                    num=teuthology.num_instances_of_type(ctx.cluster, 'osd'),
                    ),
                '/tmp/cephtest/osdmap',
                '--pg_bits', '2',
                '--pgp_bits', '4',
                ],
            wait=False,
            ),
        )

    log.info('Setting up osd nodes...')
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            remote.run(
                args=[
                    '/tmp/cephtest/enable-coredump',
                    '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                    coverage_dir,
                    '/tmp/cephtest/binary/usr/local/bin/ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    '--name=osd.{id}'.format(id=id_),
                    '/tmp/cephtest/data/osd.{id}.keyring'.format(id=id_),
                    ],
                )

    log.info('Setting up mds nodes...')
    mdss = ctx.cluster.only(teuthology.is_type('mds'))
    for remote, roles_for_host in mdss.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'mds'):
            remote.run(
                args=[
                    '/tmp/cephtest/enable-coredump',
                    '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                    coverage_dir,
                    '/tmp/cephtest/binary/usr/local/bin/ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    '--name=mds.{id}'.format(id=id_),
                    '/tmp/cephtest/data/mds.{id}.keyring'.format(id=id_),
                    ],
                )

    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client'))
    for remote, roles_for_host in clients.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
            remote.run(
                args=[
                    '/tmp/cephtest/enable-coredump',
                    '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                    coverage_dir,
                    '/tmp/cephtest/binary/usr/local/bin/ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    # TODO this --name= is not really obeyed, all unknown "types" are munged to "client"
                    '--name=client.{id}'.format(id=id_),
                    '/tmp/cephtest/data/client.{id}.keyring'.format(id=id_),
                    ],
                )

    log.info('Reading keys from all nodes...')
    keys_fp = StringIO()
    keys = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['osd', 'mds', 'client']:
            for id_ in teuthology.roles_of_type(roles_for_host, type_):
                data = teuthology.get_file(
                    remote=remote,
                    path='/tmp/cephtest/data/{type}.{id}.keyring'.format(
                        type=type_,
                        id=id_,
                        ),
                    )
                keys.append((type_, id_, data))
                keys_fp.write(data)

    log.info('Adding keys to all mons...')
    writes = mons.run(
        args=[
            'cat',
            run.Raw('>>'),
            '/tmp/cephtest/ceph.keyring',
            ],
        stdin=run.PIPE,
        wait=False,
        )
    keys_fp.seek(0)
    teuthology.feed_many_stdins_and_close(keys_fp, writes)
    run.wait(writes)
    for type_, id_, data in keys:
        run.wait(
            mons.run(
                args=[
                    '/tmp/cephtest/enable-coredump',
                    '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                    coverage_dir,
                    '/tmp/cephtest/binary/usr/local/bin/ceph-authtool',
                    '/tmp/cephtest/ceph.keyring',
                    '--name={type}.{id}'.format(
                        type=type_,
                        id=id_,
                        ),
                    ] + list(teuthology.generate_caps(type_)),
                wait=False,
                ),
            )

    log.info('Running mkfs on mon nodes...')
    for remote, roles_for_host in mons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'mon'):
            remote.run(
                args=[
                    '/tmp/cephtest/enable-coredump',
                    '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                    coverage_dir,
                    '/tmp/cephtest/binary/usr/local/bin/ceph-mon',
                    '--mkfs',
                    '-i', id_,
                    '-c', '/tmp/cephtest/ceph.conf',
                    '--monmap=/tmp/cephtest/monmap',
                    '--osdmap=/tmp/cephtest/osdmap',
                    '--keyring=/tmp/cephtest/ceph.keyring',
                    ],
                )

    log.info('Running mkfs on osd nodes...')
    devs_to_clean = {}
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = {}
        if config.get('btrfs'):
            log.info('btrfs option selected, checking for scratch devs')
            devs = teuthology.get_scratch_devices(remote)
            log.info('found devs: %s' % (str(devs),))
            roles_to_devs = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), devs
                )
            log.info('dev map: %s' % (str(roles_to_devs),))
            devs_to_clean[remote] = []

        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            remote.run(
                args=[
                    'mkdir',
                    os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)),
                    ],
                )
            if roles_to_devs.get(id_):
                dev = roles_to_devs[id_]
                log.info('mkfs.btrfs on %s on %s' % (dev, remote))
                remote.run(
                    args=[
                        'sudo',
                        'apt-get', 'install', '-y', 'btrfs-tools'
                        ]
                    )
                remote.run(
                    args=[
                        'sudo',
                        'mkfs.btrfs',
                        dev
                        ]
                    )
                log.info('mount %s on %s' % (dev, remote))
                remote.run(
                    args=[
                        'sudo',
                        'mount',
                        '-o',
                        'user_subvol_rm_allowed',
                        dev,
                        os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)),
                        ]
                    )
                remote.run(
                    args=[
                        'sudo', 'chown', '-R', 'ubuntu.ubuntu',
                        os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_))
                        ]
                    )
                remote.run(
                    args=[
                        'sudo', 'chmod', '-R', '755',
                        os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_))
                        ]
                    )
                devs_to_clean[remote].append(
                    os.path.join(
                        '/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)
                        )
                    )

        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            remote.run(
                args=[
                    '/tmp/cephtest/enable-coredump',
                    '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                    coverage_dir,
                    '/tmp/cephtest/binary/usr/local/bin/ceph-osd',
                    '--mkfs',
                    '-i', id_,
                    '-c', '/tmp/cephtest/ceph.conf',
                    '--monmap', '/tmp/cephtest/monmap',
                    ],
                )
    run.wait(
        mons.run(
            args=[
                'rm',
                '--',
                '/tmp/cephtest/monmap',
                '/tmp/cephtest/osdmap',
                ],
            wait=False,
            ),
        )

    try:
        yield
    finally:
        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
        if ctx.archive is not None:
            log.info('Grabbing cluster log from %s %s...' % (mon0_remote,
                                                             firstmon))
            dest = os.path.join(ctx.archive, 'ceph.log')
            mon0_remote.run(
                args = [
                    'cat',
                    '--',
                    '/tmp/cephtest/data/%s/log' % firstmon
                    ],
                stdout=file(dest, 'wb'),
                )

        log.info('Checking cluster ceph.log for badness...')
        def first_in_ceph_log(pattern, excludes):
            args = [
                'egrep', pattern,
                '/tmp/cephtest/data/%s/log' % firstmon,
                ]
            for exclude in excludes:
                args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                    run.Raw('|'), 'head', '-n', '1',
                    ])
            r = mon0_remote.run(
                stdout=StringIO(),
                args=args,
                )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
                             config['log_whitelist']) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log_whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                            match=match.rstrip('\n'),
                            )
                        break

        for remote, dirs in devs_to_clean.iteritems():
            for dir_ in dirs:
                log.info('Unmounting %s on %s' % (dir_, remote))
                remote.run(
                    args=[
                        "sudo",
                        "umount",
                        "-f",
                        dir_
                        ]
                    )

        log.info('Cleaning ceph cluster...')
        run.wait(
            ctx.cluster.run(
                args=[
                    'rm',
                    '-rf',
                    '--',
                    '/tmp/cephtest/ceph.conf',
                    '/tmp/cephtest/ceph.keyring',
                    '/tmp/cephtest/data',
                    '/tmp/cephtest/monmap',
                    run.Raw('/tmp/cephtest/asok.*')
                    ],
                wait=False,
                ),
            )
示例#25
0
def cluster(ctx, config):
    """
    Handle the creation and removal of a ceph cluster.

    On startup:
        Create directories needed for the cluster.
        Create remote journals for all osds.
        Create and set keyring.
        Copy the monmap to the test systems.
        Set up mon nodes.
        Set up mds nodes.
        Mkfs osd nodes.
        Add keyring information to monmaps.
        Mkfs mon nodes.

    On exit:
        If errors occurred, extract a failure message and store it in ctx.summary.
        Unmount all test files and temporary journaling files.
        Save the monitor information and archive all ceph logs.
        Clean up the keyring setup, and remove all monitor map and data files left over.

    :param ctx: Context
    :param config: Configuration
    """
    if ctx.config.get('use_existing_cluster', False) is True:
        log.info("'use_existing_cluster' is true; skipping cluster creation")
        yield

    testdir = teuthology.get_testdir(ctx)
    log.info('Creating ceph cluster...')
    run.wait(
        ctx.cluster.run(
            args=[
                'install', '-d', '-m0755', '--',
                '{tdir}/data'.format(tdir=testdir),
                ],
            wait=False,
            )
        )

    run.wait(
        ctx.cluster.run(
            args=[
                'sudo',
                'install', '-d', '-m0777', '--', '/var/run/ceph',
                ],
            wait=False,
            )
        )


    devs_to_clean = {}
    remote_to_roles_to_devs = {}
    remote_to_roles_to_journals = {}
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        roles_to_devs = {}
        roles_to_journals = {}
        if config.get('fs'):
            log.info('fs option selected, checking for scratch devs')
            log.info('found devs: %s' % (str(devs),))
            devs_id_map = teuthology.get_wwn_id_map(remote, devs)
            iddevs = devs_id_map.values()
            roles_to_devs = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs
                )
            if len(roles_to_devs) < len(iddevs):
                iddevs = iddevs[len(roles_to_devs):]
            devs_to_clean[remote] = []

        if config.get('block_journal'):
            log.info('block journal enabled')
            roles_to_journals = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs
                )
            log.info('journal map: %s', roles_to_journals)

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled')
            roles_to_journals = {}
            remote.run( args=[ 'sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt' ] )
            for osd in teuthology.roles_of_type(roles_for_host, 'osd'):
                tmpfs = '/mnt/osd.%s' % osd
                roles_to_journals[osd] = tmpfs
                remote.run( args=[ 'truncate', '-s', '1500M', tmpfs ] )
            log.info('journal map: %s', roles_to_journals)

        log.info('dev map: %s' % (str(roles_to_devs),))
        remote_to_roles_to_devs[remote] = roles_to_devs
        remote_to_roles_to_journals[remote] = roles_to_journals


    log.info('Generating config...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]
    conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips)
    for remote, roles_to_journals in remote_to_roles_to_journals.iteritems():
        for role, journal in roles_to_journals.iteritems():
            key = "osd." + str(role)
            if key not in conf:
                conf[key] = {}
            conf[key]['osd journal'] = journal
    for section, keys in config['conf'].iteritems():
        for key, value in keys.iteritems():
            log.info("[%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    if config.get('tmpfs_journal'):
        conf['journal dio'] = False

    ctx.ceph = argparse.Namespace()
    ctx.ceph.conf = conf

    keyring_path = config.get('keyring_path', '/etc/ceph/ceph.keyring')

    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    firstmon = teuthology.get_first_mon(ctx, config)

    log.info('Setting up %s...' % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--create-keyring',
            keyring_path,
            ],
        )
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--gen-key',
            '--name=mon.',
            keyring_path,
            ],
        )
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'chmod',
            '0644',
            keyring_path,
            ],
        )
    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
    fsid = teuthology.create_simple_monmap(
        ctx,
        remote=mon0_remote,
        conf=conf,
        )
    if 'global' not in conf:
        conf['global'] = {}
    conf['global']['fsid'] = fsid

    log.info('Writing ceph.conf for FSID %s...' % fsid)
    conf_path = config.get('conf_path', DEFAULT_CONF_PATH)
    write_conf(ctx, conf_path)

    log.info('Creating admin key on %s...' % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--gen-key',
            '--name=client.admin',
            '--set-uid=0',
            '--cap', 'mon', 'allow *',
            '--cap', 'osd', 'allow *',
            '--cap', 'mds', 'allow *',
            keyring_path,
            ],
        )

    log.info('Copying monmap to all nodes...')
    keyring = teuthology.get_file(
        remote=mon0_remote,
        path=keyring_path,
        )
    monmap = teuthology.get_file(
        remote=mon0_remote,
        path='{tdir}/monmap'.format(tdir=testdir),
        )

    for rem in ctx.cluster.remotes.iterkeys():
        # copy mon key and initial monmap
        log.info('Sending monmap to node {remote}'.format(remote=rem))
        teuthology.sudo_write_file(
            remote=rem,
            path=keyring_path,
            data=keyring,
            perms='0644'
            )
        teuthology.write_file(
            remote=rem,
            path='{tdir}/monmap'.format(tdir=testdir),
            data=monmap,
            )

    log.info('Setting up mon nodes...')
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    run.wait(
        mons.run(
            args=[
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'osdmaptool',
                '-c', conf_path,
                '--clobber',
                '--createsimple', '{num:d}'.format(
                    num=teuthology.num_instances_of_type(ctx.cluster, 'osd'),
                    ),
                '{tdir}/osdmap'.format(tdir=testdir),
                '--pg_bits', '2',
                '--pgp_bits', '4',
                ],
            wait=False,
            ),
        )

    log.info('Setting up mds nodes...')
    mdss = ctx.cluster.only(teuthology.is_type('mds'))
    for remote, roles_for_host in mdss.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'mds'):
            remote.run(
                args=[
                    'sudo',
                    'mkdir',
                    '-p',
                    '/var/lib/ceph/mds/ceph-{id}'.format(id=id_),
                    run.Raw('&&'),
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    '--name=mds.{id}'.format(id=id_),
                    '/var/lib/ceph/mds/ceph-{id}/keyring'.format(id=id_),
                    ],
                )

    cclient.create_keyring(ctx)
    log.info('Running mkfs on osd nodes...')

    ctx.disk_config = argparse.Namespace()
    ctx.disk_config.remote_to_roles_to_dev = remote_to_roles_to_devs
    ctx.disk_config.remote_to_roles_to_journals = remote_to_roles_to_journals
    ctx.disk_config.remote_to_roles_to_dev_mount_options = {}
    ctx.disk_config.remote_to_roles_to_dev_fstype = {}

    log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev)))
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = remote_to_roles_to_devs[remote]
        roles_to_journals = remote_to_roles_to_journals[remote]


        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            remote.run(
                args=[
                    'sudo',
                    'mkdir',
                    '-p',
                    '/var/lib/ceph/osd/ceph-{id}'.format(id=id_),
                    ])
            log.info(str(roles_to_journals))
            log.info(id_)
            if roles_to_devs.get(id_):
                dev = roles_to_devs[id_]
                fs = config.get('fs')
                package = None
                mkfs_options = config.get('mkfs_options')
                mount_options = config.get('mount_options')
                if fs == 'btrfs':
                    #package = 'btrfs-tools'
                    if mount_options is None:
                        mount_options = ['noatime','user_subvol_rm_allowed']
                    if mkfs_options is None:
                        mkfs_options = ['-m', 'single',
                                        '-l', '32768',
                                        '-n', '32768']
                if fs == 'xfs':
                    #package = 'xfsprogs'
                    if mount_options is None:
                        mount_options = ['noatime']
                    if mkfs_options is None:
                        mkfs_options = ['-f', '-i', 'size=2048']
                if fs == 'ext4' or fs == 'ext3':
                    if mount_options is None:
                        mount_options = ['noatime','user_xattr']

                if mount_options is None:
                    mount_options = []
                if mkfs_options is None:
                    mkfs_options = []
                mkfs = ['mkfs.%s' % fs] + mkfs_options
                log.info('%s on %s on %s' % (mkfs, dev, remote))
                if package is not None:
                    remote.run(
                        args=[
                            'sudo',
                            'apt-get', 'install', '-y', package
                            ],
                        stdout=StringIO(),
                        )

                try:
                    remote.run(args= ['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])
                except run.CommandFailedError:
                    # Newer btrfs-tools doesn't prompt for overwrite, use -f
                    if '-f' not in mkfs_options:
                        mkfs_options.append('-f')
                        mkfs = ['mkfs.%s' % fs] + mkfs_options
                        log.info('%s on %s on %s' % (mkfs, dev, remote))
                    remote.run(args= ['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])

                log.info('mount %s on %s -o %s' % (dev, remote,
                                                   ','.join(mount_options)))
                remote.run(
                    args=[
                        'sudo',
                        'mount',
                        '-t', fs,
                        '-o', ','.join(mount_options),
                        dev,
                        os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=id_)),
                        ]
                    )
                if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options:
                    ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][id_] = mount_options
                if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype:
                    ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_fstype[remote][id_] = fs
                devs_to_clean[remote].append(
                    os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=id_))
                    )

        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            remote.run(
                args=[
                    'sudo',
                    'MALLOC_CHECK_=3',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-osd',
                    '--mkfs',
                    '--mkkey',
                    '-i', id_,
                    '--monmap', '{tdir}/monmap'.format(tdir=testdir),
                    ],
                )


    log.info('Reading keys from all nodes...')
    keys_fp = StringIO()
    keys = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['mds','osd']:
            for id_ in teuthology.roles_of_type(roles_for_host, type_):
                data = teuthology.get_file(
                    remote=remote,
                    path='/var/lib/ceph/{type}/ceph-{id}/keyring'.format(
                        type=type_,
                        id=id_,
                        ),
                    sudo=True,
                    )
                keys.append((type_, id_, data))
                keys_fp.write(data)
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['client']:
            for id_ in teuthology.roles_of_type(roles_for_host, type_):
                data = teuthology.get_file(
                    remote=remote,
                    path='/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    )
                keys.append((type_, id_, data))
                keys_fp.write(data)

    log.info('Adding keys to all mons...')
    writes = mons.run(
        args=[
            'sudo', 'tee', '-a',
            keyring_path,
            ],
        stdin=run.PIPE,
        wait=False,
        stdout=StringIO(),
        )
    keys_fp.seek(0)
    teuthology.feed_many_stdins_and_close(keys_fp, writes)
    run.wait(writes)
    for type_, id_, data in keys:
        run.wait(
            mons.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    keyring_path,
                    '--name={type}.{id}'.format(
                        type=type_,
                        id=id_,
                        ),
                    ] + list(teuthology.generate_caps(type_)),
                wait=False,
                ),
            )

    log.info('Running mkfs on mon nodes...')
    for remote, roles_for_host in mons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'mon'):
            remote.run(
                args=[
                  'sudo',
                  'mkdir',
                  '-p',
                  '/var/lib/ceph/mon/ceph-{id}'.format(id=id_),
                  ],
                )
            remote.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-mon',
                    '--mkfs',
                    '-i', id_,
                    '--monmap={tdir}/monmap'.format(tdir=testdir),
                    '--osdmap={tdir}/osdmap'.format(tdir=testdir),
                    '--keyring={kpath}'.format(kpath=keyring_path),
                    ],
                )


    run.wait(
        mons.run(
            args=[
                'rm',
                '--',
                '{tdir}/monmap'.format(tdir=testdir),
                '{tdir}/osdmap'.format(tdir=testdir),
                ],
            wait=False,
            ),
        )

    try:
        yield
    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise
    finally:
        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()

        log.info('Checking cluster log for badness...')
        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurrence of the pattern specified in the Ceph log.
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo',
                'egrep', pattern,
                '/var/log/ceph/ceph.log',
                ]
            for exclude in excludes:
                args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                    run.Raw('|'), 'head', '-n', '1',
                    ])
            r = mon0_remote.run(
                stdout=StringIO(),
                args=args,
                )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
                             config['log_whitelist']) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log_whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                            match=match.rstrip('\n'),
                            )
                        break

        for remote, dirs in devs_to_clean.iteritems():
            for dir_ in dirs:
                log.info('Unmounting %s on %s' % (dir_, remote))
                try:
                    remote.run(
                        args=[
                            'sync',
                            run.Raw('&&'),
                            'sudo',
                            'umount',
                            '-f',
                            dir_
                        ]
                    )
                except Exception as e:
                    remote.run(args=[
                            'sudo',
                            run.Raw('PATH=/usr/sbin:$PATH'),
                            'lsof',
                            run.Raw(';'),
                            'ps', 'auxf',
                            ])
                    raise e

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled - unmounting tmpfs at /mnt')
            for remote, roles_for_host in osds.remotes.iteritems():
                remote.run(
                    args=[ 'sudo', 'umount', '-f', '/mnt' ],
                    check_status=False,
                )

        if ctx.archive is not None and \
           not (ctx.config.get('archive-on-error') and ctx.summary['success']):

            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

        log.info('Cleaning ceph cluster...')
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo',
                    'rm',
                    '-rf',
                    '--',
                    conf_path,
                    keyring_path,
                    '{tdir}/data'.format(tdir=testdir),
                    '{tdir}/monmap'.format(tdir=testdir),
                    ],
                wait=False,
                ),
            )
示例#26
0
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""
    log.info("Building ceph cluster using ceph-deploy...")
    testdir = teuthology.get_testdir(ctx)
    ceph_branch = None
    if config.get("branch") is not None:
        cbranch = config.get("branch")
        for var, val in cbranch.iteritems():
            if var == "testing":
                ceph_branch = "--{var}".format(var=var)
            else:
                ceph_branch = "--{var}={val}".format(var=var, val=val)
    node_dev_list = []
    all_nodes = get_all_nodes(ctx, config)
    mds_nodes = get_nodes_using_roles(ctx, config, "mds")
    mds_nodes = " ".join(mds_nodes)
    mon_node = get_nodes_using_roles(ctx, config, "mon")
    mon_nodes = " ".join(mon_node)
    new_mon = "./ceph-deploy new" + " " + mon_nodes
    install_nodes = "./ceph-deploy install " + ceph_branch + " " + all_nodes
    purge_nodes = "./ceph-deploy purge" + " " + all_nodes
    purgedata_nodes = "./ceph-deploy purgedata" + " " + all_nodes
    mon_hostname = mon_nodes.split(" ")[0]
    mon_hostname = str(mon_hostname)
    gather_keys = "./ceph-deploy gatherkeys" + " " + mon_hostname
    deploy_mds = "./ceph-deploy mds create" + " " + mds_nodes
    no_of_osds = 0

    if mon_nodes is None:
        raise RuntimeError("no monitor nodes in the config file")

    estatus_new = execute_ceph_deploy(ctx, config, new_mon)
    if estatus_new != 0:
        raise RuntimeError("ceph-deploy: new command failed")

    log.info("adding config inputs...")
    testdir = teuthology.get_testdir(ctx)
    conf_path = "{tdir}/ceph-deploy/ceph.conf".format(tdir=testdir)
    first_mon = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(first_mon).remotes.keys()

    lines = None
    if config.get("conf") is not None:
        confp = config.get("conf")
        for section, keys in confp.iteritems():
            lines = "[{section}]\n".format(section=section)
            teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True)
            for key, value in keys.iteritems():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = "{key} = {value}\n".format(key=key, value=value)
                teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True)

    estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
    if estatus_install != 0:
        raise RuntimeError("ceph-deploy: Failed to install ceph")

    mon_no = None
    mon_no = config.get("mon_initial_members")
    if mon_no is not None:
        i = 0
        mon1 = []
        while i < mon_no:
            mon1.append(mon_node[i])
            i = i + 1
        initial_mons = " ".join(mon1)
        for k in range(mon_no, len(mon_node)):
            mon_create_nodes = "./ceph-deploy mon create" + " " + initial_mons + " " + mon_node[k]
            estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
            if estatus_mon != 0:
                raise RuntimeError("ceph-deploy: Failed to create monitor")
    else:
        mon_create_nodes = "./ceph-deploy mon create" + " " + mon_nodes
        estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
        if estatus_mon != 0:
            raise RuntimeError("ceph-deploy: Failed to create monitors")

    estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
    while estatus_gather != 0:
        # mon_create_nodes = './ceph-deploy mon create'+" "+mon_node[0]
        # execute_ceph_deploy(ctx, config, mon_create_nodes)
        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)

    if mds_nodes:
        estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
        if estatus_mds != 0:
            raise RuntimeError("ceph-deploy: Failed to deploy mds")

    if config.get("test_mon_destroy") is not None:
        for d in range(1, len(mon_node)):
            mon_destroy_nodes = "./ceph-deploy mon destroy" + " " + mon_node[d]
            estatus_mon_d = execute_ceph_deploy(ctx, config, mon_destroy_nodes)
            if estatus_mon_d != 0:
                raise RuntimeError("ceph-deploy: Failed to delete monitor")

    node_dev_list = get_dev_for_osd(ctx, config)
    for d in node_dev_list:
        osd_create_cmds = "./ceph-deploy osd create --zap-disk" + " " + d
        estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
        if estatus_osd == 0:
            log.info("successfully created osd")
            no_of_osds += 1
        else:
            zap_disk = "./ceph-deploy disk zap" + " " + d
            execute_ceph_deploy(ctx, config, zap_disk)
            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
            if estatus_osd == 0:
                log.info("successfully created osd")
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

    if config.get("wait-for-healthy", True) and no_of_osds >= 2:
        is_healthy(ctx=ctx, config=None)

        log.info("Setting up client nodes...")
        conf_path = "/etc/ceph/ceph.conf"
        admin_keyring_path = "/etc/ceph/ceph.client.admin.keyring"
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
        conf_data = teuthology.get_file(remote=mon0_remote, path=conf_path, sudo=True)
        admin_keyring = teuthology.get_file(remote=mon0_remote, path=admin_keyring_path, sudo=True)

        clients = ctx.cluster.only(teuthology.is_type("client"))
        for remot, roles_for_host in clients.remotes.iteritems():
            for id_ in teuthology.roles_of_type(roles_for_host, "client"):
                client_keyring = "/etc/ceph/ceph.client.{id}.keyring".format(id=id_)
                mon0_remote.run(
                    args=[
                        "cd",
                        "{tdir}".format(tdir=testdir),
                        run.Raw("&&"),
                        "sudo",
                        "bash",
                        "-c",
                        run.Raw('"'),
                        "ceph",
                        "auth",
                        "get-or-create",
                        "client.{id}".format(id=id_),
                        "mds",
                        "allow",
                        "mon",
                        "allow *",
                        "osd",
                        "allow *",
                        run.Raw(">"),
                        client_keyring,
                        run.Raw('"'),
                    ]
                )
                key_data = teuthology.get_file(remote=mon0_remote, path=client_keyring, sudo=True)
                teuthology.sudo_write_file(remote=remot, path=client_keyring, data=key_data, perms="0644")
                teuthology.sudo_write_file(remote=remot, path=admin_keyring_path, data=admin_keyring, perms="0644")
                teuthology.sudo_write_file(remote=remot, path=conf_path, data=conf_data, perms="0644")
    else:
        raise RuntimeError("The cluster is NOT operational due to insufficient OSDs")

    try:
        yield

    finally:
        log.info("Stopping ceph...")
        ctx.cluster.run(args=["sudo", "stop", "ceph-all", run.Raw("||"), "sudo", "service", "ceph", "stop"])

        if ctx.archive is not None:
            # archive mon data, too
            log.info("Archiving mon data...")
            path = os.path.join(ctx.archive, "data")
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type("mon"))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith("mon."):
                        teuthology.pull_directory_tarball(remote, "/var/lib/ceph/mon", path + "/" + role + ".tgz")

            log.info("Compressing logs...")
            run.wait(
                ctx.cluster.run(
                    args=[
                        "sudo",
                        "find",
                        "/var/log/ceph",
                        "-name",
                        "*.log",
                        "-print0",
                        run.Raw("|"),
                        "sudo",
                        "xargs",
                        "-0",
                        "--no-run-if-empty",
                        "--",
                        "gzip",
                        "--",
                    ],
                    wait=False,
                )
            )

            log.info("Archiving logs...")
            path = os.path.join(ctx.archive, "remote")
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, "/var/log/ceph", os.path.join(sub, "log"))

        log.info("Purging package...")
        execute_ceph_deploy(ctx, config, purge_nodes)
        log.info("Purging data...")
        execute_ceph_deploy(ctx, config, purgedata_nodes)
示例#27
0
def execute(ctx, config):
    """
    Run the blktrace program on remote machines.
    """
    procs = []
    testdir = teuthology.get_testdir(ctx)
    log_dir = '/home/ubuntu/archive/performance/blktrace'#.format(tdir=testdir)

    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = config['remote_to_roles_to_dev'][remote.name]
        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            if roles_to_devs.get(int(id_)):
                dev = roles_to_devs[int(id_)]
                log.info("running blktrace on %s: %s" % (remote.name, dev))

                proc = remote.run(
                    args=[
                        'daemon-helper',
                        daemon_signal,
                        'sudo', blktrace,
                        '-d', dev,
                        '-D', log_dir,
                        '-o', dev.rsplit("/", 1)[1],
                        ],
                    wait=False,   
                    stdin=run.PIPE,
                    )
                procs.append(proc)
                log.info(proc)

#        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
#            if roles_to_devs.get(int(id_)):
#                dev = roles_to_devs[int(id_)]
#                remote.run(
#                    args=[
#                        'sudo',
#                        'chmod',
#                        '0664',
#                        '{0}/{1}.blktrace.*'.format(log_dir, dev.rsplit("/",1)[1]),
#                        ],
#                    wait=False,
#                    )
    try:
        yield
    finally:
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        for remote, roles_for_host in osds.remotes.iteritems():
            roles_to_devs = config['remote_to_roles_to_dev'][remote.name]
            for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
                if roles_to_devs.get(int(id_)):
                    dev = roles_to_devs[int(id_)]
                    log.info("running blkparse on %s: %s" % (remote.name, dev))

                    remote.run(
                        args=[
                            'cd',
                            log_dir,
                            run.Raw(';'),
                            blkparse,
                            '-i', '{0}.blktrace.0'.format(dev.rsplit("/", 1)[1]),
                            '-o', '{0}.blkparse'.format(dev.rsplit("/", 1)[1]),
                            ],
                        wait=False,
                        )

        log.info('stopping blktrace processes')
        for proc in procs:
            proc.stdin.close()
示例#28
0
def cluster(ctx, config):
    log.info('Creating ceph cluster...')
    run.wait(
        ctx.cluster.run(
            args=[
                'install', '-d', '-m0755', '--',
                '/tmp/cephtest/data',
                ],
            wait=False,
            )
        )


    devs_to_clean = {}
    remote_to_roles_to_devs = {}
    remote_to_roles_to_journals = {}
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        roles_to_devs = {}
        roles_to_journals = {}
        if config.get('fs'):
            log.info('fs option selected, checking for scratch devs')
            log.info('found devs: %s' % (str(devs),))
            roles_to_devs = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), devs
                )
            if len(roles_to_devs) < len(devs):
                devs = devs[len(roles_to_devs):]
            log.info('dev map: %s' % (str(roles_to_devs),))
            devs_to_clean[remote] = []
            
        if config.get('block_journal'):
            log.info('block journal enabled')
            roles_to_journals = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), devs
                )
            log.info('journal map: %s', roles_to_journals)

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled')
            roles_to_journals = {}
            remote.run( args=[ 'sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt' ] )
            for osd in teuthology.roles_of_type(roles_for_host, 'osd'):
                tmpfs = '/mnt/osd.%s' % osd
                roles_to_journals[osd] = tmpfs
                remote.run( args=[ 'truncate', '-s', '1500M', tmpfs ] )
            log.info('journal map: %s', roles_to_journals)

        remote_to_roles_to_devs[remote] = roles_to_devs
        remote_to_roles_to_journals[remote] = roles_to_journals


    log.info('Generating config...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [roles for (remote, roles) in remotes_and_roles]
    ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, roles) in remotes_and_roles)]
    conf = teuthology.skeleton_config(roles=roles, ips=ips)
    for remote, roles_to_journals in remote_to_roles_to_journals.iteritems():
        for role, journal in roles_to_journals.iteritems():
            key = "osd." + str(role)
            if key not in conf:
                conf[key] = {}
            conf[key]['osd journal'] = journal
    for section, keys in config['conf'].iteritems():
        for key, value in keys.iteritems():
            log.info("[%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    if config.get('tmpfs_journal'):
        conf['journal dio'] = False

    ctx.ceph = argparse.Namespace()
    ctx.ceph.conf = conf

    log.info('Writing configs...')
    conf_fp = StringIO()
    conf.write(conf_fp)
    conf_fp.seek(0)
    writes = ctx.cluster.run(
        args=[
            'python',
            '-c',
            'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))',
            '/tmp/cephtest/ceph.conf',
            ],
        stdin=run.PIPE,
        wait=False,
        )
    teuthology.feed_many_stdins_and_close(conf_fp, writes)
    run.wait(writes)

    coverage_dir = '/tmp/cephtest/archive/coverage'

    firstmon = teuthology.get_first_mon(ctx, config)

    log.info('Setting up %s...' % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            '/tmp/cephtest/enable-coredump',
            '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
            coverage_dir,
            '/tmp/cephtest/binary/usr/local/bin/ceph-authtool',
            '--create-keyring',
            '/tmp/cephtest/ceph.keyring',
            ],
        )
    ctx.cluster.only(firstmon).run(
        args=[
            '/tmp/cephtest/enable-coredump',
            '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
            coverage_dir,
            '/tmp/cephtest/binary/usr/local/bin/ceph-authtool',
            '--gen-key',
            '--name=mon.',
            '/tmp/cephtest/ceph.keyring',
            ],
        )
    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
    teuthology.create_simple_monmap(
        remote=mon0_remote,
        conf=conf,
        )

    log.info('Creating admin key on %s...' % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            '/tmp/cephtest/enable-coredump',
            '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
            coverage_dir,
            '/tmp/cephtest/binary/usr/local/bin/ceph-authtool',
            '--gen-key',
            '--name=client.admin',
            '--set-uid=0',
            '--cap', 'mon', 'allow *',
            '--cap', 'osd', 'allow *',
            '--cap', 'mds', 'allow',
            '/tmp/cephtest/ceph.keyring',
            ],
        )

    log.info('Copying monmap to all nodes...')
    keyring = teuthology.get_file(
        remote=mon0_remote,
        path='/tmp/cephtest/ceph.keyring',
        )
    monmap = teuthology.get_file(
        remote=mon0_remote,
        path='/tmp/cephtest/monmap',
        )

    for rem in ctx.cluster.remotes.iterkeys():
        # copy mon key and initial monmap
        log.info('Sending monmap to node {remote}'.format(remote=rem))
        teuthology.write_file(
            remote=rem,
            path='/tmp/cephtest/ceph.keyring',
            data=keyring,
            )
        teuthology.write_file(
            remote=rem,
            path='/tmp/cephtest/monmap',
            data=monmap,
            )

    log.info('Setting up mon nodes...')
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    run.wait(
        mons.run(
            args=[
                '/tmp/cephtest/enable-coredump',
                '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                coverage_dir,
                '/tmp/cephtest/binary/usr/local/bin/osdmaptool',
                '--clobber',
                '--createsimple', '{num:d}'.format(
                    num=teuthology.num_instances_of_type(ctx.cluster, 'osd'),
                    ),
                '/tmp/cephtest/osdmap',
                '--pg_bits', '2',
                '--pgp_bits', '4',
                ],
            wait=False,
            ),
        )

    log.info('Setting up osd nodes...')
    for remote, roles_for_host in osds.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            remote.run(
                args=[
                    '/tmp/cephtest/enable-coredump',
                    '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                    coverage_dir,
                    '/tmp/cephtest/binary/usr/local/bin/ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    '--name=osd.{id}'.format(id=id_),
                    '/tmp/cephtest/data/osd.{id}.keyring'.format(id=id_),
                    ],
                )

    log.info('Setting up mds nodes...')
    mdss = ctx.cluster.only(teuthology.is_type('mds'))
    for remote, roles_for_host in mdss.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'mds'):
            remote.run(
                args=[
                    '/tmp/cephtest/enable-coredump',
                    '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                    coverage_dir,
                    '/tmp/cephtest/binary/usr/local/bin/ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    '--name=mds.{id}'.format(id=id_),
                    '/tmp/cephtest/data/mds.{id}.keyring'.format(id=id_),
                    ],
                )

    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client'))
    for remote, roles_for_host in clients.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
            remote.run(
                args=[
                    '/tmp/cephtest/enable-coredump',
                    '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                    coverage_dir,
                    '/tmp/cephtest/binary/usr/local/bin/ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    # TODO this --name= is not really obeyed, all unknown "types" are munged to "client"
                    '--name=client.{id}'.format(id=id_),
                    '/tmp/cephtest/data/client.{id}.keyring'.format(id=id_),
                    ],
                )

    log.info('Reading keys from all nodes...')
    keys_fp = StringIO()
    keys = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['osd', 'mds', 'client']:
            for id_ in teuthology.roles_of_type(roles_for_host, type_):
                data = teuthology.get_file(
                    remote=remote,
                    path='/tmp/cephtest/data/{type}.{id}.keyring'.format(
                        type=type_,
                        id=id_,
                        ),
                    )
                keys.append((type_, id_, data))
                keys_fp.write(data)

    log.info('Adding keys to all mons...')
    writes = mons.run(
        args=[
            'cat',
            run.Raw('>>'),
            '/tmp/cephtest/ceph.keyring',
            ],
        stdin=run.PIPE,
        wait=False,
        )
    keys_fp.seek(0)
    teuthology.feed_many_stdins_and_close(keys_fp, writes)
    run.wait(writes)
    for type_, id_, data in keys:
        run.wait(
            mons.run(
                args=[
                    '/tmp/cephtest/enable-coredump',
                    '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                    coverage_dir,
                    '/tmp/cephtest/binary/usr/local/bin/ceph-authtool',
                    '/tmp/cephtest/ceph.keyring',
                    '--name={type}.{id}'.format(
                        type=type_,
                        id=id_,
                        ),
                    ] + list(teuthology.generate_caps(type_)),
                wait=False,
                ),
            )

    log.info('Running mkfs on mon nodes...')
    for remote, roles_for_host in mons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'mon'):
            remote.run(
                args=[
                    '/tmp/cephtest/enable-coredump',
                    '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                    coverage_dir,
                    '/tmp/cephtest/binary/usr/local/bin/ceph-mon',
                    '--mkfs',
                    '-i', id_,
                    '-c', '/tmp/cephtest/ceph.conf',
                    '--monmap=/tmp/cephtest/monmap',
                    '--osdmap=/tmp/cephtest/osdmap',
                    '--keyring=/tmp/cephtest/ceph.keyring',
                    ],
                )

    log.info('Running mkfs on osd nodes...')
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = remote_to_roles_to_devs[remote]
        roles_to_journals = remote_to_roles_to_journals[remote]
        ctx.disk_config = argparse.Namespace()
        ctx.disk_config.remote_to_roles_to_dev = remote_to_roles_to_devs
        ctx.disk_config.remote_to_roles_to_journals = remote_to_roles_to_journals

        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            log.info(str(roles_to_journals))
            log.info(id_)
            remote.run(
                args=[
                    'mkdir',
                    os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)),
                    ],
                )
            if roles_to_devs.get(id_):
                dev = roles_to_devs[id_]
                fs = config.get('fs')
                package = None
                mkfs_options = config.get('mkfs_options')
                mount_options = config.get('mount_options')
                if fs == 'btrfs':
                    package = 'btrfs-tools'
                    if mount_options is None:
                        mount_options = ['noatime','user_subvol_rm_allowed']
                    if mkfs_options is None:
                        mkfs_options = ['-m', 'single',
                                        '-l', '32768',
                                        '-n', '32768']
                if fs == 'xfs':
                    package = 'xfsprogs'
                    if mount_options is None:
                        mount_options = ['noatime']
                    if mkfs_options is None:
                        mkfs_options = ['-f', '-i', 'size=2048']
                if fs == 'ext4' or fs == 'ext3':
                    if mount_options is None:
                        mount_options = ['noatime','user_xattr']

                if mount_options is None:
                    mount_options = []
                if mkfs_options is None:
                    mkfs_options = []
                mkfs = ['mkfs.%s' % fs] + mkfs_options
                log.info('%s on %s on %s' % (mkfs, dev, remote))
                if package is not None:
                    remote.run(
                        args=[
                            'sudo',
                            'apt-get', 'install', '-y', package
                            ]
                        )
                remote.run(args= ['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])
                log.info('mount %s on %s -o %s' % (dev, remote,
                                                   ','.join(mount_options)))
                remote.run(
                    args=[
                        'sudo',
                        'mount',
                        '-t', fs,
                        '-o', ','.join(mount_options),
                        dev,
                        os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)),
                        ]
                    )
                remote.run(
                    args=[
                        'sudo', 'chown', '-R', 'ubuntu.ubuntu',
                        os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_))
                        ]
                    )
                remote.run(
                    args=[
                        'sudo', 'chmod', '-R', '755',
                        os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_))
                        ]
                    )
                devs_to_clean[remote].append(
                    os.path.join(
                        '/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)
                        )
                    )

        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            remote.run(
                args=[
                    '/tmp/cephtest/enable-coredump',
                    '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                    coverage_dir,
                    '/tmp/cephtest/binary/usr/local/bin/ceph-osd',
                    '--mkfs',
                    '-i', id_,
                    '-c', '/tmp/cephtest/ceph.conf',
                    '--monmap', '/tmp/cephtest/monmap',
                    ],
                )
    run.wait(
        mons.run(
            args=[
                'rm',
                '--',
                '/tmp/cephtest/monmap',
                '/tmp/cephtest/osdmap',
                ],
            wait=False,
            ),
        )

    try:
        yield
    finally:
        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()

        log.info('Checking cluster log for badness...')
        def first_in_ceph_log(pattern, excludes):
            args = [
                'egrep', pattern,
                '/tmp/cephtest/archive/log/cluster.%s.log' % firstmon,
                ]
            for exclude in excludes:
                args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                    run.Raw('|'), 'head', '-n', '1',
                    ])
            r = mon0_remote.run(
                stdout=StringIO(),
                args=args,
                )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
                             config['log_whitelist']) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log_whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                            match=match.rstrip('\n'),
                            )
                        break

        for remote, dirs in devs_to_clean.iteritems():
            for dir_ in dirs:
                log.info('Unmounting %s on %s' % (dir_, remote))
                remote.run(
                    args=[
                        'sync',
                        run.Raw('&&'),
                        'sudo',
                        'umount',
                        '-f',
                        dir_
                        ]
                    )

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled - unmounting tmpfs at /mnt')
            for remote, roles_for_host in osds.remotes.iteritems():
                remote.run(
                    args=[ 'sudo', 'umount', '-f', '/mnt' ],
                    check_status=False,
                )

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(remote,
                                       '/tmp/cephtest/data/%s' % role,
                                       path + '/' + role + '.tgz')

        log.info('Cleaning ceph cluster...')
        run.wait(
            ctx.cluster.run(
                args=[
                    'rm',
                    '-rf',
                    '--',
                    '/tmp/cephtest/ceph.conf',
                    '/tmp/cephtest/ceph.keyring',
                    '/tmp/cephtest/data',
                    '/tmp/cephtest/monmap',
                    run.Raw('/tmp/cephtest/asok.*')
                    ],
                wait=False,
                ),
            )
示例#29
0
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the download task
    # puts it.  Remember this here, because subsequently IDs will change from those in
    # the test config to those that ceph-deploy invents.
    (ceph_admin,) = ctx.cluster.only(
        teuthology.get_first_mon(ctx, config)).remotes.iterkeys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.iteritems():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        mgr_nodes = get_nodes_using_role(ctx, 'mgr')
        mgr_nodes = " ".join(mgr_nodes)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(
                        ceph_admin, conf_path, lines, sudo=True)

        # install ceph
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
                         " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)
        execute_ceph_deploy(mgr_create)

        # create-keys is explicit now
        # http://tracker.ceph.com/issues/16036
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.iterkeys():
            remote.run(args=['sudo', 'ceph-create-keys', '--cluster', 'ceph',
                             '--id', remote.shortname])

        estatus_gather = execute_ceph_deploy(gather_keys)
        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ':' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(
                remote=mon0_remote,
                path=conf_path,
                sudo=True,
            )
            admin_keyring = teuthology.get_file(
                remote=mon0_remote,
                path=admin_keyring_path,
                sudo=True,
            )

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    mon0_remote.run(
                        args=[
                            'cd',
                            '{tdir}'.format(tdir=testdir),
                            run.Raw('&&'),
                            'sudo', 'bash', '-c',
                            run.Raw('"'), 'ceph',
                            'auth',
                            'get-or-create',
                            'client.{id}'.format(id=id_),
                            'mds', 'allow',
                            'mon', 'allow *',
                            'osd', 'allow *',
                            run.Raw('>'),
                            client_keyring,
                            run.Raw('"'),
                        ],
                    )
                    key_data = teuthology.get_file(
                        remote=mon0_remote,
                        path=client_keyring,
                        sudo=True,
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=client_keyring,
                        data=key_data,
                        perms='0644'
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=admin_keyring_path,
                        data=admin_keyring,
                        perms='0644'
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=conf_path,
                        data=conf_data,
                        perms='0644'
                    )

            if mds_nodes:
                log.info('Configuring CephFS...')
                ceph_fs = Filesystem(ctx, create=True)
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
                              'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
                              'sudo', 'systemctl', 'stop', 'ceph.target'])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(
            args=[
                'sudo', 'status', 'ceph-all', run.Raw('||'),
                'sudo', 'service', 'ceph', 'status', run.Raw('||'),
                'sudo', 'systemctl', 'status', 'ceph.target'],
            check_status=False)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)
Example #30
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.iteritems():
                if var == 'testing':
                    ceph_branch = '--{var}'.format(var=var)
                else:
                    ceph_branch = '--{var}={val}'.format(var=var, val=val)
        node_dev_list = []
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_roles(ctx, config, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_roles(ctx, config, 'mon')
        mon_nodes = " ".join(mon_node)
        new_mon = './ceph-deploy new'+" "+mon_nodes
        install_nodes = './ceph-deploy install '+ceph_branch+" "+all_nodes
        purge_nodes = './ceph-deploy purge'+" "+all_nodes
        purgedata_nodes = './ceph-deploy purgedata'+" "+all_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys'+" "+mon_hostname
        deploy_mds = './ceph-deploy mds create'+" "+mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(ctx, config, new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)
        first_mon = teuthology.get_first_mon(ctx, config)
        (remote,) = ctx.cluster.only(first_mon).remotes.keys()

        lines = None
        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(remote, conf_path, lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(remote, conf_path, lines,
                                                    sudo=True)

        estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(ctx, config, mon_create_nodes)

        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
        max_gather_tries = 90
        gather_tries = 0
        while (estatus_gather != 0):
            gather_tries += 1
            if gather_tries >= max_gather_tries:
                msg = 'ceph-deploy was not able to gatherkeys after 15 minutes'
                raise RuntimeError(msg)
            estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
            time.sleep(10)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy'+" "+mon_node[d]
                estatus_mon_d = execute_ceph_deploy(ctx, config,
                                                    mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        osd_create_cmd = './ceph-deploy osd create --zap-disk '
        for d in node_dev_list:
            if config.get('dmcrypt') is not None:
                osd_create_cmd_d = osd_create_cmd+'--dmcrypt'+" "+d
            else:
                osd_create_cmd_d = osd_create_cmd+d
            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                disks = []
                disks = d.split(':')
                dev_disk = disks[0]+":"+disks[1]
                j_disk = disks[0]+":"+disks[2]
                zap_disk = './ceph-deploy disk zap '+dev_disk+" "+j_disk
                execute_ceph_deploy(ctx, config, zap_disk)
                estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d)
                if estatus_osd == 0:
                    log.info('successfully created osd')
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(
                remote=mon0_remote,
                path=conf_path,
                sudo=True,
                )
            admin_keyring = teuthology.get_file(
                remote=mon0_remote,
                path=admin_keyring_path,
                sudo=True,
                )

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    mon0_remote.run(
                        args=[
                            'cd',
                            '{tdir}'.format(tdir=testdir),
                            run.Raw('&&'),
                            'sudo', 'bash', '-c',
                            run.Raw('"'), 'ceph',
                            'auth',
                            'get-or-create',
                            'client.{id}'.format(id=id_),
                            'mds', 'allow',
                            'mon', 'allow *',
                            'osd', 'allow *',
                            run.Raw('>'),
                            client_keyring,
                            run.Raw('"'),
                            ],
                        )
                    key_data = teuthology.get_file(
                        remote=mon0_remote,
                        path=client_keyring,
                        sudo=True,
                        )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=client_keyring,
                        data=key_data,
                        perms='0644'
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=admin_keyring_path,
                        data=admin_keyring,
                        perms='0644'
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=conf_path,
                        data=conf_data,
                        perms='0644'
                    )
        else:
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        yield

    finally:
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
                              'sudo', 'service', 'ceph', 'stop' ])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(args=['sudo', 'status', 'ceph-all', run.Raw('||'),
                              'sudo', 'service',  'ceph', 'status'],
                              check_status=False)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                        ],
                    wait=False,
                    ),
                )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge'+" "+all_nodes
        purgedata_nodes = './ceph-deploy purgedata'+" "+all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(ctx, config, purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(ctx, config, purgedata_nodes)
Example #31
def build_ceph_cluster(ctx, config):
    log.info('Building ceph cluster using ceph-deploy...')
    testdir = teuthology.get_testdir(ctx)
    ceph_branch = None
    if config.get('branch') is not None:
        cbranch = config.get('branch')
        for var, val in cbranch.iteritems():
            if var == 'testing':
                ceph_branch = '--{var}'.format(var=var)
            else:
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
    node_dev_list = []
    all_nodes = get_all_nodes(ctx, config)
    mds_nodes = get_nodes_using_roles(ctx, config, 'mds')
    mds_nodes = " ".join(mds_nodes)
    mon_node = get_nodes_using_roles(ctx, config, 'mon')
    mon_nodes = " ".join(mon_node)
    new_mon = './ceph-deploy new'+" "+mon_nodes
    install_nodes = './ceph-deploy install '+ceph_branch+" "+all_nodes
    purge_nodes = './ceph-deploy purge'+" "+all_nodes
    purgedata_nodes = './ceph-deploy purgedata'+" "+all_nodes
    mon_hostname = mon_nodes.split(' ')[0]
    mon_hostname = str(mon_hostname)
    gather_keys = './ceph-deploy gatherkeys'+" "+mon_hostname
    deploy_mds = './ceph-deploy mds create'+" "+mds_nodes
    no_of_osds = 0

    if mon_nodes is None:
        raise RuntimeError("no monitor nodes in the config file")

    estatus_new = execute_ceph_deploy(ctx, config, new_mon)
    if estatus_new != 0:
        raise RuntimeError("ceph-deploy: new command failed")

    log.info('adding config inputs...')
    testdir = teuthology.get_testdir(ctx)
    conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)
    first_mon = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(first_mon).remotes.keys()

    lines = None
    if config.get('conf') is not None:
        confp = config.get('conf')
        for section, keys in confp.iteritems():
            lines = '[{section}]\n'.format(section=section)
            teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True)
            for key, value in keys.iteritems():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = '{key} = {value}\n'.format(key=key, value=value)
                teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True)

    estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
    if estatus_install != 0:
        raise RuntimeError("ceph-deploy: Failed to install ceph")

    mon_no = None
    mon_no = config.get('mon_initial_members')
    if mon_no is not None:
        i = 0
        mon1 = []
        while(i < mon_no):
            mon1.append(mon_node[i])
            i = i + 1
        initial_mons = " ".join(mon1)
        for k in range(mon_no, len(mon_node)):
            mon_create_nodes = './ceph-deploy mon create'+" "+initial_mons+" "+mon_node[k]
            estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
            if estatus_mon != 0:
                raise RuntimeError("ceph-deploy: Failed to create monitor")
    else:
        mon_create_nodes = './ceph-deploy mon create'+" "+mon_nodes
        estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
        if estatus_mon != 0:
            raise RuntimeError("ceph-deploy: Failed to create monitors")

    estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
    while (estatus_gather != 0):
        #mon_create_nodes = './ceph-deploy mon create'+" "+mon_node[0]
        #execute_ceph_deploy(ctx, config, mon_create_nodes)
        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)

    if mds_nodes:
        estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
        if estatus_mds != 0:
            raise RuntimeError("ceph-deploy: Failed to deploy mds")

    if config.get('test_mon_destroy') is not None:
        for d in range(1, len(mon_node)):
            mon_destroy_nodes = './ceph-deploy mon destroy'+" "+mon_node[d]
            estatus_mon_d = execute_ceph_deploy(ctx, config, mon_destroy_nodes)
            if estatus_mon_d != 0:
                raise RuntimeError("ceph-deploy: Failed to delete monitor")

    node_dev_list = get_dev_for_osd(ctx, config)
    for d in node_dev_list:
        osd_create_cmds = './ceph-deploy osd create --zap-disk'+" "+d
        estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
        if estatus_osd == 0:
            log.info('successfully created osd')
            no_of_osds += 1
        else:
            zap_disk = './ceph-deploy disk zap'+" "+d
            execute_ceph_deploy(ctx, config, zap_disk)
            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

    if config.get('wait-for-healthy', True) and no_of_osds >= 2:
        is_healthy(ctx=ctx, config=None)

        log.info('Setting up client nodes...')
        conf_path = '/etc/ceph/ceph.conf'
        admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
        conf_data = teuthology.get_file(
            remote=mon0_remote,
            path=conf_path,
            sudo=True,
            )
        admin_keyring = teuthology.get_file(
            remote=mon0_remote,
            path=admin_keyring_path,
            sudo=True,
            )

        clients = ctx.cluster.only(teuthology.is_type('client'))
        for remot, roles_for_host in clients.remotes.iteritems():
            for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                client_keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                mon0_remote.run(
                    args=[
                        'cd',
                        '{tdir}'.format(tdir=testdir),
                        run.Raw('&&'),
                        'sudo','bash','-c',
                        run.Raw('"'),'ceph',
                        'auth',
                        'get-or-create',
                        'client.{id}'.format(id=id_),
                        'mds', 'allow',
                        'mon', 'allow *',
                        'osd', 'allow *',
                        run.Raw('>'),
                        client_keyring,
                        run.Raw('"'),
                        ],
                    )
                key_data = teuthology.get_file(
                    remote=mon0_remote,
                    path=client_keyring,
                    sudo=True,
                    )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=client_keyring,
                    data=key_data,
                    perms='0644'
                )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=admin_keyring_path,
                    data=admin_keyring,
                    perms='0644'
                )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=conf_path,
                    data=conf_data,
                    perms='0644'
                )
    else:
        raise RuntimeError("The cluster is NOT operational due to insufficient OSDs")

    try:
        yield

    finally:
        log.info('Stopping ceph...')
        ctx.cluster.run(args=[
                'sudo', 'stop', 'ceph-all',
                run.Raw('||'),
                'sudo', 'service', 'ceph', 'stop'
                ])

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                        ],
                    wait=False,
                    ),
                )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        log.info('Purging package...')
        execute_ceph_deploy(ctx, config, purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(ctx, config, purgedata_nodes)
Example #32
def task(ctx, config):
    """
    Mount/unmount a ``kernel`` client.

    The config is optional and defaults to mounting on all clients. If
    a config is given, it is expected to be a list of clients to do
    this operation on. This lets you e.g. set up one client with
    ``ceph-fuse`` and another with ``kclient``.

    Example that mounts all clients::

        tasks:
        - ceph:
        - kclient:
        - interactive:

    Example that uses both ``kclient`` and ``ceph-fuse``::

        tasks:
        - ceph:
        - ceph-fuse: [client.0]
        - kclient: [client.1]
        - interactive:


    Pass a dictionary instead of a list to specify per-client config::

        tasks:
        - kclient:
            client.0:
                debug: true

    :param ctx: Context
    :param config: Configuration
    """
    log.info('Mounting kernel clients...')
    assert config is None or isinstance(config, list) or isinstance(config, dict), \
        "task kclient got invalid config"

    if config is None:
        config = ['client.{id}'.format(id=id_)
                  for id_ in misc.all_roles_of_type(ctx.cluster, 'client')]

    if isinstance(config, list):
        client_roles = config
        config = dict([r, dict()] for r in client_roles)
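        # e.g. a list config of ['client.0', 'client.1'] becomes {'client.0': {}, 'client.1': {}} here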
    elif isinstance(config, dict):
        client_roles = config.keys()
    else:
        raise ValueError("Invalid config object: {0} ({1})".format(config, config.__class__))

    clients = ctx.cluster.only(misc.is_type('client'))
    test_dir = misc.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=test_dir)

    for remote, roles_for_host in clients.remotes.iteritems():
        for id_ in misc.roles_of_type(roles_for_host, 'client'):
            client_keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
            remote.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph', 'auth', '-o', client_keyring, 'get', 'client.{id}'.format(id=id_),
                ],
            )

    # config has been converted to a dict by this point
    overrides = ctx.config.get('overrides', {})
    deep_merge(config, overrides.get('kclient', {}))

    clients = list(misc.get_clients(ctx=ctx, roles=client_roles))

    # Assemble mon addresses
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [roles for (remote_, roles) in remotes_and_roles]
    ips = [remote_.ssh.get_transport().getpeername()[0]
           for (remote_, _) in remotes_and_roles]
    mons = misc.get_mons(roles, ips).values()

    mounts = {}
    for id_, remote in clients:
        kernel_mount = KernelMount(
            mons,
            test_dir,
            id_,
            remote,
            ctx.teuthology_config.get('ipmi_user', None),
            ctx.teuthology_config.get('ipmi_password', None),
            ctx.teuthology_config.get('ipmi_domain', None)
        )

        mounts[id_] = kernel_mount

        client_config = config["client.{0}".format(id_)]
        if client_config.get('debug', False):
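            # '+p' enables dynamic-debug (pr_debug) output from the ceph and libceph kernel modules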
            remote.run(args=["sudo", "bash", "-c", "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control"])
            remote.run(args=["sudo", "bash", "-c", "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control"])

        kernel_mount.mount()

    ctx.mounts = mounts
    try:
        yield mounts
    finally:
        log.info('Unmounting kernel clients...')
        for mount in mounts.values():
            mount.umount()
Example #33
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the download task
    # puts it.  Remember this here, because subsequently IDs will change from those in
    # the test config to those that ceph-deploy invents.

    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

    def ceph_disk_osd_create(ctx, config):
        node_dev_list = get_dev_for_osd(ctx, config)
        no_of_osds = 0
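        # Each entry of node_dev_list is assumed to be shaped like [node, dev1, dev2, ...]:
        # every device is zapped first, then ':'.join(d) below builds the node:dev1:dev2
        # argument form that the older (pre ceph-volume) ceph-deploy 'osd create' takes.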
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ' ' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            # first check for filestore, default is bluestore with ceph-deploy
            if config.get('filestore') is not None:
                osd_create_cmd += '--filestore '
            elif config.get('bluestore') is not None:
                osd_create_cmd += '--bluestore '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

    def ceph_volume_osd_create(ctx, config):
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        no_of_osds = 0
        for remote in osds.remotes.keys():
            # all devs should be lvm
            osd_create_cmd = './ceph-deploy osd create --debug ' + remote.shortname + ' '
            # default is bluestore so we just need config item for filestore
            roles = ctx.cluster.remotes[remote]
            dev_needed = len([role for role in roles
                              if role.startswith('osd')])
            all_devs = teuthology.get_scratch_devices(remote)
            log.info("node={n}, need_devs={d}, available={a}".format(
                        n=remote.shortname,
                        d=dev_needed,
                        a=all_devs,
                        ))
            devs = all_devs[0:dev_needed]
            # rest of the devices can be used for journal if required
            jdevs = dev_needed
            for device in devs:
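                # Assumption: each scratch device here is an LVM logical volume path such as
                # /dev/<vg>/<lv>; the split below reduces it to the '<vg>/<lv>' form that the
                # ceph-volume backed 'osd create --data' invocation expects.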
                device_split = device.split('/')
                lv_device = device_split[-2] + '/' + device_split[-1]
                if config.get('filestore') is not None:
                    osd_create_cmd += '--filestore --data ' + lv_device + ' '
                    # filestore with ceph-volume also needs journal disk
                    try:
                        jdevice = all_devs.pop(jdevs)
                    except IndexError:
                        raise RuntimeError("No device available for \
                                            journal configuration")
                    jdevice_split = jdevice.split('/')
                    j_lv = jdevice_split[-2] + '/' + jdevice_split[-1]
                    osd_create_cmd += '--journal ' + j_lv
                else:
                    osd_create_cmd += ' --data ' + lv_device
                estatus_osd = execute_ceph_deploy(osd_create_cmd)
                if estatus_osd == 0:
                    log.info('successfully created osd')
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.items():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        # skip mgr based on config item
        # this is needed when test uses latest code to install old ceph
        # versions
        skip_mgr = config.get('skip-mgr', False)
        if not skip_mgr:
            mgr_nodes = get_nodes_using_role(ctx, 'mgr')
            mgr_nodes = " ".join(mgr_nodes)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        if not skip_mgr:
            mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.items():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
                                                sudo=True)
                for key, value in keys.items():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(
                        ceph_admin, conf_path, lines, sudo=True)

        # install ceph
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
                         " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        estatus_gather = execute_ceph_deploy(gather_keys)
        if estatus_gather != 0:
            raise RuntimeError("ceph-deploy: Failed during gather keys")

        # install admin key on mons (ceph-create-keys doesn't do this any more)
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.keys():
            execute_ceph_deploy('./ceph-deploy admin ' + remote.shortname)

        # create osd's
        if config.get('use-ceph-volume', False):
            no_of_osds = ceph_volume_osd_create(ctx, config)
        else:
            # this method will only work with ceph-deploy v1.5.39 or older
            no_of_osds = ceph_disk_osd_create(ctx, config)

        if not skip_mgr:
            execute_ceph_deploy(mgr_create)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")



        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(
                remote=mon0_remote,
                path=conf_path,
                sudo=True,
            )
            admin_keyring = teuthology.get_file(
                remote=mon0_remote,
                path=admin_keyring_path,
                sudo=True,
            )

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remot, roles_for_host in clients.remotes.items():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    mon0_remote.run(
                        args=[
                            'cd',
                            '{tdir}'.format(tdir=testdir),
                            run.Raw('&&'),
                            'sudo', 'bash', '-c',
                            run.Raw('"'), 'ceph',
                            'auth',
                            'get-or-create',
                            'client.{id}'.format(id=id_),
                            'mds', 'allow',
                            'mon', 'allow *',
                            'osd', 'allow *',
                            run.Raw('>'),
                            client_keyring,
                            run.Raw('"'),
                        ],
                    )
                    key_data = teuthology.get_file(
                        remote=mon0_remote,
                        path=client_keyring,
                        sudo=True,
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=client_keyring,
                        data=key_data,
                        perms='0644'
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=admin_keyring_path,
                        data=admin_keyring,
                        perms='0644'
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=conf_path,
                        data=conf_data,
                        perms='0644'
                    )

            if mds_nodes:
                log.info('Configuring CephFS...')
                Filesystem(ctx, create=True)
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        # create rbd pool
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'create', 'rbd', '128', '128'],
            check_status=False)
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'application', 'enable',
                'rbd', 'rbd', '--yes-i-really-mean-it'
                ],
            check_status=False)
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)
        ctx.cluster.run(args=['sudo', 'systemctl', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.items():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)
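
For orientation, here is a minimal sketch of the task config dict this ceph-deploy variant consumes. The key names come from the config.get() calls above; the values (and the branch/conf contents) are illustrative assumptions, not taken from an actual teuthology job:

# Hypothetical config for build_ceph_cluster (illustrative values only)
config = {
    'branch': {'dev': 'luminous'},       # each var/val pair becomes '--dev=luminous' for './ceph-deploy install'
    'conf': {'global': {'osd pool default size': 2}},  # sections/keys appended to ceph-deploy/ceph.conf
    'use-ceph-volume': True,             # pick ceph_volume_osd_create() instead of the ceph-disk path
    'skip-mgr': False,                   # when True, skip './ceph-deploy mgr create'
    'wait-for-healthy': True,            # call is_healthy() once at least two OSDs exist
    'keep_running': False,               # when True, the finally block skips teardown
    # 'filestore': True,                 # any non-None value adds --filestore (default is bluestore)
    # 'dmcrypt': True,                   # any non-None value adds --dmcrypt (ceph-disk path only)
    # 'test_mon_destroy': True,          # exercise './ceph-deploy mon destroy' on the extra monitors
    # 'only_mon': True,                  # tolerate a cluster with fewer than two OSDs
}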
Example #34
File: ceph.py  Project: tv42/teuthology
def run_daemon(ctx, config, type):
    log.info('Starting %s daemons...' % type)
    daemons = ctx.cluster.only(teuthology.is_type(type))
    coverage_dir = '/tmp/cephtest/archive/coverage'

    daemon_signal = 'kill'
    if config.get('coverage'):
        log.info('Recording coverage for this run.')
        daemon_signal = 'term'

    num_active = 0
    for remote, roles_for_host in daemons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, type):
            name = '%s.%s' % (type, id_)

            if not id_.endswith('-s'):
                num_active += 1

            proc_signal = daemon_signal
            run_cmd = [
                '/tmp/cephtest/enable-coredump',
                '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                coverage_dir,
                '/tmp/cephtest/daemon-helper',
                ]
            run_cmd_tail = [
                '/tmp/cephtest/binary/usr/local/bin/ceph-%s' % type,
                '-f',
                '-i', id_,
                '-c', '/tmp/cephtest/ceph.conf']

            extra_args = None

            if config.get('valgrind') and (config.get('valgrind').get(name, None) is not None):
                valgrind_args = config.get('valgrind').get(name)
                if not isinstance(valgrind_args, list):
                    valgrind_args = [valgrind_args]
                log.debug('running %s under valgrind with args %s' % (name, valgrind_args))
                val_path = '/tmp/cephtest/archive/log/{val_dir}'.format(val_dir=config.get('valgrind').get('logs', 'valgrind'))
                proc_signal = 'term'
                if '--tool=memcheck' in valgrind_args or \
                        '--tool=helgrind' in valgrind_args:
                    extra_args = ['valgrind', '--xml=yes', '--xml-file={vdir}/{type}.{id}.log'.format(vdir=val_path, type=type, id=id_)]
                else:
                    extra_args = ['valgrind', '--log-file={vdir}/{type}.{id}.log'.format(vdir=val_path, type=type, id=id_)]
                extra_args.extend(valgrind_args)

            run_cmd.append(proc_signal)
            if extra_args is not None:
                run_cmd.extend(extra_args)
            run_cmd.extend(run_cmd_tail)
            ctx.daemons.add_daemon(remote, type, id_,
                                   args=run_cmd,
                                   logger=log.getChild(name),
                                   stdin=run.PIPE,
                                   wait=False,
                                   )

    if type == 'mds':
        firstmon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
        mon0_remote.run(args=[
            '/tmp/cephtest/enable-coredump',
            '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
            coverage_dir,
            '/tmp/cephtest/binary/usr/local/bin/ceph',
            '-c', '/tmp/cephtest/ceph.conf',
            'mds', 'set_max_mds', str(num_active)])

    try:
        yield
    finally:
        log.info('Shutting down %s daemons...' % type)
        [i.stop() for i in ctx.daemons.iter_daemons_of_role(type)]
Example #35
def run_daemon(ctx, config, type_):
    log.info('Starting %s daemons...' % type_)
    daemons = ctx.cluster.only(teuthology.is_type(type_))
    coverage_dir = '/tmp/cephtest/archive/coverage'

    daemon_signal = 'kill'
    if config.get('coverage') or config.get('valgrind') is not None:
        daemon_signal = 'term'

    num_active = 0
    for remote, roles_for_host in daemons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, type_):
            name = '%s.%s' % (type_, id_)

            if not id_.endswith('-s'):
                num_active += 1

            run_cmd = [
                '/tmp/cephtest/enable-coredump',
                '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
                coverage_dir,
                '/tmp/cephtest/daemon-helper',
                daemon_signal,
                ]
            run_cmd_tail = [
                '/tmp/cephtest/binary/usr/local/bin/ceph-%s' % type_,
                '-f',
                '-i', id_,
                '-c', '/tmp/cephtest/ceph.conf']

            if config.get('valgrind') is not None:
                valgrind_args = None
                if type_ in config['valgrind']:
                    valgrind_args = config['valgrind'][type_]
                if name in config['valgrind']:
                    valgrind_args = config['valgrind'][name]
                run_cmd.extend(teuthology.get_valgrind_args(name, valgrind_args))

            if type_ in config.get('cpu_profile', []):
                profile_path = '/tmp/cephtest/archive/log/%s.%s.prof' % (type_, id_)
                run_cmd.extend([ 'env', 'CPUPROFILE=%s' % profile_path ])

            run_cmd.extend(run_cmd_tail)
            ctx.daemons.add_daemon(remote, type_, id_,
                                   args=run_cmd,
                                   logger=log.getChild(name),
                                   stdin=run.PIPE,
                                   wait=False,
                                   )

    if type_ == 'mds':
        firstmon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
        mon0_remote.run(args=[
            '/tmp/cephtest/enable-coredump',
            '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
            coverage_dir,
            '/tmp/cephtest/binary/usr/local/bin/ceph',
            '-c', '/tmp/cephtest/ceph.conf',
            'mds', 'set_max_mds', str(num_active)])

    try:
        yield
    finally:
        log.info('Shutting down %s daemons...' % type_)
        exc_info = (None, None, None)
        for daemon in ctx.daemons.iter_daemons_of_role(type_):
            try:
                daemon.stop()
            except (run.CommandFailedError,
                    run.CommandCrashedError,
                    run.ConnectionLostError):
                exc_info = sys.exc_info()
                log.exception('Saw exception from %s.%s', daemon.role, daemon.id_)
        if exc_info != (None, None, None):
            raise exc_info[0], exc_info[1], exc_info[2]
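
For reference, a sketch of the valgrind and cpu_profile portion of config in the shape this run_daemon variant reads it; the daemon names and valgrind flags shown are illustrative assumptions:

# Hypothetical valgrind/profiling config for run_daemon (illustrative values only)
config = {
    'valgrind': {
        'osd': ['--tool=memcheck'],      # type-level args apply to every osd.* daemon
        'mon.a': ['--tool=helgrind'],    # a per-daemon entry overrides the type-level one
    },
    'cpu_profile': ['osd'],              # daemon types launched with CPUPROFILE set
}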
Example #36
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the download task
    # puts it.  Remember this here, because subsequently IDs will change from those in
    # the test config to those that ceph-deploy invents.
    (ceph_admin,) = ctx.cluster.only(
        teuthology.get_first_mon(ctx, config)).remotes.iterkeys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.iteritems():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(
                        ceph_admin, conf_path, lines, sudo=True)

        # install ceph
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
                         " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        # create-keys is explicit now
        # http://tracker.ceph.com/issues/16036
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.iterkeys():
            remote.run(args=['sudo', 'ceph-create-keys', '--cluster', 'ceph',
                             '--id', remote.shortname])

        estatus_gather = execute_ceph_deploy(gather_keys)
        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ':' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(
                remote=mon0_remote,
                path=conf_path,
                sudo=True,
            )
            admin_keyring = teuthology.get_file(
                remote=mon0_remote,
                path=admin_keyring_path,
                sudo=True,
            )

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    mon0_remote.run(
                        args=[
                            'cd',
                            '{tdir}'.format(tdir=testdir),
                            run.Raw('&&'),
                            'sudo', 'bash', '-c',
                            run.Raw('"'), 'ceph',
                            'auth',
                            'get-or-create',
                            'client.{id}'.format(id=id_),
                            'mds', 'allow',
                            'mon', 'allow *',
                            'osd', 'allow *',
                            run.Raw('>'),
                            client_keyring,
                            run.Raw('"'),
                        ],
                    )
                    key_data = teuthology.get_file(
                        remote=mon0_remote,
                        path=client_keyring,
                        sudo=True,
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=client_keyring,
                        data=key_data,
                        perms='0644'
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=admin_keyring_path,
                        data=admin_keyring,
                        perms='0644'
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=conf_path,
                        data=conf_data,
                        perms='0644'
                    )

            if mds_nodes:
                log.info('Configuring CephFS...')
                ceph_fs = Filesystem(ctx)
                if not ceph_fs.legacy_configured():
                    ceph_fs.create()
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
                              'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
                              'sudo', 'systemctl', 'stop', 'ceph.target'])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(
            args=[
                'sudo', 'status', 'ceph-all', run.Raw('||'),
                'sudo', 'service', 'ceph', 'status', run.Raw('||'),
                'sudo', 'systemctl', 'status', 'ceph.target'],
            check_status=False)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)
示例#37
0
def cluster(ctx, config):
    """
    Handle the creation and removal of a ceph cluster.

    On startup:
        Create directories needed for the cluster.
        Create remote journals for all osds.
        Create and set keyring.
        Copy the monmap to tht test systems.
        Setup mon nodes.
        Setup mds nodes.
        Mkfs osd nodes.
        Add keyring information to monmaps
        Mkfs mon nodes.

    On exit:
        If errors occured, extract a failure message and store in ctx.summary.
        Unmount all test files and temporary journaling files.
        Save the monitor information and archive all ceph logs.
        Cleanup the keyring setup, and remove all monitor map and data files left over.

    :param ctx: Context
    :param config: Configuration
    """
    if ctx.config.get('use_existing_cluster', False) is True:
        log.info("'use_existing_cluster' is true; skipping cluster creation")
        yield

    testdir = teuthology.get_testdir(ctx)
    log.info('Creating ceph cluster...')
    run.wait(
        ctx.cluster.run(
            args=[
                'install',
                '-d',
                '-m0755',
                '--',
                '{tdir}/data'.format(tdir=testdir),
            ],
            wait=False,
        ))

    run.wait(
        ctx.cluster.run(
            args=[
                'sudo',
                'install',
                '-d',
                '-m0777',
                '--',
                '/var/run/ceph',
            ],
            wait=False,
        ))

    devs_to_clean = {}
    remote_to_roles_to_devs = {}
    remote_to_roles_to_journals = {}
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        roles_to_devs = {}
        roles_to_journals = {}
        if config.get('fs'):
            log.info('fs option selected, checking for scratch devs')
            log.info('found devs: %s' % (str(devs), ))
            devs_id_map = teuthology.get_wwn_id_map(remote, devs)
            iddevs = devs_id_map.values()
            roles_to_devs = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs)
            if len(roles_to_devs) < len(iddevs):
                iddevs = iddevs[len(roles_to_devs):]
            devs_to_clean[remote] = []

        if config.get('block_journal'):
            log.info('block journal enabled')
            roles_to_journals = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs)
            log.info('journal map: %s', roles_to_journals)

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled')
            roles_to_journals = {}
            remote.run(args=['sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt'])
            for osd in teuthology.roles_of_type(roles_for_host, 'osd'):
                tmpfs = '/mnt/osd.%s' % osd
                roles_to_journals[osd] = tmpfs
                remote.run(args=['truncate', '-s', '1500M', tmpfs])
            log.info('journal map: %s', roles_to_journals)

        log.info('dev map: %s' % (str(roles_to_devs), ))
        remote_to_roles_to_devs[remote] = roles_to_devs
        remote_to_roles_to_journals[remote] = roles_to_journals

    log.info('Generating config...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [
        host for (host, port) in (remote.ssh.get_transport().getpeername()
                                  for (remote, role_list) in remotes_and_roles)
    ]
    conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips)
    for remote, roles_to_journals in remote_to_roles_to_journals.iteritems():
        for role, journal in roles_to_journals.iteritems():
            key = "osd." + str(role)
            if key not in conf:
                conf[key] = {}
            conf[key]['osd journal'] = journal
    for section, keys in config['conf'].iteritems():
        for key, value in keys.iteritems():
            log.info("[%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    if config.get('tmpfs_journal'):
        conf['journal dio'] = False

    ctx.ceph = argparse.Namespace()
    ctx.ceph.conf = conf

    keyring_path = config.get('keyring_path', '/etc/ceph/ceph.keyring')

    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    firstmon = teuthology.get_first_mon(ctx, config)

    log.info('Setting up %s...' % firstmon)
    ctx.cluster.only(firstmon).run(args=[
        'sudo',
        'adjust-ulimits',
        'ceph-coverage',
        coverage_dir,
        'ceph-authtool',
        '--create-keyring',
        keyring_path,
    ], )
    ctx.cluster.only(firstmon).run(args=[
        'sudo',
        'adjust-ulimits',
        'ceph-coverage',
        coverage_dir,
        'ceph-authtool',
        '--gen-key',
        '--name=mon.',
        keyring_path,
    ], )
    ctx.cluster.only(firstmon).run(args=[
        'sudo',
        'chmod',
        '0644',
        keyring_path,
    ], )
    (mon0_remote, ) = ctx.cluster.only(firstmon).remotes.keys()
    fsid = teuthology.create_simple_monmap(
        ctx,
        remote=mon0_remote,
        conf=conf,
    )
    if not 'global' in conf:
        conf['global'] = {}
    conf['global']['fsid'] = fsid

    conf_path = config.get('conf_path', DEFAULT_CONF_PATH)
    log.info('Writing %s for FSID %s...' % (conf_path, fsid))
    write_conf(ctx, conf_path)

    log.info('Creating admin key on %s...' % firstmon)
    ctx.cluster.only(firstmon).run(args=[
        'sudo',
        'adjust-ulimits',
        'ceph-coverage',
        coverage_dir,
        'ceph-authtool',
        '--gen-key',
        '--name=client.admin',
        '--set-uid=0',
        '--cap',
        'mon',
        'allow *',
        '--cap',
        'osd',
        'allow *',
        '--cap',
        'mds',
        'allow *',
        keyring_path,
    ], )

    log.info('Copying monmap to all nodes...')
    keyring = teuthology.get_file(
        remote=mon0_remote,
        path=keyring_path,
    )
    monmap = teuthology.get_file(
        remote=mon0_remote,
        path='{tdir}/monmap'.format(tdir=testdir),
    )

    for rem in ctx.cluster.remotes.iterkeys():
        # copy mon key and initial monmap
        log.info('Sending monmap to node {remote}'.format(remote=rem))
        teuthology.sudo_write_file(remote=rem,
                                   path=keyring_path,
                                   data=keyring,
                                   perms='0644')
        teuthology.write_file(
            remote=rem,
            path='{tdir}/monmap'.format(tdir=testdir),
            data=monmap,
        )

    log.info('Setting up mon nodes...')
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    run.wait(
        mons.run(
            args=[
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'osdmaptool',
                '-c',
                conf_path,
                '--clobber',
                '--createsimple',
                '{num:d}'.format(num=teuthology.num_instances_of_type(
                    ctx.cluster, 'osd'), ),
                '{tdir}/osdmap'.format(tdir=testdir),
                '--pg_bits',
                '2',
                '--pgp_bits',
                '4',
            ],
            wait=False,
        ), )

    log.info('Setting up mds nodes...')
    mdss = ctx.cluster.only(teuthology.is_type('mds'))
    for remote, roles_for_host in mdss.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'mds'):
            remote.run(args=[
                'sudo',
                'mkdir',
                '-p',
                '/var/lib/ceph/mds/ceph-{id}'.format(id=id_),
                run.Raw('&&'),
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'ceph-authtool',
                '--create-keyring',
                '--gen-key',
                '--name=mds.{id}'.format(id=id_),
                '/var/lib/ceph/mds/ceph-{id}/keyring'.format(id=id_),
            ], )

    cclient.create_keyring(ctx)
    log.info('Running mkfs on osd nodes...')

    ctx.disk_config = argparse.Namespace()
    ctx.disk_config.remote_to_roles_to_dev = remote_to_roles_to_devs
    ctx.disk_config.remote_to_roles_to_journals = remote_to_roles_to_journals
    ctx.disk_config.remote_to_roles_to_dev_mount_options = {}
    ctx.disk_config.remote_to_roles_to_dev_fstype = {}

    log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(
        r=str(ctx.disk_config.remote_to_roles_to_dev)))
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = remote_to_roles_to_devs[remote]
        roles_to_journals = remote_to_roles_to_journals[remote]

        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            remote.run(args=[
                'sudo',
                'mkdir',
                '-p',
                '/var/lib/ceph/osd/ceph-{id}'.format(id=id_),
            ])
            log.info(str(roles_to_journals))
            log.info(id_)
            if roles_to_devs.get(id_):
                dev = roles_to_devs[id_]
                fs = config.get('fs')
                package = None
                mkfs_options = config.get('mkfs_options')
                mount_options = config.get('mount_options')
                if fs == 'btrfs':
                    # package = 'btrfs-tools'
                    if mount_options is None:
                        mount_options = ['noatime', 'user_subvol_rm_allowed']
                    if mkfs_options is None:
                        mkfs_options = [
                            '-m', 'single', '-l', '32768', '-n', '32768'
                        ]
                if fs == 'xfs':
                    # package = 'xfsprogs'
                    if mount_options is None:
                        mount_options = ['noatime']
                    if mkfs_options is None:
                        mkfs_options = ['-f', '-i', 'size=2048']
                if fs == 'ext4' or fs == 'ext3':
                    if mount_options is None:
                        mount_options = ['noatime', 'user_xattr']

                if mount_options is None:
                    mount_options = []
                if mkfs_options is None:
                    mkfs_options = []
                mkfs = ['mkfs.%s' % fs] + mkfs_options
                log.info('%s on %s on %s' % (mkfs, dev, remote))
                if package is not None:
                    remote.run(
                        args=['sudo', 'apt-get', 'install', '-y', package],
                        stdout=StringIO(),
                    )

                try:
                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs +
                               [dev])
                except run.CommandFailedError:
                    # Newer btfs-tools doesn't prompt for overwrite, use -f
                    if '-f' not in mount_options:
                        mkfs_options.append('-f')
                        mkfs = ['mkfs.%s' % fs] + mkfs_options
                        log.info('%s on %s on %s' % (mkfs, dev, remote))
                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs +
                               [dev])

                log.info('mount %s on %s -o %s' %
                         (dev, remote, ','.join(mount_options)))
                remote.run(args=[
                    'sudo',
                    'mount',
                    '-t',
                    fs,
                    '-o',
                    ','.join(mount_options),
                    dev,
                    os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(
                        id=id_)),
                ])
                if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options:
                    ctx.disk_config.remote_to_roles_to_dev_mount_options[
                        remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][
                    id_] = mount_options
                if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype:
                    ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_fstype[remote][id_] = fs
                devs_to_clean[remote].append(
                    os.path.join(
                        os.path.join('/var/lib/ceph/osd',
                                     'ceph-{id}'.format(id=id_)), ))

        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            remote.run(args=[
                'sudo',
                'MALLOC_CHECK_=3',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'ceph-osd',
                '--mkfs',
                '--mkkey',
                '-i',
                id_,
                '--monmap',
                '{tdir}/monmap'.format(tdir=testdir),
            ], )

    log.info('Reading keys from all nodes...')
    keys_fp = StringIO()
    keys = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['mds', 'osd']:
            for id_ in teuthology.roles_of_type(roles_for_host, type_):
                data = teuthology.get_file(
                    remote=remote,
                    path='/var/lib/ceph/{type}/ceph-{id}/keyring'.format(
                        type=type_,
                        id=id_,
                    ),
                    sudo=True,
                )
                keys.append((type_, id_, data))
                keys_fp.write(data)
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['client']:
            for id_ in teuthology.roles_of_type(roles_for_host, type_):
                data = teuthology.get_file(
                    remote=remote,
                    path='/etc/ceph/ceph.client.{id}.keyring'.format(id=id_))
                keys.append((type_, id_, data))
                keys_fp.write(data)

    log.info('Adding keys to all mons...')
    writes = mons.run(
        args=[
            'sudo',
            'tee',
            '-a',
            keyring_path,
        ],
        stdin=run.PIPE,
        wait=False,
        stdout=StringIO(),
    )
    keys_fp.seek(0)
    teuthology.feed_many_stdins_and_close(keys_fp, writes)
    run.wait(writes)
    for type_, id_, data in keys:
        run.wait(
            mons.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    keyring_path,
                    '--name={type}.{id}'.format(
                        type=type_,
                        id=id_,
                    ),
                ] + list(teuthology.generate_caps(type_)),
                wait=False,
            ), )

    log.info('Running mkfs on mon nodes...')
    for remote, roles_for_host in mons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'mon'):
            remote.run(args=[
                'sudo',
                'mkdir',
                '-p',
                '/var/lib/ceph/mon/ceph-{id}'.format(id=id_),
            ], )
            remote.run(args=[
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'ceph-mon',
                '--mkfs',
                '-i',
                id_,
                '--monmap={tdir}/monmap'.format(tdir=testdir),
                '--osdmap={tdir}/osdmap'.format(tdir=testdir),
                '--keyring={kpath}'.format(kpath=keyring_path),
            ], )

    run.wait(
        mons.run(
            args=[
                'rm',
                '--',
                '{tdir}/monmap'.format(tdir=testdir),
                '{tdir}/osdmap'.format(tdir=testdir),
            ],
            wait=False,
        ), )

    try:
        yield
    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise
    finally:
        (mon0_remote, ) = ctx.cluster.only(firstmon).remotes.keys()

        log.info('Checking cluster log for badness...')

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurence of the pattern specified in the Ceph log,
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo',
                'egrep',
                pattern,
                '/var/log/ceph/ceph.log',
            ]
            for exclude in excludes:
                args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'),
                'head',
                '-n',
                '1',
            ])
            r = mon0_remote.run(
                stdout=StringIO(),
                args=args,
            )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
                             config['log_whitelist']) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log_whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        for remote, dirs in devs_to_clean.iteritems():
            for dir_ in dirs:
                log.info('Unmounting %s on %s' % (dir_, remote))
                try:
                    remote.run(args=[
                        'sync',
                        run.Raw('&&'), 'sudo', 'umount', '-f', dir_
                    ])
                except Exception as e:
                    remote.run(args=[
                        'sudo',
                        run.Raw('PATH=/usr/sbin:$PATH'),
                        'lsof',
                        run.Raw(';'),
                        'ps',
                        'auxf',
                    ])
                    raise e

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled - unmounting tmpfs at /mnt')
            for remote, roles_for_host in osds.remotes.iteritems():
                remote.run(
                    args=['sudo', 'umount', '-f', '/mnt'],
                    check_status=False,
                )

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):

            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote, '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

        log.info('Cleaning ceph cluster...')
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo',
                    'rm',
                    '-rf',
                    '--',
                    conf_path,
                    keyring_path,
                    '{tdir}/data'.format(tdir=testdir),
                    '{tdir}/monmap'.format(tdir=testdir),
                    run.Raw('{tdir}/../*.pid'.format(tdir=testdir)),
                ],
                wait=False,
            ), )
示例#38
0
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    try:
        log.info("Building ceph cluster using ceph-deploy...")
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get("branch") is not None:
            cbranch = config.get("branch")
            for var, val in cbranch.iteritems():
                if var == "testing":
                    ceph_branch = "--{var}".format(var=var)
                ceph_branch = "--{var}={val}".format(var=var, val=val)
        node_dev_list = []
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_roles(ctx, config, "mds")
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_roles(ctx, config, "mon")
        mon_nodes = " ".join(mon_node)
        new_mon = "./ceph-deploy new" + " " + mon_nodes
        install_nodes = "./ceph-deploy install " + ceph_branch + " " + all_nodes
        purge_nodes = "./ceph-deploy purge" + " " + all_nodes
        purgedata_nodes = "./ceph-deploy purgedata" + " " + all_nodes
        mon_hostname = mon_nodes.split(" ")[0]
        mon_hostname = str(mon_hostname)
        gather_keys = "./ceph-deploy gatherkeys" + " " + mon_hostname
        deploy_mds = "./ceph-deploy mds create" + " " + mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(ctx, config, new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info("adding config inputs...")
        testdir = teuthology.get_testdir(ctx)
        conf_path = "{tdir}/ceph-deploy/ceph.conf".format(tdir=testdir)
        first_mon = teuthology.get_first_mon(ctx, config)
        (remote,) = ctx.cluster.only(first_mon).remotes.keys()

        lines = None
        if config.get("conf") is not None:
            confp = config.get("conf")
            for section, keys in confp.iteritems():
                lines = "[{section}]\n".format(section=section)
                teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = "{key} = {value}\n".format(key=key, value=value)
                    teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True)

        estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")

        mon_create_nodes = "./ceph-deploy mon create-initial"
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)

        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
        max_gather_tries = 90
        gather_tries = 0
        while estatus_gather != 0:
            gather_tries += 1
            if gather_tries >= max_gather_tries:
                msg = "ceph-deploy was not able to gatherkeys after 15 minutes"
                raise RuntimeError(msg)
            estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
            time.sleep(10)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get("test_mon_destroy") is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = "./ceph-deploy mon destroy" + " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(ctx, config, mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        osd_create_cmd = "./ceph-deploy osd create --zap-disk "
        for d in node_dev_list:
            if config.get("dmcrypt") is not None:
                osd_create_cmd_d = osd_create_cmd + "--dmcrypt" + " " + d
            else:
                osd_create_cmd_d = osd_create_cmd + d
            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d)
            if estatus_osd == 0:
                log.info("successfully created osd")
                no_of_osds += 1
            else:
                disks = []
                disks = d.split(":")
                dev_disk = disks[0] + ":" + disks[1]
                j_disk = disks[0] + ":" + disks[2]
                zap_disk = "./ceph-deploy disk zap " + dev_disk + " " + j_disk
                execute_ceph_deploy(ctx, config, zap_disk)
                estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d)
                if estatus_osd == 0:
                    log.info("successfully created osd")
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get("wait-for-healthy", True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info("Setting up client nodes...")
            conf_path = "/etc/ceph/ceph.conf"
            admin_keyring_path = "/etc/ceph/ceph.client.admin.keyring"
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(remote=mon0_remote, path=conf_path, sudo=True)
            admin_keyring = teuthology.get_file(remote=mon0_remote, path=admin_keyring_path, sudo=True)

            clients = ctx.cluster.only(teuthology.is_type("client"))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, "client"):
                    client_keyring = "/etc/ceph/ceph.client.{id}.keyring".format(id=id_)
                    mon0_remote.run(
                        args=[
                            "cd",
                            "{tdir}".format(tdir=testdir),
                            run.Raw("&&"),
                            "sudo",
                            "bash",
                            "-c",
                            run.Raw('"'),
                            "ceph",
                            "auth",
                            "get-or-create",
                            "client.{id}".format(id=id_),
                            "mds",
                            "allow",
                            "mon",
                            "allow *",
                            "osd",
                            "allow *",
                            run.Raw(">"),
                            client_keyring,
                            run.Raw('"'),
                        ]
                    )
                    key_data = teuthology.get_file(remote=mon0_remote, path=client_keyring, sudo=True)
                    teuthology.sudo_write_file(remote=remot, path=client_keyring, data=key_data, perms="0644")
                    teuthology.sudo_write_file(remote=remot, path=admin_keyring_path, data=admin_keyring, perms="0644")
                    teuthology.sudo_write_file(remote=remot, path=conf_path, data=conf_data, perms="0644")
        else:
            raise RuntimeError("The cluster is NOT operational due to insufficient OSDs")
        yield

    finally:
        log.info("Stopping ceph...")
        ctx.cluster.run(args=["sudo", "stop", "ceph-all", run.Raw("||"), "sudo", "service", "ceph", "stop"])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(
            args=["sudo", "status", "ceph-all", run.Raw("||"), "sudo", "service", "ceph", "status"], check_status=False
        )

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(
            args=["sudo", "ps", "aux", run.Raw("|"), "grep", "-v", "grep", run.Raw("|"), "grep", "ceph"],
            check_status=False,
        )

        if ctx.archive is not None:
            # archive mon data, too
            log.info("Archiving mon data...")
            path = os.path.join(ctx.archive, "data")
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type("mon"))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith("mon."):
                        teuthology.pull_directory_tarball(remote, "/var/lib/ceph/mon", path + "/" + role + ".tgz")

            log.info("Compressing logs...")
            run.wait(
                ctx.cluster.run(
                    args=[
                        "sudo",
                        "find",
                        "/var/log/ceph",
                        "-name",
                        "*.log",
                        "-print0",
                        run.Raw("|"),
                        "sudo",
                        "xargs",
                        "-0",
                        "--no-run-if-empty",
                        "--",
                        "gzip",
                        "--",
                    ],
                    wait=False,
                )
            )

            log.info("Archiving logs...")
            path = os.path.join(ctx.archive, "remote")
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, "/var/log/ceph", os.path.join(sub, "log"))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = "./ceph-deploy purge" + " " + all_nodes
        purgedata_nodes = "./ceph-deploy purgedata" + " " + all_nodes

        log.info("Purging package...")
        execute_ceph_deploy(ctx, config, purge_nodes)
        log.info("Purging data...")
        execute_ceph_deploy(ctx, config, purgedata_nodes)
示例#39
0
def run_daemon(ctx, config, type_):
    """
    Run daemons for a role type.  Handle the startup and termination of a a daemon.
    On startup -- set coverages, cpu_profile, valgrind values for all remotes,
    and a max_mds value for one mds.
    On cleanup -- Stop all existing daemons of this type.

    :param ctx: Context
    :param config: Configuration
    :paran type_: Role type
    """
    log.info('Starting %s daemons...' % type_)
    testdir = teuthology.get_testdir(ctx)
    daemons = ctx.cluster.only(teuthology.is_type(type_))

    # check whether any daemons if this type are configured
    if daemons is None:
        return
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    daemon_signal = 'kill'
    if config.get('coverage') or config.get('valgrind') is not None:
        daemon_signal = 'term'

    for remote, roles_for_host in daemons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, type_):
            name = '%s.%s' % (type_, id_)

            run_cmd = [
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'daemon-helper',
                daemon_signal,
            ]
            run_cmd_tail = ['ceph-%s' % (type_), '-f', '-i', id_]

            if type_ in config.get('cpu_profile', []):
                profile_path = '/var/log/ceph/profiling-logger/%s.%s.prof' % (
                    type_, id_)
                run_cmd.extend(['env', 'CPUPROFILE=%s' % profile_path])

            if config.get('valgrind') is not None:
                valgrind_args = None
                if type_ in config['valgrind']:
                    valgrind_args = config['valgrind'][type_]
                if name in config['valgrind']:
                    valgrind_args = config['valgrind'][name]
                run_cmd = teuthology.get_valgrind_args(testdir, name, run_cmd,
                                                       valgrind_args)

            run_cmd.extend(run_cmd_tail)

            ctx.daemons.add_daemon(
                remote,
                type_,
                id_,
                args=run_cmd,
                logger=log.getChild(name),
                stdin=run.PIPE,
                wait=False,
            )

    try:
        yield
    finally:
        teuthology.stop_daemons_of_type(ctx, type_)
示例#40
0
def run_daemon(ctx, config, type_):
    log.info('Starting %s daemons...' % type_)
    testdir = teuthology.get_testdir(ctx)
    daemons = ctx.cluster.only(teuthology.is_type(type_))
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    daemon_signal = 'kill'
    if config.get('coverage') or config.get('valgrind') is not None:
        daemon_signal = 'term'

    num_active = 0
    for remote, roles_for_host in daemons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, type_):
            name = '%s.%s' % (type_, id_)

            if not (id_.endswith('-s')) and (id_.find('-s-') == -1):
                num_active += 1

            run_cmd = [
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'daemon-helper',
                daemon_signal,
                ]
            run_cmd_tail = [
                'ceph-%s' % (type_),
                '-f',
                '-i', id_]

            if type_ in config.get('cpu_profile', []):
                profile_path = '/var/log/ceph/profiling-logger/%s.%s.prof' % (type_, id_)
                run_cmd.extend([ 'env', 'CPUPROFILE=%s' % profile_path ])

            if config.get('valgrind') is not None:
                valgrind_args = None
                if type_ in config['valgrind']:
                    valgrind_args = config['valgrind'][type_]
                if name in config['valgrind']:
                    valgrind_args = config['valgrind'][name]
                run_cmd = teuthology.get_valgrind_args(testdir, name,
                                                       run_cmd,
                                                       valgrind_args)

            run_cmd.extend(run_cmd_tail)

            ctx.daemons.add_daemon(remote, type_, id_,
                                   args=run_cmd,
                                   logger=log.getChild(name),
                                   stdin=run.PIPE,
                                   wait=False,
                                   )

    if type_ == 'mds':
        firstmon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()

        mon0_remote.run(args=[
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph',
            'mds', 'set_max_mds', str(num_active)])

    try:
        yield
    finally:
        teuthology.stop_daemons_of_type(ctx, type_)