def setup_dnsmasq(remote, cnames):
    """ Configure dnsmasq on the given remote, adding each cname given. """
    log.info('Configuring dnsmasq on remote %s...', remote.name)

    # back up existing resolv.conf
    resolv_conf = misc.get_file(remote, '/etc/resolv.conf')
    # point resolv.conf to local dnsmasq
    misc.sudo_write_file(remote, '/etc/resolv.conf',
                         "nameserver 127.0.0.1\n")

    # add address entries to /etc/dnsmasq.d/ceph
    dnsmasq = "server=8.8.8.8\nserver=8.8.4.4\n"
    address_template = "address=/{cname}/{ip_address}\n"
    for cname, ip_address in cnames.iteritems():
        dnsmasq += address_template.format(cname=cname, ip_address=ip_address)
    misc.sudo_write_file(remote, '/etc/dnsmasq.d/ceph', dnsmasq)
    remote.run(args=['cat', '/etc/dnsmasq.d/ceph'])

    # restart dnsmasq
    remote.run(args=['sudo', 'systemctl', 'restart', 'dnsmasq'])
    remote.run(args=['sudo', 'systemctl', 'status', 'dnsmasq'])
    # verify dns name is set
    remote.run(args=['ping', '-c', '4', cnames.keys()[0]])

    yield

    log.info('Removing dnsmasq configuration from remote %s...', remote.name)
    # restore resolv.conf
    misc.sudo_write_file(remote, '/etc/resolv.conf', resolv_conf)
    # restart dnsmasq
    remote.run(args=['sudo', 'systemctl', 'restart', 'dnsmasq'])
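# A minimal, self-contained sketch of the string-building step above: it shows
# the /etc/dnsmasq.d/ceph payload that a given cnames mapping would produce.
# The helper name and the sample hostname/IP are made up for illustration.
def _render_dnsmasq_conf(cnames, upstream=("8.8.8.8", "8.8.4.4")):
    lines = ["server=%s" % server for server in upstream]
    for cname, ip_address in sorted(cnames.items()):
        lines.append("address=/{cname}/{ip_address}".format(
            cname=cname, ip_address=ip_address))
    return "\n".join(lines) + "\n"

# Example:
#   print(_render_dnsmasq_conf({"rgw.example.test": "10.0.0.5"}))
# yields:
#   server=8.8.8.8
#   server=8.8.4.4
#   address=/rgw.example.test/10.0.0.5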
def _setup_mon(ctx, manager, remote, mon, name, data_path, conf_path):
    # co-locate a new monitor on remote where an existing monitor is hosted
    cluster = manager.cluster
    remote.run(args=['sudo', 'mkdir', '-p', data_path])
    keyring_path = '/etc/ceph/{cluster}.keyring'.format(
        cluster=manager.cluster)
    testdir = teuthology.get_testdir(ctx)
    monmap_path = '{tdir}/{cluster}.monmap'.format(tdir=testdir,
                                                   cluster=cluster)
    manager.raw_cluster_cmd('mon', 'getmap', '-o', monmap_path)
    if manager.controller != remote:
        monmap = teuthology.get_file(manager.controller, monmap_path)
        teuthology.write_file(remote, monmap_path, StringIO(monmap))
    remote.run(
        args=[
            'sudo',
            'ceph-mon',
            '--cluster', cluster,
            '--mkfs',
            '-i', mon,
            '--monmap', monmap_path,
            '--keyring', keyring_path])
    if manager.controller != remote:
        teuthology.delete_file(remote, monmap_path)
    # raw_cluster_cmd() is performed using sudo, so sudo here also.
    teuthology.delete_file(manager.controller, monmap_path, sudo=True)
    # update ceph.conf so that the ceph CLI is able to connect to the cluster
    if conf_path:
        ip = remote.ip_address
        port = _get_next_port(ctx, ip, cluster)
        mon_addr = '{ip}:{port}'.format(ip=ip, port=port)
        ctx.ceph[cluster].conf[name] = {'mon addr': mon_addr}
        write_conf(ctx, conf_path, cluster)
def install_distro_kernel(remote):
    """
    RPM: Find the newest kernel on the machine, update grub to use it, and reboot.
    DEB: Find the newest kernel, parse grub.cfg to figure out the entry name and
    submenu, then modify 01_ceph_kernel to have the correct entry, run
    update-grub, and reboot.
    """
    system_type = teuthology.get_system_type(remote)
    distribution = ''
    if system_type == 'rpm':
        output, err_mess = StringIO(), StringIO()
        remote.run(args=['rpm', '-q', 'kernel', '--last'],
                   stdout=output, stderr=err_mess)
        newest = output.getvalue().split()[0].split('kernel-')[1]
        log.info('Distro Kernel Version: {version}'.format(version=newest))
        update_grub_rpm(remote, newest)
        remote.run(args=['sudo', 'shutdown', '-r', 'now'], wait=False)
        output.close()
        err_mess.close()
        return

    if system_type == 'deb':
        distribution = teuthology.get_system_type(remote, distro=True)
        newversion = get_version_from_pkg(remote, distribution)
        if 'ubuntu' in distribution:
            grub2conf = teuthology.get_file(remote, '/boot/grub/grub.cfg', True)
            submenu = ''
            menuentry = ''
            for line in grub2conf.split('\n'):
                if 'submenu' in line:
                    submenu = line.split('submenu ')[1]
                    # Ubuntu likes to be sneaky and change formatting of
                    # grub.cfg between quotes/doublequotes between versions
                    if submenu.startswith("'"):
                        submenu = submenu.split("'")[1]
                    if submenu.startswith('"'):
                        submenu = submenu.split('"')[1]
                if 'menuentry' in line:
                    if newversion in line and 'recovery' not in line:
                        menuentry = line.split('\'')[1]
                        break
            if submenu:
                grubvalue = submenu + '>' + menuentry
            else:
                grubvalue = menuentry
            grubfile = 'cat <<EOF\nset default="' + grubvalue + '"\nEOF'
            teuthology.delete_file(remote, '/etc/grub.d/01_ceph_kernel',
                                   sudo=True, force=True)
            teuthology.sudo_write_file(remote, '/etc/grub.d/01_ceph_kernel',
                                       StringIO(grubfile), '755')
            log.info('Distro Kernel Version: {version}'.format(
                version=newversion))
            remote.run(args=['sudo', 'update-grub'])
            remote.run(args=['sudo', 'shutdown', '-r', 'now'], wait=False)
            return

        if 'debian' in distribution:
            grub2_kernel_select_generic(remote, newversion, 'deb')
            log.info('Distro Kernel Version: {version}'.format(
                version=newversion))
            remote.run(args=['sudo', 'shutdown', '-r', 'now'], wait=False)
            return
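# Self-contained sketch of the Ubuntu grub.cfg parsing done above: given the
# text of grub.cfg and a kernel version, it returns the "submenu>menuentry"
# string that would be written into /etc/grub.d/01_ceph_kernel. The helper
# name and the trimmed grub.cfg sample below are invented for illustration.
def _grub_default_for(grub2conf, newversion):
    submenu = ''
    menuentry = ''
    for line in grub2conf.split('\n'):
        if 'submenu' in line:
            submenu = line.split('submenu ')[1]
            # handle both single- and double-quoted submenu titles
            if submenu.startswith("'"):
                submenu = submenu.split("'")[1]
            if submenu.startswith('"'):
                submenu = submenu.split('"')[1]
        if 'menuentry' in line:
            if newversion in line and 'recovery' not in line:
                menuentry = line.split("'")[1]
                break
    return submenu + '>' + menuentry if submenu else menuentry

_sample_cfg = """\
submenu 'Advanced options for Ubuntu' $menuentry_id_option 'gnulinux-advanced' {
menuentry 'Ubuntu, with Linux 4.4.0-31-generic' --class ubuntu {
menuentry 'Ubuntu, with Linux 4.4.0-31-generic (recovery mode)' --class ubuntu {
"""
# _grub_default_for(_sample_cfg, '4.4.0-31') ->
#   "Advanced options for Ubuntu>Ubuntu, with Linux 4.4.0-31-generic"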
def generate_legacy_grub_entry(remote, newversion):
    """
    This will likely need to be used for ceph kernels as well, since legacy
    grub rpm distros don't have an easy way of selecting a kernel just via a
    command. This generates an entry in legacy grub for a new kernel version,
    using the existing entry as a base.
    """
    grubconf = teuthology.get_file(remote, '/boot/grub/grub.conf', True)
    titleline = ''
    rootline = ''
    kernelline = ''
    initline = ''
    kernelversion = ''
    linenum = 0
    titlelinenum = 0

    # Grab the first kernel entry (title/root/kernel/init lines)
    for line in grubconf.split('\n'):
        if re.match('^title', line):
            titleline = line
            titlelinenum = linenum
        if re.match('(^\s+)root', line):
            rootline = line
        if re.match('(^\s+)kernel', line):
            kernelline = line
            for word in line.split(' '):
                if 'vmlinuz' in word:
                    kernelversion = word.split('vmlinuz-')[-1]
        if re.match('(^\s+)initrd', line):
            initline = line
        if (kernelline != '') and (initline != ''):
            break
        else:
            linenum += 1

    # Insert the new entry into the newgrubconf list
    linenum = 0
    newgrubconf = []
    for line in grubconf.split('\n'):
        line = line.rstrip('\n')
        if linenum == titlelinenum:
            newtitle = re.sub(kernelversion, newversion, titleline)
            newroot = re.sub(kernelversion, newversion, rootline)
            newkernel = re.sub(kernelversion, newversion, kernelline)
            newinit = re.sub(kernelversion, newversion, initline)
            newgrubconf.append(newtitle)
            newgrubconf.append(newroot)
            newgrubconf.append(newkernel)
            newgrubconf.append(newinit)
            newgrubconf.append('')
            newgrubconf.append(line)
        else:
            newgrubconf.append(line)
        linenum += 1
    return newgrubconf
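# Tiny illustration of the legacy-grub rewrite above: the first boot entry is
# cloned with the kernel version substituted and placed ahead of the original
# entry. The grub.conf text and version strings below are made up.
import re

_old_grub_conf = """\
default=0
title CentOS (2.6.32-431.el6.x86_64)
\troot (hd0,0)
\tkernel /vmlinuz-2.6.32-431.el6.x86_64 ro root=/dev/sda1
\tinitrd /initramfs-2.6.32-431.el6.x86_64.img
"""
_newversion = '2.6.32-573.el6.x86_64'
# Substituting the new version into a copy of the title/root/kernel/initrd
# lines mirrors the entry that generate_legacy_grub_entry() prepends:
_new_entry = [re.sub('2.6.32-431.el6.x86_64', _newversion, line)
              for line in _old_grub_conf.split('\n')[1:5]]
# ['title CentOS (2.6.32-573.el6.x86_64)', '\troot (hd0,0)', ...]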
def run_tests(ctx, config):
    assert isinstance(config, dict)
    testdir = teuthology.get_testdir(ctx)
    for client, client_config in config.iteritems():
        (remote,) = ctx.cluster.only(client).remotes.keys()
        conf = teuthology.get_file(
            remote,
            "{tdir}/archive/s3readwrite.{client}.config.yaml".format(
                tdir=testdir, client=client)
            )
        args = [
            "{tdir}/s3-tests/virtualenv/bin/s3tests-test-readwrite".format(
                tdir=testdir),
            ]
        if client_config is not None and "extra_args" in client_config:
            args.extend(client_config["extra_args"])
        ctx.cluster.only(client).run(args=args, stdin=conf)
    yield
def run_tests(ctx, config):
    assert isinstance(config, dict)
    for client, client_config in config.iteritems():
        (remote,) = ctx.cluster.only(client).remotes.keys()
        conf = teuthology.get_file(
            remote,
            '/tmp/cephtest/archive/s3roundtrip.{client}.config.yaml'.format(
                client=client))
        args = [
            '/tmp/cephtest/s3-tests/virtualenv/bin/s3tests-test-roundtrip',
            ]
        if client_config is not None and 'extra_args' in client_config:
            args.extend(client_config['extra_args'])
        ctx.cluster.only(client).run(
            args=args,
            stdin=conf,
            )
    yield
def grub2_kernel_select_generic(remote, newversion, ostype):
    """
    Can be used on DEB and RPM. Selects which entry should be booted,
    by entry number.
    """
    if ostype == 'rpm':
        grubset = 'grub2-set-default'
        mkconfig = 'grub2-mkconfig'
        grubconfig = '/boot/grub2/grub.cfg'
    if ostype == 'deb':
        grubset = 'grub-set-default'
        grubconfig = '/boot/grub/grub.cfg'
        mkconfig = 'grub-mkconfig'
    remote.run(args=['sudo', mkconfig, '-o', grubconfig])
    grub2conf = teuthology.get_file(remote, grubconfig, True)
    entry_num = 0
    for line in grub2conf.split('\n'):
        if line.startswith('menuentry'):
            if newversion in line:
                break
            entry_num += 1
    remote.run(args=['sudo', grubset, str(entry_num)])
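# Standalone check of the entry-number logic above: grub-set-default takes a
# zero-based index, so the counter only advances past menuentry lines that
# precede the wanted kernel. The grub.cfg fragment is invented for the example.
def _entry_index(grub2conf, newversion):
    entry_num = 0
    for line in grub2conf.split('\n'):
        if line.startswith('menuentry'):
            if newversion in line:
                break
            entry_num += 1
    return entry_num

_cfg = "menuentry 'Linux 4.9.0-8'\nmenuentry 'Linux 4.9.0-9'\n"
assert _entry_index(_cfg, '4.9.0-9') == 1   # second entry -> index 1
assert _entry_index(_cfg, '4.9.0-8') == 0   # first entry  -> index 0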
def get_tests(ctx, config, role, remote, testdir):
    """Download restart tests"""
    srcdir = '{tdir}/restart.{role}'.format(tdir=testdir, role=role)

    refspec = config.get('branch')
    if refspec is None:
        refspec = config.get('sha1')
    if refspec is None:
        refspec = config.get('tag')
    if refspec is None:
        refspec = 'HEAD'

    log.info('Pulling restart qa/workunits from ref %s', refspec)
    remote.run(
        logger=log.getChild(role),
        args=[
            'mkdir', '--', srcdir,
            run.Raw('&&'),
            'git',
            'archive',
            '--remote=git://ceph.com/git/ceph.git',
            '%s:qa/workunits' % refspec,
            run.Raw('|'),
            'tar',
            '-C', srcdir,
            '-x',
            '-f-',
            run.Raw('&&'),
            'cd', '--', srcdir,
            run.Raw('&&'),
            'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
            run.Raw('&&'),
            'find', '-executable', '-type', 'f', '-printf', r'%P\0'.format(srcdir=srcdir),
            run.Raw('>{tdir}/restarts.list'.format(tdir=testdir)),
            ],
        )
    restarts = sorted(teuthology.get_file(
        remote,
        '{tdir}/restarts.list'.format(tdir=testdir)).split('\0'))
    return (srcdir, restarts)
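# Small sketch of two pieces of plumbing above: the branch/sha1/tag fallback
# used to pick a refspec, and parsing the NUL-separated listing that
# `find -printf '%P\0'` leaves in restarts.list. Helper names and sample data
# are invented; the refspec helper condenses the is-None chain for typical
# configs, and the parser additionally drops the empty trailing field.
def _pick_refspec(config):
    return (config.get('branch') or config.get('sha1')
            or config.get('tag') or 'HEAD')

def _parse_find_output(data):
    return sorted(name for name in data.split('\0') if name)

assert _pick_refspec({'sha1': 'abc123'}) == 'abc123'
assert _parse_find_output('osd/a.sh\0mon/b.sh\0') == ['mon/b.sh', 'osd/a.sh']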
def run_tests(ctx, config):
    """
    Run the s3 roundtrip after everything is set up.

    :param ctx: Context passed to task
    :param config: specific configuration information
    """
    assert isinstance(config, dict)
    testdir = teuthology.get_testdir(ctx)
    for client, client_config in config.iteritems():
        (remote,) = ctx.cluster.only(client).remotes.keys()
        conf = teuthology.get_file(
            remote,
            '{tdir}/archive/s3roundtrip.{client}.config.yaml'.format(
                tdir=testdir, client=client))
        args = [
            '{tdir}/s3-tests/virtualenv/bin/s3tests-test-roundtrip'.format(
                tdir=testdir),
            ]
        if client_config is not None and 'extra_args' in client_config:
            args.extend(client_config['extra_args'])
        ctx.cluster.only(client).run(
            args=args,
            stdin=conf,
            )
    yield
def build_ceph_cluster(ctx, config): """Build a ceph cluster""" # Expect to find ceph_admin on the first mon by ID, same place that the download task # puts it. Remember this here, because subsequently IDs will change from those in # the test config to those that ceph-deploy invents. (ceph_admin,) = ctx.cluster.only( teuthology.get_first_mon(ctx, config)).remotes.iterkeys() def execute_ceph_deploy(cmd): """Remotely execute a ceph_deploy command""" return ceph_admin.run( args=[ 'cd', '{tdir}/ceph-deploy'.format(tdir=testdir), run.Raw('&&'), run.Raw(cmd), ], check_status=False, ).exitstatus try: log.info('Building ceph cluster using ceph-deploy...') testdir = teuthology.get_testdir(ctx) ceph_branch = None if config.get('branch') is not None: cbranch = config.get('branch') for var, val in cbranch.iteritems(): ceph_branch = '--{var}={val}'.format(var=var, val=val) all_nodes = get_all_nodes(ctx, config) mds_nodes = get_nodes_using_role(ctx, 'mds') mds_nodes = " ".join(mds_nodes) mon_node = get_nodes_using_role(ctx, 'mon') mon_nodes = " ".join(mon_node) mgr_nodes = get_nodes_using_role(ctx, 'mgr') mgr_nodes = " ".join(mgr_nodes) new_mon = './ceph-deploy new' + " " + mon_nodes mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes mon_hostname = mon_nodes.split(' ')[0] mon_hostname = str(mon_hostname) gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname deploy_mds = './ceph-deploy mds create' + " " + mds_nodes no_of_osds = 0 if mon_nodes is None: raise RuntimeError("no monitor nodes in the config file") estatus_new = execute_ceph_deploy(new_mon) if estatus_new != 0: raise RuntimeError("ceph-deploy: new command failed") log.info('adding config inputs...') testdir = teuthology.get_testdir(ctx) conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir) if config.get('conf') is not None: confp = config.get('conf') for section, keys in confp.iteritems(): lines = '[{section}]\n'.format(section=section) teuthology.append_lines_to_file(ceph_admin, conf_path, lines, sudo=True) for key, value in keys.iteritems(): log.info("[%s] %s = %s" % (section, key, value)) lines = '{key} = {value}\n'.format(key=key, value=value) teuthology.append_lines_to_file( ceph_admin, conf_path, lines, sudo=True) # install ceph dev_branch = ctx.config['branch'] branch = '--dev={branch}'.format(branch=dev_branch) if ceph_branch: option = ceph_branch else: option = branch install_nodes = './ceph-deploy install ' + option + " " + all_nodes estatus_install = execute_ceph_deploy(install_nodes) if estatus_install != 0: raise RuntimeError("ceph-deploy: Failed to install ceph") # install ceph-test package too install_nodes2 = './ceph-deploy install --tests ' + option + \ " " + all_nodes estatus_install = execute_ceph_deploy(install_nodes2) if estatus_install != 0: raise RuntimeError("ceph-deploy: Failed to install ceph-test") mon_create_nodes = './ceph-deploy mon create-initial' # If the following fails, it is OK, it might just be that the monitors # are taking way more than a minute/monitor to form quorum, so lets # try the next block which will wait up to 15 minutes to gatherkeys. 
execute_ceph_deploy(mon_create_nodes) execute_ceph_deploy(mgr_create) # create-keys is explicit now # http://tracker.ceph.com/issues/16036 mons = ctx.cluster.only(teuthology.is_type('mon')) for remote in mons.remotes.iterkeys(): remote.run(args=['sudo', 'ceph-create-keys', '--cluster', 'ceph', '--id', remote.shortname]) estatus_gather = execute_ceph_deploy(gather_keys) if mds_nodes: estatus_mds = execute_ceph_deploy(deploy_mds) if estatus_mds != 0: raise RuntimeError("ceph-deploy: Failed to deploy mds") if config.get('test_mon_destroy') is not None: for d in range(1, len(mon_node)): mon_destroy_nodes = './ceph-deploy mon destroy' + \ " " + mon_node[d] estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes) if estatus_mon_d != 0: raise RuntimeError("ceph-deploy: Failed to delete monitor") node_dev_list = get_dev_for_osd(ctx, config) for d in node_dev_list: node = d[0] for disk in d[1:]: zap = './ceph-deploy disk zap ' + node + ':' + disk estatus = execute_ceph_deploy(zap) if estatus != 0: raise RuntimeError("ceph-deploy: Failed to zap osds") osd_create_cmd = './ceph-deploy osd create ' if config.get('dmcrypt') is not None: osd_create_cmd += '--dmcrypt ' osd_create_cmd += ":".join(d) estatus_osd = execute_ceph_deploy(osd_create_cmd) if estatus_osd == 0: log.info('successfully created osd') no_of_osds += 1 else: raise RuntimeError("ceph-deploy: Failed to create osds") if config.get('wait-for-healthy', True) and no_of_osds >= 2: is_healthy(ctx=ctx, config=None) log.info('Setting up client nodes...') conf_path = '/etc/ceph/ceph.conf' admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring' first_mon = teuthology.get_first_mon(ctx, config) (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys() conf_data = teuthology.get_file( remote=mon0_remote, path=conf_path, sudo=True, ) admin_keyring = teuthology.get_file( remote=mon0_remote, path=admin_keyring_path, sudo=True, ) clients = ctx.cluster.only(teuthology.is_type('client')) for remot, roles_for_host in clients.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'client'): client_keyring = \ '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_) mon0_remote.run( args=[ 'cd', '{tdir}'.format(tdir=testdir), run.Raw('&&'), 'sudo', 'bash', '-c', run.Raw('"'), 'ceph', 'auth', 'get-or-create', 'client.{id}'.format(id=id_), 'mds', 'allow', 'mon', 'allow *', 'osd', 'allow *', run.Raw('>'), client_keyring, run.Raw('"'), ], ) key_data = teuthology.get_file( remote=mon0_remote, path=client_keyring, sudo=True, ) teuthology.sudo_write_file( remote=remot, path=client_keyring, data=key_data, perms='0644' ) teuthology.sudo_write_file( remote=remot, path=admin_keyring_path, data=admin_keyring, perms='0644' ) teuthology.sudo_write_file( remote=remot, path=conf_path, data=conf_data, perms='0644' ) if mds_nodes: log.info('Configuring CephFS...') ceph_fs = Filesystem(ctx, create=True) elif not config.get('only_mon'): raise RuntimeError( "The cluster is NOT operational due to insufficient OSDs") yield except Exception: log.info( "Error encountered, logging exception before tearing down ceph-deploy") log.info(traceback.format_exc()) raise finally: if config.get('keep_running'): return log.info('Stopping ceph...') ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'), 'sudo', 'service', 'ceph', 'stop', run.Raw('||'), 'sudo', 'systemctl', 'stop', 'ceph.target']) # Are you really not running anymore? 
# try first with the init tooling # ignoring the status so this becomes informational only ctx.cluster.run( args=[ 'sudo', 'status', 'ceph-all', run.Raw('||'), 'sudo', 'service', 'ceph', 'status', run.Raw('||'), 'sudo', 'systemctl', 'status', 'ceph.target'], check_status=False) # and now just check for the processes themselves, as if upstart/sysvinit # is lying to us. Ignore errors if the grep fails ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'), 'grep', '-v', 'grep', run.Raw('|'), 'grep', 'ceph'], check_status=False) if ctx.archive is not None: # archive mon data, too log.info('Archiving mon data...') path = os.path.join(ctx.archive, 'data') os.makedirs(path) mons = ctx.cluster.only(teuthology.is_type('mon')) for remote, roles in mons.remotes.iteritems(): for role in roles: if role.startswith('mon.'): teuthology.pull_directory_tarball( remote, '/var/lib/ceph/mon', path + '/' + role + '.tgz') log.info('Compressing logs...') run.wait( ctx.cluster.run( args=[ 'sudo', 'find', '/var/log/ceph', '-name', '*.log', '-print0', run.Raw('|'), 'sudo', 'xargs', '-0', '--no-run-if-empty', '--', 'gzip', '--', ], wait=False, ), ) log.info('Archiving logs...') path = os.path.join(ctx.archive, 'remote') os.makedirs(path) for remote in ctx.cluster.remotes.iterkeys(): sub = os.path.join(path, remote.shortname) os.makedirs(sub) teuthology.pull_directory(remote, '/var/log/ceph', os.path.join(sub, 'log')) # Prevent these from being undefined if the try block fails all_nodes = get_all_nodes(ctx, config) purge_nodes = './ceph-deploy purge' + " " + all_nodes purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes log.info('Purging package...') execute_ceph_deploy(purge_nodes) log.info('Purging data...') execute_ceph_deploy(purgedata_nodes)
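# Sketch of how the ceph-deploy osd create arguments are assembled earlier in
# build_ceph_cluster() from a node_dev_list entry of the form
# [node, disk1, disk2, ...]; enabling dmcrypt just adds a flag. The helper
# name and host/device names are placeholders.
def _osd_create_cmd(entry, dmcrypt=False):
    cmd = './ceph-deploy osd create '
    if dmcrypt:
        cmd += '--dmcrypt '
    return cmd + ":".join(entry)

assert _osd_create_cmd(['node1', 'sdb']) == './ceph-deploy osd create node1:sdb'
assert _osd_create_cmd(['node1', 'sdb'], dmcrypt=True) == \
    './ceph-deploy osd create --dmcrypt node1:sdb'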
def update_devstack_config_files(devstack_node, secret_uuid):
    log.info("Updating DevStack config files to use Ceph...")

    def backup_config(node, file_name, backup_ext=".orig.teuth"):
        node.run(args=["cp", "-f", file_name, file_name + backup_ext])

    def update_config(config_name, config_stream, update_dict,
                      section="DEFAULT"):
        parser = ConfigParser()
        parser.read_file(config_stream)
        for (key, value) in update_dict.items():
            parser.set(section, key, value)
        out_stream = StringIO()
        parser.write(out_stream)
        out_stream.seek(0)
        return out_stream

    updates = [
        dict(
            name="/etc/glance/glance-api.conf",
            options=dict(
                default_store="rbd",
                rbd_store_user="******",
                rbd_store_pool="images",
                show_image_direct_url="True"
            ),
        ),
        dict(
            name="/etc/cinder/cinder.conf",
            options=dict(
                volume_driver="cinder.volume.drivers.rbd.RBDDriver",
                rbd_pool="volumes",
                rbd_ceph_conf="/etc/ceph/ceph.conf",
                rbd_flatten_volume_from_snapshot="false",
                rbd_max_clone_depth="5",
                glance_api_version="2",
                rbd_user="******",
                rbd_secret_uuid=secret_uuid,
                backup_driver="cinder.backup.drivers.ceph",
                backup_ceph_conf="/etc/ceph/ceph.conf",
                backup_ceph_user="******",
                backup_ceph_chunk_size="134217728",
                backup_ceph_pool="backups",
                backup_ceph_stripe_unit="0",
                backup_ceph_stripe_count="0",
                restore_discard_excess_bytes="true",
            ),
        ),
        dict(
            name="/etc/nova/nova.conf",
            options=dict(
                libvirt_images_type="rbd",
                libvirt_images_rbd_pool="volumes",
                libvirt_images_rbd_ceph_conf="/etc/ceph/ceph.conf",
                rbd_user="******",
                rbd_secret_uuid=secret_uuid,
                libvirt_inject_password="******",
                libvirt_inject_key="false",
                libvirt_inject_partition="-2",
            ),
        ),
    ]

    for update in updates:
        file_name = update["name"]
        options = update["options"]
        config_str = misc.get_file(devstack_node, file_name, sudo=True)
        config_stream = StringIO(config_str)
        backup_config(devstack_node, file_name)
        new_config_stream = update_config(file_name, config_stream, options)
        misc.sudo_write_file(devstack_node, file_name, new_config_stream)
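# A runnable, in-memory example of the update_config() flow above: read an
# ini-style config from a stream, overwrite a few keys in one section, and
# serialize it back. Only stdlib ConfigParser/StringIO are used; the helper
# name and sample ini text are invented for the example.
try:
    from ConfigParser import ConfigParser          # Python 2
    from StringIO import StringIO
except ImportError:
    from configparser import ConfigParser          # Python 3
    from io import StringIO

def _update_ini(config_text, update_dict, section="DEFAULT"):
    parser = ConfigParser()
    try:
        parser.read_file(StringIO(config_text))    # Python 3 name
    except AttributeError:
        parser.readfp(StringIO(config_text))       # Python 2 name
    for key, value in update_dict.items():
        parser.set(section, key, value)
    out = StringIO()
    parser.write(out)
    return out.getvalue()

print(_update_ini("[DEFAULT]\ndefault_store = file\n",
                  {"default_store": "rbd", "rbd_store_pool": "images"}))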
def cluster(ctx, config): log.info('Creating ceph cluster...') run.wait( ctx.cluster.run( args=[ 'install', '-d', '-m0755', '--', '/tmp/cephtest/data', ], wait=False, ) ) log.info('Generating config...') remotes_and_roles = ctx.cluster.remotes.items() roles = [roles for (remote, roles) in remotes_and_roles] ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, roles) in remotes_and_roles)] conf = teuthology.skeleton_config(roles=roles, ips=ips) for section, keys in config['conf'].iteritems(): for key, value in keys.iteritems(): log.info("[%s] %s = %s" % (section, key, value)) if section not in conf: conf[section] = {} conf[section][key] = value ctx.ceph = argparse.Namespace() ctx.ceph.conf = conf log.info('Writing configs...') conf_fp = StringIO() conf.write(conf_fp) conf_fp.seek(0) writes = ctx.cluster.run( args=[ 'python', '-c', 'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))', '/tmp/cephtest/ceph.conf', ], stdin=run.PIPE, wait=False, ) teuthology.feed_many_stdins_and_close(conf_fp, writes) run.wait(writes) coverage_dir = '/tmp/cephtest/archive/coverage' firstmon = teuthology.get_first_mon(ctx, config) log.info('Setting up %s...' % firstmon) ctx.cluster.only(firstmon).run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--create-keyring', '/tmp/cephtest/ceph.keyring', ], ) ctx.cluster.only(firstmon).run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--gen-key', '--name=mon.', '/tmp/cephtest/ceph.keyring', ], ) (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() teuthology.create_simple_monmap( remote=mon0_remote, conf=conf, ) log.info('Creating admin key on %s...' 
% firstmon) ctx.cluster.only(firstmon).run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--gen-key', '--name=client.admin', '--set-uid=0', '--cap', 'mon', 'allow *', '--cap', 'osd', 'allow *', '--cap', 'mds', 'allow', '/tmp/cephtest/ceph.keyring', ], ) log.info('Copying monmap to all nodes...') keyring = teuthology.get_file( remote=mon0_remote, path='/tmp/cephtest/ceph.keyring', ) monmap = teuthology.get_file( remote=mon0_remote, path='/tmp/cephtest/monmap', ) for rem in ctx.cluster.remotes.iterkeys(): # copy mon key and initial monmap log.info('Sending monmap to node {remote}'.format(remote=rem)) teuthology.write_file( remote=rem, path='/tmp/cephtest/ceph.keyring', data=keyring, ) teuthology.write_file( remote=rem, path='/tmp/cephtest/monmap', data=monmap, ) log.info('Setting up mon nodes...') mons = ctx.cluster.only(teuthology.is_type('mon')) run.wait( mons.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/osdmaptool', '--clobber', '--createsimple', '{num:d}'.format( num=teuthology.num_instances_of_type(ctx.cluster, 'osd'), ), '/tmp/cephtest/osdmap', '--pg_bits', '2', '--pgp_bits', '4', ], wait=False, ), ) log.info('Setting up osd nodes...') osds = ctx.cluster.only(teuthology.is_type('osd')) for remote, roles_for_host in osds.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--create-keyring', '--gen-key', '--name=osd.{id}'.format(id=id_), '/tmp/cephtest/data/osd.{id}.keyring'.format(id=id_), ], ) log.info('Setting up mds nodes...') mdss = ctx.cluster.only(teuthology.is_type('mds')) for remote, roles_for_host in mdss.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'mds'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--create-keyring', '--gen-key', '--name=mds.{id}'.format(id=id_), '/tmp/cephtest/data/mds.{id}.keyring'.format(id=id_), ], ) log.info('Setting up client nodes...') clients = ctx.cluster.only(teuthology.is_type('client')) for remote, roles_for_host in clients.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'client'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--create-keyring', '--gen-key', # TODO this --name= is not really obeyed, all unknown "types" are munged to "client" '--name=client.{id}'.format(id=id_), '/tmp/cephtest/data/client.{id}.keyring'.format(id=id_), ], ) log.info('Reading keys from all nodes...') keys_fp = StringIO() keys = [] for remote, roles_for_host in ctx.cluster.remotes.iteritems(): for type_ in ['osd', 'mds', 'client']: for id_ in teuthology.roles_of_type(roles_for_host, type_): data = teuthology.get_file( remote=remote, path='/tmp/cephtest/data/{type}.{id}.keyring'.format( type=type_, id=id_, ), ) keys.append((type_, id_, data)) keys_fp.write(data) log.info('Adding keys to all mons...') writes = mons.run( args=[ 'cat', run.Raw('>>'), '/tmp/cephtest/ceph.keyring', ], stdin=run.PIPE, wait=False, ) keys_fp.seek(0) 
teuthology.feed_many_stdins_and_close(keys_fp, writes) run.wait(writes) for type_, id_, data in keys: run.wait( mons.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '/tmp/cephtest/ceph.keyring', '--name={type}.{id}'.format( type=type_, id=id_, ), ] + list(teuthology.generate_caps(type_)), wait=False, ), ) log.info('Running mkfs on mon nodes...') for remote, roles_for_host in mons.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'mon'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-mon', '--mkfs', '-i', id_, '-c', '/tmp/cephtest/ceph.conf', '--monmap=/tmp/cephtest/monmap', '--osdmap=/tmp/cephtest/osdmap', '--keyring=/tmp/cephtest/ceph.keyring', ], ) log.info('Running mkfs on osd nodes...') devs_to_clean = {} for remote, roles_for_host in osds.remotes.iteritems(): roles_to_devs = {} if config.get('btrfs'): log.info('btrfs option selected, checkin for scrach devs') devs = teuthology.get_scratch_devices(remote) log.info('found devs: %s' % (str(devs),)) roles_to_devs = assign_devs( teuthology.roles_of_type(roles_for_host, 'osd'), devs ) log.info('dev map: %s' % (str(roles_to_devs),)) devs_to_clean[remote] = [] for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): remote.run( args=[ 'mkdir', os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)), ], ) if roles_to_devs.get(id_): dev = roles_to_devs[id_] log.info('mkfs.btrfs on %s on %s' % (dev, remote)) remote.run( args=[ 'sudo', 'apt-get', 'install', '-y', 'btrfs-tools' ] ) remote.run( args=[ 'sudo', 'mkfs.btrfs', dev ] ) log.info('mount %s on %s' % (dev, remote)) remote.run( args=[ 'sudo', 'mount', '-o', 'user_subvol_rm_allowed', dev, os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)), ] ) remote.run( args=[ 'sudo', 'chown', '-R', 'ubuntu.ubuntu', os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)) ] ) remote.run( args=[ 'sudo', 'chmod', '-R', '755', os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)) ] ) devs_to_clean[remote].append( os.path.join( '/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_) ) ) for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-osd', '--mkfs', '-i', id_, '-c', '/tmp/cephtest/ceph.conf', '--monmap', '/tmp/cephtest/monmap', ], ) run.wait( mons.run( args=[ 'rm', '--', '/tmp/cephtest/monmap', '/tmp/cephtest/osdmap', ], wait=False, ), ) try: yield finally: (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() if ctx.archive is not None: log.info('Grabbing cluster log from %s %s...' 
% (mon0_remote, firstmon)) dest = os.path.join(ctx.archive, 'ceph.log') mon0_remote.run( args = [ 'cat', '--', '/tmp/cephtest/data/%s/log' % firstmon ], stdout=file(dest, 'wb'), ) log.info('Checking cluster ceph.log for badness...') def first_in_ceph_log(pattern, excludes): args = [ 'egrep', pattern, '/tmp/cephtest/data/%s/log' % firstmon, ] for exclude in excludes: args.extend([run.Raw('|'), 'egrep', '-v', exclude]) args.extend([ run.Raw('|'), 'head', '-n', '1', ]) r = mon0_remote.run( stdout=StringIO(), args=args, ) stdout = r.stdout.getvalue() if stdout != '': return stdout return None if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', config['log_whitelist']) is not None: log.warning('Found errors (ERR|WRN|SEC) in cluster log') ctx.summary['success'] = False # use the most severe problem as the failure reason if 'failure_reason' not in ctx.summary: for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: match = first_in_ceph_log(pattern, config['log_whitelist']) if match is not None: ctx.summary['failure_reason'] = \ '"{match}" in cluster log'.format( match=match.rstrip('\n'), ) break for remote, dirs in devs_to_clean.iteritems(): for dir_ in dirs: log.info('Unmounting %s on %s' % (dir_, remote)) remote.run( args=[ "sudo", "umount", "-f", dir_ ] ) log.info('Cleaning ceph cluster...') run.wait( ctx.cluster.run( args=[ 'rm', '-rf', '--', '/tmp/cephtest/ceph.conf', '/tmp/cephtest/ceph.keyring', '/tmp/cephtest/data', '/tmp/cephtest/monmap', run.Raw('/tmp/cephtest/asok.*') ], wait=False, ), )
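# Pure-Python sketch of the cluster-log triage done in the teardown above:
# find the first ERR/WRN/SEC line not matching any whitelist pattern, and use
# the most severe category as the failure reason. This mirrors the intent of
# the egrep pipeline; helper names and sample log lines are fabricated.
import re

def _first_in_log(text, pattern, excludes):
    for line in text.split('\n'):
        if re.search(pattern, line) and \
                not any(re.search(ex, line) for ex in excludes):
            return line
    return None

def _failure_reason(text, whitelist):
    for pattern in [r'\[SEC\]', r'\[ERR\]', r'\[WRN\]']:  # most severe first
        match = _first_in_log(text, pattern, whitelist)
        if match is not None:
            return '"{match}" in cluster log'.format(match=match)
    return None

_log = ("2013-01-01 mon.0 [WRN] slow request\n"
        "2013-01-01 osd.1 [ERR] scrub mismatch\n")
assert _failure_reason(_log, [r'slow request']) == \
    '"2013-01-01 osd.1 [ERR] scrub mismatch" in cluster log'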
def install_kernel(remote, path=None, version=None): """ A bit of misnomer perhaps - the actual kernel package is installed elsewhere, this function deals with initrd and grub. Currently the following cases are handled: - local, gitbuilder, distro for rpm packages - distro for deb packages - see TODO in install_and_reboot() TODO: reboots should be issued from install_and_reboot() :param path: package path (for local and gitbuilder cases) :param version: for RPM distro kernels, pass this to update_grub_rpm """ templ = "install_kernel(remote={remote}, path={path}, version={version})" log.debug(templ.format(remote=remote, path=path, version=version)) package_type = remote.os.package_type if package_type == 'rpm': if path: version = get_image_version(remote, path) # This is either a gitbuilder or a local package and both of these # could have been built with upstream rpm targets with specs that # don't have a %post section at all, which means no initrd. maybe_generate_initrd_rpm(remote, path, version) elif not version or version == 'distro': version = get_latest_image_version_rpm(remote) update_grub_rpm(remote, version) remote.run( args=['sudo', 'shutdown', '-r', 'now'], wait=False ) return if package_type == 'deb': distribution = remote.os.name newversion = get_latest_image_version_deb(remote, distribution) if 'ubuntu' in distribution: grub2conf = teuthology.get_file(remote, '/boot/grub/grub.cfg', True) submenu = '' menuentry = '' for line in grub2conf.split('\n'): if 'submenu' in line: submenu = line.split('submenu ')[1] # Ubuntu likes to be sneaky and change formatting of # grub.cfg between quotes/doublequotes between versions if submenu.startswith("'"): submenu = submenu.split("'")[1] if submenu.startswith('"'): submenu = submenu.split('"')[1] if 'menuentry' in line: if newversion in line and 'recovery' not in line: menuentry = line.split('\'')[1] break if submenu: grubvalue = submenu + '>' + menuentry else: grubvalue = menuentry grubfile = 'cat <<EOF\nset default="' + grubvalue + '"\nEOF' teuthology.delete_file(remote, '/etc/grub.d/01_ceph_kernel', sudo=True, force=True) teuthology.sudo_write_file(remote, '/etc/grub.d/01_ceph_kernel', StringIO(grubfile), '755') log.info('Distro Kernel Version: {version}'.format(version=newversion)) remote.run(args=['sudo', 'update-grub']) remote.run(args=['sudo', 'shutdown', '-r', 'now'], wait=False ) return if 'debian' in distribution: grub2_kernel_select_generic(remote, newversion, 'deb') log.info('Distro Kernel Version: {version}'.format(version=newversion)) remote.run( args=['sudo', 'shutdown', '-r', 'now'], wait=False ) return
def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None): """ Run the individual test. Create a scratch directory and then extract the workunits from git. Make the executables, and then run the tests. Clean up (remove files created) after the tests are finished. :param ctx: Context :param refspec: branch, sha1, or version tag used to identify this build :param tests: specific tests specified. :param env: environment set in yaml file. Could be None. :param subdir: subdirectory set in yaml file. Could be None :param timeout: If present, use the 'timeout' command on the remote host to limit execution time. Must be specified by a number followed by 's' for seconds, 'm' for minutes, 'h' for hours, or 'd' for days. If '0' or anything that evaluates to False is passed, the 'timeout' command is not used. """ testdir = misc.get_testdir(ctx) assert isinstance(role, basestring) cluster, type_, id_ = misc.split_role(role) assert type_ == 'client' remote = get_remote_for_role(ctx, role) mnt = _client_mountpoint(ctx, cluster, id_) # subdir so we can remove and recreate this a lot without sudo if subdir is None: scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp') else: scratch_tmp = os.path.join(mnt, subdir) clonedir = '{tdir}/clone.{role}'.format(tdir=testdir, role=role) srcdir = '{cdir}/qa/workunits'.format(cdir=clonedir) git_url = teuth_config.get_ceph_git_url() try: remote.run( logger=log.getChild(role), args=[ 'rm', '-rf', clonedir, run.Raw('&&'), 'git', 'clone', git_url, clonedir, run.Raw('&&'), 'cd', '--', clonedir, run.Raw('&&'), 'git', 'checkout', refspec, ], ) except CommandFailedError: alt_git_url = git_url.replace('ceph-ci', 'ceph') log.info( "failed to check out '%s' from %s; will also try in %s", refspec, git_url, alt_git_url, ) remote.run( logger=log.getChild(role), args=[ 'rm', '-rf', clonedir, run.Raw('&&'), 'git', 'clone', alt_git_url, clonedir, run.Raw('&&'), 'cd', '--', clonedir, run.Raw('&&'), 'git', 'checkout', refspec, ], ) remote.run( logger=log.getChild(role), args=[ 'cd', '--', srcdir, run.Raw('&&'), 'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi', run.Raw('&&'), 'find', '-executable', '-type', 'f', '-printf', r'%P\0'.format(srcdir=srcdir), run.Raw('>{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)), ], ) workunits_file = '{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role) workunits = sorted(misc.get_file(remote, workunits_file).split('\0')) assert workunits try: assert isinstance(tests, list) for spec in tests: log.info('Running workunits matching %s on %s...', spec, role) prefix = '{spec}/'.format(spec=spec) to_run = [w for w in workunits if w == spec or w.startswith(prefix)] if not to_run: raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec)) for workunit in to_run: log.info('Running workunit %s...', workunit) args = [ 'mkdir', '-p', '--', scratch_tmp, run.Raw('&&'), 'cd', '--', scratch_tmp, run.Raw('&&'), run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'), run.Raw('CEPH_REF={ref}'.format(ref=refspec)), run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)), run.Raw('CEPH_ARGS="--cluster {0}"'.format(cluster)), run.Raw('CEPH_ID="{id}"'.format(id=id_)), run.Raw('PATH=$PATH:/usr/sbin'), run.Raw('CEPH_BASE={dir}'.format(dir=clonedir)), ] if env is not None: for var, val in env.iteritems(): quoted_val = pipes.quote(val) env_arg = '{var}={val}'.format(var=var, val=quoted_val) args.append(run.Raw(env_arg)) args.extend([ 'adjust-ulimits', 'ceph-coverage', 
'{tdir}/archive/coverage'.format(tdir=testdir)]) if timeout and timeout != '0': args.extend(['timeout', timeout]) args.extend([ '{srcdir}/{workunit}'.format( srcdir=srcdir, workunit=workunit, ), ]) remote.run( logger=log.getChild(role), args=args, label="workunit test {workunit}".format(workunit=workunit) ) remote.run( logger=log.getChild(role), args=['sudo', 'rm', '-rf', '--', scratch_tmp], ) finally: log.info('Stopping %s on %s...', tests, role) remote.run( logger=log.getChild(role), args=[ 'rm', '-rf', '--', workunits_file, clonedir, ], )
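# Minimal model of the spec-to-workunit matching used in _run_tests() above:
# a spec selects either an exact workunit path or everything under that
# directory. The helper name and sample workunit names are invented.
def _match_workunits(workunits, spec):
    prefix = '{spec}/'.format(spec=spec)
    return [w for w in workunits if w == spec or w.startswith(prefix)]

_wu = ['rbd/test_librbd.sh', 'rados/load-gen-mix.sh', 'rados/test.sh']
assert _match_workunits(_wu, 'rados') == ['rados/load-gen-mix.sh',
                                          'rados/test.sh']
assert _match_workunits(_wu, 'rbd/test_librbd.sh') == ['rbd/test_librbd.sh']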
def cluster(ctx, config): """ Handle the creation and removal of a ceph cluster. On startup: Create directories needed for the cluster. Create remote journals for all osds. Create and set keyring. Copy the monmap to tht test systems. Setup mon nodes. Setup mds nodes. Mkfs osd nodes. Add keyring information to monmaps Mkfs mon nodes. On exit: If errors occured, extract a failure message and store in ctx.summary. Unmount all test files and temporary journaling files. Save the monitor information and archive all ceph logs. Cleanup the keyring setup, and remove all monitor map and data files left over. :param ctx: Context :param config: Configuration """ if ctx.config.get('use_existing_cluster', False) is True: log.info("'use_existing_cluster' is true; skipping cluster creation") yield testdir = teuthology.get_testdir(ctx) log.info('Creating ceph cluster...') run.wait( ctx.cluster.run( args=[ 'install', '-d', '-m0755', '--', '{tdir}/data'.format(tdir=testdir), ], wait=False, ) ) run.wait( ctx.cluster.run( args=[ 'sudo', 'install', '-d', '-m0777', '--', '/var/run/ceph', ], wait=False, ) ) devs_to_clean = {} remote_to_roles_to_devs = {} remote_to_roles_to_journals = {} osds = ctx.cluster.only(teuthology.is_type('osd')) for remote, roles_for_host in osds.remotes.iteritems(): devs = teuthology.get_scratch_devices(remote) roles_to_devs = {} roles_to_journals = {} if config.get('fs'): log.info('fs option selected, checking for scratch devs') log.info('found devs: %s' % (str(devs),)) devs_id_map = teuthology.get_wwn_id_map(remote, devs) iddevs = devs_id_map.values() roles_to_devs = assign_devs( teuthology.roles_of_type(roles_for_host, 'osd'), iddevs ) if len(roles_to_devs) < len(iddevs): iddevs = iddevs[len(roles_to_devs):] devs_to_clean[remote] = [] if config.get('block_journal'): log.info('block journal enabled') roles_to_journals = assign_devs( teuthology.roles_of_type(roles_for_host, 'osd'), iddevs ) log.info('journal map: %s', roles_to_journals) if config.get('tmpfs_journal'): log.info('tmpfs journal enabled') roles_to_journals = {} remote.run( args=[ 'sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt' ] ) for osd in teuthology.roles_of_type(roles_for_host, 'osd'): tmpfs = '/mnt/osd.%s' % osd roles_to_journals[osd] = tmpfs remote.run( args=[ 'truncate', '-s', '1500M', tmpfs ] ) log.info('journal map: %s', roles_to_journals) log.info('dev map: %s' % (str(roles_to_devs),)) remote_to_roles_to_devs[remote] = roles_to_devs remote_to_roles_to_journals[remote] = roles_to_journals log.info('Generating config...') remotes_and_roles = ctx.cluster.remotes.items() roles = [role_list for (remote, role_list) in remotes_and_roles] ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)] conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips) for remote, roles_to_journals in remote_to_roles_to_journals.iteritems(): for role, journal in roles_to_journals.iteritems(): key = "osd." 
+ str(role) if key not in conf: conf[key] = {} conf[key]['osd journal'] = journal for section, keys in config['conf'].iteritems(): for key, value in keys.iteritems(): log.info("[%s] %s = %s" % (section, key, value)) if section not in conf: conf[section] = {} conf[section][key] = value if config.get('tmpfs_journal'): conf['journal dio'] = False ctx.ceph = argparse.Namespace() ctx.ceph.conf = conf keyring_path = config.get('keyring_path', '/etc/ceph/ceph.keyring') coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) firstmon = teuthology.get_first_mon(ctx, config) log.info('Setting up %s...' % firstmon) ctx.cluster.only(firstmon).run( args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', '--create-keyring', keyring_path, ], ) ctx.cluster.only(firstmon).run( args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', '--gen-key', '--name=mon.', keyring_path, ], ) ctx.cluster.only(firstmon).run( args=[ 'sudo', 'chmod', '0644', keyring_path, ], ) (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() fsid = teuthology.create_simple_monmap( ctx, remote=mon0_remote, conf=conf, ) if not 'global' in conf: conf['global'] = {} conf['global']['fsid'] = fsid log.info('Writing ceph.conf for FSID %s...' % fsid) conf_path = config.get('conf_path', DEFAULT_CONF_PATH) write_conf(ctx, conf_path) log.info('Creating admin key on %s...' % firstmon) ctx.cluster.only(firstmon).run( args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', '--gen-key', '--name=client.admin', '--set-uid=0', '--cap', 'mon', 'allow *', '--cap', 'osd', 'allow *', '--cap', 'mds', 'allow *', keyring_path, ], ) log.info('Copying monmap to all nodes...') keyring = teuthology.get_file( remote=mon0_remote, path=keyring_path, ) monmap = teuthology.get_file( remote=mon0_remote, path='{tdir}/monmap'.format(tdir=testdir), ) for rem in ctx.cluster.remotes.iterkeys(): # copy mon key and initial monmap log.info('Sending monmap to node {remote}'.format(remote=rem)) teuthology.sudo_write_file( remote=rem, path=keyring_path, data=keyring, perms='0644' ) teuthology.write_file( remote=rem, path='{tdir}/monmap'.format(tdir=testdir), data=monmap, ) log.info('Setting up mon nodes...') mons = ctx.cluster.only(teuthology.is_type('mon')) run.wait( mons.run( args=[ 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'osdmaptool', '-c', conf_path, '--clobber', '--createsimple', '{num:d}'.format( num=teuthology.num_instances_of_type(ctx.cluster, 'osd'), ), '{tdir}/osdmap'.format(tdir=testdir), '--pg_bits', '2', '--pgp_bits', '4', ], wait=False, ), ) log.info('Setting up mds nodes...') mdss = ctx.cluster.only(teuthology.is_type('mds')) for remote, roles_for_host in mdss.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'mds'): remote.run( args=[ 'sudo', 'mkdir', '-p', '/var/lib/ceph/mds/ceph-{id}'.format(id=id_), run.Raw('&&'), 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', '--create-keyring', '--gen-key', '--name=mds.{id}'.format(id=id_), '/var/lib/ceph/mds/ceph-{id}/keyring'.format(id=id_), ], ) cclient.create_keyring(ctx) log.info('Running mkfs on osd nodes...') ctx.disk_config = argparse.Namespace() ctx.disk_config.remote_to_roles_to_dev = remote_to_roles_to_devs ctx.disk_config.remote_to_roles_to_journals = remote_to_roles_to_journals ctx.disk_config.remote_to_roles_to_dev_mount_options = {} ctx.disk_config.remote_to_roles_to_dev_fstype = {} log.info("ctx.disk_config.remote_to_roles_to_dev: 
{r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev))) for remote, roles_for_host in osds.remotes.iteritems(): roles_to_devs = remote_to_roles_to_devs[remote] roles_to_journals = remote_to_roles_to_journals[remote] for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): remote.run( args=[ 'sudo', 'mkdir', '-p', '/var/lib/ceph/osd/ceph-{id}'.format(id=id_), ]) log.info(str(roles_to_journals)) log.info(id_) if roles_to_devs.get(id_): dev = roles_to_devs[id_] fs = config.get('fs') package = None mkfs_options = config.get('mkfs_options') mount_options = config.get('mount_options') if fs == 'btrfs': #package = 'btrfs-tools' if mount_options is None: mount_options = ['noatime','user_subvol_rm_allowed'] if mkfs_options is None: mkfs_options = ['-m', 'single', '-l', '32768', '-n', '32768'] if fs == 'xfs': #package = 'xfsprogs' if mount_options is None: mount_options = ['noatime'] if mkfs_options is None: mkfs_options = ['-f', '-i', 'size=2048'] if fs == 'ext4' or fs == 'ext3': if mount_options is None: mount_options = ['noatime','user_xattr'] if mount_options is None: mount_options = [] if mkfs_options is None: mkfs_options = [] mkfs = ['mkfs.%s' % fs] + mkfs_options log.info('%s on %s on %s' % (mkfs, dev, remote)) if package is not None: remote.run( args=[ 'sudo', 'apt-get', 'install', '-y', package ], stdout=StringIO(), ) try: remote.run(args= ['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) except run.CommandFailedError: # Newer btfs-tools doesn't prompt for overwrite, use -f if '-f' not in mount_options: mkfs_options.append('-f') mkfs = ['mkfs.%s' % fs] + mkfs_options log.info('%s on %s on %s' % (mkfs, dev, remote)) remote.run(args= ['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) log.info('mount %s on %s -o %s' % (dev, remote, ','.join(mount_options))) remote.run( args=[ 'sudo', 'mount', '-t', fs, '-o', ','.join(mount_options), dev, os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=id_)), ] ) if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options: ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {} ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][id_] = mount_options if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype: ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {} ctx.disk_config.remote_to_roles_to_dev_fstype[remote][id_] = fs devs_to_clean[remote].append( os.path.join( os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=id_)), ) ) for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): remote.run( args=[ 'sudo', 'MALLOC_CHECK_=3', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-osd', '--mkfs', '--mkkey', '-i', id_, '--monmap', '{tdir}/monmap'.format(tdir=testdir), ], ) log.info('Reading keys from all nodes...') keys_fp = StringIO() keys = [] for remote, roles_for_host in ctx.cluster.remotes.iteritems(): for type_ in ['mds','osd']: for id_ in teuthology.roles_of_type(roles_for_host, type_): data = teuthology.get_file( remote=remote, path='/var/lib/ceph/{type}/ceph-{id}/keyring'.format( type=type_, id=id_, ), sudo=True, ) keys.append((type_, id_, data)) keys_fp.write(data) for remote, roles_for_host in ctx.cluster.remotes.iteritems(): for type_ in ['client']: for id_ in teuthology.roles_of_type(roles_for_host, type_): data = teuthology.get_file( remote=remote, path='/etc/ceph/ceph.client.{id}.keyring'.format(id=id_) ) keys.append((type_, id_, data)) keys_fp.write(data) log.info('Adding keys to all mons...') writes = mons.run( args=[ 'sudo', 'tee', '-a', keyring_path, ], stdin=run.PIPE, wait=False, 
stdout=StringIO(), ) keys_fp.seek(0) teuthology.feed_many_stdins_and_close(keys_fp, writes) run.wait(writes) for type_, id_, data in keys: run.wait( mons.run( args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', keyring_path, '--name={type}.{id}'.format( type=type_, id=id_, ), ] + list(teuthology.generate_caps(type_)), wait=False, ), ) log.info('Running mkfs on mon nodes...') for remote, roles_for_host in mons.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'mon'): remote.run( args=[ 'sudo', 'mkdir', '-p', '/var/lib/ceph/mon/ceph-{id}'.format(id=id_), ], ) remote.run( args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-mon', '--mkfs', '-i', id_, '--monmap={tdir}/monmap'.format(tdir=testdir), '--osdmap={tdir}/osdmap'.format(tdir=testdir), '--keyring={kpath}'.format(kpath=keyring_path), ], ) run.wait( mons.run( args=[ 'rm', '--', '{tdir}/monmap'.format(tdir=testdir), '{tdir}/osdmap'.format(tdir=testdir), ], wait=False, ), ) try: yield except Exception: # we need to know this below ctx.summary['success'] = False raise finally: (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() log.info('Checking cluster log for badness...') def first_in_ceph_log(pattern, excludes): """ Find the first occurence of the pattern specified in the Ceph log, Returns None if none found. :param pattern: Pattern scanned for. :param excludes: Patterns to ignore. :return: First line of text (or None if not found) """ args = [ 'sudo', 'egrep', pattern, '/var/log/ceph/ceph.log', ] for exclude in excludes: args.extend([run.Raw('|'), 'egrep', '-v', exclude]) args.extend([ run.Raw('|'), 'head', '-n', '1', ]) r = mon0_remote.run( stdout=StringIO(), args=args, ) stdout = r.stdout.getvalue() if stdout != '': return stdout return None if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', config['log_whitelist']) is not None: log.warning('Found errors (ERR|WRN|SEC) in cluster log') ctx.summary['success'] = False # use the most severe problem as the failure reason if 'failure_reason' not in ctx.summary: for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: match = first_in_ceph_log(pattern, config['log_whitelist']) if match is not None: ctx.summary['failure_reason'] = \ '"{match}" in cluster log'.format( match=match.rstrip('\n'), ) break for remote, dirs in devs_to_clean.iteritems(): for dir_ in dirs: log.info('Unmounting %s on %s' % (dir_, remote)) try: remote.run( args=[ 'sync', run.Raw('&&'), 'sudo', 'umount', '-f', dir_ ] ) except Exception as e: remote.run(args=[ 'sudo', run.Raw('PATH=/usr/sbin:$PATH'), 'lsof', run.Raw(';'), 'ps', 'auxf', ]) raise e if config.get('tmpfs_journal'): log.info('tmpfs journal enabled - unmounting tmpfs at /mnt') for remote, roles_for_host in osds.remotes.iteritems(): remote.run( args=[ 'sudo', 'umount', '-f', '/mnt' ], check_status=False, ) if ctx.archive is not None and \ not (ctx.config.get('archive-on-error') and ctx.summary['success']): # archive mon data, too log.info('Archiving mon data...') path = os.path.join(ctx.archive, 'data') os.makedirs(path) for remote, roles in mons.remotes.iteritems(): for role in roles: if role.startswith('mon.'): teuthology.pull_directory_tarball( remote, '/var/lib/ceph/mon', path + '/' + role + '.tgz') log.info('Cleaning ceph cluster...') run.wait( ctx.cluster.run( args=[ 'sudo', 'rm', '-rf', '--', conf_path, keyring_path, '{tdir}/data'.format(tdir=testdir), '{tdir}/monmap'.format(tdir=testdir), ], wait=False, ), )
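# Sketch of the role-to-scratch-device pairing that assign_devs() performs in
# the osd setup above. assign_devs() itself is defined elsewhere in this task;
# the behaviour shown here (zip roles with devices, extras left unused) is an
# assumption based on how its result is consumed.
def _assign_devs(roles, devs):
    return dict(zip(roles, devs))

assert _assign_devs(['0', '1'], ['/dev/sdb', '/dev/sdc', '/dev/sdd']) == \
    {'0': '/dev/sdb', '1': '/dev/sdc'}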
def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None): """ Run the individual test. Create a scratch directory and then extract the workunits from git. Make the executables, and then run the tests. Clean up (remove files created) after the tests are finished. :param ctx: Context :param refspec: branch, sha1, or version tag used to identify this build :param tests: specific tests specified. :param env: environment set in yaml file. Could be None. :param subdir: subdirectory set in yaml file. Could be None :param timeout: If present, use the 'timeout' command on the remote host to limit execution time. Must be specified by a number followed by 's' for seconds, 'm' for minutes, 'h' for hours, or 'd' for days. If '0' or anything that evaluates to False is passed, the 'timeout' command is not used. """ testdir = teuthology.get_testdir(ctx) assert isinstance(role, basestring) PREFIX = 'client.' assert role.startswith(PREFIX) id_ = role[len(PREFIX):] (remote,) = ctx.cluster.only(role).remotes.iterkeys() mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) # subdir so we can remove and recreate this a lot without sudo if subdir is None: scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp') else: scratch_tmp = os.path.join(mnt, subdir) srcdir = '{tdir}/workunit.{role}'.format(tdir=testdir, role=role) remote.run( logger=log.getChild(role), args=[ 'mkdir', '--', srcdir, run.Raw('&&'), 'git', 'archive', '--remote=git://ceph.newdream.net/git/ceph.git', '%s:qa/workunits' % refspec, run.Raw('|'), 'tar', '-C', srcdir, '-x', '-f-', run.Raw('&&'), 'cd', '--', srcdir, run.Raw('&&'), 'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi', run.Raw('&&'), 'find', '-executable', '-type', 'f', '-printf', r'%P\0'.format(srcdir=srcdir), run.Raw('>{tdir}/workunits.list'.format(tdir=testdir)), ], ) workunits = sorted(teuthology.get_file( remote, '{tdir}/workunits.list'.format(tdir=testdir)).split('\0')) assert workunits try: assert isinstance(tests, list) for spec in tests: log.info('Running workunits matching %s on %s...', spec, role) prefix = '{spec}/'.format(spec=spec) to_run = [w for w in workunits if w == spec or w.startswith(prefix)] if not to_run: raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec)) for workunit in to_run: log.info('Running workunit %s...', workunit) args = [ 'mkdir', '-p', '--', scratch_tmp, run.Raw('&&'), 'cd', '--', scratch_tmp, run.Raw('&&'), run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'), run.Raw('CEPH_REF={ref}'.format(ref=refspec)), run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)), run.Raw('CEPH_ID="{id}"'.format(id=id_)), ] if env is not None: for var, val in env.iteritems(): quoted_val = pipes.quote(val) env_arg = '{var}={val}'.format(var=var, val=quoted_val) args.append(run.Raw(env_arg)) args.extend([ 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir)]) if timeout and timeout != '0': args.extend(['timeout', timeout]) args.extend([ '{srcdir}/{workunit}'.format( srcdir=srcdir, workunit=workunit, ), ]) remote.run( logger=log.getChild(role), args=args, ) remote.run( logger=log.getChild(role), args=['sudo', 'rm', '-rf', '--', scratch_tmp], ) finally: log.info('Stopping %s on %s...', spec, role) remote.run( logger=log.getChild(role), args=[ 'rm', '-rf', '--', '{tdir}/workunits.list'.format(tdir=testdir), srcdir, ], )
def _run_tests(ctx, refspec, role, tests, env, subdir=None): testdir = teuthology.get_testdir(ctx) assert isinstance(role, basestring) PREFIX = 'client.' assert role.startswith(PREFIX) id_ = role[len(PREFIX):] (remote,) = ctx.cluster.only(role).remotes.iterkeys() mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) # subdir so we can remove and recreate this a lot without sudo if subdir is None: scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp') else: scratch_tmp = os.path.join(mnt, subdir) srcdir = '{tdir}/workunit.{role}'.format(tdir=testdir, role=role) remote.run( logger=log.getChild(role), args=[ 'mkdir', '--', srcdir, run.Raw('&&'), 'git', 'archive', '--remote=git://ceph.newdream.net/git/ceph.git', '%s:qa/workunits' % refspec, run.Raw('|'), 'tar', '-C', srcdir, '-x', '-f-', run.Raw('&&'), 'cd', '--', srcdir, run.Raw('&&'), 'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi', run.Raw('&&'), 'find', '-executable', '-type', 'f', '-printf', r'%P\0'.format(srcdir=srcdir), run.Raw('>{tdir}/workunits.list'.format(tdir=testdir)), ], ) workunits = sorted(teuthology.get_file( remote, '{tdir}/workunits.list'.format(tdir=testdir)).split('\0')) assert workunits try: assert isinstance(tests, list) for spec in tests: log.info('Running workunits matching %s on %s...', spec, role) prefix = '{spec}/'.format(spec=spec) to_run = [w for w in workunits if w == spec or w.startswith(prefix)] if not to_run: raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec)) for workunit in to_run: log.info('Running workunit %s...', workunit) args = [ 'mkdir', '-p', '--', scratch_tmp, run.Raw('&&'), 'cd', '--', scratch_tmp, run.Raw('&&'), run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'), run.Raw('CEPH_REF={ref}'.format(ref=refspec)), run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)), run.Raw('CEPH_ID="{id}"'.format(id=id_)), ] if env is not None: for var, val in env.iteritems(): quoted_val = pipes.quote(val) env_arg = '{var}={val}'.format(var=var, val=quoted_val) args.append(run.Raw(env_arg)) args.extend([ 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), '{srcdir}/{workunit}'.format( srcdir=srcdir, workunit=workunit, ), ]) remote.run( logger=log.getChild(role), args=args, ) remote.run( logger=log.getChild(role), args=['sudo', 'rm', '-rf', '--', scratch_tmp], ) finally: log.info('Stopping %s on %s...', spec, role) remote.run( logger=log.getChild(role), args=[ 'rm', '-rf', '--', '{tdir}/workunits.list'.format(tdir=testdir), srcdir, ], )
def cluster(ctx, config): """ Handle the creation and removal of a ceph cluster. On startup: Create directories needed for the cluster. Create remote journals for all osds. Create and set keyring. Copy the monmap to tht test systems. Setup mon nodes. Setup mds nodes. Mkfs osd nodes. Add keyring information to monmaps Mkfs mon nodes. On exit: If errors occured, extract a failure message and store in ctx.summary. Unmount all test files and temporary journaling files. Save the monitor information and archive all ceph logs. Cleanup the keyring setup, and remove all monitor map and data files left over. :param ctx: Context :param config: Configuration """ if ctx.config.get("use_existing_cluster", False) is True: log.info("'use_existing_cluster' is true; skipping cluster creation") yield testdir = teuthology.get_testdir(ctx) cluster_name = config["cluster"] data_dir = "{tdir}/{cluster}.data".format(tdir=testdir, cluster=cluster_name) log.info("Creating ceph cluster %s...", cluster_name) run.wait(ctx.cluster.run(args=["install", "-d", "-m0755", "--", data_dir], wait=False)) run.wait(ctx.cluster.run(args=["sudo", "install", "-d", "-m0777", "--", "/var/run/ceph"], wait=False)) devs_to_clean = {} remote_to_roles_to_devs = {} remote_to_roles_to_journals = {} osds = ctx.cluster.only(teuthology.is_type("osd", cluster_name)) for remote, roles_for_host in osds.remotes.iteritems(): devs = teuthology.get_scratch_devices(remote) roles_to_devs = {} roles_to_journals = {} if config.get("fs"): log.info("fs option selected, checking for scratch devs") log.info("found devs: %s" % (str(devs),)) devs_id_map = teuthology.get_wwn_id_map(remote, devs) iddevs = devs_id_map.values() roles_to_devs = assign_devs(teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name), iddevs) if len(roles_to_devs) < len(iddevs): iddevs = iddevs[len(roles_to_devs) :] devs_to_clean[remote] = [] if config.get("block_journal"): log.info("block journal enabled") roles_to_journals = assign_devs( teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name), iddevs ) log.info("journal map: %s", roles_to_journals) if config.get("tmpfs_journal"): log.info("tmpfs journal enabled") roles_to_journals = {} remote.run(args=["sudo", "mount", "-t", "tmpfs", "tmpfs", "/mnt"]) for role in teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name): tmpfs = "/mnt/" + role roles_to_journals[role] = tmpfs remote.run(args=["truncate", "-s", "1500M", tmpfs]) log.info("journal map: %s", roles_to_journals) log.info("dev map: %s" % (str(roles_to_devs),)) remote_to_roles_to_devs[remote] = roles_to_devs remote_to_roles_to_journals[remote] = roles_to_journals log.info("Generating config...") remotes_and_roles = ctx.cluster.remotes.items() roles = [role_list for (remote, role_list) in remotes_and_roles] ips = [ host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles) ] conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips, cluster=cluster_name) for remote, roles_to_journals in remote_to_roles_to_journals.iteritems(): for role, journal in roles_to_journals.iteritems(): name = teuthology.ceph_role(role) if name not in conf: conf[name] = {} conf[name]["osd journal"] = journal for section, keys in config["conf"].iteritems(): for key, value in keys.iteritems(): log.info("[%s] %s = %s" % (section, key, value)) if section not in conf: conf[section] = {} conf[section][key] = value if config.get("tmpfs_journal"): conf["journal dio"] = False if not hasattr(ctx, "ceph"): ctx.ceph = {} 
ctx.ceph[cluster_name] = argparse.Namespace() ctx.ceph[cluster_name].conf = conf default_keyring = "/etc/ceph/{cluster}.keyring".format(cluster=cluster_name) keyring_path = config.get("keyring_path", default_keyring) coverage_dir = "{tdir}/archive/coverage".format(tdir=testdir) firstmon = teuthology.get_first_mon(ctx, config, cluster_name) log.info("Setting up %s..." % firstmon) ctx.cluster.only(firstmon).run( args=[ "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir, "ceph-authtool", "--create-keyring", keyring_path, ] ) ctx.cluster.only(firstmon).run( args=[ "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir, "ceph-authtool", "--gen-key", "--name=mon.", keyring_path, ] ) ctx.cluster.only(firstmon).run(args=["sudo", "chmod", "0644", keyring_path]) (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() monmap_path = "{tdir}/{cluster}.monmap".format(tdir=testdir, cluster=cluster_name) fsid = teuthology.create_simple_monmap(ctx, remote=mon0_remote, conf=conf, path=monmap_path) if not "global" in conf: conf["global"] = {} conf["global"]["fsid"] = fsid default_conf_path = "/etc/ceph/{cluster}.conf".format(cluster=cluster_name) conf_path = config.get("conf_path", default_conf_path) log.info("Writing %s for FSID %s..." % (conf_path, fsid)) write_conf(ctx, conf_path, cluster_name) log.info("Creating admin key on %s..." % firstmon) ctx.cluster.only(firstmon).run( args=[ "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir, "ceph-authtool", "--gen-key", "--name=client.admin", "--set-uid=0", "--cap", "mon", "allow *", "--cap", "osd", "allow *", "--cap", "mds", "allow *", keyring_path, ] ) log.info("Copying monmap to all nodes...") keyring = teuthology.get_file(remote=mon0_remote, path=keyring_path) monmap = teuthology.get_file(remote=mon0_remote, path=monmap_path) for rem in ctx.cluster.remotes.iterkeys(): # copy mon key and initial monmap log.info("Sending monmap to node {remote}".format(remote=rem)) teuthology.sudo_write_file(remote=rem, path=keyring_path, data=keyring, perms="0644") teuthology.write_file(remote=rem, path=monmap_path, data=monmap) log.info("Setting up mon nodes...") mons = ctx.cluster.only(teuthology.is_type("mon", cluster_name)) osdmap_path = "{tdir}/{cluster}.osdmap".format(tdir=testdir, cluster=cluster_name) run.wait( mons.run( args=[ "adjust-ulimits", "ceph-coverage", coverage_dir, "osdmaptool", "-c", conf_path, "--clobber", "--createsimple", "{num:d}".format(num=teuthology.num_instances_of_type(ctx.cluster, "osd", cluster_name)), osdmap_path, "--pg_bits", "2", "--pgp_bits", "4", ], wait=False, ) ) log.info("Setting up mgr nodes...") mgrs = ctx.cluster.only(teuthology.is_type("mgr", cluster_name)) for remote, roles_for_host in mgrs.remotes.iteritems(): for role in teuthology.cluster_roles_of_type(roles_for_host, "mgr", cluster_name): _, _, id_ = teuthology.split_role(role) mgr_dir = "/var/lib/ceph/mgr/{cluster}-{id}".format(cluster=cluster_name, id=id_) remote.run( args=[ "sudo", "mkdir", "-p", mgr_dir, run.Raw("&&"), "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir, "ceph-authtool", "--create-keyring", "--gen-key", "--name=mgr.{id}".format(id=id_), mgr_dir + "/keyring", ] ) log.info("Setting up mds nodes...") mdss = ctx.cluster.only(teuthology.is_type("mds", cluster_name)) for remote, roles_for_host in mdss.remotes.iteritems(): for role in teuthology.cluster_roles_of_type(roles_for_host, "mds", cluster_name): _, _, id_ = teuthology.split_role(role) mds_dir = "/var/lib/ceph/mds/{cluster}-{id}".format(cluster=cluster_name, id=id_) remote.run( args=[ 
"sudo", "mkdir", "-p", mds_dir, run.Raw("&&"), "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir, "ceph-authtool", "--create-keyring", "--gen-key", "--name=mds.{id}".format(id=id_), mds_dir + "/keyring", ] ) cclient.create_keyring(ctx, cluster_name) log.info("Running mkfs on osd nodes...") if not hasattr(ctx, "disk_config"): ctx.disk_config = argparse.Namespace() if not hasattr(ctx.disk_config, "remote_to_roles_to_dev"): ctx.disk_config.remote_to_roles_to_dev = {} if not hasattr(ctx.disk_config, "remote_to_roles_to_journals"): ctx.disk_config.remote_to_roles_to_journals = {} if not hasattr(ctx.disk_config, "remote_to_roles_to_dev_mount_options"): ctx.disk_config.remote_to_roles_to_dev_mount_options = {} if not hasattr(ctx.disk_config, "remote_to_roles_to_dev_fstype"): ctx.disk_config.remote_to_roles_to_dev_fstype = {} teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_dev, remote_to_roles_to_devs) teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_journals, remote_to_roles_to_journals) log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev))) for remote, roles_for_host in osds.remotes.iteritems(): roles_to_devs = remote_to_roles_to_devs[remote] roles_to_journals = remote_to_roles_to_journals[remote] for role in teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name): _, _, id_ = teuthology.split_role(role) mnt_point = "/var/lib/ceph/osd/{cluster}-{id}".format(cluster=cluster_name, id=id_) remote.run(args=["sudo", "mkdir", "-p", mnt_point]) log.info(str(roles_to_journals)) log.info(role) if roles_to_devs.get(role): dev = roles_to_devs[role] fs = config.get("fs") package = None mkfs_options = config.get("mkfs_options") mount_options = config.get("mount_options") if fs == "btrfs": # package = 'btrfs-tools' if mount_options is None: mount_options = ["noatime", "user_subvol_rm_allowed"] if mkfs_options is None: mkfs_options = ["-m", "single", "-l", "32768", "-n", "32768"] if fs == "xfs": # package = 'xfsprogs' if mount_options is None: mount_options = ["noatime"] if mkfs_options is None: mkfs_options = ["-f", "-i", "size=2048"] if fs == "ext4" or fs == "ext3": if mount_options is None: mount_options = ["noatime", "user_xattr"] if mount_options is None: mount_options = [] if mkfs_options is None: mkfs_options = [] mkfs = ["mkfs.%s" % fs] + mkfs_options log.info("%s on %s on %s" % (mkfs, dev, remote)) if package is not None: remote.run(args=["sudo", "apt-get", "install", "-y", package], stdout=StringIO()) try: remote.run(args=["yes", run.Raw("|")] + ["sudo"] + mkfs + [dev]) except run.CommandFailedError: # Newer btfs-tools doesn't prompt for overwrite, use -f if "-f" not in mount_options: mkfs_options.append("-f") mkfs = ["mkfs.%s" % fs] + mkfs_options log.info("%s on %s on %s" % (mkfs, dev, remote)) remote.run(args=["yes", run.Raw("|")] + ["sudo"] + mkfs + [dev]) log.info("mount %s on %s -o %s" % (dev, remote, ",".join(mount_options))) remote.run(args=["sudo", "mount", "-t", fs, "-o", ",".join(mount_options), dev, mnt_point]) if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options: ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {} ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][role] = mount_options if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype: ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {} ctx.disk_config.remote_to_roles_to_dev_fstype[remote][role] = fs devs_to_clean[remote].append(mnt_point) for role in 
teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name): _, _, id_ = teuthology.split_role(role) remote.run( args=[ "sudo", "MALLOC_CHECK_=3", "adjust-ulimits", "ceph-coverage", coverage_dir, "ceph-osd", "--cluster", cluster_name, "--mkfs", "--mkkey", "-i", id_, "--monmap", monmap_path, ] ) log.info("Reading keys from all nodes...") keys_fp = StringIO() keys = [] for remote, roles_for_host in ctx.cluster.remotes.iteritems(): for type_ in ["mgr", "mds", "osd"]: for role in teuthology.cluster_roles_of_type(roles_for_host, type_, cluster_name): _, _, id_ = teuthology.split_role(role) data = teuthology.get_file( remote=remote, path="/var/lib/ceph/{type}/{cluster}-{id}/keyring".format(type=type_, id=id_, cluster=cluster_name), sudo=True, ) keys.append((type_, id_, data)) keys_fp.write(data) for remote, roles_for_host in ctx.cluster.remotes.iteritems(): for role in teuthology.cluster_roles_of_type(roles_for_host, "client", cluster_name): _, _, id_ = teuthology.split_role(role) data = teuthology.get_file( remote=remote, path="/etc/ceph/{cluster}.client.{id}.keyring".format(id=id_, cluster=cluster_name) ) keys.append(("client", id_, data)) keys_fp.write(data) log.info("Adding keys to all mons...") writes = mons.run(args=["sudo", "tee", "-a", keyring_path], stdin=run.PIPE, wait=False, stdout=StringIO()) keys_fp.seek(0) teuthology.feed_many_stdins_and_close(keys_fp, writes) run.wait(writes) for type_, id_, data in keys: run.wait( mons.run( args=[ "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir, "ceph-authtool", keyring_path, "--name={type}.{id}".format(type=type_, id=id_), ] + list(generate_caps(type_)), wait=False, ) ) log.info("Running mkfs on mon nodes...") for remote, roles_for_host in mons.remotes.iteritems(): for role in teuthology.cluster_roles_of_type(roles_for_host, "mon", cluster_name): _, _, id_ = teuthology.split_role(role) remote.run( args=["sudo", "mkdir", "-p", "/var/lib/ceph/mon/{cluster}-{id}".format(id=id_, cluster=cluster_name)] ) remote.run( args=[ "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir, "ceph-mon", "--cluster", cluster_name, "--mkfs", "-i", id_, "--monmap", monmap_path, "--osdmap", osdmap_path, "--keyring", keyring_path, ] ) run.wait(mons.run(args=["rm", "--", monmap_path, osdmap_path], wait=False)) try: yield except Exception: # we need to know this below ctx.summary["success"] = False raise finally: (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() log.info("Checking cluster log for badness...") def first_in_ceph_log(pattern, excludes): """ Find the first occurence of the pattern specified in the Ceph log, Returns None if none found. :param pattern: Pattern scanned for. :param excludes: Patterns to ignore. 
:return: First line of text (or None if not found) """ args = ["sudo", "egrep", pattern, "/var/log/ceph/{cluster}.log".format(cluster=cluster_name)] for exclude in excludes: args.extend([run.Raw("|"), "egrep", "-v", exclude]) args.extend([run.Raw("|"), "head", "-n", "1"]) r = mon0_remote.run(stdout=StringIO(), args=args) stdout = r.stdout.getvalue() if stdout != "": return stdout return None if first_in_ceph_log("\[ERR\]|\[WRN\]|\[SEC\]", config["log_whitelist"]) is not None: log.warning("Found errors (ERR|WRN|SEC) in cluster log") ctx.summary["success"] = False # use the most severe problem as the failure reason if "failure_reason" not in ctx.summary: for pattern in ["\[SEC\]", "\[ERR\]", "\[WRN\]"]: match = first_in_ceph_log(pattern, config["log_whitelist"]) if match is not None: ctx.summary["failure_reason"] = '"{match}" in cluster log'.format(match=match.rstrip("\n")) break for remote, dirs in devs_to_clean.iteritems(): for dir_ in dirs: log.info("Unmounting %s on %s" % (dir_, remote)) try: remote.run(args=["sync", run.Raw("&&"), "sudo", "umount", "-f", dir_]) except Exception as e: remote.run(args=["sudo", run.Raw("PATH=/usr/sbin:$PATH"), "lsof", run.Raw(";"), "ps", "auxf"]) raise e if config.get("tmpfs_journal"): log.info("tmpfs journal enabled - unmounting tmpfs at /mnt") for remote, roles_for_host in osds.remotes.iteritems(): remote.run(args=["sudo", "umount", "-f", "/mnt"], check_status=False) if ctx.archive is not None and not (ctx.config.get("archive-on-error") and ctx.summary["success"]): # archive mon data, too log.info("Archiving mon data...") path = os.path.join(ctx.archive, "data") try: os.makedirs(path) except OSError as e: if e.errno == errno.EEXIST: pass else: raise for remote, roles in mons.remotes.iteritems(): for role in roles: is_mon = teuthology.is_type("mon", cluster_name) if is_mon(role): _, _, id_ = teuthology.split_role(role) mon_dir = "/var/lib/ceph/mon/" + "{0}-{1}".format(cluster_name, id_) teuthology.pull_directory_tarball(remote, mon_dir, path + "/" + role + ".tgz") log.info("Cleaning ceph cluster...") run.wait( ctx.cluster.run( args=[ "sudo", "rm", "-rf", "--", conf_path, keyring_path, data_dir, monmap_path, osdmap_path, run.Raw("{tdir}/../*.pid".format(tdir=testdir)), ], wait=False, ) )
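# The teardown above greps /var/log/ceph/<cluster>.log for [ERR]/[WRN]/[SEC]
# entries, drops whitelisted lines, and reports the most severe remaining match
# as the failure reason.  A minimal local sketch of that logic, assuming a plain
# file path instead of the egrep pipeline the real task runs on the monitor node:
import re

def first_in_log(path, pattern, excludes):
    """Return the first line matching pattern and none of excludes, or None."""
    exclude_res = [re.compile(e) for e in excludes]
    with open(path) as f:
        for line in f:
            if re.search(pattern, line) and not any(r.search(line) for r in exclude_res):
                return line.rstrip('\n')
    return None

def failure_reason(path, whitelist):
    # scan most-severe-first, mirroring the [SEC], [ERR], [WRN] ordering above
    for pattern in [r'\[SEC\]', r'\[ERR\]', r'\[WRN\]']:
        match = first_in_log(path, pattern, whitelist)
        if match is not None:
            return '"{match}" in cluster log'.format(match=match)
    return None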
def _run_tests(ctx, role, tests): assert isinstance(role, basestring) PREFIX = 'client.' assert role.startswith(PREFIX) id_ = role[len(PREFIX):] (remote,) = ctx.cluster.only(role).remotes.iterkeys() mnt = os.path.join('/tmp/cephtest', 'mnt.{id}'.format(id=id_)) # subdir so we can remove and recreate this a lot without sudo scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp') srcdir = '/tmp/cephtest/workunit.{role}'.format(role=role) secretfile = '/tmp/cephtest/data/{role}.secret'.format(role=role) teuthology.write_secret_file(remote, role, secretfile) remote.run( logger=log.getChild(role), args=[ 'mkdir', '--', srcdir, run.Raw('&&'), 'wget', '-q', '-O-', # TODO make branch/tag/sha1 used configurable 'https://github.com/NewDreamNetwork/ceph/tarball/HEAD', run.Raw('|'), 'tar', '-C', srcdir, '-x', '-z', '-f-', '--wildcards', '--no-wildcards-match-slash', '--strip-components=3', '--', '*/qa/workunits/', run.Raw('&&'), 'cd', '--', srcdir, run.Raw('&&'), 'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi', run.Raw('&&'), 'find', '-executable', '-type', 'f', '-printf', r'%P\0'.format(srcdir=srcdir), run.Raw('>/tmp/cephtest/workunits.list'), ], ) workunits = sorted(teuthology.get_file(remote, '/tmp/cephtest/workunits.list').split('\0')) assert workunits try: assert isinstance(tests, list) for spec in tests: log.info('Running workunits matching %s on %s...', spec, role) prefix = '{spec}/'.format(spec=spec) to_run = [w for w in workunits if w == spec or w.startswith(prefix)] if not to_run: raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec)) for workunit in to_run: log.info('Running workunit %s...', workunit) remote.run( logger=log.getChild(role), args=[ 'mkdir', '--', scratch_tmp, run.Raw('&&'), 'cd', '--', scratch_tmp, run.Raw('&&'), run.Raw('PATH="$PATH:/tmp/cephtest/binary/usr/local/bin"'), run.Raw('LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/tmp/cephtest/binary/usr/local/lib"'), run.Raw('CEPH_CONF="/tmp/cephtest/ceph.conf"'), run.Raw('CEPH_SECRET_FILE="{file}"'.format(file=secretfile)), run.Raw('CEPH_ID="{id}"'.format(id=id_)), run.Raw('PYTHONPATH="$PYTHONPATH:/tmp/cephtest/binary/usr/local/lib/python2.6/dist-packages"'), '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', '/tmp/cephtest/archive/coverage', '{srcdir}/{workunit}'.format( srcdir=srcdir, workunit=workunit, ), run.Raw('&&'), 'rm', '-rf', '--', scratch_tmp, ], ) finally: remote.run( logger=log.getChild(role), args=[ 'rm', '-rf', '--', '/tmp/cephtest/workunits.list', srcdir, ], )
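# _run_tests above expands each test "spec" into the workunits it names: an
# exact path runs one workunit, a directory-style spec runs everything under
# it.  A standalone sketch of that matching; the workunit names below are
# invented for illustration:
def match_workunits(workunits, spec):
    prefix = '{spec}/'.format(spec=spec)
    to_run = [w for w in workunits if w == spec or w.startswith(prefix)]
    if not to_run:
        raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec))
    return to_run

workunits = ['direct_io/big.sh', 'direct_io/test_sync_io', 'suites/fsstress.sh']
assert match_workunits(workunits, 'direct_io') == ['direct_io/big.sh', 'direct_io/test_sync_io']
assert match_workunits(workunits, 'suites/fsstress.sh') == ['suites/fsstress.sh']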
def build_ceph_cluster(ctx, config): log.info('Building ceph cluster using ceph-deploy...') testdir = teuthology.get_testdir(ctx) ceph_branch = None if config.get('branch') is not None: cbranch = config.get('branch') for var, val in cbranch.iteritems(): if var == 'testing': ceph_branch = '--{var}'.format(var=var) ceph_branch = '--{var}={val}'.format(var=var, val=val) node_dev_list = [] all_nodes = get_all_nodes(ctx, config) mds_nodes = get_nodes_using_roles(ctx, config, 'mds') mds_nodes = " ".join(mds_nodes) mon_node = get_nodes_using_roles(ctx, config, 'mon') mon_nodes = " ".join(mon_node) new_mon = './ceph-deploy new'+" "+mon_nodes install_nodes = './ceph-deploy install '+ceph_branch+" "+all_nodes purge_nodes = './ceph-deploy purge'+" "+all_nodes purgedata_nodes = './ceph-deploy purgedata'+" "+all_nodes mon_hostname = mon_nodes.split(' ')[0] mon_hostname = str(mon_hostname) gather_keys = './ceph-deploy gatherkeys'+" "+mon_hostname deploy_mds = './ceph-deploy mds create'+" "+mds_nodes no_of_osds = 0 if mon_nodes is None: raise RuntimeError("no monitor nodes in the config file") estatus_new = execute_ceph_deploy(ctx, config, new_mon) if estatus_new != 0: raise RuntimeError("ceph-deploy: new command failed") log.info('adding config inputs...') testdir = teuthology.get_testdir(ctx) conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir) first_mon = teuthology.get_first_mon(ctx, config) (remote,) = ctx.cluster.only(first_mon).remotes.keys() lines = None if config.get('conf') is not None: confp = config.get('conf') for section, keys in confp.iteritems(): lines = '[{section}]\n'.format(section=section) teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True) for key, value in keys.iteritems(): log.info("[%s] %s = %s" % (section, key, value)) lines = '{key} = {value}\n'.format(key=key, value=value) teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True) estatus_install = execute_ceph_deploy(ctx, config, install_nodes) if estatus_install != 0: raise RuntimeError("ceph-deploy: Failed to install ceph") mon_no = None mon_no = config.get('mon_initial_members') if mon_no is not None: i = 0 mon1 = [] while(i < mon_no): mon1.append(mon_node[i]) i = i + 1 initial_mons = " ".join(mon1) for k in range(mon_no, len(mon_node)): mon_create_nodes = './ceph-deploy mon create'+" "+initial_mons+" "+mon_node[k] estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes) if estatus_mon != 0: raise RuntimeError("ceph-deploy: Failed to create monitor") else: mon_create_nodes = './ceph-deploy mon create'+" "+mon_nodes estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes) if estatus_mon != 0: raise RuntimeError("ceph-deploy: Failed to create monitors") estatus_gather = execute_ceph_deploy(ctx, config, gather_keys) while (estatus_gather != 0): #mon_create_nodes = './ceph-deploy mon create'+" "+mon_node[0] #execute_ceph_deploy(ctx, config, mon_create_nodes) estatus_gather = execute_ceph_deploy(ctx, config, gather_keys) if mds_nodes: estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds) if estatus_mds != 0: raise RuntimeError("ceph-deploy: Failed to deploy mds") if config.get('test_mon_destroy') is not None: for d in range(1, len(mon_node)): mon_destroy_nodes = './ceph-deploy mon destroy'+" "+mon_node[d] estatus_mon_d = execute_ceph_deploy(ctx, config, mon_destroy_nodes) if estatus_mon_d != 0: raise RuntimeError("ceph-deploy: Failed to delete monitor") node_dev_list = get_dev_for_osd(ctx, config) for d in node_dev_list: osd_create_cmds = './ceph-deploy osd create 
--zap-disk'+" "+d estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds) if estatus_osd == 0: log.info('successfully created osd') no_of_osds += 1 else: zap_disk = './ceph-deploy disk zap'+" "+d execute_ceph_deploy(ctx, config, zap_disk) estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds) if estatus_osd == 0: log.info('successfully created osd') no_of_osds += 1 else: raise RuntimeError("ceph-deploy: Failed to create osds") if config.get('wait-for-healthy', True) and no_of_osds >= 2: is_healthy(ctx=ctx, config=None) log.info('Setting up client nodes...') conf_path = '/etc/ceph/ceph.conf' admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring' first_mon = teuthology.get_first_mon(ctx, config) (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys() conf_data = teuthology.get_file( remote=mon0_remote, path=conf_path, sudo=True, ) admin_keyring = teuthology.get_file( remote=mon0_remote, path=admin_keyring_path, sudo=True, ) clients = ctx.cluster.only(teuthology.is_type('client')) for remot, roles_for_host in clients.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'client'): client_keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_) mon0_remote.run( args=[ 'cd', '{tdir}'.format(tdir=testdir), run.Raw('&&'), 'sudo','bash','-c', run.Raw('"'),'ceph', 'auth', 'get-or-create', 'client.{id}'.format(id=id_), 'mds', 'allow', 'mon', 'allow *', 'osd', 'allow *', run.Raw('>'), client_keyring, run.Raw('"'), ], ) key_data = teuthology.get_file( remote=mon0_remote, path=client_keyring, sudo=True, ) teuthology.sudo_write_file( remote=remot, path=client_keyring, data=key_data, perms='0644' ) teuthology.sudo_write_file( remote=remot, path=admin_keyring_path, data=admin_keyring, perms='0644' ) teuthology.sudo_write_file( remote=remot, path=conf_path, data=conf_data, perms='0644' ) else: raise RuntimeError("The cluster is NOT operational due to insufficient OSDs") try: yield finally: log.info('Stopping ceph...') ctx.cluster.run(args=[ 'sudo', 'stop', 'ceph-all', run.Raw('||'), 'sudo', 'service', 'ceph', 'stop' ]) if ctx.archive is not None: # archive mon data, too log.info('Archiving mon data...') path = os.path.join(ctx.archive, 'data') os.makedirs(path) mons = ctx.cluster.only(teuthology.is_type('mon')) for remote, roles in mons.remotes.iteritems(): for role in roles: if role.startswith('mon.'): teuthology.pull_directory_tarball( remote, '/var/lib/ceph/mon', path + '/' + role + '.tgz') log.info('Compressing logs...') run.wait( ctx.cluster.run( args=[ 'sudo', 'find', '/var/log/ceph', '-name', '*.log', '-print0', run.Raw('|'), 'sudo', 'xargs', '-0', '--no-run-if-empty', '--', 'gzip', '--', ], wait=False, ), ) log.info('Archiving logs...') path = os.path.join(ctx.archive, 'remote') os.makedirs(path) for remote in ctx.cluster.remotes.iterkeys(): sub = os.path.join(path, remote.shortname) os.makedirs(sub) teuthology.pull_directory(remote, '/var/log/ceph', os.path.join(sub, 'log')) log.info('Purging package...') execute_ceph_deploy(ctx, config, purge_nodes) log.info('Purging data...') execute_ceph_deploy(ctx, config, purgedata_nodes)
def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None): """ Run the individual test. Create a scratch directory and then extract the workunits from git. Make the executables, and then run the tests. Clean up (remove files created) after the tests are finished. :param ctx: Context :param refspec: branch, sha1, or version tag used to identify this build :param tests: specific tests specified. :param env: environment set in yaml file. Could be None. :param subdir: subdirectory set in yaml file. Could be None :param timeout: If present, use the 'timeout' command on the remote host to limit execution time. Must be specified by a number followed by 's' for seconds, 'm' for minutes, 'h' for hours, or 'd' for days. If '0' or anything that evaluates to False is passed, the 'timeout' command is not used. """ testdir = misc.get_testdir(ctx) assert isinstance(role, basestring) assert role.startswith(CLIENT_PREFIX) id_ = role[len(CLIENT_PREFIX) :] (remote,) = ctx.cluster.only(role).remotes.iterkeys() mnt = os.path.join(testdir, "mnt.{id}".format(id=id_)) # subdir so we can remove and recreate this a lot without sudo if subdir is None: scratch_tmp = os.path.join(mnt, "client.{id}".format(id=id_), "tmp") else: scratch_tmp = os.path.join(mnt, subdir) srcdir = "{tdir}/workunit.{role}".format(tdir=testdir, role=role) remote.run( logger=log.getChild(role), args=[ "mkdir", "--", srcdir, run.Raw("&&"), "git", "archive", "--remote=git://ceph.newdream.net/git/ceph.git", "%s:qa/workunits" % refspec, run.Raw("|"), "tar", "-C", srcdir, "-x", "-f-", run.Raw("&&"), "cd", "--", srcdir, run.Raw("&&"), "if", "test", "-e", "Makefile", run.Raw(";"), "then", "make", run.Raw(";"), "fi", run.Raw("&&"), "find", "-executable", "-type", "f", "-printf", r"%P\0".format(srcdir=srcdir), run.Raw(">{tdir}/workunits.list".format(tdir=testdir)), ], ) workunits = sorted(misc.get_file(remote, "{tdir}/workunits.list".format(tdir=testdir)).split("\0")) assert workunits try: assert isinstance(tests, list) for spec in tests: log.info("Running workunits matching %s on %s...", spec, role) prefix = "{spec}/".format(spec=spec) to_run = [w for w in workunits if w == spec or w.startswith(prefix)] if not to_run: raise RuntimeError("Spec did not match any workunits: {spec!r}".format(spec=spec)) for workunit in to_run: log.info("Running workunit %s...", workunit) args = [ "mkdir", "-p", "--", scratch_tmp, run.Raw("&&"), "cd", "--", scratch_tmp, run.Raw("&&"), run.Raw("CEPH_CLI_TEST_DUP_COMMAND=1"), run.Raw("CEPH_REF={ref}".format(ref=refspec)), run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)), run.Raw('CEPH_ID="{id}"'.format(id=id_)), ] if env is not None: for var, val in env.iteritems(): quoted_val = pipes.quote(val) env_arg = "{var}={val}".format(var=var, val=quoted_val) args.append(run.Raw(env_arg)) args.extend(["adjust-ulimits", "ceph-coverage", "{tdir}/archive/coverage".format(tdir=testdir)]) if timeout and timeout != "0": args.extend(["timeout", timeout]) args.extend(["{srcdir}/{workunit}".format(srcdir=srcdir, workunit=workunit)]) remote.run(logger=log.getChild(role), args=args) remote.run(logger=log.getChild(role), args=["sudo", "rm", "-rf", "--", scratch_tmp]) finally: log.info("Stopping %s on %s...", tests, role) remote.run( logger=log.getChild(role), args=["rm", "-rf", "--", "{tdir}/workunits.list".format(tdir=testdir), srcdir] )
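# The workunit command line above is assembled from optional pieces: exported
# env vars (shell-quoted), the adjust-ulimits/ceph-coverage wrappers, an
# optional 'timeout' prefix, then the workunit script itself.  A simplified
# sketch of that assembly using plain strings instead of teuthology's run.Raw
# wrappers; the coverage path below is illustrative:
import pipes

def workunit_args(srcdir, workunit, env=None, timeout=None,
                  coverage_dir='/path/to/archive/coverage'):
    args = []
    if env:
        for var, val in sorted(env.items()):
            args.append('{var}={val}'.format(var=var, val=pipes.quote(val)))
    args.extend(['adjust-ulimits', 'ceph-coverage', coverage_dir])
    if timeout and timeout != '0':
        args.extend(['timeout', timeout])
    args.append('{srcdir}/{workunit}'.format(srcdir=srcdir, workunit=workunit))
    return args

print(workunit_args('/tmp/workunit.client.0', 'suites/fsstress.sh',
                    env={'FOO': 'a b'}, timeout='3h'))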
def cluster(ctx, config): log.info('Creating ceph cluster...') run.wait( ctx.cluster.run( args=[ 'install', '-d', '-m0755', '--', '/tmp/cephtest/data', ], wait=False, ) ) devs_to_clean = {} remote_to_roles_to_devs = {} remote_to_roles_to_journals = {} osds = ctx.cluster.only(teuthology.is_type('osd')) for remote, roles_for_host in osds.remotes.iteritems(): devs = teuthology.get_scratch_devices(remote) roles_to_devs = {} roles_to_journals = {} if config.get('fs'): log.info('fs option selected, checkin for scratch devs') log.info('found devs: %s' % (str(devs),)) roles_to_devs = assign_devs( teuthology.roles_of_type(roles_for_host, 'osd'), devs ) if len(roles_to_devs) < len(devs): devs = devs[len(roles_to_devs):] log.info('dev map: %s' % (str(roles_to_devs),)) devs_to_clean[remote] = [] if config.get('block_journal'): log.info('block journal enabled') roles_to_journals = assign_devs( teuthology.roles_of_type(roles_for_host, 'osd'), devs ) log.info('journal map: %s', roles_to_journals) if config.get('tmpfs_journal'): log.info('tmpfs journal enabled') roles_to_journals = {} remote.run( args=[ 'sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt' ] ) for osd in teuthology.roles_of_type(roles_for_host, 'osd'): tmpfs = '/mnt/osd.%s' % osd roles_to_journals[osd] = tmpfs remote.run( args=[ 'truncate', '-s', '1500M', tmpfs ] ) log.info('journal map: %s', roles_to_journals) remote_to_roles_to_devs[remote] = roles_to_devs remote_to_roles_to_journals[remote] = roles_to_journals log.info('Generating config...') remotes_and_roles = ctx.cluster.remotes.items() roles = [roles for (remote, roles) in remotes_and_roles] ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, roles) in remotes_and_roles)] conf = teuthology.skeleton_config(roles=roles, ips=ips) for remote, roles_to_journals in remote_to_roles_to_journals.iteritems(): for role, journal in roles_to_journals.iteritems(): key = "osd." + str(role) if key not in conf: conf[key] = {} conf[key]['osd journal'] = journal for section, keys in config['conf'].iteritems(): for key, value in keys.iteritems(): log.info("[%s] %s = %s" % (section, key, value)) if section not in conf: conf[section] = {} conf[section][key] = value if config.get('tmpfs_journal'): conf['journal dio'] = False ctx.ceph = argparse.Namespace() ctx.ceph.conf = conf log.info('Writing configs...') conf_fp = StringIO() conf.write(conf_fp) conf_fp.seek(0) writes = ctx.cluster.run( args=[ 'python', '-c', 'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))', '/tmp/cephtest/ceph.conf', ], stdin=run.PIPE, wait=False, ) teuthology.feed_many_stdins_and_close(conf_fp, writes) run.wait(writes) coverage_dir = '/tmp/cephtest/archive/coverage' firstmon = teuthology.get_first_mon(ctx, config) log.info('Setting up %s...' % firstmon) ctx.cluster.only(firstmon).run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--create-keyring', '/tmp/cephtest/ceph.keyring', ], ) ctx.cluster.only(firstmon).run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--gen-key', '--name=mon.', '/tmp/cephtest/ceph.keyring', ], ) (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() teuthology.create_simple_monmap( remote=mon0_remote, conf=conf, ) log.info('Creating admin key on %s...' 
% firstmon) ctx.cluster.only(firstmon).run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--gen-key', '--name=client.admin', '--set-uid=0', '--cap', 'mon', 'allow *', '--cap', 'osd', 'allow *', '--cap', 'mds', 'allow', '/tmp/cephtest/ceph.keyring', ], ) log.info('Copying monmap to all nodes...') keyring = teuthology.get_file( remote=mon0_remote, path='/tmp/cephtest/ceph.keyring', ) monmap = teuthology.get_file( remote=mon0_remote, path='/tmp/cephtest/monmap', ) for rem in ctx.cluster.remotes.iterkeys(): # copy mon key and initial monmap log.info('Sending monmap to node {remote}'.format(remote=rem)) teuthology.write_file( remote=rem, path='/tmp/cephtest/ceph.keyring', data=keyring, ) teuthology.write_file( remote=rem, path='/tmp/cephtest/monmap', data=monmap, ) log.info('Setting up mon nodes...') mons = ctx.cluster.only(teuthology.is_type('mon')) run.wait( mons.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/osdmaptool', '--clobber', '--createsimple', '{num:d}'.format( num=teuthology.num_instances_of_type(ctx.cluster, 'osd'), ), '/tmp/cephtest/osdmap', '--pg_bits', '2', '--pgp_bits', '4', ], wait=False, ), ) log.info('Setting up osd nodes...') for remote, roles_for_host in osds.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--create-keyring', '--gen-key', '--name=osd.{id}'.format(id=id_), '/tmp/cephtest/data/osd.{id}.keyring'.format(id=id_), ], ) log.info('Setting up mds nodes...') mdss = ctx.cluster.only(teuthology.is_type('mds')) for remote, roles_for_host in mdss.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'mds'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--create-keyring', '--gen-key', '--name=mds.{id}'.format(id=id_), '/tmp/cephtest/data/mds.{id}.keyring'.format(id=id_), ], ) log.info('Setting up client nodes...') clients = ctx.cluster.only(teuthology.is_type('client')) for remote, roles_for_host in clients.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'client'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--create-keyring', '--gen-key', # TODO this --name= is not really obeyed, all unknown "types" are munged to "client" '--name=client.{id}'.format(id=id_), '/tmp/cephtest/data/client.{id}.keyring'.format(id=id_), ], ) log.info('Reading keys from all nodes...') keys_fp = StringIO() keys = [] for remote, roles_for_host in ctx.cluster.remotes.iteritems(): for type_ in ['osd', 'mds', 'client']: for id_ in teuthology.roles_of_type(roles_for_host, type_): data = teuthology.get_file( remote=remote, path='/tmp/cephtest/data/{type}.{id}.keyring'.format( type=type_, id=id_, ), ) keys.append((type_, id_, data)) keys_fp.write(data) log.info('Adding keys to all mons...') writes = mons.run( args=[ 'cat', run.Raw('>>'), '/tmp/cephtest/ceph.keyring', ], stdin=run.PIPE, wait=False, ) keys_fp.seek(0) teuthology.feed_many_stdins_and_close(keys_fp, writes) run.wait(writes) for type_, id_, 
data in keys: run.wait( mons.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '/tmp/cephtest/ceph.keyring', '--name={type}.{id}'.format( type=type_, id=id_, ), ] + list(teuthology.generate_caps(type_)), wait=False, ), ) log.info('Running mkfs on mon nodes...') for remote, roles_for_host in mons.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'mon'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-mon', '--mkfs', '-i', id_, '-c', '/tmp/cephtest/ceph.conf', '--monmap=/tmp/cephtest/monmap', '--osdmap=/tmp/cephtest/osdmap', '--keyring=/tmp/cephtest/ceph.keyring', ], ) log.info('Running mkfs on osd nodes...') for remote, roles_for_host in osds.remotes.iteritems(): roles_to_devs = remote_to_roles_to_devs[remote] roles_to_journals = remote_to_roles_to_journals[remote] ctx.disk_config = argparse.Namespace() ctx.disk_config.remote_to_roles_to_dev = remote_to_roles_to_devs ctx.disk_config.remote_to_roles_to_journals = remote_to_roles_to_journals for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): log.info(str(roles_to_journals)) log.info(id_) remote.run( args=[ 'mkdir', os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)), ], ) if roles_to_devs.get(id_): dev = roles_to_devs[id_] fs = config.get('fs') package = None mkfs_options = config.get('mkfs_options') mount_options = config.get('mount_options') if fs == 'btrfs': package = 'btrfs-tools' if mount_options is None: mount_options = ['noatime','user_subvol_rm_allowed'] if mkfs_options is None: mkfs_options = ['-m', 'single', '-l', '32768', '-n', '32768'] if fs == 'xfs': package = 'xfsprogs' if mount_options is None: mount_options = ['noatime'] if mkfs_options is None: mkfs_options = ['-f', '-i', 'size=2048'] if fs == 'ext4' or fs == 'ext3': if mount_options is None: mount_options = ['noatime','user_xattr'] if mount_options is None: mount_options = [] if mkfs_options is None: mkfs_options = [] mkfs = ['mkfs.%s' % fs] + mkfs_options log.info('%s on %s on %s' % (mkfs, dev, remote)) if package is not None: remote.run( args=[ 'sudo', 'apt-get', 'install', '-y', package ] ) remote.run(args= ['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) log.info('mount %s on %s -o %s' % (dev, remote, ','.join(mount_options))) remote.run( args=[ 'sudo', 'mount', '-t', fs, '-o', ','.join(mount_options), dev, os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)), ] ) remote.run( args=[ 'sudo', 'chown', '-R', 'ubuntu.ubuntu', os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)) ] ) remote.run( args=[ 'sudo', 'chmod', '-R', '755', os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)) ] ) devs_to_clean[remote].append( os.path.join( '/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_) ) ) for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-osd', '--mkfs', '-i', id_, '-c', '/tmp/cephtest/ceph.conf', '--monmap', '/tmp/cephtest/monmap', ], ) run.wait( mons.run( args=[ 'rm', '--', '/tmp/cephtest/monmap', '/tmp/cephtest/osdmap', ], wait=False, ), ) try: yield finally: (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() log.info('Checking cluster log for badness...') def first_in_ceph_log(pattern, 
excludes): args = [ 'egrep', pattern, '/tmp/cephtest/archive/log/cluster.%s.log' % firstmon, ] for exclude in excludes: args.extend([run.Raw('|'), 'egrep', '-v', exclude]) args.extend([ run.Raw('|'), 'head', '-n', '1', ]) r = mon0_remote.run( stdout=StringIO(), args=args, ) stdout = r.stdout.getvalue() if stdout != '': return stdout return None if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', config['log_whitelist']) is not None: log.warning('Found errors (ERR|WRN|SEC) in cluster log') ctx.summary['success'] = False # use the most severe problem as the failure reason if 'failure_reason' not in ctx.summary: for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: match = first_in_ceph_log(pattern, config['log_whitelist']) if match is not None: ctx.summary['failure_reason'] = \ '"{match}" in cluster log'.format( match=match.rstrip('\n'), ) break for remote, dirs in devs_to_clean.iteritems(): for dir_ in dirs: log.info('Unmounting %s on %s' % (dir_, remote)) remote.run( args=[ 'sync', run.Raw('&&'), 'sudo', 'umount', '-f', dir_ ] ) if config.get('tmpfs_journal'): log.info('tmpfs journal enabled - unmounting tmpfs at /mnt') for remote, roles_for_host in osds.remotes.iteritems(): remote.run( args=[ 'sudo', 'umount', '-f', '/mnt' ], check_status=False, ) if ctx.archive is not None: # archive mon data, too log.info('Archiving mon data...') path = os.path.join(ctx.archive, 'data') os.makedirs(path) for remote, roles in mons.remotes.iteritems(): for role in roles: if role.startswith('mon.'): teuthology.pull_directory_tarball(remote, '/tmp/cephtest/data/%s' % role, path + '/' + role + '.tgz') log.info('Cleaning ceph cluster...') run.wait( ctx.cluster.run( args=[ 'rm', '-rf', '--', '/tmp/cephtest/ceph.conf', '/tmp/cephtest/ceph.keyring', '/tmp/cephtest/data', '/tmp/cephtest/monmap', run.Raw('/tmp/cephtest/asok.*') ], wait=False, ), )
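# The older cluster() task above distributes ceph.conf by piping it into a
# one-line python process on every node (shutil.copyfileobj from stdin to the
# target path).  A local sketch of the same trick via subprocess, assuming a
# 'python' interpreter on PATH and writing to a scratch path chosen here for
# illustration; the original one-liner used Python 2's file() builtin, the
# version below works under either interpreter:
import subprocess

conf_text = "[global]\n\tfsid = 00000000-0000-0000-0000-000000000000\n"
p = subprocess.Popen(
    ['python', '-c',
     'import shutil, sys; '
     'shutil.copyfileobj(getattr(sys.stdin, "buffer", sys.stdin), open(sys.argv[1], "wb"))',
     '/tmp/example-ceph.conf'],
    stdin=subprocess.PIPE,
)
p.communicate(conf_text.encode())
assert p.returncode == 0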
def update_devstack_config_files(devstack_node, secret_uuid): log.info("Updating DevStack config files to use Ceph...") def backup_config(node, file_name, backup_ext='.orig.teuth'): node.run(args=['cp', '-f', file_name, file_name + backup_ext]) def update_config(config_name, config_stream, update_dict, section='DEFAULT'): parser = ConfigParser() parser.read_file(config_stream) for (key, value) in update_dict.items(): parser.set(section, key, value) out_stream = StringIO() parser.write(out_stream) out_stream.seek(0) return out_stream updates = [ dict(name='/etc/glance/glance-api.conf', options=dict( default_store='rbd', rbd_store_user='******', rbd_store_pool='images', show_image_direct_url='True',)), dict(name='/etc/cinder/cinder.conf', options=dict( volume_driver='cinder.volume.drivers.rbd.RBDDriver', rbd_pool='volumes', rbd_ceph_conf='/etc/ceph/ceph.conf', rbd_flatten_volume_from_snapshot='false', rbd_max_clone_depth='5', glance_api_version='2', rbd_user='******', rbd_secret_uuid=secret_uuid, backup_driver='cinder.backup.drivers.ceph', backup_ceph_conf='/etc/ceph/ceph.conf', backup_ceph_user='******', backup_ceph_chunk_size='134217728', backup_ceph_pool='backups', backup_ceph_stripe_unit='0', backup_ceph_stripe_count='0', restore_discard_excess_bytes='true', )), dict(name='/etc/nova/nova.conf', options=dict( libvirt_images_type='rbd', libvirt_images_rbd_pool='volumes', libvirt_images_rbd_ceph_conf='/etc/ceph/ceph.conf', rbd_user='******', rbd_secret_uuid=secret_uuid, libvirt_inject_password='******', libvirt_inject_key='false', libvirt_inject_partition='-2', )), ] for update in updates: file_name = update['name'] options = update['options'] config_str = misc.get_file(devstack_node, file_name, sudo=True) config_stream = StringIO(config_str) backup_config(devstack_node, file_name) new_config_stream = update_config(file_name, config_stream, options) misc.sudo_write_file(devstack_node, file_name, new_config_stream)
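# update_devstack_config_files() above rewrites each OpenStack config by
# loading it into ConfigParser, overriding a handful of options in one section,
# and writing the result back out.  A self-contained sketch of that round-trip,
# assuming Python 2 imports to match the surrounding code (readfp is the
# Python 2 spelling; Python 3 calls it read_file); the section and option
# values below are illustrative:
from ConfigParser import ConfigParser
from StringIO import StringIO

def update_config(config_stream, update_dict, section='DEFAULT'):
    parser = ConfigParser()
    parser.readfp(config_stream)
    for key, value in update_dict.items():
        parser.set(section, key, value)
    out_stream = StringIO()
    parser.write(out_stream)
    out_stream.seek(0)
    return out_stream

original = StringIO("[DEFAULT]\ndefault_store = file\n")
updated = update_config(original, {'default_store': 'rbd', 'rbd_store_pool': 'images'})
print(updated.getvalue())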
def build_ceph_cluster(ctx, config): """Build a ceph cluster""" log.info("Building ceph cluster using ceph-deploy...") testdir = teuthology.get_testdir(ctx) ceph_branch = None if config.get("branch") is not None: cbranch = config.get("branch") for var, val in cbranch.iteritems(): if var == "testing": ceph_branch = "--{var}".format(var=var) ceph_branch = "--{var}={val}".format(var=var, val=val) node_dev_list = [] all_nodes = get_all_nodes(ctx, config) mds_nodes = get_nodes_using_roles(ctx, config, "mds") mds_nodes = " ".join(mds_nodes) mon_node = get_nodes_using_roles(ctx, config, "mon") mon_nodes = " ".join(mon_node) new_mon = "./ceph-deploy new" + " " + mon_nodes install_nodes = "./ceph-deploy install " + ceph_branch + " " + all_nodes purge_nodes = "./ceph-deploy purge" + " " + all_nodes purgedata_nodes = "./ceph-deploy purgedata" + " " + all_nodes mon_hostname = mon_nodes.split(" ")[0] mon_hostname = str(mon_hostname) gather_keys = "./ceph-deploy gatherkeys" + " " + mon_hostname deploy_mds = "./ceph-deploy mds create" + " " + mds_nodes no_of_osds = 0 if mon_nodes is None: raise RuntimeError("no monitor nodes in the config file") estatus_new = execute_ceph_deploy(ctx, config, new_mon) if estatus_new != 0: raise RuntimeError("ceph-deploy: new command failed") log.info("adding config inputs...") testdir = teuthology.get_testdir(ctx) conf_path = "{tdir}/ceph-deploy/ceph.conf".format(tdir=testdir) first_mon = teuthology.get_first_mon(ctx, config) (remote,) = ctx.cluster.only(first_mon).remotes.keys() lines = None if config.get("conf") is not None: confp = config.get("conf") for section, keys in confp.iteritems(): lines = "[{section}]\n".format(section=section) teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True) for key, value in keys.iteritems(): log.info("[%s] %s = %s" % (section, key, value)) lines = "{key} = {value}\n".format(key=key, value=value) teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True) estatus_install = execute_ceph_deploy(ctx, config, install_nodes) if estatus_install != 0: raise RuntimeError("ceph-deploy: Failed to install ceph") mon_no = None mon_no = config.get("mon_initial_members") if mon_no is not None: i = 0 mon1 = [] while i < mon_no: mon1.append(mon_node[i]) i = i + 1 initial_mons = " ".join(mon1) for k in range(mon_no, len(mon_node)): mon_create_nodes = "./ceph-deploy mon create" + " " + initial_mons + " " + mon_node[k] estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes) if estatus_mon != 0: raise RuntimeError("ceph-deploy: Failed to create monitor") else: mon_create_nodes = "./ceph-deploy mon create" + " " + mon_nodes estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes) if estatus_mon != 0: raise RuntimeError("ceph-deploy: Failed to create monitors") estatus_gather = execute_ceph_deploy(ctx, config, gather_keys) while estatus_gather != 0: # mon_create_nodes = './ceph-deploy mon create'+" "+mon_node[0] # execute_ceph_deploy(ctx, config, mon_create_nodes) estatus_gather = execute_ceph_deploy(ctx, config, gather_keys) if mds_nodes: estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds) if estatus_mds != 0: raise RuntimeError("ceph-deploy: Failed to deploy mds") if config.get("test_mon_destroy") is not None: for d in range(1, len(mon_node)): mon_destroy_nodes = "./ceph-deploy mon destroy" + " " + mon_node[d] estatus_mon_d = execute_ceph_deploy(ctx, config, mon_destroy_nodes) if estatus_mon_d != 0: raise RuntimeError("ceph-deploy: Failed to delete monitor") node_dev_list = get_dev_for_osd(ctx, config) for d 
in node_dev_list: osd_create_cmds = "./ceph-deploy osd create --zap-disk" + " " + d estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds) if estatus_osd == 0: log.info("successfully created osd") no_of_osds += 1 else: zap_disk = "./ceph-deploy disk zap" + " " + d execute_ceph_deploy(ctx, config, zap_disk) estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds) if estatus_osd == 0: log.info("successfully created osd") no_of_osds += 1 else: raise RuntimeError("ceph-deploy: Failed to create osds") if config.get("wait-for-healthy", True) and no_of_osds >= 2: is_healthy(ctx=ctx, config=None) log.info("Setting up client nodes...") conf_path = "/etc/ceph/ceph.conf" admin_keyring_path = "/etc/ceph/ceph.client.admin.keyring" first_mon = teuthology.get_first_mon(ctx, config) (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys() conf_data = teuthology.get_file(remote=mon0_remote, path=conf_path, sudo=True) admin_keyring = teuthology.get_file(remote=mon0_remote, path=admin_keyring_path, sudo=True) clients = ctx.cluster.only(teuthology.is_type("client")) for remot, roles_for_host in clients.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, "client"): client_keyring = "/etc/ceph/ceph.client.{id}.keyring".format(id=id_) mon0_remote.run( args=[ "cd", "{tdir}".format(tdir=testdir), run.Raw("&&"), "sudo", "bash", "-c", run.Raw('"'), "ceph", "auth", "get-or-create", "client.{id}".format(id=id_), "mds", "allow", "mon", "allow *", "osd", "allow *", run.Raw(">"), client_keyring, run.Raw('"'), ] ) key_data = teuthology.get_file(remote=mon0_remote, path=client_keyring, sudo=True) teuthology.sudo_write_file(remote=remot, path=client_keyring, data=key_data, perms="0644") teuthology.sudo_write_file(remote=remot, path=admin_keyring_path, data=admin_keyring, perms="0644") teuthology.sudo_write_file(remote=remot, path=conf_path, data=conf_data, perms="0644") else: raise RuntimeError("The cluster is NOT operational due to insufficient OSDs") try: yield finally: log.info("Stopping ceph...") ctx.cluster.run(args=["sudo", "stop", "ceph-all", run.Raw("||"), "sudo", "service", "ceph", "stop"]) if ctx.archive is not None: # archive mon data, too log.info("Archiving mon data...") path = os.path.join(ctx.archive, "data") os.makedirs(path) mons = ctx.cluster.only(teuthology.is_type("mon")) for remote, roles in mons.remotes.iteritems(): for role in roles: if role.startswith("mon."): teuthology.pull_directory_tarball(remote, "/var/lib/ceph/mon", path + "/" + role + ".tgz") log.info("Compressing logs...") run.wait( ctx.cluster.run( args=[ "sudo", "find", "/var/log/ceph", "-name", "*.log", "-print0", run.Raw("|"), "sudo", "xargs", "-0", "--no-run-if-empty", "--", "gzip", "--", ], wait=False, ) ) log.info("Archiving logs...") path = os.path.join(ctx.archive, "remote") os.makedirs(path) for remote in ctx.cluster.remotes.iterkeys(): sub = os.path.join(path, remote.shortname) os.makedirs(sub) teuthology.pull_directory(remote, "/var/log/ceph", os.path.join(sub, "log")) log.info("Purging package...") execute_ceph_deploy(ctx, config, purge_nodes) log.info("Purging data...") execute_ceph_deploy(ctx, config, purgedata_nodes)
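# With 'mon_initial_members' set, build_ceph_cluster() above creates the
# initial quorum from the first N monitor nodes and then grows the cluster by
# re-running 'mon create' with one extra node at a time.  A sketch of how the
# node list is partitioned into those command strings; the hostnames are
# invented for illustration:
def mon_create_commands(mon_nodes, mon_initial_members):
    initial = mon_nodes[:mon_initial_members]
    cmds = []
    for extra in mon_nodes[mon_initial_members:]:
        cmds.append('./ceph-deploy mon create ' + ' '.join(initial + [extra]))
    return cmds

print(mon_create_commands(['mon-a', 'mon-b', 'mon-c'], 1))
# ['./ceph-deploy mon create mon-a mon-b',
#  './ceph-deploy mon create mon-a mon-c']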
def _run_tests(ctx, refspec, role, tests, env, subdir=None): assert isinstance(role, basestring) PREFIX = 'client.' assert role.startswith(PREFIX) id_ = role[len(PREFIX):] (remote,) = ctx.cluster.only(role).remotes.iterkeys() mnt = os.path.join('/tmp/cephtest', 'mnt.{id}'.format(id=id_)) # subdir so we can remove and recreate this a lot without sudo if subdir is None: scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp') else: scratch_tmp = os.path.join(mnt, subdir) srcdir = '/tmp/cephtest/workunit.{role}'.format(role=role) secretfile = '/tmp/cephtest/data/{role}.secret'.format(role=role) teuthology.write_secret_file(remote, role, secretfile) ceph_ref = ctx.summary.get('ceph-sha1', 'master') remote.run( logger=log.getChild(role), args=[ 'mkdir', '--', srcdir, run.Raw('&&'), 'wget', '-q', '-O-', 'https://github.com/ceph/ceph/tarball/%s' % refspec, run.Raw('|'), 'tar', '-C', srcdir, '-x', '-z', '-f-', '--wildcards', '--no-wildcards-match-slash', '--strip-components=3', '--', '*/qa/workunits/', run.Raw('&&'), 'cd', '--', srcdir, run.Raw('&&'), 'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi', run.Raw('&&'), 'find', '-executable', '-type', 'f', '-printf', r'%P\0'.format(srcdir=srcdir), run.Raw('>/tmp/cephtest/workunits.list'), ], ) workunits = sorted(teuthology.get_file(remote, '/tmp/cephtest/workunits.list').split('\0')) assert workunits try: assert isinstance(tests, list) for spec in tests: log.info('Running workunits matching %s on %s...', spec, role) prefix = '{spec}/'.format(spec=spec) to_run = [w for w in workunits if w == spec or w.startswith(prefix)] if not to_run: raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec)) for workunit in to_run: log.info('Running workunit %s...', workunit) args = [ 'mkdir', '-p', '--', scratch_tmp, run.Raw('&&'), 'cd', '--', scratch_tmp, run.Raw('&&'), run.Raw('CEPH_REF={ref}'.format(ref=ceph_ref)), run.Raw('PATH="$PATH:/tmp/cephtest/binary/usr/local/bin"'), run.Raw('LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/tmp/cephtest/binary/usr/local/lib"'), run.Raw('CEPH_CONF="/tmp/cephtest/ceph.conf"'), run.Raw('CEPH_SECRET_FILE="{file}"'.format(file=secretfile)), run.Raw('CEPH_ID="{id}"'.format(id=id_)), run.Raw('PYTHONPATH="$PYTHONPATH:/tmp/cephtest/binary/usr/local/lib/python2.7/dist-packages:/tmp/cephtest/binary/usr/local/lib/python2.6/dist-packages"'), ] if env is not None: for var, val in env.iteritems(): quoted_val = pipes.quote(val) env_arg = '{var}={val}'.format(var=var, val=quoted_val) args.append(run.Raw(env_arg)) args.extend([ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', '/tmp/cephtest/archive/coverage', '{srcdir}/{workunit}'.format( srcdir=srcdir, workunit=workunit, ), ]) remote.run( logger=log.getChild(role), args=args, ) remote.run( logger=log.getChild(role), args=['rm', '-rf', '--', scratch_tmp], ) finally: log.info('Stopping %s on %s...', spec, role) remote.run( logger=log.getChild(role), args=[ 'rm', '-rf', '--', '/tmp/cephtest/workunits.list', srcdir, ], )
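# The "find -executable -type f -printf '%P\0'" pipeline above builds the list
# of runnable workunits relative to the extracted qa/workunits tree.  A local
# Python sketch of the same discovery using os.walk and os.access; point it at
# whatever directory tree you want to scan:
import os

def list_executable_files(root):
    found = []
    for dirpath, dirnames, filenames in os.walk(root):
        for name in filenames:
            path = os.path.join(dirpath, name)
            if os.access(path, os.X_OK):
                found.append(os.path.relpath(path, root))
    return sorted(found)

# e.g. list_executable_files('/tmp/cephtest/workunit.client.0')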
def build_ceph_cluster(ctx, config): """Build a ceph cluster""" try: log.info('Building ceph cluster using ceph-deploy...') testdir = teuthology.get_testdir(ctx) ceph_branch = None if config.get('branch') is not None: cbranch = config.get('branch') for var, val in cbranch.iteritems(): if var == 'testing': ceph_branch = '--{var}'.format(var=var) ceph_branch = '--{var}={val}'.format(var=var, val=val) node_dev_list = [] all_nodes = get_all_nodes(ctx, config) mds_nodes = get_nodes_using_roles(ctx, config, 'mds') mds_nodes = " ".join(mds_nodes) mon_node = get_nodes_using_roles(ctx, config, 'mon') mon_nodes = " ".join(mon_node) new_mon = './ceph-deploy new'+" "+mon_nodes install_nodes = './ceph-deploy install '+ceph_branch+" "+all_nodes purge_nodes = './ceph-deploy purge'+" "+all_nodes purgedata_nodes = './ceph-deploy purgedata'+" "+all_nodes mon_hostname = mon_nodes.split(' ')[0] mon_hostname = str(mon_hostname) gather_keys = './ceph-deploy gatherkeys'+" "+mon_hostname deploy_mds = './ceph-deploy mds create'+" "+mds_nodes no_of_osds = 0 if mon_nodes is None: raise RuntimeError("no monitor nodes in the config file") estatus_new = execute_ceph_deploy(ctx, config, new_mon) if estatus_new != 0: raise RuntimeError("ceph-deploy: new command failed") log.info('adding config inputs...') testdir = teuthology.get_testdir(ctx) conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir) first_mon = teuthology.get_first_mon(ctx, config) (remote,) = ctx.cluster.only(first_mon).remotes.keys() lines = None if config.get('conf') is not None: confp = config.get('conf') for section, keys in confp.iteritems(): lines = '[{section}]\n'.format(section=section) teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True) for key, value in keys.iteritems(): log.info("[%s] %s = %s" % (section, key, value)) lines = '{key} = {value}\n'.format(key=key, value=value) teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True) estatus_install = execute_ceph_deploy(ctx, config, install_nodes) if estatus_install != 0: raise RuntimeError("ceph-deploy: Failed to install ceph") mon_create_nodes = './ceph-deploy mon create-initial' # If the following fails, it is OK, it might just be that the monitors # are taking way more than a minute/monitor to form quorum, so lets # try the next block which will wait up to 15 minutes to gatherkeys. 
execute_ceph_deploy(ctx, config, mon_create_nodes) estatus_gather = execute_ceph_deploy(ctx, config, gather_keys) max_gather_tries = 90 gather_tries = 0 while (estatus_gather != 0): gather_tries += 1 if gather_tries >= max_gather_tries: msg = 'ceph-deploy was not able to gatherkeys after 15 minutes' raise RuntimeError(msg) estatus_gather = execute_ceph_deploy(ctx, config, gather_keys) time.sleep(10) if mds_nodes: estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds) if estatus_mds != 0: raise RuntimeError("ceph-deploy: Failed to deploy mds") if config.get('test_mon_destroy') is not None: for d in range(1, len(mon_node)): mon_destroy_nodes = './ceph-deploy mon destroy'+" "+mon_node[d] estatus_mon_d = execute_ceph_deploy(ctx, config, mon_destroy_nodes) if estatus_mon_d != 0: raise RuntimeError("ceph-deploy: Failed to delete monitor") node_dev_list = get_dev_for_osd(ctx, config) osd_create_cmd = './ceph-deploy osd create --zap-disk ' for d in node_dev_list: if config.get('dmcrypt') is not None: osd_create_cmd_d = osd_create_cmd+'--dmcrypt'+" "+d else: osd_create_cmd_d = osd_create_cmd+d estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d) if estatus_osd == 0: log.info('successfully created osd') no_of_osds += 1 else: disks = [] disks = d.split(':') dev_disk = disks[0]+":"+disks[1] j_disk = disks[0]+":"+disks[2] zap_disk = './ceph-deploy disk zap '+dev_disk+" "+j_disk execute_ceph_deploy(ctx, config, zap_disk) estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d) if estatus_osd == 0: log.info('successfully created osd') no_of_osds += 1 else: raise RuntimeError("ceph-deploy: Failed to create osds") if config.get('wait-for-healthy', True) and no_of_osds >= 2: is_healthy(ctx=ctx, config=None) log.info('Setting up client nodes...') conf_path = '/etc/ceph/ceph.conf' admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring' first_mon = teuthology.get_first_mon(ctx, config) (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys() conf_data = teuthology.get_file( remote=mon0_remote, path=conf_path, sudo=True, ) admin_keyring = teuthology.get_file( remote=mon0_remote, path=admin_keyring_path, sudo=True, ) clients = ctx.cluster.only(teuthology.is_type('client')) for remot, roles_for_host in clients.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'client'): client_keyring = \ '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_) mon0_remote.run( args=[ 'cd', '{tdir}'.format(tdir=testdir), run.Raw('&&'), 'sudo', 'bash', '-c', run.Raw('"'), 'ceph', 'auth', 'get-or-create', 'client.{id}'.format(id=id_), 'mds', 'allow', 'mon', 'allow *', 'osd', 'allow *', run.Raw('>'), client_keyring, run.Raw('"'), ], ) key_data = teuthology.get_file( remote=mon0_remote, path=client_keyring, sudo=True, ) teuthology.sudo_write_file( remote=remot, path=client_keyring, data=key_data, perms='0644' ) teuthology.sudo_write_file( remote=remot, path=admin_keyring_path, data=admin_keyring, perms='0644' ) teuthology.sudo_write_file( remote=remot, path=conf_path, data=conf_data, perms='0644' ) else: raise RuntimeError( "The cluster is NOT operational due to insufficient OSDs") yield finally: log.info('Stopping ceph...') ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'), 'sudo', 'service', 'ceph', 'stop' ]) # Are you really not running anymore? 
# try first with the init tooling # ignoring the status so this becomes informational only ctx.cluster.run(args=['sudo', 'status', 'ceph-all', run.Raw('||'), 'sudo', 'service', 'ceph', 'status'], check_status=False) # and now just check for the processes themselves, as if upstart/sysvinit # is lying to us. Ignore errors if the grep fails ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'), 'grep', '-v', 'grep', run.Raw('|'), 'grep', 'ceph'], check_status=False) if ctx.archive is not None: # archive mon data, too log.info('Archiving mon data...') path = os.path.join(ctx.archive, 'data') os.makedirs(path) mons = ctx.cluster.only(teuthology.is_type('mon')) for remote, roles in mons.remotes.iteritems(): for role in roles: if role.startswith('mon.'): teuthology.pull_directory_tarball( remote, '/var/lib/ceph/mon', path + '/' + role + '.tgz') log.info('Compressing logs...') run.wait( ctx.cluster.run( args=[ 'sudo', 'find', '/var/log/ceph', '-name', '*.log', '-print0', run.Raw('|'), 'sudo', 'xargs', '-0', '--no-run-if-empty', '--', 'gzip', '--', ], wait=False, ), ) log.info('Archiving logs...') path = os.path.join(ctx.archive, 'remote') os.makedirs(path) for remote in ctx.cluster.remotes.iterkeys(): sub = os.path.join(path, remote.shortname) os.makedirs(sub) teuthology.pull_directory(remote, '/var/log/ceph', os.path.join(sub, 'log')) # Prevent these from being undefined if the try block fails all_nodes = get_all_nodes(ctx, config) purge_nodes = './ceph-deploy purge'+" "+all_nodes purgedata_nodes = './ceph-deploy purgedata'+" "+all_nodes log.info('Purging package...') execute_ceph_deploy(ctx, config, purge_nodes) log.info('Purging data...') execute_ceph_deploy(ctx, config, purgedata_nodes)
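# build_ceph_cluster() above tolerates a failed 'mon create-initial' and
# instead polls 'ceph-deploy gatherkeys' until it succeeds, giving up after 90
# attempts spaced 10 seconds apart (roughly the 15 minutes the comment
# mentions).  A standalone sketch of that retry loop with a hypothetical
# gather_keys() callable returning an exit status:
import time

def wait_for_keys(gather_keys, max_tries=90, delay=10):
    tries = 0
    while gather_keys() != 0:
        tries += 1
        if tries >= max_tries:
            raise RuntimeError('ceph-deploy was not able to gatherkeys after 15 minutes')
        time.sleep(delay)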
def build_ceph_cluster(ctx, config): """Build a ceph cluster""" try: log.info("Building ceph cluster using ceph-deploy...") testdir = teuthology.get_testdir(ctx) ceph_branch = None if config.get("branch") is not None: cbranch = config.get("branch") for var, val in cbranch.iteritems(): if var == "testing": ceph_branch = "--{var}".format(var=var) ceph_branch = "--{var}={val}".format(var=var, val=val) node_dev_list = [] all_nodes = get_all_nodes(ctx, config) mds_nodes = get_nodes_using_roles(ctx, config, "mds") mds_nodes = " ".join(mds_nodes) mon_node = get_nodes_using_roles(ctx, config, "mon") mon_nodes = " ".join(mon_node) new_mon = "./ceph-deploy new" + " " + mon_nodes install_nodes = "./ceph-deploy install " + ceph_branch + " " + all_nodes purge_nodes = "./ceph-deploy purge" + " " + all_nodes purgedata_nodes = "./ceph-deploy purgedata" + " " + all_nodes mon_hostname = mon_nodes.split(" ")[0] mon_hostname = str(mon_hostname) gather_keys = "./ceph-deploy gatherkeys" + " " + mon_hostname deploy_mds = "./ceph-deploy mds create" + " " + mds_nodes no_of_osds = 0 if mon_nodes is None: raise RuntimeError("no monitor nodes in the config file") estatus_new = execute_ceph_deploy(ctx, config, new_mon) if estatus_new != 0: raise RuntimeError("ceph-deploy: new command failed") log.info("adding config inputs...") testdir = teuthology.get_testdir(ctx) conf_path = "{tdir}/ceph-deploy/ceph.conf".format(tdir=testdir) first_mon = teuthology.get_first_mon(ctx, config) (remote,) = ctx.cluster.only(first_mon).remotes.keys() lines = None if config.get("conf") is not None: confp = config.get("conf") for section, keys in confp.iteritems(): lines = "[{section}]\n".format(section=section) teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True) for key, value in keys.iteritems(): log.info("[%s] %s = %s" % (section, key, value)) lines = "{key} = {value}\n".format(key=key, value=value) teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True) estatus_install = execute_ceph_deploy(ctx, config, install_nodes) if estatus_install != 0: raise RuntimeError("ceph-deploy: Failed to install ceph") mon_create_nodes = "./ceph-deploy mon create-initial" # If the following fails, it is OK, it might just be that the monitors # are taking way more than a minute/monitor to form quorum, so lets # try the next block which will wait up to 15 minutes to gatherkeys. 
        estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)

        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
        max_gather_tries = 90
        gather_tries = 0
        while estatus_gather != 0:
            gather_tries += 1
            if gather_tries >= max_gather_tries:
                msg = "ceph-deploy was not able to gatherkeys after 15 minutes"
                raise RuntimeError(msg)
            estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
            time.sleep(10)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get("test_mon_destroy") is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = "./ceph-deploy mon destroy" + " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(ctx, config,
                                                    mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        osd_create_cmd = "./ceph-deploy osd create --zap-disk "
        for d in node_dev_list:
            if config.get("dmcrypt") is not None:
                osd_create_cmd_d = osd_create_cmd + "--dmcrypt" + " " + d
            else:
                osd_create_cmd_d = osd_create_cmd + d
            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d)
            if estatus_osd == 0:
                log.info("successfully created osd")
                no_of_osds += 1
            else:
                disks = []
                disks = d.split(":")
                dev_disk = disks[0] + ":" + disks[1]
                j_disk = disks[0] + ":" + disks[2]
                zap_disk = "./ceph-deploy disk zap " + dev_disk + " " + j_disk
                execute_ceph_deploy(ctx, config, zap_disk)
                estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d)
                if estatus_osd == 0:
                    log.info("successfully created osd")
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get("wait-for-healthy", True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info("Setting up client nodes...")
            conf_path = "/etc/ceph/ceph.conf"
            admin_keyring_path = "/etc/ceph/ceph.client.admin.keyring"
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(remote=mon0_remote,
                                            path=conf_path, sudo=True)
            admin_keyring = teuthology.get_file(remote=mon0_remote,
                                                path=admin_keyring_path,
                                                sudo=True)

            clients = ctx.cluster.only(teuthology.is_type("client"))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, "client"):
                    client_keyring = \
                        "/etc/ceph/ceph.client.{id}.keyring".format(id=id_)
                    mon0_remote.run(
                        args=[
                            "cd", "{tdir}".format(tdir=testdir),
                            run.Raw("&&"),
                            "sudo", "bash", "-c",
                            run.Raw('"'),
                            "ceph", "auth", "get-or-create",
                            "client.{id}".format(id=id_),
                            "mds", "allow",
                            "mon", "allow *",
                            "osd", "allow *",
                            run.Raw(">"),
                            client_keyring,
                            run.Raw('"'),
                        ])
                    key_data = teuthology.get_file(remote=mon0_remote,
                                                   path=client_keyring,
                                                   sudo=True)
                    teuthology.sudo_write_file(remote=remot,
                                               path=client_keyring,
                                               data=key_data, perms="0644")
                    teuthology.sudo_write_file(remote=remot,
                                               path=admin_keyring_path,
                                               data=admin_keyring,
                                               perms="0644")
                    teuthology.sudo_write_file(remote=remot, path=conf_path,
                                               data=conf_data, perms="0644")
        else:
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")

        yield

    finally:
        log.info("Stopping ceph...")
        ctx.cluster.run(args=["sudo", "stop", "ceph-all", run.Raw("||"),
                              "sudo", "service", "ceph", "stop"])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(
            args=["sudo", "status", "ceph-all", run.Raw("||"),
                  "sudo", "service", "ceph", "status"],
            check_status=False)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(
            args=["sudo", "ps", "aux", run.Raw("|"),
                  "grep", "-v", "grep", run.Raw("|"),
                  "grep", "ceph"],
            check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info("Archiving mon data...")
            path = os.path.join(ctx.archive, "data")
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type("mon"))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith("mon."):
                        teuthology.pull_directory_tarball(
                            remote, "/var/lib/ceph/mon",
                            path + "/" + role + ".tgz")

            log.info("Compressing logs...")
            run.wait(
                ctx.cluster.run(
                    args=[
                        "sudo", "find", "/var/log/ceph", "-name", "*.log",
                        "-print0", run.Raw("|"),
                        "sudo", "xargs", "-0", "--no-run-if-empty",
                        "--", "gzip", "--",
                    ],
                    wait=False,
                ),
            )

            log.info("Archiving logs...")
            path = os.path.join(ctx.archive, "remote")
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, "/var/log/ceph",
                                          os.path.join(sub, "log"))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = "./ceph-deploy purge" + " " + all_nodes
        purgedata_nodes = "./ceph-deploy purgedata" + " " + all_nodes

        log.info("Purging package...")
        execute_ceph_deploy(ctx, config, purge_nodes)
        log.info("Purging data...")
        execute_ceph_deploy(ctx, config, purgedata_nodes)
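# The gatherkeys loop above retries the same ceph-deploy command up to 90
# times with a 10-second pause, i.e. roughly 15 minutes in total. As a
# minimal sketch of that pattern in isolation, the helper below factors it
# out; it is hypothetical (not part of the original task) and assumes the
# same module-level `time` import used by the code above.
def _retry_until_success(run_cmd, cmd, max_tries=90, delay=10):
    """Re-run cmd via run_cmd until it exits 0, or fail after max_tries."""
    status = run_cmd(cmd)
    tries = 0
    while status != 0:
        tries += 1
        if tries >= max_tries:
            raise RuntimeError(
                "command %r did not succeed after %d tries" % (cmd, max_tries))
        status = run_cmd(cmd)
        time.sleep(delay)
    return status
# Illustrative call, mirroring the loop above:
#     _retry_until_success(lambda c: execute_ceph_deploy(ctx, config, c),
#                          gather_keys)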
def distribute_ceph_conf(devstack_node, ceph_node):
    log.info("Copying ceph.conf to DevStack node...")

    ceph_conf_path = "/etc/ceph/ceph.conf"
    ceph_conf = misc.get_file(ceph_node, ceph_conf_path, sudo=True)
    misc.sudo_write_file(devstack_node, ceph_conf_path, ceph_conf)
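# Hypothetical usage sketch for distribute_ceph_conf(): the two remotes would
# normally be looked up from the cluster by role before the call. The role
# names below ('devstack.0', 'mon.a') are illustrative assumptions and not
# taken from the surrounding tasks.
#
#     (devstack_node,) = ctx.cluster.only('devstack.0').remotes.iterkeys()
#     (ceph_node,) = ctx.cluster.only('mon.a').remotes.iterkeys()
#     distribute_ceph_conf(devstack_node, ceph_node)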
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the
    # download task puts it.  Remember this here, because subsequently IDs
    # will change from those in the test config to those that ceph-deploy
    # invents.
    (ceph_admin,) = ctx.cluster.only(
        teuthology.get_first_mon(ctx, config)).remotes.iterkeys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=["cd", "{tdir}/ceph-deploy".format(tdir=testdir),
                  run.Raw("&&"), run.Raw(cmd)],
            check_status=False,
        ).exitstatus

    try:
        log.info("Building ceph cluster using ceph-deploy...")
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get("branch") is not None:
            cbranch = config.get("branch")
            for var, val in cbranch.iteritems():
                ceph_branch = "--{var}={val}".format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, "mds")
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, "mon")
        mon_nodes = " ".join(mon_node)
        new_mon = "./ceph-deploy new" + " " + mon_nodes
        mon_hostname = mon_nodes.split(" ")[0]
        mon_hostname = str(mon_hostname)
        gather_keys = "./ceph-deploy gatherkeys" + " " + mon_hostname
        deploy_mds = "./ceph-deploy mds create" + " " + mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info("adding config inputs...")
        testdir = teuthology.get_testdir(ctx)
        conf_path = "{tdir}/ceph-deploy/ceph.conf".format(tdir=testdir)

        if config.get("conf") is not None:
            confp = config.get("conf")
            for section, keys in confp.iteritems():
                lines = "[{section}]\n".format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = "{key} = {value}\n".format(key=key, value=value)
                    teuthology.append_lines_to_file(ceph_admin, conf_path,
                                                    lines, sudo=True)

        # install ceph
        install_nodes = "./ceph-deploy install " + \
            (ceph_branch if ceph_branch else "--dev=master") + \
            " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = "./ceph-deploy install --tests " + \
            (ceph_branch if ceph_branch else "--dev=master") + \
            " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = "./ceph-deploy mon create-initial"
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so let's
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        estatus_gather = execute_ceph_deploy(gather_keys)
        max_gather_tries = 90
        gather_tries = 0
        while estatus_gather != 0:
            gather_tries += 1
            if gather_tries >= max_gather_tries:
                msg = "ceph-deploy was not able to gatherkeys after 15 minutes"
                raise RuntimeError(msg)
            estatus_gather = execute_ceph_deploy(gather_keys)
            time.sleep(10)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get("test_mon_destroy") is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = "./ceph-deploy mon destroy" + " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = "./ceph-deploy disk zap " + node + ":" + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = "./ceph-deploy osd create "
            if config.get("dmcrypt") is not None:
                osd_create_cmd += "--dmcrypt "
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info("successfully created osd")
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get("wait-for-healthy", True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info("Setting up client nodes...")
            conf_path = "/etc/ceph/ceph.conf"
            admin_keyring_path = "/etc/ceph/ceph.client.admin.keyring"
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(remote=mon0_remote,
                                            path=conf_path, sudo=True)
            admin_keyring = teuthology.get_file(remote=mon0_remote,
                                                path=admin_keyring_path,
                                                sudo=True)

            clients = ctx.cluster.only(teuthology.is_type("client"))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, "client"):
                    client_keyring = \
                        "/etc/ceph/ceph.client.{id}.keyring".format(id=id_)
                    mon0_remote.run(
                        args=[
                            "cd", "{tdir}".format(tdir=testdir),
                            run.Raw("&&"),
                            "sudo", "bash", "-c",
                            run.Raw('"'),
                            "ceph", "auth", "get-or-create",
                            "client.{id}".format(id=id_),
                            "mds", "allow",
                            "mon", "allow *",
                            "osd", "allow *",
                            run.Raw(">"),
                            client_keyring,
                            run.Raw('"'),
                        ])
                    key_data = teuthology.get_file(remote=mon0_remote,
                                                   path=client_keyring,
                                                   sudo=True)
                    teuthology.sudo_write_file(remote=remot,
                                               path=client_keyring,
                                               data=key_data, perms="0644")
                    teuthology.sudo_write_file(remote=remot,
                                               path=admin_keyring_path,
                                               data=admin_keyring,
                                               perms="0644")
                    teuthology.sudo_write_file(remote=remot, path=conf_path,
                                               data=conf_data, perms="0644")

            if mds_nodes:
                log.info("Configuring CephFS...")
                ceph_fs = Filesystem(ctx, admin_remote=clients.remotes.keys()[0])
                if not ceph_fs.legacy_configured():
                    ceph_fs.create()
        elif not config.get("only_mon"):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")

        yield

    except Exception:
        log.info("Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get("keep_running"):
            return
        log.info("Stopping ceph...")
        ctx.cluster.run(args=["sudo", "stop", "ceph-all", run.Raw("||"),
                              "sudo", "service", "ceph", "stop",
                              run.Raw("||"),
                              "sudo", "systemctl", "stop", "ceph.target"])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(
            args=["sudo", "status", "ceph-all", run.Raw("||"),
                  "sudo", "service", "ceph", "status",
                  run.Raw("||"),
                  "sudo", "systemctl", "status", "ceph.target"],
            check_status=False)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(
            args=["sudo", "ps", "aux", run.Raw("|"),
                  "grep", "-v", "grep", run.Raw("|"),
                  "grep", "ceph"],
            check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info("Archiving mon data...")
            path = os.path.join(ctx.archive, "data")
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type("mon"))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith("mon."):
                        teuthology.pull_directory_tarball(
                            remote, "/var/lib/ceph/mon",
                            path + "/" + role + ".tgz")

            log.info("Compressing logs...")
            run.wait(
                ctx.cluster.run(
                    args=[
                        "sudo", "find", "/var/log/ceph", "-name", "*.log",
                        "-print0", run.Raw("|"),
                        "sudo", "xargs", "-0", "--no-run-if-empty",
                        "--", "gzip", "--",
                    ],
                    wait=False,
                ),
            )

            log.info("Archiving logs...")
            path = os.path.join(ctx.archive, "remote")
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, "/var/log/ceph",
                                          os.path.join(sub, "log"))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = "./ceph-deploy purge" + " " + all_nodes
        purgedata_nodes = "./ceph-deploy purgedata" + " " + all_nodes

        log.info("Purging package...")
        execute_ceph_deploy(purge_nodes)
        log.info("Purging data...")
        execute_ceph_deploy(purgedata_nodes)
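# build_ceph_cluster() yields exactly once between setup and the teardown in
# its finally block, so it is intended to be driven as a context manager
# rather than called directly (teuthology tasks typically do this through
# their own context helpers). The standalone sketch below is hypothetical and
# only illustrates how such a generator can be wrapped with the standard
# library; the wrapper name is illustrative.
import contextlib

@contextlib.contextmanager
def _managed_ceph_cluster(ctx, config):
    gen = build_ceph_cluster(ctx, config)
    next(gen)              # run setup up to the yield
    try:
        yield
    finally:
        for _ in gen:      # resume past the yield so the teardown runs
            pass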