def get_host_vars(self, remote):
    extra_vars = self.config.get('vars', dict())
    host_vars = dict()
    if not extra_vars.get('osd_auto_discovery', False):
        roles = self.ctx.cluster.remotes[remote]
        dev_needed = len(
            [role for role in roles if role.startswith('osd')])
        if teuth_config.get('ceph_ansible') and \
                self.ctx.machine_type in teuth_config['ceph_ansible']['has_lvm_scratch_disks']:
            devices = get_file(remote, "/scratch_devs").split()
            vols = []
            for dev in devices:
                if 'vg_nvme' in dev:
                    splitpath = dev.split('/')
                    vol = dict()
                    vol['data_vg'] = splitpath[2]
                    vol['data'] = splitpath[3]
                    vols.append(vol)
            extra_vars['lvm_volumes'] = vols
            self.config.update({'vars': extra_vars})
        else:
            host_vars['devices'] = get_scratch_devices(remote)[0:dev_needed]
    if 'monitor_interface' not in extra_vars:
        host_vars['monitor_interface'] = remote.interface
    if 'radosgw_interface' not in extra_vars:
        host_vars['radosgw_interface'] = remote.interface
    if 'public_network' not in extra_vars:
        host_vars['public_network'] = remote.cidr
    return host_vars
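# A minimal, self-contained sketch (hypothetical device paths) of how the
# lvm_volumes entries above are derived: a scratch device such as
# '/dev/vg_nvme/lv_4' splits into ['', 'dev', 'vg_nvme', 'lv_4'], so index 2
# is the volume group and index 3 is the logical volume.
def _lvm_volumes_from_scratch_devs(devices):
    vols = []
    for dev in devices:
        if 'vg_nvme' in dev:
            _, _, data_vg, data = dev.split('/')
            vols.append({'data_vg': data_vg, 'data': data})
    return vols

# Example (hypothetical input):
# _lvm_volumes_from_scratch_devs(['/dev/vg_nvme/lv_4', '/dev/sdb'])
# -> [{'data_vg': 'vg_nvme', 'data': 'lv_4'}]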
def get_dev_for_osd(ctx, config):
    """Get a list of all osd device names."""
    osd_devs = []
    for remote, roles_for_host in ctx.cluster.remotes.items():
        host = remote.name.split('@')[-1]
        shortname = host.split('.')[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(
            teuthology.roles_of_type(roles_for_host, 'osd'))
        num_osds = len(num_osd_per_host)
        if config.get('separate_journal_disk') is not None:
            num_devs_reqd = 2 * num_osds
            assert num_devs_reqd <= len(devs), \
                'fewer data and journal disks than required ' + shortname
            for dindex in range(0, num_devs_reqd, 2):
                jd_index = dindex + 1
                dev_short = devs[dindex].split('/')[-1]
                jdev_short = devs[jd_index].split('/')[-1]
                osd_devs.append((shortname, dev_short, jdev_short))
        else:
            assert num_osds <= len(devs), 'fewer disks than osds ' + shortname
            for dev in devs[:num_osds]:
                dev_short = dev.split('/')[-1]
                osd_devs.append((shortname, dev_short))
    return osd_devs
def get_dev_for_osd(ctx, config):
    """Get a list of all osd device names."""
    osd_devs = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        host = remote.name.split('@')[-1]
        shortname = host.split('.')[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(
            teuthology.roles_of_type(roles_for_host, 'osd'))
        num_osds = len(num_osd_per_host)
        if config.get('separate_journal_disk') is not None:
            num_devs_reqd = 2 * num_osds
            assert num_devs_reqd <= len(devs), \
                'fewer data and journal disks than required ' + shortname
            for dindex in range(0, num_devs_reqd, 2):
                jd_index = dindex + 1
                dev_short = devs[dindex].split('/')[-1]
                jdev_short = devs[jd_index].split('/')[-1]
                osd_devs.append((shortname, dev_short, jdev_short))
        else:
            assert num_osds <= len(devs), 'fewer disks than osds ' + shortname
            for dev in devs[:num_osds]:
                dev_short = dev.split('/')[-1]
                osd_devs.append((shortname, dev_short))
    return osd_devs
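# A minimal, self-contained sketch (hypothetical host and device names) of the
# device selection done by the two variants above: with a separate journal
# disk, scratch devices are consumed in (data, journal) pairs, otherwise one
# device is taken per OSD.
def _select_osd_devs(shortname, devs, num_osds, separate_journal):
    out = []
    if separate_journal:
        assert 2 * num_osds <= len(devs)
        for i in range(0, 2 * num_osds, 2):
            out.append((shortname,
                        devs[i].split('/')[-1],
                        devs[i + 1].split('/')[-1]))
    else:
        assert num_osds <= len(devs)
        for dev in devs[:num_osds]:
            out.append((shortname, dev.split('/')[-1]))
    return out

# Example (hypothetical input):
# _select_osd_devs('smithi001', ['/dev/sdb', '/dev/sdc', '/dev/sdd', '/dev/sde'], 2, True)
# -> [('smithi001', 'sdb', 'sdc'), ('smithi001', 'sdd', 'sde')]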
def ceph_osds(ctx, config):
    """
    Deploy OSDs
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid
    try:
        log.info('Deploying OSDs...')

        # provision OSDs in numeric order
        id_to_remote = {}
        devs_by_remote = {}
        for remote, roles in ctx.cluster.remotes.items():
            devs_by_remote[remote] = teuthology.get_scratch_devices(remote)
            for osd in [r for r in roles
                        if teuthology.is_type('osd', cluster_name)(r)]:
                _, _, id_ = teuthology.split_role(osd)
                id_to_remote[int(id_)] = (osd, remote)

        cur = 0
        for osd_id in sorted(id_to_remote.keys()):
            osd, remote = id_to_remote[osd_id]
            _, _, id_ = teuthology.split_role(osd)
            assert int(id_) == cur

            devs = devs_by_remote[remote]
            assert devs   ## FIXME ##
            dev = devs.pop()
            if all(_ in dev for _ in ('lv', 'vg')):
                short_dev = dev.replace('/dev/', '')
            else:
                short_dev = dev
            log.info('Deploying %s on %s with %s...' % (
                osd, remote.shortname, dev))
            _shell(ctx, cluster_name, remote, ['ceph-volume', 'lvm', 'zap', dev])
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'daemon', 'add', 'osd',
                remote.shortname + ':' + short_dev
            ])
            ctx.daemons.register_daemon(
                remote, 'osd', id_,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild(osd),
                wait=False,
                started=True,
            )
            cur += 1

        yield
    finally:
        pass
def create_ceph_conf(ctx, config): devs_to_clean = {} remote_to_roles_to_devs = {} remote_to_roles_to_journals = {} osds = ctx.cluster.only(teuthology.is_type('osd')) for remote, roles_for_host in osds.remotes.iteritems(): devs = teuthology.get_scratch_devices(remote) roles_to_devs = {} roles_to_journals = {} if config.get('fs'): log.info('fs option selected, checking for scratch devs') log.info('found devs: %s' % (str(devs),)) devs_id_map = teuthology.get_wwn_id_map(remote, devs) iddevs = devs_id_map.values() roles_to_devs = assign_devs( teuthology.roles_of_type(roles_for_host, 'osd'), iddevs ) if len(roles_to_devs) < len(iddevs): iddevs = iddevs[len(roles_to_devs):] devs_to_clean[remote] = [] if config.get('block_journal'): log.info('block journal enabled') roles_to_journals = assign_devs( teuthology.roles_of_type(roles_for_host, 'osd'), iddevs ) log.info('journal map: %s', roles_to_journals) if config.get('tmpfs_journal'): log.info('tmpfs journal enabled') roles_to_journals = {} remote.run( args=[ 'sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt' ] ) for osd in teuthology.roles_of_type(roles_for_host, 'osd'): tmpfs = '/mnt/osd.%s' % osd roles_to_journals[osd] = tmpfs remote.run( args=[ 'truncate', '-s', '1500M', tmpfs ] ) log.info('journal map: %s', roles_to_journals) log.info('dev map: %s' % (str(roles_to_devs),)) remote_to_roles_to_devs[remote] = roles_to_devs remote_to_roles_to_journals[remote] = roles_to_journals log.info('Generating config...') remotes_and_roles = ctx.cluster.remotes.items() roles = [role_list for (remote, role_list) in remotes_and_roles] ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)] conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips) ctx.ceph = argparse.Namespace() ctx.ceph.conf = conf log.info(ctx) yield
def get_dev_for_osd(ctx, config):
    osd_devs = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        host = remote.name.split('@')[-1]
        shortname = host.split('.')[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(teuthology.roles_of_type(roles_for_host, 'osd'))
        num_osds = len(num_osd_per_host)
        assert num_osds <= len(devs), 'fewer disks than osds on ' + shortname
        for dev in devs[:num_osds]:
            dev_short = dev.split('/')[-1]
            osd_devs.append('{host}:{dev}'.format(host=shortname, dev=dev_short))
    return osd_devs
def get_dev_for_osd(ctx, config):
    """Get a list of all osd device names."""
    osd_devs = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        host = remote.name.split("@")[-1]
        shortname = host.split(".")[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(teuthology.roles_of_type(roles_for_host, "osd"))
        num_osds = len(num_osd_per_host)
        assert num_osds <= len(devs), "fewer disks than osds on " + shortname
        for dev in devs[:num_osds]:
            dev_short = dev.split("/")[-1]
            osd_devs.append("{host}:{dev}".format(host=shortname, dev=dev_short))
    return osd_devs
def get_dev_for_osd(ctx, config):
    osd_devs = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        host = remote.name.split('@')[-1]
        shortname = host.split('.')[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(teuthology.roles_of_type(
            roles_for_host, 'osd'))
        num_osds = len(num_osd_per_host)
        assert num_osds <= len(devs), 'fewer disks than osds on ' + shortname
        for dev in devs[:num_osds]:
            dev_short = dev.split('/')[-1]
            osd_devs.append('{host}:{dev}'.format(host=shortname, dev=dev_short))
    return osd_devs
def get_host_vars(self, remote):
    extra_vars = self.config.get('vars', dict())
    host_vars = dict()
    if not extra_vars.get('osd_auto_discovery', False):
        roles = self.ctx.cluster.remotes[remote]
        dev_needed = len(
            [role for role in roles if role.startswith('osd')])
        host_vars['devices'] = get_scratch_devices(remote)[0:dev_needed]
    if 'monitor_interface' not in extra_vars:
        host_vars['monitor_interface'] = remote.interface
    if 'radosgw_interface' not in extra_vars:
        host_vars['radosgw_interface'] = remote.interface
    if 'public_network' not in extra_vars:
        host_vars['public_network'] = remote.cidr
    return host_vars
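# A sketch of the host_vars mapping the function above would return for a
# remote carrying two osd roles (all values hypothetical): 'devices' holds the
# first two scratch devices, and the interface/network keys fall back to the
# remote's primary interface and CIDR unless already set in the 'vars' config.
example_host_vars = {
    'devices': ['/dev/sdb', '/dev/sdc'],
    'monitor_interface': 'eth0',
    'radosgw_interface': 'eth0',
    'public_network': '172.21.0.0/20',
}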
def ceph_volume_osd_create(ctx, config):
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    no_of_osds = 0
    for remote in osds.remotes.keys():
        # all devs should be lvm
        osd_create_cmd = './ceph-deploy osd create --debug ' + remote.shortname + ' '
        # default is bluestore so we just need config item for filestore
        roles = ctx.cluster.remotes[remote]
        dev_needed = len([role for role in roles
                          if role.startswith('osd')])
        all_devs = teuthology.get_scratch_devices(remote)
        log.info("node={n}, need_devs={d}, available={a}".format(
            n=remote.shortname,
            d=dev_needed,
            a=all_devs,
        ))
        devs = all_devs[0:dev_needed]
        # rest of the devices can be used for journal if required
        jdevs = dev_needed
        for device in devs:
            device_split = device.split('/')
            lv_device = device_split[-2] + '/' + device_split[-1]
            if config.get('filestore') is not None:
                osd_create_cmd += '--filestore --data ' + lv_device + ' '
                # filestore with ceph-volume also needs journal disk
                try:
                    jdevice = all_devs.pop(jdevs)
                except IndexError:
                    raise RuntimeError("No device available for \
                        journal configuration")
                jdevice_split = jdevice.split('/')
                j_lv = jdevice_split[-2] + '/' + jdevice_split[-1]
                osd_create_cmd += '--journal ' + j_lv
            else:
                osd_create_cmd += ' --data ' + lv_device
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")
    return no_of_osds
def ceph_volume_osd_create(ctx, config):
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    no_of_osds = 0
    for remote in osds.remotes.iterkeys():
        # all devs should be lvm
        osd_create_cmd = './ceph-deploy osd create --debug ' + remote.shortname + ' '
        # default is bluestore so we just need config item for filestore
        roles = ctx.cluster.remotes[remote]
        dev_needed = len([role for role in roles
                          if role.startswith('osd')])
        all_devs = teuthology.get_scratch_devices(remote)
        log.info("node={n}, need_devs={d}, available={a}".format(
            n=remote.shortname,
            d=dev_needed,
            a=all_devs,
        ))
        devs = all_devs[0:dev_needed]
        # rest of the devices can be used for journal if required
        jdevs = dev_needed
        for device in devs:
            device_split = device.split('/')
            lv_device = device_split[-2] + '/' + device_split[-1]
            if config.get('filestore') is not None:
                osd_create_cmd += '--filestore --data ' + lv_device + ' '
                # filestore with ceph-volume also needs journal disk
                try:
                    jdevice = all_devs.pop(jdevs)
                except IndexError:
                    raise RuntimeError("No device available for \
                        journal configuration")
                jdevice_split = jdevice.split('/')
                j_lv = jdevice_split[-2] + '/' + jdevice_split[-1]
                osd_create_cmd += '--journal ' + j_lv
            else:
                osd_create_cmd += ' --data ' + lv_device
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")
    return no_of_osds
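# A minimal sketch (hypothetical names) of the command string the loops above
# assemble from LVM scratch devices: bluestore only needs --data, while
# filestore also appends a --journal logical volume taken from the remaining
# scratch devices.
def _osd_create_cmd(shortname, lv_devices, filestore=False, journal_lv=None):
    cmd = './ceph-deploy osd create --debug ' + shortname + ' '
    for lv in lv_devices:
        if filestore:
            cmd += '--filestore --data ' + lv + ' '
            if journal_lv:
                cmd += '--journal ' + journal_lv
        else:
            cmd += ' --data ' + lv
    return cmd

# Example (hypothetical input):
# _osd_create_cmd('smithi001', ['vg_nvme/lv_1'])
# -> './ceph-deploy osd create --debug smithi001  --data vg_nvme/lv_1'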
def cluster(ctx, config): """ Handle the creation and removal of a ceph cluster. On startup: Create directories needed for the cluster. Create remote journals for all osds. Create and set keyring. Copy the monmap to tht test systems. Setup mon nodes. Setup mds nodes. Mkfs osd nodes. Add keyring information to monmaps Mkfs mon nodes. On exit: If errors occured, extract a failure message and store in ctx.summary. Unmount all test files and temporary journaling files. Save the monitor information and archive all ceph logs. Cleanup the keyring setup, and remove all monitor map and data files left over. :param ctx: Context :param config: Configuration """ if ctx.config.get('use_existing_cluster', False) is True: log.info("'use_existing_cluster' is true; skipping cluster creation") yield testdir = teuthology.get_testdir(ctx) log.info('Creating ceph cluster...') run.wait( ctx.cluster.run( args=[ 'install', '-d', '-m0755', '--', '{tdir}/data'.format(tdir=testdir), ], wait=False, ) ) run.wait( ctx.cluster.run( args=[ 'sudo', 'install', '-d', '-m0777', '--', '/var/run/ceph', ], wait=False, ) ) devs_to_clean = {} remote_to_roles_to_devs = {} remote_to_roles_to_journals = {} osds = ctx.cluster.only(teuthology.is_type('osd')) for remote, roles_for_host in osds.remotes.iteritems(): devs = teuthology.get_scratch_devices(remote) roles_to_devs = {} roles_to_journals = {} if config.get('fs'): log.info('fs option selected, checking for scratch devs') log.info('found devs: %s' % (str(devs),)) devs_id_map = teuthology.get_wwn_id_map(remote, devs) iddevs = devs_id_map.values() roles_to_devs = assign_devs( teuthology.roles_of_type(roles_for_host, 'osd'), iddevs ) if len(roles_to_devs) < len(iddevs): iddevs = iddevs[len(roles_to_devs):] devs_to_clean[remote] = [] if config.get('block_journal'): log.info('block journal enabled') roles_to_journals = assign_devs( teuthology.roles_of_type(roles_for_host, 'osd'), iddevs ) log.info('journal map: %s', roles_to_journals) if config.get('tmpfs_journal'): log.info('tmpfs journal enabled') roles_to_journals = {} remote.run( args=[ 'sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt' ] ) for osd in teuthology.roles_of_type(roles_for_host, 'osd'): tmpfs = '/mnt/osd.%s' % osd roles_to_journals[osd] = tmpfs remote.run( args=[ 'truncate', '-s', '1500M', tmpfs ] ) log.info('journal map: %s', roles_to_journals) log.info('dev map: %s' % (str(roles_to_devs),)) remote_to_roles_to_devs[remote] = roles_to_devs remote_to_roles_to_journals[remote] = roles_to_journals log.info('Generating config...') remotes_and_roles = ctx.cluster.remotes.items() roles = [role_list for (remote, role_list) in remotes_and_roles] ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)] conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips) for remote, roles_to_journals in remote_to_roles_to_journals.iteritems(): for role, journal in roles_to_journals.iteritems(): key = "osd." 
+ str(role) if key not in conf: conf[key] = {} conf[key]['osd journal'] = journal for section, keys in config['conf'].iteritems(): for key, value in keys.iteritems(): log.info("[%s] %s = %s" % (section, key, value)) if section not in conf: conf[section] = {} conf[section][key] = value if config.get('tmpfs_journal'): conf['journal dio'] = False ctx.ceph = argparse.Namespace() ctx.ceph.conf = conf keyring_path = config.get('keyring_path', '/etc/ceph/ceph.keyring') coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) firstmon = teuthology.get_first_mon(ctx, config) log.info('Setting up %s...' % firstmon) ctx.cluster.only(firstmon).run( args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', '--create-keyring', keyring_path, ], ) ctx.cluster.only(firstmon).run( args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', '--gen-key', '--name=mon.', keyring_path, ], ) ctx.cluster.only(firstmon).run( args=[ 'sudo', 'chmod', '0644', keyring_path, ], ) (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() fsid = teuthology.create_simple_monmap( ctx, remote=mon0_remote, conf=conf, ) if not 'global' in conf: conf['global'] = {} conf['global']['fsid'] = fsid log.info('Writing ceph.conf for FSID %s...' % fsid) conf_path = config.get('conf_path', DEFAULT_CONF_PATH) write_conf(ctx, conf_path) log.info('Creating admin key on %s...' % firstmon) ctx.cluster.only(firstmon).run( args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', '--gen-key', '--name=client.admin', '--set-uid=0', '--cap', 'mon', 'allow *', '--cap', 'osd', 'allow *', '--cap', 'mds', 'allow *', keyring_path, ], ) log.info('Copying monmap to all nodes...') keyring = teuthology.get_file( remote=mon0_remote, path=keyring_path, ) monmap = teuthology.get_file( remote=mon0_remote, path='{tdir}/monmap'.format(tdir=testdir), ) for rem in ctx.cluster.remotes.iterkeys(): # copy mon key and initial monmap log.info('Sending monmap to node {remote}'.format(remote=rem)) teuthology.sudo_write_file( remote=rem, path=keyring_path, data=keyring, perms='0644' ) teuthology.write_file( remote=rem, path='{tdir}/monmap'.format(tdir=testdir), data=monmap, ) log.info('Setting up mon nodes...') mons = ctx.cluster.only(teuthology.is_type('mon')) run.wait( mons.run( args=[ 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'osdmaptool', '-c', conf_path, '--clobber', '--createsimple', '{num:d}'.format( num=teuthology.num_instances_of_type(ctx.cluster, 'osd'), ), '{tdir}/osdmap'.format(tdir=testdir), '--pg_bits', '2', '--pgp_bits', '4', ], wait=False, ), ) log.info('Setting up mds nodes...') mdss = ctx.cluster.only(teuthology.is_type('mds')) for remote, roles_for_host in mdss.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'mds'): remote.run( args=[ 'sudo', 'mkdir', '-p', '/var/lib/ceph/mds/ceph-{id}'.format(id=id_), run.Raw('&&'), 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', '--create-keyring', '--gen-key', '--name=mds.{id}'.format(id=id_), '/var/lib/ceph/mds/ceph-{id}/keyring'.format(id=id_), ], ) cclient.create_keyring(ctx) log.info('Running mkfs on osd nodes...') ctx.disk_config = argparse.Namespace() ctx.disk_config.remote_to_roles_to_dev = remote_to_roles_to_devs ctx.disk_config.remote_to_roles_to_journals = remote_to_roles_to_journals ctx.disk_config.remote_to_roles_to_dev_mount_options = {} ctx.disk_config.remote_to_roles_to_dev_fstype = {} log.info("ctx.disk_config.remote_to_roles_to_dev: 
{r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev))) for remote, roles_for_host in osds.remotes.iteritems(): roles_to_devs = remote_to_roles_to_devs[remote] roles_to_journals = remote_to_roles_to_journals[remote] for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): remote.run( args=[ 'sudo', 'mkdir', '-p', '/var/lib/ceph/osd/ceph-{id}'.format(id=id_), ]) log.info(str(roles_to_journals)) log.info(id_) if roles_to_devs.get(id_): dev = roles_to_devs[id_] fs = config.get('fs') package = None mkfs_options = config.get('mkfs_options') mount_options = config.get('mount_options') if fs == 'btrfs': #package = 'btrfs-tools' if mount_options is None: mount_options = ['noatime','user_subvol_rm_allowed'] if mkfs_options is None: mkfs_options = ['-m', 'single', '-l', '32768', '-n', '32768'] if fs == 'xfs': #package = 'xfsprogs' if mount_options is None: mount_options = ['noatime'] if mkfs_options is None: mkfs_options = ['-f', '-i', 'size=2048'] if fs == 'ext4' or fs == 'ext3': if mount_options is None: mount_options = ['noatime','user_xattr'] if mount_options is None: mount_options = [] if mkfs_options is None: mkfs_options = [] mkfs = ['mkfs.%s' % fs] + mkfs_options log.info('%s on %s on %s' % (mkfs, dev, remote)) if package is not None: remote.run( args=[ 'sudo', 'apt-get', 'install', '-y', package ], stdout=StringIO(), ) try: remote.run(args= ['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) except run.CommandFailedError: # Newer btfs-tools doesn't prompt for overwrite, use -f if '-f' not in mount_options: mkfs_options.append('-f') mkfs = ['mkfs.%s' % fs] + mkfs_options log.info('%s on %s on %s' % (mkfs, dev, remote)) remote.run(args= ['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) log.info('mount %s on %s -o %s' % (dev, remote, ','.join(mount_options))) remote.run( args=[ 'sudo', 'mount', '-t', fs, '-o', ','.join(mount_options), dev, os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=id_)), ] ) if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options: ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {} ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][id_] = mount_options if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype: ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {} ctx.disk_config.remote_to_roles_to_dev_fstype[remote][id_] = fs devs_to_clean[remote].append( os.path.join( os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=id_)), ) ) for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): remote.run( args=[ 'sudo', 'MALLOC_CHECK_=3', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-osd', '--mkfs', '--mkkey', '-i', id_, '--monmap', '{tdir}/monmap'.format(tdir=testdir), ], ) log.info('Reading keys from all nodes...') keys_fp = StringIO() keys = [] for remote, roles_for_host in ctx.cluster.remotes.iteritems(): for type_ in ['mds','osd']: for id_ in teuthology.roles_of_type(roles_for_host, type_): data = teuthology.get_file( remote=remote, path='/var/lib/ceph/{type}/ceph-{id}/keyring'.format( type=type_, id=id_, ), sudo=True, ) keys.append((type_, id_, data)) keys_fp.write(data) for remote, roles_for_host in ctx.cluster.remotes.iteritems(): for type_ in ['client']: for id_ in teuthology.roles_of_type(roles_for_host, type_): data = teuthology.get_file( remote=remote, path='/etc/ceph/ceph.client.{id}.keyring'.format(id=id_) ) keys.append((type_, id_, data)) keys_fp.write(data) log.info('Adding keys to all mons...') writes = mons.run( args=[ 'sudo', 'tee', '-a', keyring_path, ], stdin=run.PIPE, wait=False, 
stdout=StringIO(), ) keys_fp.seek(0) teuthology.feed_many_stdins_and_close(keys_fp, writes) run.wait(writes) for type_, id_, data in keys: run.wait( mons.run( args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', keyring_path, '--name={type}.{id}'.format( type=type_, id=id_, ), ] + list(teuthology.generate_caps(type_)), wait=False, ), ) log.info('Running mkfs on mon nodes...') for remote, roles_for_host in mons.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'mon'): remote.run( args=[ 'sudo', 'mkdir', '-p', '/var/lib/ceph/mon/ceph-{id}'.format(id=id_), ], ) remote.run( args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-mon', '--mkfs', '-i', id_, '--monmap={tdir}/monmap'.format(tdir=testdir), '--osdmap={tdir}/osdmap'.format(tdir=testdir), '--keyring={kpath}'.format(kpath=keyring_path), ], ) run.wait( mons.run( args=[ 'rm', '--', '{tdir}/monmap'.format(tdir=testdir), '{tdir}/osdmap'.format(tdir=testdir), ], wait=False, ), ) try: yield except Exception: # we need to know this below ctx.summary['success'] = False raise finally: (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() log.info('Checking cluster log for badness...') def first_in_ceph_log(pattern, excludes): """ Find the first occurence of the pattern specified in the Ceph log, Returns None if none found. :param pattern: Pattern scanned for. :param excludes: Patterns to ignore. :return: First line of text (or None if not found) """ args = [ 'sudo', 'egrep', pattern, '/var/log/ceph/ceph.log', ] for exclude in excludes: args.extend([run.Raw('|'), 'egrep', '-v', exclude]) args.extend([ run.Raw('|'), 'head', '-n', '1', ]) r = mon0_remote.run( stdout=StringIO(), args=args, ) stdout = r.stdout.getvalue() if stdout != '': return stdout return None if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', config['log_whitelist']) is not None: log.warning('Found errors (ERR|WRN|SEC) in cluster log') ctx.summary['success'] = False # use the most severe problem as the failure reason if 'failure_reason' not in ctx.summary: for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: match = first_in_ceph_log(pattern, config['log_whitelist']) if match is not None: ctx.summary['failure_reason'] = \ '"{match}" in cluster log'.format( match=match.rstrip('\n'), ) break for remote, dirs in devs_to_clean.iteritems(): for dir_ in dirs: log.info('Unmounting %s on %s' % (dir_, remote)) try: remote.run( args=[ 'sync', run.Raw('&&'), 'sudo', 'umount', '-f', dir_ ] ) except Exception as e: remote.run(args=[ 'sudo', run.Raw('PATH=/usr/sbin:$PATH'), 'lsof', run.Raw(';'), 'ps', 'auxf', ]) raise e if config.get('tmpfs_journal'): log.info('tmpfs journal enabled - unmounting tmpfs at /mnt') for remote, roles_for_host in osds.remotes.iteritems(): remote.run( args=[ 'sudo', 'umount', '-f', '/mnt' ], check_status=False, ) if ctx.archive is not None and \ not (ctx.config.get('archive-on-error') and ctx.summary['success']): # archive mon data, too log.info('Archiving mon data...') path = os.path.join(ctx.archive, 'data') os.makedirs(path) for remote, roles in mons.remotes.iteritems(): for role in roles: if role.startswith('mon.'): teuthology.pull_directory_tarball( remote, '/var/lib/ceph/mon', path + '/' + role + '.tgz') log.info('Cleaning ceph cluster...') run.wait( ctx.cluster.run( args=[ 'sudo', 'rm', '-rf', '--', conf_path, keyring_path, '{tdir}/data'.format(tdir=testdir), '{tdir}/monmap'.format(tdir=testdir), ], wait=False, ), )
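# assign_devs() is called repeatedly in the cluster() variants above but is not
# shown in this collection. A plausible minimal implementation (an assumption,
# not necessarily the exact upstream helper) simply pairs each osd role with a
# scratch device in order, dropping any surplus devices:
def assign_devs(roles, devs):
    """Map each role to a device, in the order both sequences are given."""
    return dict(zip(roles, devs))

# Example (hypothetical input):
# assign_devs(['osd.0', 'osd.1'], ['/dev/sdb', '/dev/sdc', '/dev/sdd'])
# -> {'osd.0': '/dev/sdb', 'osd.1': '/dev/sdc'}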
def cluster(ctx, config): """ Handle the creation and removal of a ceph cluster. On startup: Create directories needed for the cluster. Create remote journals for all osds. Create and set keyring. Copy the monmap to tht test systems. Setup mon nodes. Setup mds nodes. Mkfs osd nodes. Add keyring information to monmaps Mkfs mon nodes. On exit: If errors occured, extract a failure message and store in ctx.summary. Unmount all test files and temporary journaling files. Save the monitor information and archive all ceph logs. Cleanup the keyring setup, and remove all monitor map and data files left over. :param ctx: Context :param config: Configuration """ if ctx.config.get('use_existing_cluster', False) is True: log.info("'use_existing_cluster' is true; skipping cluster creation") yield testdir = teuthology.get_testdir(ctx) cluster_name = config['cluster'] data_dir = '{tdir}/{cluster}.data'.format(tdir=testdir, cluster=cluster_name) log.info('Creating ceph cluster %s...', cluster_name) run.wait( ctx.cluster.run( args=[ 'install', '-d', '-m0755', '--', data_dir, ], wait=False, )) run.wait( ctx.cluster.run( args=[ 'sudo', 'install', '-d', '-m0777', '--', '/var/run/ceph', ], wait=False, )) devs_to_clean = {} remote_to_roles_to_devs = {} remote_to_roles_to_journals = {} osds = ctx.cluster.only(teuthology.is_type('osd', cluster_name)) for remote, roles_for_host in osds.remotes.iteritems(): devs = teuthology.get_scratch_devices(remote) roles_to_devs = {} roles_to_journals = {} if config.get('fs'): log.info('fs option selected, checking for scratch devs') log.info('found devs: %s' % (str(devs), )) devs_id_map = teuthology.get_wwn_id_map(remote, devs) iddevs = devs_id_map.values() roles_to_devs = assign_devs( teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name), iddevs) if len(roles_to_devs) < len(iddevs): iddevs = iddevs[len(roles_to_devs):] devs_to_clean[remote] = [] if config.get('block_journal'): log.info('block journal enabled') roles_to_journals = assign_devs( teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name), iddevs) log.info('journal map: %s', roles_to_journals) if config.get('tmpfs_journal'): log.info('tmpfs journal enabled') roles_to_journals = {} remote.run(args=['sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt']) for role in teuthology.cluster_roles_of_type( roles_for_host, 'osd', cluster_name): tmpfs = '/mnt/' + role roles_to_journals[role] = tmpfs remote.run(args=['truncate', '-s', '1500M', tmpfs]) log.info('journal map: %s', roles_to_journals) log.info('dev map: %s' % (str(roles_to_devs), )) remote_to_roles_to_devs[remote] = roles_to_devs remote_to_roles_to_journals[remote] = roles_to_journals log.info('Generating config...') remotes_and_roles = ctx.cluster.remotes.items() roles = [role_list for (remote, role_list) in remotes_and_roles] ips = [ host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles) ] conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips, cluster=cluster_name) for remote, roles_to_journals in remote_to_roles_to_journals.iteritems(): for role, journal in roles_to_journals.iteritems(): name = teuthology.ceph_role(role) if name not in conf: conf[name] = {} conf[name]['osd journal'] = journal for section, keys in config['conf'].iteritems(): for key, value in keys.iteritems(): log.info("[%s] %s = %s" % (section, key, value)) if section not in conf: conf[section] = {} conf[section][key] = value if config.get('tmpfs_journal'): conf['journal dio'] = False if not hasattr(ctx, 
'ceph'): ctx.ceph = {} ctx.ceph[cluster_name] = argparse.Namespace() ctx.ceph[cluster_name].conf = conf default_keyring = '/etc/ceph/{cluster}.keyring'.format( cluster=cluster_name) keyring_path = config.get('keyring_path', default_keyring) coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) firstmon = teuthology.get_first_mon(ctx, config, cluster_name) log.info('Setting up %s...' % firstmon) ctx.cluster.only(firstmon).run(args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', '--create-keyring', keyring_path, ], ) ctx.cluster.only(firstmon).run(args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', '--gen-key', '--name=mon.', keyring_path, ], ) ctx.cluster.only(firstmon).run(args=[ 'sudo', 'chmod', '0644', keyring_path, ], ) (mon0_remote, ) = ctx.cluster.only(firstmon).remotes.keys() monmap_path = '{tdir}/{cluster}.monmap'.format(tdir=testdir, cluster=cluster_name) fsid = teuthology.create_simple_monmap( ctx, remote=mon0_remote, conf=conf, path=monmap_path, ) if not 'global' in conf: conf['global'] = {} conf['global']['fsid'] = fsid default_conf_path = '/etc/ceph/{cluster}.conf'.format(cluster=cluster_name) conf_path = config.get('conf_path', default_conf_path) log.info('Writing %s for FSID %s...' % (conf_path, fsid)) write_conf(ctx, conf_path, cluster_name) log.info('Creating admin key on %s...' % firstmon) ctx.cluster.only(firstmon).run(args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', '--gen-key', '--name=client.admin', '--set-uid=0', '--cap', 'mon', 'allow *', '--cap', 'osd', 'allow *', '--cap', 'mds', 'allow *', keyring_path, ], ) log.info('Copying monmap to all nodes...') keyring = teuthology.get_file( remote=mon0_remote, path=keyring_path, ) monmap = teuthology.get_file( remote=mon0_remote, path=monmap_path, ) for rem in ctx.cluster.remotes.iterkeys(): # copy mon key and initial monmap log.info('Sending monmap to node {remote}'.format(remote=rem)) teuthology.sudo_write_file(remote=rem, path=keyring_path, data=keyring, perms='0644') teuthology.write_file( remote=rem, path=monmap_path, data=monmap, ) log.info('Setting up mon nodes...') mons = ctx.cluster.only(teuthology.is_type('mon', cluster_name)) osdmap_path = '{tdir}/{cluster}.osdmap'.format(tdir=testdir, cluster=cluster_name) run.wait( mons.run( args=[ 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'osdmaptool', '-c', conf_path, '--clobber', '--createsimple', '{num:d}'.format(num=teuthology.num_instances_of_type( ctx.cluster, 'osd', cluster_name), ), osdmap_path, '--pg_bits', '2', '--pgp_bits', '4', ], wait=False, ), ) log.info('Setting up mds nodes...') mdss = ctx.cluster.only(teuthology.is_type('mds', cluster_name)) for remote, roles_for_host in mdss.remotes.iteritems(): for role in teuthology.cluster_roles_of_type(roles_for_host, 'mds', cluster_name): _, _, id_ = teuthology.split_role(role) mds_dir = '/var/lib/ceph/mds/{cluster}-{id}'.format( cluster=cluster_name, id=id_, ) remote.run(args=[ 'sudo', 'mkdir', '-p', mds_dir, run.Raw('&&'), 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', '--create-keyring', '--gen-key', '--name=mds.{id}'.format(id=id_), mds_dir + '/keyring', ], ) cclient.create_keyring(ctx, cluster_name) log.info('Running mkfs on osd nodes...') if not hasattr(ctx, 'disk_config'): ctx.disk_config = argparse.Namespace() if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev'): ctx.disk_config.remote_to_roles_to_dev = {} if not hasattr(ctx.disk_config, 'remote_to_roles_to_journals'): 
ctx.disk_config.remote_to_roles_to_journals = {} if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_mount_options'): ctx.disk_config.remote_to_roles_to_dev_mount_options = {} if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_fstype'): ctx.disk_config.remote_to_roles_to_dev_fstype = {} teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_dev, remote_to_roles_to_devs) teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_journals, remote_to_roles_to_journals) log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format( r=str(ctx.disk_config.remote_to_roles_to_dev))) for remote, roles_for_host in osds.remotes.iteritems(): roles_to_devs = remote_to_roles_to_devs[remote] roles_to_journals = remote_to_roles_to_journals[remote] for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name): _, _, id_ = teuthology.split_role(role) mnt_point = '/var/lib/ceph/osd/{cluster}-{id}'.format( cluster=cluster_name, id=id_) remote.run(args=[ 'sudo', 'mkdir', '-p', mnt_point, ]) log.info(str(roles_to_journals)) log.info(role) if roles_to_devs.get(role): dev = roles_to_devs[role] fs = config.get('fs') package = None mkfs_options = config.get('mkfs_options') mount_options = config.get('mount_options') if fs == 'btrfs': # package = 'btrfs-tools' if mount_options is None: mount_options = ['noatime', 'user_subvol_rm_allowed'] if mkfs_options is None: mkfs_options = [ '-m', 'single', '-l', '32768', '-n', '32768' ] if fs == 'xfs': # package = 'xfsprogs' if mount_options is None: mount_options = ['noatime'] if mkfs_options is None: mkfs_options = ['-f', '-i', 'size=2048'] if fs == 'ext4' or fs == 'ext3': if mount_options is None: mount_options = ['noatime', 'user_xattr'] if mount_options is None: mount_options = [] if mkfs_options is None: mkfs_options = [] mkfs = ['mkfs.%s' % fs] + mkfs_options log.info('%s on %s on %s' % (mkfs, dev, remote)) if package is not None: remote.run( args=['sudo', 'apt-get', 'install', '-y', package], stdout=StringIO(), ) try: remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) except run.CommandFailedError: # Newer btfs-tools doesn't prompt for overwrite, use -f if '-f' not in mount_options: mkfs_options.append('-f') mkfs = ['mkfs.%s' % fs] + mkfs_options log.info('%s on %s on %s' % (mkfs, dev, remote)) remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) log.info('mount %s on %s -o %s' % (dev, remote, ','.join(mount_options))) remote.run(args=[ 'sudo', 'mount', '-t', fs, '-o', ','.join(mount_options), dev, mnt_point, ]) if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options: ctx.disk_config.remote_to_roles_to_dev_mount_options[ remote] = {} ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][ role] = mount_options if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype: ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {} ctx.disk_config.remote_to_roles_to_dev_fstype[remote][ role] = fs devs_to_clean[remote].append(mnt_point) for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name): _, _, id_ = teuthology.split_role(role) remote.run(args=[ 'sudo', 'MALLOC_CHECK_=3', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-osd', '--cluster', cluster_name, '--mkfs', '--mkkey', '-i', id_, '--monmap', monmap_path, ], ) log.info('Reading keys from all nodes...') keys_fp = StringIO() keys = [] for remote, roles_for_host in ctx.cluster.remotes.iteritems(): for type_ in ['mds', 'osd']: for role in teuthology.cluster_roles_of_type( roles_for_host, type_, cluster_name): _, 
_, id_ = teuthology.split_role(role) data = teuthology.get_file( remote=remote, path='/var/lib/ceph/{type}/{cluster}-{id}/keyring'.format( type=type_, id=id_, cluster=cluster_name, ), sudo=True, ) keys.append((type_, id_, data)) keys_fp.write(data) for remote, roles_for_host in ctx.cluster.remotes.iteritems(): for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', cluster_name): _, _, id_ = teuthology.split_role(role) data = teuthology.get_file( remote=remote, path='/etc/ceph/{cluster}.client.{id}.keyring'.format( id=id_, cluster=cluster_name)) keys.append(('client', id_, data)) keys_fp.write(data) log.info('Adding keys to all mons...') writes = mons.run( args=[ 'sudo', 'tee', '-a', keyring_path, ], stdin=run.PIPE, wait=False, stdout=StringIO(), ) keys_fp.seek(0) teuthology.feed_many_stdins_and_close(keys_fp, writes) run.wait(writes) for type_, id_, data in keys: run.wait( mons.run( args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', keyring_path, '--name={type}.{id}'.format( type=type_, id=id_, ), ] + list(teuthology.generate_caps(type_)), wait=False, ), ) log.info('Running mkfs on mon nodes...') for remote, roles_for_host in mons.remotes.iteritems(): for role in teuthology.cluster_roles_of_type(roles_for_host, 'mon', cluster_name): _, _, id_ = teuthology.split_role(role) remote.run(args=[ 'sudo', 'mkdir', '-p', '/var/lib/ceph/mon/{cluster}-{id}'.format( id=id_, cluster=cluster_name), ], ) remote.run(args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-mon', '--cluster', cluster_name, '--mkfs', '-i', id_, '--monmap', monmap_path, '--osdmap', osdmap_path, '--keyring', keyring_path, ], ) run.wait( mons.run( args=[ 'rm', '--', monmap_path, osdmap_path, ], wait=False, ), ) try: yield except Exception: # we need to know this below ctx.summary['success'] = False raise finally: (mon0_remote, ) = ctx.cluster.only(firstmon).remotes.keys() log.info('Checking cluster log for badness...') def first_in_ceph_log(pattern, excludes): """ Find the first occurence of the pattern specified in the Ceph log, Returns None if none found. :param pattern: Pattern scanned for. :param excludes: Patterns to ignore. 
:return: First line of text (or None if not found) """ args = [ 'sudo', 'egrep', pattern, '/var/log/ceph/{cluster}.log'.format(cluster=cluster_name), ] for exclude in excludes: args.extend([run.Raw('|'), 'egrep', '-v', exclude]) args.extend([ run.Raw('|'), 'head', '-n', '1', ]) r = mon0_remote.run( stdout=StringIO(), args=args, ) stdout = r.stdout.getvalue() if stdout != '': return stdout return None if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', config['log_whitelist']) is not None: log.warning('Found errors (ERR|WRN|SEC) in cluster log') ctx.summary['success'] = False # use the most severe problem as the failure reason if 'failure_reason' not in ctx.summary: for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: match = first_in_ceph_log(pattern, config['log_whitelist']) if match is not None: ctx.summary['failure_reason'] = \ '"{match}" in cluster log'.format( match=match.rstrip('\n'), ) break for remote, dirs in devs_to_clean.iteritems(): for dir_ in dirs: log.info('Unmounting %s on %s' % (dir_, remote)) try: remote.run(args=[ 'sync', run.Raw('&&'), 'sudo', 'umount', '-f', dir_ ]) except Exception as e: remote.run(args=[ 'sudo', run.Raw('PATH=/usr/sbin:$PATH'), 'lsof', run.Raw(';'), 'ps', 'auxf', ]) raise e if config.get('tmpfs_journal'): log.info('tmpfs journal enabled - unmounting tmpfs at /mnt') for remote, roles_for_host in osds.remotes.iteritems(): remote.run( args=['sudo', 'umount', '-f', '/mnt'], check_status=False, ) if ctx.archive is not None and \ not (ctx.config.get('archive-on-error') and ctx.summary['success']): # archive mon data, too log.info('Archiving mon data...') path = os.path.join(ctx.archive, 'data') try: os.makedirs(path) except OSError as e: if e.errno == errno.EEXIST: pass else: raise for remote, roles in mons.remotes.iteritems(): for role in roles: is_mon = teuthology.is_type('mon', cluster_name) if is_mon(role): _, _, id_ = teuthology.split_role(role) mon_dir = '/var/lib/ceph/mon/' + \ '{0}-{1}'.format(cluster_name, id_) teuthology.pull_directory_tarball( remote, mon_dir, path + '/' + role + '.tgz') log.info('Cleaning ceph cluster...') run.wait( ctx.cluster.run( args=[ 'sudo', 'rm', '-rf', '--', conf_path, keyring_path, data_dir, monmap_path, osdmap_path, run.Raw('{tdir}/../*.pid'.format(tdir=testdir)), ], wait=False, ), )
def cluster(ctx, config): """ Handle the creation and removal of a ceph cluster. On startup: Create directories needed for the cluster. Create remote journals for all osds. Create and set keyring. Copy the monmap to tht test systems. Setup mon nodes. Setup mds nodes. Mkfs osd nodes. Add keyring information to monmaps Mkfs mon nodes. On exit: If errors occured, extract a failure message and store in ctx.summary. Unmount all test files and temporary journaling files. Save the monitor information and archive all ceph logs. Cleanup the keyring setup, and remove all monitor map and data files left over. :param ctx: Context :param config: Configuration """ if ctx.config.get("use_existing_cluster", False) is True: log.info("'use_existing_cluster' is true; skipping cluster creation") yield testdir = teuthology.get_testdir(ctx) cluster_name = config["cluster"] data_dir = "{tdir}/{cluster}.data".format(tdir=testdir, cluster=cluster_name) log.info("Creating ceph cluster %s...", cluster_name) run.wait(ctx.cluster.run(args=["install", "-d", "-m0755", "--", data_dir], wait=False)) run.wait(ctx.cluster.run(args=["sudo", "install", "-d", "-m0777", "--", "/var/run/ceph"], wait=False)) devs_to_clean = {} remote_to_roles_to_devs = {} remote_to_roles_to_journals = {} osds = ctx.cluster.only(teuthology.is_type("osd", cluster_name)) for remote, roles_for_host in osds.remotes.iteritems(): devs = teuthology.get_scratch_devices(remote) roles_to_devs = {} roles_to_journals = {} if config.get("fs"): log.info("fs option selected, checking for scratch devs") log.info("found devs: %s" % (str(devs),)) devs_id_map = teuthology.get_wwn_id_map(remote, devs) iddevs = devs_id_map.values() roles_to_devs = assign_devs(teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name), iddevs) if len(roles_to_devs) < len(iddevs): iddevs = iddevs[len(roles_to_devs) :] devs_to_clean[remote] = [] if config.get("block_journal"): log.info("block journal enabled") roles_to_journals = assign_devs( teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name), iddevs ) log.info("journal map: %s", roles_to_journals) if config.get("tmpfs_journal"): log.info("tmpfs journal enabled") roles_to_journals = {} remote.run(args=["sudo", "mount", "-t", "tmpfs", "tmpfs", "/mnt"]) for role in teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name): tmpfs = "/mnt/" + role roles_to_journals[role] = tmpfs remote.run(args=["truncate", "-s", "1500M", tmpfs]) log.info("journal map: %s", roles_to_journals) log.info("dev map: %s" % (str(roles_to_devs),)) remote_to_roles_to_devs[remote] = roles_to_devs remote_to_roles_to_journals[remote] = roles_to_journals log.info("Generating config...") remotes_and_roles = ctx.cluster.remotes.items() roles = [role_list for (remote, role_list) in remotes_and_roles] ips = [ host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles) ] conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips, cluster=cluster_name) for remote, roles_to_journals in remote_to_roles_to_journals.iteritems(): for role, journal in roles_to_journals.iteritems(): name = teuthology.ceph_role(role) if name not in conf: conf[name] = {} conf[name]["osd journal"] = journal for section, keys in config["conf"].iteritems(): for key, value in keys.iteritems(): log.info("[%s] %s = %s" % (section, key, value)) if section not in conf: conf[section] = {} conf[section][key] = value if config.get("tmpfs_journal"): conf["journal dio"] = False if not hasattr(ctx, "ceph"): ctx.ceph = {} 
ctx.ceph[cluster_name] = argparse.Namespace() ctx.ceph[cluster_name].conf = conf default_keyring = "/etc/ceph/{cluster}.keyring".format(cluster=cluster_name) keyring_path = config.get("keyring_path", default_keyring) coverage_dir = "{tdir}/archive/coverage".format(tdir=testdir) firstmon = teuthology.get_first_mon(ctx, config, cluster_name) log.info("Setting up %s..." % firstmon) ctx.cluster.only(firstmon).run( args=[ "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir, "ceph-authtool", "--create-keyring", keyring_path, ] ) ctx.cluster.only(firstmon).run( args=[ "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir, "ceph-authtool", "--gen-key", "--name=mon.", keyring_path, ] ) ctx.cluster.only(firstmon).run(args=["sudo", "chmod", "0644", keyring_path]) (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() monmap_path = "{tdir}/{cluster}.monmap".format(tdir=testdir, cluster=cluster_name) fsid = teuthology.create_simple_monmap(ctx, remote=mon0_remote, conf=conf, path=monmap_path) if not "global" in conf: conf["global"] = {} conf["global"]["fsid"] = fsid default_conf_path = "/etc/ceph/{cluster}.conf".format(cluster=cluster_name) conf_path = config.get("conf_path", default_conf_path) log.info("Writing %s for FSID %s..." % (conf_path, fsid)) write_conf(ctx, conf_path, cluster_name) log.info("Creating admin key on %s..." % firstmon) ctx.cluster.only(firstmon).run( args=[ "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir, "ceph-authtool", "--gen-key", "--name=client.admin", "--set-uid=0", "--cap", "mon", "allow *", "--cap", "osd", "allow *", "--cap", "mds", "allow *", keyring_path, ] ) log.info("Copying monmap to all nodes...") keyring = teuthology.get_file(remote=mon0_remote, path=keyring_path) monmap = teuthology.get_file(remote=mon0_remote, path=monmap_path) for rem in ctx.cluster.remotes.iterkeys(): # copy mon key and initial monmap log.info("Sending monmap to node {remote}".format(remote=rem)) teuthology.sudo_write_file(remote=rem, path=keyring_path, data=keyring, perms="0644") teuthology.write_file(remote=rem, path=monmap_path, data=monmap) log.info("Setting up mon nodes...") mons = ctx.cluster.only(teuthology.is_type("mon", cluster_name)) osdmap_path = "{tdir}/{cluster}.osdmap".format(tdir=testdir, cluster=cluster_name) run.wait( mons.run( args=[ "adjust-ulimits", "ceph-coverage", coverage_dir, "osdmaptool", "-c", conf_path, "--clobber", "--createsimple", "{num:d}".format(num=teuthology.num_instances_of_type(ctx.cluster, "osd", cluster_name)), osdmap_path, "--pg_bits", "2", "--pgp_bits", "4", ], wait=False, ) ) log.info("Setting up mgr nodes...") mgrs = ctx.cluster.only(teuthology.is_type("mgr", cluster_name)) for remote, roles_for_host in mgrs.remotes.iteritems(): for role in teuthology.cluster_roles_of_type(roles_for_host, "mgr", cluster_name): _, _, id_ = teuthology.split_role(role) mgr_dir = "/var/lib/ceph/mgr/{cluster}-{id}".format(cluster=cluster_name, id=id_) remote.run( args=[ "sudo", "mkdir", "-p", mgr_dir, run.Raw("&&"), "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir, "ceph-authtool", "--create-keyring", "--gen-key", "--name=mgr.{id}".format(id=id_), mgr_dir + "/keyring", ] ) log.info("Setting up mds nodes...") mdss = ctx.cluster.only(teuthology.is_type("mds", cluster_name)) for remote, roles_for_host in mdss.remotes.iteritems(): for role in teuthology.cluster_roles_of_type(roles_for_host, "mds", cluster_name): _, _, id_ = teuthology.split_role(role) mds_dir = "/var/lib/ceph/mds/{cluster}-{id}".format(cluster=cluster_name, id=id_) remote.run( args=[ 
"sudo", "mkdir", "-p", mds_dir, run.Raw("&&"), "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir, "ceph-authtool", "--create-keyring", "--gen-key", "--name=mds.{id}".format(id=id_), mds_dir + "/keyring", ] ) cclient.create_keyring(ctx, cluster_name) log.info("Running mkfs on osd nodes...") if not hasattr(ctx, "disk_config"): ctx.disk_config = argparse.Namespace() if not hasattr(ctx.disk_config, "remote_to_roles_to_dev"): ctx.disk_config.remote_to_roles_to_dev = {} if not hasattr(ctx.disk_config, "remote_to_roles_to_journals"): ctx.disk_config.remote_to_roles_to_journals = {} if not hasattr(ctx.disk_config, "remote_to_roles_to_dev_mount_options"): ctx.disk_config.remote_to_roles_to_dev_mount_options = {} if not hasattr(ctx.disk_config, "remote_to_roles_to_dev_fstype"): ctx.disk_config.remote_to_roles_to_dev_fstype = {} teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_dev, remote_to_roles_to_devs) teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_journals, remote_to_roles_to_journals) log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev))) for remote, roles_for_host in osds.remotes.iteritems(): roles_to_devs = remote_to_roles_to_devs[remote] roles_to_journals = remote_to_roles_to_journals[remote] for role in teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name): _, _, id_ = teuthology.split_role(role) mnt_point = "/var/lib/ceph/osd/{cluster}-{id}".format(cluster=cluster_name, id=id_) remote.run(args=["sudo", "mkdir", "-p", mnt_point]) log.info(str(roles_to_journals)) log.info(role) if roles_to_devs.get(role): dev = roles_to_devs[role] fs = config.get("fs") package = None mkfs_options = config.get("mkfs_options") mount_options = config.get("mount_options") if fs == "btrfs": # package = 'btrfs-tools' if mount_options is None: mount_options = ["noatime", "user_subvol_rm_allowed"] if mkfs_options is None: mkfs_options = ["-m", "single", "-l", "32768", "-n", "32768"] if fs == "xfs": # package = 'xfsprogs' if mount_options is None: mount_options = ["noatime"] if mkfs_options is None: mkfs_options = ["-f", "-i", "size=2048"] if fs == "ext4" or fs == "ext3": if mount_options is None: mount_options = ["noatime", "user_xattr"] if mount_options is None: mount_options = [] if mkfs_options is None: mkfs_options = [] mkfs = ["mkfs.%s" % fs] + mkfs_options log.info("%s on %s on %s" % (mkfs, dev, remote)) if package is not None: remote.run(args=["sudo", "apt-get", "install", "-y", package], stdout=StringIO()) try: remote.run(args=["yes", run.Raw("|")] + ["sudo"] + mkfs + [dev]) except run.CommandFailedError: # Newer btfs-tools doesn't prompt for overwrite, use -f if "-f" not in mount_options: mkfs_options.append("-f") mkfs = ["mkfs.%s" % fs] + mkfs_options log.info("%s on %s on %s" % (mkfs, dev, remote)) remote.run(args=["yes", run.Raw("|")] + ["sudo"] + mkfs + [dev]) log.info("mount %s on %s -o %s" % (dev, remote, ",".join(mount_options))) remote.run(args=["sudo", "mount", "-t", fs, "-o", ",".join(mount_options), dev, mnt_point]) if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options: ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {} ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][role] = mount_options if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype: ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {} ctx.disk_config.remote_to_roles_to_dev_fstype[remote][role] = fs devs_to_clean[remote].append(mnt_point) for role in 
teuthology.cluster_roles_of_type(roles_for_host, "osd", cluster_name): _, _, id_ = teuthology.split_role(role) remote.run( args=[ "sudo", "MALLOC_CHECK_=3", "adjust-ulimits", "ceph-coverage", coverage_dir, "ceph-osd", "--cluster", cluster_name, "--mkfs", "--mkkey", "-i", id_, "--monmap", monmap_path, ] ) log.info("Reading keys from all nodes...") keys_fp = StringIO() keys = [] for remote, roles_for_host in ctx.cluster.remotes.iteritems(): for type_ in ["mgr", "mds", "osd"]: for role in teuthology.cluster_roles_of_type(roles_for_host, type_, cluster_name): _, _, id_ = teuthology.split_role(role) data = teuthology.get_file( remote=remote, path="/var/lib/ceph/{type}/{cluster}-{id}/keyring".format(type=type_, id=id_, cluster=cluster_name), sudo=True, ) keys.append((type_, id_, data)) keys_fp.write(data) for remote, roles_for_host in ctx.cluster.remotes.iteritems(): for role in teuthology.cluster_roles_of_type(roles_for_host, "client", cluster_name): _, _, id_ = teuthology.split_role(role) data = teuthology.get_file( remote=remote, path="/etc/ceph/{cluster}.client.{id}.keyring".format(id=id_, cluster=cluster_name) ) keys.append(("client", id_, data)) keys_fp.write(data) log.info("Adding keys to all mons...") writes = mons.run(args=["sudo", "tee", "-a", keyring_path], stdin=run.PIPE, wait=False, stdout=StringIO()) keys_fp.seek(0) teuthology.feed_many_stdins_and_close(keys_fp, writes) run.wait(writes) for type_, id_, data in keys: run.wait( mons.run( args=[ "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir, "ceph-authtool", keyring_path, "--name={type}.{id}".format(type=type_, id=id_), ] + list(generate_caps(type_)), wait=False, ) ) log.info("Running mkfs on mon nodes...") for remote, roles_for_host in mons.remotes.iteritems(): for role in teuthology.cluster_roles_of_type(roles_for_host, "mon", cluster_name): _, _, id_ = teuthology.split_role(role) remote.run( args=["sudo", "mkdir", "-p", "/var/lib/ceph/mon/{cluster}-{id}".format(id=id_, cluster=cluster_name)] ) remote.run( args=[ "sudo", "adjust-ulimits", "ceph-coverage", coverage_dir, "ceph-mon", "--cluster", cluster_name, "--mkfs", "-i", id_, "--monmap", monmap_path, "--osdmap", osdmap_path, "--keyring", keyring_path, ] ) run.wait(mons.run(args=["rm", "--", monmap_path, osdmap_path], wait=False)) try: yield except Exception: # we need to know this below ctx.summary["success"] = False raise finally: (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() log.info("Checking cluster log for badness...") def first_in_ceph_log(pattern, excludes): """ Find the first occurence of the pattern specified in the Ceph log, Returns None if none found. :param pattern: Pattern scanned for. :param excludes: Patterns to ignore. 
:return: First line of text (or None if not found) """ args = ["sudo", "egrep", pattern, "/var/log/ceph/{cluster}.log".format(cluster=cluster_name)] for exclude in excludes: args.extend([run.Raw("|"), "egrep", "-v", exclude]) args.extend([run.Raw("|"), "head", "-n", "1"]) r = mon0_remote.run(stdout=StringIO(), args=args) stdout = r.stdout.getvalue() if stdout != "": return stdout return None if first_in_ceph_log("\[ERR\]|\[WRN\]|\[SEC\]", config["log_whitelist"]) is not None: log.warning("Found errors (ERR|WRN|SEC) in cluster log") ctx.summary["success"] = False # use the most severe problem as the failure reason if "failure_reason" not in ctx.summary: for pattern in ["\[SEC\]", "\[ERR\]", "\[WRN\]"]: match = first_in_ceph_log(pattern, config["log_whitelist"]) if match is not None: ctx.summary["failure_reason"] = '"{match}" in cluster log'.format(match=match.rstrip("\n")) break for remote, dirs in devs_to_clean.iteritems(): for dir_ in dirs: log.info("Unmounting %s on %s" % (dir_, remote)) try: remote.run(args=["sync", run.Raw("&&"), "sudo", "umount", "-f", dir_]) except Exception as e: remote.run(args=["sudo", run.Raw("PATH=/usr/sbin:$PATH"), "lsof", run.Raw(";"), "ps", "auxf"]) raise e if config.get("tmpfs_journal"): log.info("tmpfs journal enabled - unmounting tmpfs at /mnt") for remote, roles_for_host in osds.remotes.iteritems(): remote.run(args=["sudo", "umount", "-f", "/mnt"], check_status=False) if ctx.archive is not None and not (ctx.config.get("archive-on-error") and ctx.summary["success"]): # archive mon data, too log.info("Archiving mon data...") path = os.path.join(ctx.archive, "data") try: os.makedirs(path) except OSError as e: if e.errno == errno.EEXIST: pass else: raise for remote, roles in mons.remotes.iteritems(): for role in roles: is_mon = teuthology.is_type("mon", cluster_name) if is_mon(role): _, _, id_ = teuthology.split_role(role) mon_dir = "/var/lib/ceph/mon/" + "{0}-{1}".format(cluster_name, id_) teuthology.pull_directory_tarball(remote, mon_dir, path + "/" + role + ".tgz") log.info("Cleaning ceph cluster...") run.wait( ctx.cluster.run( args=[ "sudo", "rm", "-rf", "--", conf_path, keyring_path, data_dir, monmap_path, osdmap_path, run.Raw("{tdir}/../*.pid".format(tdir=testdir)), ], wait=False, ) )
def task(ctx, config):
    log.info('Setting up nvme_loop on scratch devices...')
    host = 'hostnqn'
    port = '1'
    devs_by_remote = {}
    old_scratch_by_remote = {}
    for remote, roles in ctx.cluster.remotes.items():
        devs = teuthology.get_scratch_devices(remote)
        devs_by_remote[remote] = devs
        base = '/sys/kernel/config/nvmet'
        remote.run(args=[
            'sudo', 'modprobe', 'nvme_loop',
            run.Raw('&&'),
            'sudo', 'mkdir', '-p', f'{base}/hosts/{host}',
            run.Raw('&&'),
            'sudo', 'mkdir', '-p', f'{base}/ports/{port}',
            run.Raw('&&'),
            'echo', 'loop',
            run.Raw('|'),
            'sudo', 'tee', f'{base}/ports/{port}/addr_trtype',
        ])
        for dev in devs:
            short = dev.split('/')[-1]
            log.info(f'Connecting nvme_loop {remote.shortname}:{dev}...')
            remote.run(args=[
                'sudo', 'mkdir', '-p', f'{base}/subsystems/{short}',
                run.Raw('&&'),
                'echo', '1',
                run.Raw('|'),
                'sudo', 'tee', f'{base}/subsystems/{short}/attr_allow_any_host',
                run.Raw('&&'),
                'sudo', 'mkdir', '-p', f'{base}/subsystems/{short}/namespaces/1',
                run.Raw('&&'),
                'echo', dev,
                run.Raw('|'),
                'sudo', 'tee', f'{base}/subsystems/{short}/namespaces/1/device_path',
                run.Raw('&&'),
                'echo', '1',
                run.Raw('|'),
                'sudo', 'tee', f'{base}/subsystems/{short}/namespaces/1/enable',
                run.Raw('&&'),
                'sudo', 'ln', '-s',
                f'{base}/subsystems/{short}',
                f'{base}/ports/{port}/subsystems/{short}',
                run.Raw('&&'),
                'sudo', 'nvme', 'connect', '-t', 'loop', '-n', short, '-q', host,
            ])

        # identify nvme_loops devices
        old_scratch_by_remote[remote] = remote.read_file('/scratch_devs')

        with contextutil.safe_while(sleep=1, tries=15) as proceed:
            while proceed():
                p = remote.run(args=['sudo', 'nvme', 'list'], stdout=StringIO())
                new_devs = []
                for line in p.stdout.getvalue().splitlines():
                    dev, _, vendor = line.split()[0:3]
                    if dev.startswith('/dev/') and vendor == 'Linux':
                        new_devs.append(dev)
                log.info(f'new_devs {new_devs}')
                assert len(new_devs) <= len(devs)
                if len(new_devs) == len(devs):
                    break

        remote.write_file(
            path='/scratch_devs',
            data='\n'.join(new_devs) + '\n',
            sudo=True)

    try:
        yield
    finally:
        for remote, devs in devs_by_remote.items():
            for dev in devs:
                short = dev.split('/')[-1]
                log.info(f'Disconnecting nvme_loop {remote.shortname}:{dev}...')
                remote.run(
                    args=['sudo', 'nvme', 'disconnect', '-n', short],
                    check_status=False,
                )
            remote.write_file(
                path='/scratch_devs',
                data=old_scratch_by_remote[remote],
                sudo=True)
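# A small, self-contained sketch of the `nvme list` parsing done in the polling
# loop above (the sample output row is hypothetical): only rows whose first
# column is a /dev/ path and whose third column is 'Linux' are counted as
# connected nvme_loop devices; a guard for short lines is added here.
def _parse_nvme_list(output):
    new_devs = []
    for line in output.splitlines():
        fields = line.split()
        if len(fields) < 3:
            continue
        dev, _, vendor = fields[0:3]
        if dev.startswith('/dev/') and vendor == 'Linux':
            new_devs.append(dev)
    return new_devs

# Example (hypothetical row):
# _parse_nvme_list('/dev/nvme1n1  loop0  Linux  1.0') -> ['/dev/nvme1n1']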
def cli_test(ctx, config): """ ceph-deploy cli to exercise most commonly use cli's and ensure all commands works and also startup the init system. """ log.info('Ceph-deploy Test') if config is None: config = {} if config.get('rhbuild'): path=None else: path = teuthology.get_testdir(ctx) mons = ctx.cluster.only(teuthology.is_type('mon')) for node,role in mons.remotes.iteritems(): admin=node admin.run( args=[ 'mkdir', '~/', 'cdtest' ],check_status=False) nodename=admin.shortname system_type = teuthology.get_system_type(admin) if config.get('rhbuild'): admin.run(args = ['sudo', 'yum', 'install', 'ceph-deploy', '-y']) log.info('system type is %s', system_type) osds = ctx.cluster.only(teuthology.is_type('osd')) for remote,roles in osds.remotes.iteritems(): devs = teuthology.get_scratch_devices(remote) log.info("roles %s" , roles) if (len(devs) < 3): log.error('Test needs minimum of 3 devices, only found %s', str(devs)) raise RuntimeError ( "Needs minimum of 3 devices ") new_cmd= 'new ' + nodename new_mon_install = 'install --mon ' + nodename new_osd_install = 'install --osd ' + nodename new_admin = 'install --cli ' + nodename create_initial= '--overwrite-conf mon create-initial ' + nodename execute_cdeploy(admin,new_cmd,path) execute_cdeploy(admin,new_mon_install,path) execute_cdeploy(admin,new_osd_install,path) execute_cdeploy(admin,new_admin,path) execute_cdeploy(admin,create_initial,path) for i in range(3): zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename,d=devs[i]) prepare= 'osd prepare ' + "{n}:{d}".format(n=nodename,d=devs[i]) execute_cdeploy(admin,zap_disk,path) execute_cdeploy(admin,prepare,path) admin.run(args=['ls',run.Raw('-lt'),run.Raw('~/cdtest/')]) time.sleep(4) remote.run(args=['sudo', 'ceph','-s'],check_status=False) r = remote.run(args=['sudo', 'ceph','health'],stdout=StringIO()) out = r.stdout.getvalue() log.info('Ceph health: %s', out.rstrip('\n')) if out.split(None, 1)[0] == 'HEALTH_WARN': log.info('All ceph-deploy cli tests passed') else: raise RuntimeError ( "Failed to reach HEALTH_WARN State") #test rgw cli rgw_install = 'install --rgw ' + nodename rgw_create = 'rgw create ' + nodename execute_cdeploy(admin,rgw_install,path) execute_cdeploy(admin,rgw_create,path) try: yield finally: log.info("cleaning up") if system_type == 'deb': remote.run(args=['sudo', 'stop','ceph-all'],check_status=False) remote.run(args=['sudo', 'service','ceph', '-a', 'stop'],check_status=False) else: remote.run(args=['sudo', '/etc/init.d/ceph', '-a', 'stop'],check_status=False) time.sleep(4) for i in range(3): umount_dev = "{d}1".format(d=devs[i]) r = remote.run(args=['sudo', 'umount',run.Raw(umount_dev)]) cmd = 'purge ' + nodename execute_cdeploy(admin,cmd,path) cmd = 'purgedata ' + nodename execute_cdeploy(admin,cmd,path) admin.run(args=['rm',run.Raw('-rf'),run.Raw('~/cdtest/*')]) admin.run(args=['rmdir',run.Raw('~/cdtest')]) if config.get('rhbuild'): admin.run(args = ['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])
def cli_test(ctx, config): """ ceph-deploy cli to exercise most commonly use cli's and ensure all commands works and also startup the init system. """ log.info("Ceph-deploy Test") if config is None: config = {} test_branch = "" if config.get("rhbuild"): path = None else: path = teuthology.get_testdir(ctx) # test on branch from config eg: wip-* , master or next etc # packages for all distro's should exist for wip* if ctx.config.get("branch"): branch = ctx.config.get("branch") test_branch = " --dev={branch} ".format(branch=branch) mons = ctx.cluster.only(teuthology.is_type("mon")) for node, role in mons.remotes.iteritems(): admin = node admin.run(args=["mkdir", "~/", "cdtest"], check_status=False) nodename = admin.shortname system_type = teuthology.get_system_type(admin) if config.get("rhbuild"): admin.run(args=["sudo", "yum", "install", "ceph-deploy", "-y"]) log.info("system type is %s", system_type) osds = ctx.cluster.only(teuthology.is_type("osd")) for remote, roles in osds.remotes.iteritems(): devs = teuthology.get_scratch_devices(remote) log.info("roles %s", roles) if len(devs) < 3: log.error("Test needs minimum of 3 devices, only found %s", str(devs)) raise RuntimeError("Needs minimum of 3 devices ") new_cmd = "new " + nodename new_mon_install = "install {branch} --mon ".format(branch=test_branch) + nodename new_osd_install = "install {branch} --osd ".format(branch=test_branch) + nodename new_admin = "install {branch} --cli ".format(branch=test_branch) + nodename create_initial = "--overwrite-conf mon create-initial " execute_cdeploy(admin, new_cmd, path) execute_cdeploy(admin, new_mon_install, path) execute_cdeploy(admin, new_osd_install, path) execute_cdeploy(admin, new_admin, path) execute_cdeploy(admin, create_initial, path) for i in range(3): zap_disk = "disk zap " + "{n}:{d}".format(n=nodename, d=devs[i]) prepare = "osd prepare " + "{n}:{d}".format(n=nodename, d=devs[i]) execute_cdeploy(admin, zap_disk, path) execute_cdeploy(admin, prepare, path) admin.run(args=["ls", run.Raw("-lt"), run.Raw("~/cdtest/")]) time.sleep(4) remote.run(args=["sudo", "ceph", "-s"], check_status=False) r = remote.run(args=["sudo", "ceph", "health"], stdout=StringIO()) out = r.stdout.getvalue() log.info("Ceph health: %s", out.rstrip("\n")) if out.split(None, 1)[0] == "HEALTH_WARN": log.info("All ceph-deploy cli tests passed") else: raise RuntimeError("Failed to reach HEALTH_WARN State") # test rgw cli rgw_install = "install {branch} --rgw {node}".format(branch=test_branch, node=nodename) rgw_create = "rgw create " + nodename execute_cdeploy(admin, rgw_install, path) execute_cdeploy(admin, rgw_create, path) try: yield finally: log.info("cleaning up") ctx.cluster.run( args=[ "sudo", "stop", "ceph-all", run.Raw("||"), "sudo", "service", "ceph", "stop", run.Raw("||"), "sudo", "systemctl", "stop", "ceph.target", ], check_status=False, ) time.sleep(4) for i in range(3): umount_dev = "{d}1".format(d=devs[i]) r = remote.run(args=["sudo", "umount", run.Raw(umount_dev)]) cmd = "purge " + nodename execute_cdeploy(admin, cmd, path) cmd = "purgedata " + nodename execute_cdeploy(admin, cmd, path) admin.run(args=["rm", run.Raw("-rf"), run.Raw("~/cdtest/*")]) admin.run(args=["rmdir", run.Raw("~/cdtest")]) if config.get("rhbuild"): admin.run(args=["sudo", "yum", "remove", "ceph-deploy", "-y"])
def get_host_vars(self, remote, role): """ Method to get host vars Args: remote: remote node role: ceph role """ extra_vars = self.config.get('vars', dict()) host_vars = dict() err_msg =\ """Insufficient disks for {} scenario, Required : {}, but got {} please choose machine types which has sufficient disks""" # check for OSD auto-discovery if "osd" in role and not extra_vars.get('osd_auto_discovery', False): roles = self.each_cluster.remotes[remote] dev_needed = len( [role for role in roles if role.startswith('osd')]) # get disks nvme = self.get_disk_info(remote).get('devices', list()) disks = get_scratch_devices(remote) if len(disks) < dev_needed: raise ConfigError( err_msg.format("collocated", dev_needed, len(disks))) # Todo:1.> Intelligence to choose devices and dedicated # Todo:1.continued> devices lists based on disk types # disks = disks['hdd'] + disks['ssd'] + disks['nvme'] # if disks.get('hdd', 0) and \ # len(disks.get('hdd', list())) >= dev_needed: # devices = disks.get('hdd')[0:dev_needed] # else: # devices = disks[0:dev_needed] devices = disks[0:dev_needed] log.info("devices : {}".format(devices)) host_vars['devices'] = devices # check if the host has flash device, if so use it as journal # consider non-collocated scenario only for luminous if extra_vars.get('osd_scenario') == 'non-collocated' and \ self.rhbuild.startswith('3'): dedicated_devices = list(set(nvme.keys()) - set(devices)) dedicated_devices.sort() # check if disks remaining if not dedicated_devices: raise ConfigError( err_msg.format("non-collocated", dev_needed, len(dedicated_devices))) # check dedicated devices has required disks available # else replicated last disk to required number if len(dedicated_devices) >= dev_needed: dedicated_devices = dedicated_devices[0:dev_needed] else: required = dev_needed - len(dedicated_devices) dedicated_devices += [dedicated_devices[-1]] * required log.info("dedicated devices : {}".format(dedicated_devices)) host_vars['dedicated_devices'] = dedicated_devices if 'monitor_interface' not in extra_vars: host_vars['monitor_interface'] = remote.interface if "rgw" in role and 'radosgw_interface' not in extra_vars: host_vars['radosgw_interface'] = remote.interface if 'public_network' not in extra_vars: host_vars['public_network'] = remote.cidr return host_vars
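# For orientation, a hypothetical host_vars result from get_host_vars() for a remote
# carrying roles ['mon.a', 'osd.0', 'osd.1'], with osd_auto_discovery disabled and a
# collocated scenario; every value below is illustrative, not a default.
host_vars = {
    'devices': ['/dev/vdb', '/dev/vdc'],    # first dev_needed scratch disks
    'monitor_interface': 'eth0',            # remote.interface, unless already set in vars
    'public_network': '172.21.0.0/20',      # remote.cidr, unless already set in vars
}
# A non-collocated scenario on a 3.x rhbuild would additionally carry a
# 'dedicated_devices' list, and an rgw role would add 'radosgw_interface'.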
def cli_test(ctx, config): """ ceph-deploy cli to exercise most commonly use cli's and ensure all commands works and also startup the init system. """ log.info('Ceph-deploy Test') if config is None: config = {} test_branch = '' if config.get('rhbuild'): path = None else: path = teuthology.get_testdir(ctx) # test on branch from config eg: wip-* , master or next etc # packages for all distro's should exist for wip* if ctx.config.get('branch'): branch = ctx.config.get('branch') test_branch = ' --dev={branch} '.format(branch=branch) mons = ctx.cluster.only(teuthology.is_type('mon')) for node, role in mons.remotes.iteritems(): admin = node admin.run(args=['mkdir', '~/', 'cdtest'], check_status=False) nodename = admin.shortname system_type = teuthology.get_system_type(admin) if config.get('rhbuild'): admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y']) log.info('system type is %s', system_type) osds = ctx.cluster.only(teuthology.is_type('osd')) for remote, roles in osds.remotes.iteritems(): devs = teuthology.get_scratch_devices(remote) log.info("roles %s", roles) if (len(devs) < 3): log.error('Test needs minimum of 3 devices, only found %s', str(devs)) raise RuntimeError("Needs minimum of 3 devices ") new_cmd = 'new ' + nodename new_mon_install = 'install {branch} --mon '.format( branch=test_branch) + nodename new_osd_install = 'install {branch} --osd '.format( branch=test_branch) + nodename new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename create_initial = '--overwrite-conf mon create-initial ' execute_cdeploy(admin, new_cmd, path) execute_cdeploy(admin, new_mon_install, path) execute_cdeploy(admin, new_osd_install, path) execute_cdeploy(admin, new_admin, path) execute_cdeploy(admin, create_initial, path) for i in range(3): zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i]) prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i]) execute_cdeploy(admin, zap_disk, path) execute_cdeploy(admin, prepare, path) admin.run(args=['ls', run.Raw('-lt'), run.Raw('~/cdtest/')]) time.sleep(4) remote.run(args=['sudo', 'ceph', '-s'], check_status=False) r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO()) out = r.stdout.getvalue() log.info('Ceph health: %s', out.rstrip('\n')) if out.split(None, 1)[0] == 'HEALTH_WARN': log.info('All ceph-deploy cli tests passed') else: raise RuntimeError("Failed to reach HEALTH_WARN State") #test rgw cli rgw_install = 'install {branch} --rgw {node}'.format( branch=test_branch, node=nodename, ) rgw_create = 'rgw create ' + nodename execute_cdeploy(admin, rgw_install, path) execute_cdeploy(admin, rgw_create, path) try: yield finally: log.info("cleaning up") ctx.cluster.run(args=[ 'sudo', 'stop', 'ceph-all', run.Raw('||'), 'sudo', 'service', 'ceph', 'stop', run.Raw('||'), 'sudo', 'systemctl', 'stop', 'ceph.target' ], check_status=False) time.sleep(4) for i in range(3): umount_dev = "{d}1".format(d=devs[i]) r = remote.run(args=['sudo', 'umount', run.Raw(umount_dev)]) cmd = 'purge ' + nodename execute_cdeploy(admin, cmd, path) cmd = 'purgedata ' + nodename execute_cdeploy(admin, cmd, path) admin.run(args=['rm', run.Raw('-rf'), run.Raw('~/cdtest/*')]) admin.run(args=['rmdir', run.Raw('~/cdtest')]) if config.get('rhbuild'): admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])
def cli_test(ctx, config): """ ceph-deploy cli to exercise most commonly use cli's and ensure all commands works and also startup the init system. """ log.info('Ceph-deploy Test') if config is None: config = {} test_branch = '' conf_dir = teuthology.get_testdir(ctx) + "/cdtest" def execute_cdeploy(admin, cmd, path): """Execute ceph-deploy commands """ """Either use git path or repo path """ args = ['cd', conf_dir, run.Raw(';')] if path: args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path)) else: args.append('ceph-deploy') args.append(run.Raw(cmd)) ec = admin.run(args=args, check_status=False).exitstatus if ec != 0: raise RuntimeError( "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec)) if config.get('rhbuild'): path = None else: path = teuthology.get_testdir(ctx) # test on branch from config eg: wip-* , master or next etc # packages for all distro's should exist for wip* if ctx.config.get('branch'): branch = ctx.config.get('branch') test_branch = ' --dev={branch} '.format(branch=branch) mons = ctx.cluster.only(teuthology.is_type('mon')) for node, role in mons.remotes.items(): admin = node admin.run(args=['mkdir', conf_dir], check_status=False) nodename = admin.shortname system_type = teuthology.get_system_type(admin) if config.get('rhbuild'): admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y']) log.info('system type is %s', system_type) osds = ctx.cluster.only(teuthology.is_type('osd')) for remote, roles in osds.remotes.items(): devs = teuthology.get_scratch_devices(remote) log.info("roles %s", roles) if (len(devs) < 3): log.error( 'Test needs minimum of 3 devices, only found %s', str(devs)) raise RuntimeError("Needs minimum of 3 devices ") conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir) new_cmd = 'new ' + nodename execute_cdeploy(admin, new_cmd, path) if config.get('conf') is not None: confp = config.get('conf') for section, keys in confp.items(): lines = '[{section}]\n'.format(section=section) teuthology.append_lines_to_file(admin, conf_path, lines, sudo=True) for key, value in keys.items(): log.info("[%s] %s = %s" % (section, key, value)) lines = '{key} = {value}\n'.format(key=key, value=value) teuthology.append_lines_to_file(admin, conf_path, lines, sudo=True) new_mon_install = 'install {branch} --mon '.format( branch=test_branch) + nodename new_mgr_install = 'install {branch} --mgr '.format( branch=test_branch) + nodename new_osd_install = 'install {branch} --osd '.format( branch=test_branch) + nodename new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename create_initial = 'mon create-initial ' mgr_create = 'mgr create ' + nodename # either use create-keys or push command push_keys = 'admin ' + nodename execute_cdeploy(admin, new_mon_install, path) execute_cdeploy(admin, new_mgr_install, path) execute_cdeploy(admin, new_osd_install, path) execute_cdeploy(admin, new_admin, path) execute_cdeploy(admin, create_initial, path) execute_cdeploy(admin, mgr_create, path) execute_cdeploy(admin, push_keys, path) for i in range(3): zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i]) prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i]) execute_cdeploy(admin, zap_disk, path) execute_cdeploy(admin, prepare, path) log.info("list files for debugging purpose to check file permissions") admin.run(args=['ls', run.Raw('-lt'), conf_dir]) remote.run(args=['sudo', 'ceph', '-s'], check_status=False) r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO()) out = r.stdout.getvalue() log.info('Ceph 
health: %s', out.rstrip('\n')) log.info("Waiting for cluster to become healthy") with contextutil.safe_while(sleep=10, tries=6, action='check health') as proceed: while proceed(): r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO()) out = r.stdout.getvalue() if (out.split(None, 1)[0] == 'HEALTH_OK'): break rgw_install = 'install {branch} --rgw {node}'.format( branch=test_branch, node=nodename, ) rgw_create = 'rgw create ' + nodename execute_cdeploy(admin, rgw_install, path) execute_cdeploy(admin, rgw_create, path) log.info('All ceph-deploy cli tests passed') try: yield finally: log.info("cleaning up") ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'], check_status=False) time.sleep(4) for i in range(3): umount_dev = "{d}1".format(d=devs[i]) r = remote.run(args=['sudo', 'umount', run.Raw(umount_dev)]) cmd = 'purge ' + nodename execute_cdeploy(admin, cmd, path) cmd = 'purgedata ' + nodename execute_cdeploy(admin, cmd, path) log.info("Removing temporary dir") admin.run( args=[ 'rm', run.Raw('-rf'), run.Raw(conf_dir)], check_status=False) if config.get('rhbuild'): admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])
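# The safe_while loop above waits for `ceph health` to report HEALTH_OK. A plain-Python
# sketch of the same polling pattern, without the teuthology helpers; it assumes the
# ceph CLI is available and passwordless sudo on the node running it.
import subprocess
import time

def wait_for_health_ok(tries=6, sleep=10):
    for _ in range(tries):
        out = subprocess.run(['sudo', 'ceph', 'health'],
                             capture_output=True, text=True).stdout
        if out.split(None, 1)[:1] == ['HEALTH_OK']:
            return True
        time.sleep(sleep)
    return False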
def cluster(ctx, config): log.info('Creating ceph cluster...') run.wait( ctx.cluster.run( args=[ 'install', '-d', '-m0755', '--', '/tmp/cephtest/data', ], wait=False, ) ) devs_to_clean = {} remote_to_roles_to_devs = {} remote_to_roles_to_journals = {} osds = ctx.cluster.only(teuthology.is_type('osd')) for remote, roles_for_host in osds.remotes.iteritems(): devs = teuthology.get_scratch_devices(remote) roles_to_devs = {} roles_to_journals = {} if config.get('fs'): log.info('fs option selected, checkin for scratch devs') log.info('found devs: %s' % (str(devs),)) roles_to_devs = assign_devs( teuthology.roles_of_type(roles_for_host, 'osd'), devs ) if len(roles_to_devs) < len(devs): devs = devs[len(roles_to_devs):] log.info('dev map: %s' % (str(roles_to_devs),)) devs_to_clean[remote] = [] if config.get('block_journal'): log.info('block journal enabled') roles_to_journals = assign_devs( teuthology.roles_of_type(roles_for_host, 'osd'), devs ) log.info('journal map: %s', roles_to_journals) if config.get('tmpfs_journal'): log.info('tmpfs journal enabled') roles_to_journals = {} remote.run( args=[ 'sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt' ] ) for osd in teuthology.roles_of_type(roles_for_host, 'osd'): tmpfs = '/mnt/osd.%s' % osd roles_to_journals[osd] = tmpfs remote.run( args=[ 'truncate', '-s', '1500M', tmpfs ] ) log.info('journal map: %s', roles_to_journals) remote_to_roles_to_devs[remote] = roles_to_devs remote_to_roles_to_journals[remote] = roles_to_journals log.info('Generating config...') remotes_and_roles = ctx.cluster.remotes.items() roles = [roles for (remote, roles) in remotes_and_roles] ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, roles) in remotes_and_roles)] conf = teuthology.skeleton_config(roles=roles, ips=ips) for remote, roles_to_journals in remote_to_roles_to_journals.iteritems(): for role, journal in roles_to_journals.iteritems(): key = "osd." + str(role) if key not in conf: conf[key] = {} conf[key]['osd journal'] = journal for section, keys in config['conf'].iteritems(): for key, value in keys.iteritems(): log.info("[%s] %s = %s" % (section, key, value)) if section not in conf: conf[section] = {} conf[section][key] = value if config.get('tmpfs_journal'): conf['journal dio'] = False ctx.ceph = argparse.Namespace() ctx.ceph.conf = conf log.info('Writing configs...') conf_fp = StringIO() conf.write(conf_fp) conf_fp.seek(0) writes = ctx.cluster.run( args=[ 'python', '-c', 'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))', '/tmp/cephtest/ceph.conf', ], stdin=run.PIPE, wait=False, ) teuthology.feed_many_stdins_and_close(conf_fp, writes) run.wait(writes) coverage_dir = '/tmp/cephtest/archive/coverage' firstmon = teuthology.get_first_mon(ctx, config) log.info('Setting up %s...' % firstmon) ctx.cluster.only(firstmon).run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--create-keyring', '/tmp/cephtest/ceph.keyring', ], ) ctx.cluster.only(firstmon).run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--gen-key', '--name=mon.', '/tmp/cephtest/ceph.keyring', ], ) (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() teuthology.create_simple_monmap( remote=mon0_remote, conf=conf, ) log.info('Creating admin key on %s...' 
% firstmon) ctx.cluster.only(firstmon).run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--gen-key', '--name=client.admin', '--set-uid=0', '--cap', 'mon', 'allow *', '--cap', 'osd', 'allow *', '--cap', 'mds', 'allow', '/tmp/cephtest/ceph.keyring', ], ) log.info('Copying monmap to all nodes...') keyring = teuthology.get_file( remote=mon0_remote, path='/tmp/cephtest/ceph.keyring', ) monmap = teuthology.get_file( remote=mon0_remote, path='/tmp/cephtest/monmap', ) for rem in ctx.cluster.remotes.iterkeys(): # copy mon key and initial monmap log.info('Sending monmap to node {remote}'.format(remote=rem)) teuthology.write_file( remote=rem, path='/tmp/cephtest/ceph.keyring', data=keyring, ) teuthology.write_file( remote=rem, path='/tmp/cephtest/monmap', data=monmap, ) log.info('Setting up mon nodes...') mons = ctx.cluster.only(teuthology.is_type('mon')) run.wait( mons.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/osdmaptool', '--clobber', '--createsimple', '{num:d}'.format( num=teuthology.num_instances_of_type(ctx.cluster, 'osd'), ), '/tmp/cephtest/osdmap', '--pg_bits', '2', '--pgp_bits', '4', ], wait=False, ), ) log.info('Setting up osd nodes...') for remote, roles_for_host in osds.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--create-keyring', '--gen-key', '--name=osd.{id}'.format(id=id_), '/tmp/cephtest/data/osd.{id}.keyring'.format(id=id_), ], ) log.info('Setting up mds nodes...') mdss = ctx.cluster.only(teuthology.is_type('mds')) for remote, roles_for_host in mdss.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'mds'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--create-keyring', '--gen-key', '--name=mds.{id}'.format(id=id_), '/tmp/cephtest/data/mds.{id}.keyring'.format(id=id_), ], ) log.info('Setting up client nodes...') clients = ctx.cluster.only(teuthology.is_type('client')) for remote, roles_for_host in clients.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'client'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--create-keyring', '--gen-key', # TODO this --name= is not really obeyed, all unknown "types" are munged to "client" '--name=client.{id}'.format(id=id_), '/tmp/cephtest/data/client.{id}.keyring'.format(id=id_), ], ) log.info('Reading keys from all nodes...') keys_fp = StringIO() keys = [] for remote, roles_for_host in ctx.cluster.remotes.iteritems(): for type_ in ['osd', 'mds', 'client']: for id_ in teuthology.roles_of_type(roles_for_host, type_): data = teuthology.get_file( remote=remote, path='/tmp/cephtest/data/{type}.{id}.keyring'.format( type=type_, id=id_, ), ) keys.append((type_, id_, data)) keys_fp.write(data) log.info('Adding keys to all mons...') writes = mons.run( args=[ 'cat', run.Raw('>>'), '/tmp/cephtest/ceph.keyring', ], stdin=run.PIPE, wait=False, ) keys_fp.seek(0) teuthology.feed_many_stdins_and_close(keys_fp, writes) run.wait(writes) for type_, id_, 
data in keys: run.wait( mons.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '/tmp/cephtest/ceph.keyring', '--name={type}.{id}'.format( type=type_, id=id_, ), ] + list(teuthology.generate_caps(type_)), wait=False, ), ) log.info('Running mkfs on mon nodes...') for remote, roles_for_host in mons.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'mon'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-mon', '--mkfs', '-i', id_, '-c', '/tmp/cephtest/ceph.conf', '--monmap=/tmp/cephtest/monmap', '--osdmap=/tmp/cephtest/osdmap', '--keyring=/tmp/cephtest/ceph.keyring', ], ) log.info('Running mkfs on osd nodes...') for remote, roles_for_host in osds.remotes.iteritems(): roles_to_devs = remote_to_roles_to_devs[remote] roles_to_journals = remote_to_roles_to_journals[remote] ctx.disk_config = argparse.Namespace() ctx.disk_config.remote_to_roles_to_dev = remote_to_roles_to_devs ctx.disk_config.remote_to_roles_to_journals = remote_to_roles_to_journals for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): log.info(str(roles_to_journals)) log.info(id_) remote.run( args=[ 'mkdir', os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)), ], ) if roles_to_devs.get(id_): dev = roles_to_devs[id_] fs = config.get('fs') package = None mkfs_options = config.get('mkfs_options') mount_options = config.get('mount_options') if fs == 'btrfs': package = 'btrfs-tools' if mount_options is None: mount_options = ['noatime','user_subvol_rm_allowed'] if mkfs_options is None: mkfs_options = ['-m', 'single', '-l', '32768', '-n', '32768'] if fs == 'xfs': package = 'xfsprogs' if mount_options is None: mount_options = ['noatime'] if mkfs_options is None: mkfs_options = ['-f', '-i', 'size=2048'] if fs == 'ext4' or fs == 'ext3': if mount_options is None: mount_options = ['noatime','user_xattr'] if mount_options is None: mount_options = [] if mkfs_options is None: mkfs_options = [] mkfs = ['mkfs.%s' % fs] + mkfs_options log.info('%s on %s on %s' % (mkfs, dev, remote)) if package is not None: remote.run( args=[ 'sudo', 'apt-get', 'install', '-y', package ] ) remote.run(args= ['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) log.info('mount %s on %s -o %s' % (dev, remote, ','.join(mount_options))) remote.run( args=[ 'sudo', 'mount', '-t', fs, '-o', ','.join(mount_options), dev, os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)), ] ) remote.run( args=[ 'sudo', 'chown', '-R', 'ubuntu.ubuntu', os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)) ] ) remote.run( args=[ 'sudo', 'chmod', '-R', '755', os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)) ] ) devs_to_clean[remote].append( os.path.join( '/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_) ) ) for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-osd', '--mkfs', '-i', id_, '-c', '/tmp/cephtest/ceph.conf', '--monmap', '/tmp/cephtest/monmap', ], ) run.wait( mons.run( args=[ 'rm', '--', '/tmp/cephtest/monmap', '/tmp/cephtest/osdmap', ], wait=False, ), ) try: yield finally: (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() log.info('Checking cluster log for badness...') def first_in_ceph_log(pattern, 
excludes): args = [ 'egrep', pattern, '/tmp/cephtest/archive/log/cluster.%s.log' % firstmon, ] for exclude in excludes: args.extend([run.Raw('|'), 'egrep', '-v', exclude]) args.extend([ run.Raw('|'), 'head', '-n', '1', ]) r = mon0_remote.run( stdout=StringIO(), args=args, ) stdout = r.stdout.getvalue() if stdout != '': return stdout return None if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', config['log_whitelist']) is not None: log.warning('Found errors (ERR|WRN|SEC) in cluster log') ctx.summary['success'] = False # use the most severe problem as the failure reason if 'failure_reason' not in ctx.summary: for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: match = first_in_ceph_log(pattern, config['log_whitelist']) if match is not None: ctx.summary['failure_reason'] = \ '"{match}" in cluster log'.format( match=match.rstrip('\n'), ) break for remote, dirs in devs_to_clean.iteritems(): for dir_ in dirs: log.info('Unmounting %s on %s' % (dir_, remote)) remote.run( args=[ 'sync', run.Raw('&&'), 'sudo', 'umount', '-f', dir_ ] ) if config.get('tmpfs_journal'): log.info('tmpfs journal enabled - unmounting tmpfs at /mnt') for remote, roles_for_host in osds.remotes.iteritems(): remote.run( args=[ 'sudo', 'umount', '-f', '/mnt' ], check_status=False, ) if ctx.archive is not None: # archive mon data, too log.info('Archiving mon data...') path = os.path.join(ctx.archive, 'data') os.makedirs(path) for remote, roles in mons.remotes.iteritems(): for role in roles: if role.startswith('mon.'): teuthology.pull_directory_tarball(remote, '/tmp/cephtest/data/%s' % role, path + '/' + role + '.tgz') log.info('Cleaning ceph cluster...') run.wait( ctx.cluster.run( args=[ 'rm', '-rf', '--', '/tmp/cephtest/ceph.conf', '/tmp/cephtest/ceph.keyring', '/tmp/cephtest/data', '/tmp/cephtest/monmap', run.Raw('/tmp/cephtest/asok.*') ], wait=False, ), )
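# assign_devs() is referenced throughout the cluster task but not shown in this excerpt;
# in teuthology it simply pairs OSD role ids with scratch devices in order. A minimal
# sketch under that assumption (the real helper may differ):
def assign_devs(roles, devs):
    """Map each OSD id to a scratch device, in order."""
    return dict(zip(roles, devs))

# e.g. assign_devs(['0', '1'], ['/dev/vdb', '/dev/vdc', '/dev/vdd'])
#   -> {'0': '/dev/vdb', '1': '/dev/vdc'}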
def cli_test(ctx, config): """ ceph-deploy cli to exercise most commonly use cli's and ensure all commands works and also startup the init system. """ log.info('Ceph-deploy Test') if config is None: config = {} test_branch = '' conf_dir = teuthology.get_testdir(ctx) + "/cdtest" def execute_cdeploy(admin, cmd, path): """Execute ceph-deploy commands """ """Either use git path or repo path """ args = ['cd', conf_dir, run.Raw(';')] if path: args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path)); else: args.append('ceph-deploy') args.append(run.Raw(cmd)) ec = admin.run(args=args, check_status=False).exitstatus if ec != 0: raise RuntimeError( "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec)) if config.get('rhbuild'): path = None else: path = teuthology.get_testdir(ctx) # test on branch from config eg: wip-* , master or next etc # packages for all distro's should exist for wip* if ctx.config.get('branch'): branch = ctx.config.get('branch') test_branch = ' --dev={branch} '.format(branch=branch) mons = ctx.cluster.only(teuthology.is_type('mon')) for node, role in mons.remotes.iteritems(): admin = node admin.run(args=['mkdir', conf_dir], check_status=False) nodename = admin.shortname system_type = teuthology.get_system_type(admin) if config.get('rhbuild'): admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y']) log.info('system type is %s', system_type) osds = ctx.cluster.only(teuthology.is_type('osd')) for remote, roles in osds.remotes.iteritems(): devs = teuthology.get_scratch_devices(remote) log.info("roles %s", roles) if (len(devs) < 3): log.error( 'Test needs minimum of 3 devices, only found %s', str(devs)) raise RuntimeError("Needs minimum of 3 devices ") conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir) new_cmd = 'new ' + nodename execute_cdeploy(admin, new_cmd, path) if config.get('conf') is not None: confp = config.get('conf') for section, keys in confp.iteritems(): lines = '[{section}]\n'.format(section=section) teuthology.append_lines_to_file(admin, conf_path, lines, sudo=True) for key, value in keys.iteritems(): log.info("[%s] %s = %s" % (section, key, value)) lines = '{key} = {value}\n'.format(key=key, value=value) teuthology.append_lines_to_file(admin, conf_path, lines, sudo=True) new_mon_install = 'install {branch} --mon '.format( branch=test_branch) + nodename new_mgr_install = 'install {branch} --mgr '.format( branch=test_branch) + nodename new_osd_install = 'install {branch} --osd '.format( branch=test_branch) + nodename new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename create_initial = 'mon create-initial ' # either use create-keys or push command push_keys = 'admin ' + nodename execute_cdeploy(admin, new_mon_install, path) execute_cdeploy(admin, new_mgr_install, path) execute_cdeploy(admin, new_osd_install, path) execute_cdeploy(admin, new_admin, path) execute_cdeploy(admin, create_initial, path) execute_cdeploy(admin, push_keys, path) for i in range(3): zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i]) prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i]) execute_cdeploy(admin, zap_disk, path) execute_cdeploy(admin, prepare, path) log.info("list files for debugging purpose to check file permissions") admin.run(args=['ls', run.Raw('-lt'), conf_dir]) remote.run(args=['sudo', 'ceph', '-s'], check_status=False) r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO()) out = r.stdout.getvalue() log.info('Ceph health: %s', out.rstrip('\n')) log.info("Waiting for cluster to 
become healthy") with contextutil.safe_while(sleep=10, tries=6, action='check health') as proceed: while proceed(): r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO()) out = r.stdout.getvalue() if (out.split(None,1)[0] == 'HEALTH_OK'): break rgw_install = 'install {branch} --rgw {node}'.format( branch=test_branch, node=nodename, ) rgw_create = 'rgw create ' + nodename execute_cdeploy(admin, rgw_install, path) execute_cdeploy(admin, rgw_create, path) log.info('All ceph-deploy cli tests passed') try: yield finally: log.info("cleaning up") ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'), 'sudo', 'service', 'ceph', 'stop', run.Raw('||'), 'sudo', 'systemctl', 'stop', 'ceph.target'], check_status=False) time.sleep(4) for i in range(3): umount_dev = "{d}1".format(d=devs[i]) r = remote.run(args=['sudo', 'umount', run.Raw(umount_dev)]) cmd = 'purge ' + nodename execute_cdeploy(admin, cmd, path) cmd = 'purgedata ' + nodename execute_cdeploy(admin, cmd, path) log.info("Removing temporary dir") admin.run( args=[ 'rm', run.Raw('-rf'), run.Raw(conf_dir)], check_status=False) if config.get('rhbuild'): admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])
def cluster(ctx, config): testdir = teuthology.get_testdir(ctx) log.info('Creating ceph cluster...') run.wait( ctx.cluster.run( args=[ 'install', '-d', '-m0755', '--', '{tdir}/data'.format(tdir=testdir), ], wait=False, ) ) run.wait( ctx.cluster.run( args=[ 'sudo', 'install', '-d', '-m0777', '--', '/var/run/ceph', ], wait=False, ) ) devs_to_clean = {} remote_to_roles_to_devs = {} remote_to_roles_to_journals = {} osds = ctx.cluster.only(teuthology.is_type('osd')) for remote, roles_for_host in osds.remotes.iteritems(): devs = teuthology.get_scratch_devices(remote) roles_to_devs = {} roles_to_journals = {} if config.get('fs'): log.info('fs option selected, checking for scratch devs') log.info('found devs: %s' % (str(devs),)) devs_id_map = teuthology.get_wwn_id_map(remote, devs) iddevs = devs_id_map.values() roles_to_devs = assign_devs( teuthology.roles_of_type(roles_for_host, 'osd'), iddevs ) if len(roles_to_devs) < len(iddevs): iddevs = iddevs[len(roles_to_devs):] devs_to_clean[remote] = [] if config.get('block_journal'): log.info('block journal enabled') roles_to_journals = assign_devs( teuthology.roles_of_type(roles_for_host, 'osd'), iddevs ) log.info('journal map: %s', roles_to_journals) if config.get('tmpfs_journal'): log.info('tmpfs journal enabled') roles_to_journals = {} remote.run( args=[ 'sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt' ] ) for osd in teuthology.roles_of_type(roles_for_host, 'osd'): tmpfs = '/mnt/osd.%s' % osd roles_to_journals[osd] = tmpfs remote.run( args=[ 'truncate', '-s', '1500M', tmpfs ] ) log.info('journal map: %s', roles_to_journals) log.info('dev map: %s' % (str(roles_to_devs),)) remote_to_roles_to_devs[remote] = roles_to_devs remote_to_roles_to_journals[remote] = roles_to_journals log.info('Generating config...') remotes_and_roles = ctx.cluster.remotes.items() roles = [role_list for (remote, role_list) in remotes_and_roles] ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)] conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips) for remote, roles_to_journals in remote_to_roles_to_journals.iteritems(): for role, journal in roles_to_journals.iteritems(): key = "osd." + str(role) if key not in conf: conf[key] = {} conf[key]['osd journal'] = journal for section, keys in config['conf'].iteritems(): for key, value in keys.iteritems(): log.info("[%s] %s = %s" % (section, key, value)) if section not in conf: conf[section] = {} conf[section][key] = value if config.get('tmpfs_journal'): conf['journal dio'] = False ctx.ceph = argparse.Namespace() ctx.ceph.conf = conf conf_path = config.get('conf_path', '/etc/ceph/ceph.conf') keyring_path = config.get('keyring_path', '/etc/ceph/ceph.keyring') log.info('Writing configs...') conf_fp = StringIO() conf.write(conf_fp) conf_fp.seek(0) writes = ctx.cluster.run( args=[ 'sudo', 'mkdir', '-p', '/etc/ceph', run.Raw('&&'), 'sudo', 'chmod', '0755', '/etc/ceph', run.Raw('&&'), 'sudo', 'python', '-c', 'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))', conf_path, run.Raw('&&'), 'sudo', 'chmod', '0644', conf_path, ], stdin=run.PIPE, wait=False, ) teuthology.feed_many_stdins_and_close(conf_fp, writes) run.wait(writes) coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) firstmon = teuthology.get_first_mon(ctx, config) log.info('Setting up %s...' 
% firstmon) ctx.cluster.only(firstmon).run( args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', '--create-keyring', keyring_path, ], ) ctx.cluster.only(firstmon).run( args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', '--gen-key', '--name=mon.', keyring_path, ], ) ctx.cluster.only(firstmon).run( args=[ 'sudo', 'chmod', '0644', keyring_path, ], ) (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() teuthology.create_simple_monmap( ctx, remote=mon0_remote, conf=conf, ) log.info('Creating admin key on %s...' % firstmon) ctx.cluster.only(firstmon).run( args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', '--gen-key', '--name=client.admin', '--set-uid=0', '--cap', 'mon', 'allow *', '--cap', 'osd', 'allow *', '--cap', 'mds', 'allow', keyring_path, ], ) log.info('Copying monmap to all nodes...') keyring = teuthology.get_file( remote=mon0_remote, path=keyring_path, ) monmap = teuthology.get_file( remote=mon0_remote, path='{tdir}/monmap'.format(tdir=testdir), ) for rem in ctx.cluster.remotes.iterkeys(): # copy mon key and initial monmap log.info('Sending monmap to node {remote}'.format(remote=rem)) teuthology.sudo_write_file( remote=rem, path=keyring_path, data=keyring, perms='0644' ) teuthology.write_file( remote=rem, path='{tdir}/monmap'.format(tdir=testdir), data=monmap, ) log.info('Setting up mon nodes...') mons = ctx.cluster.only(teuthology.is_type('mon')) run.wait( mons.run( args=[ 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'osdmaptool', '-c', conf_path, '--clobber', '--createsimple', '{num:d}'.format( num=teuthology.num_instances_of_type(ctx.cluster, 'osd'), ), '{tdir}/osdmap'.format(tdir=testdir), '--pg_bits', '2', '--pgp_bits', '4', ], wait=False, ), ) log.info('Setting up mds nodes...') mdss = ctx.cluster.only(teuthology.is_type('mds')) for remote, roles_for_host in mdss.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'mds'): remote.run( args=[ 'sudo', 'mkdir', '-p', '/var/lib/ceph/mds/ceph-{id}'.format(id=id_), run.Raw('&&'), 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', '--create-keyring', '--gen-key', '--name=mds.{id}'.format(id=id_), '/var/lib/ceph/mds/ceph-{id}/keyring'.format(id=id_), ], ) cclient.create_keyring(ctx) log.info('Running mkfs on osd nodes...') ctx.disk_config = argparse.Namespace() ctx.disk_config.remote_to_roles_to_dev = remote_to_roles_to_devs ctx.disk_config.remote_to_roles_to_journals = remote_to_roles_to_journals ctx.disk_config.remote_to_roles_to_dev_mount_options = {} ctx.disk_config.remote_to_roles_to_dev_fstype = {} log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev))) for remote, roles_for_host in osds.remotes.iteritems(): roles_to_devs = remote_to_roles_to_devs[remote] roles_to_journals = remote_to_roles_to_journals[remote] for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): remote.run( args=[ 'sudo', 'mkdir', '-p', '/var/lib/ceph/osd/ceph-{id}'.format(id=id_), ]) log.info(str(roles_to_journals)) log.info(id_) if roles_to_devs.get(id_): dev = roles_to_devs[id_] fs = config.get('fs') package = None mkfs_options = config.get('mkfs_options') mount_options = config.get('mount_options') if fs == 'btrfs': #package = 'btrfs-tools' if mount_options is None: mount_options = ['noatime','user_subvol_rm_allowed'] if mkfs_options is None: mkfs_options = ['-m', 'single', '-l', '32768', '-n', '32768'] if fs == 'xfs': #package = 'xfsprogs' if mount_options is 
None: mount_options = ['noatime'] if mkfs_options is None: mkfs_options = ['-f', '-i', 'size=2048'] if fs == 'ext4' or fs == 'ext3': if mount_options is None: mount_options = ['noatime','user_xattr'] if mount_options is None: mount_options = [] if mkfs_options is None: mkfs_options = [] mkfs = ['mkfs.%s' % fs] + mkfs_options log.info('%s on %s on %s' % (mkfs, dev, remote)) if package is not None: remote.run( args=[ 'sudo', 'apt-get', 'install', '-y', package ], stdout=StringIO(), ) remote.run(args= ['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) log.info('mount %s on %s -o %s' % (dev, remote, ','.join(mount_options))) remote.run( args=[ 'sudo', 'mount', '-t', fs, '-o', ','.join(mount_options), dev, os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=id_)), ] ) if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options: ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {} ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][id_] = mount_options if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype: ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {} ctx.disk_config.remote_to_roles_to_dev_fstype[remote][id_] = fs devs_to_clean[remote].append( os.path.join( os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=id_)), ) ) for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): remote.run( args=[ 'sudo', 'MALLOC_CHECK_=3', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-osd', '--mkfs', '--mkkey', '-i', id_, '--monmap', '{tdir}/monmap'.format(tdir=testdir), ], ) log.info('Reading keys from all nodes...') keys_fp = StringIO() keys = [] for remote, roles_for_host in ctx.cluster.remotes.iteritems(): for type_ in ['mds','osd']: for id_ in teuthology.roles_of_type(roles_for_host, type_): data = teuthology.get_file( remote=remote, path='/var/lib/ceph/{type}/ceph-{id}/keyring'.format( type=type_, id=id_, ), sudo=True, ) keys.append((type_, id_, data)) keys_fp.write(data) for remote, roles_for_host in ctx.cluster.remotes.iteritems(): for type_ in ['client']: for id_ in teuthology.roles_of_type(roles_for_host, type_): data = teuthology.get_file( remote=remote, path='/etc/ceph/ceph.client.{id}.keyring'.format(id=id_) ) keys.append((type_, id_, data)) keys_fp.write(data) log.info('Adding keys to all mons...') writes = mons.run( args=[ 'sudo', 'tee', '-a', keyring_path, ], stdin=run.PIPE, wait=False, stdout=StringIO(), ) keys_fp.seek(0) teuthology.feed_many_stdins_and_close(keys_fp, writes) run.wait(writes) for type_, id_, data in keys: run.wait( mons.run( args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-authtool', keyring_path, '--name={type}.{id}'.format( type=type_, id=id_, ), ] + list(teuthology.generate_caps(type_)), wait=False, ), ) log.info('Running mkfs on mon nodes...') for remote, roles_for_host in mons.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'mon'): remote.run( args=[ 'sudo', 'mkdir', '-p', '/var/lib/ceph/mon/ceph-{id}'.format(id=id_), ], ) remote.run( args=[ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph-mon', '--mkfs', '-i', id_, '--monmap={tdir}/monmap'.format(tdir=testdir), '--osdmap={tdir}/osdmap'.format(tdir=testdir), '--keyring={kpath}'.format(kpath=keyring_path), ], ) run.wait( mons.run( args=[ 'rm', '--', '{tdir}/monmap'.format(tdir=testdir), '{tdir}/osdmap'.format(tdir=testdir), ], wait=False, ), ) try: yield except Exception: # we need to know this below ctx.summary['success'] = False raise finally: (mon0_remote,) = 
ctx.cluster.only(firstmon).remotes.keys() log.info('Checking cluster log for badness...') def first_in_ceph_log(pattern, excludes): args = [ 'sudo', 'egrep', pattern, '/var/log/ceph/ceph.log', ] for exclude in excludes: args.extend([run.Raw('|'), 'egrep', '-v', exclude]) args.extend([ run.Raw('|'), 'head', '-n', '1', ]) r = mon0_remote.run( stdout=StringIO(), args=args, ) stdout = r.stdout.getvalue() if stdout != '': return stdout return None if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', config['log_whitelist']) is not None: log.warning('Found errors (ERR|WRN|SEC) in cluster log') ctx.summary['success'] = False # use the most severe problem as the failure reason if 'failure_reason' not in ctx.summary: for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: match = first_in_ceph_log(pattern, config['log_whitelist']) if match is not None: ctx.summary['failure_reason'] = \ '"{match}" in cluster log'.format( match=match.rstrip('\n'), ) break for remote, dirs in devs_to_clean.iteritems(): for dir_ in dirs: log.info('Unmounting %s on %s' % (dir_, remote)) remote.run( args=[ 'sync', run.Raw('&&'), 'sudo', 'umount', '-f', dir_ ] ) if config.get('tmpfs_journal'): log.info('tmpfs journal enabled - unmounting tmpfs at /mnt') for remote, roles_for_host in osds.remotes.iteritems(): remote.run( args=[ 'sudo', 'umount', '-f', '/mnt' ], check_status=False, ) if ctx.archive is not None and \ not (ctx.config.get('archive-on-error') and ctx.summary['success']): # archive mon data, too log.info('Archiving mon data...') path = os.path.join(ctx.archive, 'data') os.makedirs(path) for remote, roles in mons.remotes.iteritems(): for role in roles: if role.startswith('mon.'): teuthology.pull_directory_tarball( remote, '/var/lib/ceph/mon', path + '/' + role + '.tgz') # and logs log.info('Compressing logs...') run.wait( ctx.cluster.run( args=[ 'sudo', 'find', '/var/log/ceph', '-name', '*.log', '-print0', run.Raw('|'), 'sudo', 'xargs', '-0', '--no-run-if-empty', '--', 'gzip', '--', ], wait=False, ), ) log.info('Archiving logs...') path = os.path.join(ctx.archive, 'remote') os.makedirs(path) for remote in ctx.cluster.remotes.iterkeys(): sub = os.path.join(path, remote.shortname) os.makedirs(sub) teuthology.pull_directory(remote, '/var/log/ceph', os.path.join(sub, 'log')) log.info('Cleaning ceph cluster...') run.wait( ctx.cluster.run( args=[ 'sudo', 'rm', '-rf', '--', conf_path, keyring_path, '{tdir}/data'.format(tdir=testdir), '{tdir}/monmap'.format(tdir=testdir), ], wait=False, ), )
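# The cluster task merges per-section options from the task's 'conf' mapping into
# ceph.conf before writing it out to the nodes. An illustrative overlay (section and
# option names here are examples, not defaults shipped by the task):
config = {
    'conf': {
        'global': {'osd pool default size': 2},
        'mon': {'mon clock drift allowed': 0.5},
    },
}
for section, keys in config['conf'].items():
    for key, value in keys.items():
        print('[%s] %s = %s' % (section, key, value))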
def cluster(ctx, config): log.info('Creating ceph cluster...') run.wait( ctx.cluster.run( args=[ 'install', '-d', '-m0755', '--', '/tmp/cephtest/data', ], wait=False, ) ) log.info('Generating config...') remotes_and_roles = ctx.cluster.remotes.items() roles = [roles for (remote, roles) in remotes_and_roles] ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, roles) in remotes_and_roles)] conf = teuthology.skeleton_config(roles=roles, ips=ips) for section, keys in config['conf'].iteritems(): for key, value in keys.iteritems(): log.info("[%s] %s = %s" % (section, key, value)) if section not in conf: conf[section] = {} conf[section][key] = value ctx.ceph = argparse.Namespace() ctx.ceph.conf = conf log.info('Writing configs...') conf_fp = StringIO() conf.write(conf_fp) conf_fp.seek(0) writes = ctx.cluster.run( args=[ 'python', '-c', 'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))', '/tmp/cephtest/ceph.conf', ], stdin=run.PIPE, wait=False, ) teuthology.feed_many_stdins_and_close(conf_fp, writes) run.wait(writes) coverage_dir = '/tmp/cephtest/archive/coverage' firstmon = teuthology.get_first_mon(ctx, config) log.info('Setting up %s...' % firstmon) ctx.cluster.only(firstmon).run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--create-keyring', '/tmp/cephtest/ceph.keyring', ], ) ctx.cluster.only(firstmon).run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--gen-key', '--name=mon.', '/tmp/cephtest/ceph.keyring', ], ) (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() teuthology.create_simple_monmap( remote=mon0_remote, conf=conf, ) log.info('Creating admin key on %s...' 
% firstmon) ctx.cluster.only(firstmon).run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--gen-key', '--name=client.admin', '--set-uid=0', '--cap', 'mon', 'allow *', '--cap', 'osd', 'allow *', '--cap', 'mds', 'allow', '/tmp/cephtest/ceph.keyring', ], ) log.info('Copying monmap to all nodes...') keyring = teuthology.get_file( remote=mon0_remote, path='/tmp/cephtest/ceph.keyring', ) monmap = teuthology.get_file( remote=mon0_remote, path='/tmp/cephtest/monmap', ) for rem in ctx.cluster.remotes.iterkeys(): # copy mon key and initial monmap log.info('Sending monmap to node {remote}'.format(remote=rem)) teuthology.write_file( remote=rem, path='/tmp/cephtest/ceph.keyring', data=keyring, ) teuthology.write_file( remote=rem, path='/tmp/cephtest/monmap', data=monmap, ) log.info('Setting up mon nodes...') mons = ctx.cluster.only(teuthology.is_type('mon')) run.wait( mons.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/osdmaptool', '--clobber', '--createsimple', '{num:d}'.format( num=teuthology.num_instances_of_type(ctx.cluster, 'osd'), ), '/tmp/cephtest/osdmap', '--pg_bits', '2', '--pgp_bits', '4', ], wait=False, ), ) log.info('Setting up osd nodes...') osds = ctx.cluster.only(teuthology.is_type('osd')) for remote, roles_for_host in osds.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--create-keyring', '--gen-key', '--name=osd.{id}'.format(id=id_), '/tmp/cephtest/data/osd.{id}.keyring'.format(id=id_), ], ) log.info('Setting up mds nodes...') mdss = ctx.cluster.only(teuthology.is_type('mds')) for remote, roles_for_host in mdss.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'mds'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--create-keyring', '--gen-key', '--name=mds.{id}'.format(id=id_), '/tmp/cephtest/data/mds.{id}.keyring'.format(id=id_), ], ) log.info('Setting up client nodes...') clients = ctx.cluster.only(teuthology.is_type('client')) for remote, roles_for_host in clients.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'client'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '--create-keyring', '--gen-key', # TODO this --name= is not really obeyed, all unknown "types" are munged to "client" '--name=client.{id}'.format(id=id_), '/tmp/cephtest/data/client.{id}.keyring'.format(id=id_), ], ) log.info('Reading keys from all nodes...') keys_fp = StringIO() keys = [] for remote, roles_for_host in ctx.cluster.remotes.iteritems(): for type_ in ['osd', 'mds', 'client']: for id_ in teuthology.roles_of_type(roles_for_host, type_): data = teuthology.get_file( remote=remote, path='/tmp/cephtest/data/{type}.{id}.keyring'.format( type=type_, id=id_, ), ) keys.append((type_, id_, data)) keys_fp.write(data) log.info('Adding keys to all mons...') writes = mons.run( args=[ 'cat', run.Raw('>>'), '/tmp/cephtest/ceph.keyring', ], stdin=run.PIPE, wait=False, ) keys_fp.seek(0) 
teuthology.feed_many_stdins_and_close(keys_fp, writes) run.wait(writes) for type_, id_, data in keys: run.wait( mons.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-authtool', '/tmp/cephtest/ceph.keyring', '--name={type}.{id}'.format( type=type_, id=id_, ), ] + list(teuthology.generate_caps(type_)), wait=False, ), ) log.info('Running mkfs on mon nodes...') for remote, roles_for_host in mons.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, 'mon'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-mon', '--mkfs', '-i', id_, '-c', '/tmp/cephtest/ceph.conf', '--monmap=/tmp/cephtest/monmap', '--osdmap=/tmp/cephtest/osdmap', '--keyring=/tmp/cephtest/ceph.keyring', ], ) log.info('Running mkfs on osd nodes...') devs_to_clean = {} for remote, roles_for_host in osds.remotes.iteritems(): roles_to_devs = {} if config.get('btrfs'): log.info('btrfs option selected, checkin for scrach devs') devs = teuthology.get_scratch_devices(remote) log.info('found devs: %s' % (str(devs),)) roles_to_devs = assign_devs( teuthology.roles_of_type(roles_for_host, 'osd'), devs ) log.info('dev map: %s' % (str(roles_to_devs),)) devs_to_clean[remote] = [] for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): remote.run( args=[ 'mkdir', os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)), ], ) if roles_to_devs.get(id_): dev = roles_to_devs[id_] log.info('mkfs.btrfs on %s on %s' % (dev, remote)) remote.run( args=[ 'sudo', 'apt-get', 'install', '-y', 'btrfs-tools' ] ) remote.run( args=[ 'sudo', 'mkfs.btrfs', dev ] ) log.info('mount %s on %s' % (dev, remote)) remote.run( args=[ 'sudo', 'mount', '-o', 'user_subvol_rm_allowed', dev, os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)), ] ) remote.run( args=[ 'sudo', 'chown', '-R', 'ubuntu.ubuntu', os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)) ] ) remote.run( args=[ 'sudo', 'chmod', '-R', '755', os.path.join('/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_)) ] ) devs_to_clean[remote].append( os.path.join( '/tmp/cephtest/data', 'osd.{id}.data'.format(id=id_) ) ) for id_ in teuthology.roles_of_type(roles_for_host, 'osd'): remote.run( args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph-osd', '--mkfs', '-i', id_, '-c', '/tmp/cephtest/ceph.conf', '--monmap', '/tmp/cephtest/monmap', ], ) run.wait( mons.run( args=[ 'rm', '--', '/tmp/cephtest/monmap', '/tmp/cephtest/osdmap', ], wait=False, ), ) try: yield finally: (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() if ctx.archive is not None: log.info('Grabbing cluster log from %s %s...' 
% (mon0_remote, firstmon)) dest = os.path.join(ctx.archive, 'ceph.log') mon0_remote.run( args = [ 'cat', '--', '/tmp/cephtest/data/%s/log' % firstmon ], stdout=file(dest, 'wb'), ) log.info('Checking cluster ceph.log for badness...') def first_in_ceph_log(pattern, excludes): args = [ 'egrep', pattern, '/tmp/cephtest/data/%s/log' % firstmon, ] for exclude in excludes: args.extend([run.Raw('|'), 'egrep', '-v', exclude]) args.extend([ run.Raw('|'), 'head', '-n', '1', ]) r = mon0_remote.run( stdout=StringIO(), args=args, ) stdout = r.stdout.getvalue() if stdout != '': return stdout return None if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', config['log_whitelist']) is not None: log.warning('Found errors (ERR|WRN|SEC) in cluster log') ctx.summary['success'] = False # use the most severe problem as the failure reason if 'failure_reason' not in ctx.summary: for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: match = first_in_ceph_log(pattern, config['log_whitelist']) if match is not None: ctx.summary['failure_reason'] = \ '"{match}" in cluster log'.format( match=match.rstrip('\n'), ) break for remote, dirs in devs_to_clean.iteritems(): for dir_ in dirs: log.info('Unmounting %s on %s' % (dir_, remote)) remote.run( args=[ "sudo", "umount", "-f", dir_ ] ) log.info('Cleaning ceph cluster...') run.wait( ctx.cluster.run( args=[ 'rm', '-rf', '--', '/tmp/cephtest/ceph.conf', '/tmp/cephtest/ceph.keyring', '/tmp/cephtest/data', '/tmp/cephtest/monmap', run.Raw('/tmp/cephtest/asok.*') ], wait=False, ), )