def healthy(ctx, config): """ Wait for all osd's to be up, and for the ceph health monitor to return HEALTH_OK. :param ctx: Context :param config: Configuration """ config = config if isinstance(config, dict) else dict() cluster_name = config.get('cluster', 'ceph') log.info('Waiting until ceph cluster %s is healthy...', cluster_name) firstmon = teuthology.get_first_mon(ctx, config, cluster_name) (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() teuthology.wait_until_osds_up( ctx, cluster=ctx.cluster, remote=mon0_remote, ceph_cluster=cluster_name, ) teuthology.wait_until_healthy( ctx, remote=mon0_remote, ceph_cluster=cluster_name, ) if ctx.cluster.only(teuthology.is_type('mds', cluster_name)).remotes: # Some MDSs exist, wait for them to be healthy ceph_fs = Filesystem(ctx) # TODO: make Filesystem cluster-aware ceph_fs.wait_for_daemons(timeout=300)
def cephfs_setup(ctx, config):
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    mdss = ctx.cluster.only(teuthology.is_type('mds'))
    # If there are any MDSs, then create a filesystem for them to use
    # Do this last because requires mon cluster to be up and running
    if mdss.remotes:
        log.info('Setting up CephFS filesystem...')

        ceph_fs = Filesystem(ctx)
        if not ceph_fs.legacy_configured():
            ceph_fs.create()

        is_active_mds = lambda role: role.startswith('mds.') and not role.endswith('-s') and role.find('-s-') == -1
        all_roles = [item for remote_roles in mdss.remotes.values() for item in remote_roles]
        num_active = len([r for r in all_roles if is_active_mds(r)])
        mon_remote.run(args=[
            'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir,
            'ceph', 'mds', 'set_max_mds', str(num_active)])

    yield

def execute_playbook(self):
    """
    Execute ansible-playbook

    :param _logfile: Use this file-like object instead of a LoggerFile for
                     testing
    """
    args = [
        'ANSIBLE_STDOUT_CALLBACK=debug',
        'ansible-playbook', '-vv', '-i', 'inven.yml', 'site.yml'
    ]
    log.debug("Running %s", args)
    # If there is an installer.0 node, use that for the installer.
    # Otherwise, use the first mon node as installer node.
    ansible_loc = self.ctx.cluster.only('installer.0')
    (ceph_first_mon,) = self.ctx.cluster.only(
        misc.get_first_mon(self.ctx, self.config)).remotes.keys()
    if ansible_loc.remotes:
        (ceph_installer,) = ansible_loc.remotes.keys()
    else:
        ceph_installer = ceph_first_mon
    self.ceph_first_mon = ceph_first_mon
    self.ceph_installer = ceph_installer
    self.args = args
    if self.config.get('rhbuild'):
        self.run_rh_playbook()
    else:
        self.run_playbook()

def task(ctx, config): """ Test monitor recovery from OSD """ if config is None: config = {} assert isinstance(config, dict), \ 'task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager')) mons = ctx.cluster.only(teuthology.is_type('mon')) # note down the first cluster_name and mon_id # we will recover it later on cluster_name, _, mon_id = teuthology.split_role(first_mon) _nuke_mons(manager, mons, mon_id) default_keyring = '/etc/ceph/{cluster}.keyring'.format( cluster=cluster_name) keyring_path = config.get('keyring_path', default_keyring) _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path) _revive_mons(manager, mons, mon_id, keyring_path) _revive_mgrs(ctx, manager) _revive_osds(ctx, manager)
def task(ctx, config): """ Stress test the monitor by thrashing them while another task/workunit is running. Please refer to MonitorThrasher class for further information on the available options. """ if config is None: config = {} assert isinstance(config, dict), \ 'mon_thrash task only accepts a dict for configuration' assert len(_get_mons(ctx)) > 2, \ 'mon_thrash task requires at least 3 monitors' log.info('Beginning mon_thrash...') first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) thrash_proc = MonitorThrasher(ctx, manager, config, logger=log.getChild('mon_thrasher')) try: log.debug('Yielding') yield finally: log.info('joining mon_thrasher') thrash_proc.do_join() mons = _get_mons(ctx) manager.wait_for_mon_quorum_size(len(mons))
def __init__(self, ctx, admin_remote=None):
    self._ctx = ctx

    self.mds_ids = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
    if len(self.mds_ids) == 0:
        raise RuntimeError("This task requires at least one MDS")

    first_mon = misc.get_first_mon(ctx, None)
    if admin_remote is None:
        (self.admin_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    else:
        self.admin_remote = admin_remote
    self.mon_manager = ceph_manager.CephManager(
        self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
    if hasattr(self._ctx, "daemons"):
        # Presence of 'daemons' attribute implies ceph task rather than ceph_deploy task
        self.mds_daemons = dict([
            (mds_id, self._ctx.daemons.get_daemon('mds', mds_id))
            for mds_id in self.mds_ids
        ])

    client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client'))
    self.client_id = client_list[0]
    self.client_remote = list(
        misc.get_clients(ctx=ctx,
                         roles=["client.{0}".format(self.client_id)]))[0][1]

def is_healthy(ctx, config):
    """Wait until a Ceph cluster is healthy."""
    testdir = teuthology.get_testdir(ctx)
    ceph_admin = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
    max_tries = 90  # 90 tries * 10 secs --> 15 minutes
    tries = 0
    while True:
        tries += 1
        if tries >= max_tries:
            msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes"
            raise RuntimeError(msg)

        r = remote.run(
            args=[
                'cd',
                '{tdir}'.format(tdir=testdir),
                run.Raw('&&'),
                'sudo', 'ceph',
                'health',
            ],
            stdout=StringIO(),
            logger=log.getChild('health'),
        )
        out = r.stdout.getvalue()
        log.info('Ceph health: %s', out.rstrip('\n'))
        if out.split(None, 1)[0] == 'HEALTH_OK':
            break
        time.sleep(10)

def cephfs_setup(ctx, config):
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    mdss = ctx.cluster.only(teuthology.is_type('mds'))
    # If there are any MDSs, then create a filesystem for them to use
    # Do this last because requires mon cluster to be up and running
    if mdss.remotes:
        log.info('Setting up CephFS filesystem...')

        ceph_fs = Filesystem(ctx)
        if not ceph_fs.legacy_configured():
            ceph_fs.create()

        is_active_mds = lambda role: role.startswith('mds.') and not role.endswith('-s') and role.find('-s-') == -1
        all_roles = [item for remote_roles in mdss.remotes.values() for item in remote_roles]
        num_active = len([r for r in all_roles if is_active_mds(r)])
        mon_remote.run(args=[
            'adjust-ulimits', 'ceph-coverage', coverage_dir,
            'ceph', 'mds', 'set_max_mds', str(num_active)])

    yield

def wait_for_mon_quorum(ctx, config):
    """
    Check remote ceph status until all monitors are up.

    :param ctx: Context
    :param config: Configuration
    """
    assert isinstance(config, list)
    firstmon = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(firstmon).remotes.keys()
    while True:
        r = remote.run(
            args=[
                'ceph',
                'quorum_status',
            ],
            stdout=StringIO(),
            logger=log.getChild('quorum_status'),
        )
        j = json.loads(r.stdout.getvalue())
        q = j.get('quorum_names', [])
        log.debug('Quorum: %s', q)
        if sorted(q) == sorted(config):
            break
        time.sleep(1)

def __init__(self, ctx, manager, config, logger):
    self.ctx = ctx
    self.manager = manager

    self.stopping = False
    self.logger = logger
    self.config = config

    if self.config is None:
        self.config = dict()

    self.check_interval = float(self.config.get('interval', 30.0))

    first_mon = teuthology.get_first_mon(ctx, config)
    remote = ctx.cluster.only(first_mon).remotes.keys()[0]
    proc = remote.run(
        args=[
            'sudo',
            'ceph-mon',
            '-i', first_mon[4:],
            '--show-config-value', 'mon_clock_drift_allowed'
        ], stdout=StringIO(), wait=True
    )
    self.max_skew = self.config.get('max-skew', float(proc.stdout.getvalue()))

    self.expect_skew = self.config.get('expect-skew', False)
    self.never_fail = self.config.get('never-fail', False)
    self.at_least_once = self.config.get('at-least-once', True)
    self.at_least_once_timeout = self.config.get('at-least-once-timeout', 600.0)

def task(ctx, config): """ Use clas ClockSkewCheck to check for clock skews on the monitors. This task will spawn a thread running ClockSkewCheck's do_check(). All the configuration will be directly handled by ClockSkewCheck, so please refer to the class documentation for further information. """ if config is None: config = {} assert isinstance(config, dict), \ 'mon_clock_skew_check task only accepts a dict for configuration' log.info('Beginning mon_clock_skew_check...') first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) skew_check = ClockSkewCheck(ctx, manager, config, logger=log.getChild('mon_clock_skew_check')) skew_check_thread = gevent.spawn(skew_check.do_check) try: yield finally: log.info('joining mon_clock_skew_check') skew_check.finish() skew_check_thread.get()
def execute_playbook(self):
    """
    Execute ansible-playbook

    :param _logfile: Use this file-like object instead of a LoggerFile for
                     testing
    """
    args = [
        'ANSIBLE_STDOUT_CALLBACK=debug',
        'ansible-playbook', '-vv', '-i', 'inven.yml', 'site.yml'
    ]
    log.debug("Running %s", args)
    # If there is an installer.0 node, use that for the installer.
    # Otherwise, use the first mon node as installer node.
    ansible_loc = self.ctx.cluster.only('installer.0')
    (ceph_first_mon,) = self.ctx.cluster.only(
        misc.get_first_mon(self.ctx, self.config)).remotes.iterkeys()
    if ansible_loc.remotes:
        (ceph_installer,) = ansible_loc.remotes.iterkeys()
    else:
        ceph_installer = ceph_first_mon
    self.ceph_first_mon = ceph_first_mon
    self.ceph_installer = ceph_installer
    self.args = args
    if self.config.get('rhbuild'):
        self.run_rh_playbook()
    else:
        self.run_playbook()

def wait_for_mon_quorum(ctx, config):
    """
    Check remote ceph status until all monitors are up.

    :param ctx: Context
    :param config: Configuration
    """
    if isinstance(config, dict):
        mons = config['daemons']
        cluster_name = config.get('cluster', 'ceph')
    else:
        assert isinstance(config, list)
        mons = config
        cluster_name = 'ceph'
    firstmon = teuthology.get_first_mon(ctx, config, cluster_name)
    (remote,) = ctx.cluster.only(firstmon).remotes.keys()
    while True:
        r = remote.run(
            args=[
                'sudo',
                'ceph',
                'quorum_status',
            ],
            stdout=StringIO(),
            logger=log.getChild('quorum_status'),
        )
        j = json.loads(r.stdout.getvalue())
        q = j.get('quorum_names', [])
        log.debug('Quorum: %s', q)
        if sorted(q) == sorted(mons):
            break
        time.sleep(1)

def test_standby_for_invalid_fscid(self):
    # Set invalid standby_fscid with other mds standby_rank
    # stopping active mds service should not end up in mon crash

    # Get configured mons in the cluster
    first_mon = teuthology.get_first_mon(self.ctx, self.configs_set)
    (mon,) = self.ctx.cluster.only(first_mon).remotes.iterkeys()
    manager = CephManager(
        mon,
        ctx=self.ctx,
        logger=log.getChild('ceph_manager'),
    )
    configured_mons = manager.get_mon_quorum()

    use_daemons = sorted(self.mds_cluster.mds_ids[0:3])
    mds_a, mds_b, mds_c = use_daemons
    log.info("Using MDS daemons: {0}".format(use_daemons))

    def set_standby_for_rank(leader_rank, follower_id):
        self.set_conf("mds.{0}".format(follower_id),
                      "mds_standby_for_rank", leader_rank)

    # Create one fs
    fs_a = self.mds_cluster.newfs("cephfs")

    # Set all the daemons to have a rank assignment but no other
    # standby preferences.
    set_standby_for_rank(0, mds_a)
    set_standby_for_rank(0, mds_b)

    # Set third daemon to have invalid fscid assignment and no other
    # standby preferences
    invalid_fscid = 123
    self.set_conf("mds.{0}".format(mds_c),
                  "mds_standby_for_fscid", invalid_fscid)

    # Restart all the daemons to make the standby preference applied
    self.mds_cluster.mds_restart(mds_a)
    self.mds_cluster.mds_restart(mds_b)
    self.mds_cluster.mds_restart(mds_c)
    self.wait_for_daemon_start([mds_a, mds_b, mds_c])

    # Stop active mds daemon service of fs
    if fs_a.get_active_names() == [mds_a]:
        self.mds_cluster.mds_stop(mds_a)
        self.mds_cluster.mds_fail(mds_a)
        fs_a.wait_for_daemons()
    else:
        self.mds_cluster.mds_stop(mds_b)
        self.mds_cluster.mds_fail(mds_b)
        fs_a.wait_for_daemons()

    # Get active mons from cluster
    active_mons = manager.get_mon_quorum()

    # Check for active quorum mon status and configured mon status
    self.assertEqual(
        active_mons, configured_mons,
        "Not all mons are in quorum Invalid standby invalid fscid test failed!"
    )

def is_healthy(ctx, config):
    """Wait until a Ceph cluster is healthy."""
    testdir = teuthology.get_testdir(ctx)
    ceph_admin = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
    max_tries = 90  # 90 tries * 10 secs --> 15 minutes
    tries = 0
    while True:
        tries += 1
        if tries >= max_tries:
            msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes"
            raise RuntimeError(msg)

        r = remote.run(
            args=[
                'cd',
                '{tdir}'.format(tdir=testdir),
                run.Raw('&&'),
                'sudo', 'ceph',
                'health',
            ],
            stdout=StringIO(),
            logger=log.getChild('health'),
        )
        out = r.stdout.getvalue()
        log.debug('Ceph health: %s', out.rstrip('\n'))
        if out.split(None, 1)[0] == 'HEALTH_OK':
            break
        time.sleep(10)

def task(ctx, config): """ Test monitor recovery from OSD """ if config is None: config = {} assert isinstance(config, dict), \ 'task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys() manager = ceph_manager.CephManager(mon, ctx=ctx, logger=log.getChild('ceph_manager')) mons = ctx.cluster.only(teuthology.is_type('mon')) # note down the first cluster_name and mon_id # we will recover it later on cluster_name, _, mon_id = teuthology.split_role(first_mon) _nuke_mons(manager, mons, mon_id) default_keyring = '/etc/ceph/{cluster}.keyring'.format( cluster=cluster_name) keyring_path = config.get('keyring_path', default_keyring) _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path) _revive_mons(manager, mons, mon_id, keyring_path) _revive_mgrs(ctx, manager) _revive_osds(ctx, manager)
def download_ceph_deploy(ctx, config):
    log.info('Downloading ceph-deploy...')
    testdir = teuthology.get_testdir(ctx)
    ceph_admin = teuthology.get_first_mon(ctx, config)

    ctx.cluster.only(ceph_admin).run(
        args=[
            'git', 'clone',
            # 'http://github.com/ceph/ceph-deploy.git',
            'git://ceph.com/ceph-deploy.git',
            '{tdir}/ceph-deploy'.format(tdir=testdir),
        ],
    )
    ctx.cluster.only(ceph_admin).run(
        args=[
            'cd',
            '{tdir}/ceph-deploy'.format(tdir=testdir),
            run.Raw('&&'),
            './bootstrap',
        ],
    )

    try:
        yield
    finally:
        log.info('Removing ceph-deploy ...')
        ctx.cluster.only(ceph_admin).run(
            args=[
                'rm',
                '-rf',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
            ],
        )

def __init__(self, ctx, manager, config, logger):
    self.ctx = ctx
    self.manager = manager

    self.stopping = False
    self.logger = logger
    self.config = config

    if self.config is None:
        self.config = dict()

    self.check_interval = float(self.config.get('interval', 30.0))

    first_mon = teuthology.get_first_mon(ctx, config)
    remote = ctx.cluster.only(first_mon).remotes.keys()[0]
    proc = remote.run(
        args=[
            'sudo',
            'ceph-mon',
            '-i', first_mon[4:],
            '--show-config-value', 'mon_clock_drift_allowed'
        ], stdout=StringIO(), wait=True
    )
    self.max_skew = self.config.get('max-skew', float(proc.stdout.getvalue()))

    self.expect_skew = self.config.get('expect-skew', False)
    self.never_fail = self.config.get('never-fail', False)
    self.at_least_once = self.config.get('at-least-once', True)
    self.at_least_once_timeout = self.config.get('at-least-once-timeout', 600.0)

def task(ctx, config):
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mon_clock_skew_check task only accepts a dict for configuration'
    interval = float(config.get('interval', 30.0))
    expect_skew = config.get('expect-skew', False)

    log.info('Beginning mon_clock_skew_check...')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()
    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    quorum_size = len(teuthology.get_mon_names(ctx))
    manager.wait_for_mon_quorum_size(quorum_size)

    # wait a bit
    log.info('sleeping for {s} seconds'.format(s=interval))
    time.sleep(interval)

    health = manager.get_mon_health(True)
    log.info('got health %s' % health)
    if expect_skew:
        if 'MON_CLOCK_SKEW' not in health['checks']:
            raise RuntimeError('expected MON_CLOCK_SKEW but got none')
    else:
        if 'MON_CLOCK_SKEW' in health['checks']:
            raise RuntimeError('got MON_CLOCK_SKEW but expected none')

def download_ceph_deploy(ctx, config): """ Downloads ceph-deploy from the ceph.com git mirror and (by default) switches to the master branch. If the `ceph-deploy-branch` is specified, it will use that instead. """ log.info("Downloading ceph-deploy...") testdir = teuthology.get_testdir(ctx) ceph_admin = teuthology.get_first_mon(ctx, config) default_cd_branch = {"ceph-deploy-branch": "master"} ceph_deploy_branch = config.get("ceph-deploy", default_cd_branch).get("ceph-deploy-branch") ctx.cluster.only(ceph_admin).run( args=[ "git", "clone", "-b", ceph_deploy_branch, teuth_config.ceph_git_base_url + "ceph-deploy.git", "{tdir}/ceph-deploy".format(tdir=testdir), ] ) ctx.cluster.only(ceph_admin).run( args=["cd", "{tdir}/ceph-deploy".format(tdir=testdir), run.Raw("&&"), "./bootstrap"] ) try: yield finally: log.info("Removing ceph-deploy ...") ctx.cluster.only(ceph_admin).run(args=["rm", "-rf", "{tdir}/ceph-deploy".format(tdir=testdir)])
def task(ctx, config): """ Test [deep] repair in several situations: Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica] The config should be as follows: Must include the log-whitelist below Must enable filestore_debug_inject_read_err config example: tasks: - chef: - install: - ceph: log-whitelist: - 'candidate had a stat error' - 'candidate had a read error' - 'deep-scrub 0 missing, 1 inconsistent objects' - 'deep-scrub 0 missing, 4 inconsistent objects' - 'deep-scrub 1 errors' - 'deep-scrub 4 errors' - '!= known omap_digest' - 'repair 0 missing, 1 inconsistent objects' - 'repair 0 missing, 4 inconsistent objects' - 'repair 1 errors, 1 fixed' - 'repair 4 errors, 4 fixed' - 'scrub 0 missing, 1 inconsistent' - 'scrub 1 errors' - 'size 1 != known size' conf: osd: filestore debug inject read err: true - repair_test: """ if config is None: config = {} assert isinstance(config, dict), \ 'repair_test task only accepts a dict for config' if not hasattr(ctx, 'manager'): first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys() ctx.manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager')) ctx.manager.wait_for_all_up() repair_test_1(ctx, mdataerr, choose_primary, "scrub") repair_test_1(ctx, mdataerr, choose_replica, "scrub") repair_test_1(ctx, dataerr, choose_primary, "deep-scrub") repair_test_1(ctx, dataerr, choose_replica, "deep-scrub") repair_test_1(ctx, trunc, choose_primary, "scrub") repair_test_1(ctx, trunc, choose_replica, "scrub") repair_test_2(ctx, config, choose_primary) repair_test_2(ctx, config, choose_replica) repair_test_erasure_code(ctx, hinfoerr, 'primary', "deep-scrub")
def download_ceph_deploy(ctx, config): """ Downloads ceph-deploy from the ceph.com git mirror and (by default) switches to the master branch. If the `ceph-deploy-branch` is specified, it will use that instead. """ log.info('Downloading ceph-deploy...') testdir = teuthology.get_testdir(ctx) ceph_admin = ctx.cluster.only(teuthology.get_first_mon(ctx, config)) ceph_deploy_branch = config.get('ceph-deploy-branch', 'master') ceph_admin.run(args=[ 'git', 'clone', '-b', ceph_deploy_branch, teuth_config.ceph_git_base_url + 'ceph-deploy.git', '{tdir}/ceph-deploy'.format(tdir=testdir), ], ) ceph_admin.run(args=[ 'cd', '{tdir}/ceph-deploy'.format(tdir=testdir), run.Raw('&&'), './bootstrap', ], ) try: yield finally: log.info('Removing ceph-deploy ...') ceph_admin.run(args=[ 'rm', '-rf', '{tdir}/ceph-deploy'.format(tdir=testdir), ], )
def healthy(ctx, config): """ Wait for all osd's to be up, and for the ceph health monitor to return HEALTH_OK. :param ctx: Context :param config: Configuration """ config = config if isinstance(config, dict) else dict() cluster_name = config.get('cluster', 'ceph') log.info('Waiting until ceph cluster %s is healthy...', cluster_name) firstmon = teuthology.get_first_mon(ctx, config, cluster_name) (mon0_remote, ) = ctx.cluster.only(firstmon).remotes.keys() teuthology.wait_until_osds_up( ctx, cluster=ctx.cluster, remote=mon0_remote, ceph_cluster=cluster_name, ) teuthology.wait_until_healthy( ctx, remote=mon0_remote, ceph_cluster=cluster_name, ) if ctx.cluster.only(teuthology.is_type('mds', cluster_name)).remotes: # Some MDSs exist, wait for them to be healthy ceph_fs = Filesystem(ctx) # TODO: make Filesystem cluster-aware ceph_fs.wait_for_daemons(timeout=300)
def crush_setup(ctx, config):
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    profile = config.get('crush_tunables', 'default')
    log.info('Setting crush tunables to %s', profile)
    mon_remote.run(
        args=['sudo', 'ceph', 'osd', 'crush', 'tunables', profile])
    yield

def crush_setup(ctx, config): cluster_name = config["cluster"] first_mon = teuthology.get_first_mon(ctx, config, cluster_name) (mon_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys() profile = config.get("crush_tunables", "default") log.info("Setting crush tunables to %s", profile) mon_remote.run(args=["sudo", "ceph", "--cluster", cluster_name, "osd", "crush", "tunables", profile]) yield
def wait_for_osds_up(ctx, config):
    log.info('Waiting until ceph osds are all up...')
    firstmon = teuthology.get_first_mon(ctx, config)
    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
    teuthology.wait_until_osds_up(
        ctx,
        cluster=ctx.cluster,
        remote=mon0_remote
    )

def crush_setup(ctx, config):
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    profile = config.get('crush_tunables', 'default')
    log.info('Setting crush tunables to %s', profile)
    mon_remote.run(
        args=['sudo', 'ceph', 'osd', 'crush', 'tunables', profile])
    yield

def test_standby_for_invalid_fscid(self):
    # Set invalid standby_fscid with other mds standby_rank
    # stopping active mds service should not end up in mon crash

    # Get configured mons in the cluster
    first_mon = teuthology.get_first_mon(self.ctx, self.configs_set)
    (mon,) = self.ctx.cluster.only(first_mon).remotes.iterkeys()
    manager = CephManager(
        mon,
        ctx=self.ctx,
        logger=log.getChild('ceph_manager'),
    )
    configured_mons = manager.get_mon_quorum()

    use_daemons = sorted(self.mds_cluster.mds_ids[0:3])
    mds_a, mds_b, mds_c = use_daemons
    log.info("Using MDS daemons: {0}".format(use_daemons))

    def set_standby_for_rank(leader_rank, follower_id):
        self.set_conf("mds.{0}".format(follower_id),
                      "mds_standby_for_rank", leader_rank)

    # Create one fs
    fs_a = self.mds_cluster.newfs("cephfs")

    # Set all the daemons to have a rank assignment but no other
    # standby preferences.
    set_standby_for_rank(0, mds_a)
    set_standby_for_rank(0, mds_b)

    # Set third daemon to have invalid fscid assignment and no other
    # standby preferences
    invalid_fscid = 123
    self.set_conf("mds.{0}".format(mds_c),
                  "mds_standby_for_fscid", invalid_fscid)

    # Restart all the daemons to make the standby preference applied
    self.mds_cluster.mds_restart(mds_a)
    self.mds_cluster.mds_restart(mds_b)
    self.mds_cluster.mds_restart(mds_c)
    self.wait_for_daemon_start([mds_a, mds_b, mds_c])

    # Stop active mds daemon service of fs
    if fs_a.get_active_names() == [mds_a]:
        self.mds_cluster.mds_stop(mds_a)
        self.mds_cluster.mds_fail(mds_a)
        fs_a.wait_for_daemons()
    else:
        self.mds_cluster.mds_stop(mds_b)
        self.mds_cluster.mds_fail(mds_b)
        fs_a.wait_for_daemons()

    # Get active mons from cluster
    active_mons = manager.get_mon_quorum()

    # Check for active quorum mon status and configured mon status
    self.assertEqual(active_mons, configured_mons,
                     "Not all mons are in quorum Invalid standby invalid fscid test failed!")

def task(ctx, config): """ Die if {testdir}/err exists or if an OSD dumps core """ if config is None: config = {} first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') log.info('num_osds is %s' % num_osds) manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) while len(manager.get_osd_status()['up']) < num_osds: time.sleep(10) testdir = teuthology.get_testdir(ctx) while True: for i in range(num_osds): (osd_remote,) = ctx.cluster.only('osd.%d' % i).remotes.iterkeys() p = osd_remote.run( args = [ 'test', '-e', '{tdir}/err'.format(tdir=testdir) ], wait=True, check_status=False, ) exit_status = p.exitstatus if exit_status == 0: log.info("osd %d has an error" % i) raise Exception("osd %d error" % i) log_path = '/var/log/ceph/osd.%d.log' % (i) p = osd_remote.run( args = [ 'tail', '-1', log_path, run.Raw('|'), 'grep', '-q', 'end dump' ], wait=True, check_status=False, ) exit_status = p.exitstatus if exit_status == 0: log.info("osd %d dumped core" % i) raise Exception("osd %d dumped core" % i) time.sleep(5)
def task(ctx, config): """ Test [deep] repair in several situations: Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica] The config should be as follows: Must include the log-whitelist below Must enable filestore_debug_inject_read_err config example: tasks: - chef: - install: - ceph: log-whitelist: ['candidate had a read error', 'deep-scrub 0 missing, 1 inconsistent objects', 'deep-scrub 0 missing, 4 inconsistent objects', 'deep-scrub 1 errors', 'deep-scrub 4 errors', '!= known omap_digest', 'repair 0 missing, 1 inconsistent objects', 'repair 0 missing, 4 inconsistent objects', 'repair 1 errors, 1 fixed', 'repair 4 errors, 4 fixed', 'scrub 0 missing, 1 inconsistent', 'scrub 1 errors', 'size 1 != known size'] conf: osd: filestore debug inject read err: true - repair_test: """ if config is None: config = {} assert isinstance(config, dict), \ 'repair_test task only accepts a dict for config' if not hasattr(ctx, 'manager'): first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() ctx.manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager') ) num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') log.info('num_osds is %s' % num_osds) while len(ctx.manager.get_osd_status()['up']) < num_osds: time.sleep(10) tests = [ gen_repair_test_1(mdataerr(ctx), choose_primary(ctx), "scrub"), gen_repair_test_1(mdataerr(ctx), choose_replica(ctx), "scrub"), gen_repair_test_1(dataerr(ctx), choose_primary(ctx), "deep-scrub"), gen_repair_test_1(dataerr(ctx), choose_replica(ctx), "deep-scrub"), gen_repair_test_1(trunc(ctx), choose_primary(ctx), "scrub"), gen_repair_test_1(trunc(ctx), choose_replica(ctx), "scrub"), gen_repair_test_2(choose_primary(ctx)), gen_repair_test_2(choose_replica(ctx)) ] for test in tests: run_test(ctx, config, test)
def crush_setup(ctx, config):
    cluster_name = config['cluster']
    first_mon = teuthology.get_first_mon(ctx, config, cluster_name)
    (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys()

    profile = config.get('crush_tunables', 'default')
    log.info('Setting crush tunables to %s', profile)
    _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote,
           args=['ceph', 'osd', 'crush', 'tunables', profile])
    yield

def setup(ctx, config): first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() ctx.manager = ceph_manager.CephManager(mon, ctx=ctx, logger=log.getChild("ceph_manager")) ctx.manager.clear_pools() ctx.manager.create_pool(POOLNAME, config.num_pgs) log.info("populating pool") ctx.manager.rados_write_objects( POOLNAME, config.num_objects, config.object_size, config.creation_time_limit, config.create_threads ) log.info("done populating pool")
def setup(self):
    super(CBT, self).setup()
    self.first_mon = self.ctx.cluster.only(
        misc.get_first_mon(self.ctx, self.config)).remotes.keys()[0]
    self.cbt_config = self.generate_cbt_config()
    self.log.info('cbt configuration is %s', self.cbt_config)
    self.cbt_dir = os.path.join(misc.get_archive_dir(self.ctx), 'cbt')
    self.ctx.cluster.run(args=['mkdir', '-p', '-m0755', '--', self.cbt_dir])
    misc.write_file(self.first_mon,
                    os.path.join(self.cbt_dir, 'cbt_config.yaml'),
                    yaml.safe_dump(self.cbt_config, default_flow_style=False))
    self.checkout_cbt()
    self.install_dependencies()

def healthy(ctx, config):
    log.info('Waiting until ceph is healthy...')
    firstmon = teuthology.get_first_mon(ctx, config)
    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
    teuthology.wait_until_osds_up(
        cluster=ctx.cluster,
        remote=mon0_remote
    )
    teuthology.wait_until_healthy(
        remote=mon0_remote,
    )

def task(ctx, config): """ Test [deep] scrub tasks: - chef: - install: - ceph: log-whitelist: - '!= known digest' - '!= known omap_digest' - deep-scrub 0 missing, 1 inconsistent objects - deep-scrub 1 errors - repair 0 missing, 1 inconsistent objects - repair 1 errors, 1 fixed - scrub_test: """ if config is None: config = {} assert isinstance(config, dict), "scrub_test task only accepts a dict for configuration" first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() num_osds = teuthology.num_instances_of_type(ctx.cluster, "osd") log.info("num_osds is %s" % num_osds) manager = ceph_manager.CephManager(mon, ctx=ctx, logger=log.getChild("ceph_manager")) while len(manager.get_osd_status()["up"]) < num_osds: time.sleep(10) for i in range(num_osds): manager.raw_cluster_cmd("tell", "osd.%d" % i, "flush_pg_stats") manager.wait_for_clean() # write some data p = manager.do_rados(mon, ["-p", "rbd", "bench", "--no-cleanup", "1", "write", "-b", "4096"]) log.info("err is %d" % p.exitstatus) # wait for some PG to have data that we can mess with pg, acting = wait_for_victim_pg(manager) osd = acting[0] osd_remote, obj_path, obj_name = find_victim_object(ctx, pg, osd) manager.do_rados(mon, ["-p", "rbd", "setomapval", obj_name, "key", "val"]) log.info("err is %d" % p.exitstatus) manager.do_rados(mon, ["-p", "rbd", "setomapheader", obj_name, "hdr"]) log.info("err is %d" % p.exitstatus) log.info("messing with PG %s on osd %d" % (pg, osd)) test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, "rbd") test_repair_bad_omap(ctx, manager, pg, osd, obj_name) test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd, obj_name, obj_path) log.info("test successful!")
def execute_ceph_deploy(ctx, config, cmd): """Remotely execute a ceph_deploy command""" testdir = teuthology.get_testdir(ctx) ceph_admin = teuthology.get_first_mon(ctx, config) exec_cmd = cmd (remote,) = ctx.cluster.only(ceph_admin).remotes.iterkeys() proc = remote.run( args=["cd", "{tdir}/ceph-deploy".format(tdir=testdir), run.Raw("&&"), run.Raw(exec_cmd)], check_status=False ) exitstatus = proc.exitstatus return exitstatus
def wait_for_osds_up(ctx, config): """ Wait for all osd's to come up. :param ctx: Context :param config: Configuration """ log.info('Waiting until ceph osds are all up...') firstmon = teuthology.get_first_mon(ctx, config) (mon0_remote, ) = ctx.cluster.only(firstmon).remotes.keys() teuthology.wait_until_osds_up(ctx, cluster=ctx.cluster, remote=mon0_remote)
def wait_for_osds_up(ctx, config): """ Wait for all osd's to come up. :param ctx: Context :param config: Configuration """ log.info("Waiting until ceph osds are all up...") cluster_name = config.get("cluster", "ceph") firstmon = teuthology.get_first_mon(ctx, config, cluster_name) (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() teuthology.wait_until_osds_up(ctx, cluster=ctx.cluster, remote=mon0_remote)
def task(ctx, config): """ Benchmark the recovery system. Generates objects with smalliobench, runs it normally to get a baseline performance measurement, then marks an OSD out and reruns to measure performance during recovery. The config should be as follows: recovery_bench: duration: <seconds for each measurement run> num_objects: <number of objects> io_size: <io size in bytes> example: tasks: - ceph: - recovery_bench: duration: 60 num_objects: 500 io_size: 4096 """ if config is None: config = {} assert isinstance(config, dict), \ 'recovery_bench task only accepts a dict for configuration' log.info('Beginning recovery bench...') first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') while len(manager.get_osd_status()['up']) < num_osds: manager.sleep(10) bench_proc = RecoveryBencher( manager, config, ) try: yield finally: log.info('joining recovery bencher') bench_proc.do_join()
def test_get_first_mon():
    expected = [
        ([['mon.a', 'osd.0', 'mon.c']], 'ceph', 'mon.a'),
        ([['ceph.mon.a', 'osd.0', 'ceph.mon.c']], 'ceph', 'ceph.mon.a'),
        ([['mon.a', 'osd.0', 'mon.c'], ['ceph.mon.b']], 'ceph', 'ceph.mon.b'),
        ([['mon.a', 'osd.0', 'mon.c'], ['foo.mon.a']], 'ceph', 'mon.a'),
        ([['foo.mon.b', 'osd.0', 'mon.c'], ['foo.mon.a']], 'foo', 'foo.mon.a'),
    ]
    for remote_roles, cluster_name, expected_mon in expected:
        ctx = argparse.Namespace()
        ctx.cluster = Mock()
        ctx.cluster.remotes = {i: roles for i, roles in enumerate(remote_roles)}
        mon = misc.get_first_mon(ctx, None, cluster_name)
        assert expected_mon == mon

def thread(): """Thread spawned by gevent""" if not hasattr(ctx, 'manager'): first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() ctx.manager = CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) clients = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] log.info('clients are %s' % clients) if config.get('ec_pool', False): erasure_code_profile = config.get('erasure_code_profile', {}) erasure_code_profile_name = erasure_code_profile.get('name', False) ctx.manager.create_erasure_code_profile(erasure_code_profile_name, **erasure_code_profile) else: erasure_code_profile_name = False for i in range(int(config.get('runs', '1'))): log.info("starting run %s out of %s", str(i), config.get('runs', '1')) tests = {} existing_pools = config.get('pools', []) created_pools = [] for role in config.get('clients', clients): assert isinstance(role, basestring) PREFIX = 'client.' assert role.startswith(PREFIX) id_ = role[len(PREFIX):] pool = config.get('pool', None) if not pool and existing_pools: pool = existing_pools.pop() else: pool = ctx.manager.create_pool_with_unique_name(erasure_code_profile_name=erasure_code_profile_name) created_pools.append(pool) (remote,) = ctx.cluster.only(role).remotes.iterkeys() proc = remote.run( args=["CEPH_CLIENT_ID={id_}".format(id_=id_)] + args + ["--pool", pool], logger=log.getChild("rados.{id}".format(id=id_)), stdin=run.PIPE, wait=False ) tests[id_] = proc run.wait(tests.itervalues()) for pool in created_pools: ctx.manager.remove_pool(pool)
def healthy(ctx, config): """ Wait for all osd's to be up, and for the ceph health monitor to return HEALTH_OK. :param ctx: Context :param config: Configuration """ log.info('Waiting until ceph is healthy...') firstmon = teuthology.get_first_mon(ctx, config) (mon0_remote, ) = ctx.cluster.only(firstmon).remotes.keys() teuthology.wait_until_osds_up(ctx, cluster=ctx.cluster, remote=mon0_remote) teuthology.wait_until_healthy( ctx, remote=mon0_remote, )
def __init__(self, ctx):
    self._ctx = ctx

    self.mds_ids = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
    if len(self.mds_ids) == 0:
        raise RuntimeError("This task requires at least one MDS")

    first_mon = misc.get_first_mon(ctx, None)
    (self.mon_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    self.mon_manager = ceph_manager.CephManager(
        self.mon_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
    self.mds_daemons = dict(
        [(mds_id, self._ctx.daemons.get_daemon('mds', mds_id))
         for mds_id in self.mds_ids])

    client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client'))
    self.client_id = client_list[0]
    self.client_remote = list(
        misc.get_clients(ctx=ctx,
                         roles=["client.{0}".format(self.client_id)]))[0][1]

def cephfs_setup(ctx, config):
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    mdss = ctx.cluster.only(teuthology.is_type('mds'))
    # If there are any MDSs, then create a filesystem for them to use
    # Do this last because requires mon cluster to be up and running
    if mdss.remotes:
        log.info('Setting up CephFS filesystem...')

        try:
            proc = mon_remote.run(args=['sudo', 'ceph', '--format=json-pretty',
                                        'osd', 'lspools'],
                                  stdout=StringIO())
            pools = json.loads(proc.stdout.getvalue())
            metadata_pool_exists = 'metadata' in [p['poolname'] for p in pools]
        except CommandFailedError as e:
            # For use in upgrade tests, Ceph cuttlefish and earlier don't support
            # structured output (--format) from the CLI.
            if e.exitstatus == 22:
                metadata_pool_exists = True
            else:
                raise

        # In case we are using an older Ceph which creates FS by default
        if metadata_pool_exists:
            log.info("Metadata pool already exists, skipping")
        else:
            ceph_fs = Filesystem(ctx)
            ceph_fs.create()

        is_active_mds = lambda role: role.startswith('mds.') and not role.endswith('-s') and role.find('-s-') == -1
        all_roles = [item for remote_roles in mdss.remotes.values() for item in remote_roles]
        num_active = len([r for r in all_roles if is_active_mds(r)])
        mon_remote.run(args=[
            'adjust-ulimits', 'ceph-coverage', coverage_dir,
            'ceph', 'mds', 'set_max_mds', str(num_active)])

    yield

def cephfs_setup(ctx, config):
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    mdss = ctx.cluster.only(teuthology.is_type('mds'))
    # If there are any MDSs, then create a filesystem for them to use
    # Do this last because requires mon cluster to be up and running
    if mdss.remotes:
        log.info('Setting up CephFS filesystem...')

        try:
            proc = mon_remote.run(args=['sudo', 'ceph', '--format=json-pretty',
                                        'osd', 'lspools'],
                                  stdout=StringIO())
            pools = json.loads(proc.stdout.getvalue())
            metadata_pool_exists = 'metadata' in [p['poolname'] for p in pools]
        except CommandFailedError as e:
            # For use in upgrade tests, Ceph cuttlefish and earlier don't support
            # structured output (--format) from the CLI.
            if e.exitstatus == 22:
                metadata_pool_exists = True
            else:
                raise

        # In case we are using an older Ceph which creates FS by default
        if metadata_pool_exists:
            log.info("Metadata pool already exists, skipping")
        else:
            mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create',
                                 'metadata', '256'])
            mon_remote.run(args=['sudo', 'ceph', 'osd', 'pool', 'create',
                                 'data', '256'])

            # Use 'newfs' to work with either old or new Ceph, until the 'fs new'
            # stuff is all landed.
            mon_remote.run(args=['sudo', 'ceph', 'mds', 'newfs', '1', '2'])
            # mon_remote.run(args=['sudo', 'ceph', 'fs', 'new', 'default', 'metadata', 'data'])

        is_active_mds = lambda role: role.startswith('mds.') and not role.endswith('-s') and role.find('-s-') == -1
        all_roles = [item for remote_roles in mdss.remotes.values() for item in remote_roles]
        num_active = len([r for r in all_roles if is_active_mds(r)])
        mon_remote.run(args=[
            'adjust-ulimits', 'ceph-coverage', coverage_dir,
            'ceph', 'mds', 'set_max_mds', str(num_active)])

    yield

def execute_ceph_deploy(ctx, config, cmd):
    testdir = teuthology.get_testdir(ctx)
    ceph_admin = teuthology.get_first_mon(ctx, config)
    exec_cmd = cmd
    (remote,) = ctx.cluster.only(ceph_admin).remotes.iterkeys()
    proc = remote.run(
        args=[
            'cd',
            '{tdir}/ceph-deploy'.format(tdir=testdir),
            run.Raw('&&'),
            run.Raw(exec_cmd),
        ],
        check_status=False,
    )
    exitstatus = proc.exitstatus
    return exitstatus

def is_healthy(ctx, config):
    """Wait until a Ceph cluster is healthy."""
    testdir = teuthology.get_testdir(ctx)
    ceph_admin = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
    while True:
        r = remote.run(
            args=[
                "cd",
                "{tdir}".format(tdir=testdir),
                run.Raw("&&"),
                "sudo", "ceph",
                "health",
            ],
            stdout=StringIO(),
            logger=log.getChild("health"),
        )
        out = r.stdout.getvalue()
        log.debug("Ceph health: %s", out.rstrip("\n"))
        if out.split(None, 1)[0] == "HEALTH_OK":
            break
        time.sleep(1)

def execute_ceph_deploy(ctx, config, cmd):
    testdir = teuthology.get_testdir(ctx)
    ceph_admin = teuthology.get_first_mon(ctx, config)
    exec_cmd = cmd
    (remote,) = ctx.cluster.only(ceph_admin).remotes.iterkeys()
    proc = remote.run(
        args=[
            'cd',
            '{tdir}/ceph-deploy'.format(tdir=testdir),
            run.Raw('&&'),
            run.Raw(exec_cmd),
        ],
        check_status=False,
    )
    exitstatus = proc.exitstatus
    return exitstatus

def task(ctx, config): """ Run scrub periodically. Randomly chooses an OSD to scrub. The config should be as follows: scrub: frequency: <seconds between scrubs> deep: <bool for deepness> example: tasks: - ceph: - scrub: frequency: 30 deep: 0 """ if config is None: config = {} assert isinstance(config, dict), \ 'scrub task only accepts a dict for configuration' log.info('Beginning scrub...') first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') while len(manager.get_osd_status()['up']) < num_osds: time.sleep(10) scrub_proc = Scrubber( manager, config, ) try: yield finally: log.info('joining scrub') scrub_proc.do_join()
def task(ctx, config): """ Run scrub periodically. Randomly chooses an OSD to scrub. The config should be as follows: scrub: frequency: <seconds between scrubs> deep: <bool for deepness> example: tasks: - ceph: - scrub: frequency: 30 deep: 0 """ if config is None: config = {} assert isinstance(config, dict), \ 'scrub task only accepts a dict for configuration' log.info('Beginning scrub...') first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') while len(manager.get_osd_status()['up']) < num_osds: time.sleep(10) scrub_proc = Scrubber( manager, config, ) try: yield finally: log.info('joining scrub') scrub_proc.do_join()
def task(ctx, config): """ Test [deep] repair in several situations: Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica] The config should be as follows: Must include the log-whitelist below Must enable filestore_debug_inject_read_err config example: tasks: - chef: - install: - ceph: log-whitelist: ['candidate had a read error', 'deep-scrub 0 missing, 1 inconsistent objects', 'deep-scrub 0 missing, 4 inconsistent objects', 'deep-scrub 1 errors', 'deep-scrub 4 errors', '!= known omap_digest', 'repair 0 missing, 1 inconsistent objects', 'repair 0 missing, 4 inconsistent objects', 'repair 1 errors, 1 fixed', 'repair 4 errors, 4 fixed', 'scrub 0 missing, 1 inconsistent', 'scrub 1 errors', 'size 1 != known size'] conf: osd: filestore debug inject read err: true - repair_test: """ if config is None: config = {} assert isinstance(config, dict), \ 'repair_test task only accepts a dict for config' if not hasattr(ctx, 'manager'): first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys() ctx.manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager')) tests = [ gen_repair_test_1(mdataerr(ctx), choose_primary(ctx), "scrub"), gen_repair_test_1(mdataerr(ctx), choose_replica(ctx), "scrub"), gen_repair_test_1(dataerr(ctx), choose_primary(ctx), "deep-scrub"), gen_repair_test_1(dataerr(ctx), choose_replica(ctx), "deep-scrub"), gen_repair_test_1(trunc(ctx), choose_primary(ctx), "scrub"), gen_repair_test_1(trunc(ctx), choose_replica(ctx), "scrub"), gen_repair_test_2(choose_primary(ctx)), gen_repair_test_2(choose_replica(ctx)) ] for test in tests: run_test(ctx, config, test)
def setup(ctx, config):
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    ctx.manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )
    ctx.manager.clear_pools()
    ctx.manager.create_pool(POOLNAME, config.num_pgs)
    log.info("populating pool")
    ctx.manager.rados_write_objects(
        POOLNAME,
        config.num_objects,
        config.object_size,
        config.creation_time_limit,
        config.create_threads)
    log.info("done populating pool")

def thread(): """Thread spawned by gevent""" if not hasattr(ctx, 'manager'): first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys() ctx.manager = CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) clients = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] log.info('clients are %s' % clients) for i in range(int(config.get('runs', '1'))): log.info("starting run %s out of %s", str(i), config.get('runs', '1')) tests = {} existing_pools = config.get('pools', []) created_pools = [] for role in config.get('clients', clients): assert isinstance(role, basestring) PREFIX = 'client.' assert role.startswith(PREFIX) id_ = role[len(PREFIX):] pool = config.get('pool', None) if not pool and existing_pools: pool = existing_pools.pop() else: pool = ctx.manager.create_pool_with_unique_name(ec_pool=config.get('ec_pool', False)) created_pools.append(pool) (remote,) = ctx.cluster.only(role).remotes.iterkeys() proc = remote.run( args=["CEPH_CLIENT_ID={id_}".format(id_=id_)] + args + ["--pool", pool], logger=log.getChild("rados.{id}".format(id=id_)), stdin=run.PIPE, wait=False ) tests[id_] = proc run.wait(tests.itervalues()) for pool in created_pools: ctx.manager.remove_pool(pool)