def task(ctx, config): """ Stress test the monitor by thrashing them while another task/workunit is running. Please refer to MonitorThrasher class for further information on the available options. """ if config is None: config = {} assert isinstance(config, dict), \ 'mon_thrash task only accepts a dict for configuration' assert len(_get_mons(ctx)) > 2, \ 'mon_thrash task requires at least 3 monitors' log.info('Beginning mon_thrash...') first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.keys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) thrash_proc = MonitorThrasher(ctx, manager, config, logger=log.getChild('mon_thrasher')) try: log.debug('Yielding') yield finally: log.info('joining mon_thrasher') thrash_proc.do_join() mons = _get_mons(ctx) manager.wait_for_mon_quorum_size(len(mons))
def task(ctx, config): """ Test monitor recovery from OSD """ if config is None: config = {} assert isinstance(config, dict), \ 'task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.keys() # stash a monmap for later mon.run(args=['ceph', 'mon', 'getmap', '-o', '/tmp/monmap']) manager = ceph_manager.CephManager(mon, ctx=ctx, logger=log.getChild('ceph_manager')) mons = ctx.cluster.only(teuthology.is_type('mon')) # note down the first cluster_name and mon_id # we will recover it later on cluster_name, _, mon_id = teuthology.split_role(first_mon) _nuke_mons(manager, mons, mon_id) default_keyring = '/etc/ceph/{cluster}.keyring'.format( cluster=cluster_name) keyring_path = config.get('keyring_path', default_keyring) _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path) _revive_mons(manager, mons, mon_id, keyring_path) _revive_mgrs(ctx, manager) _revive_osds(ctx, manager)
def __init__(self, ctx, admin_remote=None):
    self._ctx = ctx
    self.mds_ids = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
    if len(self.mds_ids) == 0:
        raise RuntimeError("This task requires at least one MDS")

    first_mon = misc.get_first_mon(ctx, None)
    if admin_remote is None:
        # use .keys() rather than the Python 2-only .iterkeys()
        (self.admin_remote,) = ctx.cluster.only(first_mon).remotes.keys()
    else:
        self.admin_remote = admin_remote
    self.mon_manager = ceph_manager.CephManager(
        self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
    if hasattr(self._ctx, "daemons"):
        # Presence of 'daemons' attribute implies ceph task rather than ceph_deploy task
        self.mds_daemons = dict([
            (mds_id, self._ctx.daemons.get_daemon('mds', mds_id))
            for mds_id in self.mds_ids
        ])

    client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client'))
    self.client_id = client_list[0]
    self.client_remote = list(
        misc.get_clients(ctx=ctx,
                         roles=["client.{0}".format(self.client_id)]))[0][1]
def task(ctx, config): """ Die if {testdir}/err exists or if an OSD dumps core """ if config is None: config = {} first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.keys() num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') log.info('num_osds is %s' % num_osds) manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) while len(manager.get_osd_status()['up']) < num_osds: time.sleep(10) testdir = teuthology.get_testdir(ctx) while True: for i in range(num_osds): (osd_remote, ) = ctx.cluster.only('osd.%d' % i).remotes.keys() p = osd_remote.run( args=['test', '-e', '{tdir}/err'.format(tdir=testdir)], wait=True, check_status=False, ) exit_status = p.exitstatus if exit_status == 0: log.info("osd %d has an error" % i) raise Exception("osd %d error" % i) log_path = '/var/log/ceph/osd.%d.log' % (i) p = osd_remote.run( args=[ 'tail', '-1', log_path, run.Raw('|'), 'grep', '-q', 'end dump' ], wait=True, check_status=False, ) exit_status = p.exitstatus if exit_status == 0: log.info("osd %d dumped core" % i) raise Exception("osd %d dumped core" % i) time.sleep(5)
def __init__(self, ctx):
    self.mds_ids = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
    self._ctx = ctx
    if len(self.mds_ids) == 0:
        raise RuntimeError("This task requires at least one MDS")

    # self.admin_remote was referenced below but never assigned; derive it
    # from the first mon, mirroring the other constructors in this module.
    first_mon = misc.get_first_mon(ctx, None)
    (self.admin_remote,) = ctx.cluster.only(first_mon).remotes.keys()

    self.mon_manager = ceph_manager.CephManager(
        self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
    if hasattr(self._ctx, "daemons"):
        # Presence of 'daemons' attribute implies ceph task rather than ceph_deploy task
        self.mds_daemons = dict([(mds_id, self._ctx.daemons.get_daemon('mds', mds_id))
                                 for mds_id in self.mds_ids])
def task(ctx, config): """ Run scrub periodically. Randomly chooses an OSD to scrub. The config should be as follows: scrub: frequency: <seconds between scrubs> deep: <bool for deepness> example: tasks: - ceph: - scrub: frequency: 30 deep: 0 """ if config is None: config = {} assert isinstance(config, dict), \ 'scrub task only accepts a dict for configuration' log.info('Beginning scrub...') first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.keys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') while len(manager.get_osd_status()['up']) < num_osds: time.sleep(10) scrub_proc = Scrubber( manager, config, ) try: yield finally: log.info('joining scrub') scrub_proc.do_join()
def __init__(self, ctx):
    self._ctx = ctx
    self.mds_ids = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
    if len(self.mds_ids) == 0:
        raise RuntimeError("This task requires at least one MDS")

    first_mon = misc.get_first_mon(ctx, None)
    # use .keys() rather than the Python 2-only .iterkeys()
    (self.mon_remote,) = ctx.cluster.only(first_mon).remotes.keys()
    self.mon_manager = ceph_manager.CephManager(
        self.mon_remote, ctx=ctx, logger=log.getChild('ceph_manager'))

    self.mds_daemons = dict([(mds_id, self._ctx.daemons.get_daemon('mds', mds_id))
                             for mds_id in self.mds_ids])

    client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client'))
    self.client_id = client_list[0]
    self.client_remote = list(
        misc.get_clients(ctx=ctx,
                         roles=["client.{0}".format(self.client_id)]))[0][1]
def task(ctx, config): """ Stress test the mds by thrashing while another task/workunit is running. Please refer to MDSThrasher class for further information on the available options. """ mds_cluster = MDSCluster(ctx) if config is None: config = {} assert isinstance(config, dict), \ 'mds_thrash task only accepts a dict for configuration' mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds')) assert len(mdslist) > 1, \ 'mds_thrash task requires at least 2 metadata servers' # choose random seed if 'seed' in config: seed = int(config['seed']) else: seed = int(time.time()) log.info('mds thrasher using random seed: {seed}'.format(seed=seed)) random.seed(seed) (first, ) = ctx.cluster.only( 'mds.{_id}'.format(_id=mdslist[0])).remotes.keys() manager = ceph_manager.CephManager( first, ctx=ctx, logger=log.getChild('ceph_manager'), ) # make sure everyone is in active, standby, or standby-replay log.info('Wait for all MDSs to reach steady state...') status = mds_cluster.status() while True: steady = True for info in status.get_all(): state = info['state'] if state not in ('up:active', 'up:standby', 'up:standby-replay'): steady = False break if steady: break sleep(2) status = mds_cluster.status() log.info('Ready to start thrashing') manager.wait_for_clean() assert manager.is_clean() if 'cluster' not in config: config['cluster'] = 'ceph' for fs in status.get_filesystems(): thrasher = MDSThrasher(ctx, manager, config, Filesystem(ctx, fs['id']), fs['mdsmap']['max_mds']) thrasher.start() ctx.ceph[config['cluster']].thrashers.append(thrasher) try: log.debug('Yielding') yield finally: log.info('joining mds_thrasher') thrasher.stop() if thrasher.exception is not None: raise RuntimeError('error during thrashing') thrasher.join() log.info('done joining')
def task(ctx, config): """ Test handling of object location going down """ if config is None: config = {} assert isinstance(config, dict), \ 'lost_unfound task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.keys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) while len(manager.get_osd_status()['up']) < 3: time.sleep(10) manager.wait_for_clean() # something that is always there dummyfile = '/etc/fstab' # take 0, 1 out manager.mark_out_osd(0) manager.mark_out_osd(1) manager.wait_for_clean() # delay recovery, and make the pg log very long (to prevent backfill) manager.raw_cluster_cmd( 'tell', 'osd.0', 'injectargs', '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' ) # delay recovery, and make the pg log very long (to prevent backfill) manager.raw_cluster_cmd( 'tell', 'osd.1', 'injectargs', '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' ) # delay recovery, and make the pg log very long (to prevent backfill) manager.raw_cluster_cmd( 'tell', 'osd.2', 'injectargs', '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' ) # delay recovery, and make the pg log very long (to prevent backfill) manager.raw_cluster_cmd( 'tell', 'osd.3', 'injectargs', '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' ) # kludge to make sure they get a map rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile]) # create old objects for f in range(1, 10): rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile]) manager.mark_out_osd(3) manager.wait_till_active() manager.mark_in_osd(0) manager.wait_till_active() manager.flush_pg_stats([2, 0]) manager.mark_out_osd(2) manager.wait_till_active() # bring up 1 manager.mark_in_osd(1) manager.wait_till_active() manager.flush_pg_stats([0, 1]) log.info("Getting unfound objects") unfound = manager.get_num_unfound_objects() assert not unfound manager.kill_osd(2) manager.mark_down_osd(2) manager.kill_osd(3) manager.mark_down_osd(3) manager.flush_pg_stats([0, 1]) log.info("Getting unfound objects") unfound = manager.get_num_unfound_objects() assert unfound
def task(ctx, config): """ Test the dump_stuck command. :param ctx: Context :param config: Configuration """ assert config is None, \ 'dump_stuck requires no configuration' assert teuthology.num_instances_of_type(ctx.cluster, 'osd') == 2, \ 'dump_stuck requires exactly 2 osds' timeout = 60 first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.keys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) manager.flush_pg_stats([0, 1]) manager.wait_for_clean(timeout) manager.raw_cluster_cmd( 'tell', 'mon.0', 'injectargs', '--', # '--mon-osd-report-timeout 90', '--mon-pg-stuck-threshold 10') # all active+clean check_stuck( manager, num_inactive=0, num_unclean=0, num_stale=0, ) num_pgs = manager.get_num_pgs() manager.mark_out_osd(0) time.sleep(timeout) manager.flush_pg_stats([1]) manager.wait_for_recovery(timeout) # all active+clean+remapped check_stuck( manager, num_inactive=0, num_unclean=0, num_stale=0, ) manager.mark_in_osd(0) manager.flush_pg_stats([0, 1]) manager.wait_for_clean(timeout) # all active+clean check_stuck( manager, num_inactive=0, num_unclean=0, num_stale=0, ) log.info('stopping first osd') manager.kill_osd(0) manager.mark_down_osd(0) manager.wait_for_active(timeout) log.info('waiting for all to be unclean') starttime = time.time() done = False while not done: try: check_stuck( manager, num_inactive=0, num_unclean=num_pgs, num_stale=0, ) done = True except AssertionError: # wait up to 15 minutes to become stale if time.time() - starttime > 900: raise log.info('stopping second osd') manager.kill_osd(1) manager.mark_down_osd(1) log.info('waiting for all to be stale') starttime = time.time() done = False while not done: try: check_stuck( manager, num_inactive=0, num_unclean=num_pgs, num_stale=num_pgs, ) done = True except AssertionError: # wait up to 15 minutes to become stale if time.time() - starttime > 900: raise log.info('reviving') for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'): manager.revive_osd(id_) manager.mark_in_osd(id_) while True: try: manager.flush_pg_stats([0, 1]) break except Exception: log.exception('osds must not be started yet, waiting...') time.sleep(1) manager.wait_for_clean(timeout) check_stuck( manager, num_inactive=0, num_unclean=0, num_stale=0, )
def task(ctx, config): """ Test backfill """ if config is None: config = {} assert isinstance(config, dict), \ 'thrashosds task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.keys() num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') log.info('num_osds is %s' % num_osds) assert num_osds == 3 manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) while len(manager.get_osd_status()['up']) < 3: time.sleep(10) manager.flush_pg_stats([0, 1, 2]) manager.wait_for_clean() # write some data p = rados_start( ctx, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096', '--no-cleanup']) err = p.wait() log.info('err is %d' % err) # mark osd.0 out to trigger a rebalance/backfill manager.mark_out_osd(0) # also mark it down to it won't be included in pg_temps manager.kill_osd(0) manager.mark_down_osd(0) # wait for everything to peer and be happy... manager.flush_pg_stats([1, 2]) manager.wait_for_recovery() # write some new data p = rados_start( ctx, mon, ['-p', 'rbd', 'bench', '30', 'write', '-b', '4096', '--no-cleanup']) time.sleep(15) # blackhole + restart osd.1 # this triggers a divergent backfill target manager.blackhole_kill_osd(1) time.sleep(2) manager.revive_osd(1) # wait for our writes to complete + succeed err = p.wait() log.info('err is %d' % err) # wait for osd.1 and osd.2 to be up manager.wait_till_osd_is_up(1) manager.wait_till_osd_is_up(2) # cluster must recover manager.flush_pg_stats([1, 2]) manager.wait_for_recovery() # re-add osd.0 manager.revive_osd(0) manager.flush_pg_stats([1, 2]) manager.wait_for_clean()
def task(ctx, config): """ Test backfill reservation calculates "toofull" condition correctly. A pretty rigid cluster is brought up and tested by this task """ if config is None: config = {} assert isinstance(config, dict), \ 'backfill_toofull task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.keys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) profile = config.get('erasure_code_profile', { 'k': '2', 'm': '1', 'crush-failure-domain': 'osd' }) profile_name = profile.get('name', 'backfill_toofull') manager.create_erasure_code_profile(profile_name, profile) pool = manager.create_pool_with_unique_name( pg_num=1, erasure_code_profile_name=profile_name, min_size=2) manager.raw_cluster_cmd('osd', 'pool', 'set', pool, 'pg_autoscale_mode', 'off') manager.flush_pg_stats([0, 1, 2, 3]) manager.wait_for_clean() pool_id = manager.get_pool_num(pool) pgid = '%d.0' % pool_id pgs = manager.get_pg_stats() acting = next((pg['acting'] for pg in pgs if pg['pgid'] == pgid), None) log.debug("acting=%s" % acting) assert acting primary = acting[0] target = acting[1] log.debug("write some data") rados(ctx, mon, ['-p', pool, 'bench', '120', 'write', '--no-cleanup']) df = manager.get_osd_df(target) log.debug("target osd df: %s" % df) total_kb = df['kb'] used_kb = df['kb_used'] log.debug("pause recovery") manager.raw_cluster_cmd('osd', 'set', 'noout') manager.raw_cluster_cmd('osd', 'set', 'nobackfill') manager.raw_cluster_cmd('osd', 'set', 'norecover') log.debug("stop tartget osd %s" % target) manager.kill_osd(target) manager.wait_till_active() pgs = manager.get_pg_stats() pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) log.debug('pg=%s' % pg) assert pg log.debug("re-write data") rados(ctx, mon, ['-p', pool, 'cleanup']) time.sleep(10) rados(ctx, mon, ['-p', pool, 'bench', '60', 'write', '--no-cleanup']) df = manager.get_osd_df(primary) log.debug("primary osd df: %s" % df) primary_used_kb = df['kb_used'] log.info("test backfill reservation rejected with toofull") # We set backfillfull ratio less than new data size and expect the pg # entering backfill_toofull state. # # We also need to update nearfull ratio to prevent "full ratio(s) out of order". backfillfull = 0.9 * primary_used_kb / total_kb nearfull = backfillfull * 0.9 log.debug("update nearfull ratio to %s and backfillfull ratio to %s" % (nearfull, backfillfull)) manager.raw_cluster_cmd('osd', 'set-nearfull-ratio', '{:.3f}'.format(nearfull + 0.001)) manager.raw_cluster_cmd('osd', 'set-backfillfull-ratio', '{:.3f}'.format(backfillfull + 0.001)) log.debug("start tartget osd %s" % target) manager.revive_osd(target) manager.wait_for_active() manager.wait_till_osd_is_up(target) wait_for_pg_state(manager, pgid, 'backfill_toofull', target) log.info("test pg not enter backfill_toofull after restarting backfill") # We want to set backfillfull ratio to be big enough for the target to # successfully backfill new data but smaller than the sum of old and new # data, so if the osd backfill reservation incorrectly calculates "toofull" # the test will detect this (fail). # # Note, we need to operate with "uncompressed" bytes because currently # osd backfill reservation does not take compression into account. # # We also need to update nearfull ratio to prevent "full ratio(s) out of order". 
pdf = manager.get_pool_df(pool) log.debug("pool %s df: %s" % (pool, pdf)) assert pdf compress_ratio = 1.0 * pdf['compress_under_bytes'] / pdf['compress_bytes_used'] \ if pdf['compress_bytes_used'] > 0 else 1.0 log.debug("compress_ratio: %s" % compress_ratio) backfillfull = (used_kb + primary_used_kb) * compress_ratio / total_kb assert backfillfull < 0.9 nearfull_min = max(used_kb, primary_used_kb) * compress_ratio / total_kb assert nearfull_min < backfillfull delta = backfillfull - nearfull_min nearfull = nearfull_min + delta * 0.1 backfillfull = nearfull_min + delta * 0.2 log.debug("update nearfull ratio to %s and backfillfull ratio to %s" % (nearfull, backfillfull)) manager.raw_cluster_cmd('osd', 'set-nearfull-ratio', '{:.3f}'.format(nearfull + 0.001)) manager.raw_cluster_cmd('osd', 'set-backfillfull-ratio', '{:.3f}'.format(backfillfull + 0.001)) wait_for_pg_state(manager, pgid, 'backfilling', target) pgs = manager.get_pg_stats() pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) log.debug('pg=%s' % pg) assert pg log.debug("interrupt %s backfill" % target) manager.mark_down_osd(target) # after marking the target osd down it will automatically be # up soon again log.debug("resume recovery") manager.raw_cluster_cmd('osd', 'unset', 'noout') manager.raw_cluster_cmd('osd', 'unset', 'nobackfill') manager.raw_cluster_cmd('osd', 'unset', 'norecover') # wait for everything to peer, backfill and recover manager.wait_for_clean() pgs = manager.get_pg_stats() pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) log.info('pg=%s' % pg) assert pg assert 'clean' in pg['state'].split('+')
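# Worked example of the ratio arithmetic above, with made-up numbers: the
# target already holds 100 GiB of old data, the primary wrote 120 GiB of new
# data, the OSD is 1 TiB, and there is no compression.
def sketch_backfillfull_ratios(used_kb, primary_used_kb, total_kb,
                               compress_ratio=1.0):
    too_full = (used_kb + primary_used_kb) * compress_ratio / total_kb
    nearfull_min = max(used_kb, primary_used_kb) * compress_ratio / total_kb
    delta = too_full - nearfull_min
    # nearfull and backfillfull land 10% and 20% of the way into the gap:
    # large enough for the new data alone, too small for old plus new data.
    return nearfull_min + delta * 0.1, nearfull_min + delta * 0.2

# sketch_backfillfull_ratios(100 << 20, 120 << 20, 1 << 30)
#   -> (~0.127, ~0.137), between nearfull_min ~0.117 and too_full ~0.215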
def test_incomplete_pgs(ctx, config): """ Test handling of incomplete pgs. Requires 4 osds. """ testdir = teuthology.get_testdir(ctx) if config is None: config = {} assert isinstance(config, dict), \ 'task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.keys() num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') log.info('num_osds is %s' % num_osds) assert num_osds == 4 manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) while len(manager.get_osd_status()['up']) < 4: time.sleep(10) manager.flush_pg_stats([0, 1, 2, 3]) manager.wait_for_clean() log.info('Testing incomplete pgs...') for i in range(4): manager.set_config(i, osd_recovery_delay_start=1000) # move data off of osd.0, osd.1 manager.raw_cluster_cmd('osd', 'out', '0', '1') manager.flush_pg_stats([0, 1, 2, 3], [0, 1]) manager.wait_for_clean() # lots of objects in rbd (no pg log, will backfill) p = rados_start( testdir, mon, ['-p', 'rbd', 'bench', '20', 'write', '-b', '1', '--no-cleanup']) p.wait() # few objects in rbd pool (with pg log, normal recovery) for f in range(1, 20): p = rados_start(testdir, mon, ['-p', 'rbd', 'put', 'foo.%d' % f, '/etc/passwd']) p.wait() # move it back manager.raw_cluster_cmd('osd', 'in', '0', '1') manager.raw_cluster_cmd('osd', 'out', '2', '3') time.sleep(10) manager.flush_pg_stats([0, 1, 2, 3], [2, 3]) time.sleep(10) manager.wait_for_active() assert not manager.is_clean() assert not manager.is_recovered() # kill 2 + 3 log.info('stopping 2,3') manager.kill_osd(2) manager.kill_osd(3) log.info('...') manager.raw_cluster_cmd('osd', 'down', '2', '3') manager.flush_pg_stats([0, 1]) manager.wait_for_active_or_down() assert manager.get_num_down() > 0 # revive 2 + 3 manager.revive_osd(2) manager.revive_osd(3) while len(manager.get_osd_status()['up']) < 4: log.info('waiting a bit...') time.sleep(2) log.info('all are up!') for i in range(4): manager.kick_recovery_wq(i) # cluster must recover manager.wait_for_clean()
def task(ctx, config): """ Test monitor recovery. """ if config is None: config = {} assert isinstance(config, dict), \ 'task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.keys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) mons = [f.split('.')[1] for f in teuthology.get_mon_names(ctx)] log.info("mon ids = %s" % mons) manager.wait_for_mon_quorum_size(len(mons)) log.info('verifying all monitors are in the quorum') for m in mons: s = manager.get_mon_status(m) assert s['state'] == 'leader' or s['state'] == 'peon' assert len(s['quorum']) == len(mons) log.info('restarting each monitor in turn') for m in mons: # stop a monitor manager.kill_mon(m) manager.wait_for_mon_quorum_size(len(mons) - 1) # restart manager.revive_mon(m) manager.wait_for_mon_quorum_size(len(mons)) # in forward and reverse order, rmons = mons rmons.reverse() for mons in mons, rmons: log.info('stopping all monitors') for m in mons: manager.kill_mon(m) log.info('forming a minimal quorum for %s, then adding monitors' % mons) qnum = (len(mons) // 2) + 1 num = 0 for m in mons: manager.revive_mon(m) num += 1 if num >= qnum: manager.wait_for_mon_quorum_size(num) # on both leader and non-leader ranks... for rank in [0, 1]: # take one out log.info('removing mon %s' % mons[rank]) manager.kill_mon(mons[rank]) manager.wait_for_mon_quorum_size(len(mons) - 1) log.info('causing some monitor log activity') m = 30 for n in range(1, m): manager.raw_cluster_cmd('log', '%d of %d' % (n, m)) log.info('adding mon %s back in' % mons[rank]) manager.revive_mon(mons[rank]) manager.wait_for_mon_quorum_size(len(mons))
def task(ctx, config): """ Test (non-backfill) recovery """ if config is None: config = {} assert isinstance(config, dict), \ 'task only accepts a dict for configuration' testdir = teuthology.get_testdir(ctx) first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.keys() num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') log.info('num_osds is %s' % num_osds) assert num_osds == 3 manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) while len(manager.get_osd_status()['up']) < 3: time.sleep(10) manager.flush_pg_stats([0, 1, 2]) manager.wait_for_clean() # test some osdmap flags manager.raw_cluster_cmd('osd', 'set', 'noin') manager.raw_cluster_cmd('osd', 'set', 'noout') manager.raw_cluster_cmd('osd', 'set', 'noup') manager.raw_cluster_cmd('osd', 'set', 'nodown') manager.raw_cluster_cmd('osd', 'unset', 'noin') manager.raw_cluster_cmd('osd', 'unset', 'noout') manager.raw_cluster_cmd('osd', 'unset', 'noup') manager.raw_cluster_cmd('osd', 'unset', 'nodown') # write some new data p = rados_start( testdir, mon, ['-p', 'rbd', 'bench', '20', 'write', '-b', '4096', '--no-cleanup']) time.sleep(15) # trigger a divergent target: # blackhole + restart osd.1 (shorter log) manager.blackhole_kill_osd(1) # kill osd.2 (longer log... we'll make it divergent below) manager.kill_osd(2) time.sleep(2) manager.revive_osd(1) # wait for our writes to complete + succeed err = p.wait() log.info('err is %d' % err) # cluster must repeer manager.flush_pg_stats([0, 1]) manager.wait_for_active_or_down() # write some more (make sure osd.2 really is divergent) p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096']) p.wait() # revive divergent osd manager.revive_osd(2) while len(manager.get_osd_status()['up']) < 3: log.info('waiting a bit...') time.sleep(2) log.info('3 are up!') # cluster must recover manager.flush_pg_stats([0, 1, 2]) manager.wait_for_clean()
def __init__(self, ctx):
    self._ctx = ctx
    # self.admin_remote was referenced but never assigned; derive it from the
    # first mon, as the other constructors in this module do.
    first_mon = misc.get_first_mon(ctx, None)
    (self.admin_remote,) = ctx.cluster.only(first_mon).remotes.keys()
    self.mon_manager = ceph_manager.CephManager(
        self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
def task(ctx, config): """ Test handling of lost objects on an ec pool. A pretty rigid cluster is brought up andtested by this task """ if config is None: config = {} assert isinstance(config, dict), \ 'lost_unfound task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.keys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) manager.wait_for_clean() profile = config.get('erasure_code_profile', { 'k': '2', 'm': '2', 'crush-failure-domain': 'osd' }) profile_name = profile.get('name', 'lost_unfound') manager.create_erasure_code_profile(profile_name, profile) pool = manager.create_pool_with_unique_name( erasure_code_profile_name=profile_name, min_size=2) # something that is always there, readable and never empty dummyfile = '/etc/group' # kludge to make sure they get a map rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile]) manager.flush_pg_stats([0, 1]) manager.wait_for_recovery() # create old objects for f in range(1, 10): rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f]) # delay recovery, and make the pg log very long (to prevent backfill) manager.raw_cluster_cmd( 'tell', 'osd.1', 'injectargs', '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' ) manager.kill_osd(0) manager.mark_down_osd(0) manager.kill_osd(3) manager.mark_down_osd(3) for f in range(1, 10): rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile]) rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) # take out osd.1 and a necessary shard of those objects. 
manager.kill_osd(1) manager.mark_down_osd(1) manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') manager.revive_osd(0) manager.wait_till_osd_is_up(0) manager.revive_osd(3) manager.wait_till_osd_is_up(3) manager.flush_pg_stats([0, 2, 3]) manager.wait_till_active() manager.flush_pg_stats([0, 2, 3]) # verify that there are unfound objects unfound = manager.get_num_unfound_objects() log.info("there are %d unfound objects" % unfound) assert unfound testdir = teuthology.get_testdir(ctx) procs = [] if config.get('parallel_bench', True): procs.append(mon.run( args=[ "/bin/sh", "-c", " ".join(['adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage', 'rados', '--no-log-to-stderr', '--name', 'client.admin', '-b', str(4<<10), '-p' , pool, '-t', '20', 'bench', '240', 'write', ]).format(tdir=testdir), ], logger=log.getChild('radosbench.{id}'.format(id='client.admin')), stdin=run.PIPE, wait=False )) time.sleep(10) # mark stuff lost pgs = manager.get_pg_stats() for pg in pgs: if pg['stat_sum']['num_objects_unfound'] > 0: # verify that i can list them direct from the osd log.info('listing missing/lost in %s state %s', pg['pgid'], pg['state']); m = manager.list_pg_unfound(pg['pgid']) log.info('%s' % m) assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] log.info("reverting unfound in %s", pg['pgid']) manager.raw_cluster_cmd('pg', pg['pgid'], 'mark_unfound_lost', 'delete') else: log.info("no unfound in %s", pg['pgid']) manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5') manager.flush_pg_stats([0, 2, 3]) manager.wait_for_recovery() if not config.get('parallel_bench', True): time.sleep(20) # verify result for f in range(1, 10): err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-']) assert err err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-']) assert err err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-']) assert err # see if osd.1 can cope manager.revive_osd(1) manager.wait_till_osd_is_up(1) manager.wait_for_clean() run.wait(procs)
def task(ctx, config): """ Test peering. """ if config is None: config = {} assert isinstance(config, dict), \ 'peer task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.keys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) while len(manager.get_osd_status()['up']) < 3: time.sleep(10) manager.flush_pg_stats([0, 1, 2]) manager.wait_for_clean() for i in range(3): manager.set_config(i, osd_recovery_delay_start=120) # take on osd down manager.kill_osd(2) manager.mark_down_osd(2) # kludge to make sure they get a map rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-']) manager.flush_pg_stats([0, 1]) manager.wait_for_recovery() # kill another and revive 2, so that some pgs can't peer. manager.kill_osd(1) manager.mark_down_osd(1) manager.revive_osd(2) manager.wait_till_osd_is_up(2) manager.flush_pg_stats([0, 2]) manager.wait_for_active_or_down() manager.flush_pg_stats([0, 2]) # look for down pgs num_down_pgs = 0 pgs = manager.get_pg_stats() for pg in pgs: out = manager.raw_cluster_cmd('pg', pg['pgid'], 'query') log.debug("out string %s", out) j = json.loads(out) log.info("pg is %s, query json is %s", pg, j) if pg['state'].count('down'): num_down_pgs += 1 # verify that it is blocked on osd.1 rs = j['recovery_state'] assert len(rs) >= 2 assert rs[0]['name'] == 'Started/Primary/Peering/Down' assert rs[1]['name'] == 'Started/Primary/Peering' assert rs[1]['blocked'] assert rs[1]['down_osds_we_would_probe'] == [1] assert len(rs[1]['peering_blocked_by']) == 1 assert rs[1]['peering_blocked_by'][0]['osd'] == 1 assert num_down_pgs > 0 # bring it all back manager.revive_osd(1) manager.wait_till_osd_is_up(1) manager.flush_pg_stats([0, 1, 2]) manager.wait_for_clean()
def task(ctx, config): """ Test [deep] scrub tasks: - chef: - install: - ceph: log-ignorelist: - '!= data_digest' - '!= omap_digest' - '!= size' - deep-scrub 0 missing, 1 inconsistent objects - deep-scrub [0-9]+ errors - repair 0 missing, 1 inconsistent objects - repair [0-9]+ errors, [0-9]+ fixed - shard [0-9]+ .* : missing - deep-scrub 1 missing, 1 inconsistent objects - does not match object info size - attr name mistmatch - deep-scrub 1 missing, 0 inconsistent objects - failed to pick suitable auth object - candidate size [0-9]+ info size [0-9]+ mismatch conf: osd: osd deep scrub update digest min age: 0 - scrub_test: """ if config is None: config = {} assert isinstance(config, dict), \ 'scrub_test task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.keys() num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') log.info('num_osds is %s' % num_osds) manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) while len(manager.get_osd_status()['up']) < num_osds: time.sleep(10) for i in range(num_osds): manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs', '--', '--osd-objectstore-fuse') manager.flush_pg_stats(range(num_osds)) manager.wait_for_clean() osd_dump = manager.get_osd_dump_json() poolid = -1 for p in osd_dump['pools']: if p['pool_name'] == 'rbd': poolid = p['pool'] break assert poolid != -1 # write some data p = manager.do_rados(['bench', '--no-cleanup', '1', 'write', '-b', '4096'], pool='rbd') log.info('err is %d' % p.exitstatus) # wait for some PG to have data that we can mess with pg, acting = wait_for_victim_pg(manager, poolid) osd = acting[0] osd_remote, obj_path, obj_name = find_victim_object(ctx, pg, osd) manager.do_rados(['setomapval', obj_name, 'key', 'val'], pool='rbd') log.info('err is %d' % p.exitstatus) manager.do_rados(['setomapheader', obj_name, 'hdr'], pool='rbd') log.info('err is %d' % p.exitstatus) # Update missing digests, requires "osd deep scrub update digest min age: 0" pgnum = get_pgnum(pg) manager.do_pg_scrub('rbd', pgnum, 'deep-scrub') log.info('messing with PG %s on osd %d' % (pg, osd)) test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, 'rbd') test_repair_bad_omap(ctx, manager, pg, osd, obj_name) test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd, obj_name, obj_path) log.info('test successful!') # shut down fuse mount for i in range(num_osds): manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs', '--', '--no-osd-objectstore-fuse') time.sleep(5) log.info('done')
def task(ctx, config): """ Go through filesystem creation with a synthetic failure in an MDS in its 'up:creating' state, to exercise the retry behaviour. """ # Grab handles to the teuthology objects of interest mdslist = list(misc.all_roles_of_type(ctx.cluster, 'mds')) if len(mdslist) != 1: # Require exactly one MDS, the code path for creation failure when # a standby is available is different raise RuntimeError("This task requires exactly one MDS") mds_id = mdslist[0] (mds_remote, ) = ctx.cluster.only( 'mds.{_id}'.format(_id=mds_id)).remotes.keys() manager = ceph_manager.CephManager( mds_remote, ctx=ctx, logger=log.getChild('ceph_manager'), ) # Stop MDS self.fs.set_max_mds(0) self.fs.mds_stop(mds_id) self.fs.mds_fail(mds_id) # Reset the filesystem so that next start will go into CREATING manager.raw_cluster_cmd('fs', 'rm', "default", "--yes-i-really-mean-it") manager.raw_cluster_cmd('fs', 'new', "default", "metadata", "data") # Start the MDS with mds_kill_create_at set, it will crash during creation mds.restart_with_args(["--mds_kill_create_at=1"]) try: mds.wait_for_exit() except CommandFailedError as e: if e.exitstatus == 1: log.info("MDS creation killed as expected") else: log.error("Unexpected status code %s" % e.exitstatus) raise # Since I have intentionally caused a crash, I will clean up the resulting core # file to avoid task.internal.coredump seeing it as a failure. log.info("Removing core file from synthetic MDS failure") mds_remote.run(args=[ 'rm', '-f', Raw("{archive}/coredump/*.core".format( archive=misc.get_archive_dir(ctx))) ]) # It should have left the MDS map state still in CREATING status = self.fs.status().get_mds(mds_id) assert status['state'] == 'up:creating' # Start the MDS again without the kill flag set, it should proceed with creation successfully mds.restart() # Wait for state ACTIVE self.fs.wait_for_state("up:active", timeout=120, mds_id=mds_id) # The system should be back up in a happy healthy state, go ahead and run any further tasks # inside this context. yield
def task(ctx, config): """ Stress test the mds by running scrub iterations while another task/workunit is running. Example config: - fwd_scrub: scrub_timeout: 300 sleep_between_iterations: 1 """ mds_cluster = MDSCluster(ctx) if config is None: config = {} assert isinstance(config, dict), \ 'fwd_scrub task only accepts a dict for configuration' mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds')) assert len(mdslist) > 0, \ 'fwd_scrub task requires at least 1 metadata server' (first, ) = ctx.cluster.only(f'mds.{mdslist[0]}').remotes.keys() manager = ceph_manager.CephManager( first, ctx=ctx, logger=log.getChild('ceph_manager'), ) # make sure everyone is in active, standby, or standby-replay log.info('Wait for all MDSs to reach steady state...') status = mds_cluster.status() while True: steady = True for info in status.get_all(): state = info['state'] if state not in ('up:active', 'up:standby', 'up:standby-replay'): steady = False break if steady: break sleep(2) status = mds_cluster.status() log.info('Ready to start scrub thrashing') manager.wait_for_clean() assert manager.is_clean() if 'cluster' not in config: config['cluster'] = 'ceph' for fs in status.get_filesystems(): fwd_scrubber = ForwardScrubber(Filesystem(ctx, fscid=fs['id']), config['scrub_timeout'], config['sleep_between_iterations']) fwd_scrubber.start() ctx.ceph[config['cluster']].thrashers.append(fwd_scrubber) try: log.debug('Yielding') yield finally: log.info('joining ForwardScrubbers') stop_all_fwd_scrubbers(ctx.ceph[config['cluster']].thrashers) log.info('done joining')
def task(ctx, config): """ Test handling of lost objects. A pretty rigid cluster is brought up and tested by this task """ POOL = 'unfound_pool' if config is None: config = {} assert isinstance(config, dict), \ 'lost_unfound task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.keys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) while len(manager.get_osd_status()['up']) < 3: time.sleep(10) manager.wait_for_clean() manager.create_pool(POOL) # something that is always there dummyfile = '/etc/fstab' # take an osd out until the very end manager.kill_osd(2) manager.mark_down_osd(2) manager.mark_out_osd(2) # kludge to make sure they get a map rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile]) manager.flush_pg_stats([0, 1]) manager.wait_for_recovery() # create old objects for f in range(1, 10): rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f]) # delay recovery, and make the pg log very long (to prevent backfill) manager.raw_cluster_cmd( 'tell', 'osd.1', 'injectargs', '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000') manager.kill_osd(0) manager.mark_down_osd(0) for f in range(1, 10): rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile]) rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) # bring osd.0 back up, let it peer, but don't replicate the new # objects... log.info('osd.0 command_args is %s' % 'foo') log.info(ctx.daemons.get_daemon('osd', 0).command_args) ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend( ['--osd-recovery-delay-start', '1000']) manager.revive_osd(0) manager.mark_in_osd(0) manager.wait_till_osd_is_up(0) manager.flush_pg_stats([1, 0]) manager.wait_till_active() # take out osd.1 and the only copy of those objects. 
manager.kill_osd(1) manager.mark_down_osd(1) manager.mark_out_osd(1) manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') # bring up osd.2 so that things would otherwise, in theory, recovery fully manager.revive_osd(2) manager.mark_in_osd(2) manager.wait_till_osd_is_up(2) manager.flush_pg_stats([0, 2]) manager.wait_till_active() manager.flush_pg_stats([0, 2]) # verify that there are unfound objects unfound = manager.get_num_unfound_objects() log.info("there are %d unfound objects" % unfound) assert unfound testdir = teuthology.get_testdir(ctx) procs = [] if config.get('parallel_bench', True): procs.append( mon.run(args=[ "/bin/sh", "-c", " ".join([ 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage', 'rados', '--no-log-to-stderr', '--name', 'client.admin', '-b', str(4 << 10), '-p', POOL, '-t', '20', 'bench', '240', 'write', ]).format(tdir=testdir), ], logger=log.getChild( 'radosbench.{id}'.format(id='client.admin')), stdin=run.PIPE, wait=False)) time.sleep(10) # mark stuff lost pgs = manager.get_pg_stats() for pg in pgs: if pg['stat_sum']['num_objects_unfound'] > 0: primary = 'osd.%d' % pg['acting'][0] # verify that i can list them direct from the osd log.info('listing missing/lost in %s state %s', pg['pgid'], pg['state']) m = manager.list_pg_unfound(pg['pgid']) #log.info('%s' % m) assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] assert m['available_might_have_unfound'] == True assert m['might_have_unfound'][0]['osd'] == "1" assert m['might_have_unfound'][0]['status'] == "osd is down" num_unfound = 0 for o in m['objects']: if len(o['locations']) == 0: num_unfound += 1 assert m['num_unfound'] == num_unfound log.info("reverting unfound in %s on %s", pg['pgid'], primary) manager.raw_cluster_cmd('pg', pg['pgid'], 'mark_unfound_lost', 'revert') else: log.info("no unfound in %s", pg['pgid']) manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') manager.flush_pg_stats([0, 2]) manager.wait_for_recovery() # verify result for f in range(1, 10): err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-']) assert err err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-']) assert err err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-']) assert not err # see if osd.1 can cope manager.mark_in_osd(1) manager.revive_osd(1) manager.wait_till_osd_is_up(1) manager.wait_for_clean() run.wait(procs) manager.wait_for_clean()
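# For reference, the assertions above treat list_pg_unfound() / "ceph pg
# <pgid> list_unfound" output as a structure shaped like this (field values
# and the object id below are illustrative):
example_list_unfound = {
    "num_unfound": 9,
    "available_might_have_unfound": True,
    "might_have_unfound": [
        {"osd": "1", "status": "osd is down"},
    ],
    "objects": [
        # an object with an empty locations list counts as unfound
        {"oid": {"oid": "new_1", "key": "", "snapid": -2}, "locations": []},
    ],
}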
def task(ctx, config): """ Test handling of objects with inconsistent hash info during backfill and deep-scrub. A pretty rigid cluster is brought up and tested by this task """ if config is None: config = {} assert isinstance(config, dict), \ 'ec_inconsistent_hinfo task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon,) = ctx.cluster.only(first_mon).remotes.keys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) profile = config.get('erasure_code_profile', { 'k': '2', 'm': '1', 'crush-failure-domain': 'osd' }) profile_name = profile.get('name', 'backfill_unfound') manager.create_erasure_code_profile(profile_name, profile) pool = manager.create_pool_with_unique_name( pg_num=1, erasure_code_profile_name=profile_name, min_size=2) manager.raw_cluster_cmd('osd', 'pool', 'set', pool, 'pg_autoscale_mode', 'off') manager.flush_pg_stats([0, 1, 2, 3]) manager.wait_for_clean() pool_id = manager.get_pool_num(pool) pgid = '%d.0' % pool_id pgs = manager.get_pg_stats() acting = next((pg['acting'] for pg in pgs if pg['pgid'] == pgid), None) log.info("acting=%s" % acting) assert acting primary = acting[0] # something that is always there, readable and never empty dummyfile = '/etc/group' # kludge to make sure they get a map rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile]) manager.flush_pg_stats([0, 1]) manager.wait_for_recovery() log.debug("create test object") obj = 'test' rados(ctx, mon, ['-p', pool, 'put', obj, dummyfile]) victim = acting[1] log.info("remove test object hash info from osd.%s shard and test deep-scrub and repair" % victim) manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', object_name=obj, osd=victim) check_time_now = time.strftime('%s') manager.raw_cluster_cmd('pg', 'deep-scrub', pgid) wait_for_deep_scrub_complete(manager, pgid, check_time_now, True) check_time_now = time.strftime('%s') manager.raw_cluster_cmd('pg', 'repair', pgid) wait_for_deep_scrub_complete(manager, pgid, check_time_now, False) log.info("remove test object hash info from primary osd.%s shard and test backfill" % primary) log.debug("write some data") rados(ctx, mon, ['-p', pool, 'bench', '30', 'write', '-b', '4096', '--no-cleanup']) manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', object_name=obj, osd=primary) # mark the osd out to trigger a rebalance/backfill source = acting[1] target = [x for x in [0, 1, 2, 3] if x not in acting][0] manager.mark_out_osd(source) # wait for everything to peer, backfill and recover wait_for_backfilling_complete(manager, pgid, source, target) manager.wait_for_clean() manager.flush_pg_stats([0, 1, 2, 3]) pgs = manager.get_pg_stats() pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) log.debug('pg=%s' % pg) assert pg assert 'clean' in pg['state'].split('+') assert 'inconsistent' not in pg['state'].split('+') unfound = manager.get_num_unfound_objects() log.debug("there are %d unfound objects" % unfound) assert unfound == 0 source, target = target, source log.info("remove test object hash info from non-primary osd.%s shard and test backfill" % source) manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', object_name=obj, osd=source) # mark the osd in to trigger a rebalance/backfill manager.mark_in_osd(target) # wait for everything to peer, backfill and recover wait_for_backfilling_complete(manager, pgid, source, target) manager.wait_for_clean() manager.flush_pg_stats([0, 1, 2, 3]) pgs = manager.get_pg_stats() pg = next((pg for pg in pgs if 
pg['pgid'] == pgid), None) log.debug('pg=%s' % pg) assert pg assert 'clean' in pg['state'].split('+') assert 'inconsistent' not in pg['state'].split('+') unfound = manager.get_num_unfound_objects() log.debug("there are %d unfound objects" % unfound) assert unfound == 0 log.info("remove hash info from two shards and test backfill") source = acting[2] target = [x for x in [0, 1, 2, 3] if x not in acting][0] manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', object_name=obj, osd=primary) manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', object_name=obj, osd=source) # mark the osd out to trigger a rebalance/backfill manager.mark_out_osd(source) # wait for everything to peer, backfill and detect unfound object wait_for_backfilling_complete(manager, pgid, source, target) # verify that there is unfound object manager.flush_pg_stats([0, 1, 2, 3]) pgs = manager.get_pg_stats() pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) log.debug('pg=%s' % pg) assert pg assert 'backfill_unfound' in pg['state'].split('+') unfound = manager.get_num_unfound_objects() log.debug("there are %d unfound objects" % unfound) assert unfound == 1 m = manager.list_pg_unfound(pgid) log.debug('list_pg_unfound=%s' % m) assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] # mark stuff lost pgs = manager.get_pg_stats() manager.raw_cluster_cmd('pg', pgid, 'mark_unfound_lost', 'delete') # wait for everything to peer and be happy... manager.flush_pg_stats([0, 1, 2, 3]) manager.wait_for_recovery()
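# manager.objectstore_tool() wraps running ceph-objectstore-tool on the OSD
# host with the OSD stopped.  Removing the erasure-code hash-info attribute,
# as done above, corresponds roughly to a command like the one built here;
# the data path is illustrative and depends on the deployment.
def sketch_rm_hinfo_cmd(osd_id, pgid, object_name):
    return ['ceph-objectstore-tool',
            '--data-path', '/var/lib/ceph/osd/ceph-%d' % osd_id,
            '--pgid', pgid,
            object_name,
            'rm-attr', 'hinfo_key']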