def task(ctx, config):
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mon_clock_skew_check task only accepts a dict for configuration'
    interval = float(config.get('interval', 30.0))
    expect_skew = config.get('expect-skew', False)

    log.info('Beginning mon_clock_skew_check...')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()
    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    quorum_size = len(teuthology.get_mon_names(ctx))
    manager.wait_for_mon_quorum_size(quorum_size)

    # wait a bit
    log.info('sleeping for {s} seconds'.format(s=interval))
    time.sleep(interval)

    health = manager.get_mon_health(True)
    log.info('got health %s' % health)
    if expect_skew:
        if 'MON_CLOCK_SKEW' not in health['checks']:
            raise RuntimeError('expected MON_CLOCK_SKEW but got none')
    else:
        if 'MON_CLOCK_SKEW' in health['checks']:
            raise RuntimeError('got MON_CLOCK_SKEW but expected none')
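
# A hedged illustration of the health format the expect-skew branch above
# relies on: with structured (luminous-style) health reports, detected
# issues are keyed by check name under 'checks'. Everything below is made
# up sample data, not output captured from a real cluster.
def _example_mon_health():
    return {
        'status': 'HEALTH_WARN',
        'checks': {
            'MON_CLOCK_SKEW': {
                'severity': 'HEALTH_WARN',
                'summary': {'message': 'clock skew detected on mon.b'},
            },
        },
    }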
def __init__(self, ctx, config):
    self._ctx = ctx
    self._config = config

    mds_list = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
    if len(mds_list) != 1:
        # Require exactly one MDS, the code path for creation failure when
        # a standby is available is different
        raise RuntimeError("This task requires exactly one MDS")

    self.mds_id = mds_list[0]

    (mds_remote,) = ctx.cluster.only(
        'mds.{_id}'.format(_id=self.mds_id)).remotes.iterkeys()
    manager = ceph_manager.CephManager(
        mds_remote,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )
    self.mds_manager = manager

    client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client'))
    self.client_id = client_list[0]
    self.client_remote = list(
        misc.get_clients(ctx=ctx,
                         roles=["client.{0}".format(self.client_id)]))[0][1]

    self.test_files = ['a', 'b', 'c']
def task(ctx, config):
    """
    Stress test the monitor by thrashing them while another task/workunit
    is running.

    Please refer to MonitorThrasher class for further information on the
    available options.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mon_thrash task only accepts a dict for configuration'
    assert len(_get_mons(ctx)) > 2, \
        'mon_thrash task requires at least 3 monitors'
    log.info('Beginning mon_thrash...')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )
    thrash_proc = MonitorThrasher(ctx,
                                  manager, config,
                                  logger=log.getChild('mon_thrasher'))
    try:
        log.debug('Yielding')
        yield
    finally:
        log.info('joining mon_thrasher')
        thrash_proc.do_join()
        mons = _get_mons(ctx)
        manager.wait_for_mon_quorum_size(len(mons))
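
# A hedged configuration example for the task above. The option names
# revive_delay and thrash_delay are assumed MonitorThrasher knobs (see
# that class for the authoritative list); the values are illustrative:
#
#   tasks:
#   - ceph:
#   - mon_thrash:
#       revive_delay: 20
#       thrash_delay: 1
#   - workunit: ...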
def task(ctx, config):
    """
    Test monitor recovery from OSD
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    manager = ceph_manager.CephManager(mon, ctx=ctx,
                                       logger=log.getChild('ceph_manager'))

    mons = ctx.cluster.only(teuthology.is_type('mon'))
    # note down the first cluster_name and mon_id
    # we will recover it later on
    cluster_name, _, mon_id = teuthology.split_role(first_mon)
    _nuke_mons(manager, mons, mon_id)
    default_keyring = '/etc/ceph/{cluster}.keyring'.format(
        cluster=cluster_name)
    keyring_path = config.get('keyring_path', default_keyring)
    _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path)
    _revive_mons(manager, mons, mon_id, keyring_path)
    _revive_mgrs(ctx, manager)
    _revive_osds(ctx, manager)
def task(ctx, config):
    """
    Use the ClockSkewCheck class to check for clock skews on the monitors.
    This task will spawn a thread running ClockSkewCheck's do_check().

    All the configuration will be directly handled by ClockSkewCheck,
    so please refer to the class documentation for further information.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mon_clock_skew_check task only accepts a dict for configuration'
    log.info('Beginning mon_clock_skew_check...')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    skew_check = ClockSkewCheck(ctx,
                                manager, config,
                                logger=log.getChild('mon_clock_skew_check'))
    skew_check_thread = gevent.spawn(skew_check.do_check)
    try:
        yield
    finally:
        log.info('joining mon_clock_skew_check')
        skew_check.finish()
        skew_check_thread.get()
def task(ctx, config):
    """
    Test [deep] repair in several situations:
      Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica]

    The config should be as follows:

      Must include the log-whitelist below
      Must enable filestore_debug_inject_read_err

    config example:

    tasks:
    - chef:
    - install:
    - ceph:
        log-whitelist:
          - 'candidate had a stat error'
          - 'candidate had a read error'
          - 'deep-scrub 0 missing, 1 inconsistent objects'
          - 'deep-scrub 0 missing, 4 inconsistent objects'
          - 'deep-scrub 1 errors'
          - 'deep-scrub 4 errors'
          - '!= known omap_digest'
          - 'repair 0 missing, 1 inconsistent objects'
          - 'repair 0 missing, 4 inconsistent objects'
          - 'repair 1 errors, 1 fixed'
          - 'repair 4 errors, 4 fixed'
          - 'scrub 0 missing, 1 inconsistent'
          - 'scrub 1 errors'
          - 'size 1 != known size'
        conf:
          osd:
            filestore debug inject read err: true
    - repair_test:
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'repair_test task only accepts a dict for config'

    if not hasattr(ctx, 'manager'):
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
        ctx.manager = ceph_manager.CephManager(
            mon,
            ctx=ctx,
            logger=log.getChild('ceph_manager'))

    ctx.manager.wait_for_all_up()

    repair_test_1(ctx, mdataerr, choose_primary, "scrub")
    repair_test_1(ctx, mdataerr, choose_replica, "scrub")
    repair_test_1(ctx, dataerr, choose_primary, "deep-scrub")
    repair_test_1(ctx, dataerr, choose_replica, "deep-scrub")
    repair_test_1(ctx, trunc, choose_primary, "scrub")
    repair_test_1(ctx, trunc, choose_replica, "scrub")

    repair_test_2(ctx, config, choose_primary)
    repair_test_2(ctx, config, choose_replica)

    repair_test_erasure_code(ctx, hinfoerr, 'primary', "deep-scrub")
def task(ctx, config):
    """
    Die if {testdir}/err exists or if an OSD dumps core
    """
    if config is None:
        config = {}

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < num_osds:
        time.sleep(10)

    testdir = teuthology.get_testdir(ctx)

    while True:
        for i in range(num_osds):
            (osd_remote,) = ctx.cluster.only('osd.%d' % i).remotes.iterkeys()
            p = osd_remote.run(
                args=['test', '-e', '{tdir}/err'.format(tdir=testdir)],
                wait=True,
                check_status=False,
            )
            exit_status = p.exitstatus

            if exit_status == 0:
                log.info("osd %d has an error" % i)
                raise Exception("osd %d error" % i)

            log_path = '/var/log/ceph/osd.%d.log' % (i)

            p = osd_remote.run(
                args=[
                    'tail', '-1', log_path,
                    run.Raw('|'),
                    'grep', '-q', 'end dump',
                ],
                wait=True,
                check_status=False,
            )
            exit_status = p.exitstatus

            if exit_status == 0:
                log.info("osd %d dumped core" % i)
                raise Exception("osd %d dumped core" % i)

        time.sleep(5)
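
# The watcher above only consumes the sentinel file; some co-running
# workload is expected to produce it. A hedged sketch of the producing
# side (this helper is hypothetical, not part of the suite):
def _signal_error(remote, testdir):
    # Touch the sentinel file that the watcher polls with `test -e`,
    # causing it to raise and fail the run.
    remote.run(args=['touch', '{tdir}/err'.format(tdir=testdir)])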
def task(ctx, config):
    """
    Benchmark the recovery system.

    Generates objects with smalliobench, runs it normally to get a
    baseline performance measurement, then marks an OSD out and reruns
    to measure performance during recovery.

    The config should be as follows:

    recovery_bench:
        duration: <seconds for each measurement run>
        num_objects: <number of objects>
        io_size: <io size in bytes>

    example:

    tasks:
    - ceph:
    - recovery_bench:
        duration: 60
        num_objects: 500
        io_size: 4096
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'recovery_bench task only accepts a dict for configuration'

    log.info('Beginning recovery bench...')

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    while len(manager.get_osd_status()['up']) < num_osds:
        manager.sleep(10)

    bench_proc = RecoveryBencher(
        manager,
        config,
        )
    try:
        yield
    finally:
        log.info('joining recovery bencher')
        bench_proc.do_join()
def task(ctx, config):
    """
    Test [deep] repair in several situations:
      Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica]

    The config should be as follows:

      Must include the log-whitelist below
      Must enable filestore_debug_inject_read_err

    config example:

    tasks:
    - chef:
    - install:
    - ceph:
        log-whitelist: ['candidate had a read error',
                        'deep-scrub 0 missing, 1 inconsistent objects',
                        'deep-scrub 0 missing, 4 inconsistent objects',
                        'deep-scrub 1 errors',
                        'deep-scrub 4 errors',
                        '!= known omap_digest',
                        'repair 0 missing, 1 inconsistent objects',
                        'repair 0 missing, 4 inconsistent objects',
                        'repair 1 errors, 1 fixed',
                        'repair 4 errors, 4 fixed',
                        'scrub 0 missing, 1 inconsistent',
                        'scrub 1 errors',
                        'size 1 != known size']
        conf:
          osd:
            filestore debug inject read err: true
    - repair_test:
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'repair_test task only accepts a dict for config'

    if not hasattr(ctx, 'manager'):
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
        ctx.manager = ceph_manager.CephManager(
            mon,
            ctx=ctx,
            logger=log.getChild('ceph_manager'))

    tests = [
        gen_repair_test_1(mdataerr(ctx), choose_primary(ctx), "scrub"),
        gen_repair_test_1(mdataerr(ctx), choose_replica(ctx), "scrub"),
        gen_repair_test_1(dataerr(ctx), choose_primary(ctx), "deep-scrub"),
        gen_repair_test_1(dataerr(ctx), choose_replica(ctx), "deep-scrub"),
        gen_repair_test_1(trunc(ctx), choose_primary(ctx), "scrub"),
        gen_repair_test_1(trunc(ctx), choose_replica(ctx), "scrub"),
        gen_repair_test_2(choose_primary(ctx)),
        gen_repair_test_2(choose_replica(ctx)),
    ]

    for test in tests:
        run_test(ctx, config, test)
def task(ctx, config):
    """
    Run scrub periodically. Randomly chooses an OSD to scrub.

    The config should be as follows:

    scrub:
        frequency: <seconds between scrubs>
        deep: <bool for deepness>

    example:

    tasks:
    - ceph:
    - scrub:
        frequency: 30
        deep: 0
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'scrub task only accepts a dict for configuration'

    log.info('Beginning scrub...')

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    while len(manager.get_osd_status()['up']) < num_osds:
        time.sleep(10)

    scrub_proc = Scrubber(
        manager,
        config,
        )
    try:
        yield
    finally:
        log.info('joining scrub')
        scrub_proc.do_join()
def setup(ctx, config):
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    ctx.manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )
    ctx.manager.clear_pools()
    ctx.manager.create_pool(POOLNAME, config.num_pgs)
    log.info("populating pool")
    ctx.manager.rados_write_objects(
        POOLNAME,
        config.num_objects,
        config.object_size,
        config.creation_time_limit,
        config.create_threads)
    log.info("done populating pool")
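
# Unlike the dict-based task configs elsewhere in this section, setup()
# reads attribute-style fields from its config object. A hedged usage
# sketch; argparse.Namespace is just one convenient way to build such an
# object, and the values are illustrative:
def _example_setup_config():
    import argparse
    return argparse.Namespace(
        num_pgs=64,                # pgs for the test pool
        num_objects=500,           # objects to prepopulate
        object_size=1024,          # bytes per object
        creation_time_limit=60,    # seconds allowed for population
        create_threads=5,          # parallel writer threads
    )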
def task(ctx, config):
    """
    Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio
    configuration settings

    In order for the test to pass, the log-whitelist must be used as follows:

    tasks:
    - chef:
    - install:
    - ceph:
        log-whitelist: ['OSD near full', 'OSD full dropping all updates']
    - osd_failsafe_enospc:
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'osd_failsafe_enospc task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )
    ctx.manager = manager

    # Give 2 seconds for injectargs + osd_op_complaint_time (30) +
    # 2 * osd_heartbeat_interval (6) + 6 padding
    sleep_time = 50

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'

    # create 1 pg pool with 1 rep which can only be on osd.0
    osds = manager.get_osd_dump()
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_out_osd(osd['osd'])

    log.info('creating pool foo')
    manager.create_pool("foo")
    manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1')

    # State NONE -> NEAR
    log.info('1. Verify warning messages when exceeding nearfull_ratio')

    proc = mon.run(
        args=['daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_nearfull_ratio .00001')

    time.sleep(sleep_time)
    proc.stdin.close()  # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
    assert count == 2, \
        'Incorrect number of warning messages expected 2 got %d' % count
    count = len(filter(lambda line:
                       '[ERR] OSD full dropping all updates' in line, lines))
    assert count == 0, \
        'Incorrect number of error messages expected 0 got %d' % count

    # State NEAR -> FULL
    log.info('2. Verify error messages when exceeding full_ratio')

    proc = mon.run(
        args=['daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_full_ratio .00001')

    time.sleep(sleep_time)
    proc.stdin.close()  # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = len(filter(lambda line:
                       '[ERR] OSD full dropping all updates' in line, lines))
    assert count == 2, \
        'Incorrect number of error messages expected 2 got %d' % count

    log.info('3. Verify write failure when exceeding full_ratio')

    # Write data should fail
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile])
    assert ret != 0, 'Expected write failure but it succeeded with exit status 0'

    # Put back default
    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_full_ratio .97')
    time.sleep(10)

    # State FULL -> NEAR
    log.info('4. Verify write success when NOT exceeding full_ratio')

    # Write should succeed
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2])
    assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret

    log.info('5. Verify warning messages again when exceeding nearfull_ratio')

    proc = mon.run(
        args=['daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    time.sleep(sleep_time)
    proc.stdin.close()  # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
    assert count == 1 or count == 2, \
        'Incorrect number of warning messages expected 1 or 2 got %d' % count
    count = len(filter(lambda line:
                       '[ERR] OSD full dropping all updates' in line, lines))
    assert count == 0, \
        'Incorrect number of error messages expected 0 got %d' % count

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_nearfull_ratio .90')
    time.sleep(10)

    # State NONE -> FULL
    log.info('6. Verify error messages again when exceeding full_ratio')

    proc = mon.run(
        args=['daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_full_ratio .00001')

    time.sleep(sleep_time)
    proc.stdin.close()  # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
    assert count == 0, \
        'Incorrect number of warning messages expected 0 got %d' % count
    count = len(filter(lambda line:
                       '[ERR] OSD full dropping all updates' in line, lines))
    assert count == 2, \
        'Incorrect number of error messages expected 2 got %d' % count

    # State FULL -> NONE
    log.info('7. Verify no messages when settings are back to default')

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs',
                            '--osd_failsafe_full_ratio .97')
    time.sleep(10)

    proc = mon.run(
        args=['daemon-helper', 'kill', 'ceph', '-w'],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    time.sleep(sleep_time)
    proc.stdin.close()  # causes daemon-helper to send SIGKILL to ceph -w
    proc.wait()

    lines = proc.stdout.getvalue().split('\n')

    count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
    assert count == 0, \
        'Incorrect number of warning messages expected 0 got %d' % count
    count = len(filter(lambda line:
                       '[ERR] OSD full dropping all updates' in line, lines))
    assert count == 0, \
        'Incorrect number of error messages expected 0 got %d' % count

    log.info('Test Passed')

    # Bring all OSDs back in
    manager.remove_pool("foo")
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_in_osd(osd['osd'])
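
# Tasks in this section lean on a rados() helper that runs the rados CLI
# on a remote and reports its exit status. A hedged sketch of such a
# helper; the real one lives in the suite's util module, and the coverage
# wrapper shown here is an assumption:
def _rados_sketch(ctx, remote, cmd, wait=True, check_status=False):
    testdir = teuthology.get_testdir(ctx)
    proc = remote.run(
        args=['adjust-ulimits',
              'ceph-coverage',
              '{tdir}/archive/coverage'.format(tdir=testdir),
              'rados'] + cmd,
        wait=wait,
        check_status=check_status,
    )
    if wait:
        return proc.exitstatus
    return proc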
def task(ctx, config):
    """
    Test peering.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'peer task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < 3:
        manager.sleep(10)
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_clean()

    for i in range(3):
        manager.set_config(
            i,
            osd_recovery_delay_start=120)

    # take an osd down
    manager.kill_osd(2)
    manager.mark_down_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.wait_for_recovery()

    # kill another and revive 2, so that some pgs can't peer.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.revive_osd(2)
    manager.wait_till_osd_is_up(2)

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')

    manager.wait_for_active_or_down()

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')

    # look for down pgs
    num_down_pgs = 0
    pgs = manager.get_pg_stats()
    for pg in pgs:
        out = manager.raw_cluster_cmd('pg', pg['pgid'], 'query')
        log.debug("out string %s", out)
        j = json.loads(out)
        log.info("pg is %s, query json is %s", pg, j)

        if pg['state'].count('down'):
            num_down_pgs += 1
            # verify that it is blocked on osd.1
            rs = j['recovery_state']
            assert len(rs) > 0
            assert rs[0]['name'] == 'Started/Primary/Peering/GetInfo'
            assert rs[1]['name'] == 'Started/Primary/Peering'
            assert rs[1]['blocked']
            assert rs[1]['down_osds_we_would_probe'] == [1]
            assert len(rs[1]['peering_blocked_by']) == 1
            assert rs[1]['peering_blocked_by'][0]['osd'] == 1

    assert num_down_pgs > 0

    # bring it all back
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_clean()
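
# A hedged illustration of the `pg query` JSON fragment the assertions
# above pick apart, trimmed to just the asserted fields; the values are
# fabricated to match a pg blocked probing the down osd.1:
def _example_down_pg_query():
    return {
        'recovery_state': [
            {'name': 'Started/Primary/Peering/GetInfo'},
            {
                'name': 'Started/Primary/Peering',
                'blocked': True,
                'down_osds_we_would_probe': [1],
                'peering_blocked_by': [{'osd': 1}],
            },
        ],
    }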
def task(ctx, config):
    """
    Test handling of lost objects.

    A pretty rigid cluster is brought up and tested by this task
    """
    POOL = 'unfounddel_pool'
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.flush_pg_stats([0, 1, 2])
    manager.wait_for_clean()

    manager.create_pool(POOL)

    # something that is always there
    dummyfile = '/etc/fstab'

    # take an osd out until the very end
    manager.kill_osd(2)
    manager.mark_down_osd(2)
    manager.mark_out_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f])

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.1',
        'injectargs',
        '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
        )

    manager.kill_osd(0)
    manager.mark_down_osd(0)

    for f in range(1, 10):
        rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile])

    # bring osd.0 back up, let it peer, but don't replicate the new
    # objects...
    log.info('osd.0 command_args is %s' % 'foo')
    log.info(ctx.daemons.get_daemon('osd', 0).command_args)
    ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend(
        ['--osd-recovery-delay-start', '1000'])
    manager.revive_osd(0)
    manager.mark_in_osd(0)
    manager.wait_till_osd_is_up(0)

    manager.flush_pg_stats([0, 1])
    manager.wait_till_active()

    # take out osd.1 and the only copy of those objects.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.mark_out_osd(1)
    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')

    # bring up osd.2 so that things would otherwise, in theory, recover fully
    manager.revive_osd(2)
    manager.mark_in_osd(2)
    manager.wait_till_osd_is_up(2)

    manager.flush_pg_stats([0, 2])
    manager.wait_till_active()
    manager.flush_pg_stats([0, 2])

    # verify that there are unfound objects
    unfound = manager.get_num_unfound_objects()
    log.info("there are %d unfound objects" % unfound)
    assert unfound

    testdir = teuthology.get_testdir(ctx)
    procs = []
    if config.get('parallel_bench', True):
        procs.append(mon.run(
            args=[
                "/bin/sh", "-c",
                " ".join(['adjust-ulimits',
                          'ceph-coverage',
                          '{tdir}/archive/coverage',
                          'rados',
                          '--no-log-to-stderr',
                          '--name', 'client.admin',
                          '-b', str(4 << 10),
                          '-p', POOL,
                          '-t', '20',
                          'bench', '240', 'write',
                          ]).format(tdir=testdir),
            ],
            logger=log.getChild('radosbench.{id}'.format(id='client.admin')),
            stdin=run.PIPE,
            wait=False))
        time.sleep(10)

    # mark stuff lost
    pgs = manager.get_pg_stats()
    for pg in pgs:
        if pg['stat_sum']['num_objects_unfound'] > 0:
            primary = 'osd.%d' % pg['acting'][0]

            # verify that i can list them direct from the osd
            log.info('listing missing/lost in %s state %s', pg['pgid'],
                     pg['state'])
            m = manager.list_pg_unfound(pg['pgid'])
            # log.info('%s' % m)
            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']
            num_unfound = 0
            for o in m['objects']:
                if len(o['locations']) == 0:
                    num_unfound += 1
            assert m['num_unfound'] == num_unfound

            log.info("reverting unfound in %s on %s", pg['pgid'], primary)
            manager.raw_cluster_cmd('pg', pg['pgid'],
                                    'mark_unfound_lost', 'delete')
        else:
            log.info("no unfound in %s", pg['pgid'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
    manager.flush_pg_stats([0, 2])
    manager.wait_for_recovery()

    # verify result
    for f in range(1, 10):
        err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-'])
        assert err

    # see if osd.1 can cope
    manager.mark_in_osd(1)
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.wait_for_clean()
    run.wait(procs)
def task(ctx, config):
    """
    Test monitor recovery from OSD
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'))

    mons = ctx.cluster.only(teuthology.is_type('mon'))
    assert mons
    # note down the first cluster_name and mon_id
    # we will recover it later on
    cluster_name = None
    mon_id = None
    for remote, roles in mons.remotes.iteritems():
        is_mon = teuthology.is_type('mon')
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            if cluster_name is None:
                cluster_name = cluster
                mon_id = m
            assert cluster_name == cluster
            log.info('killing {cluster}:mon.{mon}'.format(
                cluster=cluster,
                mon=m))
            manager.kill_mon(m)
            mon_data = os.path.join('/var/lib/ceph/mon/',
                                    '{0}-{1}'.format(cluster_name, m))
            if m == mon_id:
                # so we only need to recreate the store.db for the first
                # mon; this is easier than doing mkfs on it and then
                # replacing its store.db with the recovered one
                store_dir = os.path.join(mon_data, 'store.db')
                remote.run(args=['sudo', 'rm', '-r', store_dir])
            else:
                remote.run(args=['sudo', 'rm', '-r', mon_data])

    local_mstore = tempfile.mkdtemp()

    # collect the maps from all OSDs
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    assert osds
    for osd, roles in osds.remotes.iteritems():
        is_osd = teuthology.is_type('osd')
        for role in roles:
            if not is_osd(role):
                continue
            cluster, _, osd_id = teuthology.split_role(role)
            assert cluster_name == cluster
            log.info('collecting maps from {cluster}:osd.{osd}'.format(
                cluster=cluster,
                osd=osd_id))
            # push leveldb to OSD
            osd_mstore = os.path.join(teuthology.get_testdir(ctx),
                                      'mon-store')
            osd.run(args=['sudo', 'mkdir', '-m', 'o+x', '-p', osd_mstore])

            push_directory(local_mstore, osd, osd_mstore)
            log.info('rm -rf {0}'.format(local_mstore))
            shutil.rmtree(local_mstore)
            # update leveldb with OSD data
            options = '--op update-mon-db --mon-store-path {0}'
            log.info('cot {0}'.format(osd_mstore))
            manager.objectstore_tool(pool=None,
                                     options=options.format(osd_mstore),
                                     args='',
                                     osd=osd_id,
                                     do_revive=False)
            # pull the updated mon db
            log.info('pull dir {0} -> {1}'.format(osd_mstore, local_mstore))
            local_mstore = tempfile.mkdtemp()
            teuthology.pull_directory(osd, osd_mstore, local_mstore)
            log.info('rm -rf osd:{0}'.format(osd_mstore))
            osd.run(args=['sudo', 'rm', '-fr', osd_mstore])

    # recover the first_mon with the re-built mon db
    # pull the recovered leveldb from the client
    mon_store_dir = os.path.join('/var/lib/ceph/mon',
                                 '{0}-{1}'.format(cluster_name, mon_id))
    push_directory(local_mstore, mon, mon_store_dir)
    mon.run(args=['sudo', 'chown', '-R', 'ceph:ceph', mon_store_dir])
    shutil.rmtree(local_mstore)

    default_keyring = '/etc/ceph/{cluster}.keyring'.format(
        cluster=cluster_name)
    keyring_path = config.get('keyring_path', default_keyring)
    # fill up the caps in the keyring file
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'mon.',
                  '--cap', 'mon', 'allow *'])
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'client.admin',
                  '--cap', 'mon', 'allow *',
                  '--cap', 'osd', 'allow *',
                  '--cap', 'mds', 'allow *'])
    mon.run(args=['sudo', '-u', 'ceph',
                  'ceph-monstore-tool', mon_store_dir,
                  'rebuild', '--', '--keyring',
                  keyring_path])

    # revive monitors
    # the initial monmap is in the ceph.conf, so we are good.
    n_mons = 0
    for remote, roles in mons.remotes.iteritems():
        is_mon = teuthology.is_type('mon')
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            assert cluster_name == cluster
            if mon_id != m:
                log.info('running mkfs on {cluster}:mon.{mon}'.format(
                    cluster=cluster,
                    mon=m))
                remote.run(
                    args=[
                        'sudo',
                        'ceph-mon',
                        '--cluster', cluster,
                        '--mkfs',
                        '-i', m,
                        '--keyring', keyring_path])
            manager.revive_mon(m)
            n_mons += 1

    manager.wait_for_mon_quorum_size(n_mons, timeout=30)
    for osd, roles in osds.remotes.iteritems():
        is_osd = teuthology.is_type('osd')
        for role in roles:
            if not is_osd(role):
                continue
            _, _, osd_id = teuthology.split_role(role)
            log.info('reviving osd.{0}'.format(osd_id))
            manager.revive_osd(osd_id)
def task(ctx, config):
    """
    Stress test the mds by thrashing while another task/workunit
    is running.

    Please refer to MDSThrasher class for further information on the
    available options.
    """
    mds_cluster = MDSCluster(ctx)

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mds_thrash task only accepts a dict for configuration'
    mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds'))
    assert len(mdslist) > 1, \
        'mds_thrash task requires at least 2 metadata servers'

    # choose random seed
    if 'seed' in config:
        seed = int(config['seed'])
    else:
        seed = int(time.time())
    log.info('mds thrasher using random seed: {seed}'.format(seed=seed))
    random.seed(seed)

    (first,) = ctx.cluster.only(
        'mds.{_id}'.format(_id=mdslist[0])).remotes.keys()
    manager = ceph_manager.CephManager(
        first, ctx=ctx, logger=log.getChild('ceph_manager'),
    )

    # make sure everyone is in active, standby, or standby-replay
    log.info('Wait for all MDSs to reach steady state...')
    status = mds_cluster.status()
    while True:
        steady = True
        for info in status.get_all():
            state = info['state']
            if state not in ('up:active', 'up:standby', 'up:standby-replay'):
                steady = False
                break
        if steady:
            break
        sleep(2)
        status = mds_cluster.status()
    log.info('Ready to start thrashing')

    manager.wait_for_clean()
    assert manager.is_clean()

    if 'cluster' not in config:
        config['cluster'] = 'ceph'

    for fs in status.get_filesystems():
        thrasher = MDSThrasher(ctx, manager, config,
                               Filesystem(ctx, fs['id']),
                               fs['mdsmap']['max_mds'])
        thrasher.start()
        ctx.ceph[config['cluster']].thrashers.append(thrasher)

    try:
        log.debug('Yielding')
        yield
    finally:
        log.info('joining mds_thrasher')
        thrasher.stop()
        if thrasher.exception is not None:
            raise RuntimeError('error during thrashing')
        thrasher.join()
        log.info('done joining')
def task(ctx, config):
    """
    Test backfill
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'thrashosds task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)
    assert num_osds == 3

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < 3:
        manager.sleep(10)
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_clean()

    # write some data
    p = rados_start(ctx, mon, ['-p', 'rbd', 'bench', '15', 'write',
                               '-b', '4096', '--no-cleanup'])
    err = p.wait()
    log.info('err is %d' % err)

    # mark osd.0 out to trigger a rebalance/backfill
    manager.mark_out_osd(0)

    # also mark it down so it won't be included in pg_temps
    manager.kill_osd(0)
    manager.mark_down_osd(0)

    # wait for everything to peer and be happy...
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_recovery()

    # write some new data
    p = rados_start(ctx, mon, ['-p', 'rbd', 'bench', '30', 'write',
                               '-b', '4096', '--no-cleanup'])

    time.sleep(15)

    # blackhole + restart osd.1
    # this triggers a divergent backfill target
    manager.blackhole_kill_osd(1)
    time.sleep(2)
    manager.revive_osd(1)

    # wait for our writes to complete + succeed
    err = p.wait()
    log.info('err is %d' % err)

    # cluster must recover
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_recovery()

    # re-add osd.0
    manager.revive_osd(0)
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.wait_for_clean()
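
# Unlike the blocking rados() helper, rados_start returns the running
# process without waiting, letting tests like the one above overlap
# client I/O with failure injection (note a variant elsewhere in this
# section takes testdir instead of ctx as its first argument). A hedged
# sketch of the ctx flavour; the coverage wrapper is an assumption:
def _rados_start_sketch(ctx, remote, cmd):
    testdir = teuthology.get_testdir(ctx)
    # Launch rados in the background; callers p.wait() when they need
    # the exit status.
    return remote.run(
        args=['adjust-ulimits',
              'ceph-coverage',
              '{tdir}/archive/coverage'.format(tdir=testdir),
              'rados'] + cmd,
        wait=False,
    )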
def task(ctx, config):
    """
    Go through filesystem creation with a synthetic failure in an MDS
    in its 'up:creating' state, to exercise the retry behaviour.
    """
    # Grab handles to the teuthology objects of interest
    mdslist = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
    if len(mdslist) != 1:
        # Require exactly one MDS, the code path for creation failure when
        # a standby is available is different
        raise RuntimeError("This task requires exactly one MDS")

    mds_id = mdslist[0]
    (mds_remote,) = ctx.cluster.only(
        'mds.{_id}'.format(_id=mds_id)).remotes.iterkeys()
    manager = ceph_manager.CephManager(
        mds_remote, ctx=ctx, logger=log.getChild('ceph_manager'),
    )

    # Stop MDS
    manager.raw_cluster_cmd('mds', 'set', "max_mds", "0")
    mds = ctx.daemons.get_daemon('mds', mds_id)
    mds.stop()
    manager.raw_cluster_cmd('mds', 'fail', mds_id)

    # Reset the filesystem so that next start will go into CREATING
    manager.raw_cluster_cmd('fs', 'rm', "default", "--yes-i-really-mean-it")
    manager.raw_cluster_cmd('fs', 'new', "default", "metadata", "data")

    # Start the MDS with mds_kill_create_at set, it will crash during creation
    mds.restart_with_args(["--mds_kill_create_at=1"])
    try:
        mds.wait_for_exit()
    except CommandFailedError as e:
        if e.exitstatus == 1:
            log.info("MDS creation killed as expected")
        else:
            log.error("Unexpected status code %s" % e.exitstatus)
            raise

    # Since I have intentionally caused a crash, I will clean up the
    # resulting core file to avoid task.internal.coredump seeing it as
    # a failure.
    log.info("Removing core file from synthetic MDS failure")
    mds_remote.run(args=[
        'rm', '-f',
        Raw("{archive}/coredump/*.core".format(
            archive=misc.get_archive_dir(ctx)))])

    # It should have left the MDS map state still in CREATING
    status = manager.get_mds_status(mds_id)
    assert status['state'] == 'up:creating'

    # Start the MDS again without the kill flag set, it should proceed
    # with creation successfully
    mds.restart()

    # Wait for state ACTIVE
    t = 0
    create_timeout = 120
    while True:
        status = manager.get_mds_status(mds_id)
        if status['state'] == 'up:active':
            log.info("MDS creation completed successfully")
            break
        elif status['state'] == 'up:creating':
            log.info("MDS still in creating state")
            if t > create_timeout:
                log.error("Creating did not complete within %ss" % create_timeout)
                raise RuntimeError("Creating did not complete within %ss" % create_timeout)
            t += 1
            time.sleep(1)
        else:
            log.error("Unexpected MDS state: %s" % status['state'])
            assert(status['state'] in ['up:active', 'up:creating'])

    # The system should be back up in a happy healthy state, go ahead
    # and run any further tasks inside this context.
    yield
def task(ctx, config):
    """
    Test handling of object location going down
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'

    # take 0, 1 out
    manager.mark_out_osd(0)
    manager.mark_out_osd(1)
    manager.wait_for_clean()

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.0',
        'injectargs',
        '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
        )
    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.1',
        'injectargs',
        '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
        )
    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.2',
        'injectargs',
        '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
        )
    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.3',
        'injectargs',
        '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
        )

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile])

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])

    manager.mark_out_osd(3)
    manager.wait_till_active()

    manager.mark_in_osd(0)
    manager.wait_till_active()

    manager.flush_pg_stats([2, 0])

    manager.mark_out_osd(2)
    manager.wait_till_active()

    # bring up 1
    manager.mark_in_osd(1)
    manager.wait_till_active()

    manager.flush_pg_stats([0, 1])
    log.info("Getting unfound objects")
    unfound = manager.get_num_unfound_objects()
    assert not unfound

    manager.kill_osd(2)
    manager.mark_down_osd(2)
    manager.kill_osd(3)
    manager.mark_down_osd(3)

    manager.flush_pg_stats([0, 1])
    log.info("Getting unfound objects")
    unfound = manager.get_num_unfound_objects()
    assert unfound
def task(ctx, config):
    """
    Test [deep] scrub

    tasks:
    - chef:
    - install:
    - ceph:
        log-whitelist:
        - '!= known digest'
        - '!= known omap_digest'
        - deep-scrub 0 missing, 1 inconsistent objects
        - deep-scrub 1 errors
        - repair 0 missing, 1 inconsistent objects
        - repair 1 errors, 1 fixed
    - scrub_test:
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'scrub_test task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < num_osds:
        time.sleep(10)

    for i in range(num_osds):
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'flush_pg_stats')
    manager.wait_for_clean()

    # write some data
    p = manager.do_rados(mon, ['-p', 'rbd', 'bench', '--no-cleanup', '1',
                               'write', '-b', '4096'])
    log.info('err is %d' % p.exitstatus)

    # wait for some PG to have data that we can mess with
    pg, acting = wait_for_victim_pg(manager)
    osd = acting[0]

    osd_remote, obj_path, obj_name = find_victim_object(ctx, pg, osd)
    manager.do_rados(mon, ['-p', 'rbd', 'setomapval', obj_name, 'key', 'val'])
    log.info('err is %d' % p.exitstatus)
    manager.do_rados(mon, ['-p', 'rbd', 'setomapheader', obj_name, 'hdr'])
    log.info('err is %d' % p.exitstatus)

    log.info('messing with PG %s on osd %d' % (pg, osd))
    test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path)
    test_repair_bad_omap(ctx, manager, pg, osd, obj_name)
    test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd,
                               obj_name, obj_path)
    log.info('test successful!')
def task(ctx, config):
    """
    Test handling of lost objects on an ec pool.

    A pretty rigid cluster is brought up and tested by this task
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
    manager.wait_for_clean()

    profile = config.get('erasure_code_profile', {
        'k': '2',
        'm': '2',
        'ruleset-failure-domain': 'osd'
    })
    profile_name = profile.get('name', 'lost_unfound')
    manager.create_erasure_code_profile(profile_name, profile)
    pool = manager.create_pool_with_unique_name(
        erasure_code_profile_name=profile_name)

    # something that is always there, readable and never empty
    dummyfile = '/etc/group'

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile])

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.wait_for_recovery()

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f])

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.1',
        'injectargs',
        '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
        )

    manager.kill_osd(0)
    manager.mark_down_osd(0)
    manager.kill_osd(3)
    manager.mark_down_osd(3)

    for f in range(1, 10):
        rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])

    # take out osd.1 and a necessary shard of those objects.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')

    manager.revive_osd(0)
    manager.wait_till_osd_is_up(0)
    manager.revive_osd(3)
    manager.wait_till_osd_is_up(3)

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
    manager.wait_till_active()
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')

    # verify that there are unfound objects
    unfound = manager.get_num_unfound_objects()
    log.info("there are %d unfound objects" % unfound)
    assert unfound

    # mark stuff lost
    pgs = manager.get_pg_stats()
    for pg in pgs:
        if pg['stat_sum']['num_objects_unfound'] > 0:
            # verify that i can list them direct from the osd
            log.info('listing missing/lost in %s state %s', pg['pgid'],
                     pg['state'])
            m = manager.list_pg_missing(pg['pgid'])
            log.info('%s' % m)
            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']

            log.info("reverting unfound in %s", pg['pgid'])
            manager.raw_cluster_cmd('pg', pg['pgid'],
                                    'mark_unfound_lost', 'delete')
        else:
            log.info("no unfound in %s", pg['pgid'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
    manager.wait_for_recovery()

    # verify result
    for f in range(1, 10):
        err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-'])
        assert err

    # see if osd.1 can cope
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.wait_for_clean()
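
# Why the new writes become unfound with the k=2/m=2 profile above: each
# object is cut into k data shards plus m coding shards, and any k of the
# k+m shards are enough to reconstruct it. A toy check of that arithmetic
# (illustrative only):
def _ec_unfound_arithmetic():
    k, m = 2, 2
    shards_written = (k + m) - 2   # osd.0 and osd.3 were down for the writes
    assert shards_written == k     # exactly k shards: still readable, barely
    shards_left = shards_written - 1  # then osd.1, holding one shard, is lost
    assert shards_left < k         # fewer than k shards -> object is unfound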
def task(ctx, config):
    """
    Test [deep] scrub

    tasks:
    - chef:
    - install:
    - ceph:
        log-whitelist:
        - '!= known digest'
        - '!= known omap_digest'
        - deep-scrub 0 missing, 1 inconsistent objects
        - deep-scrub 1 errors
        - repair 0 missing, 1 inconsistent objects
        - repair 1 errors, 1 fixed
    - scrub_test:
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'scrub_test task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < num_osds:
        time.sleep(10)

    for i in range(num_osds):
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'flush_pg_stats')
    manager.wait_for_clean()

    # write some data
    p = manager.do_rados(mon, ['-p', 'rbd', 'bench', '--no-cleanup', '1',
                               'write', '-b', '4096'])
    err = p.exitstatus
    log.info('err is %d' % err)

    # wait for some PG to have data that we can mess with
    victim = None
    osd = None
    while victim is None:
        stats = manager.get_pg_stats()
        for pg in stats:
            size = pg['stat_sum']['num_bytes']
            if size > 0:
                victim = pg['pgid']
                osd = pg['acting'][0]
                break

        if victim is None:
            time.sleep(3)

    log.info('messing with PG %s on osd %d' % (victim, osd))

    (osd_remote,) = ctx.cluster.only('osd.%d' % osd).remotes.iterkeys()
    data_path = os.path.join(
        '/var/lib/ceph/osd',
        'ceph-{id}'.format(id=osd),
        'current',
        '{pg}_head'.format(pg=victim)
        )

    # fuzz time
    ls_fp = StringIO()
    osd_remote.run(
        args=['sudo', 'ls', data_path],
        stdout=ls_fp,
    )
    ls_out = ls_fp.getvalue()
    ls_fp.close()

    # find an object file we can mess with
    osdfilename = None
    for line in ls_out.split('\n'):
        if 'object' in line:
            osdfilename = line
            break
    assert osdfilename is not None

    # Get actual object name from osd stored filename
    tmp = osdfilename.split('__')
    objname = tmp[0]
    objname = objname.replace('\u', '_')
    log.info('fuzzing %s' % objname)

    # put a single \0 at the beginning of the file
    osd_remote.run(args=[
        'sudo', 'dd',
        'if=/dev/zero',
        'of=%s' % os.path.join(data_path, osdfilename),
        'bs=1', 'count=1', 'conv=notrunc'
    ])

    # scrub, verify inconsistent
    manager.raw_cluster_cmd('pg', 'deep-scrub', victim)
    # Give deep-scrub a chance to start
    time.sleep(60)

    while True:
        stats = manager.get_single_pg_stats(victim)
        state = stats['state']

        # wait for the scrub to finish
        if 'scrubbing' in state:
            time.sleep(3)
            continue

        inconsistent = stats['state'].find('+inconsistent') != -1
        assert inconsistent
        break

    # repair, verify no longer inconsistent
    manager.raw_cluster_cmd('pg', 'repair', victim)
    # Give repair a chance to start
    time.sleep(60)

    while True:
        stats = manager.get_single_pg_stats(victim)
        state = stats['state']

        # wait for the scrub to finish
        if 'scrubbing' in state:
            time.sleep(3)
            continue

        inconsistent = stats['state'].find('+inconsistent') != -1
        assert not inconsistent
        break

    # Test deep-scrub with various omap modifications
    manager.do_rados(mon, ['-p', 'rbd', 'setomapval', objname, 'key', 'val'])
    manager.do_rados(mon, ['-p', 'rbd', 'setomapheader', objname, 'hdr'])

    # Modify omap on specific osd
    log.info('fuzzing omap of %s' % objname)
    manager.osd_admin_socket(osd, ['rmomapkey', 'rbd', objname, 'key'])
    manager.osd_admin_socket(
        osd, ['setomapval', 'rbd', objname, 'badkey', 'badval'])
    manager.osd_admin_socket(osd, ['setomapheader', 'rbd', objname, 'badhdr'])

    # scrub, verify inconsistent
    manager.raw_cluster_cmd('pg', 'deep-scrub', victim)
    # Give deep-scrub a chance to start
    time.sleep(60)

    while True:
        stats = manager.get_single_pg_stats(victim)
        state = stats['state']

        # wait for the scrub to finish
        if 'scrubbing' in state:
            time.sleep(3)
            continue

        inconsistent = stats['state'].find('+inconsistent') != -1
        assert inconsistent
        break

    # repair, verify no longer inconsistent
    manager.raw_cluster_cmd('pg', 'repair', victim)
    # Give repair a chance to start
    time.sleep(60)

    while True:
        stats = manager.get_single_pg_stats(victim)
        state = stats['state']

        # wait for the scrub to finish
        if 'scrubbing' in state:
            time.sleep(3)
            continue

        inconsistent = stats['state'].find('+inconsistent') != -1
        assert not inconsistent
        break

    log.info('test successful!')
def task(ctx, config):
    """
    Stress test the mds by thrashing while another task/workunit
    is running.

    Please refer to MDSThrasher class for further information on the
    available options.
    """
    mds_cluster = MDSCluster(ctx)

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mds_thrash task only accepts a dict for configuration'
    mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds'))
    assert len(mdslist) > 1, \
        'mds_thrash task requires at least 2 metadata servers'

    # choose random seed
    if 'seed' in config:
        seed = int(config['seed'])
    else:
        seed = int(time.time())
    log.info('mds thrasher using random seed: {seed}'.format(seed=seed))
    random.seed(seed)

    max_thrashers = config.get('max_thrash', 1)
    thrashers = {}

    (first,) = ctx.cluster.only(
        'mds.{_id}'.format(_id=mdslist[0])).remotes.iterkeys()
    manager = ceph_manager.CephManager(
        first, ctx=ctx, logger=log.getChild('ceph_manager'),
    )

    # make sure everyone is in active, standby, or standby-replay
    log.info('Wait for all MDSs to reach steady state...')
    statuses = None
    statuses_by_rank = None
    while True:
        statuses = {m: mds_cluster.get_mds_info(m) for m in mdslist}
        statuses_by_rank = {}
        for _, s in statuses.iteritems():
            if isinstance(s, dict):
                statuses_by_rank[s['rank']] = s

        ready = filter(lambda (_, s): s is not None and
                       (s['state'] == 'up:active'
                        or s['state'] == 'up:standby'
                        or s['state'] == 'up:standby-replay'),
                       statuses.items())
        if len(ready) == len(statuses):
            break
        time.sleep(2)
    log.info('Ready to start thrashing')

    # setup failure groups
    failure_groups = {}
    actives = {s['name']: s for (_, s) in statuses.iteritems()
               if s['state'] == 'up:active'}
    log.info('Actives is: {d}'.format(d=actives))
    log.info('Statuses is: {d}'.format(d=statuses_by_rank))
    for active in actives:
        for (r, s) in statuses.iteritems():
            if s['standby_for_name'] == active:
                if not active in failure_groups:
                    failure_groups[active] = []
                log.info('Assigning mds rank {r} to failure group {g}'.format(
                    r=r, g=active))
                failure_groups[active].append(r)

    manager.wait_for_clean()
    for (active, standbys) in failure_groups.iteritems():
        weight = 1.0
        if 'thrash_weights' in config:
            weight = int(config['thrash_weights'].get(
                'mds.{_id}'.format(_id=active), '0.0'))

        failure_group = [active]
        failure_group.extend(standbys)

        thrasher = MDSThrasher(
            ctx, manager, mds_cluster, config,
            logger=log.getChild(
                'mds_thrasher.failure_group.[{a}, {sbs}]'.format(
                    a=active, sbs=', '.join(standbys))),
            failure_group=failure_group,
            weight=weight)
        thrasher.start()
        thrashers[active] = thrasher

        # if thrash_weights isn't specified and we've reached max_thrash,
        # we're done
        if 'thrash_weights' not in config and len(thrashers) == max_thrashers:
            break

    try:
        log.debug('Yielding')
        yield
    finally:
        log.info('joining mds_thrashers')
        for t in thrashers:
            log.info('join thrasher for failure group [{fg}]'.format(
                fg=', '.join(failure_group)))
            thrashers[t].stop()
            thrashers[t].get()  # Raise any exception from _run()
            thrashers[t].join()
        log.info('done joining')
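
# A hedged illustration of the thrash_weights lookup above: the dict maps
# mds names to weight strings, and when it is present max_thrash is not
# consulted. The names and values below are invented; note the code
# coerces weights with int(), so integer-like strings are the safe form.
#
#   config = {
#       'seed': 1234,
#       'thrash_weights': {'mds.a': '1', 'mds.b': '0'},
#   }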
def test_incomplete_pgs(ctx, config):
    """
    Test handling of incomplete pgs.  Requires 4 osds.
    """
    testdir = teuthology.get_testdir(ctx)
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)
    assert num_osds == 4

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < 4:
        time.sleep(10)

    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
    manager.wait_for_clean()

    log.info('Testing incomplete pgs...')

    for i in range(4):
        manager.set_config(
            i,
            osd_recovery_delay_start=1000)

    # move data off of osd.0, osd.1
    manager.raw_cluster_cmd('osd', 'out', '0', '1')
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
    manager.wait_for_clean()

    # lots of objects in rbd (no pg log, will backfill)
    p = rados_start(testdir, mon,
                    ['-p', 'rbd', 'bench', '20', 'write', '-b', '1',
                     '--no-cleanup'])
    p.wait()

    # few objects in rbd pool (with pg log, normal recovery)
    for f in range(1, 20):
        p = rados_start(testdir, mon, ['-p', 'rbd', 'put',
                                       'foo.%d' % f, '/etc/passwd'])
        p.wait()

    # move it back
    manager.raw_cluster_cmd('osd', 'in', '0', '1')
    manager.raw_cluster_cmd('osd', 'out', '2', '3')
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.3', 'flush_pg_stats')
    manager.wait_for_active()

    assert not manager.is_clean()
    assert not manager.is_recovered()

    # kill 2 + 3
    log.info('stopping 2,3')
    manager.kill_osd(2)
    manager.kill_osd(3)
    log.info('...')
    manager.raw_cluster_cmd('osd', 'down', '2', '3')
    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
    manager.wait_for_active_or_down()

    assert manager.get_num_down() > 0

    # revive 2 + 3
    manager.revive_osd(2)
    manager.revive_osd(3)
    while len(manager.get_osd_status()['up']) < 4:
        log.info('waiting a bit...')
        time.sleep(2)
    log.info('all are up!')

    for i in range(4):
        manager.kick_recovery_wq(i)

    # cluster must recover
    manager.wait_for_clean()
def task(ctx, config):
    """
    Test [deep] scrub

    tasks:
    - chef:
    - install:
    - ceph:
        log-whitelist:
        - '!= data_digest'
        - '!= omap_digest'
        - '!= size'
        - deep-scrub 0 missing, 1 inconsistent objects
        - deep-scrub [0-9]+ errors
        - repair 0 missing, 1 inconsistent objects
        - repair [0-9]+ errors, [0-9]+ fixed
        - shard [0-9]+ .* : missing
        - deep-scrub 1 missing, 1 inconsistent objects
        - does not match object info size
        - attr name mistmatch
        - deep-scrub 1 missing, 0 inconsistent objects
        - failed to pick suitable auth object
        - candidate size [0-9]+ info size [0-9]+ mismatch
        conf:
          osd:
            osd deep scrub update digest min age: 0
    - scrub_test:
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'scrub_test task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < num_osds:
        time.sleep(10)

    for i in range(num_osds):
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs', '--',
                                '--osd-objectstore-fuse')
    manager.flush_pg_stats(range(num_osds))
    manager.wait_for_clean()

    # write some data
    p = manager.do_rados(mon, ['-p', 'rbd', 'bench', '--no-cleanup', '1',
                               'write', '-b', '4096'])
    log.info('err is %d' % p.exitstatus)

    # wait for some PG to have data that we can mess with
    pg, acting = wait_for_victim_pg(manager)
    osd = acting[0]

    osd_remote, obj_path, obj_name = find_victim_object(ctx, pg, osd)
    manager.do_rados(mon, ['-p', 'rbd', 'setomapval', obj_name, 'key', 'val'])
    log.info('err is %d' % p.exitstatus)
    manager.do_rados(mon, ['-p', 'rbd', 'setomapheader', obj_name, 'hdr'])
    log.info('err is %d' % p.exitstatus)

    # Update missing digests, requires "osd deep scrub update digest min age: 0"
    pgnum = get_pgnum(pg)
    manager.do_pg_scrub('rbd', pgnum, 'deep-scrub')

    log.info('messing with PG %s on osd %d' % (pg, osd))
    test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, 'rbd')
    test_repair_bad_omap(ctx, manager, pg, osd, obj_name)
    test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd,
                               obj_name, obj_path)
    log.info('test successful!')

    # shut down fuse mount
    for i in range(num_osds):
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs', '--',
                                '--no-osd-objectstore-fuse')
    time.sleep(5)
    log.info('done')
def task(ctx, config):
    """
    Test handling of lost objects on an ec pool.

    A pretty rigid cluster is brought up and tested by this task
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    manager.wait_for_clean()

    profile = config.get('erasure_code_profile', {
        'k': '2',
        'm': '2',
        'crush-failure-domain': 'osd'
    })
    profile_name = profile.get('name', 'lost_unfound')
    manager.create_erasure_code_profile(profile_name, profile)
    pool = manager.create_pool_with_unique_name(
        erasure_code_profile_name=profile_name,
        min_size=2)

    # something that is always there, readable and never empty
    dummyfile = '/etc/group'

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f])

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.1',
        'injectargs',
        '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
        )

    manager.kill_osd(0)
    manager.mark_down_osd(0)
    manager.kill_osd(3)
    manager.mark_down_osd(3)

    for f in range(1, 10):
        rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])

    # take out osd.1 and a necessary shard of those objects.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')

    manager.revive_osd(0)
    manager.wait_till_osd_is_up(0)
    manager.revive_osd(3)
    manager.wait_till_osd_is_up(3)

    manager.flush_pg_stats([0, 2, 3])
    manager.wait_till_active()
    manager.flush_pg_stats([0, 2, 3])

    # verify that there are unfound objects
    unfound = manager.get_num_unfound_objects()
    log.info("there are %d unfound objects" % unfound)
    assert unfound

    testdir = teuthology.get_testdir(ctx)
    procs = []
    if config.get('parallel_bench', True):
        procs.append(mon.run(
            args=[
                "/bin/sh", "-c",
                " ".join(['adjust-ulimits',
                          'ceph-coverage',
                          '{tdir}/archive/coverage',
                          'rados',
                          '--no-log-to-stderr',
                          '--name', 'client.admin',
                          '-b', str(4 << 10),
                          '-p', pool,
                          '-t', '20',
                          'bench', '240', 'write',
                          ]).format(tdir=testdir),
            ],
            logger=log.getChild('radosbench.{id}'.format(id='client.admin')),
            stdin=run.PIPE,
            wait=False))
        time.sleep(10)

    # mark stuff lost
    pgs = manager.get_pg_stats()
    for pg in pgs:
        if pg['stat_sum']['num_objects_unfound'] > 0:
            # verify that i can list them direct from the osd
            log.info('listing missing/lost in %s state %s', pg['pgid'],
                     pg['state'])
            m = manager.list_pg_unfound(pg['pgid'])
            log.info('%s' % m)
            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']

            log.info("reverting unfound in %s", pg['pgid'])
            manager.raw_cluster_cmd('pg', pg['pgid'],
                                    'mark_unfound_lost', 'delete')
        else:
            log.info("no unfound in %s", pg['pgid'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5')
    manager.flush_pg_stats([0, 2, 3])
    manager.wait_for_recovery()

    if not config.get('parallel_bench', True):
        time.sleep(20)

    # verify result
    for f in range(1, 10):
        err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-'])
        assert err

    # see if osd.1 can cope
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.wait_for_clean()
    run.wait(procs)
def task(ctx, config): """ Test handling of divergent entries with prior_version prior to log_tail config: none Requires 3 osds. """ if config is None: config = {} assert isinstance(config, dict), \ 'divergent_priors task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) ctx.manager = manager while len(manager.get_osd_status()['up']) < 3: time.sleep(10) manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') manager.raw_cluster_cmd('osd', 'set', 'noout') manager.raw_cluster_cmd('osd', 'set', 'noin') manager.raw_cluster_cmd('osd', 'set', 'nodown') manager.wait_for_clean() # something that is always there dummyfile = '/etc/fstab' dummyfile2 = '/etc/resolv.conf' # create 1 pg pool log.info('creating foo') manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') osds = [0, 1, 2] for i in osds: manager.set_config(i, osd_min_pg_log_entries=1) # determine primary divergent = manager.get_pg_primary('foo', 0) log.info("primary and soon to be divergent is %d", divergent) non_divergent = [0, 1, 2] non_divergent.remove(divergent) log.info('writing initial objects') # write 1000 objects for i in range(1000): rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) manager.wait_for_clean() # blackhole non_divergent log.info("blackholing osds %s", str(non_divergent)) for i in non_divergent: manager.set_config(i, filestore_blackhole='') # write 1 (divergent) object log.info('writing divergent object existing_0') rados(ctx, mon, ['-p', 'foo', 'put', 'existing_0', dummyfile2], wait=False) time.sleep(10) mon.run(args=['killall', '-9', 'rados'], wait=True, check_status=False) # kill all the osds log.info('killing all the osds') for i in osds: manager.kill_osd(i) for i in osds: manager.mark_down_osd(i) for i in osds: manager.mark_out_osd(i) # bring up non-divergent log.info("bringing up non_divergent %s", str(non_divergent)) for i in non_divergent: manager.revive_osd(i) for i in non_divergent: manager.mark_in_osd(i) log.info('making log long to prevent backfill') for i in non_divergent: manager.set_config(i, osd_min_pg_log_entries=100000) # write 1 non-divergent object (ensure that old divergent one is divergent) log.info('writing non-divergent object existing_1') rados(ctx, mon, ['-p', 'foo', 'put', 'existing_1', dummyfile2]) manager.wait_for_recovery() # ensure no recovery log.info('delay recovery') for i in non_divergent: manager.set_config(i, osd_recovery_delay_start=100000) # bring in our divergent friend log.info("revive divergent %d", divergent) manager.revive_osd(divergent) while len(manager.get_osd_status()['up']) < 3: time.sleep(10) log.info('delay recovery divergent') manager.set_config(divergent, osd_recovery_delay_start=100000) log.info('mark divergent in') manager.mark_in_osd(divergent) log.info('wait for peering') rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) log.info("killing divergent %d", divergent) manager.kill_osd(divergent) log.info("reviving divergent %d", divergent) manager.revive_osd(divergent) log.info('allowing recovery') for i in non_divergent: manager.set_config(i, osd_recovery_delay_start=0) log.info('reading existing_0') exit_status = rados( ctx, mon, ['-p', 'foo', 'get', 'existing_0', '-o', '/tmp/existing']) assert exit_status is 0 log.info("success")
def task(ctx, config): """ Test the dump_stuck command. :param ctx: Context :param config: Configuration """ assert config is None, \ 'dump_stuck requires no configuration' assert teuthology.num_instances_of_type(ctx.cluster, 'osd') == 2, \ 'dump_stuck requires exactly 2 osds' timeout = 60 first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) manager.flush_pg_stats([0, 1]) manager.wait_for_clean(timeout) manager.raw_cluster_cmd( 'tell', 'mon.0', 'injectargs', '--', # '--mon-osd-report-timeout 90', '--mon-pg-stuck-threshold 10') # all active+clean check_stuck( manager, num_inactive=0, num_unclean=0, num_stale=0, ) num_pgs = manager.get_num_pgs() manager.mark_out_osd(0) time.sleep(timeout) manager.flush_pg_stats([1]) manager.wait_for_recovery(timeout) # all active+clean+remapped check_stuck( manager, num_inactive=0, num_unclean=0, num_stale=0, ) manager.mark_in_osd(0) manager.flush_pg_stats([0, 1]) manager.wait_for_clean(timeout) # all active+clean check_stuck( manager, num_inactive=0, num_unclean=0, num_stale=0, ) log.info('stopping first osd') manager.kill_osd(0) manager.mark_down_osd(0) log.info('waiting for all to be unclean') starttime = time.time() done = False while not done: try: check_stuck( manager, num_inactive=0, num_unclean=num_pgs, num_stale=0, ) done = True except AssertionError: # wait up to 15 minutes to become stale if time.time() - starttime > 900: raise log.info('stopping second osd') manager.kill_osd(1) manager.mark_down_osd(1) log.info('waiting for all to be stale') starttime = time.time() done = False while not done: try: check_stuck( manager, num_inactive=0, num_unclean=num_pgs, num_stale=num_pgs, ) done = True except AssertionError: # wait up to 15 minutes to become stale if time.time() - starttime > 900: raise log.info('reviving') for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'): manager.revive_osd(id_) manager.mark_in_osd(id_) while True: try: manager.flush_pg_stats([0, 1]) break except Exception: log.exception('osds must not be started yet, waiting...') time.sleep(1) manager.wait_for_clean(timeout) check_stuck( manager, num_inactive=0, num_unclean=0, num_stale=0, )
def task(ctx, config): """ Test handling of lost objects. A pretty rigid cluseter is brought up andtested by this task """ if config is None: config = {} assert isinstance(config, dict), \ 'lost_unfound task only accepts a dict for configuration' first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys() manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) while len(manager.get_osd_status()['up']) < 3: manager.sleep(10) manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') manager.wait_for_clean() # something that is always there dummyfile = '/etc/fstab' # take an osd out until the very end manager.kill_osd(2) manager.mark_down_osd(2) manager.mark_out_osd(2) # kludge to make sure they get a map rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile]) manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') manager.wait_for_recovery() # create old objects for f in range(1, 10): rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile]) rados(ctx, mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile]) rados(ctx, mon, ['-p', 'data', 'rm', 'existed_%d' % f]) # delay recovery, and make the pg log very long (to prevent backfill) manager.raw_cluster_cmd( 'tell', 'osd.1', 'injectargs', '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000') manager.kill_osd(0) manager.mark_down_osd(0) for f in range(1, 10): rados(ctx, mon, ['-p', 'data', 'put', 'new_%d' % f, dummyfile]) rados(ctx, mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile]) rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile]) # bring osd.0 back up, let it peer, but don't replicate the new # objects... log.info('osd.0 command_args is %s' % 'foo') log.info(ctx.daemons.get_daemon('osd', 0).command_args) ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend( ['--osd-recovery-delay-start', '1000']) manager.revive_osd(0) manager.mark_in_osd(0) manager.wait_till_osd_is_up(0) manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.wait_till_active() # take out osd.1 and the only copy of those objects. 
manager.kill_osd(1) manager.mark_down_osd(1) manager.mark_out_osd(1) manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') # bring up osd.2 so that things would otherwise, in theory, recovery fully manager.revive_osd(2) manager.mark_in_osd(2) manager.wait_till_osd_is_up(2) manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') manager.wait_till_active() manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') # verify that there are unfound objects unfound = manager.get_num_unfound_objects() log.info("there are %d unfound objects" % unfound) assert unfound # mark stuff lost pgs = manager.get_pg_stats() for pg in pgs: if pg['stat_sum']['num_objects_unfound'] > 0: primary = 'osd.%d' % pg['acting'][0] # verify that i can list them direct from the osd log.info('listing missing/lost in %s state %s', pg['pgid'], pg['state']) m = manager.list_pg_missing(pg['pgid']) #log.info('%s' % m) assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] num_unfound = 0 for o in m['objects']: if len(o['locations']) == 0: num_unfound += 1 assert m['num_unfound'] == num_unfound log.info("reverting unfound in %s on %s", pg['pgid'], primary) manager.raw_cluster_cmd('pg', pg['pgid'], 'mark_unfound_lost', 'delete') else: log.info("no unfound in %s", pg['pgid']) manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') manager.wait_for_recovery() # verify result for f in range(1, 10): err = rados(ctx, mon, ['-p', 'data', 'get', 'new_%d' % f, '-']) assert err err = rados(ctx, mon, ['-p', 'data', 'get', 'existed_%d' % f, '-']) assert err err = rados(ctx, mon, ['-p', 'data', 'get', 'existing_%d' % f, '-']) assert err # see if osd.1 can cope manager.revive_osd(1) manager.mark_in_osd(1) manager.wait_till_osd_is_up(1) manager.wait_for_clean()
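The inline invariant checked above (an object is unfound exactly when its 'locations' list in the pg's missing listing is empty) could be captured in a small helper. A sketch, using only the fields the assertions above already rely on; the helper name is hypothetical:

def count_unfound(missing):
    # An object is unfound when no surviving OSD holds a copy, i.e. its
    # 'locations' list in the output of list_pg_missing() is empty.
    return sum(1 for o in missing['objects'] if len(o['locations']) == 0)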
def task(ctx, config): """ Test (non-backfill) recovery """ if config is None: config = {} assert isinstance(config, dict), \ 'task only accepts a dict for configuration' testdir = teuthology.get_testdir(ctx) first_mon = teuthology.get_first_mon(ctx, config) (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys() num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') log.info('num_osds is %s' % num_osds) assert num_osds == 3 manager = ceph_manager.CephManager( mon, ctx=ctx, logger=log.getChild('ceph_manager'), ) while len(manager.get_osd_status()['up']) < 3: time.sleep(10) manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') manager.wait_for_clean() # test some osdmap flags manager.raw_cluster_cmd('osd', 'set', 'noin') manager.raw_cluster_cmd('osd', 'set', 'noout') manager.raw_cluster_cmd('osd', 'set', 'noup') manager.raw_cluster_cmd('osd', 'set', 'nodown') manager.raw_cluster_cmd('osd', 'unset', 'noin') manager.raw_cluster_cmd('osd', 'unset', 'noout') manager.raw_cluster_cmd('osd', 'unset', 'noup') manager.raw_cluster_cmd('osd', 'unset', 'nodown') # write some new data p = rados_start( testdir, mon, ['-p', 'rbd', 'bench', '20', 'write', '-b', '4096', '--no-cleanup']) time.sleep(15) # trigger a divergent target: # blackhole + restart osd.1 (shorter log) manager.blackhole_kill_osd(1) # kill osd.2 (longer log... we'll make it divergent below) manager.kill_osd(2) time.sleep(2) manager.revive_osd(1) # wait for our writes to complete + succeed err = p.wait() log.info('err is %d' % err) # cluster must repeer manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') manager.wait_for_active_or_down() # write some more (make sure osd.2 really is divergent) p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096']) p.wait() # revive divergent osd manager.revive_osd(2) while len(manager.get_osd_status()['up']) < 3: log.info('waiting a bit...') time.sleep(2) log.info('3 are up!') # cluster must recover manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats') manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats') manager.wait_for_clean()