Example #1
def task(ctx, config):
    """
    Stress test the monitors by thrashing them while another task/workunit
    is running.

    Please refer to MonitorThrasher class for further information on the
    available options.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mon_thrash task only accepts a dict for configuration'
    assert len(_get_mons(ctx)) > 2, \
        'mon_thrash task requires at least 3 monitors'
    log.info('Beginning mon_thrash...')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()
    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )
    thrash_proc = MonitorThrasher(ctx,
        manager, config,
        logger=log.getChild('mon_thrasher'))
    try:
        log.debug('Yielding')
        yield
    finally:
        log.info('joining mon_thrasher')
        thrash_proc.do_join()
        mons = _get_mons(ctx)
        manager.wait_for_mon_quorum_size(len(mons))
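The assertion above relies on a _get_mons() helper that is not shown in this excerpt. A minimal sketch of such a helper, assuming teuthology.is_type('mon') returns a role predicate the way it is used in Example #2:

def _get_mons(ctx):
    # Collect every mon role name across the cluster's remotes (sketch only).
    is_mon = teuthology.is_type('mon')
    return [role
            for _, roles in ctx.cluster.remotes.items()
            for role in roles
            if is_mon(role)]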
Example #2
def task(ctx, config):
    """
    Test monitor recovery from OSD
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    # stash a monmap for later
    mon.run(args=['ceph', 'mon', 'getmap', '-o', '/tmp/monmap'])

    manager = ceph_manager.CephManager(mon,
                                       ctx=ctx,
                                       logger=log.getChild('ceph_manager'))

    mons = ctx.cluster.only(teuthology.is_type('mon'))
    # note down the first cluster_name and mon_id
    # we will recover it later on
    cluster_name, _, mon_id = teuthology.split_role(first_mon)
    _nuke_mons(manager, mons, mon_id)
    default_keyring = '/etc/ceph/{cluster}.keyring'.format(
        cluster=cluster_name)
    keyring_path = config.get('keyring_path', default_keyring)
    _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path)
    _revive_mons(manager, mons, mon_id, keyring_path)
    _revive_mgrs(ctx, manager)
    _revive_osds(ctx, manager)
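The _nuke_mons, _rebuild_db, _revive_mons, _revive_mgrs and _revive_osds helpers live in the same module and are not shown here. Purely as an illustration of the revive step, a heavily hedged sketch of what _revive_mons() might do, reusing teuthology.split_role() from above and manager.revive_mon() from Example #14 (the real helper also handles the rebuilt monitor's store and keyring, which this sketch ignores):

def _revive_mons(manager, mons, recovered_mon_id, keyring_path):
    # Bring every monitor role found on the mon remotes back up.
    # recovered_mon_id and keyring_path are unused in this simplified sketch.
    for remote, roles in mons.remotes.items():
        for role in roles:
            _, type_, mon_id = teuthology.split_role(role)
            if type_ != 'mon':
                continue
            manager.revive_mon(mon_id)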
Example #3
    def __init__(self, ctx, admin_remote=None):
        self._ctx = ctx

        self.mds_ids = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
        if len(self.mds_ids) == 0:
            raise RuntimeError("This task requires at least one MDS")

        first_mon = misc.get_first_mon(ctx, None)
        if admin_remote is None:
            (self.admin_remote,
             ) = ctx.cluster.only(first_mon).remotes.keys()
        else:
            self.admin_remote = admin_remote
        self.mon_manager = ceph_manager.CephManager(
            self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
        if hasattr(self._ctx, "daemons"):
            # Presence of 'daemons' attribute implies ceph task rather than ceph_deploy task
            self.mds_daemons = dict([
                (mds_id, self._ctx.daemons.get_daemon('mds', mds_id))
                for mds_id in self.mds_ids
            ])

        client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client'))
        self.client_id = client_list[0]
        self.client_remote = list(
            misc.get_clients(ctx=ctx,
                             roles=["client.{0}".format(self.client_id)
                                    ]))[0][1]
Example #4
def task(ctx, config):
    """
    Die if {testdir}/err exists or if an OSD dumps core
    """
    if config is None:
        config = {}

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < num_osds:
        time.sleep(10)

    testdir = teuthology.get_testdir(ctx)

    while True:
        for i in range(num_osds):
            (osd_remote, ) = ctx.cluster.only('osd.%d' % i).remotes.keys()
            p = osd_remote.run(
                args=['test', '-e', '{tdir}/err'.format(tdir=testdir)],
                wait=True,
                check_status=False,
            )
            exit_status = p.exitstatus

            if exit_status == 0:
                log.info("osd %d has an error" % i)
                raise Exception("osd %d error" % i)

            log_path = '/var/log/ceph/osd.%d.log' % (i)

            p = osd_remote.run(
                args=[
                    'tail', '-1', log_path,
                    run.Raw('|'), 'grep', '-q', 'end dump'
                ],
                wait=True,
                check_status=False,
            )
            exit_status = p.exitstatus

            if exit_status == 0:
                log.info("osd %d dumped core" % i)
                raise Exception("osd %d dumped core" % i)

        time.sleep(5)
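The watchdog above reacts to exactly two signals: an err file under the test directory, or an "end dump" line at the tail of an OSD log. As an illustration only (this helper is hypothetical, not part of the task), a workunit could trip the first check like this:

def flag_error(remote, testdir):
    # Creating {testdir}/err makes the watchdog loop raise on its next pass.
    remote.run(args=['touch', '{tdir}/err'.format(tdir=testdir)])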
Example #5
    def __init__(self, ctx):
        self.mds_ids = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
        self._ctx = ctx

        if len(self.mds_ids) == 0:
            raise RuntimeError("This task requires at least one MDS")

        # NOTE: self.admin_remote is assumed to be provided elsewhere (this
        # fragment does not define it).
        self.mon_manager = ceph_manager.CephManager(
            self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
        if hasattr(self._ctx, "daemons"):
            # Presence of 'daemons' attribute implies ceph task rather than ceph_deploy task
            self.mds_daemons = dict([(mds_id, self._ctx.daemons.get_daemon('mds', mds_id)) for mds_id in self.mds_ids])
Example #6
def task(ctx, config):
    """
    Run scrub periodically. Randomly chooses an OSD to scrub.

    The config should be as follows:

    scrub:
        frequency: <seconds between scrubs>
        deep: <bool for deepness>

    example:

    tasks:
    - ceph:
    - scrub:
        frequency: 30
        deep: 0
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'scrub task only accepts a dict for configuration'

    log.info('Beginning scrub...')

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    while len(manager.get_osd_status()['up']) < num_osds:
        time.sleep(10)

    scrub_proc = Scrubber(
        manager,
        config,
    )
    try:
        yield
    finally:
        log.info('joining scrub')
        scrub_proc.do_join()
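The Scrubber class is not included in this excerpt. A minimal sketch of the loop it runs, assuming a thread-based implementation that starts itself on construction (which matches how the task above never calls start()) and using only the manager calls seen in these examples; the real class differs in details:

import random
import threading
import time

class Scrubber(threading.Thread):
    def __init__(self, manager, config):
        super(Scrubber, self).__init__()
        self.manager = manager
        self.frequency = config.get('frequency', 30)
        self.deep = bool(config.get('deep', False))
        self.stopping = False
        self.start()

    def do_join(self):
        self.stopping = True
        self.join()

    def run(self):
        while not self.stopping:
            up_osds = self.manager.get_osd_status()['up']
            if up_osds:
                osd = random.choice(up_osds)
                cmd = 'deep-scrub' if self.deep else 'scrub'
                self.manager.raw_cluster_cmd('osd', cmd, str(osd))
            time.sleep(self.frequency)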
Example #7
    def __init__(self, ctx):
        self._ctx = ctx

        self.mds_ids = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
        if len(self.mds_ids) == 0:
            raise RuntimeError("This task requires at least one MDS")

        first_mon = misc.get_first_mon(ctx, None)
        (self.mon_remote, ) = ctx.cluster.only(first_mon).remotes.keys()
        self.mon_manager = ceph_manager.CephManager(
            self.mon_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
        self.mds_daemons = dict([(mds_id,
                                  self._ctx.daemons.get_daemon('mds', mds_id))
                                 for mds_id in self.mds_ids])

        client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client'))
        self.client_id = client_list[0]
        self.client_remote = list(
            misc.get_clients(ctx=ctx,
                             roles=["client.{0}".format(self.client_id)
                                    ]))[0][1]
Example #8
def task(ctx, config):
    """
    Stress test the mds by thrashing while another task/workunit
    is running.

    Please refer to MDSThrasher class for further information on the
    available options.
    """

    mds_cluster = MDSCluster(ctx)

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'mds_thrash task only accepts a dict for configuration'
    mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds'))
    assert len(mdslist) > 1, \
        'mds_thrash task requires at least 2 metadata servers'

    # choose random seed
    if 'seed' in config:
        seed = int(config['seed'])
    else:
        seed = int(time.time())
    log.info('mds thrasher using random seed: {seed}'.format(seed=seed))
    random.seed(seed)

    (first, ) = ctx.cluster.only(
        'mds.{_id}'.format(_id=mdslist[0])).remotes.keys()
    manager = ceph_manager.CephManager(
        first,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    # make sure everyone is in active, standby, or standby-replay
    log.info('Wait for all MDSs to reach steady state...')
    status = mds_cluster.status()
    while True:
        steady = True
        for info in status.get_all():
            state = info['state']
            if state not in ('up:active', 'up:standby', 'up:standby-replay'):
                steady = False
                break
        if steady:
            break
        sleep(2)
        status = mds_cluster.status()
    log.info('Ready to start thrashing')

    manager.wait_for_clean()
    assert manager.is_clean()

    if 'cluster' not in config:
        config['cluster'] = 'ceph'

    for fs in status.get_filesystems():
        thrasher = MDSThrasher(ctx, manager, config, Filesystem(ctx, fs['id']),
                               fs['mdsmap']['max_mds'])
        thrasher.start()
        ctx.ceph[config['cluster']].thrashers.append(thrasher)

    try:
        log.debug('Yielding')
        yield
    finally:
        log.info('joining mds_thrasher')
        thrasher.stop()
        if thrasher.exception is not None:
            raise RuntimeError('error during thrashing')
        thrasher.join()
        log.info('done joining')
Example #9
def task(ctx, config):
    """
    Test handling of object location going down
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'

    # take 0, 1 out
    manager.mark_out_osd(0)
    manager.mark_out_osd(1)
    manager.wait_for_clean()

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
            'tell', 'osd.0',
            'injectargs',
            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
            )
    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
            'tell', 'osd.1',
            'injectargs',
            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
            )
    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
            'tell', 'osd.2',
            'injectargs',
            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
            )
    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
            'tell', 'osd.3',
            'injectargs',
            '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
            )

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile])

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])

    manager.mark_out_osd(3)
    manager.wait_till_active()

    manager.mark_in_osd(0)
    manager.wait_till_active()

    manager.flush_pg_stats([2, 0])

    manager.mark_out_osd(2)
    manager.wait_till_active()

    # bring up 1
    manager.mark_in_osd(1)
    manager.wait_till_active()

    manager.flush_pg_stats([0, 1])
    log.info("Getting unfound objects")
    unfound = manager.get_num_unfound_objects()
    assert not unfound

    manager.kill_osd(2)
    manager.mark_down_osd(2)
    manager.kill_osd(3)
    manager.mark_down_osd(3)

    manager.flush_pg_stats([0, 1])
    log.info("Getting unfound objects")
    unfound = manager.get_num_unfound_objects()
    assert unfound
Example #10
def task(ctx, config):
    """
    Test the dump_stuck command.

    :param ctx: Context
    :param config: Configuration
    """
    assert config is None, \
        'dump_stuck requires no configuration'
    assert teuthology.num_instances_of_type(ctx.cluster, 'osd') == 2, \
        'dump_stuck requires exactly 2 osds'

    timeout = 60
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    manager.flush_pg_stats([0, 1])
    manager.wait_for_clean(timeout)

    manager.raw_cluster_cmd(
        'tell',
        'mon.0',
        'injectargs',
        '--',
        #                            '--mon-osd-report-timeout 90',
        '--mon-pg-stuck-threshold 10')

    # all active+clean
    check_stuck(
        manager,
        num_inactive=0,
        num_unclean=0,
        num_stale=0,
    )
    num_pgs = manager.get_num_pgs()

    manager.mark_out_osd(0)
    time.sleep(timeout)
    manager.flush_pg_stats([1])
    manager.wait_for_recovery(timeout)

    # all active+clean+remapped
    check_stuck(
        manager,
        num_inactive=0,
        num_unclean=0,
        num_stale=0,
    )

    manager.mark_in_osd(0)
    manager.flush_pg_stats([0, 1])
    manager.wait_for_clean(timeout)

    # all active+clean
    check_stuck(
        manager,
        num_inactive=0,
        num_unclean=0,
        num_stale=0,
    )

    log.info('stopping first osd')
    manager.kill_osd(0)
    manager.mark_down_osd(0)
    manager.wait_for_active(timeout)

    log.info('waiting for all to be unclean')
    starttime = time.time()
    done = False
    while not done:
        try:
            check_stuck(
                manager,
                num_inactive=0,
                num_unclean=num_pgs,
                num_stale=0,
            )
            done = True
        except AssertionError:
            # wait up to 15 minutes to become unclean
            if time.time() - starttime > 900:
                raise

    log.info('stopping second osd')
    manager.kill_osd(1)
    manager.mark_down_osd(1)

    log.info('waiting for all to be stale')
    starttime = time.time()
    done = False
    while not done:
        try:
            check_stuck(
                manager,
                num_inactive=0,
                num_unclean=num_pgs,
                num_stale=num_pgs,
            )
            done = True
        except AssertionError:
            # wait up to 15 minutes to become stale
            if time.time() - starttime > 900:
                raise

    log.info('reviving')
    for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'):
        manager.revive_osd(id_)
        manager.mark_in_osd(id_)
    while True:
        try:
            manager.flush_pg_stats([0, 1])
            break
        except Exception:
            log.exception('osds must not be started yet, waiting...')
            time.sleep(1)
    manager.wait_for_clean(timeout)

    check_stuck(
        manager,
        num_inactive=0,
        num_unclean=0,
        num_stale=0,
    )
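check_stuck() is defined alongside this task; a hedged sketch of the check it performs, built on "ceph pg dump_stuck" via manager.raw_cluster_cmd() (the exact output format and parsing in the real helper may differ):

import json

def check_stuck(manager, num_inactive, num_unclean, num_stale):
    # Count the pgs the cluster reports as stuck in each category and compare
    # them with the expected numbers.
    def count(kind):
        out = manager.raw_cluster_cmd('pg', 'dump_stuck', kind, '--format=json')
        return len(json.loads(out)) if out.strip() else 0

    assert count('inactive') == num_inactive
    assert count('unclean') == num_unclean
    assert count('stale') == num_stale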
Example #11
def task(ctx, config):
    """
    Test backfill
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'thrashosds task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)
    assert num_osds == 3

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.flush_pg_stats([0, 1, 2])
    manager.wait_for_clean()

    # write some data
    p = rados_start(
        ctx, mon,
        ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096', '--no-cleanup'])
    err = p.wait()
    log.info('err is %d' % err)

    # mark osd.0 out to trigger a rebalance/backfill
    manager.mark_out_osd(0)

    # also mark it down so it won't be included in pg_temps
    manager.kill_osd(0)
    manager.mark_down_osd(0)

    # wait for everything to peer and be happy...
    manager.flush_pg_stats([1, 2])
    manager.wait_for_recovery()

    # write some new data
    p = rados_start(
        ctx, mon,
        ['-p', 'rbd', 'bench', '30', 'write', '-b', '4096', '--no-cleanup'])

    time.sleep(15)

    # blackhole + restart osd.1
    # this triggers a divergent backfill target
    manager.blackhole_kill_osd(1)
    time.sleep(2)
    manager.revive_osd(1)

    # wait for our writes to complete + succeed
    err = p.wait()
    log.info('err is %d' % err)

    # wait for osd.1 and osd.2 to be up
    manager.wait_till_osd_is_up(1)
    manager.wait_till_osd_is_up(2)

    # cluster must recover
    manager.flush_pg_stats([1, 2])
    manager.wait_for_recovery()

    # re-add osd.0
    manager.revive_osd(0)
    manager.flush_pg_stats([1, 2])
    manager.wait_for_clean()
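rados_start() launches a rados command on a remote without waiting, so the write load can overlap the OSD thrashing. A minimal sketch of the idea, reusing the coverage/ulimit wrapper pattern from Example #17 (note that the variants in Examples #13 and #15 take testdir rather than ctx as their first argument):

def rados_start(ctx, remote, cmd):
    # Kick off `rados <cmd>` in the background and return the running process
    # so the caller can wait() on it later.
    testdir = teuthology.get_testdir(ctx)
    return remote.run(
        args=['adjust-ulimits',
              'ceph-coverage',
              '{tdir}/archive/coverage'.format(tdir=testdir),
              'rados'] + cmd,
        wait=False,
    )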
Example #12
def task(ctx, config):
    """
    Test that backfill reservation calculates the "toofull" condition correctly.

    A pretty rigid cluster is brought up and tested by this task
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'backfill_toofull task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    profile = config.get('erasure_code_profile', {
        'k': '2',
        'm': '1',
        'crush-failure-domain': 'osd'
    })
    profile_name = profile.get('name', 'backfill_toofull')
    manager.create_erasure_code_profile(profile_name, profile)
    pool = manager.create_pool_with_unique_name(
        pg_num=1, erasure_code_profile_name=profile_name, min_size=2)
    manager.raw_cluster_cmd('osd', 'pool', 'set', pool, 'pg_autoscale_mode',
                            'off')

    manager.flush_pg_stats([0, 1, 2, 3])
    manager.wait_for_clean()

    pool_id = manager.get_pool_num(pool)
    pgid = '%d.0' % pool_id
    pgs = manager.get_pg_stats()
    acting = next((pg['acting'] for pg in pgs if pg['pgid'] == pgid), None)
    log.debug("acting=%s" % acting)
    assert acting
    primary = acting[0]
    target = acting[1]

    log.debug("write some data")
    rados(ctx, mon, ['-p', pool, 'bench', '120', 'write', '--no-cleanup'])
    df = manager.get_osd_df(target)
    log.debug("target osd df: %s" % df)

    total_kb = df['kb']
    used_kb = df['kb_used']

    log.debug("pause recovery")
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'nobackfill')
    manager.raw_cluster_cmd('osd', 'set', 'norecover')

    log.debug("stop tartget osd %s" % target)
    manager.kill_osd(target)
    manager.wait_till_active()

    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.debug('pg=%s' % pg)
    assert pg

    log.debug("re-write data")
    rados(ctx, mon, ['-p', pool, 'cleanup'])
    time.sleep(10)
    rados(ctx, mon, ['-p', pool, 'bench', '60', 'write', '--no-cleanup'])

    df = manager.get_osd_df(primary)
    log.debug("primary osd df: %s" % df)

    primary_used_kb = df['kb_used']

    log.info("test backfill reservation rejected with toofull")

    # We set backfillfull ratio less than new data size and expect the pg
    # entering backfill_toofull state.
    #
    # We also need to update nearfull ratio to prevent "full ratio(s) out of order".

    backfillfull = 0.9 * primary_used_kb / total_kb
    nearfull = backfillfull * 0.9

    log.debug("update nearfull ratio to %s and backfillfull ratio to %s" %
              (nearfull, backfillfull))
    manager.raw_cluster_cmd('osd', 'set-nearfull-ratio',
                            '{:.3f}'.format(nearfull + 0.001))
    manager.raw_cluster_cmd('osd', 'set-backfillfull-ratio',
                            '{:.3f}'.format(backfillfull + 0.001))

    log.debug("start tartget osd %s" % target)

    manager.revive_osd(target)
    manager.wait_for_active()
    manager.wait_till_osd_is_up(target)

    wait_for_pg_state(manager, pgid, 'backfill_toofull', target)

    log.info("test pg not enter backfill_toofull after restarting backfill")

    # We want to set backfillfull ratio to be big enough for the target to
    # successfully backfill new data but smaller than the sum of old and new
    # data, so if the osd backfill reservation incorrectly calculates "toofull"
    # the test will detect this (fail).
    #
    # Note, we need to operate with "uncompressed" bytes because currently
    # osd backfill reservation does not take compression into account.
    #
    # We also need to update nearfull ratio to prevent "full ratio(s) out of order".

    pdf = manager.get_pool_df(pool)
    log.debug("pool %s df: %s" % (pool, pdf))
    assert pdf
    compress_ratio = 1.0 * pdf['compress_under_bytes'] / pdf['compress_bytes_used'] \
        if pdf['compress_bytes_used'] > 0 else 1.0
    log.debug("compress_ratio: %s" % compress_ratio)

    backfillfull = (used_kb + primary_used_kb) * compress_ratio / total_kb
    assert backfillfull < 0.9
    nearfull_min = max(used_kb, primary_used_kb) * compress_ratio / total_kb
    assert nearfull_min < backfillfull
    delta = backfillfull - nearfull_min
    nearfull = nearfull_min + delta * 0.1
    backfillfull = nearfull_min + delta * 0.2
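    # Worked example with round, illustrative numbers (not from a real run):
    # used_kb=300000, primary_used_kb=500000, total_kb=1000000, compress_ratio=1.0
    #   backfillfull = (300000 + 500000) / 1000000 = 0.8   (< 0.9, ok)
    #   nearfull_min = max(300000, 500000) / 1000000 = 0.5
    #   delta        = 0.8 - 0.5 = 0.3
    #   nearfull     = 0.5 + 0.3 * 0.1 = 0.53
    #   backfillfull = 0.5 + 0.3 * 0.2 = 0.56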

    log.debug("update nearfull ratio to %s and backfillfull ratio to %s" %
              (nearfull, backfillfull))
    manager.raw_cluster_cmd('osd', 'set-nearfull-ratio',
                            '{:.3f}'.format(nearfull + 0.001))
    manager.raw_cluster_cmd('osd', 'set-backfillfull-ratio',
                            '{:.3f}'.format(backfillfull + 0.001))

    wait_for_pg_state(manager, pgid, 'backfilling', target)

    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.debug('pg=%s' % pg)
    assert pg

    log.debug("interrupt %s backfill" % target)
    manager.mark_down_osd(target)
    # after marking the target osd down it will automatically be
    # up soon again

    log.debug("resume recovery")
    manager.raw_cluster_cmd('osd', 'unset', 'noout')
    manager.raw_cluster_cmd('osd', 'unset', 'nobackfill')
    manager.raw_cluster_cmd('osd', 'unset', 'norecover')

    # wait for everything to peer, backfill and recover
    manager.wait_for_clean()

    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.info('pg=%s' % pg)
    assert pg
    assert 'clean' in pg['state'].split('+')
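wait_for_pg_state() is shared with other backfill tests and not shown here. A hedged sketch of its behaviour, using only pg-stat fields that appear elsewhere in these examples (the real helper's checks and timeout handling may differ):

import time

def wait_for_pg_state(manager, pgid, state, osd, timeout=300):
    # Poll pg stats until the pg reports the wanted state and the osd appears
    # in its up set.
    start = time.time()
    while time.time() - start < timeout:
        pg = next((p for p in manager.get_pg_stats() if p['pgid'] == pgid), None)
        if pg and state in pg['state'].split('+') and osd in pg['up']:
            return
        time.sleep(2)
    raise RuntimeError('pg %s did not reach %s within %ds' % (pgid, state, timeout))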
Example #13
def test_incomplete_pgs(ctx, config):
    """
    Test handling of incomplete pgs.  Requires 4 osds.
    """
    testdir = teuthology.get_testdir(ctx)
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)
    assert num_osds == 4

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < 4:
        time.sleep(10)

    manager.flush_pg_stats([0, 1, 2, 3])
    manager.wait_for_clean()

    log.info('Testing incomplete pgs...')

    for i in range(4):
        manager.set_config(i, osd_recovery_delay_start=1000)

    # move data off of osd.0, osd.1
    manager.raw_cluster_cmd('osd', 'out', '0', '1')
    manager.flush_pg_stats([0, 1, 2, 3], [0, 1])
    manager.wait_for_clean()

    # lots of objects in rbd (no pg log, will backfill)
    p = rados_start(
        testdir, mon,
        ['-p', 'rbd', 'bench', '20', 'write', '-b', '1', '--no-cleanup'])
    p.wait()

    # few objects in rbd pool (with pg log, normal recovery)
    for f in range(1, 20):
        p = rados_start(testdir, mon,
                        ['-p', 'rbd', 'put',
                         'foo.%d' % f, '/etc/passwd'])
        p.wait()

    # move it back
    manager.raw_cluster_cmd('osd', 'in', '0', '1')
    manager.raw_cluster_cmd('osd', 'out', '2', '3')
    time.sleep(10)
    manager.flush_pg_stats([0, 1, 2, 3], [2, 3])
    time.sleep(10)
    manager.wait_for_active()

    assert not manager.is_clean()
    assert not manager.is_recovered()

    # kill 2 + 3
    log.info('stopping 2,3')
    manager.kill_osd(2)
    manager.kill_osd(3)
    log.info('...')
    manager.raw_cluster_cmd('osd', 'down', '2', '3')
    manager.flush_pg_stats([0, 1])
    manager.wait_for_active_or_down()

    assert manager.get_num_down() > 0

    # revive 2 + 3
    manager.revive_osd(2)
    manager.revive_osd(3)
    while len(manager.get_osd_status()['up']) < 4:
        log.info('waiting a bit...')
        time.sleep(2)
    log.info('all are up!')

    for i in range(4):
        manager.kick_recovery_wq(i)

    # cluster must recover
    manager.wait_for_clean()
Example #14
def task(ctx, config):
    """
    Test monitor recovery.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    mons = [f.split('.')[1] for f in teuthology.get_mon_names(ctx)]
    log.info("mon ids = %s" % mons)

    manager.wait_for_mon_quorum_size(len(mons))

    log.info('verifying all monitors are in the quorum')
    for m in mons:
        s = manager.get_mon_status(m)
        assert s['state'] == 'leader' or s['state'] == 'peon'
        assert len(s['quorum']) == len(mons)

    log.info('restarting each monitor in turn')
    for m in mons:
        # stop a monitor
        manager.kill_mon(m)
        manager.wait_for_mon_quorum_size(len(mons) - 1)

        # restart
        manager.revive_mon(m)
        manager.wait_for_mon_quorum_size(len(mons))

    # stop and restart all monitors, in forward and then in reverse order
    rmons = list(reversed(mons))
    for mon_list in (mons, rmons):
        log.info('stopping all monitors')
        for m in mon_list:
            manager.kill_mon(m)

        log.info('forming a minimal quorum for %s, then adding monitors' %
                 mon_list)
        qnum = (len(mon_list) // 2) + 1
        num = 0
        for m in mon_list:
            manager.revive_mon(m)
            num += 1
            if num >= qnum:
                manager.wait_for_mon_quorum_size(num)

    # on both leader and non-leader ranks...
    for rank in [0, 1]:
        # take one out
        log.info('removing mon %s' % mons[rank])
        manager.kill_mon(mons[rank])
        manager.wait_for_mon_quorum_size(len(mons) - 1)

        log.info('causing some monitor log activity')
        m = 30
        for n in range(1, m):
            manager.raw_cluster_cmd('log', '%d of %d' % (n, m))

        log.info('adding mon %s back in' % mons[rank])
        manager.revive_mon(mons[rank])
        manager.wait_for_mon_quorum_size(len(mons))
Example #15
def task(ctx, config):
    """
    Test (non-backfill) recovery
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'
    testdir = teuthology.get_testdir(ctx)
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)
    assert num_osds == 3

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.flush_pg_stats([0, 1, 2])
    manager.wait_for_clean()

    # test some osdmap flags
    manager.raw_cluster_cmd('osd', 'set', 'noin')
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'noup')
    manager.raw_cluster_cmd('osd', 'set', 'nodown')
    manager.raw_cluster_cmd('osd', 'unset', 'noin')
    manager.raw_cluster_cmd('osd', 'unset', 'noout')
    manager.raw_cluster_cmd('osd', 'unset', 'noup')
    manager.raw_cluster_cmd('osd', 'unset', 'nodown')

    # write some new data
    p = rados_start(
        testdir, mon,
        ['-p', 'rbd', 'bench', '20', 'write', '-b', '4096', '--no-cleanup'])

    time.sleep(15)

    # trigger a divergent target:
    #  blackhole + restart osd.1 (shorter log)
    manager.blackhole_kill_osd(1)
    #  kill osd.2 (longer log... we'll make it divergent below)
    manager.kill_osd(2)
    time.sleep(2)
    manager.revive_osd(1)

    # wait for our writes to complete + succeed
    err = p.wait()
    log.info('err is %d' % err)

    # cluster must repeer
    manager.flush_pg_stats([0, 1])
    manager.wait_for_active_or_down()

    # write some more (make sure osd.2 really is divergent)
    p = rados_start(testdir, mon,
                    ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096'])
    p.wait()

    # revive divergent osd
    manager.revive_osd(2)

    while len(manager.get_osd_status()['up']) < 3:
        log.info('waiting a bit...')
        time.sleep(2)
    log.info('3 are up!')

    # cluster must recover
    manager.flush_pg_stats([0, 1, 2])
    manager.wait_for_clean()
Example #16
    def __init__(self, ctx):
        self._ctx = ctx
        # NOTE: self.admin_remote is assumed to be provided elsewhere (this
        # fragment does not define it).
        self.mon_manager = ceph_manager.CephManager(
            self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
Example #17
def task(ctx, config):
    """
    Test handling of lost objects on an ec pool.

    A pretty rigid cluster is brought up and tested by this task
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    manager.wait_for_clean()

    profile = config.get('erasure_code_profile', {
        'k': '2',
        'm': '2',
        'crush-failure-domain': 'osd'
    })
    profile_name = profile.get('name', 'lost_unfound')
    manager.create_erasure_code_profile(profile_name, profile)
    pool = manager.create_pool_with_unique_name(
        erasure_code_profile_name=profile_name,
        min_size=2)

    # something that is always there, readable and never empty
    dummyfile = '/etc/group'

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f])

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
            'tell', 'osd.1',
            'injectargs',
            '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
            )

    manager.kill_osd(0)
    manager.mark_down_osd(0)
    manager.kill_osd(3)
    manager.mark_down_osd(3)
    
    for f in range(1, 10):
        rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])

    # take out osd.1 and a necessary shard of those objects.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')
    manager.revive_osd(0)
    manager.wait_till_osd_is_up(0)
    manager.revive_osd(3)
    manager.wait_till_osd_is_up(3)

    manager.flush_pg_stats([0, 2, 3])
    manager.wait_till_active()
    manager.flush_pg_stats([0, 2, 3])

    # verify that there are unfound objects
    unfound = manager.get_num_unfound_objects()
    log.info("there are %d unfound objects" % unfound)
    assert unfound

    testdir = teuthology.get_testdir(ctx)
    procs = []
    if config.get('parallel_bench', True):
        procs.append(mon.run(
            args=[
                "/bin/sh", "-c",
                " ".join(['adjust-ulimits',
                          'ceph-coverage',
                          '{tdir}/archive/coverage',
                          'rados',
                          '--no-log-to-stderr',
                          '--name', 'client.admin',
                          '-b', str(4<<10),
                          '-p' , pool,
                          '-t', '20',
                          'bench', '240', 'write',
                      ]).format(tdir=testdir),
            ],
            logger=log.getChild('radosbench.{id}'.format(id='client.admin')),
            stdin=run.PIPE,
            wait=False
        ))
    time.sleep(10)

    # mark stuff lost
    pgs = manager.get_pg_stats()
    for pg in pgs:
        if pg['stat_sum']['num_objects_unfound'] > 0:
            # verify that i can list them direct from the osd
            log.info('listing missing/lost in %s state %s', pg['pgid'],
                     pg['state']);
            m = manager.list_pg_unfound(pg['pgid'])
            log.info('%s' % m)
            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']

            log.info("reverting unfound in %s", pg['pgid'])
            manager.raw_cluster_cmd('pg', pg['pgid'],
                                    'mark_unfound_lost', 'delete')
        else:
            log.info("no unfound in %s", pg['pgid'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5')
    manager.flush_pg_stats([0, 2, 3])
    manager.wait_for_recovery()

    if not config.get('parallel_bench', True):
        time.sleep(20)

    # verify result
    for f in range(1, 10):
        err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-'])
        assert err

    # see if osd.1 can cope
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.wait_for_clean()
    run.wait(procs)
Example #18
def task(ctx, config):
    """
    Test peering.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'peer task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.flush_pg_stats([0, 1, 2])
    manager.wait_for_clean()

    for i in range(3):
        manager.set_config(i, osd_recovery_delay_start=120)

    # take one osd down
    manager.kill_osd(2)
    manager.mark_down_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-'])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    # kill another and revive 2, so that some pgs can't peer.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.revive_osd(2)
    manager.wait_till_osd_is_up(2)

    manager.flush_pg_stats([0, 2])

    manager.wait_for_active_or_down()

    manager.flush_pg_stats([0, 2])

    # look for down pgs
    num_down_pgs = 0
    pgs = manager.get_pg_stats()
    for pg in pgs:
        out = manager.raw_cluster_cmd('pg', pg['pgid'], 'query')
        log.debug("out string %s", out)
        j = json.loads(out)
        log.info("pg is %s, query json is %s", pg, j)

        if pg['state'].count('down'):
            num_down_pgs += 1
            # verify that it is blocked on osd.1
            rs = j['recovery_state']
            assert len(rs) >= 2
            assert rs[0]['name'] == 'Started/Primary/Peering/Down'
            assert rs[1]['name'] == 'Started/Primary/Peering'
            assert rs[1]['blocked']
            assert rs[1]['down_osds_we_would_probe'] == [1]
            assert len(rs[1]['peering_blocked_by']) == 1
            assert rs[1]['peering_blocked_by'][0]['osd'] == 1

    assert num_down_pgs > 0

    # bring it all back
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.flush_pg_stats([0, 1, 2])
    manager.wait_for_clean()
Example #19
def task(ctx, config):
    """
    Test [deep] scrub

    tasks:
    - chef:
    - install:
    - ceph:
        log-ignorelist:
        - '!= data_digest'
        - '!= omap_digest'
        - '!= size'
        - deep-scrub 0 missing, 1 inconsistent objects
        - deep-scrub [0-9]+ errors
        - repair 0 missing, 1 inconsistent objects
        - repair [0-9]+ errors, [0-9]+ fixed
        - shard [0-9]+ .* : missing
        - deep-scrub 1 missing, 1 inconsistent objects
        - does not match object info size
        - attr name mistmatch
        - deep-scrub 1 missing, 0 inconsistent objects
        - failed to pick suitable auth object
        - candidate size [0-9]+ info size [0-9]+ mismatch
      conf:
        osd:
          osd deep scrub update digest min age: 0
    - scrub_test:
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'scrub_test task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
    log.info('num_osds is %s' % num_osds)

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < num_osds:
        time.sleep(10)

    for i in range(num_osds):
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs', '--',
                                '--osd-objectstore-fuse')
    manager.flush_pg_stats(range(num_osds))
    manager.wait_for_clean()

    osd_dump = manager.get_osd_dump_json()
    poolid = -1
    for p in osd_dump['pools']:
        if p['pool_name'] == 'rbd':
            poolid = p['pool']
            break
    assert poolid != -1

    # write some data
    p = manager.do_rados(['bench', '--no-cleanup', '1', 'write', '-b', '4096'],
                         pool='rbd')
    log.info('err is %d' % p.exitstatus)

    # wait for some PG to have data that we can mess with
    pg, acting = wait_for_victim_pg(manager, poolid)
    osd = acting[0]

    osd_remote, obj_path, obj_name = find_victim_object(ctx, pg, osd)
    manager.do_rados(['setomapval', obj_name, 'key', 'val'], pool='rbd')
    log.info('err is %d' % p.exitstatus)
    manager.do_rados(['setomapheader', obj_name, 'hdr'], pool='rbd')
    log.info('err is %d' % p.exitstatus)

    # Update missing digests, requires "osd deep scrub update digest min age: 0"
    pgnum = get_pgnum(pg)
    manager.do_pg_scrub('rbd', pgnum, 'deep-scrub')

    log.info('messing with PG %s on osd %d' % (pg, osd))
    test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, 'rbd')
    test_repair_bad_omap(ctx, manager, pg, osd, obj_name)
    test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd,
                               obj_name, obj_path)
    log.info('test successful!')

    # shut down fuse mount
    for i in range(num_osds):
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs', '--',
                                '--no-osd-objectstore-fuse')
    time.sleep(5)
    log.info('done')
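get_pgnum() simply peels the pg number off a pgid of the form <pool>.<pg> (for example '2.0'), since do_pg_scrub() is called with the pool name and the pg part separately. A trivial sketch (the real helper may validate its input):

def get_pgnum(pgid):
    # '2.0' -> '0'
    return pgid.split('.')[1]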
Example #20
def task(ctx, config):
    """
    Go through filesystem creation with a synthetic failure in an MDS
    in its 'up:creating' state, to exercise the retry behaviour.
    """
    # Grab handles to the teuthology objects of interest
    mdslist = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
    if len(mdslist) != 1:
        # Require exactly one MDS, the code path for creation failure when
        # a standby is available is different
        raise RuntimeError("This task requires exactly one MDS")

    mds_id = mdslist[0]
    (mds_remote, ) = ctx.cluster.only(
        'mds.{_id}'.format(_id=mds_id)).remotes.keys()
    manager = ceph_manager.CephManager(
        mds_remote,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    # Handles used below; these bindings are assumptions to keep the snippet
    # self-contained (a Filesystem wrapper and the MDS daemon object).
    fs = Filesystem(ctx)
    mds_daemon = ctx.daemons.get_daemon('mds', mds_id)

    # Stop MDS
    fs.set_max_mds(0)
    fs.mds_stop(mds_id)
    fs.mds_fail(mds_id)

    # Reset the filesystem so that next start will go into CREATING
    manager.raw_cluster_cmd('fs', 'rm', "default", "--yes-i-really-mean-it")
    manager.raw_cluster_cmd('fs', 'new', "default", "metadata", "data")

    # Start the MDS with mds_kill_create_at set, it will crash during creation
    mds_daemon.restart_with_args(["--mds_kill_create_at=1"])
    try:
        mds_daemon.wait_for_exit()
    except CommandFailedError as e:
        if e.exitstatus == 1:
            log.info("MDS creation killed as expected")
        else:
            log.error("Unexpected status code %s" % e.exitstatus)
            raise

    # Since I have intentionally caused a crash, I will clean up the resulting core
    # file to avoid task.internal.coredump seeing it as a failure.
    log.info("Removing core file from synthetic MDS failure")
    mds_remote.run(args=[
        'rm', '-f',
        Raw("{archive}/coredump/*.core".format(
            archive=misc.get_archive_dir(ctx)))
    ])

    # It should have left the MDS map state still in CREATING
    status = fs.status().get_mds(mds_id)
    assert status['state'] == 'up:creating'

    # Start the MDS again without the kill flag set, it should proceed with creation successfully
    mds_daemon.restart()

    # Wait for state ACTIVE
    fs.wait_for_state("up:active", timeout=120, mds_id=mds_id)

    # The system should be back up in a happy healthy state, go ahead and run any further tasks
    # inside this context.
    yield
Example #21
def task(ctx, config):
    """
    Stress test the mds by running scrub iterations while another task/workunit
    is running.
    Example config:

    - fwd_scrub:
      scrub_timeout: 300
      sleep_between_iterations: 1
    """

    mds_cluster = MDSCluster(ctx)

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'fwd_scrub task only accepts a dict for configuration'
    mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds'))
    assert len(mdslist) > 0, \
        'fwd_scrub task requires at least 1 metadata server'

    (first, ) = ctx.cluster.only(f'mds.{mdslist[0]}').remotes.keys()
    manager = ceph_manager.CephManager(
        first,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    # make sure everyone is in active, standby, or standby-replay
    log.info('Wait for all MDSs to reach steady state...')
    status = mds_cluster.status()
    while True:
        steady = True
        for info in status.get_all():
            state = info['state']
            if state not in ('up:active', 'up:standby', 'up:standby-replay'):
                steady = False
                break
        if steady:
            break
        sleep(2)
        status = mds_cluster.status()

    log.info('Ready to start scrub thrashing')

    manager.wait_for_clean()
    assert manager.is_clean()

    if 'cluster' not in config:
        config['cluster'] = 'ceph'

    for fs in status.get_filesystems():
        fwd_scrubber = ForwardScrubber(Filesystem(ctx, fscid=fs['id']),
                                       config['scrub_timeout'],
                                       config['sleep_between_iterations'])
        fwd_scrubber.start()
        ctx.ceph[config['cluster']].thrashers.append(fwd_scrubber)

    try:
        log.debug('Yielding')
        yield
    finally:
        log.info('joining ForwardScrubbers')
        stop_all_fwd_scrubbers(ctx.ceph[config['cluster']].thrashers)
        log.info('done joining')
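stop_all_fwd_scrubbers() mirrors the shutdown sequence spelled out inline in Example #8; a hedged sketch:

def stop_all_fwd_scrubbers(thrashers):
    # Stop and join every ForwardScrubber registered on the context, surfacing
    # any exception a scrubber recorded while running.
    for scrubber in thrashers:
        if not isinstance(scrubber, ForwardScrubber):
            continue
        scrubber.stop()
        scrubber.join()
        if scrubber.exception is not None:
            raise RuntimeError('error during scrub thrashing')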
Example #22
def task(ctx, config):
    """
    Test handling of lost objects.

    A pretty rigid cluster is brought up and tested by this task
    """
    POOL = 'unfound_pool'
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'lost_unfound task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    manager.wait_for_clean()

    manager.create_pool(POOL)

    # something that is always there
    dummyfile = '/etc/fstab'

    # take an osd out until the very end
    manager.kill_osd(2)
    manager.mark_down_osd(2)
    manager.mark_out_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    # create old objects
    for f in range(1, 10):
        rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f])

    # delay recovery, and make the pg log very long (to prevent backfill)
    manager.raw_cluster_cmd(
        'tell', 'osd.1', 'injectargs',
        '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000')

    manager.kill_osd(0)
    manager.mark_down_osd(0)

    for f in range(1, 10):
        rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile])
        rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile])

    # bring osd.0 back up, let it peer, but don't replicate the new
    # objects...
    log.info('osd.0 command_args is %s' % 'foo')
    log.info(ctx.daemons.get_daemon('osd', 0).command_args)
    ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend(
        ['--osd-recovery-delay-start', '1000'])
    manager.revive_osd(0)
    manager.mark_in_osd(0)
    manager.wait_till_osd_is_up(0)

    manager.flush_pg_stats([1, 0])
    manager.wait_till_active()

    # take out osd.1 and the only copy of those objects.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.mark_out_osd(1)
    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')

    # bring up osd.2 so that things would otherwise, in theory, recover fully
    manager.revive_osd(2)
    manager.mark_in_osd(2)
    manager.wait_till_osd_is_up(2)

    manager.flush_pg_stats([0, 2])
    manager.wait_till_active()
    manager.flush_pg_stats([0, 2])

    # verify that there are unfound objects
    unfound = manager.get_num_unfound_objects()
    log.info("there are %d unfound objects" % unfound)
    assert unfound

    testdir = teuthology.get_testdir(ctx)
    procs = []
    if config.get('parallel_bench', True):
        procs.append(
            mon.run(args=[
                "/bin/sh",
                "-c",
                " ".join([
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage',
                    'rados',
                    '--no-log-to-stderr',
                    '--name',
                    'client.admin',
                    '-b',
                    str(4 << 10),
                    '-p',
                    POOL,
                    '-t',
                    '20',
                    'bench',
                    '240',
                    'write',
                ]).format(tdir=testdir),
            ],
                    logger=log.getChild(
                        'radosbench.{id}'.format(id='client.admin')),
                    stdin=run.PIPE,
                    wait=False))
    time.sleep(10)

    # mark stuff lost
    pgs = manager.get_pg_stats()
    for pg in pgs:
        if pg['stat_sum']['num_objects_unfound'] > 0:
            primary = 'osd.%d' % pg['acting'][0]

            # verify that i can list them direct from the osd
            log.info('listing missing/lost in %s state %s', pg['pgid'],
                     pg['state'])
            m = manager.list_pg_unfound(pg['pgid'])
            #log.info('%s' % m)
            assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']
            assert m['available_might_have_unfound'] == True
            assert m['might_have_unfound'][0]['osd'] == "1"
            assert m['might_have_unfound'][0]['status'] == "osd is down"
            num_unfound = 0
            for o in m['objects']:
                if len(o['locations']) == 0:
                    num_unfound += 1
            assert m['num_unfound'] == num_unfound

            log.info("reverting unfound in %s on %s", pg['pgid'], primary)
            manager.raw_cluster_cmd('pg', pg['pgid'], 'mark_unfound_lost',
                                    'revert')
        else:
            log.info("no unfound in %s", pg['pgid'])

    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
    manager.flush_pg_stats([0, 2])
    manager.wait_for_recovery()

    # verify result
    for f in range(1, 10):
        err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-'])
        assert err
        err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-'])
        assert not err

    # see if osd.1 can cope
    manager.mark_in_osd(1)
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.wait_for_clean()
    run.wait(procs)
    manager.wait_for_clean()
Example #23
def task(ctx, config):
    """
    Test handling of objects with inconsistent hash info during backfill and deep-scrub.

    A pretty rigid cluster is brought up and tested by this task
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'ec_inconsistent_hinfo task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    profile = config.get('erasure_code_profile', {
        'k': '2',
        'm': '1',
        'crush-failure-domain': 'osd'
    })
    profile_name = profile.get('name', 'backfill_unfound')
    manager.create_erasure_code_profile(profile_name, profile)
    pool = manager.create_pool_with_unique_name(
        pg_num=1,
        erasure_code_profile_name=profile_name,
        min_size=2)
    manager.raw_cluster_cmd('osd', 'pool', 'set', pool,
                            'pg_autoscale_mode', 'off')

    manager.flush_pg_stats([0, 1, 2, 3])
    manager.wait_for_clean()

    pool_id = manager.get_pool_num(pool)
    pgid = '%d.0' % pool_id
    pgs = manager.get_pg_stats()
    acting = next((pg['acting'] for pg in pgs if pg['pgid'] == pgid), None)
    log.info("acting=%s" % acting)
    assert acting
    primary = acting[0]

    # something that is always there, readable and never empty
    dummyfile = '/etc/group'

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    log.debug("create test object")
    obj = 'test'
    rados(ctx, mon, ['-p', pool, 'put', obj, dummyfile])

    victim = acting[1]

    log.info("remove test object hash info from osd.%s shard and test deep-scrub and repair"
             % victim)

    manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key',
                             object_name=obj, osd=victim)
    check_time_now = time.strftime('%s')
    manager.raw_cluster_cmd('pg', 'deep-scrub', pgid)
    wait_for_deep_scrub_complete(manager, pgid, check_time_now, True)

    check_time_now = time.strftime('%s')
    manager.raw_cluster_cmd('pg', 'repair', pgid)
    wait_for_deep_scrub_complete(manager, pgid, check_time_now, False)

    log.info("remove test object hash info from primary osd.%s shard and test backfill"
             % primary)

    log.debug("write some data")
    rados(ctx, mon, ['-p', pool, 'bench', '30', 'write', '-b', '4096',
                     '--no-cleanup'])

    manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key',
                             object_name=obj, osd=primary)

    # mark the osd out to trigger a rebalance/backfill
    source = acting[1]
    target = [x for x in [0, 1, 2, 3] if x not in acting][0]
    manager.mark_out_osd(source)

    # wait for everything to peer, backfill and recover
    wait_for_backfilling_complete(manager, pgid, source, target)
    manager.wait_for_clean()

    manager.flush_pg_stats([0, 1, 2, 3])
    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.debug('pg=%s' % pg)
    assert pg
    assert 'clean' in pg['state'].split('+')
    assert 'inconsistent' not in pg['state'].split('+')
    unfound = manager.get_num_unfound_objects()
    log.debug("there are %d unfound objects" % unfound)
    assert unfound == 0

    source, target = target, source
    log.info("remove test object hash info from non-primary osd.%s shard and test backfill"
             % source)

    manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key',
                             object_name=obj, osd=source)

    # mark the osd in to trigger a rebalance/backfill
    manager.mark_in_osd(target)

    # wait for everything to peer, backfill and recover
    wait_for_backfilling_complete(manager, pgid, source, target)
    manager.wait_for_clean()

    manager.flush_pg_stats([0, 1, 2, 3])
    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.debug('pg=%s' % pg)
    assert pg
    assert 'clean' in pg['state'].split('+')
    assert 'inconsistent' not in pg['state'].split('+')
    unfound = manager.get_num_unfound_objects()
    log.debug("there are %d unfound objects" % unfound)
    assert unfound == 0

    log.info("remove hash info from two shards and test backfill")

    source = acting[2]
    target = [x for x in [0, 1, 2, 3] if x not in acting][0]
    manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key',
                             object_name=obj, osd=primary)
    manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key',
                             object_name=obj, osd=source)

    # mark the osd out to trigger a rebalance/backfill
    manager.mark_out_osd(source)

    # wait for everything to peer, backfill and detect unfound object
    wait_for_backfilling_complete(manager, pgid, source, target)

    # verify that there is unfound object
    manager.flush_pg_stats([0, 1, 2, 3])
    pgs = manager.get_pg_stats()
    pg = next((pg for pg in pgs if pg['pgid'] == pgid), None)
    log.debug('pg=%s' % pg)
    assert pg
    assert 'backfill_unfound' in pg['state'].split('+')
    unfound = manager.get_num_unfound_objects()
    log.debug("there are %d unfound objects" % unfound)
    assert unfound == 1
    m = manager.list_pg_unfound(pgid)
    log.debug('list_pg_unfound=%s' % m)
    assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']

    # mark stuff lost
    pgs = manager.get_pg_stats()
    manager.raw_cluster_cmd('pg', pgid, 'mark_unfound_lost', 'delete')

    # wait for everything to peer and be happy...
    manager.flush_pg_stats([0, 1, 2, 3])
    manager.wait_for_recovery()