def create_users(ctx, config):
    """
    Create a main and an alternate s3 user.

    Generator-style task: the users are created before the ``yield`` and
    removed again (``radosgw-admin user rm --purge-data``) when the
    generator is resumed.

    :param ctx: Context; used to find the test directory and to run
                commands on the remote holding each client role.
    :param config: dict with ``clients`` (the roles to create users for)
                   and ``s3tests_conf`` (per-client s3tests settings; the
                   ``fixtures`` defaults are added to it in place and it is
                   handed to ``_config_user``).
    """
    assert isinstance(config, dict)
    log.info('Creating rgw users...')
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    users = {'s3 main': 'foo', 's3 alt': 'bar'}
    for client in config['clients']:
        s3tests_conf = config['s3tests_conf'][client]
        s3tests_conf.setdefault('fixtures', {})
        s3tests_conf['fixtures'].setdefault('bucket prefix', 'test-' + client + '-{random}-')
        # The role is the same for both users, so split it once per client.
        cluster_name, daemon_type, client_id = teuthology.split_role(client)
        client_with_id = daemon_type + '.' + client_id
        # .items()/.values() work on Python 2 and 3; .iteritems() is 2-only.
        for section, user in users.items():
            _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client))
            log.debug('Creating user {user} on {host}'.format(user=s3tests_conf[section]['user_id'], host=client))
            ctx.cluster.only(client).run(
                args=[
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'radosgw-admin',
                    '-n', client_with_id,
                    'user', 'create',
                    '--uid', s3tests_conf[section]['user_id'],
                    '--display-name', s3tests_conf[section]['display_name'],
                    '--access-key', s3tests_conf[section]['access_key'],
                    '--secret', s3tests_conf[section]['secret_key'],
                    '--email', s3tests_conf[section]['email'],
                    '--cluster', cluster_name,
                ],
            )
    try:
        yield
    finally:
        for client in config['clients']:
            cluster_name, daemon_type, client_id = teuthology.split_role(client)
            client_with_id = daemon_type + '.' + client_id
            for user in users.values():
                uid = '{user}.{client}'.format(user=user, client=client)
                ctx.cluster.only(client).run(
                    args=[
                        'adjust-ulimits',
                        'ceph-coverage',
                        coverage_dir,
                        'radosgw-admin',
                        '-n', client_with_id,
                        'user', 'rm',
                        '--uid', uid,
                        '--purge-data',
                        '--cluster', cluster_name,
                    ],
                )
def create_users(ctx, config):
    """
    Create rgw users to interact with the swift interface.

    Generator-style task: the users are created before the ``yield`` and
    removed again (``radosgw-admin user rm --purge-data``) when the
    generator is resumed.

    :param ctx: Context; used to find the test directory and to run
                commands on the remote holding each client role.
    :param config: dict with ``clients`` (the roles to create users for)
                   and ``testswift_conf`` (per-client settings, passed to
                   ``_config_user`` and then read back from ``func_test``).
    """
    assert isinstance(config, dict)
    log.info('Creating rgw users...')
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    users = {'': 'foo', '2': 'bar'}
    for client in config['clients']:
        cluster_name, daemon_type, client_id = teuthology.split_role(client)
        # radosgw-admin gets the cluster through --cluster, so -n must be the
        # bare "type.id" name.  For an unqualified role this is the same
        # string as the role itself.
        client_with_id = daemon_type + '.' + client_id
        testswift_conf = config['testswift_conf'][client]
        func_test = None
        # .items()/.values() work on Python 2 and 3; .iteritems() is 2-only.
        for suffix, user in users.items():
            _config_user(testswift_conf, '{user}.{client}'.format(user=user, client=client), user, suffix)
            func_test = testswift_conf['func_test']
            ctx.cluster.only(client).run(
                args=[
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'radosgw-admin',
                    '-n', client_with_id,
                    '--cluster', cluster_name,
                    'user', 'create',
                    '--subuser', '{account}:{user}'.format(account=func_test['account{s}'.format(s=suffix)], user=user),
                    '--display-name', func_test['display_name{s}'.format(s=suffix)],
                    '--secret', func_test['password{s}'.format(s=suffix)],
                    '--email', func_test['email{s}'.format(s=suffix)],
                    '--key-type', 'swift',
                    '--access', 'full',
                ],
            )
    try:
        yield
    finally:
        for client in config['clients']:
            cluster_name, daemon_type, client_id = teuthology.split_role(client)
            client_with_id = daemon_type + '.' + client_id
            for user in users.values():
                uid = '{user}.{client}'.format(user=user, client=client)
                ctx.cluster.only(client).run(
                    args=[
                        'adjust-ulimits',
                        'ceph-coverage',
                        coverage_dir,
                        'radosgw-admin',
                        '-n', client_with_id,
                        '--cluster', cluster_name,
                        'user', 'rm',
                        '--uid', uid,
                        '--purge-data',
                    ],
                )
def _revive_mons(manager, mons, recovered, keyring_path):
    """
    Revive every monitor and wait for them to form a quorum.

    ``ceph-mon --mkfs`` is run first for each monitor whose id differs from
    ``recovered``; the recovered monitor is revived without mkfs.

    :param manager: object providing ``revive_mon`` and
                    ``wait_for_mon_quorum_size``.
    :param mons: cluster view whose ``remotes`` maps each remote to its roles.
    :param recovered: id of the monitor that must not be re-created.
    :param keyring_path: keyring passed to ``ceph-mon --mkfs``.
    """
    # revive monitors
    # the initial monmap is in the ceph.conf, so we are good.
    n_mons = 0
    is_mon = teuthology.is_type('mon')
    # .items() works on Python 2 and 3; .iteritems() is 2-only.
    for remote, roles in mons.remotes.items():
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            if recovered != m:
                log.info('running mkfs on {cluster}:mon.{mon}'.format(
                    cluster=cluster,
                    mon=m))
                remote.run(
                    args=[
                        'sudo',
                        'ceph-mon',
                        '--cluster', cluster,
                        '--mkfs',
                        '-i', m,
                        '--keyring', keyring_path])
            log.info('reviving mon.{0}'.format(m))
            manager.revive_mon(m)
            n_mons += 1
    manager.wait_for_mon_quorum_size(n_mons, timeout=30)
def wait_for_failure(ctx, config):
    """
    Wait for a failure of a ceph daemon

    For example::

      tasks:
      - ceph.wait_for_failure: [mds.*]

      tasks:
      - ceph.wait_for_failure: [osd.0, osd.2]

      tasks:
      - ceph.wait_for_failure:
          daemons: [osd.0, osd.2]

    :param ctx: Context providing the ``daemons`` registry.
    :param config: None, a list of daemon roles, or a dict with a
                   ``daemons`` list.
    :raises RuntimeError: if waiting on a daemon returns without raising,
                          i.e. the daemon did not fail.
    """
    if config is None:
        config = {}
    elif isinstance(config, list):
        config = {"daemons": config}

    daemons = ctx.daemons.resolve_role_list(config.get("daemons", None), CEPH_ROLE_TYPES, True)
    for role in daemons:
        cluster, type_, id_ = teuthology.split_role(role)
        try:
            ctx.daemons.get_daemon(type_, id_, cluster).wait()
        except Exception:
            # Not a bare ``except:`` -- KeyboardInterrupt and SystemExit must
            # propagate instead of being counted as the expected failure.
            log.info("Saw expected daemon failure. Continuing.")
        else:
            raise RuntimeError("daemon %s did not fail" % role)

    yield
def task(ctx, config):
    """
    Test monitor recovery from OSD

    Removes the monitors (``_nuke_mons``), rebuilds the monitor database
    for the first monitor (``_rebuild_db``), then revives the monitors,
    managers and OSDs.

    :param ctx: Context
    :param config: None or a dict; ``keyring_path`` overrides the default
                   ``/etc/ceph/<cluster>.keyring``.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    first_mon = teuthology.get_first_mon(ctx, config)
    # .keys() works on Python 2 and 3; .iterkeys() is 2-only.
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()
    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'))

    mons = ctx.cluster.only(teuthology.is_type('mon'))
    # note down the first cluster_name and mon_id
    # we will recover it later on
    cluster_name, _, mon_id = teuthology.split_role(first_mon)
    _nuke_mons(manager, mons, mon_id)
    default_keyring = '/etc/ceph/{cluster}.keyring'.format(
        cluster=cluster_name)
    keyring_path = config.get('keyring_path', default_keyring)
    _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path)
    _revive_mons(manager, mons, mon_id, keyring_path)
    _revive_mgrs(ctx, manager)
    _revive_osds(ctx, manager)
def stop(ctx, config):
    """
    Stop ceph daemons

    For example::

      tasks:
      - ceph.stop: [mds.*]

      tasks:
      - ceph.stop: [osd.0, osd.2]

      tasks:
      - ceph.stop:
          daemons: [osd.0, osd.2]

    :param ctx: Context providing the ``daemons`` registry.
    :param config: None, a list of daemon roles, or a dict with a
                   ``daemons`` list.
    """
    # Normalise the accepted config shapes into a dict.
    if isinstance(config, list):
        config = {"daemons": config}
    elif config is None:
        config = {}

    requested = config.get("daemons", None)
    for role in ctx.daemons.resolve_role_list(requested, CEPH_ROLE_TYPES, True):
        cluster, type_, id_ = teuthology.split_role(role)
        daemon = ctx.daemons.get_daemon(type_, id_, cluster)
        daemon.stop()

    yield
def _delete_dir(ctx, role, created_mountpoint):
    """
    Delete file used by this role, and delete the directory that this role
    appeared in.

    :param ctx: Context
    :param role: "role.#" where # is used for the role id.
    :param created_mountpoint: when true, the mount point itself is treated
                               as an artificially created directory and is
                               removed with ``rmdir`` as well.
    """
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)
    client = os.path.join(mnt, 'client.{id}'.format(id=id_))

    # Remove the directory inside the mount where the workunit ran
    remote.run(
        args=[
            'sudo',
            'rm',
            '-rf',
            '--',
            client,
        ],
    )
    log.info("Deleted dir {dir}".format(dir=client))

    # If the mount was an artificially created dir, delete that too
    if created_mountpoint:
        remote.run(
            args=[
                'rmdir',
                '--',
                mnt,
            ],
        )
        # Report the directory that was actually removed (the mount point,
        # not the per-client directory deleted above).
        log.info("Deleted artificial mount point {dir}".format(dir=mnt))
def osd_scrub_pgs(ctx, config):
    """
    Scrub pgs when we exit.

    First make sure all pgs are active and clean.
    Next scrub all osds.
    Then periodically check until all pgs have scrub time stamps that
    indicate the last scrub completed. Time out if no progress is made
    here after two minutes.

    :param ctx: Context providing ``managers`` and ``cluster``.
    :param config: dict whose ``cluster`` entry names the cluster to scrub.
    """
    retries = 12
    delays = 10
    cluster_name = config["cluster"]
    manager = ctx.managers[cluster_name]
    all_clean = False
    for _ in range(0, retries):
        stats = manager.get_pg_stats()
        states = [stat["state"] for stat in stats]
        if len(set(states)) == 1 and states[0] == "active+clean":
            all_clean = True
            break
        log.info("Waiting for all osds to be active and clean.")
        time.sleep(delays)
    if not all_clean:
        log.info("Scrubbing terminated -- not all pgs were active and clean.")
        return
    check_time_now = time.localtime()
    # Stamps are compared at one-second resolution, so make sure any scrub
    # requested below lands strictly after check_time_now.
    time.sleep(1)
    all_roles = teuthology.all_roles(ctx.cluster)
    for role in teuthology.cluster_roles_of_type(all_roles, "osd", cluster_name):
        log.info("Scrubbing {osd}".format(osd=role))
        _, _, id_ = teuthology.split_role(role)
        manager.raw_cluster_cmd("osd", "deep-scrub", id_)
    prev_good = 0
    gap_cnt = 0
    loop = True
    while loop:
        stats = manager.get_pg_stats()
        timez = [stat["last_scrub_stamp"] for stat in stats]
        loop = False
        thiscnt = 0
        for tmval in timez:
            # Drop the fractional seconds.  partition() leaves the stamp
            # intact when there is no "." (slicing up to find() == -1 would
            # chop off the last character instead).
            pgtm = time.strptime(tmval.partition(".")[0], "%Y-%m-%d %H:%M:%S")
            if pgtm > check_time_now:
                thiscnt += 1
            else:
                loop = True
        if thiscnt > prev_good:
            prev_good = thiscnt
            gap_cnt = 0
        else:
            # No additional pg finished since the last poll.
            gap_cnt += 1
            if gap_cnt > retries:
                log.info("Exiting scrub checking -- not all pgs scrubbed.")
                return
        if loop:
            log.info("Still waiting for all pgs to be scrubbed.")
            time.sleep(delays)
def start_apache(ctx, config, on_client=None, except_client=None):
    """
    Start apache on remote sites.

    Generator-style task: apache is started for each selected client before
    the ``yield``; on resume the stdin of every started process is closed
    and the processes are waited for.

    :param ctx: Context
    :param config: mapping keyed by client role; its keys are used when
                   ``on_client`` is None.
    :param on_client: run only for this client role instead of all keys of
                      ``config``.
    :param except_client: client role to skip.
    """
    log.info('Starting apache...')
    testdir = teuthology.get_testdir(ctx)
    apaches = {}
    clients_to_run = [on_client]
    if on_client is None:
        clients_to_run = config.keys()
    for client in clients_to_run:
        cluster_name, daemon_type, client_id = teuthology.split_role(client)
        client_with_cluster = cluster_name + '.' + daemon_type + '.' + client_id
        if client == except_client:
            continue
        (remote,) = ctx.cluster.only(client).remotes.keys()
        system_type = teuthology.get_system_type(remote)
        if system_type == 'deb':
            apache_name = 'apache2'
        else:
            # Use httpd.worker when the binary exists, plain httpd otherwise.
            try:
                remote.run(
                    args=[
                        'stat',
                        '/usr/sbin/httpd.worker',
                    ],
                )
                apache_name = '/usr/sbin/httpd.worker'
            except CommandFailedError:
                apache_name = '/usr/sbin/httpd'

        proc = remote.run(
            args=[
                'adjust-ulimits',
                'daemon-helper',
                'kill',
                apache_name,
                '-X',
                '-f',
                '{tdir}/apache/apache.{client_with_cluster}.conf'.format(tdir=testdir,
                                                                          client_with_cluster=client_with_cluster),
            ],
            logger=log.getChild(client),
            stdin=run.PIPE,
            wait=False,
        )
        apaches[client_with_cluster] = proc

    try:
        yield
    finally:
        log.info('Stopping apache...')
        # .values() works on Python 2 and 3; .iteritems()/.itervalues() are
        # 2-only.
        for proc in apaches.values():
            proc.stdin.close()

        run.wait(apaches.values())
def _revive_osds(ctx, manager):
    """
    Revive every OSD role found in the cluster.

    :param ctx: Context whose ``cluster`` is filtered down to OSD roles.
    :param manager: object providing ``revive_osd``.
    """
    is_osd = teuthology.is_type('osd')
    osds = ctx.cluster.only(is_osd)
    # Only the role lists are needed; .values() works on Python 2 and 3
    # whereas .iteritems() is 2-only.
    for roles in osds.remotes.values():
        for role in roles:
            if not is_osd(role):
                continue
            _, _, osd_id = teuthology.split_role(role)
            log.info('reviving osd.{0}'.format(osd_id))
            manager.revive_osd(osd_id)
def _revive_mgrs(ctx, manager):
    """
    Revive every mgr role found in the cluster.

    :param ctx: Context whose ``cluster`` is filtered down to mgr roles.
    :param manager: object providing ``revive_mgr``.
    """
    is_mgr = teuthology.is_type('mgr')
    mgrs = ctx.cluster.only(is_mgr)
    # Only the role lists are needed; .values() works on Python 2 and 3
    # whereas .iteritems() is 2-only.
    for roles in mgrs.remotes.values():
        for role in roles:
            if not is_mgr(role):
                continue
            _, _, mgr_id = teuthology.split_role(role)
            log.info('reviving mgr.{0}'.format(mgr_id))
            manager.revive_mgr(mgr_id)
def create_apache_dirs(ctx, config, on_client=None, except_client=None):
    """
    Remotely create apache directories.  Delete when finished.

    Generator-style task: directories are created before the ``yield`` and
    removed when the generator is resumed.

    :param ctx: Context
    :param config: mapping keyed by client role; its keys are used when
                   ``on_client`` is None.
    :param on_client: create directories only for this client role.
    :param except_client: client role to skip.
    """
    log.info('Creating apache directories...')
    log.debug('client is %r', on_client)
    testdir = teuthology.get_testdir(ctx)
    clients_to_create_as = [on_client]
    if on_client is None:
        clients_to_create_as = config.keys()
    # (client, "<cluster>.<type>.<id>") for every client whose directories
    # were really created, so that cleanup removes exactly those and does not
    # depend on whatever value the setup loop left behind.
    created = []
    for client in clients_to_create_as:
        if client == except_client:
            continue
        cluster_name, daemon_type, client_id = teuthology.split_role(client)
        client_with_cluster = cluster_name + '.' + daemon_type + '.' + client_id
        ctx.cluster.only(client).run(
            args=[
                'mkdir',
                '-p',
                '{tdir}/apache/htdocs.{client_with_cluster}'.format(tdir=testdir,
                                                                     client_with_cluster=client_with_cluster),
                '{tdir}/apache/tmp.{client_with_cluster}/fastcgi_sock'.format(
                    tdir=testdir,
                    client_with_cluster=client_with_cluster),
                run.Raw('&&'),
                'mkdir',
                '{tdir}/archive/apache.{client_with_cluster}'.format(tdir=testdir,
                                                                      client_with_cluster=client_with_cluster),
            ],
        )
        created.append((client, client_with_cluster))
    try:
        yield
    finally:
        log.info('Cleaning up apache directories...')
        for client, client_with_cluster in created:
            ctx.cluster.only(client).run(
                args=[
                    'rm',
                    '-rf',
                    '{tdir}/apache/tmp.{client_with_cluster}'.format(tdir=testdir,
                                                                      client_with_cluster=client_with_cluster),
                    run.Raw('&&'),
                    'rmdir',
                    '{tdir}/apache/htdocs.{client_with_cluster}'.format(tdir=testdir,
                                                                         client_with_cluster=client_with_cluster),
                ],
            )
        for client in clients_to_create_as:
            ctx.cluster.only(client).run(
                args=[
                    'rmdir',
                    '{tdir}/apache'.format(tdir=testdir),
                ],
                check_status=False,  # only need to remove once per host
            )
def restart(ctx, config):
    """
    restart ceph daemons

    For example::

      tasks:
      - ceph.restart: [all]

    For example::

      tasks:
      - ceph.restart: [osd.0, mon.1, mds.*]

    or::

      tasks:
      - ceph.restart:
          daemons: [osd.0, mon.1]
          wait-for-healthy: false
          wait-for-osds-up: true

    Each daemon is stopped and restarted inside ``tweaked_option``; an OSD
    is additionally marked down between the stop and the restart.

    :param ctx: Context
    :param config: Configuration
    """
    # Normalise the accepted config shapes into a dict.
    if isinstance(config, list):
        config = {'daemons': config}
    elif config is None:
        config = {}

    daemons = ctx.daemons.resolve_role_list(config.get('daemons', None), CEPH_ROLE_TYPES, True)
    touched_clusters = set()

    log.info('daemons %s' % daemons)
    with tweaked_option(ctx, config):
        for role in daemons:
            cluster, type_, id_ = teuthology.split_role(role)
            daemon = ctx.daemons.get_daemon(type_, id_, cluster)
            assert daemon, 'daemon %s does not exist' % role
            daemon.stop()
            if type_ == 'osd':
                ctx.managers[cluster].mark_down_osd(id_)
            daemon.restart()
            touched_clusters.add(cluster)

    # Both waits are opt-in/opt-out per config and run once per cluster
    # that had at least one daemon restarted.
    wait_healthy = config.get('wait-for-healthy', True)
    if wait_healthy:
        for cluster in touched_clusters:
            healthy(ctx=ctx, config=dict(cluster=cluster))
    wait_osds = config.get('wait-for-osds-up', False)
    if wait_osds:
        for cluster in touched_clusters:
            ctx.managers[cluster].wait_for_all_osds_up()
    yield
def run_pykmip(ctx, config):
    """
    Start a PyKMIP server for every client in ``config`` and stop them
    again when the generator is resumed.

    :param ctx: Context; a ``DaemonGroup`` is installed as ``ctx.daemons``
                when none exists and no ceph task is configured.
    :param config: dict keyed by client role (values are not used here).
    """
    assert isinstance(config, dict)
    if hasattr(ctx, 'daemons'):
        pass
    elif has_ceph_task(ctx.config['tasks']):
        log.info('Delay start pykmip so ceph can do once-only daemon logic')
        # NOTE(review): this branch yields here and the function yields
        # again further down -- confirm that the caller copes with two
        # yields from one generator.
        yield
    else:
        ctx.daemons = DaemonGroup()
    log.info('Running pykmip...')

    pykmipdir = get_pykmip_dir(ctx)

    # The command line is identical for every client, so build it once.
    run_cmd = 'cd ' + pykmipdir + ' && ' + \
        '. .pykmipenv/bin/activate && ' + \
        'HOME={}'.format(pykmipdir) + ' && ' + \
        'exec pykmip-server -f pykmip.conf -l ' + \
        pykmipdir + '/pykmip.log & { read; kill %1; }'

    # (daemon id, cluster) of every server started, so that teardown stops
    # all of them rather than only the last one iterated.
    started = []
    for (client, _) in config.items():
        (remote,) = ctx.cluster.only(client).remotes.keys()
        cluster_name, _, client_id = teuthology.split_role(client)

        # start the public endpoint
        client_public_with_id = 'pykmip.public' + '.' + client_id

        ctx.daemons.add_daemon(
            remote, 'pykmip', client_public_with_id,
            cluster=cluster_name,
            args=['bash', '-c', run_cmd],
            logger=log.getChild(client),
            stdin=run.PIPE,
            cwd=pykmipdir,
            wait=False,
            check_status=False,
        )
        started.append((client_public_with_id, cluster_name))

        # sleep driven synchronization
        time.sleep(10)
    try:
        yield
    finally:
        log.info('Stopping PyKMIP instance')
        for daemon_id, daemon_cluster in started:
            ctx.daemons.get_daemon('pykmip', daemon_id,
                                   daemon_cluster).stop()
def restart(ctx, config):
    """
    restart ceph daemons

    For example::

      tasks:
      - ceph.restart: [all]

    For example::

      tasks:
      - ceph.restart: [osd.0, mon.1, mds.*]

    or::

      tasks:
      - ceph.restart:
          daemons: [osd.0, mon.1]
          wait-for-healthy: false
          wait-for-osds-up: true

    After the restarts and the optional waits, roles of the exact form
    ``osd.<digits>`` are marked down through the ``ceph`` cluster manager.

    :param ctx: Context
    :param config: Configuration
    """
    # Normalise the accepted config shapes into a dict.
    if isinstance(config, list):
        config = {'daemons': config}
    elif config is None:
        config = {}

    daemons = ctx.daemons.resolve_role_list(config.get('daemons', None), CEPH_ROLE_TYPES, True)
    restarted_clusters = set()
    for role in daemons:
        cluster, type_, id_ = teuthology.split_role(role)
        daemon = ctx.daemons.get_daemon(type_, id_, cluster)
        daemon.restart()
        restarted_clusters.add(cluster)

    if config.get('wait-for-healthy', True):
        for cluster in restarted_clusters:
            healthy(ctx=ctx, config=dict(cluster=cluster))
    if config.get('wait-for-osds-up', False):
        for cluster in restarted_clusters:
            wait_for_osds_up(ctx=ctx, config=dict(cluster=cluster))

    # NOTE(review): the manager is looked up under the fixed name 'ceph',
    # and the role is split by hand: a three-part role such as 'ceph.osd.0'
    # has 'osd' in position 1 and is therefore skipped.  Confirm whether
    # this should use split_role() and ctx.managers[cluster] instead.
    manager = ctx.managers['ceph']
    for dmon in daemons:
        if '.' not in dmon:
            continue
        dm_parts = dmon.split('.')
        if dm_parts[1].isdigit() and dm_parts[0] == 'osd':
            manager.mark_down_osd(int(dm_parts[1]))
    yield
def setup(self):
    """
    Resolve the client role this task connects with.

    Reads ``client`` from the task config, stores the cluster name and
    client id split out of it, and looks up the remote for that role.

    :raises ConfigError: if no ``client`` is configured or the configured
                         role is not of type ``client``.
    """
    super(RBDMirror, self).setup()
    try:
        self.client = self.config['client']
    except KeyError:
        raise ConfigError('rbd-mirror requires a client to connect with')

    role = self.client
    cluster, role_type, ident = misc.split_role(role)
    self.cluster_name = cluster
    self.client_id = ident

    if role_type != 'client':
        raise ConfigError('client role ({0}) must be a client'.format(role))

    self.remote = get_remote_for_role(self.ctx, role)
def create_apache_dirs(ctx, config, on_client=None, except_client=None):
    """
    Remotely create apache directories.  Delete when finished.

    Generator-style task: directories are created before the ``yield`` and
    removed when the generator is resumed.

    :param ctx: Context
    :param config: mapping keyed by client role; its keys are used when
                   ``on_client`` is None.
    :param on_client: create directories only for this client role.
    :param except_client: client role to skip.
    """
    log.info('Creating apache directories...')
    log.debug('client is %r', on_client)
    testdir = teuthology.get_testdir(ctx)
    clients_to_create_as = [on_client]
    if on_client is None:
        clients_to_create_as = config.keys()

    # Maps each client that really got directories to its
    # "<cluster>.<type>.<id>" suffix.  Cleanup walks this list instead of
    # reusing the variable left over from the last setup iteration, which
    # would only ever name one client's directories.
    created = []
    for client in clients_to_create_as:
        if client == except_client:
            continue
        cluster_name, daemon_type, client_id = teuthology.split_role(client)
        client_with_cluster = cluster_name + '.' + daemon_type + '.' + client_id
        ctx.cluster.only(client).run(args=[
            'mkdir',
            '-p',
            '{tdir}/apache/htdocs.{client_with_cluster}'.format(
                tdir=testdir, client_with_cluster=client_with_cluster),
            '{tdir}/apache/tmp.{client_with_cluster}/fastcgi_sock'.format(
                tdir=testdir, client_with_cluster=client_with_cluster),
            run.Raw('&&'),
            'mkdir',
            '{tdir}/archive/apache.{client_with_cluster}'.format(
                tdir=testdir, client_with_cluster=client_with_cluster),
        ], )
        created.append((client, client_with_cluster))
    try:
        yield
    finally:
        log.info('Cleaning up apache directories...')
        for client, client_with_cluster in created:
            ctx.cluster.only(client).run(args=[
                'rm',
                '-rf',
                '{tdir}/apache/tmp.{client_with_cluster}'.format(
                    tdir=testdir, client_with_cluster=client_with_cluster),
                run.Raw('&&'),
                'rmdir',
                '{tdir}/apache/htdocs.{client_with_cluster}'.format(
                    tdir=testdir, client_with_cluster=client_with_cluster),
            ], )
        for client in clients_to_create_as:
            ctx.cluster.only(client).run(
                args=[
                    'rmdir',
                    '{tdir}/apache'.format(tdir=testdir),
                ],
                check_status=False,  # only need to remove once per host
            )
def restart(ctx, config):
    """
    restart ceph daemons

    For example::

      tasks:
      - ceph.restart: [all]

    For example::

      tasks:
      - ceph.restart: [osd.0, mon.1, mds.*]

    or::

      tasks:
      - ceph.restart:
          daemons: [osd.0, mon.1]
          wait-for-healthy: false
          wait-for-osds-up: true

    After the restarts and the optional waits, roles of the exact form
    ``osd.<digits>`` are marked down through the ``ceph`` cluster manager.

    :param ctx: Context
    :param config: Configuration
    """
    # Normalise the accepted config shapes into a dict.
    if isinstance(config, list):
        config = {"daemons": config}
    elif config is None:
        config = {}

    wanted = config.get("daemons", None)
    daemons = ctx.daemons.resolve_role_list(wanted, CEPH_ROLE_TYPES, True)

    seen_clusters = set()
    for role in daemons:
        cluster, type_, id_ = teuthology.split_role(role)
        ctx.daemons.get_daemon(type_, id_, cluster).restart()
        seen_clusters.add(cluster)

    if config.get("wait-for-healthy", True):
        for cluster in seen_clusters:
            healthy(ctx=ctx, config=dict(cluster=cluster))

    if config.get("wait-for-osds-up", False):
        for cluster in seen_clusters:
            wait_for_osds_up(ctx=ctx, config=dict(cluster=cluster))

    # NOTE(review): the manager is looked up under the fixed name "ceph",
    # and the role is split by hand: a three-part role such as "ceph.osd.0"
    # has "osd" in position 1 and is therefore skipped.  Confirm whether
    # this should use split_role() and ctx.managers[cluster] instead.
    manager = ctx.managers["ceph"]
    for dmon in daemons:
        if "." not in dmon:
            continue
        pieces = dmon.split(".")
        if not pieces[1].isdigit():
            continue
        if pieces[0] == "osd":
            manager.mark_down_osd(int(pieces[1]))
    yield
def ceph_iscsi(ctx, config):
    """
    Deploy iSCSIs

    Collects every ``iscsi`` role of the configured cluster, creates the
    ``iscsi`` pool, applies the iscsi service through the orchestrator and
    registers one daemon per role.

    :param ctx: Context
    :param config: dict whose ``cluster`` entry names the cluster.
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    placement = []
    daemons = {}
    is_iscsi = teuthology.is_type('iscsi', cluster_name)
    for remote, roles in ctx.cluster.remotes.items():
        for role in roles:
            if not is_iscsi(role):
                continue
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            placement.append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)

    if placement:
        poolname = 'iscsi'
        # The commands below run on whichever remote the loop above
        # visited last.
        # ceph osd pool create iscsi 3 3 replicated
        create_pool = ['ceph', 'osd', 'pool', 'create', poolname, '3', '3', 'replicated']
        _shell(ctx, cluster_name, remote, create_pool)

        enable_rbd = ['ceph', 'osd', 'pool', 'application', 'enable', poolname, 'rbd']
        _shell(ctx, cluster_name, remote, enable_rbd)

        # ceph orch apply iscsi iscsi user password
        spec = str(len(placement)) + ';' + ';'.join(placement)
        apply_iscsi = [
            'ceph', 'orch', 'apply', 'iscsi', poolname, 'user', 'password',
            '--placement', spec,
        ]
        _shell(ctx, cluster_name, remote, apply_iscsi)

    for role, (remote, id_) in daemons.items():
        ctx.daemons.register_daemon(
            remote,
            'iscsi',
            id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
def extract_zone_cluster_name(zone_config):
    """
    return the cluster (must be common to all zone endpoints)

    :param zone_config: mapping with an ``endpoints`` list of roles and a
                        ``name`` used in error messages.
    :raises ConfigError: if ``endpoints`` is missing or empty, or if the
                         endpoints belong to different clusters.
    """
    endpoints = zone_config.get('endpoints')
    if not endpoints:
        raise ConfigError("zone %s missing 'endpoints' list" %
                          zone_config['name'])

    cluster_name = None
    for role in endpoints:
        name, _, _ = misc.split_role(role)
        if cluster_name and cluster_name != name:
            raise ConfigError('all zone %s endpoints must be in the same cluster' %
                              zone_config['name'])
        if not cluster_name:
            # First usable cluster name seen becomes the reference.
            cluster_name = name
    return cluster_name
def get_zone_host_and_port(ctx, client, zone):
    """
    Return ``(host, port)`` of the first endpoint of the named zone.

    The zone is looked up in the ``period_map`` returned by
    ``radosgw-admin period get``; the port defaults to 80 when the endpoint
    URL does not carry one.

    :param ctx: Context
    :param client: client role used to run ``radosgw-admin``.
    :param zone: name of the zone to look for.
    :raises AssertionError: if no zonegroup contains a zone of that name.
    """
    _, period = rgwadmin(ctx, client, check_status=True,
                         cmd=['period', 'get'])
    period_map = period['period_map']
    zonegroups = period_map['zonegroups']
    for zonegroup in zonegroups:
        for zone_info in zonegroup['zones']:
            if zone_info['name'] == zone:
                endpoint = urlparse(zone_info['endpoints'][0])
                host, port = endpoint.hostname, endpoint.port
                if port is None:
                    port = 80
                return host, port
    # Raised explicitly (same exception type as before) so that the failure
    # is still reported when assertions are disabled with -O.
    raise AssertionError('no endpoint for zone {zone} found'.format(zone=zone))
def rgwadmin(ctx, client, cmd, stdin=StringIO(), check_status=False,
             format='json', decode=True, log_level=logging.DEBUG):
    """
    Run ``radosgw-admin`` on the remote that holds ``client``.

    :param ctx: Context
    :param client: client role; split into the ``-n`` name and the
                   ``--cluster`` argument.
    :param cmd: list of radosgw-admin arguments appended to the prefix.
    :param stdin: passed through to ``remote.run``.
    :param check_status: passed through to ``remote.run``.
    :param format: value for ``--format``.
    :param decode: when false, return the raw stdout instead of parsing it.
    :param log_level: logging level for the command and result messages.
    :returns: ``(exit status, result)``.  With ``decode`` the result is the
              parsed JSON, the raw output when it is not valid JSON, or
              None when the command failed or printed nothing.
    """
    # NOTE(review): the default ``stdin`` object is created once, when the
    # function is defined, and shared by every call that omits ``stdin``.
    # Kept as-is so the signature stays compatible.
    log.info('rgwadmin: {client} : {cmd}'.format(client=client, cmd=cmd))
    testdir = teuthology.get_testdir(ctx)
    cluster_name, daemon_type, client_id = teuthology.split_role(client)
    client_with_id = daemon_type + '.' + client_id
    pre = [
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir),
        'radosgw-admin',
        '--log-to-stderr',
        '--format', format,
        '-n', client_with_id,
        '--cluster', cluster_name,
    ]
    pre.extend(cmd)
    log.log(log_level, 'rgwadmin: cmd=%s' % pre)
    # .keys() works on Python 2 and 3; .iterkeys() is 2-only.
    (remote,) = ctx.cluster.only(client).remotes.keys()
    proc = remote.run(
        args=pre,
        check_status=check_status,
        stdout=StringIO(),
        stderr=StringIO(),
        stdin=stdin,
    )
    r = proc.exitstatus
    out = proc.stdout.getvalue()
    if not decode:
        return (r, out)
    j = None
    if not r and out != '':
        try:
            j = json.loads(out)
            log.log(log_level, ' json result: %s' % j)
        except ValueError:
            # Not JSON: hand the raw text back to the caller.
            j = out
            log.log(log_level, ' raw result: %s' % j)
    return (r, j)
def run_barbican(ctx, config):
    """
    Start a barbican API server for every client in ``config`` and stop
    them again when the generator is resumed.

    :param ctx: Context providing ``cluster`` and ``daemons``.
    :param config: dict keyed by client role (values are not used here).
    """
    assert isinstance(config, dict)
    log.info('Running barbican...')

    barbican_dir = get_barbican_dir(ctx)
    # The command line is identical for every client, so build it once.
    run_cmd = 'cd ' + barbican_dir + ' && ' + \
        '. .barbicanenv/bin/activate && ' + \
        'HOME={}'.format(barbican_dir) + ' && ' + \
        'exec bin/barbican-api & { read; kill %1; }'

    # (daemon id, cluster) of every server started, so that teardown stops
    # all of them rather than only the last one iterated.
    started = []
    for (client, _) in config.items():
        # .keys() works on Python 2 and 3; .iterkeys() is 2-only.
        (remote,) = ctx.cluster.only(client).remotes.keys()
        cluster_name, _, client_id = teuthology.split_role(client)

        # start the public endpoint
        client_public_with_id = 'barbican.public' + '.' + client_id

        ctx.daemons.add_daemon(
            remote, 'barbican', client_public_with_id,
            cluster=cluster_name,
            args=['bash', '-c', run_cmd],
            logger=log.getChild(client),
            stdin=run.PIPE,
            cwd=barbican_dir,
            wait=False,
            check_status=False,
        )
        started.append((client_public_with_id, cluster_name))

        # sleep driven synchronization
        run_in_barbican_venv(ctx, client, ['sleep', '15'])
    try:
        yield
    finally:
        log.info('Stopping Barbican instance')
        for daemon_id, daemon_cluster in started:
            ctx.daemons.get_daemon('barbican', daemon_id,
                                   daemon_cluster).stop()
def extract_sync_client_data(ctx, client_name):
    """
    Extract synchronized client rgw zone and rgw region information.

    :param ctx: Context passed to the s3tests task
    :param client_name: Name of client that we are synching with
    :returns: ``(region name, info dict)``; both are None when the client
              has no ceph conf, no ``rgw zone`` setting, or no region has a
              matching zone.
    """
    cluster_name, daemon_type, client_id = teuthology.split_role(client_name)
    client_conf = ctx.ceph[cluster_name].conf.get(client_name, None)
    if not client_conf:
        log.debug('No ceph conf for {host}'.format(host=client_name))
        return None, None

    client_zone = client_conf.get('rgw zone', None)
    if not client_zone:
        log.debug('No zone info for {host}'.format(host=client_name))
        return None, None

    matched_region = None
    matched_info = None
    (endpoint_host, endpoint_port) = ctx.rgw.role_endpoints.get(
        client_name, (None, None))
    # pull out the radosgw_agent stuff
    all_regions = ctx.rgw.regions
    for region in all_regions:
        log.debug('jbuck, region is {region}'.format(region=region))
        region_data = all_regions[region]
        log.debug('region data is {region}'.format(region=region_data))
        for zone in region_data['zones']:
            if current_zone_missing(client_zone, zone):
                continue
            info = {
                'api_name': region_data['api name'],
                'is_master': region_data['is master'],
                'port': endpoint_port,
                'host': endpoint_host,
            }
            # The s3tests expect the sync_agent_[addr|port} to be
            # set on the non-master node for some reason
            if not region_data['is master']:
                (agent_host, agent_port) = ctx.radosgw_agent.endpoint
                (agent_addr, _) = ctx.rgw.role_endpoints[agent_host]
                info['sync_agent_addr'] = agent_addr
                info['sync_agent_port'] = agent_port
            # No break: a later match replaces an earlier one.
            matched_region, matched_info = region, info

    return matched_region, matched_info


def current_zone_missing(client_zone, zone):
    """Return True when ``client_zone`` is not contained in ``zone``."""
    return client_zone not in zone
def scan_for_leaked_encryption_keys(ctx, config):
    """
    Scan radosgw logs for the encryption keys used by s3tests to
    verify that we're not leaking secrets.

    The scan runs when the generator is resumed, i.e. after the wrapped
    work has finished.

    :param ctx: Context passed to task
    :param config: specific configuration information
    :raises Exception: if any grep exits with a status other than 1.
    """
    assert isinstance(config, dict)

    try:
        yield
    finally:
        # x-amz-server-side-encryption-customer-key
        s3test_customer_key = 'pO3upElrwuEXSoFwCfnZPdSsmt/xWeFa0N9KgDijwVs='

        log.debug('Scanning radosgw logs for leaked encryption keys...')
        greps = []
        for client, client_config in config.items():
            if not client_config.get('scan_for_encryption_keys', True):
                continue
            cluster_name, daemon_type, client_id = teuthology.split_role(
                client)
            client_with_cluster = '.'.join(
                (cluster_name, daemon_type, client_id))
            (remote,) = ctx.cluster.only(client).remotes.keys()
            log_path = '/var/log/ceph/rgw.{client}.log'.format(
                client=client_with_cluster)
            # Start every grep without waiting; the results are collected
            # in the loop below.
            greps.append(remote.run(
                args=[
                    'grep',
                    '--binary-files=text',
                    s3test_customer_key,
                    log_path,
                ],
                wait=False,
                check_status=False,
            ))

        for grep in greps:
            grep.wait()
            if grep.returncode != 1:  # 1 means no matches
                log.error('radosgw log is leaking encryption keys!')
                raise Exception('radosgw log is leaking encryption keys')
def ceph_mgrs(ctx, config):
    """
    Deploy any additional mgrs

    Every ``mgr`` role of the configured cluster except the first mgr is
    handed to ``ceph orchestrator mgr update`` and registered as a daemon.

    :param ctx: Context
    :param config: dict whose ``cluster`` entry names the cluster.
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid
    testdir = teuthology.get_testdir(ctx)

    placement = []
    daemons = {}
    is_mgr = teuthology.is_type('mgr', cluster_name)
    for remote, roles in ctx.cluster.remotes.items():
        for mgr in roles:
            if not is_mgr(mgr):
                continue
            c_, _, id_ = teuthology.split_role(mgr)
            # The first mgr is not added again here.
            if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mgr:
                continue
            log.info('Adding %s on %s' % (mgr, remote.shortname))
            placement.append(remote.shortname + '=' + id_)
            daemons[mgr] = (remote, id_)

    if placement:
        # Runs on whichever remote the loop above visited last; the count
        # is one more than the number of mgrs collected here.
        update_cmd = ['ceph', 'orchestrator', 'mgr', 'update',
                      str(len(placement) + 1)]
        _shell(ctx, cluster_name, remote, update_cmd + placement)

    for mgr, (remote, id_) in daemons.items():
        ctx.daemons.register_daemon(
            remote,
            'mgr',
            id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(mgr),
            wait=False,
            started=True,
        )

    yield
def validate_config(ctx, config):
    """
    Perform some simple validation on task configuration.
    Raises exceptions.ConfigError if an error is found.

    :param ctx: Context whose ``cluster.remotes`` maps hosts to roles.
    :param config: task configuration (not inspected by this check).
    """
    # check for osds from multiple clusters on the same host
    for remote, roles_for_host in ctx.cluster.remotes.items():
        seen_cluster = None
        seen_role = None
        for role in roles_for_host:
            role_cluster, role_type, _ = teuthology.split_role(role)
            if role_type == "osd":
                if seen_cluster and seen_cluster != role_cluster:
                    raise exceptions.ConfigError(
                        "Host should not have osds (%s and %s) from multiple clusters"
                        % (seen_role, role)
                    )
                seen_cluster, seen_role = role_cluster, role
def extract_sync_client_data(ctx, client_name):
    """
    Extract synchronized client rgw zone and rgw region information.

    :param ctx: Context passed to the s3tests task
    :param client_name: Name of client that we are synching with
    :returns: ``(region name, info dict)``; both are None when the client
              has no ceph conf, no ``rgw zone`` setting, or no region has a
              matching zone.
    """
    cluster_name, daemon_type, client_id = teuthology.split_role(client_name)
    conf_section = ctx.ceph[cluster_name].conf.get(client_name, None)
    if not conf_section:
        log.debug('No ceph conf for {host}'.format(host=client_name))
        return None, None

    zone_of_client = conf_section.get('rgw zone', None)
    if not zone_of_client:
        log.debug('No zone info for {host}'.format(host=client_name))
        return None, None

    region_name = None
    region_info = None
    (endpoint_host, endpoint_port) = ctx.rgw.role_endpoints.get(client_name, (None, None))
    # pull out the radosgw_agent stuff
    regions = ctx.rgw.regions
    for region in regions:
        log.debug('jbuck, region is {region}'.format(region=region))
        region_data = regions[region]
        log.debug('region data is {region}'.format(region=region_data))
        for zone in region_data['zones']:
            if zone_of_client in zone:
                candidate = dict(
                    api_name=region_data['api name'],
                    is_master=region_data['is master'],
                    port=endpoint_port,
                    host=endpoint_host,
                )
                # The s3tests expect the sync_agent_[addr|port} to be
                # set on the non-master node for some reason
                if not region_data['is master']:
                    (rgwagent_host, rgwagent_port) = ctx.radosgw_agent.endpoint
                    (sync_addr, _) = ctx.rgw.role_endpoints[rgwagent_host]
                    candidate['sync_agent_addr'] = sync_addr
                    candidate['sync_agent_port'] = rgwagent_port
                # No break: a later match replaces an earlier one.
                region_name = region
                region_info = candidate

    return region_name, region_info
def get_master_zone(ctx, client):
    """
    Return the master zone name of the master zonegroup.

    The period is read with ``radosgw-admin period get``.  A zonegroup
    counts as master when its ``is_master`` field equals the string
    ``"true"``; its ``master_zone`` is returned only if a zone of that name
    is listed in the zonegroup.

    :param ctx: Context
    :param client: client role used to run ``radosgw-admin``.
    :returns: the master zone name, or None when none is found.
    """
    cluster_name, daemon_type, client_id = teuthology.split_role(client)
    client_with_id = daemon_type + '.' + client_id
    _, period = rgwadmin(ctx, client, check_status=True,
                         cmd=['period', 'get'])
    for zonegroup in period['period_map']['zonegroups']:
        is_master = (zonegroup['is_master'] == "true")
        log.info('zonegroup={z} is_master={ism}'.format(z=zonegroup, ism=is_master))
        if is_master:
            master_zone = zonegroup['master_zone']
            log.info('master_zone=%s' % master_zone)
            for zone_info in zonegroup['zones']:
                if zone_info['name'] == master_zone:
                    return master_zone
    log.info("couldn't find master zone")
    return None
def extract_clusters_and_gateways(ctx, role_endpoints):
    """
    create cluster and gateway instances for all of the radosgw roles

    :param ctx: Context providing ``daemons`` and ``cluster``.
    :param role_endpoints: mapping of role -> ``(host, port)``.
    :returns: ``(clusters, gateways)``: clusters keyed by cluster name and
              gateways keyed by role.
    :raises ConfigError: if no rgw daemon is registered for a role.
    """
    clusters = {}
    gateways = {}
    # .items() works on Python 2 and 3; .iteritems() is 2-only.
    for role, (host, port) in role_endpoints.items():
        cluster_name, daemon_type, client_id = misc.split_role(role)
        # find or create the cluster by name
        cluster = clusters.get(cluster_name)
        if not cluster:
            clusters[cluster_name] = cluster = Cluster(ctx, cluster_name, role)
        # create a gateway for this daemon
        client_with_id = daemon_type + '.' + client_id  # match format from rgw.py
        daemon = ctx.daemons.get_daemon('rgw', client_with_id, cluster_name)
        if not daemon:
            raise ConfigError('no daemon for role=%s cluster=%s type=rgw id=%s' %
                              (role, cluster_name, client_id))
        (remote,) = ctx.cluster.only(role).remotes.keys()
        gateways[role] = Gateway(role, remote, daemon, host, port, cluster)
    return clusters, gateways
def create_pools(ctx, config):
    """
    Create replicated or erasure coded data pools for rgw.

    For every client in ``config`` the ``.rgw.buckets`` pool is created as
    an erasure coded pool when ``ctx.rgw.ec_data_pool`` is set and as a
    replicated pool otherwise; a ``.cache`` pool is added when
    ``ctx.rgw.cache_pools`` is set.

    :param ctx: Context
    :param config: mapping keyed by client role.
    """
    log.info('Creating data pools')
    for client in config.keys():
        # .keys() works on Python 2 and 3; .iterkeys() is 2-only.
        (remote,) = ctx.cluster.only(client).remotes.keys()
        data_pool = '.rgw.buckets'
        cluster_name, daemon_type, client_id = teuthology.split_role(client)

        if ctx.rgw.ec_data_pool:
            create_ec_pool(remote, data_pool, client, 64,
                           ctx.rgw.erasure_code_profile, cluster_name)
        else:
            create_replicated_pool(remote, data_pool, 64, cluster_name)
        if ctx.rgw.cache_pools:
            create_cache_pool(remote, data_pool, data_pool + '.cache', 64,
                              64*1024*1024, cluster_name)
    log.debug('Pools created')
    yield
def validate_config(ctx, config):
    """
    Perform some simple validation on task configuration.
    Raises exceptions.ConfigError if an error is found.

    :param ctx: Context whose ``cluster.remotes`` maps hosts to roles.
    :param config: task configuration (not inspected by this check).
    """
    # check for osds from multiple clusters on the same host
    for remote, roles_for_host in ctx.cluster.remotes.items():
        previous = None  # (cluster, role) of the last osd seen on this host
        for role in roles_for_host:
            role_cluster, role_type, _ = teuthology.split_role(role)
            if role_type != 'osd':
                continue
            if previous is not None:
                prev_cluster, prev_role = previous
                if prev_cluster and prev_cluster != role_cluster:
                    msg = "Host should not have osds (%s and %s) from multiple clusters" % (
                        prev_role, role)
                    raise exceptions.ConfigError(msg)
            previous = (role_cluster, role)
def generic_mount(ctx, config, devname_rtn):
    """
    Generic Mount an rbd or tgt image.

    Rbd for example, now makes the following calls:
        - rbd.create_image: [client.0]
        - rbd.modprobe: [client.0]
        - rbd.dev_create: [client.0]
        - common_fs_utils.generic_mkfs: [client.0]
        - common_fs_utils.generic_mount:
            client.0: testimage.client.0

    Generator-style task: images are mounted before the ``yield`` and
    unmounted (and their mount points removed) when it is resumed.

    :param ctx: Context
    :param config: list of roles, or dict of role -> image name (None
                   selects ``default_image_name(role)``).
    :param devname_rtn: callable ``(ctx, image)`` giving the device to mount.
    """
    assert isinstance(
        config, (list, dict)
    ), "task mount must be configured with a list or dictionary"

    role_images = (
        config.items()
        if isinstance(config, dict)
        else [(role, None) for role in config]
    )

    testdir = teuthology.get_testdir(ctx)

    mounted = []
    for role, image in role_images:
        if image is None:
            image = default_image_name(role)
        (remote,) = ctx.cluster.only(role).remotes.keys()
        _, _, id_ = teuthology.split_role(role)
        mnt = "{tdir}/mnt.{id}".format(tdir=testdir, id=id_)
        # Recorded before the directory is created and the image mounted.
        mounted.append((remote, mnt))
        remote.run(args=["mkdir", "--", mnt])
        device = devname_rtn(ctx, image)
        remote.run(args=["sudo", "mount", device, mnt])

    try:
        yield
    finally:
        log.info("Unmounting rbd images... %s", mounted)
        for remote, mnt in mounted:
            remote.run(args=["sudo", "umount", mnt])
            remote.run(args=["rmdir", "--", mnt])
def scan_for_leaked_encryption_keys(ctx, config):
    """
    Scan radosgw logs for the encryption keys used by s3tests to verify
    that we're not leaking secrets.

    The scan lives in the ``finally`` clause around the ``yield``, so it
    runs when the generator is resumed or closed.

    :param ctx: Context passed to task
    :param config: specific configuration information; a dict keyed by
                   client role.  Clients whose config sets
                   ``scan_for_encryption_keys`` to a false value are
                   skipped.
    :raises Exception: if grep on any scanned log exits with a status
                       other than 1
    """
    assert isinstance(config, dict)

    try:
        yield
    finally:
        # x-amz-server-side-encryption-customer-key
        s3test_customer_key = 'pO3upElrwuEXSoFwCfnZPdSsmt/xWeFa0N9KgDijwVs='

        log.debug('Scanning radosgw logs for leaked encryption keys...')
        procs = []
        # items() rather than the Python-2-only iteritems(); the rest of
        # this function already uses the portable dict API.
        for client, client_config in config.items():
            if not client_config.get('scan_for_encryption_keys', True):
                continue
            cluster_name, daemon_type, client_id = teuthology.split_role(client)
            client_with_cluster = '.'.join((cluster_name, daemon_type, client_id))
            (remote,) = ctx.cluster.only(client).remotes.keys()
            # start all greps without waiting; they are collected below
            proc = remote.run(
                args=[
                    'grep',
                    '--binary-files=text',
                    s3test_customer_key,
                    '/var/log/ceph/rgw.{client}.log'.format(client=client_with_cluster),
                ],
                wait=False,
                check_status=False,
            )
            procs.append(proc)

        for proc in procs:
            proc.wait()
            if proc.returncode == 1:  # 1 means no matches
                continue
            log.error('radosgw log is leaking encryption keys!')
            raise Exception('radosgw log is leaking encryption keys')
def restart(ctx, config):
    """
    Restart ceph daemons.

    For example::

        tasks:
        - ceph.restart: [all]

    For example::

        tasks:
        - ceph.restart: [osd.0, mon.1, mds.*]

    or::

        tasks:
        - ceph.restart:
            daemons: [osd.0, mon.1]
            wait-for-healthy: false
            wait-for-osds-up: true

    :param ctx: Context
    :param config: Configuration (``None``, a list of daemons, or a dict)
    """
    if isinstance(config, list):
        config = {'daemons': config}
    elif config is None:
        config = {}

    roles = ctx.daemons.resolve_role_list(
        config.get('daemons', None), CEPH_ROLE_TYPES, True)

    # remember which clusters were touched so each is waited on once
    touched = set()
    for role in roles:
        cluster, type_, id_ = teuthology.split_role(role)
        ctx.daemons.get_daemon(type_, id_, cluster).restart()
        touched.add(cluster)

    if config.get('wait-for-healthy', True):
        for name in touched:
            healthy(ctx=ctx, config={'cluster': name})
    if config.get('wait-for-osds-up', False):
        for name in touched:
            wait_for_osds_up(ctx=ctx, config={'cluster': name})
    yield
def _nuke_mons(manager, mons, mon_id):
    """
    Kill every monitor and wipe its on-disk data.

    :param manager: object providing ``kill_mon(id)``
    :param mons: cluster view whose ``remotes`` maps remote -> roles;
                 must be truthy
    :param mon_id: id of the monitor that keeps its data directory and
                   only loses ``store.db``
    """
    assert mons
    is_mon = teuthology.is_type('mon')
    # items() instead of the Python-2-only iteritems()
    for remote, roles in mons.remotes.items():
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            log.info('killing {cluster}:mon.{mon}'.format(
                cluster=cluster,
                mon=m))
            manager.kill_mon(m)
            mon_data = os.path.join('/var/lib/ceph/mon/',
                                    '{0}-{1}'.format(cluster, m))
            if m == mon_id:
                # so we will only need to recreate the store.db for the
                # first mon, would be easier than mkfs on it then replace
                # its store.db with the recovered one
                store_dir = os.path.join(mon_data, 'store.db')
                remote.run(args=['sudo', 'rm', '-r', store_dir])
            else:
                remote.run(args=['sudo', 'rm', '-r', mon_data])
def _revive_mons(manager, mons, recovered, keyring_path):
    """
    Revive all monitors and wait for them to form a quorum.

    Every monitor except ``recovered`` gets a fresh ``ceph-mon --mkfs``
    before it is revived.

    :param manager: object providing ``revive_mon`` and
                    ``wait_for_mon_quorum_size``
    :param mons: cluster view whose ``remotes`` maps remote -> roles
    :param recovered: id of the monitor that must not be re-created
    :param keyring_path: keyring handed to ``ceph-mon --mkfs``
    """
    # revive monitors
    # the initial monmap is in the ceph.conf, so we are good.
    n_mons = 0
    is_mon = teuthology.is_type('mon')
    # items() instead of the Python-2-only iteritems()
    for remote, roles in mons.remotes.items():
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            if recovered != m:
                log.info('running mkfs on {cluster}:mon.{mon}'.format(
                    cluster=cluster,
                    mon=m))
                remote.run(
                    args=[
                        'sudo',
                        'ceph-mon',
                        '--cluster', cluster,
                        '--mkfs',
                        '-i', m,
                        '--keyring', keyring_path])
            log.info('reviving mon.{0}'.format(m))
            manager.revive_mon(m)
            n_mons += 1
    manager.wait_for_mon_quorum_size(n_mons, timeout=30)
def ceph_rgw(ctx, config):
    """
    Deploy rgw.

    Groups the rgw roles of ``config['cluster']`` by service name (the
    first two dot-separated parts of the role id), applies one
    ``ceph orch apply rgw`` per service and registers every daemon with
    ``ctx.daemons``.  Yields once afterwards.
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    placements = {}     # service name -> ['host=id', ...]
    registrations = {}  # role -> (remote, id)
    for remote, roles in ctx.cluster.remotes.items():
        for role in roles:
            if not teuthology.is_type('rgw', cluster_name)(role):
                continue
            _, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            svc = '.'.join(id_.split('.')[0:2])
            placements.setdefault(svc, []).append(remote.shortname + '=' + id_)
            registrations[role] = (remote, id_)

    for svc, hosts in placements.items():
        # `remote` is whichever remote the loop above visited last
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'rgw', svc,
            '--placement',
            str(len(hosts)) + ';' + ';'.join(hosts)]
        )
    for role, (remote, id_) in registrations.items():
        ctx.daemons.register_daemon(
            remote, 'rgw', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
def run_vault(ctx, config):
    """
    Start a Vault dev server for every client in ``config`` and stop all
    of them again once the generator is resumed.

    :param ctx: Context; ``ctx.vault.endpoints[client]`` supplies the
                port and ``ctx.vault.root_token`` is set as a side effect
    :param config: dict mapping client role to its settings
                   (``root_token``, default ``'root'``)
    """
    assert isinstance(config, dict)

    # (client_id, cluster_name) of every instance we started.  Teardown
    # used to rely on the loop variables and therefore only stopped the
    # last client (and raised NameError for an empty config).
    started = []
    for (client, cconf) in config.items():
        (remote,) = ctx.cluster.only(client).remotes.keys()
        cluster_name, _, client_id = teuthology.split_role(client)

        _, port = ctx.vault.endpoints[client]
        listen_addr = "0.0.0.0:{}".format(port)

        root_token = ctx.vault.root_token = cconf.get('root_token', 'root')

        log.info("Starting Vault listening on %s ...", listen_addr)
        v_params = [
            '-dev',
            '-dev-listen-address={}'.format(listen_addr),
            '-dev-no-store-token',
            '-dev-root-token-id={}'.format(root_token)
        ]

        cmd = "chmod +x {vdir}/vault && {vdir}/vault server {vargs}".format(
            vdir=get_vault_dir(ctx), vargs=" ".join(v_params))

        ctx.daemons.add_daemon(
            remote, 'vault', client_id,
            cluster=cluster_name,
            # background the server, then kill it when stdin reaches EOF
            args=['bash', '-c', cmd, run.Raw('& { read; kill %1; }')],
            logger=log.getChild(client),
            stdin=run.PIPE,
            cwd=get_vault_dir(ctx),
            wait=False,
            check_status=False,
        )
        started.append((client_id, cluster_name))
        # sleep driven synchronization
        time.sleep(10)
    try:
        yield
    finally:
        for client_id, cluster_name in started:
            log.info('Stopping Vault instance')
            ctx.daemons.get_daemon('vault', client_id, cluster_name).stop()
def _nuke_mons(manager, mons, mon_id):
    """
    Kill every monitor and wipe its on-disk data.

    :param manager: object providing ``kill_mon(id)``
    :param mons: cluster view whose ``remotes`` maps remote -> roles;
                 must be truthy
    :param mon_id: id of the monitor that keeps its data directory and
                   only loses ``store.db``
    """
    assert mons
    is_mon = teuthology.is_type('mon')
    # items() instead of the Python-2-only iteritems()
    for remote, roles in mons.remotes.items():
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            log.info('killing {cluster}:mon.{mon}'.format(
                cluster=cluster,
                mon=m))
            manager.kill_mon(m)
            mon_data = os.path.join('/var/lib/ceph/mon/',
                                    '{0}-{1}'.format(cluster, m))
            if m == mon_id:
                # so we will only need to recreate the store.db for the
                # first mon, would be easier than mkfs on it then replace
                # its store.db with the recovered one
                store_dir = os.path.join(mon_data, 'store.db')
                remote.run(args=['sudo', 'rm', '-r', store_dir])
            else:
                remote.run(args=['sudo', 'rm', '-r', mon_data])
def rgwadmin(ctx, client, cmd, stdin=StringIO(), check_status=False,
             format='json', decode=True, log_level=logging.DEBUG):
    """
    Run ``radosgw-admin`` on the remote of ``client``.

    :param ctx: Context
    :param client: client role; split with ``teuthology.split_role``
    :param cmd: iterable of arguments appended after the common prefix
    :param stdin: forwarded to ``remote.run``.  NOTE(review): the default
                  ``StringIO()`` is created once at definition time and
                  shared by all calls; kept as is so the signature does
                  not change.
    :param check_status: forwarded to ``remote.run``
    :param format: value passed to ``--format``
    :param decode: when false, return stdout without JSON decoding
    :param log_level: level used for the command and result log lines
    :returns: ``(exitstatus, out)`` when ``decode`` is false; otherwise
              ``(exitstatus, j)`` where ``j`` is the parsed JSON, the raw
              output if it is not valid JSON, or ``None`` when the
              command failed or printed nothing
    """
    log.info('rgwadmin: {client} : {cmd}'.format(client=client, cmd=cmd))
    testdir = teuthology.get_testdir(ctx)
    cluster_name, daemon_type, client_id = teuthology.split_role(client)
    client_with_id = daemon_type + '.' + client_id
    # the literal entries used to carry no-op .format() calls
    pre = [
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir),
        'radosgw-admin',
        '--log-to-stderr',
        '--format', format,
        '-n', client_with_id,
        '--cluster', cluster_name,
    ]
    pre.extend(cmd)
    log.log(log_level, 'rgwadmin: cmd=%s' % pre)
    # keys() instead of the Python-2-only iterkeys()
    (remote,) = ctx.cluster.only(client).remotes.keys()
    proc = remote.run(
        args=pre,
        check_status=check_status,
        stdout=StringIO(),
        stderr=StringIO(),
        stdin=stdin,
    )
    r = proc.exitstatus
    out = proc.stdout.getvalue()
    if not decode:
        return (r, out)
    j = None
    if not r and out != '':
        try:
            j = json.loads(out)
            log.log(log_level, ' json result: %s' % j)
        except ValueError:
            # not JSON: hand the raw text back to the caller
            j = out
            log.log(log_level, ' raw result: %s' % j)
    return (r, j)
def poweron(ctx, config):
    """
    Power on, through IPMI, the hosts that carry the given roles.

    Example::

        tasks:
            ceph-ipmi.poweron: [osd.0]
            check_status: false

    Each distinct remote is powered on once, then teuthology reconnects
    and the generator yields.

    NOTE(review): roles are taken from ``config.keys()`` and
    ``check_status`` is read from the same dict, so a dict config that
    contains ``check_status`` would also treat that key as a role.
    Confirm the intended schema.

    :param ctx: Context; IPMI credentials come from
                ``ctx.teuthology_config``
    :param config: ``None``, a list of roles, or a dict keyed by role
    """
    # Normalise before validating: the type assertion used to run first,
    # which made the ``config is None`` branch unreachable.
    if config is None:
        config = {}
    elif isinstance(config, list):
        config = dict((role, None) for role in config)
    assert isinstance(config, dict), \
        "task ceph_ipmi only supports a list or dictionary for configuration"

    roles = config.keys()
    last_remote = []

    for role in roles:
        # keys() instead of the Python-2-only iterkeys()
        (remote,) = ctx.cluster.only(role).remotes.keys()
        if remote not in last_remote:
            log.info("Powering on host containing %s" % role)
            ipmi = IpmiCapabilities(
                remote,
                ctx.teuthology_config.get('ipmi_user', None),
                ctx.teuthology_config.get('ipmi_password', None),
                ctx.teuthology_config.get('ipmi_domain', None),
                timeout=180)
            ipmi.power_on()
            last_remote.append(remote)

            if config.get('check_status', True):
                ipmi.check_status()

    teuthology.reconnect(ctx, 360)
    yield
def ceph_mdss(ctx, config):
    """
    Deploy MDSs.

    Collects the mds roles of ``config['cluster']``, issues a single
    ``ceph orchestrator mds update all`` with their placements when there
    are any, registers every daemon with ``ctx.daemons`` and yields.
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type('mds', cluster_name)(r)]:
            _, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)
    if nodes:
        # `remote` is whichever remote the loop above visited last
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orchestrator', 'mds', 'update',
            'all',
            str(len(nodes))] + nodes
        )
    for role, (remote, id_) in daemons.items():
        ctx.daemons.register_daemon(
            remote, 'mds', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
def ceph_monitoring(daemon_type, ctx, config):
    """
    Deploy prometheus, node-exporter, etc.

    Collects the roles of type ``daemon_type`` in ``config['cluster']``,
    applies one ``ceph orch apply`` with their placements when there are
    any, registers every daemon with ``ctx.daemons`` and yields.
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    placements = []     # ['host=id', ...]
    registrations = {}  # role -> (remote, id)
    for remote, roles in ctx.cluster.remotes.items():
        for role in roles:
            if not teuthology.is_type(daemon_type, cluster_name)(role):
                continue
            _, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            placements.append(remote.shortname + '=' + id_)
            registrations[role] = (remote, id_)

    if placements:
        # `remote` is whichever remote the loop above visited last
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', daemon_type,
            str(len(placements)) + ';' + ';'.join(placements)]
        )
    for role, (remote, id_) in registrations.items():
        ctx.daemons.register_daemon(
            remote, daemon_type, id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
def test_pid():
    """Every daemon registered on the fake systemd remote reports a pid."""
    ctx = argparse.Namespace()
    ctx.daemons = DaemonGroup(use_systemd=True)

    ps_ef_output_path = os.path.join(
        os.path.dirname(__file__),
        "files/daemon-systemdstate-pid-ps-ef.output")

    # patching ps -ef command output using a file
    def fake_sh(args):
        args[0:2] = ["cat", ps_ef_output_path]
        debug(args)
        return subprocess.getoutput(quote(args))

    remote = FakeRemote()
    remote.sh = fake_sh
    remote.init_system = 'systemd'
    remote.shortname = 'host1'

    ctx.cluster = cluster.Cluster(
        remotes=[
            (remote, ['rgw.0', 'mon.a', 'mgr.a', 'mds.a', 'osd.0'])
        ],
    )

    for remote, roles in ctx.cluster.remotes.items():
        for role in roles:
            _, rol, id_ = misc.split_role(role)
            # mon/mgr/mds daemons are registered under the host name
            if rol.startswith(('mon', 'mgr', 'mds')):
                daemon_id = remote.shortname
            else:
                daemon_id = id_
            ctx.daemons.register_daemon(remote, rol, daemon_id)

    for by_id in ctx.daemons.daemons.values():
        for daemon in by_id.values():
            pid = daemon.pid
            debug(pid)
            assert pid
def create_pools(ctx, clients):
    """Create replicated or erasure coded data pools for rgw.

    For every client a ``default.rgw.buckets.data`` pool (erasure coded
    when ``ctx.rgw.ec_data_pool`` is set, replicated otherwise) and a
    replicated ``default.rgw.buckets.index`` pool are created through the
    client's remote; with ``ctx.rgw.cache_pools`` a cache pool is added
    for the data pool.  Yields once afterwards.

    :param ctx: Context; ``ctx.cluster`` and ``ctx.rgw`` are read
    :param clients: iterable of client roles
    """
    log.info('Creating data pools')
    data_pool = 'default.rgw.buckets.data'
    index_pool = 'default.rgw.buckets.index'
    for client in clients:
        log.debug("Obtaining remote for client {}".format(client))
        # keys() instead of the Python-2-only iterkeys(); the one-element
        # unpacking still insists on exactly one remote per client.
        (remote,) = ctx.cluster.only(client).remotes.keys()
        cluster_name, _, _ = teuthology.split_role(client)

        if ctx.rgw.ec_data_pool:
            create_ec_pool(remote, data_pool, client, ctx.rgw.data_pool_pg_size,
                           ctx.rgw.erasure_code_profile, cluster_name, 'rgw')
        else:
            create_replicated_pool(remote, data_pool, ctx.rgw.data_pool_pg_size,
                                   cluster_name, 'rgw')

        create_replicated_pool(remote, index_pool, ctx.rgw.index_pool_pg_size,
                               cluster_name, 'rgw')

        if ctx.rgw.cache_pools:
            create_cache_pool(remote, data_pool, data_pool + '.cache', 64,
                              64*1024*1024, cluster_name)
    log.debug('Pools created')
    yield
def stop(ctx, config):
    """
    Stop ceph daemons.

    For example::

        tasks:
        - ceph.stop: [mds.*]

        tasks:
        - ceph.stop: [osd.0, osd.2]

        tasks:
        - ceph.stop:
            daemons: [osd.0, osd.2]

    :param ctx: Context
    :param config: ``None``, a list of daemons, or a dict with a
                   ``daemons`` entry
    """
    if isinstance(config, list):
        config = {'daemons': config}
    elif config is None:
        config = {}

    roles = ctx.daemons.resolve_role_list(
        config.get('daemons', None), CEPH_ROLE_TYPES, True)

    touched = set()
    for role in roles:
        cluster, type_, id_ = teuthology.split_role(role)
        ctx.daemons.get_daemon(type_, id_, cluster).stop()
        touched.add(cluster)

    # for cluster in clusters:
    #     ctx.ceph[cluster].watchdog.stop()
    #     ctx.ceph[cluster].watchdog.join()

    yield
def _run_one_client(ctx, config, role):
    """
    Spawned task that runs the client.

    Builds the ``ceph_test_librbd_fsx`` command line from ``config``
    (after merging the ``rbd_fsx`` overrides into it) and runs it on the
    remote of ``role``.

    :raises ConfigError: if ``role`` is not a client role
    """
    use_krbd = config.get('krbd', False)
    use_nbd = config.get('nbd', False)
    testdir = teuthology.get_testdir(ctx)
    (remote,) = ctx.cluster.only(role).remotes.keys()

    # rbd(-nbd) map/unmap need privileges
    args = ['sudo'] if (use_krbd or use_nbd) else []
    args += [
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir)
    ]

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('rbd_fsx', {}))

    if config.get('valgrind'):
        args = teuthology.get_valgrind_args(
            testdir,
            'fsx_{id}'.format(id=role),
            args,
            config.get('valgrind')
        )

    cluster_name, type_, client_id = teuthology.split_role(role)
    if type_ != 'client':
        raise ConfigError('client role ({0}) must be a client'.format(role))

    args += [
        'ceph_test_librbd_fsx',
        '--cluster', cluster_name,
        '--id', client_id,
        '-d',  # debug output for all operations
        '-W', '-R',  # mmap doesn't work with rbd
        '-p', str(config.get('progress_interval', 100)),  # show progress
        '-P', '{tdir}/archive'.format(tdir=testdir),
        '-r', str(config.get('readbdy', 1)),
        '-w', str(config.get('writebdy', 1)),
        '-t', str(config.get('truncbdy', 1)),
        '-h', str(config.get('holebdy', 1)),
        '-l', str(config.get('size', 250000000)),
        '-S', str(config.get('seed', 0)),
        '-N', str(config.get('ops', 1000)),
    ]

    # (switch, enabled) pairs, in the order they go on the command line
    optional_switches = [
        ('-K', use_krbd),                                      # krbd mode
        ('-M', use_nbd),                                       # nbd mode
        ('-Z', config.get('direct_io', False)),                # direct IO
        ('-U', not config.get('randomized_striping', True)),   # no randomized striping
        ('-H', not config.get('punch_holes', True)),           # no discard ops
        ('-g', config.get('deep_copy', False)),                # deep copy instead of clone
        ('-j', config.get('journal_replay', False)),           # replay all IO events from journal
        ('-k', config.get('keep_images', False)),              # keep images on success
    ]
    args += [switch for switch, enabled in optional_switches if enabled]

    args += [
        config.get('pool_name', 'pool_{pool}'.format(pool=role)),
        'image_{image}'.format(image=role),
    ]

    remote.run(args=args)
def initialize_config(ctx, config):
    """
    Populate ``ctx.ceph[cluster]`` (and, for the first cluster,
    ``ctx.daemons``) with the state the cephadm tasks rely on.

    Sets the cephadm binary location, a fresh fsid, the monitor
    addresses, the bootstrap remote and the first mon; in the
    non-roleless case also the first mgr.  Yields once afterwards.

    :param ctx: Context
    :param config: task configuration; ``cluster`` is required and
                   ``cephadm_mode`` defaults to ``'root'``
    :raises RuntimeError: if no mgr role shares a host with the first mon
    """
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)
    cluster_state = ctx.ceph[cluster_name]

    cluster_state.thrashers = []
    # fixme: setup watchdog, ala ceph.py

    cluster_state.roleless = False  # see below

    first_ceph_cluster = not hasattr(ctx, 'daemons')

    # cephadm mode?
    config.setdefault('cephadm_mode', 'root')
    assert config['cephadm_mode'] in ['root', 'cephadm-package']
    if config['cephadm_mode'] == 'root':
        ctx.cephadm = testdir + '/cephadm'
    else:
        ctx.cephadm = 'cephadm'  # in the path

    if first_ceph_cluster:
        # FIXME: this is global for all clusters
        ctx.daemons = DaemonGroup(
            use_cephadm=ctx.cephadm)

    # uuid
    fsid = str(uuid.uuid1())
    log.info('Cluster fsid is %s' % fsid)
    cluster_state.fsid = fsid

    # mon ips
    log.info('Choosing monitor IPs and ports...')
    remotes_and_roles = ctx.cluster.remotes.items()
    ips = []
    for remote, _ in remotes_and_roles:
        # two-element unpacking kept on purpose: the peer address must be
        # a (host, port) pair
        host, port = remote.ssh.get_transport().getpeername()
        ips.append(host)

    if config.get('roleless', False):
        # mons will be named after hosts
        first_mon = None
        for remote, _ in remotes_and_roles:
            ctx.cluster.remotes[remote].append('mon.' + remote.shortname)
            if not first_mon:
                first_mon = remote.shortname
                bootstrap_remote = remote
        log.info('No mon roles; fabricating mons')

    roles = [role_list for (_, role_list) in ctx.cluster.remotes.items()]

    cluster_state.mons = get_mons(
        roles, ips, cluster_name,
        mon_bind_msgr2=config.get('mon_bind_msgr2', True),
        mon_bind_addrvec=config.get('mon_bind_addrvec', True),
    )
    log.info('Monitor IPs: %s' % cluster_state.mons)

    if config.get('roleless', False):
        cluster_state.roleless = True
        cluster_state.bootstrap_remote = bootstrap_remote
        cluster_state.first_mon = first_mon
        cluster_state.first_mon_role = 'mon.' + first_mon
    else:
        first_mon_role = sorted(cluster_state.mons.keys())[0]
        _, _, first_mon = teuthology.split_role(first_mon_role)
        (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys()
        log.info('First mon is mon.%s on %s' % (first_mon,
                                                bootstrap_remote.shortname))
        cluster_state.bootstrap_remote = bootstrap_remote
        cluster_state.first_mon = first_mon
        cluster_state.first_mon_role = first_mon_role

        # the first mgr is the lowest-sorting mgr role on the bootstrap host
        others = ctx.cluster.remotes[bootstrap_remote]
        mgrs = sorted([r for r in others
                       if teuthology.is_type('mgr', cluster_name)(r)])
        if not mgrs:
            raise RuntimeError('no mgrs on the same host as first mon %s' % first_mon)
        _, _, first_mgr = teuthology.split_role(mgrs[0])
        log.info('First mgr is %s' % (first_mgr))
        cluster_state.first_mgr = first_mgr
    yield
def ceph_mons(ctx, config):
    """
    Deploy any additional mons.

    Every mon role of ``config['cluster']`` other than the first mon is
    added with ``ceph orch daemon add mon`` and registered with
    ``ctx.daemons``; after each addition the monmap is polled until it
    lists the expected number of mons.  Finally the minimal ceph.conf is
    regenerated into ``ctx.ceph[cluster].config_file`` and the generator
    yields.
    """
    cluster_name = config['cluster']
    cluster_state = ctx.ceph[cluster_name]
    fsid = cluster_state.fsid
    expected_mons = 1

    for remote, roles in ctx.cluster.remotes.items():
        for mon in roles:
            if not teuthology.is_type('mon', cluster_name)(mon):
                continue
            c_, _, id_ = teuthology.split_role(mon)
            if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
                # the first mon is assumed to exist already
                continue
            log.info('Adding %s on %s' % (mon, remote.shortname))
            expected_mons += 1
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'daemon', 'add', 'mon',
                remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_,
            ])
            ctx.daemons.register_daemon(
                remote, 'mon', id_,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild(mon),
                wait=False,
                started=True,
            )

            with contextutil.safe_while(sleep=1, tries=180) as proceed:
                while proceed():
                    log.info('Waiting for %d mons in monmap...' % (expected_mons))
                    dump = _shell(
                        ctx=ctx,
                        cluster_name=cluster_name,
                        remote=remote,
                        args=[
                            'ceph', 'mon', 'dump', '-f', 'json',
                        ],
                        stdout=StringIO(),
                    )
                    monmap = json.loads(dump.stdout.getvalue())
                    if len(monmap['mons']) == expected_mons:
                        break

    # refresh our (final) ceph.conf file
    log.info('Generating final ceph.conf file...')
    # `remote` is whichever remote the loop above visited last
    conf = _shell(
        ctx=ctx,
        cluster_name=cluster_name,
        remote=remote,
        args=[
            'ceph', 'config', 'generate-minimal-conf',
        ],
        stdout=StringIO(),
    )
    cluster_state.config_file = conf.stdout.getvalue()

    yield
def run_keystone(ctx, config):
    """
    Start the public and admin Keystone endpoints for every client in
    ``config`` and stop all of them again once the generator is resumed.

    :param ctx: Context; endpoints are read from
                ``ctx.keystone.public_endpoints`` and
                ``ctx.keystone.admin_endpoints``
    :param config: dict keyed by client role
    """
    assert isinstance(config, dict)
    log.info('Configuring keystone...')

    # (admin daemon id, public daemon id, cluster) of every client we
    # started.  Teardown used to rely on the loop variables and therefore
    # only stopped the last client (and raised NameError for an empty
    # config).
    started = []
    for client in config.keys():
        # keys() instead of the Python-2-only iterkeys()
        (remote,) = ctx.cluster.only(client).remotes.keys()
        cluster_name, _, client_id = teuthology.split_role(client)

        # start the public endpoint
        client_public_with_id = 'keystone.public' + '.' + client_id

        public_host, public_port = ctx.keystone.public_endpoints[client]
        run_cmd = get_keystone_venved_cmd(ctx, 'keystone-wsgi-public',
            [   '--host', public_host, '--port', str(public_port),
                # Let's put the Keystone in background, wait for EOF
                # and after receiving it, send SIGTERM to the daemon.
                # This crazy hack is because Keystone, in contrast to
                # our other daemons, doesn't quit on stdin.close().
                # Teuthology relies on this behaviour.
                run.Raw('& { read; kill %1; }')
            ]
        )
        ctx.daemons.add_daemon(
            remote, 'keystone', client_public_with_id,
            cluster=cluster_name,
            args=run_cmd,
            logger=log.getChild(client),
            stdin=run.PIPE,
            cwd=get_keystone_dir(ctx),
            wait=False,
            check_status=False,
        )

        # start the admin endpoint
        client_admin_with_id = 'keystone.admin' + '.' + client_id

        admin_host, admin_port = ctx.keystone.admin_endpoints[client]
        run_cmd = get_keystone_venved_cmd(ctx, 'keystone-wsgi-admin',
            [   '--host', admin_host, '--port', str(admin_port),
                run.Raw('& { read; kill %1; }')
            ]
        )
        ctx.daemons.add_daemon(
            remote, 'keystone', client_admin_with_id,
            cluster=cluster_name,
            args=run_cmd,
            logger=log.getChild(client),
            stdin=run.PIPE,
            cwd=get_keystone_dir(ctx),
            wait=False,
            check_status=False,
        )
        started.append(
            (client_admin_with_id, client_public_with_id, cluster_name))

        # sleep driven synchronization
        run_in_keystone_venv(ctx, client, [ 'sleep', '15' ])
    try:
        yield
    finally:
        for admin_id, public_id, cluster_name in started:
            log.info('Stopping Keystone admin instance')
            ctx.daemons.get_daemon('keystone', admin_id,
                                   cluster_name).stop()

            log.info('Stopping Keystone public instance')
            ctx.daemons.get_daemon('keystone', public_id,
                                   cluster_name).stop()
def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None):
    """
    Run the individual test. Create a scratch directory and then extract the
    workunits from git. Make the executables, and then run the tests.
    Clean up (remove files created) after the tests are finished.

    :param ctx:     Context
    :param refspec: branch, sha1, or version tag used to identify this
                    build
    :param role:    client role (string) the workunits run as
    :param tests:   specific tests specified.
    :param env:     environment set in yaml file.  Could be None.
    :param subdir:  subdirectory set in yaml file.  Could be None
    :param timeout: If present, use the 'timeout' command on the remote host
                    to limit execution time. Must be specified by a number
                    followed by 's' for seconds, 'm' for minutes, 'h' for
                    hours, or 'd' for days. If '0' or anything that evaluates
                    to False is passed, the 'timeout' command is not used.
    :raises RuntimeError: if a test spec matches no workunit
    """
    try:
        string_types = basestring  # Python 2: str and unicode
    except NameError:
        string_types = str  # Python 3 has no basestring

    testdir = misc.get_testdir(ctx)
    assert isinstance(role, string_types)
    cluster, type_, id_ = misc.split_role(role)
    assert type_ == 'client'
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)
    # subdir so we can remove and recreate this a lot without sudo
    if subdir is None:
        scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp')
    else:
        scratch_tmp = os.path.join(mnt, subdir)
    clonedir = '{tdir}/clone.{role}'.format(tdir=testdir, role=role)
    srcdir = '{cdir}/qa/workunits'.format(cdir=clonedir)
    workunits_file = '{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)

    def clone_and_checkout(url):
        # fresh clone of `url` into clonedir, then check out refspec
        remote.run(
            logger=log.getChild(role),
            args=[
                'rm', '-rf', clonedir, run.Raw('&&'),
                'git', 'clone', url, clonedir, run.Raw('&&'),
                'cd', '--', clonedir, run.Raw('&&'),
                'git', 'checkout', refspec,
            ],
        )

    git_url = teuth_config.get_ceph_git_url()
    try:
        clone_and_checkout(git_url)
    except CommandFailedError:
        alt_git_url = git_url.replace('ceph-ci', 'ceph')
        log.info(
            "failed to check out '%s' from %s; will also try in %s",
            refspec,
            git_url,
            alt_git_url,
        )
        clone_and_checkout(alt_git_url)

    # build (if there is a Makefile) and list all executables, NUL separated
    remote.run(
        logger=log.getChild(role),
        args=[
            'cd', '--', srcdir,
            run.Raw('&&'),
            'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
            run.Raw('&&'),
            'find', '-executable', '-type', 'f', '-printf', r'%P\0',
            run.Raw('>' + workunits_file),
        ],
    )

    workunits = sorted(misc.get_file(remote, workunits_file).split('\0'))
    assert workunits

    try:
        assert isinstance(tests, list)
        for spec in tests:
            log.info('Running workunits matching %s on %s...', spec, role)
            prefix = '{spec}/'.format(spec=spec)
            to_run = [w for w in workunits if w == spec or w.startswith(prefix)]
            if not to_run:
                raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec))
            for workunit in to_run:
                log.info('Running workunit %s...', workunit)
                args = [
                    'mkdir', '-p', '--', scratch_tmp,
                    run.Raw('&&'),
                    'cd', '--', scratch_tmp,
                    run.Raw('&&'),
                    run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'),
                    run.Raw('CEPH_REF={ref}'.format(ref=refspec)),
                    run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
                    run.Raw('CEPH_ARGS="--cluster {0}"'.format(cluster)),
                    run.Raw('CEPH_ID="{id}"'.format(id=id_)),
                    run.Raw('PATH=$PATH:/usr/sbin'),
                    run.Raw('CEPH_BASE={dir}'.format(dir=clonedir)),
                ]
                if env is not None:
                    # items() instead of the Python-2-only iteritems()
                    for var, val in env.items():
                        quoted_val = pipes.quote(val)
                        env_arg = '{var}={val}'.format(var=var, val=quoted_val)
                        args.append(run.Raw(env_arg))
                args.extend([
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage'.format(tdir=testdir)])
                if timeout and timeout != '0':
                    args.extend(['timeout', timeout])
                args.extend([
                    '{srcdir}/{workunit}'.format(
                        srcdir=srcdir,
                        workunit=workunit,
                    ),
                ])
                remote.run(
                    logger=log.getChild(role),
                    args=args,
                    label="workunit test {workunit}".format(workunit=workunit)
                )
                # wipe the scratch dir after every workunit
                remote.run(
                    logger=log.getChild(role),
                    args=['sudo', 'rm', '-rf', '--', scratch_tmp],
                )
    finally:
        log.info('Stopping %s on %s...', tests, role)
        remote.run(
            logger=log.getChild(role),
            args=[
                'rm', '-rf', '--', workunits_file, clonedir,
            ],
        )
def _make_scratch_dir(ctx, role, subdir):
    """
    Make scratch directories for this role.  This also makes the mount
    point if that directory does not exist.

    :param ctx:    Context
    :param role:   "role.#" where # is used for the role id.
    :param subdir: use this subdir (False if not used)
    :returns: True if the mount point had to be created, False otherwise
    """
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    dir_owner = remote.user
    mnt = _client_mountpoint(ctx, cluster, id_)

    # if neither kclient nor ceph-fuse are required for a workunit,
    # mnt may not exist. Stat and create the directory if it doesn't.
    try:
        remote.run(args=['stat', '--', mnt])
    except CommandFailedError:
        remote.run(args=['mkdir', '--', mnt])
        log.info('Created dir {dir}'.format(dir=mnt))
        created_mountpoint = True
    else:
        log.info('Did not need to create dir {dir}'.format(dir=mnt))
        created_mountpoint = False

    if not subdir:
        subdir = 'client.{id}'.format(id=id_)

    if created_mountpoint:
        make_subdir = ['mkdir', '--', subdir]
    else:
        make_subdir = [
            'sudo',
            'install',
            '-d',
            '-m', '0755',
            '--owner={user}'.format(user=dir_owner),
            '--',
            subdir,
        ]

    # cd first so this will fail if the mount point does
    # not exist; pure install -d will silently do the
    # wrong thing
    remote.run(args=['cd', '--', mnt, run.Raw('&&')] + make_subdir)

    return created_mountpoint