def shell(ctx, config): """ Execute (shell) commands """ cluster_name = config.get('cluster', 'ceph') env = [] if 'env' in config: for k in config['env']: env.extend(['-e', k + '=' + ctx.config.get(k, '')]) del config['env'] if 'all-roles' in config and len(config) == 1: a = config['all-roles'] roles = teuthology.all_roles(ctx.cluster) config = dict((id_, a) for id_ in roles if not id_.startswith('host.')) elif 'all-hosts' in config and len(config) == 1: a = config['all-hosts'] roles = teuthology.all_roles(ctx.cluster) config = dict((id_, a) for id_ in roles if id_.startswith('host.')) for role, cmd in config.items(): (remote,) = ctx.cluster.only(role).remotes.keys() log.info('Running commands on role %s host %s', role, remote.name) if isinstance(cmd, list): for c in cmd: _shell(ctx, cluster_name, remote, ['bash', '-c', subst_vip(ctx, c)], extra_cephadm_args=env) else: assert isinstance(cmd, str) _shell(ctx, cluster_name, remote, ['bash', '-ex', '-c', subst_vip(ctx, cmd)], extra_cephadm_args=env)
def exec(ctx, config): """ This is similar to the standard 'exec' task, but does the VIP substitutions. """ assert isinstance(config, dict), "task exec got invalid config" testdir = teuthology.get_testdir(ctx) if 'all-roles' in config and len(config) == 1: a = config['all-roles'] roles = teuthology.all_roles(ctx.cluster) config = dict((id_, a) for id_ in roles if not id_.startswith('host.')) elif 'all-hosts' in config and len(config) == 1: a = config['all-hosts'] roles = teuthology.all_roles(ctx.cluster) config = dict((id_, a) for id_ in roles if id_.startswith('host.')) for role, ls in config.items(): (remote, ) = ctx.cluster.only(role).remotes.keys() log.info('Running commands on role %s host %s', role, remote.name) for c in ls: c.replace('$TESTDIR', testdir) remote.run(args=[ 'sudo', 'TESTDIR={tdir}'.format(tdir=testdir), 'bash', '-c', subst_vip(ctx, c) ], )
def task(ctx, config): """ Execute commands on a given role tasks: - ceph: - kclient: [client.a] - exec: client.a: - "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control" - "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control" - interactive: It stops and fails with the first command that does not return on success. It means that if the first command fails, the second won't run at all. You can run a command on all hosts `all-hosts`, or all roles with `all-roles`: tasks: - exec: all-hosts: - touch /etc/passwd - exec: all-roles: - pwd To avoid confusion it is recommended to explicitly enclose the commands in double quotes. For instance if the command is false (without double quotes) it will be interpreted as a boolean by the YAML parser. :param ctx: Context :param config: Configuration """ log.info('Executing custom commands...') assert isinstance(config, dict), "task exec got invalid config" testdir = teuthology.get_testdir(ctx) if 'all' in config and len(config) == 1: a = config['all'] roles = teuthology.all_roles(ctx.cluster) config = dict((id_, a) for id_ in roles) elif 'all-roles' in config and len(config) == 1: a = config['all-roles'] roles = teuthology.all_roles(ctx.cluster) config = dict((id_, a) for id_ in roles) elif 'all-hosts' in config and len(config) == 1: a = config['all-hosts'] roles = [roles[0] for roles in ctx.cluster.remotes.values()] config = dict((id_, a) for id_ in roles) for role, ls in config.items(): (remote, ) = ctx.cluster.only(role).remotes.keys() log.info('Running commands on role %s host %s', role, remote.name) for c in ls: c.replace('$TESTDIR', testdir) remote.run(args=[ 'sudo', 'TESTDIR={tdir}'.format(tdir=testdir), 'bash', '-c', c ], )
def shell(ctx, config): """ Execute (shell) commands """ cluster_name = config.get('cluster', 'ceph') env = [] if 'env' in config: for k in config['env']: env.extend(['-e', k + '=' + ctx.config.get(k, '')]) del config['env'] if 'all' in config and len(config) == 1: a = config['all'] roles = teuthology.all_roles(ctx.cluster) config = dict((id_, a) for id_ in roles) for role, ls in config.items(): (remote, ) = ctx.cluster.only(role).remotes.keys() log.info('Running commands on role %s host %s', role, remote.name) for c in ls: _shell(ctx, cluster_name, remote, ['bash', '-c', c], extra_cephadm_args=env)
def task(ctx, config): """ Execute commands on a given role tasks: - ceph: - kclient: [client.a] - exec: client.a: - echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control - echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control - interactive: """ log.info('Executing custom commands...') assert isinstance(config, dict), "task exec got invalid config" testdir = teuthology.get_testdir(ctx) if 'all' in config and len(config) == 1: a = config['all'] roles = teuthology.all_roles(ctx.cluster) config = dict((id_, a) for id_ in roles) for role, ls in config.iteritems(): (remote, ) = ctx.cluster.only(role).remotes.iterkeys() log.info('Running commands on role %s host %s', role, remote.name) for c in ls: c.replace('$TESTDIR', testdir) remote.run(args=[ 'sudo', 'TESTDIR={tdir}'.format(tdir=testdir), 'bash', '-c', c ], )
def task(ctx, config): """ Execute commands on a given role tasks: - ceph: - kclient: [client.a] - exec: client.a: - echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control - echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control - interactive: """ log.info("Executing custom commands...") assert isinstance(config, dict), "task exec got invalid config" if "all" in config and len(config) == 1: a = config["all"] roles = teuthology.all_roles(ctx.cluster) config = dict((id_, a) for id_ in roles) for role, ls in config.iteritems(): (remote,) = ctx.cluster.only(role).remotes.iterkeys() log.info("Running commands on role %s host %s", role, remote.name) for c in ls: remote.run(args=["sudo", "bash", "-c", c])
def task(ctx, config): """ Execute commands on multiple roles in parallel tasks: - ceph: - ceph-fuse: [client.0, client.1] - pexec: client.0: - while true; do echo foo >> bar; done client.1: - sleep 1 - tail -f bar - interactive: """ log.info('Executing custom commands...') assert isinstance(config, dict), "task pexec got invalid config" sudo = False if 'sudo' in config: sudo = config['sudo'] del config['sudo'] if 'all' in config and len(config) == 1: a = config['all'] roles = teuthology.all_roles(ctx.cluster) config = dict((id_, a) for id_ in roles) with parallel() as p: for role, ls in config.iteritems(): (remote,) = ctx.cluster.only(role).remotes.iterkeys() p.spawn(_exec_role, remote, role, sudo, ls)
def osd_scrub_pgs(ctx, config): """ Scrub pgs when we exit. First make sure all pgs are active and clean. Next scrub all osds. Then periodically check until all pgs have scrub time stamps that indicate the last scrub completed. Time out if no progess is made here after two minutes. """ retries = 12 delays = 10 cluster_name = config['cluster'] manager = ctx.managers[cluster_name] all_clean = False for _ in range(0, retries): stats = manager.get_pg_stats() states = [stat['state'] for stat in stats] if len(set(states)) == 1 and states[0] == 'active+clean': all_clean = True break log.info("Waiting for all osds to be active and clean.") time.sleep(delays) if not all_clean: log.info("Scrubbing terminated -- not all pgs were active and clean.") return check_time_now = time.localtime() time.sleep(1) all_roles = teuthology.all_roles(ctx.cluster) for role in teuthology.cluster_roles_of_type(all_roles, 'osd', cluster_name): log.info("Scrubbing {osd}".format(osd=role)) _, _, id_ = teuthology.split_role(role) manager.raw_cluster_cmd('osd', 'deep-scrub', id_) prev_good = 0 gap_cnt = 0 loop = True while loop: stats = manager.get_pg_stats() timez = [stat['last_scrub_stamp'] for stat in stats] loop = False thiscnt = 0 for tmval in timez: pgtm = time.strptime(tmval[0:tmval.find('.')], '%Y-%m-%d %H:%M:%S') if pgtm > check_time_now: thiscnt += 1 else: loop = True if thiscnt > prev_good: prev_good = thiscnt gap_cnt = 0 else: gap_cnt += 1 if gap_cnt > retries: log.info('Exiting scrub checking -- not all pgs scrubbed.') return if loop: log.info('Still waiting for all pgs to be scrubbed.') time.sleep(delays)
def task(ctx, config):
    log.info('Executing commands test...')
    assert isinstance(config, dict), "task exec got invalid config"

    test_result = {}
    testdir = teuthology.get_testdir(ctx)

    cmd = ['ceph', 'osd', 'pool', 'ls']
    fd_popen = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout
    data = fd_popen.read().strip()
    fd_popen.close()
    pool_name = data.split("\n")[0]

    cmd = ['ceph', 'osd', 'pool', 'stats', pool_name]
    fd_popen = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout
    data = fd_popen.read().strip()
    fd_popen.close()
    data = data.split("\n")[0].split(" ")
    pool_id = data[3]
    log.info("using {name}({id}) pool".format(name=pool_name, id=pool_id))

    # cmd_list is expected to be defined at module scope
    for idx in range(len(cmd_list)):
        raw_cmd = cmd_list[idx]
        if raw_cmd.find("{pool_name}") != -1:
            cmd_list[idx] = raw_cmd.format(pool_name=pool_name)
    for idx in range(len(cmd_list)):
        raw_cmd = cmd_list[idx]
        if raw_cmd.find("{pool_id}") != -1:
            cmd_list[idx] = raw_cmd.format(pool_id=pool_id)

    if 'all' in config and len(config) == 1:
        a = config['all']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles)

    for role in config:
        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in cmd_list:
            log.info("custom command: {command}".format(command=c))
            # compare integers with '!=', not 'is not' (identity check)
            command_result = subprocess.call(
                "ssh {remote} sudo {command}".format(remote=remote, command=c),
                shell=True)
            if command_result != 0:
                test_result[c] = (command_result, remote.name)

    if test_result:
        log.info("failed commands")
        fcmds = ""
        for command in test_result:
            cr, remote = test_result[command]
            fcmds = fcmds + "{c}, ".format(c=command)
            log.info("{remote}: \"{c}\" return {r}".format(
                remote=remote, c=command, r=cr))
        assert not test_result, "command fail - {fcmds}".format(fcmds=fcmds)

def osd_scrub_pgs(ctx, config): """ Scrub pgs when we exit. First make sure all pgs are active and clean. Next scrub all osds. Then periodically check until all pgs have scrub time stamps that indicate the last scrub completed. Time out if no progess is made here after two minutes. """ retries = 12 delays = 10 cluster_name = config["cluster"] manager = ctx.managers[cluster_name] all_clean = False for _ in range(0, retries): stats = manager.get_pg_stats() states = [stat["state"] for stat in stats] if len(set(states)) == 1 and states[0] == "active+clean": all_clean = True break log.info("Waiting for all osds to be active and clean.") time.sleep(delays) if not all_clean: log.info("Scrubbing terminated -- not all pgs were active and clean.") return check_time_now = time.localtime() time.sleep(1) all_roles = teuthology.all_roles(ctx.cluster) for role in teuthology.cluster_roles_of_type(all_roles, "osd", cluster_name): log.info("Scrubbing {osd}".format(osd=role)) _, _, id_ = teuthology.split_role(role) manager.raw_cluster_cmd("osd", "deep-scrub", id_) prev_good = 0 gap_cnt = 0 loop = True while loop: stats = manager.get_pg_stats() timez = [stat["last_scrub_stamp"] for stat in stats] loop = False thiscnt = 0 for tmval in timez: pgtm = time.strptime(tmval[0 : tmval.find(".")], "%Y-%m-%d %H:%M:%S") if pgtm > check_time_now: thiscnt += 1 else: loop = True if thiscnt > prev_good: prev_good = thiscnt gap_cnt = 0 else: gap_cnt += 1 if gap_cnt > retries: log.info("Exiting scrub checking -- not all pgs scrubbed.") return if loop: log.info("Still waiting for all pgs to be scrubbed.") time.sleep(delays)
def task(ctx, config): """ Execute commands on a given role tasks: - ceph: - kclient: [client.a] - exec: client.a: - "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control" - "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control" - interactive: It stops and fails with the first command that does not return on success. It means that if the first command fails, the second won't run at all. To avoid confusion it is recommended to explicitly enclose the commands in double quotes. For instance if the command is false (without double quotes) it will be interpreted as a boolean by the YAML parser. :param ctx: Context :param config: Configuration """ try: yield finally: log.info('Executing custom commands...') assert isinstance(config, dict), "task exec got invalid config" testdir = teuthology.get_testdir(ctx) if 'all' in config and len(config) == 1: a = config['all'] roles = teuthology.all_roles(ctx.cluster) config = dict((id_, a) for id_ in roles) for role, ls in config.iteritems(): (remote,) = ctx.cluster.only(role).remotes.iterkeys() log.info('Running commands on role %s host %s', role, remote.name) for c in ls: c.replace('$TESTDIR', testdir) remote.run( args=[ 'sudo', 'TESTDIR={tdir}'.format(tdir=testdir), 'bash', '-c', c], )
def shell(ctx, config): """ Execute (shell) commands """ cluster_name = config.get('cluster', 'ceph') if 'all' in config and len(config) == 1: a = config['all'] roles = teuthology.all_roles(ctx.cluster) config = dict((id_, a) for id_ in roles) for role, ls in config.items(): (remote, ) = ctx.cluster.only(role).remotes.keys() log.info('Running commands on role %s host %s', role, remote.name) for c in ls: _shell(ctx, cluster_name, remote, c.split(' '))
def task(ctx, config): """ Execute commands on a given role tasks: - ceph: - kclient: [client.a] - exec: client.a: - echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control - echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control - interactive: :param ctx: Context :param config: Configuration """ log.info('Executing custom commands...') assert isinstance(config, dict), "task exec got invalid config" testdir = teuthology.get_testdir(ctx) if 'all' in config and len(config) == 1: a = config['all'] roles = teuthology.all_roles(ctx.cluster) config = dict((id_, a) for id_ in roles) for role, ls in config.iteritems(): (remote,) = ctx.cluster.only(role).remotes.iterkeys() log.info('Running commands on role %s host %s', role, remote.name) for c in ls: c.replace('$TESTDIR', testdir) remote.run( args=[ 'sudo', 'TESTDIR={tdir}'.format(tdir=testdir), 'bash', '-c', c], )
def normalize_config(ctx, config): """ Returns a config whose keys are all real roles. Generic roles (client, mon, osd, etc.) are replaced with the actual roles (client.0, client.1, etc.). If the config specifies a different version for a specific role, this is unchanged. For example, with 4 OSDs this:: osd: tag: v3.0 kdb: true osd.1: branch: new_btrfs kdb: false osd.3: deb: /path/to/linux-whatever.deb is transformed into:: osd.0: tag: v3.0 kdb: true osd.1: branch: new_btrfs kdb: false osd.2: tag: v3.0 kdb: true osd.3: deb: /path/to/linux-whatever.deb If config is None or just specifies a version to use, it is applied to all nodes. :param ctx: Context :param config: Configuration """ if not config or \ len(filter(lambda x: x in VERSION_KEYS + ['kdb', 'flavor'], config.keys())) == len(config.keys()): new_config = {} if not config: config = CONFIG_DEFAULT for role in teuthology.all_roles(ctx.cluster): new_config[role] = config.copy() return new_config new_config = {} for role, role_config in config.iteritems(): if role_config is None: role_config = CONFIG_DEFAULT if '.' in role: new_config[role] = role_config.copy() else: for id_ in teuthology.all_roles_of_type(ctx.cluster, role): name = '{type}.{id}'.format(type=role, id=id_) # specific overrides generic if name not in config: new_config[name] = role_config.copy() return new_config
def task(ctx, config): """ Setup MPI and execute commands Example that starts an MPI process on specific clients:: tasks: - ceph: - ceph-fuse: [client.0, client.1] - ssh_keys: - mpi: nodes: [client.0, client.1] exec: ior ... Example that starts MPI processes on all clients:: tasks: - ceph: - ceph-fuse: - ssh_keys: - mpi: exec: ior ... Example that starts MPI processes on all roles:: tasks: - ceph: - ssh_keys: - mpi: nodes: all exec: ... Example that specifies a working directory for MPI processes: tasks: - ceph: - ceph-fuse: - pexec: clients: - ln -s {testdir}/mnt.* {testdir}/gmnt - ssh_keys: - mpi: exec: fsx-mpi workdir: {testdir}/gmnt - pexec: clients: - rm -f {testdir}/gmnt :param ctx: Context :param config: Configuration """ assert isinstance(config, dict), 'task mpi got invalid config' assert 'exec' in config, 'task mpi got invalid config, missing exec' testdir = teuthology.get_testdir(ctx) mpiexec = config['exec'].replace('$TESTDIR', testdir) hosts = [] remotes = [] master_remote = None if 'nodes' in config: if isinstance(config['nodes'], basestring) and config['nodes'] == 'all': for role in teuthology.all_roles(ctx.cluster): (remote,) = ctx.cluster.only(role).remotes.iterkeys() ip,port = remote.ssh.get_transport().getpeername() hosts.append(ip) remotes.append(remote) (master_remote,) = ctx.cluster.only(config['nodes'][0]).remotes.iterkeys() elif isinstance(config['nodes'], list): for role in config['nodes']: (remote,) = ctx.cluster.only(role).remotes.iterkeys() ip,port = remote.ssh.get_transport().getpeername() hosts.append(ip) remotes.append(remote) (master_remote,) = ctx.cluster.only(config['nodes'][0]).remotes.iterkeys() else: roles = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] (master_remote,) = ctx.cluster.only(roles[0]).remotes.iterkeys() for role in roles: (remote,) = ctx.cluster.only(role).remotes.iterkeys() ip,port = remote.ssh.get_transport().getpeername() hosts.append(ip) remotes.append(remote) workdir = [] if 'workdir' in config: workdir = ['-wdir', config['workdir'].replace('$TESTDIR', testdir) ] log.info('mpi rank 0 is: {name}'.format(name=master_remote.name)) # write out the mpi hosts file log.info('mpi nodes: [%s]' % (', '.join(hosts))) teuthology.write_file(remote=master_remote, path='{tdir}/mpi-hosts'.format(tdir=testdir), data='\n'.join(hosts)) log.info('mpiexec on {name}: {cmd}'.format(name=master_remote.name, cmd=mpiexec)) args=['mpiexec', '-f', '{tdir}/mpi-hosts'.format(tdir=testdir)] args.extend(workdir) args.extend(mpiexec.split(' ')) master_remote.run(args=args, ) log.info('mpi task completed') master_remote.run(args=['rm', '{tdir}/mpi-hosts'.format(tdir=testdir)])
def task(ctx, config): """ Setup MPI and execute commands Example that starts an MPI process on specific clients:: tasks: - ceph: - ceph-fuse: [client.0, client.1] - ssh_keys: - mpi: nodes: [client.0, client.1] exec: ior ... Example that starts MPI processes on all clients:: tasks: - ceph: - ceph-fuse: - ssh_keys: - mpi: exec: ior ... Example that starts MPI processes on all roles:: tasks: - ceph: - ssh_keys: - mpi: nodes: all exec: ... Example that specifies a working directory for MPI processes: tasks: - ceph: - ceph-fuse: - pexec: clients: - ln -s {testdir}/mnt.* {testdir}/gmnt - ssh_keys: - mpi: exec: fsx-mpi workdir: {testdir}/gmnt - pexec: clients: - rm -f {testdir}/gmnt :param ctx: Context :param config: Configuration """ assert isinstance(config, dict), 'task mpi got invalid config' assert 'exec' in config, 'task mpi got invalid config, missing exec' testdir = teuthology.get_testdir(ctx) mpiexec = config['exec'].replace('$TESTDIR', testdir) hosts = [] remotes = [] master_remote = None if 'nodes' in config: if isinstance(config['nodes'], basestring) and config['nodes'] == 'all': for role in teuthology.all_roles(ctx.cluster): (remote, ) = ctx.cluster.only(role).remotes.keys() ip, port = remote.ssh.get_transport().getpeername() hosts.append(ip) remotes.append(remote) (master_remote, ) = ctx.cluster.only( config['nodes'][0]).remotes.keys() elif isinstance(config['nodes'], list): for role in config['nodes']: (remote, ) = ctx.cluster.only(role).remotes.keys() ip, port = remote.ssh.get_transport().getpeername() hosts.append(ip) remotes.append(remote) (master_remote, ) = ctx.cluster.only( config['nodes'][0]).remotes.keys() else: roles = [ 'client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client') ] (master_remote, ) = ctx.cluster.only(roles[0]).remotes.keys() for role in roles: (remote, ) = ctx.cluster.only(role).remotes.keys() ip, port = remote.ssh.get_transport().getpeername() hosts.append(ip) remotes.append(remote) # mpich is sensitive to different versions on different nodes _check_mpi_version(remotes) workdir = [] if 'workdir' in config: workdir = ['-wdir', config['workdir'].replace('$TESTDIR', testdir)] log.info('mpi rank 0 is: {name}'.format(name=master_remote.name)) # write out the mpi hosts file log.info('mpi nodes: [%s]' % (', '.join(hosts))) teuthology.write_file(remote=master_remote, path='{tdir}/mpi-hosts'.format(tdir=testdir), data='\n'.join(hosts)) log.info('mpiexec on {name}: {cmd}'.format(name=master_remote.name, cmd=mpiexec)) args = ['mpiexec', '-f', '{tdir}/mpi-hosts'.format(tdir=testdir)] args.extend(workdir) args.extend(mpiexec.split(' ')) master_remote.run(args=args, ) log.info('mpi task completed') master_remote.run(args=['rm', '{tdir}/mpi-hosts'.format(tdir=testdir)])