Example #1
def shell(ctx, config):
    """
    Execute (shell) commands
    """
    cluster_name = config.get('cluster', 'ceph')

    env = []
    if 'env' in config:
        for k in config['env']:
            env.extend(['-e', k + '=' + ctx.config.get(k, '')])
        del config['env']

    if 'all-roles' in config and len(config) == 1:
        a = config['all-roles']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles if not id_.startswith('host.'))
    elif 'all-hosts' in config and len(config) == 1:
        a = config['all-hosts']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles if id_.startswith('host.'))

    for role, cmd in config.items():
        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Running commands on role %s host %s', role, remote.name)
        if isinstance(cmd, list):
            for c in cmd:
                _shell(ctx, cluster_name, remote,
                       ['bash', '-c', subst_vip(ctx, c)],
                       extra_cephadm_args=env)
        else:
            assert isinstance(cmd, str)
            _shell(ctx, cluster_name, remote,
                   ['bash', '-ex', '-c', subst_vip(ctx, cmd)],
                   extra_cephadm_args=env)
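A minimal, self-contained sketch of the role-expansion step above, using a hypothetical role list in place of teuthology.all_roles(ctx.cluster): 'all-roles' maps the command to every role that is not a host.* entry, while 'all-hosts' keeps only the host.* entries.

# Sketch only: mock role names stand in for a real teuthology cluster.
def expand_roles(config, roles):
    """Expand the 'all-roles'/'all-hosts' shorthand into a per-role mapping."""
    if 'all-roles' in config and len(config) == 1:
        cmd = config['all-roles']
        return {r: cmd for r in roles if not r.startswith('host.')}
    if 'all-hosts' in config and len(config) == 1:
        cmd = config['all-hosts']
        return {r: cmd for r in roles if r.startswith('host.')}
    return config

roles = ['mon.a', 'osd.0', 'host.a', 'host.b']        # hypothetical roles
print(expand_roles({'all-roles': 'ceph -s'}, roles))  # {'mon.a': 'ceph -s', 'osd.0': 'ceph -s'}
print(expand_roles({'all-hosts': 'uptime'}, roles))   # {'host.a': 'uptime', 'host.b': 'uptime'}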
Example #2
def exec(ctx, config):
    """
    This is similar to the standard 'exec' task, but does the VIP substitutions.
    """
    assert isinstance(config, dict), "task exec got invalid config"

    testdir = teuthology.get_testdir(ctx)

    if 'all-roles' in config and len(config) == 1:
        a = config['all-roles']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles if not id_.startswith('host.'))
    elif 'all-hosts' in config and len(config) == 1:
        a = config['all-hosts']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles if id_.startswith('host.'))

    for role, ls in config.items():
        (remote, ) = ctx.cluster.only(role).remotes.keys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in ls:
            c = c.replace('$TESTDIR', testdir)
            remote.run(args=[
                'sudo', 'TESTDIR={tdir}'.format(tdir=testdir), 'bash', '-c',
                subst_vip(ctx, c)
            ], )
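subst_vip itself is not shown in this snippet; purely as an illustration (the token names and helper are assumptions, not the real implementation), a substitution step of this general shape would look like:

# Hypothetical stand-in for subst_vip: replace placeholder tokens in a command
# string with values recorded on the test context. Token names are made up.
def subst_placeholders(cmd, values):
    for token, value in values.items():
        cmd = cmd.replace(token, value)
    return cmd

print(subst_placeholders('curl http://{{VIP0}}/', {'{{VIP0}}': '10.0.0.10'}))
# curl http://10.0.0.10/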
Example #3
def task(ctx, config):
    """
    Execute commands on a given role

        tasks:
        - ceph:
        - kclient: [client.a]
        - exec:
            client.a:
              - "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control"
              - "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control"
        - interactive:

    Execution stops and the task fails at the first command that does not exit
    successfully; if the first command fails, the second will not run at all.

    You can run a command on all hosts with `all-hosts`, or on all roles with `all-roles`:

        tasks:
        - exec:
            all-hosts:
              - touch /etc/passwd
        - exec:
            all-roles:
              - pwd

    To avoid confusion it is recommended to enclose the commands in double quotes.
    For instance, a bare false (without double quotes) will be interpreted as a
    boolean by the YAML parser.

    :param ctx: Context
    :param config: Configuration
    """
    log.info('Executing custom commands...')
    assert isinstance(config, dict), "task exec got invalid config"

    testdir = teuthology.get_testdir(ctx)

    if 'all' in config and len(config) == 1:
        a = config['all']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles)
    elif 'all-roles' in config and len(config) == 1:
        a = config['all-roles']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles)
    elif 'all-hosts' in config and len(config) == 1:
        a = config['all-hosts']
        roles = [roles[0] for roles in ctx.cluster.remotes.values()]
        config = dict((id_, a) for id_ in roles)

    for role, ls in config.items():
        (remote, ) = ctx.cluster.only(role).remotes.keys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in ls:
            c = c.replace('$TESTDIR', testdir)
            remote.run(args=[
                'sudo', 'TESTDIR={tdir}'.format(tdir=testdir), 'bash', '-c', c
            ], )
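The quoting advice in the docstring can be verified directly with PyYAML (assuming it is installed): an unquoted false parses as a boolean, while the quoted form stays a string.

import yaml

doc = """
client.a:
  - false
  - "false"
"""
print(yaml.safe_load(doc))  # {'client.a': [False, 'false']}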
Example #4
def shell(ctx, config):
    """
    Execute (shell) commands
    """
    cluster_name = config.get('cluster', 'ceph')

    env = []
    if 'env' in config:
        for k in config['env']:
            env.extend(['-e', k + '=' + ctx.config.get(k, '')])
        del config['env']

    if 'all' in config and len(config) == 1:
        a = config['all']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles)

    for role, ls in config.items():
        (remote, ) = ctx.cluster.only(role).remotes.keys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in ls:
            _shell(ctx,
                   cluster_name,
                   remote, ['bash', '-c', c],
                   extra_cephadm_args=env)
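The env handling above turns each requested key into a '-e KEY=value' pair for cephadm, pulling the value from ctx.config; a tiny sketch with made-up keys:

# Sketch: build the extra cephadm arguments the same way the task does,
# using a plain dict in place of ctx.config.
ctx_config = {'RGW_FRONTEND': 'beast'}               # hypothetical ctx.config contents
env = []
for k in ['RGW_FRONTEND', 'MISSING_KEY']:
    env.extend(['-e', k + '=' + ctx_config.get(k, '')])
print(env)  # ['-e', 'RGW_FRONTEND=beast', '-e', 'MISSING_KEY=']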
Example #5
def task(ctx, config):
    """
    Execute commands on a given role

        tasks:
        - ceph:
        - kclient: [client.a]
        - exec:
            client.a:
              - echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control
              - echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control
        - interactive:

    """
    log.info('Executing custom commands...')
    assert isinstance(config, dict), "task exec got invalid config"

    testdir = teuthology.get_testdir(ctx)

    if 'all' in config and len(config) == 1:
        a = config['all']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles)

    for role, ls in config.iteritems():
        (remote, ) = ctx.cluster.only(role).remotes.iterkeys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in ls:
            c = c.replace('$TESTDIR', testdir)
            remote.run(args=[
                'sudo', 'TESTDIR={tdir}'.format(tdir=testdir), 'bash', '-c', c
            ], )
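This variant predates Python 3: dict.iteritems() and remotes.iterkeys() no longer exist there. Under Python 3 the equivalent iteration uses the view methods, as in the later examples:

config = {'client.0': ['uname -a'], 'client.1': ['uptime']}  # hypothetical config

# Python 3: items()/keys() return views and iterate lazily, so no iter* variants are needed.
for role, ls in config.items():
    for c in ls:
        print(role, c)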
Example #6
def task(ctx, config):
    """
    Execute commands on a given role

        tasks:
        - ceph:
        - kclient: [client.a]
        - exec:
            client.a:
              - echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control
              - echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control
        - interactive:

    """
    log.info("Executing custom commands...")
    assert isinstance(config, dict), "task exec got invalid config"

    if "all" in config and len(config) == 1:
        a = config["all"]
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles)

    for role, ls in config.iteritems():
        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
        log.info("Running commands on role %s host %s", role, remote.name)
        for c in ls:
            remote.run(args=["sudo", "bash", "-c", c])
Example #7
def task(ctx, config):
    """
    Execute commands on multiple roles in parallel

        tasks:
        - ceph:
        - ceph-fuse: [client.0, client.1]
        - pexec:
            client.0:
              - while true; do echo foo >> bar; done
            client.1:
              - sleep 1
              - tail -f bar
        - interactive:

    """
    log.info('Executing custom commands...')
    assert isinstance(config, dict), "task pexec got invalid config"

    sudo = False
    if 'sudo' in config:
        sudo = config['sudo']
        del config['sudo']

    if 'all' in config and len(config) == 1:
        a = config['all']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles)

    with parallel() as p:
        for role, ls in config.iteritems():
            (remote,) = ctx.cluster.only(role).remotes.iterkeys()
            p.spawn(_exec_role, remote, role, sudo, ls)
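teuthology's parallel() helper is not shown here; as a rough stand-in (assumption: spawn each role's commands concurrently and join on exit), the spawn-and-wait pattern can be sketched with concurrent.futures:

# Sketch of the spawn/join pattern used above, not teuthology's implementation.
from concurrent.futures import ThreadPoolExecutor, wait

def _exec_role_stub(role, cmds):    # hypothetical stand-in for _exec_role
    for c in cmds:
        print(f'{role}: {c}')

config = {'client.0': ['echo foo'], 'client.1': ['sleep 1', 'tail -f bar']}
with ThreadPoolExecutor() as pool:
    futures = [pool.submit(_exec_role_stub, role, ls) for role, ls in config.items()]
    wait(futures)                   # block until every role has finished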
Example #8
def osd_scrub_pgs(ctx, config):
    """
    Scrub pgs when we exit.

    First make sure all pgs are active and clean.
    Next scrub all osds.
    Then periodically check until all pgs have scrub time stamps that
    indicate the last scrub completed.  Time out if no progress is made
    here after two minutes.
    """
    retries = 12
    delays = 10
    cluster_name = config['cluster']
    manager = ctx.managers[cluster_name]
    all_clean = False
    for _ in range(0, retries):
        stats = manager.get_pg_stats()
        states = [stat['state'] for stat in stats]
        if len(set(states)) == 1 and states[0] == 'active+clean':
            all_clean = True
            break
        log.info("Waiting for all osds to be active and clean.")
        time.sleep(delays)
    if not all_clean:
        log.info("Scrubbing terminated -- not all pgs were active and clean.")
        return
    check_time_now = time.localtime()
    time.sleep(1)
    all_roles = teuthology.all_roles(ctx.cluster)
    for role in teuthology.cluster_roles_of_type(all_roles, 'osd',
                                                 cluster_name):
        log.info("Scrubbing {osd}".format(osd=role))
        _, _, id_ = teuthology.split_role(role)
        manager.raw_cluster_cmd('osd', 'deep-scrub', id_)
    prev_good = 0
    gap_cnt = 0
    loop = True
    while loop:
        stats = manager.get_pg_stats()
        timez = [stat['last_scrub_stamp'] for stat in stats]
        loop = False
        thiscnt = 0
        for tmval in timez:
            pgtm = time.strptime(tmval[0:tmval.find('.')], '%Y-%m-%d %H:%M:%S')
            if pgtm > check_time_now:
                thiscnt += 1
            else:
                loop = True
        if thiscnt > prev_good:
            prev_good = thiscnt
            gap_cnt = 0
        else:
            gap_cnt += 1
            if gap_cnt > retries:
                log.info('Exiting scrub checking -- not all pgs scrubbed.')
                return
        if loop:
            log.info('Still waiting for all pgs to be scrubbed.')
            time.sleep(delays)
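The scrub check works because time.strptime and time.localtime both return time.struct_time values, which compare field by field (year, month, day, ...), so "stamped after we started" is a plain > comparison. A quick illustration:

import time

check_time_now = time.strptime('2024-01-01 12:00:00', '%Y-%m-%d %H:%M:%S')
stamp = '2024-01-01 12:05:30.123456'   # shaped like a last_scrub_stamp value
pgtm = time.strptime(stamp[0:stamp.find('.')], '%Y-%m-%d %H:%M:%S')
print(pgtm > check_time_now)           # True: this pg was scrubbed after the check started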
Example #9
def task(ctx, config):

    log.info('Executing commands test...')
    assert isinstance(config, dict), "task exec got invalid config"

    test_result = {}
    testdir = teuthology.get_testdir(ctx)

    cmd = ['ceph', 'osd', 'pool', 'ls']
    fd_popen = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout
    data = fd_popen.read().strip()
    fd_popen.close()

    pool_name = data.split("\n")[0]

    cmd = ['ceph', 'osd', 'pool', 'stats', pool_name]
    fd_popen = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout
    data = fd_popen.read().strip()
    fd_popen.close()

    data = data.split("\n")[0].split(" ")
    pool_id = data[3]

    log.info("using {name}({id}) pool".format(name=pool_name, id=pool_id))

    for idx in range(len(cmd_list)):
        raw_cmd = cmd_list[idx]
        if raw_cmd.find("{pool_name}") is not -1:
            cmd_list[idx] = raw_cmd.format(pool_name=pool_name)

    for idx in range(len(cmd_list)):
        raw_cmd = cmd_list[idx]
        if raw_cmd.find("{pool_id}") is not -1:
            cmd_list[idx] = raw_cmd.format(pool_id=pool_id)

    if 'all' in config and len(config) == 1:
        a = config['all']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles)

    for role in config:
        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in cmd_list:
            log.info("custom commnad: {command}".format(command=c))
            command_result = subprocess.call(\
                "ssh {remote} sudo {command}".format(remote=remote,command=c), shell=True)
            if command_result is not 0:
                test_result[c] = (command_result, remote.name)

    if test_result:
        log.info("failed commands")
        fcmds = ""
        for command in test_result:
            cr, remote = test_result[command]
            fcmds = fcmds + "{c}, ".format(c=command)
            log.info("{remote}: \"{c}\" return {r}".format(remote=remote,c=command,r=cr))

    assert not test_result, "command fail - {fcmds}".format(fcmds=fcmds)
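Note that cmd_list comes from module scope in the original file and is not shown in this snippet. As a self-contained sketch of the same exit-status bookkeeping using subprocess.run:

import subprocess

# Sketch: check a command's exit status with subprocess.run; use '!=' rather
# than identity when comparing integers.
failures = {}
cmd = 'exit 3'                          # placeholder command
result = subprocess.run(['bash', '-c', cmd])
if result.returncode != 0:
    failures[cmd] = result.returncode
print(failures)                         # {'exit 3': 3}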
Example #10
def osd_scrub_pgs(ctx, config):
    """
    Scrub pgs when we exit.

    First make sure all pgs are active and clean.
    Next scrub all osds.
    Then periodically check until all pgs have scrub time stamps that
    indicate the last scrub completed.  Time out if no progress is made
    here after two minutes.
    """
    retries = 12
    delays = 10
    cluster_name = config["cluster"]
    manager = ctx.managers[cluster_name]
    all_clean = False
    for _ in range(0, retries):
        stats = manager.get_pg_stats()
        states = [stat["state"] for stat in stats]
        if len(set(states)) == 1 and states[0] == "active+clean":
            all_clean = True
            break
        log.info("Waiting for all osds to be active and clean.")
        time.sleep(delays)
    if not all_clean:
        log.info("Scrubbing terminated -- not all pgs were active and clean.")
        return
    check_time_now = time.localtime()
    time.sleep(1)
    all_roles = teuthology.all_roles(ctx.cluster)
    for role in teuthology.cluster_roles_of_type(all_roles, "osd", cluster_name):
        log.info("Scrubbing {osd}".format(osd=role))
        _, _, id_ = teuthology.split_role(role)
        manager.raw_cluster_cmd("osd", "deep-scrub", id_)
    prev_good = 0
    gap_cnt = 0
    loop = True
    while loop:
        stats = manager.get_pg_stats()
        timez = [stat["last_scrub_stamp"] for stat in stats]
        loop = False
        thiscnt = 0
        for tmval in timez:
            pgtm = time.strptime(tmval[0 : tmval.find(".")], "%Y-%m-%d %H:%M:%S")
            if pgtm > check_time_now:
                thiscnt += 1
            else:
                loop = True
        if thiscnt > prev_good:
            prev_good = thiscnt
            gap_cnt = 0
        else:
            gap_cnt += 1
            if gap_cnt > retries:
                log.info("Exiting scrub checking -- not all pgs scrubbed.")
                return
        if loop:
            log.info("Still waiting for all pgs to be scrubbed.")
            time.sleep(delays)
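The loop above is essentially a progress watchdog: it keeps waiting as long as the count of freshly scrubbed pgs keeps growing, and gives up after retries consecutive polls with no improvement. Stripped of the pg details, the pattern looks like this (illustrative counters only):

import time

def wait_for_progress(sample, retries=12, delay=0.1):
    """Keep polling while sample() keeps growing; give up after retries stalls."""
    prev_good, gap_cnt = 0, 0
    while True:
        done, total = sample()
        if done >= total:
            return True
        if done > prev_good:
            prev_good, gap_cnt = done, 0
        else:
            gap_cnt += 1
            if gap_cnt > retries:
                return False        # no progress for too long
        time.sleep(delay)

counter = iter(range(6))
print(wait_for_progress(lambda: (next(counter), 5)))  # True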
Example #11
def task(ctx, config):
    """
    Execute commands on a given role

        tasks:
        - ceph:
        - kclient: [client.a]
        - exec:
            client.a:
              - "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control"
              - "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control"
        - interactive:

    Execution stops and the task fails at the first command that does not exit
    successfully; if the first command fails, the second will not run at all.

    To avoid confusion it is recommended to enclose the commands in double quotes.
    For instance, a bare false (without double quotes) will be interpreted as a
    boolean by the YAML parser.

    :param ctx: Context
    :param config: Configuration
    """
    try:
        yield
    finally:
        log.info('Executing custom commands...')
        assert isinstance(config, dict), "task exec got invalid config"

        testdir = teuthology.get_testdir(ctx)

        if 'all' in config and len(config) == 1:
            a = config['all']
            roles = teuthology.all_roles(ctx.cluster)
            config = dict((id_, a) for id_ in roles)

        for role, ls in config.iteritems():
            (remote,) = ctx.cluster.only(role).remotes.iterkeys()
            log.info('Running commands on role %s host %s', role, remote.name)
            for c in ls:
                c = c.replace('$TESTDIR', testdir)
                remote.run(
                    args=[
                        'sudo',
                        'TESTDIR={tdir}'.format(tdir=testdir),
                        'bash',
                        '-c',
                        c],
                )
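This variant is written as a generator: the body only runs when control resumes after yield, so the commands execute during teardown rather than setup. A minimal sketch of that shape with contextlib (teuthology drives its task generators in a similar enter/exit fashion; the details here are assumed):

from contextlib import contextmanager

@contextmanager
def exec_on_cleanup(commands):
    try:
        yield                        # the test body runs while we are suspended here
    finally:
        for c in commands:           # cleanup commands run on the way out
            print('cleanup:', c)

with exec_on_cleanup(['rm -f /tmp/scratch']):
    print('test body')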
Example #12
def shell(ctx, config):
    """
    Execute (shell) commands
    """
    cluster_name = config.get('cluster', 'ceph')

    if 'all' in config and len(config) == 1:
        a = config['all']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles)

    for role, ls in config.items():
        (remote, ) = ctx.cluster.only(role).remotes.keys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in ls:
            _shell(ctx, cluster_name, remote, c.split(' '))
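Splitting the command with c.split(' ') breaks on quoted arguments; shlex.split is the usual alternative when the configured commands may contain quoting:

import shlex

c = 'ceph config set mon public_network "10.0.0.0/24"'
print(c.split(' '))    # last token keeps its quotes: '"10.0.0.0/24"'
print(shlex.split(c))  # quoting handled: '10.0.0.0/24'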
Example #13
def task(ctx, config):
    """
    Execute commands on a given role

        tasks:
        - ceph:
        - kclient: [client.a]
        - exec:
            client.a:
              - echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control
              - echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control
        - interactive:

    :param ctx: Context
    :param config: Configuration
    """
    log.info('Executing custom commands...')
    assert isinstance(config, dict), "task exec got invalid config"

    testdir = teuthology.get_testdir(ctx)

    if 'all' in config and len(config) == 1:
        a = config['all']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles)

    for role, ls in config.iteritems():
        (remote,) = ctx.cluster.only(role).remotes.iterkeys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in ls:
            c = c.replace('$TESTDIR', testdir)
            remote.run(
                args=[
                    'sudo',
                    'TESTDIR={tdir}'.format(tdir=testdir),
                    'bash',
                    '-c',
                    c],
                )
Example #14
def normalize_config(ctx, config):
    """
    Returns a config whose keys are all real roles.
    Generic roles (client, mon, osd, etc.) are replaced with
    the actual roles (client.0, client.1, etc.). If the config
    specifies a different version for a specific role, this is
    unchanged.

    For example, with 4 OSDs this::

         osd:
           tag: v3.0
           kdb: true
         osd.1:
           branch: new_btrfs
           kdb: false
         osd.3:
           deb: /path/to/linux-whatever.deb

    is transformed into::

         osd.0:
           tag: v3.0
           kdb: true
         osd.1:
           branch: new_btrfs
           kdb: false
         osd.2:
           tag: v3.0
           kdb: true
         osd.3:
           deb: /path/to/linux-whatever.deb

    If config is None or just specifies a version to use,
    it is applied to all nodes.

    :param ctx: Context
    :param config: Configuration
    """
    if not config or \
            len(filter(lambda x: x in VERSION_KEYS + ['kdb', 'flavor'],
                       config.keys())) == len(config.keys()):
        new_config = {}
        if not config:
            config = CONFIG_DEFAULT
        for role in teuthology.all_roles(ctx.cluster):
            new_config[role] = config.copy()
        return new_config

    new_config = {}
    for role, role_config in config.iteritems():
        if role_config is None:
            role_config = CONFIG_DEFAULT
        if '.' in role:
            new_config[role] = role_config.copy()
        else:
            for id_ in teuthology.all_roles_of_type(ctx.cluster, role):
                name = '{type}.{id}'.format(type=role, id=id_)
                # specific overrides generic
                if name not in config:
                    new_config[name] = role_config.copy()
    return new_config
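The first check relies on len(filter(...)), which only works on Python 2 (Python 3's filter returns an iterator with no len()). An equivalent Python 3 check, with made-up constants standing in for the module-level VERSION_KEYS and CONFIG_DEFAULT:

VERSION_KEYS = ['branch', 'tag', 'sha1', 'deb']   # assumed; the real list lives elsewhere
CONFIG_DEFAULT = {'branch': 'main'}               # assumed default

def is_generic(config):
    # True when config is empty or contains only version/kdb/flavor keys,
    # i.e. the same settings should be applied to every role.
    return not config or all(k in VERSION_KEYS + ['kdb', 'flavor'] for k in config)

print(is_generic({'tag': 'v3.0', 'kdb': True}))        # True: applies to all roles
print(is_generic({'osd.1': {'branch': 'new_btrfs'}}))  # False: per-role config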
Example #15
def task(ctx, config):
    """
    Setup MPI and execute commands

    Example that starts an MPI process on specific clients::

        tasks:
        - ceph:
        - ceph-fuse: [client.0, client.1]
        - ssh_keys:
        - mpi: 
            nodes: [client.0, client.1]
            exec: ior ...

    Example that starts MPI processes on all clients::

        tasks:
        - ceph:
        - ceph-fuse:
        - ssh_keys:
        - mpi:
            exec: ior ...

    Example that starts MPI processes on all roles::

        tasks:
        - ceph:
        - ssh_keys:
        - mpi:
            nodes: all
            exec: ...

    Example that specifies a working directory for MPI processes::

        tasks:
        - ceph:
        - ceph-fuse:
        - pexec:
            clients:
              - ln -s {testdir}/mnt.* {testdir}/gmnt
        - ssh_keys:
        - mpi:
            exec: fsx-mpi
            workdir: {testdir}/gmnt
        - pexec:
            clients:
              - rm -f {testdir}/gmnt

    :param ctx: Context
    :param config: Configuration
    """
    assert isinstance(config, dict), 'task mpi got invalid config'
    assert 'exec' in config, 'task mpi got invalid config, missing exec'

    testdir = teuthology.get_testdir(ctx)

    mpiexec = config['exec'].replace('$TESTDIR', testdir)
    hosts = []
    remotes = []
    master_remote = None
    if 'nodes' in config:
        if isinstance(config['nodes'], basestring) and config['nodes'] == 'all':
            for role in  teuthology.all_roles(ctx.cluster):
                (remote,) = ctx.cluster.only(role).remotes.iterkeys()
                ip,port = remote.ssh.get_transport().getpeername()
                hosts.append(ip)
                remotes.append(remote)
            (master_remote,) = ctx.cluster.only(config['nodes'][0]).remotes.iterkeys()
        elif isinstance(config['nodes'], list):
            for role in config['nodes']:
                (remote,) = ctx.cluster.only(role).remotes.iterkeys()
                ip,port = remote.ssh.get_transport().getpeername()
                hosts.append(ip)
                remotes.append(remote)
            (master_remote,) = ctx.cluster.only(config['nodes'][0]).remotes.iterkeys()
    else:
        roles = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
        (master_remote,) = ctx.cluster.only(roles[0]).remotes.iterkeys()
        for role in roles:
            (remote,) = ctx.cluster.only(role).remotes.iterkeys()
            ip,port = remote.ssh.get_transport().getpeername()
            hosts.append(ip)
            remotes.append(remote)

    workdir = []
    if 'workdir' in config:
        workdir = ['-wdir', config['workdir'].replace('$TESTDIR', testdir) ]

    log.info('mpi rank 0 is: {name}'.format(name=master_remote.name))

    # write out the mpi hosts file
    log.info('mpi nodes: [%s]' % (', '.join(hosts)))
    teuthology.write_file(remote=master_remote,
                          path='{tdir}/mpi-hosts'.format(tdir=testdir),
                          data='\n'.join(hosts))
    log.info('mpiexec on {name}: {cmd}'.format(name=master_remote.name, cmd=mpiexec))
    args=['mpiexec', '-f', '{tdir}/mpi-hosts'.format(tdir=testdir)]
    args.extend(workdir)
    args.extend(mpiexec.split(' '))
    master_remote.run(args=args, )
    log.info('mpi task completed')
    master_remote.run(args=['rm', '{tdir}/mpi-hosts'.format(tdir=testdir)])
Example #16
def task(ctx, config):
    """
    Setup MPI and execute commands

    Example that starts an MPI process on specific clients::

        tasks:
        - ceph:
        - ceph-fuse: [client.0, client.1]
        - ssh_keys:
        - mpi: 
            nodes: [client.0, client.1]
            exec: ior ...

    Example that starts MPI processes on all clients::

        tasks:
        - ceph:
        - ceph-fuse:
        - ssh_keys:
        - mpi:
            exec: ior ...

    Example that starts MPI processes on all roles::

        tasks:
        - ceph:
        - ssh_keys:
        - mpi:
            nodes: all
            exec: ...

    Example that specifies a working directory for MPI processes::

        tasks:
        - ceph:
        - ceph-fuse:
        - pexec:
            clients:
              - ln -s {testdir}/mnt.* {testdir}/gmnt
        - ssh_keys:
        - mpi:
            exec: fsx-mpi
            workdir: {testdir}/gmnt
        - pexec:
            clients:
              - rm -f {testdir}/gmnt

    :param ctx: Context
    :param config: Configuration
    """
    assert isinstance(config, dict), 'task mpi got invalid config'
    assert 'exec' in config, 'task mpi got invalid config, missing exec'

    testdir = teuthology.get_testdir(ctx)

    mpiexec = config['exec'].replace('$TESTDIR', testdir)
    hosts = []
    remotes = []
    master_remote = None
    if 'nodes' in config:
        if isinstance(config['nodes'],
                      basestring) and config['nodes'] == 'all':
            for role in teuthology.all_roles(ctx.cluster):
                (remote, ) = ctx.cluster.only(role).remotes.keys()
                ip, port = remote.ssh.get_transport().getpeername()
                hosts.append(ip)
                remotes.append(remote)
            (master_remote, ) = ctx.cluster.only(
                config['nodes'][0]).remotes.keys()
        elif isinstance(config['nodes'], list):
            for role in config['nodes']:
                (remote, ) = ctx.cluster.only(role).remotes.keys()
                ip, port = remote.ssh.get_transport().getpeername()
                hosts.append(ip)
                remotes.append(remote)
            (master_remote, ) = ctx.cluster.only(
                config['nodes'][0]).remotes.keys()
    else:
        roles = [
            'client.{id}'.format(id=id_)
            for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')
        ]
        (master_remote, ) = ctx.cluster.only(roles[0]).remotes.keys()
        for role in roles:
            (remote, ) = ctx.cluster.only(role).remotes.keys()
            ip, port = remote.ssh.get_transport().getpeername()
            hosts.append(ip)
            remotes.append(remote)

    # mpich is sensitive to different versions on different nodes
    _check_mpi_version(remotes)

    workdir = []
    if 'workdir' in config:
        workdir = ['-wdir', config['workdir'].replace('$TESTDIR', testdir)]

    log.info('mpi rank 0 is: {name}'.format(name=master_remote.name))

    # write out the mpi hosts file
    log.info('mpi nodes: [%s]' % (', '.join(hosts)))
    teuthology.write_file(remote=master_remote,
                          path='{tdir}/mpi-hosts'.format(tdir=testdir),
                          data='\n'.join(hosts))
    log.info('mpiexec on {name}: {cmd}'.format(name=master_remote.name,
                                               cmd=mpiexec))
    args = ['mpiexec', '-f', '{tdir}/mpi-hosts'.format(tdir=testdir)]
    args.extend(workdir)
    args.extend(mpiexec.split(' '))
    master_remote.run(args=args, )
    log.info('mpi task completed')
    master_remote.run(args=['rm', '{tdir}/mpi-hosts'.format(tdir=testdir)])
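Both MPI variants check isinstance(config['nodes'], basestring), a Python 2 name; under Python 3 the same test is spelled with str. A small compatibility sketch:

# Python 2 had basestring as the common ancestor of str and unicode;
# Python 3 only has str, so a portable check can be written like this.
try:
    string_types = basestring        # Python 2
except NameError:
    string_types = str               # Python 3

nodes = 'all'
print(isinstance(nodes, string_types) and nodes == 'all')  # True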