示例#1
0
def run_on_all_nodes(runner, output_dir, cmd_args, print_stderr=True):
    """Given an :py:class:`EMRJobRunner`, run the command specified by
    *cmd_args* on all nodes in the cluster and save the stdout and stderr of
    each run to subdirectories of *output_dir*.

    You should probably have run :py:meth:`_enable_slave_ssh_access()` on the
    runner before calling this function.
    """
    master_addr = runner._address_of_master()
    addresses = [master_addr]

    ssh_bin = runner._opts['ssh_bin']
    ec2_key_pair_file = runner._opts['ec2_key_pair_file']

    keyfile = None
    if runner._opts['num_ec2_instances'] > 1:
        addresses += [
            '%s!%s' % (master_addr, slave_addr)
            for slave_addr in runner._addresses_of_slaves()
        ]
        # copying key file like a boss (name of keyfile doesn't really matter)
        keyfile = 'mrboss-%s.pem' % random_identifier()
        ssh_copy_key(ssh_bin, master_addr, ec2_key_pair_file, keyfile)

    for addr in addresses:

        stdout, stderr = ssh_run_with_recursion(
            ssh_bin,
            addr,
            ec2_key_pair_file,
            keyfile,
            cmd_args,
        )

        if print_stderr:
            print('---')
            print('Command completed on %s.' % addr)
            print(stderr, end=' ')

        if '!' in addr:
            base_dir = os.path.join(output_dir, 'slave ' + addr.split('!')[1])
        else:
            base_dir = os.path.join(output_dir, 'master')

        if not os.path.exists(base_dir):
            os.makedirs(base_dir)

        with open(os.path.join(base_dir, 'stdout'), 'wb') as f:
            f.write(stdout)

        with open(os.path.join(base_dir, 'stderr'), 'wb') as f:
            f.write(stderr)
示例#2
0
文件: mrboss.py 项目: kartheek6/mrjob
def run_on_all_nodes(runner, output_dir, cmd_args, print_stderr=True):
    """Given an :py:class:`EMRJobRunner`, run the command specified by
    *cmd_args* on all nodes in the job flow and save the stdout and stderr of
    each run to subdirectories of *output_dir*.

    You should probably have run :py:meth:`_enable_slave_ssh_access()` on the
    runner before calling this function.
    """
    master_addr = runner._address_of_master()
    addresses = [master_addr]

    ssh_bin = runner._opts['ssh_bin']
    ec2_key_pair_file = runner._opts['ec2_key_pair_file']

    keyfile = None
    if runner._opts['num_ec2_instances'] > 1:
        addresses += ['%s!%s' % (master_addr, slave_addr)
                      for slave_addr in runner._addresses_of_slaves()]
        # copying key file like a boss (name of keyfile doesn't really matter)
        keyfile = 'mrboss-%s.pem' % random_identifier()
        ssh_copy_key(ssh_bin, master_addr, ec2_key_pair_file, keyfile)

    for addr in addresses:

        stdout, stderr = ssh_run_with_recursion(
            ssh_bin,
            addr,
            ec2_key_pair_file,
            keyfile,
            cmd_args,
        )

        if print_stderr:
            print('---')
            print('Command completed on %s.' % addr)
            print(stderr, end=' ')

        if '!' in addr:
            base_dir = os.path.join(output_dir, 'slave ' + addr.split('!')[1])
        else:
            base_dir = os.path.join(output_dir, 'master')

        if not os.path.exists(base_dir):
            os.makedirs(base_dir)

        with open(os.path.join(base_dir, 'stdout'), 'wb') as f:
            f.write(stdout)

        with open(os.path.join(base_dir, 'stderr'), 'wb') as f:
            f.write(stderr)
示例#3
0
文件: mrboss.py 项目: seatgeek/mrjob
def run_on_all_nodes(runner, output_dir, cmd_args, print_stderr=True):
    """Given an :py:class:`EMRJobRunner`, run the command specified by
    *cmd_args* on all nodes in the job flow and save the stdout and stderr of
    each run to subdirectories of *output_dir*.

    You should probably have run :py:meth:`_enable_slave_ssh_access()` on the
    runner before calling this function.
    """

    master_addr = runner._address_of_master()
    addresses = [master_addr]
    if runner._opts['num_ec2_instances'] > 1:
        addresses += [
            '%s!%s' % (master_addr, slave_addr)
            for slave_addr in runner._addresses_of_slaves()
        ]

    for addr in addresses:
        stdout, stderr = ssh_run_with_recursion(
            runner._opts['ssh_bin'],
            addr,
            runner._opts['ec2_key_pair_file'],
            runner._ssh_key_name,
            cmd_args,
        )

        if print_stderr:
            print '---'
            print 'Command completed on %s.' % addr
            print stderr,

        if '!' in addr:
            base_dir = os.path.join(output_dir, 'slave ' + addr.split('!')[1])
        else:
            base_dir = os.path.join(output_dir, 'master')

        if not os.path.exists(base_dir):
            os.makedirs(base_dir)

        with open(os.path.join(base_dir, 'stdout'), 'w') as f:
            f.write(stdout)

        with open(os.path.join(base_dir, 'stderr'), 'w') as f:
            f.write(stderr)
示例#4
0
def run_on_all_nodes(runner, output_dir, cmd_args, print_stderr=True):
    """Given an :py:class:`EMRJobRunner`, run the command specified by
    *cmd_args* on all nodes in the job flow and save the stdout and stderr of
    each run to subdirectories of *output_dir*.

    You should probably have run :py:meth:`_enable_slave_ssh_access()` on the
    runner before calling this function.
    """

    master_addr = runner._address_of_master()
    addresses = [master_addr]
    if runner._opts['num_ec2_instances'] > 1:
        addresses += ['%s!%s' % (master_addr, slave_addr)
                      for slave_addr in runner._addresses_of_slaves()]

    for addr in addresses:
        stdout, stderr = ssh_run_with_recursion(
            runner._opts['ssh_bin'],
            addr,
            runner._opts['ec2_key_pair_file'],
            runner._ssh_key_name,
            cmd_args,
        )

        if print_stderr:
            print '---'
            print 'Command completed on %s.' % addr
            print stderr,

        if '!' in addr:
            base_dir = os.path.join(output_dir, 'slave ' + addr.split('!')[1])
        else:
            base_dir = os.path.join(output_dir, 'master')

        if not os.path.exists(base_dir):
            os.makedirs(base_dir)

        with open(os.path.join(base_dir, 'stdout'), 'w') as f:
            f.write(stdout)

        with open(os.path.join(base_dir, 'stderr'), 'w') as f:
            f.write(stderr)