def main():
    # parse command-line args
    option_parser = make_option_parser()
    options, args = option_parser.parse_args()
    if args:
        option_parser.error('takes no arguments')

    # set up logging
    if not options.quiet:
        log_to_stream(name='mrjob', debug=options.verbose)

    # create the persistent job
    runner_kwargs = {
        'conf_path': options.conf_path,
        'ec2_instance_type': options.ec2_instance_type,
        'ec2_master_instance_type': options.ec2_master_instance_type,
        'ec2_slave_instance_type': options.ec2_slave_instance_type,
        'label': options.label,
        'num_ec2_instances': options.num_ec2_instances,
        'owner': options.owner,
    }
    runner = EMRJobRunner(**runner_kwargs)
    emr_job_flow_id = runner.make_persistent_job_flow()
    print emr_job_flow_id

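# A sketch of the imports these excerpts assume, based on mrjob's module
# layout at the time; the testify assertion helpers in particular are an
# assumption about the test harness. make_option_parser() is defined
# alongside main() in the tool module and is not shown here.
import os

from mrjob.emr import EMRJobRunner
from mrjob.job import MRJob
from mrjob.parse import parse_s3_uri
from mrjob.util import log_to_stream

from testify import assert_equal, assert_in, assert_not_in
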
def test_create_scratch_uri(self):
    # the "walrus" and "zebra" buckets will be ignored; they don't start
    # with "mrjob-"
    self.add_mock_s3_data({'walrus': {}, 'zebra': {}})

    runner = EMRJobRunner(conf_path=False, s3_sync_wait_time=0.01)

    # bucket name should be mrjob- plus 16 random hex digits
    s3_scratch_uri = runner._opts['s3_scratch_uri']
    assert_equal(s3_scratch_uri[:11], 's3://mrjob-')
    assert_equal(s3_scratch_uri[27:], '/tmp/')

    # bucket shouldn't actually exist yet
    scratch_bucket, _ = parse_s3_uri(s3_scratch_uri)
    assert_not_in(scratch_bucket, self.mock_s3_fs.keys())

    # need to do something to ensure that the bucket actually gets
    # created. let's launch a (mock) job flow
    jfid = runner.make_persistent_job_flow()
    assert_in(scratch_bucket, self.mock_s3_fs.keys())
    runner.make_emr_conn().terminate_jobflow(jfid)

    # once our scratch bucket is created, we should re-use it
    runner2 = EMRJobRunner(conf_path=False)
    assert_equal(runner2._opts['s3_scratch_uri'], s3_scratch_uri)

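# For reference, parse_s3_uri() splits an S3 URI into a (bucket, key) pair;
# an illustration, assuming mrjob.parse semantics (the bucket name here is
# hypothetical):
#
#   parse_s3_uri('s3://mrjob-0123456789abcdef/tmp/')
#       -> ('mrjob-0123456789abcdef', 'tmp/')
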
def test_local_bootstrap_action(self):
    # make sure that local bootstrap action scripts get uploaded to S3
    action_path = os.path.join(self.tmp_dir, 'apt-install.sh')
    with open(action_path, 'w') as f:
        f.write('for pkg in "$@"; do sudo apt-get install $pkg; done\n')

    bootstrap_actions = [
        action_path + ' python-scipy mysql-server']

    runner = EMRJobRunner(conf_path=False,
                          bootstrap_actions=bootstrap_actions,
                          s3_sync_wait_time=0.01)

    job_flow_id = runner.make_persistent_job_flow()

    emr_conn = runner.make_emr_conn()
    job_flow = emr_conn.describe_jobflow(job_flow_id)
    actions = job_flow.bootstrapactions

    assert_equal(len(actions), 2)

    assert actions[0].path.startswith('s3://mrjob-')
    assert actions[0].path.endswith('/apt-install.sh')
    assert_equal(actions[0].name, 'apt-install.sh')
    assert_equal(actions[0].args, ['python-scipy', 'mysql-server'])

    # check for master bootstrap script
    assert actions[1].path.startswith('s3://mrjob-')
    assert actions[1].path.endswith('b.py')
    assert_equal(actions[1].args, [])
    assert_equal(actions[1].name, 'master')

    # make sure master bootstrap script is on S3
    assert runner.path_exists(actions[1].path)

def main():
    # parse command-line args
    option_parser = make_option_parser()
    options, args = option_parser.parse_args()
    if args:
        option_parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # create the persistent job
    runner_kwargs = options.__dict__.copy()
    del runner_kwargs['quiet']
    del runner_kwargs['verbose']

    runner = EMRJobRunner(**runner_kwargs)
    emr_job_flow_id = runner.make_persistent_job_flow()
    print emr_job_flow_id

def main():
    # parse command-line args
    option_parser = make_option_parser()
    options, args = option_parser.parse_args()
    if args:
        option_parser.error('takes no arguments')

    # set up logging
    if not options.quiet:
        log_to_stream(name='mrjob', debug=options.verbose)

    # create the persistent job
    runner_kwargs = options.__dict__.copy()
    del runner_kwargs['quiet']
    del runner_kwargs['verbose']

    runner = EMRJobRunner(**runner_kwargs)
    emr_job_flow_id = runner.make_persistent_job_flow()
    print emr_job_flow_id

def test_bootstrap_actions_get_added(self):
    bootstrap_actions = [
        's3://elasticmapreduce/bootstrap-actions/configure-hadoop'
        ' -m,mapred.tasktracker.map.tasks.maximum=1',
        's3://foo/bar#xyzzy',  # use alternate name for script
    ]

    runner = EMRJobRunner(conf_path=False,
                          bootstrap_actions=bootstrap_actions,
                          s3_sync_wait_time=0.01)

    job_flow_id = runner.make_persistent_job_flow()

    emr_conn = runner.make_emr_conn()
    job_flow = emr_conn.describe_jobflow(job_flow_id)
    actions = job_flow.bootstrapactions

    assert_equal(len(actions), 3)

    assert_equal(
        actions[0].path,
        's3://elasticmapreduce/bootstrap-actions/configure-hadoop')
    assert_equal(
        actions[0].args, ['-m,mapred.tasktracker.map.tasks.maximum=1'])
    assert_equal(actions[0].name, 'configure-hadoop')

    assert_equal(actions[1].path, 's3://foo/bar')
    assert_equal(actions[1].args, [])
    assert_equal(actions[1].name, 'xyzzy')

    # check for master bootstrap script
    assert actions[2].path.startswith('s3://mrjob-')
    assert actions[2].path.endswith('b.py')
    assert_equal(actions[2].args, [])
    assert_equal(actions[2].name, 'master')

    # make sure master bootstrap script is on S3
    assert runner.path_exists(actions[2].path)

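# As the two tests above exercise, the bootstrap_actions option packs each
# action into a single string: a script path (local or S3), an optional
# '#name' suffix to rename the script, then space-separated args.
# Illustrating with the values these tests use:
#
#   's3://foo/bar python-scipy mysql-server'
#       -> path='s3://foo/bar', args=['python-scipy', 'mysql-server']
#   's3://foo/bar#xyzzy'
#       -> path='s3://foo/bar', name='xyzzy', args=[]
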
def main(args=None):
    """Run the create_job_flow tool with arguments from ``sys.argv``,
    printing the new job flow ID to ``sys.stdout``."""
    runner = EMRJobRunner(**runner_kwargs(args))
    emr_job_flow_id = runner.make_persistent_job_flow()
    print(emr_job_flow_id)

def main(args=None):
    """Run the create_job_flow tool with arguments from ``sys.argv``,
    printing the new job flow ID to ``sys.stdout``."""
    runner = EMRJobRunner(**runner_kwargs(args))
    emr_job_flow_id = runner.make_persistent_job_flow()
    print emr_job_flow_id

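# runner_kwargs() is not shown in these excerpts. A plausible sketch, assuming
# it factors out the option handling that the earlier versions of main() did
# inline (parse args, set up logging, strip the non-runner options):
def runner_kwargs(args=None):
    option_parser = make_option_parser()
    options, leftover_args = option_parser.parse_args(args)
    if leftover_args:
        option_parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # everything left on the options object is a keyword arg for EMRJobRunner
    kwargs = options.__dict__.copy()
    del kwargs['quiet']
    del kwargs['verbose']

    return kwargs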