def _make_option_parser():
    usage = '%prog [options] <time-untouched> <URIs>'
    description = (
        'Delete all files in a given URI that are older than a specified'
        ' time.\n\nThe time parameter defines the threshold for removing'
        ' files. If the file has not been accessed for *time*, the file is'
        ' removed. The time argument is a number with an optional'
        ' single-character suffix specifying the units: m for minutes, h for'
        ' hours, d for days. If no suffix is specified, time is in hours.')

    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option(
        '-t', '--test', dest='test', default=False, action='store_true',
        help="Don't actually delete any files; just log that we would")

    _add_basic_opts(option_parser)

    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        option_parser: ('aws_region', 's3_endpoint'),
    })

    _alphabetize_options(option_parser)
    return option_parser
def _make_option_parser():
    usage = '%prog [options]'
    description = (
        'Create a persistent EMR cluster to run jobs in, and print its ID to'
        ' stdout. WARNING: Do not run this without mrjob'
        ' terminate-idle-clusters in your crontab; clusters left idle can'
        ' quickly become expensive!')

    option_parser = OptionParser(usage=usage, description=description)

    _add_basic_opts(option_parser)

    # these aren't nicely broken down, just scrape specific options
    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        option_parser: (
            'bootstrap_mrjob',
            'label',
            'owner',
        ),
    })

    _add_emr_connect_opts(option_parser)
    _add_emr_launch_opts(option_parser)
    _add_dataproc_emr_opts(option_parser)

    _alphabetize_options(option_parser)
    return option_parser
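# `_alphabetize_options` (and the older `alphabetize_options`) is called
# throughout these snippets but never defined here. A minimal sketch of what
# it plausibly does -- sorting a container's options by switch string so that
# --help output is alphabetized. This is an assumption about the helper, not
# necessarily mrjob's actual implementation:

def _alphabetize_options(opt_container):
    # Option.get_opt_string() returns the first long switch (e.g. '--test'),
    # falling back to the short switch, so sorting by it orders --help output.
    opt_container.option_list.sort(key=lambda opt: opt.get_opt_string())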
def make_option_parser():
    usage = '%prog [options] <time-untouched> <URIs>'
    description = (
        'Delete all files in a given URI that are older than a specified'
        ' time.\n\nThe time parameter defines the threshold for removing'
        ' files. If the file has not been accessed for *time*, the file is'
        ' removed. The time argument is a number with an optional'
        ' single-character suffix specifying the units: m for minutes, h for'
        ' hours, d for days. If no suffix is specified, time is in hours.')

    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option(
        '-t', '--test', dest='test', default=False, action='store_true',
        help="Don't actually delete any files; just log that we would")

    add_basic_opts(option_parser)

    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        option_parser: ('aws_region', 's3_endpoint'),
    })

    alphabetize_options(option_parser)
    return option_parser
def make_option_parser():
    usage = '%prog [options]'
    description = (
        'Create a persistent EMR job flow to run jobs in. WARNING: do not run'
        ' this without mrjob.tools.emr.terminate.idle_job_flows in your'
        ' crontab; job flows left idle can quickly become expensive!')
    option_parser = OptionParser(usage=usage, description=description)

    def make_option_group(halp):
        g = OptionGroup(option_parser, halp)
        option_parser.add_option_group(g)
        return g

    runner_group = make_option_group('Running the entire job')
    hadoop_emr_opt_group = make_option_group(
        'Running on Hadoop or EMR (these apply when you set -r hadoop or -r'
        ' emr)')
    emr_opt_group = make_option_group(
        'Running on Amazon Elastic MapReduce (these apply when you set -r'
        ' emr)')

    assignments = {
        runner_group: ('bootstrap_mrjob', 'conf_path', 'quiet', 'verbose'),
        hadoop_emr_opt_group: (
            'label',
            'owner',
        ),
        emr_opt_group: (
            'additional_emr_info',
            'aws_availability_zone',
            'aws_region',
            'bootstrap_actions',
            'bootstrap_cmds',
            'bootstrap_files',
            'bootstrap_python_packages',
            'ec2_instance_type',
            'ec2_key_pair',
            'ec2_master_instance_type',
            'ec2_slave_instance_type',
            'emr_endpoint',
            'emr_job_flow_pool_name',
            'enable_emr_debugging',
            'hadoop_version',
            'num_ec2_instances',
            'pool_emr_job_flows',
            's3_endpoint',
            's3_log_uri',
            's3_scratch_uri',
            's3_sync_wait_time',
        ),
    }

    # Scrape options from MRJob and index them by dest
    mr_job = MRJob()
    job_option_groups = mr_job.all_option_groups()
    scrape_options_into_new_groups(job_option_groups, assignments)
    return option_parser
def make_option_parser():
    usage = '%prog [options]'
    description = (
        'Create a persistent EMR job flow to run jobs in. WARNING: do not run'
        ' this without mrjob.tools.emr.terminate.idle_job_flows in your'
        ' crontab; job flows left idle can quickly become expensive!')
    option_parser = OptionParser(usage=usage, description=description)

    def make_option_group(halp):
        g = OptionGroup(option_parser, halp)
        option_parser.add_option_group(g)
        return g

    runner_group = make_option_group('Running the entire job')
    hadoop_emr_opt_group = make_option_group(
        'Running on Hadoop or EMR (these apply when you set -r hadoop or -r'
        ' emr)')
    emr_opt_group = make_option_group(
        'Running on Amazon Elastic MapReduce (these apply when you set -r'
        ' emr)')

    assignments = {
        runner_group: (
            'bootstrap_mrjob',
            'conf_path',
            'quiet',
            'verbose',
        ),
        hadoop_emr_opt_group: (
            'label',
            'owner',
        ),
        emr_opt_group: (
            'additional_emr_info',
            'aws_availability_zone',
            'aws_region',
            'bootstrap_actions',
            'bootstrap_cmds',
            'bootstrap_files',
            'bootstrap_python_packages',
            'ec2_instance_type',
            'ec2_key_pair',
            'ec2_master_instance_type',
            'ec2_slave_instance_type',
            'emr_endpoint',
            'enable_emr_debugging',
            'hadoop_version',
            'num_ec2_instances',
            's3_endpoint',
            's3_log_uri',
            's3_scratch_uri',
            's3_sync_wait_time',
        ),
    }

    # Scrape options from MRJob and index them by dest
    mr_job = MRJob()
    job_option_groups = (mr_job.option_parser, mr_job.mux_opt_group,
                         mr_job.proto_opt_group, mr_job.runner_opt_group,
                         mr_job.hadoop_emr_opt_group, mr_job.emr_opt_group)
    scrape_options_into_new_groups(job_option_groups, assignments)
    return option_parser
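# Every snippet in this section delegates to scrape_options_into_new_groups,
# whose definition is not shown. A minimal sketch under the assumption that it
# indexes existing options by their dest and re-adds them to the requested
# target parsers/groups (mrjob's real helper may differ in detail); optparse's
# add_option() accepts an existing Option instance, which makes this work:

def scrape_options_into_new_groups(source_groups, assignments):
    # index every option in the source containers by its dest name
    dest_to_option = {}
    for group in source_groups:
        for option in group.option_list:
            dest_to_option[option.dest] = option

    # re-add each requested option to its target OptionParser or OptionGroup
    for target, dests in assignments.items():
        for dest in dests:
            target.add_option(dest_to_option[dest])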
def test_scrape_all(self):
    assignments = {
        self.new_parser: ('a',),
        self.new_group_1: ('x', 'y'),
    }
    old_groups = (self.original_parser, self.original_group)
    scrape_options_into_new_groups(old_groups, assignments)
    self.assertEqual(self.original_parser.option_list[1:],
                     self.new_parser.option_list[1:])
    self.assertEqual(self.original_group.option_list,
                     self.new_group_1.option_list)
def test_scrape_all(self):
    assignments = {
        self.new_parser: ('a', ),
        self.new_group_1: ('x', 'y'),
    }
    old_groups = (self.original_parser, self.original_group)
    scrape_options_into_new_groups(old_groups, assignments)
    self.assertEqual(self.original_parser.option_list[1:],
                     self.new_parser.option_list[1:])
    self.assertEqual(self.original_group.option_list,
                     self.new_group_1.option_list)
def test_scrape_different(self): assignments = {self.new_parser: ("x",), self.new_group_1: ("y",), self.new_group_2: ("a",)} old_groups = (self.original_parser, self.original_group) scrape_options_into_new_groups(old_groups, assignments) target_1 = self.original_group.option_list[:1] target_2 = self.original_group.option_list[1:] target_3 = self.original_parser.option_list[1:] self.assertEqual(target_1, self.new_parser.option_list[1:]) self.assertEqual(target_2, self.new_group_1.option_list) self.assertEqual(target_3, self.new_group_2.option_list) options, args = self.new_parser.parse_args(["-x", "happy"]) self.assertEqual(options.x, "happy")
def make_option_parser():
    usage = 'usage: %prog [options] JOB_FLOW_ID'
    description = (
        'List, display, and parse Hadoop logs associated with EMR job flows.'
        ' Useful for debugging failed jobs for which mrjob did not display a'
        ' useful error message or for inspecting jobs whose output has been'
        ' lost.')

    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option('-f', '--find-failure', dest='find_failure',
                             action='store_true', default=False,
                             help=('Search the logs for information about why'
                                   ' the job failed'))
    option_parser.add_option('-l', '--list', dest='list_relevant',
                             action="store_true", default=False,
                             help='List log files MRJob finds relevant')
    option_parser.add_option('-L', '--list-all', dest='list_all',
                             action="store_true", default=False,
                             help='List all log files')
    option_parser.add_option('-a', '--cat', dest='cat_relevant',
                             action="store_true", default=False,
                             help='Cat log files MRJob finds relevant')
    option_parser.add_option('-A', '--cat-all', dest='cat_all',
                             action="store_true", default=False,
                             help='Cat all log files to JOB_FLOW_ID/')
    option_parser.add_option('-s', '--step-num', dest='step_num',
                             action='store', type='int', default=None,
                             help=('Limit results to a single step. To be'
                                   ' used with --list and --cat.'))
    option_parser.add_option('--counters', dest='get_counters',
                             action='store_true', default=False,
                             help='Show counters from the job flow')

    assignments = {
        option_parser: ('conf_paths', 'quiet', 'verbose',
                        'ec2_key_pair_file', 's3_sync_wait_time')
    }

    mr_job = MRJob()
    job_option_groups = (mr_job.option_parser, mr_job.mux_opt_group,
                         mr_job.proto_opt_group, mr_job.runner_opt_group,
                         mr_job.hadoop_emr_opt_group, mr_job.emr_opt_group,
                         mr_job.hadoop_opts_opt_group)
    scrape_options_into_new_groups(job_option_groups, assignments)
    return option_parser
def make_option_parser():
    usage = 'usage: %prog [options] JOB_FLOW_ID'
    description = (
        'List, display, and parse Hadoop logs associated with EMR job flows.'
        ' Useful for debugging failed jobs for which mrjob did not display a'
        ' useful error message or for inspecting jobs whose output has been'
        ' lost.')

    option_parser = OptionParser(usage=usage, description=description)

    add_basic_opts(option_parser)

    option_parser.add_option('-f', '--find-failure', dest='find_failure',
                             action='store_true', default=False,
                             help=('Search the logs for information about why'
                                   ' the job failed'))
    option_parser.add_option('-l', '--list', dest='list_relevant',
                             action="store_true", default=False,
                             help='List log files MRJob finds relevant')
    option_parser.add_option('-L', '--list-all', dest='list_all',
                             action="store_true", default=False,
                             help='List all log files')
    option_parser.add_option('-a', '--cat', dest='cat_relevant',
                             action="store_true", default=False,
                             help='Cat log files MRJob finds relevant')
    option_parser.add_option('-A', '--cat-all', dest='cat_all',
                             action="store_true", default=False,
                             help='Cat all log files to JOB_FLOW_ID/')
    option_parser.add_option('-s', '--step-num', dest='step_num',
                             action='store', type='int', default=None,
                             help=('Limit results to a single step. To be'
                                   ' used with --list and --cat.'))
    option_parser.add_option('--counters', dest='get_counters',
                             action='store_true', default=False,
                             help='Show counters from the job flow')

    add_emr_connect_opts(option_parser)

    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        option_parser: ('ec2_key_pair_file', 's3_sync_wait_time', 'ssh_bin')
    })

    alphabetize_options(option_parser)
    return option_parser
def test_scrape_different(self):
    assignments = {
        self.new_parser: ('x',),
        self.new_group_1: ('y',),
        self.new_group_2: ('a',),
    }
    old_groups = (self.original_parser, self.original_group)
    scrape_options_into_new_groups(old_groups, assignments)
    target_1 = self.original_group.option_list[:1]
    target_2 = self.original_group.option_list[1:]
    target_3 = self.original_parser.option_list[1:]
    assert_equal(target_1, self.new_parser.option_list[1:])
    assert_equal(target_2, self.new_group_1.option_list)
    assert_equal(target_3, self.new_group_2.option_list)
    options, args = self.new_parser.parse_args(['-x', 'happy'])
    assert_equal(options.x, 'happy')
def test_scrape_different(self):
    assignments = {
        self.new_parser: ('x', ),
        self.new_group_1: ('y', ),
        self.new_group_2: ('a', ),
    }
    old_groups = (self.original_parser, self.original_group)
    scrape_options_into_new_groups(old_groups, assignments)
    target_1 = self.original_group.option_list[:1]
    target_2 = self.original_group.option_list[1:]
    target_3 = self.original_parser.option_list[1:]
    self.assertEqual(target_1, self.new_parser.option_list[1:])
    self.assertEqual(target_2, self.new_group_1.option_list)
    self.assertEqual(target_3, self.new_group_2.option_list)
    options, args = self.new_parser.parse_args(['-x', 'happy'])
    self.assertEqual(options.x, 'happy')
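# The scraping tests above reference fixtures (original_parser,
# original_group, new_parser, new_group_1, new_group_2) whose setup is not
# shown. A hypothetical setUp consistent with how those tests slice
# option_list: option -a lives on the parser itself, -x and -y in a group,
# and the [1:] slices skip optparse's automatic --help option.

from optparse import OptionGroup, OptionParser

def setUp(self):
    # source parser holding -a directly and -x/-y in an option group
    self.original_parser = OptionParser()
    self.original_parser.add_option('-a', dest='a')

    self.original_group = OptionGroup(self.original_parser, 'Group')
    self.original_group.add_option('-x', dest='x')
    self.original_group.add_option('-y', dest='y')
    self.original_parser.add_option_group(self.original_group)

    # destination parser and groups that options get scraped into
    self.new_parser = OptionParser()
    self.new_group_1 = OptionGroup(self.new_parser, 'Group 1')
    self.new_group_2 = OptionGroup(self.new_parser, 'Group 2')
    self.new_parser.add_option_group(self.new_group_1)
    self.new_parser.add_option_group(self.new_group_2)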
def main():
    usage = 'usage: %prog JOB_FLOW_ID OUTPUT_DIR [options] "command string"'
    description = ('Run a command on the master and all slaves of an EMR job'
                   ' flow. Store stdout and stderr for results in OUTPUT_DIR.')

    option_parser = OptionParser(usage=usage, description=description)

    assignments = {
        option_parser: ('conf_path', 'quiet', 'verbose', 'ec2_key_pair_file')
    }

    option_parser.add_option('-o', '--output-dir', dest='output_dir',
                             default=None,
                             help="Specify an output directory (default:"
                                  " JOB_FLOW_ID)")

    mr_job = MRJob()
    scrape_options_into_new_groups(mr_job.all_option_groups(), assignments)

    options, args = option_parser.parse_args()

    if not options.quiet:
        log_to_stream(name='mrjob', debug=options.verbose)

    # everything left in options after removing the tool-only switches is
    # passed through to EMRJobRunner as keyword arguments
    runner_kwargs = options.__dict__.copy()
    for unused_arg in ('output_dir', 'quiet', 'verbose'):
        del runner_kwargs[unused_arg]

    if len(args) < 2:
        option_parser.print_help()
        sys.exit(1)

    job_flow_id, cmd_string = args[:2]
    cmd_args = shlex.split(cmd_string)

    output_dir = os.path.abspath(options.output_dir or job_flow_id)

    with EMRJobRunner(emr_job_flow_id=job_flow_id, **runner_kwargs) as runner:
        runner._enable_slave_ssh_access()
        run_on_all_nodes(runner, output_dir, cmd_args)
def main(cl_args=None):
    usage = 'usage: %prog CLUSTER_ID OUTPUT_DIR [options] "command string"'
    description = ('Run a command on the master and all slaves of an EMR'
                   ' cluster. Store stdout/stderr for results in OUTPUT_DIR.')

    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option('-o', '--output-dir', dest='output_dir',
                             default=None,
                             help="Specify an output directory (default:"
                                  " CLUSTER_ID)")

    add_basic_opts(option_parser)
    add_emr_connect_opts(option_parser)

    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        option_parser: ('ec2_key_pair_file', 'ssh_bin'),
    })

    alphabetize_options(option_parser)

    options, args = option_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    runner_kwargs = options.__dict__.copy()
    for unused_arg in ('output_dir', 'quiet', 'verbose'):
        del runner_kwargs[unused_arg]

    if len(args) < 2:
        option_parser.print_help()
        sys.exit(1)

    cluster_id, cmd_string = args[:2]
    cmd_args = shlex_split(cmd_string)

    output_dir = os.path.abspath(options.output_dir or cluster_id)

    with EMRJobRunner(cluster_id=cluster_id, **runner_kwargs) as runner:
        runner._enable_slave_ssh_access()
        run_on_all_nodes(runner, output_dir, cmd_args)
def main(cl_args=None):
    usage = 'usage: %prog JOB_FLOW_ID OUTPUT_DIR [options] "command string"'
    description = ('Run a command on the master and all slaves of an EMR job'
                   ' flow. Store stdout and stderr for results in OUTPUT_DIR.')

    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option('-o', '--output-dir', dest='output_dir',
                             default=None,
                             help="Specify an output directory (default:"
                                  " JOB_FLOW_ID)")

    add_basic_opts(option_parser)
    add_emr_connect_opts(option_parser)

    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        option_parser: ('ec2_key_pair_file', 'ssh_bin'),
    })

    alphabetize_options(option_parser)

    options, args = option_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    runner_kwargs = options.__dict__.copy()
    for unused_arg in ('output_dir', 'quiet', 'verbose'):
        del runner_kwargs[unused_arg]

    if len(args) < 2:
        option_parser.print_help()
        sys.exit(1)

    job_flow_id, cmd_string = args[:2]
    cmd_args = shlex_split(cmd_string)

    output_dir = os.path.abspath(options.output_dir or job_flow_id)

    with EMRJobRunner(emr_job_flow_id=job_flow_id, **runner_kwargs) as runner:
        runner._enable_slave_ssh_access()
        run_on_all_nodes(runner, output_dir, cmd_args)
def make_option_parser():
    usage = '%prog [options]'
    description = (
        'Inspect available job flow pools or identify job flows suitable for'
        ' running a job with the specified options.')
    option_parser = OptionParser(usage=usage, description=description)

    def make_option_group(halp):
        g = OptionGroup(option_parser, halp)
        option_parser.add_option_group(g)
        return g

    ec2_opt_group = make_option_group('EC2 instance configuration')
    hadoop_opt_group = make_option_group('Hadoop configuration')
    job_opt_group = make_option_group('Job flow configuration')

    assignments = {
        option_parser: (
            'conf_paths',
            'emr_job_flow_pool_name',
            'quiet',
            'verbose',
        ),
        ec2_opt_group: (
            'aws_availability_zone',
            'ec2_instance_type',
            'ec2_key_pair',
            'ec2_key_pair_file',
            'ec2_master_instance_type',
            'ec2_core_instance_type',
            'emr_endpoint',
            'num_ec2_instances',
        ),
        hadoop_opt_group: (
            'hadoop_version',
            'label',
            'owner',
        ),
        job_opt_group: (
            'bootstrap_actions',
            'bootstrap_cmds',
            'bootstrap_files',
            'bootstrap_mrjob',
            'bootstrap_python_packages',
        ),
    }

    option_parser.add_option('-a', '--all', action='store_true',
                             default=False, dest='list_all',
                             help=('List all available job flows without'
                                   ' filtering by configuration'))
    option_parser.add_option('-f', '--find', action='store_true',
                             default=False, dest='find',
                             help=('Find a job flow matching the pool name,'
                                   ' bootstrap configuration, and instance'
                                   ' number/type as specified on the command'
                                   ' line and in the configuration files'))
    option_parser.add_option('-t', '--terminate', action='store',
                             default=None, dest='terminate',
                             metavar='JOB_FLOW_ID',
                             help=('Terminate all job flows in the given pool'
                                   ' (defaults to pool "default")'))

    # Scrape options from MRJob and index them by dest
    mr_job = MRJob()
    scrape_options_into_new_groups(mr_job.all_option_groups(), assignments)
    return option_parser
def main():
    usage = '%prog [options]'
    description = (
        'Inspect available job flow pools or identify job flows suitable for'
        ' running a job with the specified options.')
    option_parser = OptionParser(usage=usage, description=description)

    # add HadoopVersion to the set of job flow fields that boto knows how to
    # parse out of EMR API responses
    import boto.emr.connection
    boto.emr.connection.JobFlow.Fields.add('HadoopVersion')

    def make_option_group(halp):
        g = OptionGroup(option_parser, halp)
        option_parser.add_option_group(g)
        return g

    ec2_opt_group = make_option_group('EC2 instance configuration')
    hadoop_opt_group = make_option_group('Hadoop configuration')
    job_opt_group = make_option_group('Job flow configuration')

    assignments = {
        option_parser: (
            'conf_path',
            'emr_job_flow_pool_name',
            'quiet',
            'verbose',
        ),
        ec2_opt_group: (
            'aws_availability_zone',
            'ec2_instance_type',
            'ec2_key_pair',
            'ec2_key_pair_file',
            'ec2_master_instance_type',
            'ec2_slave_instance_type',
            'emr_endpoint',
            'num_ec2_instances',
        ),
        hadoop_opt_group: (
            'hadoop_version',
            'label',
            'owner',
        ),
        job_opt_group: (
            'bootstrap_actions',
            'bootstrap_cmds',
            'bootstrap_files',
            'bootstrap_mrjob',
            'bootstrap_python_packages',
        ),
    }

    option_parser.add_option('-a', '--all', action='store_true',
                             default=False, dest='list_all',
                             help=('List all available job flows without'
                                   ' filtering by configuration'))
    option_parser.add_option('-f', '--find', action='store_true',
                             default=False, dest='find',
                             help=('Find a job flow matching the pool name,'
                                   ' bootstrap configuration, and instance'
                                   ' number/type as specified on the command'
                                   ' line and in the configuration files'))
    option_parser.add_option('-t', '--terminate', action='store',
                             default=None, dest='terminate',
                             metavar='JOB_FLOW_ID',
                             help=('Terminate all job flows in the given pool'
                                   ' (defaults to pool "default")'))

    # Scrape options from MRJob and index them by dest
    mr_job = MRJob()
    scrape_options_into_new_groups(mr_job.all_option_groups(), assignments)

    options, args = option_parser.parse_args()
    log_to_stream(name='mrjob', debug=options.verbose)

    # everything left in options is a runner kwarg
    runner_kwargs = options.__dict__.copy()
    for non_runner_kwarg in ('quiet', 'verbose', 'list_all', 'find',
                             'terminate'):
        del runner_kwargs[non_runner_kwarg]

    runner = EMRJobRunner(**runner_kwargs)

    if options.list_all:
        pprint_pools(runner)

    if options.find:
        sorted_job_flows = runner.usable_job_flows()
        if sorted_job_flows:
            jf = sorted_job_flows[-1]
            print 'You should use this one:'
            pprint_job_flow(jf)
        else:
            print 'No idle job flows match criteria'

    if options.terminate:
        terminate(runner, options.terminate)
def make_option_parser():
    usage = 'usage: %prog [options] JOB_FLOW_ID'
    description = (
        'List, display, and parse Hadoop logs associated with EMR job flows.'
        ' Useful for debugging failed jobs for which mrjob did not display a'
        ' useful error message or for inspecting jobs whose output has been'
        ' lost.')

    option_parser = OptionParser(usage=usage, description=description)

    add_basic_opts(option_parser)

    option_parser.add_option('-f', '--find-failure', dest='find_failure',
                             action='store_true', default=False,
                             help=('Search the logs for information about why'
                                   ' the job failed'))
    option_parser.add_option('-l', '--list', dest='list_relevant',
                             action="store_true", default=False,
                             help='List log files MRJob finds relevant')
    option_parser.add_option('-L', '--list-all', dest='list_all',
                             action="store_true", default=False,
                             help='List all log files')
    option_parser.add_option('-a', '--cat', dest='cat_relevant',
                             action="store_true", default=False,
                             help='Cat log files MRJob finds relevant')
    option_parser.add_option('-A', '--cat-all', dest='cat_all',
                             action="store_true", default=False,
                             help='Cat all log files to JOB_FLOW_ID/')
    option_parser.add_option('-s', '--step-num', dest='step_num',
                             action='store', type='int', default=None,
                             help=('Limit results to a single step. To be'
                                   ' used with --list and --cat.'))
    option_parser.add_option('--counters', dest='get_counters',
                             action='store_true', default=False,
                             help='Show counters from the job flow')

    add_emr_connect_opts(option_parser)

    scrape_options_into_new_groups(
        MRJob().all_option_groups(),
        {option_parser: ('ec2_key_pair_file', 's3_sync_wait_time',
                         'ssh_bin')})

    alphabetize_options(option_parser)
    return option_parser
def main():
    usage = '%prog [options]'
    description = (
        'Inspect available job flow pools or identify job flows suitable for'
        ' running a job with the specified options.')
    option_parser = OptionParser(usage=usage, description=description)

    import boto.emr.connection
    boto.emr.connection.JobFlow.Fields.add('HadoopVersion')

    def make_option_group(halp):
        g = OptionGroup(option_parser, halp)
        option_parser.add_option_group(g)
        return g

    ec2_opt_group = make_option_group('EC2 instance configuration')
    hadoop_opt_group = make_option_group('Hadoop configuration')
    job_opt_group = make_option_group('Job flow configuration')

    assignments = {
        option_parser: (
            'conf_path',
            'emr_job_flow_pool_name',
            'quiet',
            'verbose',
        ),
        ec2_opt_group: (
            'aws_availability_zone',
            'ec2_instance_type',
            'ec2_key_pair',
            'ec2_key_pair_file',
            'ec2_master_instance_type',
            'ec2_slave_instance_type',
            'emr_endpoint',
            'num_ec2_instances',
        ),
        hadoop_opt_group: (
            'hadoop_version',
            'label',
            'owner',
        ),
        job_opt_group: (
            'bootstrap_actions',
            'bootstrap_cmds',
            'bootstrap_files',
            'bootstrap_mrjob',
            'bootstrap_python_packages',
        ),
    }

    option_parser.add_option('-a', '--all', action='store_true',
                             default=False, dest='list_all',
                             help=('List all available job flows without'
                                   ' filtering by configuration'))
    option_parser.add_option('-f', '--find', action='store_true',
                             default=False, dest='find',
                             help=('Find a job flow matching the pool name,'
                                   ' bootstrap configuration, and instance'
                                   ' number/type as specified on the command'
                                   ' line and in the configuration files'))
    option_parser.add_option('-t', '--terminate', action='store',
                             default=None, dest='terminate',
                             metavar='JOB_FLOW_ID',
                             help=('Terminate all job flows in the given pool'
                                   ' (defaults to pool "default")'))

    # Scrape options from MRJob and index them by dest
    mr_job = MRJob()
    scrape_options_into_new_groups(mr_job.all_option_groups(), assignments)

    options, args = option_parser.parse_args()
    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    runner_kwargs = options.__dict__.copy()
    for non_runner_kwarg in ('quiet', 'verbose', 'list_all', 'find',
                             'terminate'):
        del runner_kwargs[non_runner_kwarg]

    runner = EMRJobRunner(**runner_kwargs)

    if options.list_all:
        pprint_pools(runner)

    if options.find:
        sorted_job_flows = runner.usable_job_flows()
        if sorted_job_flows:
            jf = sorted_job_flows[-1]
            print 'You should use this one:'
            pprint_job_flow(jf)
        else:
            print 'No idle job flows match criteria'

    if options.terminate:
        terminate(runner, options.terminate)
def main():
    usage = 'usage: %prog [options] JOB_FLOW_ID'
    description = (
        'List, display, and parse Hadoop logs associated with EMR job flows.'
        ' Useful for debugging failed jobs for which mrjob did not display a'
        ' useful error message or for inspecting jobs whose output has been'
        ' lost.')

    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option('-f', '--find-failure', dest='find_failure',
                             action='store_true', default=False,
                             help=('Search the logs for information about why'
                                   ' the job failed'))
    option_parser.add_option('-l', '--list', dest='list_relevant',
                             action="store_true", default=False,
                             help='List log files MRJob finds relevant')
    option_parser.add_option('-L', '--list-all', dest='list_all',
                             action="store_true", default=False,
                             help='List all log files')
    option_parser.add_option('-a', '--cat', dest='cat_relevant',
                             action="store_true", default=False,
                             help='Cat log files MRJob finds relevant')
    option_parser.add_option('-A', '--cat-all', dest='cat_all',
                             action="store_true", default=False,
                             help='Cat all log files to JOB_FLOW_ID/')
    option_parser.add_option('-s', '--step-num', dest='step_num',
                             action='store', type='int', default=None,
                             help=('Limit results to a single step. To be'
                                   ' used with --list and --cat.'))
    option_parser.add_option('--counters', dest='get_counters',
                             action='store_true', default=False,
                             help='Show counters from the job flow')

    assignments = {
        option_parser: ('conf_path', 'quiet', 'verbose', 'ec2_key_pair_file')
    }

    mr_job = MRJob()
    job_option_groups = (mr_job.option_parser, mr_job.mux_opt_group,
                         mr_job.proto_opt_group, mr_job.runner_opt_group,
                         mr_job.hadoop_emr_opt_group, mr_job.emr_opt_group,
                         mr_job.hadoop_opts_opt_group)
    scrape_options_into_new_groups(job_option_groups, assignments)

    options, args = option_parser.parse_args()
    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    if options.step_num:
        step_nums = [options.step_num]
    else:
        step_nums = None

    # pass only runner options through to EMRJobRunner
    runner_kwargs = options.__dict__.copy()
    for unused_arg in ('quiet', 'verbose', 'list_relevant', 'list_all',
                       'cat_relevant', 'cat_all', 'get_counters', 'step_num',
                       'find_failure'):
        del runner_kwargs[unused_arg]

    with EMRJobRunner(emr_job_flow_id=args[0], **runner_kwargs) as runner:
        if options.list_relevant:
            list_relevant(runner, step_nums)

        if options.list_all:
            list_all(runner)

        if options.cat_relevant:
            cat_relevant(runner, step_nums)

        if options.cat_all:
            cat_all(runner)

        if options.get_counters:
            # fetch and print counters for every step of the job flow
            desc = runner._describe_jobflow()
            runner._set_s3_job_log_uri(desc)
            runner._fetch_counters(
                xrange(1, len(desc.steps) + 1), skip_s3_wait=True)
            runner.print_counters()

        if options.find_failure:
            find_failure(runner, options.step_num)