def test_verbose(self):
    """With verbose=True, both INFO and DEBUG messages reach stderr."""
    with patch.object(sys, 'stderr', StringIO()) as captured:
        MRJob.set_up_logging(verbose=True)
        logger = logging.getLogger('__main__')
        logger.info('INFO')
        logger.debug('DEBUG')
        self.assertEqual(captured.getvalue(), 'INFO\nDEBUG\n')
def main(args=None):
    """Report jobs on active EMR clusters that have run too long."""
    now = _boto3_now()

    opts = _make_arg_parser().parse_args(args)

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    log.info('getting information about running jobs')

    min_time = timedelta(hours=opts.min_hours)

    emr_client = EMRJobRunner(**_runner_kwargs(opts)).make_emr_client()
    cluster_summaries = _boto3_paginate(
        'Clusters', emr_client, 'list_clusters',
        ClusterStates=['STARTING', 'BOOTSTRAPPING', 'RUNNING'])

    # optionally drop clusters matching the --exclude filter
    if opts.exclude:
        filtered = _filter_clusters(cluster_summaries, emr_client,
                                    opts.exclude)
    else:
        filtered = cluster_summaries

    job_info = _find_long_running_jobs(emr_client, filtered, min_time,
                                       now=now)

    _print_report(job_info)
def main(cl_args=None):
    """Resolve the runner class and run a single script/jar step with it."""
    parser = _make_arg_parser()
    opts = parser.parse_args(cl_args)

    runner_class = _runner_class(opts.runner or _DEFAULT_RUNNER)

    # print help and stop if requested, or if there is nothing to run
    if opts.help or not opts.script_or_jar:
        _print_help(opts, runner_class)
        sys.exit(0)

    MRJob.set_up_logging(
        quiet=opts.quiet,
        verbose=opts.verbose,
    )

    kwargs = _get_runner_opt_kwargs(opts, runner_class)
    kwargs.update(_HARD_CODED_OPTS)
    kwargs['input_paths'] = [os.devnull]
    kwargs['steps'] = [_get_step(opts, parser, cl_args).description()]

    runner = runner_class(**kwargs)
    try:
        runner.run()
    finally:
        runner.cleanup()
def main(args, now=None):
    """Report long-running jobs on active clusters (boto2 variant)."""
    if now is None:
        now = datetime.utcnow()

    parser = _make_option_parser()
    opts, positional = parser.parse_args(args)
    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    log.info('getting information about running jobs')

    emr_conn = EMRJobRunner(**_runner_kwargs(opts)).make_emr_conn()
    summaries = _yield_all_clusters(
        emr_conn,
        cluster_states=['STARTING', 'BOOTSTRAPPING', 'RUNNING'])

    job_info = _find_long_running_jobs(
        emr_conn, summaries, timedelta(hours=opts.min_hours), now=now)

    _print_report(job_info)
def main(args):
    """Collect and print stats about active EMR job flows.

    Fix: the help-description string concatenation was missing separator
    spaces, rendering e.g. "jobflowsand total number" and "executethese
    jobflows"; trailing spaces have been added to each fragment.
    """
    # parse command-line args
    usage = '%prog [options]'
    description = (
        'Collect EMR stats from active jobflows. '
        'Active jobflows are those in states of: '
        'BOOTSTRAPPING, RUNNING, STARTING, and WAITING. '
        'Collected stats include total number of active jobflows '
        'and total number of Amazon EC2 instances used to execute '
        'these jobflows. The instance counts are not separated by '
        'instance type.')

    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option(
        '-p', '--pretty-print',
        action='store_true', dest='pretty_print', default=False,
        help='Pretty print the collected stats')
    add_basic_opts(option_parser)

    options, args = option_parser.parse_args(args)
    if args:
        option_parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    log.info('collecting EMR active jobflows...')
    job_flows = collect_active_job_flows(options.conf_paths)

    log.info('compiling stats from collected jobflows...')
    stats = job_flows_to_stats(job_flows)

    if options.pretty_print:
        pretty_print(stats)
    else:
        print(json.dumps(stats))
def main(cl_args=None):
    """Terminate idle EMR clusters, honoring deprecated option aliases."""
    parser = _make_arg_parser()
    opts = parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    # translate deprecated --max-hours-idle into minutes, but only when
    # --max-mins-idle wasn't given explicitly
    max_mins_idle = opts.max_mins_idle
    if opts.max_hours_idle is not None:
        if max_mins_idle is None:
            log.warning('--max-hours-idle is deprecated and will be removed'
                        ' in v0.7.0. Please use --max-mins-idle instead.')
            max_mins_idle = opts.max_hours_idle * 60

    if opts.mins_to_end_of_hour is not None:
        log.warning('--mins-to-end-of-hour is deprecated as of v0.6.0'
                    ' and does nothing')

    _maybe_terminate_clusters(
        dry_run=opts.dry_run,
        max_mins_idle=max_mins_idle,
        unpooled_only=opts.unpooled_only,
        now=_boto3_now(),
        pool_name=opts.pool_name,
        pooled_only=opts.pooled_only,
        max_mins_locked=opts.max_mins_locked,
        quiet=opts.quiet,
        **_runner_kwargs(opts)
    )
def main(cl_args=None):
    """CLI entry point: terminate idle EMR clusters."""
    options = _make_arg_parser().parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # honor the deprecated --max-hours-idle alias for --max-mins-idle
    if options.max_mins_idle is not None or options.max_hours_idle is None:
        minutes_idle = options.max_mins_idle
    else:
        log.warning('--max-hours-idle is deprecated and will be removed'
                    ' in v0.7.0. Please use --max-mins-idle instead.')
        minutes_idle = options.max_hours_idle * 60

    if options.mins_to_end_of_hour is not None:
        log.warning('--mins-to-end-of-hour is deprecated as of v0.6.0'
                    ' and does nothing')

    _maybe_terminate_clusters(dry_run=options.dry_run,
                              max_mins_idle=minutes_idle,
                              unpooled_only=options.unpooled_only,
                              now=_boto3_now(),
                              pool_name=options.pool_name,
                              pooled_only=options.pooled_only,
                              max_mins_locked=options.max_mins_locked,
                              quiet=options.quiet,
                              **_runner_kwargs(options))
def main(args=None):
    """Parse args and perform the requested actions via an EMRJobRunner."""
    parser = make_option_parser()
    opts = parse_args(parser, args)

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    with EMRJobRunner(**runner_kwargs(opts)) as runner:
        perform_actions(opts, runner)
def test_default_options(self):
    """By default, INFO messages reach stderr but DEBUG messages do not."""
    with no_handlers_for_logger('__main__'):
        with patch.object(sys, 'stderr', StringIO()) as captured:
            MRJob.set_up_logging()
            logger = logging.getLogger('__main__')
            logger.info('INFO')
            logger.debug('DEBUG')
            self.assertEqual(captured.getvalue(), 'INFO\n')
def main(args=None):
    """Parse args (flagging usage errors) and perform the requested actions."""
    parser = make_option_parser()
    try:
        opts = parse_args(parser, args)
    except OptionError:
        parser.error('This tool takes exactly one argument.')

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    with EMRJobRunner(**runner_kwargs(opts)) as runner:
        perform_actions(opts, runner)
def main(args):
    """Parse command-line args and print the report."""
    parser = make_option_parser()
    opts, positional = parser.parse_args(args)
    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    print_report(opts)
def main():
    """Parse command-line options and perform the requested actions."""
    parser = make_option_parser()
    try:
        opts = parse_args(parser)
    except OptionError:
        parser.error('This tool takes exactly one argument.')

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    with EMRJobRunner(**runner_kwargs(opts)) as runner:
        perform_actions(opts, runner)
def main(cl_args=None):
    """Delete old files under each of the given S3 URIs."""
    opts = _make_arg_parser().parse_args(cl_args)

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    cutoff = _process_time(opts.time_untouched)

    for uri in opts.uris:
        _s3_cleanup(uri, cutoff, dry_run=opts.test, **_runner_kwargs(opts))
def main(cl_args=None):
    """Terminate the EMR cluster identified by --cluster-id."""
    # parse command-line args
    opts = _make_arg_parser().parse_args(cl_args)

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    # create the persistent job
    runner = EMRJobRunner(**_runner_kwargs(opts))

    log.debug('Terminating cluster %s' % opts.cluster_id)
    runner.make_emr_client().terminate_job_flows(
        JobFlowIds=[opts.cluster_id])
    log.info('Terminated cluster %s' % opts.cluster_id)
def main():
    """Inspect job flows and terminate those idle for too long."""
    parser = make_option_parser()
    opts, positional = parser.parse_args()
    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    inspect_and_maybe_terminate_job_flows(
        conf_path=opts.conf_path,
        max_hours_idle=opts.max_hours_idle,
        now=datetime.utcnow(),
        dry_run=opts.dry_run)
def main():
    """S3 cleanup: first positional arg is the age threshold, rest are URIs."""
    parser = make_option_parser()
    opts, positional = parser.parse_args()

    # make sure time and uris are given
    if len(positional) < 2:
        parser.error("Please specify time and one or more URIs")

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    cutoff = process_time(positional[0])

    for uri in positional[1:]:
        s3_cleanup(uri, cutoff, conf_path=opts.conf_path,
                   dry_run=opts.test)
def main(cl_args=None):
    """Clean up old files under the given S3 URIs.

    Fix: the dry-run flag was read from ``options.text``, an attribute that
    does not exist (the option's dest is ``test`` — compare the sibling
    versions of this tool), so every invocation raised AttributeError.
    Now reads ``options.test``.
    """
    option_parser = make_option_parser()
    options, args = option_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # make sure time and uris are given
    if not args or len(args) < 2:
        option_parser.error('Please specify time and one or more URIs')

    time_old = process_time(args[0])

    for path in args[1:]:
        s3_cleanup(path, time_old, dry_run=options.test,
                   **runner_kwargs(options))
def main(cl_args=None):
    """Terminate the EMR job flow named on the command line."""
    parser = make_option_parser()
    opts, positional = parser.parse_args(cl_args)

    if len(positional) != 1:
        parser.error('This tool takes exactly one argument.')
    emr_job_flow_id = positional[0]

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    # create the persistent job
    runner = EMRJobRunner(conf_paths=opts.conf_paths)

    log.debug('Terminating job flow %s' % emr_job_flow_id)
    runner.make_emr_conn().terminate_jobflow(emr_job_flow_id)
    log.info('Terminated job flow %s' % emr_job_flow_id)
def main(cl_args=None):
    """Delete old files under the S3 URIs given on the command line."""
    parser = _make_option_parser()
    opts, positional = parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    # make sure time and uris are given
    if len(positional) < 2:
        parser.error('Please specify time and one or more URIs')

    cutoff = _process_time(positional[0])

    for uri in positional[1:]:
        _s3_cleanup(uri, cutoff, dry_run=opts.test,
                    **_runner_kwargs(opts))
def main(cl_args=None):
    """Terminate idle EMR clusters according to the parsed options."""
    opts = _make_arg_parser().parse_args(cl_args)

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    _maybe_terminate_clusters(
        dry_run=opts.dry_run,
        max_hours_idle=opts.max_hours_idle,
        mins_to_end_of_hour=opts.mins_to_end_of_hour,
        unpooled_only=opts.unpooled_only,
        now=_boto3_now(),
        pool_name=opts.pool_name,
        pooled_only=opts.pooled_only,
        max_mins_locked=opts.max_mins_locked,
        quiet=opts.quiet,
        **_runner_kwargs(opts))
def main():
    """Terminate the job flow named by the single positional argument."""
    # parse command-line args
    parser = make_option_parser()
    opts, positional = parser.parse_args()

    if len(positional) != 1:
        parser.error('takes exactly one argument')
    emr_job_flow_id = positional[0]

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    # create the persistent job
    runner = EMRJobRunner(conf_path=opts.conf_path)

    log.debug('Terminating job flow %s' % emr_job_flow_id)
    runner.make_emr_conn().terminate_jobflow(emr_job_flow_id)
    log.info('Terminated job flow %s' % emr_job_flow_id)
def main(cl_args=None):
    """Terminate the cluster named by the single positional argument."""
    parser = make_option_parser()
    opts, positional = parser.parse_args(cl_args)

    if len(positional) != 1:
        parser.error('This tool takes exactly one argument.')
    cluster_id = positional[0]

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    # create the persistent job
    runner = EMRJobRunner(**runner_kwargs(opts))

    log.debug('Terminating job flow %s' % cluster_id)
    runner.make_emr_conn().terminate_jobflow(cluster_id)
    log.info('Terminated job flow %s' % cluster_id)
def main(cl_args=None):
    """Terminate the cluster given as the single positional argument."""
    parser = _make_option_parser()
    opts, positional = parser.parse_args(cl_args)

    if len(positional) != 1:
        parser.error('This tool takes exactly one argument.')
    cluster_id = positional[0]

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    # create the persistent job
    runner = EMRJobRunner(**_runner_kwargs(opts))

    log.debug('Terminating cluster %s' % cluster_id)
    runner.make_emr_conn().terminate_jobflow(cluster_id)
    log.info('Terminated cluster %s' % cluster_id)
def main():
    """S3 cleanup: positional args are the age threshold and target URIs."""
    parser = make_option_parser()
    opts, positional = parser.parse_args()

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    # make sure time and uris are given
    if len(positional) < 2:
        parser.error('Please specify time and one or more URIs')

    cutoff = process_time(positional[0])

    for uri in positional[1:]:
        s3_cleanup(uri, cutoff, conf_paths=opts.conf_paths,
                   dry_run=opts.test)
def _runner_kwargs(cl_args=None):
    """Parse command line arguments into arguments for
    :py:class:`EMRJobRunner`
    """
    # parse command-line args
    opts = _make_arg_parser().parse_args(cl_args)

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    # everything except the logging-only flags goes to the runner
    return {k: v for k, v in opts.__dict__.items()
            if k not in ('quiet', 'verbose')}
def runner_kwargs():
    """Parse command line arguments into arguments for
    :py:class:`EMRJobRunner`
    """
    # parse command-line args
    parser = make_option_parser()
    opts, positional = parser.parse_args()
    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    # everything except the logging-only flags goes to the runner
    return {k: v for k, v in opts.__dict__.items()
            if k not in ('quiet', 'verbose')}
def main(args=None):
    """Compile and print stats about recent EMR clusters."""
    # parse command-line args
    parser = _make_arg_parser()
    opts = parser.parse_args(args)

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    now = _boto3_now()

    log.info('getting cluster history...')
    clusters = list(_yield_clusters(
        max_days_ago=opts.max_days_ago, now=now, **_runner_kwargs(opts)))

    log.info('compiling cluster stats...')
    stats = _clusters_to_stats(clusters, now=now)

    _print_report(stats, now=now)
def main(args):
    """Report jobs that have been running longer than the threshold."""
    parser = make_option_parser()
    opts, positional = parser.parse_args(args)
    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    log.info('getting information about running jobs')
    emr_conn = EMRJobRunner(conf_path=opts.conf_path).make_emr_conn()
    job_flows = describe_all_job_flows(emr_conn, states=['RUNNING'])

    threshold = timedelta(hours=opts.min_hours)
    job_info = find_long_running_jobs(job_flows, threshold)

    print_report(job_info)
def main():
    """Create a persistent EMR job flow and print its ID.

    Fix: ``print emr_job_flow_id`` is the Python-2-only print statement;
    replaced with the ``print()`` function, which behaves identically for a
    single argument on both Python 2 and 3 and matches the rest of the file.
    """
    # parse command-line args
    option_parser = make_option_parser()
    options, args = option_parser.parse_args()
    if args:
        option_parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # create the persistent job; everything but the logging flags is a
    # runner kwarg
    runner_kwargs = options.__dict__.copy()
    del runner_kwargs['quiet']
    del runner_kwargs['verbose']

    runner = EMRJobRunner(**runner_kwargs)
    emr_job_flow_id = runner.make_persistent_job_flow()
    print(emr_job_flow_id)
def main(args):
    """Print a report of job flow history stats."""
    parser = make_option_parser()
    opts, positional = parser.parse_args(args)
    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    now = datetime.utcnow()

    log.info('getting job flow history...')
    job_flows = get_job_flows(opts.conf_path, opts.max_days_ago, now=now)

    log.info('compiling job flow stats...')
    stats = job_flows_to_stats(job_flows, now=now)

    print_report(stats, now=now)
def main(args):
    """Compile and print job flow stats from cluster history."""
    parser = make_option_parser()
    opts, positional = parser.parse_args(args)
    if positional:
        parser.error("takes no arguments")

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    now = datetime.utcnow()

    log.info("getting job flow history...")
    clusters = list(yield_clusters(
        max_days_ago=opts.max_days_ago, now=now, **runner_kwargs(opts)))

    log.info("compiling job flow stats...")
    print_report(clusters_to_stats(clusters, now=now), now=now)
def main(cl_args=None):
    """Terminate idle clusters (boto2-era option-parser version)."""
    parser = _make_option_parser()
    opts, positional = parser.parse_args(cl_args)
    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    _maybe_terminate_clusters(
        dry_run=opts.dry_run,
        max_hours_idle=opts.max_hours_idle,
        mins_to_end_of_hour=opts.mins_to_end_of_hour,
        unpooled_only=opts.unpooled_only,
        now=datetime.utcnow(),
        pool_name=opts.pool_name,
        pooled_only=opts.pooled_only,
        max_mins_locked=opts.max_mins_locked,
        quiet=opts.quiet,
        **_runner_kwargs(opts))
def main(args=None):
    """Report usage stats for recent EMR clusters."""
    # parse command-line args
    opts = _make_arg_parser().parse_args(args)

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    now = _boto3_now()

    log.info('getting cluster history...')
    cluster_list = list(_yield_clusters(
        max_days_ago=opts.max_days_ago, now=now, **_runner_kwargs(opts)))

    log.info('compiling cluster stats...')
    _print_report(_clusters_to_stats(cluster_list, now=now), now=now)
def main(args):
    """Print a report of cluster history stats."""
    parser = make_option_parser()
    opts, positional = parser.parse_args(args)
    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    now = datetime.utcnow()

    log.info('getting cluster history...')
    clusters = list(yield_clusters(
        max_days_ago=opts.max_days_ago, now=now, **runner_kwargs(opts)))

    log.info('compiling cluster stats...')
    print_report(clusters_to_stats(clusters, now=now), now=now)
def main(args=None):
    """Compile and print cluster history stats (private-helper variant)."""
    parser = _make_option_parser()
    opts, positional = parser.parse_args(args)
    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    now = datetime.utcnow()

    log.info('getting cluster history...')
    clusters = list(_yield_clusters(
        max_days_ago=opts.max_days_ago, now=now, **_runner_kwargs(opts)))

    log.info('compiling cluster stats...')
    _print_report(_clusters_to_stats(clusters, now=now), now=now)
def main(cl_args=None):
    """Run a shell command on every node of an EMR cluster.

    Builds the argument parser, connects to the cluster named by the
    CLUSTER_ID positional argument, and stores each node's stdout/stderr
    under the output directory (default: CLUSTER_ID).
    """
    usage = 'usage: %(prog)s CLUSTER_ID [options] "command string"'
    description = ('Run a command on the master and all worker nodes of an EMR'
                   ' cluster. Store stdout/stderr for results in OUTPUT_DIR.')

    arg_parser = ArgumentParser(usage=usage, description=description)
    arg_parser.add_argument('-o', '--output-dir', dest='output_dir',
                            default=None,
                            help="Specify an output directory (default:"
                            " CLUSTER_ID)")
    arg_parser.add_argument(dest='cluster_id',
                            help='ID of cluster to run command on')
    arg_parser.add_argument(dest='cmd_string',
                            help='command to run, as a single string')

    # add logging/conf args plus the EMR connect + SSH runner options
    _add_basic_args(arg_parser)
    _add_runner_args(
        arg_parser,
        {'ec2_key_pair_file', 'ssh_bin'} | _filter_by_role(
            EMRJobRunner.OPT_NAMES, 'connect')
    )

    _alphabetize_actions(arg_parser)

    options = arg_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # everything except positionals and logging flags becomes runner kwargs
    runner_kwargs = options.__dict__.copy()
    for unused_arg in ('cluster_id', 'cmd_string', 'output_dir',
                       'quiet', 'verbose'):
        del runner_kwargs[unused_arg]

    cmd_args = shlex_split(options.cmd_string)

    output_dir = os.path.abspath(options.output_dir or options.cluster_id)

    with EMRJobRunner(
            cluster_id=options.cluster_id, **runner_kwargs) as runner:
        _run_on_all_nodes(runner, output_dir, cmd_args)
def main():
    """Run a shell command on the master and all slaves of an EMR job flow.

    Results (stdout and stderr per node) are stored under the output
    directory, which defaults to the job flow ID.
    """
    usage = 'usage: %prog JOB_FLOW_ID OUTPUT_DIR [options] "command string"'
    description = ('Run a command on the master and all slaves of an EMR job'
                   ' flow. Store stdout and stderr for results in OUTPUT_DIR.')

    option_parser = OptionParser(usage=usage, description=description)
    # options to copy into this parser from MRJob's option groups
    assignments = {
        option_parser: ('conf_paths', 'quiet', 'verbose', 'ec2_key_pair_file')
    }

    option_parser.add_option('-o', '--output-dir', dest='output_dir',
                             default=None,
                             help="Specify an output directory (default:"
                             " JOB_FLOW_ID)")

    # scraping must happen before parse_args so the options exist
    mr_job = MRJob()
    scrape_options_into_new_groups(mr_job.all_option_groups(), assignments)
    options, args = option_parser.parse_args()

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # everything except output/logging flags becomes runner kwargs
    runner_kwargs = options.__dict__.copy()
    for unused_arg in ('output_dir', 'quiet', 'verbose'):
        del runner_kwargs[unused_arg]

    if len(args) < 2:
        option_parser.print_help()
        sys.exit(1)

    job_flow_id, cmd_string = args[:2]
    cmd_args = shlex_split(cmd_string)
    output_dir = os.path.abspath(options.output_dir or job_flow_id)

    with EMRJobRunner(emr_job_flow_id=job_flow_id, **runner_kwargs) as runner:
        runner._enable_slave_ssh_access()
        run_on_all_nodes(runner, output_dir, cmd_args)
def main(args, now=None):
    """Report long-running jobs on active clusters."""
    if now is None:
        now = datetime.utcnow()

    parser = _make_option_parser()
    opts, positional = parser.parse_args(args)
    if positional:
        parser.error("takes no arguments")

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    log.info("getting information about running jobs")

    emr_conn = EMRJobRunner(**_runner_kwargs(opts)).make_emr_conn()
    summaries = _yield_all_clusters(
        emr_conn,
        cluster_states=["STARTING", "BOOTSTRAPPING", "RUNNING"])

    job_info = _find_long_running_jobs(
        emr_conn, summaries, timedelta(hours=opts.min_hours), now=now)

    _print_report(job_info)
def main(cl_args=None):
    """Inspect job flows and terminate those idle for too long."""
    parser = make_option_parser()
    opts, positional = parser.parse_args(cl_args)
    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    inspect_and_maybe_terminate_job_flows(
        conf_paths=opts.conf_paths,
        dry_run=opts.dry_run,
        max_hours_idle=opts.max_hours_idle,
        mins_to_end_of_hour=opts.mins_to_end_of_hour,
        unpooled_only=opts.unpooled_only,
        now=datetime.utcnow(),
        pool_name=opts.pool_name,
        pooled_only=opts.pooled_only,
        max_mins_locked=opts.max_mins_locked,
        # only fully quiet when -q given more than once
        quiet=(opts.quiet > 1),
    )
def main(cl_args=None):
    """Terminate idle job flows based on command-line options."""
    option_parser = make_option_parser()
    options, extra = option_parser.parse_args(cl_args)
    if extra:
        option_parser.error("takes no arguments")

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    inspect_and_maybe_terminate_job_flows(
        conf_paths=options.conf_paths,
        dry_run=options.dry_run,
        max_hours_idle=options.max_hours_idle,
        mins_to_end_of_hour=options.mins_to_end_of_hour,
        unpooled_only=options.unpooled_only,
        now=datetime.utcnow(),
        pool_name=options.pool_name,
        pooled_only=options.pooled_only,
        max_mins_locked=options.max_mins_locked,
        # only fully quiet when -q given more than once
        quiet=(options.quiet > 1),
    )
def main():
    """Run a shell command on the master and all slaves of an EMR job flow.

    Same flow as the shlex_split variant, but uses ``shlex.split`` directly.
    Per-node stdout/stderr is stored under the output directory (default:
    the job flow ID).
    """
    usage = 'usage: %prog JOB_FLOW_ID OUTPUT_DIR [options] "command string"'
    description = ('Run a command on the master and all slaves of an EMR job'
                   ' flow. Store stdout and stderr for results in OUTPUT_DIR.')

    option_parser = OptionParser(usage=usage, description=description)
    # options to copy into this parser from MRJob's option groups
    assignments = {
        option_parser: ('conf_paths', 'quiet', 'verbose', 'ec2_key_pair_file')
    }

    option_parser.add_option('-o', '--output-dir', dest='output_dir',
                             default=None,
                             help="Specify an output directory (default:"
                             " JOB_FLOW_ID)")

    # scraping must happen before parse_args so the options exist
    mr_job = MRJob()
    scrape_options_into_new_groups(mr_job.all_option_groups(), assignments)
    options, args = option_parser.parse_args()

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # everything except output/logging flags becomes runner kwargs
    runner_kwargs = options.__dict__.copy()
    for unused_arg in ('output_dir', 'quiet', 'verbose'):
        del runner_kwargs[unused_arg]

    if len(args) < 2:
        option_parser.print_help()
        sys.exit(1)

    job_flow_id, cmd_string = args[:2]
    cmd_args = shlex.split(cmd_string)
    output_dir = os.path.abspath(options.output_dir or job_flow_id)

    with EMRJobRunner(emr_job_flow_id=job_flow_id, **runner_kwargs) as runner:
        runner._enable_slave_ssh_access()
        run_on_all_nodes(runner, output_dir, cmd_args)
def main(cl_args=None):
    """Diagnose why an EMR step failed by interpreting its logs.

    Looks up the step (by --step-id or the last step of --cluster-id),
    warns if it is not in the FAILED state, then asks the runner to pick
    the probable error out of the step's logs and reports it.
    """
    arg_parser = _make_arg_parser()
    options = arg_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # everything except logging flags and step_id becomes runner kwargs
    runner_kwargs = {
        k: v for k, v in options.__dict__.items()
        if k not in ('quiet', 'verbose', 'step_id')
    }

    runner = EMRJobRunner(**runner_kwargs)
    emr_client = runner.make_emr_client()

    # pick step
    step = _get_step(emr_client, options.cluster_id, options.step_id)

    if not step:
        raise SystemExit(1)

    if step['Status']['State'] != 'FAILED':
        log.warning('step %s has state %s, not FAILED' %
                    (step['Id'], step['Status']['State']))

    # interpret logs
    log.info('Diagnosing step %s (%s)' % (step['Id'], step['Name']))

    log_interpretation = dict(step_id=step['Id'])

    step_type = _infer_step_type(step)

    error = runner._pick_error(log_interpretation, step_type)

    # print error
    if error:
        log.error('Probable cause of failure:\n\n%s\n\n' %
                  _format_error(error))
    else:
        log.warning('No error detected')
def main(cl_args=None):
    """Maybe terminate idle clusters, based on parsed options."""
    parser = _make_option_parser()
    opts, positional = parser.parse_args(cl_args)
    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    _maybe_terminate_clusters(
        dry_run=opts.dry_run,
        max_hours_idle=opts.max_hours_idle,
        mins_to_end_of_hour=opts.mins_to_end_of_hour,
        unpooled_only=opts.unpooled_only,
        now=datetime.utcnow(),
        pool_name=opts.pool_name,
        pooled_only=opts.pooled_only,
        max_mins_locked=opts.max_mins_locked,
        quiet=opts.quiet,
        **_runner_kwargs(opts)
    )
def main(cl_args=None):
    """Run a shell command on the master and all slaves of an EMR cluster.

    Per-node stdout/stderr is stored under the output directory (default:
    the cluster ID).
    """
    usage = 'usage: %prog CLUSTER_ID OUTPUT_DIR [options] "command string"'
    description = ('Run a command on the master and all slaves of an EMR'
                   ' cluster. Store stdout/stderr for results in OUTPUT_DIR.')

    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option('-o', '--output-dir', dest='output_dir',
                             default=None,
                             help="Specify an output directory (default:"
                             " CLUSTER_ID)")

    # add conf/logging and EMR-connection options, then pull in the
    # SSH-related options from MRJob's option groups
    add_basic_opts(option_parser)
    add_emr_connect_opts(option_parser)

    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        option_parser: ('ec2_key_pair_file', 'ssh_bin'),
    })

    alphabetize_options(option_parser)
    options, args = option_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # everything except output/logging flags becomes runner kwargs
    runner_kwargs = options.__dict__.copy()
    for unused_arg in ('output_dir', 'quiet', 'verbose'):
        del runner_kwargs[unused_arg]

    if len(args) < 2:
        option_parser.print_help()
        sys.exit(1)

    cluster_id, cmd_string = args[:2]
    cmd_args = shlex_split(cmd_string)
    output_dir = os.path.abspath(options.output_dir or cluster_id)

    with EMRJobRunner(cluster_id=cluster_id, **runner_kwargs) as runner:
        runner._enable_slave_ssh_access()
        run_on_all_nodes(runner, output_dir, cmd_args)
def main(cl_args=None):
    """Run a shell command on the master and all slaves of an EMR cluster.

    Variant that picks runner options via ``_pick_runner_opts``; per-node
    stdout/stderr is stored under the output directory (default: the
    cluster ID).
    """
    usage = 'usage: %prog CLUSTER_ID [options] "command string"'
    description = ('Run a command on the master and all slaves of an EMR'
                   ' cluster. Store stdout/stderr for results in OUTPUT_DIR.')

    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option('-o', '--output-dir', dest='output_dir',
                             default=None,
                             help="Specify an output directory (default:"
                             " CLUSTER_ID)")

    # add conf/logging options plus the EMR connect + SSH runner options
    _add_basic_options(option_parser)
    _add_runner_options(
        option_parser,
        _pick_runner_opts('emr', 'connect') | set(
            ['ssh_bin', 'ec2_key_pair_file'])
    )

    _alphabetize_options(option_parser)
    options, args = option_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # everything except output/logging flags becomes runner kwargs
    runner_kwargs = options.__dict__.copy()
    for unused_arg in ('output_dir', 'quiet', 'verbose'):
        del runner_kwargs[unused_arg]

    if len(args) < 2:
        option_parser.print_help()
        sys.exit(1)

    cluster_id, cmd_string = args[:2]
    cmd_args = shlex_split(cmd_string)
    output_dir = os.path.abspath(options.output_dir or cluster_id)

    with EMRJobRunner(cluster_id=cluster_id, **runner_kwargs) as runner:
        _run_on_all_nodes(runner, output_dir, cmd_args)