示例#1
0
 def test_verbose(self):
     """With ``verbose=True``, INFO and DEBUG messages both reach stderr."""
     with patch.object(sys, 'stderr', StringIO()) as captured:
         MRJob.set_up_logging(verbose=True)

         # the logger is fetched only after logging has been set up
         main_logger = logging.getLogger('__main__')
         main_logger.info('INFO')
         main_logger.debug('DEBUG')

         expected = 'INFO\nDEBUG\n'
         self.assertEqual(captured.getvalue(), expected)
示例#2
0
def main(args=None):
    """Look up long-running jobs on active clusters and print a report.

    :param args: command-line arguments handed to the argument parser.
    """
    # reference time is taken before any other work is done
    now = _boto3_now()

    options = _make_arg_parser().parse_args(args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    log.info('getting information about running jobs')

    min_time = timedelta(hours=options.min_hours)

    runner = EMRJobRunner(**_runner_kwargs(options))
    emr_client = runner.make_emr_client()
    summaries = _boto3_paginate(
        'Clusters', emr_client, 'list_clusters',
        ClusterStates=['STARTING', 'BOOTSTRAPPING', 'RUNNING'])

    # filtering only happens when there is something to exclude
    if options.exclude:
        summaries = _filter_clusters(summaries, emr_client, options.exclude)

    _print_report(_find_long_running_jobs(
        emr_client, summaries, min_time, now=now))
示例#3
0
def main(cl_args=None):
    """Build a one-step job from command-line arguments and run it.

    Prints help and exits with status 0 when help was requested or no
    script/jar was given. The runner is always cleaned up after running.

    :param cl_args: command-line arguments handed to the argument parser.
    """
    parser = _make_arg_parser()
    options = parser.parse_args(cl_args)

    runner_class = _runner_class(options.runner or _DEFAULT_RUNNER)

    # asking for help and giving nothing to run are treated the same way
    show_help = options.help or not options.script_or_jar
    if show_help:
        _print_help(options, runner_class)
        sys.exit(0)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    runner_opts = _get_runner_opt_kwargs(options, runner_class)
    runner_opts.update(_HARD_CODED_OPTS)
    runner_opts['input_paths'] = [os.devnull]

    single_step = _get_step(options, parser, cl_args)
    runner_opts['steps'] = [single_step.description()]

    runner = runner_class(**runner_opts)
    try:
        runner.run()
    finally:
        runner.cleanup()
示例#4
0
文件: spark_submit.py 项目: qui/mrjob
def main(cl_args=None):
    """Turn command-line arguments into a single step and run it.

    When help is requested, or no script/jar is supplied, help is printed
    and the process exits with status 0. ``runner.cleanup()`` is called
    whether or not ``runner.run()`` succeeds.

    :param cl_args: command-line arguments handed to the argument parser.
    """
    parser = _make_arg_parser()
    options = parser.parse_args(cl_args)

    alias = options.runner or _DEFAULT_RUNNER
    runner_class = _runner_class(alias)

    nothing_to_run = options.help or not options.script_or_jar
    if nothing_to_run:
        _print_help(options, runner_class)
        sys.exit(0)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # runner options from the command line, overridden by hard-coded ones
    runner_opts = _get_runner_opt_kwargs(options, runner_class)
    runner_opts.update(_HARD_CODED_OPTS)
    runner_opts['input_paths'] = [os.devnull]

    the_step = _get_step(options, parser, cl_args)
    runner_opts['steps'] = [the_step.description()]

    runner = runner_class(**runner_opts)
    try:
        runner.run()
    finally:
        runner.cleanup()
示例#5
0
def main(args, now=None):
    """Find long-running jobs on active clusters and print a report.

    :param args: command-line arguments handed to the option parser;
                 leftover positional arguments are reported as an error.
    :param now: reference time passed on to ``_find_long_running_jobs()``;
                ``datetime.utcnow()`` is used when omitted.
    """
    now = datetime.utcnow() if now is None else now

    parser = _make_option_parser()
    options, positional = parser.parse_args(args)
    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    log.info('getting information about running jobs')
    runner = EMRJobRunner(**_runner_kwargs(options))
    emr_conn = runner.make_emr_conn()
    active_states = ['STARTING', 'BOOTSTRAPPING', 'RUNNING']
    cluster_summaries = _yield_all_clusters(
        emr_conn, cluster_states=active_states)

    min_time = timedelta(hours=options.min_hours)

    _print_report(_find_long_running_jobs(
        emr_conn, cluster_summaries, min_time, now=now))
示例#6
0
def main(args):
    """Collect stats about active EMR jobflows and print them.

    Stats are printed with ``pretty_print()`` when ``-p``/``--pretty-print``
    is given, and as a single JSON document otherwise.

    :param args: command-line arguments handed to the option parser;
                 leftover positional arguments are reported as an error.
    """
    # parse command-line args
    usage = '%prog [options]'
    # Adjacent literals are joined into one string. Every fragment except
    # the last ends with a space so that words from neighboring fragments
    # don't run together in the help output.
    description = (
        "Collect EMR stats from active jobflows. "
        "Active jobflows are those in states of: "
        "BOOTSTRAPPING, RUNNING, STARTING, and WAITING. "
        "Collected stats include total number of active jobflows "
        "and total number of Amazon EC2 instances used to execute "
        "these jobflows. The instance counts are not separated by "
        "instance type.")
    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option(
        "-p", "--pretty-print",
        action="store_true", dest="pretty_print", default=False,
        help=('Pretty print the collected stats'))
    add_basic_opts(option_parser)

    options, args = option_parser.parse_args(args)
    if args:
        option_parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)
    log.info('collecting EMR active jobflows...')
    job_flows = collect_active_job_flows(options.conf_paths)
    log.info('compiling stats from collected jobflows...')
    stats = job_flows_to_stats(job_flows)

    if options.pretty_print:
        pretty_print(stats)
    else:
        print(json.dumps(stats))
def main(cl_args=None):
    """Parse options and hand them to ``_maybe_terminate_clusters()``.

    Deprecated options are handled first: ``--max-hours-idle`` is converted
    to minutes when ``--max-mins-idle`` was not given, and
    ``--mins-to-end-of-hour`` only produces a warning.

    :param cl_args: command-line arguments handed to the argument parser.
    """
    options = _make_arg_parser().parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # fall back on the deprecated hours-based option, converted to minutes
    idle_limit_mins = options.max_mins_idle
    if idle_limit_mins is None:
        idle_limit_hours = options.max_hours_idle
        if idle_limit_hours is not None:
            log.warning('--max-hours-idle is deprecated and will be removed'
                        ' in v0.7.0. Please use --max-mins-idle instead.')
            idle_limit_mins = idle_limit_hours * 60

    if options.mins_to_end_of_hour is not None:
        log.warning('--mins-to-end-of-hour is deprecated as of v0.6.0'
                    ' and does nothing')

    _maybe_terminate_clusters(dry_run=options.dry_run,
                              max_mins_idle=idle_limit_mins,
                              unpooled_only=options.unpooled_only,
                              now=_boto3_now(),
                              pool_name=options.pool_name,
                              pooled_only=options.pooled_only,
                              max_mins_locked=options.max_mins_locked,
                              quiet=options.quiet,
                              **_runner_kwargs(options))
def main(cl_args=None):
    """Parse options and pass them on to ``_maybe_terminate_clusters()``.

    ``--max-hours-idle`` (deprecated) is converted to minutes when
    ``--max-mins-idle`` is absent; ``--mins-to-end-of-hour`` (deprecated)
    only triggers a warning.

    :param cl_args: command-line arguments handed to the argument parser.
    """
    parser = _make_arg_parser()
    options = parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # max_hours_idle -> max_mins_idle
    mins_idle = options.max_mins_idle
    if mins_idle is None:
        hours_idle = options.max_hours_idle
        if hours_idle is not None:
            log.warning('--max-hours-idle is deprecated and will be removed'
                        ' in v0.7.0. Please use --max-mins-idle instead.')
            mins_idle = hours_idle * 60

    if options.mins_to_end_of_hour is not None:
        log.warning('--mins-to-end-of-hour is deprecated as of v0.6.0'
                    ' and does nothing')

    _maybe_terminate_clusters(
        dry_run=options.dry_run,
        max_mins_idle=mins_idle,
        unpooled_only=options.unpooled_only,
        now=_boto3_now(),
        pool_name=options.pool_name,
        pooled_only=options.pooled_only,
        max_mins_locked=options.max_mins_locked,
        quiet=options.quiet,
        **_runner_kwargs(options)
    )
示例#9
0
def main(args=None):
    """Report on long-running jobs found on active clusters.

    :param args: command-line arguments handed to the argument parser.
    """
    # taken first so the same reference time is used for the whole report
    now = _boto3_now()

    parser = _make_arg_parser()
    options = parser.parse_args(args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    log.info('getting information about running jobs')

    min_time = timedelta(hours=options.min_hours)

    emr_client = EMRJobRunner(**_runner_kwargs(options)).make_emr_client()
    active_states = ['STARTING', 'BOOTSTRAPPING', 'RUNNING']
    clusters = _boto3_paginate(
        'Clusters', emr_client, 'list_clusters',
        ClusterStates=active_states)

    # exclusion is optional; skip the filtering pass when nothing is excluded
    if options.exclude:
        clusters = _filter_clusters(clusters, emr_client, options.exclude)

    report = _find_long_running_jobs(
        emr_client, clusters, min_time, now=now)
    _print_report(report)
示例#10
0
文件: test_launch.py 项目: Yelp/mrjob
 def test_verbose(self):
     """Verbose logging writes both INFO and DEBUG messages to stderr."""
     with patch.object(sys, 'stderr', StringIO()) as fake_stderr:
         MRJob.set_up_logging(verbose=True)

         logger = logging.getLogger('__main__')
         for emit, text in ((logger.info, 'INFO'), (logger.debug, 'DEBUG')):
             emit(text)

         self.assertEqual(fake_stderr.getvalue(), 'INFO\nDEBUG\n')
示例#11
0
def main(args):
    """Collect stats about active EMR jobflows and print them.

    With ``-p``/``--pretty-print`` the stats go through ``pretty_print()``;
    otherwise they are printed as a single JSON document.

    :param args: command-line arguments handed to the option parser;
                 leftover positional arguments are reported as an error.
    """
    # parse command-line args
    usage = '%prog [options]'
    # Each piece but the last carries a trailing space; without it the
    # joined help text would fuse words across piece boundaries.
    description = ' '.join([
        "Collect EMR stats from active jobflows.",
        "Active jobflows are those in states of:",
        "BOOTSTRAPPING, RUNNING, STARTING, and WAITING.",
        "Collected stats include total number of active jobflows",
        "and total number of Amazon EC2 instances used to execute",
        "these jobflows. The instance counts are not separated by",
        "instance type.",
    ])
    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option(
        "-p", "--pretty-print",
        action="store_true", dest="pretty_print", default=False,
        help=('Pretty print the collected stats'))
    add_basic_opts(option_parser)

    options, args = option_parser.parse_args(args)
    if args:
        option_parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)
    log.info('collecting EMR active jobflows...')
    job_flows = collect_active_job_flows(options.conf_paths)
    log.info('compiling stats from collected jobflows...')
    stats = job_flows_to_stats(job_flows)

    if options.pretty_print:
        pretty_print(stats)
    else:
        print(json.dumps(stats))
示例#12
0
def main(args=None):
    """Parse options, set up logging, and run the requested actions.

    :param args: command-line arguments passed to ``parse_args()``.
    """
    parser = make_option_parser()
    options = parse_args(parser, args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # the runner is used as a context manager around the actions
    with EMRJobRunner(**runner_kwargs(options)) as emr_runner:
        perform_actions(options, emr_runner)
示例#13
0
 def test_default_options(self):
     """With default options, INFO reaches stderr but DEBUG does not."""
     with no_handlers_for_logger('__main__'), \
             patch.object(sys, 'stderr', StringIO()) as captured:
         MRJob.set_up_logging()

         main_logger = logging.getLogger('__main__')
         main_logger.info('INFO')
         main_logger.debug('DEBUG')

         self.assertEqual(captured.getvalue(), 'INFO\n')
示例#14
0
def main(args=None):
    """Run ``perform_actions()`` with an :py:class:`EMRJobRunner`.

    :param args: command-line arguments passed to ``parse_args()``.
    """
    options = parse_args(make_option_parser(), args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    kwargs_for_runner = runner_kwargs(options)
    with EMRJobRunner(**kwargs_for_runner) as active_runner:
        perform_actions(options, active_runner)
示例#15
0
 def test_default_options(self):
     """Default logging setup emits INFO to stderr and drops DEBUG."""
     with no_handlers_for_logger('__main__'), \
             patch.object(sys, 'stderr', StringIO()) as fake_stderr:
         MRJob.set_up_logging()

         logger = logging.getLogger('__main__')
         for emit, text in ((logger.info, 'INFO'), (logger.debug, 'DEBUG')):
             emit(text)

         self.assertEqual(fake_stderr.getvalue(), 'INFO\n')
示例#16
0
def main(args=None):
    """Parse options and run the requested actions on a runner.

    An ``OptionError`` raised while parsing is reported through the
    parser's ``error()`` method.

    :param args: command-line arguments passed to ``parse_args()``.
    """
    parser = make_option_parser()
    try:
        options = parse_args(parser, args)
    except OptionError:
        parser.error('This tool takes exactly one argument.')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    with EMRJobRunner(**runner_kwargs(options)) as emr_runner:
        perform_actions(options, emr_runner)
示例#17
0
def main(args):
    """Parse options, set up logging, and print the report.

    :param args: command-line arguments handed to the option parser;
                 leftover positional arguments are reported as an error.
    """
    parser = make_option_parser()
    options, positional = parser.parse_args(args)

    # this tool is driven by options only
    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    print_report(options)
示例#18
0
def main(args):
    """Print a report driven entirely by command-line options.

    :param args: command-line arguments handed to the option parser;
                 any positional argument is reported as an error.
    """
    opt_parser = make_option_parser()
    options, extras = opt_parser.parse_args(args)

    if extras:
        opt_parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    print_report(options)
示例#19
0
def main():
    """Parse options and run the requested actions on a runner.

    An ``OptionError`` raised while parsing is reported through the
    parser's ``error()`` method.
    """
    parser = make_option_parser()
    try:
        options = parse_args(parser)
    except OptionError:
        parser.error('This tool takes exactly one argument.')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    with EMRJobRunner(**runner_kwargs(options)) as emr_runner:
        perform_actions(options, emr_runner)
示例#20
0
def main(cl_args=None):
    """Call ``_s3_cleanup()`` on every URI given on the command line.

    :param cl_args: command-line arguments handed to the argument parser.
    """
    options = _make_arg_parser().parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    time_old = _process_time(options.time_untouched)

    # runner kwargs are rebuilt for each URI
    for uri in options.uris:
        _s3_cleanup(
            uri, time_old, dry_run=options.test, **_runner_kwargs(options))
示例#21
0
文件: s3_tmpwatch.py 项目: Yelp/mrjob
def main(cl_args=None):
    """Run ``_s3_cleanup()`` for each URI in the parsed options.

    :param cl_args: command-line arguments handed to the argument parser.
    """
    parser = _make_arg_parser()
    options = parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # cutoff derived from the time_untouched option
    cutoff = _process_time(options.time_untouched)

    for s3_uri in options.uris:
        _s3_cleanup(s3_uri, cutoff,
                    dry_run=options.test,
                    **_runner_kwargs(options))
示例#22
0
def main(cl_args=None):
    """Request termination of the cluster named on the command line.

    :param cl_args: command-line arguments handed to the argument parser.
    """
    options = _make_arg_parser().parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    runner = EMRJobRunner(**_runner_kwargs(options))
    cluster_id = options.cluster_id

    log.debug('Terminating cluster %s' % cluster_id)
    emr_client = runner.make_emr_client()
    emr_client.terminate_job_flows(JobFlowIds=[cluster_id])
    log.info('Terminated cluster %s' % cluster_id)
def main(cl_args=None):
    """Ask EMR to terminate the cluster given by ``options.cluster_id``.

    :param cl_args: command-line arguments handed to the argument parser.
    """
    # parse command-line args
    parser = _make_arg_parser()
    options = parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # the runner is only needed for its EMR client
    runner = EMRJobRunner(**_runner_kwargs(options))
    target = options.cluster_id

    log.debug('Terminating cluster %s' % target)
    client = runner.make_emr_client()
    client.terminate_job_flows(JobFlowIds=[target])
    log.info('Terminated cluster %s' % target)
示例#24
0
def main():
    """Parse options and call ``inspect_and_maybe_terminate_job_flows()``.

    Positional arguments are not accepted and are reported as an error.
    """
    parser = make_option_parser()
    options, positional = parser.parse_args()

    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    inspect_and_maybe_terminate_job_flows(conf_path=options.conf_path,
                                          max_hours_idle=options.max_hours_idle,
                                          now=datetime.utcnow(),
                                          dry_run=options.dry_run)
示例#25
0
def main():
    """Run ``s3_cleanup()`` on each URI given on the command line.

    The first positional argument is the time; the rest are URIs. Fewer
    than two positional arguments is reported as an error.
    """
    parser = make_option_parser()
    options, positional = parser.parse_args()

    # need a time plus at least one URI
    if len(positional) < 2:
        parser.error("Please specify time and one or more URIs")

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    time_old = process_time(positional[0])
    uris = positional[1:]

    for uri in uris:
        s3_cleanup(
            uri, time_old, conf_path=options.conf_path, dry_run=options.test)
示例#26
0
def main(cl_args=None):
    """Run ``s3_cleanup()`` on each URI given on the command line.

    The first positional argument is the time; the rest are URIs. Fewer
    than two positional arguments is reported as an error.

    :param cl_args: command-line arguments handed to the option parser.
    """
    option_parser = make_option_parser()
    options, args = option_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # make sure time and uris are given
    if not args or len(args) < 2:
        option_parser.error('Please specify time and one or more URIs')

    time_old = process_time(args[0])

    for path in args[1:]:
        # NOTE(review): the dry-run flag is read from options.test (this
        # previously read options.text, which looks like a typo) -- confirm
        # the dest name against make_option_parser()
        s3_cleanup(path, time_old,
                   dry_run=options.test,
                   **runner_kwargs(options))
示例#27
0
def main(cl_args=None):
    """Terminate the job flow whose ID is the single positional argument.

    :param cl_args: command-line arguments handed to the option parser;
                    anything other than exactly one positional argument
                    is reported as an error.
    """
    parser = make_option_parser()
    options, positional = parser.parse_args(cl_args)

    if len(positional) != 1:
        parser.error('This tool takes exactly one argument.')
    job_flow_id = positional[0]

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # the runner is only needed for its EMR connection
    runner = EMRJobRunner(conf_paths=options.conf_paths)
    log.debug('Terminating job flow %s' % job_flow_id)
    emr_conn = runner.make_emr_conn()
    emr_conn.terminate_jobflow(job_flow_id)
    log.info('Terminated job flow %s' % job_flow_id)
示例#28
0
def main(cl_args=None):
    """Run ``_s3_cleanup()`` on each URI given on the command line.

    The first positional argument is the time; the rest are URIs. Fewer
    than two positional arguments is reported as an error.

    :param cl_args: command-line arguments handed to the option parser.
    """
    parser = _make_option_parser()
    options, positional = parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # need a time plus at least one URI
    if len(positional) < 2:
        parser.error('Please specify time and one or more URIs')

    time_old = _process_time(positional[0])
    uris = positional[1:]

    for uri in uris:
        _s3_cleanup(
            uri, time_old, dry_run=options.test, **_runner_kwargs(options))
def main(cl_args=None):
    """Parse options and hand them to ``_maybe_terminate_clusters()``.

    :param cl_args: command-line arguments handed to the argument parser.
    """
    options = _make_arg_parser().parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # explicit options first, then whatever the runner itself needs
    _maybe_terminate_clusters(
        dry_run=options.dry_run,
        max_hours_idle=options.max_hours_idle,
        mins_to_end_of_hour=options.mins_to_end_of_hour,
        unpooled_only=options.unpooled_only,
        now=_boto3_now(),
        pool_name=options.pool_name,
        pooled_only=options.pooled_only,
        max_mins_locked=options.max_mins_locked,
        quiet=options.quiet,
        **_runner_kwargs(options)
    )
示例#30
0
def main():
    """Terminate the job flow whose ID is the single positional argument.

    Anything other than exactly one positional argument is reported as an
    error.
    """
    parser = make_option_parser()
    options, positional = parser.parse_args()

    if len(positional) != 1:
        parser.error('takes exactly one argument')
    job_flow_id = positional[0]

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # the runner is only needed for its EMR connection
    runner = EMRJobRunner(conf_path=options.conf_path)
    log.debug('Terminating job flow %s' % job_flow_id)
    emr_conn = runner.make_emr_conn()
    emr_conn.terminate_jobflow(job_flow_id)
    log.info('Terminated job flow %s' % job_flow_id)
示例#31
0
def main(cl_args=None):
    """Terminate the job flow whose ID is the single positional argument.

    :param cl_args: command-line arguments handed to the option parser;
                    anything other than exactly one positional argument
                    is reported as an error.
    """
    parser = make_option_parser()
    options, positional = parser.parse_args(cl_args)

    if len(positional) != 1:
        parser.error('This tool takes exactly one argument.')
    target_id = positional[0]

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # the runner is only needed for its EMR connection
    runner = EMRJobRunner(**runner_kwargs(options))
    log.debug('Terminating job flow %s' % target_id)
    emr_conn = runner.make_emr_conn()
    emr_conn.terminate_jobflow(target_id)
    log.info('Terminated job flow %s' % target_id)
示例#32
0
def main(cl_args=None):
    """Terminate the cluster whose ID is the single positional argument.

    :param cl_args: command-line arguments handed to the option parser;
                    anything other than exactly one positional argument
                    is reported as an error.
    """
    parser = _make_option_parser()
    options, positional = parser.parse_args(cl_args)

    if len(positional) != 1:
        parser.error('This tool takes exactly one argument.')
    target_id = positional[0]

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # the runner is only needed for its EMR connection
    runner = EMRJobRunner(**_runner_kwargs(options))
    log.debug('Terminating cluster %s' % target_id)
    emr_conn = runner.make_emr_conn()
    emr_conn.terminate_jobflow(target_id)
    log.info('Terminated cluster %s' % target_id)
示例#33
0
def main():
    """Run ``s3_cleanup()`` on each URI given on the command line.

    The first positional argument is the time; the rest are URIs. Fewer
    than two positional arguments is reported as an error.
    """
    parser = make_option_parser()
    options, positional = parser.parse_args()

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # need a time plus at least one URI
    if len(positional) < 2:
        parser.error('Please specify time and one or more URIs')

    time_old = process_time(positional[0])
    uris = positional[1:]

    for uri in uris:
        s3_cleanup(
            uri, time_old, conf_paths=options.conf_paths, dry_run=options.test)
示例#34
0
def _runner_kwargs(cl_args=None):
    """Turn command-line arguments into keyword arguments for
    :py:class:`EMRJobRunner`.

    Logging is set up as a side effect of parsing.

    :param cl_args: command-line arguments handed to the argument parser.
    :return: a copy of the parsed options as a dict, without the ``quiet``
             and ``verbose`` entries.
    """
    options = _make_arg_parser().parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # work on a copy so the parsed options themselves stay untouched
    kwargs = dict(vars(options))

    # the logging switches were consumed above and are not returned
    for logging_opt in ('quiet', 'verbose'):
        del kwargs[logging_opt]

    return kwargs
示例#35
0
def runner_kwargs():
    """Turn command-line arguments into keyword arguments for
    :py:class:`EMRJobRunner`.

    Positional arguments are reported as an error, and logging is set up
    as a side effect of parsing.

    :return: a copy of the parsed options as a dict, without the ``quiet``
             and ``verbose`` entries.
    """
    parser = make_option_parser()
    options, positional = parser.parse_args()

    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # work on a copy so the parsed options themselves stay untouched
    kwargs = dict(vars(options))
    for logging_opt in ('quiet', 'verbose'):
        del kwargs[logging_opt]
    return kwargs
示例#36
0
def runner_kwargs():
    """Parse the command line into keyword arguments for
    :py:class:`EMRJobRunner`.

    Positional arguments are rejected through the parser's ``error()``
    method. Logging is configured from the ``quiet``/``verbose`` options.

    :return: dict copy of the parsed options with ``quiet`` and
             ``verbose`` removed.
    """
    opt_parser = make_option_parser()
    options, extras = opt_parser.parse_args()

    if extras:
        opt_parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # copy first; the logging switches are then stripped from the copy
    result = dict(vars(options))
    del result['quiet']
    del result['verbose']
    return result
示例#37
0
def main(args=None):
    """Gather cluster history, compile stats from it, and print a report.

    :param args: command-line arguments handed to the argument parser.
    """
    options = _make_arg_parser().parse_args(args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # one reference time is shared by all the steps below
    now = _boto3_now()

    log.info('getting cluster history...')
    cluster_iter = _yield_clusters(
        max_days_ago=options.max_days_ago, now=now, **_runner_kwargs(options))
    clusters = list(cluster_iter)

    log.info('compiling cluster stats...')
    _print_report(_clusters_to_stats(clusters, now=now), now=now)
示例#38
0
def main(args):
    """Find long-running jobs among RUNNING job flows and print a report.

    :param args: command-line arguments handed to the option parser;
                 leftover positional arguments are reported as an error.
    """
    parser = make_option_parser()
    options, positional = parser.parse_args(args)

    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    log.info('getting information about running jobs')
    runner = EMRJobRunner(conf_path=options.conf_path)
    emr_conn = runner.make_emr_conn()
    job_flows = describe_all_job_flows(emr_conn, states=['RUNNING'])

    min_time = timedelta(hours=options.min_hours)

    print_report(find_long_running_jobs(job_flows, min_time))
示例#39
0
def main():
    """Create a persistent job flow and print its ID.

    Positional arguments are reported as an error. All parsed options
    except ``quiet`` and ``verbose`` are passed to
    :py:class:`EMRJobRunner` as keyword arguments.
    """
    # parse command-line args
    option_parser = make_option_parser()
    options, args = option_parser.parse_args()

    if args:
        option_parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # the logging switches were consumed above; the rest go to the runner
    runner_kwargs = options.__dict__.copy()
    del runner_kwargs['quiet']
    del runner_kwargs['verbose']

    # create the persistent job flow
    runner = EMRJobRunner(**runner_kwargs)
    emr_job_flow_id = runner.make_persistent_job_flow()
    # the parenthesized form is valid under both Python 2 and Python 3
    print(emr_job_flow_id)
示例#40
0
def main(args):
    """Gather job flow history, compile stats from it, and print a report.

    :param args: command-line arguments handed to the option parser;
                 leftover positional arguments are reported as an error.
    """
    parser = make_option_parser()
    options, positional = parser.parse_args(args)

    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # one reference time is shared by all the steps below
    now = datetime.utcnow()

    log.info('getting job flow history...')
    history = get_job_flows(options.conf_path, options.max_days_ago, now=now)

    log.info('compiling job flow stats...')
    print_report(job_flows_to_stats(history, now=now), now=now)
示例#41
0
def main(args):
    """Gather cluster history, compile stats from it, and print a report.

    :param args: command-line arguments handed to the option parser;
                 leftover positional arguments are reported as an error.
    """
    parser = make_option_parser()
    options, positional = parser.parse_args(args)

    if positional:
        parser.error("takes no arguments")

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # one reference time is shared by all the steps below
    now = datetime.utcnow()

    log.info("getting job flow history...")
    cluster_iter = yield_clusters(
        max_days_ago=options.max_days_ago, now=now, **runner_kwargs(options))
    clusters = list(cluster_iter)

    log.info("compiling job flow stats...")
    print_report(clusters_to_stats(clusters, now=now), now=now)
示例#42
0
def main(cl_args=None):
    """Parse options and hand them to ``_maybe_terminate_clusters()``.

    :param cl_args: command-line arguments handed to the option parser;
                    leftover positional arguments are reported as an error.
    """
    parser = _make_option_parser()
    options, positional = parser.parse_args(cl_args)

    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # explicit options first, then whatever the runner itself needs
    _maybe_terminate_clusters(
        dry_run=options.dry_run,
        max_hours_idle=options.max_hours_idle,
        mins_to_end_of_hour=options.mins_to_end_of_hour,
        unpooled_only=options.unpooled_only,
        now=datetime.utcnow(),
        pool_name=options.pool_name,
        pooled_only=options.pooled_only,
        max_mins_locked=options.max_mins_locked,
        quiet=options.quiet,
        **_runner_kwargs(options)
    )
示例#43
0
def main(args):
    """Fetch job flow history, turn it into stats, and print the report.

    :param args: command-line arguments handed to the option parser;
                 any positional argument is reported as an error.
    """
    opt_parser = make_option_parser()
    options, extras = opt_parser.parse_args(args)

    if extras:
        opt_parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # the same timestamp is used for fetching, compiling and reporting
    now = datetime.utcnow()

    log.info('getting job flow history...')
    flows = get_job_flows(options.conf_path, options.max_days_ago, now=now)

    log.info('compiling job flow stats...')
    compiled = job_flows_to_stats(flows, now=now)

    print_report(compiled, now=now)
示例#44
0
def main(args=None):
    """Fetch cluster history, turn it into stats, and print the report.

    :param args: command-line arguments handed to the argument parser.
    """
    parser = _make_arg_parser()
    options = parser.parse_args(args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # the same timestamp is used for fetching, compiling and reporting
    now = _boto3_now()

    log.info('getting cluster history...')
    clusters = list(_yield_clusters(
        max_days_ago=options.max_days_ago, now=now, **_runner_kwargs(options)))

    log.info('compiling cluster stats...')
    compiled = _clusters_to_stats(clusters, now=now)

    _print_report(compiled, now=now)
示例#45
0
def main(args):
    """Gather cluster history, compile stats from it, and print a report.

    :param args: command-line arguments handed to the option parser;
                 leftover positional arguments are reported as an error.
    """
    parser = make_option_parser()
    options, positional = parser.parse_args(args)

    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # one reference time is shared by all the steps below
    now = datetime.utcnow()

    log.info('getting cluster history...')
    cluster_iter = yield_clusters(
        max_days_ago=options.max_days_ago, now=now, **runner_kwargs(options))
    clusters = list(cluster_iter)

    log.info('compiling cluster stats...')
    print_report(clusters_to_stats(clusters, now=now), now=now)
示例#46
0
def main(args=None):
    """Gather cluster history, compile stats from it, and print a report.

    :param args: command-line arguments handed to the option parser;
                 leftover positional arguments are reported as an error.
    """
    parser = _make_option_parser()
    options, positional = parser.parse_args(args)

    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # one reference time is shared by all the steps below
    now = datetime.utcnow()

    log.info('getting cluster history...')
    cluster_iter = _yield_clusters(
        max_days_ago=options.max_days_ago, now=now, **_runner_kwargs(options))
    clusters = list(cluster_iter)

    log.info('compiling cluster stats...')
    _print_report(_clusters_to_stats(clusters, now=now), now=now)
示例#47
0
def main(cl_args=None):
    """Run a command string on the nodes of an EMR cluster.

    The command string is split into arguments and handed, together with
    the output directory, to ``_run_on_all_nodes()``. The output directory
    defaults to the cluster ID and is made absolute.

    :param cl_args: command-line arguments handed to the argument parser.
    """
    arg_parser = ArgumentParser(
        usage='usage: %(prog)s CLUSTER_ID [options] "command string"',
        description=(
            'Run a command on the master and all worker nodes of an EMR'
            ' cluster. Store stdout/stderr for results in OUTPUT_DIR.'))

    arg_parser.add_argument(
        '-o', '--output-dir', dest='output_dir', default=None,
        help="Specify an output directory (default: CLUSTER_ID)")

    arg_parser.add_argument(
        dest='cluster_id', help='ID of cluster to run command on')
    arg_parser.add_argument(
        dest='cmd_string', help='command to run, as a single string')

    _add_basic_args(arg_parser)
    _add_runner_args(
        arg_parser,
        {'ec2_key_pair_file', 'ssh_bin'} | _filter_by_role(
            EMRJobRunner.OPT_NAMES, 'connect')
    )

    _alphabetize_actions(arg_parser)

    options = arg_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # everything the runner should not receive as a generic keyword argument
    not_for_runner = ('cluster_id', 'cmd_string', 'output_dir',
                      'quiet', 'verbose')
    runner_kwargs = dict(vars(options))
    for opt_name in not_for_runner:
        del runner_kwargs[opt_name]

    cmd_args = shlex_split(options.cmd_string)

    output_dir = os.path.abspath(options.output_dir or options.cluster_id)

    with EMRJobRunner(
            cluster_id=options.cluster_id, **runner_kwargs) as emr_runner:
        _run_on_all_nodes(emr_runner, output_dir, cmd_args)
示例#48
0
文件: mrboss.py 项目: seatgeek/mrjob
def main():
    """Run a command string on the nodes of an EMR job flow.

    Needs at least two positional arguments (job flow ID and command
    string); otherwise help is printed and the process exits with
    status 1. The output directory defaults to the job flow ID and is
    made absolute.
    """
    option_parser = OptionParser(
        usage='usage: %prog JOB_FLOW_ID OUTPUT_DIR [options] "command string"',
        description=(
            'Run a command on the master and all slaves of an EMR job'
            ' flow. Store stdout and stderr for results in OUTPUT_DIR.'))

    # options to copy from MRJob's option groups onto this parser
    assignments = {
        option_parser: ('conf_paths', 'quiet', 'verbose', 'ec2_key_pair_file')
    }

    option_parser.add_option(
        '-o', '--output-dir', dest='output_dir', default=None,
        help="Specify an output directory (default: JOB_FLOW_ID)")

    mr_job = MRJob()
    scrape_options_into_new_groups(mr_job.all_option_groups(), assignments)

    options, positional = option_parser.parse_args()

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    runner_kwargs = dict(vars(options))
    for opt_name in ('output_dir', 'quiet', 'verbose'):
        del runner_kwargs[opt_name]

    if len(positional) < 2:
        option_parser.print_help()
        sys.exit(1)

    job_flow_id, cmd_string = positional[:2]
    cmd_args = shlex_split(cmd_string)

    output_dir = os.path.abspath(options.output_dir or job_flow_id)

    with EMRJobRunner(
            emr_job_flow_id=job_flow_id, **runner_kwargs) as emr_runner:
        emr_runner._enable_slave_ssh_access()
        run_on_all_nodes(emr_runner, output_dir, cmd_args)
示例#49
0
def main(args, now=None):
    """Find long-running jobs on active clusters and print a report.

    :param args: command-line arguments handed to the option parser;
                 leftover positional arguments are reported as an error.
    :param now: reference time passed on to ``_find_long_running_jobs()``;
                ``datetime.utcnow()`` is used when omitted.
    """
    now = datetime.utcnow() if now is None else now

    parser = _make_option_parser()
    options, positional = parser.parse_args(args)

    if positional:
        parser.error("takes no arguments")

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    log.info("getting information about running jobs")
    runner = EMRJobRunner(**_runner_kwargs(options))
    emr_conn = runner.make_emr_conn()
    active_states = ["STARTING", "BOOTSTRAPPING", "RUNNING"]
    cluster_summaries = _yield_all_clusters(
        emr_conn, cluster_states=active_states)

    min_time = timedelta(hours=options.min_hours)

    _print_report(_find_long_running_jobs(
        emr_conn, cluster_summaries, min_time, now=now))
示例#50
0
def main(cl_args=None):
    """Parse options and call ``inspect_and_maybe_terminate_job_flows()``.

    :param cl_args: command-line arguments handed to the option parser;
                    leftover positional arguments are reported as an error.
    """
    parser = make_option_parser()
    options, positional = parser.parse_args(cl_args)

    if positional:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # the callee is only told to be quiet when the quiet count exceeds 1
    inspect_and_maybe_terminate_job_flows(conf_paths=options.conf_paths,
                                          dry_run=options.dry_run,
                                          max_hours_idle=options.max_hours_idle,
                                          mins_to_end_of_hour=(
                                              options.mins_to_end_of_hour),
                                          unpooled_only=options.unpooled_only,
                                          now=datetime.utcnow(),
                                          pool_name=options.pool_name,
                                          pooled_only=options.pooled_only,
                                          max_mins_locked=(
                                              options.max_mins_locked),
                                          quiet=(options.quiet > 1))
示例#51
0
def main(cl_args=None):
    """Entry point: read options, configure logging, and run the
    inspect-and-maybe-terminate pass over job flows.

    :param cl_args: arguments for the option parser (passed straight to
                    ``parse_args()``)

    Positional arguments are not accepted; the parser's ``error()`` is
    invoked if any are present.
    """
    option_parser = make_option_parser()
    opts, extra_args = option_parser.parse_args(cl_args)

    if extra_args:
        option_parser.error("takes no arguments")

    MRJob.set_up_logging(verbose=opts.verbose, quiet=opts.quiet)

    # the callee's own quiet flag is only set once the quiet option exceeds 1
    suppress_output = opts.quiet > 1
    current_time = datetime.utcnow()

    inspect_and_maybe_terminate_job_flows(
        conf_paths=opts.conf_paths,
        dry_run=opts.dry_run,
        max_hours_idle=opts.max_hours_idle,
        mins_to_end_of_hour=opts.mins_to_end_of_hour,
        unpooled_only=opts.unpooled_only,
        now=current_time,
        pool_name=opts.pool_name,
        pooled_only=opts.pooled_only,
        max_mins_locked=opts.max_mins_locked,
        quiet=suppress_output,
    )
示例#52
0
def main():
    """Run a command on the master and all slaves of an EMR job flow.

    Expects two positional command-line arguments: the job flow ID and
    the command string (split with ``shlex.split()``). Results are stored
    under ``--output-dir``, or under a directory named after the job flow
    ID when that option is not given.

    Prints help and exits with status 1 when fewer than two positional
    arguments are supplied; positional arguments past the second are
    ignored.
    """
    # OUTPUT_DIR is given through -o/--output-dir, not positionally: the
    # second positional argument is the command string, so the usage line
    # must not advertise a positional OUTPUT_DIR
    usage = 'usage: %prog JOB_FLOW_ID [options] "command string"'
    description = ('Run a command on the master and all slaves of an EMR job'
                   ' flow. Store stdout and stderr for results in OUTPUT_DIR.')

    option_parser = OptionParser(usage=usage, description=description)

    # options to copy from MRJob's option groups onto our parser
    assignments = {
        option_parser: ('conf_paths', 'quiet', 'verbose',
                        'ec2_key_pair_file')
    }

    option_parser.add_option('-o', '--output-dir', dest='output_dir',
                             default=None,
                             help="Specify an output directory (default:"
                             " JOB_FLOW_ID)")

    mr_job = MRJob()
    scrape_options_into_new_groups(mr_job.all_option_groups(), assignments)

    options, args = option_parser.parse_args()

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # every parsed option is forwarded to the runner except the ones
    # consumed by this script itself
    runner_kwargs = options.__dict__.copy()
    for unused_arg in ('output_dir', 'quiet', 'verbose'):
        del runner_kwargs[unused_arg]

    if len(args) < 2:
        option_parser.print_help()
        sys.exit(1)

    job_flow_id, cmd_string = args[:2]
    cmd_args = shlex.split(cmd_string)

    output_dir = os.path.abspath(options.output_dir or job_flow_id)

    with EMRJobRunner(emr_job_flow_id=job_flow_id, **runner_kwargs) as runner:
        runner._enable_slave_ssh_access()
        run_on_all_nodes(runner, output_dir, cmd_args)
示例#53
0
def main(cl_args=None):
    """Pick a step on an EMR cluster and log its probable cause of failure.

    :param cl_args: arguments for the argument parser (passed straight to
                    ``parse_args()``)

    Raises ``SystemExit(1)`` when ``_get_step()`` yields no step. A step
    whose state is not ``FAILED`` is still diagnosed, after a warning.
    """
    parser = _make_arg_parser()
    options = parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # everything the parser produced goes to the runner, apart from the
    # switches this function consumes itself
    runner_kwargs = dict(options.__dict__)
    for own_opt in ('quiet', 'verbose', 'step_id'):
        runner_kwargs.pop(own_opt, None)

    runner = EMRJobRunner(**runner_kwargs)
    emr_client = runner.make_emr_client()

    # choose the step to look at
    step = _get_step(emr_client, options.cluster_id, options.step_id)
    if not step:
        raise SystemExit(1)

    step_state = step['Status']['State']
    if step_state != 'FAILED':
        log.warning('step %s has state %s, not FAILED' %
                    (step['Id'], step_state))

    # work out the error from the step's logs
    log.info('Diagnosing step %s (%s)' % (step['Id'], step['Name']))

    log_interpretation = {'step_id': step['Id']}
    error = runner._pick_error(log_interpretation, _infer_step_type(step))

    # report what was found
    if not error:
        log.warning('No error detected')
    else:
        log.error('Probable cause of failure:\n\n%s\n\n' %
                  _format_error(error))
示例#54
0
def main(cl_args=None):
    """Parse command-line options and hand them to
    ``_maybe_terminate_clusters()``.

    :param cl_args: arguments for the option parser (passed straight to
                    ``parse_args()``)

    Positional arguments are rejected through the parser's ``error()``.
    """
    parser = _make_option_parser()
    options, leftover = parser.parse_args(cl_args)

    if leftover:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # additional keyword arguments derived from the parsed options; they
    # are still expanded with ** so a clash with an explicit keyword
    # below raises exactly as before
    extra_kwargs = _runner_kwargs(options)

    _maybe_terminate_clusters(
        dry_run=options.dry_run,
        max_hours_idle=options.max_hours_idle,
        mins_to_end_of_hour=options.mins_to_end_of_hour,
        unpooled_only=options.unpooled_only,
        now=datetime.utcnow(),
        pool_name=options.pool_name,
        pooled_only=options.pooled_only,
        max_mins_locked=options.max_mins_locked,
        quiet=options.quiet,
        **extra_kwargs
    )
示例#55
0
def main(cl_args=None):
    """Run a command on the master and all slaves of an EMR cluster.

    Expects two positional arguments: the cluster ID and the command
    string (split with ``shlex_split()``). Results are stored under
    ``--output-dir``, or under a directory named after the cluster ID
    when that option is not given.

    :param cl_args: arguments for the option parser (passed straight to
                    ``parse_args()``)

    Prints help and exits with status 1 when fewer than two positional
    arguments are supplied; positional arguments past the second are
    ignored.
    """
    # OUTPUT_DIR is given through -o/--output-dir, not positionally: the
    # second positional argument is the command string, so the usage line
    # must not advertise a positional OUTPUT_DIR
    usage = 'usage: %prog CLUSTER_ID [options] "command string"'
    description = ('Run a command on the master and all slaves of an EMR'
                   ' cluster. Store stdout/stderr for results in OUTPUT_DIR.')

    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option('-o',
                             '--output-dir',
                             dest='output_dir',
                             default=None,
                             help="Specify an output directory (default:"
                             " CLUSTER_ID)")
    add_basic_opts(option_parser)
    add_emr_connect_opts(option_parser)
    # copy these two options from MRJob's option groups onto our parser
    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        option_parser: ('ec2_key_pair_file', 'ssh_bin'),
    })
    alphabetize_options(option_parser)

    options, args = option_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # every parsed option is forwarded to the runner except the ones
    # consumed by this script itself
    runner_kwargs = options.__dict__.copy()
    for unused_arg in ('output_dir', 'quiet', 'verbose'):
        del runner_kwargs[unused_arg]

    if len(args) < 2:
        option_parser.print_help()
        sys.exit(1)

    cluster_id, cmd_string = args[:2]
    cmd_args = shlex_split(cmd_string)

    output_dir = os.path.abspath(options.output_dir or cluster_id)

    with EMRJobRunner(cluster_id=cluster_id, **runner_kwargs) as runner:
        runner._enable_slave_ssh_access()
        run_on_all_nodes(runner, output_dir, cmd_args)
示例#56
0
def main(cl_args=None):
    """Run a command on the master and all slaves of an EMR cluster.

    Two positional arguments are read: the cluster ID and the command
    string (split with ``shlex_split()``). Output goes to ``--output-dir``
    or, without that option, to a directory named after the cluster ID.

    :param cl_args: arguments for the option parser (passed straight to
                    ``parse_args()``)

    Prints help and exits with status 1 if fewer than two positional
    arguments are given.
    """
    parser = OptionParser(
        usage='usage: %prog CLUSTER_ID [options] "command string"',
        description=(
            'Run a command on the master and all slaves of an EMR'
            ' cluster. Store stdout/stderr for results in OUTPUT_DIR.'),
    )
    parser.add_option(
        '-o', '--output-dir',
        dest='output_dir',
        default=None,
        help="Specify an output directory (default: CLUSTER_ID)")

    _add_basic_options(parser)

    # runner options exposed on the command line
    runner_opt_names = (
        _pick_runner_opts('emr', 'connect') |
        {'ssh_bin', 'ec2_key_pair_file'})
    _add_runner_options(parser, runner_opt_names)

    _alphabetize_options(parser)

    options, args = parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # forward every parsed option to the runner except those used here
    runner_kwargs = dict(options.__dict__)
    for own_opt in ('output_dir', 'quiet', 'verbose'):
        runner_kwargs.pop(own_opt)

    if len(args) < 2:
        parser.print_help()
        sys.exit(1)

    cluster_id, cmd_string = args[0], args[1]
    cmd_args = shlex_split(cmd_string)

    output_dir = os.path.abspath(options.output_dir or cluster_id)

    with EMRJobRunner(cluster_id=cluster_id, **runner_kwargs) as runner:
        _run_on_all_nodes(runner, output_dir, cmd_args)