Python _add_basic_opts примеры, mrjob.options._add_basic_opts Python примеры использования

Пример #1

0

Показать файл

Файл: create_cluster.py Проект: Pandafisher/mrjob

def _make_option_parser():
    """Build the option parser for the create-cluster command.

    Starts from a bare ``OptionParser``, hands it to ``_add_basic_opts``,
    scrapes the ``bootstrap_mrjob``, ``label`` and ``owner`` options out of
    ``MRJob().all_option_groups()``, then passes it through the EMR connect,
    EMR launch and Dataproc/EMR option helpers before calling
    ``_alphabetize_options``.

    :return: the populated ``OptionParser``
    """
    blurb = (
        'Create a persistent EMR cluster to run jobs in, and print its ID to'
        ' stdout. WARNING: Do not run'
        ' this without mrjob terminate-idle-clusters in your'
        ' crontab; clusters left idle can quickly become expensive!')
    parser = OptionParser(usage='%prog [options]', description=blurb)

    _add_basic_opts(parser)

    # no helper yields exactly these three options, so pick them out of
    # MRJob's option groups by name
    wanted = ('bootstrap_mrjob', 'label', 'owner')
    scrape_options_into_new_groups(
        MRJob().all_option_groups(), {parser: wanted})

    for add_opts in (_add_emr_connect_opts,
                     _add_emr_launch_opts,
                     _add_dataproc_emr_opts):
        add_opts(parser)

    _alphabetize_options(parser)
    return parser

Пример #2

0

Показать файл

def _make_option_parser():
    """Build the option parser for the "delete old files under a URI" tool.

    The parser takes ``<time-untouched> <URIs>`` as positional arguments
    (per its usage string) and defines a ``-t``/``--test`` switch stored as
    ``test``. It is then passed to ``_add_basic_opts``, gains the
    ``aws_region`` and ``s3_endpoint`` options scraped from
    ``MRJob().all_option_groups()``, and goes through
    ``_alphabetize_options``.

    :return: the populated ``OptionParser``
    """
    blurb = (
        'Delete all files in a given URI that are older than a specified'
        ' time.\n\nThe time parameter defines the threshold for removing'
        ' files. If the file has not been accessed for *time*, the file is'
        ' removed. The time argument is a number with an optional'
        ' single-character suffix specifying the units: m for minutes, h for'
        ' hours, d for days.  If no suffix is specified, time is in hours.')
    parser = OptionParser(
        usage='%prog [options] <time-untouched> <URIs>', description=blurb)

    # log-only switch: off unless the flag is given
    test_switch = dict(
        dest='test',
        default=False,
        action='store_true',
        help="Don't actually delete any files; just log that we would",
    )
    parser.add_option('-t', '--test', **test_switch)

    _add_basic_opts(parser)

    scraped_names = ('aws_region', 's3_endpoint')
    scrape_options_into_new_groups(
        MRJob().all_option_groups(), {parser: scraped_names})

    _alphabetize_options(parser)
    return parser

Пример #3

0

Показать файл

Файл: terminate_idle_clusters.py Проект: JayZ12138/EC601-Food-and-Stuff

def _make_option_parser():
    """Build the option parser for the terminate-idle-clusters command.

    Defines three float-valued thresholds (``--max-hours-idle``,
    ``--max-mins-locked``, ``--mins-to-end-of-hour``), the pool filters
    (``--unpooled-only``, ``--pooled-only``, ``--pool-name``) and a
    ``--dry-run`` switch, then passes the parser to ``_add_basic_opts``,
    ``_add_dataproc_emr_connect_opts``, ``_add_emr_connect_opts`` and
    ``_alphabetize_options``.

    :return: the populated ``OptionParser``
    """
    parser = OptionParser(
        usage='%prog [options]',
        description=(
            'Terminate idle EMR clusters that meet the criteria'
            ' passed in on the command line (or, by default,'
            ' clusters that have been idle for one hour).'))
    add = parser.add_option

    # float-valued thresholds as (flag, dest, default, help text)
    thresholds = (
        ('--max-hours-idle', 'max_hours_idle', None,
         'Max number of hours a cluster can go without bootstrapping,'
         ' running a step, or having a new step created. This will fire'
         ' even if there are pending steps which EMR has failed to'
         ' start. Make sure you set this higher than the amount of time'
         ' your jobs can take to start instances and bootstrap.'),
        ('--max-mins-locked', 'max_mins_locked',
         _DEFAULT_MAX_MINUTES_LOCKED,
         'Max number of minutes a cluster can be locked while idle.'),
        ('--mins-to-end-of-hour', 'mins_to_end_of_hour', None,
         'Terminate clusters that are within this many minutes of'
         ' the end of a full hour since the job started running'
         ' AND have no pending steps.'),
    )
    for flag, dest, default, help_text in thresholds:
        add(flag, dest=dest, default=default, type='float', help=help_text)

    # pool filters
    add('--unpooled-only', dest='unpooled_only', action='store_true',
        default=False, help='Only terminate un-pooled clusters')
    add('--pooled-only', dest='pooled_only', action='store_true',
        default=False, help='Only terminate pooled clusters')
    add('--pool-name', dest='pool_name', default=None,
        help='Only terminate clusters in the given named pool.')

    # log-only switch
    add('--dry-run', dest='dry_run', default=False, action='store_true',
        help="Don't actually kill idle jobs; just log that we would")

    _add_basic_opts(parser)
    _add_dataproc_emr_connect_opts(parser)
    _add_emr_connect_opts(parser)
    _alphabetize_options(parser)

    return parser

Пример #4

0

Показать файл

Файл: s3_tmpwatch.py Проект: Dean838/mrjob

def _make_option_parser():
    """Return the ``OptionParser`` used by the old-file cleanup script.

    Positional arguments (per the usage string) are ``<time-untouched>``
    and ``<URIs>``. The parser defines ``-t``/``--test`` (stored as
    ``test``, default ``False``), is handed to ``_add_basic_opts``, receives
    the ``aws_region`` and ``s3_endpoint`` options scraped from
    ``MRJob().all_option_groups()``, and is finally passed to
    ``_alphabetize_options``.
    """
    cli = OptionParser(usage='%prog [options] <time-untouched> <URIs>')
    cli.description = (
        'Delete all files in a given URI that are older than a specified'
        ' time.\n\nThe time parameter defines the threshold for removing'
        ' files. If the file has not been accessed for *time*, the file is'
        ' removed. The time argument is a number with an optional'
        ' single-character suffix specifying the units: m for minutes, h for'
        ' hours, d for days.  If no suffix is specified, time is in hours.')

    cli.add_option(
        '-t', '--test',
        action='store_true',
        default=False,
        dest='test',
        help="Don't actually delete any files; just log that we would")

    _add_basic_opts(cli)

    # borrow the two S3-related options from MRJob's option groups
    job_option_groups = MRJob().all_option_groups()
    scrape_options_into_new_groups(
        job_option_groups, {cli: ('aws_region', 's3_endpoint')})

    _alphabetize_options(cli)
    return cli

Пример #5

0

Показать файл

Файл: create_cluster.py Проект: Jeremyfanfan/mrjob

def _make_option_parser():
    """Return the ``OptionParser`` for the persistent-cluster creation tool.

    After construction the parser is handed, in order, to
    ``_add_basic_opts``, ``scrape_options_into_new_groups`` (for
    ``bootstrap_mrjob``, ``label`` and ``owner``),
    ``_add_emr_connect_opts``, ``_add_emr_launch_opts``,
    ``_add_dataproc_emr_opts`` and ``_alphabetize_options``.
    """
    cli = OptionParser(
        usage='%prog [options]',
        description=(
            'Create a persistent EMR cluster to run jobs in, and print its ID'
            ' to'
            ' stdout. WARNING: Do not run'
            ' this without mrjob terminate-idle-clusters in your'
            ' crontab; clusters left idle can quickly become expensive!'))

    _add_basic_opts(cli)

    # these options have no dedicated helper, so scrape them by name
    job_option_groups = MRJob().all_option_groups()
    scrape_options_into_new_groups(
        job_option_groups,
        {cli: ('bootstrap_mrjob', 'label', 'owner')})

    _add_emr_connect_opts(cli)
    _add_emr_launch_opts(cli)
    _add_dataproc_emr_opts(cli)

    _alphabetize_options(cli)
    return cli

Пример #6

0

Показать файл

Файл: terminate_idle_clusters.py Проект: anukat2015/mrjob

def _make_option_parser():
    """Return the ``OptionParser`` for the terminate-idle-clusters tool.

    Options defined here: the float thresholds ``--max-hours-idle``,
    ``--max-mins-locked`` and ``--mins-to-end-of-hour``; the pool filters
    ``--unpooled-only``, ``--pooled-only`` and ``--pool-name``; and the
    switches ``--dry-run`` and ``-t``/``--test``. The parser is then passed
    to ``_add_basic_opts``, ``_add_emr_connect_opts`` and
    ``_alphabetize_options``.
    """
    # keyword bundles shared by several options below
    as_float = dict(type='float')
    switch = dict(action='store_true', default=False)

    cli = OptionParser(
        usage='%prog [options]',
        description=(
            'Terminate idle EMR clusters that meet the criteria'
            ' passed in on the command line (or, by default,'
            ' clusters that have been idle for one hour).'))

    cli.add_option(
        '--max-hours-idle',
        dest='max_hours_idle',
        default=None,
        help=('Max number of hours a cluster can go without bootstrapping,'
              ' running a step, or having a new step created. This will fire'
              ' even if there are pending steps which EMR has failed to'
              ' start. Make sure you set this higher than the amount of time'
              ' your jobs can take to start instances and bootstrap.'),
        **as_float)
    cli.add_option(
        '--max-mins-locked',
        dest='max_mins_locked',
        default=DEFAULT_MAX_MINUTES_LOCKED,
        help='Max number of minutes a cluster can be locked while idle.',
        **as_float)
    cli.add_option(
        '--mins-to-end-of-hour',
        dest='mins_to_end_of_hour',
        default=None,
        help=('Terminate clusters that are within this many minutes of'
              ' the end of a full hour since the job started running'
              ' AND have no pending steps.'),
        **as_float)

    cli.add_option(
        '--unpooled-only',
        dest='unpooled_only',
        help='Only terminate un-pooled clusters',
        **switch)
    cli.add_option(
        '--pooled-only',
        dest='pooled_only',
        help='Only terminate pooled clusters',
        **switch)
    cli.add_option(
        '--pool-name',
        dest='pool_name',
        default=None,
        help='Only terminate clusters in the given named pool.')

    cli.add_option(
        '--dry-run',
        dest='dry_run',
        help="Don't actually kill idle jobs; just log that we would",
        **switch)

    # NOTE(review): this help text talks about deleting files, which does
    # not match this tool's description -- confirm what --test is for
    cli.add_option(
        '-t', '--test',
        dest='test',
        help="Don't actually delete any files; just log that we would",
        **switch)

    _add_basic_opts(cli)
    _add_emr_connect_opts(cli)
    _alphabetize_options(cli)

    return cli

Пример #7

0

Показать файл

Файл: terminate_cluster.py Проект: Dean838/mrjob

def _make_option_parser():
    """Build the option parser for the terminate-cluster command.

    The usage string expects one positional ``cluster-id``. The parser
    defines ``-t``/``--test`` (stored as ``test``, default ``False``) and is
    then passed to ``_add_basic_opts``, ``_add_emr_connect_opts`` and
    ``_alphabetize_options``.

    :return: the populated ``OptionParser``
    """
    parser = OptionParser(
        usage='%prog [options] cluster-id',
        description='Terminate an existing EMR cluster.')

    # NOTE(review): the help text mentions deleting files although this
    # tool's description is about terminating a cluster -- confirm wording
    parser.add_option(
        '-t', '--test',
        action='store_true',
        default=False,
        dest='test',
        help="Don't actually delete any files; just log that we would")

    for finish in (_add_basic_opts,
                   _add_emr_connect_opts,
                   _alphabetize_options):
        finish(parser)

    return parser

Пример #8

0

Показать файл

Файл: terminate_cluster.py Проект: Milkigit/mrjob

def _make_option_parser():
    """Return the ``OptionParser`` for terminating a single EMR cluster.

    Takes ``cluster-id`` as its positional argument (per the usage string),
    defines the ``-t``/``--test`` switch, and is handed to
    ``_add_basic_opts``, ``_add_emr_connect_opts`` and
    ``_alphabetize_options`` in that order.
    """
    cli = OptionParser(usage='%prog [options] cluster-id')
    cli.description = 'Terminate an existing EMR cluster.'

    # NOTE(review): help text refers to deleting files; it may have been
    # copied from another tool -- confirm before relying on it
    test_switch = dict(
        dest='test',
        default=False,
        action='store_true',
        help="Don't actually delete any files; just log that we would",
    )
    cli.add_option('-t', '--test', **test_switch)

    _add_basic_opts(cli)
    _add_emr_connect_opts(cli)
    _alphabetize_options(cli)

    return cli

Пример #9

0

Показать файл

Файл: audit_usage.py Проект: anirudhreddy92/mrjob

def _make_option_parser():
    """Build the option parser for the EMR usage report command.

    Defines ``--max-days-ago`` (a float stored as ``max_days_ago``, default
    ``None``), then passes the parser to ``_add_basic_opts``,
    ``_add_emr_connect_opts`` and ``_alphabetize_options``.

    :return: the populated ``OptionParser``
    """
    parser = OptionParser(
        usage='%prog [options]',
        description='Print a giant report on EMR usage.')

    lookback_help = (
        'Max number of days ago to look at jobs. By default, we go back'
        ' as far as EMR supports (currently about 2 months)')
    parser.add_option(
        '--max-days-ago',
        dest='max_days_ago',
        type='float',
        default=None,
        help=lookback_help)

    _add_basic_opts(parser)
    _add_emr_connect_opts(parser)

    _alphabetize_options(parser)

    return parser

Пример #10

0

Показать файл

Файл: audit_usage.py Проект: Milkigit/mrjob

def _make_option_parser():
    """Return the ``OptionParser`` for the EMR usage audit script.

    The only option defined locally is ``--max-days-ago`` (float, default
    ``None``). The parser is then handed to ``_add_basic_opts``,
    ``_add_emr_connect_opts`` and ``_alphabetize_options``.
    """
    cli = OptionParser(usage='%prog [options]')
    cli.description = 'Print a giant report on EMR usage.'

    # how far back to look; None means no explicit limit was given
    max_days_ago_opt = dict(
        dest='max_days_ago',
        type='float',
        default=None,
        help=('Max number of days ago to look at jobs. By default, we go back'
              ' as far as EMR supports (currently about 2 months)'),
    )
    cli.add_option('--max-days-ago', **max_days_ago_opt)

    for finish in (_add_basic_opts,
                   _add_emr_connect_opts,
                   _alphabetize_options):
        finish(cli)

    return cli

Пример #11

0

Показать файл

Файл: report_long_jobs.py Проект: anirudhreddy92/mrjob

def _make_option_parser():
    """Build the option parser for the long-running-jobs report.

    ``DEFAULT_MIN_HOURS`` is interpolated into the description and used as
    the default of ``--min-hours`` (a float stored as ``min_hours``). The
    parser is then passed to ``_add_basic_opts``, ``_add_emr_connect_opts``
    and ``_alphabetize_options``.

    :return: the populated ``OptionParser``
    """
    # the whole concatenated template is formatted, not just its last piece
    template = (
        'Report jobs running for more than a certain number of'
        ' hours (by default, %.1f). This can help catch buggy jobs'
        ' and Hadoop/EMR operational issues.')
    parser = OptionParser(
        usage='%prog [options]',
        description=template % DEFAULT_MIN_HOURS)

    # optparse substitutes "%default" in help text when printing help
    parser.add_option(
        '--min-hours',
        dest='min_hours',
        type='float',
        default=DEFAULT_MIN_HOURS,
        help=('Minimum number of hours a job can run before we report it.'
              ' Default: %default'))

    _add_basic_opts(parser)
    _add_emr_connect_opts(parser)

    _alphabetize_options(parser)

    return parser

Пример #12

0

Показать файл

def _make_option_parser():
    """Return the ``OptionParser`` for reporting long-running jobs.

    Defines ``--min-hours`` (float, default ``DEFAULT_MIN_HOURS``) and
    mentions the same default in the description. The parser is handed to
    ``_add_basic_opts``, ``_add_emr_connect_opts`` and
    ``_alphabetize_options`` before being returned.
    """
    cli = OptionParser(usage='%prog [options]')
    cli.description = (
        'Report jobs running for more than a certain number of'
        ' hours (by default, %.1f). This can help catch buggy jobs'
        ' and Hadoop/EMR operational issues.' % DEFAULT_MIN_HOURS)

    min_hours_opt = dict(
        dest='min_hours',
        type='float',
        default=DEFAULT_MIN_HOURS,
        # "%default" is filled in by optparse when help is rendered
        help=('Minimum number of hours a job can run before we report it.'
              ' Default: %default'),
    )
    cli.add_option('--min-hours', **min_hours_opt)

    for finish in (_add_basic_opts,
                   _add_emr_connect_opts,
                   _alphabetize_options):
        finish(cli)

    return cli

Пример #13

0

Показать файл

def main(cl_args=None):
    """Run a command string on the nodes of an EMR cluster.

    Expects two positional arguments, ``CLUSTER_ID`` and a quoted command
    string; with fewer than two, the help is printed and the process exits
    with status 1. Output goes to ``--output-dir`` if given, otherwise to a
    directory named after the cluster ID.

    :param cl_args: argument list passed to ``OptionParser.parse_args``;
                    ``None`` makes optparse read ``sys.argv[1:]``
    """
    parser = OptionParser(
        usage='usage: %prog CLUSTER_ID [options] "command string"',
        description=(
            'Run a command on the master and all slaves of an EMR'
            ' cluster. Store stdout/stderr for results in OUTPUT_DIR.'))
    parser.add_option(
        '-o', '--output-dir',
        dest='output_dir',
        default=None,
        help="Specify an output directory (default:"
        " CLUSTER_ID)")

    _add_basic_opts(parser)
    _add_dataproc_emr_connect_opts(parser)
    _add_emr_connect_opts(parser)
    scrape_options_into_new_groups(
        MRJob().all_option_groups(),
        {parser: ('ec2_key_pair_file', 'ssh_bin')})
    _alphabetize_options(parser)

    options, positional = parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # every remaining option is forwarded to the runner; these three are
    # consumed here and must not be passed along
    runner_kwargs = dict(vars(options))
    for consumed in ('output_dir', 'quiet', 'verbose'):
        runner_kwargs.pop(consumed)

    if len(positional) < 2:
        parser.print_help()
        sys.exit(1)

    cluster_id = positional[0]
    cmd_args = shlex_split(positional[1])

    output_dir = os.path.abspath(options.output_dir or cluster_id)

    with EMRJobRunner(cluster_id=cluster_id, **runner_kwargs) as runner:
        _run_on_all_nodes(runner, output_dir, cmd_args)

Пример #14

0

Показать файл

Файл: mrboss.py Проект: irskep/mrjob

def main(cl_args=None):
    """Run a command string on the nodes of an EMR cluster.

    Two positional arguments are read: the cluster ID and a quoted command
    string. The output directory is NOT positional; it comes from
    ``-o``/``--output-dir`` and falls back to a directory named after the
    cluster ID. With fewer than two positional arguments the help is
    printed and the process exits with status 1.

    :param cl_args: argument list passed to ``OptionParser.parse_args``;
                    ``None`` makes optparse read ``sys.argv[1:]``
    """
    # The usage line used to advertise a positional OUTPUT_DIR, but the
    # second positional argument is parsed as the command string below;
    # following the old usage would have run the directory name as the
    # command.
    usage = 'usage: %prog CLUSTER_ID [options] "command string"'
    description = ('Run a command on the master and all slaves of an EMR'
                   ' cluster. Store stdout/stderr for results in OUTPUT_DIR.')

    option_parser = OptionParser(usage=usage, description=description)
    option_parser.add_option('-o', '--output-dir', dest='output_dir',
                             default=None,
                             help="Specify an output directory (default:"
                             " CLUSTER_ID)")
    _add_basic_opts(option_parser)
    _add_emr_connect_opts(option_parser)
    scrape_options_into_new_groups(MRJob().all_option_groups(), {
        option_parser: ('ec2_key_pair_file', 'ssh_bin'),
    })
    _alphabetize_options(option_parser)

    options, args = option_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # everything except the options consumed here is forwarded to the runner
    runner_kwargs = options.__dict__.copy()
    for unused_arg in ('output_dir', 'quiet', 'verbose'):
        del runner_kwargs[unused_arg]

    if len(args) < 2:
        option_parser.print_help()
        sys.exit(1)

    cluster_id, cmd_string = args[:2]
    cmd_args = shlex_split(cmd_string)

    output_dir = os.path.abspath(options.output_dir or cluster_id)

    with EMRJobRunner(cluster_id=cluster_id, **runner_kwargs) as runner:
        runner._enable_slave_ssh_access()
        _run_on_all_nodes(runner, output_dir, cmd_args)

Пример #15

0

Показать файл

Файл: report_long_jobs.py Проект: irskep/mrjob

def _make_option_parser():
    """Build the option parser for the long-running-jobs report.

    ``--min-hours`` (float, stored as ``min_hours``) defaults to
    ``DEFAULT_MIN_HOURS``, and the same value is formatted into the
    description. The parser is then passed to ``_add_basic_opts``,
    ``_add_emr_connect_opts`` and ``_alphabetize_options``.

    :return: the populated ``OptionParser``
    """
    summary_template = (
        'Report jobs running for more than a certain number of'
        ' hours (by default, %.1f). This can help catch buggy jobs'
        ' and Hadoop/EMR operational issues.')
    summary = summary_template % DEFAULT_MIN_HOURS

    parser = OptionParser(usage='%prog [options]', description=summary)

    # optparse replaces "%default" with the option's default in help output
    min_hours_help = (
        'Minimum number of hours a job can run before we report it.'
        ' Default: %default')
    parser.add_option(
        '--min-hours',
        default=DEFAULT_MIN_HOURS,
        dest='min_hours',
        help=min_hours_help,
        type='float')

    _add_basic_opts(parser)
    _add_emr_connect_opts(parser)

    _alphabetize_options(parser)

    return parser

Пример #16

0

Показать файл

    def configure_options(self):
        """Define arguments for this script. Called from :py:meth:`__init__()`.

        Run ``python -m mrjob.job.MRJob --help`` to see all options.

        Re-define to define custom command-line arguments::

            def configure_options(self):
                super(MRYourJob, self).configure_options()

                self.add_passthrough_option(...)
                self.add_file_option(...)
                ...

        Besides the ``--help*`` switches, this creates one option group per
        area and stores each on ``self``: ``proto_opt_group``,
        ``runner_opt_group``, ``local_opt_group``, ``hadoop_emr_opt_group``,
        ``hadoop_opt_group`` and ``emr_opt_group``.
        """
        # help switches as (flag, dest, help text); all are plain
        # store_true flags that default to False
        help_switches = (
            ('--help', 'help_main', 'show this message and exit'),
            ('--help-emr', 'help_emr', 'show EMR-related options'),
            ('--help-hadoop', 'help_hadoop', 'show Hadoop-related options'),
            ('--help-local', 'help_local',
             'show local/inline runner-related options'),
            ('--help-runner', 'help_runner', 'show runner-related options'),
        )
        for flag, dest, help_text in help_switches:
            self.option_parser.add_option(flag,
                                          dest=dest,
                                          action='store_true',
                                          default=False,
                                          help=help_text)

        # protocol stuff
        self.proto_opt_group = OptionGroup(self.option_parser, 'Protocols')
        self.option_parser.add_option_group(self.proto_opt_group)

        _add_protocol_opts(self.proto_opt_group)

        # options for running the entire job
        self.runner_opt_group = OptionGroup(self.option_parser,
                                            'Running the entire job')
        self.option_parser.add_option_group(self.runner_opt_group)

        _add_runner_opts(self.runner_opt_group, self._DEFAULT_RUNNER)
        _add_basic_opts(self.runner_opt_group)

        # options for inline/local runners
        self.local_opt_group = OptionGroup(
            self.option_parser,
            'Running locally (these apply when you set -r inline or -r local)')
        self.option_parser.add_option_group(self.local_opt_group)

        _add_local_opts(self.local_opt_group)

        # options common to Hadoop and EMR
        self.hadoop_emr_opt_group = OptionGroup(
            self.option_parser,
            'Running on Hadoop or EMR (these apply when you set -r hadoop or'
            ' -r emr)')
        self.option_parser.add_option_group(self.hadoop_emr_opt_group)

        _add_hadoop_emr_opts(self.hadoop_emr_opt_group)

        # options for running the job on Hadoop
        self.hadoop_opt_group = OptionGroup(
            self.option_parser,
            'Running on Hadoop (these apply when you set -r hadoop)')
        self.option_parser.add_option_group(self.hadoop_opt_group)

        _add_hadoop_opts(self.hadoop_opt_group)

        # options for running the job on EMR
        self.emr_opt_group = OptionGroup(
            self.option_parser,
            'Running on EMR (these apply when you set -r emr)')
        self.option_parser.add_option_group(self.emr_opt_group)

        _add_emr_opts(self.emr_opt_group)

Пример #17

0

Показать файл

Файл: launch.py Проект: kaiyik/mrjob

    def configure_options(self):
        """Define arguments for this script. Called from :py:meth:`__init__()`.

        Run ``python -m mrjob.job.MRJob --help`` to see all options.

        Re-define to define custom command-line arguments::

            def configure_options(self):
                super(MRYourJob, self).configure_options()

                self.add_passthrough_option(...)
                self.add_file_option(...)
                ...

        Besides the ``--help*`` switches, this creates one option group per
        area and stores each on ``self``: ``proto_opt_group``,
        ``runner_opt_group``, ``local_opt_group``, ``hadoop_emr_opt_group``,
        ``hadoop_opt_group``, ``dataproc_emr_opt_group``,
        ``dataproc_opt_group`` and ``emr_opt_group``.
        """
        # help switches as (flag, dest, help text); all are plain
        # store_true flags that default to False
        help_switches = (
            ('--help', 'help_main', 'show this message and exit'),
            ('--help-dataproc', 'help_dataproc',
             'show Dataproc-related options'),
            ('--help-emr', 'help_emr', 'show EMR-related options'),
            ('--help-hadoop', 'help_hadoop', 'show Hadoop-related options'),
            ('--help-local', 'help_local',
             'show local/inline runner-related options'),
            ('--help-runner', 'help_runner', 'show runner-related options'),
        )
        for flag, dest, help_text in help_switches:
            self.option_parser.add_option(
                flag, dest=dest, action='store_true', default=False,
                help=help_text)

        # protocol stuff
        self.proto_opt_group = OptionGroup(
            self.option_parser, 'Protocols')
        self.option_parser.add_option_group(self.proto_opt_group)

        _add_protocol_opts(self.proto_opt_group)

        # options for running the entire job
        self.runner_opt_group = OptionGroup(
            self.option_parser, 'Running the entire job')
        self.option_parser.add_option_group(self.runner_opt_group)

        _add_runner_opts(self.runner_opt_group, self._DEFAULT_RUNNER)
        _add_basic_opts(self.runner_opt_group)

        # options for inline/local runners
        self.local_opt_group = OptionGroup(
            self.option_parser,
            'Running locally (these apply when you set -r inline or -r local)')
        self.option_parser.add_option_group(self.local_opt_group)

        _add_local_opts(self.local_opt_group)

        # options common to Hadoop and EMR
        self.hadoop_emr_opt_group = OptionGroup(
            self.option_parser,
            'Running on Hadoop or EMR (these apply when you set -r hadoop or'
            ' -r emr)')
        self.option_parser.add_option_group(self.hadoop_emr_opt_group)

        _add_hadoop_emr_opts(self.hadoop_emr_opt_group)

        # options for running the job on Hadoop
        self.hadoop_opt_group = OptionGroup(
            self.option_parser,
            'Running on Hadoop (these apply when you set -r hadoop)')
        self.option_parser.add_option_group(self.hadoop_opt_group)

        _add_hadoop_opts(self.hadoop_opt_group)

        # options for running the job on Dataproc or EMR
        self.dataproc_emr_opt_group = OptionGroup(
            self.option_parser,
            'Running on Dataproc or EMR (these apply when you set -r dataproc'
            ' or -r emr)')
        self.option_parser.add_option_group(self.dataproc_emr_opt_group)

        _add_dataproc_emr_opts(self.dataproc_emr_opt_group)

        # options for running the job on Dataproc
        self.dataproc_opt_group = OptionGroup(
            self.option_parser,
            'Running on Dataproc (these apply when you set -r dataproc)')
        self.option_parser.add_option_group(self.dataproc_opt_group)

        _add_dataproc_opts(self.dataproc_opt_group)

        # options for running the job on EMR
        self.emr_opt_group = OptionGroup(
            self.option_parser,
            'Running on EMR (these apply when you set -r emr)')
        self.option_parser.add_option_group(self.emr_opt_group)

        _add_emr_opts(self.emr_opt_group)

Python _add_basic_opts примеры использования