Пример #1
0
    def configure_options(self):
        """Define arguments for this script. Called from :py:meth:`__init__()`.

        Run ``python -m mrjob.job.MRJob --help`` to see all options.

        Re-define to define custom command-line arguments::

            def configure_options(self):
                super(MRYourJob, self).configure_options()

                self.add_passthrough_option(...)
                self.add_file_option(...)
                ...

        This method first registers the ``--help*`` switches directly on
        ``self.option_parser``. It then creates one option group per
        category, stores each group on ``self`` (``proto_opt_group``,
        ``runner_opt_group``, ``local_opt_group``, ``hadoop_emr_opt_group``,
        ``hadoop_opt_group``, ``emr_opt_group``), attaches it to the parser,
        and lets the matching module-level ``_add_*_opts`` helper fill it.
        """
        parser = self.option_parser

        # All help switches are store_true flags defaulting to False; only
        # the flag name, destination and help text vary, so register them
        # from a table. Order is preserved because it determines the order
        # in which the parser lists them.
        help_switches = (
            ('--help', 'help_main', 'show this message and exit'),
            ('--help-emr', 'help_emr', 'show EMR-related options'),
            ('--help-hadoop', 'help_hadoop', 'show Hadoop-related options'),
            ('--help-local', 'help_local',
             'show local/inline runner-related options'),
            ('--help-runner', 'help_runner', 'show runner-related options'),
        )
        for flag, dest, help_text in help_switches:
            parser.add_option(
                flag, dest=dest, action='store_true', default=False,
                help=help_text)

        # protocol stuff
        self.proto_opt_group = OptionGroup(parser, 'Protocols')
        parser.add_option_group(self.proto_opt_group)
        _add_protocol_opts(self.proto_opt_group)

        # options for running the entire job; this group receives both the
        # runner options and the basic options
        self.runner_opt_group = OptionGroup(parser, 'Running the entire job')
        parser.add_option_group(self.runner_opt_group)
        _add_runner_opts(self.runner_opt_group, self._DEFAULT_RUNNER)
        _add_basic_opts(self.runner_opt_group)

        # options for inline/local runners
        self.local_opt_group = OptionGroup(
            parser,
            'Running locally (these apply when you set -r inline or -r local)')
        parser.add_option_group(self.local_opt_group)
        _add_local_opts(self.local_opt_group)

        # options common to Hadoop and EMR
        self.hadoop_emr_opt_group = OptionGroup(
            parser,
            'Running on Hadoop or EMR (these apply when you set -r hadoop or'
            ' -r emr)')
        parser.add_option_group(self.hadoop_emr_opt_group)
        _add_hadoop_emr_opts(self.hadoop_emr_opt_group)

        # options for running the job on Hadoop
        self.hadoop_opt_group = OptionGroup(
            parser,
            'Running on Hadoop (these apply when you set -r hadoop)')
        parser.add_option_group(self.hadoop_opt_group)
        _add_hadoop_opts(self.hadoop_opt_group)

        # options for running the job on EMR
        self.emr_opt_group = OptionGroup(
            parser,
            'Running on EMR (these apply when you set -r emr)')
        parser.add_option_group(self.emr_opt_group)
        _add_emr_opts(self.emr_opt_group)
Пример #2
0
    def configure_options(self):
        """Define arguments for this script. Called from :py:meth:`__init__()`.

        Run ``python -m mrjob.job.MRJob --help`` to see all options.

        Re-define to define custom command-line arguments::

            def configure_options(self):
                super(MRYourJob, self).configure_options()

                self.add_passthrough_option(...)
                self.add_file_option(...)
                ...

        This method first registers the ``--help*`` switches directly on
        ``self.option_parser``. It then creates one option group per
        category, stores each group on ``self`` (``proto_opt_group``,
        ``runner_opt_group``, ``local_opt_group``, ``hadoop_emr_opt_group``,
        ``hadoop_opt_group``, ``dataproc_emr_opt_group``,
        ``dataproc_opt_group``, ``emr_opt_group``), attaches it to the
        parser, and lets the matching module-level ``_add_*_opts`` helper
        fill it.
        """
        parser = self.option_parser

        # All help switches are store_true flags defaulting to False; only
        # the flag name, destination and help text vary, so register them
        # from a table. Order is preserved because it determines the order
        # in which the parser lists them.
        help_switches = (
            ('--help', 'help_main', 'show this message and exit'),
            ('--help-dataproc', 'help_dataproc',
             'show Dataproc-related options'),
            ('--help-emr', 'help_emr', 'show EMR-related options'),
            ('--help-hadoop', 'help_hadoop', 'show Hadoop-related options'),
            ('--help-local', 'help_local',
             'show local/inline runner-related options'),
            ('--help-runner', 'help_runner', 'show runner-related options'),
        )
        for flag, dest, help_text in help_switches:
            parser.add_option(
                flag, dest=dest, action='store_true', default=False,
                help=help_text)

        # protocol stuff
        self.proto_opt_group = OptionGroup(parser, 'Protocols')
        parser.add_option_group(self.proto_opt_group)
        _add_protocol_opts(self.proto_opt_group)

        # options for running the entire job; this group receives both the
        # runner options and the basic options
        self.runner_opt_group = OptionGroup(parser, 'Running the entire job')
        parser.add_option_group(self.runner_opt_group)
        _add_runner_opts(self.runner_opt_group, self._DEFAULT_RUNNER)
        _add_basic_opts(self.runner_opt_group)

        # options for inline/local runners
        self.local_opt_group = OptionGroup(
            parser,
            'Running locally (these apply when you set -r inline or -r local)')
        parser.add_option_group(self.local_opt_group)
        _add_local_opts(self.local_opt_group)

        # options common to Hadoop and EMR
        self.hadoop_emr_opt_group = OptionGroup(
            parser,
            'Running on Hadoop or EMR (these apply when you set -r hadoop or'
            ' -r emr)')
        parser.add_option_group(self.hadoop_emr_opt_group)
        _add_hadoop_emr_opts(self.hadoop_emr_opt_group)

        # options for running the job on Hadoop
        self.hadoop_opt_group = OptionGroup(
            parser,
            'Running on Hadoop (these apply when you set -r hadoop)')
        parser.add_option_group(self.hadoop_opt_group)
        _add_hadoop_opts(self.hadoop_opt_group)

        # options for running the job on Dataproc or EMR
        self.dataproc_emr_opt_group = OptionGroup(
            parser,
            'Running on Dataproc or EMR (these apply when you set -r dataproc'
            ' or -r emr)')
        parser.add_option_group(self.dataproc_emr_opt_group)
        _add_dataproc_emr_opts(self.dataproc_emr_opt_group)

        # options for running the job on Dataproc
        self.dataproc_opt_group = OptionGroup(
            parser,
            'Running on Dataproc (these apply when you set -r dataproc)')
        parser.add_option_group(self.dataproc_opt_group)
        _add_dataproc_opts(self.dataproc_opt_group)

        # options for running the job on EMR
        self.emr_opt_group = OptionGroup(
            parser,
            'Running on EMR (these apply when you set -r emr)')
        parser.add_option_group(self.emr_opt_group)
        _add_emr_opts(self.emr_opt_group)