Example #1
def _CommonArgs(parser):
    """Register flags common to all tracks."""
    base.ASYNC_FLAG.AddToParser(parser)
    # Allow the user to specify new labels as well as update/remove existing labels.
    labels_util.AddUpdateLabelsFlags(parser)
    # Updates can take hours if a lot of data needs to be moved on HDFS
    flags.AddTimeoutFlag(parser, default='3h')
    parser.add_argument('name', help='The name of the cluster to update.')
    parser.add_argument('--num-workers',
                        type=int,
                        help='The new number of worker nodes in the cluster.')
    parser.add_argument(
        '--num-preemptible-workers',
        type=int,
        help='The new number of preemptible worker nodes in the cluster.')

    parser.add_argument('--graceful-decommission-timeout',
                        type=arg_parsers.Duration(lower_bound='0s',
                                                  upper_bound='1d'),
                        help="""
            The graceful decommission timeout for decommissioning Node Managers
            in the cluster, used when removing nodes. Graceful decommissioning
            allows removing nodes from the cluster without interrupting jobs in
            progress. Timeout specifies how long to wait for jobs in progress to
            finish before forcefully removing nodes (and potentially
            interrupting jobs). Timeout defaults to 0 if not set (for forceful
            decommission), and the maximum allowed timeout is 1 day.
            See $ gcloud topic datetimes for information on duration formats.
            """)
Example #2
 def Args(cls, parser):
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     flags.AddClusterResourceArg(parser, 'import', dataproc.api_version)
     export_util.AddImportFlags(parser, cls.GetSchemaPath(for_help=True))
     base.ASYNC_FLAG.AddToParser(parser)
     # 30m is backend timeout + 5m for safety buffer.
     flags.AddTimeoutFlag(parser, default='35m')
Example #3
 def Args(cls, parser):
     flags.AddTimeoutFlag(parser)
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     flags.AddClusterResourceArg(parser, 'diagnose', dataproc.api_version)
     parser.add_argument(
         '--tarball-access',
         type=arg_utils.ChoiceToEnumName,
         choices=Diagnose._GetValidTarballAccessChoices(dataproc),
         hidden=True,
         help='Target access privileges for diagnose tarball.')
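The snippets above call flags.AddTimeoutFlag both with and without an explicit default. The helper itself lives in the Dataproc command_lib and is not reproduced in these examples; the following is only a plausible sketch, assuming it does nothing more than register a client-side --timeout flag parsed as a duration.

def AddTimeoutFlag(parser, default='10m'):
    # Assumption: a thin wrapper along these lines; the real helper in
    # googlecloudsdk.command_lib.dataproc.flags may differ in help text,
    # default, and visibility.
    parser.add_argument(
        '--timeout',
        type=arg_parsers.Duration(),
        default=default,
        help=('Client-side timeout on how long to wait for Dataproc '
              'operations. See $ gcloud topic datetimes for information '
              'on duration formats.'))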
Example #4
    def Args(parser):
        parser.add_argument('--file',
                            help="""
        The path to a YAML file containing a Dataproc Cluster resource.

        For more information, see:
        https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.clusters#Cluster.
        """,
                            required=True)
        # TODO(b/80197067): Move defaults to a common location.
        flags.AddTimeoutFlag(parser, default='35m')
        base.ASYNC_FLAG.AddToParser(parser)
Example #5
    def Args(cls, parser):
        dataproc = dp.Dataproc(cls.ReleaseTrack())
        base.ASYNC_FLAG.AddToParser(parser)
        flags.AddClusterResourceArg(parser, 'create', dataproc.api_version)

        # 30m is backend timeout + 5m for safety buffer.
        flags.AddTimeoutFlag(parser, default='35m')

        parser.add_argument('--spark-engine-version',
                            required=True,
                            help="""\
        The version of the Spark engine to run on this cluster.
        """)

        parser.add_argument('--staging-bucket',
                            help="""\
        The Cloud Storage bucket to use to stage job dependencies, miscellaneous
        config files, and job driver console output when using this cluster.
        """)

        parser.add_argument('--properties',
                            type=arg_parsers.ArgDict(),
                            action=arg_parsers.UpdateAction,
                            default={},
                            metavar='PREFIX:PROPERTY=VALUE',
                            help="""\
        Specifies configuration properties for installed packages, such as
        Spark. Properties are mapped to configuration files by specifying a
        prefix, such as "core:io.serializations".
        """)

        flags.AddGkeClusterResourceArg(parser)
        parser.add_argument('--namespace',
                            help="""\
            The name of the Kubernetes namespace to deploy Dataproc system
            components in. This namespace does not need to exist.
            """)

        gke_clusters.AddPoolsArg(parser)
        parser.add_argument('--setup-workload-identity',
                            action='store_true',
                            help="""\
            Sets up the GKE Workload Identity for your Dataproc on GKE cluster.
            Note that running this requires elevated permissions as it will
            manipulate IAM policies on the Google Service Accounts that will be
            used by your Dataproc on GKE cluster.
            """)
        flags.AddMetastoreServiceResourceArg(parser)
        flags.AddHistoryServerClusterResourceArg(parser)
Example #6
 def Args(parser):
     flags.AddTemplateResourceArg(parser, 'run', api_version='v1beta2')
     flags.AddTimeoutFlag(parser, default='24h')
     base.ASYNC_FLAG.AddToParser(parser)
Example #7
    def Args(cls, parser):
        dataproc = dp.Dataproc(cls.ReleaseTrack())
        base.ASYNC_FLAG.AddToParser(parser)
        # Allow the user to specify new labels as well as update/remove existing labels.
        labels_util.AddUpdateLabelsFlags(parser)
        # Updates can take hours if a lot of data needs to be moved on HDFS
        flags.AddTimeoutFlag(parser, default='3h')
        flags.AddClusterResourceArg(parser, 'update', dataproc.api_version)
        parser.add_argument(
            '--num-workers',
            type=int,
            help='The new number of worker nodes in the cluster.')
        parser.add_argument(
            '--num-preemptible-workers',
            type=int,
            help='The new number of preemptible worker nodes in the cluster.')

        parser.add_argument('--graceful-decommission-timeout',
                            type=arg_parsers.Duration(lower_bound='0s',
                                                      upper_bound='1d'),
                            help="""
              The graceful decommission timeout for decommissioning Node Managers
              in the cluster, used when removing nodes. Graceful decommissioning
              allows removing nodes from the cluster without interrupting jobs in
              progress. Timeout specifies how long to wait for jobs in progress to
              finish before forcefully removing nodes (and potentially
              interrupting jobs). Timeout defaults to 0 if not set (for forceful
              decommission), and the maximum allowed timeout is 1 day.
              See $ gcloud topic datetimes for information on duration formats.
              """)

        idle_delete_group = parser.add_mutually_exclusive_group()
        idle_delete_group.add_argument('--max-idle',
                                       type=arg_parsers.Duration(),
                                       help="""\
        The duration before cluster is auto-deleted after last job finished,
        such as "2h" or "1d".
        See $ gcloud topic datetimes for information on duration formats.
        """)
        idle_delete_group.add_argument('--no-max-idle',
                                       action='store_true',
                                       help="""\
        Cancels the cluster auto-deletion by cluster idle duration (configured
         by --max-idle flag)
        """)

        auto_delete_group = parser.add_mutually_exclusive_group()
        auto_delete_group.add_argument('--max-age',
                                       type=arg_parsers.Duration(),
                                       help="""\
        The lifespan of the cluster before it is auto-deleted, such as
        "2h" or "1d".
        See $ gcloud topic datetimes for information on duration formats.
        """)
        auto_delete_group.add_argument('--expiration-time',
                                       type=arg_parsers.Datetime.Parse,
                                       help="""\
        The time when cluster will be auto-deleted, such as
        "2017-08-29T18:52:51.142Z". See $ gcloud topic datetimes for
        information on time formats.
        """)
        auto_delete_group.add_argument('--no-max-age',
                                       action='store_true',
                                       help="""\
        Cancels the cluster auto-deletion by maximum cluster age (configured by
         --max-age or --expiration-time flags)
        """)

        # Can only specify one of --autoscaling-policy or --disable-autoscaling
        autoscaling_group = parser.add_mutually_exclusive_group()
        flags.AddAutoscalingPolicyResourceArgForCluster(autoscaling_group,
                                                        api_version='v1')
        autoscaling_group.add_argument('--disable-autoscaling',
                                       action='store_true',
                                       help="""\
        Disable autoscaling, if it is enabled. This is an alias for passing the
        empty string to --autoscaling-policy.
        """)
Example #8
def _CommonArgs(parser, beta=False):
    """Register flags common to all tracks."""
    base.ASYNC_FLAG.AddToParser(parser)
    # Allow the user to specify new labels as well as update/remove existing labels.
    labels_util.AddUpdateLabelsFlags(parser)
    # Updates can take hours if a lot of data needs to be moved on HDFS
    flags.AddTimeoutFlag(parser, default='3h')
    parser.add_argument('name', help='The name of the cluster to update.')
    parser.add_argument('--num-workers',
                        type=int,
                        help='The new number of worker nodes in the cluster.')
    parser.add_argument(
        '--num-preemptible-workers',
        type=int,
        help='The new number of preemptible worker nodes in the cluster.')

    parser.add_argument('--graceful-decommission-timeout',
                        type=arg_parsers.Duration(lower_bound='0s',
                                                  upper_bound='1d'),
                        help="""
            The graceful decommission timeout for decommissioning Node Managers
            in the cluster, used when removing nodes. Graceful decommissioning
            allows removing nodes from the cluster without interrupting jobs in
            progress. Timeout specifies how long to wait for jobs in progress to
            finish before forcefully removing nodes (and potentially
            interrupting jobs). Timeout defaults to 0 if not set (for forceful
            decommission), and the maximum allowed timeout is 1 day.
            See $ gcloud topic datetimes for information on duration formats.
            """)

    idle_delete_group = parser.add_mutually_exclusive_group()
    idle_delete_group.add_argument('--max-idle',
                                   type=arg_parsers.Duration(),
                                   hidden=not (beta),
                                   help="""\
      The duration before cluster is auto-deleted after last job finished,
      such as "2h" or "1d".
      See $ gcloud topic datetimes for information on duration formats.
      """)
    idle_delete_group.add_argument('--no-max-idle',
                                   action='store_true',
                                   hidden=not (beta),
                                   help="""\
      Cancels the cluster auto-deletion by cluster idle duration (configured
       by --max-idle flag)
      """)

    auto_delete_group = parser.add_mutually_exclusive_group()
    auto_delete_group.add_argument('--max-age',
                                   type=arg_parsers.Duration(),
                                   hidden=not (beta),
                                   help="""\
      The lifespan of the cluster before it is auto-deleted, such as
      "2h" or "1d".
      See $ gcloud topic datetimes for information on duration formats.
      """)
    auto_delete_group.add_argument('--expiration-time',
                                   type=arg_parsers.Datetime.Parse,
                                   hidden=not (beta),
                                   help="""\
      The time when cluster will be auto-deleted, such as
      "2017-08-29T18:52:51.142Z". See $ gcloud topic datetimes for
      information on time formats.
      """)
    auto_delete_group.add_argument('--no-max-age',
                                   action='store_true',
                                   hidden=not (beta),
                                   help="""\
      Cancels the cluster auto-deletion by maximum cluster age (configured by
       --max-age or --expiration-time flags)
      """)
Example #9
def ArgsForClusterRef(parser,
                      beta=False,
                      include_deprecated=True,
                      include_ttl_config=False,
                      include_gke_platform_args=False):
    """Register flags for creating a dataproc cluster.

  Args:
    parser: The argparse.ArgParser to configure with dataproc cluster arguments.
    beta: whether or not this is a beta command (may affect flag visibility)
    include_deprecated: whether deprecated flags should be included
    include_ttl_config: whether to include Scheduled Delete(TTL) args
    include_gke_platform_args: whether to include GKE-based cluster args
  """
    labels_util.AddCreateLabelsFlags(parser)
    # 30m is backend timeout + 5m for safety buffer.
    flags.AddTimeoutFlag(parser, default='35m')
    flags.AddZoneFlag(parser, short_flags=include_deprecated)
    flags.AddComponentFlag(parser)

    platform_group = parser.add_argument_group(mutex=True)
    gce_platform_group = platform_group.add_argument_group(help="""\
    Compute Engine options for Dataproc clusters.
    """)

    instances_flags.AddTagsArgs(gce_platform_group)
    gce_platform_group.add_argument(
        '--metadata',
        type=arg_parsers.ArgDict(min_length=1),
        action='append',
        default=None,
        help=('Metadata to be made available to the guest operating system '
              'running on the instances'),
        metavar='KEY=VALUE')

    # Either allow creating a single node cluster (--single-node), or specifying
    # the number of workers in the multi-node cluster (--num-workers and
    # --num-secondary-workers)
    node_group = parser.add_argument_group(mutex=True)  # Mutually exclusive
    node_group.add_argument('--single-node',
                            action='store_true',
                            help="""\
      Create a single node cluster.

      A single node cluster has all master and worker components.
      It cannot have any separate worker nodes. If this flag is not
      specified, a cluster with separate workers is created.
      """)
    # Not mutually exclusive
    worker_group = node_group.add_argument_group(
        help='Multi-node cluster flags')
    worker_group.add_argument(
        '--num-workers',
        type=int,
        help='The number of worker nodes in the cluster. Defaults to '
        'server-specified.')
    worker_group.add_argument(
        '--secondary-worker-type',
        hidden=True,
        metavar='TYPE',
        choices=['preemptible', 'non-preemptible', 'unspecified'],
        default='unspecified',
        help='The type of the secondary worker group.')
    num_secondary_workers = worker_group.add_argument_group(mutex=True)
    num_secondary_workers.add_argument(
        '--num-preemptible-workers',
        action=actions.DeprecationAction(
            '--num-preemptible-workers',
            warn=('The `--num-preemptible-workers` flag is deprecated. '
                  'Use the `--num-secondary-workers` flag instead.')),
        type=int,
        hidden=True,
        help='The number of preemptible worker nodes in the cluster.')
    num_secondary_workers.add_argument(
        '--num-secondary-workers',
        type=int,
        help='The number of secondary worker nodes in the cluster.')

    parser.add_argument(
        '--master-machine-type',
        help='The type of machine to use for the master. Defaults to '
        'server-specified.')
    parser.add_argument(
        '--worker-machine-type',
        help='The type of machine to use for workers. Defaults to '
        'server-specified.')
    image_parser = parser.add_mutually_exclusive_group()
    # TODO(b/73291743): Add external doc link to --image
    image_parser.add_argument(
        '--image',
        metavar='IMAGE',
        help='The full custom image URI or the custom image name that '
        'will be used to create a cluster.')
    image_parser.add_argument(
        '--image-version',
        metavar='VERSION',
        help='The image version to use for the cluster. Defaults to the '
        'latest version.')
    parser.add_argument('--bucket',
                        help="""\
      The Google Cloud Storage bucket to use by default to stage job
      dependencies, miscellaneous config files, and job driver console output
      when using this cluster.
      """)

    netparser = gce_platform_group.add_argument_group(mutex=True)
    netparser.add_argument('--network',
                           help="""\
      The Compute Engine network that the VM instances of the cluster will be
      part of. This is mutually exclusive with --subnet. If neither is
      specified, this defaults to the "default" network.
      """)
    netparser.add_argument('--subnet',
                           help="""\
      Specifies the subnet that the cluster will be part of. This is mutually
      exclusive with --network.
      """)
    parser.add_argument(
        '--num-worker-local-ssds',
        type=int,
        help='The number of local SSDs to attach to each worker in a cluster.')
    parser.add_argument(
        '--num-master-local-ssds',
        type=int,
        help='The number of local SSDs to attach to the master in a cluster.')
    secondary_worker_local_ssds = parser.add_argument_group(mutex=True)
    secondary_worker_local_ssds.add_argument(
        '--num-preemptible-worker-local-ssds',
        type=int,
        hidden=True,
        action=actions.DeprecationAction(
            '--num-preemptible-worker-local-ssds',
            warn=(
                'The `--num-preemptible-worker-local-ssds` flag is deprecated. '
                'Use the `--num-secondary-worker-local-ssds` flag instead.')),
        help="""\
      The number of local SSDs to attach to each preemptible worker in
      a cluster.
      """)
    secondary_worker_local_ssds.add_argument(
        '--num-secondary-worker-local-ssds',
        type=int,
        help="""\
      The number of local SSDs to attach to each secondary worker in
      a cluster.
      """)
    parser.add_argument(
        '--initialization-actions',
        type=arg_parsers.ArgList(min_length=1),
        metavar='CLOUD_STORAGE_URI',
        help=('A list of Google Cloud Storage URIs of '
              'executables to run on each node in the cluster.'))
    parser.add_argument(
        '--initialization-action-timeout',
        type=arg_parsers.Duration(),
        metavar='TIMEOUT',
        default='10m',
        help=('The maximum duration of each initialization action. See '
              '$ gcloud topic datetimes for information on duration formats.'))
    parser.add_argument(
        '--num-masters',
        type=arg_parsers.CustomFunctionValidator(
            lambda n: int(n) in [1, 3],
            'Number of masters must be 1 (Standard) or 3 (High Availability)',
            parser=arg_parsers.BoundedInt(1, 3)),
        help="""\
      The number of master nodes in the cluster.

      Number of Masters | Cluster Mode
      --- | ---
      1 | Standard
      3 | High Availability
      """)
    parser.add_argument('--properties',
                        type=arg_parsers.ArgDict(),
                        action=arg_parsers.UpdateAction,
                        default={},
                        metavar='PREFIX:PROPERTY=VALUE',
                        help="""\
Specifies configuration properties for installed packages, such as Hadoop
and Spark.

Properties are mapped to configuration files by specifying a prefix, such as
"core:io.serializations". The following are supported prefixes and their
mappings:

Prefix | File | Purpose of file
--- | --- | ---
capacity-scheduler | capacity-scheduler.xml | Hadoop YARN Capacity Scheduler configuration
core | core-site.xml | Hadoop general configuration
distcp | distcp-default.xml | Hadoop Distributed Copy configuration
hadoop-env | hadoop-env.sh | Hadoop specific environment variables
hdfs | hdfs-site.xml | Hadoop HDFS configuration
hive | hive-site.xml | Hive configuration
mapred | mapred-site.xml | Hadoop MapReduce configuration
mapred-env | mapred-env.sh | Hadoop MapReduce specific environment variables
pig | pig.properties | Pig configuration
spark | spark-defaults.conf | Spark configuration
spark-env | spark-env.sh | Spark specific environment variables
yarn | yarn-site.xml | Hadoop YARN configuration
yarn-env | yarn-env.sh | Hadoop YARN specific environment variables

See https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/cluster-properties
for more information.

""")
    gce_platform_group.add_argument(
        '--service-account',
        help='The Google Cloud IAM service account to be authenticated as.')
    gce_platform_group.add_argument('--scopes',
                                    type=arg_parsers.ArgList(min_length=1),
                                    metavar='SCOPE',
                                    help="""\
Specifies scopes for the node instances. Multiple SCOPEs can be specified,
separated by commas.
Examples:

  $ {{command}} example-cluster --scopes https://www.googleapis.com/auth/bigtable.admin

  $ {{command}} example-cluster --scopes sqlservice,bigquery

The following *minimum scopes* are necessary for the cluster to function
properly and are always added, even if not explicitly specified:

  {minimum_scopes}

If the `--scopes` flag is not specified, the following *default scopes*
are also included:

  {additional_scopes}

If you want to enable all scopes, use the 'cloud-platform' scope.

{scopes_help}
""".format(minimum_scopes='\n  '.join(constants.MINIMUM_SCOPE_URIS),
           additional_scopes='\n  '.join(
               constants.ADDITIONAL_DEFAULT_SCOPE_URIS),
           scopes_help=compute_helpers.SCOPES_HELP))

    if include_deprecated:
        _AddDiskArgsDeprecated(parser)
    else:
        _AddDiskArgs(parser)

    # --no-address is an exception to the no negative-flag style guideline to be
    # consistent with gcloud compute instances create --no-address
    parser.add_argument('--no-address',
                        action='store_true',
                        help="""\
      If provided, the instances in the cluster will not be assigned external
      IP addresses.

      If omitted, the instances in the cluster will each be assigned an
      ephemeral external IP address.

      Note: Dataproc VMs need access to the Dataproc API. This can be achieved
      without external IP addresses using Private Google Access
      (https://cloud.google.com/compute/docs/private-google-access).
      """)

    boot_disk_type_detailed_help = """\
      The type of the boot disk. The value must be ``pd-standard'' or
      ``pd-ssd''.
      """
    parser.add_argument('--master-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    parser.add_argument('--worker-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    secondary_worker_boot_disk_type = parser.add_argument_group(mutex=True)
    secondary_worker_boot_disk_type.add_argument(
        '--preemptible-worker-boot-disk-type',
        help=boot_disk_type_detailed_help,
        hidden=True,
        action=actions.DeprecationAction(
            '--preemptible-worker-boot-disk-type',
            warn=(
                'The `--preemptible-worker-boot-disk-type` flag is deprecated. '
                'Use the `--secondary-worker-boot-disk-type` flag instead.')))
    secondary_worker_boot_disk_type.add_argument(
        '--secondary-worker-boot-disk-type', help=boot_disk_type_detailed_help)

    autoscaling_group = parser.add_argument_group()
    flags.AddAutoscalingPolicyResourceArgForCluster(
        autoscaling_group, api_version=('v1beta2' if beta else 'v1'))

    if include_ttl_config:
        parser.add_argument('--max-idle',
                            type=arg_parsers.Duration(),
                            help="""\
          The duration before cluster is auto-deleted after last job completes,
          such as "2h" or "1d".
          See $ gcloud topic datetimes for information on duration formats.
          """)

        auto_delete_group = parser.add_mutually_exclusive_group()
        auto_delete_group.add_argument('--max-age',
                                       type=arg_parsers.Duration(),
                                       help="""\
          The lifespan of the cluster before it is auto-deleted, such as
          "2h" or "1d".
          See $ gcloud topic datetimes for information on duration formats.
          """)

        auto_delete_group.add_argument('--expiration-time',
                                       type=arg_parsers.Datetime.Parse,
                                       help="""\
          The time when cluster will be auto-deleted, such as
          "2017-08-29T18:52:51.142Z." See $ gcloud topic datetimes for
          information on time formats.
          """)

    AddKerberosGroup(parser)

    flags.AddMinCpuPlatformArgs(parser)

    _AddAcceleratorArgs(parser)

    AddReservationAffinityGroup(
        gce_platform_group,
        group_text='Specifies the reservation for the instance.',
        affinity_text='The type of reservation for the instance.')
    if include_gke_platform_args:
        gke_based_cluster_group = platform_group.add_argument_group(
            hidden=True,
            help="""\
          Options for creating a GKE-based Dataproc cluster. Specifying any of these
          will indicate that this cluster is intended to be a GKE-based cluster.
          These options are mutually exclusive with GCE-based options.
          """)
        gke_based_cluster_group.add_argument('--gke-cluster',
                                             hidden=True,
                                             help="""\
            Required for GKE-based clusters. Specify the name of the GKE cluster to
            deploy this GKE-based Dataproc cluster to. This should be the short name
            and not the full path name.
            """)
        gke_based_cluster_group.add_argument('--gke-cluster-namespace',
                                             hidden=True,
                                             help="""\
            Optional. Specify the name of the namespace to deploy Dataproc system
            components into. This namespace does not need to already exist.
            """)
Example #10
 def Args(cls, parser):
   parser.add_argument('name', help='The name of the cluster to import.')
   export_util.AddImportFlags(parser, cls.GetSchemaPath(for_help=True))
   base.ASYNC_FLAG.AddToParser(parser)
   # 30m is backend timeout + 5m for safety buffer.
   flags.AddTimeoutFlag(parser, default='35m')
Example #11
 def Args(parser):
     flags.AddTemplateFlag(parser, 'run')
     flags.AddTimeoutFlag(parser, default='35m')
     flags.AddParametersFlag(parser)
     base.ASYNC_FLAG.AddToParser(parser)
Example #12
 def Args(cls, parser):
     flags.AddTimeoutFlag(parser)
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     flags.AddClusterResourceArg(parser, 'diagnose', dataproc.api_version)
Example #13
 def Args(parser):
     flags.AddTemplateFlag(parser, 'run')
     flags.AddTimeoutFlag(parser, default='24h')
     base.ASYNC_FLAG.AddToParser(parser)
Example #14
def _CommonArgs(parser):
    flags.AddTimeoutFlag(parser, default='35m')
    base.ASYNC_FLAG.AddToParser(parser)
    flags.AddParametersFlag(parser)
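A hedged sketch of how a module-level helper like the _CommonArgs above is typically consumed: the command class's Args hook simply delegates to it. The class name and base class below are placeholders, not taken from these snippets.

class MyCommand(base.Command):
    """Placeholder calliope command that reuses _CommonArgs for its flags."""

    @staticmethod
    def Args(parser):
        # Delegate all flag registration to the shared helper above.
        _CommonArgs(parser)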
Example #15
 def Args(cls, parser):
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     flags.AddTimeoutFlag(parser, default='35m')
     base.ASYNC_FLAG.AddToParser(parser)
     flags.AddParametersFlag(parser)
     flags.AddTemplateResourceArg(parser, 'run', dataproc.api_version)
Example #16
def ArgsForClusterRef(parser, beta=False, include_deprecated=True):
    # pylint: disable=unused-argument
    """Register flags for creating a dataproc cluster.

  Args:
    parser: The argparse.ArgParser to configure with dataproc cluster arguments.
    beta: whether or not this is a beta command (may affect flag visibility)
    include_deprecated: whether deprecated flags should be included
  """
    labels_util.AddCreateLabelsFlags(parser)
    instances_flags.AddTagsArgs(parser)
    # 30m is backend timeout + 5m for safety buffer.
    flags.AddTimeoutFlag(parser, default='35m')
    flags.AddZoneFlag(parser, short_flags=include_deprecated)

    parser.add_argument(
        '--metadata',
        type=arg_parsers.ArgDict(min_length=1),
        action='append',
        default=None,
        help=('Metadata to be made available to the guest operating system '
              'running on the instances'),
        metavar='KEY=VALUE')

    # Either allow creating a single node cluster (--single-node), or specifying
    # the number of workers in the multi-node cluster (--num-workers and
    # --num-preemptible-workers)
    node_group = parser.add_argument_group(mutex=True)  # Mutually exclusive
    node_group.add_argument('--single-node',
                            action='store_true',
                            help="""\
      Create a single node cluster.

      A single node cluster has all master and worker components.
      It cannot have any separate worker nodes. If this flag is not
      specified, a cluster with separate workers is created.
      """)
    # Not mutually exclusive
    worker_group = node_group.add_argument_group(
        help='Multi-node cluster flags')
    worker_group.add_argument(
        '--num-workers',
        type=int,
        help='The number of worker nodes in the cluster. Defaults to '
        'server-specified.')
    worker_group.add_argument(
        '--num-preemptible-workers',
        type=int,
        help='The number of preemptible worker nodes in the cluster.')

    parser.add_argument(
        '--master-machine-type',
        help='The type of machine to use for the master. Defaults to '
        'server-specified.')
    parser.add_argument(
        '--worker-machine-type',
        help='The type of machine to use for workers. Defaults to '
        'server-specified.')
    image_parser = parser.add_mutually_exclusive_group()
    # TODO(b/73291743): Add external doc link to --image
    image_parser.add_argument(
        '--image',
        metavar='IMAGE',
        help='The full custom image URI or the custom image name that '
        'will be used to create a cluster.')
    image_parser.add_argument(
        '--image-version',
        metavar='VERSION',
        help='The image version to use for the cluster. Defaults to the '
        'latest version.')
    parser.add_argument(
        '--bucket',
        help='The Google Cloud Storage bucket to use with the Google Cloud '
        'Storage connector. A bucket is auto created when this parameter is '
        'not specified.')

    netparser = parser.add_mutually_exclusive_group()
    netparser.add_argument('--network',
                           help="""\
      The Compute Engine network that the VM instances of the cluster will be
      part of. This is mutually exclusive with --subnet. If neither is
      specified, this defaults to the "default" network.
      """)
    netparser.add_argument('--subnet',
                           help="""\
      Specifies the subnet that the cluster will be part of. This is mutually
      exclusive with --network.
      """)
    parser.add_argument(
        '--num-worker-local-ssds',
        type=int,
        help='The number of local SSDs to attach to each worker in a cluster.')
    parser.add_argument(
        '--num-master-local-ssds',
        type=int,
        help='The number of local SSDs to attach to the master in a cluster.')
    parser.add_argument(
        '--initialization-actions',
        type=arg_parsers.ArgList(min_length=1),
        metavar='CLOUD_STORAGE_URI',
        help=('A list of Google Cloud Storage URIs of '
              'executables to run on each node in the cluster.'))
    parser.add_argument(
        '--initialization-action-timeout',
        type=arg_parsers.Duration(),
        metavar='TIMEOUT',
        default='10m',
        help=('The maximum duration of each initialization action. See '
              '$ gcloud topic datetimes for information on duration formats.'))
    parser.add_argument(
        '--num-masters',
        type=arg_parsers.CustomFunctionValidator(
            lambda n: int(n) in [1, 3],
            'Number of masters must be 1 (Standard) or 3 (High Availability)',
            parser=arg_parsers.BoundedInt(1, 3)),
        help="""\
      The number of master nodes in the cluster.

      Number of Masters | Cluster Mode
      --- | ---
      1 | Standard
      3 | High Availability
      """)
    parser.add_argument('--properties',
                        type=arg_parsers.ArgDict(),
                        metavar='PREFIX:PROPERTY=VALUE',
                        default={},
                        help="""\
Specifies configuration properties for installed packages, such as Hadoop
and Spark.

Properties are mapped to configuration files by specifying a prefix, such as
"core:io.serializations". The following are supported prefixes and their
mappings:

Prefix | File | Purpose of file
--- | --- | ---
capacity-scheduler | capacity-scheduler.xml | Hadoop YARN Capacity Scheduler configuration
core | core-site.xml | Hadoop general configuration
distcp | distcp-default.xml | Hadoop Distributed Copy configuration
hadoop-env | hadoop-env.sh | Hadoop specific environment variables
hdfs | hdfs-site.xml | Hadoop HDFS configuration
hive | hive-site.xml | Hive configuration
mapred | mapred-site.xml | Hadoop MapReduce configuration
mapred-env | mapred-env.sh | Hadoop MapReduce specific environment variables
pig | pig.properties | Pig configuration
spark | spark-defaults.conf | Spark configuration
spark-env | spark-env.sh | Spark specific environment variables
yarn | yarn-site.xml | Hadoop YARN configuration
yarn-env | yarn-env.sh | Hadoop YARN specific environment variables

See https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/cluster-properties
for more information.

""")
    parser.add_argument(
        '--service-account',
        help='The Google Cloud IAM service account to be authenticated as.')
    parser.add_argument('--scopes',
                        type=arg_parsers.ArgList(min_length=1),
                        metavar='SCOPE',
                        help="""\
Specifies scopes for the node instances. Multiple SCOPEs can be specified,
separated by commas.
Examples:

  $ {{command}} example-cluster --scopes https://www.googleapis.com/auth/bigtable.admin

  $ {{command}} example-cluster --scopes sqlservice,bigquery

The following *minimum scopes* are necessary for the cluster to function
properly and are always added, even if not explicitly specified:

  {minimum_scopes}

If the `--scopes` flag is not specified, the following *default scopes*
are also included:

  {additional_scopes}

If you want to enable all scopes, use the 'cloud-platform' scope.

{scopes_help}
""".format(minimum_scopes='\n  '.join(constants.MINIMUM_SCOPE_URIS),
           additional_scopes='\n  '.join(
               constants.ADDITIONAL_DEFAULT_SCOPE_URIS),
           scopes_help=compute_helpers.SCOPES_HELP))

    if include_deprecated:
        _AddDiskArgsDeprecated(parser)
    else:
        _AddDiskArgs(parser)

    # --no-address is an exception to the no negative-flag style guideline to be
    # consistent with gcloud compute instances create --no-address
    parser.add_argument('--no-address',
                        action='store_true',
                        help="""\
      If provided, the instances in the cluster will not be assigned external
      IP addresses.

      If omitted, the instances in the cluster will each be assigned an
      ephemeral external IP address.

      Note: Dataproc VMs need access to the Dataproc API. This can be achieved
      without external IP addresses using Private Google Access
      (https://cloud.google.com/compute/docs/private-google-access).
      """)

    boot_disk_type_detailed_help = """\
      The type of the boot disk. The value must be ``pd-standard'' or
      ``pd-ssd''.
      """
    parser.add_argument('--master-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    parser.add_argument('--worker-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    parser.add_argument('--preemptible-worker-boot-disk-type',
                        help=boot_disk_type_detailed_help)
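The image_parser and netparser groups above (and the idle/age groups in other examples) are mutually exclusive argument groups, so conflicting flags are rejected at parse time rather than server-side. A standalone plain-argparse sketch of that behavior, illustrative only since the snippets use the calliope parser rather than argparse directly.

import argparse

# Minimal reproduction of the mutually-exclusive-group pattern used above.
demo = argparse.ArgumentParser()
image_group = demo.add_mutually_exclusive_group()
image_group.add_argument('--image')
image_group.add_argument('--image-version')

demo.parse_args(['--image', 'my-custom-image'])  # accepted
# demo.parse_args(['--image', 'x', '--image-version', '1.5'])  # error: not allowed together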
Example #17
 def Args(parser):
     base.ASYNC_FLAG.AddToParser(parser)
     flags.AddTimeoutFlag(parser)
     dataproc = dp.Dataproc()
     flags.AddSessionResourceArg(parser, 'terminate', dataproc.api_version)
Example #18
 def Args(parser):
     parser.add_argument('name', help='The name of the cluster to delete.')
     base.ASYNC_FLAG.AddToParser(parser)
     flags.AddTimeoutFlag(parser)
Example #19
def ArgsForClusterRef(parser, beta=False):
    """Register flags for creating a dataproc cluster.

  Args:
    parser: The argparse.ArgParser to configure with dataproc cluster arguments.
    beta: whether or not this is a beta command (may affect flag visibility)
  """
    labels_util.AddCreateLabelsFlags(parser)
    instances_flags.AddTagsArgs(parser)
    # 30m is backend timeout + 5m for safety buffer.
    flags.AddTimeoutFlag(parser, default='35m')
    flags.AddZoneFlag(parser)

    parser.add_argument(
        '--metadata',
        type=arg_parsers.ArgDict(min_length=1),
        action='append',
        default=None,
        help=('Metadata to be made available to the guest operating system '
              'running on the instances'),
        metavar='KEY=VALUE')

    # Either allow creating a single node cluster (--single-node), or specifying
    # the number of workers in the multi-node cluster (--num-workers and
    # --num-preemptible-workers)
    node_group = parser.add_argument_group(mutex=True)  # Mutually exclusive
    node_group.add_argument('--single-node',
                            action='store_true',
                            help="""\
      Create a single node cluster.

      A single node cluster has all master and worker components.
      It cannot have any separate worker nodes. If this flag is not
      specified, a cluster with separate workers is created.
      """)
    # Not mutually exclusive
    worker_group = node_group.add_argument_group(
        help='Multi-node cluster flags')
    worker_group.add_argument(
        '--num-workers',
        type=int,
        help='The number of worker nodes in the cluster. Defaults to '
        'server-specified.')
    worker_group.add_argument(
        '--num-preemptible-workers',
        type=int,
        help='The number of preemptible worker nodes in the cluster.')

    parser.add_argument(
        '--master-machine-type',
        help='The type of machine to use for the master. Defaults to '
        'server-specified.')
    parser.add_argument(
        '--worker-machine-type',
        help='The type of machine to use for workers. Defaults to '
        'server-specified.')
    image_parser = parser.add_mutually_exclusive_group()
    # TODO(b/73291743): Add external doc link to --image
    image_parser.add_argument(
        '--image',
        metavar='IMAGE',
        help='The full custom image URI or the custom image name that '
        'will be used to create a cluster.')
    image_parser.add_argument(
        '--image-version',
        metavar='VERSION',
        help='The image version to use for the cluster. Defaults to the '
        'latest version.')
    parser.add_argument(
        '--bucket',
        help='The Google Cloud Storage bucket to use with the Google Cloud '
        'Storage connector. A bucket is auto created when this parameter is '
        'not specified.')

    netparser = parser.add_mutually_exclusive_group()
    netparser.add_argument('--network',
                           help="""\
      The Compute Engine network that the VM instances of the cluster will be
      part of. This is mutually exclusive with --subnet. If neither is
      specified, this defaults to the "default" network.
      """)
    netparser.add_argument('--subnet',
                           help="""\
      Specifies the subnet that the cluster will be part of. This is mutually
      exclusive with --network.
      """)
    parser.add_argument(
        '--num-worker-local-ssds',
        type=int,
        help='The number of local SSDs to attach to each worker in a cluster.')
    parser.add_argument(
        '--num-master-local-ssds',
        type=int,
        help='The number of local SSDs to attach to the master in a cluster.')
    parser.add_argument(
        '--initialization-actions',
        type=arg_parsers.ArgList(min_length=1),
        metavar='CLOUD_STORAGE_URI',
        help=('A list of Google Cloud Storage URIs of '
              'executables to run on each node in the cluster.'))
    parser.add_argument(
        '--initialization-action-timeout',
        type=arg_parsers.Duration(),
        metavar='TIMEOUT',
        default='10m',
        help=('The maximum duration of each initialization action. See '
              '$ gcloud topic datetimes for information on duration formats.'))
    parser.add_argument(
        '--num-masters',
        type=arg_parsers.CustomFunctionValidator(
            lambda n: int(n) in [1, 3],
            'Number of masters must be 1 (Standard) or 3 (High Availability)',
            parser=arg_parsers.BoundedInt(1, 3)),
        help="""\
      The number of master nodes in the cluster.

      [format="csv",options="header"]
      |========
      Number of Masters,Cluster Mode
      1,Standard
      3,High Availability
      |========
      """)
    parser.add_argument('--properties',
                        type=arg_parsers.ArgDict(),
                        metavar='PREFIX:PROPERTY=VALUE',
                        default={},
                        help="""\
Specifies configuration properties for installed packages, such as Hadoop
and Spark.

Properties are mapped to configuration files by specifying a prefix, such as
"core:io.serializations". The following are supported prefixes and their
mappings:

[format="csv",options="header"]
|========
Prefix,File,Purpose of file
capacity-scheduler,capacity-scheduler.xml,Hadoop YARN Capacity Scheduler configuration
core,core-site.xml,Hadoop general configuration
distcp,distcp-default.xml,Hadoop Distributed Copy configuration
hadoop-env,hadoop-env.sh,Hadoop specific environment variables
hdfs,hdfs-site.xml,Hadoop HDFS configuration
hive,hive-site.xml,Hive configuration
mapred,mapred-site.xml,Hadoop MapReduce configuration
mapred-env,mapred-env.sh,Hadoop MapReduce specific environment variables
pig,pig.properties,Pig configuration
spark,spark-defaults.conf,Spark configuration
spark-env,spark-env.sh,Spark specific environment variables
yarn,yarn-site.xml,Hadoop YARN configuration
yarn-env,yarn-env.sh,Hadoop YARN specific environment variables
|========

See https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/cluster-properties
for more information.

""")
    parser.add_argument(
        '--service-account',
        help='The Google Cloud IAM service account to be authenticated as.')
    parser.add_argument('--scopes',
                        type=arg_parsers.ArgList(min_length=1),
                        metavar='SCOPE',
                        help="""\
Specifies scopes for the node instances. Multiple SCOPEs can be specified,
separated by commas.
Examples:

  $ {{command}} example-cluster --scopes https://www.googleapis.com/auth/bigtable.admin

  $ {{command}} example-cluster --scopes sqlservice,bigquery

The following *minimum scopes* are necessary for the cluster to function
properly and are always added, even if not explicitly specified:

[format="csv"]
|========
{minimum_scopes}
|========

If the `--scopes` flag is not specified, the following *default scopes*
are also included:

[format="csv"]
|========
{additional_scopes}
|========

If you want to enable all scopes, use the 'cloud-platform' scope.

SCOPE can be either the full URI of the scope or an alias.
Available aliases are:

[format="csv",options="header"]
|========
Alias,URI
{aliases}
|========

{scope_deprecation_msg}
""".format(minimum_scopes='\n'.join(constants.MINIMUM_SCOPE_URIS),
           additional_scopes='\n'.join(
               constants.ADDITIONAL_DEFAULT_SCOPE_URIS),
           aliases=compute_helpers.SCOPE_ALIASES_FOR_HELP,
           scope_deprecation_msg=compute_constants.DEPRECATED_SCOPES_MESSAGES))

    master_boot_disk_size = parser.add_mutually_exclusive_group()
    worker_boot_disk_size = parser.add_mutually_exclusive_group()

    # Deprecated, to be removed at a future date.
    master_boot_disk_size.add_argument(
        '--master-boot-disk-size-gb',
        action=actions.DeprecationAction(
            '--master-boot-disk-size-gb',
            warn=(
                'The `--master-boot-disk-size-gb` flag is deprecated. '
                'Use `--master-boot-disk-size` flag with "GB" after value.')),
        type=int,
        hidden=True,
        help='Use `--master-boot-disk-size` flag with "GB" after value.')
    worker_boot_disk_size.add_argument(
        '--worker-boot-disk-size-gb',
        action=actions.DeprecationAction(
            '--worker-boot-disk-size-gb',
            warn=(
                'The `--worker-boot-disk-size-gb` flag is deprecated. '
                'Use `--worker-boot-disk-size` flag with "GB" after value.')),
        type=int,
        hidden=True,
        help='Use `--worker-boot-disk-size` flag with "GB" after value.')

    boot_disk_size_detailed_help = """\
      The size of the boot disk. The value must be a
      whole number followed by a size unit of ``KB'' for kilobyte, ``MB''
      for megabyte, ``GB'' for gigabyte, or ``TB'' for terabyte. For example,
      ``10GB'' will produce a 10 gigabyte disk. The minimum size a boot disk
      can have is 10 GB. Disk size must be a multiple of 1 GB.
      """
    master_boot_disk_size.add_argument(
        '--master-boot-disk-size',
        type=arg_parsers.BinarySize(lower_bound='10GB'),
        help=boot_disk_size_detailed_help)
    worker_boot_disk_size.add_argument(
        '--worker-boot-disk-size',
        type=arg_parsers.BinarySize(lower_bound='10GB'),
        help=boot_disk_size_detailed_help)
    parser.add_argument('--preemptible-worker-boot-disk-size',
                        type=arg_parsers.BinarySize(lower_bound='10GB'),
                        help=boot_disk_size_detailed_help)

    # Args that are visible only in Beta track
    parser.add_argument('--no-address',
                        action='store_true',
                        help="""\
      If provided, the instances in the cluster will not be assigned external
      IP addresses.

      Note: Dataproc VMs need access to the Dataproc API. This can be achieved
      without external IP addresses using Private Google Access
      (https://cloud.google.com/compute/docs/private-google-access).
      """,
                        hidden=not beta)

    boot_disk_type_detailed_help = """\
      The type of the boot disk. The value must be ``pd-standard'' or
      ``pd-ssd''.
      """
    parser.add_argument('--master-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    parser.add_argument('--worker-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    parser.add_argument('--preemptible-worker-boot-disk-type',
                        help=boot_disk_type_detailed_help)
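Several snippets above pair a deprecated flag with its replacement, using actions.DeprecationAction so the old spelling still works but emits a warning (for example --master-boot-disk-size-gb vs. --master-boot-disk-size). A condensed sketch of that pattern with placeholder flag names, reusing the same calliope helpers the examples already import.

# Placeholder flag names; mirrors the deprecated-alias registrations above.
size_group = parser.add_mutually_exclusive_group()
size_group.add_argument(
    '--new-flag',
    type=int,
    help='The preferred spelling.')
size_group.add_argument(
    '--old-flag',
    type=int,
    hidden=True,
    action=actions.DeprecationAction(
        '--old-flag',
        warn=('The `--old-flag` flag is deprecated. '
              'Use the `--new-flag` flag instead.')),
    help='Deprecated alias for `--new-flag`.')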
Example #20
 def Args(cls, parser):
     base.ASYNC_FLAG.AddToParser(parser)
     flags.AddTimeoutFlag(parser)
     dataproc = dp.Dataproc(cls.ReleaseTrack())
     flags.AddClusterResourceArg(parser, 'stop', dataproc.api_version)
Example #21
 def Args(parser):
   flags.AddTimeoutFlag(parser)
   parser.add_argument(
       'name',
       help='The name of the cluster to diagnose.')
Example #22
 def Args(parser):
     parser.add_argument('name', help='The name of the cluster to import.')
     flags.AddClusterSourceFlag(parser)
     base.ASYNC_FLAG.AddToParser(parser)
     # 30m is backend timeout + 5m for safety buffer.
     flags.AddTimeoutFlag(parser, default='35m')