Example #1
0
  def Args(parser):
    _CommonArgs(parser, beta=True)
    flags.AddMinCpuPlatformArgs(parser, base.ReleaseTrack.BETA)

    parser.add_argument(
        '--max-idle',
        type=arg_parsers.Duration(),
        help="""\
        The duration before the cluster is auto-deleted after the last job
        completes, such as "2h" or "1d".
        See $ gcloud topic datetimes for information on duration formats.
        """)

    auto_delete_group = parser.add_mutually_exclusive_group()
    auto_delete_group.add_argument(
        '--max-age',
        type=arg_parsers.Duration(),
        help="""\
        The lifespan of the cluster before it is auto-deleted, such as
        "2h" or "1d".
        See $ gcloud topic datetimes for information on duration formats.
        """)

    auto_delete_group.add_argument(
        '--expiration-time',
        type=arg_parsers.Datetime.Parse,
        help="""\
        The time when the cluster will be auto-deleted, such as
        "2017-08-29T18:52:51.142Z". See $ gcloud topic datetimes for
        information on time formats.
        """)

    for instance_type in ('master', 'worker'):
      help_msg = """\
      Attaches accelerators (e.g. GPUs) to the {instance_type}
      instance(s).
      """.format(instance_type=instance_type)
      if instance_type == 'worker':
        help_msg += """
      Note:
      No accelerators will be attached to preemptible workers, because
      preemptible VMs do not support accelerators.
      """
      help_msg += """
      *type*::: The specific type (e.g. nvidia-tesla-k80 for nVidia Tesla
      K80) of accelerator to attach to the instances. Use 'gcloud compute
      accelerator-types list' to learn about all available accelerator
      types.

      *count*::: The number of pieces of the accelerator to attach to each
      of the instances. The default value is 1.
      """
      parser.add_argument(
          '--{0}-accelerator'.format(instance_type),
          type=arg_parsers.ArgDict(spec={
              'type': str,
              'count': int,
          }),
          metavar='type=TYPE,[count=COUNT]',
          help=help_msg)
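
# A minimal, self-contained sketch of the '--master-accelerator' /
# '--worker-accelerator' pattern registered above, using only the standard
# argparse module. The parse_accelerator helper is a hypothetical stand-in for
# arg_parsers.ArgDict(spec={'type': str, 'count': int}), not the Cloud SDK
# implementation.
import argparse


def parse_accelerator(value):
  """Parses 'type=TYPE,count=COUNT' into a dict; count defaults to 1."""
  result = {'count': 1}
  for item in value.split(','):
    key, _, val = item.partition('=')
    if key == 'type':
      result['type'] = val
    elif key == 'count':
      result['count'] = int(val)
    else:
      raise argparse.ArgumentTypeError('unexpected key: {0}'.format(key))
  return result


sketch_parser = argparse.ArgumentParser()
sketch_parser.add_argument('--worker-accelerator',
                           type=parse_accelerator,
                           metavar='type=TYPE,[count=COUNT]')
args = sketch_parser.parse_args(
    ['--worker-accelerator', 'type=nvidia-tesla-k80,count=2'])
assert args.worker_accelerator == {'type': 'nvidia-tesla-k80', 'count': 2}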
Example #2
0
def BetaArgsForClusterRef(parser):
  """Register beta-only flags for creating a Dataproc cluster."""
  flags.AddMinCpuPlatformArgs(parser, base.ReleaseTrack.BETA)

  autoscaling_group = parser.add_argument_group()
  flags.AddAutoscalingPolicyResourceArgForCluster(
      autoscaling_group, api_version='v1beta2')

  parser.add_argument(
      '--enable-component-gateway',
      action='store_true',
      help="""\
        Enable access to the web UIs of selected components on the cluster
        through the component gateway.
        """)

  for instance_type in ('main', 'worker'):
    help_msg = """\
      Attaches accelerators (e.g. GPUs) to the {instance_type}
      instance(s).
      """.format(instance_type=instance_type)
    if instance_type == 'worker':
      help_msg += """
      Note:
      No accelerators will be attached to preemptible workers, because
      preemptible VMs do not support accelerators.
      """
    help_msg += """
      *type*::: The specific type (e.g. nvidia-tesla-k80 for nVidia Tesla
      K80) of accelerator to attach to the instances. Use 'gcloud compute
      accelerator-types list' to learn about all available accelerator
      types.

      *count*::: The number of pieces of the accelerator to attach to each
      of the instances. The default value is 1.
      """
    parser.add_argument(
        '--{0}-accelerator'.format(instance_type),
        type=arg_parsers.ArgDict(spec={
            'type': str,
            'count': int,
        }),
        metavar='type=TYPE,[count=COUNT]',
        help=help_msg)

  AddReservationAffinityGroup(parser)
Example #3
0
def ArgsForClusterRef(parser,
                      beta=False,
                      include_deprecated=True,
                      include_ttl_config=False,
                      include_gke_platform_args=False):
    """Register flags for creating a dataproc cluster.

  Args:
    parser: The argparse.ArgParser to configure with dataproc cluster arguments.
    beta: whether or not this is a beta command (may affect flag visibility)
    include_deprecated: whether deprecated flags should be included
    include_ttl_config: whether to include Scheduled Delete(TTL) args
    include_gke_platform_args: whether to include GKE-based cluster args
  """
    labels_util.AddCreateLabelsFlags(parser)
    # 30m is backend timeout + 5m for safety buffer.
    flags.AddTimeoutFlag(parser, default='35m')
    flags.AddZoneFlag(parser, short_flags=include_deprecated)
    flags.AddComponentFlag(parser)

    platform_group = parser.add_argument_group(mutex=True)
    gce_platform_group = platform_group.add_argument_group(help="""\
    Compute Engine options for Dataproc clusters.
    """)

    instances_flags.AddTagsArgs(gce_platform_group)
    gce_platform_group.add_argument(
        '--metadata',
        type=arg_parsers.ArgDict(min_length=1),
        action='append',
        default=None,
        help=('Metadata to be made available to the guest operating system '
              'running on the instances'),
        metavar='KEY=VALUE')

    # Either allow creating a single node cluster (--single-node), or specifying
    # the number of workers in the multi-node cluster (--num-workers and
    # --num-secondary-workers)
    node_group = parser.add_argument_group(mutex=True)  # Mutually exclusive
    node_group.add_argument('--single-node',
                            action='store_true',
                            help="""\
      Create a single node cluster.

      A single node cluster has all master and worker components.
      It cannot have any separate worker nodes. If this flag is not
      specified, a cluster with separate workers is created.
      """)
    # Not mutually exclusive
    worker_group = node_group.add_argument_group(
        help='Multi-node cluster flags')
    worker_group.add_argument(
        '--num-workers',
        type=int,
        help='The number of worker nodes in the cluster. Defaults to '
        'server-specified.')
    worker_group.add_argument(
        '--secondary-worker-type',
        hidden=True,
        metavar='TYPE',
        choices=['preemptible', 'non-preemptible', 'unspecified'],
        default='unspecified',
        help='The type of the secondary worker group.')
    num_secondary_workers = worker_group.add_argument_group(mutex=True)
    num_secondary_workers.add_argument(
        '--num-preemptible-workers',
        action=actions.DeprecationAction(
            '--num-preemptible-workers',
            warn=('The `--num-preemptible-workers` flag is deprecated. '
                  'Use the `--num-secondary-workers` flag instead.')),
        type=int,
        hidden=True,
        help='The number of preemptible worker nodes in the cluster.')
    num_secondary_workers.add_argument(
        '--num-secondary-workers',
        type=int,
        help='The number of secondary worker nodes in the cluster.')

    parser.add_argument(
        '--master-machine-type',
        help='The type of machine to use for the master. Defaults to '
        'server-specified.')
    parser.add_argument(
        '--worker-machine-type',
        help='The type of machine to use for workers. Defaults to '
        'server-specified.')
    image_parser = parser.add_mutually_exclusive_group()
    # TODO(b/73291743): Add external doc link to --image
    image_parser.add_argument(
        '--image',
        metavar='IMAGE',
        help='The full custom image URI or the custom image name that '
        'will be used to create a cluster.')
    image_parser.add_argument(
        '--image-version',
        metavar='VERSION',
        help='The image version to use for the cluster. Defaults to the '
        'latest version.')
    parser.add_argument('--bucket',
                        help="""\
      The Google Cloud Storage bucket to use by default to stage job
      dependencies, miscellaneous config files, and job driver console output
      when using this cluster.
      """)

    netparser = gce_platform_group.add_argument_group(mutex=True)
    netparser.add_argument('--network',
                           help="""\
      The Compute Engine network that the VM instances of the cluster will be
      part of. This is mutually exclusive with --subnet. If neither is
      specified, this defaults to the "default" network.
      """)
    netparser.add_argument('--subnet',
                           help="""\
      Specifies the subnet that the cluster will be part of. This is mutually
      exclusive with --network.
      """)
    parser.add_argument(
        '--num-worker-local-ssds',
        type=int,
        help='The number of local SSDs to attach to each worker in a cluster.')
    parser.add_argument(
        '--num-master-local-ssds',
        type=int,
        help='The number of local SSDs to attach to the master in a cluster.')
    secondary_worker_local_ssds = parser.add_argument_group(mutex=True)
    secondary_worker_local_ssds.add_argument(
        '--num-preemptible-worker-local-ssds',
        type=int,
        hidden=True,
        action=actions.DeprecationAction(
            '--num-preemptible-worker-local-ssds',
            warn=(
                'The `--num-preemptible-worker-local-ssds` flag is deprecated. '
                'Use the `--num-secondary-worker-local-ssds` flag instead.')),
        help="""\
      The number of local SSDs to attach to each secondary worker in
      a cluster.
      """)
    secondary_worker_local_ssds.add_argument(
        '--num-secondary-worker-local-ssds',
        type=int,
        help="""\
      The number of local SSDs to attach to each secondary worker in
      a cluster.
      """)
    parser.add_argument(
        '--initialization-actions',
        type=arg_parsers.ArgList(min_length=1),
        metavar='CLOUD_STORAGE_URI',
        help=('A list of Google Cloud Storage URIs of '
              'executables to run on each node in the cluster.'))
    parser.add_argument(
        '--initialization-action-timeout',
        type=arg_parsers.Duration(),
        metavar='TIMEOUT',
        default='10m',
        help=('The maximum duration of each initialization action. See '
              '$ gcloud topic datetimes for information on duration formats.'))
    parser.add_argument(
        '--num-masters',
        type=arg_parsers.CustomFunctionValidator(
            lambda n: int(n) in [1, 3],
            'Number of masters must be 1 (Standard) or 3 (High Availability)',
            parser=arg_parsers.BoundedInt(1, 3)),
        help="""\
      The number of master nodes in the cluster.

      Number of Masters | Cluster Mode
      --- | ---
      1 | Standard
      3 | High Availability
      """)
    parser.add_argument('--properties',
                        type=arg_parsers.ArgDict(),
                        action=arg_parsers.UpdateAction,
                        default={},
                        metavar='PREFIX:PROPERTY=VALUE',
                        help="""\
Specifies configuration properties for installed packages, such as Hadoop
and Spark.

Properties are mapped to configuration files by specifying a prefix, such as
"core:io.serializations". The following are supported prefixes and their
mappings:

Prefix | File | Purpose of file
--- | --- | ---
capacity-scheduler | capacity-scheduler.xml | Hadoop YARN Capacity Scheduler configuration
core | core-site.xml | Hadoop general configuration
distcp | distcp-default.xml | Hadoop Distributed Copy configuration
hadoop-env | hadoop-env.sh | Hadoop-specific environment variables
hdfs | hdfs-site.xml | Hadoop HDFS configuration
hive | hive-site.xml | Hive configuration
mapred | mapred-site.xml | Hadoop MapReduce configuration
mapred-env | mapred-env.sh | Hadoop MapReduce-specific environment variables
pig | pig.properties | Pig configuration
spark | spark-defaults.conf | Spark configuration
spark-env | spark-env.sh | Spark-specific environment variables
yarn | yarn-site.xml | Hadoop YARN configuration
yarn-env | yarn-env.sh | Hadoop YARN-specific environment variables

See https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/cluster-properties
for more information.

""")
    gce_platform_group.add_argument(
        '--service-account',
        help='The Google Cloud IAM service account to be authenticated as.')
    gce_platform_group.add_argument('--scopes',
                                    type=arg_parsers.ArgList(min_length=1),
                                    metavar='SCOPE',
                                    help="""\
Specifies scopes for the node instances. Multiple SCOPEs can be specified,
separated by commas.
Examples:

  $ {{command}} example-cluster --scopes https://www.googleapis.com/auth/bigtable.admin

  $ {{command}} example-cluster --scopes sqlservice,bigquery

The following *minimum scopes* are necessary for the cluster to function
properly and are always added, even if not explicitly specified:

  {minimum_scopes}

If the `--scopes` flag is not specified, the following *default scopes*
are also included:

  {additional_scopes}

If you want to enable all scopes, use the 'cloud-platform' scope.

{scopes_help}
""".format(minimum_scopes='\n  '.join(constants.MINIMUM_SCOPE_URIS),
           additional_scopes='\n  '.join(
               constants.ADDITIONAL_DEFAULT_SCOPE_URIS),
           scopes_help=compute_helpers.SCOPES_HELP))

    if include_deprecated:
        _AddDiskArgsDeprecated(parser)
    else:
        _AddDiskArgs(parser)

    # --no-address is an exception to the no negative-flag style guideline to be
    # consistent with gcloud compute instances create --no-address
    parser.add_argument('--no-address',
                        action='store_true',
                        help="""\
      If provided, the instances in the cluster will not be assigned external
      IP addresses.

      If omitted, the instances in the cluster will each be assigned an
      ephemeral external IP address.

      Note: Dataproc VMs need access to the Dataproc API. This can be achieved
      without external IP addresses using Private Google Access
      (https://cloud.google.com/compute/docs/private-google-access).
      """)

    boot_disk_type_detailed_help = """\
      The type of the boot disk. The value must be ``pd-standard'' or
      ``pd-ssd''.
      """
    parser.add_argument('--master-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    parser.add_argument('--worker-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    secondary_worker_boot_disk_type = parser.add_argument_group(mutex=True)
    secondary_worker_boot_disk_type.add_argument(
        '--preemptible-worker-boot-disk-type',
        help=boot_disk_type_detailed_help,
        hidden=True,
        action=actions.DeprecationAction(
            '--preemptible-worker-boot-disk-type',
            warn=(
                'The `--preemptible-worker-boot-disk-type` flag is deprecated. '
                'Use the `--secondary-worker-boot-disk-type` flag instead.')))
    secondary_worker_boot_disk_type.add_argument(
        '--secondary-worker-boot-disk-type', help=boot_disk_type_detailed_help)

    autoscaling_group = parser.add_argument_group()
    flags.AddAutoscalingPolicyResourceArgForCluster(
        autoscaling_group, api_version=('v1beta2' if beta else 'v1'))

    if include_ttl_config:
        parser.add_argument('--max-idle',
                            type=arg_parsers.Duration(),
                            help="""\
          The duration before the cluster is auto-deleted after the last job
          completes, such as "2h" or "1d".
          See $ gcloud topic datetimes for information on duration formats.
          """)

        auto_delete_group = parser.add_mutually_exclusive_group()
        auto_delete_group.add_argument('--max-age',
                                       type=arg_parsers.Duration(),
                                       help="""\
          The lifespan of the cluster before it is auto-deleted, such as
          "2h" or "1d".
          See $ gcloud topic datetimes for information on duration formats.
          """)

        auto_delete_group.add_argument('--expiration-time',
                                       type=arg_parsers.Datetime.Parse,
                                       help="""\
          The time when the cluster will be auto-deleted, such as
          "2017-08-29T18:52:51.142Z". See $ gcloud topic datetimes for
          information on time formats.
          """)

    AddKerberosGroup(parser)

    flags.AddMinCpuPlatformArgs(parser)

    _AddAcceleratorArgs(parser)

    AddReservationAffinityGroup(
        gce_platform_group,
        group_text='Specifies the reservation for the instance.',
        affinity_text='The type of reservation for the instance.')
    if include_gke_platform_args:
        gke_based_cluster_group = platform_group.add_argument_group(
            hidden=True,
            help="""\
          Options for creating a GKE-based Dataproc cluster. Specifying any of these
          will indicate that this cluster is intended to be a GKE-based cluster.
          These options are mutually exclusive with GCE-based options.
          """)
        gke_based_cluster_group.add_argument('--gke-cluster',
                                             hidden=True,
                                             help="""\
            Required for GKE-based clusters. Specify the name of the GKE cluster to
            deploy this GKE-based Dataproc cluster to. This should be the short name
            and not the full path name.
            """)
        gke_based_cluster_group.add_argument('--gke-cluster-namespace',
                                             hidden=True,
                                             help="""\
            Optional. Specify the name of the namespace to deploy Dataproc system
            components into. This namespace does not need to already exist.
            """)
Example #4
0
    def Args(parser):
        flags.AddTemplateResourceArg(parser, 'set managed cluster')
        parser.add_argument('--cluster-name',
                            help="""\
        The name of the managed Dataproc cluster.
        If unspecified, the workflow template ID will be used.""")
        clusters.ArgsForClusterRef(parser, beta=True)
        flags.AddMinCpuPlatformArgs(parser, base.ReleaseTrack.BETA)

        # TODO(b/70164645): Consolidate these arguments with the other beta args
        # These arguments are duplicated from the cluster creation beta track.
        # There should be an ArgsForClusterRefBeta method in clusters.py that is
        # invoked here so that we don't have to duplicate the arguments.
        parser.add_argument('--max-idle',
                            type=arg_parsers.Duration(),
                            help="""\
        The duration before the cluster is auto-deleted after the last job
        completes, such as "2h" or "1d".
        See $ gcloud topic datetimes for information on duration formats.
        """)

        auto_delete_group = parser.add_mutually_exclusive_group()
        auto_delete_group.add_argument('--max-age',
                                       type=arg_parsers.Duration(),
                                       help="""\
        The lifespan of the cluster before it is auto-deleted, such as
        "2h" or "1d".
        See $ gcloud topic datetimes for information on duration formats.
        """)

        auto_delete_group.add_argument('--expiration-time',
                                       type=arg_parsers.Datetime.Parse,
                                       help="""\
        The time when the cluster will be auto-deleted, such as
        "2017-08-29T18:52:51.142Z". See $ gcloud topic datetimes for
        information on time formats.
        """)

        for instance_type in ('master', 'worker'):
            help_msg = """\
      Attaches accelerators (e.g. GPUs) to the {instance_type}
      instance(s).
      """.format(instance_type=instance_type)
            if instance_type == 'worker':
                help_msg += """
      Note:
      No accelerators will be attached to preemptible workers, because
      preemptible VMs do not support accelerators.
      """
            help_msg += """
      *type*::: The specific type (e.g. nvidia-tesla-k80 for nVidia Tesla
      K80) of accelerator to attach to the instances. Use 'gcloud compute
      accelerator-types list' to learn about all available accelerator
      types.

      *count*::: The number of pieces of the accelerator to attach to each
      of the instances. The default value is 1.
      """
            parser.add_argument('--{0}-accelerator'.format(instance_type),
                                type=arg_parsers.ArgDict(spec={
                                    'type': str,
                                    'count': int,
                                }),
                                metavar='type=TYPE,[count=COUNT]',
                                help=help_msg)
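
# A minimal, self-contained sketch of the mutually exclusive '--max-age' /
# '--expiration-time' pair registered in Args above, using only the standard
# library. The parse_duration helper is a hypothetical stand-in for
# arg_parsers.Duration() and only understands plain "<N>s/m/h/d" values;
# parse_datetime only accepts the RFC 3339 form shown in the help text.
# Neither is the Cloud SDK implementation.
import argparse
import datetime

_UNIT_SECONDS = {'s': 1, 'm': 60, 'h': 3600, 'd': 86400}


def parse_duration(value):
  """Converts a duration such as '2h' or '1d' into seconds."""
  unit = value[-1]
  if unit not in _UNIT_SECONDS:
    raise argparse.ArgumentTypeError('unknown duration unit: {0}'.format(unit))
  return int(value[:-1]) * _UNIT_SECONDS[unit]


def parse_datetime(value):
  """Parses a UTC timestamp such as '2017-08-29T18:52:51.142Z'."""
  return datetime.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S.%fZ')


sketch_parser = argparse.ArgumentParser()
auto_delete_sketch_group = sketch_parser.add_mutually_exclusive_group()
auto_delete_sketch_group.add_argument('--max-age', type=parse_duration)
auto_delete_sketch_group.add_argument('--expiration-time', type=parse_datetime)

args = sketch_parser.parse_args(['--max-age', '2h'])
assert args.max_age == 7200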
Example #5
0
def BetaArgsForClusterRef(parser):
    """Register beta-only flags for creating a Dataproc cluster."""
    flags.AddMinCpuPlatformArgs(parser, base.ReleaseTrack.BETA)

    autoscaling_group = parser.add_argument_group()
    flags.AddAutoscalingPolicyResourceArgForCluster(autoscaling_group,
                                                    api_version='v1beta2')

    AddKerberosGroup(parser)

    parser.add_argument('--enable-component-gateway',
                        hidden=True,
                        action='store_true',
                        help="""\
        Enable access to the web UIs of selected components on the cluster
        through the component gateway.
        """)

    parser.add_argument('--max-idle',
                        type=arg_parsers.Duration(),
                        help="""\
        The duration before the cluster is auto-deleted after the last job
        completes, such as "2h" or "1d".
        See $ gcloud topic datetimes for information on duration formats.
        """)

    auto_delete_group = parser.add_mutually_exclusive_group()
    auto_delete_group.add_argument('--max-age',
                                   type=arg_parsers.Duration(),
                                   help="""\
        The lifespan of the cluster before it is auto-deleted, such as
        "2h" or "1d".
        See $ gcloud topic datetimes for information on duration formats.
        """)

    auto_delete_group.add_argument('--expiration-time',
                                   type=arg_parsers.Datetime.Parse,
                                   help="""\
        The time when the cluster will be auto-deleted, such as
        "2017-08-29T18:52:51.142Z". See $ gcloud topic datetimes for
        information on time formats.
        """)

    for instance_type in ('master', 'worker'):
        help_msg = """\
      Attaches accelerators (e.g. GPUs) to the {instance_type}
      instance(s).
      """.format(instance_type=instance_type)
        if instance_type == 'worker':
            help_msg += """
      Note:
      No accelerators will be attached to preemptible workers, because
      preemptible VMs do not support accelerators.
      """
        help_msg += """
      *type*::: The specific type (e.g. nvidia-tesla-k80 for nVidia Tesla
      K80) of accelerator to attach to the instances. Use 'gcloud compute
      accelerator-types list' to learn about all available accelerator
      types.

      *count*::: The number of pieces of the accelerator to attach to each
      of the instances. The default value is 1.
      """
        parser.add_argument('--{0}-accelerator'.format(instance_type),
                            type=arg_parsers.ArgDict(spec={
                                'type': str,
                                'count': int,
                            }),
                            metavar='type=TYPE,[count=COUNT]',
                            help=help_msg)

    AddReservationAffinityGroup(parser)
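
# A minimal, self-contained sketch of the '--num-masters' validation used in
# ArgsForClusterRef above (1 for Standard, 3 for High Availability), using only
# standard argparse. The validate_num_masters helper is a hypothetical stand-in
# for arg_parsers.CustomFunctionValidator wrapped around
# arg_parsers.BoundedInt(1, 3), not the Cloud SDK implementation.
import argparse


def validate_num_masters(value):
  """Accepts only 1 (Standard) or 3 (High Availability)."""
  number = int(value)
  if number not in (1, 3):
    raise argparse.ArgumentTypeError(
        'Number of masters must be 1 (Standard) or 3 (High Availability)')
  return number


sketch_parser = argparse.ArgumentParser()
sketch_parser.add_argument('--num-masters', type=validate_num_masters)

assert sketch_parser.parse_args(['--num-masters', '3']).num_masters == 3
# '--num-masters 2' would make parse_args() exit with the error message above.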