def Args(parser):
    flags.AddTemplateFlag(parser, 'set cluster selector')
    flags.AddZoneFlag(parser)
    parser.add_argument('--cluster-labels',
                        metavar='KEY=VALUE',
                        type=arg_parsers.ArgDict(
                            key_type=labels_util.KEY_FORMAT_VALIDATOR,
                            value_type=labels_util.VALUE_FORMAT_VALIDATOR,
                            min_length=1),
                        action=arg_parsers.UpdateAction,
                        help='A list of label KEY=VALUE pairs to add.')
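Note: `arg_parsers.ArgDict` and `arg_parsers.UpdateAction` are calliope helpers internal to the Cloud SDK. As a rough, self-contained illustration of the KEY=VALUE behaviour they provide (the key/value validators and the merge-on-repeat semantics of `UpdateAction` are omitted, and all names below are illustrative rather than SDK API), a plain-argparse sketch could look like this:

import argparse


def parse_key_value_list(value):
    # Roughly what arg_parsers.ArgDict does: split 'k1=v1,k2=v2' into a dict.
    result = {}
    for pair in value.split(','):
        key, _, val = pair.partition('=')
        if not key or not val:
            raise argparse.ArgumentTypeError('expected KEY=VALUE, got %r' % pair)
        result[key] = val
    return result


parser = argparse.ArgumentParser()
parser.add_argument('--cluster-labels',
                    metavar='KEY=VALUE',
                    type=parse_key_value_list,
                    default={},
                    help='A list of label KEY=VALUE pairs to add.')

args = parser.parse_args(['--cluster-labels', 'env=prod,team=data'])
print(args.cluster_labels)  # {'env': 'prod', 'team': 'data'}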
Example #2
def ArgsForClusterRef(parser,
                      beta=False,
                      include_deprecated=True,
                      include_ttl_config=False,
                      include_gke_platform_args=False):
    """Register flags for creating a dataproc cluster.

  Args:
    parser: The argparse.ArgumentParser to configure with dataproc cluster arguments.
    beta: whether or not this is a beta command (may affect flag visibility)
    include_deprecated: whether deprecated flags should be included
    include_ttl_config: whether to include Scheduled Delete(TTL) args
    include_gke_platform_args: whether to include GKE-based cluster args
  """
    labels_util.AddCreateLabelsFlags(parser)
    # 30m is backend timeout + 5m for safety buffer.
    flags.AddTimeoutFlag(parser, default='35m')
    flags.AddZoneFlag(parser, short_flags=include_deprecated)
    flags.AddComponentFlag(parser)

    platform_group = parser.add_argument_group(mutex=True)
    gce_platform_group = platform_group.add_argument_group(help="""\
    Compute Engine options for Dataproc clusters.
    """)

    instances_flags.AddTagsArgs(gce_platform_group)
    gce_platform_group.add_argument(
        '--metadata',
        type=arg_parsers.ArgDict(min_length=1),
        action='append',
        default=None,
        help=('Metadata to be made available to the guest operating system '
              'running on the instances'),
        metavar='KEY=VALUE')

    # Either allow creating a single node cluster (--single-node), or specifying
    # the number of workers in the multi-node cluster (--num-workers and
    # --num-secondary-workers)
    node_group = parser.add_argument_group(mutex=True)  # Mutually exclusive
    node_group.add_argument('--single-node',
                            action='store_true',
                            help="""\
      Create a single node cluster.

      A single node cluster has all master and worker components.
      It cannot have any separate worker nodes. If this flag is not
      specified, a cluster with separate workers is created.
      """)
    # Not mutually exclusive
    worker_group = node_group.add_argument_group(
        help='Multi-node cluster flags')
    worker_group.add_argument(
        '--num-workers',
        type=int,
        help='The number of worker nodes in the cluster. Defaults to '
        'server-specified.')
    worker_group.add_argument(
        '--secondary-worker-type',
        hidden=True,
        metavar='TYPE',
        choices=['preemptible', 'non-preemptible', 'unspecified'],
        default='unspecified',
        help='The type of the secondary worker group.')
    num_secondary_workers = worker_group.add_argument_group(mutex=True)
    num_secondary_workers.add_argument(
        '--num-preemptible-workers',
        action=actions.DeprecationAction(
            '--num-preemptible-workers',
            warn=('The `--num-preemptible-workers` flag is deprecated. '
                  'Use the `--num-secondary-workers` flag instead.')),
        type=int,
        hidden=True,
        help='The number of preemptible worker nodes in the cluster.')
    num_secondary_workers.add_argument(
        '--num-secondary-workers',
        type=int,
        help='The number of secondary worker nodes in the cluster.')

    parser.add_argument(
        '--master-machine-type',
        help='The type of machine to use for the master. Defaults to '
        'server-specified.')
    parser.add_argument(
        '--worker-machine-type',
        help='The type of machine to use for workers. Defaults to '
        'server-specified.')
    image_parser = parser.add_mutually_exclusive_group()
    # TODO(b/73291743): Add external doc link to --image
    image_parser.add_argument(
        '--image',
        metavar='IMAGE',
        help='The full custom image URI or the custom image name that '
        'will be used to create a cluster.')
    image_parser.add_argument(
        '--image-version',
        metavar='VERSION',
        help='The image version to use for the cluster. Defaults to the '
        'latest version.')
    parser.add_argument('--bucket',
                        help="""\
      The Google Cloud Storage bucket to use by default to stage job
      dependencies, miscellaneous config files, and job driver console output
      when using this cluster.
      """)

    netparser = gce_platform_group.add_argument_group(mutex=True)
    netparser.add_argument('--network',
                           help="""\
      The Compute Engine network that the VM instances of the cluster will be
      part of. This is mutually exclusive with --subnet. If neither is
      specified, this defaults to the "default" network.
      """)
    netparser.add_argument('--subnet',
                           help="""\
      Specifies the subnet that the cluster will be part of. This is mutually
      exclusive with --network.
      """)
    parser.add_argument(
        '--num-worker-local-ssds',
        type=int,
        help='The number of local SSDs to attach to each worker in a cluster.')
    parser.add_argument(
        '--num-master-local-ssds',
        type=int,
        help='The number of local SSDs to attach to the master in a cluster.')
    secondary_worker_local_ssds = parser.add_argument_group(mutex=True)
    secondary_worker_local_ssds.add_argument(
        '--num-preemptible-worker-local-ssds',
        type=int,
        hidden=True,
        action=actions.DeprecationAction(
            '--num-preemptible-worker-local-ssds',
            warn=(
                'The `--num-preemptible-worker-local-ssds` flag is deprecated. '
                'Use the `--num-secondary-worker-local-ssds` flag instead.')),
        help="""\
      The number of local SSDs to attach to each secondary worker in
      a cluster.
      """)
    secondary_worker_local_ssds.add_argument(
        '--num-secondary-worker-local-ssds',
        type=int,
        help="""\
      The number of local SSDs to attach to each secondary worker in
      a cluster.
      """)
    parser.add_argument(
        '--initialization-actions',
        type=arg_parsers.ArgList(min_length=1),
        metavar='CLOUD_STORAGE_URI',
        help=('A list of Google Cloud Storage URIs of '
              'executables to run on each node in the cluster.'))
    parser.add_argument(
        '--initialization-action-timeout',
        type=arg_parsers.Duration(),
        metavar='TIMEOUT',
        default='10m',
        help=('The maximum duration of each initialization action. See '
              '$ gcloud topic datetimes for information on duration formats.'))
    parser.add_argument(
        '--num-masters',
        type=arg_parsers.CustomFunctionValidator(
            lambda n: int(n) in [1, 3],
            'Number of masters must be 1 (Standard) or 3 (High Availability)',
            parser=arg_parsers.BoundedInt(1, 3)),
        help="""\
      The number of master nodes in the cluster.

      Number of Masters | Cluster Mode
      --- | ---
      1 | Standard
      3 | High Availability
      """)
    parser.add_argument('--properties',
                        type=arg_parsers.ArgDict(),
                        action=arg_parsers.UpdateAction,
                        default={},
                        metavar='PREFIX:PROPERTY=VALUE',
                        help="""\
Specifies configuration properties for installed packages, such as Hadoop
and Spark.

Properties are mapped to configuration files by specifying a prefix, such as
"core:io.serializations". The following are supported prefixes and their
mappings:

Prefix | File | Purpose of file
--- | --- | ---
capacity-scheduler | capacity-scheduler.xml | Hadoop YARN Capacity Scheduler configuration
core | core-site.xml | Hadoop general configuration
distcp | distcp-default.xml | Hadoop Distributed Copy configuration
hadoop-env | hadoop-env.sh | Hadoop specific environment variables
hdfs | hdfs-site.xml | Hadoop HDFS configuration
hive | hive-site.xml | Hive configuration
mapred | mapred-site.xml | Hadoop MapReduce configuration
mapred-env | mapred-env.sh | Hadoop MapReduce specific environment variables
pig | pig.properties | Pig configuration
spark | spark-defaults.conf | Spark configuration
spark-env | spark-env.sh | Spark specific environment variables
yarn | yarn-site.xml | Hadoop YARN configuration
yarn-env | yarn-env.sh | Hadoop YARN specific environment variables

See https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/cluster-properties
for more information.

""")
    gce_platform_group.add_argument(
        '--service-account',
        help='The Google Cloud IAM service account to be authenticated as.')
    gce_platform_group.add_argument('--scopes',
                                    type=arg_parsers.ArgList(min_length=1),
                                    metavar='SCOPE',
                                    help="""\
Specifies scopes for the node instances. Multiple SCOPEs can be specified,
separated by commas.
Examples:

  $ {{command}} example-cluster --scopes https://www.googleapis.com/auth/bigtable.admin

  $ {{command}} example-cluster --scopes sqlservice,bigquery

The following *minimum scopes* are necessary for the cluster to function
properly and are always added, even if not explicitly specified:

  {minimum_scopes}

If the `--scopes` flag is not specified, the following *default scopes*
are also included:

  {additional_scopes}

If you want to enable all scopes, use the 'cloud-platform' scope.

{scopes_help}
""".format(minimum_scopes='\n  '.join(constants.MINIMUM_SCOPE_URIS),
           additional_scopes='\n  '.join(
               constants.ADDITIONAL_DEFAULT_SCOPE_URIS),
           scopes_help=compute_helpers.SCOPES_HELP))

    if include_deprecated:
        _AddDiskArgsDeprecated(parser)
    else:
        _AddDiskArgs(parser)

    # --no-address is an exception to the no negative-flag style guideline to be
    # consistent with gcloud compute instances create --no-address
    parser.add_argument('--no-address',
                        action='store_true',
                        help="""\
      If provided, the instances in the cluster will not be assigned external
      IP addresses.

      If omitted the instances in the cluster will each be assigned an
      ephemeral external IP address.

      Note: Dataproc VMs need access to the Dataproc API. This can be achieved
      without external IP addresses using Private Google Access
      (https://cloud.google.com/compute/docs/private-google-access).
      """)

    boot_disk_type_detailed_help = """\
      The type of the boot disk. The value must be ``pd-standard'' or
      ``pd-ssd''.
      """
    parser.add_argument('--master-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    parser.add_argument('--worker-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    secondary_worker_boot_disk_type = parser.add_argument_group(mutex=True)
    secondary_worker_boot_disk_type.add_argument(
        '--preemptible-worker-boot-disk-type',
        help=boot_disk_type_detailed_help,
        hidden=True,
        action=actions.DeprecationAction(
            '--preemptible-worker-boot-disk-type',
            warn=(
                'The `--preemptible-worker-boot-disk-type` flag is deprecated. '
                'Use the `--secondary-worker-boot-disk-type` flag instead.')))
    secondary_worker_boot_disk_type.add_argument(
        '--secondary-worker-boot-disk-type', help=boot_disk_type_detailed_help)

    autoscaling_group = parser.add_argument_group()
    flags.AddAutoscalingPolicyResourceArgForCluster(
        autoscaling_group, api_version=('v1beta2' if beta else 'v1'))

    if include_ttl_config:
        parser.add_argument('--max-idle',
                            type=arg_parsers.Duration(),
                            help="""\
          The duration before cluster is auto-deleted after last job completes,
          such as "2h" or "1d".
          See $ gcloud topic datetimes for information on duration formats.
          """)

        auto_delete_group = parser.add_mutually_exclusive_group()
        auto_delete_group.add_argument('--max-age',
                                       type=arg_parsers.Duration(),
                                       help="""\
          The lifespan of the cluster before it is auto-deleted, such as
          "2h" or "1d".
          See $ gcloud topic datetimes for information on duration formats.
          """)

        auto_delete_group.add_argument('--expiration-time',
                                       type=arg_parsers.Datetime.Parse,
                                       help="""\
          The time when cluster will be auto-deleted, such as
          "2017-08-29T18:52:51.142Z." See $ gcloud topic datetimes for
          information on time formats.
          """)

    AddKerberosGroup(parser)

    flags.AddMinCpuPlatformArgs(parser)

    _AddAcceleratorArgs(parser)

    AddReservationAffinityGroup(
        gce_platform_group,
        group_text='Specifies the reservation for the instance.',
        affinity_text='The type of reservation for the instance.')
    if include_gke_platform_args:
        gke_based_cluster_group = platform_group.add_argument_group(
            hidden=True,
            help="""\
          Options for creating a GKE-based Dataproc cluster. Specifying any of these
          will indicate that this cluster is intended to be a GKE-based cluster.
          These options are mutually exclusive with GCE-based options.
          """)
        gke_based_cluster_group.add_argument('--gke-cluster',
                                             hidden=True,
                                             help="""\
            Required for GKE-based clusters. Specify the name of the GKE cluster to
            deploy this GKE-based Dataproc cluster to. This should be the short name
            and not the full path name.
            """)
        gke_based_cluster_group.add_argument('--gke-cluster-namespace',
                                             hidden=True,
                                             help="""\
            Optional. Specify the name of the namespace to deploy Dataproc system
            components into. This namespace does not need to already exist.
            """)
Example #3
def ArgsForClusterRef(parser, beta=False):
    """Register flags for creating a dataproc cluster.

  Args:
    parser: The argparse.ArgumentParser to configure with dataproc cluster arguments.
    beta: whether or not this is a beta command (may affect flag visibility)
  """
    labels_util.AddCreateLabelsFlags(parser)
    instances_flags.AddTagsArgs(parser)
    # 30m is backend timeout + 5m for safety buffer.
    flags.AddTimeoutFlag(parser, default='35m')
    flags.AddZoneFlag(parser)

    parser.add_argument(
        '--metadata',
        type=arg_parsers.ArgDict(min_length=1),
        action='append',
        default=None,
        help=('Metadata to be made available to the guest operating system '
              'running on the instances'),
        metavar='KEY=VALUE')

    # Either allow creating a single node cluster (--single-node), or specifying
    # the number of workers in the multi-node cluster (--num-workers and
    # --num-preemptible-workers)
    node_group = parser.add_argument_group(mutex=True)  # Mutually exclusive
    node_group.add_argument('--single-node',
                            action='store_true',
                            help="""\
      Create a single node cluster.

      A single node cluster has all master and worker components.
      It cannot have any separate worker nodes. If this flag is not
      specified, a cluster with separate workers is created.
      """)
    # Not mutually exclusive
    worker_group = node_group.add_argument_group(
        help='Multi-node cluster flags')
    worker_group.add_argument(
        '--num-workers',
        type=int,
        help='The number of worker nodes in the cluster. Defaults to '
        'server-specified.')
    worker_group.add_argument(
        '--num-preemptible-workers',
        type=int,
        help='The number of preemptible worker nodes in the cluster.')

    parser.add_argument(
        '--master-machine-type',
        help='The type of machine to use for the master. Defaults to '
        'server-specified.')
    parser.add_argument(
        '--worker-machine-type',
        help='The type of machine to use for workers. Defaults to '
        'server-specified.')
    image_parser = parser.add_mutually_exclusive_group()
    # TODO(b/73291743): Add external doc link to --image
    image_parser.add_argument(
        '--image',
        metavar='IMAGE',
        help='The full custom image URI or the custom image name that '
        'will be used to create a cluster.')
    image_parser.add_argument(
        '--image-version',
        metavar='VERSION',
        help='The image version to use for the cluster. Defaults to the '
        'latest version.')
    parser.add_argument(
        '--bucket',
        help='The Google Cloud Storage bucket to use with the Google Cloud '
        'Storage connector. A bucket is auto created when this parameter is '
        'not specified.')

    netparser = parser.add_mutually_exclusive_group()
    netparser.add_argument('--network',
                           help="""\
      The Compute Engine network that the VM instances of the cluster will be
      part of. This is mutually exclusive with --subnet. If neither is
      specified, this defaults to the "default" network.
      """)
    netparser.add_argument('--subnet',
                           help="""\
      Specifies the subnet that the cluster will be part of. This is mutually
      exclusive with --network.
      """)
    parser.add_argument(
        '--num-worker-local-ssds',
        type=int,
        help='The number of local SSDs to attach to each worker in a cluster.')
    parser.add_argument(
        '--num-master-local-ssds',
        type=int,
        help='The number of local SSDs to attach to the master in a cluster.')
    parser.add_argument(
        '--initialization-actions',
        type=arg_parsers.ArgList(min_length=1),
        metavar='CLOUD_STORAGE_URI',
        help=('A list of Google Cloud Storage URIs of '
              'executables to run on each node in the cluster.'))
    parser.add_argument(
        '--initialization-action-timeout',
        type=arg_parsers.Duration(),
        metavar='TIMEOUT',
        default='10m',
        help=('The maximum duration of each initialization action. See '
              '$ gcloud topic datetimes for information on duration formats.'))
    parser.add_argument(
        '--num-masters',
        type=arg_parsers.CustomFunctionValidator(
            lambda n: int(n) in [1, 3],
            'Number of masters must be 1 (Standard) or 3 (High Availability)',
            parser=arg_parsers.BoundedInt(1, 3)),
        help="""\
      The number of master nodes in the cluster.

      [format="csv",options="header"]
      |========
      Number of Masters,Cluster Mode
      1,Standard
      3,High Availability
      |========
      """)
    parser.add_argument('--properties',
                        type=arg_parsers.ArgDict(),
                        metavar='PREFIX:PROPERTY=VALUE',
                        default={},
                        help="""\
Specifies configuration properties for installed packages, such as Hadoop
and Spark.

Properties are mapped to configuration files by specifying a prefix, such as
"core:io.serializations". The following are supported prefixes and their
mappings:

[format="csv",options="header"]
|========
Prefix,File,Purpose of file
capacity-scheduler,capacity-scheduler.xml,Hadoop YARN Capacity Scheduler configuration
core,core-site.xml,Hadoop general configuration
distcp,distcp-default.xml,Hadoop Distributed Copy configuration
hadoop-env,hadoop-env.sh,Hadoop specific environment variables
hdfs,hdfs-site.xml,Hadoop HDFS configuration
hive,hive-site.xml,Hive configuration
mapred,mapred-site.xml,Hadoop MapReduce configuration
mapred-env,mapred-env.sh,Hadoop MapReduce specific environment variables
pig,pig.properties,Pig configuration
spark,spark-defaults.conf,Spark configuration
spark-env,spark-env.sh,Spark specific environment variables
yarn,yarn-site.xml,Hadoop YARN configuration
yarn-env,yarn-env.sh,Hadoop YARN specific environment variables
|========

See https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/cluster-properties
for more information.

""")
    parser.add_argument(
        '--service-account',
        help='The Google Cloud IAM service account to be authenticated as.')
    parser.add_argument('--scopes',
                        type=arg_parsers.ArgList(min_length=1),
                        metavar='SCOPE',
                        help="""\
Specifies scopes for the node instances. Multiple SCOPEs can be specified,
separated by commas.
Examples:

  $ {{command}} example-cluster --scopes https://www.googleapis.com/auth/bigtable.admin

  $ {{command}} example-cluster --scopes sqlservice,bigquery

The following *minimum scopes* are necessary for the cluster to function
properly and are always added, even if not explicitly specified:

[format="csv"]
|========
{minimum_scopes}
|========

If the `--scopes` flag is not specified, the following *default scopes*
are also included:

[format="csv"]
|========
{additional_scopes}
|========

If you want to enable all scopes, use the 'cloud-platform' scope.

SCOPE can be either the full URI of the scope or an alias.
Available aliases are:

[format="csv",options="header"]
|========
Alias,URI
{aliases}
|========

{scope_deprecation_msg}
""".format(minimum_scopes='\n'.join(constants.MINIMUM_SCOPE_URIS),
           additional_scopes='\n'.join(
               constants.ADDITIONAL_DEFAULT_SCOPE_URIS),
           aliases=compute_helpers.SCOPE_ALIASES_FOR_HELP,
           scope_deprecation_msg=compute_constants.DEPRECATED_SCOPES_MESSAGES))

    master_boot_disk_size = parser.add_mutually_exclusive_group()
    worker_boot_disk_size = parser.add_mutually_exclusive_group()

    # Deprecated, to be removed at a future date.
    master_boot_disk_size.add_argument(
        '--master-boot-disk-size-gb',
        action=actions.DeprecationAction(
            '--master-boot-disk-size-gb',
            warn=(
                'The `--master-boot-disk-size-gb` flag is deprecated. '
                'Use `--master-boot-disk-size` flag with "GB" after value.')),
        type=int,
        hidden=True,
        help='Use `--master-boot-disk-size` flag with "GB" after value.')
    worker_boot_disk_size.add_argument(
        '--worker-boot-disk-size-gb',
        action=actions.DeprecationAction(
            '--worker-boot-disk-size-gb',
            warn=(
                'The `--worker-boot-disk-size-gb` flag is deprecated. '
                'Use `--worker-boot-disk-size` flag with "GB" after value.')),
        type=int,
        hidden=True,
        help='Use `--worker-boot-disk-size` flag with "GB" after value.')

    boot_disk_size_detailed_help = """\
      The size of the boot disk. The value must be a
      whole number followed by a size unit of ``KB'' for kilobyte, ``MB''
      for megabyte, ``GB'' for gigabyte, or ``TB'' for terabyte. For example,
      ``10GB'' will produce a 10 gigabyte disk. The minimum size a boot disk
      can have is 10 GB. Disk size must be a multiple of 1 GB.
      """
    master_boot_disk_size.add_argument(
        '--master-boot-disk-size',
        type=arg_parsers.BinarySize(lower_bound='10GB'),
        help=boot_disk_size_detailed_help)
    worker_boot_disk_size.add_argument(
        '--worker-boot-disk-size',
        type=arg_parsers.BinarySize(lower_bound='10GB'),
        help=boot_disk_size_detailed_help)
    parser.add_argument('--preemptible-worker-boot-disk-size',
                        type=arg_parsers.BinarySize(lower_bound='10GB'),
                        help=boot_disk_size_detailed_help)

    # Args that are visible only in Beta track
    parser.add_argument('--no-address',
                        action='store_true',
                        help="""\
      If provided, the instances in the cluster will not be assigned external
      IP addresses.

      Note: Dataproc VMs need access to the Dataproc API. This can be achieved
      without external IP addresses using Private Google Access
      (https://cloud.google.com/compute/docs/private-google-access).
      """,
                        hidden=not beta)

    boot_disk_type_detailed_help = """\
      The type of the boot disk. The value must be ``pd-standard'' or
      ``pd-ssd''.
      """
    parser.add_argument('--master-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    parser.add_argument('--worker-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    parser.add_argument('--preemptible-worker-boot-disk-type',
                        help=boot_disk_type_detailed_help)
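Note: `arg_parsers.BinarySize(lower_bound='10GB')` is the Cloud SDK's size validator. A deliberately simplified stand-in that only accepts the KB/MB/GB/TB forms mentioned in the help text (for illustration, not the SDK implementation) might look like this:

import argparse
import re

_UNITS = {'KB': 2**10, 'MB': 2**20, 'GB': 2**30, 'TB': 2**40}


def parse_disk_size(value, lower_bound=10 * 2**30):
    # Parse a whole number followed by KB/MB/GB/TB into bytes.
    match = re.fullmatch(r'(\d+)(KB|MB|GB|TB)', value)
    if not match:
        raise argparse.ArgumentTypeError(
            'expected a whole number followed by KB, MB, GB or TB, got %r' % value)
    size = int(match.group(1)) * _UNITS[match.group(2)]
    if size < lower_bound:
        raise argparse.ArgumentTypeError('boot disk size must be at least 10GB')
    return size


print(parse_disk_size('10GB'))  # 10737418240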
Example #4
def Args(parser):
    labels_util.AddCreateLabelsFlags(parser)
    flags.AddZoneFlag(parser)
    flags.AddTemplateFlag(parser, 'create')
Example #5
def ArgsForClusterRef(parser, beta=False, include_deprecated=True):  # pylint: disable=unused-argument
    """Register flags for creating a dataproc cluster.

  Args:
    parser: The argparse.ArgumentParser to configure with dataproc cluster arguments.
    beta: whether or not this is a beta command (may affect flag visibility)
    include_deprecated: whether deprecated flags should be included
  """
    labels_util.AddCreateLabelsFlags(parser)
    instances_flags.AddTagsArgs(parser)
    # 30m is backend timeout + 5m for safety buffer.
    flags.AddTimeoutFlag(parser, default='35m')
    flags.AddZoneFlag(parser, short_flags=include_deprecated)

    parser.add_argument(
        '--metadata',
        type=arg_parsers.ArgDict(min_length=1),
        action='append',
        default=None,
        help=('Metadata to be made available to the guest operating system '
              'running on the instances'),
        metavar='KEY=VALUE')

    # Either allow creating a single node cluster (--single-node), or specifying
    # the number of workers in the multi-node cluster (--num-workers and
    # --num-preemptible-workers)
    node_group = parser.add_argument_group(mutex=True)  # Mutually exclusive
    node_group.add_argument('--single-node',
                            action='store_true',
                            help="""\
      Create a single node cluster.

      A single node cluster has all master and worker components.
      It cannot have any separate worker nodes. If this flag is not
      specified, a cluster with separate workers is created.
      """)
    # Not mutually exclusive
    worker_group = node_group.add_argument_group(
        help='Multi-node cluster flags')
    worker_group.add_argument(
        '--num-workers',
        type=int,
        help='The number of worker nodes in the cluster. Defaults to '
        'server-specified.')
    worker_group.add_argument(
        '--num-preemptible-workers',
        type=int,
        help='The number of preemptible worker nodes in the cluster.')

    parser.add_argument(
        '--master-machine-type',
        help='The type of machine to use for the master. Defaults to '
        'server-specified.')
    parser.add_argument(
        '--worker-machine-type',
        help='The type of machine to use for workers. Defaults to '
        'server-specified.')
    image_parser = parser.add_mutually_exclusive_group()
    # TODO(b/73291743): Add external doc link to --image
    image_parser.add_argument(
        '--image',
        metavar='IMAGE',
        help='The full custom image URI or the custom image name that '
        'will be used to create a cluster.')
    image_parser.add_argument(
        '--image-version',
        metavar='VERSION',
        help='The image version to use for the cluster. Defaults to the '
        'latest version.')
    parser.add_argument(
        '--bucket',
        help='The Google Cloud Storage bucket to use with the Google Cloud '
        'Storage connector. A bucket is auto created when this parameter is '
        'not specified.')

    netparser = parser.add_mutually_exclusive_group()
    netparser.add_argument('--network',
                           help="""\
      The Compute Engine network that the VM instances of the cluster will be
      part of. This is mutually exclusive with --subnet. If neither is
      specified, this defaults to the "default" network.
      """)
    netparser.add_argument('--subnet',
                           help="""\
      Specifies the subnet that the cluster will be part of. This is mutually
      exclusive with --network.
      """)
    parser.add_argument(
        '--num-worker-local-ssds',
        type=int,
        help='The number of local SSDs to attach to each worker in a cluster.')
    parser.add_argument(
        '--num-master-local-ssds',
        type=int,
        help='The number of local SSDs to attach to the master in a cluster.')
    parser.add_argument(
        '--initialization-actions',
        type=arg_parsers.ArgList(min_length=1),
        metavar='CLOUD_STORAGE_URI',
        help=('A list of Google Cloud Storage URIs of '
              'executables to run on each node in the cluster.'))
    parser.add_argument(
        '--initialization-action-timeout',
        type=arg_parsers.Duration(),
        metavar='TIMEOUT',
        default='10m',
        help=('The maximum duration of each initialization action. See '
              '$ gcloud topic datetimes for information on duration formats.'))
    parser.add_argument(
        '--num-masters',
        type=arg_parsers.CustomFunctionValidator(
            lambda n: int(n) in [1, 3],
            'Number of masters must be 1 (Standard) or 3 (High Availability)',
            parser=arg_parsers.BoundedInt(1, 3)),
        help="""\
      The number of master nodes in the cluster.

      Number of Masters | Cluster Mode
      --- | ---
      1 | Standard
      3 | High Availability
      """)
    parser.add_argument('--properties',
                        type=arg_parsers.ArgDict(),
                        metavar='PREFIX:PROPERTY=VALUE',
                        default={},
                        help="""\
Specifies configuration properties for installed packages, such as Hadoop
and Spark.

Properties are mapped to configuration files by specifying a prefix, such as
"core:io.serializations". The following are supported prefixes and their
mappings:

Prefix | File | Purpose of file
--- | --- | ---
capacity-scheduler | capacity-scheduler.xml | Hadoop YARN Capacity Scheduler configuration
core | core-site.xml | Hadoop general configuration
distcp | distcp-default.xml | Hadoop Distributed Copy configuration
hadoop-env | hadoop-env.sh | Hadoop specific environment variables
hdfs | hdfs-site.xml | Hadoop HDFS configuration
hive | hive-site.xml | Hive configuration
mapred | mapred-site.xml | Hadoop MapReduce configuration
mapred-env | mapred-env.sh | Hadoop MapReduce specific environment variables
pig | pig.properties | Pig configuration
spark | spark-defaults.conf | Spark configuration
spark-env | spark-env.sh | Spark specific environment variables
yarn | yarn-site.xml | Hadoop YARN configuration
yarn-env | yarn-env.sh | Hadoop YARN specific environment variables

See https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/cluster-properties
for more information.

""")
    parser.add_argument(
        '--service-account',
        help='The Google Cloud IAM service account to be authenticated as.')
    parser.add_argument('--scopes',
                        type=arg_parsers.ArgList(min_length=1),
                        metavar='SCOPE',
                        help="""\
Specifies scopes for the node instances. Multiple SCOPEs can be specified,
separated by commas.
Examples:

  $ {{command}} example-cluster --scopes https://www.googleapis.com/auth/bigtable.admin

  $ {{command}} example-cluster --scopes sqlservice,bigquery

The following *minimum scopes* are necessary for the cluster to function
properly and are always added, even if not explicitly specified:

  {minimum_scopes}

If the `--scopes` flag is not specified, the following *default scopes*
are also included:

  {additional_scopes}

If you want to enable all scopes, use the 'cloud-platform' scope.

{scopes_help}
""".format(minimum_scopes='\n  '.join(constants.MINIMUM_SCOPE_URIS),
           additional_scopes='\n  '.join(
               constants.ADDITIONAL_DEFAULT_SCOPE_URIS),
           scopes_help=compute_helpers.SCOPES_HELP))

    if include_deprecated:
        _AddDiskArgsDeprecated(parser)
    else:
        _AddDiskArgs(parser)

    # --no-address is an exception to the no negative-flag style guideline to be
    # consistent with gcloud compute instances create --no-address
    parser.add_argument('--no-address',
                        action='store_true',
                        help="""\
      If provided, the instances in the cluster will not be assigned external
      IP addresses.

      If omitted the instances in the cluster will each be assigned an
      ephemeral external IP address.

      Note: Dataproc VMs need access to the Dataproc API. This can be achieved
      without external IP addresses using Private Google Access
      (https://cloud.google.com/compute/docs/private-google-access).
      """)

    boot_disk_type_detailed_help = """\
      The type of the boot disk. The value must be ``pd-standard'' or
      ``pd-ssd''.
      """
    parser.add_argument('--master-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    parser.add_argument('--worker-boot-disk-type',
                        help=boot_disk_type_detailed_help)
    parser.add_argument('--preemptible-worker-boot-disk-type',
                        help=boot_disk_type_detailed_help)
Example #6
    def Args(parser):
        flags.AddTemplateFlag(parser, 'set managed cluster')
        parser.add_argument('--cluster-name',
                            help='The name of the managed dataproc cluster.')
        clusters.ArgsForClusterRef(parser, beta=True)
        flags.AddZoneFlag(parser)
        flags.AddMinCpuPlatformArgs(parser, base.ReleaseTrack.BETA)

        # TODO(b/70164645): Consolidate these arguments with the other beta args
        # These arguments are duplicated from the cluster creation beta track.
        # There should be an ArgsForClusterRefBeta method in clusters.py
        # that is invoked here so that we don't have to duplicate the arguments.
        parser.add_argument('--max-idle',
                            type=arg_parsers.Duration(),
                            help="""\
        The duration before cluster is auto-deleted after last job completes,
        such as "30m", "2h" or "1d".
        """)

        auto_delete_group = parser.add_mutually_exclusive_group()
        auto_delete_group.add_argument('--max-age',
                                       type=arg_parsers.Duration(),
                                       help="""\
        The lifespan of the cluster before it is auto-deleted, such as "30m",
        "2h" or "1d".
        """)

        auto_delete_group.add_argument('--expiration-time',
                                       type=arg_parsers.Datetime.Parse,
                                       help="""\
        The time when cluster will be auto-deleted, such as
        "2017-08-29T18:52:51.142Z"
        """)

        for instance_type in ('master', 'worker'):
            help_msg = """\
      Attaches accelerators (e.g. GPUs) to the {instance_type}
      instance(s).
      """.format(instance_type=instance_type)
            if instance_type == 'worker':
                help_msg += """
      Note:
      No accelerators will be attached to preemptible workers, because
      preemptible VMs do not support accelerators.
      """
            help_msg += """
      *type*::: The specific type (e.g. nvidia-tesla-k80 for NVIDIA Tesla
      K80) of accelerator to attach to the instances. Use 'gcloud compute
      accelerator-types list' to learn about all available accelerator
      types.

      *count*::: The number of pieces of the accelerator to attach to each
      of the instances. The default value is 1.
      """
            parser.add_argument('--{0}-accelerator'.format(instance_type),
                                type=arg_parsers.ArgDict(spec={
                                    'type': str,
                                    'count': int,
                                }),
                                metavar='type=TYPE,[count=COUNT]',
                                help=help_msg)
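Note: `arg_parsers.Duration()` (used by `--max-idle`/`--max-age`) and `arg_parsers.ArgDict(spec={'type': str, 'count': int})` (used by the accelerator flags) are also calliope internals. A simplified, self-contained sketch of the value formats they accept (illustrative stand-ins only, not the SDK parsers; the full duration syntax is documented under `gcloud topic datetimes`) could look like this:

import argparse

_DURATION_UNITS = {'s': 1, 'm': 60, 'h': 3600, 'd': 86400}
_ACCELERATOR_SPEC = {'type': str, 'count': int}


def parse_duration(value):
    # Parse '30m', '2h' or '1d' into seconds (much simpler than Duration()).
    if len(value) < 2 or value[-1] not in _DURATION_UNITS or not value[:-1].isdigit():
        raise argparse.ArgumentTypeError(
            'expected a number followed by s, m, h or d, got %r' % value)
    return int(value[:-1]) * _DURATION_UNITS[value[-1]]


def parse_accelerator(value):
    # Roughly what ArgDict(spec=...) does for 'type=TYPE,count=COUNT'.
    result = {'count': 1}  # the flag help documents a default count of 1
    for pair in value.split(','):
        key, _, val = pair.partition('=')
        if key not in _ACCELERATOR_SPEC or not val:
            raise argparse.ArgumentTypeError('expected type=TYPE,[count=COUNT]')
        result[key] = _ACCELERATOR_SPEC[key](val)
    return result


parser = argparse.ArgumentParser()
parser.add_argument('--max-idle', type=parse_duration)
parser.add_argument('--worker-accelerator', type=parse_accelerator,
                    metavar='type=TYPE,[count=COUNT]')
args = parser.parse_args(
    ['--max-idle', '2h', '--worker-accelerator', 'type=nvidia-tesla-k80,count=2'])
print(args.max_idle)            # 7200
print(args.worker_accelerator)  # {'count': 2, 'type': 'nvidia-tesla-k80'}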
Example #7
    def Args(parser):
        flags.AddTemplateFlag(parser, 'set managed cluster')
        parser.add_argument('--cluster-name',
                            help='The name of the managed dataproc cluster.')
        clusters.ArgsForClusterRef(parser)
        flags.AddZoneFlag(parser)

        parser.add_argument('--num-masters',
                            type=int,
                            help="""\
      The number of master nodes in the cluster.

      [format="csv",options="header"]
      |========
      Number of Masters,Cluster Mode
      1,Standard
      3,High Availability
      |========
      """)

        parser.add_argument('--single-node',
                            action='store_true',
                            help="""\
      Create a single node cluster.

      A single node cluster has all master and worker components.
      It cannot have any separate worker nodes.
      """)

        parser.add_argument('--max-idle',
                            type=arg_parsers.Duration(),
                            help="""\
        The duration before cluster is auto-deleted after last job completes,
        such as "30m", "2h" or "1d".
        """)

        auto_delete_group = parser.add_mutually_exclusive_group()
        auto_delete_group.add_argument('--max-age',
                                       type=arg_parsers.Duration(),
                                       help="""\
        The lifespan of the cluster before it is auto-deleted, such as "30m",
        "2h" or "1d".
        """)

        auto_delete_group.add_argument('--expiration-time',
                                       type=arg_parsers.Datetime.Parse,
                                       help="""\
        The time when cluster will be auto-deleted, such as
        "2017-08-29T18:52:51.142Z"
        """)

        for instance_type in ('master', 'worker'):
            help_msg = """\
      Attaches accelerators (e.g. GPUs) to the {instance_type}
      instance(s).
      """.format(instance_type=instance_type)
            if instance_type == 'worker':
                help_msg += """
      Note:
      No accelerators will be attached to preemptible workers, because
      preemptible VMs do not support accelerators.
      """
            help_msg += """
      *type*::: The specific type (e.g. nvidia-tesla-k80 for NVIDIA Tesla
      K80) of accelerator to attach to the instances. Use 'gcloud compute
      accelerator-types list' to learn about all available accelerator
      types.

      *count*::: The number of pieces of the accelerator to attach to each
      of the instances. The default value is 1.
      """
            parser.add_argument('--{0}-accelerator'.format(instance_type),
                                type=arg_parsers.ArgDict(spec={
                                    'type': str,
                                    'count': int,
                                }),
                                metavar='type=TYPE,[count=COUNT]',
                                help=help_msg)
        parser.add_argument('--no-address',
                            action='store_true',
                            help="""\
        If provided, the instances in the cluster will not be assigned external
        IP addresses.

        Note: Dataproc VMs need access to the Dataproc API. This can be achieved
        without external IP addresses using Private Google Access
        (https://cloud.google.com/compute/docs/private-google-access).
        """)