Example #1
    def Run(self, args):
        self.ValidateArgs(args)

        dataproc = dp.Dataproc(self.ReleaseTrack())

        cluster_ref = util.ParseCluster(args.name, dataproc)

        compute_resources = compute_helpers.GetComputeResources(
            self.ReleaseTrack(), args.name)

        cluster_config = clusters.GetClusterConfig(args,
                                                   dataproc,
                                                   cluster_ref.projectId,
                                                   compute_resources,
                                                   self.BETA,
                                                   include_ttl_config=True)

        cluster = dataproc.messages.Cluster(
            config=cluster_config,
            clusterName=cluster_ref.clusterName,
            projectId=cluster_ref.projectId)

        self.ConfigureCluster(dataproc.messages, args, cluster)

        return clusters.CreateCluster(dataproc, cluster, args.async,
                                      args.timeout)
Example #2
    def Run(self, args):
        client = self.context['dataproc_client']
        messages = self.context['dataproc_messages']

        cluster_ref = util.ParseCluster(args.name, self.context)

        request = messages.DataprocProjectsRegionsClustersDeleteRequest(
            clusterName=cluster_ref.clusterName,
            region=cluster_ref.region,
            projectId=cluster_ref.projectId)

        console_io.PromptContinue(
            message="The cluster '{0}' and all attached disks will be "
            'deleted.'.format(args.name),
            cancel_on_no=True,
            cancel_string='Deletion aborted by user.')

        operation = client.projects_regions_clusters.Delete(request)

        if args.async:
            log.status.write('Deleting [{0}] with operation [{1}].'.format(
                cluster_ref, operation.name))
            return operation

        operation = util.WaitForOperation(
            operation, self.context, 'Waiting for cluster deletion operation')
        log.DeletedResource(cluster_ref)

        return operation
Example #3
    def Run(self, args):
        dataproc = dp.Dataproc(self.ReleaseTrack())

        cluster_ref = util.ParseCluster(args.name, dataproc)

        request = dataproc.messages.DataprocProjectsRegionsClustersDeleteRequest(
            clusterName=cluster_ref.clusterName,
            region=cluster_ref.region,
            projectId=cluster_ref.projectId)

        console_io.PromptContinue(
            message="The cluster '{0}' and all attached disks will be "
            'deleted.'.format(args.name),
            cancel_on_no=True,
            cancel_string='Deletion aborted by user.')

        operation = dataproc.client.projects_regions_clusters.Delete(request)

        if args.async:
            log.status.write('Deleting [{0}] with operation [{1}].'.format(
                cluster_ref, operation.name))
            return operation

        operation = util.WaitForOperation(
            dataproc,
            operation,
            message='Waiting for cluster deletion operation',
            timeout_s=args.timeout)
        log.DeletedResource(cluster_ref)

        return operation
Example #4
    def Run(self, args):
        dataproc = dp.Dataproc(self.ReleaseTrack())

        cluster_ref = dp_util.ParseCluster(args.name, dataproc)

        request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName)

        cluster = dataproc.client.projects_regions_clusters.Get(request)

        # Filter out Dataproc-generated labels.
        clusters.DeleteGeneratedLabels(cluster, dataproc)

        schema_path = self.GetSchemaPath()
        if args.destination:
            with files.FileWriter(args.destination) as stream:
                export_util.Export(message=cluster,
                                   stream=stream,
                                   schema_path=schema_path)
        else:
            export_util.Export(message=cluster,
                               stream=sys.stdout,
                               schema_path=schema_path)
Example #5
    def Run(self, args):
        """This is what gets called when the user runs this command."""
        dataproc = dp.Dataproc(self.ReleaseTrack())

        request_id = util.GetUniqueId()
        job_id = args.id if args.id else request_id

        # Don't use ResourceArgument, because --id is hidden by default
        job_ref = util.ParseJob(job_id, dataproc)

        self.PopulateFilesByType(args)

        cluster_ref = util.ParseCluster(args.cluster, dataproc)
        request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName)

        cluster = dataproc.client.projects_regions_clusters.Get(request)

        self._staging_dir = self.GetStagingDir(cluster,
                                               job_ref.jobId,
                                               bucket=args.bucket)
        self.ValidateAndStageFiles()

        job = dataproc.messages.Job(
            reference=dataproc.messages.JobReference(
                projectId=job_ref.projectId, jobId=job_ref.jobId),
            placement=dataproc.messages.JobPlacement(clusterName=args.cluster))
        self.ConfigureJob(dataproc.messages, job, args)

        if args.max_failures_per_hour:
            scheduling = dataproc.messages.JobScheduling(
                maxFailuresPerHour=args.max_failures_per_hour)
            job.scheduling = scheduling

        request = dataproc.messages.DataprocProjectsRegionsJobsSubmitRequest(
            projectId=job_ref.projectId,
            region=job_ref.region,
            submitJobRequest=dataproc.messages.SubmitJobRequest(
                job=job, requestId=request_id))

        job = dataproc.client.projects_regions_jobs.Submit(request)

        log.status.Print('Job [{0}] submitted.'.format(job_id))

        if not args.async_:
            job = util.WaitForJobTermination(
                dataproc,
                job,
                job_ref,
                message='Waiting for job completion',
                goal_state=dataproc.messages.JobStatus.StateValueValuesEnum.
                DONE,
                error_state=dataproc.messages.JobStatus.StateValueValuesEnum.
                ERROR,
                stream_driver_log=True)
            log.status.Print('Job [{0}] finished successfully.'.format(job_id))

        return job
Example #6
  def Run(self, args):
    dataproc = dp.Dataproc(self.ReleaseTrack())
    data = console_io.ReadFromFileOrStdin(args.file or '-', binary=False)
    cluster = export_util.Import(message_type=dataproc.messages.Cluster,
                                 stream=data)
    cluster_ref = util.ParseCluster(cluster.clusterName, dataproc)
    return clusters.CreateCluster(dataproc, cluster_ref, cluster,
                                  args.async_, args.timeout)
Example #7
    def Run(self, args):
        client = self.context['dataproc_client']

        cluster_ref = util.ParseCluster(args.name, self.context)
        request = cluster_ref.Request()

        cluster = client.projects_clusters.Get(request)
        return cluster
Example #8
    def Run(self, args):
        dataproc = dp.Dataproc(self.ReleaseTrack())
        messages = dataproc.messages

        cluster_ref = util.ParseCluster(args.name, dataproc)
        request = messages.DataprocProjectsRegionsClustersGetIamPolicyRequest(
            resource=cluster_ref.RelativeName())

        return dataproc.client.projects_regions_clusters.GetIamPolicy(request)
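
The snippet above only reads the cluster's IAM policy. As a minimal companion sketch (not taken verbatim from the SDK), the helper below shows how such a policy could be written back using the SetIamPolicy request type that appears in a later example on this page; the `dataproc` wrapper, `cluster_ref`, and `policy` arguments are assumed to be the same kinds of objects built in the surrounding snippets.

def SetClusterIamPolicy(dataproc, cluster_ref, policy):
    """Sketch: overwrite a cluster's IAM policy and return the applied policy."""
    messages = dataproc.messages
    # Wrap the Policy message in a SetIamPolicyRequest, mirroring the
    # SetIamPolicy command example further down this page.
    set_request = messages.SetIamPolicyRequest(policy=policy)
    request = messages.DataprocProjectsRegionsClustersSetIamPolicyRequest(
        resource=cluster_ref.RelativeName(),
        setIamPolicyRequest=set_request)
    return dataproc.client.projects_regions_clusters.SetIamPolicy(request)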
Example #9
    def Run(self, args):
        client = self.context['dataproc_client']
        messages = self.context['dataproc_messages']

        cluster_ref = util.ParseCluster(args.name, self.context)

        cluster_config = messages.ClusterConfiguration()
        changed_fields = []

        has_changes = False

        if args.new_num_workers is not None:
            log.warn(
                '--new-num-workers parameter is deprecated and will be removed '
                'in a future release. Please use --num-workers instead')
            args.num_workers = args.new_num_workers

        if args.num_workers is not None:
            worker_config = messages.InstanceGroupConfiguration(
                numInstances=args.num_workers)
            cluster_config.workerConfiguration = worker_config
            changed_fields.append(
                'configuration.worker_configuration.num_instances')
            has_changes = True

        if args.num_preemptible_workers is not None:
            worker_config = messages.InstanceGroupConfiguration(
                numInstances=args.num_preemptible_workers)
            cluster_config.secondaryWorkerConfiguration = worker_config
            changed_fields.append(
                'configuration.secondary_worker_configuration.num_instances')
            has_changes = True

        if not has_changes:
            raise exceptions.ToolException(
                'Must specify at least one cluster parameter to update.')

        cluster = messages.Cluster(configuration=cluster_config,
                                   clusterName=cluster_ref.clusterName,
                                   projectId=cluster_ref.projectId)

        request = messages.DataprocProjectsClustersPatchRequest(
            clusterName=cluster_ref.clusterName,
            projectId=cluster_ref.projectId,
            cluster=cluster,
            updateMask=','.join(changed_fields))

        operation = client.projects_clusters.Patch(request)
        util.WaitForOperation(operation,
                              self.context,
                              message='Waiting for cluster update operation',
                              timeout_s=3600 * 3)

        cluster = client.projects_clusters.Get(cluster_ref.Request())
        log.UpdatedResource(cluster_ref)
        return cluster
Example #10
def CreateCluster(dataproc, cluster, is_async, timeout):
    """Create a cluster.

    Args:
        dataproc: Dataproc object that contains client, messages, and resources
        cluster: Cluster to create
        is_async: If True, return without waiting for the operation to complete
        timeout: Timeout used when waiting for the operation to complete

    Returns:
        Created cluster, or None if async
    """
    # Get project id and region.
    cluster_ref = util.ParseCluster(cluster.clusterName, dataproc)
    request_id = util.GetUniqueId()
    request = dataproc.messages.DataprocProjectsRegionsClustersCreateRequest(
        cluster=cluster,
        projectId=cluster_ref.projectId,
        region=cluster_ref.region,
        requestId=request_id)
    operation = dataproc.client.projects_regions_clusters.Create(request)

    if is_async:
        log.status.write('Creating [{0}] with operation [{1}].'.format(
            cluster_ref, operation.name))
        return

    operation = util.WaitForOperation(
        dataproc,
        operation,
        message='Waiting for cluster creation operation',
        timeout_s=timeout)

    get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
        projectId=cluster_ref.projectId,
        region=cluster_ref.region,
        clusterName=cluster_ref.clusterName)
    cluster = dataproc.client.projects_regions_clusters.Get(get_request)
    if cluster.status.state == (
            dataproc.messages.ClusterStatus.StateValueValuesEnum.RUNNING):

        zone_uri = cluster.config.gceClusterConfig.zoneUri
        zone_short_name = zone_uri.split('/')[-1]

        # Log the URL of the cluster
        log.CreatedResource(
            cluster_ref,
            # Also indicate which zone the cluster was placed in. This is helpful
            # if the server picked a zone (auto zone)
            details='Cluster placed in zone [{0}]'.format(zone_short_name))
    else:
        log.error('Create cluster failed!')
        if operation.details:
            log.error('Details:\n' + operation.details)
    return cluster
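
For context, here is a hedged sketch of a call site for the CreateCluster helper above, pieced together from the command examples elsewhere on this page; the `dp`, `util`, and `clusters` module aliases and the `args.async_`/`args.timeout` attributes are assumptions borrowed from those examples, not a verbatim command implementation.

    def Run(self, args):
        # Build the versioned Dataproc wrapper (client, messages, resources).
        dataproc = dp.Dataproc(self.ReleaseTrack())

        # Resolve the user-supplied name into project id, region, and name.
        cluster_ref = util.ParseCluster(args.name, dataproc)

        # A bare Cluster message; real commands also attach a ClusterConfig.
        cluster = dataproc.messages.Cluster(
            clusterName=cluster_ref.clusterName,
            projectId=cluster_ref.projectId)

        # Delegate creation and optional waiting to the helper defined above.
        return clusters.CreateCluster(dataproc, cluster, args.async_,
                                      args.timeout)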
Example #11
    def Run(self, args):
        client = self.context['dataproc_client']

        cluster_ref = util.ParseCluster(args.name, self.context)
        request = client.MESSAGES_MODULE.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName)

        cluster = client.projects_regions_clusters.Get(request)
        return cluster
Example #12
    def Run(self, args):
        dataproc = dp.Dataproc(self.ReleaseTrack())

        cluster_ref = util.ParseCluster(args.name, dataproc)
        request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName)

        cluster = dataproc.client.projects_regions_clusters.Get(request)
        return cluster
Example #13
    def Run(self, args):
        """This is what gets called when the user runs this command."""
        client = self.context['dataproc_client']
        messages = self.context['dataproc_messages']

        job_id = util.GetJobId(args.id)
        job_ref = util.ParseJob(job_id, self.context)

        self.PopulateFilesByType(args)

        cluster_ref = util.ParseCluster(args.cluster, self.context)
        request = messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName)

        try:
            cluster = client.projects_regions_clusters.Get(request)
        except apitools_exceptions.HttpError as error:
            raise exceptions.HttpException(error)

        self._staging_dir = self.GetStagingDir(cluster, job_ref.jobId)
        self.ValidateAndStageFiles()

        job = messages.Job(
            reference=messages.JobReference(projectId=job_ref.projectId,
                                            jobId=job_ref.jobId),
            placement=messages.JobPlacement(clusterName=args.cluster))

        self.ConfigureJob(job, args)

        request = messages.DataprocProjectsRegionsJobsSubmitRequest(
            projectId=job_ref.projectId,
            region=job_ref.region,
            submitJobRequest=messages.SubmitJobRequest(job=job))

        try:
            job = client.projects_regions_jobs.Submit(request)
        except apitools_exceptions.HttpError as error:
            raise exceptions.HttpException(error)

        log.status.Print('Job [{0}] submitted.'.format(job_id))

        if not args.async:
            job = util.WaitForJobTermination(
                job,
                self.context,
                message='Waiting for job completion',
                goal_state=messages.JobStatus.StateValueValuesEnum.DONE,
                stream_driver_log=True)
            log.status.Print('Job [{0}] finished successfully.'.format(job_id))

        return job
Example #14
    def Run(self, args):
        dataproc = dp.Dataproc(self.ReleaseTrack())
        messages = dataproc.messages

        policy = iam_util.ParsePolicyFile(args.policy_file, messages.Policy)
        set_iam_policy_request = messages.SetIamPolicyRequest(policy=policy)

        cluster_ref = util.ParseCluster(args.cluster, dataproc)
        request = messages.DataprocProjectsRegionsClustersSetIamPolicyRequest(
            resource=cluster_ref.RelativeName(),
            setIamPolicyRequest=set_iam_policy_request)

        return dataproc.client.projects_regions_clusters.SetIamPolicy(request)
Example #15
  def Run(self, args):
    """This is what gets called when the user runs this command."""
    dataproc = dp.Dataproc(self.ReleaseTrack())

    job_id = util.GetJobId(args.id)
    job_ref = util.ParseJob(job_id, dataproc)

    self.PopulateFilesByType(args)

    cluster_ref = util.ParseCluster(args.cluster, dataproc)
    request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
        projectId=cluster_ref.projectId,
        region=cluster_ref.region,
        clusterName=cluster_ref.clusterName)

    cluster = dataproc.client.projects_regions_clusters.Get(request)

    self._staging_dir = self.GetStagingDir(
        cluster, job_ref.jobId, bucket=args.bucket)
    self.ValidateAndStageFiles()

    job = dataproc.messages.Job(
        reference=dataproc.messages.JobReference(
            projectId=job_ref.projectId,
            jobId=job_ref.jobId),
        placement=dataproc.messages.JobPlacement(
            clusterName=args.cluster))

    self.ConfigureJob(dataproc.messages, job, args)

    request = dataproc.messages.DataprocProjectsRegionsJobsSubmitRequest(
        projectId=job_ref.projectId,
        region=job_ref.region,
        submitJobRequest=dataproc.messages.SubmitJobRequest(
            job=job))

    job = dataproc.client.projects_regions_jobs.Submit(request)

    log.status.Print('Job [{0}] submitted.'.format(job_id))

    if not args.async:
      job = util.WaitForJobTermination(
          dataproc,
          job,
          message='Waiting for job completion',
          goal_state=dataproc.messages.JobStatus.StateValueValuesEnum.DONE,
          stream_driver_log=True)
      log.status.Print('Job [{0}] finished successfully.'.format(job_id))

    return job
Example #16
  def Run(self, args):
    dataproc = dp.Dataproc(self.ReleaseTrack())
    msgs = dataproc.messages

    data = console_io.ReadFromFileOrStdin(args.source or '-', binary=False)
    cluster = export_util.Import(message_type=msgs.Cluster,
                                 stream=data,
                                 schema_path=self.GetSchemaPath())

    cluster_ref = dp_util.ParseCluster(args.name, dataproc)
    cluster.clusterName = cluster_ref.clusterName
    cluster.projectId = cluster_ref.projectId

    # Import only supports create, not update (for now).
    return clusters.CreateCluster(dataproc, cluster, args.async, args.timeout)
Example #17
  def Run(self, args):
    client = self.context['dataproc_client']
    messages = self.context['dataproc_messages']

    cluster_ref = util.ParseCluster(args.name, self.context)

    request = messages.DataprocProjectsRegionsClustersDiagnoseRequest(
        clusterName=cluster_ref.clusterName,
        region=cluster_ref.region,
        projectId=cluster_ref.projectId)

    try:
      operation = client.projects_regions_clusters.Diagnose(request)
      # TODO(user): Stream output during polling.
      operation = util.WaitForOperation(
          operation, self.context,
          message='Waiting for cluster diagnose operation')
      response = operation.response
    except apitools_exceptions.HttpError as error:
      raise exceptions.HttpException(util.FormatHttpError(error))

    if not response:
      raise exceptions.ToolException('Operation is missing response')

    properties = encoding.MessageToDict(response)
    output_uri = properties['outputUri']

    if not output_uri:
      raise exceptions.ToolException('Response is missing outputUri')

    log.err.Print('Output from diagnostic:')
    log.err.Print('-----------------------------------------------')
    driver_log_stream = storage_helpers.StorageObjectSeriesStream(
        output_uri)
    # A single read might not read whole stream. Try a few times.
    read_retrier = retry.Retryer(max_retrials=4, jitter_ms=None)
    try:
      read_retrier.RetryOnResult(
          lambda: driver_log_stream.ReadIntoWritable(log.err),
          sleep_ms=100,
          should_retry_if=lambda *_: driver_log_stream.open)
    except retry.MaxRetrialsException:
      log.warn(
          'Diagnostic finished successfully, '
          'but output did not finish streaming.')
    log.err.Print('-----------------------------------------------')
    return output_uri
Example #18
  def Run(self, args):
    dataproc = dp.Dataproc(self.ReleaseTrack())

    cluster_ref = util.ParseCluster(args.name, dataproc)

    request = dataproc.messages.DataprocProjectsRegionsClustersDiagnoseRequest(
        clusterName=cluster_ref.clusterName,
        region=cluster_ref.region,
        projectId=cluster_ref.projectId)

    operation = dataproc.client.projects_regions_clusters.Diagnose(request)
    # TODO(b/36052522): Stream output during polling.
    operation = util.WaitForOperation(
        dataproc,
        operation,
        message='Waiting for cluster diagnose operation',
        timeout_s=args.timeout)

    if not operation.response:
      raise exceptions.OperationError('Operation is missing response')

    properties = encoding.MessageToDict(operation.response)
    output_uri = properties['outputUri']

    if not output_uri:
      raise exceptions.OperationError('Response is missing outputUri')

    log.err.Print('Output from diagnostic:')
    log.err.Print('-----------------------------------------------')
    driver_log_stream = storage_helpers.StorageObjectSeriesStream(
        output_uri)
    # A single read might not read whole stream. Try a few times.
    read_retrier = retry.Retryer(max_retrials=4, jitter_ms=None)
    try:
      read_retrier.RetryOnResult(
          lambda: driver_log_stream.ReadIntoWritable(log.err),
          sleep_ms=100,
          should_retry_if=lambda *_: driver_log_stream.open)
    except retry.MaxRetrialsException:
      log.warning(
          'Diagnostic finished successfully, '
          'but output did not finish streaming.')
    log.err.Print('-----------------------------------------------')
    return output_uri
Example #19
    def Run(self, args):
        dataproc = dp.Dataproc(self.ReleaseTrack())
        msgs = dataproc.messages

        if args.source:
            with files.FileReader(args.source) as stream:
                cluster = util.ReadYaml(message_type=msgs.Cluster,
                                        stream=stream,
                                        schema_path=SCHEMA_PATH)
        else:
            cluster = util.ReadYaml(message_type=msgs.Cluster,
                                    stream=sys.stdin,
                                    schema_path=SCHEMA_PATH)

        cluster_ref = util.ParseCluster(args.name, dataproc)
        cluster.clusterName = cluster_ref.clusterName
        cluster.projectId = cluster_ref.projectId

        # Import only supports create, not update (for now).
        return clusters.CreateCluster(dataproc, cluster, args.async,
                                      args.timeout)
Example #20
    def Run(self, args):
        client = self.context['dataproc_client']
        messages = self.context['dataproc_messages']

        cluster_ref = util.ParseCluster(args.name, self.context)

        request = messages.DataprocProjectsClustersDeleteRequest(
            clusterName=cluster_ref.clusterName,
            projectId=cluster_ref.projectId)

        if not console_io.PromptContinue(
                message="The cluster '{0}' and all attached disks will be "
                'deleted.'.format(args.name)):
            raise exceptions.ToolException('Deletion aborted by user.')

        operation = client.projects_clusters.Delete(request)
        operation = util.WaitForOperation(
            operation, self.context, 'Waiting for cluster deletion operation')
        log.DeletedResource(cluster_ref)

        return operation
Example #21
    def Run(self, args):
        client = self.context['dataproc_client']
        messages = self.context['dataproc_messages']

        cluster_ref = util.ParseCluster(args.name, self.context)

        request = messages.DataprocProjectsClustersDiagnoseRequest(
            clusterName=cluster_ref.clusterName,
            projectId=cluster_ref.projectId)

        try:
            operation = client.projects_clusters.Diagnose(request)
            operation = util.WaitForOperation(
                operation,
                self.context,
                message='Waiting for cluster diagnose operation')
            response = operation.response
        except apitools_base.HttpError as error:
            raise exceptions.HttpException(util.FormatHttpError(error))

        if not response:
            raise exceptions.ToolException('Operation is missing response')

        properties = apitools_base.MessageToDict(response)
        output_uri = properties['outputUri']

        if not output_uri:
            raise exceptions.ToolException('Response is missing outputUri')

        log.err.Print('Output from diagnostic:')
        log.err.Print('-----------------------------------------------')
        driver_log_stream = storage_helpers.StorageObjectSeriesStream(
            output_uri)
        driver_log_stream.ReadIntoWritable(log.err)
        log.err.Print('-----------------------------------------------')
        return output_uri
Example #22
  def Run(self, args):
    self.ValidateArgs(args)

    dataproc = dp.Dataproc(self.ReleaseTrack())

    cluster_ref = util.ParseCluster(args.name, dataproc)

    compute_resources = compute_helpers.GetComputeResources(
        self.ReleaseTrack(), args.name)

    master_accelerator_type = None
    worker_accelerator_type = None
    master_accelerator_count = None
    worker_accelerator_count = None
    if self.ReleaseTrack() == base.ReleaseTrack.BETA:
      if args.master_accelerator:
        master_accelerator_type = args.master_accelerator['type']
        master_accelerator_count = args.master_accelerator.get('count', 1)
      if args.worker_accelerator:
        worker_accelerator_type = args.worker_accelerator['type']
        worker_accelerator_count = args.worker_accelerator.get('count', 1)

    # Resolve non-zonal GCE resources
    # We will let the server resolve short names of zonal resources because
    # if auto zone is requested, we will not know the zone before sending the
    # request
    image_ref = args.image and compute_resources.Parse(
        args.image,
        params={'project': cluster_ref.projectId},
        collection='compute.images')
    network_ref = args.network and compute_resources.Parse(
        args.network,
        params={'project': cluster_ref.projectId},
        collection='compute.networks')
    subnetwork_ref = args.subnet and compute_resources.Parse(
        args.subnet,
        params={
            'project': cluster_ref.projectId,
            'region': properties.VALUES.compute.region.GetOrFail,
        },
        collection='compute.subnetworks')
    timeout_str = str(args.initialization_action_timeout) + 's'
    init_actions = [
        dataproc.messages.NodeInitializationAction(
            executableFile=exe, executionTimeout=timeout_str)
        for exe in (args.initialization_actions or [])]
    # Increase the client timeout for each initialization action.
    args.timeout += args.initialization_action_timeout * len(init_actions)

    expanded_scopes = compute_helpers.ExpandScopeAliases(args.scopes)

    software_config = dataproc.messages.SoftwareConfig(
        imageVersion=args.image_version)

    master_boot_disk_size_gb = args.master_boot_disk_size_gb
    if args.master_boot_disk_size:
      master_boot_disk_size_gb = (
          api_utils.BytesToGb(args.master_boot_disk_size))

    worker_boot_disk_size_gb = args.worker_boot_disk_size_gb
    if args.worker_boot_disk_size:
      worker_boot_disk_size_gb = (
          api_utils.BytesToGb(args.worker_boot_disk_size))

    preemptible_worker_boot_disk_size_gb = (
        api_utils.BytesToGb(args.preemptible_worker_boot_disk_size))

    if args.single_node:
      args.properties[constants.ALLOW_ZERO_WORKERS_PROPERTY] = 'true'

    if args.properties:
      software_config.properties = encoding.DictToMessage(
          args.properties, dataproc.messages.SoftwareConfig.PropertiesValue)

    gce_cluster_config = dataproc.messages.GceClusterConfig(
        networkUri=network_ref and network_ref.SelfLink(),
        subnetworkUri=subnetwork_ref and subnetwork_ref.SelfLink(),
        internalIpOnly=args.no_address,
        serviceAccount=args.service_account,
        serviceAccountScopes=expanded_scopes,
        zoneUri=properties.VALUES.compute.zone.GetOrFail())

    if args.tags:
      gce_cluster_config.tags = args.tags

    if args.metadata:
      flat_metadata = dict((k, v) for d in args.metadata for k, v in d.items())
      gce_cluster_config.metadata = encoding.DictToMessage(
          flat_metadata, dataproc.messages.GceClusterConfig.MetadataValue)

    master_accelerators = []
    if master_accelerator_type:
      master_accelerators.append(
          dataproc.messages.AcceleratorConfig(
              acceleratorTypeUri=master_accelerator_type,
              acceleratorCount=master_accelerator_count))
    worker_accelerators = []
    if worker_accelerator_type:
      worker_accelerators.append(
          dataproc.messages.AcceleratorConfig(
              acceleratorTypeUri=worker_accelerator_type,
              acceleratorCount=worker_accelerator_count))

    cluster_config = dataproc.messages.ClusterConfig(
        configBucket=args.bucket,
        gceClusterConfig=gce_cluster_config,
        masterConfig=dataproc.messages.InstanceGroupConfig(
            numInstances=args.num_masters,
            imageUri=image_ref and image_ref.SelfLink(),
            machineTypeUri=args.master_machine_type,
            accelerators=master_accelerators,
            diskConfig=dataproc.messages.DiskConfig(
                bootDiskSizeGb=master_boot_disk_size_gb,
                numLocalSsds=args.num_master_local_ssds,),),
        workerConfig=dataproc.messages.InstanceGroupConfig(
            numInstances=args.num_workers,
            imageUri=image_ref and image_ref.SelfLink(),
            machineTypeUri=args.worker_machine_type,
            accelerators=worker_accelerators,
            diskConfig=dataproc.messages.DiskConfig(
                bootDiskSizeGb=worker_boot_disk_size_gb,
                numLocalSsds=args.num_worker_local_ssds,),),
        initializationActions=init_actions,
        softwareConfig=software_config,)

    # Secondary worker group is optional. However, users may specify
    # future pVM disk size at creation time.
    if (args.num_preemptible_workers is not None or
        preemptible_worker_boot_disk_size_gb is not None):
      cluster_config.secondaryWorkerConfig = (
          dataproc.messages.InstanceGroupConfig(
              numInstances=args.num_preemptible_workers,
              diskConfig=dataproc.messages.DiskConfig(
                  bootDiskSizeGb=preemptible_worker_boot_disk_size_gb,
              )))

    cluster = dataproc.messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        projectId=cluster_ref.projectId)

    self.ConfigureCluster(dataproc.messages, args, cluster)

    operation = dataproc.client.projects_regions_clusters.Create(
        dataproc.messages.DataprocProjectsRegionsClustersCreateRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            cluster=cluster))

    if args.async:
      log.status.write(
          'Creating [{0}] with operation [{1}].'.format(
              cluster_ref, operation.name))
      return

    operation = util.WaitForOperation(
        dataproc,
        operation,
        message='Waiting for cluster creation operation',
        timeout_s=args.timeout)

    get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
        projectId=cluster_ref.projectId,
        region=cluster_ref.region,
        clusterName=cluster_ref.clusterName)
    cluster = dataproc.client.projects_regions_clusters.Get(get_request)
    if cluster.status.state == (
        dataproc.messages.ClusterStatus.StateValueValuesEnum.RUNNING):

      zone_uri = cluster.config.gceClusterConfig.zoneUri
      zone_short_name = zone_uri.split('/')[-1]

      # Log the URL of the cluster
      log.CreatedResource(
          cluster_ref,
          # Also indicate which zone the cluster was placed in. This is helpful
          # if the server picked a zone (auto zone)
          details='Cluster placed in zone [{0}]'.format(zone_short_name))
    else:
      log.error('Create cluster failed!')
      if operation.details:
        log.error('Details:\n' + operation.details)
    return cluster
Example #23
    def Run(self, args):
        dataproc = dp.Dataproc(self.ReleaseTrack())

        cluster_ref = util.ParseCluster(args.name, dataproc)

        cluster_config = dataproc.messages.ClusterConfig()
        changed_fields = []

        has_changes = False

        if args.num_workers is not None:
            worker_config = dataproc.messages.InstanceGroupConfig(
                numInstances=args.num_workers)
            cluster_config.workerConfig = worker_config
            changed_fields.append('config.worker_config.num_instances')
            has_changes = True

        if args.num_preemptible_workers is not None:
            worker_config = dataproc.messages.InstanceGroupConfig(
                numInstances=args.num_preemptible_workers)
            cluster_config.secondaryWorkerConfig = worker_config
            changed_fields.append(
                'config.secondary_worker_config.num_instances')
            has_changes = True

        if self.ReleaseTrack() == base.ReleaseTrack.BETA:
            lifecycle_config = dataproc.messages.LifecycleConfig()
            changed_config = False
            if args.max_age is not None:
                lifecycle_config.autoDeleteTtl = str(args.max_age) + 's'
                changed_config = True
            if args.expiration_time is not None:
                lifecycle_config.autoDeleteTime = times.FormatDateTime(
                    args.expiration_time)
                changed_config = True
            if args.max_idle is not None:
                lifecycle_config.idleDeleteTtl = str(args.max_idle) + 's'
                changed_config = True
            if changed_config:
                cluster_config.lifecycleConfig = lifecycle_config
                changed_fields.append('config.lifecycle_config')
                has_changes = True

        # Update labels if the user requested it
        labels = None
        if args.update_labels or args.remove_labels:
            has_changes = True
            changed_fields.append('labels')

            # We need to fetch cluster first so we know what the labels look like. The
            # labels_util.UpdateLabels will fill out the proto for us with all the
            # updates and removals, but first we need to provide the current state
            # of the labels
            get_cluster_request = (
                dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
                    projectId=cluster_ref.projectId,
                    region=cluster_ref.region,
                    clusterName=cluster_ref.clusterName))
            current_cluster = dataproc.client.projects_regions_clusters.Get(
                get_cluster_request)
            labels = labels_util.UpdateLabels(
                current_cluster.labels, dataproc.messages.Cluster.LabelsValue,
                args.update_labels, args.remove_labels)

        if not has_changes:
            raise exceptions.ArgumentError(
                'Must specify at least one cluster parameter to update.')

        cluster = dataproc.messages.Cluster(
            config=cluster_config,
            clusterName=cluster_ref.clusterName,
            labels=labels,
            projectId=cluster_ref.projectId)

        request = dataproc.messages.DataprocProjectsRegionsClustersPatchRequest(
            clusterName=cluster_ref.clusterName,
            region=cluster_ref.region,
            projectId=cluster_ref.projectId,
            cluster=cluster,
            updateMask=','.join(changed_fields))

        if (self.ReleaseTrack() == base.ReleaseTrack.BETA
                and args.graceful_decommission_timeout):
            request.gracefulDecommissionTimeout = (
                str(args.graceful_decommission_timeout) + 's')

        operation = dataproc.client.projects_regions_clusters.Patch(request)

        if args.async:
            log.status.write('Updating [{0}] with operation [{1}].'.format(
                cluster_ref, operation.name))
            return

        util.WaitForOperation(dataproc,
                              operation,
                              message='Waiting for cluster update operation',
                              timeout_s=args.timeout)

        request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName)
        cluster = dataproc.client.projects_regions_clusters.Get(request)
        log.UpdatedResource(cluster_ref)
        return cluster
Example #24
    def Run(self, args):
        client = self.context['dataproc_client']
        messages = self.context['dataproc_messages']

        cluster_ref = util.ParseCluster(args.name, self.context)

        cluster_config = messages.ClusterConfig()
        changed_fields = []

        has_changes = False

        if args.num_workers is not None:
            worker_config = messages.InstanceGroupConfig(
                numInstances=args.num_workers)
            cluster_config.workerConfig = worker_config
            changed_fields.append('config.worker_config.num_instances')
            has_changes = True

        if args.num_preemptible_workers is not None:
            worker_config = messages.InstanceGroupConfig(
                numInstances=args.num_preemptible_workers)
            cluster_config.secondaryWorkerConfig = worker_config
            changed_fields.append(
                'config.secondary_worker_config.num_instances')
            has_changes = True

        # Update labels if the user requested it
        labels = None
        if args.update_labels or args.remove_labels:
            has_changes = True
            changed_fields.append('labels')

            # We need to fetch cluster first so we know what the labels look like. The
            # labels_util.UpdateLabels will fill out the proto for us with all the
            # updates and removals, but first we need to provide the current state
            # of the labels
            get_cluster_request = (client.MESSAGES_MODULE.
                                   DataprocProjectsRegionsClustersGetRequest(
                                       projectId=cluster_ref.projectId,
                                       region=cluster_ref.region,
                                       clusterName=cluster_ref.clusterName))
            current_cluster = client.projects_regions_clusters.Get(
                get_cluster_request)
            labels = labels_util.UpdateLabels(current_cluster.labels,
                                              messages.Cluster.LabelsValue,
                                              args.update_labels,
                                              args.remove_labels)

        if not has_changes:
            raise exceptions.ArgumentError(
                'Must specify at least one cluster parameter to update.')

        cluster = messages.Cluster(config=cluster_config,
                                   clusterName=cluster_ref.clusterName,
                                   labels=labels,
                                   projectId=cluster_ref.projectId)

        request = messages.DataprocProjectsRegionsClustersPatchRequest(
            clusterName=cluster_ref.clusterName,
            region=cluster_ref.region,
            projectId=cluster_ref.projectId,
            cluster=cluster,
            updateMask=','.join(changed_fields))

        operation = client.projects_regions_clusters.Patch(request)

        if args.async:
            log.status.write('Updating [{0}] with operation [{1}].'.format(
                cluster_ref, operation.name))
            return

        util.WaitForOperation(operation,
                              self.context,
                              message='Waiting for cluster update operation',
                              timeout_s=3600 * 3)

        request = client.MESSAGES_MODULE.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName)
        cluster = client.projects_regions_clusters.Get(request)
        log.UpdatedResource(cluster_ref)
        return cluster
Example #25
    def Run(self, args):
        self.ValidateArgs(args)

        client = self.context['dataproc_client']
        messages = self.context['dataproc_messages']

        cluster_ref = util.ParseCluster(args.name, self.context)

        compute_resources = compute_helpers.GetComputeResources(
            self.ReleaseTrack(), args.name)

        master_accelerator_type = None
        worker_accelerator_type = None
        master_accelerator_count = None
        worker_accelerator_count = None
        if self.ReleaseTrack() == base.ReleaseTrack.BETA:
            if args.master_accelerator:
                master_accelerator_type = args.master_accelerator['type']
                master_accelerator_count = args.master_accelerator.get(
                    'count', 1)
            if args.worker_accelerator:
                worker_accelerator_type = args.worker_accelerator['type']
                worker_accelerator_count = args.worker_accelerator.get(
                    'count', 1)

        # Resolve GCE resources
        zone_ref = compute_resources.Parse(None, collection='compute.zones')
        image_ref = args.image and compute_resources.Parse(
            args.image, collection='compute.images')
        master_machine_type_ref = (args.master_machine_type
                                   and compute_resources.Parse(
                                       args.master_machine_type,
                                       collection='compute.machineTypes'))
        worker_machine_type_ref = (args.worker_machine_type
                                   and compute_resources.Parse(
                                       args.worker_machine_type,
                                       collection='compute.machineTypes'))
        network_ref = args.network and compute_resources.Parse(
            args.network, collection='compute.networks')
        subnetwork_ref = args.subnet and compute_resources.Parse(
            args.subnet, collection='compute.subnetworks')
        master_accelerator_type_ref = (
            master_accelerator_type and compute_resources.Parse(
                master_accelerator_type,
                collection='compute.acceleratorTypes'))
        worker_accelerator_type_ref = (
            worker_accelerator_type
            and compute_resources.Parse(worker_accelerator_type,
                                        collection='compute.acceleratorTypes'))

        init_actions = []
        timeout_str = str(args.initialization_action_timeout) + 's'
        if args.initialization_actions:
            init_actions = [
                messages.NodeInitializationAction(executableFile=exe,
                                                  executionTimeout=timeout_str)
                for exe in args.initialization_actions
            ]
        expanded_scopes = compute_helpers.ExpandScopeAliases(args.scopes)

        software_config = messages.SoftwareConfig(
            imageVersion=args.image_version)

        master_boot_disk_size_gb = args.master_boot_disk_size_gb
        if args.master_boot_disk_size:
            master_boot_disk_size_gb = (api_utils.BytesToGb(
                args.master_boot_disk_size))

        worker_boot_disk_size_gb = args.worker_boot_disk_size_gb
        if args.worker_boot_disk_size:
            worker_boot_disk_size_gb = (api_utils.BytesToGb(
                args.worker_boot_disk_size))

        preemptible_worker_boot_disk_size_gb = (api_utils.BytesToGb(
            args.preemptible_worker_boot_disk_size))

        if args.single_node:
            args.properties[constants.ALLOW_ZERO_WORKERS_PROPERTY] = 'true'

        if args.properties:
            software_config.properties = encoding.DictToMessage(
                args.properties, messages.SoftwareConfig.PropertiesValue)

        gce_cluster_config = messages.GceClusterConfig(
            networkUri=network_ref and network_ref.SelfLink(),
            subnetworkUri=subnetwork_ref and subnetwork_ref.SelfLink(),
            serviceAccount=args.service_account,
            serviceAccountScopes=expanded_scopes,
            zoneUri=zone_ref and zone_ref.SelfLink())

        if args.tags:
            gce_cluster_config.tags = args.tags

        if args.metadata:
            flat_metadata = dict(
                (k, v) for d in args.metadata for k, v in d.items())
            gce_cluster_config.metadata = encoding.DictToMessage(
                flat_metadata, messages.GceClusterConfig.MetadataValue)

        master_accelerators = []
        if master_accelerator_type:
            master_accelerators.append(
                messages.AcceleratorConfig(
                    acceleratorTypeUri=master_accelerator_type_ref
                    and master_accelerator_type_ref.SelfLink(),
                    acceleratorCount=master_accelerator_count))
        worker_accelerators = []
        if worker_accelerator_type:
            worker_accelerators.append(
                messages.AcceleratorConfig(
                    acceleratorTypeUri=worker_accelerator_type_ref
                    and worker_accelerator_type_ref.SelfLink(),
                    acceleratorCount=worker_accelerator_count))

        cluster_config = messages.ClusterConfig(
            configBucket=args.bucket,
            gceClusterConfig=gce_cluster_config,
            masterConfig=messages.InstanceGroupConfig(
                numInstances=args.num_masters,
                imageUri=image_ref and image_ref.SelfLink(),
                machineTypeUri=master_machine_type_ref
                and master_machine_type_ref.SelfLink(),
                accelerators=master_accelerators,
                diskConfig=messages.DiskConfig(
                    bootDiskSizeGb=master_boot_disk_size_gb,
                    numLocalSsds=args.num_master_local_ssds,
                ),
            ),
            workerConfig=messages.InstanceGroupConfig(
                numInstances=args.num_workers,
                imageUri=image_ref and image_ref.SelfLink(),
                machineTypeUri=worker_machine_type_ref
                and worker_machine_type_ref.SelfLink(),
                accelerators=worker_accelerators,
                diskConfig=messages.DiskConfig(
                    bootDiskSizeGb=worker_boot_disk_size_gb,
                    numLocalSsds=args.num_worker_local_ssds,
                ),
            ),
            initializationActions=init_actions,
            softwareConfig=software_config,
        )

        # Secondary worker group is optional. However, users may specify
        # future pVM disk size at creation time.
        if (args.num_preemptible_workers is not None
                or preemptible_worker_boot_disk_size_gb is not None):
            cluster_config.secondaryWorkerConfig = (
                messages.InstanceGroupConfig(
                    numInstances=args.num_preemptible_workers,
                    diskConfig=messages.DiskConfig(
                        bootDiskSizeGb=preemptible_worker_boot_disk_size_gb, ))
            )

        cluster = messages.Cluster(config=cluster_config,
                                   clusterName=cluster_ref.clusterName,
                                   projectId=cluster_ref.projectId)

        self.ConfigureCluster(messages, args, cluster)

        operation = client.projects_regions_clusters.Create(
            messages.DataprocProjectsRegionsClustersCreateRequest(
                projectId=cluster_ref.projectId,
                region=cluster_ref.region,
                cluster=cluster))

        if args.async:
            log.status.write('Creating [{0}] with operation [{1}].'.format(
                cluster_ref, operation.name))
            return

        operation = util.WaitForOperation(
            operation, self.context, 'Waiting for cluster creation operation')

        get_request = messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName)
        cluster = client.projects_regions_clusters.Get(get_request)
        if cluster.status.state == (
                messages.ClusterStatus.StateValueValuesEnum.RUNNING):
            log.CreatedResource(cluster_ref)
        else:
            log.error('Create cluster failed!')
            if operation.details:
                log.error('Details:\n' + operation.details)
        return cluster
Example #26
    def Run(self, args):
        self.ValidateArgs(args)

        client = self.context['dataproc_client']
        messages = self.context['dataproc_messages']

        cluster_ref = util.ParseCluster(args.name, self.context)

        config_helper = compute_helpers.ConfigurationHelper.FromContext(
            self.context)
        compute_uris = config_helper.ResolveGceUris(args.name, args.image,
                                                    args.master_machine_type,
                                                    args.worker_machine_type,
                                                    args.network, args.subnet)

        init_actions = []
        timeout_str = str(args.initialization_action_timeout) + 's'
        if args.initialization_actions:
            init_actions = [
                messages.NodeInitializationAction(executableFile=exe,
                                                  executionTimeout=timeout_str)
                for exe in args.initialization_actions
            ]
        expanded_scopes = compute_helpers.ExpandScopeAliases(args.scopes)

        software_config = messages.SoftwareConfig(
            imageVersion=args.image_version)

        master_boot_disk_size_gb = args.master_boot_disk_size_gb
        if args.master_boot_disk_size:
            master_boot_disk_size_gb = (api_utils.BytesToGb(
                args.master_boot_disk_size))

        worker_boot_disk_size_gb = args.worker_boot_disk_size_gb
        if args.worker_boot_disk_size:
            worker_boot_disk_size_gb = (api_utils.BytesToGb(
                args.worker_boot_disk_size))

        preemptible_worker_boot_disk_size_gb = (api_utils.BytesToGb(
            args.preemptible_worker_boot_disk_size))

        if args.properties:
            software_config.properties = encoding.DictToMessage(
                args.properties, messages.SoftwareConfig.PropertiesValue)

        gce_cluster_config = messages.GceClusterConfig(
            networkUri=compute_uris['network'],
            subnetworkUri=compute_uris['subnetwork'],
            serviceAccountScopes=expanded_scopes,
            zoneUri=compute_uris['zone'])

        if args.tags:
            gce_cluster_config.tags = args.tags

        if args.metadata:
            flat_metadata = dict(
                (k, v) for d in args.metadata for k, v in d.items())
            gce_cluster_config.metadata = encoding.DictToMessage(
                flat_metadata, messages.GceClusterConfig.MetadataValue)

        cluster_config = messages.ClusterConfig(
            configBucket=args.bucket,
            gceClusterConfig=gce_cluster_config,
            masterConfig=messages.InstanceGroupConfig(
                numInstances=args.num_masters,
                imageUri=compute_uris['image'],
                machineTypeUri=compute_uris['master_machine_type'],
                diskConfig=messages.DiskConfig(
                    bootDiskSizeGb=master_boot_disk_size_gb,
                    numLocalSsds=args.num_master_local_ssds,
                ),
            ),
            workerConfig=messages.InstanceGroupConfig(
                numInstances=args.num_workers,
                imageUri=compute_uris['image'],
                machineTypeUri=compute_uris['worker_machine_type'],
                diskConfig=messages.DiskConfig(
                    bootDiskSizeGb=worker_boot_disk_size_gb,
                    numLocalSsds=args.num_worker_local_ssds,
                ),
            ),
            initializationActions=init_actions,
            softwareConfig=software_config,
        )

        # Secondary worker group is optional. However, users may specify
        # future pVM disk size at creation time.
        if (args.num_preemptible_workers is not None
                or preemptible_worker_boot_disk_size_gb is not None):
            cluster_config.secondaryWorkerConfig = (
                messages.InstanceGroupConfig(
                    numInstances=args.num_preemptible_workers,
                    diskConfig=messages.DiskConfig(
                        bootDiskSizeGb=preemptible_worker_boot_disk_size_gb, ))
            )

        cluster = messages.Cluster(config=cluster_config,
                                   clusterName=cluster_ref.clusterName,
                                   projectId=cluster_ref.projectId)

        operation = client.projects_regions_clusters.Create(
            messages.DataprocProjectsRegionsClustersCreateRequest(
                projectId=cluster_ref.projectId,
                region=cluster_ref.region,
                cluster=cluster))

        if args.async:
            log.status.write('Creating [{0}] with operation [{1}].'.format(
                cluster_ref, operation.name))
            return

        operation = util.WaitForOperation(
            operation, self.context, 'Waiting for cluster creation operation')

        cluster = client.projects_regions_clusters.Get(cluster_ref.Request())
        if cluster.status.state == (
                messages.ClusterStatus.StateValueValuesEnum.RUNNING):
            log.CreatedResource(cluster_ref)
        else:
            log.error('Create cluster failed!')
            if operation.details:
                log.error('Details:\n' + operation.details)
        return cluster
Example #27
    def Run(self, args):
        self.ValidateArgs(args)

        dataproc = dp.Dataproc(self.ReleaseTrack())

        cluster_ref = util.ParseCluster(args.name, dataproc)

        compute_resources = compute_helpers.GetComputeResources(
            self.ReleaseTrack(), args.name)

        beta = self.ReleaseTrack() == base.ReleaseTrack.BETA
        cluster_config = clusters.GetClusterConfig(args, dataproc,
                                                   cluster_ref.projectId,
                                                   compute_resources, beta)

        cluster = dataproc.messages.Cluster(
            config=cluster_config,
            clusterName=cluster_ref.clusterName,
            projectId=cluster_ref.projectId)

        self.ConfigureCluster(dataproc.messages, args, cluster)

        operation = dataproc.client.projects_regions_clusters.Create(
            dataproc.messages.DataprocProjectsRegionsClustersCreateRequest(
                projectId=cluster_ref.projectId,
                region=cluster_ref.region,
                cluster=cluster))

        if args.async:
            log.status.write('Creating [{0}] with operation [{1}].'.format(
                cluster_ref, operation.name))
            return

        operation = util.WaitForOperation(
            dataproc,
            operation,
            message='Waiting for cluster creation operation',
            timeout_s=args.timeout)

        get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName)
        cluster = dataproc.client.projects_regions_clusters.Get(get_request)
        if cluster.status.state == (
                dataproc.messages.ClusterStatus.StateValueValuesEnum.RUNNING):

            zone_uri = cluster.config.gceClusterConfig.zoneUri
            zone_short_name = zone_uri.split('/')[-1]

            # Log the URL of the cluster
            log.CreatedResource(
                cluster_ref,
                # Also indicate which zone the cluster was placed in. This is helpful
                # if the server picked a zone (auto zone)
                details='Cluster placed in zone [{0}]'.format(zone_short_name))
        else:
            log.error('Create cluster failed!')
            if operation.details:
                log.error('Details:\n' + operation.details)
        return cluster
Example #28
    def Run(self, args):
        client = self.context['dataproc_client']
        messages = self.context['dataproc_messages']

        cluster_ref = util.ParseCluster(args.name, self.context)

        cluster_config = messages.ClusterConfig()
        changed_fields = []

        has_changes = False

        if args.new_num_workers is not None:
            log.warn(
                '--new-num-workers parameter is deprecated and will be removed '
                'in a future release. Please use --num-workers instead')
            args.num_workers = args.new_num_workers

        if args.num_workers is not None:
            worker_config = messages.InstanceGroupConfig(
                numInstances=args.num_workers)
            cluster_config.workerConfig = worker_config
            changed_fields.append('config.worker_config.num_instances')
            has_changes = True

        if args.num_preemptible_workers is not None:
            worker_config = messages.InstanceGroupConfig(
                numInstances=args.num_preemptible_workers)
            cluster_config.secondaryWorkerConfig = worker_config
            changed_fields.append(
                'config.secondary_worker_config.num_instances')
            has_changes = True

        if not has_changes:
            raise exceptions.ArgumentError(
                'Must specify at least one cluster parameter to update.')

        cluster = messages.Cluster(config=cluster_config,
                                   clusterName=cluster_ref.clusterName,
                                   projectId=cluster_ref.projectId)

        request = messages.DataprocProjectsRegionsClustersPatchRequest(
            clusterName=cluster_ref.clusterName,
            region=cluster_ref.region,
            projectId=cluster_ref.projectId,
            cluster=cluster,
            updateMask=','.join(changed_fields))

        operation = client.projects_regions_clusters.Patch(request)

        if args.async:
            log.status.write('Updating [{0}] with operation [{1}].'.format(
                cluster_ref, operation.name))
            return

        util.WaitForOperation(operation,
                              self.context,
                              message='Waiting for cluster update operation',
                              timeout_s=3600 * 3)

        request = client.MESSAGES_MODULE.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName)
        cluster = client.projects_regions_clusters.Get(request)
        log.UpdatedResource(cluster_ref)
        return cluster
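
The pattern worth noting in this update command is that the Patch call only applies the fields named in updateMask, so the command collects one mask path per flag that was actually set and refuses to proceed when nothing changed. A minimal sketch of that bookkeeping, detached from the SDK message classes (the function name and plain keyword arguments are hypothetical):

def build_update_mask(num_workers=None, num_preemptible_workers=None):
    # Collect one updateMask path per worker-count flag that was supplied.
    changed_fields = []
    if num_workers is not None:
        changed_fields.append('config.worker_config.num_instances')
    if num_preemptible_workers is not None:
        changed_fields.append('config.secondary_worker_config.num_instances')
    if not changed_fields:
        raise ValueError('Must specify at least one cluster parameter to update.')
    return ','.join(changed_fields)


print(build_update_mask(num_workers=5))
# config.worker_config.num_instances
print(build_update_mask(num_workers=5, num_preemptible_workers=2))
# config.worker_config.num_instances,config.secondary_worker_config.num_instances
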
Example #29
    def Run(self, args):
        dataproc = dp.Dataproc(self.ReleaseTrack())

        cluster_ref = util.ParseCluster(args.name, dataproc)

        cluster_config = dataproc.messages.ClusterConfig()
        changed_fields = []

        has_changes = False

        if args.num_workers is not None:
            worker_config = dataproc.messages.InstanceGroupConfig(
                numInstances=args.num_workers)
            cluster_config.workerConfig = worker_config
            changed_fields.append('config.worker_config.num_instances')
            has_changes = True

        if args.num_preemptible_workers is not None:
            worker_config = dataproc.messages.InstanceGroupConfig(
                numInstances=args.num_preemptible_workers)
            cluster_config.secondaryWorkerConfig = worker_config
            changed_fields.append(
                'config.secondary_worker_config.num_instances')
            has_changes = True

        if self.ReleaseTrack() == base.ReleaseTrack.BETA:
            if args.autoscaling_policy:
                cluster_config.autoscalingConfig = dataproc.messages.AutoscalingConfig(
                    policyUri=args.CONCEPTS.autoscaling_policy.Parse(
                    ).RelativeName())
                changed_fields.append('config.autoscaling_config.policy_uri')
                has_changes = True
            elif args.autoscaling_policy == '' or args.disable_autoscaling:  # pylint: disable=g-explicit-bool-comparison
                # Disabling autoscaling. Don't need to explicitly set
                # cluster_config.autoscaling_config to None.
                changed_fields.append('config.autoscaling_config.policy_uri')
                has_changes = True

            lifecycle_config = dataproc.messages.LifecycleConfig()
            changed_config = False
            if args.max_age is not None:
                lifecycle_config.autoDeleteTtl = str(args.max_age) + 's'
                changed_fields.append(
                    'config.lifecycle_config.auto_delete_ttl')
                changed_config = True
            if args.expiration_time is not None:
                lifecycle_config.autoDeleteTime = times.FormatDateTime(
                    args.expiration_time)
                changed_fields.append(
                    'config.lifecycle_config.auto_delete_time')
                changed_config = True
            if args.max_idle is not None:
                lifecycle_config.idleDeleteTtl = str(args.max_idle) + 's'
                changed_fields.append(
                    'config.lifecycle_config.idle_delete_ttl')
                changed_config = True
            if args.no_max_age:
                lifecycle_config.autoDeleteTtl = None
                changed_fields.append(
                    'config.lifecycle_config.auto_delete_ttl')
                changed_config = True
            if args.no_max_idle:
                lifecycle_config.idleDeleteTtl = None
                changed_fields.append(
                    'config.lifecycle_config.idle_delete_ttl')
                changed_config = True
            if changed_config:
                cluster_config.lifecycleConfig = lifecycle_config
                has_changes = True

        # Put in a thunk so we only make this call if needed
        def _GetCurrentLabels():
            # We need to fetch cluster first so we know what the labels look like. The
            # labels_util will fill out the proto for us with all the updates and
            # removals, but first we need to provide the current state of the labels
            get_cluster_request = (
                dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
                    projectId=cluster_ref.projectId,
                    region=cluster_ref.region,
                    clusterName=cluster_ref.clusterName))
            current_cluster = dataproc.client.projects_regions_clusters.Get(
                get_cluster_request)
            return current_cluster.labels

        labels_update = labels_util.ProcessUpdateArgsLazy(
            args,
            dataproc.messages.Cluster.LabelsValue,
            orig_labels_thunk=_GetCurrentLabels)
        if labels_update.needs_update:
            has_changes = True
            changed_fields.append('labels')
        labels = labels_update.GetOrNone()

        if not has_changes:
            raise exceptions.ArgumentError(
                'Must specify at least one cluster parameter to update.')

        cluster = dataproc.messages.Cluster(
            config=cluster_config,
            clusterName=cluster_ref.clusterName,
            labels=labels,
            projectId=cluster_ref.projectId)

        request = dataproc.messages.DataprocProjectsRegionsClustersPatchRequest(
            clusterName=cluster_ref.clusterName,
            region=cluster_ref.region,
            projectId=cluster_ref.projectId,
            cluster=cluster,
            updateMask=','.join(changed_fields),
            requestId=util.GetUniqueId())

        if args.graceful_decommission_timeout is not None:
            request.gracefulDecommissionTimeout = (
                str(args.graceful_decommission_timeout) + 's')

        operation = dataproc.client.projects_regions_clusters.Patch(request)

        if args.async:
            log.status.write('Updating [{0}] with operation [{1}].'.format(
                cluster_ref, operation.name))
            return

        util.WaitForOperation(dataproc,
                              operation,
                              message='Waiting for cluster update operation',
                              timeout_s=args.timeout)

        request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
            projectId=cluster_ref.projectId,
            region=cluster_ref.region,
            clusterName=cluster_ref.clusterName)
        cluster = dataproc.client.projects_regions_clusters.Get(request)
        log.UpdatedResource(cluster_ref)
        return cluster
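
Two small patterns in this newer update command are easy to reuse: lifecycle TTLs are serialized as second-granularity duration strings (for example str(args.max_age) + 's'), and the current labels are fetched through a thunk so the extra Get request is only issued when a label flag actually needs the original values. A minimal illustration of the lazy-fetch idea follows; fetch_current_labels is a stand-in for the clusters.Get call, not SDK code.

def process_label_update(new_labels, orig_labels_thunk):
    # Merge new labels over the originals, fetching the originals lazily:
    # if there is nothing to update, the thunk is never called.
    if not new_labels:
        return None
    merged = dict(orig_labels_thunk())
    merged.update(new_labels)
    return merged


def fetch_current_labels():
    # Stand-in for the real clusters.Get round trip.
    print('fetching current labels...')
    return {'env': 'dev'}


print(process_label_update({}, fetch_current_labels))
# None  (no fetch happened)
print(process_label_update({'team': 'data'}, fetch_current_labels))
# fetching current labels...
# {'env': 'dev', 'team': 'data'}
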
Example #30
    def Run(self, args):
        client = self.context['dataproc_client']
        messages = self.context['dataproc_messages']

        cluster_ref = util.ParseCluster(args.name, self.context)

        config_helper = compute_helpers.ConfigurationHelper.FromContext(
            self.context)
        compute_uris = config_helper.ResolveGceUris(args.name, args.image,
                                                    args.master_machine_type,
                                                    args.worker_machine_type,
                                                    args.network)

        init_actions = []
        timeout_str = str(args.initialization_action_timeout) + 's'
        if args.initialization_actions:
            init_actions = [
                messages.NodeInitializationAction(executableFile=exe,
                                                  executionTimeout=timeout_str)
                for exe in args.initialization_actions
            ]
        expanded_scopes = compute_helpers.ExpandScopeAliases(args.scopes)

        cluster_config = messages.ClusterConfiguration(
            configurationBucket=args.bucket,
            gceClusterConfiguration=messages.GceClusterConfiguration(
                networkUri=compute_uris['network'],
                serviceAccountScopes=expanded_scopes,
                zoneUri=compute_uris['zone'],
            ),
            masterConfiguration=messages.InstanceGroupConfiguration(
                imageUri=compute_uris['image'],
                machineTypeUri=compute_uris['master_machine_type'],
                diskConfiguration=messages.DiskConfiguration(
                    bootDiskSizeGb=args.master_boot_disk_size_gb,
                    numLocalSsds=args.num_master_local_ssds,
                ),
            ),
            workerConfiguration=messages.InstanceGroupConfiguration(
                numInstances=args.num_workers,
                imageUri=compute_uris['image'],
                machineTypeUri=compute_uris['worker_machine_type'],
                diskConfiguration=messages.DiskConfiguration(
                    bootDiskSizeGb=args.worker_boot_disk_size_gb,
                    numLocalSsds=args.num_worker_local_ssds,
                ),
            ),
            initializationActions=init_actions,
            softwareConfiguration=messages.SoftwareConfiguration(
                imageVersion=args.image_version),
        )

        # Secondary worker group is optional.
        if args.num_preemptible_workers is not None:
            cluster_config.secondaryWorkerConfiguration = (
                messages.InstanceGroupConfiguration(
                    numInstances=args.num_preemptible_workers))

        cluster = messages.Cluster(configuration=cluster_config,
                                   clusterName=cluster_ref.clusterName,
                                   projectId=cluster_ref.projectId)

        operation = client.projects_clusters.Create(cluster)
        operation = util.WaitForOperation(
            operation, self.context, 'Waiting for cluster creation operation')

        cluster = client.projects_clusters.Get(cluster_ref.Request())
        if cluster.status.state == (
                messages.ClusterStatus.StateValueValuesEnum.RUNNING):
            log.CreatedResource(cluster_ref)
        else:
            log.error('Create cluster failed!')
            if operation.details:
                log.error('Details:\n' + operation.details)
        return cluster
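
This last example targets an older API surface (ClusterConfiguration, projects_clusters), but the way it builds the initialization-action list is the same idea used by the newer commands: each startup script is paired with one shared execution timeout formatted as a seconds string. A tiny standalone sketch of that construction, with a plain dict in place of the generated message class and a hypothetical bucket path:

def build_init_actions(executables, timeout_sec):
    # Pair every initialization script with the shared timeout, rendered
    # as the 'Ns' duration string the API expects.
    timeout_str = str(timeout_sec) + 's'
    return [{'executableFile': exe, 'executionTimeout': timeout_str}
            for exe in executables]


print(build_init_actions(['gs://my-bucket/install.sh'], 600))
# [{'executableFile': 'gs://my-bucket/install.sh', 'executionTimeout': '600s'}]
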