Example #1
def create_from_pipeline_file(
    pipeline_path: str,
    schedule: str,
    project_id: str,
    region: str = 'us-central1',
    time_zone: str = 'US/Pacific',
    parameter_values: Optional[Mapping[str, Any]] = None,
    pipeline_root: Optional[str] = None,
    service_account: Optional[str] = None,
    app_engine_region: Optional[str] = None,
    cloud_scheduler_service_account: Optional[str] = None,
) -> dict:
    """Creates schedule for compiled pipeline file.

    This function creates scheduled job which will run the provided pipeline on
    schedule. This is implemented by creating a Google Cloud Scheduler Job.
    The job will be visible in https://console.google.com/cloudscheduler and can
    be paused/resumed and deleted.

    To make the system work, this function also creates a Google Cloud Function
    which acts as an intermediary between the Scheduler and Pipelines. A single
    function is shared between all scheduled jobs.
    The following APIs will be activated automatically:
    * cloudfunctions.googleapis.com
    * cloudscheduler.googleapis.com
    * appengine.googleapis.com

    Args:
      pipeline_path: Path of the compiled pipeline file.
      schedule: Schedule in cron format. Example: "45 * * * *"
      project_id: Google Cloud project ID.
      region: Google Cloud compute region. Default is 'us-central1'.
      time_zone: Schedule time zone. Default is 'US/Pacific'.
      parameter_values: Arguments for the pipeline parameters.
      pipeline_root: Optionally the user can override the pipeline root
        specified at compile time.
      service_account: The service account that the pipeline workload runs as.
      app_engine_region: The region in which the Cloud Scheduler job is created.
      cloud_scheduler_service_account: The service account used by the Cloud
        Scheduler job and the proxy Cloud Function. It should have permission to
        call the AI Platform API and the proxy function. If not specified, the
        function uses the App Engine default service account.

    Returns:
      Created Google Cloud Scheduler Job object dictionary.
    """
    pipeline_dict = client_utils.load_json(pipeline_path)

    return _create_from_pipeline_dict(
        pipeline_dict=pipeline_dict,
        schedule=schedule,
        project_id=project_id,
        region=region,
        time_zone=time_zone,
        parameter_values=parameter_values,
        pipeline_root=pipeline_root,
        service_account=service_account,
        app_engine_region=app_engine_region,
        cloud_scheduler_service_account=cloud_scheduler_service_account,
    )
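A minimal usage sketch of the function above; the pipeline path, project ID, and parameter values below are placeholders rather than values taken from the source.

# Hedged usage sketch: all identifiers below are placeholders, and the 'name'
# key is assumed from the Cloud Scheduler Job resource shape.
scheduler_job = create_from_pipeline_file(
    pipeline_path='pipeline_job.json',      # compiled pipeline file
    schedule='45 * * * *',                   # minute 45 of every hour
    project_id='my-project',
    region='us-central1',
    parameter_values={'message': 'hello'},   # hypothetical pipeline parameter
)
print(scheduler_job.get('name'))             # Cloud Scheduler job resource name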
Example #2
    def create_schedule_from_job_spec(
        self,
        job_spec_path: str,
        schedule: str,
        time_zone: str = 'US/Pacific',
        pipeline_root: Optional[str] = None,
        parameter_values: Optional[Mapping[str, Any]] = None,
        service_account: Optional[str] = None,
        enable_caching: Optional[bool] = None,
        app_engine_region: Optional[str] = None,
    ) -> dict:
        """Creates schedule for compiled pipeline file.

    This function creates scheduled job which will run the provided pipeline on
    schedule. This is implemented by creating a Google Cloud Scheduler Job.
    The job will be visible in https://console.google.com/cloudscheduler and can
    be paused/resumed and deleted.

    To make the system work, this function also creates a Google Cloud Function
    which acts as an intermediary between the Scheduler and Pipelines. A single
    function is shared between all scheduled jobs.
    The following APIs will be activated automatically:
    * cloudfunctions.googleapis.com
    * cloudscheduler.googleapis.com
    * appengine.googleapis.com

    Args:
      job_spec_path: Path of the compiled pipeline file.
      schedule: Schedule in cron format. Example: "45 * * * *"
      time_zone: Schedule time zone. Default is 'US/Pacific'
      parameter_values: Arguments for the pipeline parameters
      pipeline_root: Optionally the user can override the pipeline root
        specified during the compile time.
      service_account: The service account that the pipeline workload runs as.
      enable_caching: Whether or not to enable caching for the run.
        If not set, defaults to the compile time settings, which are True for all
        tasks by default, while users may specify different caching options for
        individual tasks.
        If set, the setting applies to all tasks in the pipeline -- overrides
        the compile time settings.
      app_engine_region: The region that cloud scheduler job is created in.

    Returns:
      Created Google Cloud Scheduler Job object dictionary.
    """
        job_spec = client_utils.load_json(job_spec_path)

        if enable_caching is not None:
            _set_enable_caching_value(job_spec['pipelineSpec'], enable_caching)

        return _create_from_pipeline_dict(pipeline_dict=job_spec,
                                          schedule=schedule,
                                          project_id=self._project_id,
                                          region=self._region,
                                          time_zone=time_zone,
                                          parameter_values=parameter_values,
                                          pipeline_root=pipeline_root,
                                          service_account=service_account,
                                          app_engine_region=app_engine_region)
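For context, a hedged sketch of calling the method above from a client instance; the AIPlatformClient class name is an assumption and does not appear in the snippet.

# Hedged usage sketch: the AIPlatformClient class name and constructor are
# assumed; only the method signature above is taken from the snippet.
client = AIPlatformClient(project_id='my-project', region='us-central1')
schedule_job = client.create_schedule_from_job_spec(
    job_spec_path='pipeline_job.json',
    schedule='0 2 * * *',           # every day at 02:00
    time_zone='US/Pacific',
    enable_caching=False,           # override compile-time caching for all tasks
)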
Example #3
    def create_run_from_job_spec(
            self,
            job_spec_path: str,
            job_id: Optional[str] = None,
            pipeline_root: Optional[str] = None,
            parameter_values: Optional[Mapping[str, Any]] = None,
            enable_caching: Optional[bool] = None,
            cmek: Optional[str] = None,
            service_account: Optional[str] = None,
            network: Optional[str] = None,
            labels: Optional[Mapping[str, str]] = None) -> dict:
        """Runs a pre-compiled pipeline job on AIPlatformPipelines service.

        Args:
          job_spec_path: The path of PipelineJob JSON file. It can be a local path
            or a GS URI.
          job_id: Optionally, the user can provide the unique ID of the job run. If
            not specified, pipeline name + timestamp will be used.
          pipeline_root: Optionally the user can override the pipeline root
            specified during the compile time.
          parameter_values: The mapping from runtime parameter names to their
            values.
          enable_caching: Whether or not to enable caching for the run.
            If not set, defaults to the compile-time settings, which are True for
            all tasks by default, while users may specify different caching
            options for individual tasks.
            If set, the setting applies to all tasks in the pipeline and overrides
            the compile-time settings.
          cmek: The customer-managed encryption key for a pipelineJob. If set, the
            pipeline job and all of its sub-resources will be secured by this key.
          service_account: The service account that the pipeline workload runs as.
          network: The network configuration applied for pipeline jobs. If left
            unspecified, the workload is not peered with any network.
          labels: The user-defined metadata used to organize the PipelineJob.

        Returns:
          The created AIPlatformPipelines job object dictionary.

        Raises:
          ParseError: On JSON parsing problems.
          RuntimeError: If the AIPlatformPipelines service returns an unexpected
            response or an empty job name.
        """
        job_spec = client_utils.load_json(job_spec_path)
        pipeline_name = job_spec['pipelineSpec']['pipelineInfo']['name']
        job_id = job_id or '{pipeline_name}-{timestamp}'.format(
            pipeline_name=re.sub(
                '[^-0-9a-z]+', '-',
                pipeline_name.lower()).lstrip('-').rstrip('-'),
            timestamp=_get_current_time().strftime('%Y%m%d%H%M%S'))
        if not _VALID_NAME_PATTERN.match(job_id):
            raise ValueError(
                'Generated job ID: {} is illegal as a uCAIP pipelines job ID. '
                'Expecting an ID following the regex pattern '
                '"[a-z][-a-z0-9]{{0,127}}"'.format(job_id))

        job_name = _JOB_NAME_PATTERN.format(parent=self._parent, job_id=job_id)

        job_spec['name'] = job_name
        job_spec['displayName'] = job_id

        builder = runtime_config_builder.RuntimeConfigBuilder.from_job_spec_json(
            job_spec)
        builder.update_pipeline_root(pipeline_root)
        builder.update_runtime_parameters(parameter_values)

        runtime_config = builder.build()
        job_spec['runtimeConfig'] = runtime_config

        if enable_caching is not None:
            _set_enable_caching_value(job_spec['pipelineSpec'], enable_caching)

        if cmek is not None:
            job_spec['encryptionSpec'] = {'kmsKeyName': cmek}
        if service_account is not None:
            job_spec['serviceAccount'] = service_account
        if network is not None:
            job_spec['network'] = network

        if labels:
            if not isinstance(labels, Mapping):
                raise ValueError(
                    'Expect labels to be a mapping of string key value pairs. '
                    'Got "{}" of type "{}"'.format(labels, type(labels)))
            for k, v in labels.items():
                if not isinstance(k, str) or not isinstance(v, str):
                    raise ValueError(
                        'Expect labels to be a mapping of string key value pairs. '
                        'Got "{}".'.format(labels))

            job_spec['labels'] = labels

        return self._submit_job(
            job_spec=job_spec,
            job_id=job_id,
        )
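The least obvious step above is the job ID generation and validation; the following standalone sketch reproduces the same transformation with an illustrative pipeline name. The _VALID_NAME_PATTERN definition is an assumption, since it is not shown in the snippet.

import re
from datetime import datetime

# Assumed pattern; the actual _VALID_NAME_PATTERN is not shown in the snippet.
_VALID_NAME_PATTERN = re.compile(r'[a-z][-a-z0-9]{0,127}')

pipeline_name = 'My Pipeline_v2'                  # illustrative pipeline name
job_id = '{name}-{timestamp}'.format(
    name=re.sub('[^-0-9a-z]+', '-',
                pipeline_name.lower()).lstrip('-').rstrip('-'),
    timestamp=datetime.now().strftime('%Y%m%d%H%M%S'))
assert _VALID_NAME_PATTERN.match(job_id)          # e.g. 'my-pipeline-v2-20240101120000'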
Example #4
    def test_load_json_from_local_file(self):
        self.assertEqual({'key': 'value'},
                         client_utils.load_json('/path/to/file'))
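The local-file test above depends on a patched file read that is not shown in the snippet; a hedged sketch of one way to arrange it, assuming client_utils.load_json reads local paths with the built-in open:

from unittest import mock

# Hedged sketch: assumes client_utils.load_json opens local paths with open().
with mock.patch('builtins.open', mock.mock_open(read_data='{"key": "value"}')):
    assert client_utils.load_json('/path/to/file') == {'key': 'value'}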
Example #5
    def test_load_json_from_gs_uri(self, mock_download_as_bytes,
                                   unused_storage_client):
        mock_download_as_bytes.return_value = b'{"key":"value"}'
        self.assertEqual({'key': 'value'},
                         client_utils.load_json('gs://bucket/path/to/blob'))
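Taken together, the two tests imply a loader that dispatches on the path scheme. Below is a minimal sketch of such a function, assuming the google-cloud-storage client is used for gs:// URIs; it is not the actual client_utils implementation.

import json

from google.cloud import storage


def load_json_sketch(path: str) -> dict:
    """Hedged sketch of a JSON loader consistent with the tests above."""
    if path.startswith('gs://'):
        # Download the blob bytes via the google-cloud-storage client.
        blob = storage.Blob.from_string(path, client=storage.Client())
        return json.loads(blob.download_as_bytes())
    with open(path, 'r') as f:
        return json.load(f)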