def create_from_pipeline_file(
    pipeline_path: str,
    schedule: str,
    project_id: str,
    region: str = 'us-central1',
    time_zone: str = 'US/Pacific',
    parameter_values: Optional[Mapping[str, Any]] = None,
    pipeline_root: Optional[str] = None,
    service_account: Optional[str] = None,
    app_engine_region: Optional[str] = None,
    cloud_scheduler_service_account: Optional[str] = None,
) -> dict:
    """Creates a schedule for a compiled pipeline file.

    This function creates a scheduled job which will run the provided pipeline
    on a schedule. This is implemented by creating a Google Cloud Scheduler
    Job. The job will be visible in https://console.google.com/cloudscheduler
    and can be paused, resumed, and deleted.

    To make the system work, this function also creates a Google Cloud
    Function which acts as an intermediary between the Scheduler and
    Pipelines. A single function is shared between all scheduled jobs.

    The following APIs will be activated automatically:
    * cloudfunctions.googleapis.com
    * cloudscheduler.googleapis.com
    * appengine.googleapis.com

    Args:
      pipeline_path: Path of the compiled pipeline file.
      schedule: Schedule in cron format. Example: "45 * * * *"
      project_id: Google Cloud project ID.
      region: Google Cloud compute region. Default is 'us-central1'.
      time_zone: Schedule time zone. Default is 'US/Pacific'.
      parameter_values: Arguments for the pipeline parameters.
      pipeline_root: Optionally, the user can override the pipeline root
        specified during compile time.
      service_account: The service account that the pipeline workload runs as.
      app_engine_region: The region that the Cloud Scheduler job is created in.
      cloud_scheduler_service_account: The service account that the Cloud
        Scheduler job and the proxy Cloud Function use. This should have
        permission to call the AI Platform API and the proxy function. If not
        specified, the function uses the App Engine default service account.

    Returns:
      Created Google Cloud Scheduler Job object dictionary.
    """
    pipeline_dict = client_utils.load_json(pipeline_path)

    return _create_from_pipeline_dict(
        pipeline_dict=pipeline_dict,
        schedule=schedule,
        project_id=project_id,
        region=region,
        time_zone=time_zone,
        parameter_values=parameter_values,
        pipeline_root=pipeline_root,
        service_account=service_account,
        app_engine_region=app_engine_region,
        cloud_scheduler_service_account=cloud_scheduler_service_account,
    )
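

# Usage sketch (illustrative): schedules a compiled pipeline at minute 45 of
# every hour. The import path, project ID, bucket, and file name below are
# placeholders/assumptions, not values taken from this repository.
from kfp.v2.google.client import schedule  # assumed import path

scheduler_job = schedule.create_from_pipeline_file(
    pipeline_path='training_pipeline_job.json',  # compiled pipeline spec
    schedule='45 * * * *',                       # cron: minute 45 of every hour
    project_id='my-project',
    region='us-central1',
    parameter_values={'learning_rate': 0.01},
    pipeline_root='gs://my-bucket/pipeline-root',
)
print(scheduler_job['name'])  # fully qualified Cloud Scheduler job name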
def create_schedule_from_job_spec(
    self,
    job_spec_path: str,
    schedule: str,
    time_zone: str = 'US/Pacific',
    pipeline_root: Optional[str] = None,
    parameter_values: Optional[Mapping[str, Any]] = None,
    service_account: Optional[str] = None,
    enable_caching: Optional[bool] = None,
    app_engine_region: Optional[str] = None,
) -> dict:
    """Creates a schedule for a compiled pipeline file.

    This function creates a scheduled job which will run the provided pipeline
    on a schedule. This is implemented by creating a Google Cloud Scheduler
    Job. The job will be visible in https://console.google.com/cloudscheduler
    and can be paused, resumed, and deleted.

    To make the system work, this function also creates a Google Cloud
    Function which acts as an intermediary between the Scheduler and
    Pipelines. A single function is shared between all scheduled jobs.

    The following APIs will be activated automatically:
    * cloudfunctions.googleapis.com
    * cloudscheduler.googleapis.com
    * appengine.googleapis.com

    Args:
      job_spec_path: Path of the compiled pipeline file.
      schedule: Schedule in cron format. Example: "45 * * * *"
      time_zone: Schedule time zone. Default is 'US/Pacific'.
      parameter_values: Arguments for the pipeline parameters.
      pipeline_root: Optionally, the user can override the pipeline root
        specified during compile time.
      service_account: The service account that the pipeline workload runs as.
      enable_caching: Whether or not to enable caching for the run. If not
        set, defaults to the compile-time settings, which are True for all
        tasks by default, while users may specify different caching options
        for individual tasks. If set, the setting applies to all tasks in the
        pipeline, overriding the compile-time settings.
      app_engine_region: The region that the Cloud Scheduler job is created in.

    Returns:
      Created Google Cloud Scheduler Job object dictionary.
    """
    job_spec = client_utils.load_json(job_spec_path)

    if enable_caching is not None:
        _set_enable_caching_value(job_spec['pipelineSpec'], enable_caching)

    return _create_from_pipeline_dict(
        pipeline_dict=job_spec,
        schedule=schedule,
        project_id=self._project_id,
        region=self._region,
        time_zone=time_zone,
        parameter_values=parameter_values,
        pipeline_root=pipeline_root,
        service_account=service_account,
        app_engine_region=app_engine_region)
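

# Usage sketch (illustrative): the AIPlatformClient name and its constructor
# arguments are assumptions inferred from the method's use of self._project_id
# and self._region; the project, file name, and parameters are placeholders.
from kfp.v2.google.client import AIPlatformClient  # assumed import path

api_client = AIPlatformClient(project_id='my-project', region='us-central1')
api_client.create_schedule_from_job_spec(
    job_spec_path='training_pipeline_job.json',
    schedule='0 2 * * *',             # every day at 02:00
    time_zone='US/Pacific',
    parameter_values={'epochs': 10},
    enable_caching=False,             # override compile-time caching for all tasks
)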
def create_run_from_job_spec(
    self,
    job_spec_path: str,
    job_id: Optional[str] = None,
    pipeline_root: Optional[str] = None,
    parameter_values: Optional[Mapping[str, Any]] = None,
    enable_caching: Optional[bool] = None,
    cmek: Optional[str] = None,
    service_account: Optional[str] = None,
    network: Optional[str] = None,
    labels: Optional[Mapping[str, str]] = None) -> dict:
    """Runs a pre-compiled pipeline job on the AIPlatformPipelines service.

    Args:
      job_spec_path: The path of the PipelineJob JSON file. It can be a local
        path or a GCS URI.
      job_id: Optionally, the user can provide the unique ID of the job run.
        If not specified, pipeline name + timestamp will be used.
      pipeline_root: Optionally, the user can override the pipeline root
        specified during compile time.
      parameter_values: The mapping from runtime parameter names to their
        values.
      enable_caching: Whether or not to enable caching for the run. If not
        set, defaults to the compile-time settings, which are True for all
        tasks by default, while users may specify different caching options
        for individual tasks. If set, the setting applies to all tasks in the
        pipeline, overriding the compile-time settings.
      cmek: The customer-managed encryption key for a PipelineJob. If set,
        the pipeline job and all of its sub-resources will be secured by this
        key.
      service_account: The service account that the pipeline workload runs as.
      network: The network configuration applied to the pipeline jobs. If left
        unspecified, the workload is not peered with any network.
      labels: The user-defined metadata to organize the PipelineJob.

    Returns:
      Full AIPlatformPipelines job name.

    Raises:
      ParseError: On JSON parsing problems.
      RuntimeError: If the AIPlatformPipelines service returns an unexpected
        response or an empty job name.
    """
    job_spec = client_utils.load_json(job_spec_path)

    pipeline_name = job_spec['pipelineSpec']['pipelineInfo']['name']
    job_id = job_id or '{pipeline_name}-{timestamp}'.format(
        pipeline_name=re.sub('[^-0-9a-z]+', '-',
                             pipeline_name.lower()).lstrip('-').rstrip('-'),
        timestamp=_get_current_time().strftime('%Y%m%d%H%M%S'))
    if not _VALID_NAME_PATTERN.match(job_id):
        raise ValueError(
            'Generated job ID: {} is illegal as a uCAIP pipelines job ID. '
            'Expecting an ID following the regex pattern '
            '"[a-z][-a-z0-9]{{0,127}}"'.format(job_id))

    job_name = _JOB_NAME_PATTERN.format(parent=self._parent, job_id=job_id)

    job_spec['name'] = job_name
    job_spec['displayName'] = job_id

    builder = runtime_config_builder.RuntimeConfigBuilder.from_job_spec_json(
        job_spec)
    builder.update_pipeline_root(pipeline_root)
    builder.update_runtime_parameters(parameter_values)
    runtime_config = builder.build()
    job_spec['runtimeConfig'] = runtime_config

    if enable_caching is not None:
        _set_enable_caching_value(job_spec['pipelineSpec'], enable_caching)

    if cmek is not None:
        job_spec['encryptionSpec'] = {'kmsKeyName': cmek}
    if service_account is not None:
        job_spec['serviceAccount'] = service_account
    if network is not None:
        job_spec['network'] = network

    if labels:
        if not isinstance(labels, Mapping):
            raise ValueError(
                'Expect labels to be a mapping of string key value pairs. '
                'Got "{}" of type "{}"'.format(labels, type(labels)))
        for k, v in labels.items():
            if not isinstance(k, str) or not isinstance(v, str):
                raise ValueError(
                    'Expect labels to be a mapping of string key value pairs. '
                    'Got "{}".'.format(labels))
        job_spec['labels'] = labels

    return self._submit_job(
        job_spec=job_spec,
        job_id=job_id,
    )
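

# Usage sketch (illustrative): submits a one-off run of a compiled pipeline.
# The client construction mirrors the scheduling sketch above; the CMEK
# resource name, service account, bucket, and labels are placeholders.
from kfp.v2.google.client import AIPlatformClient  # assumed import path

api_client = AIPlatformClient(project_id='my-project', region='us-central1')
run_info = api_client.create_run_from_job_spec(
    job_spec_path='training_pipeline_job.json',
    pipeline_root='gs://my-bucket/pipeline-root',
    parameter_values={'learning_rate': 0.01},
    cmek='projects/my-project/locations/us-central1/keyRings/my-kr/cryptoKeys/my-key',
    service_account='pipelines-sa@my-project.iam.gserviceaccount.com',
    labels={'team': 'ml-platform', 'env': 'dev'},
)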
def test_load_json_from_local_file(self):
    self.assertEqual({'key': 'value'},
                     client_utils.load_json('/path/to/file'))
def test_load_json_from_gs_uri(self, mock_download_as_bytes,
                               unused_storage_client):
    mock_download_as_bytes.return_value = b'{"key":"value"}'
    self.assertEqual({'key': 'value'},
                     client_utils.load_json('gs://bucket/path/to/blob'))
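

# Test-harness sketch (illustrative): the decorators and fixtures that the two
# tests above rely on are not shown in this excerpt. The patch targets below
# assume client_utils.load_json reads local paths with open() and gs:// URIs
# via google.cloud.storage; treat them as assumptions, not repository code.
import unittest
from unittest import mock

from google.cloud import storage


class ClientUtilsTest(unittest.TestCase):

    @mock.patch('builtins.open', mock.mock_open(read_data='{"key":"value"}'))
    def test_load_json_from_local_file(self):
        ...  # body as shown above

    # Decorators apply bottom-up, so the innermost patch (download_as_bytes)
    # becomes the first mock argument after self.
    @mock.patch.object(storage, 'Client', autospec=True)
    @mock.patch.object(storage.Blob, 'download_as_bytes', autospec=True)
    def test_load_json_from_gs_uri(self, mock_download_as_bytes,
                                   unused_storage_client):
        ...  # body as shown above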