def testResolveParametersFail(self):
    """Verifies that resolving a runtime parameter raises a RuntimeError."""
    unresolved_text = """ parameters { key: 'key_one' value { runtime_parameter {name: 'rp'} } }"""
    node_parameters = pipeline_pb2.NodeParameters()
    text_format.Parse(unresolved_text, node_parameters)

    # The only entry is a runtime parameter (no field value), which the test
    # expects to be reported as not ready.
    with self.assertRaisesRegex(RuntimeError, 'Parameter value not ready'):
        inputs_utils.resolve_parameters(node_parameters)
def generate_resolved_info(metadata_handler: metadata.Metadata,
                           node: pipeline_pb2.PipelineNode) -> ResolvedInfo:
    """Builds the `ResolvedInfo` used to execute a pipeline node.

    Args:
        metadata_handler: A handler to access MLMD db.
        node: The pipeline node whose contexts, parameters and inputs are
            resolved.

    Returns:
        A `ResolvedInfo` carrying the node's contexts, its resolved execution
        properties and its resolved input artifacts.
    """
    # Contexts come first: they are registered in MLMD if not already there.
    node_contexts = context_lib.register_contexts_if_not_exists(
        metadata_handler=metadata_handler, node_contexts=node.contexts)

    # Execution properties are derived from the node parameters alone.
    resolved_properties = inputs_utils.resolve_parameters(
        node_parameters=node.parameters)

    # Input artifacts are looked up through the metadata handler.
    resolved_inputs = inputs_utils.resolve_input_artifacts(
        metadata_handler=metadata_handler, node_inputs=node.inputs)

    return ResolvedInfo(
        contexts=node_contexts,
        exec_properties=resolved_properties,
        input_artifacts=resolved_inputs)
def run(
    self,
    mlmd_connection: metadata.Metadata,
    pipeline_node: pipeline_pb2.PipelineNode,
    pipeline_info: pipeline_pb2.PipelineInfo,
    pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
) -> data_types.ExecutionInfo:
    """Runs Resolver specific logic.

    Args:
        mlmd_connection: ML metadata connection.
        pipeline_node: The specification of the node that this launcher
            launches.
        pipeline_info: The information of the pipeline that this node runs in.
        pipeline_runtime_spec: The runtime information of the pipeline that
            this node runs in. Not referenced by this method.

    Returns:
        An `ExecutionInfo` whose input and output dictionaries are both the
        resolved input artifacts.
    """
    logging.info('Running as an resolver node.')
    with mlmd_connection as mlmd_handle:
        # Step 1: prepare all contexts of the node.
        node_contexts = context_lib.prepare_contexts(
            metadata_handler=mlmd_handle,
            node_contexts=pipeline_node.contexts)

        # Step 2: resolve execution properties, then input artifacts.
        resolved_properties = inputs_utils.resolve_parameters(
            node_parameters=pipeline_node.parameters)
        resolved_inputs = inputs_utils.resolve_input_artifacts(
            metadata_handler=mlmd_handle, node_inputs=pipeline_node.inputs)

        # Step 3: register the execution in metadata.
        registered = execution_publish_utils.register_execution(
            metadata_handler=mlmd_handle,
            execution_type=pipeline_node.node_info.type,
            contexts=node_contexts,
            exec_properties=resolved_properties)

        # Step 4: publish via publish_internal_execution, handing the resolved
        # input artifacts over as the output artifacts.
        execution_publish_utils.publish_internal_execution(
            metadata_handler=mlmd_handle,
            contexts=node_contexts,
            execution_id=registered.id,
            output_artifacts=resolved_inputs)

        return data_types.ExecutionInfo(
            execution_id=registered.id,
            input_dict=resolved_inputs,
            output_dict=resolved_inputs,
            exec_properties=resolved_properties,
            pipeline_node=pipeline_node,
            pipeline_info=pipeline_info)
def testResolveParameters(self):
    """Verifies that string and int field values resolve to their values."""
    node_parameters = pipeline_pb2.NodeParameters()
    text_format.Parse(
        """ parameters { key: 'key_one' value { field_value {string_value: 'value_one'} } } parameters { key: 'key_two' value { field_value {int_value: 2} } }""",
        node_parameters)

    resolved = inputs_utils.resolve_parameters(node_parameters)

    # Exactly the two declared keys must come back, each with its value.
    self.assertEqual(len(resolved), 2)
    for key, expected_value in (('key_one', 'value_one'), ('key_two', 2)):
        self.assertEqual(resolved[key], expected_value)
def _prepare_execution(self) -> _PrepareExecutionResult:
    """Prepares inputs, outputs and execution properties for actual execution.

    Returns:
        A `_PrepareExecutionResult` in one of three shapes:
        - `is_execution_needed=False` with an empty `ExecutionInfo` when input
          resolution returns None; no execution is registered or published.
        - `is_execution_needed=False` with an `ExecutionInfo` carrying only
          the execution id when caching is enabled and cached outputs were
          found; the execution is published as cached first.
        - `is_execution_needed=True` with a fully populated `ExecutionInfo`
          otherwise.
    """
    # TODO(b/150979622): handle the edge case that the component gets evicted
    # between a successful publish and the stateful working dir being cleaned
    # up. Otherwise following retries will keep failing because of duplicate
    # publishes.
    # NOTE(review): the nesting of the two `with` blocks below was
    # reconstructed from a whitespace-collapsed source -- confirm the scopes
    # against the original file.
    with self._mlmd_connection as m:
        # 1. Prepares all contexts.
        contexts = context_lib.register_contexts_if_not_exists(
            metadata_handler=m, node_contexts=self._pipeline_node.contexts)

        # 2. Resolves inputs and execution properties.
        exec_properties = inputs_utils.resolve_parameters(
            node_parameters=self._pipeline_node.parameters)
        input_artifacts = inputs_utils.resolve_input_artifacts(
            metadata_handler=m, node_inputs=self._pipeline_node.inputs)
        # 3. If not all required inputs are met, return an ExecutionInfo with
        # is_execution_needed being false. No publish will happen so
        # downstream nodes won't be triggered.
        if input_artifacts is None:
            return _PrepareExecutionResult(
                execution_info=data_types.ExecutionInfo(),
                contexts=contexts,
                is_execution_needed=False)

        # 4. Registers execution in metadata.
        execution = execution_publish_utils.register_execution(
            metadata_handler=m,
            execution_type=self._pipeline_node.node_info.type,
            contexts=contexts,
            input_artifacts=input_artifacts,
            exec_properties=exec_properties)

    # 5. Resolve output
    output_artifacts = self._output_resolver.generate_output_artifacts(
        execution.id)

    # If there is a custom driver, runs it. Its output is then folded into
    # exec_properties and output_artifacts by _update_with_driver_output.
    if self._driver_operator:
        driver_output = self._driver_operator.run_driver(
            data_types.ExecutionInfo(
                input_dict=input_artifacts,
                output_dict=output_artifacts,
                exec_properties=exec_properties,
                execution_output_uri=self._output_resolver.
                get_driver_output_uri()))
        self._update_with_driver_output(driver_output, exec_properties,
                                        output_artifacts)

    # We reconnect to MLMD here because the custom driver closes MLMD connection
    # on returning.
    with self._mlmd_connection as m:
        # 6. Check cached result
        cache_context = cache_utils.get_cache_context(
            metadata_handler=m,
            pipeline_node=self._pipeline_node,
            pipeline_info=self._pipeline_info,
            input_artifacts=input_artifacts,
            output_artifacts=output_artifacts,
            parameters=exec_properties)
        # The cache context joins the contexts used for every later publish
        # and is part of the returned result.
        contexts.append(cache_context)
        cached_outputs = cache_utils.get_cached_outputs(
            metadata_handler=m, cache_context=cache_context)

        # 7. Should cache be used? Only when caching is enabled for the node
        # and the lookup above returned something truthy.
        if (self._pipeline_node.execution_options.caching_options.
                enable_cache and cached_outputs):
            # Publishes cache result
            execution_publish_utils.publish_cached_execution(
                metadata_handler=m,
                contexts=contexts,
                execution_id=execution.id,
                output_artifacts=cached_outputs)
            return _PrepareExecutionResult(
                execution_info=data_types.ExecutionInfo(
                    execution_id=execution.id),
                execution_metadata=execution,
                contexts=contexts,
                is_execution_needed=False)

        pipeline_run_id = (self._pipeline_runtime_spec.pipeline_run_id.
                           field_value.string_value)

        # 8. Going to trigger executor.
        return _PrepareExecutionResult(
            execution_info=data_types.ExecutionInfo(
                execution_id=execution.id,
                input_dict=input_artifacts,
                output_dict=output_artifacts,
                exec_properties=exec_properties,
                execution_output_uri=self._output_resolver.
                get_executor_output_uri(execution.id),
                stateful_working_dir=(self._output_resolver.
                                      get_stateful_working_directory()),
                tmp_dir=self._output_resolver.make_tmp_dir(execution.id),
                pipeline_node=self._pipeline_node,
                pipeline_info=self._pipeline_info,
                pipeline_run_id=pipeline_run_id),
            execution_metadata=execution,
            contexts=contexts,
            is_execution_needed=True)
def run(
    self,
    mlmd_connection: metadata.Metadata,
    pipeline_node: pipeline_pb2.PipelineNode,
    pipeline_info: pipeline_pb2.PipelineInfo,
    pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
) -> metadata_store_pb2.Execution:
    """Runs Importer specific logic.

    Args:
        mlmd_connection: ML metadata connection.
        pipeline_node: The specification of the node that this launcher
            launches.
        pipeline_info: The information of the pipeline that this node runs in.
            Not referenced by this method.
        pipeline_runtime_spec: The runtime information of the pipeline that
            this node runs in. Not referenced by this method.

    Returns:
        The execution registered for this run.
    """
    logging.info('Running as an importer node.')
    with mlmd_connection as mlmd_handle:
        # Step 1: make sure all contexts of the node exist.
        node_contexts = context_lib.register_contexts_if_not_exists(
            metadata_handler=mlmd_handle,
            node_contexts=pipeline_node.contexts)

        # Step 2: resolve execution properties only; this method resolves no
        # input artifacts.
        resolved_properties = inputs_utils.resolve_parameters(
            node_parameters=pipeline_node.parameters)

        # Step 3: register the execution in metadata.
        registered = execution_publish_utils.register_execution(
            metadata_handler=mlmd_handle,
            execution_type=pipeline_node.node_info.type,
            contexts=node_contexts,
            exec_properties=resolved_properties)

        # Step 4: build the output artifacts that represent the import.
        output_spec = pipeline_node.outputs.outputs[
            importer_node.IMPORT_RESULT_KEY]
        artifact_spec = output_spec.artifact_spec
        properties = self._extract_proto_map(
            artifact_spec.additional_properties)
        custom_properties = self._extract_proto_map(
            artifact_spec.additional_custom_properties)
        artifact_class = types.Artifact(artifact_spec.type).type
        source_uri = str(resolved_properties[importer_node.SOURCE_URI_KEY])
        reimport = bool(resolved_properties[importer_node.REIMPORT_OPTION_KEY])
        imported_artifacts = importer_node.generate_output_dict(
            metadata_handler=mlmd_handle,
            uri=source_uri,
            properties=properties,
            custom_properties=custom_properties,
            reimport=reimport,
            output_artifact_class=artifact_class,
            mlmd_artifact_type=artifact_spec.type)

        # Step 5: publish the execution together with its output artifacts.
        execution_publish_utils.publish_succeeded_execution(
            metadata_handler=mlmd_handle,
            execution_id=registered.id,
            contexts=node_contexts,
            output_artifacts=imported_artifacts)

        return registered
def run(
    self,
    mlmd_connection: metadata.Metadata,
    pipeline_node: pipeline_pb2.PipelineNode,
    pipeline_info: pipeline_pb2.PipelineInfo,
    pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
) -> data_types.ExecutionInfo:
    """Runs Importer specific logic.

    Args:
        mlmd_connection: ML metadata connection.
        pipeline_node: The specification of the node that this launcher
            launches.
        pipeline_info: The information of the pipeline that this node runs in.
        pipeline_runtime_spec: The runtime information of the pipeline that
            this node runs in. Not referenced by this method.

    Returns:
        An `ExecutionInfo` with an empty input dict and the imported artifacts
        as its output dict.
    """
    logging.info('Running as an importer node.')
    with mlmd_connection as mlmd_handle:
        # Step 1: prepare all contexts of the node.
        node_contexts = context_lib.prepare_contexts(
            metadata_handler=mlmd_handle,
            node_contexts=pipeline_node.contexts)

        # Step 2: resolve execution properties only; this method resolves no
        # input artifacts.
        resolved_properties = inputs_utils.resolve_parameters(
            node_parameters=pipeline_node.parameters)

        # Step 3: register the execution in metadata.
        registered = execution_publish_utils.register_execution(
            metadata_handler=mlmd_handle,
            execution_type=pipeline_node.node_info.type,
            contexts=node_contexts,
            exec_properties=resolved_properties)

        # Step 4: build the output artifacts that represent the import.
        output_spec = pipeline_node.outputs.outputs[importer.IMPORT_RESULT_KEY]
        artifact_spec = output_spec.artifact_spec
        properties = self._extract_proto_map(
            artifact_spec.additional_properties)
        custom_properties = self._extract_proto_map(
            artifact_spec.additional_custom_properties)
        artifact_class = types.Artifact(artifact_spec.type).type
        source_uri = str(resolved_properties[importer.SOURCE_URI_KEY])
        reimport = bool(resolved_properties[importer.REIMPORT_OPTION_KEY])
        imported_artifacts = importer.generate_output_dict(
            metadata_handler=mlmd_handle,
            uri=source_uri,
            properties=properties,
            custom_properties=custom_properties,
            reimport=reimport,
            output_artifact_class=artifact_class,
            mlmd_artifact_type=artifact_spec.type)

        result = data_types.ExecutionInfo(
            execution_id=registered.id,
            input_dict={},
            output_dict=imported_artifacts,
            exec_properties=resolved_properties,
            pipeline_node=pipeline_node,
            pipeline_info=pipeline_info)

        # TODO(b/182316162): consider letting the launcher level do the
        # publish for system nodes, so that the version tagging logic doesn't
        # need to be handled per system node.
        outputs_utils.tag_output_artifacts_with_version(result.output_dict)

        # Step 5: publish the output artifacts. Reimported artifacts are
        # published through the cached-execution publisher, everything else
        # through the succeeded-execution publisher.
        if _is_artifact_reimported(imported_artifacts):
            publish = execution_publish_utils.publish_cached_execution
        else:
            publish = execution_publish_utils.publish_succeeded_execution
        publish(
            metadata_handler=mlmd_handle,
            execution_id=registered.id,
            contexts=node_contexts,
            output_artifacts=imported_artifacts)

        return result