Exemplo n.º 1
0
 def testResolveParametersFail(self):
     """Expects `RuntimeError` when a parameter is only a named runtime parameter."""
     # One parameter whose value is a `runtime_parameter` carrying just a
     # name; no `field_value` is supplied for it.
     unresolved_textproto = """
     parameters {
       key: 'key_one'
       value {
         runtime_parameter {name: 'rp'}
       }
     }"""
     node_parameters = pipeline_pb2.NodeParameters()
     text_format.Parse(unresolved_textproto, node_parameters)

     with self.assertRaisesRegex(
             RuntimeError, 'Parameter value not ready'):
         inputs_utils.resolve_parameters(node_parameters)
Exemplo n.º 2
0
def generate_resolved_info(metadata_handler: metadata.Metadata,
                           node: pipeline_pb2.PipelineNode) -> ResolvedInfo:
    """Builds the `ResolvedInfo` used to execute a pipeline node.

    The node's contexts are registered first, then its execution properties
    are resolved, and finally its input artifacts are resolved.

    Args:
        metadata_handler: A handler to access MLMD db.
        node: The pipeline node for which to generate.

    Returns:
        A `ResolvedInfo` holding the registered contexts, the resolved
        execution properties and the resolved input artifacts.
    """
    # Contexts are registered only if they do not exist yet.
    registered_contexts = context_lib.register_contexts_if_not_exists(
        metadata_handler=metadata_handler,
        node_contexts=node.contexts,
    )

    # Execution properties come from the node's parameters.
    resolved_properties = inputs_utils.resolve_parameters(
        node_parameters=node.parameters,
    )

    # Input artifacts are looked up through the metadata handler.
    resolved_inputs = inputs_utils.resolve_input_artifacts(
        metadata_handler=metadata_handler,
        node_inputs=node.inputs,
    )

    resolution = dict(
        contexts=registered_contexts,
        exec_properties=resolved_properties,
        input_artifacts=resolved_inputs,
    )
    return ResolvedInfo(**resolution)
Exemplo n.º 3
0
    def run(
        self, mlmd_connection: metadata.Metadata,
        pipeline_node: pipeline_pb2.PipelineNode,
        pipeline_info: pipeline_pb2.PipelineInfo,
        pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
    ) -> data_types.ExecutionInfo:
        """Runs the resolver-specific logic for a node.

        Prepares contexts, resolves execution properties and input artifacts,
        registers an execution, and publishes it with the resolved input
        artifacts passed as its output artifacts.

        Args:
            mlmd_connection: ML metadata connection.
            pipeline_node: The specification of the node that this launcher
                launches.
            pipeline_info: The information of the pipeline that this node
                runs in.
            pipeline_runtime_spec: The runtime information of the pipeline
                that this node runs in. Not read by this method.

        Returns:
            An `ExecutionInfo` whose input and output dicts are both the
            resolved input artifacts.
        """
        logging.info('Running as an resolver node.')
        with mlmd_connection as handler:
            # Step 1: contexts the execution will be associated with.
            node_contexts = context_lib.prepare_contexts(
                metadata_handler=handler,
                node_contexts=pipeline_node.contexts,
            )

            # Step 2: execution properties first, then input artifacts.
            properties = inputs_utils.resolve_parameters(
                node_parameters=pipeline_node.parameters,
            )
            resolved_artifacts = inputs_utils.resolve_input_artifacts(
                metadata_handler=handler,
                node_inputs=pipeline_node.inputs,
            )

            # Step 3: register the execution in metadata.
            registered = execution_publish_utils.register_execution(
                metadata_handler=handler,
                execution_type=pipeline_node.node_info.type,
                contexts=node_contexts,
                exec_properties=properties,
            )
            registered_id = registered.id

            # Step 4: publish, handing the resolved inputs over as outputs.
            execution_publish_utils.publish_internal_execution(
                metadata_handler=handler,
                contexts=node_contexts,
                execution_id=registered_id,
                output_artifacts=resolved_artifacts,
            )

            return data_types.ExecutionInfo(
                execution_id=registered_id,
                input_dict=resolved_artifacts,
                output_dict=resolved_artifacts,
                exec_properties=properties,
                pipeline_node=pipeline_node,
                pipeline_info=pipeline_info,
            )
Exemplo n.º 4
0
    def testResolveParameters(self):
        """Checks that string and int field values resolve under their keys."""
        # Two parameters, each holding a literal `field_value`.
        constants_textproto = """
        parameters {
          key: 'key_one'
          value {
            field_value {string_value: 'value_one'}
          }
        }
        parameters {
          key: 'key_two'
          value {
            field_value {int_value: 2}
          }
        }"""
        node_parameters = pipeline_pb2.NodeParameters()
        text_format.Parse(constants_textproto, node_parameters)

        resolved = inputs_utils.resolve_parameters(node_parameters)

        self.assertEqual(len(resolved), 2)
        expected_pairs = (('key_one', 'value_one'), ('key_two', 2))
        for key, expected_value in expected_pairs:
            self.assertEqual(resolved[key], expected_value)
Exemplo n.º 5
0
    def _prepare_execution(self) -> _PrepareExecutionResult:
        """Prepares inputs, outputs and execution properties for actual execution.

        Work is split across two separate MLMD connection scopes with the
        optional custom driver run in between them.

        Returns:
            A `_PrepareExecutionResult`. `is_execution_needed` is False when
            input resolution returned None (an empty `ExecutionInfo` is
            returned and no execution is registered) or when caching is
            enabled and cached outputs were found (the execution is published
            as cached). Otherwise it is True and `execution_info` carries the
            inputs, outputs, execution properties and output locations.
        """
        # TODO(b/150979622): handle the edge case that the component get evicted
        # between successful publish and stateful working dir being clean up.
        # Otherwise following retries will keep failing because of duplicate
        # publishes.
        with self._mlmd_connection as m:
            # 1. Prepares all contexts.
            contexts = context_lib.register_contexts_if_not_exists(
                metadata_handler=m, node_contexts=self._pipeline_node.contexts)

            # 2. Resolves inputs and execution properties.
            exec_properties = inputs_utils.resolve_parameters(
                node_parameters=self._pipeline_node.parameters)
            input_artifacts = inputs_utils.resolve_input_artifacts(
                metadata_handler=m, node_inputs=self._pipeline_node.inputs)
            # 3. If not all required inputs are met. Return ExecutionInfo with
            # is_execution_needed being false. No publish will happen so down stream
            # nodes won't be triggered.
            if input_artifacts is None:
                return _PrepareExecutionResult(
                    execution_info=data_types.ExecutionInfo(),
                    contexts=contexts,
                    is_execution_needed=False)

            # 4. Registers execution in metadata.
            execution = execution_publish_utils.register_execution(
                metadata_handler=m,
                execution_type=self._pipeline_node.node_info.type,
                contexts=contexts,
                input_artifacts=input_artifacts,
                exec_properties=exec_properties)

            # 5. Resolve output
            output_artifacts = self._output_resolver.generate_output_artifacts(
                execution.id)

        # If there is a custom driver, runs it. This happens outside of any
        # MLMD connection scope.
        if self._driver_operator:
            driver_output = self._driver_operator.run_driver(
                data_types.ExecutionInfo(
                    input_dict=input_artifacts,
                    output_dict=output_artifacts,
                    exec_properties=exec_properties,
                    execution_output_uri=self._output_resolver.
                    get_driver_output_uri()))
            # NOTE(review): the return value is discarded, so this presumably
            # updates `exec_properties` and `output_artifacts` in place --
            # confirm against `_update_with_driver_output`.
            self._update_with_driver_output(driver_output, exec_properties,
                                            output_artifacts)

        # We reconnect to MLMD here because the custom driver closes MLMD connection
        # on returning.
        with self._mlmd_connection as m:
            # 6. Check cached result
            cache_context = cache_utils.get_cache_context(
                metadata_handler=m,
                pipeline_node=self._pipeline_node,
                pipeline_info=self._pipeline_info,
                input_artifacts=input_artifacts,
                output_artifacts=output_artifacts,
                parameters=exec_properties)
            # The cache context joins the node contexts, so it is included in
            # whatever is published or returned below.
            contexts.append(cache_context)
            cached_outputs = cache_utils.get_cached_outputs(
                metadata_handler=m, cache_context=cache_context)

            # 7. Should cache be used? Only when caching is enabled for this
            # node and cached outputs were actually found.
            if (self._pipeline_node.execution_options.caching_options.
                    enable_cache and cached_outputs):
                # Publishes cache result
                execution_publish_utils.publish_cached_execution(
                    metadata_handler=m,
                    contexts=contexts,
                    execution_id=execution.id,
                    output_artifacts=cached_outputs)
                return _PrepareExecutionResult(
                    execution_info=data_types.ExecutionInfo(
                        execution_id=execution.id),
                    execution_metadata=execution,
                    contexts=contexts,
                    is_execution_needed=False)

            # The run id is read from the string field value of the runtime
            # spec's `pipeline_run_id`.
            pipeline_run_id = (self._pipeline_runtime_spec.pipeline_run_id.
                               field_value.string_value)

            # 8. Going to trigger executor.
            return _PrepareExecutionResult(
                execution_info=data_types.ExecutionInfo(
                    execution_id=execution.id,
                    input_dict=input_artifacts,
                    output_dict=output_artifacts,
                    exec_properties=exec_properties,
                    execution_output_uri=self._output_resolver.
                    get_executor_output_uri(execution.id),
                    stateful_working_dir=(self._output_resolver.
                                          get_stateful_working_directory()),
                    tmp_dir=self._output_resolver.make_tmp_dir(execution.id),
                    pipeline_node=self._pipeline_node,
                    pipeline_info=self._pipeline_info,
                    pipeline_run_id=pipeline_run_id),
                execution_metadata=execution,
                contexts=contexts,
                is_execution_needed=True)
Exemplo n.º 6
0
    def run(
        self, mlmd_connection: metadata.Metadata,
        pipeline_node: pipeline_pb2.PipelineNode,
        pipeline_info: pipeline_pb2.PipelineInfo,
        pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
    ) -> metadata_store_pb2.Execution:
        """Runs the importer-specific logic for a node.

        Registers contexts and an execution, generates the output artifacts
        that represent the imported artifacts, and publishes the execution as
        succeeded with those outputs.

        Args:
            mlmd_connection: ML metadata connection.
            pipeline_node: The specification of the node that this launcher
                launches.
            pipeline_info: The information of the pipeline that this node
                runs in. Not read by this method.
            pipeline_runtime_spec: The runtime information of the pipeline
                that this node runs in. Not read by this method.

        Returns:
            The execution registered for this run.
        """
        logging.info('Running as an importer node.')
        with mlmd_connection as handler:
            # Step 1: contexts, registered only if missing.
            node_contexts = context_lib.register_contexts_if_not_exists(
                metadata_handler=handler,
                node_contexts=pipeline_node.contexts,
            )

            # Step 2: execution properties only; importers have no inputs.
            properties = inputs_utils.resolve_parameters(
                node_parameters=pipeline_node.parameters,
            )

            # Step 3: register the execution in metadata.
            registered = execution_publish_utils.register_execution(
                metadata_handler=handler,
                execution_type=pipeline_node.node_info.type,
                contexts=node_contexts,
                exec_properties=properties,
            )

            # Step 4: build the output artifacts for the import result.
            import_output = pipeline_node.outputs.outputs[
                importer_node.IMPORT_RESULT_KEY]
            artifact_spec = import_output.artifact_spec
            extra_properties = self._extract_proto_map(
                artifact_spec.additional_properties)
            extra_custom_properties = self._extract_proto_map(
                artifact_spec.additional_custom_properties)
            artifact_class = types.Artifact(artifact_spec.type).type
            source_uri = str(properties[importer_node.SOURCE_URI_KEY])
            should_reimport = bool(
                properties[importer_node.REIMPORT_OPTION_KEY])
            imported_artifacts = importer_node.generate_output_dict(
                metadata_handler=handler,
                uri=source_uri,
                properties=extra_properties,
                custom_properties=extra_custom_properties,
                reimport=should_reimport,
                output_artifact_class=artifact_class,
                mlmd_artifact_type=artifact_spec.type,
            )

            # Step 5: publish the execution together with its outputs.
            execution_publish_utils.publish_succeeded_execution(
                metadata_handler=handler,
                execution_id=registered.id,
                contexts=node_contexts,
                output_artifacts=imported_artifacts,
            )

            return registered
Exemplo n.º 7
0
    def run(
        self, mlmd_connection: metadata.Metadata,
        pipeline_node: pipeline_pb2.PipelineNode,
        pipeline_info: pipeline_pb2.PipelineInfo,
        pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
    ) -> data_types.ExecutionInfo:
        """Runs the importer-specific logic for a node.

        Prepares contexts, registers an execution, generates the output
        artifacts that represent the imported artifacts, tags them with a
        version, and publishes the execution.

        Args:
            mlmd_connection: ML metadata connection.
            pipeline_node: The specification of the node that this launcher
                launches.
            pipeline_info: The information of the pipeline that this node
                runs in.
            pipeline_runtime_spec: The runtime information of the pipeline
                that this node runs in. Not read by this method.

        Returns:
            An `ExecutionInfo` with an empty input dict and the generated
            output artifacts.
        """
        logging.info('Running as an importer node.')
        with mlmd_connection as handler:
            # Step 1: contexts for this node.
            node_contexts = context_lib.prepare_contexts(
                metadata_handler=handler,
                node_contexts=pipeline_node.contexts,
            )

            # Step 2: execution properties only; importers have no inputs.
            properties = inputs_utils.resolve_parameters(
                node_parameters=pipeline_node.parameters,
            )

            # Step 3: register the execution in metadata.
            registered = execution_publish_utils.register_execution(
                metadata_handler=handler,
                execution_type=pipeline_node.node_info.type,
                contexts=node_contexts,
                exec_properties=properties,
            )

            # Step 4: build the output artifacts for the import result.
            import_output = pipeline_node.outputs.outputs[
                importer.IMPORT_RESULT_KEY]
            artifact_spec = import_output.artifact_spec
            extra_properties = self._extract_proto_map(
                artifact_spec.additional_properties)
            extra_custom_properties = self._extract_proto_map(
                artifact_spec.additional_custom_properties)
            artifact_class = types.Artifact(artifact_spec.type).type
            source_uri = str(properties[importer.SOURCE_URI_KEY])
            should_reimport = bool(properties[importer.REIMPORT_OPTION_KEY])
            imported_artifacts = importer.generate_output_dict(
                metadata_handler=handler,
                uri=source_uri,
                properties=extra_properties,
                custom_properties=extra_custom_properties,
                reimport=should_reimport,
                output_artifact_class=artifact_class,
                mlmd_artifact_type=artifact_spec.type,
            )

            result = data_types.ExecutionInfo(
                execution_id=registered.id,
                input_dict={},
                output_dict=imported_artifacts,
                exec_properties=properties,
                pipeline_node=pipeline_node,
                pipeline_info=pipeline_info,
            )

            # TODO(b/182316162): consider letting the launcher level do the
            # publish for system nodes, so that the version tagging logic
            # doesn't need to be handled per system node.
            outputs_utils.tag_output_artifacts_with_version(result.output_dict)

            # Step 5: pick the publisher. Reimported artifacts go through
            # `publish_cached_execution`; everything else goes through
            # `publish_succeeded_execution`. Both take the same keywords.
            if _is_artifact_reimported(imported_artifacts):
                publish = execution_publish_utils.publish_cached_execution
            else:
                publish = execution_publish_utils.publish_succeeded_execution
            publish(
                metadata_handler=handler,
                contexts=node_contexts,
                execution_id=registered.id,
                output_artifacts=imported_artifacts,
            )

            return result