Example No. 1
    def run(
        self, mlmd_connection: metadata.Metadata,
        pipeline_node: pipeline_pb2.PipelineNode,
        pipeline_info: pipeline_pb2.PipelineInfo,
        pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
    ) -> data_types.ExecutionInfo:
        """Runs Resolver specific logic.

    Args:
      mlmd_connection: ML metadata connection.
      pipeline_node: The specification of the node that this launcher lauches.
      pipeline_info: The information of the pipeline that this node runs in.
      pipeline_runtime_spec: The runtime information of the pipeline that this
        node runs in.

    Returns:
      The execution of the run.
    """
        logging.info('Running as a resolver node.')
        with mlmd_connection as m:
            # 1. Prepares all contexts.
            contexts = context_lib.prepare_contexts(
                metadata_handler=m, node_contexts=pipeline_node.contexts)

            # 2. Resolves inputs and execution properties.
            exec_properties = inputs_utils.resolve_parameters(
                node_parameters=pipeline_node.parameters)
            input_artifacts = inputs_utils.resolve_input_artifacts(
                metadata_handler=m, node_inputs=pipeline_node.inputs)

            # 3. Registers execution in metadata.
            execution = execution_publish_utils.register_execution(
                metadata_handler=m,
                execution_type=pipeline_node.node_info.type,
                contexts=contexts,
                exec_properties=exec_properties)

            # 4. Publishes the execution as a cached execution with the
            # resolved input artifacts as the output artifacts.
            execution_publish_utils.publish_internal_execution(
                metadata_handler=m,
                contexts=contexts,
                execution_id=execution.id,
                output_artifacts=input_artifacts)

            return data_types.ExecutionInfo(execution_id=execution.id,
                                            input_dict=input_artifacts,
                                            output_dict=input_artifacts,
                                            exec_properties=exec_properties,
                                            pipeline_node=pipeline_node,
                                            pipeline_info=pipeline_info)
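The method above registers an execution in MLMD and immediately publishes it, re-using the resolved inputs as the outputs. Below is a minimal sketch of driving it, assuming the method lives on a resolver node handler class (the ResolverNodeHandler name/import path and the pre-populated protos are assumptions; in a real run the protos come from the TFX compiler and runtime-parameter substitution):

from tfx.orchestration import metadata
from tfx.orchestration.portable import resolver_node_handler  # assumed import path
from tfx.proto.orchestration import pipeline_pb2

# A local SQLite-backed MLMD instance for illustration.
connection_config = metadata.sqlite_metadata_connection_config('/tmp/mlmd.sqlite')
mlmd_connection = metadata.Metadata(connection_config=connection_config)

# Placeholders: in practice these protos are produced by the compiler.
pipeline_node = pipeline_pb2.PipelineNode()
pipeline_info = pipeline_pb2.PipelineInfo(id='my_pipeline')
runtime_spec = pipeline_pb2.PipelineRuntimeSpec()

handler = resolver_node_handler.ResolverNodeHandler()  # assumed class name
execution_info = handler.run(
    mlmd_connection, pipeline_node, pipeline_info, runtime_spec)
print(execution_info.execution_id)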
Example No. 2
    def testPublishInternalExecution(self):
        with metadata.Metadata(connection_config=self._connection_config) as m:
            contexts = self._generate_contexts(m)
            execution_id = execution_publish_utils.register_execution(
                m, self._execution_type, contexts).id
            output_example = standard_artifacts.Examples()
            execution_publish_utils.publish_internal_execution(
                m,
                contexts,
                execution_id,
                output_artifacts={'examples': [output_example]})
            [execution] = m.store.get_executions()
            self.assertProtoPartiallyEquals(
                """
                id: 1
                type_id: 3
                last_known_state: COMPLETE
                """,
                execution,
                ignored_fields=[
                    'create_time_since_epoch',
                    'last_update_time_since_epoch'
                ])
            [event] = m.store.get_events_by_execution_ids([execution.id])
            self.assertProtoPartiallyEquals(
                """
                artifact_id: 1
                execution_id: 1
                path {
                  steps {
                    key: 'examples'
                  }
                  steps {
                    index: 0
                  }
                }
                type: INTERNAL_OUTPUT
                """,
                event,
                ignored_fields=['milliseconds_since_epoch'])
            # Verifies the context-execution edges are set up.
            self.assertCountEqual(
                [c.id for c in contexts],
                [c.id for c in m.store.get_contexts_by_execution(execution.id)])
            self.assertCountEqual(
                [c.id for c in contexts],
                [c.id for c in m.store.get_contexts_by_artifact(output_example.id)])
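The detail this test pins down is that publish_internal_execution records the output with an INTERNAL_OUTPUT event rather than a regular OUTPUT event. A short sketch of checking that directly against the store, re-using the names from the test above (illustrative only, not part of the test):

from ml_metadata.proto import metadata_store_pb2

# Fetch the single event recorded for the execution and the artifact it references.
[event] = m.store.get_events_by_execution_ids([execution_id])
[artifact] = m.store.get_artifacts_by_id([event.artifact_id])

# Internal executions mark their outputs as INTERNAL_OUTPUT, which separates
# them from the OUTPUT events written by ordinary component executions.
assert event.type == metadata_store_pb2.Event.INTERNAL_OUTPUT
assert artifact.id == output_example.id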
Example No. 3
def _publish_execution_results(mlmd_handle: metadata.Metadata,
                               task: task_lib.ExecNodeTask,
                               result: ts.TaskSchedulerResult) -> None:
    """Publishes execution results to MLMD."""
    def _update_state(status: status_lib.Status) -> None:
        assert status.code != status_lib.Code.OK
        _remove_output_dirs(task, result)
        _remove_task_dirs(task)
        if status.code == status_lib.Code.CANCELLED:
            logging.info(
                'Cancelling execution (id: %s); task id: %s; status: %s',
                task.execution_id, task.task_id, status)
            execution_state = metadata_store_pb2.Execution.CANCELED
        else:
            logging.info(
                'Aborting execution (id: %s) due to error (code: %s); task id: %s',
                task.execution_id, status.code, task.task_id)
            execution_state = metadata_store_pb2.Execution.FAILED
        _update_execution_state_in_mlmd(mlmd_handle, task.execution_id,
                                        execution_state, status.message)
        pipeline_state.record_state_change_time()

    if result.status.code != status_lib.Code.OK:
        _update_state(result.status)
        return

    # TODO(b/182316162): Unify publisher handling so that post-execution
    # artifact logic is more cleanly handled.
    outputs_utils.tag_output_artifacts_with_version(task.output_artifacts)
    if isinstance(result.output, ts.ExecutorNodeOutput):
        executor_output = result.output.executor_output
        if executor_output is not None:
            if executor_output.execution_result.code != status_lib.Code.OK:
                _update_state(
                    status_lib.Status(
                        code=executor_output.execution_result.code,
                        message=executor_output.execution_result.result_message
                    ))
                return
            # TODO(b/182316162): Unify publisher handling so that
            # post-execution artifact logic is more cleanly handled.
            outputs_utils.tag_executor_output_with_version(executor_output)
        _remove_task_dirs(task)
        execution_publish_utils.publish_succeeded_execution(
            mlmd_handle,
            execution_id=task.execution_id,
            contexts=task.contexts,
            output_artifacts=task.output_artifacts,
            executor_output=executor_output)
    elif isinstance(result.output, ts.ImporterNodeOutput):
        output_artifacts = result.output.output_artifacts
        # TODO(b/182316162): Unify publisher handling so that post-execution
        # artifact logic is more cleanly handled.
        outputs_utils.tag_output_artifacts_with_version(output_artifacts)
        _remove_task_dirs(task)
        execution_publish_utils.publish_succeeded_execution(
            mlmd_handle,
            execution_id=task.execution_id,
            contexts=task.contexts,
            output_artifacts=output_artifacts)
    elif isinstance(result.output, ts.ResolverNodeOutput):
        resolved_input_artifacts = result.output.resolved_input_artifacts
        execution_publish_utils.publish_internal_execution(
            mlmd_handle,
            execution_id=task.execution_id,
            contexts=task.contexts,
            output_artifacts=resolved_input_artifacts)
    else:
        raise TypeError(f'Unable to process task scheduler result: {result}')

    pipeline_state.record_state_change_time()
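For reference, the ResolverNodeOutput branch above fires for a result shaped roughly as follows. The constructor keywords are inferred from the attribute accesses in the snippet, so treat them as assumptions; mlmd_handle, task, and example_artifact are placeholders that would come from the task scheduler:

from tfx.orchestration.experimental.core import task_scheduler as ts
from tfx.utils import status as status_lib

result = ts.TaskSchedulerResult(
    status=status_lib.Status(code=status_lib.Code.OK),
    output=ts.ResolverNodeOutput(
        resolved_input_artifacts={'examples': [example_artifact]}))

# Takes the resolver branch: the resolved inputs are published as the
# (internal) outputs of the execution via publish_internal_execution.
_publish_execution_results(mlmd_handle, task, result)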
Example No. 4
    def run(
        self, mlmd_connection: metadata.Metadata,
        pipeline_node: pipeline_pb2.PipelineNode,
        pipeline_info: pipeline_pb2.PipelineInfo,
        pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
    ) -> data_types.ExecutionInfo:
        """Runs Resolver specific logic.

    Args:
      mlmd_connection: ML metadata connection.
      pipeline_node: The specification of the node that this launcher lauches.
      pipeline_info: The information of the pipeline that this node runs in.
      pipeline_runtime_spec: The runtime information of the pipeline that this
        node runs in.

    Returns:
      The execution of the run.
    """
        logging.info('Running as a resolver node.')
        with mlmd_connection as m:
            # 1. Prepares all contexts.
            contexts = context_lib.prepare_contexts(
                metadata_handler=m, node_contexts=pipeline_node.contexts)

            # 2. Resolves inputs and execution properties.
            exec_properties = data_types_utils.build_parsed_value_dict(
                inputs_utils.resolve_parameters_with_schema(
                    node_parameters=pipeline_node.parameters))
            try:
                resolved_inputs = inputs_utils.resolve_input_artifacts_v2(
                    pipeline_node=pipeline_node, metadata_handler=m)
            except exceptions.InputResolutionError as e:
                execution = execution_publish_utils.register_execution(
                    metadata_handler=m,
                    execution_type=pipeline_node.node_info.type,
                    contexts=contexts,
                    exec_properties=exec_properties)
                execution_publish_utils.publish_failed_execution(
                    metadata_handler=m,
                    contexts=contexts,
                    execution_id=execution.id,
                    executor_output=self._build_error_output(
                        code=e.grpc_code_value))
                return data_types.ExecutionInfo(
                    execution_id=execution.id,
                    exec_properties=exec_properties,
                    pipeline_node=pipeline_node,
                    pipeline_info=pipeline_info)

            # 2a. If Skip (i.e., inside a conditional), no execution should be made.
            # TODO(b/197907821): Publish special execution for Skip?
            if isinstance(resolved_inputs, inputs_utils.Skip):
                return data_types.ExecutionInfo()

            # 3. Registers execution in metadata.
            execution = execution_publish_utils.register_execution(
                metadata_handler=m,
                execution_type=pipeline_node.node_info.type,
                contexts=contexts,
                exec_properties=exec_properties)

            # TODO(b/197741942): Support len > 1.
            if len(resolved_inputs) > 1:
                execution_publish_utils.publish_failed_execution(
                    metadata_handler=m,
                    contexts=contexts,
                    execution_id=execution.id,
                    executor_output=self._build_error_output(
                        _ERROR_CODE_UNIMPLEMENTED,
                        'Handling more than one input dict is not implemented yet.'
                    ))
                return data_types.ExecutionInfo(
                    execution_id=execution.id,
                    exec_properties=exec_properties,
                    pipeline_node=pipeline_node,
                    pipeline_info=pipeline_info)

            input_artifacts = resolved_inputs[0]

            # 4. Publishes the execution as a cached execution with the
            # resolved input artifacts as the output artifacts.
            execution_publish_utils.publish_internal_execution(
                metadata_handler=m,
                contexts=contexts,
                execution_id=execution.id,
                output_artifacts=input_artifacts)

            return data_types.ExecutionInfo(execution_id=execution.id,
                                            input_dict=input_artifacts,
                                            output_dict=input_artifacts,
                                            exec_properties=exec_properties,
                                            pipeline_node=pipeline_node,
                                            pipeline_info=pipeline_info)
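Note the Skip path in step 2a: it returns an empty ExecutionInfo without registering anything in MLMD. Assuming ExecutionInfo defaults execution_id to None (an assumption about data_types.ExecutionInfo; the handler is re-used from the sketch after Example No. 1), a caller can tell the skipped case apart from a published one:

from absl import logging

execution_info = handler.run(
    mlmd_connection, pipeline_node, pipeline_info, runtime_spec)
if execution_info.execution_id is None:
    # The resolver sat inside a conditional whose predicate evaluated to
    # false; nothing was registered or published in MLMD.
    logging.info('Resolver node skipped; no execution recorded.')
else:
    logging.info('Resolver execution %d published.', execution_info.execution_id)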