def _generate_task_from_execution(metadata_handler: metadata.Metadata,
                                  pipeline: pipeline_pb2.Pipeline,
                                  node: pipeline_pb2.PipelineNode,
                                  execution: metadata_store_pb2.Execution,
                                  is_cancelled: bool = False) -> task_lib.Task:
  """Generates `ExecNodeTask` given execution."""
  contexts = metadata_handler.store.get_contexts_by_execution(execution.id)
  exec_properties = extract_properties(execution)
  input_artifacts = execution_lib.get_artifacts_dict(
      metadata_handler, execution.id, [metadata_store_pb2.Event.INPUT])
  outputs_resolver = outputs_utils.OutputsResolver(node, pipeline.pipeline_info,
                                                   pipeline.runtime_spec,
                                                   pipeline.execution_mode)
  output_artifacts = outputs_resolver.generate_output_artifacts(execution.id)
  outputs_utils.make_output_dirs(output_artifacts)
  return task_lib.ExecNodeTask(
      node_uid=task_lib.NodeUid.from_pipeline_node(pipeline, node),
      execution_id=execution.id,
      contexts=contexts,
      exec_properties=exec_properties,
      input_artifacts=input_artifacts,
      output_artifacts=output_artifacts,
      executor_output_uri=outputs_resolver.get_executor_output_uri(
          execution.id),
      stateful_working_dir=outputs_resolver.get_stateful_working_directory(
          execution.id),
      tmp_dir=outputs_resolver.make_tmp_dir(execution.id),
      pipeline=pipeline,
      is_cancelled=is_cancelled)
def testMakeOutputDirsAndRemoveOutputDirs(self):
  output_artifacts = self._output_resolver.generate_output_artifacts(1)
  outputs_utils.make_output_dirs(output_artifacts)
  for _, artifact_list in output_artifacts.items():
    for artifact in artifact_list:
      self.assertTrue(tf.io.gfile.exists(artifact.uri))
  outputs_utils.remove_output_dirs(output_artifacts)
  for _, artifact_list in output_artifacts.items():
    for artifact in artifact_list:
      self.assertFalse(tf.io.gfile.exists(artifact.uri))
def _run_executor(
    self, execution_info: data_types.ExecutionInfo
) -> execution_result_pb2.ExecutorOutput:
  """Executes underlying component implementation."""
  logging.info('Going to run a new execution: %s', execution_info)
  outputs_utils.make_output_dirs(execution_info.output_dict)
  try:
    return self._executor_operator.run_executor(execution_info)
  except Exception:  # pylint: disable=broad-except
    outputs_utils.remove_output_dirs(execution_info.output_dict)
    raise
def testMakeOutputDirsAndRemoveOutputDirs(self):
  output_artifacts = self._output_resolver.generate_output_artifacts(1)
  outputs_utils.make_output_dirs(output_artifacts)
  for _, artifact_list in output_artifacts.items():
    for artifact in artifact_list:
      if isinstance(artifact, ValueArtifact):
        self.assertFalse(fileio.isdir(artifact.uri))
      else:
        self.assertTrue(fileio.isdir(artifact.uri))
      self.assertTrue(fileio.exists(artifact.uri))
  outputs_utils.remove_output_dirs(output_artifacts)
  for _, artifact_list in output_artifacts.items():
    for artifact in artifact_list:
      self.assertFalse(fileio.exists(artifact.uri))
def _run_executor(
    self, execution_info: base_executor_operator.ExecutionInfo
) -> execution_result_pb2.ExecutorOutput:
  """Executes underlying component implementation."""
  logging.info('Going to run a new execution: %s', execution_info)
  outputs_utils.make_output_dirs(execution_info.output_dict)
  try:
    return self._executor_operator.run_executor(execution_info)
  except Exception as e:  # pylint: disable=broad-except
    outputs_utils.remove_output_dirs(execution_info.output_dict)
    logging.error(
        'Execution failed with error %s '
        'and this is the stack trace \n %s', e, traceback.format_exc())
    raise
def testMakeOutputDirsArtifactAlreadyExists(self):
  output_artifacts = self._output_resolver().generate_output_artifacts(1)
  outputs_utils.make_output_dirs(output_artifacts)
  for _, artifact_list in output_artifacts.items():
    for artifact in artifact_list:
      if isinstance(artifact, ValueArtifact):
        with fileio.open(artifact.uri, 'w') as f:
          f.write('test')
      else:
        with fileio.open(os.path.join(artifact.uri, 'output'), 'w') as f:
          f.write('test')
  outputs_utils.make_output_dirs(output_artifacts)
  for _, artifact_list in output_artifacts.items():
    for artifact in artifact_list:
      if isinstance(artifact, ValueArtifact):
        with fileio.open(artifact.uri, 'r') as f:
          self.assertEqual(f.read(), 'test')
      else:
        with fileio.open(os.path.join(artifact.uri, 'output'), 'r') as f:
          self.assertEqual(f.read(), 'test')
def _run_executor(
    self, execution_info: data_types.ExecutionInfo
) -> execution_result_pb2.ExecutorOutput:
  """Executes underlying component implementation."""
  logging.info('Going to run a new execution: %s', execution_info)
  outputs_utils.make_output_dirs(execution_info.output_dict)
  try:
    executor_output = self._executor_operator.run_executor(execution_info)
    code = executor_output.execution_result.code
    if code != 0:
      result_message = executor_output.execution_result.result_message
      err = (f'Execution {execution_info.execution_id} '
             f'failed with error code {code} and '
             f'error message {result_message}')
      logging.error(err)
      raise _ExecutionFailedError(err, executor_output)
    return executor_output
  except Exception:  # pylint: disable=broad-except
    outputs_utils.remove_output_dirs(execution_info.output_dict)
    raise
def testMakeClearAndRemoveOutputDirs(self):
  output_artifacts = self._output_resolver().generate_output_artifacts(1)
  outputs_utils.make_output_dirs(output_artifacts)
  for _, artifact_list in output_artifacts.items():
    for artifact in artifact_list:
      if isinstance(artifact, ValueArtifact):
        self.assertFalse(fileio.isdir(artifact.uri))
      else:
        self.assertTrue(fileio.isdir(artifact.uri))
        with fileio.open(os.path.join(artifact.uri, 'output'), 'w') as f:
          f.write('')
      self.assertTrue(fileio.exists(artifact.uri))
  outputs_utils.clear_output_dirs(output_artifacts)
  for _, artifact_list in output_artifacts.items():
    for artifact in artifact_list:
      if not isinstance(artifact, ValueArtifact):
        self.assertEqual(fileio.listdir(artifact.uri), [])
  outputs_utils.remove_output_dirs(output_artifacts)
  for _, artifact_list in output_artifacts.items():
    for artifact in artifact_list:
      self.assertFalse(fileio.exists(artifact.uri))
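The test above exercises the full output-directory lifecycle. A minimal standalone sketch of the same sequence (not from the source; the `Examples` artifact and temp-dir URI are illustrative stand-ins, and the import path assumes the portable orchestration package):

import os
import tempfile

from tfx.orchestration.portable import outputs_utils
from tfx.types import standard_artifacts

# Hypothetical artifact with a local URI, standing in for a resolved output.
examples = standard_artifacts.Examples()
examples.uri = os.path.join(tempfile.mkdtemp(), 'examples')
output_artifacts = {'examples': [examples]}

outputs_utils.make_output_dirs(output_artifacts)    # creates examples.uri
outputs_utils.clear_output_dirs(output_artifacts)   # empties dirs, keeps them
outputs_utils.remove_output_dirs(output_artifacts)  # deletes the dirs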
def _resolve_inputs_and_generate_tasks_for_node(
    self,
    node: pipeline_pb2.PipelineNode,
) -> List[task_lib.Task]:
  """Generates tasks for a node by freshly resolving inputs."""
  result = []
  node_uid = task_lib.NodeUid.from_pipeline_node(self._pipeline, node)
  resolved_info = task_gen_utils.generate_resolved_info(
      self._mlmd_handle, node)
  if resolved_info is None:
    result.append(
        task_lib.UpdateNodeStateTask(
            node_uid=node_uid, state=pstate.NodeState.SKIPPED))
    return result
  if not resolved_info.input_artifacts:
    error_msg = f'failure to resolve inputs; node uid: {node_uid}'
    result.append(
        task_lib.UpdateNodeStateTask(
            node_uid=node_uid,
            state=pstate.NodeState.FAILED,
            status=status_lib.Status(
                code=status_lib.Code.ABORTED, message=error_msg)))
    return result
  # TODO(b/207038460): Update sync pipeline to support ForEach.
  input_artifacts = resolved_info.input_artifacts[0]
  execution = execution_publish_utils.register_execution(
      metadata_handler=self._mlmd_handle,
      execution_type=node.node_info.type,
      contexts=resolved_info.contexts,
      input_artifacts=input_artifacts,
      exec_properties=resolved_info.exec_properties)
  outputs_resolver = outputs_utils.OutputsResolver(
      node, self._pipeline.pipeline_info, self._pipeline.runtime_spec,
      self._pipeline.execution_mode)
  output_artifacts = outputs_resolver.generate_output_artifacts(execution.id)

  # For mixed service nodes, we ensure node services and check service
  # status; pipeline is aborted if the service jobs have failed.
  service_status = self._ensure_node_services_if_mixed(node.node_info.id)
  if service_status == service_jobs.ServiceStatus.FAILED:
    error_msg = f'associated service job failed; node uid: {node_uid}'
    result.append(
        task_lib.UpdateNodeStateTask(
            node_uid=node_uid,
            state=pstate.NodeState.FAILED,
            status=status_lib.Status(
                code=status_lib.Code.ABORTED, message=error_msg)))
    return result

  outputs_utils.make_output_dirs(output_artifacts)
  result.append(
      task_lib.UpdateNodeStateTask(
          node_uid=node_uid, state=pstate.NodeState.RUNNING))
  result.append(
      task_lib.ExecNodeTask(
          node_uid=node_uid,
          execution_id=execution.id,
          contexts=resolved_info.contexts,
          input_artifacts=input_artifacts,
          exec_properties=resolved_info.exec_properties,
          output_artifacts=output_artifacts,
          executor_output_uri=outputs_resolver.get_executor_output_uri(
              execution.id),
          stateful_working_dir=outputs_resolver.get_stateful_working_directory(
              execution.id),
          tmp_dir=outputs_resolver.make_tmp_dir(execution.id),
          pipeline=self._pipeline))
  return result
def _generate_tasks_for_node(
    self, metadata_handler: metadata.Metadata,
    node: pipeline_pb2.PipelineNode) -> List[task_lib.Task]:
  """Generates tasks to execute the given node.

  Args:
    metadata_handler: A handler to access MLMD db.
    node: The pipeline node for which to generate tasks.

  Returns:
    A list of `Task`s; the list is empty if task generation is deemed
    infeasible for the node.
  """
  result = []
  node_uid = task_lib.NodeUid.from_pipeline_node(self._pipeline, node)
  executions = task_gen_utils.get_executions(metadata_handler, node)
  exec_node_task = task_gen_utils.generate_task_from_active_execution(
      metadata_handler, self._pipeline, node, executions)
  if exec_node_task:
    result.append(
        task_lib.UpdateNodeStateTask(
            node_uid=node_uid, state=pstate.NodeState.RUNNING))
    result.append(exec_node_task)
    return result

  resolved_info = task_gen_utils.generate_resolved_info(
      metadata_handler, node)
  # TODO(b/207038460): Update async pipeline to support ForEach.
  if (resolved_info is None or not resolved_info.input_artifacts or
      resolved_info.input_artifacts[0] is None or
      not any(resolved_info.input_artifacts[0].values())):
    logging.info(
        'Task cannot be generated for node %s since no input artifacts '
        'are resolved.', node.node_info.id)
    return result
  input_artifact = resolved_info.input_artifacts[0]

  executor_spec_fingerprint = hashlib.sha256()
  executor_spec = task_gen_utils.get_executor_spec(
      self._pipeline_state.pipeline, node.node_info.id)
  if executor_spec is not None:
    executor_spec_fingerprint.update(
        executor_spec.SerializeToString(deterministic=True))
  resolved_info.exec_properties[
      constants.EXECUTOR_SPEC_FINGERPRINT_KEY] = (
          executor_spec_fingerprint.hexdigest())

  # If the latest execution had the same resolved input artifacts, execution
  # properties and executor spec, we should not trigger a new execution.
  latest_exec = task_gen_utils.get_latest_execution(executions)
  if latest_exec:
    artifact_ids_by_event_type = (
        execution_lib.get_artifact_ids_by_event_type_for_execution_id(
            metadata_handler, latest_exec.id))
    latest_exec_input_artifact_ids = artifact_ids_by_event_type.get(
        metadata_store_pb2.Event.INPUT, set())
    current_exec_input_artifact_ids = set(
        a.id for a in itertools.chain(*input_artifact.values()))
    latest_exec_properties = task_gen_utils.extract_properties(latest_exec)
    current_exec_properties = resolved_info.exec_properties
    latest_exec_executor_spec_fp = latest_exec_properties[
        constants.EXECUTOR_SPEC_FINGERPRINT_KEY]
    current_exec_executor_spec_fp = resolved_info.exec_properties[
        constants.EXECUTOR_SPEC_FINGERPRINT_KEY]
    if (latest_exec_input_artifact_ids == current_exec_input_artifact_ids and
        _exec_properties_match(latest_exec_properties,
                               current_exec_properties) and
        latest_exec_executor_spec_fp == current_exec_executor_spec_fp):
      result.append(
          task_lib.UpdateNodeStateTask(
              node_uid=node_uid, state=pstate.NodeState.STARTED))
      return result

  execution = execution_publish_utils.register_execution(
      metadata_handler=metadata_handler,
      execution_type=node.node_info.type,
      contexts=resolved_info.contexts,
      input_artifacts=input_artifact,
      exec_properties=resolved_info.exec_properties)
  outputs_resolver = outputs_utils.OutputsResolver(
      node, self._pipeline.pipeline_info, self._pipeline.runtime_spec,
      self._pipeline.execution_mode)

  # For mixed service nodes, we ensure node services and check service
  # status; the node is aborted if its service jobs have failed.
  service_status = self._ensure_node_services_if_mixed(node.node_info.id)
  if service_status is not None:
    if service_status != service_jobs.ServiceStatus.RUNNING:
      error_msg = f'associated service job failed; node uid: {node_uid}'
      result.append(
          task_lib.UpdateNodeStateTask(
              node_uid=node_uid,
              state=pstate.NodeState.FAILED,
              status=status_lib.Status(
                  code=status_lib.Code.ABORTED, message=error_msg)))
      return result

  output_artifacts = outputs_resolver.generate_output_artifacts(execution.id)
  outputs_utils.make_output_dirs(output_artifacts)
  result.append(
      task_lib.UpdateNodeStateTask(
          node_uid=node_uid, state=pstate.NodeState.RUNNING))
  result.append(
      task_lib.ExecNodeTask(
          node_uid=node_uid,
          execution_id=execution.id,
          contexts=resolved_info.contexts,
          input_artifacts=input_artifact,
          exec_properties=resolved_info.exec_properties,
          output_artifacts=output_artifacts,
          executor_output_uri=outputs_resolver.get_executor_output_uri(
              execution.id),
          stateful_working_dir=outputs_resolver.get_stateful_working_directory(
              execution.id),
          tmp_dir=outputs_resolver.make_tmp_dir(execution.id),
          pipeline=self._pipeline))
  return result
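For clarity, the executor-spec fingerprint computed inline above reduces to a SHA-256 over the deterministically serialized executor spec proto. A hedged sketch of that reduction as a standalone helper (`executor_spec_fingerprint` is a hypothetical name, not part of the source's API):

import hashlib
from typing import Optional

from google.protobuf import message


def executor_spec_fingerprint(
    executor_spec: Optional[message.Message]) -> str:
  """Returns a hex digest identifying an executor spec (empty digest if None)."""
  h = hashlib.sha256()
  if executor_spec is not None:
    # Deterministic serialization keeps the digest stable across runs.
    h.update(executor_spec.SerializeToString(deterministic=True))
  return h.hexdigest()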