Example #1
 def setUp(self):
     super(PlaceholderUtilsTest, self).setUp()
     examples = [standard_artifacts.Examples()]
     examples[0].uri = "/tmp"
     examples[0].split_names = artifact_utils.encode_split_names(
         ["train", "eval"])
     self._serving_spec = infra_validator_pb2.ServingSpec()
     self._serving_spec.tensorflow_serving.tags.extend(
         ["latest", "1.15.0-gpu"])
     self._resolution_context = placeholder_utils.ResolutionContext(
         exec_info=data_types.ExecutionInfo(
             input_dict={
                 "model": [standard_artifacts.Model()],
                 "examples": examples,
             },
             output_dict={"blessing": [standard_artifacts.ModelBlessing()]},
             exec_properties={
                 "proto_property":
                 json_format.MessageToJson(message=self._serving_spec,
                                           sort_keys=True,
                                           preserving_proto_field_name=True,
                                           indent=0)
             },
             execution_output_uri="test_executor_output_uri",
             stateful_working_dir="test_stateful_working_dir",
             pipeline_node=pipeline_pb2.PipelineNode(
                 node_info=pipeline_pb2.NodeInfo(
                     type=metadata_store_pb2.ExecutionType(
                         name="infra_validator"))),
             pipeline_info=pipeline_pb2.PipelineInfo(
                 id="test_pipeline_id")),
         executor_spec=executable_spec_pb2.PythonClassExecutableSpec(
             class_path="test_class_path"),
     )
     # Resolution context to simulate missing optional values.
     self._none_resolution_context = placeholder_utils.ResolutionContext(
         exec_info=data_types.ExecutionInfo(
             input_dict={
                 "model": [],
                 "examples": [],
             },
             output_dict={"blessing": []},
             exec_properties={},
             pipeline_node=pipeline_pb2.PipelineNode(
                 node_info=pipeline_pb2.NodeInfo(
                     type=metadata_store_pb2.ExecutionType(
                         name="infra_validator"))),
             pipeline_info=pipeline_pb2.PipelineInfo(
                 id="test_pipeline_id")),
         executor_spec=None,
         platform_config=None)
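
A fixture like this is then consumed by tests that resolve placeholder expressions against the prepared context. A minimal hypothetical sketch of such a test (`expression_pb` is an assumed, previously built placeholder expression proto, not part of the original; `resolve_placeholder_expression` is the actual entry point, as Example #10 below shows):

 def testResolvesAgainstContext(self):
     # `expression_pb` is an assumed placeholder_pb2.PlaceholderExpression;
     # constructing one is omitted from this sketch.
     resolved_value = placeholder_utils.resolve_placeholder_expression(
         expression_pb, self._resolution_context)
     self.assertIsNotNone(resolved_value)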
Example #2
 def testRunExecutor_with_InprocessExecutor(self):
   executor_spec = text_format.Parse(
       """
     class_path: "tfx.orchestration.portable.python_executor_operator_test.InprocessExecutor"
   """, executable_spec_pb2.PythonClassExecutableSpec())
   operator = python_executor_operator.PythonExecutorOperator(executor_spec)
   input_dict = {'input_key': [standard_artifacts.Examples()]}
   output_dict = {'output_key': [standard_artifacts.Model()]}
   exec_properties = {'key': 'value'}
   stateful_working_dir = os.path.join(self.tmp_dir, 'stateful_working_dir')
   executor_output_uri = os.path.join(self.tmp_dir, 'executor_output')
   executor_output = operator.run_executor(
       data_types.ExecutionInfo(
           execution_id=1,
           input_dict=input_dict,
           output_dict=output_dict,
           exec_properties=exec_properties,
           stateful_working_dir=stateful_working_dir,
           execution_output_uri=executor_output_uri))
   self.assertProtoPartiallyEquals(
       """
         output_artifacts {
           key: "output_key"
           value {
             artifacts {
             }
           }
         }""", executor_output)
Example #3
 def testRunExecutor_with_InplaceUpdateExecutor(self):
     executor_spec = text_format.Parse(
         """
   class_path: "tfx.orchestration.portable.python_executor_operator_test.InplaceUpdateExecutor"
 """, executable_spec_pb2.PythonClassExecutableSpec())
     operator = python_executor_operator.PythonExecutorOperator(
         executor_spec)
     input_dict = {'input_key': [standard_artifacts.Examples()]}
     output_dict = {'output_key': [standard_artifacts.Model()]}
     exec_properties = {
         'string': 'value',
         'int': 1,
         'float': 0.0,
         # This should not happen in production and will be
         # dropped.
         'proto': execution_result_pb2.ExecutorOutput()
     }
     stateful_working_dir = os.path.join(self.tmp_dir,
                                         'stateful_working_dir')
     executor_output_uri = os.path.join(self.tmp_dir, 'executor_output')
     executor_output = operator.run_executor(
         data_types.ExecutionInfo(execution_id=1,
                                  input_dict=input_dict,
                                  output_dict=output_dict,
                                  exec_properties=exec_properties,
                                  stateful_working_dir=stateful_working_dir,
                                  execution_output_uri=executor_output_uri))
     self.assertProtoPartiallyEquals(
         """
       execution_properties {
         key: "float"
         value {
           double_value: 0.0
         }
       }
       execution_properties {
         key: "int"
         value {
           int_value: 1
         }
       }
       execution_properties {
         key: "string"
         value {
           string_value: "value"
         }
       }
       output_artifacts {
         key: "output_key"
         value {
           artifacts {
             custom_properties {
               key: "name"
               value {
                 string_value: "my_model"
               }
             }
           }
         }
       }""", executor_output)
Example #4
    def testQueryBasedDriver(self):
        # Create exec_properties.
        exec_properties = {
            standard_component_specs.INPUT_CONFIG_KEY:
            proto_utils.proto_to_json(
                example_gen_pb2.Input(splits=[
                    example_gen_pb2.Input.Split(
                        name='s1',
                        pattern=
                        "select * from table where span={SPAN} and split='s1'"
                    ),
                    example_gen_pb2.Input.Split(
                        name='s2',
                        pattern=
                        "select * from table where span={SPAN} and split='s2'")
                ])),
            standard_component_specs.RANGE_CONFIG_KEY:
            proto_utils.proto_to_json(
                range_config_pb2.RangeConfig(
                    static_range=range_config_pb2.StaticRange(
                        start_span_number=2, end_span_number=2))),
        }
        # Prepare output_dict
        example = standard_artifacts.Examples()
        example.uri = 'my_uri'
        output_dict = {standard_component_specs.EXAMPLES_KEY: [example]}

        query_based_driver = driver.QueryBasedDriver(self._mock_metadata)
        result = query_based_driver.run(
            portable_data_types.ExecutionInfo(output_dict=output_dict,
                                              exec_properties=exec_properties))

        self.assertEqual(exec_properties[utils.SPAN_PROPERTY_NAME], 2)
        self.assertIsNone(exec_properties[utils.VERSION_PROPERTY_NAME])
        self.assertIsNone(exec_properties[utils.FINGERPRINT_PROPERTY_NAME])
        updated_input_config = example_gen_pb2.Input()
        proto_utils.json_to_proto(
            exec_properties[standard_component_specs.INPUT_CONFIG_KEY],
            updated_input_config)
        self.assertProtoEquals(
            """
        splits {
          name: "s1"
          pattern: "select * from table where span=2 and split='s1'"
        }
        splits {
          name: "s2"
          pattern: "select * from table where span=2 and split='s2'"
        }""", updated_input_config)
        self.assertLen(
            result.output_artifacts[
                standard_component_specs.EXAMPLES_KEY].artifacts, 1)
        output_example = result.output_artifacts[
            standard_component_specs.EXAMPLES_KEY].artifacts[0]
        self.assertEqual(output_example.uri, example.uri)
        self.assertEqual(
            output_example.custom_properties[
                utils.SPAN_PROPERTY_NAME].string_value, '2')
Example #5
    def run(
        self, mlmd_connection: metadata.Metadata,
        pipeline_node: pipeline_pb2.PipelineNode,
        pipeline_info: pipeline_pb2.PipelineInfo,
        pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
    ) -> data_types.ExecutionInfo:
        """Runs Resolver specific logic.

    Args:
      mlmd_connection: ML metadata connection.
      pipeline_node: The specification of the node that this launcher launches.
      pipeline_info: The information of the pipeline that this node runs in.
      pipeline_runtime_spec: The runtime information of the pipeline that this
        node runs in.

    Returns:
      The execution of the run.
    """
        logging.info('Running as a resolver node.')
        with mlmd_connection as m:
            # 1. Prepares all contexts.
            contexts = context_lib.prepare_contexts(
                metadata_handler=m, node_contexts=pipeline_node.contexts)

            # 2. Resolves inputs and execution properties.
            exec_properties = inputs_utils.resolve_parameters(
                node_parameters=pipeline_node.parameters)
            input_artifacts = inputs_utils.resolve_input_artifacts(
                metadata_handler=m, node_inputs=pipeline_node.inputs)

            # 3. Registers execution in metadata.
            execution = execution_publish_utils.register_execution(
                metadata_handler=m,
                execution_type=pipeline_node.node_info.type,
                contexts=contexts,
                exec_properties=exec_properties)

            # 4. Publish the execution as a cached execution with the
            # resolved input artifacts as the output artifacts.
            execution_publish_utils.publish_internal_execution(
                metadata_handler=m,
                contexts=contexts,
                execution_id=execution.id,
                output_artifacts=input_artifacts)

            return data_types.ExecutionInfo(execution_id=execution.id,
                                            input_dict=input_artifacts,
                                            output_dict=input_artifacts,
                                            exec_properties=exec_properties,
                                            pipeline_node=pipeline_node,
                                            pipeline_info=pipeline_info)
Example #6
 def _set_up_test_execution_info(self,
                                 input_dict=None,
                                 output_dict=None,
                                 exec_properties=None):
   return data_types.ExecutionInfo(
       input_dict=input_dict or {},
       output_dict=output_dict or {},
       exec_properties=exec_properties or {},
       execution_output_uri='/testing/executor/output/',
       stateful_working_dir='/testing/stateful/dir',
       pipeline_node=pipeline_pb2.PipelineNode(
           node_info=pipeline_pb2.NodeInfo(
               type=metadata_store_pb2.ExecutionType(name='Docker_executor'))),
       pipeline_info=pipeline_pb2.PipelineInfo(id='test_pipeline_id'))
Example #7
 def testRunExecutorWithBeamPipelineArgs(self):
   executor_spec = text_format.Parse(
       """
     class_path: "tfx.orchestration.portable.python_executor_operator_test.ValidateBeamPipelineArgsExecutor"
     extra_flags: "--runner=DirectRunner"
   """, executable_spec_pb2.PythonClassExecutableSpec())
   operator = python_executor_operator.PythonExecutorOperator(executor_spec)
   executor_output_uri = os.path.join(self.tmp_dir, 'executor_output')
   operator.run_executor(
       data_types.ExecutionInfo(
           input_dict={},
           output_dict={},
           exec_properties={},
           execution_output_uri=executor_output_uri))
Example #8
 def _set_up_test_execution_info(self,
                                 input_dict=None,
                                 output_dict=None,
                                 exec_properties=None):
     return data_types.ExecutionInfo(
         execution_id=123,
         input_dict=input_dict or {},
         output_dict=output_dict or {},
         exec_properties=exec_properties or {},
         execution_output_uri='/testing/executor/output/',
         stateful_working_dir='/testing/stateful/dir',
         pipeline_node=pipeline_pb2.PipelineNode(
             node_info=pipeline_pb2.NodeInfo(
                 id='fakecomponent-fakecomponent')),
         pipeline_info=pipeline_pb2.PipelineInfo(id='Test'),
         pipeline_run_id='123')
Example #9
 def testRunExecutorWithBeamPipelineArgs(self):
   executor_spec = text_format.Parse(
       """
     python_executor_spec: {
         class_path: "tfx.orchestration.portable.beam_executor_operator_test.ValidateBeamPipelineArgsExecutor"
     }
     beam_pipeline_args: "--runner=DirectRunner"
   """, executable_spec_pb2.BeamExecutableSpec())
   operator = beam_executor_operator.BeamExecutorOperator(executor_spec)
   executor_output_uri = os.path.join(self.tmp_dir, 'executor_output')
   operator.run_executor(
       data_types.ExecutionInfo(
           execution_id=1,
           input_dict={},
           output_dict={},
           exec_properties={},
           execution_output_uri=executor_output_uri))
Example #10
    def resolve_artifacts(
        self, metadata_handler: metadata.Metadata,
        input_dict: Dict[str, List[types.Artifact]]
    ) -> Optional[Dict[str, List[types.Artifact]]]:
        for placeholder_pb in self._predicates:
            context = placeholder_utils.ResolutionContext(
                exec_info=portable_data_types.ExecutionInfo(
                    input_dict=input_dict))
            predicate_result = placeholder_utils.resolve_placeholder_expression(
                placeholder_pb, context)
            if not isinstance(predicate_result, bool):
                raise ValueError(
                    "Predicate evaluates to a non-boolean result.")

            if not predicate_result:
                raise exceptions.SkipSignal("Predicate evaluates to False.")
        return input_dict
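
The `SkipSignal` raised above is how the strategy tells its caller that a conditional predicate evaluated to False. A minimal hypothetical caller, assuming a `strategy` instance, a metadata handler `m`, and an input dict `artifacts` (none of which are part of the original snippet):

    try:
        resolved = strategy.resolve_artifacts(m, artifacts)
    except exceptions.SkipSignal:
        # The predicate was False: treat the node as skipped, not failed.
        resolved = None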
Example #11
 def _get_execution_info(self, input_dict, output_dict, exec_properties):
     pipeline_node = pipeline_pb2.PipelineNode(
         node_info={'id': 'MyPythonNode'})
     pipeline_info = pipeline_pb2.PipelineInfo(id='MyPipeline')
     stateful_working_dir = os.path.join(self.tmp_dir,
                                         'stateful_working_dir')
     executor_output_uri = os.path.join(self.tmp_dir, 'executor_output')
     return data_types.ExecutionInfo(
         execution_id=1,
         input_dict=input_dict,
         output_dict=output_dict,
         exec_properties=exec_properties,
         stateful_working_dir=stateful_working_dir,
         execution_output_uri=executor_output_uri,
         pipeline_node=pipeline_node,
         pipeline_info=pipeline_info,
         pipeline_run_id=99)
Example #12
def deserialize_execution_info(
    execution_info_b64: str) -> data_types.ExecutionInfo:
  """De-serializes the ExecutionInfo class from a binary string."""
  execution_info_proto = (
      executor_invocation_pb2.ExecutorInvocation.FromString(
          base64.b64decode(execution_info_b64)))
  result = data_types.ExecutionInfo(
      execution_output_uri=execution_info_proto.output_metadata_uri,
      stateful_working_dir=execution_info_proto.stateful_working_dir,
      pipeline_info=execution_info_proto.pipeline_info,
      pipeline_node=execution_info_proto.pipeline_node)

  result.exec_properties = _build_exec_property_dict(
      execution_info_proto.execution_properties)

  result.input_dict = _build_artifact_dict(execution_info_proto.input_dict)
  result.output_dict = _build_artifact_dict(execution_info_proto.output_dict)
  return result
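
The test below round-trips through `serialize_execution_info`, the inverse of the function above. A minimal sketch of that direction, assuming hypothetical helpers `_build_proto_exec_property_dict` and `_build_proto_artifact_dict` that mirror the `_build_*` helpers used in deserialization (the real implementation may differ):

def serialize_execution_info(execution_info: data_types.ExecutionInfo) -> str:
  """Serializes an ExecutionInfo object into a base64-encoded binary string."""
  proto = executor_invocation_pb2.ExecutorInvocation(
      output_metadata_uri=execution_info.execution_output_uri,
      stateful_working_dir=execution_info.stateful_working_dir,
      pipeline_info=execution_info.pipeline_info,
      pipeline_node=execution_info.pipeline_node)
  # Assumed helpers producing the proto map entries from the Python dicts.
  for key, value in _build_proto_exec_property_dict(
      execution_info.exec_properties).items():
    proto.execution_properties[key].CopyFrom(value)
  for key, artifacts in _build_proto_artifact_dict(
      execution_info.input_dict).items():
    proto.input_dict[key].CopyFrom(artifacts)
  for key, artifacts in _build_proto_artifact_dict(
      execution_info.output_dict).items():
    proto.output_dict[key].CopyFrom(artifacts)
  return base64.b64encode(proto.SerializeToString()).decode('utf-8')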
Example #13
  def testExecutionInfoSerialization(self):
    my_artifact = _MyArtifact()
    my_artifact.int1 = 111

    execution_output_uri = 'output/uri'
    stateful_working_dir = 'working/dir'
    exec_properties = {
        'property1': 'value1',
        'property2': 'value2',
    }
    pipeline_info = pipeline_pb2.PipelineInfo(id='my_pipeline')
    pipeline_node = text_format.Parse(
        """
        node_info {
          id: 'my_node'
        }
        """, pipeline_pb2.PipelineNode())

    original = data_types.ExecutionInfo(
        input_dict={'input': [my_artifact]},
        output_dict={'output': [my_artifact]},
        exec_properties=exec_properties,
        execution_output_uri=execution_output_uri,
        stateful_working_dir=stateful_working_dir,
        pipeline_info=pipeline_info,
        pipeline_node=pipeline_node)

    serialized = python_execution_binary_utils.serialize_execution_info(
        original)
    rehydrated = python_execution_binary_utils.deserialize_execution_info(
        serialized)

    self.CheckArtifactDict(rehydrated.input_dict, {'input': [my_artifact]})
    self.CheckArtifactDict(rehydrated.output_dict, {'output': [my_artifact]})
    self.assertEqual(rehydrated.exec_properties, exec_properties)
    self.assertEqual(rehydrated.execution_output_uri, execution_output_uri)
    self.assertEqual(rehydrated.stateful_working_dir, stateful_working_dir)
    self.assertProtoEquals(rehydrated.pipeline_info, original.pipeline_info)
    self.assertProtoEquals(rehydrated.pipeline_node, original.pipeline_node)
Example #14
 def testLauncher_resolver_node(self):
     mock_resolver_node_handler_class = mock.create_autospec(
         system_node_handler.SystemNodeHandler)
     mock_resolver_node_handler = mock.create_autospec(
         system_node_handler.SystemNodeHandler, instance=True)
     mock_resolver_node_handler_class.return_value = mock_resolver_node_handler
     expected_execution_info = data_types.ExecutionInfo()
     expected_execution_info.execution_id = 123
     mock_resolver_node_handler.run.return_value = expected_execution_info
     launcher._SYSTEM_NODE_HANDLERS[
         'tfx.dsl.components.common.resolver.Resolver'] = (
             mock_resolver_node_handler_class)
     test_launcher = launcher.Launcher(
         pipeline_node=self._resolver,
         mlmd_connection=self._mlmd_connection,
         pipeline_info=self._pipeline_info,
         pipeline_runtime_spec=self._pipeline_runtime_spec)
     execution_info = test_launcher.launch()
     mock_resolver_node_handler.run.assert_called_once_with(
         self._mlmd_connection, self._resolver, self._pipeline_info,
         self._pipeline_runtime_spec)
     self.assertEqual(execution_info, expected_execution_info)
Example #15
 def testRunExecutorWithBeamPipelineArgs(self):
     executor_spec = text_format.Parse(
         """
   python_executor_spec: {
       class_path: "tfx.orchestration.portable.beam_executor_operator_test.ValidateBeamPipelineArgsExecutor"
   }
   beam_pipeline_args: "--runner=DirectRunner"
 """, executable_spec_pb2.BeamExecutableSpec())
     operator = beam_executor_operator.BeamExecutorOperator(executor_spec)
     pipeline_node = pipeline_pb2.PipelineNode(
         node_info={'id': 'MyBeamNode'})
     pipeline_info = pipeline_pb2.PipelineInfo(id='MyPipeline')
     executor_output_uri = os.path.join(self.tmp_dir, 'executor_output')
     executor_output = operator.run_executor(
         data_types.ExecutionInfo(
             execution_id=1,
             input_dict={'input_key': [standard_artifacts.Examples()]},
             output_dict={'output_key': [standard_artifacts.Model()]},
             exec_properties={},
             execution_output_uri=executor_output_uri,
             pipeline_node=pipeline_node,
             pipeline_info=pipeline_info,
             pipeline_run_id=99))
     self.assertProtoPartiallyEquals(
         """
       output_artifacts {
         key: "output_key"
         value {
           artifacts {
             custom_properties {
               key: "name"
               value {
                 string_value: "MyPipeline.MyBeamNode.my_model"
               }
             }
           }
         }
       }""", executor_output)
Example #16
    def _prepare_execution(self) -> _PrepareExecutionResult:
        """Prepares inputs, outputs and execution properties for actual execution."""
        # TODO(b/150979622): handle the edge case where the component gets
        # evicted between a successful publish and the stateful working dir
        # being cleaned up. Otherwise, subsequent retries will keep failing
        # because of duplicate publishes.
        with self._mlmd_connection as m:
            # 1. Prepares all contexts.
            contexts = context_lib.register_contexts_if_not_exists(
                metadata_handler=m, node_contexts=self._pipeline_node.contexts)

            # 2. Resolves inputs and execution properties.
            exec_properties = inputs_utils.resolve_parameters(
                node_parameters=self._pipeline_node.parameters)
            input_artifacts = inputs_utils.resolve_input_artifacts(
                metadata_handler=m, node_inputs=self._pipeline_node.inputs)
            # 3. If not all required inputs are met, return ExecutionInfo with
            # is_execution_needed set to False. No publish will happen, so
            # downstream nodes won't be triggered.
            if input_artifacts is None:
                return _PrepareExecutionResult(
                    execution_info=data_types.ExecutionInfo(),
                    contexts=contexts,
                    is_execution_needed=False)

            # 4. Registers execution in metadata.
            execution = execution_publish_utils.register_execution(
                metadata_handler=m,
                execution_type=self._pipeline_node.node_info.type,
                contexts=contexts,
                input_artifacts=input_artifacts,
                exec_properties=exec_properties)

            # 5. Resolves output artifacts.
            output_artifacts = self._output_resolver.generate_output_artifacts(
                execution.id)

        # If there is a custom driver, run it.
        if self._driver_operator:
            driver_output = self._driver_operator.run_driver(
                data_types.ExecutionInfo(
                    input_dict=input_artifacts,
                    output_dict=output_artifacts,
                    exec_properties=exec_properties,
                    execution_output_uri=self._output_resolver.
                    get_driver_output_uri()))
            self._update_with_driver_output(driver_output, exec_properties,
                                            output_artifacts)

        # We reconnect to MLMD here because the custom driver closes the MLMD
        # connection on returning.
        with self._mlmd_connection as m:
            # 6. Check cached result
            cache_context = cache_utils.get_cache_context(
                metadata_handler=m,
                pipeline_node=self._pipeline_node,
                pipeline_info=self._pipeline_info,
                input_artifacts=input_artifacts,
                output_artifacts=output_artifacts,
                parameters=exec_properties)
            contexts.append(cache_context)
            cached_outputs = cache_utils.get_cached_outputs(
                metadata_handler=m, cache_context=cache_context)

            # 7. Should cache be used?
            if (self._pipeline_node.execution_options.caching_options.
                    enable_cache and cached_outputs):
                # Publishes the cached result.
                execution_publish_utils.publish_cached_execution(
                    metadata_handler=m,
                    contexts=contexts,
                    execution_id=execution.id,
                    output_artifacts=cached_outputs)
                return _PrepareExecutionResult(
                    execution_info=data_types.ExecutionInfo(
                        execution_id=execution.id),
                    execution_metadata=execution,
                    contexts=contexts,
                    is_execution_needed=False)

            pipeline_run_id = (self._pipeline_runtime_spec.pipeline_run_id.
                               field_value.string_value)

            # 8. Going to trigger executor.
            return _PrepareExecutionResult(
                execution_info=data_types.ExecutionInfo(
                    execution_id=execution.id,
                    input_dict=input_artifacts,
                    output_dict=output_artifacts,
                    exec_properties=exec_properties,
                    execution_output_uri=self._output_resolver.
                    get_executor_output_uri(execution.id),
                    stateful_working_dir=(self._output_resolver.
                                          get_stateful_working_directory()),
                    tmp_dir=self._output_resolver.make_tmp_dir(execution.id),
                    pipeline_node=self._pipeline_node,
                    pipeline_info=self._pipeline_info,
                    pipeline_run_id=pipeline_run_id),
                execution_metadata=execution,
                contexts=contexts,
                is_execution_needed=True)
Example #17
    def run(
        self, mlmd_connection: metadata.Metadata,
        pipeline_node: pipeline_pb2.PipelineNode,
        pipeline_info: pipeline_pb2.PipelineInfo,
        pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
    ) -> data_types.ExecutionInfo:
        """Runs Importer specific logic.

    Args:
      mlmd_connection: ML metadata connection.
      pipeline_node: The specification of the node that this launcher launches.
      pipeline_info: The information of the pipeline that this node runs in.
      pipeline_runtime_spec: The runtime information of the pipeline that this
        node runs in.

    Returns:
      The execution of the run.
    """
        logging.info('Running as an importer node.')
        with mlmd_connection as m:
            # 1. Prepares all contexts.
            contexts = context_lib.prepare_contexts(
                metadata_handler=m, node_contexts=pipeline_node.contexts)

            # 2. Resolves execution properties. Note that importers have no
            # inputs.
            exec_properties = data_types_utils.build_parsed_value_dict(
                inputs_utils.resolve_parameters_with_schema(
                    node_parameters=pipeline_node.parameters))

            # 3. Registers execution in metadata.
            execution = execution_publish_utils.register_execution(
                metadata_handler=m,
                execution_type=pipeline_node.node_info.type,
                contexts=contexts,
                exec_properties=exec_properties)

            # 4. Generate output artifacts to represent the imported artifacts.
            output_spec = pipeline_node.outputs.outputs[
                importer.IMPORT_RESULT_KEY]
            properties = self._extract_proto_map(
                output_spec.artifact_spec.additional_properties)
            custom_properties = self._extract_proto_map(
                output_spec.artifact_spec.additional_custom_properties)
            output_artifact_class = types.Artifact(
                output_spec.artifact_spec.type).type
            output_artifacts = importer.generate_output_dict(
                metadata_handler=m,
                uri=str(exec_properties[importer.SOURCE_URI_KEY]),
                properties=properties,
                custom_properties=custom_properties,
                reimport=bool(exec_properties[importer.REIMPORT_OPTION_KEY]),
                output_artifact_class=output_artifact_class,
                mlmd_artifact_type=output_spec.artifact_spec.type)

            result = data_types.ExecutionInfo(execution_id=execution.id,
                                              input_dict={},
                                              output_dict=output_artifacts,
                                              exec_properties=exec_properties,
                                              pipeline_node=pipeline_node,
                                              pipeline_info=pipeline_info)

            # TODO(b/182316162): consider letting the launcher level do the
            # publish for system nodes, so that the version tagging logic
            # doesn't need to be handled per system node.
            outputs_utils.tag_output_artifacts_with_version(result.output_dict)

            # 5. Publish the output artifacts. If artifacts are reimported, the
            # execution is published as CACHED. Otherwise it is published as COMPLETE.
            if _is_artifact_reimported(output_artifacts):
                execution_publish_utils.publish_cached_execution(
                    metadata_handler=m,
                    contexts=contexts,
                    execution_id=execution.id,
                    output_artifacts=output_artifacts)

            else:
                execution_publish_utils.publish_succeeded_execution(
                    metadata_handler=m,
                    execution_id=execution.id,
                    contexts=contexts,
                    output_artifacts=output_artifacts)

            return result
Example #18
    def testDriverRunFn(self):
        # Create input dir.
        self._input_base_path = os.path.join(self._test_dir, 'input_base')
        fileio.makedirs(self._input_base_path)

        # Fake previous outputs
        span1_v1_split1 = os.path.join(self._input_base_path, 'span01',
                                       'split1', 'data')
        io_utils.write_string_file(span1_v1_split1, 'testing11')
        span1_v1_split2 = os.path.join(self._input_base_path, 'span01',
                                       'split2', 'data')
        io_utils.write_string_file(span1_v1_split2, 'testing12')

        ir_driver = driver.FileBasedDriver(self._mock_metadata)
        example = standard_artifacts.Examples()

        # Prepare output_dict.
        example.uri = 'my_uri'  # Will verify that this uri is not changed.
        output_dict = {standard_component_specs.EXAMPLES_KEY: [example]}

        # Prepare exec_properties.
        exec_properties = {
            standard_component_specs.INPUT_BASE_KEY:
            self._input_base_path,
            standard_component_specs.INPUT_CONFIG_KEY:
            proto_utils.proto_to_json(
                example_gen_pb2.Input(splits=[
                    example_gen_pb2.Input.Split(
                        name='s1', pattern='span{SPAN:2}/split1/*'),
                    example_gen_pb2.Input.Split(
                        name='s2', pattern='span{SPAN:2}/split2/*')
                ])),
        }
        result = ir_driver.run(
            portable_data_types.ExecutionInfo(output_dict=output_dict,
                                              exec_properties=exec_properties))
        # Assert exec_properties' values
        exec_properties = result.exec_properties
        self.assertEqual(exec_properties[utils.SPAN_PROPERTY_NAME].int_value,
                         1)
        updated_input_config = example_gen_pb2.Input()
        proto_utils.json_to_proto(
            exec_properties[
                standard_component_specs.INPUT_CONFIG_KEY].string_value,
            updated_input_config)
        self.assertProtoEquals(
            """
        splits {
          name: "s1"
          pattern: "span01/split1/*"
        }
        splits {
          name: "s2"
          pattern: "span01/split2/*"
        }""", updated_input_config)
        self.assertRegex(
            exec_properties[utils.FINGERPRINT_PROPERTY_NAME].string_value,
            r'split:s1,num_files:1,total_bytes:9,xor_checksum:.*,sum_checksum:.*\nsplit:s2,num_files:1,total_bytes:9,xor_checksum:.*,sum_checksum:.*'
        )
        # Assert output_artifacts' values
        self.assertLen(
            result.output_artifacts[
                standard_component_specs.EXAMPLES_KEY].artifacts, 1)
        output_example = result.output_artifacts[
            standard_component_specs.EXAMPLES_KEY].artifacts[0]
        self.assertEqual(output_example.uri, example.uri)
        self.assertEqual(
            output_example.custom_properties[
                utils.SPAN_PROPERTY_NAME].string_value, '1')
        self.assertRegex(
            output_example.custom_properties[
                utils.FINGERPRINT_PROPERTY_NAME].string_value,
            r'split:s1,num_files:1,total_bytes:9,xor_checksum:.*,sum_checksum:.*\nsplit:s2,num_files:1,total_bytes:9,xor_checksum:.*,sum_checksum:.*'
        )
Example #19
  def run(
      self, mlmd_connection: metadata.Metadata,
      pipeline_node: pipeline_pb2.PipelineNode,
      pipeline_info: pipeline_pb2.PipelineInfo,
      pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
  ) -> data_types.ExecutionInfo:
    """Runs Importer specific logic.

    Args:
      mlmd_connection: ML metadata connection.
      pipeline_node: The specification of the node that this launcher launches.
      pipeline_info: The information of the pipeline that this node runs in.
      pipeline_runtime_spec: The runtime information of the pipeline that this
        node runs in.

    Returns:
      The execution of the run.
    """
    logging.info('Running as an importer node.')
    with mlmd_connection as m:
      # 1. Prepares all contexts.
      contexts = context_lib.prepare_contexts(
          metadata_handler=m, node_contexts=pipeline_node.contexts)

      # 2. Resolves execution properties. Note that importers have no
      # inputs.
      exec_properties = inputs_utils.resolve_parameters(
          node_parameters=pipeline_node.parameters)

      # 3. Registers execution in metadata.
      execution = execution_publish_utils.register_execution(
          metadata_handler=m,
          execution_type=pipeline_node.node_info.type,
          contexts=contexts,
          exec_properties=exec_properties)

      # 4. Generate output artifacts to represent the imported artifacts.
      output_spec = pipeline_node.outputs.outputs[importer.IMPORT_RESULT_KEY]
      properties = self._extract_proto_map(
          output_spec.artifact_spec.additional_properties)
      custom_properties = self._extract_proto_map(
          output_spec.artifact_spec.additional_custom_properties)
      output_artifact_class = types.Artifact(
          output_spec.artifact_spec.type).type
      output_artifacts = importer.generate_output_dict(
          metadata_handler=m,
          uri=str(exec_properties[importer.SOURCE_URI_KEY]),
          properties=properties,
          custom_properties=custom_properties,
          reimport=bool(exec_properties[importer.REIMPORT_OPTION_KEY]),
          output_artifact_class=output_artifact_class,
          mlmd_artifact_type=output_spec.artifact_spec.type)

      # 5. Publish the output artifacts.
      execution_publish_utils.publish_succeeded_execution(
          metadata_handler=m,
          execution_id=execution.id,
          contexts=contexts,
          output_artifacts=output_artifacts)

      return data_types.ExecutionInfo(
          execution_id=execution.id,
          input_dict={},
          output_dict=output_artifacts,
          exec_properties=exec_properties,
          pipeline_node=pipeline_node,
          pipeline_info=pipeline_info)
Example #20
    def run(
        self, mlmd_connection: metadata.Metadata,
        pipeline_node: pipeline_pb2.PipelineNode,
        pipeline_info: pipeline_pb2.PipelineInfo,
        pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec
    ) -> data_types.ExecutionInfo:
        """Runs Resolver specific logic.

    Args:
      mlmd_connection: ML metadata connection.
      pipeline_node: The specification of the node that this launcher launches.
      pipeline_info: The information of the pipeline that this node runs in.
      pipeline_runtime_spec: The runtime information of the pipeline that this
        node runs in.

    Returns:
      The execution of the run.
    """
        logging.info('Running as a resolver node.')
        with mlmd_connection as m:
            # 1. Prepares all contexts.
            contexts = context_lib.prepare_contexts(
                metadata_handler=m, node_contexts=pipeline_node.contexts)

            # 2. Resolves inputs and execution properties.
            exec_properties = data_types_utils.build_parsed_value_dict(
                inputs_utils.resolve_parameters_with_schema(
                    node_parameters=pipeline_node.parameters))
            try:
                resolved_inputs = inputs_utils.resolve_input_artifacts_v2(
                    pipeline_node=pipeline_node, metadata_handler=m)
            except exceptions.InputResolutionError as e:
                execution = execution_publish_utils.register_execution(
                    metadata_handler=m,
                    execution_type=pipeline_node.node_info.type,
                    contexts=contexts,
                    exec_properties=exec_properties)
                execution_publish_utils.publish_failed_execution(
                    metadata_handler=m,
                    contexts=contexts,
                    execution_id=execution.id,
                    executor_output=self._build_error_output(
                        code=e.grpc_code_value))
                return data_types.ExecutionInfo(
                    execution_id=execution.id,
                    exec_properties=exec_properties,
                    pipeline_node=pipeline_node,
                    pipeline_info=pipeline_info)

            # 2a. If Skip (i.e. inside conditional), no execution should be made.
            # TODO(b/197907821): Publish special execution for Skip?
            if isinstance(resolved_inputs, inputs_utils.Skip):
                return data_types.ExecutionInfo()

            # 3. Registers execution in metadata.
            execution = execution_publish_utils.register_execution(
                metadata_handler=m,
                execution_type=pipeline_node.node_info.type,
                contexts=contexts,
                exec_properties=exec_properties)

            # TODO(b/197741942): Support len > 1.
            if len(resolved_inputs) > 1:
                execution_publish_utils.publish_failed_execution(
                    metadata_handler=m,
                    contexts=contexts,
                    execution_id=execution.id,
                    executor_output=self._build_error_output(
                        _ERROR_CODE_UNIMPLEMENTED,
                        'Handling more than one input dict is not implemented yet.'
                    ))
                return data_types.ExecutionInfo(
                    execution_id=execution.id,
                    exec_properties=exec_properties,
                    pipeline_node=pipeline_node,
                    pipeline_info=pipeline_info)

            input_artifacts = resolved_inputs[0]

            # 4. Publish the execution as a cached execution with the
            # resolved input artifacts as the output artifacts.
            execution_publish_utils.publish_internal_execution(
                metadata_handler=m,
                contexts=contexts,
                execution_id=execution.id,
                output_artifacts=input_artifacts)

            return data_types.ExecutionInfo(execution_id=execution.id,
                                            input_dict=input_artifacts,
                                            output_dict=input_artifacts,
                                            exec_properties=exec_properties,
                                            pipeline_node=pipeline_node,
                                            pipeline_info=pipeline_info)