Пример #1
0
 def testConstruct(self):
     """Checks that a constructed DataViewBinder has 'output_examples'."""
     examples_channel = channel_utils.as_channel(
         [standard_artifacts.Examples()])
     data_view_channel = channel_utils.as_channel(
         [standard_artifacts.DataView()])
     component = binder_component.DataViewBinder(
         input_examples=examples_channel, data_view=data_view_channel)
     self.assertIsNotNone(component.outputs['output_examples'])
Пример #2
0
  def __init__(self,
               create_decoder_func: Text,
               module_file: Optional[Text] = None,
               data_view: Optional[types.Channel] = None,
               instance_name: Optional[Text] = None):
    """Initializes the component from a `_TfGraphDataViewProviderSpec`.

    Args:
      create_decoder_func: Identifies the function used to create the
        TfGraphRecordDecoder. When `module_file` is given, this is the name
        of a function inside that module. When `module_file` is None, this
        is the dot-delimited path to the function, e.g.
        "some_package.some_module.some_func". The function must have the
        following signature:

        def create_decoder_func() -> tfx_bsl.coder.TfGraphRecordDecoder:
          ...
      module_file: Path to a python module file from which the function
        named by `create_decoder_func` is loaded. If omitted,
        `create_decoder_func` is expected to be a path to a function.
      data_view: Output 'DataView' channel in which the decoder will be
        saved. If omitted, a channel holding one new DataView artifact is
        created.
      instance_name: Optional unique instance name. Necessary iff multiple
        instances of this component are declared in the same pipeline.
    """
    # Fall back to a fresh single-artifact channel when none is supplied.
    output_channel = (
        data_view if data_view is not None else types.Channel(
            type=standard_artifacts.DataView,
            artifacts=[standard_artifacts.DataView()]))
    component_spec = _TfGraphDataViewProviderSpec(
        module_file=module_file,
        create_decoder_func=create_decoder_func,
        data_view=output_channel)
    super().__init__(spec=component_spec, instance_name=instance_name)
Пример #3
0
 def testConstruct(self):
     """Checks the binder's single output against the input Examples.

     The input artifact is given split names and a span, and the one
     artifact in 'output_examples' is compared to it with
     `_assert_example_artifact_equal`.
     """
     input_artifact = standard_artifacts.Examples()
     input_artifact.split_names = artifact_utils.encode_split_names(
         ['train', 'eval'])
     input_artifact.span = 1

     examples_channel = channel_utils.as_channel([input_artifact])
     data_view_channel = channel_utils.as_channel(
         [standard_artifacts.DataView()])
     component = binder_component.DataViewBinder(
         input_examples=examples_channel, data_view=data_view_channel)

     output_channel = component.outputs['output_examples']
     self.assertIsNotNone(output_channel)
     output_artifacts = output_channel.get()
     self.assertLen(output_artifacts, 1)
     self._assert_example_artifact_equal(output_artifacts[0], input_artifact)
Пример #4
0
 def testExecutorModuleFileNotProvided(self):
     """Runs the provider executor without a `module_file`.

     `create_decoder_func` is passed as the dotted "<module>.<name>" path of
     `data_view_module.create_simple_decoder`. After `Do()` runs, a decoder
     is loaded from the output artifact's URI and its type is checked.
     """
     input_dict = {}
     output = standard_artifacts.DataView()
     output.uri = os.path.join(self._output_data_dir, 'output_data_view')
     # Executor dicts map each key to a *list* of artifacts, so the single
     # output artifact must be wrapped rather than passed bare.
     output_dict = {'data_view': [output]}
     exec_properties = {
         'create_decoder_func':
         '%s.%s' % (data_view_module.create_simple_decoder.__module__,
                    data_view_module.create_simple_decoder.__name__),
     }
     executor = provider_executor.TfGraphDataViewProviderExecutor()
     executor.Do(input_dict, output_dict, exec_properties)
     loaded_decoder = tf_graph_record_decoder.load_decoder(output.uri)
     self.assertIsInstance(loaded_decoder,
                           tf_graph_record_decoder.TFGraphRecordDecoder)
Пример #5
0
 def testExecutorModuleFileProvided(self):
   """Runs the provider executor with an explicit `module_file`.

   `create_decoder_func` is the bare function name looked up in the module
   file. After `Do()` runs, a decoder is loaded from the output artifact's
   URI and its type is checked.
   """
   data_view_artifact = standard_artifacts.DataView()
   data_view_artifact.uri = os.path.join(
       self._output_data_dir, 'output_data_view')
   module_path = os.path.join(
       self._source_data_dir, 'module_file/data_view_module.py')
   exec_properties = {
       'module_file': module_path,
       'create_decoder_func': 'create_simple_decoder',
   }

   provider_executor.TfGraphDataViewProviderExecutor().Do(
       {}, {'data_view': [data_view_artifact]}, exec_properties)

   decoder = tf_graph_record_decoder.load_decoder(data_view_artifact.uri)
   self.assertIsInstance(decoder, tf_graph_record_decoder.LoadedDecoder)
Пример #6
0
  def testDo(self):
    """Checks what DataViewBinderExecutor.Do() writes to the output Examples.

    The single output artifact is expected to carry the DataView's URI and
    id as custom properties, to have the same URI as the input Examples, and
    to keep the input's pre-existing string custom property.
    """
    custom_property_key = 'payload_format'

    bound_view = standard_artifacts.DataView()
    bound_view.uri = '/old/data_view'
    bound_view.id = 1

    source_examples = standard_artifacts.Examples()
    source_examples.uri = '/examples/1'
    source_examples.set_string_custom_property(custom_property_key, 'VALUE1')

    inputs = {
        'input_examples': [source_examples],
        'data_view': [bound_view],
    }
    outputs = {'output_examples': [standard_artifacts.Examples()]}

    binder_executor.DataViewBinderExecutor().Do(inputs, outputs, {})

    bound_examples = outputs.get('output_examples')
    self.assertIsNotNone(bound_examples)
    self.assertLen(bound_examples, 1)
    result = bound_examples[0]

    # The DataView's URI and id are read back from custom properties.
    recorded_uri = result.get_string_custom_property(
        binder_executor.DATA_VIEW_URI_PROPERTY_KEY)
    self.assertEqual(recorded_uri, bound_view.uri)
    recorded_id = result.get_int_custom_property(
        binder_executor.DATA_VIEW_ID_PROPERTY_KEY)
    self.assertEqual(recorded_id, bound_view.id)

    # The output shares its URI with the input.
    self.assertEqual(result.uri, source_examples.uri)
    # Custom properties already on the input are carried over.
    self.assertEqual(
        result.get_string_custom_property(custom_property_key),
        source_examples.get_string_custom_property(custom_property_key))
Пример #7
0
 def test_raise_if_not_example(self):
     """Expects an AssertionError when given a DataView artifact."""
     non_examples_artifact = standard_artifacts.DataView()
     expected_message = 'must be of type standard_artifacts.Example'
     with self.assertRaisesRegex(AssertionError, expected_message):
         tfxio_utils.get_tfxio_factory_from_artifact(
             non_examples_artifact, _TELEMETRY_DESCRIPTORS)