Пример #1
0
  def __init__(self,
               input_data: types.Channel = None,
               output_data: types.Channel = None,
               name: Optional[Text] = None):
    """Build a HelloComponent that hands its input examples to its output.

    Args:
      input_data: Channel of `standard_artifacts.Examples` to read from;
        typically carries a 'train' and an 'eval' split.
      output_data: Channel of `standard_artifacts.Examples` to write to. When
        omitted, a channel is created whose single artifact reuses the split
        names of the first artifact in `input_data`.
      name: Optional unique name; needed when several Hello components live
        in one pipeline.
    """
    if not output_data:
      # The component is a pass-through, so the default output artifact simply
      # mirrors the splits produced by the upstream component.
      passthrough_artifact = standard_artifacts.Examples()
      upstream_artifacts = input_data.get()
      passthrough_artifact.split_names = upstream_artifacts[0].split_names
      output_data = channel_utils.as_channel([passthrough_artifact])

    component_spec = HelloComponentSpec(
        input_data=input_data, output_data=output_data, name=name)
    super(HelloComponent, self).__init__(spec=component_spec)
Пример #2
0
  def __init__(self,
               statistics: types.Channel = None,
               schema: types.Channel = None,
               exclude_splits: Optional[List[Text]] = None,
               output: Optional[types.Channel] = None,
               stats: Optional[types.Channel] = None,
               instance_name: Optional[Text] = None):
    """Construct an ExampleValidator component.

    Either `stats` or `statistics` must be present in the arguments.

    Args:
      statistics: A Channel of type `standard_artifacts.ExampleStatistics`.
        When `output` is not supplied, the generated anomalies artifact is
        given every split found here except those in `exclude_splits`.
      schema: A Channel of type `standard_artifacts.Schema`. _required_
      exclude_splits: Names of splits that the example validator should not
        validate. Default behavior (when exclude_splits is set to None)
        is excluding no splits.
      output: Output channel of type `standard_artifacts.ExampleAnomalies`.
      stats: Backwards compatibility alias for the 'statistics' argument.
        Takes precedence over `statistics` when both are given.
      instance_name: Optional name assigned to this specific instance of
        ExampleValidator. Required only if multiple ExampleValidator components
        are declared in the same pipeline.

    Raises:
      ValueError: If neither `statistics` nor `stats` is provided while the
        default output channel has to be derived from the statistics splits.
    """
    if stats:
      logging.warning(
          'The "stats" argument to the ExampleValidator component has '
          'been renamed to "statistics" and is deprecated. Please update your '
          'usage as support for this argument will be removed soon.')
      statistics = stats
    if exclude_splits is None:
      exclude_splits = []
      logging.info('Excluding no splits because exclude_splits is not set.')
    anomalies = output
    if not anomalies:
      # The default output mirrors the statistics splits, so the statistics
      # channel is mandatory here; fail with a clear message instead of an
      # AttributeError on None.
      if statistics is None:
        raise ValueError(
            'Either "statistics" or "stats" must be provided to '
            'ExampleValidator.')
      anomalies_artifact = standard_artifacts.ExampleAnomalies()
      statistics_split_names = artifact_utils.decode_split_names(
          artifact_utils.get_single_instance(list(
              statistics.get())).split_names)
      split_names = [
          split for split in statistics_split_names
          if split not in exclude_splits
      ]
      anomalies_artifact.split_names = artifact_utils.encode_split_names(
          split_names)
      anomalies = types.Channel(
          type=standard_artifacts.ExampleAnomalies,
          artifacts=[anomalies_artifact])
    spec = ExampleValidatorSpec(
        statistics=statistics,
        schema=schema,
        # The spec receives the exclusion list as a JSON string.
        exclude_splits=json_utils.dumps(exclude_splits),
        anomalies=anomalies)
    super(ExampleValidator, self).__init__(
        spec=spec, instance_name=instance_name)
Пример #3
0
    def __init__(self,
                 examples: types.Channel = None,
                 schema: Optional[types.Channel] = None,
                 stats_options: Optional[tfdv.StatsOptions] = None,
                 output: Optional[types.Channel] = None,
                 input_data: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None,
                 enable_cache: Optional[bool] = None):
        """Create a StatisticsGen component.

        Args:
          examples: Channel of `ExamplesPath` type, usually produced by the
            [ExampleGen component](https://www.tensorflow.org/tfx/guide/examplegen).
            Expected to hold the `train` and `eval` splits. _required_
          schema: `Schema` channel used to configure the stats options handed
            to TFDV automatically.
          stats_options: StatsOptions instance configuring optional TFDV
            behavior. If stats_options.schema is set it is used in place of
            the `schema` channel input. Because stats_options has to be
            serialized, slicer functions and custom stats generators are
            dropped and therefore cannot be used.
          output: `ExampleStatisticsPath` channel receiving the statistics of
            each split in the input examples. When omitted, a channel is
            created whose artifact reuses the split names of `examples`.
          input_data: Backwards compatibility alias for the `examples`
            argument; overrides `examples` when given.
          instance_name: Optional name for this particular StatisticsGen
            instance. Only needed when several StatisticsGen components are
            declared in the same pipeline.
          enable_cache: Optional flag telling whether caching is enabled for
            this component. When unset, the pipeline's enable_cache value
            applies.
        """
        if input_data:
            absl.logging.warning(
                'The "input_data" argument to the StatisticsGen component has '
                'been renamed to "examples" and is deprecated. Please update your '
                'usage as support for this argument will be removed soon.')
            examples = input_data

        if not output:
            # Default output: one statistics artifact carrying the same
            # splits as the single examples artifact.
            default_statistics = standard_artifacts.ExampleStatistics()
            source_examples = artifact_utils.get_single_instance(
                list(examples.get()))
            default_statistics.split_names = source_examples.split_names
            output = types.Channel(
                type=standard_artifacts.ExampleStatistics,
                artifacts=[default_statistics])

        # TODO(b/150802589): Move jsonable interface to tfx_bsl and use json_utils.
        stats_options_json = None
        if stats_options:
            stats_options_json = stats_options.to_json()

        component_spec = StatisticsGenSpec(
            examples=examples,
            schema=schema,
            stats_options_json=stats_options_json,
            statistics=output)
        super(StatisticsGen, self).__init__(
            spec=component_spec,
            instance_name=instance_name,
            enable_cache=enable_cache)
Пример #4
0
  def __init__(self,
               input_examples: types.Channel,
               data_view: types.Channel,
               output_examples: Optional[types.Channel] = None,
               instance_name: Optional[Text] = None):
    """Construct the component that pairs examples with a data view.

    Args:
      input_examples: Channel holding the examples artifact to read.
      data_view: Channel holding the data view passed to the spec.
      output_examples: Optional output channel. When omitted, a channel is
        created around a new `standard_artifacts.Examples` artifact copied
        from the single artifact in `input_examples`.
      instance_name: Optional name forwarded to the base component.
    """
    if not output_examples:
      # Default output starts out as a copy of the one input artifact.
      copied_examples = standard_artifacts.Examples()
      source_examples = artifact_utils.get_single_instance(
          list(input_examples.get()))
      copied_examples.copy_from(source_examples)
      output_examples = channel_utils.as_channel([copied_examples])

    component_spec = _DataViewBinderComponentSpec(
        input_examples=input_examples,
        data_view=data_view,
        output_examples=output_examples)
    super().__init__(spec=component_spec, instance_name=instance_name)