Python ImportExampleGen примеры использования

Язык программирования: Python

Пространство имен/Пакет: tfx.components

Класс/Тип: ImportExampleGen

Примеров на hotexamples.com: 16

Python ImportExampleGen — найдено 16 примеров. Это лучшие примеры кода на Python для tfx.components.ImportExampleGen, взятые из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

ImportExampleGen(16)

Основные методы

ImportExampleGen (16)

Пример #1

Показать файл

Файл: data_ingestion.py Проект: arilwan/twiml_bmlp_practice

                                 input_config=input_config)
    return span_example


# %%

if __name__ == '__main__':
    # Interactive (notebook-style) TFX context; every context.run() below
    # executes a single component under config.PIPELINE_ROOT.
    context = InteractiveContext(pipeline_root=config.PIPELINE_ROOT)

    # %%
    # Load the raw CSV into a DataFrame. The frame is not used again in the
    # visible part of this script.
    complaint_df = pd.read_csv(config.DATA_FILE_PATH, encoding='utf-8')

    # %%
    # ImportExampleGen with TFRecord input.
    # NOTE(review): tfrecord_data_writer presumably converts the CSV into
    # TFRecord files under config.RECORD_DIR_PATH -- confirm against the helper.
    complaint_tfrecord = tfrecord_data_writer(file_path=config.DATA_FILE_PATH)
    example_gen = ImportExampleGen(input_base=config.RECORD_DIR_PATH)
    context.run(example_gen)

    # %%
    # Plain CSV ingestion with CsvExampleGen (rebinds example_gen).
    example_gen = CsvExampleGen(input_base=config.DATA_DIR_PATH)
    context.run(example_gen)

    # %%
    # Data split: data_split() returns the component to run.
    # NOTE(review): the split configuration lives inside data_split -- see the helper.
    split_example_gen = data_split(file_path=config.DATA_SPLITS_DIR_PATH)
    context.run(split_example_gen)

    # %%
    # Existing data split.
    # Won't run through as there is no train folder.

Пример #2

Показать файл

Файл: builder.py Проект: pmccarthy-dstillery/tfxtesting

def build_pipeline(timestamp: str) -> pipeline.Pipeline:
    """Gather the TFX components and produce the output pipeline.

    The pipeline imports examples with ImportExampleGen, computes statistics,
    infers a schema, transforms and validates the examples, trains a model,
    evaluates it with the latest blessed model as baseline, and pushes it to
    the serving directory.

    Args:
        timestamp: Run identifier appended to the serving-model and
            pipeline-root directories.

    Returns:
        The configured ``pipeline.Pipeline``.

    NOTE(review): ``conf['beam']`` is rewritten in place, so calling this
    function more than once per process appends the path suffixes again.
    The mutation is kept because other code may read the updated paths from
    ``conf`` -- confirm before making it local.
    """
    run_prefix = "beam/OL653374"

    conf['beam']['serving_model_dir'] = f"{conf['beam']['serving_model_dir']}/{run_prefix}/{timestamp}"
    conf['beam']['pipeline_root_dir'] = f"{conf['beam']['pipeline_root_dir']}/{run_prefix}/{timestamp}"
    conf['beam']['metadata_path'] = f"{conf['beam']['metadata_path']}/{run_prefix}"

    logging.info("Serving model dir is now %s", conf['beam']['serving_model_dir'])

    example_gen = ImportExampleGen(input_base=conf['train_data'])

    statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])

    schema_gen = SchemaGen(
        statistics=statistics_gen.outputs['statistics'],
        infer_feature_shape=False
    )

    transform = Transform(
        examples=example_gen.outputs['examples'],
        schema=schema_gen.outputs['schema'],
        module_file=conf['trainer_module_file']
    )

    example_validator = ExampleValidator(
        statistics=statistics_gen.outputs['statistics'],
        schema=schema_gen.outputs['schema']
    )

    trainer = Trainer(
        examples=transform.outputs['transformed_examples'],
        transform_graph=transform.outputs['transform_graph'],
        schema=schema_gen.outputs['schema'],
        module_file=conf['trainer_module_file'],
        # GenericExecutor makes the Trainer use run_fn instead of trainer_fn.
        custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
        train_args=trainer_pb2.TrainArgs(num_steps=conf['train_args_steps']),
        eval_args=trainer_pb2.EvalArgs(num_steps=50)
    )

    metrics = [
        tfma.metrics.ExampleCount(name='example_count'),
        tfma.metrics.WeightedExampleCount(name='weighted_example_count'),
        tf.keras.metrics.BinaryCrossentropy(name='binary_crossentropy'),
        tf.keras.metrics.BinaryAccuracy(name='accuracy'),
        tf.keras.metrics.AUC(name='auc', num_thresholds=10),
        tf.keras.metrics.AUC(
            name='auc_precision_recall', curve='PR', num_thresholds=100),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
        tfma.metrics.MeanLabel(name='mean_label'),
        tfma.metrics.MeanPrediction(name='mean_prediction'),
        tfma.metrics.Calibration(name='calibration'),
        tfma.metrics.ConfusionMatrixPlot(name='confusion_matrix_plot'),
        tfma.metrics.CalibrationPlot(name='calibration_plot')
    ]
    my_metrics_specs = tfma.metrics.specs_from_metrics(metrics)

    # Alternative kept for reference: a thresholded metrics spec that could
    # replace my_metrics_specs.
    # [
    #     tfma.MetricsSpec(
    #         metrics=[
    #             # tfma.MetricConfig(class_name='ExampleCount'),
    #             tfma.MetricConfig(class_name='BinaryAccuracy',
    #               threshold=tfma.MetricThreshold(
    #                   value_threshold=tfma.GenericValueThreshold(
    #                       lower_bound={'value': 0.5}),
    #                   change_threshold=tfma.GenericChangeThreshold(
    #                       direction=tfma.MetricDirection.HIGHER_IS_BETTER,
    #                       absolute={'value': -1e-10})))
    #         ]
    #     )
    # ]
    eval_config = tfma.EvalConfig(
        model_specs=[
            tfma.ModelSpec(label_key='label')
        ],
        metrics_specs=my_metrics_specs,
        slicing_specs=[
            tfma.SlicingSpec(),
        ])

    # Resolves the latest blessed model so the Evaluator has a baseline.
    model_resolver = ResolverNode(
        instance_name='latest_blessed_model_resolver',
        resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing))

    evaluator = Evaluator(
        examples=example_gen.outputs['examples'],
        model=trainer.outputs['model'],
        baseline_model=model_resolver.outputs['model'],
        eval_config=eval_config)

    pusher = Pusher(
        model=trainer.outputs['model'],
        model_blessing=evaluator.outputs['blessing'],
        push_destination=pusher_pb2.PushDestination(
            filesystem=pusher_pb2.PushDestination.Filesystem(
                base_directory=conf['beam']['serving_model_dir'])))

    components = [
        example_gen,
        statistics_gen,
        schema_gen,
        transform,
        example_validator,
        trainer,
        model_resolver,
        evaluator,
        pusher
    ]

    tfx_pipeline = pipeline.Pipeline(
        pipeline_name=conf['beam']['pipeline_name'],
        pipeline_root=conf['beam']['pipeline_root_dir'],
        components=components,
        enable_cache=False,
        metadata_connection_config=(
            metadata.sqlite_metadata_connection_config(conf['beam']['metadata_path'])
        )
    )

    return tfx_pipeline

Пример #3

Показать файл

Файл: mnist_pipeline_native_keras.py Проект: casassg/tfx

def _create_pipeline(
        pipeline_name: Text,
        pipeline_root: Text,
        data_root: Text,
        module_file: Text,
        module_file_lite: Text,
        serving_model_dir: Text,
        serving_model_dir_lite: Text,
        metadata_path: Text,
        beam_pipeline_args: List[Text],
) -> pipeline.Pipeline:
    """Build the handwritten-digit classification pipeline with TFX.

    A shared ingestion/statistics/schema/validation/transform chain feeds two
    Trainer -> Evaluator -> Pusher branches: ``mnist`` (trained from
    ``module_file``) and ``mnist_lite`` (trained from ``module_file_lite``
    and evaluated as a TFLite model).
    """
    # Ingestion.
    ingest = ImportExampleGen(input_base=data_root)
    raw_examples = ingest.outputs['examples']

    # Statistics for visualization and example validation.
    stats = StatisticsGen(examples=raw_examples)
    statistics = stats.outputs['statistics']

    # Schema inferred from the statistics.
    schema_component = SchemaGen(
        statistics=statistics,
        infer_feature_shape=True,
    )
    schema = schema_component.outputs['schema']

    # Anomaly detection against the schema.
    validator = ExampleValidator(statistics=statistics, schema=schema)

    # Feature engineering shared by training and serving.
    preprocess = Transform(
        examples=raw_examples,
        schema=schema,
        module_file=module_file,
    )

    def _build_trainer(trainer_module, name):
        # Both trainers differ only in module file and instance name.
        return Trainer(
            module_file=trainer_module,
            custom_executor_spec=executor_spec.ExecutorClassSpec(
                GenericExecutor),
            examples=preprocess.outputs['transformed_examples'],
            transform_graph=preprocess.outputs['transform_graph'],
            schema=schema,
            train_args=trainer_pb2.TrainArgs(num_steps=5000),
            eval_args=trainer_pb2.EvalArgs(num_steps=100),
            instance_name=name,
        )

    # Keras model trained by the user-provided module.
    keras_trainer = _build_trainer(module_file, 'mnist')

    # Same model, but the module converts it to TFLite.
    lite_trainer = _build_trainer(module_file_lite, 'mnist_lite')

    # TODO(b/150949276): Add resolver back once it supports two trainers.

    # Evaluation config: the model must reach the accuracy lower bound.
    accuracy_threshold = tfma.config.MetricThreshold(
        value_threshold=tfma.GenericValueThreshold(
            lower_bound={'value': 0.8}))
    accuracy_metric = tfma.MetricConfig(
        class_name='SparseCategoricalAccuracy',
        threshold=accuracy_threshold,
    )
    keras_eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key='image_class')],
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=[tfma.MetricsSpec(metrics=[accuracy_metric])],
    )

    # The TFLite config is a copy with the model type switched.
    lite_eval_config = tfma.EvalConfig()
    lite_eval_config.CopyFrom(keras_eval_config)
    lite_eval_config.model_specs[0].model_type = 'tf_lite'

    # Evaluation of the Keras model.
    keras_evaluator = Evaluator(
        examples=raw_examples,
        model=keras_trainer.outputs['model'],
        eval_config=keras_eval_config,
        instance_name='mnist',
    )

    # Evaluation of the TFLite model.
    lite_evaluator = Evaluator(
        examples=raw_examples,
        model=lite_trainer.outputs['model'],
        eval_config=lite_eval_config,
        instance_name='mnist_lite',
    )

    # Push the Keras model to its serving directory once it is blessed.
    keras_destination = pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=serving_model_dir))
    keras_pusher = Pusher(
        model=keras_trainer.outputs['model'],
        model_blessing=keras_evaluator.outputs['blessing'],
        push_destination=keras_destination,
        instance_name='mnist',
    )

    # Push the TFLite model to its own serving directory once it is blessed.
    lite_destination = pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=serving_model_dir_lite))
    lite_pusher = Pusher(
        model=lite_trainer.outputs['model'],
        model_blessing=lite_evaluator.outputs['blessing'],
        push_destination=lite_destination,
        instance_name='mnist_lite',
    )

    all_components = [
        ingest,
        stats,
        schema_component,
        validator,
        preprocess,
        keras_trainer,
        lite_trainer,
        keras_evaluator,
        lite_evaluator,
        keras_pusher,
        lite_pusher,
    ]
    mlmd_config = metadata.sqlite_metadata_connection_config(metadata_path)

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=all_components,
        enable_cache=True,
        metadata_connection_config=mlmd_config,
        beam_pipeline_args=beam_pipeline_args,
    )

Пример #4

Показать файл

Файл: ranking_pipeline.py Проект: mfkiwl/tfx

def _create_pipeline(
    pipeline_name: Text,
    pipeline_root: Text,
    data_root: Text,
    module_file: Text,
    serving_model_dir: Text,
    metadata_path: Text,
    beam_pipeline_args: List[Text],
):
  """Assemble the ranking pipeline.

  Artifacts are rooted at ``<pipeline_root>/pipelines/<pipeline_name>``.
  Examples are imported in FORMAT_PROTO and bound to a data view before
  being used by the downstream components.
  """
  run_root = os.path.join(pipeline_root, 'pipelines', pipeline_name)

  # IMPORTANT: the payload format must be FORMAT_PROTO.
  ingest = ImportExampleGen(
      input_base=data_root,
      payload_format=example_gen_pb2.FORMAT_PROTO,
  )

  view_provider = provider_component.TfGraphDataViewProvider(
      module_file=module_file,
      create_decoder_func='make_decoder',
  )

  view_binder = binder_component.DataViewBinder(
      ingest.outputs['examples'],
      view_provider.outputs['data_view'],
  )
  bound_examples = view_binder.outputs['output_examples']

  stats = StatisticsGen(examples=bound_examples)

  schema_component = SchemaGen(statistics=stats.outputs['statistics'])
  schema = schema_component.outputs['schema']

  # Important: Transform materialization must be disabled.
  preprocess = Transform(
      examples=bound_examples,
      schema=schema,
      module_file=module_file,
      materialize=False,
  )

  model_trainer = Trainer(
      examples=bound_examples,
      transform_graph=preprocess.outputs['transform_graph'],
      module_file=module_file,
      train_args=trainer_pb2.TrainArgs(num_steps=1000),
      schema=schema,
      eval_args=trainer_pb2.EvalArgs(num_steps=10),
  )

  ranking_model_spec = tfma.ModelSpec(
      signature_name='',
      label_key='relevance',
      padding_options=tfma.config.PaddingOptions(
          label_float_padding=-1.0, prediction_float_padding=-1.0),
  )

  # Threshold applied to the overall slice only.
  overall_ndcg_threshold = tfma.PerSliceMetricThreshold(
      slicing_specs=[tfma.SlicingSpec()],
      threshold=tfma.MetricThreshold(
          value_threshold=tfma.GenericValueThreshold(
              lower_bound={'value': 0.6})),
  )
  ndcg_spec = tfma.MetricsSpec(
      per_slice_thresholds={
          'metric/ndcg_10':
              tfma.config.PerSliceMetricThresholds(
                  thresholds=[overall_ndcg_threshold]),
      })

  ranking_eval_config = tfma.EvalConfig(
      model_specs=[ranking_model_spec],
      slicing_specs=[
          tfma.SlicingSpec(),
          tfma.SlicingSpec(feature_keys=['query_tokens']),
      ],
      metrics_specs=[ndcg_spec],
  )

  model_evaluator = Evaluator(
      examples=bound_examples,
      model=model_trainer.outputs['model'],
      eval_config=ranking_eval_config,
      schema=schema,
  )

  # Push the model to the serving directory only if validation blessed it.
  destination = pusher_pb2.PushDestination(
      filesystem=pusher_pb2.PushDestination.Filesystem(
          base_directory=serving_model_dir))
  model_pusher = Pusher(
      model=model_trainer.outputs['model'],
      model_blessing=model_evaluator.outputs['blessing'],
      push_destination=destination,
  )

  all_components = [
      ingest,
      view_provider,
      view_binder,
      stats,
      schema_component,
      preprocess,
      model_trainer,
      model_evaluator,
      model_pusher,
  ]
  mlmd_config = metadata.sqlite_metadata_connection_config(metadata_path)

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=run_root,
      components=all_components,
      enable_cache=True,
      metadata_connection_config=mlmd_config,
      beam_pipeline_args=beam_pipeline_args,
  )

Пример #5

Показать файл

Файл: cifar10_pipeline_native_keras.py Проект: jeongukjae/tfx

def _create_pipeline(
        pipeline_name: Text,
        pipeline_root: Text,
        data_root: Text,
        module_file: Text,
        serving_model_dir_lite: Text,
        metadata_path: Text,
        labels_path: Text,
        beam_pipeline_args: List[Text],
) -> pipeline.Pipeline:
    """Build the CIFAR10 image classification pipeline with TFX.

    The model is evaluated as a TFLite model on the examples materialized by
    Transform and pushed to ``serving_model_dir_lite`` when blessed.
    """
    # The dataset ships with pre-defined splits. To train on the whole
    # CIFAR-10 dataset, change the patterns to train_whole/* and test_whole/*.
    train_split = example_gen_pb2.Input.Split(name='train', pattern='train/*')
    eval_split = example_gen_pb2.Input.Split(name='eval', pattern='test/*')
    split_config = example_gen_pb2.Input(splits=[train_split, eval_split])

    # Ingestion.
    ingest = ImportExampleGen(
        input_base=data_root,
        input_config=split_config,
    )
    raw_examples = ingest.outputs['examples']

    # Statistics for visualization and example validation.
    stats = StatisticsGen(examples=raw_examples)
    statistics = stats.outputs['statistics']

    # Schema inferred from the statistics.
    schema_component = SchemaGen(
        statistics=statistics,
        infer_feature_shape=True,
    )
    schema = schema_component.outputs['schema']

    # Anomaly detection against the schema.
    validator = ExampleValidator(statistics=statistics, schema=schema)

    # Feature engineering shared by training and serving.
    preprocess = Transform(
        examples=raw_examples,
        schema=schema,
        module_file=module_file,
    )
    transformed_examples = preprocess.outputs['transformed_examples']

    # Training with the user-provided module.
    # Step counts: on the whole dataset use 18744 train steps (24 epochs on
    # the whole train set) and 156 eval steps (1 epoch on the whole test
    # set). The values below fit the data provided in the data folder, which
    # has 128 train and 128 test samples: 160 train steps are 40 epochs and
    # 4 eval steps are 1 epoch on that tiny set.
    model_trainer = Trainer(
        module_file=module_file,
        examples=transformed_examples,
        transform_graph=preprocess.outputs['transform_graph'],
        schema=schema,
        train_args=trainer_pb2.TrainArgs(num_steps=160),
        eval_args=trainer_pb2.EvalArgs(num_steps=4),
        custom_config={'labels_path': labels_path},
    )
    trained_model = model_trainer.outputs['model']

    # Latest blessed model, used as the validation baseline.
    baseline_resolver = ResolverNode(
        resolver_class=latest_blessed_model_resolver.
        LatestBlessedModelResolver,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing),
    ).with_id('latest_blessed_model_resolver')

    # Quality gate: absolute accuracy bound plus comparison to the baseline.
    accuracy_threshold = tfma.MetricThreshold(
        value_threshold=tfma.GenericValueThreshold(
            lower_bound={'value': 0.55}),
        # The change threshold is ignored when no baseline model is resolved
        # from MLMD (first run).
        change_threshold=tfma.GenericChangeThreshold(
            direction=tfma.MetricDirection.HIGHER_IS_BETTER,
            absolute={'value': -1e-3}),
    )
    accuracy_metric = tfma.MetricConfig(
        class_name='SparseCategoricalAccuracy',
        threshold=accuracy_threshold,
    )
    lite_eval_config = tfma.EvalConfig(
        model_specs=[
            tfma.ModelSpec(label_key='label_xf', model_type='tf_lite')
        ],
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=[tfma.MetricsSpec(metrics=[accuracy_metric])],
    )

    # Evaluation runs on the materialized Transform output because
    # 1. the decoding_png function currently performed within Transform is
    #    not compatible with TFLite;
    # 2. MLKit requires deserialized (float32) tensor image inputs.
    # For deployment, the same logic that Transform performs must be
    # reproduced client-side.
    model_evaluator = Evaluator(
        examples=transformed_examples,
        model=trained_model,
        baseline_model=baseline_resolver.outputs['model'],
        eval_config=lite_eval_config,
    )

    # Push the model to the serving directory only if validation blessed it.
    destination = pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=serving_model_dir_lite))
    model_pusher = Pusher(
        model=trained_model,
        model_blessing=model_evaluator.outputs['blessing'],
        push_destination=destination,
    )

    all_components = [
        ingest,
        stats,
        schema_component,
        validator,
        preprocess,
        model_trainer,
        baseline_resolver,
        model_evaluator,
        model_pusher,
    ]
    mlmd_config = metadata.sqlite_metadata_connection_config(metadata_path)

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=all_components,
        enable_cache=True,
        metadata_connection_config=mlmd_config,
        beam_pipeline_args=beam_pipeline_args,
    )

Пример #6

Показать файл

def create_pipeline(
    pipeline_name: Text,
    pipeline_root: Path,
    module_file: Path,
    serving_model_path: Path,
    metadata_path: Path,
    data_path: Path,
) -> pipeline.Pipeline:
    """Build a pipeline over the ``Gta1`` dataset builder's data directory.

    The builder is downloaded and prepared first. Its ``*.tfrecord-[0-9]*``
    files are imported as one input split and re-split into train/eval with
    9 and 1 hash buckets. ``data_path`` is accepted but not used here.
    The Pusher runs without a ``model_blessing`` input.
    """
    dataset_builder = Gta1()
    dataset_builder.download_and_prepare()

    # Single input split matching the builder's TFRecord shards.
    source_split = example_gen_pb2.Input.Split(
        name="train",
        pattern="*.tfrecord-[0-9]*",
    )
    source_config = example_gen_pb2.Input(splits=[source_split])

    # Output split via hash buckets: 9 for train, 1 for eval.
    bucket_splits = [
        example_gen_pb2.SplitConfig.Split(name="train", hash_buckets=9),
        example_gen_pb2.SplitConfig.Split(name="eval", hash_buckets=1),
    ]
    split_output = example_gen_pb2.Output(
        split_config=example_gen_pb2.SplitConfig(splits=bucket_splits))

    # Ingestion.
    ingest = ImportExampleGen(
        input_base=dataset_builder.data_dir,
        input_config=source_config,
        output_config=split_output,
    )
    raw_examples = ingest.outputs["examples"]

    # Statistics for visualization and example validation.
    stats = StatisticsGen(examples=raw_examples)

    # Schema inferred from the statistics.
    schema_component = SchemaGen(
        statistics=stats.outputs["statistics"],
        infer_feature_shape=True,
    )
    schema = schema_component.outputs["schema"]

    # Feature engineering shared by training and serving.
    module_path = str(module_file)
    preprocess = Transform(
        module_file=module_path,
        examples=raw_examples,
        schema=schema,
        materialize=True,
    )

    # Training with the user-provided module.
    model_trainer = Trainer(
        module_file=str(module_file),
        examples=preprocess.outputs["transformed_examples"],
        transform_graph=preprocess.outputs["transform_graph"],
        schema=schema,
        train_args=trainer_pb2.TrainArgs(num_steps=10_000),
        eval_args=trainer_pb2.EvalArgs(num_steps=500),
    )

    # Push the trained model to the serving path. No evaluator exists in this
    # pipeline, so the blessing input stays disabled:
    # model_blessing=evaluator.outputs["blessing"]
    destination = pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=str(serving_model_path)))
    model_pusher = Pusher(
        model=model_trainer.outputs["model"],
        push_destination=destination,
    )

    all_components = [
        ingest,
        stats,
        schema_component,
        preprocess,
        model_trainer,
        model_pusher,
    ]
    mlmd_config = metadata.sqlite_metadata_connection_config(
        str(metadata_path))

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=str(pipeline_root),
        components=all_components,
        metadata_connection_config=mlmd_config,
        enable_cache=True,
    )

Пример #7

Показать файл

def create_pipeline(
    pipeline_name: Text,
    pipeline_root: Text,
    data_path: Text,
    enable_cache: bool,
    preprocessing_fn: Text,
    run_fn: Text,
    train_args: trainer_pb2.TrainArgs,
    eval_args: trainer_pb2.EvalArgs,
    serving_model_dir: Text,
    metadata_connection_config: Optional[
        metadata_store_pb2.ConnectionConfig] = None,
    beam_pipeline_args: Optional[List[Text]] = None,
    ai_platform_training_args: Optional[Dict[Text, Text]] = None,
    ai_platform_serving_args: Optional[Dict[Text, Any]] = None,
    trainer_custom_config: Optional[Dict[Text, Any]] = None,
) -> pipeline.Pipeline:
    """Build the ingestion -> statistics -> schema -> transform -> train pipeline.

    Args:
        pipeline_name: Name given to the pipeline.
        pipeline_root: Root directory passed to the pipeline.
        data_path: Location handed to ``external_input`` for ImportExampleGen.
        enable_cache: Forwarded to ``pipeline.Pipeline``.
        preprocessing_fn: Forwarded to Transform.
        run_fn: Forwarded to Trainer.
        train_args: Training arguments forwarded to Trainer.
        eval_args: Evaluation arguments forwarded to Trainer.
        serving_model_dir: Currently unused; only the disabled Pusher reads it.
        metadata_connection_config: Forwarded to ``pipeline.Pipeline``.
        beam_pipeline_args: Forwarded to ``pipeline.Pipeline``.
        ai_platform_training_args: When not None, the Trainer uses the
            AI Platform executor and these args are added to its custom
            config under ``TRAINING_ARGS_KEY``.
        ai_platform_serving_args: Currently unused; only the disabled Pusher
            reads it.
        trainer_custom_config: Extra custom config for the Trainer. It is
            kept when ``ai_platform_training_args`` is also given.

    Returns:
        The configured ``pipeline.Pipeline``.
    """
    components = []

    # Brings data into the pipeline or otherwise joins/converts training data.
    example_gen = ImportExampleGen(input=external_input(data_path))
    components.append(example_gen)

    # Computes statistics over data for visualization and example validation.
    statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
    components.append(statistics_gen)

    # Generates schema based on statistics files.
    schema_gen = SchemaGen(statistics=statistics_gen.outputs['statistics'],
                           infer_feature_shape=False)
    components.append(schema_gen)

    # Performs anomaly detection based on statistics and data schema.
    example_validator = ExampleValidator(  # pylint: disable=unused-variable
        statistics=statistics_gen.outputs['statistics'],
        schema=schema_gen.outputs['schema'])
    components.append(example_validator)

    # Performs transformations and feature engineering in training and serving.
    transform = Transform(examples=example_gen.outputs['examples'],
                          schema=schema_gen.outputs['schema'],
                          preprocessing_fn=preprocessing_fn)
    components.append(transform)

    # Trainer arguments for local training with the generic executor.
    trainer_args = {
        'run_fn': run_fn,
        'transformed_examples': transform.outputs['transformed_examples'],
        'schema': schema_gen.outputs['schema'],
        'transform_graph': transform.outputs['transform_graph'],
        'train_args': train_args,
        'eval_args': eval_args,
        'custom_executor_spec':
            executor_spec.ExecutorClassSpec(trainer_executor.GenericExecutor),
        'custom_config': trainer_custom_config,
    }

    if ai_platform_training_args is not None:
        trainer_args.update({
            'custom_executor_spec':
                executor_spec.ExecutorClassSpec(
                    ai_platform_trainer_executor.GenericExecutor),
            # Merge rather than replace, so the caller's trainer_custom_config
            # is not silently dropped. The AI Platform args come last and win
            # on a key clash.
            'custom_config': {
                **(trainer_custom_config or {}),
                ai_platform_trainer_executor.TRAINING_ARGS_KEY:
                    ai_platform_training_args,
            },
        })
    trainer = Trainer(**trainer_args)
    components.append(trainer)

    # TODO in TFX <= 2.22.0 we need a workaround to enable the pusher. Pusher
    #  is disabled until we move sample to > TFX==2.22.00
    #
    # pusher_args = {
    #         'model': trainer.outputs['model'],
    #         'model_blessing': blessing_importer.outputs['result'],
    #         'push_destination': pusher_pb2.PushDestination(
    #                 filesystem=pusher_pb2.PushDestination.Filesystem(
    #                         base_directory=serving_model_dir)),
    # }
    # if ai_platform_serving_args is not None:
    #     pusher_args.update({
    #             'custom_executor_spec': executor_spec.ExecutorClassSpec(
    #                     ai_platform_pusher_executor.Executor),
    #             'custom_config': {
    #                     ai_platform_pusher_executor.SERVING_ARGS_KEY: ai_platform_serving_args
    #             },
    #     })
    # pusher = Pusher(**pusher_args)  # pylint: disable=unused-variable
    # Temporary disable pusher.
    # components.append(pusher)

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=components,
        enable_cache=enable_cache,
        metadata_connection_config=metadata_connection_config,
        beam_pipeline_args=beam_pipeline_args,
    )

Пример #8

Показать файл

Файл: cifar10_pipeline_beam.py Проект: zzhmtxxhh/tfx

def _create_pipeline(
        pipeline_name: Text,
        pipeline_root: Text,
        data_root: Text,
        module_file: Text,
        serving_model_dir: Text,
        metadata_path: Text,
) -> pipeline.Pipeline:
    """Build the cifar10 pipeline with TFX.

    ``train.tfrecord`` and ``test.tfrecord`` under ``data_root`` are imported
    as the ``train`` and ``eval`` splits. The model is pushed to
    ``serving_model_dir`` once ModelValidator blesses it.
    """
    source = external_input(data_root)
    train_split = example_gen_pb2.Input.Split(
        name='train', pattern='train.tfrecord')
    eval_split = example_gen_pb2.Input.Split(
        name='eval', pattern='test.tfrecord')
    split_config = example_gen_pb2.Input(splits=[train_split, eval_split])

    # Ingestion.
    ingest = ImportExampleGen(input=source, input_config=split_config)
    raw_examples = ingest.outputs['examples']

    # Statistics for visualization and example validation.
    stats = StatisticsGen(examples=raw_examples)
    statistics = stats.outputs['statistics']

    # Schema inferred from the statistics.
    schema_component = SchemaGen(
        statistics=statistics,
        infer_feature_shape=True,
    )
    schema = schema_component.outputs['schema']

    # Anomaly detection against the schema.
    stats_validator = ExampleValidator(statistics=statistics, schema=schema)

    # Feature engineering shared by training and serving.
    preprocess = Transform(
        examples=raw_examples,
        schema=schema,
        module_file=module_file,
    )

    # Training with the user-provided module.
    model_trainer = Trainer(
        module_file=module_file,
        examples=preprocess.outputs['transformed_examples'],
        schema=schema,
        transform_graph=preprocess.outputs['transform_graph'],
        train_args=trainer_pb2.TrainArgs(num_steps=1000),
        eval_args=trainer_pb2.EvalArgs(num_steps=500),
    )
    trained_model = model_trainer.outputs['model']

    # TFMA evaluation with a single default slicing spec.
    slicing = evaluator_pb2.FeatureSlicingSpec(
        specs=[evaluator_pb2.SingleSlicingSpec()])
    model_evaluator = Evaluator(
        examples=raw_examples,
        model_exports=trained_model,
        feature_slicing_spec=slicing,
    )

    # Quality validation of the candidate model (compared to a baseline).
    blessing_validator = ModelValidator(
        examples=raw_examples,
        model=trained_model,
    )

    # Push the model to the serving directory only if validation blessed it.
    destination = pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=serving_model_dir))
    model_pusher = Pusher(
        model=trained_model,
        model_blessing=blessing_validator.outputs['blessing'],
        push_destination=destination,
    )

    all_components = [
        ingest,
        stats,
        schema_component,
        stats_validator,
        preprocess,
        model_trainer,
        model_evaluator,
        blessing_validator,
        model_pusher,
    ]
    mlmd_config = metadata.sqlite_metadata_connection_config(metadata_path)

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=all_components,
        enable_cache=True,
        metadata_connection_config=mlmd_config,
    )

Пример #9

Показать файл

def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     module_file: Text, serving_model_dir: Text,
                     metadata_path: Text,
                     direct_num_workers: int) -> pipeline.Pipeline:
    """Assemble the TFX pipeline for the handwritten digit classification example.

    Args:
        pipeline_name: Name forwarded to ``pipeline.Pipeline``.
        pipeline_root: Root location forwarded to ``pipeline.Pipeline``.
        data_root: Location wrapped with ``external_input`` and read by
            ImportExampleGen.
        module_file: User module handed to both Transform and Trainer.
        serving_model_dir: Filesystem base directory used by the Pusher.
        metadata_path: Path given to the SQLite metadata connection config.
        direct_num_workers: Number substituted into the
            ``--direct_num_workers`` Beam pipeline argument.

    Returns:
        The configured ``pipeline.Pipeline`` with caching enabled.
    """
    # Ingestion: bring the externally provided examples into the pipeline.
    example_gen = ImportExampleGen(input=external_input(data_root))
    raw_examples = example_gen.outputs['examples']

    # Statistics over the ingested data; shared by the next two components.
    statistics_gen = StatisticsGen(examples=raw_examples)
    statistics = statistics_gen.outputs['statistics']

    # Schema generated from the statistics.
    infer_schema = SchemaGen(statistics=statistics, infer_feature_shape=True)
    schema = infer_schema.outputs['schema']

    # Anomaly detection based on the statistics and the schema.
    validate_stats = ExampleValidator(statistics=statistics, schema=schema)

    # Transformations / feature engineering defined in module_file.
    transform = Transform(examples=raw_examples,
                          schema=schema,
                          module_file=module_file)

    # Training uses the user-provided code in module_file via GenericExecutor.
    trainer = Trainer(
        module_file=module_file,
        custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
        examples=transform.outputs['transformed_examples'],
        transform_graph=transform.outputs['transform_graph'],
        schema=schema,
        train_args=trainer_pb2.TrainArgs(num_steps=5000),
        eval_args=trainer_pb2.EvalArgs(num_steps=100))
    candidate_model = trainer.outputs['model']

    # Resolves the latest blessed model, used as the evaluation baseline.
    resolver_cls = latest_blessed_model_resolver.LatestBlessedModelResolver
    model_resolver = ResolverNode(
        instance_name='latest_blessed_model_resolver',
        resolver_class=resolver_cls,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing))

    # Quality gate: an absolute lower bound on accuracy plus a change
    # threshold relative to the baseline model.
    accuracy_threshold = tfma.config.MetricThreshold(
        value_threshold=tfma.GenericValueThreshold(lower_bound={'value': 0.8}),
        change_threshold=tfma.GenericChangeThreshold(
            direction=tfma.MetricDirection.HIGHER_IS_BETTER,
            absolute={'value': -1e-10}))
    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key='image_class')],
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=[
            tfma.MetricsSpec(
                thresholds={'sparse_categorical_accuracy': accuracy_threshold})
        ])

    # TFMA-based evaluation of the candidate against the baseline.
    model_analyzer = Evaluator(examples=raw_examples,
                               model=candidate_model,
                               baseline_model=model_resolver.outputs['model'],
                               eval_config=eval_config)

    # Pushes the model to the serving directory if the evaluator blessed it.
    serving_destination = pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=serving_model_dir))
    pusher = Pusher(model=candidate_model,
                    model_blessing=model_analyzer.outputs['blessing'],
                    push_destination=serving_destination)

    pipeline_components = [
        example_gen,
        statistics_gen,
        infer_schema,
        validate_stats,
        transform,
        trainer,
        model_resolver,
        model_analyzer,
        pusher,
    ]
    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=pipeline_components,
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path),
        # TODO(b/142684737): The multi-processing API might change.
        beam_pipeline_args=['--direct_num_workers=%d' % direct_num_workers],
    )

Пример #10

Показать файл

Файл: pipeline.py Проект: pmccarthy-dstillery/tfxtesting

def create_pipeline(
    pipeline_name: Text,
    pipeline_root: Text,
    data_path: Text,
    # TODO(step 7): (Optional) Uncomment here to use BigQuery as a data source.
    # query: Text,
    # preprocessing_fn: Text,
    # run_fn: Text,
    module_file: Text,
    train_args: trainer_pb2.TrainArgs,
    eval_args: trainer_pb2.EvalArgs,
    eval_accuracy_threshold: float,
    serving_model_dir: Text,
    metadata_connection_config: Optional[
        metadata_store_pb2.ConnectionConfig] = None,
    beam_pipeline_args: Optional[List[Text]] = None,
    ai_platform_training_args: Optional[Dict[Text, Text]] = None,
    ai_platform_serving_args: Optional[Dict[Text, Any]] = None,
) -> pipeline.Pipeline:
    """Assemble the Chicago taxi TFX pipeline.

    Args:
        pipeline_name: Name forwarded to ``pipeline.Pipeline``.
        pipeline_root: Root location forwarded to ``pipeline.Pipeline``.
        data_path: Location wrapped with ``external_input`` and read by
            ImportExampleGen.
        module_file: User module handed to both Transform and Trainer.
        train_args: Training arguments forwarded to the Trainer.
        eval_args: Evaluation arguments forwarded to the Trainer.
        eval_accuracy_threshold: Lower bound used for the ``BinaryAccuracy``
            value threshold in the evaluator configuration.
        serving_model_dir: Filesystem base directory used by the Pusher.
        metadata_connection_config: Forwarded unchanged to the pipeline.
        beam_pipeline_args: Forwarded unchanged to the pipeline.
        ai_platform_training_args: When not None, the Trainer switches to the
            AI Platform trainer executor and receives these arguments through
            ``custom_config``.
        ai_platform_serving_args: When not None, the Pusher switches to the
            AI Platform pusher executor and receives these arguments through
            ``custom_config``.

    Returns:
        The configured ``pipeline.Pipeline`` with caching enabled.
    """
    # Ingestion. Alternative sources kept for reference:
    #   example_gen = CsvExampleGen(input=external_input(data_path))
    # TODO(step 7): (Optional) use BigQuery as a data source instead:
    #   example_gen = BigQueryExampleGen(query=query)
    example_gen = ImportExampleGen(input=external_input(data_path))
    raw_examples = example_gen.outputs['examples']

    # Statistics over the ingested data.
    statistics_gen = StatisticsGen(examples=raw_examples)
    statistics = statistics_gen.outputs['statistics']

    # Schema generated from the statistics.
    schema_gen = SchemaGen(statistics=statistics, infer_feature_shape=True)
    schema = schema_gen.outputs['schema']

    # Anomaly detection based on the statistics and the schema.
    example_validator = ExampleValidator(statistics=statistics, schema=schema)

    # Transformations / feature engineering defined in module_file.
    transform = Transform(examples=raw_examples,
                          schema=schema,
                          module_file=module_file)

    # Trainer: start from the generic executor, then override with the
    # AI Platform executor when training arguments for it were supplied.
    trainer_kwargs = dict(
        module_file=module_file,
        transformed_examples=transform.outputs['transformed_examples'],
        schema=schema,
        transform_graph=transform.outputs['transform_graph'],
        train_args=train_args,
        eval_args=eval_args,
        custom_executor_spec=executor_spec.ExecutorClassSpec(
            trainer_executor.GenericExecutor),
    )
    if ai_platform_training_args is not None:
        trainer_kwargs['custom_executor_spec'] = (
            executor_spec.ExecutorClassSpec(
                ai_platform_trainer_executor.GenericExecutor))
        trainer_kwargs['custom_config'] = {
            ai_platform_trainer_executor.TRAINING_ARGS_KEY:
            ai_platform_training_args,
        }
    trainer = Trainer(**trainer_kwargs)
    candidate_model = trainer.outputs['model']

    # Resolves the latest blessed model, used as the evaluation baseline.
    resolver_cls = latest_blessed_model_resolver.LatestBlessedModelResolver
    model_resolver = ResolverNode(
        instance_name='latest_blessed_model_resolver',
        resolver_class=resolver_cls,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing))

    # Quality gate: absolute lower bound on accuracy plus a change threshold
    # relative to the baseline model.
    accuracy_threshold = tfma.MetricThreshold(
        value_threshold=tfma.GenericValueThreshold(
            lower_bound={'value': eval_accuracy_threshold}),
        change_threshold=tfma.GenericChangeThreshold(
            direction=tfma.MetricDirection.HIGHER_IS_BETTER,
            absolute={'value': -1e-10}))
    accuracy_metric = tfma.MetricConfig(class_name='BinaryAccuracy',
                                        threshold=accuracy_threshold)
    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key='label')],
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=[tfma.MetricsSpec(metrics=[accuracy_metric])])

    # Change threshold will be ignored if there is no baseline (first run).
    evaluator = Evaluator(examples=raw_examples,
                          model=candidate_model,
                          baseline_model=model_resolver.outputs['model'],
                          eval_config=eval_config)

    # Pusher: filesystem destination by default; AI Platform executor and
    # serving arguments are added only when they were supplied.
    pusher_kwargs = dict(
        model=candidate_model,
        model_blessing=evaluator.outputs['blessing'],
        push_destination=pusher_pb2.PushDestination(
            filesystem=pusher_pb2.PushDestination.Filesystem(
                base_directory=serving_model_dir)),
    )
    if ai_platform_serving_args is not None:
        pusher_kwargs['custom_executor_spec'] = (
            executor_spec.ExecutorClassSpec(
                ai_platform_pusher_executor.Executor))
        pusher_kwargs['custom_config'] = {
            ai_platform_pusher_executor.SERVING_ARGS_KEY:
            ai_platform_serving_args
        }
    pusher = Pusher(**pusher_kwargs)

    components = [
        example_gen,
        statistics_gen,
        schema_gen,
        example_validator,
        transform,
        trainer,
        model_resolver,
        evaluator,
        pusher,
    ]
    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=components,
        # TODO(step 8): Change this value to control caching of execution results.
        enable_cache=True,
        metadata_connection_config=metadata_connection_config,
        beam_pipeline_args=beam_pipeline_args,
    )

Пример #11

Показать файл

def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     module_file: Text,
                     metadata_path: Text) -> pipeline.Pipeline:
    """Assemble the TFX pipeline for the handwritten digit classification example.

    This variant stops after evaluation: no baseline resolver and no pusher
    are part of the pipeline.

    Args:
        pipeline_name: Name forwarded to ``pipeline.Pipeline``.
        pipeline_root: Root location forwarded to ``pipeline.Pipeline``.
        data_root: Location wrapped with ``external_input`` and read by
            ImportExampleGen.
        module_file: User module handed to both Transform and Trainer.
        metadata_path: Path given to the SQLite metadata connection config.

    Returns:
        The configured ``pipeline.Pipeline`` with caching enabled.
    """
    # Ingestion: bring the externally provided examples into the pipeline.
    example_gen = ImportExampleGen(input=external_input(data_root))
    raw_examples = example_gen.outputs['examples']

    # Statistics over the ingested data; shared by the next two components.
    statistics_gen = StatisticsGen(examples=raw_examples)
    statistics = statistics_gen.outputs['statistics']

    # Schema generated from the statistics.
    infer_schema = SchemaGen(statistics=statistics, infer_feature_shape=True)
    schema = infer_schema.outputs['schema']

    # Anomaly detection based on the statistics and the schema.
    validate_stats = ExampleValidator(statistics=statistics, schema=schema)

    # Transformations / feature engineering defined in module_file.
    transform = Transform(examples=raw_examples,
                          schema=schema,
                          module_file=module_file)

    # Training uses the user-provided code in module_file via GenericExecutor.
    trainer = Trainer(
        module_file=module_file,
        custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
        examples=transform.outputs['transformed_examples'],
        transform_graph=transform.outputs['transform_graph'],
        schema=schema,
        train_args=trainer_pb2.TrainArgs(num_steps=5000),
        eval_args=trainer_pb2.EvalArgs(num_steps=100))

    # TFMA evaluation over a single slicing spec; no thresholds configured.
    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key='image/class')],
        slicing_specs=[tfma.SlicingSpec()])
    model_analyzer = Evaluator(examples=raw_examples,
                               model=trainer.outputs['model'],
                               eval_config=eval_config)

    # TODO(ananthr): support infra validator, model validation in evaluator,
    # and pusher component.

    pipeline_components = [
        example_gen,
        statistics_gen,
        infer_schema,
        validate_stats,
        transform,
        trainer,
        model_analyzer,
    ]
    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=pipeline_components,
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path),
    )

Пример #12

Показать файл

def create_pipeline(pipeline_name: Text,
                    pipeline_root: Text,
                    data_root: Text,
                    test_data_root: Text,
                    module_file: Text,
                    serving_model_dir: Text,
                    enable_cache: bool,
                    metadata_connection_config: Optional[
                        metadata_store_pb2.ConnectionConfig] = None,
                    beam_pipeline_args: Optional[List[Text]] = None):
    """Assemble a TFX pipeline that trains on one dataset and evaluates on another.

    Args:
        pipeline_name: Name forwarded to ``pipeline.Pipeline``.
        pipeline_root: Root location forwarded to ``pipeline.Pipeline``.
        data_root: Input base of the training data; split into ``train`` and
            ``eval`` with 8 and 2 hash buckets respectively.
        test_data_root: Input base of the data the Evaluator runs on.
        module_file: User module handed to both Transform and Trainer.
        serving_model_dir: Filesystem base directory used by the Pusher.
        enable_cache: Forwarded unchanged to the pipeline.
        metadata_connection_config: Forwarded unchanged to the pipeline.
        beam_pipeline_args: Forwarded unchanged to the pipeline.

    Returns:
        The configured ``pipeline.Pipeline``.
    """
    # Split configuration for the training TFRecords.
    train_eval_split = example_gen_pb2.SplitConfig(splits=[
        example_gen_pb2.SplitConfig.Split(name='train', hash_buckets=8),
        example_gen_pb2.SplitConfig.Split(name='eval', hash_buckets=2),
    ])
    output_config = example_gen_pb2.Output(split_config=train_eval_split)

    # Load the data into the pipeline: one ingestion component per dataset.
    example_gen = ImportExampleGen(input_base=data_root,
                                   output_config=output_config,
                                   instance_name="train_data")
    test_example_gen = ImportExampleGen(input_base=test_data_root,
                                        instance_name="test_data")
    train_examples = example_gen.outputs['examples']

    # Compute statistics of the training data.
    statistics_gen = StatisticsGen(examples=train_examples)
    statistics = statistics_gen.outputs['statistics']

    # Generate the schema from the StatisticsGen output.
    schema_gen = SchemaGen(statistics=statistics, infer_feature_shape=True)
    schema = schema_gen.outputs['schema']

    # Check the data for problems such as missing values.
    example_validator = ExampleValidator(statistics=statistics, schema=schema)

    # Transformations defined in module_file.
    transform = Transform(examples=train_examples,
                          schema=schema,
                          module_file=module_file)

    # Training uses the user-provided code in module_file via GenericExecutor.
    trainer = Trainer(
        module_file=module_file,
        custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
        examples=transform.outputs['transformed_examples'],
        transform_graph=transform.outputs['transform_graph'],
        schema=schema,
        train_args=trainer_pb2.TrainArgs(num_steps=160),
        eval_args=trainer_pb2.EvalArgs(num_steps=4),
    )
    candidate_model = trainer.outputs['model']

    # Resolves the latest blessed model, used as the evaluation baseline.
    resolver_cls = latest_blessed_model_resolver.LatestBlessedModelResolver
    model_resolver = ResolverNode(
        instance_name='latest_blessed_model_resolver',
        resolver_class=resolver_cls,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing))

    # Model spec settings: see https://github.com/tensorflow/tfx/issues/3016
    keras_model_spec = tfma.ModelSpec(label_key='label',
                                      model_type='tf_keras',
                                      signature_name="serving_default")
    # Quality gate: absolute lower bound on accuracy plus a change threshold
    # relative to the baseline model.
    accuracy_threshold = tfma.MetricThreshold(
        value_threshold=tfma.GenericValueThreshold(lower_bound={'value': 0.2}),
        change_threshold=tfma.GenericChangeThreshold(
            direction=tfma.MetricDirection.HIGHER_IS_BETTER,
            absolute={'value': -1e-3}))
    accuracy_metric = tfma.MetricConfig(class_name='SparseCategoricalAccuracy',
                                        threshold=accuracy_threshold)
    eval_config = tfma.EvalConfig(
        model_specs=[keras_model_spec],
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=[tfma.MetricsSpec(metrics=[accuracy_metric])])

    # Evaluation runs on the separately ingested test data.
    evaluator = Evaluator(examples=test_example_gen.outputs['examples'],
                          model=candidate_model,
                          baseline_model=model_resolver.outputs['model'],
                          eval_config=eval_config)

    # Pushes the model to the serving directory if the evaluator blessed it.
    serving_destination = pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=serving_model_dir))
    pusher = Pusher(model=candidate_model,
                    model_blessing=evaluator.outputs['blessing'],
                    push_destination=serving_destination)

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=[
            example_gen,
            test_example_gen,
            statistics_gen,
            schema_gen,
            example_validator,
            transform,
            trainer,
            model_resolver,
            evaluator,
            pusher,
        ],
        enable_cache=enable_cache,
        metadata_connection_config=metadata_connection_config,
        beam_pipeline_args=beam_pipeline_args,
    )

Пример #13

Показать файл

Файл: pipeline.py Проект: rakesh283343/tfx-kubeflow-pipeline

def create_pipeline(
    pipeline_name: Text,
    pipeline_root: Text,
    data_path: Text,
    preprocessing_fn: Text,
    run_fn: Text,
    train_args: trainer_pb2.TrainArgs,
    eval_args: trainer_pb2.EvalArgs,
    eval_accuracy_threshold: float,
    serving_model_dir: Text,
    query: Optional[Text] = None,
    metadata_connection_config: Optional[
        metadata_store_pb2.ConnectionConfig] = None,
    beam_pipeline_args: Optional[List[Text]] = None,
    ai_platform_training_args: Optional[Dict[Text, Text]] = None,
    ai_platform_serving_args: Optional[Dict[Text, Any]] = None,
) -> pipeline.Pipeline:
  """Assemble a TFX pipeline that currently runs ingestion through Transform.

  A Trainer is constructed but is not part of the returned pipeline; the
  resolver, evaluator and pusher stages are disabled (kept as comments).
  ``eval_accuracy_threshold``, ``serving_model_dir`` and
  ``ai_platform_serving_args`` are therefore only referenced by the disabled
  code.

  Args:
    pipeline_name: Name forwarded to ``pipeline.Pipeline``.
    pipeline_root: Root location forwarded to ``pipeline.Pipeline``.
    data_path: Location read by ImportExampleGen when ``query`` is falsy.
    preprocessing_fn: Forwarded to Transform.
    run_fn: Forwarded to Trainer.
    train_args: Training arguments forwarded to Trainer.
    eval_args: Evaluation arguments forwarded to Trainer.
    eval_accuracy_threshold: Used only by the disabled evaluator config.
    serving_model_dir: Used only by the disabled pusher config.
    query: When truthy, examples come from BigQueryExampleGen with this query.
    metadata_connection_config: Forwarded unchanged to the pipeline.
    beam_pipeline_args: Forwarded unchanged to the pipeline.
    ai_platform_training_args: When truthy, the Trainer switches to the
      AI Platform trainer executor and receives these via ``custom_config``.
    ai_platform_serving_args: Used only by the disabled pusher config.

  Returns:
    The configured ``pipeline.Pipeline`` with caching enabled.
  """
  # Ingestion: BigQuery when a query is given, otherwise pre-built examples.
  # Alternative kept for reference:
  #   example_gen = CsvExampleGen(input=external_input(data_path))
  example_gen = (BigQueryExampleGen(query=query) if query else
                 ImportExampleGen(input=external_input(data_path)))
  raw_examples = example_gen.outputs['examples']

  statistics_gen = StatisticsGen(examples=raw_examples)
  statistics = statistics_gen.outputs['statistics']

  schema_gen = SchemaGen(statistics=statistics, infer_feature_shape=False)
  schema = schema_gen.outputs['schema']

  example_validator = ExampleValidator(statistics=statistics, schema=schema)

  transform = Transform(examples=raw_examples,
                        schema=schema,
                        preprocessing_fn=preprocessing_fn)

  # Trainer: start from the generic executor, then override with the
  # AI Platform executor when training arguments for it were supplied.
  trainer_kwargs = dict(
    run_fn=run_fn,
    transformed_examples=transform.outputs['transformed_examples'],
    schema=schema,
    transform_graph=transform.outputs['transform_graph'],
    train_args=train_args,
    eval_args=eval_args,
    custom_executor_spec=executor_spec.ExecutorClassSpec(
      trainer_executor.GenericExecutor),
  )
  if ai_platform_training_args:
    trainer_kwargs['custom_executor_spec'] = executor_spec.ExecutorClassSpec(
      ai_platform_trainer_executor.GenericExecutor)
    trainer_kwargs['custom_config'] = {
      ai_platform_trainer_executor.TRAINING_ARGS_KEY:
        ai_platform_training_args,
    }
  # NOTE(review): built but not listed in the pipeline components below.
  trainer = Trainer(**trainer_kwargs)

  # --- Disabled stages (kept for when they are re-enabled) ---
  # model_resolver = ResolverNode(instance_name='latest_blessed_model_resolver',
  #                               resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
  #                               model=Channel(type=Model),
  #                               model_blessing=Channel(type=ModelBlessing))

  # eval_config = tfma.EvalConfig(
  #     model_specs=[tfma.ModelSpec(label_key='tips')],
  #     slicing_specs=[tfma.SlicingSpec()],
  #     metrics_specs=[
  #         tfma.MetricsSpec(
  #             thresholds={
  #                 'binary_accuracy':
  #                 tfma.config.MetricThreshold(
  #                     value_threshold=tfma.GenericValueThreshold(
  #                         lower_bound={'value': eval_accuracy_threshold}),
  #                     change_threshold=tfma.GenericChangeThreshold(
  #                         direction=tfma.MetricDirection.HIGHER_IS_BETTER,
  #                         absolute={'value': -1e-10}))
  #             })
  #     ])
  # evaluator = Evaluator(examples=example_gen.outputs['examples'],
  #                       model=trainer.outputs['model'],
  #                       baseline_model=model_resolver.outputs['model'],
  #                       eval_config=eval_config)

  # pusher_args = {
  #   'model':
  #     trainer.outputs['model'],
  #   'model_blessing':
  #     evaluator.outputs['blessing'],
  #   'push_destination':
  #     pusher_pb2.PushDestination(
  #       filesystem=pusher_pb2.PushDestination.Filesystem(
  #         base_directory=serving_model_dir)),
  # }
  # if ai_platform_serving_args:
  #   pusher_args.update({
  #     'custom_executor_spec':
  #       executor_spec.ExecutorClassSpec(
  #         ai_platform_pusher_executor.Executor),
  #     'custom_config': {
  #       ai_platform_pusher_executor.SERVING_ARGS_KEY:
  #         ai_platform_serving_args
  #     },
  #   })
  # pusher = Pusher(**pusher_args)

  active_components = [
    example_gen,
    statistics_gen,
    schema_gen,
    example_validator,
    transform,
    # trainer,
    # model_resolver,
    # evaluator,
    # pusher
  ]
  return pipeline.Pipeline(
    pipeline_name=pipeline_name,
    pipeline_root=pipeline_root,
    components=active_components,
    enable_cache=True,
    metadata_connection_config=metadata_connection_config,
    beam_pipeline_args=beam_pipeline_args,
  )

Пример #14

Показать файл

Файл: mnist_pipeline_native_keras.py Проект: ssoudan/tfx_x

def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     custom_config: Dict[Text, Any], module_file: Text,
                     serving_model_dir: Text, metadata_path: Text,
                     beam_pipeline_args: List[Text]) -> pipeline.Pipeline:
    """Implements the handwritten digit classification example using TFX.

    Args:
        pipeline_name: Name forwarded to ``pipeline.Pipeline``.
        pipeline_root: Root location forwarded to ``pipeline.Pipeline``.
        data_root: Input base read by ImportExampleGen.
        custom_config: Configuration recorded through ``FromCustomConfig`` and
            also forwarded to the Trainer.
        module_file: User module handed to both Transform and Trainer.
        serving_model_dir: Filesystem base directory used by the Pusher.
        metadata_path: Path given to the SQLite metadata connection config.
        beam_pipeline_args: Forwarded unchanged to the pipeline.

    Returns:
        The configured ``pipeline.Pipeline`` with caching enabled.
    """
    # Store the configuration along with the pipeline run so results can be
    # reproduced.
    pipeline_configuration = FromCustomConfig(custom_config=custom_config)
    configuration = pipeline_configuration.outputs['pipeline_configuration']

    # Brings data into the pipeline.
    example_gen = ImportExampleGen(input_base=data_root)

    # Computes statistics over data for visualization and example validation.
    statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])

    # Generates schema based on statistics files.
    schema_gen = SchemaGen(statistics=statistics_gen.outputs['statistics'],
                           infer_feature_shape=True)

    # Performs anomaly detection based on statistics and data schema.
    example_validator = ExampleValidator(
        statistics=statistics_gen.outputs['statistics'],
        schema=schema_gen.outputs['schema'])

    # Create a filtered dataset - today we only want a model for small digits.
    # Named `example_filter` so the builtin `filter` is not shadowed.
    example_filter = Filter(examples=example_gen.outputs['examples'],
                            pipeline_configuration=configuration,
                            splits_to_transform=['train', 'eval'],
                            splits_to_copy=[])

    # Create a stratified dataset for evaluation: only the 'eval' split is
    # resampled, 'train' is copied through.
    stratified_examples = StratifiedSampler(
        examples=example_filter.outputs['filtered_examples'],
        pipeline_configuration=configuration,
        samples_per_key=1200,
        splits_to_transform=['eval'],
        splits_to_copy=['train'])

    # Performs transformations and feature engineering in training and serving.
    transform = Transform(examples=example_filter.outputs['filtered_examples'],
                          schema=schema_gen.outputs['schema'],
                          module_file=module_file)

    # Uses user-provided Python function that trains a Keras model.
    trainer = Trainer(
        module_file=module_file,
        custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
        custom_config=custom_config,
        examples=transform.outputs['transformed_examples'],
        transform_graph=transform.outputs['transform_graph'],
        schema=schema_gen.outputs['schema'],
        train_args=trainer_pb2.TrainArgs(num_steps=5000),
        eval_args=trainer_pb2.EvalArgs(num_steps=100)).with_id('trainer')

    # Uses TFMA to compute evaluation statistics over features of a model and
    # performs quality validation of a candidate model.
    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key='image_class')],
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=[
            tfma.MetricsSpec(metrics=[
                tfma.MetricConfig(
                    class_name='SparseCategoricalAccuracy',
                    threshold=tfma.config.MetricThreshold(
                        value_threshold=tfma.GenericValueThreshold(
                            lower_bound={'value': 0.8})))
            ])
        ])

    # Evaluation runs on the stratified examples rather than the raw ones.
    evaluator = Evaluator(
        examples=stratified_examples.outputs['stratified_examples'],
        model=trainer.outputs['model'],
        eval_config=eval_config).with_id('evaluator')

    # Checks whether the model passed the validation steps and pushes the model
    # to a file destination if check passed.
    pusher = Pusher(
        model=trainer.outputs['model'],
        model_blessing=evaluator.outputs['blessing'],
        push_destination=pusher_pb2.PushDestination(
            filesystem=pusher_pb2.PushDestination.Filesystem(
                base_directory=serving_model_dir))).with_id('pusher')

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=[
            pipeline_configuration,
            example_gen,
            example_filter,
            stratified_examples,
            statistics_gen,
            schema_gen,
            example_validator,
            transform,
            trainer,
            evaluator,
            pusher,
        ],
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path),
        beam_pipeline_args=beam_pipeline_args)

Пример #15

Показать файл

def create_pipeline(pipeline_name: Text, pipeline_root: Text,
                    metadata_path: Text) -> Pipeline:
    """Assemble the TFX pipeline from the module-level path and key constants.

    Data location, module files, label key and serving directory come from
    the module-level constants ``DATA_PATH``, ``TRANSFORM_MODULE``,
    ``TRAINER_MODULE``, ``LABEL_KEY`` and ``SERVING_MODEL_DIR``.

    Args:
        pipeline_name: Name forwarded to ``Pipeline``.
        pipeline_root: Root location forwarded to ``Pipeline``.
        metadata_path: Path given to the SQLite metadata connection config.

    Returns:
        The configured ``Pipeline`` with caching enabled.
    """
    # Ingestion: read the TFRecords and split them 4:1 (hash buckets) into
    # train / eval.
    train_eval_split = example_gen_pb2.SplitConfig(splits=[
        example_gen_pb2.SplitConfig.Split(name='train', hash_buckets=4),
        example_gen_pb2.SplitConfig.Split(name='eval', hash_buckets=1)
    ])
    example_gen = ImportExampleGen(
        input=tfrecord_input(DATA_PATH),
        output_config=example_gen_pb2.Output(split_config=train_eval_split))
    raw_examples = example_gen.outputs['examples']

    # Dataset statistics.
    statistics_gen = StatisticsGen(examples=raw_examples)
    statistics = statistics_gen.outputs['statistics']

    # Schema generated from the statistics.
    schema_gen = SchemaGen(statistics=statistics, infer_feature_shape=True)
    schema = schema_gen.outputs['schema']

    # Data validation / anomaly detection.
    example_validator = ExampleValidator(statistics=statistics, schema=schema)

    # Feature engineering.
    transform = Transform(examples=raw_examples,
                          schema=schema,
                          module_file=TRANSFORM_MODULE)

    # Training via GenericExecutor with the dedicated trainer module.
    trainer = Trainer(
        module_file=TRAINER_MODULE,
        examples=transform.outputs['transformed_examples'],
        schema=schema,
        transform_graph=transform.outputs['transform_graph'],
        custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
        train_args=trainer_pb2.TrainArgs(num_steps=200),
        eval_args=trainer_pb2.EvalArgs(num_steps=35))
    candidate_model = trainer.outputs['model']

    # Evaluation: MeanAbsoluteError with an upper bound as value threshold.
    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key=LABEL_KEY)],
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=[
            tfma.MetricsSpec(metrics=[
                tfma.MetricConfig(
                    class_name='MeanAbsoluteError',
                    threshold=tfma.MetricThreshold(
                        value_threshold=tfma.GenericValueThreshold(
                            upper_bound={'value': 0.7})))
            ])
        ])
    evaluator = Evaluator(examples=raw_examples,
                          model=candidate_model,
                          eval_config=eval_config)

    # Pushes the model to the serving directory if the evaluator blessed it.
    pusher = Pusher(
        model=candidate_model,
        model_blessing=evaluator.outputs['blessing'],
        push_destination=pusher_pb2.PushDestination(
            filesystem=pusher_pb2.PushDestination.Filesystem(
                base_directory=SERVING_MODEL_DIR)))

    return Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path),
        components=[
            example_gen,
            statistics_gen,
            schema_gen,
            example_validator,
            transform,
            trainer,
            evaluator,
            pusher,
        ],
        enable_cache=True,
        beam_pipeline_args=['--direct_num_workers=0'])

Пример #16

Показать файл

def create_pipeline(
    pipeline_name: Text,
    pipeline_root: Text,
    data_path: Text,
    # TODO(step 7): (Optional) Uncomment here to use BigQuery as a data source.
    # query: Text,
    preprocessing_fn: Text,
    run_fn: Text,
    train_args: trainer_pb2.TrainArgs,
    eval_args: trainer_pb2.EvalArgs,
    eval_accuracy_threshold: float,
    serving_model_dir: Text,
    metadata_connection_config: Optional[
        metadata_store_pb2.ConnectionConfig] = None,
    beam_pipeline_args: Optional[List[Text]] = None,
    ai_platform_training_args: Optional[Dict[Text, Text]] = None,
    ai_platform_serving_args: Optional[Dict[Text, Any]] = None,
) -> pipeline.Pipeline:
    """Assemble the Centernet TFX pipeline.

    The pipeline chains, in this order: ImportExampleGen, StatisticsGen,
    SchemaGen, an ImporterNode that loads a user schema, ExampleValidator,
    Transform and Trainer.

    Args:
        pipeline_name: Forwarded to `pipeline.Pipeline`.
        pipeline_root: Forwarded to `pipeline.Pipeline`.
        data_path: Location handed to `external_input` for ImportExampleGen.
        preprocessing_fn: Forwarded to the Transform component.
        run_fn: Forwarded to the Trainer component.
        train_args: Forwarded to the Trainer component.
        eval_args: Forwarded to the Trainer component.
        eval_accuracy_threshold: Accepted but not read by this function.
        serving_model_dir: Accepted but not read by this function.
        metadata_connection_config: Forwarded to `pipeline.Pipeline`.
        beam_pipeline_args: Forwarded to `pipeline.Pipeline`.
        ai_platform_training_args: When not None, the Trainer is switched to
            the AI Platform generic executor and these arguments are passed
            through `custom_config` under `TRAINING_ARGS_KEY`.
        ai_platform_serving_args: Accepted but not read by this function.

    Returns:
        The `pipeline.Pipeline` holding all components, with caching enabled.
    """
    # Train/eval split: 3 hash buckets for 'train', 1 for 'eval', with
    # 'image/filename' as the partition feature.
    train_split = example_gen_pb2.SplitConfig.Split(
        name='train', hash_buckets=3)
    eval_split = example_gen_pb2.SplitConfig.Split(
        name='eval', hash_buckets=1)
    split_config = example_gen_pb2.SplitConfig(
        splits=[train_split, eval_split],
        partition_feature_name='image/filename')
    output_config = example_gen_pb2.Output(split_config=split_config)

    # Data ingestion.
    example_gen = ImportExampleGen(
        input=external_input(data_path),
        output_config=output_config)
    examples = example_gen.outputs['examples']

    # Dataset statistics, used for visualization and example validation.
    statistics_gen = StatisticsGen(
        examples=examples,
        stats_options=STATS_OPTIONS)
    statistics = statistics_gen.outputs['statistics']

    # Schema inferred from the statistics.
    schema_gen = SchemaGen(
        statistics=statistics,
        infer_feature_shape=False)

    # Hand-written schema; this is the one the downstream components consume.
    importer_node = ImporterNode(
        instance_name='import_user_schema',
        source_uri="gs://raw_data_layer/schema/",
        artifact_type=tfx.types.standard_artifacts.Schema)
    user_schema = importer_node.outputs['result']

    # Anomaly detection against the imported schema.
    example_validator = ExampleValidator(
        statistics=statistics,
        schema=user_schema)

    # Feature engineering shared between training and serving.
    transform = Transform(
        examples=examples,
        schema=user_schema,
        preprocessing_fn=preprocessing_fn)

    # Trainer keyword arguments; the local generic executor is the default.
    trainer_kwargs = dict(
        run_fn=run_fn,
        transformed_examples=transform.outputs['transformed_examples'],
        schema=user_schema,
        transform_graph=transform.outputs['transform_graph'],
        train_args=train_args,
        eval_args=eval_args,
        custom_executor_spec=executor_spec.ExecutorClassSpec(
            trainer_executor.GenericExecutor),
    )
    if ai_platform_training_args is not None:
        # Swap in the AI Platform executor and pass its settings along.
        trainer_kwargs['custom_executor_spec'] = (
            executor_spec.ExecutorClassSpec(
                ai_platform_trainer_executor.GenericExecutor))
        trainer_kwargs['custom_config'] = {
            ai_platform_trainer_executor.TRAINING_ARGS_KEY:
            ai_platform_training_args,
        }
    trainer = Trainer(**trainer_kwargs)

    components = [
        example_gen,
        statistics_gen,
        schema_gen,
        importer_node,
        example_validator,
        transform,
        trainer,
    ]

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=components,
        # Change this value to control caching of execution results.
        enable_cache=True,
        metadata_connection_config=metadata_connection_config,
        beam_pipeline_args=beam_pipeline_args,
    )