Python Importer示例，tfx.dsl.components.common.importer.Importer Python示例

示例#1

0

显示文件

    def testBuildImporter(self):
        """Builds a step from an Importer and compares it with golden protos."""
        importer_node = importer.Importer(
            instance_name='my_importer',
            source_uri='m/y/u/r/i',
            properties={'split_names': '["train", "eval"]'},
            custom_properties={
                'str_custom_property': 'abc',
                'int_custom_property': 123,
            },
            artifact_type=standard_artifacts.Examples)

        # The builder receives these two containers and the test inspects
        # them after build().
        deployment_config = pipeline_pb2.PipelineDeploymentConfig()
        component_defs = {}
        builder = step_builder.StepBuilder(
            node=importer_node,
            deployment_config=deployment_config,
            component_defs=component_defs)
        actual_step_spec = self._sole(builder.build())
        actual_component_def = self._sole(component_defs)

        # Component definition.
        expected_component_def = test_utils.get_proto_from_test_data(
            'expected_importer_component.pbtxt',
            pipeline_pb2.ComponentSpec())
        self.assertProtoEquals(expected_component_def, actual_component_def)

        # Task (step) spec.
        expected_step_spec = test_utils.get_proto_from_test_data(
            'expected_importer_task.pbtxt',
            pipeline_pb2.PipelineTaskSpec())
        self.assertProtoEquals(expected_step_spec, actual_step_spec)

        # Deployment config, which carries the executor section.
        expected_deployment_config = test_utils.get_proto_from_test_data(
            'expected_importer_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig())
        self.assertProtoEquals(expected_deployment_config, deployment_config)

示例#2

0

显示文件

文件： step_builder_test.py 项目： jay90099/tfx

    def testBuildImporterWithRuntimeParam(self):
        """Builds an Importer step whose source URI is a RuntimeParameter."""
        runtime_uri = data_types.RuntimeParameter(
            name='runtime_flag', ptype=str)
        importer_node = importer.Importer(
            source_uri=runtime_uri,
            artifact_type=standard_artifacts.Examples,
        ).with_id('my_importer')

        deployment_config = pipeline_pb2.PipelineDeploymentConfig()
        component_defs = {}
        # Build inside a ParameterContext so the parameters seen during the
        # build can be checked at the end of the test.
        with parameter_utils.ParameterContext() as pc:
            builder = step_builder.StepBuilder(
                node=importer_node,
                deployment_config=deployment_config,
                component_defs=component_defs)
            actual_step_spec = self._sole(builder.build())
        actual_component_def = self._sole(component_defs)

        expected_component_def = test_utils.get_proto_from_test_data(
            'expected_importer_component_with_runtime_param.pbtxt',
            pipeline_pb2.ComponentSpec())
        self.assertProtoEquals(expected_component_def, actual_component_def)

        expected_step_spec = test_utils.get_proto_from_test_data(
            'expected_importer_task_with_runtime_param.pbtxt',
            pipeline_pb2.PipelineTaskSpec())
        self.assertProtoEquals(expected_step_spec, actual_step_spec)

        expected_deployment_config = test_utils.get_proto_from_test_data(
            'expected_importer_executor_with_runtime_param.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig())
        self.assertProtoEquals(expected_deployment_config, deployment_config)

        self.assertListEqual([runtime_uri], pc.parameters)

示例#3

0

显示文件

文件： importer_test.py 项目： sycdesign/tfx

 def testImporterDefinitionWithSingleUri(self):
     """Checks exec properties, inputs and output type of a single-URI Importer."""
     single_uri_importer = importer.Importer(
         instance_name='my_importer',
         source_uri='m/y/u/r/i',
         properties={'split_names': '["train", "eval"]'},
         custom_properties={
             'str_custom_property': 'abc',
             'int_custom_property': 123,
         },
         artifact_type=standard_artifacts.Examples)

     # The expected mapping is spelled out separately from the constructor
     # arguments so the comparison does not rely on shared dict objects.
     self.assertDictEqual(
         single_uri_importer.exec_properties,
         {
             importer.SOURCE_URI_KEY: 'm/y/u/r/i',
             importer.REIMPORT_OPTION_KEY: 0,
             importer.PROPERTIES_KEY: {
                 'split_names': '["train", "eval"]',
             },
             importer.CUSTOM_PROPERTIES_KEY: {
                 'str_custom_property': 'abc',
                 'int_custom_property': 123,
             },
         })

     self.assertEmpty(single_uri_importer.inputs.get_all())

     result_channel = single_uri_importer.outputs[importer.IMPORT_RESULT_KEY]
     self.assertEqual(result_channel.type, standard_artifacts.Examples)

示例#4

0

显示文件

    def testIsImporter(self):
        """is_importer accepts an Importer and rejects a CsvExampleGen."""
        schema_importer = importer.Importer(
            source_uri="uri/to/schema",
            artifact_type=standard_artifacts.Schema)
        self.assertTrue(compiler_utils.is_importer(schema_importer))

        csv_example_gen = CsvExampleGen(input_base="data_path")
        self.assertFalse(compiler_utils.is_importer(csv_example_gen))

示例#5

0

显示文件

文件： ai_platform_training_component_integration_test.py 项目： jay90099/tfx

  def testSuccessfulExecution(self):
    """Runs a pipeline of an Importer feeding the AI Platform training component."""
    dataset_uri = f'gs://{self._TEST_DATA_BUCKET}/ai-platform-training/mnist'
    example_importer = importer.Importer(
        artifact_type=simple_artifacts.File,
        reimport=False,
        source_uri=dataset_uri,
    ).with_id('examples')

    # Placeholder names match the keys of the inputs, outputs and parameters
    # passed to the training component below.
    training_args = [
        '--dataset',
        placeholders.InputUriPlaceholder('examples'),
        '--model-dir',
        placeholders.OutputUriPlaceholder('model'),
        '--lr',
        placeholders.InputValuePlaceholder('learning_rate'),
    ]
    train = ai_platform_training_component.create_ai_platform_training(
        name='simple_aip_training',
        project_id=self._GCP_PROJECT_ID,
        region=self._GCP_REGION,
        image_uri=self._TRAINING_IMAGE,
        args=training_args,
        scale_tier='BASIC',
        inputs={'examples': example_importer.outputs['result']},
        outputs={'model': standard_artifacts.Model},
        parameters={'learning_rate': '0.001'})

    # A random id is formatted into the name prefix for each run.
    name = _PIPELINE_NAME_PREFIX.format(test_utils.random_id())
    root = self._pipeline_root(name)
    aip_training_pipeline = pipeline.Pipeline(
        pipeline_name=name,
        pipeline_root=root,
        components=[example_importer, train],
    )

    self._run_pipeline(aip_training_pipeline)

示例#6

0

显示文件

    def testIsImporter(self):
        """is_importer accepts Importer and legacy ImporterNode, not CsvExampleGen."""
        current_importer = importer.Importer(
            instance_name="import_schema",
            source_uri="uri/to/schema",
            artifact_type=standard_artifacts.Schema)
        self.assertTrue(compiler_utils.is_importer(current_importer))

        # The legacy node class takes the same arguments and must also match.
        legacy_importer = legacy_importer_node.ImporterNode(
            instance_name="import_schema",
            source_uri="uri/to/schema",
            artifact_type=standard_artifacts.Schema)
        self.assertTrue(compiler_utils.is_importer(legacy_importer))

        csv_example_gen = CsvExampleGen(input=external_input("data_path"))
        self.assertFalse(compiler_utils.is_importer(csv_example_gen))

示例#7

0

显示文件

文件： importer_test.py 项目： sycdesign/tfx

    def testImporterDumpsJsonRoundtrip(self):
        """Dumps an Importer with json_utils and checks the reloaded fields."""
        expected_name = 'my_importer'
        expected_uris = ['m/y/u/r/i']
        original = importer.Importer(
            instance_name=expected_name,
            source_uri=expected_uris,
            artifact_type=standard_artifacts.Examples)

        # dumps() will raise an assertion if the object is not JSONable.
        serialized = json_utils.dumps(original)
        restored = json_utils.loads(serialized)

        self.assertEqual(restored._instance_name, expected_name)
        self.assertEqual(restored._source_uri, expected_uris)

示例#8

0

显示文件

  def setUp(self):
    """Creates Importer nodes for the raw examples, schema and model test data."""
    super().setUp()

    # Source locations under the test data root.
    raw_examples_uri = os.path.join(self._testdata_root, 'csv_example_gen')
    schema_uri = os.path.join(self._testdata_root, 'schema_gen')
    previous_model_uri = os.path.join(
        self._testdata_root, 'trainer', 'previous')

    # Examples artifact, tagged with its split names.
    self.raw_examples_importer = importer.Importer(
        source_uri=raw_examples_uri,
        artifact_type=standard_artifacts.Examples,
        reimport=True,
        properties={'split_names': '["train", "eval"]'},
    ).with_id('raw_examples')

    # Schema artifact.
    self.schema_importer = importer.Importer(
        source_uri=schema_uri,
        artifact_type=standard_artifacts.Schema,
        reimport=True,
    ).with_id('schema')

    # Model artifact.
    self.model_1_importer = importer.Importer(
        source_uri=previous_model_uri,
        artifact_type=standard_artifacts.Model,
        reimport=True,
    ).with_id('model_1')

示例#9

0

显示文件

文件： test_pipeline_with_importer.py 项目： jay90099/tfx

def create_pipeline() -> pipeline_pb2.Pipeline:
    """Compiles a SYNC pipeline whose only component is an Importer node.

    Returns:
      The result of compiling the pipeline with `compiler.Compiler`.
    """
    importer_node = importer.Importer(
        source_uri='my_url',
        reimport=True,
        custom_properties={
            'int_custom_property': 123,
            'str_custom_property': 'abc',
        },
        artifact_type=standard_artifacts.Schema,
    ).with_id('my_importer')

    test_pipeline = pipeline_lib.Pipeline(
        pipeline_name='my_pipeline',
        pipeline_root='/path/to/root',
        components=[importer_node],
        execution_mode=pipeline_lib.ExecutionMode.SYNC)

    return compiler.Compiler().compile(test_pipeline)

示例#10

0

显示文件

def pipeline_with_one_container_spec_component() -> tfx_pipeline.Pipeline:
    """Returns a pipeline of an Importer feeding a DummyContainerSpecComponent."""
    model_importer = importer.Importer(
        source_uri='some-uri',
        artifact_type=standard_artifacts.Model,
    ).with_id('my_importer')

    # The container component reads the importer's 'result' output and is
    # handed a channel wrapping a fresh Model artifact as its output.
    model_output = channel_utils.as_channel([standard_artifacts.Model()])
    container_task = DummyContainerSpecComponent(
        input1=model_importer.outputs['result'],
        output1=model_output,
        param1='value1',
    )

    return tfx_pipeline.Pipeline(
        pipeline_name='pipeline-with-container',
        pipeline_root=_TEST_PIPELINE_ROOT,
        components=[model_importer, container_task],
    )

示例#11

0

显示文件

文件： importer_test.py 项目： jay90099/tfx

 def testImporterDefinitionWithSingleUri(self):
     """Checks exec properties, output channel and artifact of an Importer."""
     single_uri_importer = importer.Importer(
         source_uri='m/y/u/r/i',
         properties={'split_names': '["train", "eval"]'},
         custom_properties={
             'str_custom_property': 'abc',
             'int_custom_property': 123,
         },
         artifact_type=standard_artifacts.Examples,
     ).with_id('my_importer')

     # Only the source URI and reimport option are expected as exec
     # properties; the other properties are checked on the channel below.
     self.assertDictEqual(
         single_uri_importer.exec_properties,
         {
             importer.SOURCE_URI_KEY: 'm/y/u/r/i',
             importer.REIMPORT_OPTION_KEY: 0,
         })
     self.assertEmpty(single_uri_importer.inputs)

     result_channel = single_uri_importer.outputs[importer.IMPORT_RESULT_KEY]
     self.assertEqual(result_channel.type, standard_artifacts.Examples)

     # Properties as recorded on the output channel.
     self.assertEqual(
         result_channel.additional_properties,
         {'split_names': '["train", "eval"]'})
     self.assertEqual(
         result_channel.additional_custom_properties,
         {
             'str_custom_property': 'abc',
             'int_custom_property': 123,
         })

     # Properties as recorded on the first artifact in the channel.
     first_artifact = list(result_channel.get())[0]
     self.assertEqual(first_artifact.split_names, '["train", "eval"]')
     self.assertEqual(
         first_artifact.get_string_custom_property('str_custom_property'),
         'abc')
     self.assertEqual(
         first_artifact.get_int_custom_property('int_custom_property'),
         123)

示例#12

0

显示文件

文件： iris_pipeline_sync.py 项目： jay90099/tfx

def create_test_pipeline():
    """Builds an Iris example pipeline with slight changes.

    The pipeline wires CsvExampleGen, two StatisticsGen nodes (one of them fed
    by an Importer), SchemaGen, ExampleValidator, Trainer, a Resolver using
    LatestBlessedModelStrategy, Evaluator and Pusher. All paths are relative
    placeholder strings built with `os.path.join`.

    Returns:
      A `pipeline.Pipeline` in SYNC execution mode with caching enabled.
    """
    pipeline_name = "iris"
    iris_root = "iris_root"
    serving_model_dir = os.path.join(iris_root, "serving_model", pipeline_name)
    tfx_root = "tfx_root"
    data_path = os.path.join(tfx_root, "data_path")
    pipeline_root = os.path.join(tfx_root, "pipelines", pipeline_name)

    example_gen = CsvExampleGen(input_base=data_path)

    statistics_gen = StatisticsGen(examples=example_gen.outputs["examples"])

    # Importer node whose "result" output feeds a second StatisticsGen below.
    my_importer = importer.Importer(
        source_uri="m/y/u/r/i",
        properties={
            "split_names": "['train', 'eval']",
        },
        custom_properties={
            "int_custom_property": 42,
            "str_custom_property": "42",
        },
        artifact_type=standard_artifacts.Examples).with_id("my_importer")
    # Explicit id distinguishes this node from `statistics_gen` above.
    another_statistics_gen = StatisticsGen(
        examples=my_importer.outputs["result"]).with_id(
            "another_statistics_gen")

    schema_gen = SchemaGen(statistics=statistics_gen.outputs["statistics"])

    example_validator = ExampleValidator(
        statistics=statistics_gen.outputs["statistics"],
        schema=schema_gen.outputs["schema"])

    trainer = Trainer(
        # Use RuntimeParameter as module_file to test out RuntimeParameter in
        # compiler.
        module_file=data_types.RuntimeParameter(name="module_file",
                                                default=os.path.join(
                                                    iris_root,
                                                    "iris_utils.py"),
                                                ptype=str),
        custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
        examples=example_gen.outputs["examples"],
        schema=schema_gen.outputs["schema"],
        train_args=trainer_pb2.TrainArgs(num_steps=2000),
        # Attaching `TrainerArgs` as platform config is not sensible practice,
        # but is only for testing purpose.
        eval_args=trainer_pb2.EvalArgs(num_steps=5)).with_platform_config(
            config=trainer_pb2.TrainArgs(num_steps=2000))

    # Resolver whose "model" output is used as the Evaluator's baseline model.
    model_resolver = resolver.Resolver(
        strategy_class=latest_blessed_model_strategy.
        LatestBlessedModelStrategy,
        model=Channel(type=standard_artifacts.Model,
                      producer_component_id=trainer.id),
        model_blessing=Channel(type=standard_artifacts.ModelBlessing)).with_id(
            "latest_blessed_model_resolver")

    # Evaluation config: one model spec, one slicing spec, and value/change
    # thresholds on "sparse_categorical_accuracy".
    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(signature_name="eval")],
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=[
            tfma.MetricsSpec(
                thresholds={
                    "sparse_categorical_accuracy":
                    tfma.MetricThreshold(
                        value_threshold=tfma.GenericValueThreshold(
                            lower_bound={"value": 0.6}),
                        change_threshold=tfma.GenericChangeThreshold(
                            direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                            absolute={"value": -1e-10}))
                })
        ])
    evaluator = Evaluator(examples=example_gen.outputs["examples"],
                          model=trainer.outputs["model"],
                          baseline_model=model_resolver.outputs["model"],
                          eval_config=eval_config)

    # Pusher takes the trained model and the Evaluator's "blessing" output and
    # targets a filesystem directory.
    pusher = Pusher(model=trainer.outputs["model"],
                    model_blessing=evaluator.outputs["blessing"],
                    push_destination=pusher_pb2.PushDestination(
                        filesystem=pusher_pb2.PushDestination.Filesystem(
                            base_directory=serving_model_dir)))

    # NOTE(review): `another_statistics_gen` is listed before `my_importer`
    # although it consumes its output — presumably list order is not
    # significant to the pipeline; confirm.
    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=[
            example_gen,
            statistics_gen,
            another_statistics_gen,
            my_importer,
            schema_gen,
            example_validator,
            trainer,
            model_resolver,
            evaluator,
            pusher,
        ],
        enable_cache=True,
        beam_pipeline_args=["--my_testing_beam_pipeline_args=foo"],
        # Attaching `TrainerArgs` as platform config is not sensible practice,
        # but is only for testing purpose.
        platform_config=trainer_pb2.TrainArgs(num_steps=2000),
        execution_mode=pipeline.ExecutionMode.SYNC)

示例#13

0

显示文件

    def setUp(self):
        """Creates the Importer nodes and module path used by the tests.

        Every importer is built with `reimport=True` and reads from a
        sub-directory of `self._testdata_root`.
        """
        super().setUp()

        # Transformed Example artifacts for testing.
        transformed_examples_uri = os.path.join(
            self._testdata_root, 'transform', 'transformed_examples')
        self.transformed_examples_importer = importer.Importer(
            source_uri=transformed_examples_uri,
            artifact_type=standard_artifacts.Examples,
            reimport=True,
            properties={'split_names': '["train", "eval"]'},
        ).with_id('transformed_examples')

        # Schema artifact for testing.
        schema_uri = os.path.join(self._testdata_root, 'schema_gen')
        self.schema_importer = importer.Importer(
            source_uri=schema_uri,
            artifact_type=standard_artifacts.Schema,
            reimport=True,
        ).with_id('schema')

        # TransformGraph artifact for testing.
        transform_graph_uri = os.path.join(
            self._testdata_root, 'transform', 'transform_graph')
        self.transform_graph_importer = importer.Importer(
            source_uri=transform_graph_uri,
            artifact_type=standard_artifacts.TransformGraph,
            reimport=True,
        ).with_id('transform_graph')

        # Model artifacts for testing: 'previous' and 'current' trainer output.
        previous_model_uri = os.path.join(
            self._testdata_root, 'trainer', 'previous')
        self.model_1_importer = importer.Importer(
            source_uri=previous_model_uri,
            artifact_type=standard_artifacts.Model,
            reimport=True,
        ).with_id('model_1')

        current_model_uri = os.path.join(
            self._testdata_root, 'trainer', 'current')
        self.model_2_importer = importer.Importer(
            source_uri=current_model_uri,
            artifact_type=standard_artifacts.Model,
            reimport=True,
        ).with_id('model_2')

        # ModelBlessing artifacts for testing; both read the same 'blessed'
        # directory and differ only in node id.
        blessing_1_uri = os.path.join(
            self._testdata_root, 'model_validator', 'blessed')
        self.model_blessing_1_importer = importer.Importer(
            source_uri=blessing_1_uri,
            artifact_type=standard_artifacts.ModelBlessing,
            reimport=True,
            custom_properties={'blessed': 1},
        ).with_id('model_blessing_1')

        blessing_2_uri = os.path.join(
            self._testdata_root, 'model_validator', 'blessed')
        self.model_blessing_2_importer = importer.Importer(
            source_uri=blessing_2_uri,
            artifact_type=standard_artifacts.ModelBlessing,
            reimport=True,
            custom_properties={'blessed': 1},
        ).with_id('model_blessing_2')

        # Test data and modules for native Keras trainer and tuner.
        self._penguin_tuner_module = os.path.join(
            self._MODULE_ROOT, 'tuner_module.py')

        penguin_examples_uri = os.path.join(
            self._testdata_root, 'penguin', 'data')
        self.penguin_examples_importer = importer.Importer(
            source_uri=penguin_examples_uri,
            artifact_type=standard_artifacts.Examples,
            reimport=True,
            properties={'split_names': '["train", "eval"]'},
        ).with_id('penguin_examples')

        penguin_schema_uri = os.path.join(
            self._testdata_root, 'penguin', 'schema')
        self.penguin_schema_importer = importer.Importer(
            source_uri=penguin_schema_uri,
            artifact_type=standard_artifacts.Schema,
            reimport=True,
        ).with_id('penguin_schema')