def testTaxiPipelineConstructionAndDefinitionFileExists(self):
  logical_pipeline = taxi_pipeline_kubeflow_simple._create_pipeline(
      pipeline_name=taxi_pipeline_kubeflow_simple._pipeline_name,
      pipeline_root=taxi_pipeline_kubeflow_simple._pipeline_root,
      data_root=taxi_pipeline_kubeflow_simple._data_root,
      module_file=taxi_pipeline_kubeflow_simple._module_file,
      serving_model_dir=taxi_pipeline_kubeflow_simple._serving_model_dir,
      direct_num_workers=1)
  self.assertEqual(9, len(logical_pipeline.components))

  KubeflowDagRunner().run(logical_pipeline)
  file_path = os.path.join(
      self._tmp_dir, 'chicago_taxi_pipeline_kubeflow_simple.tar.gz')
  self.assertTrue(tf.io.gfile.exists(file_path))
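# The tests in this collection are shown without their surrounding test
# classes. A minimal sketch of the scaffolding they assume follows -- the
# base-class name is hypothetical, and the setUp details are an assumption
# based on the fact that KubeflowDagRunner writes <pipeline_name>.tar.gz to
# os.getcwd() when no output_dir is given, so `tmp_dir` must also be the
# working directory. `fileio` is tfx.dsl.io.fileio in recent TFX releases;
# older snippets use tf.io.gfile instead.
import os

import tensorflow as tf
from tfx.dsl.io import fileio
from tfx.orchestration.kubeflow.kubeflow_dag_runner import KubeflowDagRunner


class _PipelineDefinitionTest(tf.test.TestCase):  # hypothetical base class

  def setUp(self):
    super().setUp()
    # Both spellings appear across the tests here, so provide both.
    self.tmp_dir = self.create_tempdir().full_path
    self._tmp_dir = self.tmp_dir
    self._old_cwd = os.getcwd()
    os.chdir(self.tmp_dir)  # so the compiled .tar.gz lands in tmp_dir

  def tearDown(self):
    super().tearDown()
    os.chdir(self._old_cwd)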
def testTaxiPipelineConstructionAndDefinitionFileExists(self):
  logical_pipeline = taxi_pipeline_kubeflow_local._create_pipeline(
      pipeline_name=taxi_pipeline_kubeflow_local._pipeline_name,
      pipeline_root=taxi_pipeline_kubeflow_local._pipeline_root,
      data_root=taxi_pipeline_kubeflow_local._data_root,
      module_file=taxi_pipeline_kubeflow_local._module_file,
      serving_model_dir=taxi_pipeline_kubeflow_local._serving_model_dir,
      beam_pipeline_args=[])
  self.assertEqual(10, len(logical_pipeline.components))

  KubeflowDagRunner().run(logical_pipeline)
  file_path = os.path.join(
      self.tmp_dir, 'chicago_taxi_pipeline_kubeflow_local.tar.gz')
  self.assertTrue(fileio.exists(file_path))
def testTaxiPipelineConstructionAndDefinitionFileExists(self):
  logical_pipeline = taxi_pipeline_kubeflow_gcp.create_pipeline(
      pipeline_name=taxi_pipeline_kubeflow_gcp._pipeline_name,
      pipeline_root=taxi_pipeline_kubeflow_gcp._pipeline_root,
      module_file=taxi_pipeline_kubeflow_gcp._module_file,
      ai_platform_training_args=taxi_pipeline_kubeflow_gcp
      ._ai_platform_training_args,
      ai_platform_serving_args=taxi_pipeline_kubeflow_gcp
      ._ai_platform_serving_args)
  self.assertEqual(9, len(logical_pipeline.components))

  KubeflowDagRunner().run(logical_pipeline)
  file_path = os.path.join(self._tmp_dir,
                           'chicago_taxi_pipeline_kubeflow_gcp.tar.gz')
  self.assertTrue(tf.io.gfile.exists(file_path))
def testTaxiPipelineConstructionAndDefinitionFileExists(self):
  logical_pipeline = taxi_pipeline_kubeflow_simple._create_pipeline(
      pipeline_name=taxi_pipeline_kubeflow_simple._pipeline_name,
      pipeline_root=taxi_pipeline_kubeflow_simple._pipeline_root,
      query=taxi_pipeline_kubeflow_simple._query,
      module_file=taxi_pipeline_kubeflow_simple._module_file,
      beam_pipeline_args=taxi_pipeline_kubeflow_simple._beam_pipeline_args,
  )
  self.assertEqual(9, len(logical_pipeline.components))

  KubeflowDagRunner().run(logical_pipeline)
  file_path = os.path.join(
      self._tmp_dir, 'chicago_taxi_pipeline_kubeflow_simple.tar.gz')
  self.assertTrue(tf.io.gfile.exists(file_path))
def testPipelineConstruction(self):
  logical_pipeline = penguin_pipeline_sklearn_gcp._create_pipeline(
      pipeline_name=self._pipeline_name,
      pipeline_root=self._pipeline_root,
      data_root=self._data_root,
      trainer_module_file=self._trainer_module_file,
      evaluator_module_file=self._evaluator_module_file,
      ai_platform_training_args=self._ai_platform_training_args,
      ai_platform_serving_args=self._ai_platform_serving_args,
      beam_pipeline_args=[])
  self.assertEqual(8, len(logical_pipeline.components))

  KubeflowDagRunner().run(logical_pipeline)
  file_path = os.path.join(self.tmp_dir, 'sklearn_test.tar.gz')
  self.assertTrue(fileio.exists(file_path))
def testPenguinPipelineConstructionAndDefinitionFileExists(self):
  logical_pipeline = penguin_pipeline_kubeflow_gcp.create_pipeline(
      pipeline_name=penguin_pipeline_kubeflow_gcp._pipeline_name,
      pipeline_root=penguin_pipeline_kubeflow_gcp._pipeline_root,
      data_root=penguin_pipeline_kubeflow_gcp._data_root,
      module_file=penguin_pipeline_kubeflow_gcp._module_file,
      enable_tuning=True,
      ai_platform_training_args=penguin_pipeline_kubeflow_gcp
      ._ai_platform_training_args,
      ai_platform_serving_args=penguin_pipeline_kubeflow_gcp
      ._ai_platform_serving_args,
      beam_pipeline_args=penguin_pipeline_kubeflow_gcp._beam_pipeline_args)
  self.assertEqual(10, len(logical_pipeline.components))

  KubeflowDagRunner().run(logical_pipeline)
  file_path = os.path.join(self.tmp_dir, 'penguin_kubeflow_gcp.tar.gz')
  self.assertTrue(fileio.exists(file_path))
def testPipelineConstruction(self, resolve_mock):
  # Avoid actually performing user module packaging, because the relative
  # path is not valid with respect to the temporary directory.
  resolve_mock.side_effect = lambda pipeline_root: None

  logical_pipeline = penguin_pipeline_sklearn_gcp._create_pipeline(
      pipeline_name=self._pipeline_name,
      pipeline_root=self._pipeline_root,
      data_root=self._data_root,
      trainer_module_file=self._trainer_module_file,
      evaluator_module_file=self._evaluator_module_file,
      ai_platform_training_args=self._ai_platform_training_args,
      ai_platform_serving_args=self._ai_platform_serving_args,
      beam_pipeline_args=[])
  self.assertEqual(8, len(logical_pipeline.components))

  KubeflowDagRunner().run(logical_pipeline)
  file_path = os.path.join(self.tmp_dir, 'sklearn_test.tar.gz')
  self.assertTrue(fileio.exists(file_path))
def testTaxiPipelineConstructionAndDefinitionFileExists(
    self, resolve_mock):
  # Avoid actually performing user module packaging because a placeholder
  # GCS bucket is used.
  resolve_mock.side_effect = lambda pipeline_root: None

  logical_pipeline = taxi_pipeline_kubeflow_local._create_pipeline(
      pipeline_name=taxi_pipeline_kubeflow_local._pipeline_name,
      pipeline_root=taxi_pipeline_kubeflow_local._pipeline_root,
      data_root=taxi_pipeline_kubeflow_local._data_root,
      module_file=taxi_pipeline_kubeflow_local._module_file,
      serving_model_dir=taxi_pipeline_kubeflow_local._serving_model_dir,
      beam_pipeline_args=[])
  self.assertEqual(10, len(logical_pipeline.components))

  KubeflowDagRunner().run(logical_pipeline)
  file_path = os.path.join(
      self.tmp_dir, 'chicago_taxi_pipeline_kubeflow_local.tar.gz')
  self.assertTrue(fileio.exists(file_path))
def testPenguinPipelineConstructionAndDefinitionFileExists(
    self, resolve_mock):
  # Avoid actually performing user module packaging because a placeholder
  # GCS bucket is used.
  resolve_mock.side_effect = lambda pipeline_root: None

  logical_pipeline = penguin_pipeline_kubeflow_gcp.create_pipeline(
      pipeline_name=penguin_pipeline_kubeflow_gcp._pipeline_name,
      pipeline_root=penguin_pipeline_kubeflow_gcp._pipeline_root,
      data_root=penguin_pipeline_kubeflow_gcp._data_root,
      module_file=penguin_pipeline_kubeflow_gcp._module_file,
      enable_tuning=True,
      ai_platform_training_args=penguin_pipeline_kubeflow_gcp
      ._ai_platform_training_args,
      ai_platform_serving_args=penguin_pipeline_kubeflow_gcp
      ._ai_platform_serving_args,
      beam_pipeline_args=penguin_pipeline_kubeflow_gcp._beam_pipeline_args)
  self.assertEqual(10, len(logical_pipeline.components))

  KubeflowDagRunner().run(logical_pipeline)
  file_path = os.path.join(self.tmp_dir, 'penguin_kubeflow_gcp.tar.gz')
  self.assertTrue(fileio.exists(file_path))
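# The three tests above take a `resolve_mock` parameter, which implies a
# `mock.patch` decorator on each test method. A minimal sketch of that
# decoration follows. The patch target below is a HYPOTHETICAL placeholder:
# the real target is whichever helper TFX uses to resolve and package user
# module files against pipeline_root, and its import path varies across TFX
# versions.
from unittest import mock


@mock.patch('tfx.some_module.resolve_user_modules')  # hypothetical target
def testPipelineConstruction(self, resolve_mock):
  # Return None so no packaging work is attempted while the pipeline is
  # compiled; these tests only check the definition file, not execution.
  resolve_mock.side_effect = lambda pipeline_root: None
  ...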
def _create_pipeline() -> pipeline.Pipeline:
  """Implements a simple Chicago taxi pipeline with TFX."""
  examples = csv_input(_data_root)

  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input=examples)

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  infer_schema = SchemaGen(statistics=statistics_gen.outputs['statistics'])

  return pipeline.Pipeline(
      pipeline_name='chicago_taxi_pipeline_kubeflow',
      pipeline_root=_pipeline_root,
      components=[example_gen, statistics_gen, infer_schema],
      additional_pipeline_args={
          'beam_pipeline_args': [
              '--runner=DataflowRunner',
              '--experiments=shuffle_mode=auto',
              '--project=' + _project_id,
              '--temp_location=' + os.path.join(_output_dir, 'tmp'),
              '--region=' + _gcp_region,
          ],
      },
      log_root='/var/tmp/tfx/logs',
  )


_ = KubeflowDagRunner().run(_create_pipeline())
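# Each test above asserts on KubeflowDagRunner's default output,
# <pipeline_name>.tar.gz in the current working directory. A minimal sketch
# of making the output location and container image explicit instead -- the
# image name and paths here are illustrative assumptions, not values taken
# from the examples above:
from tfx.orchestration.kubeflow import kubeflow_dag_runner

_runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
    tfx_image='gcr.io/my-project/my-tfx-image:latest')  # hypothetical image

_ = kubeflow_dag_runner.KubeflowDagRunner(
    output_dir='/tmp/tfx/pipelines',  # where the package file is written
    output_filename='taxi_pipeline.tar.gz',  # overrides the default name
    config=_runner_config,
).run(_create_pipeline())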