  def testTaxiPipelineConstructionAndDefinitionFileExists(self):
    logical_pipeline = taxi_pipeline_kubeflow_gcp.create_pipeline(
        pipeline_name=taxi_pipeline_kubeflow_gcp._pipeline_name,
        pipeline_root=taxi_pipeline_kubeflow_gcp._pipeline_root,
        module_file=taxi_pipeline_kubeflow_gcp._module_file,
        ai_platform_training_args=(
            taxi_pipeline_kubeflow_gcp._ai_platform_training_args),
        ai_platform_serving_args=(
            taxi_pipeline_kubeflow_gcp._ai_platform_serving_args))

    self.assertEqual(9, len(logical_pipeline.components))

    KubeflowDagRunner().run(logical_pipeline)
    file_path = os.path.join(self._tmp_dir,
                             'chicago_taxi_pipeline_kubeflow_gcp.tar.gz')
    self.assertTrue(tf.io.gfile.exists(file_path))
  def testFullTaxiGcpPipeline(self):
    pipeline_name = 'gcp-perf-test-full-e2e-test-{}'.format(
        test_utils.random_id())

    # Custom CAIP training job using a testing image.
    ai_platform_training_args = {
        'project': self._GCP_PROJECT_ID,
        'region': self._GCP_REGION,
        'scaleTier': 'CUSTOM',
        'masterType': 'large_model',
        'masterConfig': {
            'imageUri': self.container_image
        },
        'workerType': self._WORKER_TYPE,
        'parameterServerType': 'standard',
        'workerCount': self._WORKER_COUNT,
        'parameterServerCount': self._PARAMETER_SERVER_COUNT
    }

    pipeline = taxi_pipeline_kubeflow_gcp.create_pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=self._pipeline_root(pipeline_name),
        module_file=self._MODULE_FILE,
        ai_platform_training_args=ai_platform_training_args,
        ai_platform_serving_args=self._AI_PLATFORM_SERVING_ARGS,
        beam_pipeline_args=self._BEAM_PIPELINE_ARGS)

    # TODO(b/162451308): Add this clean-up back after we re-enable AIP pusher
    # when AIP prediction service supports TF>=2.3.
    # self.addCleanup(kubeflow_test_utils.delete_ai_platform_model,
    #                 self._MODEL_NAME)

    self._compile_and_run_pipeline(
        pipeline=pipeline,
        query_sample_rate=1,
        # (1M * batch_size=200) / 200M records ~ 1 epoch
        train_steps=1000000,
        eval_steps=10000,
        worker_count=20,
        parameter_server_count=3,
    )