def main():
    """Submit the TFX sample pipeline, wait for it, and write a JUnit XML report.

    Steps, in order: verify the compiled pipeline file exists, create an
    experiment, submit the run with the sample parameters, wait up to 1200
    seconds for completion, print the Argo workflow log, and write the
    collected test cases to ``args.result``.  When the input file is missing
    or the run does not succeed, the report is written and the process exits
    early via ``exit()``.
    """
    args = parse_arguments()
    junit_cases = []
    suite_name = 'TFX Sample Test'

    ###### Initialization ######
    kfp_client = Client(namespace=args.namespace)

    ###### Check Input File ######
    utils.add_junit_test(
        junit_cases,
        'input generated yaml file',
        os.path.exists(args.input),
        'yaml file is not generated',
    )
    if not os.path.exists(args.input):
        utils.write_junit_xml(suite_name, args.result, junit_cases)
        exit()

    ###### Create Experiment ######
    experiment = kfp_client.create_experiment('TFX sample experiment')
    experiment_id = experiment.id
    utils.add_junit_test(junit_cases, 'create experiment', True)

    ###### Create Job ######
    pipeline_params = {
        'output': args.output,
        'project': 'ml-pipeline-test',
        'column-names': 'gs://ml-pipeline-dataset/sample-test/taxi-cab-classification/column-names.json',
        'evaluation': 'gs://ml-pipeline-dataset/sample-test/taxi-cab-classification/eval20.csv',
        'train': 'gs://ml-pipeline-dataset/sample-test/taxi-cab-classification/train50.csv',
        'hidden-layer-size': '5',
        'steps': '5',
    }
    submitted_run = kfp_client.run_pipeline(
        experiment_id, 'TFX_sample', args.input, pipeline_params)
    run_id = submitted_run.id
    utils.add_junit_test(junit_cases, 'create pipeline run', True)

    ###### Monitor Job ######
    started_at = datetime.now()
    run_detail = kfp_client.wait_for_run_completion(run_id, 1200)
    run_succeeded = run_detail.run.status.lower() == 'succeeded'
    finished_at = datetime.now()
    elapsed_seconds = (finished_at - started_at).seconds
    utils.add_junit_test(
        junit_cases,
        'job completion',
        run_succeeded,
        'waiting for job completion failure',
        elapsed_seconds,
    )
    if not run_succeeded:
        utils.write_junit_xml(suite_name, args.result, junit_cases)
        exit()

    ###### Output Argo Log for Debugging ######
    workflow_json = kfp_client._get_workflow_json(run_id)
    workflow_name = workflow_json['metadata']['name']
    log_command = 'argo logs -n {} -w {}'.format(args.namespace, workflow_name)
    argo_log, _ = utils.run_bash_command(log_command)
    print("=========Argo Workflow Log=========")
    print(argo_log)

    ###### Validate the results ######
    #TODO: enable after launch
    # model analysis html is validated
    # argo_workflow_id = workflow_json['metadata']['name']
    # gcs_html_path = os.path.join(os.path.join(args.output, str(argo_workflow_id)), 'analysis/output_display.html')
    # print('Output display HTML path is ' + gcs_html_path)
    # utils.run_bash_command('gsutil cp ' + gcs_html_path + './')
    # display_file = open('./output_display.html', 'r')
    # is_next_line_state = False
    # for line in display_file:
    #   if is_next_line_state:
    #     state = line.strip()
    #     break
    #   if line.strip() == '<script type="application/vnd.jupyter.widget-state+json">':
    #     is_next_line_state = True
    # import json
    # state_json = json.loads(state)
    # succ = ('state' in state_json and 'version_major' in state_json and 'version_minor' in state_json)
    # utils.add_junit_test(test_cases, 'output display html', succ, 'the state json does not contain state, version_major, or version_inor')

    ###### Delete Job ######
    #TODO: add deletion when the backend API offers the interface.

    ###### Write out the test result in junit xml ######
    utils.write_junit_xml(suite_name, args.result, junit_cases)
def _get_kfp_client(host=None):
    """Construct a KFP ``Client`` for ``host`` (passed through unchanged)."""
    kfp_client = Client(host=host)
    return kfp_client
def check(self):
    """Run sample test and check results.

    Submits the compiled pipeline at ``self._input``, waits for it to finish,
    prints the Argo workflow log, and writes the collected JUnit test cases
    to ``self._result``.

    Pipeline arguments and the timeout come from two YAML files:
    ``DEFAULT_CONFIG`` (mandatory, must define ``test_timeout``) and
    ``CONFIG_DIR/<testname>.config.yaml`` (optional).  A per-test config that
    is empty or has no ``arguments`` section is treated as "no overrides"
    rather than crashing the test.

    Raises:
        RuntimeError: the default config is not valid YAML.
        FileExistsError: the default config cannot be opened (type kept for
            backward compatibility with existing callers).

    Exits the process with status 1 when the input file is missing or the
    run does not succeed.
    """
    test_cases = []
    test_name = self._testname + ' Sample Test'

    ###### Initialization ######
    host = 'ml-pipeline.%s.svc.cluster.local:8888' % self._namespace
    client = Client(host=host)

    ###### Check Input File ######
    input_exists = os.path.exists(self._input)
    utils.add_junit_test(test_cases, 'input generated yaml file',
                         input_exists, 'yaml file is not generated')
    if not input_exists:
        utils.write_junit_xml(test_name, self._result, test_cases)
        print('Error: job not found.')
        exit(1)

    ###### Create Experiment ######
    experiment_name = self._testname + ' sample experiment'
    response = client.create_experiment(experiment_name)
    experiment_id = response.id
    utils.add_junit_test(test_cases, 'create experiment', True)

    ###### Create Job ######
    job_name = self._testname + '_sample'

    ###### Figure out arguments from associated config files. #######
    test_args = {}
    try:
        with open(DEFAULT_CONFIG, 'r') as f:
            raw_args = yaml.safe_load(f)
    except yaml.YAMLError as yamlerr:
        # Chain the cause so the original parser error stays in the traceback.
        raise RuntimeError(
            'Illegal default config:{}'.format(yamlerr)) from yamlerr
    except OSError as ose:
        raise FileExistsError(
            'Default config not found:{}'.format(ose)) from ose
    else:
        test_timeout = raw_args['test_timeout']

    try:
        config_path = os.path.join(
            CONFIG_DIR, '%s.config.yaml' % self._testname)
        with open(config_path, 'r') as f:
            raw_args = yaml.safe_load(f)
    except yaml.YAMLError as yamlerr:
        print('No legit yaml config file found, use default args:{}'.format(yamlerr))
    except OSError as ose:
        print('Config file with the same name not found, use default args:{}'.format(ose))
    else:
        # An empty YAML document loads as None; treat it as no overrides.
        raw_args = raw_args or {}
        # 'arguments' may be absent or null for samples without parameters.
        if raw_args.get('arguments'):
            test_args.update(raw_args['arguments'])
        if 'output' in test_args:
            # output is a special param that has to be specified dynamically.
            test_args['output'] = self._output
        if 'test_timeout' in raw_args:
            test_timeout = raw_args['test_timeout']

    response = client.run_pipeline(experiment_id, job_name, self._input,
                                   test_args)
    run_id = response.id
    utils.add_junit_test(test_cases, 'create pipeline run', True)

    ###### Monitor Job ######
    try:
        start_time = datetime.now()
        response = client.wait_for_run_completion(run_id, test_timeout)
        succ = (response.run.status.lower() == 'succeeded')
        end_time = datetime.now()
        elapsed_time = (end_time - start_time).seconds
        utils.add_junit_test(test_cases, 'job completion', succ,
                             'waiting for job completion failure',
                             elapsed_time)
    finally:
        ###### Output Argo Log for Debugging ######
        # Runs even when waiting raised, so the log is always available.
        workflow_json = client._get_workflow_json(run_id)
        workflow_id = workflow_json['metadata']['name']
        argo_log, _ = utils.run_bash_command('argo logs -n {} -w {}'.format(
            self._namespace, workflow_id))
        print('=========Argo Workflow Log=========')
        print(argo_log)

    if not succ:
        utils.write_junit_xml(test_name, self._result, test_cases)
        exit(1)

    ###### Validate the results for specific test cases ######
    #TODO: Add result check for tfx-cab-classification after launch.
    if self._testname == 'xgboost_training_cm':
        # For xgboost sample, check its confusion matrix.
        cm_tar_path = './confusion_matrix.tar.gz'
        utils.get_artifact_in_minio(workflow_json, 'confusion-matrix',
                                    cm_tar_path, 'mlpipeline-ui-metadata')
        with tarfile.open(cm_tar_path) as tar_handle:
            file_handles = tar_handle.getmembers()
            assert len(file_handles) == 1

            with tar_handle.extractfile(file_handles[0]) as f:
                cm_data = f.read()
                utils.add_junit_test(test_cases, 'confusion matrix format',
                                     (len(cm_data) > 0),
                                     'the confusion matrix file is empty')

    ###### Delete Job ######
    #TODO: add deletion when the backend API offers the interface.

    ###### Write out the test result in junit xml ######
    utils.write_junit_xml(test_name, self._result, test_cases)
def run_pipeline(pipeline, arguments=None, experiment=None, run=None,
                 namespace=None, artifact_path=None, ops=None,
                 url=None, remote=False):
    """remote KubeFlow pipeline execution

    Submit a workflow task to KFP via mlrun API service

    :param pipeline:      KFP pipeline function or path to .yaml/.zip pipeline file
    :param arguments:     pipeline arguments
    :param experiment:    experiment name
    :param run:           optional, run name
    :param namespace:     Kubernetes namespace (if not using default)
    :param url:           optional, url to mlrun API service
    :param artifact_path: target location/url for mlrun artifacts
    :param ops:           additional operators (.apply() to all pipeline functions)
    :param remote:        use mlrun remote API service vs direct KFP APIs

    :return: kubeflow pipeline id
    :raises ValueError: when the remote path is requested but the run DB is
                        not of kind ``http``
    """
    namespace = namespace or mlconf.namespace
    arguments = arguments or {}

    if remote or url:
        mldb = get_run_db(url).connect()
        if mldb.kind != 'http':
            raise ValueError(
                'run pipeline require access to remote api-service'
                ', please set the dbpath url')
        run_id = mldb.submit_pipeline(
            pipeline, arguments, experiment=experiment, run=run,
            namespace=namespace, ops=ops, artifact_path=artifact_path)
    else:
        client = Client(namespace=namespace)
        if isinstance(pipeline, str):
            experiment = client.create_experiment(name=experiment)
            run_result = client.run_pipeline(
                experiment.id, run, pipeline, params=arguments)
        else:
            conf = new_pipe_meta(artifact_path, ops)
            run_result = client.create_run_from_pipeline_func(
                pipeline, arguments, run_name=run,
                experiment_name=experiment, pipeline_conf=conf)

        # The two submission paths return different objects: the result of
        # create_run_from_pipeline_func exposes `run_id`, while the run
        # object returned by run_pipeline carries its identifier in `id`.
        run_id = getattr(run_result, 'run_id', None)
        if run_id is None:
            run_id = getattr(run_result, 'id', None)

    logger.info('Pipeline run id={}, check UI or DB for progress'.format(run_id))
    return run_id
def _get_kfp_client(host=None, namespace: str = "kubeflow"):
    """Construct a KFP ``Client`` bound to ``host`` and ``namespace``."""
    client_kwargs = {"host": host, "namespace": namespace}
    return Client(**client_kwargs)
def wait_for_pipeline_completion(
    run_id, timeout=60 * 60, expected_statuses: List[str] = None, namespace=None
):
    """Block until a pipeline run reaches a stable status or the timeout expires.

    When not running inside a Kubernetes cluster the run is polled through
    the mlrun run DB (which must be of kind ``http``); otherwise the KFP
    client waits on the run directly.

    :param run_id:            id of pipelines run
    :param timeout:           wait timeout in sec
    :param expected_statuses: list of expected statuses, one of
                              [ Succeeded | Failed | Skipped | Error ],
                              by default [ Succeeded ]
    :param namespace:         k8s namespace if not default

    :return: kfp run dict
    :raises ValueError:   polling remotely but the run DB is not ``http``
    :raises RuntimeError: the final status is not one of the expected ones
    """
    if expected_statuses is None:
        expected_statuses = [RunStatuses.succeeded]
    namespace = namespace or mlconf.namespace
    inside_cluster = get_k8s_helper(silent=True).is_running_inside_kubernetes_cluster()
    remote = not inside_cluster
    logger.debug(
        f"Waiting for run completion."
        f" run_id: {run_id},"
        f" expected_statuses: {expected_statuses},"
        f" timeout: {timeout},"
        f" remote: {remote},"
        f" namespace: {namespace}"
    )

    if not remote:
        # In-cluster: let the KFP client do the waiting.
        kfp_client = Client(namespace=namespace)
        resp = kfp_client.wait_for_run_completion(run_id, timeout)
        if resp:
            resp = resp.to_dict()
    else:
        mldb = get_run_db()
        if mldb.kind != "http":
            raise ValueError(
                "get pipeline require access to remote api-service"
                ", please set the dbpath url"
            )

        def get_pipeline_if_completed(run_id, namespace=namespace):
            # Raising signals the retry helper that the run is still going.
            resp = mldb.get_pipeline(run_id, namespace=namespace)
            if resp["run"]["status"] in RunStatuses.stable_statuses():
                return resp
            # TODO: think of nicer liveness indication and make it re-usable
            # log '.' each retry as a liveness indication
            logger.debug(".")
            raise RuntimeError("pipeline run has not completed yet")

        resp = retry_until_successful(
            10,
            timeout,
            logger,
            False,
            get_pipeline_if_completed,
            run_id,
            namespace=namespace,
        )

    if resp:
        status = resp["run"]["status"]
    else:
        status = "unknown"

    if expected_statuses and status not in expected_statuses:
        raise RuntimeError(f"run status {status} not in expected statuses")

    logger.debug(
        f"Finished waiting for pipeline completion."
        f" run_id: {run_id},"
        f" status: {status},"
        f" namespace: {namespace}"
    )
    return resp
def main():
    """Submit the XGBoost sample pipeline and write a JUnit XML report.

    Steps, in order: verify the compiled pipeline file exists, create an
    experiment, submit the run, wait up to 1800 seconds for completion,
    print the Argo workflow log, and (only when the run succeeded) download
    the confusion-matrix artifact and check that its UI metadata file is
    non-empty.  Results are written to ``args.result``.
    """
    args = parse_arguments()
    test_cases = []
    test_name = 'XGBoost Sample Test'

    ###### Initialization ######
    client = Client(namespace=args.namespace)

    ###### Check Input File ######
    utils.add_junit_test(test_cases, 'input generated yaml file',
                         os.path.exists(args.input),
                         'yaml file is not generated')
    if not os.path.exists(args.input):
        utils.write_junit_xml(test_name, args.result, test_cases)
        exit()

    ###### Create Experiment ######
    experiment_name = 'xgboost sample experiment'
    response = client.create_experiment(experiment_name)
    experiment_id = response.id
    utils.add_junit_test(test_cases, 'create experiment', True)

    ###### Create Job ######
    job_name = 'xgboost_sample'
    params = {
        'output': args.output,
        'project': 'ml-pipeline-test',
        'train-data': 'gs://ml-pipeline-dataset/sample-test/sfpd/train_50.csv',
        'eval-data': 'gs://ml-pipeline-dataset/sample-test/sfpd/eval_20.csv',
        'schema': 'gs://ml-pipeline-dataset/sample-test/sfpd/schema.json',
        'rounds': '20',
        'workers': '2'
    }
    response = client.run_pipeline(experiment_id, job_name, args.input, params)
    run_id = response.id
    utils.add_junit_test(test_cases, 'create pipeline run', True)

    ###### Monitor Job ######
    start_time = datetime.now()
    response = client.wait_for_run_completion(run_id, 1800)
    succ = (response.run.status.lower() == 'succeeded')
    end_time = datetime.now()
    elapsed_time = (end_time - start_time).seconds
    utils.add_junit_test(test_cases, 'job completion', succ,
                         'waiting for job completion failure', elapsed_time)

    ###### Output Argo Log for Debugging ######
    workflow_json = client._get_workflow_json(run_id)
    workflow_id = workflow_json['metadata']['name']
    argo_log, _ = utils.run_bash_command('argo logs -n {} -w {}'.format(
        args.namespace, workflow_id))
    print("=========Argo Workflow Log=========")
    print(argo_log)

    ###### If the job fails, skip the result validation ######
    if not succ:
        utils.write_junit_xml(test_name, args.result, test_cases)
        exit()

    ###### Validate the results ######
    # The confusion-matrix step must have produced a non-empty UI metadata
    # file; its contents are not inspected further.
    cm_tar_path = './confusion_matrix.tar.gz'
    cm_filename = 'mlpipeline-ui-metadata.json'
    utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path)
    # Context manager closes the archive handle (previously left open).
    with tarfile.open(cm_tar_path) as tar_handler:
        tar_handler.extractall()

    with open(cm_filename, 'r') as f:
        cm_data = f.read()
        utils.add_junit_test(test_cases, 'confusion matrix format',
                             (len(cm_data) > 0),
                             'the confusion matrix file is empty')

    ###### Delete Job ######
    #TODO: add deletion when the backend API offers the interface.

    ###### Write out the test result in junit xml ######
    utils.write_junit_xml(test_name, args.result, test_cases)
def main():
    """Submit the Kubeflow sample pipeline and write a JUnit XML report.

    Steps, in order: verify the compiled pipeline file exists, create an
    experiment, submit the run, wait up to 1200 seconds for completion,
    print the Argo workflow log, then download the confusion-matrix artifact
    and check that the schema of its first output has exactly three columns.
    Results are written to ``args.result``.  When the input file is missing
    or the run does not succeed, the report is written and the process exits
    early via ``exit()``.
    """
    args = parse_arguments()
    test_cases = []
    test_name = 'Kubeflow Sample Test'

    ###### Initialization ######
    client = Client()

    ###### Check Input File ######
    utils.add_junit_test(test_cases, 'input generated yaml file',
                         os.path.exists(args.input),
                         'yaml file is not generated')
    if not os.path.exists(args.input):
        utils.write_junit_xml(test_name, args.result, test_cases)
        exit()

    ###### Create Experiment ######
    experiment_name = 'kubeflow sample experiment'
    response = client.create_experiment(experiment_name)
    experiment_id = response.id
    utils.add_junit_test(test_cases, 'create experiment', True)

    ###### Create Job ######
    job_name = 'kubeflow_sample'
    params = {
        'output': args.output,
        'project': 'ml-pipeline-test',
        'evaluation': 'gs://ml-pipeline-playground/IntegTest/flower/eval15.csv',
        'train': 'gs://ml-pipeline-playground/IntegTest/flower/train30.csv',
        'hidden-layer-size': '10,5',
        'steps': '5'
    }
    response = client.run_pipeline(experiment_id, job_name, args.input, params)
    run_id = response.id
    utils.add_junit_test(test_cases, 'create pipeline run', True)

    ###### Monitor Job ######
    start_time = datetime.now()
    response = client.wait_for_run_completion(run_id, 1200)
    succ = (response.run.status.lower() == 'succeeded')
    end_time = datetime.now()
    elapsed_time = (end_time - start_time).seconds
    utils.add_junit_test(test_cases, 'job completion', succ,
                         'waiting for job completion failure', elapsed_time)
    if not succ:
        utils.write_junit_xml(test_name, args.result, test_cases)
        exit()

    ###### Output Argo Log for Debugging ######
    workflow_json = client._get_workflow_json(run_id)
    workflow_id = workflow_json['metadata']['name']
    #TODO: remove the namespace dependency or make is configurable.
    argo_log, _ = utils.run_bash_command(
        'argo logs -n kubeflow -w {}'.format(workflow_id))
    print("=========Argo Workflow Log=========")
    print(argo_log)

    ###### Validate the results ######
    # confusion matrix should show three columns for the flower data
    # target, predicted, count
    cm_tar_path = './confusion_matrix.tar.gz'
    cm_filename = 'mlpipeline-ui-metadata.json'
    utils.get_artifact_in_minio(workflow_json, 'confusionmatrix', cm_tar_path)
    # Context manager closes the archive handle (previously left open).
    with tarfile.open(cm_tar_path) as tar_handler:
        tar_handler.extractall()

    with open(cm_filename, 'r') as f:
        cm_data = json.load(f)
        utils.add_junit_test(
            test_cases, 'confusion matrix format',
            (len(cm_data['outputs'][0]['schema']) == 3),
            'the column number of the confusion matrix output is not equal to three'
        )

    ###### Delete Job ######
    #TODO: add deletion when the backend API offers the interface.

    ###### Write out the test result in junit xml ######
    utils.write_junit_xml(test_name, args.result, test_cases)
def run_pipeline(
    pipeline,
    arguments=None,
    project=None,
    experiment=None,
    run=None,
    namespace=None,
    artifact_path=None,
    ops=None,
    url=None,
    ttl=None,
):
    """remote KubeFlow pipeline execution

    Submit a workflow task to KFP via mlrun API service

    :param pipeline:   KFP pipeline function or path to .yaml/.zip pipeline file
    :param arguments:  pipeline arguments
    :param project:    project name used to fill the artifact path template
                       (falls back to ``mlconf.default_project``)
    :param experiment: experiment name
    :param run:        optional, run name
    :param namespace:  Kubernetes namespace (if not using default)
    :param url:        optional, url to mlrun API service
    :param artifact_path:  target location/url for mlrun artifacts
    :param ops:        additional operators (.apply() to all pipeline functions)
    :param ttl:        pipeline ttl in secs (after that the pods will be removed)

    :returns: kubeflow pipeline id
    :raises ValueError: when no artifact path can be resolved, or when the
                        remote path is used but the run DB is not ``http``
    """

    remote = not get_k8s_helper(silent=True).is_running_inside_kubernetes_cluster()

    artifact_path = artifact_path or mlconf.artifact_path
    artifact_path = mlrun.utils.helpers.fill_artifact_path_template(
        artifact_path, project or mlconf.default_project
    )
    if artifact_path and "{{run.uid}}" in artifact_path:
        # str.replace returns a new string; the result must be re-bound or
        # the placeholder rewrite is silently lost.
        artifact_path = artifact_path.replace("{{run.uid}}", "{{workflow.uid}}")
    if not artifact_path:
        raise ValueError("artifact path was not specified")

    namespace = namespace or mlconf.namespace
    arguments = arguments or {}

    if remote or url:
        mldb = get_run_db(url)
        if mldb.kind != "http":
            raise ValueError(
                "run pipeline require access to remote api-service"
                ", please set the dbpath url"
            )
        run_id = mldb.submit_pipeline(
            pipeline,
            arguments,
            experiment=experiment,
            run=run,
            namespace=namespace,
            ops=ops,
            artifact_path=artifact_path,
        )

    else:
        client = Client(namespace=namespace)
        if isinstance(pipeline, str):
            experiment = client.create_experiment(name=experiment)
            run_result = client.run_pipeline(
                experiment.id, run, pipeline, params=arguments
            )
        else:
            conf = new_pipe_meta(artifact_path, ttl, ops)
            run_result = client.create_run_from_pipeline_func(
                pipeline,
                arguments,
                run_name=run,
                experiment_name=experiment,
                pipeline_conf=conf,
            )

        # The two submission paths return different objects: the result of
        # create_run_from_pipeline_func exposes `run_id`, while the run
        # object returned by run_pipeline carries its identifier in `id`.
        run_id = getattr(run_result, "run_id", None)
        if run_id is None:
            run_id = getattr(run_result, "id", None)

    logger.info(f"Pipeline run id={run_id}, check UI or DB for progress")
    return run_id
def run(self):
    """Run compiled KFP pipeline.

    Verifies that the compiled pipeline file exists, creates the experiment,
    resolves arguments/timeout/run flag from the default config and the
    optional per-test config (both validated against ``SCHEMA_CONFIG``), and
    submits the run when ``run_pipeline`` is enabled.  Results are stored on
    the instance (``_experiment_id``, ``_job_name``, ``_test_args``,
    ``_test_timeout``, ``_run_pipeline`` and, when submitted, ``_run_id``).

    Raises:
        RuntimeError: the default config is not valid YAML.
        FileExistsError: the default config cannot be opened.

    Exits the process with status 1 when the input file is missing.
    """
    ###### Initialization ######
    self._client = Client(host=self._host)

    ###### Check Input File ######
    utils.add_junit_test(
        self._test_cases,
        'input generated yaml file',
        os.path.exists(self._input),
        'yaml file is not generated',
    )
    if not os.path.exists(self._input):
        utils.write_junit_xml(self._test_name, self._result, self._test_cases)
        print('Error: job not found.')
        exit(1)

    ###### Create Experiment ######
    experiment = self._client.create_experiment(self._experiment_name)
    self._experiment_id = experiment.id
    utils.add_junit_test(self._test_cases, 'create experiment', True)

    ###### Create Job ######
    self._job_name = self._testname + '_sample'

    ###### Figure out arguments from associated config files. #######
    self._test_args = {}
    config_schema = yamale.make_schema(SCHEMA_CONFIG)

    # Defaults are mandatory: any problem reading them aborts the test.
    try:
        with open(DEFAULT_CONFIG, 'r') as default_file:
            default_settings = yaml.safe_load(default_file)
        # If validation fails, a ValueError will be raised.
        yamale.validate(config_schema, yamale.make_data(DEFAULT_CONFIG))
    except yaml.YAMLError as yamlerr:
        raise RuntimeError('Illegal default config:{}'.format(yamlerr))
    except OSError as ose:
        raise FileExistsError('Default config not found:{}'.format(ose))
    else:
        self._test_timeout = default_settings['test_timeout']
        self._run_pipeline = default_settings['run_pipeline']

    # The per-test config is optional: on read/parse problems keep defaults.
    try:
        config_file = os.path.join(CONFIG_DIR,
                                   '%s.config.yaml' % self._testname)
        with open(config_file, 'r') as test_file:
            overrides = yaml.safe_load(test_file)
        # If validation fails, a ValueError will be raised.
        yamale.validate(config_schema, yamale.make_data(config_file))
    except yaml.YAMLError as yamlerr:
        print('No legit yaml config file found, use default args:{}'.format(
            yamlerr))
    except OSError as ose:
        print('Config file with the same name not found, use default args:{}'
              .format(ose))
    else:
        if overrides.get('arguments'):
            self._test_args.update(overrides['arguments'])
        if 'output' in self._test_args:
            # output is a special param that has to be specified dynamically.
            self._test_args['output'] = self._output
        if 'test_timeout' in overrides:
            self._test_timeout = overrides['test_timeout']
        if 'run_pipeline' in overrides:
            self._run_pipeline = overrides['run_pipeline']

    # TODO(numerology): Special treatment for TFX::OSS sample
    if self._testname == 'parameterized_tfx_oss':
        pipeline_root = os.path.join(
            self._test_args['output'],
            'tfx_taxi_simple_' + kfp.dsl.RUN_ID_PLACEHOLDER)
        self._test_args['pipeline-root'] = pipeline_root
        del self._test_args['output']

    # Submit for pipeline running.
    if self._run_pipeline:
        submitted = self._client.run_pipeline(
            self._experiment_id, self._job_name, self._input, self._test_args)
        self._run_id = submitted.id
        utils.add_junit_test(self._test_cases, 'create pipeline run', True)
def main():
    """Submit the Resnet CMLE sample pipeline and write a JUnit XML report.

    Steps, in order: verify the compiled pipeline file exists, create an
    experiment, submit the run, wait up to 1800 seconds for completion, and
    print the Argo workflow log (also when waiting raised).  Results are
    written to ``args.result``; the process exits with status 1 when the
    input file is missing or the run does not succeed.
    """
    args = parse_arguments()
    junit_cases = []
    suite_name = 'Resnet CMLE Test'

    ###### Initialization ######
    api_host = 'ml-pipeline.%s.svc.cluster.local:8888' % args.namespace
    kfp_client = Client(host=api_host)

    ###### Check Input File ######
    utils.add_junit_test(
        junit_cases,
        'input generated yaml file',
        os.path.exists(args.input),
        'yaml file is not generated',
    )
    if not os.path.exists(args.input):
        utils.write_junit_xml(suite_name, args.result, junit_cases)
        print('Error: job not found.')
        exit(1)

    ###### Create Experiment ######
    experiment = kfp_client.create_experiment('resnet cmle sample experiment')
    experiment_id = experiment.id
    utils.add_junit_test(junit_cases, 'create experiment', True)

    ###### Create Job ######
    pipeline_params = {
        'output': args.output,
        'project_id': 'ml-pipeline-test',
        'region': 'us-central1',
        'model': 'bolts',
        'version': 'beta1',
        # Watch out! If 1.9 is no longer supported we need to set it to a newer version.
        'tf_version': '1.9',
        'train_csv': 'gs://ml-pipeline-dataset/sample-test/bolts/bolt_images_train_sample1000.csv',
        'validation_csv': 'gs://ml-pipeline-dataset/sample-test/bolts/bolt_images_validate_sample200.csv',
        'labels': 'gs://bolts_image_dataset/labels.txt',
        'depth': 50,
        'train_batch_size': 32,
        'eval_batch_size': 32,
        'steps_per_eval': 128,
        'train_steps': 128,
        'num_train_images': 1000,
        'num_eval_images': 200,
        'num_label_classes': 10,
    }
    submitted_run = kfp_client.run_pipeline(
        experiment_id, 'cmle_sample', args.input, pipeline_params)
    run_id = submitted_run.id
    utils.add_junit_test(junit_cases, 'create pipeline run', True)

    ###### Monitor Job ######
    try:
        started_at = datetime.now()
        run_detail = kfp_client.wait_for_run_completion(run_id, 1800)
        run_succeeded = run_detail.run.status.lower() == 'succeeded'
        finished_at = datetime.now()
        elapsed_seconds = (finished_at - started_at).seconds
        utils.add_junit_test(
            junit_cases,
            'job completion',
            run_succeeded,
            'waiting for job completion failure',
            elapsed_seconds,
        )
    finally:
        ###### Output Argo Log for Debugging ######
        workflow_json = kfp_client._get_workflow_json(run_id)
        workflow_name = workflow_json['metadata']['name']
        print("Argo Workflow Name: ", workflow_name)
        log_command = 'argo logs {} -n {}'.format(workflow_name, args.namespace)
        argo_log, _ = utils.run_bash_command(log_command)
        print("=========Argo Workflow Log=========")
        print(argo_log)

    if not run_succeeded:
        utils.write_junit_xml(suite_name, args.result, junit_cases)
        exit(1)

    ###### Delete Job ######
    #TODO: add deletion when the backend API offers the interface.

    ###### Write out the test result in junit xml ######
    utils.write_junit_xml(suite_name, args.result, junit_cases)
def wait_for_pipeline_completion(
    run_id,
    timeout=60 * 60,
    expected_statuses: List[str] = None,
    namespace=None,
    remote=True,
    project: str = None,
):
    """Wait for Pipeline status, timeout in sec

    :param run_id:     id of pipelines run
    :param timeout:    wait timeout in sec
    :param expected_statuses:  list of expected statuses, one of [ Succeeded | Failed | Skipped | Error ], by default
                               [ Succeeded ]
    :param namespace:  k8s namespace if not default
    :param remote:     read kfp data from mlrun service (default=True)
    :param project:    the project of the pipeline

    :return: kfp run dict
    :raises ValueError:   ``remote`` is set but the run DB is not ``http``
    :raises RuntimeError: the final status is not one of the expected ones
    """
    if expected_statuses is None:
        expected_statuses = [RunStatuses.succeeded]
    namespace = namespace or mlconf.namespace
    logger.debug(f"Waiting for run completion."
                 f" run_id: {run_id},"
                 f" project: {project},"
                 f" expected_statuses: {expected_statuses},"
                 f" timeout: {timeout},"
                 f" remote: {remote},"
                 f" namespace: {namespace}")

    if remote:
        mldb = get_run_db()

        def get_pipeline_if_completed(run_id, namespace=namespace):
            resp = mldb.get_pipeline(run_id, namespace=namespace, project=project)
            status = resp["run"]["status"]
            show_kfp_run(resp, clear_output=True)
            if status not in RunStatuses.stable_statuses():
                # TODO: think of nicer liveness indication and make it re-usable
                # log '.' each retry as a liveness indication
                logger.debug(".")
                # Raising tells the retry helper to poll again.
                raise RuntimeError("pipeline run has not completed yet")

            return resp

        if mldb.kind != "http":
            raise ValueError(
                "get pipeline require access to remote api-service"
                ", please set the dbpath url")

        resp = retry_until_successful(
            10,
            timeout,
            logger,
            False,
            get_pipeline_if_completed,
            run_id,
            namespace=namespace,
        )
    else:
        client = Client(namespace=namespace)
        resp = client.wait_for_run_completion(run_id, timeout)
        if resp:
            resp = resp.to_dict()
            resp = format_summary_from_kfp_run(resp)
        show_kfp_run(resp)

    # `resp` may be empty on the direct-client path; both lookups must
    # tolerate that (the message lookup previously raised TypeError).
    status = resp["run"]["status"] if resp else "unknown"
    message = resp["run"].get("message", "") if resp else ""
    if expected_statuses:
        if status not in expected_statuses:
            raise RuntimeError(
                f"Pipeline run status {status}{', ' + message if message else ''}"
            )

    logger.debug(f"Finished waiting for pipeline completion."
                 f" run_id: {run_id},"
                 f" status: {status},"
                 f" message: {message},"
                 f" namespace: {namespace}")

    return resp
def check(self):
    """Run sample test and check results.

    Submits the compiled pipeline at ``self._input`` with parameters chosen
    by ``self._testname``, waits for completion, prints the Argo workflow
    log (also when waiting raised), and writes the collected JUnit test
    cases to ``self._result``.  For ``xgboost_training_cm`` the
    confusion-matrix artifact is additionally checked to be non-empty.

    Exits the process with status 1 when the input file is missing or the
    run does not succeed.
    """
    junit_cases = []
    suite_name = self._testname + ' Sample Test'

    ###### Initialization ######
    api_host = 'ml-pipeline.%s.svc.cluster.local:8888' % self._namespace
    kfp_client = Client(host=api_host)

    ###### Check Input File ######
    utils.add_junit_test(
        junit_cases,
        'input generated yaml file',
        os.path.exists(self._input),
        'yaml file is not generated',
    )
    if not os.path.exists(self._input):
        utils.write_junit_xml(suite_name, self._result, junit_cases)
        print('Error: job not found.')
        exit(1)

    ###### Create Experiment ######
    experiment = kfp_client.create_experiment(
        self._testname + ' sample experiment')
    experiment_id = experiment.id
    utils.add_junit_test(junit_cases, 'create experiment', True)

    ###### Create Job ######
    job_name = self._testname + '_sample'

    ###### Test-specific arguments #######
    is_xgboost = self._testname == 'xgboost_training_cm'
    # Basic tests require no additional params.
    pipeline_params = {}
    if self._testname == 'tfx_cab_classification':
        pipeline_params = {
            'output': self._output,
            'project': 'ml-pipeline-test',
            'column-names': 'gs://ml-pipeline-dataset/sample-test/taxi-cab-classification/column-names.json',
            'evaluation': 'gs://ml-pipeline-dataset/sample-test/taxi-cab-classification/eval5.csv',
            'train': 'gs://ml-pipeline-dataset/sample-test/taxi-cab-classification/train20.csv',
            'hidden-layer-size': '5',
            'steps': '5',
        }
    elif is_xgboost:
        pipeline_params = {
            'output': self._output,
            'project': 'ml-pipeline-test',
            'train-data': 'gs://ml-pipeline-dataset/sample-test/sfpd/train_20.csv',
            'eval-data': 'gs://ml-pipeline-dataset/sample-test/sfpd/eval_5.csv',
            'schema': 'gs://ml-pipeline-dataset/sample-test/sfpd/schema.json',
            'rounds': '5',
            'workers': '2',
        }

    submitted_run = kfp_client.run_pipeline(
        experiment_id, job_name, self._input, pipeline_params)
    run_id = submitted_run.id
    utils.add_junit_test(junit_cases, 'create pipeline run', True)

    ###### Monitor Job ######
    try:
        started_at = datetime.now()
        # The xgboost sample gets its own timeout constant.
        wait_timeout = _XGB_TEST_TIMEOUT if is_xgboost else _TEST_TIMEOUT
        run_detail = kfp_client.wait_for_run_completion(run_id, wait_timeout)
        run_succeeded = run_detail.run.status.lower() == 'succeeded'
        finished_at = datetime.now()
        elapsed_seconds = (finished_at - started_at).seconds
        utils.add_junit_test(
            junit_cases,
            'job completion',
            run_succeeded,
            'waiting for job completion failure',
            elapsed_seconds,
        )
    finally:
        ###### Output Argo Log for Debugging ######
        workflow_json = kfp_client._get_workflow_json(run_id)
        workflow_name = workflow_json['metadata']['name']
        log_command = 'argo logs -n {} -w {}'.format(
            self._namespace, workflow_name)
        argo_log, _ = utils.run_bash_command(log_command)
        print('=========Argo Workflow Log=========')
        print(argo_log)

    if not run_succeeded:
        utils.write_junit_xml(suite_name, self._result, junit_cases)
        exit(1)

    ###### Validate the results for specific test cases ######
    #TODO: Add result check for tfx-cab-classification after launch.
    if is_xgboost:
        # For xgboost sample, check its confusion matrix.
        cm_tar_path = './confusion_matrix.tar.gz'
        utils.get_artifact_in_minio(workflow_json, 'confusion-matrix',
                                    cm_tar_path, 'mlpipeline-ui-metadata')
        with tarfile.open(cm_tar_path) as archive:
            members = archive.getmembers()
            assert len(members) == 1

            with archive.extractfile(members[0]) as member_file:
                cm_data = member_file.read()
                utils.add_junit_test(
                    junit_cases,
                    'confusion matrix format',
                    (len(cm_data) > 0),
                    'the confusion matrix file is empty',
                )

    ###### Delete Job ######
    #TODO: add deletion when the backend API offers the interface.

    ###### Write out the test result in junit xml ######
    utils.write_junit_xml(suite_name, self._result, junit_cases)
).output # Preparing the true values true_values_table = pandas_transform_csv_op( table=training_data, transform_code='df = df[["tips"]]', ).output true_values = drop_header_op(true_values_table).output # Initial model training first_model = xgboost_train_on_csv_op( training_data=training_data, label_column=0, objective='reg:squarederror', num_iterations=100, ).outputs['model'] # Recursively training until the error becomes low train_until_low_error( starting_model=first_model, training_data=training_data, true_values=true_values, ) if __name__ == '__main__': kfp_endpoint = None Client(host=kfp_endpoint).create_run_from_pipeline_func( train_until_good_pipeline, arguments={})