def test_batch_train_component_with_an_output(self):
    """Run a local batch-train job whose trainer emits one output.

    The pipeline reads 'batch_train_example', trains 'mnist_model' with
    ``TrainBatchMnistModelWithOutput`` (``output_num=1``) and writes that
    output to the 'output_example' example, whose batch URI points at
    ``numpy_output.npz`` next to this module. The workflow result is
    asserted to be 0.
    """
    source_meta = af.register_example(
        name='batch_train_example',
        support_type=ExampleSupportType.EXAMPLE_BATCH)
    mnist_model = af.register_model(model_name='mnist_model',
                                    model_type=ModelType.SAVED_MODEL)
    npz_path = os.path.abspath(
        os.path.dirname(__file__) + '/numpy_output.npz')
    sink_meta = af.register_example(
        name='output_example',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        data_type='numpy',
        data_format='npz',
        batch_uri=npz_path)

    job = af.BaseJobConfig(platform='local', engine='python',
                           job_name='batch_train')
    with af.config(job):
        reader = PythonObjectExecutor(python_object=ReadBatchExample())
        batch_source = af.read_example(example_info=source_meta,
                                       executor=reader)
        trainer = PythonObjectExecutor(
            python_object=TrainBatchMnistModelWithOutput())
        trained = af.train(input_data_list=[batch_source],
                           executor=trainer,
                           model_info=mnist_model,
                           output_num=1)
        af.write_example(input_data=trained, example_info=sink_meta)

    execution_id = af.run(test_util.get_project_path())
    self.assertEqual(0, af.wait_workflow_execution_finished(execution_id))
def test_stream_train_component(self):
    """Run a batch trainer and a stream trainer of one model in a local job.

    Both example registrations use the name 'stream_train_example' with
    ``EXAMPLE_BOTH`` support. The stream trainer and the batch trainer are
    linked through ``af.stop_before_control_dependency`` and the workflow
    result is asserted to be 0.
    """
    batch_meta = af.register_example(
        name='stream_train_example',
        support_type=ExampleSupportType.EXAMPLE_BOTH)
    mnist_model = af.register_model(model_name='mnist_model',
                                    model_type=ModelType.SAVED_MODEL)
    stream_meta = af.register_example(
        name='stream_train_example',
        support_type=ExampleSupportType.EXAMPLE_BOTH)

    job = af.BaseJobConfig(platform='local', engine='python',
                           job_name='stream_train')
    with af.config(job):
        batch_reader = PythonObjectExecutor(python_object=ReadBatchExample())
        batch_source = af.read_example(example_info=batch_meta,
                                       executor=batch_reader)
        batch_trainer = af.train(
            input_data_list=[batch_source],
            executor=PythonObjectExecutor(
                python_object=TrainBatchMnistModel()),
            model_info=mnist_model)

        stream_reader = PythonObjectExecutor(
            python_object=ReadStreamExample())
        stream_source = af.read_example(example_info=stream_meta,
                                        executor=stream_reader)
        stream_trainer = af.train(
            input_data_list=[stream_source],
            executor=PythonObjectExecutor(
                python_object=TrainStreamMnistModel()),
            model_info=mnist_model)

    af.stop_before_control_dependency(stream_trainer, batch_trainer)
    execution_id = af.run(test_util.get_project_path())
    self.assertEqual(0, af.wait_workflow_execution_finished(execution_id))
def prepare_workflow():
    """Prepare workflow: Example & Model Metadata registration.

    Registers three batch CSV examples ('train_data', 'label_data',
    'test_data') located under the data-set directory returned by
    ``DataSets().collect_data_file_dir()``, one Kafka stream example
    ('test_output_data') and the 'label_model' model.

    Returns:
        The tuple ``(train_example_meta, label_example_meta,
        test_example_meta, test_output_example_meta, train_model_meta)``.
    """
    # Only the data-set directory is needed here; the second value returned
    # by collect_data_file_dir() is not used by this function.
    data_set_dir, _ = DataSets().collect_data_file_dir()

    def _register_batch_csv(name: str) -> ExampleMeta:
        # Register a pandas/CSV batch example stored as <data_set_dir>/<name>.csv.
        return af.register_example(
            name=name,
            support_type=ExampleSupportType.EXAMPLE_BATCH,
            data_type='pandas',
            data_format='csv',
            batch_uri=data_set_dir + '/' + name + '.csv')

    # Registration order is kept: train, label, test, test output, model.
    train_example_meta: ExampleMeta = _register_batch_csv('train_data')
    label_example_meta: ExampleMeta = _register_batch_csv('label_data')
    test_example_meta: ExampleMeta = _register_batch_csv('test_data')
    test_output_example_meta: ExampleMeta = af.register_example(
        name='test_output_data',
        support_type=ExampleSupportType.EXAMPLE_STREAM,
        data_type='kafka',
        data_format='csv',
        stream_uri='localhost:9092')
    train_model_meta: ModelMeta = af.register_model(
        model_name='label_model',
        model_type=ModelType.SAVED_MODEL)
    return (train_example_meta, label_example_meta, test_example_meta,
            test_output_example_meta, train_model_meta)
def test_run_model_version_job(self):
    """Check that 'task_2' has exactly one execution in the test workflow.

    A workflow with two user-defined operations ('task_2' and 'task_3') is
    submitted, with a MODEL_GENERATED model-version control dependency
    declared between them for 'model_1'. While the workflow runs, a
    GENERATED version of 'model_1' is registered once, after a 'task_2'
    execution row appears. When the DAG run reaches a finished state the
    test asserts that exactly one ``TaskExecution`` row exists for 'task_2'.
    """
    project_name = 'test_project'
    workflow_name = 'test_workflow'
    dag_id = '{}.{}'.format(project_name, workflow_name)
    # The returned metadata is not used below; the model is referred to by
    # its name 'model_1'.
    train_model = af.register_model(model_name='model_1',
                                    model_type=af.ModelType.SAVED_MODEL,
                                    model_desc='test model')

    def run_task_function(client: NotificationClient):
        # `client` is not used inside this callback.
        with af.global_config_file(workflow_config_file()):
            with af.config('task_2'):
                executor_1 = af.user_define_operation(
                    af.PythonObjectExecutor(SimpleExecutor()))
            with af.config('task_3'):
                executor_2 = af.user_define_operation(
                    af.PythonObjectExecutor(SimpleExecutor()))
            af.model_version_control_dependency(
                src=executor_2,
                dependency=executor_1,
                model_name='model_1',
                model_version_event_type=ModelVersionEventType.MODEL_GENERATED)
            workflow_info = af.workflow_operation.submit_workflow(
                workflow_name)

        af.workflow_operation.start_new_workflow_execution(workflow_name)
        # r_flag ensures the model version is registered only once.
        r_flag = True
        # Poll once per second until the DAG run is finished.
        # NOTE(review): there is no timeout, so a run that never finishes
        # would block this test indefinitely.
        while True:
            with create_session() as session:
                tes2 = session.query(TaskExecution).filter(
                    TaskExecution.dag_id == 'test_project.test_workflow',
                    TaskExecution.task_id == 'task_2').all()
                if len(tes2) == 1 and r_flag:
                    # First sighting of a 'task_2' execution: register the
                    # GENERATED version of 'model_1'.
                    af.register_model_version(
                        model='model_1',
                        model_path='/tmp/model/v1',
                        current_stage=af.ModelVersionStage.GENERATED)
                    r_flag = False
                dag_run = session.query(DagRun).filter(
                    DagRun.dag_id == 'test_project.test_workflow').first()
                if dag_run is not None and dag_run.state in State.finished:
                    break
                else:
                    time.sleep(1)

    self.run_ai_flow(dag_id, run_task_function)
    with create_session() as session:
        tes = session.query(TaskExecution).filter(
            TaskExecution.dag_id == 'test_project.test_workflow',
            TaskExecution.task_id == 'task_2').all()
        self.assertEqual(1, len(tes))
def test_stream_evaluate_component(self):
    """Run a batch trainer plus a stream evaluator in one local job.

    The evaluator's single output is written with ``WriteStreamExample`` to
    the 'stream_evaluate_result_example' example, whose stream URI is the
    ``stream_evaluate`` path next to this module; any file already at that
    path is removed first. The workflow result is asserted to be 0.
    """
    train_meta = af.register_example(
        name='batch_train_example',
        support_type=ExampleSupportType.EXAMPLE_BATCH)
    mnist_model = af.register_model(model_name='mnist_model',
                                    model_type=ModelType.SAVED_MODEL)
    evaluate_meta = af.register_example(
        name='stream_evaluate_example',
        support_type=ExampleSupportType.EXAMPLE_STREAM)

    result_path = get_file_dir(__file__) + '/stream_evaluate'
    af.register_artifact(name='stream_evaluate', stream_uri=result_path)
    result_meta = af.register_example(
        name='stream_evaluate_result_example',
        support_type=ExampleSupportType.EXAMPLE_STREAM,
        stream_uri=result_path)
    # Remove the output left behind by an earlier run.
    if os.path.exists(result_path):
        os.remove(result_path)

    job = af.BaseJobConfig(platform='local', engine='python',
                           job_name='stream_evaluate')
    with af.config(job):
        batch_source = af.read_example(
            example_info=train_meta,
            executor=PythonObjectExecutor(python_object=ReadBatchExample()))
        trainer = af.train(
            input_data_list=[batch_source],
            executor=PythonObjectExecutor(
                python_object=TrainBatchMnistModel()),
            model_info=mnist_model)

        stream_source = af.read_example(
            example_info=evaluate_meta,
            executor=PythonObjectExecutor(python_object=ReadStreamExample()))
        evaluator = af.evaluate(
            input_data_list=[stream_source],
            model_info=mnist_model,
            executor=PythonObjectExecutor(
                python_object=EvaluateStreamMnistModel()),
            output_num=1)
        af.write_example(
            input_data=evaluator,
            example_info=result_meta,
            executor=PythonObjectExecutor(
                python_object=WriteStreamExample()))

    af.stop_before_control_dependency(evaluator, trainer)
    execution_id = af.run(test_util.get_project_path())
    self.assertEqual(0, af.wait_workflow_execution_finished(execution_id))
def test_stream_with_external_trigger_with_model_control(self):
    """Drive a command-line job through model-version events.

    A 'cmd_line' job is made dependent on the 'stream_trigger' external
    trigger for 'MODEL_DEPLOYED' events of the registered model. After the
    flow is submitted, the same DEPLOYED model version is registered twice
    (after 1 s and a further 5 s); 10 s later the execution is stopped and
    ``list_job(5, 0)`` is asserted to return three entries.
    """
    print(sys._getframe().f_code.co_name)
    model_name = 'test_create_model_version'
    af.register_model(model_name=model_name,
                      model_type=af.ModelType.CHECKPOINT,
                      model_desc='test create model version')
    stream_trigger = af.external_trigger(name='stream_trigger')

    cmd_job = af.BaseJobConfig('local', 'cmd_line')
    cmd_job.job_name = 'test_cmd'
    with af.config(cmd_job):
        shell_op = af.user_define_operation(
            output_num=0,
            executor=CmdExecutor(
                cmd_line="echo 'hello world' && sleep {}".format(1)))
    af.model_version_control_dependency(
        src=shell_op,
        dependency=stream_trigger,
        model_name=model_name,
        model_version_event_type='MODEL_DEPLOYED')
    workflow_id = af.submit_ai_flow()

    # Both registrations carry exactly the same version attributes.
    deployed_version = dict(
        model=model_name,
        model_path='fs://source1.pkl',
        model_metric='http://metric1',
        model_flavor='{"flavor.version":1}',
        version_desc='test create model version1',
        current_stage=af.ModelVersionStage.DEPLOYED)
    for pause_seconds in (1, 5):
        time.sleep(pause_seconds)
        af.register_model_version(**deployed_version)

    time.sleep(10)
    af.stop_execution_by_id(workflow_id)
    listed_jobs = af.get_ai_flow_client().list_job(5, 0)
    self.assertEqual(3, len(listed_jobs))
def prepare_workflow(train_data_file: str,
                     predict_result_directory: str,
                     merge_predict_result_path: str,
                     first_test_data_file: str,
                     first_result_data_file: str):
    """Register the example and model metadata for the workflow.

    Args:
        train_data_file: Batch URI of the 'train_data' CSV example.
        predict_result_directory: Batch URI of the 'predict_result' example.
        merge_predict_result_path: Batch URI of the 'merge_data' example.
        first_test_data_file: Batch URI of the 'first_test_data' CSV example.
        first_result_data_file: Batch URI of the 'first_result_111' CSV
            example.

    Returns:
        An 8-tuple of metadata objects in registration order: train,
        predict result, merge, first test, second test, first result,
        second result examples, followed by the 'auto_encoder' model.
    """
    batch = ExampleSupportType.EXAMPLE_BATCH
    stream = ExampleSupportType.EXAMPLE_STREAM
    kafka_uri = 'localhost:9092'

    train_meta: ExampleMeta = af.register_example(
        name='train_data', support_type=batch,
        data_type='pandas', data_format='csv',
        batch_uri=train_data_file)
    predict_meta: ExampleMeta = af.register_example(
        name='predict_result', support_type=batch,
        batch_uri=predict_result_directory)
    merge_meta: ExampleMeta = af.register_example(
        name='merge_data', support_type=batch,
        batch_uri=merge_predict_result_path)
    first_test_meta: ExampleMeta = af.register_example(
        name='first_test_data', support_type=batch,
        data_type='pandas', data_format='csv',
        batch_uri=first_test_data_file)
    second_test_meta: ExampleMeta = af.register_example(
        name='second_test_data', support_type=stream,
        data_type='kafka', data_format='csv',
        stream_uri=kafka_uri)
    first_result_meta: ExampleMeta = af.register_example(
        name='first_result_111', support_type=batch,
        data_type='pandas', data_format='csv',
        batch_uri=first_result_data_file)
    second_result_meta: ExampleMeta = af.register_example(
        name='second_result_111', support_type=stream,
        data_type='kafka', data_format='csv',
        stream_uri=kafka_uri)
    model_meta: ModelMeta = af.register_model(
        model_name='auto_encoder', model_type=ModelType.SAVED_MODEL)

    return (train_meta, predict_meta, merge_meta,
            first_test_meta, second_test_meta,
            first_result_meta, second_result_meta, model_meta)
def test_batch_predict_component(self):
    """Run a batch trainer and a batch predictor in one local job.

    The predictor's single output is written to 'output_result_example',
    whose batch URI is the ``batch_predict`` path next to this module; any
    file already at that path is removed first. The workflow result is
    asserted to be 0.
    """
    source_meta = af.register_example(
        name='input_train_example',
        support_type=ExampleSupportType.EXAMPLE_BOTH)
    mnist_model = af.register_model(model_name='mnist_model',
                                    model_type=ModelType.SAVED_MODEL)

    result_path = get_file_dir(__file__) + '/batch_predict'
    af.register_artifact(name='batch_evaluate', batch_uri=result_path)
    result_meta = af.register_example(
        name='output_result_example',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        data_type='numpy',
        data_format='txt',
        batch_uri=result_path)
    # Remove the output left behind by an earlier run.
    if os.path.exists(result_path):
        os.remove(result_path)

    job = af.BaseJobConfig(platform='local', engine='python',
                           job_name='batch_predict')
    with af.config(job):
        batch_source = af.read_example(
            example_info=source_meta,
            executor=PythonObjectExecutor(python_object=ReadBatchExample()))
        trainer = af.train(
            input_data_list=[batch_source],
            executor=PythonObjectExecutor(
                python_object=TrainBatchMnistModel()),
            model_info=mnist_model)
        predictor = af.predict(
            input_data_list=[batch_source],
            model_info=mnist_model,
            executor=PythonObjectExecutor(
                python_object=PredictBatchMnistModel()),
            output_num=1)
        af.write_example(input_data=predictor, example_info=result_meta)

    af.stop_before_control_dependency(predictor, trainer)
    execution_id = af.run(test_util.get_project_path())
    self.assertEqual(0, af.wait_workflow_execution_finished(execution_id))
def test_batch_train_component(self):
    """Run a local job that reads 'batch_train_example' and trains 'mnist_model'.

    The workflow result is asserted to be 0.
    """
    source_meta = af.register_example(
        name='batch_train_example',
        support_type=ExampleSupportType.EXAMPLE_BATCH)
    mnist_model = af.register_model(model_name='mnist_model',
                                    model_type=ModelType.SAVED_MODEL)

    job = af.BaseJobConfig(platform='local', engine='python',
                           job_name='batch_train')
    with af.config(job):
        reader = PythonObjectExecutor(python_object=ReadBatchExample())
        batch_source = af.read_example(example_info=source_meta,
                                       executor=reader)
        trainer = PythonObjectExecutor(python_object=TrainBatchMnistModel())
        af.train(input_data_list=[batch_source],
                 executor=trainer,
                 model_info=mnist_model)

    execution_id = af.run(test_util.get_project_path())
    self.assertEqual(0, af.wait_workflow_execution_finished(execution_id))
def test_batch_model_validate(self):
    """Run a batch trainer and a model validator in one local job.

    The validator (``BatchModelValidate``, ``output_num=0``) and the trainer
    are linked through ``af.stop_before_control_dependency``. The workflow
    result is asserted to be 0.
    """
    source_meta = af.register_example(
        name='batch_train_example',
        support_type=ExampleSupportType.EXAMPLE_BOTH)
    mnist_model = af.register_model(model_name='mnist_model',
                                    model_type=ModelType.SAVED_MODEL)

    job = af.BaseJobConfig(platform='local', engine='python',
                           job_name='evaluate')
    with af.config(job):
        batch_source = af.read_example(
            example_info=source_meta,
            executor=PythonObjectExecutor(python_object=ReadBatchExample()))
        trainer = af.train(
            input_data_list=[batch_source],
            executor=PythonObjectExecutor(
                python_object=TrainBatchMnistModel()),
            model_info=mnist_model)
        validator = af.model_validate(
            input_data_list=[batch_source],
            model_info=mnist_model,
            executor=PythonObjectExecutor(
                python_object=BatchModelValidate()),
            output_num=0)

    af.stop_before_control_dependency(validator, trainer)
    execution_id = af.run(test_util.get_project_path())
    self.assertEqual(0, af.wait_workflow_execution_finished(execution_id))
def run_project(project_root_path):
    """Assemble the train/validate/push/predict jobs and run them on Airflow.

    Every example in this variant is registered as ``EXAMPLE_STREAM``.
    Validation is tied to MODEL_GENERATED events through the 'validate'
    external trigger and pushing to MODEL_VALIDATED events through the
    'push' external trigger.

    Args:
        project_root_path: Project root directory; ``project.yaml`` and
            ``resources/workflow_config.yaml`` are read from beneath it.
    """
    af.set_project_config_file(project_root_path + "/project.yaml")
    project_name = af.project_config().get_project_name()
    prefix = project_name + "."

    validate_trigger = af.external_trigger(name='validate')
    push_trigger = af.external_trigger(name='push')

    workflow_config = project_root_path + '/resources/workflow_config.yaml'
    with af.global_config_file(workflow_config):
        with af.config('train_job'):
            train_meta = af.register_example(
                name=prefix + 'train_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('train'))
            train_source = af.read_example(
                example_info=train_meta,
                executor=PythonObjectExecutor(
                    python_object=TrainExampleReader()))
            train_features = af.transform(
                input_data_list=[train_source],
                executor=PythonObjectExecutor(
                    python_object=TrainExampleTransformer()))
            model_meta = af.register_model(
                model_name=prefix + 'logistic-regression',
                model_type=ModelType.SAVED_MODEL,
                model_desc='logistic regression model')
            af.train(
                input_data_list=[train_features],
                executor=PythonObjectExecutor(python_object=ModelTrainer()),
                model_info=model_meta)

        with af.config('validate_job'):
            validate_meta = af.register_example(
                name=prefix + 'validate_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('evaluate'),
                data_format='npz')
            validate_source = af.read_example(
                example_info=validate_meta,
                executor=PythonObjectExecutor(
                    python_object=ValidateExampleReader()))
            validate_features = af.transform(
                input_data_list=[validate_source],
                executor=PythonObjectExecutor(
                    python_object=ValidateTransformer()))
            # The artifact name is handed to the validator as well.
            validation_artifact = prefix + 'validate_artifact'
            af.register_artifact(
                name=validation_artifact,
                stream_uri=get_file_dir(__file__) + '/validate_result')
            validator = af.model_validate(
                input_data_list=[validate_features],
                model_info=model_meta,
                executor=PythonObjectExecutor(
                    python_object=ModelValidator(validation_artifact)))

        with af.config('push_job'):
            # The artifact name is handed to the pusher as well.
            pushed_artifact = prefix + 'push_model_artifact'
            af.register_artifact(
                name=pushed_artifact,
                stream_uri=get_file_dir(__file__) + '/pushed_model')
            pusher = af.push_model(
                model_info=model_meta,
                executor=PythonObjectExecutor(
                    python_object=ModelPusher(pushed_artifact)))

        with af.config('predict_job'):
            predict_meta = af.register_example(
                name=prefix + 'predict_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('predict'))
            predict_source = af.read_example(
                example_info=predict_meta,
                executor=PythonObjectExecutor(
                    python_object=PredictExampleReader()))
            predict_features = af.transform(
                input_data_list=[predict_source],
                executor=PythonObjectExecutor(
                    python_object=PredictTransformer()))
            predictions = af.predict(
                input_data_list=[predict_features],
                model_info=model_meta,
                executor=PythonObjectExecutor(python_object=ModelPredictor()))
            export_meta = af.register_example(
                name=prefix + 'export_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=get_file_dir(__file__) + '/predict_result')
            af.write_example(
                input_data=predictions,
                example_info=export_meta,
                executor=PythonObjectExecutor(python_object=ExampleWriter()))

    # Control edges driven by model-version events of the trained model.
    af.model_version_control_dependency(
        src=validator,
        model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
        dependency=validate_trigger,
        model_name=model_meta.name)
    af.model_version_control_dependency(
        src=pusher,
        model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
        dependency=push_trigger,
        model_name=model_meta.name)

    # The project name doubles as the Airflow DAG id.
    airflow_dag_id = project_name
    af.deploy_to_airflow(project_root_path, dag_id=airflow_dag_id)
    af.run(project_path=project_root_path,
           dag_id=airflow_dag_id,
           scheduler_type=SchedulerType.AIRFLOW)
def run_workflow():
    """Define the train, validate and predict jobs, then submit and start them.

    After the three jobs are declared, a model-version-event action is
    registered for job 'validate' on MODEL_GENERATED and for job 'predict'
    on MODEL_VALIDATED of the 'KNN' model. The current workflow is then
    submitted and a new execution of it is started.
    """
    af.init_ai_flow_context()
    prefix = af.current_project_config().get_project_name() + "."

    with af.job_config('train'):
        train_meta = af.register_dataset(name=prefix + 'train_dataset',
                                         uri=DATASET_URI.format('train'))
        train_source = af.read_dataset(
            dataset_info=train_meta,
            read_dataset_processor=DatasetReader())
        knn_model = af.register_model(model_name=prefix + 'KNN',
                                      model_desc='KNN model')
        af.train(input=[train_source],
                 training_processor=ModelTrainer(),
                 model_info=knn_model)

    with af.job_config('validate'):
        validate_meta = af.register_dataset(name=prefix + 'validate_dataset',
                                            uri=DATASET_URI.format('test'))
        validate_source = af.read_dataset(
            dataset_info=validate_meta,
            read_dataset_processor=ValidateDatasetReader())
        # The artifact name is handed to the validator as well.
        validation_artifact = prefix + 'validate_artifact'
        af.register_artifact(
            name=validation_artifact,
            uri=get_file_dir(__file__) + '/validate_result')
        af.model_validate(
            input=[validate_source],
            model_info=knn_model,
            model_validation_processor=ModelValidator(validation_artifact))

    with af.job_config('predict'):
        predict_meta = af.register_dataset(name=prefix + 'predict_dataset',
                                           uri=DATASET_URI.format('test'))
        predict_source = af.read_dataset(dataset_info=predict_meta,
                                         read_dataset_processor=Source())
        predictions = af.predict(input=[predict_source],
                                 model_info=knn_model,
                                 prediction_processor=Predictor())
        # Predictions are written to predict_result.csv next to this module.
        sink_meta = af.register_dataset(
            name=prefix + 'write_dataset',
            uri=get_file_dir(__file__) + '/predict_result.csv')
        af.write_dataset(input=predictions,
                         dataset_info=sink_meta,
                         write_dataset_processor=Sink())

    # Control edges: train -> validate -> predict.
    af.action_on_model_version_event(
        job_name='validate',
        model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
        model_name=knn_model.name)
    af.action_on_model_version_event(
        job_name='predict',
        model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
        model_name=knn_model.name)

    af.workflow_operation.submit_workflow(
        af.current_workflow_config().workflow_name)
    af.workflow_operation.start_new_workflow_execution(
        af.current_workflow_config().workflow_name)
def run_project(project_root_path):
    """Assemble the train/validate/push/predict jobs and run them on Airflow.

    Training reads a batch example; prediction reads and writes stream
    examples. Validation is tied to MODEL_GENERATED events through the
    'validate' external trigger and pushing to MODEL_VALIDATED events
    through the 'push' external trigger.

    Args:
        project_root_path: Project root directory; ``project.yaml`` and
            ``resources/workflow_config.yaml`` are read from beneath it.
    """
    af.set_project_config_file(project_root_path + "/project.yaml")
    project_name = af.project_config().get_project_name()
    prefix = project_name + "."

    validate_trigger = af.external_trigger(name='validate')
    push_trigger = af.external_trigger(name='push')

    workflow_config = project_root_path + '/resources/workflow_config.yaml'
    with af.global_config_file(workflow_config):
        # Per the original author's note, 'train_job' is configured as a
        # periodic job that runs every `interval` seconds as defined in
        # workflow_config.yaml (that file is not visible from here).
        with af.config('train_job'):
            train_meta = af.register_example(
                name=prefix + 'train_example',
                support_type=ExampleSupportType.EXAMPLE_BATCH,
                batch_uri=EXAMPLE_URI.format('train'))
            train_source = af.read_example(
                example_info=train_meta,
                executor=PythonObjectExecutor(python_object=ExampleReader()))
            train_features = af.transform(
                input_data_list=[train_source],
                executor=PythonObjectExecutor(
                    python_object=ExampleTransformer()))
            model_meta = af.register_model(
                model_name=prefix + 'logistic-regression',
                model_type=ModelType.SAVED_MODEL,
                model_desc='logistic regression model')
            af.train(
                input_data_list=[train_features],
                executor=PythonObjectExecutor(python_object=ModelTrainer()),
                model_info=model_meta)

        with af.config('validate_job'):
            # NOTE(review): this example is declared EXAMPLE_STREAM but is
            # given a batch_uri - confirm that the combination is intended.
            validate_meta = af.register_example(
                name=prefix + 'validate_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                batch_uri=EXAMPLE_URI.format('evaluate'))
            validate_source = af.read_example(
                example_info=validate_meta,
                executor=PythonObjectExecutor(
                    python_object=ValidateExampleReader()))
            validate_features = af.transform(
                input_data_list=[validate_source],
                executor=PythonObjectExecutor(
                    python_object=ValidateTransformer()))
            # The artifact name is handed to the validator as well.
            validation_artifact = prefix + 'validate_artifact'
            af.register_artifact(
                name=validation_artifact,
                batch_uri=get_file_dir(__file__) + '/validate_result')
            validator = af.model_validate(
                input_data_list=[validate_features],
                model_info=model_meta,
                executor=PythonObjectExecutor(
                    python_object=ModelValidator(validation_artifact)))

        with af.config('push_job'):
            # The artifact name is handed to the pusher as well.
            pushed_artifact = prefix + 'push_model_artifact'
            af.register_artifact(
                name=pushed_artifact,
                batch_uri=get_file_dir(__file__) + '/pushed_model')
            pusher = af.push_model(
                model_info=model_meta,
                executor=PythonObjectExecutor(
                    python_object=ModelPusher(pushed_artifact)))

        with af.config('predict_job'):
            predict_meta = af.register_example(
                name=prefix + 'predict_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('predict'))
            predict_source = af.read_example(
                example_info=predict_meta,
                executor=PythonObjectExecutor(
                    python_object=PredictExampleReader()))
            predict_features = af.transform(
                input_data_list=[predict_source],
                executor=PythonObjectExecutor(
                    python_object=PredictTransformer()))
            predictions = af.predict(
                input_data_list=[predict_features],
                model_info=model_meta,
                executor=PythonObjectExecutor(python_object=ModelPredictor()))
            # Predictions are written to the 'predict_result' stream example.
            sink_meta = af.register_example(
                name=prefix + 'write_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=get_file_dir(__file__) + '/predict_result')
            af.write_example(
                input_data=predictions,
                example_info=sink_meta,
                executor=PythonObjectExecutor(python_object=ExampleWriter()))

    # Control edges driven by model-version events of the trained model:
    # the validator is attached to MODEL_GENERATED and the pusher to
    # MODEL_VALIDATED.
    af.model_version_control_dependency(
        src=validator,
        model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
        dependency=validate_trigger,
        model_name=model_meta.name)
    af.model_version_control_dependency(
        src=pusher,
        model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
        dependency=push_trigger,
        model_name=model_meta.name)

    # The project name doubles as the Airflow DAG id.
    airflow_dag_id = project_name
    af.deploy_to_airflow(project_root_path, dag_id=airflow_dag_id)
    af.run(project_path=project_root_path,
           dag_id=airflow_dag_id,
           scheduler_type=SchedulerType.AIRFLOW)
def run_workflow():
    """Define the train, validate, push and predict jobs and start the workflow.

    A model-version-event action is registered for job 'validate' on
    MODEL_GENERATED and for job 'push' on MODEL_VALIDATED of the
    'logistic-regression' model. The current workflow is then submitted and
    a new execution of it is started.
    """
    af.init_ai_flow_context()
    prefix = af.current_project_config().get_project_name() + "."

    # Per the original author's note, 'train' is configured as a periodic
    # job that runs every `interval` seconds as defined in
    # workflow_config.yaml (that file is not visible from here).
    with af.job_config('train'):
        train_meta = af.register_dataset(name=prefix + 'train_dataset',
                                         uri=DATASET_URI.format('train'))
        train_source = af.read_dataset(
            dataset_info=train_meta,
            read_dataset_processor=DatasetReader())
        train_features = af.transform(
            input=[train_source],
            transform_processor=DatasetTransformer())
        model_meta = af.register_model(
            model_name=prefix + 'logistic-regression',
            model_desc='logistic regression model')
        af.train(input=[train_features],
                 training_processor=ModelTrainer(),
                 model_info=model_meta)

    with af.job_config('validate'):
        validate_meta = af.register_dataset(
            name=prefix + 'validate_dataset',
            uri=DATASET_URI.format('evaluate'))
        validate_source = af.read_dataset(
            dataset_info=validate_meta,
            read_dataset_processor=ValidateDatasetReader())
        validate_features = af.transform(
            input=[validate_source],
            transform_processor=ValidateTransformer())
        # The artifact name is handed to the validator as well.
        validation_artifact = prefix + 'validate_artifact'
        af.register_artifact(
            name=validation_artifact,
            uri=get_file_dir(__file__) + '/validate_result')
        af.model_validate(
            input=[validate_features],
            model_info=model_meta,
            model_validation_processor=ModelValidator(validation_artifact))

    with af.job_config('push'):
        # The artifact name is handed to the pusher as well.
        pushed_artifact = prefix + 'push_model_artifact'
        af.register_artifact(
            name=pushed_artifact,
            uri=get_file_dir(__file__) + '/pushed_model')
        af.push_model(
            model_info=model_meta,
            pushing_model_processor=ModelPusher(pushed_artifact))

    with af.job_config('predict'):
        predict_meta = af.register_dataset(
            name=prefix + 'predict_dataset',
            uri=DATASET_URI.format('predict'))
        predict_source = af.read_dataset(
            dataset_info=predict_meta,
            read_dataset_processor=PredictDatasetReader())
        predict_features = af.transform(
            input=[predict_source],
            transform_processor=PredictTransformer())
        predictions = af.predict(input=[predict_features],
                                 model_info=model_meta,
                                 prediction_processor=ModelPredictor())
        # Predictions are written to 'predict_result' next to this module.
        sink_meta = af.register_dataset(
            name=prefix + 'write_dataset',
            uri=get_file_dir(__file__) + '/predict_result')
        af.write_dataset(input=predictions,
                         dataset_info=sink_meta,
                         write_dataset_processor=DatasetWriter())

    # Model-version events drive the follow-up jobs: 'validate' reacts to
    # MODEL_GENERATED and 'push' reacts to MODEL_VALIDATED.
    af.action_on_model_version_event(
        job_name='validate',
        model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
        model_name=model_meta.name)
    af.action_on_model_version_event(
        job_name='push',
        model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
        model_name=model_meta.name)

    af.workflow_operation.submit_workflow(
        af.current_workflow_config().workflow_name)
    af.workflow_operation.start_new_workflow_execution(
        af.current_workflow_config().workflow_name)
def run_workflow():
    """Define the train, validate, push and predict jobs and start the workflow.

    Training uses ``TrainDatasetReader`` and ``TrainDatasetTransformer``;
    predictions are written to the 'export_dataset' dataset. A
    model-version-event action is registered for job 'validate' on
    MODEL_GENERATED and for job 'push' on MODEL_VALIDATED. The current
    workflow is then submitted and a new execution of it is started.
    """
    af.init_ai_flow_context()
    prefix = af.current_project_config().get_project_name() + "."

    with af.job_config('train'):
        train_meta = af.register_dataset(name=prefix + 'train_dataset',
                                         uri=DATASET_URI.format('train'))
        train_source = af.read_dataset(
            dataset_info=train_meta,
            read_dataset_processor=TrainDatasetReader())
        train_features = af.transform(
            input=[train_source],
            transform_processor=TrainDatasetTransformer())
        model_meta = af.register_model(
            model_name=prefix + 'logistic-regression',
            model_desc='logistic regression model')
        af.train(input=[train_features],
                 training_processor=ModelTrainer(),
                 model_info=model_meta)

    with af.job_config('validate'):
        validate_meta = af.register_dataset(
            name=prefix + 'validate_dataset',
            uri=DATASET_URI.format('evaluate'))
        validate_source = af.read_dataset(
            dataset_info=validate_meta,
            read_dataset_processor=ValidateDatasetReader())
        validate_features = af.transform(
            input=[validate_source],
            transform_processor=ValidateTransformer())
        # The artifact name is handed to the validator as well.
        validation_artifact = prefix + 'validate_artifact'
        af.register_artifact(
            name=validation_artifact,
            uri=get_file_dir(__file__) + '/validate_result')
        af.model_validate(
            input=[validate_features],
            model_info=model_meta,
            model_validation_processor=ModelValidator(validation_artifact))

    with af.job_config('push'):
        # The artifact name is handed to the pusher as well.
        pushed_artifact = prefix + 'push_model_artifact'
        af.register_artifact(
            name=pushed_artifact,
            uri=get_file_dir(__file__) + '/pushed_model')
        af.push_model(
            model_info=model_meta,
            pushing_model_processor=ModelPusher(pushed_artifact))

    with af.job_config('predict'):
        predict_meta = af.register_dataset(
            name=prefix + 'predict_dataset',
            uri=DATASET_URI.format('predict'))
        predict_source = af.read_dataset(
            dataset_info=predict_meta,
            read_dataset_processor=PredictDatasetReader())
        predict_features = af.transform(
            input=[predict_source],
            transform_processor=PredictTransformer())
        predictions = af.predict(input=[predict_features],
                                 model_info=model_meta,
                                 prediction_processor=ModelPredictor())
        export_meta = af.register_dataset(
            name=prefix + 'export_dataset',
            uri=get_file_dir(__file__) + '/predict_result')
        af.write_dataset(input=predictions,
                         dataset_info=export_meta,
                         write_dataset_processor=DatasetWriter())

    # 'validate' reacts to MODEL_GENERATED, 'push' to MODEL_VALIDATED.
    af.action_on_model_version_event(
        job_name='validate',
        model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
        model_name=model_meta.name)
    af.action_on_model_version_event(
        job_name='push',
        model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
        model_name=model_meta.name)

    af.workflow_operation.submit_workflow(
        af.current_workflow_config().workflow_name)
    af.workflow_operation.start_new_workflow_execution(
        af.current_workflow_config().workflow_name)
def run_workflow():
    """Define five jobs chained by job status and start the workflow.

    The jobs 'train', 'evaluate', 'validate', 'push' and 'predict' are
    declared, then linked with ``af.action_on_job_status`` in the order
    train -> evaluate -> validate -> push -> predict. The current workflow
    is then submitted and a new execution of it is started.
    """
    af.init_ai_flow_context()
    prefix = af.current_project_config().get_project_name() + "."

    with af.job_config('train'):
        train_meta = af.register_dataset(name=prefix + 'train_dataset',
                                         uri=DATASET_URI.format('train'))
        train_source = af.read_dataset(
            dataset_info=train_meta,
            read_dataset_processor=DatasetReader())
        train_features = af.transform(
            input=[train_source],
            transform_processor=DatasetTransformer())
        model_meta = af.register_model(
            model_name=prefix + 'logistic-regression',
            model_desc='logistic regression model')
        af.train(input=[train_features],
                 training_processor=ModelTrainer(),
                 model_info=model_meta)

    with af.job_config('evaluate'):
        evaluate_meta = af.register_dataset(
            name=prefix + 'evaluate_dataset',
            uri=DATASET_URI.format('evaluate'))
        evaluate_source = af.read_dataset(
            dataset_info=evaluate_meta,
            read_dataset_processor=EvaluateDatasetReader())
        evaluate_features = af.transform(
            input=[evaluate_source],
            transform_processor=EvaluateTransformer())
        # The artifact name is handed to the evaluator as well.
        evaluation_artifact = prefix + 'evaluate_artifact'
        af.register_artifact(
            name=evaluation_artifact,
            uri=get_file_dir(__file__) + '/evaluate_result')
        af.evaluate(
            input=[evaluate_features],
            model_info=model_meta,
            evaluation_processor=ModelEvaluator(evaluation_artifact))

    with af.job_config('validate'):
        # Validation reads the same 'evaluate' dataset URI as the evaluate job.
        validate_meta = af.register_dataset(
            name=prefix + 'validate_dataset',
            uri=DATASET_URI.format('evaluate'))
        validate_source = af.read_dataset(
            dataset_info=validate_meta,
            read_dataset_processor=ValidateDatasetReader())
        validate_features = af.transform(
            input=[validate_source],
            transform_processor=ValidateTransformer())
        # The artifact name is handed to the validator as well.
        validation_artifact = prefix + 'validate_artifact'
        af.register_artifact(
            name=validation_artifact,
            uri=get_file_dir(__file__) + '/validate_result')
        af.model_validate(
            input=[validate_features],
            model_info=model_meta,
            model_validation_processor=ModelValidator(validation_artifact))

    with af.job_config('push'):
        # The artifact name is handed to the pusher as well.
        pushed_artifact = prefix + 'push_model_artifact'
        af.register_artifact(
            name=pushed_artifact,
            uri=get_file_dir(__file__) + '/pushed_model')
        af.push_model(
            model_info=model_meta,
            pushing_model_processor=ModelPusher(pushed_artifact))

    with af.job_config('predict'):
        predict_meta = af.register_dataset(
            name=prefix + 'predict_dataset',
            uri=DATASET_URI.format('predict'))
        predict_source = af.read_dataset(
            dataset_info=predict_meta,
            read_dataset_processor=PredictDatasetReader())
        predict_features = af.transform(
            input=[predict_source],
            transform_processor=PredictTransformer())
        predictions = af.predict(input=[predict_features],
                                 model_info=model_meta,
                                 prediction_processor=ModelPredictor())
        # Predictions are written to 'predict_result' next to this module.
        sink_meta = af.register_dataset(
            name=prefix + 'write_dataset',
            uri=get_file_dir(__file__) + '/predict_result')
        af.write_dataset(input=predictions,
                         dataset_info=sink_meta,
                         write_dataset_processor=DatasetWriter())

    # Control edges: train -> evaluate -> validate -> push -> predict.
    af.action_on_job_status('evaluate', 'train')
    af.action_on_job_status('validate', 'evaluate')
    af.action_on_job_status('push', 'validate')
    af.action_on_job_status('predict', 'push')

    af.workflow_operation.submit_workflow(
        af.current_workflow_config().workflow_name)
    af.workflow_operation.start_new_workflow_execution(
        af.current_workflow_config().workflow_name)