def test_batch_model_validate(self):
    input_example_meta = af.register_example(name='batch_train_example',
                                             support_type=ExampleSupportType.EXAMPLE_BOTH)
    model_meta = af.register_model(model_name='mnist_model',
                                   model_type=ModelType.SAVED_MODEL)
    with af.config(af.BaseJobConfig(platform='local', engine='python', job_name='evaluate')):
        input_example = af.read_example(example_info=input_example_meta,
                                        executor=PythonObjectExecutor(python_object=ReadBatchExample()))
        batch_train = af.train(input_data_list=[input_example],
                               executor=PythonObjectExecutor(python_object=TrainBatchMnistModel()),
                               model_info=model_meta)
        model_validate = af.model_validate(input_data_list=[input_example],
                                           model_info=model_meta,
                                           executor=PythonObjectExecutor(python_object=BatchModelValidate()),
                                           output_num=0)
    # Validation may only start after training has finished.
    af.stop_before_control_dependency(model_validate, batch_train)
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)
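# The snippets in this section wrap user-defined classes such as ReadBatchExample
# in PythonObjectExecutor but never show them. As a minimal sketch of the pattern
# (the class below is hypothetical; the import path and the attribute chain on
# function_context are assumptions based on the surrounding API):
import numpy as np
from python_ai_flow import Executor  # assumed import path

class ReadBatchExampleSketch(Executor):
    def execute(self, function_context, input_list):
        # Load the registered example's batch data and return it as a
        # one-element list: one entry per output channel.
        data = np.load(function_context.node_spec.example_meta.batch_uri)
        return [data]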
def run_project(project_root_path):
    af.set_project_config_file(project_root_path + '/project.yaml')
    # Config the command line job: platform is local and engine is cmd_line.
    cmd_job_config = af.BaseJobConfig(platform=LocalPlatform.platform(), engine=CMDEngine().engine())
    with af.config(cmd_job_config):
        # Command line job executor
        cmd_job = af.user_define_operation(executor=CmdExecutor(cmd_line="echo Start AI flow"))
    # Config the python job: platform is local and engine is python.
    python_job_config = af.BaseJobConfig(platform=LocalPlatform.platform(), engine=PythonEngine.engine())
    # Set the execution mode of this python job to BATCH, which means jobs
    # with this config run in batch form.
    python_job_config.exec_mode = af.ExecutionMode.BATCH
    with af.config(python_job_config):
        # Path of the source data (under the '..../simple_transform_airflow' dir)
        source_path = os.path.dirname(os.path.abspath(__file__)) + '/source_data.csv'
        # Path of the sink data
        sink_path = os.path.dirname(os.path.abspath(__file__)) + '/sink_data.csv'
        # To make the project portable, we register the example in the metadata service.
        read_example_meta = af.register_example(name='read_example',
                                                support_type=ExampleSupportType.EXAMPLE_BATCH,
                                                data_format='csv',
                                                data_type='pandas',
                                                batch_uri=source_path)
        # Read the training example using af.read_example();
        # example_info is the meta information of the example.
        read_example_channel = af.read_example(example_info=read_example_meta,
                                               exec_args=ExecuteArgs(
                                                   batch_properties=Args(header=None,
                                                                         names=["a", "b", "c"])))
        # Transform examples using af.transform()
        transform_channel = af.transform(input_data_list=[read_example_channel],
                                         executor=PythonObjectExecutor(python_object=SimpleTransform()))
        write_example_meta = af.register_example(name='write_example',
                                                 support_type=ExampleSupportType.EXAMPLE_BATCH,
                                                 data_format='csv',
                                                 data_type='pandas',
                                                 batch_uri=sink_path)
        # Write the example to the sink path.
        write = af.write_example(input_data=transform_channel,
                                 example_info=write_example_meta,
                                 exec_args=ExecuteArgs(
                                     batch_properties=Args(sep=',', header=False, index=False)))
    # Add a control dependency: the read_example job starts right after the
    # command line job finishes.
    af.stop_before_control_dependency(read_example_channel, cmd_job)
    transform_dag = 'simple_transform'
    af.deploy_to_airflow(project_root_path, dag_id=transform_dag)
    context = af.run(project_path=project_root_path,
                     dag_id=transform_dag,
                     scheduler_type=SchedulerType.AIRFLOW)
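# SimpleTransform above is referenced but not shown. A minimal sketch of such a
# transform executor (hypothetical body; only the PythonObjectExecutor wrapping
# and the "a"/"b"/"c" column names come from the snippet above):
from python_ai_flow import Executor  # assumed import path

class SimpleTransformSketch(Executor):
    def execute(self, function_context, input_list):
        # input_list[0] is the pandas DataFrame produced by read_example.
        df = input_list[0]
        df['a'] = df['a'] * 2  # placeholder transformation
        return [df]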
def test_read_example_with_numpy_npz(self):
    npy_name = 'test.npz'
    np.savez(npy_name, np.arange(10), np.sin(np.arange(10)))
    input_example_meta = af.register_example(
        name='input_numpy_example',
        data_type='numpy',
        data_format='npz',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        batch_uri=os.path.abspath(os.path.dirname(__file__) + "/" + npy_name))
    output_example_meta_first = af.register_example(
        name='output_numpy_example_1',
        data_type='numpy',
        data_format='npz',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        batch_uri=os.path.abspath(os.path.dirname(__file__) + '/numpy_output_1.npz'))
    output_example_meta_second = af.register_example(
        name='output_numpy_example_2',
        data_type='numpy',
        data_format='npz',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        batch_uri=os.path.abspath(os.path.dirname(__file__) + '/numpy_output_2.npz'))
    with af.config(af.BaseJobConfig(platform='local', engine='python', job_name='test_npz')):
        example_channel = af.read_example(example_info=input_example_meta)
        # output_num=2 makes the transform channel indexable with two outputs.
        transform_channel = af.transform(
            input_data_list=[example_channel],
            executor=PythonObjectExecutor(python_object=TransformTrainData()),
            output_num=2)
        af.write_example(input_data=transform_channel[0],
                         example_info=output_example_meta_first)
        af.write_example(input_data=transform_channel[1],
                         example_info=output_example_meta_second)
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)
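# TransformTrainData above is declared with output_num=2, so its result list
# must contain one entry per output channel (transform_channel[0] and [1]).
# A minimal sketch (hypothetical body; the exact shape input_list takes from
# the built-in npz reader is an assumption):
from python_ai_flow import Executor  # assumed import path

class TransformTrainDataSketch(Executor):
    def execute(self, function_context, input_list):
        # input_list[0] is assumed to hold the two arrays saved in test.npz.
        first, second = input_list[0]
        # Two entries in the returned list -> two downstream channels.
        return [[first], [second]]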
def run_project(project_root_path): af.set_project_config_file(project_root_path + "/project.yaml") project_name = af.project_config().get_project_name() artifact_prefix = project_name + "." validate_trigger = af.external_trigger(name='validate') push_trigger = af.external_trigger(name='push') with af.global_config_file(project_root_path + '/resources/workflow_config.yaml'): with af.config('train_job'): train_example = af.register_example(name=artifact_prefix + 'train_example', support_type=ExampleSupportType.EXAMPLE_STREAM, stream_uri=EXAMPLE_URI.format('train')) train_read_example = af.read_example(example_info=train_example, executor=PythonObjectExecutor(python_object=TrainExampleReader())) train_transform = af.transform(input_data_list=[train_read_example], executor=PythonObjectExecutor(python_object=TrainExampleTransformer())) train_model = af.register_model(model_name=artifact_prefix + 'logistic-regression', model_type=ModelType.SAVED_MODEL, model_desc='logistic regression model') train_channel = af.train(input_data_list=[train_transform], executor=PythonObjectExecutor(python_object=ModelTrainer()), model_info=train_model) with af.config('validate_job'): validate_example = af.register_example(name=artifact_prefix + 'validate_example', support_type=ExampleSupportType.EXAMPLE_STREAM, stream_uri=EXAMPLE_URI.format('evaluate'), data_format='npz') validate_read_example = af.read_example(example_info=validate_example, executor=PythonObjectExecutor( python_object=ValidateExampleReader())) validate_transform = af.transform(input_data_list=[validate_read_example], executor=PythonObjectExecutor(python_object=ValidateTransformer())) validate_artifact_name = artifact_prefix + 'validate_artifact' validate_artifact = af.register_artifact(name=validate_artifact_name, stream_uri=get_file_dir(__file__) + '/validate_result') validate_channel = af.model_validate(input_data_list=[validate_transform], model_info=train_model, executor=PythonObjectExecutor( python_object=ModelValidator(validate_artifact_name)), ) with af.config('push_job'): # Push model to serving # Register metadata of pushed model push_model_artifact_name = artifact_prefix + 'push_model_artifact' push_model_artifact = af.register_artifact(name=push_model_artifact_name, stream_uri=get_file_dir(__file__) + '/pushed_model') push_channel = af.push_model(model_info=train_model, executor=PythonObjectExecutor( python_object=ModelPusher(push_model_artifact_name))) with af.config('predict_job'): predict_example = af.register_example(name=artifact_prefix + 'predict_example', support_type=ExampleSupportType.EXAMPLE_STREAM, stream_uri=EXAMPLE_URI.format('predict')) predict_read_example = af.read_example(example_info=predict_example, executor=PythonObjectExecutor(python_object=PredictExampleReader())) predict_transform = af.transform(input_data_list=[predict_read_example], executor=PythonObjectExecutor(python_object=PredictTransformer())) predict_channel = af.predict(input_data_list=[predict_transform], model_info=train_model, executor=PythonObjectExecutor(python_object=ModelPredictor())) write_example = af.register_example(name=artifact_prefix + 'export_example', support_type=ExampleSupportType.EXAMPLE_STREAM, stream_uri=get_file_dir(__file__) + '/predict_result') af.write_example(input_data=predict_channel, example_info=write_example, executor=PythonObjectExecutor(python_object=ExampleWriter())) af.model_version_control_dependency(src=validate_channel, model_version_event_type=ModelVersionEventType.MODEL_GENERATED, dependency=validate_trigger, 
model_name=train_model.name) af.model_version_control_dependency(src=push_channel, model_version_event_type=ModelVersionEventType.MODEL_VALIDATED, dependency=push_trigger, model_name=train_model.name) # Run workflow transform_dag = project_name af.deploy_to_airflow(project_root_path, dag_id=transform_dag) af.run(project_path=project_root_path, dag_id=transform_dag, scheduler_type=SchedulerType.AIRFLOW)
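# ModelValidator and ModelPusher above take the artifact name as a constructor
# argument. A minimal sketch of that pattern (hypothetical body; the real
# validation logic is not shown in this section):
from python_ai_flow import Executor  # assumed import path

class ModelValidatorSketch(Executor):
    def __init__(self, artifact_name):
        super().__init__()
        self.artifact_name = artifact_name

    def execute(self, function_context, input_list):
        # Compare the newly generated model version against the currently
        # deployed one and record the decision under the registered artifact's
        # URI; a passing model leads to the MODEL_VALIDATED event that the
        # push_trigger listens for.
        return []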
def run_project(project_root_path):
    af.set_project_config_file(example_util.get_project_config_file(project_root_path))
    evaluate_trigger = af.external_trigger(name='evaluate')
    validate_trigger = af.external_trigger(name='validate')
    with af.engine('python'):
        project_name = example_util.get_parent_dir_name(__file__)
        project_meta = af.register_project(name=project_name,
                                           uri=project_root_path,
                                           project_type='local python')
        train_example = af.register_example(
            name='train_example',
            support_type=ExampleSupportType.EXAMPLE_STREAM,
            stream_uri=EXAMPLE_URI.format('train'),
            data_format='npz')
        train_read_example = af.read_example(
            example_info=train_example,
            executor=PythonObjectExecutor(python_object=TrainExampleReader()))
        train_transform = af.transform(
            input_data_list=[train_read_example],
            executor=PythonObjectExecutor(python_object=TrainExampleTransformer()))
        train_model = af.register_model(model_name='logistic-regression',
                                        model_type=ModelType.SAVED_MODEL,
                                        model_desc='logistic regression model')
        train_channel = af.train(
            input_data_list=[train_transform],
            executor=PythonObjectExecutor(python_object=ModelTrainer()),
            model_info=train_model)
        evaluate_example = af.register_example(
            name='evaluate_example',
            support_type=ExampleSupportType.EXAMPLE_STREAM,
            stream_uri=EXAMPLE_URI.format('evaluate'),
            data_format='npz')
        evaluate_read_example = af.read_example(
            example_info=evaluate_example,
            executor=PythonObjectExecutor(python_object=EvaluateExampleReader()))
        evaluate_transform = af.transform(
            input_data_list=[evaluate_read_example],
            executor=PythonObjectExecutor(python_object=EvaluateTransformer()))
        evaluate_artifact = af.register_artifact(
            name='evaluate_artifact2',
            stream_uri=get_file_dir(__file__) + '/evaluate_model')
        evaluate_channel = af.evaluate(
            input_data_list=[evaluate_transform],
            model_info=train_model,
            executor=PythonObjectExecutor(python_object=ModelEvaluator()))
        validate_example = af.register_example(
            name='validate_example',
            support_type=ExampleSupportType.EXAMPLE_STREAM,
            stream_uri=EXAMPLE_URI.format('evaluate'),
            data_format='npz')
        validate_read_example = af.read_example(
            example_info=validate_example,
            executor=PythonObjectExecutor(python_object=ValidateExampleReader()))
        validate_transform = af.transform(
            input_data_list=[validate_read_example],
            executor=PythonObjectExecutor(python_object=ValidateTransformer()))
        validate_artifact = af.register_artifact(
            name='validate_artifact',
            stream_uri=get_file_dir(__file__) + '/validate_model')
        validate_channel = af.model_validate(
            input_data_list=[validate_transform],
            model_info=train_model,
            executor=PythonObjectExecutor(python_object=ModelValidator()))
        predict_example = af.register_example(
            name='predict_example',
            support_type=ExampleSupportType.EXAMPLE_STREAM,
            stream_uri=EXAMPLE_URI.format('predict'),
            data_format='npz')
        predict_read_example = af.read_example(
            example_info=predict_example,
            executor=PythonObjectExecutor(python_object=PredictExampleReader()))
        predict_transform = af.transform(
            input_data_list=[predict_read_example],
            executor=PythonObjectExecutor(python_object=PredictTransformer()))
        predict_channel = af.predict(
            input_data_list=[predict_transform],
            model_info=train_model,
            executor=PythonObjectExecutor(python_object=ModelPredictor()))
        write_example = af.register_example(
            name='export_example',
            support_type=ExampleSupportType.EXAMPLE_STREAM,
            stream_uri=get_file_dir(__file__) + '/predict_model',
            data_format='fs')
        af.write_example(
            input_data=predict_channel,
            example_info=write_example,
            executor=PythonObjectExecutor(python_object=ExampleWriter()))

    # The evaluate and validate jobs both start when a new model version is generated.
    af.model_version_control_dependency(
        src=evaluate_channel,
        model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
        dependency=evaluate_trigger,
        model_name=train_model.name)
    af.model_version_control_dependency(
        src=validate_channel,
        model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
        dependency=validate_trigger,
        model_name=train_model.name)

    # Run the workflow.
    transform_dag = project_name
    af.deploy_to_airflow(project_root_path, dag_id=transform_dag)
    context = af.run(project_path=project_root_path,
                     dag_id=transform_dag,
                     scheduler_type=SchedulerType.AIRFLOW)
def run_project(project_root_path): af.set_project_config_file(project_root_path + "/project.yaml") project_name = af.project_config().get_project_name() artifact_prefix = project_name + "." validate_trigger = af.external_trigger(name='validate') push_trigger = af.external_trigger(name='push') with af.global_config_file(project_root_path + '/resources/workflow_config.yaml'): # the config of train job is a periodic job which means it will # run every `interval`(defined in workflow_config.yaml) seconds with af.config('train_job'): # Register metadata raw training data(example) and read example(i.e. training dataset) train_example = af.register_example(name=artifact_prefix + 'train_example', support_type=ExampleSupportType.EXAMPLE_BATCH, batch_uri=EXAMPLE_URI.format('train')) train_read_example = af.read_example(example_info=train_example, executor=PythonObjectExecutor(python_object=ExampleReader())) # Transform(preprocessing) example train_transform = af.transform(input_data_list=[train_read_example], executor=PythonObjectExecutor(python_object=ExampleTransformer())) # Register model metadata and train model train_model = af.register_model(model_name=artifact_prefix + 'logistic-regression', model_type=ModelType.SAVED_MODEL, model_desc='logistic regression model') train_channel = af.train(input_data_list=[train_transform], executor=PythonObjectExecutor(python_object=ModelTrainer()), model_info=train_model) with af.config('validate_job'): # Validation of model # Read validation dataset and validate model before it is used to predict validate_example = af.register_example(name=artifact_prefix + 'validate_example', support_type=ExampleSupportType.EXAMPLE_STREAM, batch_uri=EXAMPLE_URI.format('evaluate')) validate_read_example = af.read_example(example_info=validate_example, executor=PythonObjectExecutor( python_object=ValidateExampleReader())) validate_transform = af.transform(input_data_list=[validate_read_example], executor=PythonObjectExecutor(python_object=ValidateTransformer())) validate_artifact_name = artifact_prefix + 'validate_artifact' validate_artifact = af.register_artifact(name=validate_artifact_name, batch_uri=get_file_dir(__file__) + '/validate_result') validate_channel = af.model_validate(input_data_list=[validate_transform], model_info=train_model, executor=PythonObjectExecutor( python_object=ModelValidator(validate_artifact_name))) with af.config('push_job'): # Push model to serving # Register metadata of pushed model push_model_artifact_name = artifact_prefix + 'push_model_artifact' push_model_artifact = af.register_artifact(name=push_model_artifact_name, batch_uri=get_file_dir(__file__) + '/pushed_model') push_channel = af.push_model(model_info=train_model, executor=PythonObjectExecutor( python_object=ModelPusher(push_model_artifact_name))) with af.config('predict_job'): # Prediction(Inference) predict_example = af.register_example(name=artifact_prefix + 'predict_example', support_type=ExampleSupportType.EXAMPLE_STREAM, stream_uri=EXAMPLE_URI.format('predict')) predict_read_example = af.read_example(example_info=predict_example, executor=PythonObjectExecutor(python_object=PredictExampleReader())) predict_transform = af.transform(input_data_list=[predict_read_example], executor=PythonObjectExecutor(python_object=PredictTransformer())) predict_channel = af.predict(input_data_list=[predict_transform], model_info=train_model, executor=PythonObjectExecutor(python_object=ModelPredictor())) # Save prediction result write_example = af.register_example(name=artifact_prefix + 
'write_example', support_type=ExampleSupportType.EXAMPLE_STREAM, stream_uri=get_file_dir(__file__) + '/predict_result') af.write_example(input_data=predict_channel, example_info=write_example, executor=PythonObjectExecutor(python_object=ExampleWriter())) # Define relation graph connected by control edge: # Once a round of training is done, validator will be launched and # pusher will be launched if the new model is better. # Prediction will start once the first round of training is done and # when pusher pushes(deploys) a new model, the predictor will use the latest deployed model as well. af.model_version_control_dependency(src=validate_channel, model_version_event_type=ModelVersionEventType.MODEL_GENERATED, dependency=validate_trigger, model_name=train_model.name) af.model_version_control_dependency(src=push_channel, model_version_event_type=ModelVersionEventType.MODEL_VALIDATED, dependency=push_trigger, model_name=train_model.name) # Run workflow transform_dag = project_name af.deploy_to_airflow(project_root_path, dag_id=transform_dag) af.run(project_path=project_root_path, dag_id=transform_dag, scheduler_type=SchedulerType.AIRFLOW)
def run_project(project_root_path):
    af.set_project_config_file(example_util.get_project_config_file(project_root_path))
    # Config the python job: platform is local and engine is python.
    python_job_config = af.BaseJobConfig(platform=LocalPlatform.platform(),
                                         engine=PythonEngine.engine())
    # Set the execution mode of this python job to BATCH, which means jobs
    # with this config run in batch form.
    python_job_config.exec_mode = af.ExecutionMode.BATCH
    with af.config(python_job_config):
        project_name = example_util.get_parent_dir_name(__file__)
        project_meta = af.register_project(name=project_name,
                                           uri=project_root_path,
                                           project_type='local python')
        # Model training:
        # register the metadata of the raw training data (example) and read it
        # (i.e. the training dataset).
        train_example = af.register_example(
            name='train_example',
            support_type=ExampleSupportType.EXAMPLE_BATCH,
            batch_uri=EXAMPLE_URI.format('train'),
            data_format='npz')
        train_read_example = af.read_example(
            example_info=train_example,
            executor=PythonObjectExecutor(python_object=ExampleReader()))
        # Transform (preprocess) the example.
        train_transform = af.transform(
            input_data_list=[train_read_example],
            executor=PythonObjectExecutor(python_object=ExampleTransformer()))
        # Register the model metadata and train the model.
        train_model = af.register_model(model_name='logistic-regression',
                                        model_type=ModelType.SAVED_MODEL,
                                        model_desc='logistic regression model')
        train_channel = af.train(
            input_data_list=[train_transform],
            executor=PythonObjectExecutor(python_object=ModelTrainer()),
            model_info=train_model)
        # Model evaluation.
        evaluate_example = af.register_example(
            name='evaluate_example',
            support_type=ExampleSupportType.EXAMPLE_BATCH,
            batch_uri=EXAMPLE_URI.format('evaluate'),
            data_format='npz')
        evaluate_read_example = af.read_example(
            example_info=evaluate_example,
            executor=PythonObjectExecutor(python_object=EvaluateExampleReader()))
        evaluate_transform = af.transform(
            input_data_list=[evaluate_read_example],
            executor=PythonObjectExecutor(python_object=EvaluateTransformer()))
        # Register the disk path used to save the evaluation result.
        evaluate_artifact_name = 'evaluate_artifact'
        evaluate_artifact = af.register_artifact(
            name=evaluate_artifact_name,
            batch_uri=get_file_dir(__file__) + '/evaluate_model')
        # Evaluate the model.
        evaluate_channel = af.evaluate(
            input_data_list=[evaluate_transform],
            model_info=train_model,
            executor=PythonObjectExecutor(python_object=ModelEvaluator(evaluate_artifact_name)))
        # Model validation:
        # read the validation dataset and validate the model before it is used to predict.
        validate_example = af.register_example(
            name='validate_example',
            support_type=ExampleSupportType.EXAMPLE_BATCH,
            batch_uri=EXAMPLE_URI.format('evaluate'),
            data_format='npz')
        validate_read_example = af.read_example(
            example_info=validate_example,
            executor=PythonObjectExecutor(python_object=ValidateExampleReader()))
        validate_transform = af.transform(
            input_data_list=[validate_read_example],
            executor=PythonObjectExecutor(python_object=ValidateTransformer()))
        validate_artifact_name = 'validate_artifact'
        validate_artifact = af.register_artifact(
            name=validate_artifact_name,
            batch_uri=get_file_dir(__file__) + '/validate_model')
        validate_channel = af.model_validate(
            input_data_list=[validate_transform],
            model_info=train_model,
            executor=PythonObjectExecutor(python_object=ModelValidator(validate_artifact_name)))
        # Push the model to serving.
        # Register the metadata of the pushed model.
        push_model_artifact_name = 'push_model_artifact'
        push_model_artifact = af.register_artifact(
            name=push_model_artifact_name,
            batch_uri=get_file_dir(__file__) + '/pushed_model')
        push_model_channel = af.push_model(
            model_info=train_model,
            executor=PythonObjectExecutor(python_object=ModelPusher(push_model_artifact_name)))
        # Prediction (inference).
        predict_example = af.register_example(
            name='predict_example',
            support_type=ExampleSupportType.EXAMPLE_BATCH,
            batch_uri=EXAMPLE_URI.format('predict'),
            data_format='npz')
        predict_read_example = af.read_example(
            example_info=predict_example,
            executor=PythonObjectExecutor(python_object=PredictExampleReader()))
        predict_transform = af.transform(
            input_data_list=[predict_read_example],
            executor=PythonObjectExecutor(python_object=PredictTransformer()))
        predict_channel = af.predict(
            input_data_list=[predict_transform],
            model_info=train_model,
            executor=PythonObjectExecutor(python_object=ModelPredictor()))
        # Save the prediction result.
        write_example = af.register_example(
            name='write_example',
            support_type=ExampleSupportType.EXAMPLE_BATCH,
            batch_uri=get_file_dir(__file__) + '/predict_model',
            data_format='fs')
        af.write_example(
            input_data=predict_channel,
            example_info=write_example,
            executor=PythonObjectExecutor(python_object=ExampleWriter()))

    # Define the relation graph: train -> evaluate -> validate -> push -> predict.
    af.stop_before_control_dependency(evaluate_channel, train_channel)
    af.stop_before_control_dependency(validate_channel, evaluate_channel)
    af.stop_before_control_dependency(push_model_channel, validate_channel)
    af.stop_before_control_dependency(predict_channel, push_model_channel)

    # Run the workflow.
    transform_dag = 'batch_train_batch_predict'
    af.deploy_to_airflow(project_root_path, dag_id=transform_dag)
    context = af.run(project_path=project_root_path,
                     dag_id=transform_dag,
                     scheduler_type=SchedulerType.AIRFLOW)
def run_workflow():
    """Run the user-defined workflow definition."""
    train_data_file, predict_result_directory, merge_predict_result_path, \
        first_test_data_file, first_result_data_file = collect_data_file()
    # Prepare the workflow: example & model metadata registration.
    train_example_meta, predict_result_meta, merge_data_meta, first_test_example_meta, second_test_example_meta, \
        first_result_example_meta, second_result_example_meta, train_model_meta = \
        prepare_workflow(train_data_file=train_data_file,
                         predict_result_directory=predict_result_directory,
                         merge_predict_result_path=merge_predict_result_path,
                         first_test_data_file=first_test_data_file,
                         first_result_data_file=first_result_data_file)

    # Save proxima indexes under the following index path.
    index_path = '{}/codes/{}/'.format(os.environ['ENV_HOME'], os.environ['TASK_ID']) + 'test.index'

    # Python job configs for training, starting cluster serving and merging predict results.
    python_job_config_0 = BaseJobConfig(platform='local', engine='python', job_name='train')
    python_job_config_1 = BaseJobConfig(platform='local', engine='python', job_name='start_cluster_serving')
    python_job_config_2 = BaseJobConfig(platform='local', engine='python', job_name='merge_predict_result')

    # Flink job config to predict with cluster serving.
    global_job_config_1 = LocalFlinkJobConfig()
    global_job_config_1.local_mode = 'cluster'
    global_job_config_1.flink_home = os.environ['FLINK_HOME']
    global_job_config_1.job_name = 'cluster_serving'
    global_job_config_1.set_table_env_create_func(StreamTableEnvCreatorBuildIndex())
    # Flink job config to build the index.
    global_job_config_2 = LocalFlinkJobConfig()
    global_job_config_2.local_mode = 'cluster'
    global_job_config_2.flink_home = os.environ['FLINK_HOME']
    global_job_config_2.job_name = 'build_index'
    global_job_config_2.set_table_env_create_func(StreamTableEnvCreator())
    # Flink job config to find sick (history search).
    global_job_config_3 = LocalFlinkJobConfig()
    global_job_config_3.local_mode = 'cluster'
    global_job_config_3.flink_home = os.environ['FLINK_HOME']
    global_job_config_3.job_name = 'find_sick'
    global_job_config_3.set_table_env_create_func(StreamTableEnvCreator())
    # Flink job config for the online cluster.
    global_job_config_4 = LocalFlinkJobConfig()
    global_job_config_4.local_mode = 'cluster'
    global_job_config_4.flink_home = os.environ['FLINK_HOME']
    global_job_config_4.job_name = 'online_cluster'
    global_job_config_4.set_table_env_create_func(StreamTableEnvCreator())

    with af.config(python_job_config_0):
        # Under the first job config we construct the first job,
        # which trains an auto_encoder model.
        python_job_0_read_train_example = af.read_example(example_info=train_example_meta,
                                                          executor=PythonObjectExecutor(python_object=ReadCsvExample()))
        python_job_0_train_model = af.train(input_data_list=[python_job_0_read_train_example],
                                            executor=PythonObjectExecutor(python_object=TrainAutoEncoder()),
                                            model_info=train_model_meta,
                                            name='trainer_0')
    with af.config(python_job_config_1):
        python_job_1_cluster_serving_channel = af.cluster_serving(model_info=train_model_meta, parallelism=2)
        # python_job_1_cluster_serving_channel = af.cluster_serving(model_info=train_model_meta, parallelism=16)
    with af.config(global_job_config_1):
        flink_job_0_read_train_example = af.read_example(example_info=train_example_meta,
                                                         executor=FlinkPythonExecutor(python_object=ReadTrainExample()))
        flink_job_0_predict_model = af.predict(input_data_list=[flink_job_0_read_train_example],
                                               model_info=train_model_meta,
                                               executor=FlinkPythonExecutor(python_object=PredictAutoEncoderWithTrain()))
        flink_job_0_write_predict_data = af.write_example(input_data=flink_job_0_predict_model,
                                                          example_info=predict_result_meta,
                                                          executor=FlinkPythonExecutor(python_object=WritePredictResult()))
    with af.config(python_job_config_2):
        python_job_2_merge_train_data_file = af.user_define_operation(
            executor=PythonObjectExecutor(python_object=MergePredictResult()))
    with af.config(global_job_config_2):
        flink_job_1_read_train_example = af.read_example(example_info=merge_data_meta,
                                                         executor=FlinkPythonExecutor(python_object=ReadMergeExample()))
        flink_job_1_build_index_channel = af.transform(
            [flink_job_1_read_train_example],
            executor=FlinkPythonExecutor(python_object=BuildIndexExecutor(index_path, FloatDataType(), 128)))
    with af.config(global_job_config_3):
        flink_job_2_read_history_example = af.read_example(example_info=first_test_example_meta,
                                                           executor=FlinkPythonExecutor(python_object=ReadPredictExample()))
        flink_job_2_predict_model = af.predict(input_data_list=[flink_job_2_read_history_example],
                                               model_info=train_model_meta,
                                               executor=FlinkPythonExecutor(python_object=PredictAutoEncoder()))
        flink_job_2_transformed_data = af.transform(
            [flink_job_2_predict_model],
            executor=FlinkPythonExecutor(python_object=SearchExecutor(index_path, FloatDataType(), 2)))
        flink_job_2_read_train_example = af.read_example(example_info=train_example_meta,
                                                         executor=FlinkPythonExecutor(python_object=ReadTrainExample()))
        flink_job_2_join_channel = af.transform(
            input_data_list=[flink_job_2_transformed_data, flink_job_2_read_train_example],
            executor=FlinkPythonExecutor(python_object=FindHistory()))
        flink_job_2_write_result = af.write_example(input_data=flink_job_2_join_channel,
                                                    example_info=first_result_example_meta,
                                                    executor=FlinkPythonExecutor(python_object=SearchSink()))
    with af.config(global_job_config_4):
        flink_job_3_read_online_example = af.read_example(example_info=second_test_example_meta,
                                                          executor=FlinkPythonExecutor(python_object=ReadOnlinePredictExample()))
        flink_job_3_predict_model = af.predict(input_data_list=[flink_job_3_read_online_example],
                                               model_info=train_model_meta,
                                               executor=FlinkPythonExecutor(python_object=OnlinePredictAutoEncoder()))
        flink_job_3_transformed_data = af.transform(
            [flink_job_3_predict_model],
            executor=FlinkPythonExecutor(python_object=SearchExecutor3(index_path, FloatDataType(), 2)))
        af.write_example(input_data=flink_job_3_transformed_data,
                         example_info=second_result_example_meta,
                         executor=FlinkPythonExecutor(python_object=WriteSecondResult()))

    # Chain the jobs with control dependencies:
    # train -> cluster serving -> Flink predict -> merge results -> build index
    # -> history search; the online search job starts after the history search
    # result has been written.
    af.stop_before_control_dependency(python_job_1_cluster_serving_channel, python_job_0_train_model)
    af.stop_before_control_dependency(flink_job_0_read_train_example, python_job_1_cluster_serving_channel)
    af.stop_before_control_dependency(python_job_2_merge_train_data_file, flink_job_0_read_train_example)
    af.stop_before_control_dependency(flink_job_1_build_index_channel, python_job_2_merge_train_data_file)
    af.stop_before_control_dependency(flink_job_2_read_history_example, flink_job_1_build_index_channel)
    af.stop_before_control_dependency(flink_job_3_read_online_example, flink_job_2_write_result)
    workflow_id = af.run(get_project_path() + '/')
    res = af.wait_workflow_execution_finished(workflow_id)
    sys.exit(res)
def run_workflow():
    """Run the user-defined workflow definition."""
    train_example_meta, label_example_meta, test_example_meta, test_output_example_meta, train_model_meta = \
        prepare_workflow()

    python_job_config_0 = BaseJobConfig(job_name='read_train', platform='local', engine='python')
    python_job_config_1 = BaseJobConfig(job_name='train', platform='local', engine='python')
    flink_job_config_2 = LocalFlinkJobConfig()
    flink_job_config_2.job_name = 'test'
    flink_job_config_2.local_mode = 'python'
    flink_job_config_2.flink_home = os.environ['FLINK_HOME']
    flink_job_config_2.set_table_env_create_func(MyStreamTableEnvCreator())

    with af.config(python_job_config_0):
        # Read the training data and labels, and persist the training data.
        python_job_0_read_train_data = af.read_example(
            example_info=train_example_meta,
            executor=PythonObjectExecutor(python_object=ReadTrainCsvExample()))
        python_job_0_read_label_data = af.read_example(
            example_info=label_example_meta,
            executor=PythonObjectExecutor(python_object=ReadLabelCsvExample()))
        write_train_data_example = af.register_example(
            name='write_train_data',
            support_type=ExampleSupportType.EXAMPLE_BATCH,
            data_type='pandas',
            data_format='csv',
            batch_uri='/tmp/write_train_data.csv')
        python_job_0_write_train_result = af.write_example(
            input_data=python_job_0_read_train_data,
            example_info=write_train_data_example,
            executor=PythonObjectExecutor(python_object=WriteTrainCsvExample()))
    with af.config(python_job_config_1):
        # Train the model on the data and labels read by the first job.
        python_job_1_train_model = af.train(
            name='trainer_0',
            input_data_list=[python_job_0_read_train_data, python_job_0_read_label_data],
            executor=PythonObjectExecutor(python_object=TrainModel()),
            model_info=train_model_meta)
    with af.config(flink_job_config_2):
        # Predict on the test data with Flink and write the result.
        flink_job_2_read_test_data = af.read_example(
            example_info=test_example_meta,
            executor=FlinkPythonExecutor(python_object=ReadTestCsvExample()))
        flink_job_2_predict_test_data = af.transform(
            input_data_list=[flink_job_2_read_test_data],
            executor=FlinkPythonExecutor(python_object=PredictTestLabelExecutor()))
        write_result = af.write_example(
            input_data=flink_job_2_predict_test_data,
            example_info=test_output_example_meta,
            executor=FlinkPythonExecutor(python_object=WritePredictTestExample()))

    # Training starts after the training data has been written;
    # the Flink test job starts after training has finished.
    af.stop_before_control_dependency(python_job_1_train_model, python_job_0_write_train_result)
    af.stop_before_control_dependency(write_result, python_job_1_train_model)
    workflow_id = af.run(get_project_path() + '/')
    res = af.wait_workflow_execution_finished(workflow_id)
    sys.exit(res)
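# MyStreamTableEnvCreator above customizes how the Flink table environment is
# created for the Flink job. The hook's expected interface is not shown in this
# section, so the class and method names below are assumptions; the pyflink
# calls themselves are standard API.
from pyflink.datastream import StreamExecutionEnvironment
from pyflink.table import StreamTableEnvironment, EnvironmentSettings

class MyStreamTableEnvCreatorSketch:
    def create_table_env(self):  # hypothetical method name
        env = StreamExecutionEnvironment.get_execution_environment()
        env.set_parallelism(1)
        # Create a streaming table environment on top of the execution environment.
        t_env = StreamTableEnvironment.create(
            env,
            environment_settings=EnvironmentSettings.new_instance().in_streaming_mode().build())
        return t_env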