def __init__(self, component_name, task_id, parent_dag, input_dict,
             output_dict, exec_properties, driver_options, driver_class,
             executor_class, additional_pipeline_args,
             metadata_connection_config, logger_config):
  """Build the per-component worker DAG for one TFX component.

  Wires four Airflow tasks into this (sub-)DAG:
  ``checkcache`` -> (``exec`` -> ``publishexec`` | ``noop_sink``), so the
  executor only runs when no cached artifact exists.

  Args:
    component_name: Name of the TFX component this worker runs.
    task_id: Used both as this DAG's ``dag_id`` and as the prefix for the
      four task ids created below.
    parent_dag: Enclosing DAG; only its ``start_date`` is read here.
    input_dict: Component input artifact dict, forwarded to the adapter.
    output_dict: Component output artifact dict, forwarded to the adapter.
    exec_properties: Execution properties, forwarded to the adapter.
    driver_options: Driver options, forwarded to the adapter.
    driver_class: Driver class, forwarded to the adapter.
    executor_class: Executor class, forwarded to the adapter.
    additional_pipeline_args: Extra pipeline args, forwarded to the adapter.
    metadata_connection_config: ML Metadata connection config.
    logger_config: Logger config, forwarded to the adapter.
  """
  super(_TfxWorker, self).__init__(
      dag_id=task_id,
      schedule_interval=None,
      start_date=parent_dag.start_date,
      # Expose base64.b64encode as a Jinja filter for task templates.
      user_defined_filters={'b64encode': base64.b64encode})
  adaptor = airflow_adapter.AirflowAdapter(
      component_name=component_name,
      input_dict=input_dict,
      output_dict=output_dict,
      exec_properties=exec_properties,
      driver_options=driver_options,
      driver_class=driver_class,
      executor_class=executor_class,
      additional_pipeline_args=additional_pipeline_args,
      metadata_connection_config=metadata_connection_config,
      logger_config=logger_config)
  # Before the executor runs, check if the artifact already exists; the
  # branch operator routes to '.exec' on a cache miss and '.noop_sink'
  # on a cache hit.
  checkcache_op = python_operator.BranchPythonOperator(
      task_id=task_id + '.checkcache',
      provide_context=True,
      python_callable=adaptor.check_cache_and_maybe_prepare_execution,
      op_kwargs={
          'uncached_branch': task_id + '.exec',
          'cached_branch': task_id + '.noop_sink',
      },
      dag=self)
  # Runs the component's executor; reads the prepared execution state
  # from the checkcache task.
  tfx_op = python_operator.PythonOperator(
      task_id=task_id + '.exec',
      provide_context=True,
      python_callable=adaptor.python_exec,
      op_kwargs={
          'cache_task_name': task_id + '.checkcache',
      },
      dag=self)
  # Sink for the cached branch: nothing to do when artifacts are cached.
  noop_sink_op = dummy_operator.DummyOperator(
      task_id=task_id + '.noop_sink', dag=self)
  # Publishes execution results to metadata after a successful exec.
  publishexec_op = python_operator.PythonOperator(
      task_id=task_id + '.publishexec',
      provide_context=True,
      python_callable=adaptor.publish_exec,
      op_kwargs={
          'cache_task_name': task_id + '.checkcache',
          'exec_task_name': task_id + '.exec',
      },
      dag=self)
  tfx_op.set_upstream(checkcache_op)
  publishexec_op.set_upstream(tfx_op)
  noop_sink_op.set_upstream(checkcache_op)
def makeBranchChoice():
  """Randomly choose between 'hello_spikey' & 'dummy' branches.

  Either one will run but not both: a BranchPythonOperator follows only
  the branch whose task_id this callable returns.

  Returns:
    'hello_spikey' with probability 2/5, otherwise 'dummy'.
  """
  x = random.randint(1, 5)
  if x <= 2:
    return 'hello_spikey'
  return 'dummy'


run_this_first = dummy_operator.DummyOperator(task_id='run_this_first')

# BranchPythonOperator takes in a callable which returns the task id of
# the next task.
branching = python_operator.BranchPythonOperator(
    task_id='branching', python_callable=makeBranchChoice)
run_this_first >> branching

spikeysales_greeting = python_operator.PythonOperator(
    task_id='hello_spikey', python_callable=greeting)
dummy_followed_python = dummy_operator.DummyOperator(task_id='follow_python')
dummy = dummy_operator.DummyOperator(task_id='dummy')

# 'one_success' lets this join task run as soon as either branch
# succeeds (the skipped branch would otherwise block an 'all_success'
# trigger rule).
bash_greeting = bash_operator.BashOperator(
    task_id='bye_bash',
    bash_command='echo Goodbye! Hope to see you soon.',
    trigger_rule='one_success')
def decider(**kwargs):
  """Return the task_id of the branch task to run ('hoge_task'/'fuga_task').

  Returns None (no branch) for any other value.
  """
  # NOTE(review): the original `def` line and the assignment of
  # `hoge_or_fuga` are outside this chunk; reconstructed assuming the value
  # arrives via the trigger's dag_run conf (provide_context=True) — confirm
  # against the full file.
  hoge_or_fuga = kwargs['dag_run'].conf.get('hoge_or_fuga')
  if hoge_or_fuga == 'hoge':
    return 'hoge_task'  # for 'hoge', run the task whose task_id is hoge_task
  elif hoge_or_fuga == 'fuga':
    return 'fuga_task'  # for 'fuga', run the task whose task_id is fuga_task
  else:
    return


with models.DAG(
    'branch_python_operator',
    schedule_interval=None,
    default_args=default_dag_args) as dag:
  task1 = python_operator.BranchPythonOperator(
      task_id='branch_python',
      provide_context=True,  # True so kwargs are passed in when triggered
      python_callable=decider  # the Python function to execute
  )
  hoge_task = bash_operator.BashOperator(
      task_id='hoge_task',
      bash_command='echo hoge'
  )
  fuga_task = bash_operator.BashOperator(
      task_id='fuga_task',
      # NOTE(review): command says 'fuge' (not 'fuga') in the original —
      # kept byte-identical; likely a benign typo in the echoed text.
      bash_command='echo fuge'
  )
  task1 >> hoge_task
  task1 >> fuga_task