示例#1
0
    def __init__(self, component_name, task_id, parent_dag, input_dict,
                 output_dict, exec_properties, driver_options, driver_class,
                 executor_class, additional_pipeline_args,
                 metadata_connection_config, logger_config):
        """Build the sub-DAG that runs a single component.

        The DAG is created with ``task_id`` as its dag id, no schedule, and
        the start date of ``parent_dag``. Four tasks are registered on it:

        * ``<task_id>.checkcache``  -- branching task that selects either
          ``<task_id>.exec`` (uncached) or ``<task_id>.noop_sink`` (cached).
        * ``<task_id>.exec``        -- runs the adapter's ``python_exec``.
        * ``<task_id>.noop_sink``   -- dummy task, target of the cached branch.
        * ``<task_id>.publishexec`` -- runs the adapter's ``publish_exec``
          after ``<task_id>.exec``.

        Args:
            component_name: Forwarded to ``AirflowAdapter``.
            task_id: Dag id of this sub-DAG and prefix of every task id in it.
            parent_dag: DAG whose ``start_date`` is reused for this one.
            input_dict: Forwarded to ``AirflowAdapter``.
            output_dict: Forwarded to ``AirflowAdapter``.
            exec_properties: Forwarded to ``AirflowAdapter``.
            driver_options: Forwarded to ``AirflowAdapter``.
            driver_class: Forwarded to ``AirflowAdapter``.
            executor_class: Forwarded to ``AirflowAdapter``.
            additional_pipeline_args: Forwarded to ``AirflowAdapter``.
            metadata_connection_config: Forwarded to ``AirflowAdapter``.
            logger_config: Forwarded to ``AirflowAdapter``.
        """
        super(_TfxWorker, self).__init__(
            dag_id=task_id,
            schedule_interval=None,
            start_date=parent_dag.start_date,
            user_defined_filters={'b64encode': base64.b64encode})

        # The adapter supplies the callables executed by the tasks below.
        component_adapter = airflow_adapter.AirflowAdapter(
            component_name=component_name,
            input_dict=input_dict,
            output_dict=output_dict,
            exec_properties=exec_properties,
            driver_options=driver_options,
            driver_class=driver_class,
            executor_class=executor_class,
            additional_pipeline_args=additional_pipeline_args,
            metadata_connection_config=metadata_connection_config,
            logger_config=logger_config)

        # Task ids are shared between the operators (as branch targets and as
        # the names of tasks whose results are looked up), so name them once.
        checkcache_task_id = task_id + '.checkcache'
        exec_task_id = task_id + '.exec'
        noop_sink_task_id = task_id + '.noop_sink'
        publishexec_task_id = task_id + '.publishexec'

        # Before the executor runs, check if the artifact already exists and
        # branch accordingly.
        cache_check_task = python_operator.BranchPythonOperator(
            task_id=checkcache_task_id,
            provide_context=True,
            python_callable=(
                component_adapter.check_cache_and_maybe_prepare_execution),
            op_kwargs={
                'uncached_branch': exec_task_id,
                'cached_branch': noop_sink_task_id,
            },
            dag=self)

        # Uncached branch: run the executor.
        executor_task = python_operator.PythonOperator(
            task_id=exec_task_id,
            provide_context=True,
            python_callable=component_adapter.python_exec,
            op_kwargs={'cache_task_name': checkcache_task_id},
            dag=self)

        # Cached branch: nothing to do.
        cached_sink_task = dummy_operator.DummyOperator(
            task_id=noop_sink_task_id, dag=self)

        # Publishing step that follows the executor.
        publish_task = python_operator.PythonOperator(
            task_id=publishexec_task_id,
            provide_context=True,
            python_callable=component_adapter.publish_exec,
            op_kwargs={
                'cache_task_name': checkcache_task_id,
                'exec_task_name': exec_task_id,
            },
            dag=self)

        # checkcache -> exec -> publishexec
        #           \-> noop_sink
        cache_check_task.set_downstream(executor_task)
        executor_task.set_downstream(publish_task)
        cache_check_task.set_downstream(cached_sink_task)
示例#2
0
    def makeBranchChoice():
        """Return the task id of the branch to follow, chosen at random.

        An integer is drawn from 1..5 inclusive; a draw of 1 or 2 selects
        'hello_spikey', any other draw selects 'dummy'. Exactly one of the
        two task ids is returned per call.
        """
        draw = random.randint(1, 5)
        return 'hello_spikey' if draw <= 2 else 'dummy'

    # Entry task: a no-op placeholder that the branching task is chained after.
    run_this_first = dummy_operator.DummyOperator(task_id='run_this_first')

    # BranchPythonOperator takes in a callable which returns the task id of the next task.
    # Here makeBranchChoice returns either 'hello_spikey' or 'dummy', which
    # match the task ids of two of the operators defined below.
    branching = python_operator.BranchPythonOperator(
        task_id='branching', python_callable=makeBranchChoice)

    # run_this_first must finish before the branching decision is made.
    run_this_first >> branching

    # Target of the 'hello_spikey' branch; runs the `greeting` callable
    # (defined outside this excerpt).
    spikeysales_greeting = python_operator.PythonOperator(
        task_id='hello_spikey', python_callable=greeting)

    # No-op task with id 'follow_python'.
    # NOTE(review): presumably chained after 'hello_spikey' -- the dependency
    # wiring is not visible in this excerpt; confirm.
    dummy_followed_python = dummy_operator.DummyOperator(
        task_id='follow_python')

    # Target of the 'dummy' branch; does nothing.
    dummy = dummy_operator.DummyOperator(task_id='dummy')

    # Farewell task that echoes a goodbye message from the shell.
    # trigger_rule='one_success' replaces the default trigger rule: per
    # Airflow's trigger-rule semantics the task fires once at least one
    # upstream task has succeeded.
    # NOTE(review): presumably chosen so this task still runs although only
    # one branch executes -- its upstream wiring is not visible here; confirm.
    bash_greeting = bash_operator.BashOperator(
        task_id='bye_bash',
        bash_command='echo Goodbye! Hope to see you soon.',
        trigger_rule='one_success')
    if hoge_or_fuga == 'hoge':
        return 'hoge_task'  # hogeであればtask_idがhoge_taskのものを実行
    elif hoge_or_fuga == 'fuga':
        return 'fuga_task'  # fugeであればtask_idがfuge_taskのものを実行
    else:
        return


with models.DAG(
        dag_id='branch_python_operator',
        default_args=default_dag_args,
        schedule_interval=None) as dag:
    # Manually triggered DAG (no schedule) with one branching task followed by
    # two candidate shell tasks.

    # Branching task: `decider` picks which downstream task runs.
    # NOTE(review): `decider` is defined outside this block; it is expected to
    # return 'hoge_task' or 'fuga_task' -- confirm.
    task1 = python_operator.BranchPythonOperator(
        python_callable=decider,  # the Python function to execute
        provide_context=True,  # True because arguments are passed at trigger time
        task_id='branch_python')

    hoge_task = bash_operator.BashOperator(
        bash_command='echo hoge',
        task_id='hoge_task')

    # NOTE(review): this echoes "fuge" although the task id is 'fuga_task' --
    # looks like a typo in the command; left unchanged here, confirm.
    fuga_task = bash_operator.BashOperator(
        bash_command='echo fuge',
        task_id='fuga_task')

    # Both candidate tasks sit directly downstream of the branching task.
    task1.set_downstream([hoge_task, fuga_task])