def task_sensor():
    """
    Demo wiring of ExternalTaskMarker / ExternalTaskSensor across two DAGs.

    NOTE (translated from the original Korean comment): TODO — this sample
    does not work well. Having two DAGs in the same file creates both DAGs,
    and each DAG can reference the other.
    """
    with makeDag("task_marker_test") as parent_dag:
        # Clearing this marker cascades the clear to the child DAG's sensor task.
        marker = ExternalTaskMarker(
            task_id="parent_task",
            external_dag_id="task_sensor_test",
            external_task_id="child_task1",
        )

    with makeDag("task_sensor_test") as child_dag:
        # Waits for the parent DAG's marker task to succeed before proceeding.
        sensor = ExternalTaskSensor(
            task_id="child_task1",
            external_dag_id=parent_dag.dag_id,
            external_task_id=marker.task_id,
            timeout=600,
            allowed_states=['success'],
            failed_states=['failed', 'skipped'],
            mode="reschedule",
        )
        downstream = DummyOperator(task_id="child_task2")
        sensor >> downstream
def dag_bag_multiple():
    """
    Yield a DagBag holding two DAGs ("daily_dag" and "agg_dag") linked by 25
    ExternalTaskMarker tasks, one per day offset from the aggregate DAG back
    to the daily DAG.
    """
    dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False)
    daily_dag = DAG("daily_dag", start_date=DEFAULT_DATE, schedule_interval="@daily")
    agg_dag = DAG("agg_dag", start_date=DEFAULT_DATE, schedule_interval="@daily")
    for registered in (daily_dag, agg_dag):
        dag_bag.bag_dag(dag=registered, root_dag=registered)

    # NOTE(review): "daily_tas" looks like a typo for "daily_task", but it is
    # referenced consistently via .task_id below, so it is kept as-is.
    daily_task = DummyOperator(task_id="daily_tas", dag=daily_dag)
    start = DummyOperator(task_id="start", dag=agg_dag)

    for offset in range(25):
        # Each marker points at the daily task of a run `offset` days back.
        marker = ExternalTaskMarker(
            task_id=f"{daily_task.task_id}_{offset}",
            external_dag_id=daily_dag.dag_id,
            external_task_id=daily_task.task_id,
            execution_date="{{ macros.ds_add(ds, -1 * %s) }}" % offset,
            dag=agg_dag,
        )
        start >> marker

    yield dag_bag
def dag_bag_cyclic():
    """
    Build a DagBag whose two DAGs depend on each other through
    ExternalTaskMarker and ExternalTaskSensor, forming a cycle:

    dag_0:   task_a_0 >> task_b_0
                 ^           |
                 |           v
    dag_1:   task_a_1 >> task_b_1
                 (task_b_1 marks task_a_0, closing the loop)
    """
    dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    dag_0 = DAG("dag_0", start_date=DEFAULT_DATE, schedule_interval=None)
    head_0 = DummyOperator(task_id="task_a_0", dag=dag_0)
    tail_0 = ExternalTaskMarker(
        task_id="task_b_0",
        external_dag_id="dag_1",
        external_task_id="task_a_1",
        recursion_depth=3,
        dag=dag_0,
    )
    head_0 >> tail_0

    dag_1 = DAG("dag_1", start_date=DEFAULT_DATE, schedule_interval=None)
    head_1 = ExternalTaskSensor(
        task_id="task_a_1",
        external_dag_id=dag_0.dag_id,
        external_task_id=tail_0.task_id,
        dag=dag_1,
    )
    tail_1 = ExternalTaskMarker(
        task_id="task_b_1",
        external_dag_id="dag_0",
        external_task_id="task_a_0",
        recursion_depth=2,
        dag=dag_1,
    )
    head_1 >> tail_1

    for registered in (dag_0, dag_1):
        dag_bag.bag_dag(dag=registered, root_dag=registered)
    return dag_bag
def test_serialized_external_task_marker(self):
    """Round-trip an ExternalTaskMarker through serialization and verify its fields survive."""
    dag = DAG('test_serialized_external_task_marker', start_date=DEFAULT_DATE)
    marker = ExternalTaskMarker(
        task_id="parent_task",
        external_dag_id="external_task_marker_child",
        external_task_id="child_task1",
        dag=dag,
    )

    round_tripped = SerializedBaseOperator.deserialize_operator(
        SerializedBaseOperator.serialize_operator(marker)
    )

    assert round_tripped.task_type == 'ExternalTaskMarker'
    assert round_tripped.external_dag_id == 'external_task_marker_child'
    assert round_tripped.external_task_id == 'child_task1'
def dag_bag_head_tail():
    """
    Yield a DagBag with one daily DAG ("head_tail") whose runs are chained to
    each other: the "head" sensor of each run waits on the "tail" marker of
    the previous day's run, and "tail" marks "head" of the next day's run.

    Per-run chain: head >> body >> tail.
    """
    dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False)
    with DAG("head_tail", start_date=DEFAULT_DATE, schedule_interval="@daily") as dag:
        # Waits (reschedule mode) for "tail" of the run one day earlier.
        sensor = ExternalTaskSensor(
            task_id='head',
            external_dag_id=dag.dag_id,
            external_task_id="tail",
            execution_delta=timedelta(days=1),
            mode="reschedule",
        )
        work = DummyOperator(task_id="body")
        # Marks "head" of the next day's run so clears cascade forward in time.
        marker = ExternalTaskMarker(
            task_id="tail",
            external_dag_id=dag.dag_id,
            external_task_id=sensor.task_id,
            execution_date="{{ tomorrow_ds_nodash }}",
        )
        sensor >> work >> marker

    dag_bag.bag_dag(dag=dag, root_dag=dag)
    yield dag_bag
import datetime from airflow import DAG from airflow.operators.dummy import DummyOperator from airflow.sensors.external_task import ExternalTaskMarker, ExternalTaskSensor start_date = datetime.datetime(2015, 1, 1) with DAG(dag_id='external_task_marker_parent', start_date=start_date, schedule_interval=None, tags=['tms_practice']) as parent_dag: parent_task = ExternalTaskMarker( task_id="parent_task", external_dag_id="external_task_marker_child", external_tax_id="child_task1", ) with DAG( dag_id="external_task_marker_child", start_date=start_date, schedule_interval=None, tags=['tms_practice'], ) as child_dag: child_task1 = ExternalTaskSensor( task_id="child_task1", external_dag_id=parent_dag.dag_id, external_task_id=parent_task.task_id, timeout=600, allowed_states=['success'],
def dag_bag_ext():
    """
    Create a DagBag with four DAGs chained by external dependencies: each
    DAG's ExternalTaskMarker feeds the next DAG's ExternalTaskSensor.

    dag_0: task_a_0 >> task_b_0 ...> dag_1: task_a_1 >> task_b_1
        ...> dag_2: task_a_2 >> task_b_2 ...> dag_3: task_a_3 >> task_b_3
    """
    dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False)
    dags = [
        DAG(f"dag_{i}", start_date=DEFAULT_DATE, schedule_interval=None)
        for i in range(4)
    ]
    last = len(dags) - 1

    prev_tail = None
    for i, dag in enumerate(dags):
        if i == 0:
            # First DAG has no upstream dependency.
            head = DummyOperator(task_id="task_a_0", dag=dag)
        else:
            # Sensor waiting on the previous DAG's marker task.
            head = ExternalTaskSensor(
                task_id=f"task_a_{i}",
                external_dag_id=dags[i - 1].dag_id,
                external_task_id=prev_tail.task_id,
                dag=dag,
            )
        if i == last:
            # Final DAG ends the chain with a plain task.
            tail = DummyOperator(task_id=f"task_b_{i}", dag=dag)
        else:
            # Marker pointing at the next DAG's sensor; recursion_depth
            # shrinks along the chain (3, 2, 1), matching the hops remaining.
            tail = ExternalTaskMarker(
                task_id=f"task_b_{i}",
                external_dag_id=f"dag_{i + 1}",
                external_task_id=f"task_a_{i + 1}",
                recursion_depth=last - i,
                dag=dag,
            )
        head >> tail
        prev_tail = tail
        dag_bag.bag_dag(dag=dag, root_dag=dag)

    return dag_bag
def test_serialized_fields(self):
    """Ensure `recursion_depth` is among ExternalTaskMarker's serialized fields."""
    # Consistency: use a plain pytest-style assert like the sibling
    # serialization test, instead of unittest's self.assertTrue.
    assert {"recursion_depth"}.issubset(ExternalTaskMarker.get_serialized_fields())
from airflow_utils import set_dag_id

# Parent DAG: start >> do_something >> end, where `end` is an
# ExternalTaskMarker pointing at the child DAG's sensor task.
with DAG(dag_id=set_dag_id(__file__) + '-parent',
         start_date=days_ago(1),
         schedule_interval="@daily") as parent_dag:
    start = DummyOperator(task_id='start')
    do_something = BashOperator(task_id='do_something', bash_command="sleep 10s")
    # Marker so that clearing this task also clears the child DAG's sensor.
    # NOTE(review): external_dag_id is hard-coded to "dag-dependency-child"
    # while the child DAG's id is computed as set_dag_id(__file__) + '-child';
    # these match only if set_dag_id returns "dag-dependency" — verify.
    end = ExternalTaskMarker(
        task_id="end",
        external_dag_id="dag-dependency-child",
        external_task_id="child_task1",
    )
    start >> do_something >> end

# Child DAG: a sensor that waits for the parent DAG's `end` task to succeed.
# NOTE(review): this block is truncated mid-call in the visible source; the
# ExternalTaskSensor call continues beyond this chunk.
with DAG(dag_id=set_dag_id(__file__) + '-child',
         start_date=days_ago(1),
         schedule_interval="@daily") as child_dag:
    child_task1 = ExternalTaskSensor(
        task_id="child_task1",
        external_dag_id=parent_dag.dag_id,
        external_task_id=end.task_id,
        timeout=600,
        allowed_states=['success'],