def test_external_task_sensor(self):
    """The sensor succeeds when the external task instance already ran."""
    # Run test_time_sensor() first so a task instance for
    # TEST_DAG_ID/TEST_TASK_ID exists for the sensor to find.
    self.test_time_sensor()
    sensor = ExternalTaskSensor(
        task_id='test_external_task_sensor_check',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        dag=self.dag,
    )
    sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_external_task_sensor_failed_states(self):
    """A failed_states list that does not match the task state still passes."""
    # Seed the external task instance first.
    self.test_time_sensor()
    sensor = ExternalTaskSensor(
        task_id='test_external_task_sensor_check',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        failed_states=["failed"],
        dag=self.dag,
    )
    sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_external_task_sensor_waits_for_task_check_existence(self):
    """check_existence=True raises instead of waiting when the task is absent."""
    sensor = ExternalTaskSensor(
        task_id='test_external_task_sensor_check',
        external_dag_id="example_bash_operator",
        external_task_id="non-existing-task",
        check_existence=True,
        dag=self.dag,
    )
    with self.assertRaises(AirflowException):
        sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_external_task_sensor_delta(self):
    """A zero execution_delta targets the same execution_date and succeeds."""
    self.test_time_sensor()
    sensor = ExternalTaskSensor(
        task_id='test_external_task_sensor_check_delta',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        execution_delta=timedelta(0),
        allowed_states=['success'],
        dag=self.dag,
    )
    sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_external_task_sensor_waits_for_dag_check_existence(self):
    """check_existence=True raises when the external DAG does not exist."""
    sensor = ExternalTaskSensor(
        task_id='test_external_task_sensor_check',
        external_dag_id="non-existing-dag",
        external_task_id=None,
        check_existence=True,
        dag=self.dag,
    )
    with self.assertRaises(AirflowException):
        sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def dag_bag_ext():
    """
    Build a DagBag of four DAGs chained by cross-DAG dependencies.

    Each dag_i ends with an ExternalTaskMarker (task_b_i) that points at the
    ExternalTaskSensor (task_a_{i+1}) opening dag_{i+1}:

        dag_0: task_a_0 >> task_b_0
        dag_1: task_a_1 >> task_b_1   (task_a_1 waits on dag_0.task_b_0)
        dag_2: task_a_2 >> task_b_2   (task_a_2 waits on dag_1.task_b_1)
        dag_3: task_a_3 >> task_b_3   (task_a_3 waits on dag_2.task_b_2)

    The head of dag_0 and the tail of dag_3 are plain DummyOperators.
    """
    dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False)
    dags = [
        DAG(f"dag_{i}", start_date=DEFAULT_DATE, schedule_interval=None)
        for i in range(4)
    ]
    last = len(dags) - 1
    for i, dag in enumerate(dags):
        if i == 0:
            # First DAG in the chain: nothing upstream to wait for.
            first_task = DummyOperator(task_id="task_a_0", dag=dag)
        else:
            first_task = ExternalTaskSensor(
                task_id=f"task_a_{i}",
                external_dag_id=f"dag_{i - 1}",
                external_task_id=f"task_b_{i - 1}",
                dag=dag,
            )
        if i == last:
            # Last DAG in the chain: nothing downstream to signal.
            second_task = DummyOperator(task_id=f"task_b_{i}", dag=dag)
        else:
            # recursion_depth was 3, 2, 1 for dag_0..dag_2 in the original.
            second_task = ExternalTaskMarker(
                task_id=f"task_b_{i}",
                external_dag_id=f"dag_{i + 1}",
                external_task_id=f"task_a_{i + 1}",
                recursion_depth=3 - i,
                dag=dag,
            )
        first_task >> second_task
    for dag in dags:
        dag_bag.bag_dag(dag=dag, root_dag=dag)
    return dag_bag
def test_external_dag_sensor(self):
    """With external_task_id=None the sensor checks the whole DagRun state."""
    other_dag = DAG(
        'other_dag',
        default_args=self.args,
        end_date=DEFAULT_DATE,
        schedule_interval='@once',
    )
    # Create a successful run for the sensor to observe.
    other_dag.create_dagrun(
        run_id='test',
        start_date=DEFAULT_DATE,
        execution_date=DEFAULT_DATE,
        state=State.SUCCESS,
    )
    sensor = ExternalTaskSensor(
        task_id='test_external_dag_sensor_check',
        external_dag_id='other_dag',
        external_task_id=None,
        dag=self.dag,
    )
    sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_external_task_sensor(self):
    """Happy path: the sensor finds the already-run external task."""
    self.test_time_sensor()
    run_kwargs = dict(
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        ignore_ti_state=True,
    )
    sensor = ExternalTaskSensor(
        task_id='test_external_task_sensor_check',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        dag=self.dag,
    )
    sensor.run(**run_kwargs)
def test_external_task_sensor_fn(self):
    """execution_date_fn is honored: identity fn passes, shifted fn times out."""
    self.test_time_sensor()
    # Identity shift: the sensor targets the existing execution date.
    passing_sensor = ExternalTaskSensor(
        task_id='test_external_task_sensor_check_delta',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        execution_date_fn=lambda dt: dt + timedelta(0),
        allowed_states=['success'],
        dag=self.dag,
    )
    passing_sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
    # Shift by one day: no matching TI exists, so the sensor must time out,
    # proving the function is really being called.
    failing_sensor = ExternalTaskSensor(
        task_id='test_external_task_sensor_check_delta',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        execution_date_fn=lambda dt: dt + timedelta(days=1),
        allowed_states=['success'],
        timeout=1,
        poke_interval=1,
        dag=self.dag,
    )
    with self.assertRaises(exceptions.AirflowSensorTimeout):
        failing_sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_catch_invalid_allowed_states(self):
    """An unknown state in allowed_states is rejected at construction time."""
    # Both the task-level and the dag-level (external_task_id=None) forms
    # must raise.
    for external_task_id in (TEST_TASK_ID, None):
        with self.assertRaises(ValueError):
            ExternalTaskSensor(
                task_id='test_external_task_sensor_check',
                external_dag_id=TEST_DAG_ID,
                external_task_id=external_task_id,
                allowed_states=['invalid_state'],
                dag=self.dag,
            )
def test_external_task_sensor_failed_states_as_success(self):
    """Listing 'success' in failed_states makes a successful task a failure."""
    self.test_time_sensor()
    sensor = ExternalTaskSensor(
        task_id='test_external_task_sensor_check',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        allowed_states=["failed"],
        failed_states=["success"],
        dag=self.dag,
    )
    with self.assertRaises(AirflowException) as cm:
        sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
    self.assertEqual(
        str(cm.exception),
        "The external task time_sensor_check in DAG unit_test_dag failed.",
    )
def test_external_task_sensor_waits_for_dag_check_existence(self):
    """A missing external DAG with check_existence=True raises immediately."""
    sensor = ExternalTaskSensor(
        task_id='test_external_task_sensor_check',
        external_dag_id="non-existing-dag",
        external_task_id=None,
        check_existence=True,
        dag=self.dag,
    )
    with self.assertRaises(AirflowException):
        sensor.run(
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE,
            ignore_ti_state=True,
        )
def test_external_task_sensor_delta(self):
    """execution_delta of zero points the sensor at the same execution date."""
    self.test_time_sensor()
    sensor_kwargs = dict(
        task_id='test_external_task_sensor_check_delta',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        execution_delta=timedelta(0),
        allowed_states=['success'],
        dag=self.dag,
    )
    sensor = ExternalTaskSensor(**sensor_kwargs)
    sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def dag_bag_cyclic():
    """
    Build a DagBag with two DAGs whose marker/sensor links form a cycle:

        dag_0: task_a_0 >> task_b_0   (task_b_0 marks dag_1.task_a_1)
        dag_1: task_a_1 >> task_b_1   (task_a_1 waits on dag_0.task_b_0,
                                       task_b_1 marks dag_0.task_a_0)
    """
    dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    first_dag = DAG("dag_0", start_date=DEFAULT_DATE, schedule_interval=None)
    head = DummyOperator(task_id="task_a_0", dag=first_dag)
    marker_to_dag_1 = ExternalTaskMarker(
        task_id="task_b_0",
        external_dag_id="dag_1",
        external_task_id="task_a_1",
        recursion_depth=3,
        dag=first_dag,
    )
    head >> marker_to_dag_1

    second_dag = DAG("dag_1", start_date=DEFAULT_DATE, schedule_interval=None)
    sensor_on_dag_0 = ExternalTaskSensor(
        task_id="task_a_1",
        external_dag_id=first_dag.dag_id,
        external_task_id=marker_to_dag_1.task_id,
        dag=second_dag,
    )
    # This marker points back at dag_0, closing the cycle.
    marker_back_to_dag_0 = ExternalTaskMarker(
        task_id="task_b_1",
        external_dag_id="dag_0",
        external_task_id="task_a_0",
        recursion_depth=2,
        dag=second_dag,
    )
    sensor_on_dag_0 >> marker_back_to_dag_0

    for dag in (first_dag, second_dag):
        dag_bag.bag_dag(dag=dag, root_dag=dag)
    return dag_bag
def dag_bag_head_tail():
    """
    Yield a DagBag containing a single daily "head_tail" DAG in which each
    run's "head" sensor (reschedule mode) waits for the "tail" task of the
    previous execution_date; "tail" is an ExternalTaskMarker pointing one
    day forward via the tomorrow_ds_nodash template.
    """
    dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False)
    with DAG("head_tail", start_date=DEFAULT_DATE, schedule_interval="@daily") as dag:
        head = ExternalTaskSensor(
            task_id='head',
            external_dag_id=dag.dag_id,
            external_task_id="tail",
            # Look one schedule interval back for the previous run's tail.
            execution_delta=timedelta(days=1),
            mode="reschedule",
        )
        body = DummyOperator(task_id="body")
        tail = ExternalTaskMarker(
            task_id="tail",
            external_dag_id=dag.dag_id,
            external_task_id=head.task_id,
            execution_date="{{ tomorrow_ds_nodash }}",
        )
        head >> body >> tail
    dag_bag.bag_dag(dag=dag, root_dag=dag)
    yield dag_bag
def create_monitoring_dag(parent_dag, start_date, schedule_interval, triggered_dag_id, finish_file_dir, trigger_file_path):
    """Build the monitoring sub-DAG: wait for the triggered DAG, print its
    result, remove the trigger file, and drop a timestamped "finished" file.

    :param parent_dag: dag_id of the parent DAG (sub-DAG id is derived from it)
    :param start_date: start_date for the sub-DAG
    :param schedule_interval: schedule_interval for the sub-DAG
    :param triggered_dag_id: dag_id of the external DAG being monitored
    :param finish_file_dir: directory in which the finish marker file is created
    :param trigger_file_path: path of the trigger file to remove afterwards
    :return: the assembled sub-DAG
    """
    with DAG(dag_id=f'{parent_dag}.monitoring_dag',
             start_date=start_date,
             schedule_interval=schedule_interval) as sub_dag:
        external_dag_sensor = ExternalTaskSensor(
            task_id='external_dag_sensor',
            external_dag_id=triggered_dag_id,
            # Fix: was '' — an empty task id matches no task in Airflow
            # versions that test `is not None`, so the sensor could wait
            # forever. None is the documented way to wait on the whole
            # DAG run.
            external_task_id=None,
            poke_interval=10)
        print_result = PythonOperator(
            task_id='print_result',
            provide_context=True,
            python_callable=print_result_func(
                external_dag_id=triggered_dag_id,
                last_task_id='end'))
        remove_file = BashOperator(
            task_id='remove_file',
            bash_command=f'rm -f {trigger_file_path}')
        create_finish_file = BashOperator(
            task_id='create_finish_file',
            bash_command="touch {{ params.finish_file_dir }}/finished_{{ ts_nodash }}",
            params={'finish_file_dir': finish_file_dir})
        external_dag_sensor >> print_result >> remove_file >> create_finish_file
    return sub_dag
def test_external_task_sensor_wrong_failed_states(self):
    """An unknown state in failed_states is rejected at construction time."""
    with self.assertRaises(ValueError):
        ExternalTaskSensor(
            task_id='test_external_task_sensor_check',
            external_dag_id=TEST_DAG_ID,
            external_task_id=TEST_TASK_ID,
            failed_states=["invalid_state"],
            dag=self.dag,
        )
def test_catch_overlap_allowed_failed_state(self):
    """A state in both allowed_states and failed_states must be rejected."""
    with self.assertRaises(AirflowException):
        ExternalTaskSensor(
            task_id='test_external_task_sensor_check',
            external_dag_id=TEST_DAG_ID,
            external_task_id=TEST_TASK_ID,
            allowed_states=[State.SUCCESS],
            failed_states=[State.SUCCESS],
            dag=self.dag,
        )
def test_external_task_sensor_fn_multiple_args(self):
    """An execution_date_fn taking (dt, context) receives the full context.

    A clean run (no assertion failure inside the fn) is the success signal.
    """
    self.test_time_sensor()

    def date_fn_with_context(dt, context):
        # The context handed to the fn must match the sensor's own
        # execution date.
        assert context['execution_date'] == dt
        return dt + timedelta(0)

    sensor = ExternalTaskSensor(
        task_id='test_external_task_sensor_multiple_arg_fn',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        execution_date_fn=date_fn_with_context,
        allowed_states=['success'],
        dag=self.dag,
    )
    sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_external_task_sensor_fn_kwargs(self):
    """An execution_date_fn may request individual context keys as kwargs.

    A clean run (no assertion failure inside the fn) is the success signal.
    """
    self.test_time_sensor()

    def date_fn_with_kwargs(dt, ds_nodash, tomorrow_ds_nodash):
        # Requested context kwargs must be consistent with dt.
        assert ds_nodash == dt.strftime("%Y%m%d")
        assert tomorrow_ds_nodash == (dt + timedelta(days=1)).strftime("%Y%m%d")
        return dt + timedelta(0)

    sensor = ExternalTaskSensor(
        task_id='test_external_task_sensor_fn_kwargs',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        execution_date_fn=date_fn_with_kwargs,
        allowed_states=['success'],
        dag=self.dag,
    )
    sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def _create_accounts_hub_op(dag):
    """Sensor waiting on the accounts task of the public LZ alpha DAG."""
    upstream_dag = lz_alpha_public.DAG_ID
    upstream_task = lz_alpha_public.TaskId.ACCOUNTS
    return ExternalTaskSensor(
        task_id=f'{upstream_dag}.{upstream_task}',
        external_dag_id=upstream_dag,
        external_task_id=upstream_task,
        allowed_states=['success', 'skipped'],
        mode="reschedule",
        dag=dag,
    )
def _create_balances_increment_hub_op(dag):
    """Sensor waiting on the balances-increment task of the public LZ alpha DAG."""
    upstream_dag = lz_alpha_public.DAG_ID
    upstream_task = lz_alpha_public.TaskId.BALANCES_INCREMENT
    return ExternalTaskSensor(
        task_id=f'{upstream_dag}.{upstream_task}',
        external_dag_id=upstream_dag,
        external_task_id=upstream_task,
        allowed_states=['success', 'skipped'],
        mode="reschedule",
        dag=dag,
    )
def _create_balances_previous_day_op(dag):
    """Sensor waiting on yesterday's balances task in the DWH alpha DAG."""
    upstream_dag = dwh_alpha_public.DAG_ID
    # NOTE(review): the task id comes from lz_alpha_public while the dag id
    # comes from dwh_alpha_public — looks deliberate, but worth confirming.
    upstream_task = lz_alpha_public.TaskId.BALANCES
    return ExternalTaskSensor(
        task_id=f'{upstream_task}-previous_day',
        external_dag_id=upstream_dag,
        external_task_id=upstream_task,
        allowed_states=['success', 'skipped'],
        # Look one day back: this sensor targets the previous day's run.
        execution_delta=datetime.timedelta(days=1),
        mode="reschedule",
        dag=dag,
    )
def _create_deals_dwh_op(dag, instance):
    """Sensor waiting on the deals task of the given DWH beta instance DAG."""
    upstream_dag = f'{dwh_beta_public.DAG_ID}.{instance}'
    upstream_task = dwh_beta_public.TaskId.DEALS
    return ExternalTaskSensor(
        task_id=f'{upstream_dag}.{upstream_task}',
        external_dag_id=upstream_dag,
        external_task_id=upstream_task,
        allowed_states=['success', 'skipped'],
        mode="reschedule",
        dag=dag,
    )
def test_external_task_sensor_error_delta_and_fn(self):
    """Supplying both execution_delta and execution_date_fn is an error."""
    self.test_time_sensor()
    with self.assertRaises(ValueError):
        ExternalTaskSensor(
            task_id='test_external_task_sensor_check_delta',
            external_dag_id=TEST_DAG_ID,
            external_task_id=TEST_TASK_ID,
            execution_delta=timedelta(0),
            execution_date_fn=lambda dt: dt,
            allowed_states=['success'],
            dag=self.dag,
        )
def create_dag_from_config(dag_id: str, dag_config, operators=OPERATORS) -> DAG:
    """Build a DAG from a config mapping.

    ``dag_config["dag_config"]`` supplies DAG kwargs; ``dag_config["tasks"]``
    lists task specs, each optionally carrying ``depends_on`` entries of the
    form ``{"dag": ..., "task": ...}``. Same-DAG dependencies become direct
    edges; cross-DAG dependencies get a (deduplicated) ExternalTaskSensor.

    :param dag_id: id of the DAG being built
    :param dag_config: mapping with "dag_config" and "tasks" keys
    :param operators: operator registry passed through to create_task
    :return: the assembled DAG
    """
    dag = DAG(dag_id, **dag_config["dag_config"])
    with dag:
        task_dict = {}
        for task in dag_config["tasks"]:
            created_task = create_task(task, operators)
            # Fix: was `== None`; identity comparison is required for None
            # (PEP 8) and avoids surprises from custom __eq__.
            if created_task is None:
                continue
            task_dict[task.get("task_id")] = {
                "task": created_task,
                "dependencies": task.get("depends_on", []),
            }
        # Cache sensors so several tasks waiting on the same external task
        # share one sensor instance.
        external_dependency_sensors = {}
        for task_id, task_and_dependencies in task_dict.items():
            task = task_and_dependencies.get("task")
            dependencies = task_and_dependencies.get("dependencies", [])
            for dependency in dependencies:
                dependency_dag_id = dependency.get("dag")
                dependency_task_id = dependency.get("task")
                try:
                    if dependency_dag_id == dag_id:
                        # Intra-DAG dependency: wire the tasks directly.
                        task_dict[dependency_task_id]["task"] >> task
                    else:
                        external_dependency_task_id = (
                            f"wait_for_{dependency_dag_id}_{dependency_task_id}"
                        )
                        if external_dependency_task_id in external_dependency_sensors:
                            external_task_sensor = external_dependency_sensors[
                                external_dependency_task_id
                            ]
                        else:
                            external_task_sensor = ExternalTaskSensor(
                                dag=dag,
                                # Reuse the computed id instead of re-building
                                # the same f-string (was duplicated).
                                task_id=external_dependency_task_id,
                                external_dag_id=dependency_dag_id,
                                external_task_id=dependency_task_id,
                                allowed_states=["success"],
                                mode="reschedule",
                                # Jitter the poke interval to spread scheduler load.
                                poke_interval=random.randint(150, 210),
                            )
                            external_dependency_sensors[
                                external_dependency_task_id
                            ] = external_task_sensor
                        external_task_sensor >> task
                except KeyError as e:
                    # A dependency refers to an unknown task id: log and keep
                    # building the rest of the DAG (best-effort, as before).
                    logging.error(
                        f"Cannot set the task {task_id} dependencies ({str(e)})"
                    )
    return dag
def test_templated_sensor(self):
    """external_dag_id and external_task_id are template fields."""
    with self.dag:
        sensor = ExternalTaskSensor(
            task_id='templated_task',
            external_dag_id='dag_{{ ds }}',
            external_task_id='task_{{ ds }}',
        )
    instance = TaskInstance(sensor, DEFAULT_DATE)
    instance.render_templates()
    expected_suffix = DEFAULT_DATE.date()
    self.assertEqual(sensor.external_dag_id, f"dag_{expected_suffix}")
    self.assertEqual(sensor.external_task_id, f"task_{expected_suffix}")
def test_external_task_sensor_fn(self):
    """execution_date_fn is used: zero shift succeeds, one-day shift times out."""
    self.test_time_sensor()
    run_kwargs = dict(
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        ignore_ti_state=True,
    )
    # An identity execution_date_fn finds the existing task instance.
    sensor_ok = ExternalTaskSensor(
        task_id='test_external_task_sensor_check_delta',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        execution_date_fn=lambda dt: dt + timedelta(0),
        allowed_states=['success'],
        dag=self.dag,
    )
    sensor_ok.run(**run_kwargs)
    # Shifting a day forward leaves nothing to find: the timeout proves the
    # fn is actually invoked.
    sensor_timeout = ExternalTaskSensor(
        task_id='test_external_task_sensor_check_delta',
        external_dag_id=TEST_DAG_ID,
        external_task_id=TEST_TASK_ID,
        execution_date_fn=lambda dt: dt + timedelta(days=1),
        allowed_states=['success'],
        timeout=1,
        poke_interval=1,
        dag=self.dag,
    )
    with self.assertRaises(exceptions.AirflowSensorTimeout):
        sensor_timeout.run(**run_kwargs)
def test_external_dag_sensor(self):
    """With no external_task_id the sensor succeeds on the DagRun's state."""
    other_dag = DAG(
        'other_dag',
        default_args=self.args,
        end_date=DEFAULT_DATE,
        schedule_interval='@once',
    )
    # Register a successful run for the sensor to detect.
    other_dag.create_dagrun(
        run_id='test',
        start_date=DEFAULT_DATE,
        execution_date=DEFAULT_DATE,
        state=State.SUCCESS,
    )
    dag_level_sensor = ExternalTaskSensor(
        task_id='test_external_dag_sensor_check',
        external_dag_id='other_dag',
        external_task_id=None,
        dag=self.dag,
    )
    dag_level_sensor.run(
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        ignore_ti_state=True,
    )
def test_templated_sensor(self):
    """Template fields render against the task instance's execution date."""
    dag = DAG(TEST_DAG_ID, self.args)
    with dag:
        sensor = ExternalTaskSensor(
            task_id='templated_task',
            external_dag_id='dag_{{ ds }}',
            external_task_id='task_{{ ds }}',
            start_date=DEFAULT_DATE,
        )
    instance = TaskInstance(sensor, DEFAULT_DATE)
    instance.render_templates()
    rendered_date = DEFAULT_DATE.date()
    self.assertEqual(sensor.external_dag_id, "dag_{}".format(rendered_date))
    self.assertEqual(sensor.external_task_id, "task_{}".format(rendered_date))
def load_subdag(parent_dag_name, child_dag_name, def_args):
    """Build the hourly sub-DAG: wait for gridu_dag, remove the trigger
    file, print the pulled value, then create a timestamped marker file.

    :param parent_dag_name: dag_id of the parent (sub-DAG id is derived)
    :param child_dag_name: name appended to the parent dag_id
    :param def_args: default_args for the sub-DAG
    :return: the assembled sub-DAG
    """
    dag_subdag = DAG(
        dag_id=f'{parent_dag_name}.{child_dag_name}',
        default_args=def_args,
        schedule_interval='@hourly',
    )
    with dag_subdag:
        # external_task_id=None: wait for the whole gridu_dag run, offset
        # five minutes back.
        wait_to_finish_dag = ExternalTaskSensor(
            task_id='wait_for_dag',
            external_dag_id='gridu_dag',
            execution_delta=timedelta(minutes=5),
            external_task_id=None,
            allowed_states=['success'],
        )
        remove_a_file = BashOperator(
            task_id='remove_a_file',
            bash_command=f'rm {path_to_run_file}',
        )
        print_a_result = PythonOperator(
            task_id='print_a_result',
            python_callable=print_pulled_value,
        )
        create_a_file = BashOperator(
            task_id='create_a_file',
            bash_command='touch finished_{{ ts_nodash }}',
        )
        wait_to_finish_dag >> remove_a_file >> print_a_result >> create_a_file
    return dag_subdag
def build_process_result_sub_dag(main_dag, default_args):
    """Build the hourly result-processing sub-DAG.

    Pipeline: wait for the trigger file, wait (soft-fail) for the external
    DAG, print its result, remove the trigger file, create a finished marker.

    :param main_dag: dag_id of the parent DAG (sub-DAG id is derived)
    :param default_args: default_args for the sub-DAG
    :return: the assembled sub-DAG
    """
    s_dag = DAG(
        dag_id="{}.{}".format(main_dag, 'process_result_sub_dag'),
        default_args=default_args,
        schedule_interval='@hourly',
    )
    with s_dag:
        # soft_fail + short timeout: skip downstream instead of failing hard
        # if the external DAG is not done.
        external_dag_sensor = ExternalTaskSensor(
            task_id='external_dag_sensor',
            external_dag_id=dagToCall,
            external_task_id=None,
            execution_date_fn=get_external_dag_execution_date,
            check_existence=True,
            poke_interval=5,
            timeout=120,
            soft_fail=True,
        )
        ex_file_sensor = FileSensor(
            task_id="ex_file_sensor",
            filepath=ex_file,
        )
        print_external_dag_result = PythonOperator(
            task_id="print_external_dag_result",
            python_callable=_print_external_dag_result,
            provide_context=True,
        )
        remove_trigger_file = BashOperator(
            task_id="remove_trigger_file",
            bash_command="rm -f {}".format(path),
        )
        # NOTE(review): the '#' in "finished_#" looks odd for a filename —
        # kept byte-for-byte; confirm it is intentional.
        create_finished_file = BashOperator(
            task_id="create_finished_file",
            bash_command="touch " + default_path + "/finished_#{{ ts_nodash }}",
        )
        (
            ex_file_sensor
            >> external_dag_sensor
            >> print_external_dag_result
            >> remove_trigger_file
            >> create_finished_file
        )
    return s_dag
def test_external_task_sensor_fn_multiple_execution_dates(self):
    """An execution_date_fn returning a list makes the sensor require ALL
    of the returned dates: it passes when every targeted TI succeeded and
    times out when any of them failed.
    """
    # Bash script that fails only when the execution second is 1.
    bash_command_code = """
{% set s=execution_date.time().second %}
echo "second is {{ s }}"
if [[ $(( {{ s }} % 60 )) == 1 ]]
    then
        exit 1
fi
exit 0
"""
    dag_external_id = TEST_DAG_ID + '_external'
    dag_external = DAG(
        dag_external_id,
        default_args=self.args,
        schedule_interval=timedelta(seconds=1),
    )
    task_external_with_failure = BashOperator(
        task_id="task_external_with_failure",
        bash_command=bash_command_code,
        retries=0,
        dag=dag_external,
    )
    task_external_without_failure = DummyOperator(
        task_id="task_external_without_failure",
        retries=0,
        dag=dag_external,
    )

    task_external_without_failure.run(
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE + timedelta(seconds=1),
        ignore_ti_state=True,
    )

    session = settings.Session()
    TI = TaskInstance
    try:
        task_external_with_failure.run(
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE + timedelta(seconds=1),
            ignore_ti_state=True,
        )
        # The with-failure task is expected to fail once per minute (on the
        # run at second 1 of each minute).
    except Exception as e:
        # Only tolerate the single expected failure; anything else is a
        # genuine test error.
        failed_tis = session.query(TI).filter(
            TI.dag_id == dag_external_id,
            TI.state == State.FAILED,
            TI.execution_date == DEFAULT_DATE + timedelta(seconds=1),
        ).all()
        expected_single_failure = (
            len(failed_tis) == 1
            and failed_tis[0].task_id == 'task_external_with_failure'
        )
        if not expected_single_failure:
            raise e

    dag_id = TEST_DAG_ID
    dag = DAG(
        dag_id,
        default_args=self.args,
        schedule_interval=timedelta(minutes=1),
    )
    # Each sensor targets two consecutive execution dates of the external DAG.
    task_without_failure = ExternalTaskSensor(
        task_id='task_without_failure',
        external_dag_id=dag_external_id,
        external_task_id='task_external_without_failure',
        execution_date_fn=lambda dt: [dt + timedelta(seconds=i) for i in range(2)],
        allowed_states=['success'],
        retries=0,
        timeout=1,
        poke_interval=1,
        dag=dag,
    )
    task_with_failure = ExternalTaskSensor(
        task_id='task_with_failure',
        external_dag_id=dag_external_id,
        external_task_id='task_external_with_failure',
        execution_date_fn=lambda dt: [dt + timedelta(seconds=i) for i in range(2)],
        allowed_states=['success'],
        retries=0,
        timeout=1,
        poke_interval=1,
        dag=dag,
    )

    task_without_failure.run(
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        ignore_ti_state=True,
    )
    with self.assertRaises(AirflowSensorTimeout):
        task_with_failure.run(
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE,
            ignore_ti_state=True,
        )
start_date = datetime.datetime(2015, 1, 1)

# Parent DAG: its ExternalTaskMarker tells "clear with downstream" to also
# clear child_task1 in the child DAG.
with DAG(
    dag_id="example_external_task_marker_parent",
    start_date=start_date,
    schedule_interval=None,
    tags=['example'],
) as parent_dag:
    # [START howto_operator_external_task_marker]
    parent_task = ExternalTaskMarker(
        task_id="parent_task",
        external_dag_id="example_external_task_marker_child",
        external_task_id="child_task1",
    )
    # [END howto_operator_external_task_marker]

# Child DAG: child_task1 waits (reschedule mode) for parent_task above.
with DAG(
    dag_id="example_external_task_marker_child",
    start_date=start_date,
    schedule_interval=None,
    tags=['example'],
) as child_dag:
    # [START howto_operator_external_task_sensor]
    child_task1 = ExternalTaskSensor(
        task_id="child_task1",
        external_dag_id=parent_dag.dag_id,
        external_task_id=parent_task.task_id,
        mode="reschedule",
    )
    # [END howto_operator_external_task_sensor]
    child_task2 = DummyOperator(task_id="child_task2")
    child_task1 >> child_task2
import airflow.utils.dates
from airflow import DAG
from airflow.sensors.external_task_sensor import ExternalTaskSensor
from airflow.providers.postgres.operators.postgres import PostgresOperator
from datetime import datetime, timedelta

default_args = {"owner": "airflow", "start_date": datetime(2020, 1, 1)}

# Every ten minutes: wait for avocado_dag's publish_notebook task, then run
# the XCom cleanup SQL.
with DAG(
    dag_id="cleaning_dag",
    default_args=default_args,
    schedule_interval="*/10 * * * *",
    catchup=False,
) as dag:
    # failed_states=['failed'] makes the sensor error out instead of waiting
    # forever when the upstream task failed.
    waiting_for_task = ExternalTaskSensor(
        task_id='waiting_for_task',
        external_dag_id='avocado_dag',
        external_task_id='publish_notebook',
        failed_states=['failed'],
    )
    cleaning_xcoms = PostgresOperator(
        task_id='cleaning_xcoms',
        sql='sql/CLEANING_XCOMS.sql',
        postgres_conn_id='postgres',
    )
    waiting_for_task >> cleaning_xcoms
def test_external_task_sensor_fn_multiple_execution_dates(self):
    """When execution_date_fn yields several dates, the sensor needs every
    targeted task instance to be in allowed_states; one failed instance
    makes it time out.
    """
    # Bash script that fails only on execution dates whose second is 1.
    bash_command_code = """
{% set s=execution_date.time().second %}
echo "second is {{ s }}"
if [[ $(( {{ s }} % 60 )) == 1 ]]
    then
        exit 1
fi
exit 0
"""
    dag_external_id = TEST_DAG_ID + '_external'
    dag_external = DAG(
        dag_external_id,
        default_args=self.args,
        schedule_interval=timedelta(seconds=1),
    )
    task_external_with_failure = BashOperator(
        task_id="task_external_with_failure",
        bash_command=bash_command_code,
        retries=0,
        dag=dag_external,
    )
    task_external_without_failure = DummyOperator(
        task_id="task_external_without_failure",
        retries=0,
        dag=dag_external,
    )

    task_external_without_failure.run(
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE + timedelta(seconds=1),
        ignore_ti_state=True,
    )

    session = settings.Session()
    TI = TaskInstance
    try:
        task_external_with_failure.run(
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE + timedelta(seconds=1),
            ignore_ti_state=True,
        )
        # The with-failure task is expected to fail exactly once (the run on
        # the first second of the minute).
    except Exception as e:  # pylint: disable=broad-except
        failed_tis = session.query(TI).filter(
            TI.dag_id == dag_external_id,
            TI.state == State.FAILED,
            TI.execution_date == DEFAULT_DATE + timedelta(seconds=1),
        ).all()
        # Swallow only the single anticipated failure; re-raise anything else.
        is_expected = (
            len(failed_tis) == 1
            and failed_tis[0].task_id == 'task_external_with_failure'
        )
        if not is_expected:
            raise e

    dag_id = TEST_DAG_ID
    dag = DAG(
        dag_id,
        default_args=self.args,
        schedule_interval=timedelta(minutes=1),
    )
    two_dates = lambda dt: [dt + timedelta(seconds=i) for i in range(2)]
    task_without_failure = ExternalTaskSensor(
        task_id='task_without_failure',
        external_dag_id=dag_external_id,
        external_task_id='task_external_without_failure',
        execution_date_fn=two_dates,
        allowed_states=['success'],
        retries=0,
        timeout=1,
        poke_interval=1,
        dag=dag,
    )
    task_with_failure = ExternalTaskSensor(
        task_id='task_with_failure',
        external_dag_id=dag_external_id,
        external_task_id='task_external_with_failure',
        execution_date_fn=two_dates,
        allowed_states=['success'],
        retries=0,
        timeout=1,
        poke_interval=1,
        dag=dag,
    )

    task_without_failure.run(
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        ignore_ti_state=True,
    )
    with self.assertRaises(AirflowSensorTimeout):
        task_with_failure.run(
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE,
            ignore_ti_state=True,
        )