示例#1
0
 def test_external_task_sensor(self):
     """Run an ``ExternalTaskSensor`` on ``TEST_TASK_ID`` of ``TEST_DAG_ID`` for ``DEFAULT_DATE``."""
     # Presumably produces the task run this sensor waits for -- confirm against test_time_sensor.
     self.test_time_sensor()
     sensor = ExternalTaskSensor(
         task_id='test_external_task_sensor_check',
         external_dag_id=TEST_DAG_ID,
         external_task_id=TEST_TASK_ID,
         dag=self.dag,
     )
     sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
 def test_external_task_sensor_failed_states(self):
     """Run the sensor with ``failed_states=["failed"]``; the test passes when the run does not raise."""
     self.test_time_sensor()
     sensor_kwargs = {
         'task_id': 'test_external_task_sensor_check',
         'external_dag_id': TEST_DAG_ID,
         'external_task_id': TEST_TASK_ID,
         'failed_states': ["failed"],
         'dag': self.dag,
     }
     sensor = ExternalTaskSensor(**sensor_kwargs)
     sensor.run(
         start_date=DEFAULT_DATE,
         end_date=DEFAULT_DATE,
         ignore_ti_state=True,
     )
示例#3
0
    def test_external_task_sensor_waits_for_task_check_existence(self):
        """Expect ``AirflowException`` when ``check_existence`` is set and the external task id is unknown."""
        sensor = ExternalTaskSensor(
            task_id='test_external_task_sensor_check',
            external_dag_id="example_bash_operator",
            external_task_id="non-existing-task",
            check_existence=True,
            dag=self.dag,
        )
        run_window = {
            'start_date': DEFAULT_DATE,
            'end_date': DEFAULT_DATE,
            'ignore_ti_state': True,
        }

        with self.assertRaises(AirflowException):
            sensor.run(**run_window)
示例#4
0
 def test_external_task_sensor_delta(self):
     """Run the sensor with a zero ``execution_delta`` and ``allowed_states=['success']``."""
     self.test_time_sensor()
     no_shift = timedelta(0)
     sensor = ExternalTaskSensor(
         task_id='test_external_task_sensor_check_delta',
         external_dag_id=TEST_DAG_ID,
         external_task_id=TEST_TASK_ID,
         execution_delta=no_shift,
         allowed_states=['success'],
         dag=self.dag,
     )
     sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
示例#5
0
    def test_external_task_sensor_waits_for_dag_check_existence(self):
        """Expect ``AirflowException`` when ``check_existence`` is set and the external DAG id is unknown."""
        sensor = ExternalTaskSensor(
            task_id='test_external_task_sensor_check',
            external_dag_id="non-existing-dag",
            external_task_id=None,
            check_existence=True,
            dag=self.dag,
        )

        with self.assertRaises(AirflowException):
            sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def dag_bag_ext():
    """
    Build a DagBag of four DAGs chained together through external dependencies.

    The dotted lines are the external dependencies, declared with an ExternalTaskMarker
    (task_b_N) on one side and an ExternalTaskSensor (task_a_N+1) on the other.

    dag_0:   task_a_0 >> task_b_0
                             |
                             |
    dag_1:                   ---> task_a_1 >> task_b_1
                                                  |
                                                  |
    dag_2:                                        ---> task_a_2 >> task_b_2
                                                                       |
                                                                       |
    dag_3:                                                             ---> task_a_3 >> task_b_3
    """
    bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    # dag_0: plain task followed by a marker that points at dag_1.task_a_1
    dag_0 = DAG("dag_0", start_date=DEFAULT_DATE, schedule_interval=None)
    start_0 = DummyOperator(task_id="task_a_0", dag=dag_0)
    marker_0 = ExternalTaskMarker(
        task_id="task_b_0",
        external_dag_id="dag_1",
        external_task_id="task_a_1",
        recursion_depth=3,
        dag=dag_0,
    )
    start_0.set_downstream(marker_0)

    # dag_1: sensor on dag_0's marker, then a marker that points at dag_2.task_a_2
    dag_1 = DAG("dag_1", start_date=DEFAULT_DATE, schedule_interval=None)
    sensor_1 = ExternalTaskSensor(
        task_id="task_a_1",
        external_dag_id=dag_0.dag_id,
        external_task_id=marker_0.task_id,
        dag=dag_1,
    )
    marker_1 = ExternalTaskMarker(
        task_id="task_b_1",
        external_dag_id="dag_2",
        external_task_id="task_a_2",
        recursion_depth=2,
        dag=dag_1,
    )
    sensor_1.set_downstream(marker_1)

    # dag_2: sensor on dag_1's marker, then a marker that points at dag_3.task_a_3
    dag_2 = DAG("dag_2", start_date=DEFAULT_DATE, schedule_interval=None)
    sensor_2 = ExternalTaskSensor(
        task_id="task_a_2",
        external_dag_id=dag_1.dag_id,
        external_task_id=marker_1.task_id,
        dag=dag_2,
    )
    marker_2 = ExternalTaskMarker(
        task_id="task_b_2",
        external_dag_id="dag_3",
        external_task_id="task_a_3",
        recursion_depth=1,
        dag=dag_2,
    )
    sensor_2.set_downstream(marker_2)

    # dag_3: sensor on dag_2's marker, then a plain task that ends the chain
    dag_3 = DAG("dag_3", start_date=DEFAULT_DATE, schedule_interval=None)
    sensor_3 = ExternalTaskSensor(
        task_id="task_a_3",
        external_dag_id=dag_2.dag_id,
        external_task_id=marker_2.task_id,
        dag=dag_3,
    )
    end_3 = DummyOperator(task_id="task_b_3", dag=dag_3)
    sensor_3.set_downstream(end_3)

    # Each DAG is registered as its own root DAG.
    for member in (dag_0, dag_1, dag_2, dag_3):
        bag.bag_dag(dag=member, root_dag=member)

    return bag
 def test_external_dag_sensor(self):
     """Wait on a whole external DAG (``external_task_id=None``) that has a successful run on ``DEFAULT_DATE``."""
     other_dag = DAG(
         'other_dag',
         default_args=self.args,
         end_date=DEFAULT_DATE,
         schedule_interval='@once',
     )
     # Record a successful DAG run for the sensor to find.
     other_dag.create_dagrun(
         run_id='test',
         start_date=DEFAULT_DATE,
         execution_date=DEFAULT_DATE,
         state=State.SUCCESS,
     )
     dag_sensor = ExternalTaskSensor(
         task_id='test_external_dag_sensor_check',
         external_dag_id='other_dag',
         external_task_id=None,
         dag=self.dag,
     )
     dag_sensor.run(
         start_date=DEFAULT_DATE,
         end_date=DEFAULT_DATE,
         ignore_ti_state=True,
     )
 def test_external_task_sensor(self):
     """Run an ``ExternalTaskSensor`` on ``TEST_TASK_ID`` of ``TEST_DAG_ID``; passing means no error was raised."""
     self.test_time_sensor()
     run_window = dict(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
     sensor = ExternalTaskSensor(
         task_id='test_external_task_sensor_check',
         external_dag_id=TEST_DAG_ID,
         external_task_id=TEST_TASK_ID,
         dag=self.dag,
     )
     sensor.run(**run_window)
 def test_external_task_sensor_fn(self):
     """
     Check that ``execution_date_fn`` decides which external execution date is polled.

     The first sensor maps the execution date onto itself and must complete. The second shifts
     it by one day and is expected to time out, which shows the callable is really invoked.
     """
     self.test_time_sensor()
     # check that the execution_fn works
     # Each sensor gets its own task id: both are attached to ``self.dag``, and registering the
     # same task id twice in one DAG is rejected (or warned about) depending on the Airflow version.
     op1 = ExternalTaskSensor(
         task_id='test_external_task_sensor_check_delta_1',
         external_dag_id=TEST_DAG_ID,
         external_task_id=TEST_TASK_ID,
         execution_date_fn=lambda dt: dt + timedelta(0),
         allowed_states=['success'],
         dag=self.dag
     )
     op1.run(
         start_date=DEFAULT_DATE,
         end_date=DEFAULT_DATE,
         ignore_ti_state=True
     )
     # double check that the execution is being called by failing the test
     op2 = ExternalTaskSensor(
         task_id='test_external_task_sensor_check_delta_2',
         external_dag_id=TEST_DAG_ID,
         external_task_id=TEST_TASK_ID,
         execution_date_fn=lambda dt: dt + timedelta(days=1),
         allowed_states=['success'],
         timeout=1,
         poke_interval=1,
         dag=self.dag
     )
     with self.assertRaises(exceptions.AirflowSensorTimeout):
         op2.run(
             start_date=DEFAULT_DATE,
             end_date=DEFAULT_DATE,
             ignore_ti_state=True
         )
示例#10
0
    def test_catch_invalid_allowed_states(self):
        """
        Expect ``ValueError`` for an unknown state in ``allowed_states``.

        Covered twice: once when waiting on a task (``external_task_id`` set) and once when
        waiting on a whole DAG (``external_task_id=None``).
        """
        # The two sensors use distinct task ids. NOTE(review): the operator appears to be attached
        # to ``self.dag`` before the state validation raises, so reusing one id could surface a
        # duplicate-task-id error instead of the ValueError under test -- confirm against the
        # Airflow version in use.
        with self.assertRaises(ValueError):
            ExternalTaskSensor(task_id='test_external_task_sensor_check_1',
                               external_dag_id=TEST_DAG_ID,
                               external_task_id=TEST_TASK_ID,
                               allowed_states=['invalid_state'],
                               dag=self.dag)

        with self.assertRaises(ValueError):
            ExternalTaskSensor(task_id='test_external_task_sensor_check_2',
                               external_dag_id=TEST_DAG_ID,
                               external_task_id=None,
                               allowed_states=['invalid_state'],
                               dag=self.dag)
 def test_external_task_sensor_failed_states_as_success(self):
     """With ``success`` listed in ``failed_states``, the run must raise ``AirflowException`` with the failure message."""
     self.test_time_sensor()
     expected_message = "The external task time_sensor_check in DAG unit_test_dag failed."
     sensor = ExternalTaskSensor(
         task_id='test_external_task_sensor_check',
         external_dag_id=TEST_DAG_ID,
         external_task_id=TEST_TASK_ID,
         allowed_states=["failed"],
         failed_states=["success"],
         dag=self.dag,
     )
     with self.assertRaises(AirflowException) as ctx:
         sensor.run(
             start_date=DEFAULT_DATE,
             end_date=DEFAULT_DATE,
             ignore_ti_state=True,
         )
     self.assertEqual(str(ctx.exception), expected_message)
    def test_external_task_sensor_waits_for_dag_check_existence(self):
        """Expect ``AirflowException`` when ``check_existence`` is set and the external DAG id is unknown."""
        run_window = dict(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
        sensor = ExternalTaskSensor(task_id='test_external_task_sensor_check',
                                    external_dag_id="non-existing-dag",
                                    external_task_id=None,
                                    check_existence=True,
                                    dag=self.dag)

        with self.assertRaises(AirflowException):
            sensor.run(**run_window)
 def test_external_task_sensor_delta(self):
     """Run the sensor with a zero ``execution_delta``; passing means no error was raised."""
     self.test_time_sensor()
     sensor = ExternalTaskSensor(task_id='test_external_task_sensor_check_delta',
                                 external_dag_id=TEST_DAG_ID,
                                 external_task_id=TEST_TASK_ID,
                                 execution_delta=timedelta(0),
                                 allowed_states=['success'],
                                 dag=self.dag)
     sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def dag_bag_cyclic():
    """
    Build a DagBag of two DAGs whose external dependencies form a cycle.

    dag_0's marker points at dag_1's sensor, and dag_1's marker points back at dag_0's first
    task; the links are declared with ExternalTaskMarker and ExternalTaskSensor.

    dag_0:   task_a_0 >> task_b_0
                  ^          |
                  |          |
    dag_1:        |          ---> task_a_1 >> task_b_1
                  |                               |
                  ---------------------------------

    """
    bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    # dag_0: plain task followed by a marker that points at dag_1.task_a_1
    dag_0 = DAG("dag_0", start_date=DEFAULT_DATE, schedule_interval=None)
    start_0 = DummyOperator(task_id="task_a_0", dag=dag_0)
    marker_0 = ExternalTaskMarker(
        task_id="task_b_0",
        external_dag_id="dag_1",
        external_task_id="task_a_1",
        recursion_depth=3,
        dag=dag_0,
    )
    start_0.set_downstream(marker_0)

    # dag_1: sensor on dag_0's marker, then a marker that closes the loop back to dag_0.task_a_0
    dag_1 = DAG("dag_1", start_date=DEFAULT_DATE, schedule_interval=None)
    sensor_1 = ExternalTaskSensor(
        task_id="task_a_1",
        external_dag_id=dag_0.dag_id,
        external_task_id=marker_0.task_id,
        dag=dag_1,
    )
    marker_1 = ExternalTaskMarker(
        task_id="task_b_1",
        external_dag_id="dag_0",
        external_task_id="task_a_0",
        recursion_depth=2,
        dag=dag_1,
    )
    sensor_1.set_downstream(marker_1)

    for member in (dag_0, dag_1):
        bag.bag_dag(dag=member, root_dag=member)

    return bag
def dag_bag_head_tail():
    """
    Yield a DagBag with a single daily DAG in which "head" waits for "tail" of the previous
    execution_date (``execution_delta`` of one day), and "tail" marks "head" of the next day.

    20200501     20200502                 20200510
    +------+     +------+                 +------+
    | head |    -->head |    -->         -->head |
    |  |   |   / |  |   |   /           / |  |   |
    |  v   |  /  |  v   |  /           /  |  v   |
    | body | /   | body | /     ...   /   | body |
    |  |   |/    |  |   |/           /    |  |   |
    |  v   /     |  v   /           /     |  v   |
    | tail/|     | tail/|          /      | tail |
    +------+     +------+                 +------+
    """
    bag = DagBag(dag_folder=DEV_NULL, include_examples=False)
    with DAG("head_tail", start_date=DEFAULT_DATE, schedule_interval="@daily") as dag:
        head = ExternalTaskSensor(
            task_id='head',
            external_dag_id=dag.dag_id,
            external_task_id="tail",
            execution_delta=timedelta(days=1),
            mode="reschedule",
        )
        body = DummyOperator(task_id="body")
        tail = ExternalTaskMarker(
            task_id="tail",
            external_dag_id=dag.dag_id,
            external_task_id=head.task_id,
            execution_date="{{ tomorrow_ds_nodash }}",
        )
        head.set_downstream(body)
        body.set_downstream(tail)

    bag.bag_dag(dag=dag, root_dag=dag)

    # Generator on purpose: the bag is yielded, not returned.
    yield bag
示例#16
0
def create_monitoring_dag(parent_dag, start_date, schedule_interval,
                          triggered_dag_id, finish_file_dir,
                          trigger_file_path):
    """Build the ``<parent_dag>.monitoring_dag`` DAG that follows up on a triggered DAG.

    The tasks run in sequence: wait for ``triggered_dag_id``, print its result, remove the
    trigger file, then create a ``finished_<ts_nodash>`` file.

    :param parent_dag: prefix of the created DAG id
    :param start_date: start date of the created DAG
    :param schedule_interval: schedule interval of the created DAG
    :param triggered_dag_id: id of the DAG the sensor waits for
    :param finish_file_dir: directory in which the finish file is touched
    :param trigger_file_path: path removed with ``rm -f`` once the result is printed
    :return: the created DAG
    """
    monitoring_dag_id = f'{parent_dag}.monitoring_dag'
    with DAG(
        dag_id=monitoring_dag_id,
        start_date=start_date,
        schedule_interval=schedule_interval,
    ) as sub_dag:
        external_dag_sensor = ExternalTaskSensor(
            task_id='external_dag_sensor',
            external_dag_id=triggered_dag_id,
            external_task_id='',
            poke_interval=10,
        )

        # print_result_func is called here, at DAG-build time; its return value is the callable.
        result_printer = print_result_func(
            external_dag_id=triggered_dag_id,
            last_task_id='end',
        )
        print_result = PythonOperator(
            task_id='print_result',
            provide_context=True,
            python_callable=result_printer,
        )

        remove_file = BashOperator(
            task_id='remove_file',
            bash_command=f'rm -f {trigger_file_path}',
        )

        create_finish_file = BashOperator(
            task_id='create_finish_file',
            bash_command="touch {{ params.finish_file_dir }}/finished_{{ ts_nodash }}",
            params={'finish_file_dir': finish_file_dir},
        )

    external_dag_sensor >> print_result >> remove_file >> create_finish_file

    return sub_dag
 def test_external_task_sensor_wrong_failed_states(self):
     """Expect ``ValueError`` when ``failed_states`` contains an unknown state."""
     sensor_kwargs = {
         'task_id': 'test_external_task_sensor_check',
         'external_dag_id': TEST_DAG_ID,
         'external_task_id': TEST_TASK_ID,
         'failed_states': ["invalid_state"],
         'dag': self.dag,
     }
     with self.assertRaises(ValueError):
         ExternalTaskSensor(**sensor_kwargs)
 def test_catch_overlap_allowed_failed_state(self):
     """Expect ``AirflowException`` when the same state is listed as both allowed and failed."""
     with self.assertRaises(AirflowException):
         ExternalTaskSensor(
             task_id='test_external_task_sensor_check',
             external_dag_id=TEST_DAG_ID,
             external_task_id=TEST_TASK_ID,
             allowed_states=[State.SUCCESS],
             failed_states=[State.SUCCESS],
             dag=self.dag,
         )
    def test_external_task_sensor_fn_multiple_args(self):
        """Run a sensor whose ``execution_date_fn`` takes ``(dt, context)``; a clean run means success."""
        self.test_time_sensor()

        def same_date_with_context(dt, context):
            # Parameter names are left untouched: presumably the sensor inspects them to decide
            # what to pass in -- confirm before renaming.
            assert context['execution_date'] == dt
            return dt + timedelta(0)

        sensor = ExternalTaskSensor(
            task_id='test_external_task_sensor_multiple_arg_fn',
            external_dag_id=TEST_DAG_ID,
            external_task_id=TEST_TASK_ID,
            execution_date_fn=same_date_with_context,
            allowed_states=['success'],
            dag=self.dag,
        )
        sensor.run(
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE,
            ignore_ti_state=True,
        )
    def test_external_task_sensor_fn_kwargs(self):
        """Run a sensor whose ``execution_date_fn`` takes ``ds_nodash`` and ``tomorrow_ds_nodash``.

        The callable asserts that both values match the date it receives; a clean run means success.
        """
        self.test_time_sensor()

        def same_date_with_named_values(dt, ds_nodash, tomorrow_ds_nodash):
            # Parameter names are left untouched: presumably the sensor matches them against
            # context keys -- confirm before renaming.
            day_format = "%Y%m%d"
            assert ds_nodash == dt.strftime(day_format)
            assert tomorrow_ds_nodash == (dt + timedelta(days=1)).strftime(day_format)
            return dt + timedelta(0)

        sensor = ExternalTaskSensor(
            task_id='test_external_task_sensor_fn_kwargs',
            external_dag_id=TEST_DAG_ID,
            external_task_id=TEST_TASK_ID,
            execution_date_fn=same_date_with_named_values,
            allowed_states=['success'],
            dag=self.dag,
        )
        sensor.run(
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE,
            ignore_ti_state=True,
        )
示例#21
0
def _create_accounts_hub_op(dag):
    """Return a reschedule-mode sensor on the ``lz_alpha_public`` ACCOUNTS task.

    The sensor id is ``<external dag id>.<external task id>`` and both ``success`` and
    ``skipped`` count as done.

    :param dag: DAG the sensor is attached to
    """
    upstream_dag_id = lz_alpha_public.DAG_ID
    upstream_task_id = lz_alpha_public.TaskId.ACCOUNTS
    return ExternalTaskSensor(
        task_id=f'{upstream_dag_id}.{upstream_task_id}',
        external_dag_id=upstream_dag_id,
        external_task_id=upstream_task_id,
        allowed_states=['success', 'skipped'],
        mode="reschedule",
        dag=dag,
    )
示例#22
0
def _create_balances_increment_hub_op(dag):
    """Return a reschedule-mode sensor on the ``lz_alpha_public`` BALANCES_INCREMENT task.

    The sensor id is ``<external dag id>.<external task id>`` and both ``success`` and
    ``skipped`` count as done.

    :param dag: DAG the sensor is attached to
    """
    upstream_dag_id = lz_alpha_public.DAG_ID
    upstream_task_id = lz_alpha_public.TaskId.BALANCES_INCREMENT
    sensor_id = f'{upstream_dag_id}.{upstream_task_id}'
    return ExternalTaskSensor(
        task_id=sensor_id,
        external_dag_id=upstream_dag_id,
        external_task_id=upstream_task_id,
        allowed_states=['success', 'skipped'],
        mode="reschedule",
        dag=dag,
    )
示例#23
0
def _create_balances_previous_day_op(dag):
    """Return a reschedule-mode sensor on the BALANCES task of the run one day earlier.

    The sensor id is ``<external task id>-previous_day`` and both ``success`` and ``skipped``
    count as done.

    :param dag: DAG the sensor is attached to
    """
    # NOTE(review): the DAG id comes from dwh_alpha_public but the task id from lz_alpha_public --
    # confirm this mix is intended.
    upstream_dag_id = dwh_alpha_public.DAG_ID
    upstream_task_id = lz_alpha_public.TaskId.BALANCES
    one_day_back = datetime.timedelta(days=1)
    return ExternalTaskSensor(
        task_id=f'{upstream_task_id}-previous_day',
        external_dag_id=upstream_dag_id,
        external_task_id=upstream_task_id,
        allowed_states=['success', 'skipped'],
        execution_delta=one_day_back,
        mode="reschedule",
        dag=dag,
    )
def _create_deals_dwh_op(dag, instance):
    """Return a reschedule-mode sensor on the DEALS task of one ``dwh_beta_public`` instance.

    The external DAG id is ``<dwh_beta_public.DAG_ID>.<instance>``; the sensor id appends the
    external task id to it. Both ``success`` and ``skipped`` count as done.

    :param dag: DAG the sensor is attached to
    :param instance: suffix that selects the external DAG
    """
    upstream_dag_id = f'{dwh_beta_public.DAG_ID}.{instance}'
    upstream_task_id = dwh_beta_public.TaskId.DEALS
    return ExternalTaskSensor(
        task_id=f'{upstream_dag_id}.{upstream_task_id}',
        external_dag_id=upstream_dag_id,
        external_task_id=upstream_task_id,
        allowed_states=['success', 'skipped'],
        mode="reschedule",
        dag=dag,
    )
示例#25
0
 def test_external_task_sensor_error_delta_and_fn(self):
     """Expect ``ValueError`` when both ``execution_delta`` and ``execution_date_fn`` are given."""
     self.test_time_sensor()
     # The two ways of choosing the external execution date are passed together on purpose.
     conflicting_kwargs = {
         'execution_delta': timedelta(0),
         'execution_date_fn': lambda dt: dt,
     }
     with self.assertRaises(ValueError):
         ExternalTaskSensor(
             task_id='test_external_task_sensor_check_delta',
             external_dag_id=TEST_DAG_ID,
             external_task_id=TEST_TASK_ID,
             allowed_states=['success'],
             dag=self.dag,
             **conflicting_kwargs,
         )
示例#26
0
def create_dag_from_config(dag_id: str, dag_config, operators=OPERATORS) -> DAG:
    """Build a DAG and wire its task dependencies from a configuration mapping.

    ``dag_config["dag_config"]`` is forwarded as keyword arguments to ``DAG``, and every entry
    of ``dag_config["tasks"]`` is turned into a task by ``create_task``. Each entry of a task's
    ``depends_on`` list is a mapping with ``dag`` and ``task`` keys:

    * a dependency inside this DAG becomes a direct upstream edge;
    * a dependency on another DAG becomes an ``ExternalTaskSensor`` placed upstream of the task.
      One sensor is created per external dag/task pair and shared by all tasks that need it.

    A dependency on an unknown task of this DAG is logged as an error and skipped.

    :param dag_id: id of the DAG to create
    :param dag_config: mapping with ``dag_config`` (DAG kwargs) and ``tasks`` (task definitions)
    :param operators: forwarded to ``create_task``
    :return: the populated DAG
    """
    dag = DAG(dag_id, **dag_config["dag_config"])
    with dag:
        task_dict = {}
        for task_config in dag_config["tasks"]:
            created_task = create_task(task_config, operators)
            if created_task is None:
                # Nothing was created for this entry, so there is nothing to wire.
                continue
            task_dict[task_config.get("task_id")] = {
                "task": created_task,
                # ``or []`` also covers an explicit null ``depends_on`` in the config.
                "dependencies": task_config.get("depends_on") or [],
            }

        external_dependency_sensors = {}
        for task_id, task_and_dependencies in task_dict.items():
            task = task_and_dependencies["task"]

            for dependency in task_and_dependencies["dependencies"]:
                dependency_dag_id = dependency.get("dag")
                dependency_task_id = dependency.get("task")

                try:
                    if dependency_dag_id == dag_id:
                        task_dict[dependency_task_id]["task"] >> task
                    else:
                        external_dependency_task_id = (
                            f"wait_for_{dependency_dag_id}_{dependency_task_id}"
                        )

                        external_task_sensor = external_dependency_sensors.get(
                            external_dependency_task_id
                        )
                        if external_task_sensor is None:
                            external_task_sensor = ExternalTaskSensor(
                                dag=dag,
                                task_id=external_dependency_task_id,
                                external_dag_id=dependency_dag_id,
                                external_task_id=dependency_task_id,
                                allowed_states=["success"],
                                mode="reschedule",
                                # A different poke interval is drawn for every sensor.
                                poke_interval=random.randint(150, 210),
                            )
                            external_dependency_sensors[
                                external_dependency_task_id
                            ] = external_task_sensor

                        external_task_sensor >> task
                except KeyError as e:
                    logging.error(
                        "Cannot set the task %s dependencies (%s)", task_id, e
                    )
    return dag
    def test_templated_sensor(self):
        """Check that ``external_dag_id`` and ``external_task_id`` are rendered as templates."""
        with self.dag:
            sensor = ExternalTaskSensor(
                task_id='templated_task',
                external_dag_id='dag_{{ ds }}',
                external_task_id='task_{{ ds }}',
            )

        ti = TaskInstance(sensor, DEFAULT_DATE)
        ti.render_templates()

        # ``{{ ds }}`` is expected to render as the date part of the execution date.
        rendered_day = DEFAULT_DATE.date()
        self.assertEqual(sensor.external_dag_id, f"dag_{rendered_day}")
        self.assertEqual(sensor.external_task_id, f"task_{rendered_day}")
 def test_external_task_sensor_fn(self):
     """
     Check that ``execution_date_fn`` decides which external execution date is polled.

     The first sensor maps the execution date onto itself and must complete. The second shifts
     it by one day and is expected to time out, which shows the callable is really invoked.
     """
     self.test_time_sensor()
     # check that the execution_fn works
     # Each sensor gets its own task id: both are attached to ``self.dag``, and registering the
     # same task id twice in one DAG is rejected (or warned about) depending on the Airflow version.
     t = ExternalTaskSensor(
         task_id='test_external_task_sensor_check_delta_1',
         external_dag_id=TEST_DAG_ID,
         external_task_id=TEST_TASK_ID,
         execution_date_fn=lambda dt: dt + timedelta(0),
         allowed_states=['success'],
         dag=self.dag
     )
     t.run(
         start_date=DEFAULT_DATE,
         end_date=DEFAULT_DATE,
         ignore_ti_state=True
     )
     # double check that the execution is being called by failing the test
     t2 = ExternalTaskSensor(
         task_id='test_external_task_sensor_check_delta_2',
         external_dag_id=TEST_DAG_ID,
         external_task_id=TEST_TASK_ID,
         execution_date_fn=lambda dt: dt + timedelta(days=1),
         allowed_states=['success'],
         timeout=1,
         poke_interval=1,
         dag=self.dag
     )
     with self.assertRaises(exceptions.AirflowSensorTimeout):
         t2.run(
             start_date=DEFAULT_DATE,
             end_date=DEFAULT_DATE,
             ignore_ti_state=True
         )
    def test_external_dag_sensor(self):
        """Wait on a whole external DAG (``external_task_id=None``) that has a successful run on ``DEFAULT_DATE``."""
        other_dag = DAG('other_dag', default_args=self.args, end_date=DEFAULT_DATE, schedule_interval='@once')
        # Record a successful DAG run for the sensor to find.
        other_dag.create_dagrun(
            run_id='test', start_date=DEFAULT_DATE, execution_date=DEFAULT_DATE, state=State.SUCCESS
        )
        dag_sensor = ExternalTaskSensor(
            task_id='test_external_dag_sensor_check',
            external_dag_id='other_dag',
            external_task_id=None,
            dag=self.dag,
        )
        dag_sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
示例#30
0
    def test_templated_sensor(self):
        """Check that ``external_dag_id`` and ``external_task_id`` are rendered as templates."""
        # ``default_args`` is passed by keyword: the second positional parameter of ``DAG`` is
        # ``description``, so ``DAG(TEST_DAG_ID, self.args)`` stored the args dict as the
        # description and never applied it as default arguments.
        dag = DAG(TEST_DAG_ID, default_args=self.args)

        with dag:
            sensor = ExternalTaskSensor(task_id='templated_task',
                                        external_dag_id='dag_{{ ds }}',
                                        external_task_id='task_{{ ds }}',
                                        start_date=DEFAULT_DATE)

        instance = TaskInstance(sensor, DEFAULT_DATE)
        instance.render_templates()

        # ``{{ ds }}`` is expected to render as the date part of the execution date.
        self.assertEqual(sensor.external_dag_id,
                         "dag_{}".format(DEFAULT_DATE.date()))
        self.assertEqual(sensor.external_task_id,
                         "task_{}".format(DEFAULT_DATE.date()))
示例#31
0
def load_subdag(parent_dag_name, child_dag_name, def_args):
    """Build the hourly ``<parent_dag_name>.<child_dag_name>`` DAG.

    The tasks run in sequence: wait for a successful ``gridu_dag`` run (execution delta of five
    minutes), remove the module-level ``path_to_run_file``, call ``print_pulled_value``, then
    touch a ``finished_<ts_nodash>`` file.

    :param parent_dag_name: first part of the created DAG id
    :param child_dag_name: second part of the created DAG id
    :param def_args: default arguments of the created DAG
    :return: the created DAG
    """
    subdag_id = f'{parent_dag_name}.{child_dag_name}'
    with DAG(dag_id=subdag_id, default_args=def_args, schedule_interval='@hourly') as dag_subdag:
        wait_to_finish_dag = ExternalTaskSensor(
            task_id='wait_for_dag',
            external_dag_id='gridu_dag',
            execution_delta=timedelta(minutes=5),
            external_task_id=None,
            allowed_states=['success'],
        )
        remove_a_file = BashOperator(
            task_id='remove_a_file',
            bash_command=f'rm {path_to_run_file}',
        )
        print_a_result = PythonOperator(
            task_id='print_a_result',
            python_callable=print_pulled_value,
        )
        create_a_file = BashOperator(
            task_id='create_a_file',
            bash_command='touch finished_{{ ts_nodash }}',
        )

        wait_to_finish_dag.set_downstream(remove_a_file)
        remove_a_file.set_downstream(print_a_result)
        print_a_result.set_downstream(create_a_file)

    return dag_subdag
示例#32
0
def build_process_result_sub_dag(main_dag, default_args):
    """Build the hourly ``<main_dag>.process_result_sub_dag`` DAG.

    The tasks run in sequence: file sensor, external DAG sensor, result printer, trigger-file
    removal, finish-file creation. The external DAG id, the watched file, the removed path and
    the finish directory are read from the module-level names ``dagToCall``, ``ex_file``,
    ``path`` and ``default_path``.

    :param main_dag: prefix of the created DAG id
    :param default_args: default arguments of the created DAG
    :return: the created DAG
    """
    s_dag = DAG(
        dag_id=f"{main_dag}.process_result_sub_dag",
        default_args=default_args,
        schedule_interval='@hourly',
    )
    with s_dag:
        external_dag_sensor = ExternalTaskSensor(
            task_id='external_dag_sensor',
            external_dag_id=dagToCall,
            external_task_id=None,
            execution_date_fn=get_external_dag_execution_date,
            check_existence=True,
            poke_interval=5,
            timeout=120,
            soft_fail=True,
        )
        ex_file_sensor = FileSensor(task_id="ex_file_sensor", filepath=ex_file)
        print_external_dag_result = PythonOperator(
            task_id="print_external_dag_result",
            python_callable=_print_external_dag_result,
            provide_context=True,
        )
        remove_trigger_file = BashOperator(
            task_id="remove_trigger_file",
            bash_command=f"rm -f {path}",
        )
        # The finish-file name keeps the literal '#' before the templated timestamp.
        finish_file_command = "touch " + default_path + "/finished_#{{ ts_nodash }}"
        create_finished_file = BashOperator(
            task_id="create_finished_file",
            bash_command=finish_file_command,
        )

    ex_file_sensor.set_downstream(external_dag_sensor)
    external_dag_sensor.set_downstream(print_external_dag_result)
    print_external_dag_result.set_downstream(remove_trigger_file)
    remove_trigger_file.set_downstream(create_finished_file)
    return s_dag
    def test_external_task_sensor_fn_multiple_execution_dates(self):
        bash_command_code = """
{% set s=execution_date.time().second %}
echo "second is {{ s }}"
if [[ $(( {{ s }} % 60 )) == 1 ]]
    then
        exit 1
fi
exit 0
"""
        dag_external_id = TEST_DAG_ID + '_external'
        dag_external = DAG(
            dag_external_id,
            default_args=self.args,
            schedule_interval=timedelta(seconds=1))
        task_external_with_failure = BashOperator(
            task_id="task_external_with_failure",
            bash_command=bash_command_code,
            retries=0,
            dag=dag_external)
        task_external_without_failure = DummyOperator(
            task_id="task_external_without_failure",
            retries=0,
            dag=dag_external)

        task_external_without_failure.run(
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE + timedelta(seconds=1),
            ignore_ti_state=True)

        session = settings.Session()
        TI = TaskInstance
        try:
            task_external_with_failure.run(
                start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE + timedelta(seconds=1),
                ignore_ti_state=True)
            # The test_with_failure task is excepted to fail
            # once per minute (the run on the first second of
            # each minute).
        except Exception as e:
            failed_tis = session.query(TI).filter(
                TI.dag_id == dag_external_id,
                TI.state == State.FAILED,
                TI.execution_date == DEFAULT_DATE + timedelta(seconds=1)).all()
            if len(failed_tis) == 1 and \
               failed_tis[0].task_id == 'task_external_with_failure':
                pass
            else:
                raise e

        dag_id = TEST_DAG_ID
        dag = DAG(
            dag_id,
            default_args=self.args,
            schedule_interval=timedelta(minutes=1))
        task_without_failure = ExternalTaskSensor(
            task_id='task_without_failure',
            external_dag_id=dag_external_id,
            external_task_id='task_external_without_failure',
            execution_date_fn=lambda dt: [dt + timedelta(seconds=i)
                                          for i in range(2)],
            allowed_states=['success'],
            retries=0,
            timeout=1,
            poke_interval=1,
            dag=dag)
        task_with_failure = ExternalTaskSensor(
            task_id='task_with_failure',
            external_dag_id=dag_external_id,
            external_task_id='task_external_with_failure',
            execution_date_fn=lambda dt: [dt + timedelta(seconds=i)
                                          for i in range(2)],
            allowed_states=['success'],
            retries=0,
            timeout=1,
            poke_interval=1,
            dag=dag)

        task_without_failure.run(
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE,
            ignore_ti_state=True)

        with self.assertRaises(AirflowSensorTimeout):
            task_with_failure.run(
                start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE,
                ignore_ti_state=True)
# Both example DAGs share this start date.
start_date = datetime.datetime(2015, 1, 1)

# Parent DAG: its only task is a marker pointing at child_task1 of the child DAG.
with DAG(
    dag_id="example_external_task_marker_parent",
    start_date=start_date,
    schedule_interval=None,
    tags=['example'],
) as parent_dag:
    # [START howto_operator_external_task_marker]
    parent_task = ExternalTaskMarker(
        task_id="parent_task",
        external_dag_id="example_external_task_marker_child",
        external_task_id="child_task1",
    )
    # [END howto_operator_external_task_marker]

# Child DAG: child_task1 waits for the parent's marker, then child_task2 runs.
with DAG(
    dag_id="example_external_task_marker_child",
    start_date=start_date,
    schedule_interval=None,
    tags=['example'],
) as child_dag:
    # [START howto_operator_external_task_sensor]
    child_task1 = ExternalTaskSensor(
        task_id="child_task1",
        external_dag_id=parent_dag.dag_id,
        external_task_id=parent_task.task_id,
        mode="reschedule",
    )
    # [END howto_operator_external_task_sensor]
    child_task2 = DummyOperator(task_id="child_task2")
    child_task1.set_downstream(child_task2)
示例#35
0
import airflow.utils.dates
from airflow import DAG
from airflow.sensors.external_task_sensor import ExternalTaskSensor
from airflow.providers.postgres.operators.postgres import PostgresOperator
from datetime import datetime, timedelta

# Defaults applied to every task of the DAG below.
default_args = {
    "owner": "airflow",
    "start_date": datetime(2020, 1, 1),
}

# Runs every ten minutes without catchup: wait for avocado_dag.publish_notebook, then run the
# XCom cleaning SQL.
with DAG(
    dag_id="cleaning_dag",
    default_args=default_args,
    schedule_interval="*/10 * * * *",
    catchup=False,
) as dag:

    waiting_for_task = ExternalTaskSensor(
        task_id='waiting_for_task',
        external_dag_id='avocado_dag',
        external_task_id='publish_notebook',
        failed_states=['failed'],
    )

    cleaning_xcoms = PostgresOperator(
        task_id='cleaning_xcoms',
        sql='sql/CLEANING_XCOMS.sql',
        postgres_conn_id='postgres',
    )

    waiting_for_task.set_downstream(cleaning_xcoms)
    def test_external_task_sensor_fn_multiple_execution_dates(self):
        bash_command_code = """
{% set s=execution_date.time().second %}
echo "second is {{ s }}"
if [[ $(( {{ s }} % 60 )) == 1 ]]
    then
        exit 1
fi
exit 0
"""
        dag_external_id = TEST_DAG_ID + '_external'
        dag_external = DAG(
            dag_external_id,
            default_args=self.args,
            schedule_interval=timedelta(seconds=1))
        task_external_with_failure = BashOperator(
            task_id="task_external_with_failure",
            bash_command=bash_command_code,
            retries=0,
            dag=dag_external)
        task_external_without_failure = DummyOperator(
            task_id="task_external_without_failure",
            retries=0,
            dag=dag_external)

        task_external_without_failure.run(
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE + timedelta(seconds=1),
            ignore_ti_state=True)

        session = settings.Session()
        TI = TaskInstance
        try:
            task_external_with_failure.run(
                start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE + timedelta(seconds=1),
                ignore_ti_state=True)
            # The test_with_failure task is excepted to fail
            # once per minute (the run on the first second of
            # each minute).
        except Exception as e:  # pylint: disable=broad-except
            failed_tis = session.query(TI).filter(
                TI.dag_id == dag_external_id,
                TI.state == State.FAILED,
                TI.execution_date == DEFAULT_DATE + timedelta(seconds=1)).all()
            if len(failed_tis) == 1 and \
               failed_tis[0].task_id == 'task_external_with_failure':
                pass
            else:
                raise e

        dag_id = TEST_DAG_ID
        dag = DAG(
            dag_id,
            default_args=self.args,
            schedule_interval=timedelta(minutes=1))
        task_without_failure = ExternalTaskSensor(
            task_id='task_without_failure',
            external_dag_id=dag_external_id,
            external_task_id='task_external_without_failure',
            execution_date_fn=lambda dt: [dt + timedelta(seconds=i)
                                          for i in range(2)],
            allowed_states=['success'],
            retries=0,
            timeout=1,
            poke_interval=1,
            dag=dag)
        task_with_failure = ExternalTaskSensor(
            task_id='task_with_failure',
            external_dag_id=dag_external_id,
            external_task_id='task_external_with_failure',
            execution_date_fn=lambda dt: [dt + timedelta(seconds=i)
                                          for i in range(2)],
            allowed_states=['success'],
            retries=0,
            timeout=1,
            poke_interval=1,
            dag=dag)

        task_without_failure.run(
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE,
            ignore_ti_state=True)

        with self.assertRaises(AirflowSensorTimeout):
            task_with_failure.run(
                start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE,
                ignore_ti_state=True)