Exemplo n.º 1
0
def test_build_task_group_context_manager():
    """
    Build nested TaskGroups with the context-manager syntax and verify the
    direct relatives of the outer tasks, the group ids, and the serialized
    structure of the root group.
    """
    start_date = pendulum.parse("20200101")
    with DAG("test_build_task_group_context_manager", start_date=start_date) as dag:
        task1 = DummyOperator(task_id="task1")
        with TaskGroup("group234") as group234:
            # Operators created inside a ``with TaskGroup`` need no local name here.
            DummyOperator(task_id="task2")

            with TaskGroup("group34") as group34:
                DummyOperator(task_id="task3")
                DummyOperator(task_id="task4")

        task5 = DummyOperator(task_id="task5")
        task1 >> group234
        group34 >> task5

    nested_task_ids = {'group234.group34.task3', 'group234.group34.task4'}

    # Downstream of task1: everything inside group234, nested group included.
    downstream_of_task1 = task1.get_direct_relative_ids(upstream=False)
    assert downstream_of_task1 == nested_task_ids | {'group234.task2'}

    # Upstream of task5: only the tasks of the inner group.
    upstream_of_task5 = task5.get_direct_relative_ids(upstream=True)
    assert upstream_of_task5 == nested_task_ids

    root_group = dag.task_group
    assert root_group.group_id is None
    assert root_group.is_root
    assert set(root_group.children.keys()) == {"task1", "group234", "task5"}
    assert group34.group_id == "group234.group34"

    assert task_group_to_dict(root_group) == EXPECTED_JSON
Exemplo n.º 2
0
def test_build_task_group_with_prefix():
    """
    Check that ``prefix_group_id`` switches on/off the prefixing of child ids
    with the id of the enclosing group.
    """
    start_date = pendulum.parse("20200101")
    with DAG("test_build_task_group_with_prefix", start_date=start_date) as dag:
        task1 = DummyOperator(task_id="task1")
        with TaskGroup("group234", prefix_group_id=False) as group234:
            task2 = DummyOperator(task_id="task2")

            with TaskGroup("group34") as group34:
                task3 = DummyOperator(task_id="task3")

                with TaskGroup("group4", prefix_group_id=False) as group4:
                    task4 = DummyOperator(task_id="task4")

        task5 = DummyOperator(task_id="task5")
        task1 >> group234
        group34 >> task5

    # (actual id, expected id) pairs for every task/group built above.
    id_checks = [
        (task2.task_id, "task2"),
        (group34.group_id, "group34"),
        (task3.task_id, "group34.task3"),
        (group4.group_id, "group34.group4"),
        (task4.task_id, "task4"),
        (task5.task_id, "task5"),
    ]
    for actual_id, expected_id in id_checks:
        assert actual_id == expected_id

    # Children stay reachable through their un-prefixed label.
    assert group234.get_child_by_label("task2") == task2
    assert group234.get_child_by_label("group34") == group34
    assert group4.get_child_by_label("task4") == task4

    # Expected serialized tree, assembled from the innermost group outwards.
    group4_node = {
        'id': 'group34.group4',
        'label': 'group4',
        'children': [{'id': 'task4', 'label': 'task4'}],
    }
    group34_node = {
        'id': 'group34',
        'label': 'group34',
        'children': [
            group4_node,
            {'id': 'group34.task3', 'label': 'task3'},
            {'id': 'group34.downstream_join_id', 'label': ''},
        ],
    }
    group234_node = {
        'id': 'group234',
        'label': 'group234',
        'children': [
            group34_node,
            {'id': 'task2', 'label': 'task2'},
            {'id': 'group234.upstream_join_id', 'label': ''},
        ],
    }
    expected_tree = {
        'id': None,
        'label': None,
        'children': [
            group234_node,
            {'id': 'task1', 'label': 'task1'},
            {'id': 'task5', 'label': 'task5'},
        ],
    }

    actual_tree = extract_node_id(task_group_to_dict(dag.task_group), include_label=True)
    assert actual_tree == expected_tree
Exemplo n.º 3
0
def test_build_task_group():
    """
    Build the TaskGroups by passing ``dag``/``task_group``/``parent_group``
    explicitly instead of using context managers. The serialized result is
    expected to equal ``EXPECTED_JSON``, the same value the context-manager
    variant is compared against.
    """
    start_date = pendulum.parse("20200101")
    dag = DAG("test_build_task_group", start_date=start_date)

    task1 = DummyOperator(task_id="task1", dag=dag)
    group234 = TaskGroup("group234", dag=dag)
    DummyOperator(task_id="task2", dag=dag, task_group=group234)
    group34 = TaskGroup("group34", dag=dag, parent_group=group234)
    for nested_task_id in ("task3", "task4"):
        DummyOperator(task_id=nested_task_id, dag=dag, task_group=group34)
    task5 = DummyOperator(task_id="task5", dag=dag)

    task1 >> group234
    group34 >> task5

    serialized = task_group_to_dict(dag.task_group)
    assert serialized == EXPECTED_JSON
Exemplo n.º 4
0
def test_dag_edges():
    """
    Wire tasks and (nested) TaskGroups together in several ways and verify
    both the node tree produced by ``task_group_to_dict`` and the edge list
    produced by ``dag_edges``.
    """
    start_date = pendulum.parse("20200101")
    with DAG("test_dag_edges", start_date=start_date) as dag:
        task1 = DummyOperator(task_id="task1")
        with TaskGroup("group_a") as group_a:
            with TaskGroup("group_b") as group_b:
                task2 = DummyOperator(task_id="task2")
                task3 = DummyOperator(task_id="task3")
                task4 = DummyOperator(task_id="task4")
                task2 >> [task3, task4]

            task5 = DummyOperator(task_id="task5")

            task5 << group_b

        task1 >> group_a

        with TaskGroup("group_c") as group_c:
            task6 = DummyOperator(task_id="task6")
            task7 = DummyOperator(task_id="task7")
            task8 = DummyOperator(task_id="task8")
            [task6, task7] >> task8
            group_a >> group_c

        task5 >> task8

        task9 = DummyOperator(task_id="task9")
        task10 = DummyOperator(task_id="task10")

        group_c >> [task9, task10]

        with TaskGroup("group_d") as group_d:
            task11 = DummyOperator(task_id="task11")
            task12 = DummyOperator(task_id="task12")
            task11 >> task12

        group_d << group_c

    nodes = task_group_to_dict(dag.task_group)
    edges = dag_edges(dag)

    expected_nodes = {
        'id': None,
        'children': [
            {
                'id': 'group_a',
                'children': [
                    {
                        'id': 'group_a.group_b',
                        'children': [
                            {'id': 'group_a.group_b.task2'},
                            {'id': 'group_a.group_b.task3'},
                            {'id': 'group_a.group_b.task4'},
                            {'id': 'group_a.group_b.downstream_join_id'},
                        ],
                    },
                    {'id': 'group_a.task5'},
                    {'id': 'group_a.upstream_join_id'},
                    {'id': 'group_a.downstream_join_id'},
                ],
            },
            {
                'id': 'group_c',
                'children': [
                    {'id': 'group_c.task6'},
                    {'id': 'group_c.task7'},
                    {'id': 'group_c.task8'},
                    {'id': 'group_c.upstream_join_id'},
                    {'id': 'group_c.downstream_join_id'},
                ],
            },
            {
                'id': 'group_d',
                'children': [
                    {'id': 'group_d.task11'},
                    {'id': 'group_d.task12'},
                    {'id': 'group_d.upstream_join_id'},
                ],
            },
            {'id': 'task1'},
            {'id': 'task10'},
            {'id': 'task9'},
        ],
    }
    assert extract_node_id(nodes) == expected_nodes

    expected_edges = [
        ('group_a.downstream_join_id', 'group_c.upstream_join_id'),
        ('group_a.group_b.downstream_join_id', 'group_a.task5'),
        ('group_a.group_b.task2', 'group_a.group_b.task3'),
        ('group_a.group_b.task2', 'group_a.group_b.task4'),
        ('group_a.group_b.task3', 'group_a.group_b.downstream_join_id'),
        ('group_a.group_b.task4', 'group_a.group_b.downstream_join_id'),
        ('group_a.task5', 'group_a.downstream_join_id'),
        ('group_a.task5', 'group_c.task8'),
        ('group_a.upstream_join_id', 'group_a.group_b.task2'),
        ('group_c.downstream_join_id', 'group_d.upstream_join_id'),
        ('group_c.downstream_join_id', 'task10'),
        ('group_c.downstream_join_id', 'task9'),
        ('group_c.task6', 'group_c.task8'),
        ('group_c.task7', 'group_c.task8'),
        ('group_c.task8', 'group_c.downstream_join_id'),
        ('group_c.upstream_join_id', 'group_c.task6'),
        ('group_c.upstream_join_id', 'group_c.task7'),
        ('group_d.task11', 'group_d.task12'),
        ('group_d.upstream_join_id', 'group_d.task11'),
        ('task1', 'group_a.upstream_join_id'),
    ]
    # Compare as sorted (source, target) pairs so edge ordering is irrelevant.
    actual_edges = sorted((edge["source_id"], edge["target_id"]) for edge in edges)
    assert actual_edges == expected_edges
Exemplo n.º 5
0
def test_sub_dag_task_group():
    """
    Check that the DAG returned by ``dag.sub_dag()`` carries a task_group
    that only references tasks and groups included in the subset.
    """
    start_date = pendulum.parse("20200101")
    with DAG("test_test_task_group_sub_dag", start_date=start_date) as dag:
        task1 = DummyOperator(task_id="task1")
        with TaskGroup("group234") as group234:
            DummyOperator(task_id="task2")

            with TaskGroup("group34") as group34:
                DummyOperator(task_id="task3")
                DummyOperator(task_id="task4")

        with TaskGroup("group6") as group6:
            DummyOperator(task_id="task6")

        task7 = DummyOperator(task_id="task7")
        task5 = DummyOperator(task_id="task5")

        task1 >> group234
        group34 >> task5
        group234 >> group6
        group234 >> task7

    # Subset selected by "task5", with upstream included and downstream excluded.
    subdag = dag.sub_dag(
        task_ids_or_regex="task5",
        include_upstream=True,
        include_downstream=False,
    )

    expected_nodes = {
        'id': None,
        'children': [
            {
                'id': 'group234',
                'children': [
                    {
                        'id': 'group234.group34',
                        'children': [
                            {'id': 'group234.group34.task3'},
                            {'id': 'group234.group34.task4'},
                            {'id': 'group234.group34.downstream_join_id'},
                        ],
                    },
                    {'id': 'group234.upstream_join_id'},
                ],
            },
            {'id': 'task1'},
            {'id': 'task5'},
        ],
    }
    assert extract_node_id(task_group_to_dict(subdag.task_group)) == expected_nodes

    expected_edges = [
        ('group234.group34.downstream_join_id', 'task5'),
        ('group234.group34.task3', 'group234.group34.downstream_join_id'),
        ('group234.group34.task4', 'group234.group34.downstream_join_id'),
        ('group234.upstream_join_id', 'group234.group34.task3'),
        ('group234.upstream_join_id', 'group234.group34.task4'),
        ('task1', 'group234.upstream_join_id'),
    ]
    edges = dag_edges(subdag)
    assert sorted((edge["source_id"], edge["target_id"]) for edge in edges) == expected_edges

    subdag_task_groups = subdag.task_group.get_task_group_dict()
    assert subdag_task_groups.keys() == {None, "group234", "group234.group34"}

    included_group_ids = {"group234", "group234.group34"}
    included_task_ids = {
        'group234.group34.task3',
        'group234.group34.task4',
        'task1',
        'task5',
    }

    # No group may keep a reference to a group or task outside the subset.
    for group in subdag_task_groups.values():
        assert group.upstream_group_ids.issubset(included_group_ids)
        assert group.downstream_group_ids.issubset(included_group_ids)
        assert group.upstream_task_ids.issubset(included_task_ids)
        assert group.downstream_task_ids.issubset(included_task_ids)

    # The same holds for every task yielded by iterating the root group.
    for kept_task in subdag.task_group:
        assert kept_task.upstream_task_ids.issubset(included_task_ids)
        assert kept_task.downstream_task_ids.issubset(included_task_ids)
Exemplo n.º 6
0
def test_build_task_group_with_task_decorator():
    """
    Test that TaskGroup can be used with the @task decorator.

    Tasks created by calling ``@task``-decorated functions inside a
    ``with TaskGroup`` block must be attached to that group, and
    dependencies set on the group must reach the underlying operators.
    """
    # Import from ``airflow.decorators`` like the other tests in this file;
    # ``airflow.operators.python.task`` is a deprecated alias of the same decorator.
    from airflow.decorators import task

    @task
    def task_1():
        print("task_1")

    @task
    def task_2():
        return "task_2"

    @task
    def task_3():
        return "task_3"

    @task
    def task_4(task_2_output, task_3_output):
        print(task_2_output, task_3_output)

    @task
    def task_5():
        print("task_5")

    execution_date = pendulum.parse("20200101")
    with DAG("test_build_task_group_with_task_decorator",
             start_date=execution_date) as dag:
        tsk_1 = task_1()

        with TaskGroup("group234") as group234:
            tsk_2 = task_2()
            tsk_3 = task_3()
            # Passing the outputs of task_2/task_3 makes task_4 depend on both.
            tsk_4 = task_4(tsk_2, tsk_3)

        tsk_5 = task_5()

        tsk_1 >> group234 >> tsk_5

    # The group-level dependencies must be visible on the wrapped operators.
    # pylint: disable=no-member
    assert tsk_1.operator in tsk_2.operator.upstream_list
    assert tsk_1.operator in tsk_3.operator.upstream_list
    assert tsk_5.operator in tsk_4.operator.downstream_list
    # pylint: enable=no-member

    assert extract_node_id(task_group_to_dict(dag.task_group)) == {
        'id': None,
        'children': [
            {
                'id': 'group234',
                'children': [
                    {'id': 'group234.task_2'},
                    {'id': 'group234.task_3'},
                    {'id': 'group234.task_4'},
                    {'id': 'group234.upstream_join_id'},
                    {'id': 'group234.downstream_join_id'},
                ],
            },
            {'id': 'task_1'},
            {'id': 'task_5'},
        ],
    }

    edges = dag_edges(dag)
    assert sorted((e["source_id"], e["target_id"]) for e in edges) == [
        ('group234.downstream_join_id', 'task_5'),
        ('group234.task_2', 'group234.task_4'),
        ('group234.task_3', 'group234.task_4'),
        ('group234.task_4', 'group234.downstream_join_id'),
        ('group234.upstream_join_id', 'group234.task_2'),
        ('group234.upstream_join_id', 'group234.task_3'),
        ('task_1', 'group234.upstream_join_id'),
    ]
Exemplo n.º 7
0
def test_task_group_context_mix():
    """
    Nest a TaskGroup built with the taskgroup decorator inside a TaskGroup
    built with the context manager and check the resulting node ids.
    """

    from airflow.decorators import task

    def task_start():
        """Dummy Task which is First Task of Dag """
        return '[Task_start]'

    def task_end():
        """Dummy Task which is Last Task of Dag"""
        print('[ Task_End  ]')

    # Decorated tasks used by the decorated group below.
    @task
    def task_1(value):
        """ Dummy Task1"""
        return f'[ Task1 {value} ]'

    @task
    def task_2(value):
        """ Dummy Task2"""
        return f'[ Task2 {value} ]'

    @task
    def task_3(value):
        """ Dummy Task3"""
        print(f'[ Task3 {value} ]')

    # Group created through the decorator; it chains the three tasks above.
    @task_group_decorator
    def section_2(value):
        """ TaskGroup for grouping related Tasks"""
        return task_3(task_2(task_1(value)))

    start_date = pendulum.parse("20201109")
    with DAG(
        dag_id="example_task_group_decorator_mix",
        start_date=start_date,
        tags=["example"],
    ) as dag:
        t_start = PythonOperator(
            task_id='task_start', python_callable=task_start, dag=dag
        )

        with TaskGroup("section_1", tooltip="section_1") as section_1:
            sec_2 = section_2(t_start.output)
            task_s1 = DummyOperator(task_id="task_1")
            task_s2 = BashOperator(task_id="task_2", bash_command='echo 1')
            task_s3 = DummyOperator(task_id="task_3")

            sec_2.set_downstream(task_s1)
            task_s1 >> [task_s2, task_s3]

        t_end = PythonOperator(
            task_id='task_end', python_callable=task_end, dag=dag
        )
        t_start >> section_1 >> t_end

    expected_node_ids = {
        'id': None,
        'children': [
            {
                'id': 'section_1',
                'children': [
                    {
                        'id': 'section_1.section_2',
                        'children': [
                            {'id': 'section_1.section_2.task_1'},
                            {'id': 'section_1.section_2.task_2'},
                            {'id': 'section_1.section_2.task_3'},
                            {'id': 'section_1.section_2.downstream_join_id'},
                        ],
                    },
                    {'id': 'section_1.task_1'},
                    {'id': 'section_1.task_2'},
                    {'id': 'section_1.task_3'},
                    {'id': 'section_1.upstream_join_id'},
                    {'id': 'section_1.downstream_join_id'},
                ],
            },
            {'id': 'task_end'},
            {'id': 'task_start'},
        ],
    }

    assert extract_node_id(task_group_to_dict(dag.task_group)) == expected_node_ids
Exemplo n.º 8
0
def test_build_task_group_deco_context_manager():
    """
    Checks the following:
    1. Nested TaskGroups created with the taskgroup decorator produce the
       expected children on the DAG's root group and on ``section_1``.
    2. Tasks created with the task decorator inside those groups get the
       expected downstream task ids.
    3. Node ids of a DAG built with the taskgroup decorator.
    """

    from airflow.decorators import task

    # Decorated tasks
    @task
    def task_start():
        """Dummy Task which is First Task of Dag """
        return '[Task_start]'

    @task
    def task_end():
        """Dummy Task which is Last Task of Dag"""
        print('[ Task_End ]')

    @task
    def task_1(value):
        """ Dummy Task1"""
        return f'[ Task1 {value} ]'

    @task
    def task_2(value):
        """ Dummy Task2"""
        print(f'[ Task2 {value} ]')

    @task
    def task_3(value):
        """ Dummy Task3"""
        return f'[ Task3 {value} ]'

    @task
    def task_4(value):
        """ Dummy Task4"""
        print(f'[ Task4 {value} ]')

    # Decorated groups: section_2 is declared (and called) inside section_1.
    @task_group_decorator
    def section_1(value):
        """ TaskGroup for grouping related Tasks"""
        @task_group_decorator()
        def section_2(value2):
            """ TaskGroup for grouping related Tasks"""
            return task_4(task_3(value2))

        op1 = task_2(task_1(value))
        return section_2(op1)

    start_date = pendulum.parse("20201109")
    with DAG(
        dag_id="example_nested_task_group_decorator",
        start_date=start_date,
        tags=["example"],
    ) as dag:
        t_start = task_start()
        sec_1 = section_1(t_start)
        sec_1.set_downstream(task_end())

    # 1. Children of the root group and of the decorated group.
    root_children = dag.task_group.children
    assert set(root_children.keys()) == {"task_start", "task_end", "section_1"}
    assert set(root_children['section_1'].children.keys()) == {
        'section_1.task_1',
        'section_1.task_2',
        'section_1.section_2',
    }

    # 2. Downstream wiring of the decorated tasks.
    tasks_by_id = dag.task_dict
    assert tasks_by_id['task_start'].downstream_task_ids == {'section_1.task_1'}
    assert tasks_by_id['section_1.task_2'].downstream_task_ids == {
        'section_1.section_2.task_3'
    }
    assert tasks_by_id['section_1.section_2.task_4'].downstream_task_ids == {
        'task_end'
    }

    # 3. Node ids of the serialized group tree.
    expected_node_ids = {
        'id': None,
        'children': [
            {
                'id': 'section_1',
                'children': [
                    {
                        'id': 'section_1.section_2',
                        'children': [
                            {'id': 'section_1.section_2.task_3'},
                            {'id': 'section_1.section_2.task_4'},
                        ],
                    },
                    {'id': 'section_1.task_1'},
                    {'id': 'section_1.task_2'},
                    {'id': 'section_1.downstream_join_id'},
                ],
            },
            {'id': 'task_end'},
            {'id': 'task_start'},
        ],
    }

    assert extract_node_id(task_group_to_dict(dag.task_group)) == expected_node_ids
Exemplo n.º 9
0
def test_call_taskgroup_twice():
    """
    Call one taskgroup-decorated function twice in the same DAG; the second
    call is expected to show up under the suffixed id ``task_group1__1``.
    """
    from airflow.decorators import task

    @task(task_id='start_task')
    def task_start():
        """Dummy Task which is First Task of Dag """
        print('[Task_start]')

    @task(task_id='end_task')
    def task_end():
        """Dummy Task which is Last Task of Dag"""
        print('[Task_End]')

    # Task placed between start and end inside the group
    @task(task_id='task')
    def task_1():
        """ Dummy Task1"""
        print('[Task1]')

    @task_group_decorator
    def task_group1(name: str):
        print(f'Starting taskgroup {name}')
        task_start()
        task_1()
        task_end()

    start_date = pendulum.parse("20201109")
    with DAG(
        dag_id="example_multi_call_task_groups",
        start_date=start_date,
        tags=["example"],
    ) as dag:
        task_group1('Call1')
        task_group1('Call2')

    expected_node_ids = {
        'id': None,
        'children': [
            {
                'id': 'task_group1',
                'children': [
                    {'id': 'task_group1.end_task'},
                    {'id': 'task_group1.start_task'},
                    {'id': 'task_group1.task'},
                ],
            },
            {
                'id': 'task_group1__1',
                'children': [
                    {'id': 'task_group1__1.end_task'},
                    {'id': 'task_group1__1.start_task'},
                    {'id': 'task_group1__1.task'},
                ],
            },
        ],
    }

    assert extract_node_id(task_group_to_dict(dag.task_group)) == expected_node_ids
Exemplo n.º 10
0
def test_duplicate_task_group_id():
    """
    Give three decorated groups the same ``group_id`` (and two tasks in one
    group the same ``task_id``) and check the suffixed ids in the node tree.
    """

    from airflow.decorators import task

    @task(task_id='start_task')
    def task_start():
        """Dummy Task which is First Task of Dag """
        print('[Task_start]')

    @task(task_id='end_task')
    def task_end():
        """Dummy Task which is Last Task of Dag"""
        print('[Task_End]')

    # task_1 and task_2 deliberately share the task_id 'task'
    @task(task_id='task')
    def task_1():
        """ Dummy Task1"""
        print('[Task1]')

    @task(task_id='task')
    def task_2():
        """ Dummy Task2"""
        print('[Task2]')

    @task(task_id='task1')
    def task_3():
        """ Dummy Task3"""
        print('[Task3]')

    # All three groups request the group id 'task_group1'
    @task_group_decorator('task_group1')
    def task_group1():
        task_start()
        task_1()
        task_2()

    @task_group_decorator(group_id='task_group1')
    def task_group2():
        task_3()

    @task_group_decorator(group_id='task_group1')
    def task_group3():
        task_end()

    start_date = pendulum.parse("20201109")
    with DAG(
        dag_id="example_duplicate_task_group_id",
        start_date=start_date,
        tags=["example"],
    ) as dag:
        task_group1()
        task_group2()
        task_group3()

    expected_node_ids = {
        'id': None,
        'children': [
            {
                'id': 'task_group1',
                'children': [
                    {'id': 'task_group1.start_task'},
                    {'id': 'task_group1.task'},
                    {'id': 'task_group1.task__1'},
                ],
            },
            {
                'id': 'task_group1__1',
                'children': [{'id': 'task_group1__1.task1'}],
            },
            {
                'id': 'task_group1__2',
                'children': [{'id': 'task_group1__2.end_task'}],
            },
        ],
    }

    assert extract_node_id(task_group_to_dict(dag.task_group)) == expected_node_ids