Example #1
 def test_operator_shift(self, test_dag):
     """Tests the >> / << style with a plain operator"""
     # Unpack the fixture
     dag, (op1, op2, op3, op4) = test_dag
     # Arrange the operators with a Label in the middle
     op1 >> Label("Label 1") >> op2  # pylint: disable=W0106
     op3 << Label("Label 2") << op2 >> op4  # pylint: disable=W0106
     # Check that the DAG has the right edge info
     assert dag.get_edge_info(op1.task_id, op2.task_id) == {
         "label": "Label 1"
     }
     assert dag.get_edge_info(op2.task_id, op3.task_id) == {
         "label": "Label 2"
     }
     assert dag.get_edge_info(op2.task_id, op4.task_id) == {}
Example #2
 def test_operator_set(self, test_dag):
     """Tests the set_upstream/downstream style with a plain operator"""
     # Unpack the fixture
     dag, (op1, op2, op3, op4) = test_dag
     # Arrange the operators with a Label in the middle
     op1.set_downstream(op2, Label("Label 1"))
     op3.set_upstream(op2, Label("Label 2"))
     op4.set_upstream(op2)
     # Check that the DAG has the right edge info
     assert dag.get_edge_info(op1.task_id, op2.task_id) == {
         "label": "Label 1"
     }
     assert dag.get_edge_info(op2.task_id, op3.task_id) == {
         "label": "Label 2"
     }
     assert dag.get_edge_info(op2.task_id, op4.task_id) == {}
Example #3
 def test_xcomarg_shift(self, test_dag):
     """Tests the >> / << style with an XComArg"""
     # Unpack the fixture
     dag, (op1, op2, op3, op4) = test_dag
     # Arrange the operators with a Label in the middle
     op1_arg = XComArg(op1, "test_key")
     op1_arg >> Label("Label 1") >> [op2, op3]  # pylint: disable=W0106
     op1_arg >> op4
     # Check that the DAG has the right edge info
     assert dag.get_edge_info(op1.task_id, op2.task_id) == {
         "label": "Label 1"
     }
     assert dag.get_edge_info(op1.task_id, op4.task_id) == {}
Example #4
 def test_xcomarg_set(self, test_dag):
     """Tests the set_upstream/downstream style with an XComArg"""
     # Unpack the fixture
     dag, (op1, op2, op3, op4) = test_dag
     # Arrange the operators with a Label in the middle
     op1_arg = XComArg(op1, "test_key")
     op1_arg.set_downstream(op2, Label("Label 1"))
     op1.set_downstream([op3, op4])
     # Check that the DAG has the right edge info
     assert dag.get_edge_info(op1.task_id, op2.task_id) == {
         "label": "Label 1"
     }
     assert dag.get_edge_info(op1.task_id, op4.task_id) == {}
Example #5
 def test_taskgroup_shift(self, test_taskgroup_dag):
     """Tests the >> / << style with a TaskGroup"""
     # Unpack the fixture
     dag, group, (op1, op2, op3, op4) = test_taskgroup_dag
     # Arrange them with a Label in the middle
     op1 >> Label("Group label") >> group >> op4  # pylint: disable=W0106
     # Check that the DAG has the right edge info
     assert dag.get_edge_info(op1.task_id, op2.task_id) == {
         "label": "Group label"
     }
     assert dag.get_edge_info(op1.task_id, op3.task_id) == {
         "label": "Group label"
     }
     assert dag.get_edge_info(op3.task_id, op4.task_id) == {}
Example #6
 def test_taskgroup_set(self, test_taskgroup_dag):
     """Tests the set_upstream/downstream style with a TaskGroup"""
     # Unpack the fixture
     dag, group, (op1, op2, op3, op4) = test_taskgroup_dag
     # Arrange them with a Label in the middle
     op1.set_downstream(group, Label("Group label"))
     group.set_downstream(op4)
     # Check that the DAG has the right edge info
     assert dag.get_edge_info(op1.task_id, op2.task_id) == {
         "label": "Group label"
     }
     assert dag.get_edge_info(op1.task_id, op3.task_id) == {
         "label": "Group label"
     }
     assert dag.get_edge_info(op3.task_id, op4.task_id) == {}
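The test_dag and test_taskgroup_dag fixtures that Examples #1-#6 unpack are not part of this excerpt. A minimal sketch of the kind of fixtures they assume (DAG ids, task ids, and fixture bodies below are illustrative guesses, not the original source):

from datetime import datetime

import pytest

from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.utils.task_group import TaskGroup


@pytest.fixture
def test_dag():
    # Hypothetical: four plain operators, unpacked above as
    # ``dag, (op1, op2, op3, op4) = test_dag``.
    with DAG("test_edges", start_date=datetime(2021, 1, 1)) as dag:
        ops = [DummyOperator(task_id=f"op{i}") for i in range(1, 5)]
    return dag, tuple(ops)


@pytest.fixture
def test_taskgroup_dag():
    # Hypothetical: op2/op3 sit inside a TaskGroup so a Label on the group
    # edge fans out to both op1->op2 and op1->op3 (see Examples #5-#6).
    with DAG("test_group_edges", start_date=datetime(2021, 1, 1)) as dag:
        op1 = DummyOperator(task_id="op1")
        with TaskGroup("group_1") as group:
            op2 = DummyOperator(task_id="op2")
            op3 = DummyOperator(task_id="op3")
        op4 = DummyOperator(task_id="op4")
    return dag, group, (op1, op2, op3, op4)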
Example #7
    def test_edge_info_serialization(self):
        """
        Tests edge_info serialization/deserialization.
        """
        from airflow.operators.dummy import DummyOperator
        from airflow.utils.edgemodifier import Label

        with DAG("test_edge_info_serialization", start_date=datetime(2020, 1, 1)) as dag:
            task1 = DummyOperator(task_id="task1")
            task2 = DummyOperator(task_id="task2")
            task1 >> Label("test label") >> task2  # pylint: disable=W0106

        dag_dict = SerializedDAG.to_dict(dag)
        SerializedDAG.validate_schema(dag_dict)
        json_dag = SerializedDAG.from_json(SerializedDAG.to_json(dag))
        self.validate_deserialized_dag(json_dag, dag)

        serialized_dag = SerializedDAG.deserialize_dag(SerializedDAG.serialize_dag(dag))

        assert serialized_dag.edge_info == dag.edge_info
Example #8
    )

    accurate = DummyOperator(task_id='accurate')

    fetch_best_model = NotebookToKeepOperator(
        task_id='fetch_best_model',
        sql='sql/FETCH_BEST_MODEL.sql',
        postgres_conn_id='postgres'
    )

    publish_notebook = NotebookToGitOperator(
        task_id='publish_notebook',
        conn_id='git',
        nb_path='/tmp',
        nb_name='out-model-avocado-prediction-{{ ti.xcom_pull(task_ids="fetch_best_model") }}.ipynb'
    )

    inaccurate = DummyOperator(task_id='inaccurate')

    label_accurate = Label("RMSE < 0.15")
    label_inaccurate = Label("RMSE >= 0.15")

    creating_accuracy_table >> downloading_data >> waiting_for_data >> sanity_check >> training_model_tasks >> evaluating_rmse
    evaluating_rmse >> label_accurate >> accurate >> fetch_best_model >> publish_notebook
    evaluating_rmse >> label_inaccurate >> inaccurate
Example #9
import random

import pendulum

from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.operators.python import BranchPythonOperator
from airflow.utils.edgemodifier import Label
from airflow.utils.trigger_rule import TriggerRule

with DAG(
        dag_id='example_branch_operator',
        start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
        catchup=False,
        schedule_interval="@daily",
        tags=['example', 'example2'],
) as dag:
    run_this_first = DummyOperator(task_id='run_this_first')

    options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

    branching = BranchPythonOperator(
        task_id='branching',
        python_callable=lambda: random.choice(options),
    )
    run_this_first >> branching

    join = DummyOperator(
        task_id='join',
        trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS,
    )

    for option in options:
        t = DummyOperator(task_id=option)

        dummy_follow = DummyOperator(task_id='follow_' + option)

        # Label is optional here, but it can help identify more complex branches
        branching >> Label(option) >> t >> dummy_follow >> join
Example #10
from __future__ import annotations

import random

import pendulum

from airflow import DAG
from airflow.decorators import task
from airflow.operators.empty import EmptyOperator
from airflow.utils.edgemodifier import Label
from airflow.utils.trigger_rule import TriggerRule

with DAG(
    dag_id='example_branch_python_operator_decorator',
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    schedule="@daily",
    tags=['example', 'example2'],
) as dag:
    run_this_first = EmptyOperator(task_id='run_this_first')

    options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

    @task.branch(task_id="branching")
    def random_choice(choices: list[str]) -> str:
        return random.choice(choices)

    random_choice_instance = random_choice(choices=options)

    run_this_first >> random_choice_instance

    join = EmptyOperator(task_id='join', trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS)

    for option in options:
        t = EmptyOperator(task_id=option)

        empty_follow = EmptyOperator(task_id='follow_' + option)

        # Label is optional here, but it can help identify more complex branches
        random_choice_instance >> Label(option) >> t >> empty_follow >> join
Example #11
"""
Example DAG demonstrating the usage of labels with different branches.
"""

from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.utils.dates import days_ago
from airflow.utils.edgemodifier import Label

with DAG("example_branch_labels",
         schedule_interval="@daily",
         start_date=days_ago(2)) as dag:

    ingest = DummyOperator(task_id="ingest")
    analyse = DummyOperator(task_id="analyze")
    check = DummyOperator(task_id="check_integrity")
    describe = DummyOperator(task_id="describe_integrity")
    error = DummyOperator(task_id="email_error")
    save = DummyOperator(task_id="save")
    report = DummyOperator(task_id="report")

    ingest >> analyse >> check
    check >> Label("No errors") >> save >> report  # pylint: disable=expression-not-assigned
    check >> Label("Errors found") >> describe >> error >> report  # pylint: disable=expression-not-assigned
Example #12
from airflow.models.baseoperator import chain
from airflow.operators.bash import BashOperator
from airflow.operators.dummy import DummyOperator
from airflow.operators.email import EmailOperator
from airflow.operators.python import BranchPythonOperator
from airflow.operators.weekday import BranchDayOfWeekOperator
from airflow.utils.edgemodifier import Label
from airflow.utils.task_group import TaskGroup
from airflow.utils.trigger_rule import TriggerRule
from airflow.utils.weekday import WeekDay


def airflow2_good_example():
    begin = DummyOperator(task_id="begin")
    end = DummyOperator(task_id="end", trigger_rule=TriggerRule.NONE_FAILED)

    check_day_of_week = BranchDayOfWeekOperator(
        task_id="check_day_of_week",
        week_day={WeekDay.SATURDAY, WeekDay.SUNDAY},
        follow_task_ids_if_true="weekend",
        follow_task_ids_if_false="weekday",
        use_task_execution_day=True,
    )

    weekend = DummyOperator(task_id="weekend")
    weekday = DummyOperator(task_id="weekday")

    # Templated value for determining the name of the day of week based on the start date of the DagRun.
    day_name = "{{ dag_run.start_date.strftime('%A').lower() }}"

    # Begin weekday tasks.
    with TaskGroup("weekday_activities") as weekday_activities:
        which_weekday_activity_day = BranchPythonOperator(
            task_id="which_weekday_activity_day",
            python_callable=_get_activity,
            op_args=[day_name],
        )

        for day, day_info in DAY_ACTIVITY_MAPPING.items():
            if day_info["is_weekday"]:
                day_of_week = Label(label=day)
                activity = day_info["activity"]

                do_activity = BashOperator(
                    task_id=activity.replace(" ", "_"),
                    bash_command=f"echo It's {day.capitalize()} and I'm busy with {activity}.",
                )

                # Declaring task dependencies within the `TaskGroup` via the classic bitshift operator.
                which_weekday_activity_day >> day_of_week >> do_activity

    # Begin weekend tasks.
    with TaskGroup("weekend_activities") as weekend_activities:
        which_weekend_activity_day = BranchPythonOperator(
            task_id="which_weekend_activity_day",
            python_callable=_get_activity,
            op_args=[day_name],
        )

        saturday = Label(label="saturday")
        sunday = Label(label="sunday")

        sleeping_in = BashOperator(
            task_id="sleeping_in",
            bash_command="sleep $[ ( $RANDOM % 30 )  + 1 ]s")

        going_to_the_beach = _going_to_the_beach()

        # Because the ``going_to_the_beach()`` function has ``multiple_outputs`` enabled, each dict key is
        # accessible as its own ``XCom`` key.
        inviting_friends = EmailOperator(
            task_id="inviting_friends",
            to="*****@*****.**",
            subject=going_to_the_beach["subject"],
            html_content=going_to_the_beach["body"],
        )

        # Using ``chain()`` here for list-to-list dependencies which are not supported by the bitshift
        # operator and to simplify the notation for the desired dependency structure.
        chain(which_weekend_activity_day, [saturday, sunday],
              [going_to_the_beach, sleeping_in])

    # High-level dependencies.
    chain(begin, check_day_of_week, [weekday, weekend],
          [weekday_activities, weekend_activities], end)
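Example #12 references three helpers defined elsewhere in its source file: DAY_ACTIVITY_MAPPING, _get_activity, and _going_to_the_beach. A hypothetical sketch of their shape, inferred only from how the example uses them (all names, days, and strings below are illustrative):

from airflow.decorators import task

# Hypothetical mapping; only the shape (an ``is_weekday`` flag plus an
# ``activity`` string per day) is implied by the example above.
DAY_ACTIVITY_MAPPING = {
    "monday": {"is_weekday": True, "activity": "guitar lessons"},
    "friday": {"is_weekday": True, "activity": "grocery shopping"},
    "saturday": {"is_weekday": False, "activity": "going to the beach"},
    "sunday": {"is_weekday": False, "activity": "sleeping in"},
}


def _get_activity(day_name: str) -> str:
    # Branch callable: returns the task_id to follow. Tasks created inside a
    # TaskGroup are addressed with the group name as a prefix.
    day_info = DAY_ACTIVITY_MAPPING[day_name]
    activity_id = day_info["activity"].replace(" ", "_")
    group = "weekday_activities" if day_info["is_weekday"] else "weekend_activities"
    return f"{group}.{activity_id}"


@task(task_id="going_to_the_beach", multiple_outputs=True)
def _going_to_the_beach() -> dict:
    # ``multiple_outputs=True`` stores each dict key as its own XCom entry,
    # which is what lets the example index the result by "subject" and "body".
    return {
        "subject": "Beach day!",
        "body": "It's Saturday and I'm going to the beach. Come join me!",
    }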
Example #13
"""
Example DAG demonstrating the usage of labels with different branches.
"""
import pendulum

from airflow import DAG
from airflow.operators.empty import EmptyOperator
from airflow.utils.edgemodifier import Label

with DAG(
        "example_branch_labels",
        schedule="@daily",
        start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
        catchup=False,
) as dag:
    ingest = EmptyOperator(task_id="ingest")
    analyse = EmptyOperator(task_id="analyze")
    check = EmptyOperator(task_id="check_integrity")
    describe = EmptyOperator(task_id="describe_integrity")
    error = EmptyOperator(task_id="email_error")
    save = EmptyOperator(task_id="save")
    report = EmptyOperator(task_id="report")

    ingest >> analyse >> check
    check >> Label("No errors") >> save >> report
    check >> Label("Errors found") >> describe >> error >> report
Example #14
    # [START howto_operator_dbt_cloud_get_artifact]
    get_run_results_artifact = DbtCloudGetJobRunArtifactOperator(
        task_id="get_run_results_artifact",
        run_id=trigger_job_run1.output,
        path="run_results.json",
    )
    # [END howto_operator_dbt_cloud_get_artifact]

    # [START howto_operator_dbt_cloud_run_job_async]
    trigger_job_run2 = DbtCloudRunJobOperator(
        task_id="trigger_job_run2",
        job_id=48617,
        wait_for_termination=False,
        additional_run_config={"threads_override": 8},
    )
    # [END howto_operator_dbt_cloud_run_job_async]

    # [START howto_operator_dbt_cloud_run_job_sensor]
    job_run_sensor = DbtCloudJobRunSensor(
        task_id="job_run_sensor",
        run_id=trigger_job_run2.output,
        timeout=20,
    )
    # [END howto_operator_dbt_cloud_run_job_sensor]

    begin >> Label("No async wait") >> trigger_job_run1
    begin >> Label("Do async wait with sensor") >> trigger_job_run2
    [get_run_results_artifact, job_run_sensor] >> end

    # Task dependency created via `XComArgs`:
    # trigger_job_run1 >> get_run_results_artifact
    # trigger_job_run2 >> job_run_sensor
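The closing comments in Example #14 point at a useful mechanism: passing an operator's .output (an XComArg) into another operator's field, as in run_id=trigger_job_run1.output, creates the upstream dependency implicitly. A minimal TaskFlow sketch of the same idea (DAG id and task names are illustrative):

import pendulum

from airflow import DAG
from airflow.decorators import task


@task
def produce() -> int:
    return 42


@task
def consume(value: int) -> None:
    print(value)


with DAG(
    "xcomarg_dependency_sketch",
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    schedule=None,
) as dag:
    # Passing the XComArg returned by ``produce()`` both feeds the value
    # through XCom at runtime and registers ``produce`` as upstream of
    # ``consume``; no explicit ``produce >> consume`` wiring is needed.
    consume(produce())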
Example #15
    tags=['example', 'example2'],
) as dag:
    run_this_first = EmptyOperator(
        task_id='run_this_first',
    )

    options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

    branching = BranchPythonOperator(
        task_id='branching',
        python_callable=lambda: random.choice(options),
    )
    run_this_first >> branching

    join = EmptyOperator(
        task_id='join',
        trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS,
    )

    for option in options:
        t = EmptyOperator(
            task_id=option,
        )

        empty_follow = EmptyOperator(
            task_id='follow_' + option,
        )

        # Label is optional here, but it can help identify more complex branches
        branching >> Label(option) >> t >> empty_follow >> join
Example #16
    begin = DummyOperator(task_id="begin")
    end = DummyOperator(task_id="end")

    # [START howto_operator_adf_run_pipeline]
    run_pipeline1: BaseOperator = AzureDataFactoryRunPipelineOperator(
        task_id="run_pipeline1",
        pipeline_name="pipeline1",
        parameters={"myParam": "value"},
    )
    # [END howto_operator_adf_run_pipeline]

    # [START howto_operator_adf_run_pipeline_async]
    run_pipeline2: BaseOperator = AzureDataFactoryRunPipelineOperator(
        task_id="run_pipeline2",
        pipeline_name="pipeline2",
        wait_for_termination=False,
    )

    pipeline_run_sensor: BaseOperator = AzureDataFactoryPipelineRunStatusSensor(
        task_id="pipeline_run_sensor",
        run_id=run_pipeline2.output["run_id"],
    )
    # [END howto_operator_adf_run_pipeline_async]

    begin >> Label("No async wait") >> run_pipeline1
    begin >> Label("Do async wait with sensor") >> run_pipeline2
    [run_pipeline1, pipeline_run_sensor] >> end

    # Task dependency created via `XComArgs`:
    #   run_pipeline2 >> pipeline_run_sensor
Example #17
with DAG(
        dag_id='example_branch_operator',
        default_args=args,
        start_date=days_ago(2),
        schedule_interval="@daily",
        tags=['example', 'example2'],
) as dag:

    run_this_first = DummyOperator(task_id='run_this_first')

    options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

    branching = BranchPythonOperator(
        task_id='branching',
        python_callable=lambda: random.choice(options),
    )
    run_this_first >> branching

    join = DummyOperator(
        task_id='join',
        trigger_rule='none_failed_or_skipped',
    )

    for option in options:
        t = DummyOperator(task_id=option)

        dummy_follow = DummyOperator(task_id='follow_' + option)

        # Label is optional here, but it can help identify more complex branches
        branching >> Label(option) >> t >> dummy_follow >> join  # pylint: disable=expression-not-assigned