def test_operator_shift(self, test_dag):
    """Tests the >> / << style with a plain operator"""
    # Unpack the fixture
    dag, (op1, op2, op3, op4) = test_dag
    # Arrange the operators with a Label in the middle
    op1 >> Label("Label 1") >> op2  # pylint: disable=W0106
    op3 << Label("Label 2") << op2 >> op4  # pylint: disable=W0106
    # Check that the DAG has the right edge info
    assert dag.get_edge_info(op1.task_id, op2.task_id) == {"label": "Label 1"}
    assert dag.get_edge_info(op2.task_id, op3.task_id) == {"label": "Label 2"}
    assert dag.get_edge_info(op2.task_id, op4.task_id) == {}
def test_operator_set(self, test_dag):
    """Tests the set_upstream/downstream style with a plain operator"""
    # Unpack the fixture
    dag, (op1, op2, op3, op4) = test_dag
    # Arrange the operators with a Label in the middle
    op1.set_downstream(op2, Label("Label 1"))
    op3.set_upstream(op2, Label("Label 2"))
    op4.set_upstream(op2)
    # Check that the DAG has the right edge info
    assert dag.get_edge_info(op1.task_id, op2.task_id) == {"label": "Label 1"}
    assert dag.get_edge_info(op2.task_id, op3.task_id) == {"label": "Label 2"}
    assert dag.get_edge_info(op2.task_id, op4.task_id) == {}
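# A minimal standalone sketch (not part of the test suite above) showing that the
# bitshift and set_downstream styles record identical edge metadata, queryable via
# ``dag.get_edge_info``. The DAG id, task ids, and date are illustrative assumptions.
from datetime import datetime

from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.utils.edgemodifier import Label

with DAG("label_sketch", start_date=datetime(2021, 1, 1)) as sketch_dag:
    a = DummyOperator(task_id="a")
    b = DummyOperator(task_id="b")
    # Equivalent to: a.set_downstream(b, Label("a to b"))
    a >> Label("a to b") >> b

assert sketch_dag.get_edge_info(a.task_id, b.task_id) == {"label": "a to b"}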
def test_xcomarg_shift(self, test_dag):
    """Tests the >> / << style with an XComArg"""
    # Unpack the fixture
    dag, (op1, op2, op3, op4) = test_dag
    # Arrange the operators with a Label in the middle
    op1_arg = XComArg(op1, "test_key")
    op1_arg >> Label("Label 1") >> [op2, op3]  # pylint: disable=W0106
    op1_arg >> op4
    # Check that the DAG has the right edge info
    assert dag.get_edge_info(op1.task_id, op2.task_id) == {"label": "Label 1"}
    assert dag.get_edge_info(op1.task_id, op4.task_id) == {}
def test_xcomarg_set(self, test_dag):
    """Tests the set_upstream/downstream style with an XComArg"""
    # Unpack the fixture
    dag, (op1, op2, op3, op4) = test_dag
    # Arrange the operators with a Label in the middle
    op1_arg = XComArg(op1, "test_key")
    op1_arg.set_downstream(op2, Label("Label 1"))
    op1.set_downstream([op3, op4])
    # Check that the DAG has the right edge info
    assert dag.get_edge_info(op1.task_id, op2.task_id) == {"label": "Label 1"}
    assert dag.get_edge_info(op1.task_id, op4.task_id) == {}
def test_taskgroup_shift(self, test_taskgroup_dag):
    """Tests the >> / << style with a TaskGroup"""
    # Unpack the fixture
    dag, group, (op1, op2, op3, op4) = test_taskgroup_dag
    # Arrange them with a Label in the middle
    op1 >> Label("Group label") >> group >> op4  # pylint: disable=W0106
    # Check that the DAG has the right edge info
    assert dag.get_edge_info(op1.task_id, op2.task_id) == {"label": "Group label"}
    assert dag.get_edge_info(op1.task_id, op3.task_id) == {"label": "Group label"}
    assert dag.get_edge_info(op3.task_id, op4.task_id) == {}
def test_taskgroup_set(self, test_taskgroup_dag):
    """Tests the set_upstream/downstream style with a TaskGroup"""
    # Unpack the fixture
    dag, group, (op1, op2, op3, op4) = test_taskgroup_dag
    # Arrange them with a Label in the middle
    op1.set_downstream(group, Label("Group label"))
    group.set_downstream(op4)
    # Check that the DAG has the right edge info
    assert dag.get_edge_info(op1.task_id, op2.task_id) == {"label": "Group label"}
    assert dag.get_edge_info(op1.task_id, op3.task_id) == {"label": "Group label"}
    assert dag.get_edge_info(op3.task_id, op4.task_id) == {}
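# Hedged sketch of the TaskGroup behavior exercised above: a Label on an edge into a
# TaskGroup is applied to every edge crossing the group boundary, which is why both
# op1 -> op2 and op1 -> op3 carry "Group label" in the tests. The names below are
# illustrative assumptions, not the fixture's.
from datetime import datetime

from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.utils.edgemodifier import Label
from airflow.utils.task_group import TaskGroup

with DAG("group_label_sketch", start_date=datetime(2021, 1, 1)) as sketch_dag:
    start = DummyOperator(task_id="start")
    with TaskGroup("group") as group:
        inner_a = DummyOperator(task_id="inner_a")
        inner_b = DummyOperator(task_id="inner_b")
    start >> Label("into group") >> group

# Both boundary-crossing edges should carry the label
assert sketch_dag.get_edge_info(start.task_id, inner_a.task_id) == {"label": "into group"}
assert sketch_dag.get_edge_info(start.task_id, inner_b.task_id) == {"label": "into group"}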
def test_edge_info_serialization(self):
    """
    Tests edge_info serialization/deserialization.
    """
    from airflow.operators.dummy import DummyOperator
    from airflow.utils.edgemodifier import Label

    with DAG("test_edge_info_serialization", start_date=datetime(2020, 1, 1)) as dag:
        task1 = DummyOperator(task_id="task1")
        task2 = DummyOperator(task_id="task2")
        task1 >> Label("test label") >> task2  # pylint: disable=W0106

    dag_dict = SerializedDAG.to_dict(dag)
    SerializedDAG.validate_schema(dag_dict)
    json_dag = SerializedDAG.from_json(SerializedDAG.to_json(dag))
    self.validate_deserialized_dag(json_dag, dag)

    serialized_dag = SerializedDAG.deserialize_dag(SerializedDAG.serialize_dag(dag))
    assert serialized_dag.edge_info == dag.edge_info
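# For orientation, a hedged sketch of the structure being round-tripped: in the
# Airflow versions this test targets, ``dag.edge_info`` appears to be keyed by
# downstream task id, then upstream task id (an assumption worth verifying against
# your release):
#
#     dag.edge_info == {"task2": {"task1": {"label": "test label"}}}
#
# ``SerializedDAG`` must preserve this mapping for labels to survive a round trip,
# which is exactly what the final assertion checks.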
)
accurate = DummyOperator(task_id='accurate')
fetch_best_model = NotebookToKeepOperator(
    task_id='fetch_best_model',
    sql='sql/FETCH_BEST_MODEL.sql',
    postgres_conn_id='postgres'
)
publish_notebook = NotebookToGitOperator(
    task_id='publish_notebook',
    conn_id='git',
    nb_path='/tmp',
    nb_name='out-model-avocado-prediction-{{ ti.xcom_pull(task_ids="fetch_best_model") }}.ipynb'
)
inaccurate = DummyOperator(task_id='inaccurate')
label_accurate = Label("RMSE < 0.15")
label_inaccurate = Label("RMSE >= 0.15")

creating_accuracy_table >> downloading_data >> waiting_for_data >> sanity_check >> training_model_tasks >> evaluating_rmse
evaluating_rmse >> label_accurate >> accurate >> fetch_best_model >> publish_notebook
evaluating_rmse >> label_inaccurate >> inaccurate
with DAG(
    dag_id='example_branch_operator',
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    schedule_interval="@daily",
    tags=['example', 'example2'],
) as dag:
    run_this_first = DummyOperator(task_id='run_this_first')

    options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

    branching = BranchPythonOperator(
        task_id='branching',
        python_callable=lambda: random.choice(options),
    )
    run_this_first >> branching

    join = DummyOperator(
        task_id='join',
        trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS,
    )

    for option in options:
        t = DummyOperator(task_id=option)
        dummy_follow = DummyOperator(task_id='follow_' + option)

        # Label is optional here, but it can help identify more complex branches
        branching >> Label(option) >> t >> dummy_follow >> join
from airflow.utils.trigger_rule import TriggerRule

with DAG(
    dag_id='example_branch_python_operator_decorator',
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    schedule="@daily",
    tags=['example', 'example2'],
) as dag:
    run_this_first = EmptyOperator(task_id='run_this_first')

    options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

    @task.branch(task_id="branching")
    def random_choice(choices: list[str]) -> str:
        return random.choice(choices)

    random_choice_instance = random_choice(choices=options)

    run_this_first >> random_choice_instance

    join = EmptyOperator(task_id='join', trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS)

    for option in options:
        t = EmptyOperator(task_id=option)
        empty_follow = EmptyOperator(task_id='follow_' + option)

        # Label is optional here, but it can help identify more complex branches
        random_choice_instance >> Label(option) >> t >> empty_follow >> join
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Example DAG demonstrating the usage of labels with different branches.
"""
from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.utils.dates import days_ago
from airflow.utils.edgemodifier import Label

with DAG("example_branch_labels", schedule_interval="@daily", start_date=days_ago(2)) as dag:
    ingest = DummyOperator(task_id="ingest")
    analyse = DummyOperator(task_id="analyze")
    check = DummyOperator(task_id="check_integrity")
    describe = DummyOperator(task_id="describe_integrity")
    error = DummyOperator(task_id="email_error")
    save = DummyOperator(task_id="save")
    report = DummyOperator(task_id="report")

    ingest >> analyse >> check
    check >> Label("No errors") >> save >> report  # pylint: disable=expression-not-assigned
    check >> Label("Errors found") >> describe >> error >> report  # pylint: disable=expression-not-assigned
def airflow2_good_example():
    begin = DummyOperator(task_id="begin")
    end = DummyOperator(task_id="end", trigger_rule=TriggerRule.NONE_FAILED)

    check_day_of_week = BranchDayOfWeekOperator(
        task_id="check_day_of_week",
        week_day={WeekDay.SATURDAY, WeekDay.SUNDAY},
        follow_task_ids_if_true="weekend",
        follow_task_ids_if_false="weekday",
        use_task_execution_day=True,
    )
    weekend = DummyOperator(task_id="weekend")
    weekday = DummyOperator(task_id="weekday")

    # Templated value for determining the name of the day of week based on the start date of the DagRun.
    day_name = "{{ dag_run.start_date.strftime('%A').lower() }}"

    # Begin weekday tasks.
    with TaskGroup("weekday_activities") as weekday_activities:
        which_weekday_activity_day = BranchPythonOperator(
            task_id="which_weekday_activity_day",
            python_callable=_get_activity,
            op_args=[day_name],
        )

        for day, day_info in DAY_ACTIVITY_MAPPING.items():
            if day_info["is_weekday"]:
                day_of_week = Label(label=day)
                activity = day_info["activity"]

                do_activity = BashOperator(
                    task_id=activity.replace(" ", "_"),
                    bash_command=f"echo It's {day.capitalize()} and I'm busy with {activity}.",
                )

                # Declaring task dependencies within the `TaskGroup` via the classic bitshift operator.
                which_weekday_activity_day >> day_of_week >> do_activity

    # Begin weekend tasks.
    with TaskGroup("weekend_activities") as weekend_activities:
        which_weekend_activity_day = BranchPythonOperator(
            task_id="which_weekend_activity_day",
            python_callable=_get_activity,
            op_args=[day_name],
        )

        saturday = Label(label="saturday")
        sunday = Label(label="sunday")

        sleeping_in = BashOperator(task_id="sleeping_in", bash_command="sleep $[ ( $RANDOM % 30 ) + 1 ]s")

        going_to_the_beach = _going_to_the_beach()

        # Because the ``going_to_the_beach()`` function has ``multiple_outputs`` enabled, each dict key is
        # accessible as its own `XCom` key.
        inviting_friends = EmailOperator(
            task_id="inviting_friends",
            to="*****@*****.**",
            subject=going_to_the_beach["subject"],
            html_content=going_to_the_beach["body"],
        )

        # Using ``chain()`` here for list-to-list dependencies which are not supported by the bitshift
        # operator and to simplify the notation for the desired dependency structure.
        chain(which_weekend_activity_day, [saturday, sunday], [going_to_the_beach, sleeping_in])

    # High-level dependencies.
    chain(begin, check_day_of_week, [weekday, weekend], [weekday_activities, weekend_activities], end)
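# Hedged illustration of the ``chain()`` calls above: when chain() receives two
# lists of equal length, it pairs elements positionally rather than building a full
# cross product. For example:
#
#     chain(t0, [a, b], [c, d], t1)
#
# wires t0 -> a and t0 -> b, then a -> c and b -> d (not a -> d or b -> c), and
# finally c -> t1 and d -> t1.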
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. """ Example DAG demonstrating the usage of labels with different branches. """ import pendulum from airflow import DAG from airflow.operators.empty import EmptyOperator from airflow.utils.edgemodifier import Label with DAG( "example_branch_labels", schedule="@daily", start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), catchup=False, ) as dag: ingest = EmptyOperator(task_id="ingest") analyse = EmptyOperator(task_id="analyze") check = EmptyOperator(task_id="check_integrity") describe = EmptyOperator(task_id="describe_integrity") error = EmptyOperator(task_id="email_error") save = EmptyOperator(task_id="save") report = EmptyOperator(task_id="report") ingest >> analyse >> check check >> Label("No errors") >> save >> report check >> Label("Errors found") >> describe >> error >> report
# [START howto_operator_dbt_cloud_get_artifact]
get_run_results_artifact = DbtCloudGetJobRunArtifactOperator(
    task_id="get_run_results_artifact", run_id=trigger_job_run1.output, path="run_results.json"
)
# [END howto_operator_dbt_cloud_get_artifact]

# [START howto_operator_dbt_cloud_run_job_async]
trigger_job_run2 = DbtCloudRunJobOperator(
    task_id="trigger_job_run2",
    job_id=48617,
    wait_for_termination=False,
    additional_run_config={"threads_override": 8},
)
# [END howto_operator_dbt_cloud_run_job_async]

# [START howto_operator_dbt_cloud_run_job_sensor]
job_run_sensor = DbtCloudJobRunSensor(
    task_id="job_run_sensor", run_id=trigger_job_run2.output, timeout=20
)
# [END howto_operator_dbt_cloud_run_job_sensor]

begin >> Label("No async wait") >> trigger_job_run1
begin >> Label("Do async wait with sensor") >> trigger_job_run2
[get_run_results_artifact, job_run_sensor] >> end

# Task dependency created via `XComArgs`:
# trigger_job_run1 >> get_run_results_artifact
# trigger_job_run2 >> job_run_sensor
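# Why no explicit ``trigger_job_run1 >> get_run_results_artifact`` edge is declared:
# ``operator.output`` is an ``XComArg`` over the operator's return value, and passing
# an ``XComArg`` into another operator's arguments registers the upstream dependency
# automatically when the DAG is parsed (hence the commented-out dependency notes
# above). A hedged restatement of the implicit wiring:
#
#     run_id=trigger_job_run1.output  # implies trigger_job_run1 >> get_run_results_artifact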
    tags=['example', 'example2'],
) as dag:
    run_this_first = EmptyOperator(
        task_id='run_this_first',
    )

    options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

    branching = BranchPythonOperator(
        task_id='branching',
        python_callable=lambda: random.choice(options),
    )
    run_this_first >> branching

    join = EmptyOperator(
        task_id='join',
        trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS,
    )

    for option in options:
        t = EmptyOperator(
            task_id=option,
        )
        empty_follow = EmptyOperator(
            task_id='follow_' + option,
        )

        # Label is optional here, but it can help identify more complex branches
        branching >> Label(option) >> t >> empty_follow >> join
begin = DummyOperator(task_id="begin")
end = DummyOperator(task_id="end")

# [START howto_operator_adf_run_pipeline]
run_pipeline1: BaseOperator = AzureDataFactoryRunPipelineOperator(
    task_id="run_pipeline1",
    pipeline_name="pipeline1",
    parameters={"myParam": "value"},
)
# [END howto_operator_adf_run_pipeline]

# [START howto_operator_adf_run_pipeline_async]
run_pipeline2: BaseOperator = AzureDataFactoryRunPipelineOperator(
    task_id="run_pipeline2",
    pipeline_name="pipeline2",
    wait_for_termination=False,
)

pipeline_run_sensor: BaseOperator = AzureDataFactoryPipelineRunStatusSensor(
    task_id="pipeline_run_sensor",
    run_id=run_pipeline2.output["run_id"],
)
# [END howto_operator_adf_run_pipeline_async]

begin >> Label("No async wait") >> run_pipeline1
begin >> Label("Do async wait with sensor") >> run_pipeline2
[run_pipeline1, pipeline_run_sensor] >> end

# Task dependency created via `XComArgs`:
# run_pipeline2 >> pipeline_run_sensor
with DAG(
    dag_id='example_branch_operator',
    default_args=args,
    start_date=days_ago(2),
    schedule_interval="@daily",
    tags=['example', 'example2'],
) as dag:
    run_this_first = DummyOperator(task_id='run_this_first')

    options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

    branching = BranchPythonOperator(
        task_id='branching',
        python_callable=lambda: random.choice(options),
    )
    run_this_first >> branching

    join = DummyOperator(
        task_id='join',
        trigger_rule='none_failed_or_skipped',
    )

    for option in options:
        t = DummyOperator(task_id=option)
        dummy_follow = DummyOperator(task_id='follow_' + option)

        # Label is optional here, but it can help identify more complex branches
        branching >> Label(option) >> t >> dummy_follow >> join  # pylint: disable=expression-not-assigned