def subdag(parent_dag_name, child_dag_name, args):
    """
    Build the child DAG that a subdag operator will run.

    The DAG id is ``<parent_dag_name>.<child_dag_name>`` and the DAG is
    populated with five ``EmptyOperator`` tasks named
    ``<child_dag_name>-task-1`` through ``<child_dag_name>-task-5``.

    :param str parent_dag_name: Id of the parent DAG
    :param str child_dag_name: Id of the child DAG
    :param dict args: Default arguments to provide to the subdag
    :return: DAG to use as a subdag
    :rtype: airflow.models.DAG
    """
    child_dag = DAG(
        dag_id=f'{parent_dag_name}.{child_dag_name}',
        default_args=args,
        start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
        catchup=False,
        schedule="@daily",
    )

    # The operators are attached through ``dag=``, so the instances
    # themselves are not kept.
    for task_number in range(1, 6):
        EmptyOperator(
            task_id=f'{child_dag_name}-task-{task_number}',
            default_args=args,
            dag=child_dag,
        )

    return child_dag
def create_test_pipeline(suffix, trigger_rule):
    """
    Instantiate a skip/join test pipeline of four operators.

    Two parallel tasks (one ``EmptySkipOperator``, one ``EmptyOperator``)
    feed a join task that uses ``trigger_rule``; the join feeds a final task.
    The operators are created without an explicit ``dag=`` argument.
    NOTE(review): this presumably relies on being called inside an active
    ``with DAG(...)`` block -- confirm at the call sites.

    :param str suffix: Suffix to append to the operator task_ids
    :param str trigger_rule: TriggerRule for the join task; it is also used
        verbatim (without the suffix) as the join task's task_id
    """
    skip_operator = EmptySkipOperator(task_id=f'skip_operator_{suffix}')
    always_true = EmptyOperator(task_id=f'always_true_{suffix}')
    # The join's task_id is the trigger rule itself, so it is not suffixed.
    join = EmptyOperator(task_id=trigger_rule, trigger_rule=trigger_rule)
    final = EmptyOperator(task_id=f'final_{suffix}')

    skip_operator >> join
    always_true >> join
    join >> final
""" Example LatestOnlyOperator and TriggerRule interactions """ # [START example] import datetime import pendulum from airflow import DAG from airflow.operators.empty import EmptyOperator from airflow.operators.latest_only import LatestOnlyOperator from airflow.utils.trigger_rule import TriggerRule with DAG( dag_id='latest_only_with_trigger', schedule=datetime.timedelta(hours=4), start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), catchup=False, tags=['example3'], ) as dag: latest_only = LatestOnlyOperator(task_id='latest_only') task1 = EmptyOperator(task_id='task1') task2 = EmptyOperator(task_id='task2') task3 = EmptyOperator(task_id='task3') task4 = EmptyOperator(task_id='task4', trigger_rule=TriggerRule.ALL_DONE) latest_only >> task1 >> [task3, task4] task2 >> [task3, task4] # [END example]
from airflow import DAG
from airflow.operators.empty import EmptyOperator
from airflow.operators.python import BranchPythonOperator
from airflow.utils.edgemodifier import Label
from airflow.utils.trigger_rule import TriggerRule

# NOTE(review): ``pendulum`` and ``random`` are used below but are not imported
# in this fragment -- presumably they are imported earlier in the file; confirm.
with DAG(
    dag_id='example_branch_operator',
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    schedule="@daily",
    tags=['example', 'example2'],
) as dag:
    run_this_first = EmptyOperator(task_id='run_this_first')

    # Candidate task ids; one is picked at random each time the branching
    # callable is invoked.
    options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

    branching = BranchPythonOperator(
        task_id='branching',
        python_callable=lambda: random.choice(options),
    )
    run_this_first >> branching

    join = EmptyOperator(
        task_id='join',
        trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS,
    )
notify=True, tags=['tag1', 'tag2'], # If the script at s3 location has any qubole specific macros to be replaced # macros='[{"date": "{{ ds }}"}, {"name" : "abc"}]', ) # [END howto_operator_qubole_run_hive_script] options = ['hadoop_jar_cmd', 'presto_cmd', 'db_query', 'spark_cmd'] branching = BranchPythonOperator( task_id='branching', python_callable=lambda: random.choice(options)) [hive_show_table, hive_s3_location] >> compare_result( hive_s3_location, hive_show_table) >> branching join = EmptyOperator(task_id='join', trigger_rule=TriggerRule.ONE_SUCCESS) # [START howto_operator_qubole_run_hadoop_jar] hadoop_jar_cmd = QuboleOperator( task_id='hadoop_jar_cmd', command_type='hadoopcmd', sub_command='jar s3://paid-qubole/HadoopAPIExamples/' 'jars/hadoop-0.20.1-dev-streaming.jar ' '-mapper wc ' '-numReduceTasks 0 -input s3://paid-qubole/HadoopAPITests/' 'data/3.tsv -output ' 's3://paid-qubole/HadoopAPITests/data/3_wc', cluster_label='{{ params.cluster_label }}', fetch_logs=True, params={ 'cluster_label': 'default',
from airflow import DAG
from airflow.example_dags.subdags.subdag import subdag
from airflow.operators.empty import EmptyOperator
from airflow.operators.subdag import SubDagOperator

DAG_NAME = 'example_subdag_operator'

# NOTE(review): ``datetime`` is used below but is not imported in this
# fragment -- presumably it is imported earlier in the file; confirm.
with DAG(
    dag_id=DAG_NAME,
    default_args={"retries": 2},
    start_date=datetime.datetime(2022, 1, 1),
    schedule="@once",
    tags=['example'],
) as dag:
    start = EmptyOperator(task_id='start')

    # Each section wraps a child DAG built by ``subdag``; the child name passed
    # to ``subdag`` is the same string as the wrapping operator's task_id, and
    # the parent's default_args are forwarded to the child.
    section_1 = SubDagOperator(
        task_id='section-1',
        subdag=subdag(DAG_NAME, 'section-1', dag.default_args),
    )

    some_other_task = EmptyOperator(task_id='some-other-task')

    section_2 = SubDagOperator(
        task_id='section-2',
        subdag=subdag(DAG_NAME, 'section-2', dag.default_args),
    )

    end = EmptyOperator(task_id='end')
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Example DAG demonstrating ``TimeDeltaSensorAsync``, a drop in replacement for ``TimeDeltaSensor`` that
defers and doesn't occupy a worker slot while it waits
"""
import datetime

import pendulum

from airflow import DAG
from airflow.operators.empty import EmptyOperator
from airflow.sensors.time_delta import TimeDeltaSensorAsync

# ``schedule=None`` means the DAG has no schedule of its own. ``schedule`` is
# used instead of the deprecated ``schedule_interval`` to match the other DAGs
# in this file.
with DAG(
    dag_id="example_time_delta_sensor_async",
    schedule=None,
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    tags=["example"],
) as dag:
    # The sensor is configured with a 10 second delta; ``finish`` runs after it.
    wait = TimeDeltaSensorAsync(task_id="wait", delta=datetime.timedelta(seconds=10))
    finish = EmptyOperator(task_id="finish")

    wait >> finish
import pendulum

from airflow import DAG
from airflow.operators.datetime import BranchDateTimeOperator
from airflow.operators.empty import EmptyOperator

# ``schedule`` replaces the deprecated ``schedule_interval`` keyword, matching
# the other DAG definitions in this file.
dag = DAG(
    dag_id="example_branch_datetime_operator",
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    tags=["example"],
    schedule="@daily",
)

# [START howto_branch_datetime_operator]
empty_task_1 = EmptyOperator(task_id='date_in_range', dag=dag)
empty_task_2 = EmptyOperator(task_id='date_outside_range', dag=dag)

# The follow_task_ids_* values are the task_ids of the two tasks above.
cond1 = BranchDateTimeOperator(
    task_id='datetime_branch',
    follow_task_ids_if_true=['date_in_range'],
    follow_task_ids_if_false=['date_outside_range'],
    target_upper=pendulum.datetime(2020, 10, 10, 15, 0, 0),
    target_lower=pendulum.datetime(2020, 10, 10, 14, 0, 0),
    dag=dag,
)

# Run empty_task_1 if cond1 executes between 2020-10-10 14:00:00 and 2020-10-10 15:00:00
cond1 >> [empty_task_1, empty_task_2]
# [END howto_branch_datetime_operator]
# under the License.

# [START dag]
"""This dag only runs some simple tasks to test Airflow's task execution."""
import datetime

import pendulum

from airflow.models.dag import DAG
from airflow.operators.empty import EmptyOperator

# Three hours before the current UTC time, truncated to the top of the hour.
# NOTE(review): START_DATE is not passed to the DAG below, which uses a fixed
# start_date; it is kept in case other code reads it.
now = pendulum.now(tz="UTC")
now_to_the_hour = (now - datetime.timedelta(hours=3)).replace(minute=0, second=0, microsecond=0)
START_DATE = now_to_the_hour
DAG_NAME = 'test_dag_v1'

dag = DAG(
    DAG_NAME,
    schedule='*/10 * * * *',
    default_args={'depends_on_past': True},
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
)

run_this_1 = EmptyOperator(task_id='run_this_1', dag=dag)
run_this_2 = EmptyOperator(task_id='run_this_2', dag=dag)
run_this_3 = EmptyOperator(task_id='run_this_3', dag=dag)

# Strictly linear pipeline: 1 -> 2 -> 3.
run_this_1 >> run_this_2 >> run_this_3
# [END dag]
start_date=datetime(2021, 8, 13), schedule_interval="@daily", catchup=False, default_args= { "retries": 1, "retry_delay": timedelta(minutes=3), "azure_data_factory_conn_id": "azure_data_factory", "factory_name": "my-data-factory", # This can also be specified in the ADF connection. "resource_group_name": "my-resource-group", # This can also be specified in the ADF connection. }, default_view="graph", ) as dag: begin = EmptyOperator(task_id="begin") end = EmptyOperator(task_id="end") # [START howto_operator_adf_run_pipeline] run_pipeline1: BaseOperator = AzureDataFactoryRunPipelineOperator( task_id="run_pipeline1", pipeline_name="pipeline1", parameters={"myParam": "value"}, ) # [END howto_operator_adf_run_pipeline] # [START howto_operator_adf_run_pipeline_async] run_pipeline2: BaseOperator = AzureDataFactoryRunPipelineOperator( task_id="run_pipeline2", pipeline_name="pipeline2", wait_for_termination=False,
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. """ Example DAG demonstrating the usage of labels with different branches. """ import pendulum from airflow import DAG from airflow.operators.empty import EmptyOperator from airflow.utils.edgemodifier import Label with DAG( "example_branch_labels", schedule="@daily", start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), catchup=False, ) as dag: ingest = EmptyOperator(task_id="ingest") analyse = EmptyOperator(task_id="analyze") check = EmptyOperator(task_id="check_integrity") describe = EmptyOperator(task_id="describe_integrity") error = EmptyOperator(task_id="email_error") save = EmptyOperator(task_id="save") report = EmptyOperator(task_id="report") ingest >> analyse >> check check >> Label("No errors") >> save >> report check >> Label("Errors found") >> describe >> error >> report
from airflow.decorators import task
from airflow.models import DAG
from airflow.operators.empty import EmptyOperator
from airflow.utils.trigger_rule import TriggerRule

# NOTE(review): ``pendulum`` is used below but is not imported in this
# fragment -- presumably it is imported earlier in the file; confirm.
with DAG(
    dag_id="example_nested_branch_dag",
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    schedule="@daily",
    tags=["example"],
) as dag:

    # Branch callable that simply returns the task id it was given.
    @task.branch()
    def branch(task_id_to_return: str) -> str:
        return task_id_to_return

    # Outer branch and its join.
    branch_1 = branch.override(task_id="branch_1")(task_id_to_return="true_1")
    join_1 = EmptyOperator(task_id="join_1", trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS)
    true_1 = EmptyOperator(task_id="true_1")
    false_1 = EmptyOperator(task_id="false_1")

    # Inner branch, nested on the ``false_1`` path of the outer branch.
    branch_2 = branch.override(task_id="branch_2")(task_id_to_return="true_2")
    join_2 = EmptyOperator(task_id="join_2", trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS)
    true_2 = EmptyOperator(task_id="true_2")
    false_2 = EmptyOperator(task_id="false_2")
    false_3 = EmptyOperator(task_id="false_3")

    branch_1 >> true_1 >> join_1
    branch_1 >> false_1 >> branch_2
    branch_2 >> [true_2, false_2] >> join_2
    join_2 >> false_3 >> join_1
tags=['example2'], ) as parent_dag: # [START howto_operator_external_task_marker] parent_task = ExternalTaskMarker( task_id="parent_task", external_dag_id="example_external_task_marker_child", external_task_id="child_task1", ) # [END howto_operator_external_task_marker] with DAG( dag_id="example_external_task_marker_child", start_date=start_date, schedule_interval=None, catchup=False, tags=['example2'], ) as child_dag: # [START howto_operator_external_task_sensor] child_task1 = ExternalTaskSensor( task_id="child_task1", external_dag_id=parent_dag.dag_id, external_task_id=parent_task.task_id, timeout=600, allowed_states=['success'], failed_states=['failed', 'skipped'], mode="reschedule", ) # [END howto_operator_external_task_sensor] child_task2 = EmptyOperator(task_id="child_task2") child_task1 >> child_task2
""" Example DAG demonstrating the usage of BranchDayOfWeekOperator. """ import pendulum from airflow import DAG from airflow.operators.empty import EmptyOperator from airflow.operators.weekday import BranchDayOfWeekOperator with DAG( dag_id="example_weekday_branch_operator", start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), catchup=False, tags=["example"], schedule_interval="@daily", ) as dag: # [START howto_operator_day_of_week_branch] empty_task_1 = EmptyOperator(task_id='branch_true', dag=dag) empty_task_2 = EmptyOperator(task_id='branch_false', dag=dag) branch = BranchDayOfWeekOperator( task_id="make_choice", follow_task_ids_if_true="branch_true", follow_task_ids_if_false="branch_false", week_day="Monday", ) # Run empty_task_1 if branch executes on Monday branch >> [empty_task_1, empty_task_2] # [END howto_operator_day_of_week_branch]
"""Example DAG demonstrating the usage of the TaskGroup.""" import pendulum from airflow.models.dag import DAG from airflow.operators.bash import BashOperator from airflow.operators.empty import EmptyOperator from airflow.utils.task_group import TaskGroup # [START howto_task_group] with DAG( dag_id="example_task_group", start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), catchup=False, tags=["example"], ) as dag: start = EmptyOperator(task_id="start") # [START howto_task_group_section_1] with TaskGroup("section_1", tooltip="Tasks for section_1") as section_1: task_1 = EmptyOperator(task_id="task_1") task_2 = BashOperator(task_id="task_2", bash_command='echo 1') task_3 = EmptyOperator(task_id="task_3") task_1 >> [task_2, task_3] # [END howto_task_group_section_1] # [START howto_task_group_section_2] with TaskGroup("section_2", tooltip="Tasks for section_2") as section_2: task_1 = EmptyOperator(task_id="task_1") # [START howto_task_group_inner_section_2]
from airflow.operators.empty import EmptyOperator except ModuleNotFoundError: from airflow.operators.dummy import DummyOperator as EmptyOperator # type: ignore from airflow.providers.microsoft.winrm.hooks.winrm import WinRMHook from airflow.providers.microsoft.winrm.operators.winrm import WinRMOperator with DAG( dag_id='POC_winrm_parallel', schedule_interval='0 0 * * *', start_date=datetime(2021, 1, 1), dagrun_timeout=timedelta(minutes=60), tags=['example'], catchup=False, ) as dag: run_this_last = EmptyOperator(task_id='run_this_last') # [START create_hook] winRMHook = WinRMHook(ssh_conn_id='ssh_POC1') # [END create_hook] # [START run_operator] t1 = WinRMOperator(task_id="wintask1", command='ls -altr', winrm_hook=winRMHook) t2 = WinRMOperator(task_id="wintask2", command='sleep 60', winrm_hook=winRMHook) t3 = WinRMOperator(task_id="wintask3",
:param dict kwargs: Context :return: Id of the task to run :rtype: str """ print( f"------------- exec dttm = {kwargs['execution_date']} and minute = {kwargs['execution_date'].minute}" ) if kwargs['execution_date'].minute % 2 == 0: return "empty_task_1" else: return "empty_task_2" with DAG( dag_id='example_branch_dop_operator_v3', schedule_interval='*/1 * * * *', start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), catchup=False, default_args={'depends_on_past': True}, tags=['example'], ) as dag: cond = BranchPythonOperator( task_id='condition', python_callable=should_run, ) empty_task_1 = EmptyOperator(task_id='empty_task_1') empty_task_2 = EmptyOperator(task_id='empty_task_2') cond >> [empty_task_1, empty_task_2]
start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), catchup=False, tags=['example'], ) as dag: # [START howto_operator_short_circuit] cond_true = ShortCircuitOperator( task_id='condition_is_True', python_callable=lambda: True, ) cond_false = ShortCircuitOperator( task_id='condition_is_False', python_callable=lambda: False, ) ds_true = [EmptyOperator(task_id='true_' + str(i)) for i in [1, 2]] ds_false = [EmptyOperator(task_id='false_' + str(i)) for i in [1, 2]] chain(cond_true, *ds_true) chain(cond_false, *ds_false) # [END howto_operator_short_circuit] # [START howto_operator_short_circuit_trigger_rules] [task_1, task_2, task_3, task_4, task_5, task_6] = [EmptyOperator(task_id=f"task_{i}") for i in range(1, 7)] task_7 = EmptyOperator(task_id="task_7", trigger_rule=TriggerRule.ALL_DONE) short_circuit = ShortCircuitOperator(task_id="short_circuit", ignore_downstream_trigger_rules=False, python_callable=lambda: False)
import pendulum

from airflow import DAG
from airflow.operators.datetime import BranchDateTimeOperator
from airflow.operators.empty import EmptyOperator

dag1 = DAG(
    dag_id="example_branch_datetime_operator",
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    tags=["example"],
    schedule="@daily",
)

# [START howto_branch_datetime_operator]
empty_task_11 = EmptyOperator(task_id='date_in_range', dag=dag1)
empty_task_21 = EmptyOperator(task_id='date_outside_range', dag=dag1)

# The follow_task_ids_* values are the task_ids of the two tasks above.
cond1 = BranchDateTimeOperator(
    task_id='datetime_branch',
    follow_task_ids_if_true=['date_in_range'],
    follow_task_ids_if_false=['date_outside_range'],
    target_upper=pendulum.datetime(2020, 10, 10, 15, 0, 0),
    target_lower=pendulum.datetime(2020, 10, 10, 14, 0, 0),
    dag=dag1,
)

# Run empty_task_11 if cond1 executes between 2020-10-10 14:00:00 and 2020-10-10 15:00:00
cond1 >> [empty_task_11, empty_task_21]
# [END howto_branch_datetime_operator]
# "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. """Example of the LatestOnlyOperator""" import datetime as dt from airflow import DAG from airflow.operators.empty import EmptyOperator from airflow.operators.latest_only import LatestOnlyOperator with DAG( dag_id='latest_only', schedule_interval=dt.timedelta(hours=4), start_date=dt.datetime(2021, 1, 1), catchup=False, tags=['example2', 'example3'], ) as dag: latest_only = LatestOnlyOperator(task_id='latest_only') task1 = EmptyOperator(task_id='task1') latest_only >> task1