Example #1
import pendulum

from airflow import DAG
from airflow.operators.empty import EmptyOperator


def subdag(parent_dag_name, child_dag_name, args):
    """
    Generate a DAG to be used as a subdag.

    :param str parent_dag_name: Id of the parent DAG
    :param str child_dag_name: Id of the child DAG
    :param dict args: Default arguments to provide to the subdag
    :return: DAG to use as a subdag
    :rtype: airflow.models.DAG
    """
    dag_subdag = DAG(
        dag_id=f'{parent_dag_name}.{child_dag_name}',
        default_args=args,
        start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
        catchup=False,
        schedule="@daily",
    )

    for i in range(5):
        EmptyOperator(
            task_id=f'{child_dag_name}-task-{i + 1}',
            default_args=args,
            dag=dag_subdag,
        )

    return dag_subdag
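
# A usage sketch for the factory above (Example #6 below shows the same
# pattern in full). SubDagOperator must run inside the parent DAG, and the
# parent/child names must line up so the subdag's dag_id becomes
# "<parent>.<child>"; the names here are illustrative.
from airflow.operators.subdag import SubDagOperator

with DAG(
    dag_id='example_subdag_operator',
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    schedule="@once",
    catchup=False,
) as parent:
    section_1 = SubDagOperator(
        task_id='section-1',
        subdag=subdag('example_subdag_operator', 'section-1', {'retries': 2}),
    )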
Example #2
from airflow.operators.empty import EmptyOperator

# EmptySkipOperator is a custom operator defined alongside this function in
# the source module; its execute() raises AirflowSkipException, so the task
# always finishes as "skipped".


def create_test_pipeline(suffix, trigger_rule):
    """
    Instantiate a skip/join pipeline of operators on the currently active DAG.

    :param str suffix: Suffix to append to the operator task_ids
    :param str trigger_rule: TriggerRule for the join task
    """
    skip_operator = EmptySkipOperator(task_id=f'skip_operator_{suffix}')
    always_true = EmptyOperator(task_id=f'always_true_{suffix}')
    join = EmptyOperator(task_id=trigger_rule, trigger_rule=trigger_rule)
    final = EmptyOperator(task_id=f'final_{suffix}')

    skip_operator >> join
    always_true >> join
    join >> final
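
# A minimal usage sketch, assuming the factory above; the dag_id and schedule
# are illustrative. Each call builds one skip/join pipeline per trigger rule,
# and operators created inside the `with` block attach to the DAG automatically.
import pendulum

from airflow import DAG

with DAG(
    dag_id='example_skip_dag',
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    schedule="@daily",
) as dag:
    create_test_pipeline('1', 'all_success')
    create_test_pipeline('2', 'one_success')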
Example #3
"""
Example LatestOnlyOperator and TriggerRule interactions
"""

# [START example]
import datetime

import pendulum

from airflow import DAG
from airflow.operators.empty import EmptyOperator
from airflow.operators.latest_only import LatestOnlyOperator
from airflow.utils.trigger_rule import TriggerRule

with DAG(
        dag_id='latest_only_with_trigger',
        schedule=datetime.timedelta(hours=4),
        start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
        catchup=False,
        tags=['example3'],
) as dag:
    latest_only = LatestOnlyOperator(task_id='latest_only')
    task1 = EmptyOperator(task_id='task1')
    task2 = EmptyOperator(task_id='task2')
    task3 = EmptyOperator(task_id='task3')
    task4 = EmptyOperator(task_id='task4', trigger_rule=TriggerRule.ALL_DONE)

    latest_only >> task1 >> [task3, task4]
    task2 >> [task3, task4]
# [END example]
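
# The interaction being demonstrated: on a non-latest run, latest_only skips
# task1, so task3 (default trigger rule all_success) is skipped as well, while
# task4 still runs because ALL_DONE fires once every upstream has reached a
# terminal state, including "skipped".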
Example #4
import random

import pendulum

from airflow import DAG
from airflow.operators.empty import EmptyOperator
from airflow.operators.python import BranchPythonOperator
from airflow.utils.edgemodifier import Label
from airflow.utils.trigger_rule import TriggerRule

with DAG(
    dag_id='example_branch_operator',
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    schedule="@daily",
    tags=['example', 'example2'],
) as dag:
    run_this_first = EmptyOperator(
        task_id='run_this_first',
    )

    options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

    branching = BranchPythonOperator(
        task_id='branching',
        python_callable=lambda: random.choice(options),
    )
    run_this_first >> branching

    join = EmptyOperator(
        task_id='join',
        trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS,
    )
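
    # The snippet ends before the branch tasks are wired up. A minimal sketch
    # of the usual pattern; the Label import above suggests labeled edges, and
    # the "follow_" task names are illustrative rather than from the original.
    for option in options:
        t = EmptyOperator(task_id=option)
        empty_follow = EmptyOperator(task_id='follow_' + option)
        branching >> Label(option) >> t >> empty_follow >> join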
Example #5
        notify=True,
        tags=['tag1', 'tag2'],
        # If the script at s3 location has any qubole specific macros to be replaced
        # macros='[{"date": "{{ ds }}"}, {"name" : "abc"}]',
    )
    # [END howto_operator_qubole_run_hive_script]

    options = ['hadoop_jar_cmd', 'presto_cmd', 'db_query', 'spark_cmd']

    branching = BranchPythonOperator(
        task_id='branching', python_callable=lambda: random.choice(options))

    [hive_show_table, hive_s3_location] >> compare_result(
        hive_s3_location, hive_show_table) >> branching

    join = EmptyOperator(task_id='join', trigger_rule=TriggerRule.ONE_SUCCESS)

    # [START howto_operator_qubole_run_hadoop_jar]
    hadoop_jar_cmd = QuboleOperator(
        task_id='hadoop_jar_cmd',
        command_type='hadoopcmd',
        sub_command='jar s3://paid-qubole/HadoopAPIExamples/'
        'jars/hadoop-0.20.1-dev-streaming.jar '
        '-mapper wc '
        '-numReduceTasks 0 -input s3://paid-qubole/HadoopAPITests/'
        'data/3.tsv -output '
        's3://paid-qubole/HadoopAPITests/data/3_wc',
        cluster_label='{{ params.cluster_label }}',
        fetch_logs=True,
        params={
            'cluster_label': 'default',
Example #6
import datetime

from airflow import DAG
from airflow.example_dags.subdags.subdag import subdag
from airflow.operators.empty import EmptyOperator
from airflow.operators.subdag import SubDagOperator

DAG_NAME = 'example_subdag_operator'

with DAG(
        dag_id=DAG_NAME,
        default_args={"retries": 2},
        start_date=datetime.datetime(2022, 1, 1),
        schedule="@once",
        tags=['example'],
) as dag:

    start = EmptyOperator(task_id='start')

    section_1 = SubDagOperator(
        task_id='section-1',
        subdag=subdag(DAG_NAME, 'section-1', dag.default_args),
    )

    some_other_task = EmptyOperator(task_id='some-other-task')

    section_2 = SubDagOperator(
        task_id='section-2',
        subdag=subdag(DAG_NAME, 'section-2', dag.default_args),
    )

    end = EmptyOperator(task_id='end')
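
    # The original example chains these sections linearly:
    start >> section_1 >> some_other_task >> section_2 >> end

# Note: SubDAGs are deprecated in Airflow 2; TaskGroup (Example #15 below)
# is the recommended way to group tasks.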
Example #7
"""
Example DAG demonstrating ``TimeDeltaSensorAsync``, a drop in replacement for ``TimeDeltaSensor`` that
defers and doesn't occupy a worker slot while it waits
"""

import datetime

import pendulum

from airflow import DAG
from airflow.operators.empty import EmptyOperator
from airflow.sensors.time_delta import TimeDeltaSensorAsync

with DAG(
        dag_id="example_time_delta_sensor_async",
        schedule_interval=None,
        start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
        catchup=False,
        tags=["example"],
) as dag:
    wait = TimeDeltaSensorAsync(task_id="wait",
                                delta=datetime.timedelta(seconds=10))
    finish = EmptyOperator(task_id="finish")
    wait >> finish
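
# Deferrable sensors such as TimeDeltaSensorAsync hand the wait over to the
# triggerer component, so an `airflow triggerer` process must be running for
# the DAG to progress past the sensor.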
Example #8
import pendulum

from airflow import DAG
from airflow.operators.datetime import BranchDateTimeOperator
from airflow.operators.empty import EmptyOperator

dag = DAG(
    dag_id="example_branch_datetime_operator",
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    tags=["example"],
    schedule_interval="@daily",
)

# [START howto_branch_datetime_operator]
empty_task_1 = EmptyOperator(task_id='date_in_range', dag=dag)
empty_task_2 = EmptyOperator(task_id='date_outside_range', dag=dag)

cond1 = BranchDateTimeOperator(
    task_id='datetime_branch',
    follow_task_ids_if_true=['date_in_range'],
    follow_task_ids_if_false=['date_outside_range'],
    target_upper=pendulum.datetime(2020, 10, 10, 15, 0, 0),
    target_lower=pendulum.datetime(2020, 10, 10, 14, 0, 0),
    dag=dag,
)

# Run empty_task_1 if cond1 executes between 2020-10-10 14:00:00 and 2020-10-10 15:00:00
cond1 >> [empty_task_1, empty_task_2]
# [END howto_branch_datetime_operator]
Example #9
# [START dag]
"""This dag only runs some simple tasks to test Airflow's task execution."""
import datetime

import pendulum

from airflow.models.dag import DAG
from airflow.operators.empty import EmptyOperator

now = pendulum.now(tz="UTC")
now_to_the_hour = (now - datetime.timedelta(hours=3)).replace(minute=0, second=0, microsecond=0)
START_DATE = now_to_the_hour
DAG_NAME = 'test_dag_v1'

dag = DAG(
    DAG_NAME,
    schedule='*/10 * * * *',
    default_args={'depends_on_past': True},
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
)

run_this_1 = EmptyOperator(task_id='run_this_1', dag=dag)
run_this_2 = EmptyOperator(task_id='run_this_2', dag=dag)
run_this_2.set_upstream(run_this_1)
run_this_3 = EmptyOperator(task_id='run_this_3', dag=dag)
run_this_3.set_upstream(run_this_2)
# [END dag]
Example #10
from datetime import datetime, timedelta

from airflow import DAG
from airflow.models import BaseOperator
from airflow.operators.empty import EmptyOperator
from airflow.providers.microsoft.azure.operators.data_factory import AzureDataFactoryRunPipelineOperator

with DAG(
        dag_id="example_adf_run_pipeline",  # assumed dag_id; the snippet starts mid-definition
        start_date=datetime(2021, 8, 13),
        schedule_interval="@daily",
        catchup=False,
        default_args={
            "retries": 1,
            "retry_delay": timedelta(minutes=3),
            "azure_data_factory_conn_id": "azure_data_factory",
            "factory_name": "my-data-factory",  # This can also be specified in the ADF connection.
            "resource_group_name": "my-resource-group",  # This can also be specified in the ADF connection.
        },
        default_view="graph",
) as dag:
    begin = EmptyOperator(task_id="begin")
    end = EmptyOperator(task_id="end")

    # [START howto_operator_adf_run_pipeline]
    run_pipeline1: BaseOperator = AzureDataFactoryRunPipelineOperator(
        task_id="run_pipeline1",
        pipeline_name="pipeline1",
        parameters={"myParam": "value"},
    )
    # [END howto_operator_adf_run_pipeline]

    # [START howto_operator_adf_run_pipeline_async]
    run_pipeline2: BaseOperator = AzureDataFactoryRunPipelineOperator(
        task_id="run_pipeline2",
        pipeline_name="pipeline2",
        wait_for_termination=False,
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Example DAG demonstrating the usage of labels with different branches.
"""
import pendulum

from airflow import DAG
from airflow.operators.empty import EmptyOperator
from airflow.utils.edgemodifier import Label

with DAG(
        "example_branch_labels",
        schedule="@daily",
        start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
        catchup=False,
) as dag:
    ingest = EmptyOperator(task_id="ingest")
    analyse = EmptyOperator(task_id="analyse")
    check = EmptyOperator(task_id="check_integrity")
    describe = EmptyOperator(task_id="describe_integrity")
    error = EmptyOperator(task_id="email_error")
    save = EmptyOperator(task_id="save")
    report = EmptyOperator(task_id="report")

    ingest >> analyse >> check
    check >> Label("No errors") >> save >> report
    check >> Label("Errors found") >> describe >> error >> report
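
# Label() only annotates edges in the Graph view; it has no effect on
# scheduling. With no branching operator here, check fans out to both the
# "No errors" and "Errors found" paths unconditionally.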
Example #12
import pendulum

from airflow.decorators import task
from airflow.models import DAG
from airflow.operators.empty import EmptyOperator
from airflow.utils.trigger_rule import TriggerRule

with DAG(
    dag_id="example_nested_branch_dag",
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    schedule="@daily",
    tags=["example"],
) as dag:

    @task.branch()
    def branch(task_id_to_return: str) -> str:
        return task_id_to_return

    branch_1 = branch.override(task_id="branch_1")(task_id_to_return="true_1")
    join_1 = EmptyOperator(task_id="join_1", trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS)
    true_1 = EmptyOperator(task_id="true_1")
    false_1 = EmptyOperator(task_id="false_1")

    branch_2 = branch.override(task_id="branch_2")(task_id_to_return="true_2")
    join_2 = EmptyOperator(task_id="join_2", trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS)
    true_2 = EmptyOperator(task_id="true_2")
    false_2 = EmptyOperator(task_id="false_2")
    false_3 = EmptyOperator(task_id="false_3")

    branch_1 >> true_1 >> join_1
    branch_1 >> false_1 >> branch_2 >> [true_2, false_2] >> join_2 >> false_3 >> join_1
Example #13
import pendulum

from airflow import DAG
from airflow.operators.empty import EmptyOperator
from airflow.sensors.external_task import ExternalTaskMarker, ExternalTaskSensor

start_date = pendulum.datetime(2021, 1, 1, tz="UTC")

with DAG(
    dag_id="example_external_task_marker_parent",
    start_date=start_date,
    schedule_interval=None,
    catchup=False,
    tags=['example2'],
) as parent_dag:
    # [START howto_operator_external_task_marker]
    parent_task = ExternalTaskMarker(
        task_id="parent_task",
        external_dag_id="example_external_task_marker_child",
        external_task_id="child_task1",
    )
    # [END howto_operator_external_task_marker]

with DAG(
    dag_id="example_external_task_marker_child",
    start_date=start_date,
    schedule_interval=None,
    catchup=False,
    tags=['example2'],
) as child_dag:
    # [START howto_operator_external_task_sensor]
    child_task1 = ExternalTaskSensor(
        task_id="child_task1",
        external_dag_id=parent_dag.dag_id,
        external_task_id=parent_task.task_id,
        timeout=600,
        allowed_states=['success'],
        failed_states=['failed', 'skipped'],
        mode="reschedule",
    )
    # [END howto_operator_external_task_sensor]
    child_task2 = EmptyOperator(task_id="child_task2")
    child_task1 >> child_task2
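
# Clearing parent_task with "Recursive" selected also clears child_task1 in
# the child DAG (that is what ExternalTaskMarker is for), and the sensor's
# reschedule mode frees its worker slot between pokes while it waits.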
"""
Example DAG demonstrating the usage of BranchDayOfWeekOperator.
"""
import pendulum

from airflow import DAG
from airflow.operators.empty import EmptyOperator
from airflow.operators.weekday import BranchDayOfWeekOperator

with DAG(
        dag_id="example_weekday_branch_operator",
        start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
        catchup=False,
        tags=["example"],
        schedule_interval="@daily",
) as dag:
    # [START howto_operator_day_of_week_branch]
    empty_task_1 = EmptyOperator(task_id='branch_true', dag=dag)
    empty_task_2 = EmptyOperator(task_id='branch_false', dag=dag)

    branch = BranchDayOfWeekOperator(
        task_id="make_choice",
        follow_task_ids_if_true="branch_true",
        follow_task_ids_if_false="branch_false",
        week_day="Monday",
    )

    # Run empty_task_1 if branch executes on Monday
    branch >> [empty_task_1, empty_task_2]
    # [END howto_operator_day_of_week_branch]
Example #15
"""Example DAG demonstrating the usage of the TaskGroup."""
import pendulum

from airflow.models.dag import DAG
from airflow.operators.bash import BashOperator
from airflow.operators.empty import EmptyOperator
from airflow.utils.task_group import TaskGroup

# [START howto_task_group]
with DAG(
        dag_id="example_task_group",
        start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
        catchup=False,
        tags=["example"],
) as dag:
    start = EmptyOperator(task_id="start")

    # [START howto_task_group_section_1]
    with TaskGroup("section_1", tooltip="Tasks for section_1") as section_1:
        task_1 = EmptyOperator(task_id="task_1")
        task_2 = BashOperator(task_id="task_2", bash_command='echo 1')
        task_3 = EmptyOperator(task_id="task_3")

        task_1 >> [task_2, task_3]
    # [END howto_task_group_section_1]

    # [START howto_task_group_section_2]
    with TaskGroup("section_2", tooltip="Tasks for section_2") as section_2:
        task_1 = EmptyOperator(task_id="task_1")

        # [START howto_task_group_inner_section_2]
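
# TaskGroup prefixes child task_ids with the group id, which is why both
# groups can reuse task_id="task_1": within the DAG they become
# "section_1.task_1" and "section_2.task_1".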
Example #16
from datetime import datetime, timedelta

from airflow import DAG

try:
    from airflow.operators.empty import EmptyOperator
except ModuleNotFoundError:
    from airflow.operators.dummy import DummyOperator as EmptyOperator  # type: ignore
from airflow.providers.microsoft.winrm.hooks.winrm import WinRMHook
from airflow.providers.microsoft.winrm.operators.winrm import WinRMOperator

with DAG(
        dag_id='POC_winrm_parallel',
        schedule_interval='0 0 * * *',
        start_date=datetime(2021, 1, 1),
        dagrun_timeout=timedelta(minutes=60),
        tags=['example'],
        catchup=False,
) as dag:

    run_this_last = EmptyOperator(task_id='run_this_last')

    # [START create_hook]
    winRMHook = WinRMHook(ssh_conn_id='ssh_POC1')
    # [END create_hook]

    # [START run_operator]
    t1 = WinRMOperator(task_id="wintask1",
                       command='ls -altr',
                       winrm_hook=winRMHook)

    t2 = WinRMOperator(task_id="wintask2",
                       command='sleep 60',
                       winrm_hook=winRMHook)

    t3 = WinRMOperator(task_id="wintask3",
                       command='sleep 30',  # placeholder: the original command is truncated in this snippet
                       winrm_hook=winRMHook)
Example #17
import pendulum

from airflow import DAG
from airflow.operators.empty import EmptyOperator
from airflow.operators.python import BranchPythonOperator


def should_run(**kwargs):
    """
    Determine which empty_task should be run based on the execution date's minute.

    :param dict kwargs: Context
    :return: Id of the task to run
    :rtype: str
    """
    print(
        f"------------- exec dttm = {kwargs['execution_date']} and minute = {kwargs['execution_date'].minute}"
    )
    if kwargs['execution_date'].minute % 2 == 0:
        return "empty_task_1"
    else:
        return "empty_task_2"


with DAG(
        dag_id='example_branch_dop_operator_v3',
        schedule_interval='*/1 * * * *',
        start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
        catchup=False,
        default_args={'depends_on_past': True},
        tags=['example'],
) as dag:
    cond = BranchPythonOperator(
        task_id='condition',
        python_callable=should_run,
    )

    empty_task_1 = EmptyOperator(task_id='empty_task_1')
    empty_task_2 = EmptyOperator(task_id='empty_task_2')
    cond >> [empty_task_1, empty_task_2]
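
# should_run returns the task_id to follow and BranchPythonOperator skips the
# other branch. With depends_on_past=True, each per-minute run waits for the
# previous run's task instances to have succeeded or been skipped.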
Example #18
import pendulum

from airflow import DAG
from airflow.models.baseoperator import chain
from airflow.operators.empty import EmptyOperator
from airflow.operators.python import ShortCircuitOperator
from airflow.utils.trigger_rule import TriggerRule

with DAG(
        dag_id='example_short_circuit_operator',  # assumed dag_id; the snippet starts mid-definition
        start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
        catchup=False,
        tags=['example'],
) as dag:
    # [START howto_operator_short_circuit]
    cond_true = ShortCircuitOperator(
        task_id='condition_is_True',
        python_callable=lambda: True,
    )

    cond_false = ShortCircuitOperator(
        task_id='condition_is_False',
        python_callable=lambda: False,
    )

    ds_true = [EmptyOperator(task_id='true_' + str(i)) for i in [1, 2]]
    ds_false = [EmptyOperator(task_id='false_' + str(i)) for i in [1, 2]]

    chain(cond_true, *ds_true)
    chain(cond_false, *ds_false)
    # [END howto_operator_short_circuit]

    # [START howto_operator_short_circuit_trigger_rules]
    task_1, task_2, task_3, task_4, task_5, task_6 = [
        EmptyOperator(task_id=f"task_{i}") for i in range(1, 7)
    ]

    task_7 = EmptyOperator(task_id="task_7", trigger_rule=TriggerRule.ALL_DONE)

    short_circuit = ShortCircuitOperator(task_id="short_circuit",
                                         ignore_downstream_trigger_rules=False,
                                         python_callable=lambda: False)
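
    # The snippet cuts off before the wiring. One illustrative layout (an
    # assumption, not necessarily the original file's) that exercises
    # ignore_downstream_trigger_rules=False: place short_circuit upstream of
    # task_6 and task_7. When it returns False, task_6 is skipped, but task_7
    # still runs because its ALL_DONE trigger rule is respected rather than
    # overridden by the short circuit.
    chain(task_1, task_2, task_3, task_4, task_5, short_circuit, task_6, task_7)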
Example #19
import pendulum

from airflow import DAG
from airflow.operators.datetime import BranchDateTimeOperator
from airflow.operators.empty import EmptyOperator

dag1 = DAG(
    dag_id="example_branch_datetime_operator",
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    tags=["example"],
    schedule="@daily",
)

# [START howto_branch_datetime_operator]
empty_task_11 = EmptyOperator(task_id='date_in_range', dag=dag1)
empty_task_21 = EmptyOperator(task_id='date_outside_range', dag=dag1)

cond1 = BranchDateTimeOperator(
    task_id='datetime_branch',
    follow_task_ids_if_true=['date_in_range'],
    follow_task_ids_if_false=['date_outside_range'],
    target_upper=pendulum.datetime(2020, 10, 10, 15, 0, 0),
    target_lower=pendulum.datetime(2020, 10, 10, 14, 0, 0),
    dag=dag1,
)

# Run empty_task_11 if cond1 executes between 2020-10-10 14:00:00 and 2020-10-10 15:00:00
cond1 >> [empty_task_11, empty_task_21]
# [END howto_branch_datetime_operator]
Example #20
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""Example of the LatestOnlyOperator"""

import datetime as dt

from airflow import DAG
from airflow.operators.empty import EmptyOperator
from airflow.operators.latest_only import LatestOnlyOperator

with DAG(
        dag_id='latest_only',
        schedule_interval=dt.timedelta(hours=4),
        start_date=dt.datetime(2021, 1, 1),
        catchup=False,
        tags=['example2', 'example3'],
) as dag:
    latest_only = LatestOnlyOperator(task_id='latest_only')
    task1 = EmptyOperator(task_id='task1')

    latest_only >> task1
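
# LatestOnlyOperator short-circuits every run except the one covering the
# latest interval, so task1 executes only on the most recent scheduled run
# and is skipped during backfills.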