    def test_failure_callback_only_called_once(self, mock_return_code, _check_call):
        """
        Test that ensures that when a task exits with failure by itself,
        failure callback is only called once
        """
        # Use a shared-memory Value so we can reliably observe the change even
        # when it is updated from another process.
        failure_callback_called = Value('i', 0)
        callback_count_lock = Lock()

        def failure_callback(context):
            with callback_count_lock:
                failure_callback_called.value += 1
            assert context['dag_run'].dag_id == 'test_failure_callback_race'
            assert isinstance(context['exception'], AirflowFailException)

        def task_function(ti):
            raise AirflowFailException()

        dag = DAG(dag_id='test_failure_callback_race', start_date=DEFAULT_DATE)
        task = PythonOperator(
            task_id='test_exit_on_failure',
            python_callable=task_function,
            on_failure_callback=failure_callback,
            dag=dag,
        )

        dag.clear()
        with create_session() as session:
            dag.create_dagrun(
                run_id="test",
                state=State.RUNNING,
                execution_date=DEFAULT_DATE,
                start_date=DEFAULT_DATE,
                session=session,
            )
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        ti.refresh_from_db()

        job1 = LocalTaskJob(task_instance=ti,
                            ignore_ti_state=True,
                            executor=SequentialExecutor())

        # Simulate race condition where job1 heartbeat ran right after task
        # state got set to failed by ti.handle_failure but before task process
        # fully exits. See _execute loop in airflow/jobs/local_task_job.py.
        # In this case, we have:
        #  * task_runner.return_code() is None
        #  * ti.state == State.Failed
        #
        # We also need to set return_code to a valid int after job1.terminating
        # is set to True so _execute loop won't loop forever.
        def dummy_return_code(*args, **kwargs):
            return None if not job1.terminating else -9

        mock_return_code.side_effect = dummy_return_code

        with timeout(10):
            # This should be _much_ shorter to run.
            # If you change this limit, make the timeout in the callable above bigger
            job1.run()

        ti.refresh_from_db()
        assert ti.state == State.FAILED  # task exits with failure state
        assert failure_callback_called.value == 1
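
# For reference, a minimal standalone sketch of the shared-memory counter
# pattern the test above relies on: multiprocessing.Value gives an int that is
# visible across processes, and a Lock serializes the updates. All names here
# are illustrative, not taken from the test.
from multiprocessing import Lock, Process, Value

counter = Value('i', 0)  # 'i' -> C int backed by shared memory
counter_lock = Lock()

def bump():
    with counter_lock:
        counter.value += 1

if __name__ == '__main__':
    procs = [Process(target=bump) for _ in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    assert counter.value == 4  # every increment from every process is counted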
Example #2
    def setUp(self):
        args = {
            'owner': 'airflow',
            'start_date': datetime.datetime(2017, 1, 1)
        }
        self.dag = DAG('test_dag_id', default_args=args)
Example #3
def dag_backfill(args, dag=None):
    """Creates backfill job or dry run for a DAG"""
    logging.basicConfig(level=settings.LOGGING_LEVEL,
                        format=settings.SIMPLE_LOG_FORMAT)

    signal.signal(signal.SIGTERM, sigint_handler)

    import warnings
    warnings.warn(
        '--ignore-first-depends-on-past is deprecated as the value is always set to True',
        category=PendingDeprecationWarning)

    if args.ignore_first_depends_on_past is False:
        args.ignore_first_depends_on_past = True

    dag = dag or get_dag(args.subdir, args.dag_id)

    if not args.start_date and not args.end_date:
        raise AirflowException("Provide a start_date and/or end_date")

    # If only one date is passed, use it for both start and end
    args.end_date = args.end_date or args.start_date
    args.start_date = args.start_date or args.end_date

    if args.task_regex:
        dag = dag.sub_dag(task_regex=args.task_regex,
                          include_upstream=not args.ignore_dependencies)

    run_conf = None
    if args.conf:
        run_conf = json.loads(args.conf)

    if args.dry_run:
        print("Dry run of DAG {0} on {1}".format(args.dag_id, args.start_date))
        for task in dag.tasks:
            print("Task {0}".format(task.task_id))
            ti = TaskInstance(task, args.start_date)
            ti.dry_run()
    else:
        if args.reset_dagruns:
            DAG.clear_dags(
                [dag],
                start_date=args.start_date,
                end_date=args.end_date,
                confirm_prompt=not args.yes,
                include_subdags=True,
            )

        dag.run(start_date=args.start_date,
                end_date=args.end_date,
                mark_success=args.mark_success,
                local=args.local,
                donot_pickle=(args.donot_pickle
                              or conf.getboolean('core', 'donot_pickle')),
                ignore_first_depends_on_past=args.ignore_first_depends_on_past,
                ignore_task_deps=args.ignore_dependencies,
                pool=args.pool,
                delay_on_limit_secs=args.delay_on_limit,
                verbose=args.verbose,
                conf=run_conf,
                rerun_failed_tasks=args.rerun_failed_tasks,
                run_backwards=args.run_backwards)
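
# A hedged usage sketch for dag_backfill: in Airflow this `args` object comes
# from the CLI parser, so the Namespace below is illustrative only; every
# attribute name is taken from the function body above.
import argparse
from datetime import datetime

backfill_args = argparse.Namespace(
    dag_id='example_bash_operator',
    subdir=None,
    task_regex=None,
    start_date=datetime(2020, 1, 1),
    end_date=datetime(2020, 1, 2),
    ignore_first_depends_on_past=True,
    ignore_dependencies=False,
    mark_success=False,
    local=True,
    donot_pickle=True,
    yes=True,
    reset_dagruns=False,
    rerun_failed_tasks=False,
    run_backwards=False,
    conf=None,
    dry_run=True,  # only print the task instances that would run
    pool=None,
    delay_on_limit=1.0,
    verbose=False,
)
dag_backfill(backfill_args)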
Example #4
def dag_bag_ext():
    """
    Create a DagBag with DAGs looking like this. The dotted lines represent external dependencies
    set up using ExternalTaskMarker and ExternalTaskSensor.

    dag_0:   task_a_0 >> task_b_0
                             |
                             |
    dag_1:                   ---> task_a_1 >> task_b_1
                                                  |
                                                  |
    dag_2:                                        ---> task_a_2 >> task_b_2
                                                                       |
                                                                       |
    dag_3:                                                             ---> task_a_3 >> task_b_3
    """
    dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    dag_0 = DAG("dag_0", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_0 = DummyOperator(task_id="task_a_0", dag=dag_0)
    task_b_0 = ExternalTaskMarker(task_id="task_b_0",
                                  external_dag_id="dag_1",
                                  external_task_id="task_a_1",
                                  recursion_depth=3,
                                  dag=dag_0)
    task_a_0 >> task_b_0

    dag_1 = DAG("dag_1", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_1 = ExternalTaskSensor(task_id="task_a_1",
                                  external_dag_id=dag_0.dag_id,
                                  external_task_id=task_b_0.task_id,
                                  dag=dag_1)
    task_b_1 = ExternalTaskMarker(task_id="task_b_1",
                                  external_dag_id="dag_2",
                                  external_task_id="task_a_2",
                                  recursion_depth=2,
                                  dag=dag_1)
    task_a_1 >> task_b_1

    dag_2 = DAG("dag_2", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_2 = ExternalTaskSensor(task_id="task_a_2",
                                  external_dag_id=dag_1.dag_id,
                                  external_task_id=task_b_1.task_id,
                                  dag=dag_2)
    task_b_2 = ExternalTaskMarker(task_id="task_b_2",
                                  external_dag_id="dag_3",
                                  external_task_id="task_a_3",
                                  recursion_depth=1,
                                  dag=dag_2)
    task_a_2 >> task_b_2

    dag_3 = DAG("dag_3", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_3 = ExternalTaskSensor(task_id="task_a_3",
                                  external_dag_id=dag_2.dag_id,
                                  external_task_id=task_b_2.task_id,
                                  dag=dag_3)
    task_b_3 = DummyOperator(task_id="task_b_3", dag=dag_3)
    task_a_3 >> task_b_3

    for dag in [dag_0, dag_1, dag_2, dag_3]:
        dag_bag.bag_dag(dag, None, dag)

    return dag_bag
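
# A hedged usage sketch for the fixture above: because task_b_0 is an
# ExternalTaskMarker, clearing it (with downstream included) should cascade
# through dag_1, dag_2 and dag_3 up to the configured recursion_depth. The
# task_ids and dag_bag keyword arguments to DAG.clear() are assumptions about
# its signature, not taken from this snippet.
bag = dag_bag_ext()
dag_0 = bag.get_dag("dag_0")
dag_0.clear(
    task_ids=["task_b_0"],  # start clearing at the marker task
    dag_bag=bag,            # lets clear() resolve the external DAGs
)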
Example #5

def subdag(parent_dag_name, child_dag_name, args):
    """Generate a DAG to be used as a subdag.

    The head of this function was missing from the truncated snippet; the
    dag_id and schedule below follow the stock example_subdag_operator pattern.
    """
    dag_subdag = DAG(
        dag_id='%s.%s' % (parent_dag_name, child_dag_name),
        default_args=args,
        schedule_interval="@daily",
    )

    for i in range(2):
        DummyOperator(
            task_id='%s-task-%s' % (child_dag_name, i + 1),
            default_args=args,
            dag=dag_subdag,
        )

    return dag_subdag


with DAG(
        dag_id=DAG_NAME,
        start_date=datetime(2019, 1, 1),
        max_active_runs=1,
        default_args=DEFAULT_TASK_ARGS,
        schedule_interval=timedelta(minutes=1),
) as dag:

    start = DummyOperator(task_id='start', )

    section_1 = SubDagOperator(
        task_id='section-1',
        subdag=subdag(DAG_NAME, 'section-1', DEFAULT_TASK_ARGS),
        default_args=DEFAULT_TASK_ARGS,
    )

    some_other_task = DummyOperator(task_id='some-other-task', )

    start >> section_1 >> some_other_task  # pylint: disable=W0104
Example #6

from airflow.models.dag import DAG
from airflow.utils import timezone
from airflow.ti_deps.met_handlers.aiflow_met_handler import AIFlowMetHandler
from airflow.operators.dummy_operator import DummyOperator
from airflow.models.event import Event
from airflow.operators.send_event_operator import SendEventOperator
from airflow.operators.bash_operator import BashOperator

dag = DAG(dag_id='test_projec1',
          start_date=timezone.utcnow(),
          schedule_interval="@once")
env = {
    'PYTHONPATH':
    '/Users/chenwuchao/code/ali/ai_flow/python_ai_flow/test/python_codes/simple_python:/Users/chenwuchao/code/ali/ai_flow:/Users/chenwuchao/code/ali/ai_flow/flink_ai_flow/tests/python_codes:/Users/chenwuchao/code/ali/ai_flow/flink_ai_flow/tests:/Applications/PyCharm CE.app/Contents/helpers/pycharm:/anaconda3/lib/python37.zip:/anaconda3/lib/python3.7:/anaconda3/lib/python3.7/lib-dynload:/Users/chenwuchao/.local/lib/python3.7/site-packages:/anaconda3/lib/python3.7/site-packages:/anaconda3/lib/python3.7/site-packages/aeosa://anaconda3/lib/python3.7/site-packages:/Users/chenwuchao/airflow/dags:/Users/chenwuchao/airflow/config:/Users/chenwuchao/airflow/plugins:/Users/chenwuchao/code/ali/ai_flow/python_ai_flow:/Users/chenwuchao/code/ali/ai_flow/python_ai_flow/test/python_codes'
}
op_0 = BashOperator(
    task_id='None',
    dag=dag,
    bash_command=
    '/anaconda3/bin/python /Users/chenwuchao/code/ali/ai_flow/python_ai_flow/local_job_run.py /Users/chenwuchao/code/ali/ai_flow/python_ai_flow/test tmp_funca533b537-8e45-439c-8f71-0ad8dd9409c0LocalPythonJob_0 tmp_args713c2a6b-c023-4340-96ee-22f7c62f15b3LocalPythonJob_0 test_simple_python',
    env=env)
Example #7

# [START howto_operator_dingding_failure_callback]
def failure_callback(context):
    """Callback that posts a Dingding alert when the monitored task fails.

    The head of this function was missing from the truncated snippet; the
    message text is illustrative.
    """
    message = 'The task {} failed.'.format(context['ti'].task_id)
    return DingdingOperator(
        task_id='dingding_success_callback',
        dingding_conn_id='dingding_default',
        message_type='text',
        message=message,
        at_all=True,
    ).execute(context)


args['on_failure_callback'] = failure_callback
# [END howto_operator_dingding_failure_callback]

with DAG(
        dag_id='example_dingding_operator',
        default_args=args,
        schedule_interval='@once',
        dagrun_timeout=timedelta(minutes=60),
        tags=['example'],
) as dag:

    # [START howto_operator_dingding]
    text_msg_remind_none = DingdingOperator(
        task_id='text_msg_remind_none',
        dingding_conn_id='dingding_default',
        message_type='text',
        message='Airflow dingding text message remind none',
        at_mobiles=None,
        at_all=False)
    # [END howto_operator_dingding]

    text_msg_remind_specific = DingdingOperator(
        task_id='text_msg_remind_specific',
        dingding_conn_id='dingding_default',
        message_type='text',
        message='Airflow dingding text message remind specific users',
        # the snippet was cut off here; at_mobiles values are placeholders
        at_mobiles=['156XXXXXXXX', '130XXXXXXXX'],
        at_all=False)
Example #8
    def sync_to_db(self, session: Optional[Session] = None):
        """Save attributes about list of DAG to the DB."""
        # To avoid circular import - airflow.models.dagbag -> airflow.models.dag -> airflow.models.dagbag
        from airflow.models.dag import DAG
        from airflow.models.serialized_dag import SerializedDagModel

        def _serialize_dag_capturing_errors(dag, session):
            """
            Try to serialize the dag to the DB, but make a note of any errors.

            We can't place them directly in import_errors, as this may be retried, and work the next time
            """
            if dag.is_subdag:
                return []
            try:
                # We can't use bulk_write_to_db as we want to capture each error individually
                SerializedDagModel.write_dag(
                    dag,
                    min_update_interval=settings.MIN_SERIALIZED_DAG_UPDATE_INTERVAL,
                    session=session,
                )
                return []
            except OperationalError:
                raise
            except Exception:  # pylint: disable=broad-except
                return [(dag.fileloc,
                         traceback.format_exc(
                             limit=-self.dagbag_import_error_traceback_depth))]

        # Retry 'DAG.bulk_write_to_db' & 'SerializedDagModel.bulk_sync_to_db' in case
        # of any Operational Errors
        # In case of failures, provide_session handles rollback
        for attempt in tenacity.Retrying(
                retry=tenacity.retry_if_exception_type(
                    exception_types=OperationalError),
                wait=tenacity.wait_random_exponential(multiplier=0.5, max=5),
                stop=tenacity.stop_after_attempt(settings.MAX_DB_RETRIES),
                before_sleep=tenacity.before_sleep_log(self.log,
                                                       logging.DEBUG),
                reraise=True,
        ):
            with attempt:
                serialize_errors = []
                self.log.debug(
                    "Running dagbag.sync_to_db with retries. Try %d of %d",
                    attempt.retry_state.attempt_number,
                    settings.MAX_DB_RETRIES,
                )
                self.log.debug("Calling the DAG.bulk_sync_to_db method")
                try:
                    # Write Serialized DAGs to DB, capturing errors
                    for dag in self.dags.values():
                        serialize_errors.extend(
                            _serialize_dag_capturing_errors(dag, session))

                    DAG.bulk_write_to_db(self.dags.values(), session=session)
                except OperationalError:
                    session.rollback()
                    raise
                # Only now that we are "complete" do we update import_errors - we
                # don't want to record errors from previous failed attempts
                self.import_errors.update(dict(serialize_errors))
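
# A minimal standalone sketch of the tenacity retry loop used above: re-run a
# flaky operation on a specific exception type with jittered exponential
# backoff. The function and exception here are illustrative stand-ins.
import logging
import tenacity

log = logging.getLogger(__name__)

def flaky_db_write():
    """Stand-in for SerializedDagModel.write_dag / DAG.bulk_write_to_db."""

for attempt in tenacity.Retrying(
        retry=tenacity.retry_if_exception_type(RuntimeError),
        wait=tenacity.wait_random_exponential(multiplier=0.5, max=5),
        stop=tenacity.stop_after_attempt(3),
        before_sleep=tenacity.before_sleep_log(log, logging.DEBUG),
        reraise=True,
):
    with attempt:  # the context manager reports success/failure to tenacity
        flaky_db_write()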
Example #9
def set_dag_run_state_to_failed(
    *,
    dag: DAG,
    execution_date: Optional[datetime] = None,
    run_id: Optional[str] = None,
    commit: bool = False,
    session: SASession = NEW_SESSION,
):
    """
    Set the dag run for a specific execution date or run_id and its running task instances
    to failed.

    :param dag: the DAG of which to alter state
    :param execution_date: the execution date from which to start looking (deprecated)
    :param run_id: the DAG run_id to start looking from
    :param commit: commit DAG and tasks to be altered to the database
    :param session: database session
    :return: If commit is true, list of tasks that have been updated,
             otherwise list of tasks that will be updated
    :raises: ValueError if dag or execution_date is invalid
    """
    if not exactly_one(execution_date, run_id):
        return []
    if not dag:
        return []

    if execution_date:
        if not timezone.is_localized(execution_date):
            raise ValueError(f"Received non-localized date {execution_date}")
        dag_run = dag.get_dagrun(execution_date=execution_date)
        if not dag_run:
            raise ValueError(
                f'DagRun with execution_date: {execution_date} not found')
        run_id = dag_run.run_id

    if not run_id:
        raise ValueError(f'Invalid dag_run_id: {run_id}')

    # Mark the dag run to failed.
    if commit:
        _set_dag_run_state(dag.dag_id, run_id, DagRunState.FAILED, session)

    # Mark only RUNNING task instances.
    task_ids = [task.task_id for task in dag.tasks]
    tis = session.query(TaskInstance).filter(
        TaskInstance.dag_id == dag.dag_id,
        TaskInstance.run_id == run_id,
        TaskInstance.task_id.in_(task_ids),
        TaskInstance.state.in_(State.running),
    )
    task_ids_of_running_tis = [task_instance.task_id for task_instance in tis]

    tasks = []
    for task in dag.tasks:
        if task.task_id not in task_ids_of_running_tis:
            continue
        task.dag = dag
        tasks.append(task)

    return set_state(tasks=tasks,
                     dag_run_id=run_id,
                     state=State.FAILED,
                     commit=commit,
                     session=session)
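
# A hedged usage sketch: with commit=False the call above is a dry run that
# returns the task instances that *would* be set to failed; commit=True
# applies the change. The run_id value is illustrative.
with create_session() as session:
    would_update = set_dag_run_state_to_failed(
        dag=dag,  # assumes a DAG object is in scope
        run_id="manual__2021-01-01T00:00:00+00:00",
        commit=False,
        session=session,
    )
    print(f"{len(would_update)} task instance(s) would be marked failed")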
Example #10
    Mode,
    OpenMLDBLoadDataOperator,
    OpenMLDBSelectIntoOperator,
    OpenMLDBSQLOperator,
    OpenMLDBDeployOperator,
)

import xgboost_train_sample

# cp example_dags/train_sample.csv to /tmp first
PATH_TO_DATA_FILE = os.environ.get('OPENMLDB_PATH_TO_DATA_FILE', '/tmp/train_sample.csv')
ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
DAG_ID = "example_openmldb_complex"

with DAG(
        dag_id=DAG_ID,
        start_date=datetime(2021, 1, 1),
        default_args={'openmldb_conn_id': 'openmldb_conn_id'},
        max_active_runs=1,
        tags=['example'],
        catchup=False,
) as dag:
    database = "example_db"
    table = "example_table"

    create_database = OpenMLDBSQLOperator(
        task_id='create-db',
        db=database, mode=Mode.OFFSYNC,
        sql=f'create database if not exists {database}'
    )

    create_table = OpenMLDBSQLOperator(
        task_id='create-table',
        db=database, mode=Mode.OFFSYNC,
Example #11
class TestCore(unittest.TestCase):
    default_scheduler_args = {"num_runs": 1}

    def setUp(self):
        self.dagbag = DagBag(dag_folder=DEV_NULL,
                             include_examples=True,
                             read_dags_from_db=False)
        self.args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        self.dag = DAG(TEST_DAG_ID, default_args=self.args)
        self.dag_bash = self.dagbag.dags['example_bash_operator']
        self.runme_0 = self.dag_bash.get_task('runme_0')
        self.run_after_loop = self.dag_bash.get_task('run_after_loop')
        self.run_this_last = self.dag_bash.get_task('run_this_last')

    def tearDown(self):
        session = Session()
        session.query(DagRun).filter(DagRun.dag_id == TEST_DAG_ID).delete(
            synchronize_session=False)
        session.query(TaskInstance).filter(
            TaskInstance.dag_id == TEST_DAG_ID).delete(
                synchronize_session=False)
        session.query(TaskFail).filter(TaskFail.dag_id == TEST_DAG_ID).delete(
            synchronize_session=False)
        session.commit()
        session.close()
        clear_db_dags()
        clear_db_runs()

    def test_check_operators(self):

        conn_id = "sqlite_default"

        captain_hook = BaseHook.get_hook(conn_id=conn_id)  # quite funny :D
        captain_hook.run("CREATE TABLE operator_test_table (a, b)")
        captain_hook.run("insert into operator_test_table values (1,2)")

        self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                               state=State.RUNNING,
                               execution_date=DEFAULT_DATE)
        op = CheckOperator(task_id='check',
                           sql="select count(*) from operator_test_table",
                           conn_id=conn_id,
                           dag=self.dag)

        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)

        op = ValueCheckOperator(
            task_id='value_check',
            pass_value=95,
            tolerance=0.1,
            conn_id=conn_id,
            sql="SELECT 100",
            dag=self.dag,
        )
        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)

        captain_hook.run("drop table operator_test_table")

    def test_clear_api(self):
        task = self.dag_bash.tasks[0]
        task.clear(start_date=DEFAULT_DATE,
                   end_date=DEFAULT_DATE,
                   upstream=True,
                   downstream=True)
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        ti.are_dependents_done()

    def test_illegal_args(self):
        """
        Tests that Operators reject illegal arguments
        """
        msg = 'Invalid arguments were passed to BashOperator (task_id: test_illegal_args).'
        with conf_vars({('operators', 'allow_illegal_arguments'): 'True'}):
            with self.assertWarns(PendingDeprecationWarning) as warning:
                BashOperator(
                    task_id='test_illegal_args',
                    bash_command='echo success',
                    dag=self.dag,
                    illegal_argument_1234='hello?',
                )
                assert any(msg in str(w) for w in warning.warnings)

    def test_illegal_args_forbidden(self):
        """
        Tests that operators raise exceptions on illegal arguments when
        illegal arguments are not allowed.
        """
        with self.assertRaises(AirflowException) as ctx:
            BashOperator(
                task_id='test_illegal_args',
                bash_command='echo success',
                dag=self.dag,
                illegal_argument_1234='hello?',
            )
        self.assertIn(
            'Invalid arguments were passed to BashOperator (task_id: test_illegal_args).',
            str(ctx.exception),
        )

    def test_bash_operator(self):
        op = BashOperator(task_id='test_bash_operator',
                          bash_command="echo success",
                          dag=self.dag)
        self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                               state=State.RUNNING,
                               execution_date=DEFAULT_DATE)

        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)

    def test_bash_operator_multi_byte_output(self):
        op = BashOperator(
            task_id='test_multi_byte_bash_operator',
            bash_command="echo \u2600",
            dag=self.dag,
            output_encoding='utf-8',
        )
        self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                               state=State.RUNNING,
                               execution_date=DEFAULT_DATE)
        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)

    def test_bash_operator_kill(self):
        import psutil

        sleep_time = "100%d" % os.getpid()
        op = BashOperator(
            task_id='test_bash_operator_kill',
            execution_timeout=timedelta(seconds=1),
            bash_command="/bin/bash -c 'sleep %s'" % sleep_time,
            dag=self.dag,
        )
        self.assertRaises(AirflowTaskTimeout,
                          op.run,
                          start_date=DEFAULT_DATE,
                          end_date=DEFAULT_DATE)
        sleep(2)
        pid = -1
        for proc in psutil.process_iter():
            if proc.cmdline() == ['sleep', sleep_time]:
                pid = proc.pid
        if pid != -1:
            os.kill(pid, signal.SIGTERM)
            self.fail(
                "BashOperator's subprocess still running after stopping on timeout!"
            )

    def test_on_failure_callback(self):
        # Annoying workaround for nonlocal not existing in python 2
        data = {'called': False}

        def check_failure(context, test_case=self):
            data['called'] = True
            error = context.get('exception')
            test_case.assertIsInstance(error, AirflowException)

        op = BashOperator(
            task_id='check_on_failure_callback',
            bash_command="exit 1",
            dag=self.dag,
            on_failure_callback=check_failure,
        )
        self.assertRaises(AirflowException,
                          op.run,
                          start_date=DEFAULT_DATE,
                          end_date=DEFAULT_DATE,
                          ignore_ti_state=True)
        self.assertTrue(data['called'])

    def test_dryrun(self):
        op = BashOperator(task_id='test_dryrun',
                          bash_command="echo success",
                          dag=self.dag)
        op.dry_run()

    def test_sqlite(self):
        import airflow.providers.sqlite.operators.sqlite

        op = airflow.providers.sqlite.operators.sqlite.SqliteOperator(
            task_id='time_sqlite',
            sql="CREATE TABLE IF NOT EXISTS unitest (dummy VARCHAR(20))",
            dag=self.dag)
        self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                               state=State.RUNNING,
                               execution_date=DEFAULT_DATE)
        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)

    def test_timeout(self):
        op = PythonOperator(
            task_id='test_timeout',
            execution_timeout=timedelta(seconds=1),
            python_callable=lambda: sleep(5),
            dag=self.dag,
        )
        self.assertRaises(AirflowTaskTimeout,
                          op.run,
                          start_date=DEFAULT_DATE,
                          end_date=DEFAULT_DATE,
                          ignore_ti_state=True)

    def test_python_op(self):
        def test_py_op(templates_dict, ds, **kwargs):
            if templates_dict['ds'] != ds:
                raise Exception("failure")

        op = PythonOperator(task_id='test_py_op',
                            python_callable=test_py_op,
                            templates_dict={'ds': "{{ ds }}"},
                            dag=self.dag)
        self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                               state=State.RUNNING,
                               execution_date=DEFAULT_DATE)
        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)

    def test_complex_template(self):
        def verify_templated_field(context):
            self.assertEqual(context['ti'].task.some_templated_field['bar'][1],
                             context['ds'])

        op = OperatorSubclass(
            task_id='test_complex_template',
            some_templated_field={
                'foo': '123',
                'bar': ['baz', '{{ ds }}']
            },
            dag=self.dag,
        )
        op.execute = verify_templated_field
        self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                               state=State.RUNNING,
                               execution_date=DEFAULT_DATE)
        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)

    def test_template_non_bool(self):
        """
        Test templates can handle objects with no sense of truthiness
        """
        class NonBoolObject:
            def __len__(self):  # pylint: disable=invalid-length-returned
                return NotImplemented

            def __bool__(self):  # pylint: disable=invalid-bool-returned, bad-option-value
                return NotImplemented

        op = OperatorSubclass(task_id='test_bad_template_obj',
                              some_templated_field=NonBoolObject(),
                              dag=self.dag)
        op.resolve_template_files()

    def test_task_get_template(self):
        TI = TaskInstance
        ti = TI(task=self.runme_0, execution_date=DEFAULT_DATE)
        ti.dag = self.dag_bash
        self.dag_bash.create_dagrun(run_type=DagRunType.MANUAL,
                                    state=State.RUNNING,
                                    execution_date=DEFAULT_DATE)
        ti.run(ignore_ti_state=True)
        context = ti.get_template_context()

        # DEFAULT DATE is 2015-01-01
        self.assertEqual(context['ds'], '2015-01-01')
        self.assertEqual(context['ds_nodash'], '20150101')

        # next_ds is 2015-01-02 as the dag interval is daily
        self.assertEqual(context['next_ds'], '2015-01-02')
        self.assertEqual(context['next_ds_nodash'], '20150102')

        # prev_ds is 2014-12-31 as the dag interval is daily
        self.assertEqual(context['prev_ds'], '2014-12-31')
        self.assertEqual(context['prev_ds_nodash'], '20141231')

        self.assertEqual(context['ts'], '2015-01-01T00:00:00+00:00')
        self.assertEqual(context['ts_nodash'], '20150101T000000')
        self.assertEqual(context['ts_nodash_with_tz'], '20150101T000000+0000')

        self.assertEqual(context['yesterday_ds'], '2014-12-31')
        self.assertEqual(context['yesterday_ds_nodash'], '20141231')

        self.assertEqual(context['tomorrow_ds'], '2015-01-02')
        self.assertEqual(context['tomorrow_ds_nodash'], '20150102')

    def test_local_task_job(self):
        TI = TaskInstance
        ti = TI(task=self.runme_0, execution_date=DEFAULT_DATE)
        job = LocalTaskJob(task_instance=ti, ignore_ti_state=True)
        job.run()

    def test_raw_job(self):
        TI = TaskInstance
        ti = TI(task=self.runme_0, execution_date=DEFAULT_DATE)
        ti.dag = self.dag_bash
        self.dag_bash.create_dagrun(run_type=DagRunType.MANUAL,
                                    state=State.RUNNING,
                                    execution_date=DEFAULT_DATE)
        ti.run(ignore_ti_state=True)

    def test_round_time(self):

        rt1 = round_time(datetime(2015, 1, 1, 6), timedelta(days=1))
        self.assertEqual(datetime(2015, 1, 1, 0, 0), rt1)

        rt2 = round_time(datetime(2015, 1, 2), relativedelta(months=1))
        self.assertEqual(datetime(2015, 1, 1, 0, 0), rt2)

        rt3 = round_time(datetime(2015, 9, 16, 0, 0), timedelta(1),
                         datetime(2015, 9, 14, 0, 0))
        self.assertEqual(datetime(2015, 9, 16, 0, 0), rt3)

        rt4 = round_time(datetime(2015, 9, 15, 0, 0), timedelta(1),
                         datetime(2015, 9, 14, 0, 0))
        self.assertEqual(datetime(2015, 9, 15, 0, 0), rt4)

        rt5 = round_time(datetime(2015, 9, 14, 0, 0), timedelta(1),
                         datetime(2015, 9, 14, 0, 0))
        self.assertEqual(datetime(2015, 9, 14, 0, 0), rt5)

        rt6 = round_time(datetime(2015, 9, 13, 0, 0), timedelta(1),
                         datetime(2015, 9, 14, 0, 0))
        self.assertEqual(datetime(2015, 9, 14, 0, 0), rt6)

    def test_infer_time_unit(self):

        self.assertEqual('minutes', infer_time_unit([130, 5400, 10]))

        self.assertEqual('seconds', infer_time_unit([110, 50, 10, 100]))

        self.assertEqual('hours',
                         infer_time_unit([100000, 50000, 10000, 20000]))

        self.assertEqual('days', infer_time_unit([200000, 100000]))

    def test_scale_time_units(self):

        # use assert_almost_equal from numpy.testing since we are comparing
        # floating point arrays
        arr1 = scale_time_units([130, 5400, 10], 'minutes')
        assert_array_almost_equal(arr1, [2.167, 90.0, 0.167], decimal=3)

        arr2 = scale_time_units([110, 50, 10, 100], 'seconds')
        assert_array_almost_equal(arr2, [110.0, 50.0, 10.0, 100.0], decimal=3)

        arr3 = scale_time_units([100000, 50000, 10000, 20000], 'hours')
        assert_array_almost_equal(arr3, [27.778, 13.889, 2.778, 5.556],
                                  decimal=3)

        arr4 = scale_time_units([200000, 100000], 'days')
        assert_array_almost_equal(arr4, [2.315, 1.157], decimal=3)

    def test_bad_trigger_rule(self):
        with self.assertRaises(AirflowException):
            DummyOperator(task_id='test_bad_trigger',
                          trigger_rule="non_existent",
                          dag=self.dag)

    def test_terminate_task(self):
        """If a task instance's db state get deleted, it should fail"""
        from airflow.executors.sequential_executor import SequentialExecutor

        TI = TaskInstance
        dag = self.dagbag.dags.get('test_utils')
        task = dag.task_dict.get('sleeps_forever')

        ti = TI(task=task, execution_date=DEFAULT_DATE)
        job = LocalTaskJob(task_instance=ti,
                           ignore_ti_state=True,
                           executor=SequentialExecutor())

        # Running task instance asynchronously
        proc = multiprocessing.Process(target=job.run)
        proc.start()
        sleep(5)
        settings.engine.dispose()
        session = settings.Session()
        ti.refresh_from_db(session=session)
        # making sure it's actually running
        self.assertEqual(State.RUNNING, ti.state)
        ti = (session.query(TI).filter_by(dag_id=task.dag_id,
                                          task_id=task.task_id,
                                          execution_date=DEFAULT_DATE).one())

        # deleting the instance should result in a failure
        session.delete(ti)
        session.commit()
        # waiting for the async task to finish
        proc.join()

        # making sure that the task ended up as failed
        ti.refresh_from_db(session=session)
        self.assertEqual(State.FAILED, ti.state)
        session.close()

    def test_task_fail_duration(self):
        """If a task fails, the duration should be recorded in TaskFail"""

        op1 = BashOperator(task_id='pass_sleepy',
                           bash_command='sleep 3',
                           dag=self.dag)
        op2 = BashOperator(
            task_id='fail_sleepy',
            bash_command='sleep 5',
            execution_timeout=timedelta(seconds=3),
            retry_delay=timedelta(seconds=0),
            dag=self.dag,
        )
        session = settings.Session()
        try:
            op1.run(start_date=DEFAULT_DATE,
                    end_date=DEFAULT_DATE,
                    ignore_ti_state=True)
        except Exception:  # pylint: disable=broad-except
            pass
        try:
            op2.run(start_date=DEFAULT_DATE,
                    end_date=DEFAULT_DATE,
                    ignore_ti_state=True)
        except Exception:  # pylint: disable=broad-except
            pass
        op1_fails = (session.query(TaskFail).filter_by(
            task_id='pass_sleepy',
            dag_id=self.dag.dag_id,
            execution_date=DEFAULT_DATE).all())
        op2_fails = (session.query(TaskFail).filter_by(
            task_id='fail_sleepy',
            dag_id=self.dag.dag_id,
            execution_date=DEFAULT_DATE).all())

        self.assertEqual(0, len(op1_fails))
        self.assertEqual(1, len(op2_fails))
        self.assertGreaterEqual(sum([f.duration for f in op2_fails]), 3)

    def test_externally_triggered_dagrun(self):
        TI = TaskInstance

        # Create the dagrun between two "scheduled" execution dates of the DAG
        execution_date = DEFAULT_DATE + timedelta(days=2)
        execution_ds = execution_date.strftime('%Y-%m-%d')
        execution_ds_nodash = execution_ds.replace('-', '')

        dag = DAG(TEST_DAG_ID,
                  default_args=self.args,
                  schedule_interval=timedelta(weeks=1),
                  start_date=DEFAULT_DATE)
        task = DummyOperator(task_id='test_externally_triggered_dag_context',
                             dag=dag)
        dag.create_dagrun(
            run_type=DagRunType.SCHEDULED,
            execution_date=execution_date,
            state=State.RUNNING,
            external_trigger=True,
        )
        task.run(start_date=execution_date, end_date=execution_date)

        ti = TI(task=task, execution_date=execution_date)
        context = ti.get_template_context()

        # next_ds/prev_ds should be the execution date for manually triggered runs
        self.assertEqual(context['next_ds'], execution_ds)
        self.assertEqual(context['next_ds_nodash'], execution_ds_nodash)

        self.assertEqual(context['prev_ds'], execution_ds)
        self.assertEqual(context['prev_ds_nodash'], execution_ds_nodash)
Example #12
    def setUp(self):
        self.dag = DAG(TEST_DAG_ID, default_args=self.TRAINING_DEFAULT_ARGS)
Example #13
    def sync_to_db(self, session: Optional[Session] = None):
        """Save attributes about list of DAG to the DB."""
        # To avoid circular import - airflow.models.dagbag -> airflow.models.dag -> airflow.models.dagbag
        from airflow.models.dag import DAG
        from airflow.models.serialized_dag import SerializedDagModel

        def _serialize_dag_capturing_errors(dag, session):
            """
            Try to serialize the dag to the DB, but make a note of any errors.

            We can't place them directly in import_errors, as this may be retried, and work the next time
            """
            if dag.is_subdag:
                return []
            try:
                # We can't use bulk_write_to_db as we want to capture each error individually
                dag_was_updated = SerializedDagModel.write_dag(
                    dag,
                    min_update_interval=settings.MIN_SERIALIZED_DAG_UPDATE_INTERVAL,
                    session=session,
                )
                if dag_was_updated:
                    self.log.debug("Syncing DAG permissions: %s to the DB",
                                   dag.dag_id)
                    from airflow.www.security import ApplessAirflowSecurityManager

                    security_manager = ApplessAirflowSecurityManager(
                        session=session)
                    security_manager.sync_perm_for_dag(dag.dag_id,
                                                       dag.access_control)
                return []
            except OperationalError:
                raise
            except Exception:  # pylint: disable=broad-except
                return [(dag.fileloc,
                         traceback.format_exc(
                             limit=-self.dagbag_import_error_traceback_depth))]

        # Retry 'DAG.bulk_write_to_db' & 'SerializedDagModel.bulk_sync_to_db' in case
        # of any Operational Errors
        # In case of failures, provide_session handles rollback
        for attempt in run_with_db_retries(logger=self.log):
            with attempt:
                serialize_errors = []
                self.log.debug(
                    "Running dagbag.sync_to_db with retries. Try %d of %d",
                    attempt.retry_state.attempt_number,
                    MAX_DB_RETRIES,
                )
                self.log.debug("Calling the DAG.bulk_sync_to_db method")
                try:
                    # Write Serialized DAGs to DB, capturing errors
                    for dag in self.dags.values():
                        serialize_errors.extend(
                            _serialize_dag_capturing_errors(dag, session))

                    DAG.bulk_write_to_db(self.dags.values(), session=session)
                except OperationalError:
                    session.rollback()
                    raise
                # Only now that we are "complete" do we update import_errors - we
                # don't want to record errors from previous failed attempts
                self.import_errors.update(dict(serialize_errors))
Example #14
    def test_mark_success_on_success_callback(self):
        """
        Test that ensures that where a task is marked success in the UI
        on_success_callback gets executed
        """
        # Use a shared-memory Value so we can reliably observe the change even
        # when it is updated from another process.
        success_callback_called = Value('i', 0)
        task_terminated_externally = Value('i', 1)
        shared_mem_lock = Lock()

        def success_callback(context):
            with shared_mem_lock:
                success_callback_called.value += 1
            assert context['dag_run'].dag_id == 'test_mark_success'

        dag = DAG(dag_id='test_mark_success',
                  start_date=DEFAULT_DATE,
                  default_args={'owner': 'owner1'})

        def task_function(ti):
            # pylint: disable=unused-argument
            time.sleep(60)
            # This should not happen -- the state change should be noticed and the task should get killed
            with shared_mem_lock:
                task_terminated_externally.value = 0

        task = PythonOperator(
            task_id='test_state_succeeded1',
            python_callable=task_function,
            on_success_callback=success_callback,
            dag=dag,
        )

        session = settings.Session()

        dag.clear()
        dag.create_dagrun(
            run_id="test",
            state=State.RUNNING,
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            session=session,
        )
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        ti.refresh_from_db()
        job1 = LocalTaskJob(task_instance=ti,
                            ignore_ti_state=True,
                            executor=SequentialExecutor())
        job1.task_runner = StandardTaskRunner(job1)

        settings.engine.dispose()
        process = multiprocessing.Process(target=job1.run)
        process.start()

        for _ in range(0, 25):
            ti.refresh_from_db()
            if ti.state == State.RUNNING:
                break
            time.sleep(0.2)
        assert ti.state == State.RUNNING
        ti.state = State.SUCCESS
        session.merge(ti)
        session.commit()

        process.join(timeout=10)
        assert success_callback_called.value == 1
        assert task_terminated_externally.value == 1
        assert not process.is_alive()
Example #15
@task
def task_3(value: str) -> None:
    """Dummy Task3"""
    print(f'[ Task3 {value} ]')


@task
def task_end() -> None:
    """Dummy Task which is Last Task of Dag"""
    print('[ Task_End  ]')


# Creating TaskGroups
@task_group
def task_group_function(value: int) -> None:
    """TaskGroup for grouping related Tasks"""
    return task_3(task_2(task_1(value)))


# Executing Tasks and TaskGroups
with DAG(dag_id="example_task_group_decorator",
         start_date=datetime(2021, 1, 1),
         catchup=False,
         tags=["example"]) as dag:
    start_task = task_start()
    end_task = task_end()
    for i in range(5):
        current_task_group = task_group_function(i)
        start_task >> current_task_group >> end_task

# [END howto_task_group_decorator]
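
# For comparison, a hedged sketch of the same grouping written with the
# TaskGroup context manager instead of the @task_group decorator; the two
# styles build the same group hierarchy. The dag_id here is hypothetical and
# task_1/task_2 are assumed to be @task-decorated like task_3 above.
from airflow.utils.task_group import TaskGroup

with DAG(dag_id="example_task_group_context_manager",
         start_date=datetime(2021, 1, 1),
         catchup=False,
         tags=["example"]) as dag2:
    start = task_start()
    end = task_end()
    for i in range(5):
        with TaskGroup(group_id=f"group_{i}") as tg:
            task_3(task_2(task_1(i)))
        start >> tg >> end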
Example #16

from airflow.models.dag import DAG
from airflow.utils import timezone
from airflow.ti_deps.met_handlers.aiflow_met_handler import AIFlowMetHandler
from airflow.operators.dummy_operator import DummyOperator
from airflow.models.event import Event
from airflow.operators.send_event_operator import SendEventOperator

dag = DAG(dag_id='workflow_1',
          start_date=timezone.utcnow(),
          schedule_interval="@once")
op_0 = DummyOperator(task_id='0_job', dag=dag)
op_1 = DummyOperator(task_id='1_job', dag=dag)
op_2 = SendEventOperator(task_id='2_job',
                         dag=dag,
                         uri='localhost:50051',
                         event=Event(key='key_1',
                                     value='value_1',
                                     event_type='UNDEFINED'))
op_3 = SendEventOperator(task_id='3_job',
                         dag=dag,
                         uri='localhost:50051',
                         event=Event(key='key_2',
                                     value='value_2',
                                     event_type='UNDEFINED'))
Example #17
    def setUp(self):
        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}

        self.dag = DAG('test_dag_id', default_args=args)

        self.mock_context = MagicMock()
Example #18
CLUSTER_NAME = environ.get('EKS_CLUSTER_NAME', 'eks-demo')
NODEGROUP_NAME = f'{CLUSTER_NAME}-nodegroup'
ROLE_ARN = environ.get('EKS_DEMO_ROLE_ARN', 'arn:aws:iam::123456789012:role/role_name')
SUBNETS = environ.get('EKS_DEMO_SUBNETS', 'subnet-12345ab subnet-67890cd').split(' ')
VPC_CONFIG = {
    'subnetIds': SUBNETS,
    'endpointPublicAccess': True,
    'endpointPrivateAccess': False,
}


with DAG(
    dag_id='example_eks_using_defaults_dag',
    default_args={'cluster_name': CLUSTER_NAME},
    schedule_interval=None,
    start_date=datetime(2021, 1, 1),
    catchup=False,
    max_active_runs=1,
    tags=['example'],
) as dag:

    # [START howto_operator_eks_create_cluster_with_nodegroup]
    # Create an Amazon EKS cluster control plane and an EKS nodegroup compute platform in one step.
    create_cluster_and_nodegroup = EksCreateClusterOperator(
        task_id='create_eks_cluster_and_nodegroup',
        nodegroup_name=NODEGROUP_NAME,
        cluster_role_arn=ROLE_ARN,
        nodegroup_role_arn=ROLE_ARN,
        # Opting to use the same ARN for the cluster and the nodegroup here,
        # but a different ARN could be configured and passed if desired.
        resources_vpc_config=VPC_CONFIG,
    )
    # [END howto_operator_eks_create_cluster_with_nodegroup]

Example #19

class TestLatestOnlyOperator(unittest.TestCase):
    def setUp(self):
        super().setUp()
        self.dag = DAG('test_dag',
                       default_args={
                           'owner': 'airflow',
                           'start_date': DEFAULT_DATE
                       },
                       schedule_interval=INTERVAL)
        with create_session() as session:
            session.query(DagRun).delete()
            session.query(TaskInstance).delete()
        freezer = freeze_time(FROZEN_NOW)
        freezer.start()
        self.addCleanup(freezer.stop)

    def test_run(self):
        task = LatestOnlyOperator(task_id='latest', dag=self.dag)
        task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    def test_skipping_non_latest(self):
        latest_task = LatestOnlyOperator(task_id='latest', dag=self.dag)
        downstream_task = DummyOperator(task_id='downstream', dag=self.dag)
        downstream_task2 = DummyOperator(task_id='downstream_2', dag=self.dag)
        downstream_task3 = DummyOperator(task_id='downstream_3',
                                         trigger_rule=TriggerRule.NONE_FAILED,
                                         dag=self.dag)

        downstream_task.set_upstream(latest_task)
        downstream_task2.set_upstream(downstream_task)
        downstream_task3.set_upstream(downstream_task)

        self.dag.create_dagrun(
            run_id="scheduled__1",
            start_date=timezone.utcnow(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING,
        )

        self.dag.create_dagrun(
            run_id="scheduled__2",
            start_date=timezone.utcnow(),
            execution_date=timezone.datetime(2016, 1, 1, 12),
            state=State.RUNNING,
        )

        self.dag.create_dagrun(
            run_id="scheduled__3",
            start_date=timezone.utcnow(),
            execution_date=END_DATE,
            state=State.RUNNING,
        )

        latest_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task2.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task3.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        latest_instances = get_task_instances('latest')
        exec_date_to_latest_state = {
            ti.execution_date: ti.state
            for ti in latest_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'success',
                timezone.datetime(2016, 1, 1, 12): 'success',
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_latest_state)

        downstream_instances = get_task_instances('downstream')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'skipped',
                timezone.datetime(2016, 1, 1, 12): 'skipped',
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_downstream_state)

        downstream_instances = get_task_instances('downstream_2')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): None,
                timezone.datetime(2016, 1, 1, 12): None,
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_downstream_state)

        downstream_instances = get_task_instances('downstream_3')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'success',
                timezone.datetime(2016, 1, 1, 12): 'success',
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_downstream_state)

    def test_not_skipping_external(self):
        latest_task = LatestOnlyOperator(task_id='latest', dag=self.dag)
        downstream_task = DummyOperator(task_id='downstream', dag=self.dag)
        downstream_task2 = DummyOperator(task_id='downstream_2', dag=self.dag)

        downstream_task.set_upstream(latest_task)
        downstream_task2.set_upstream(downstream_task)

        self.dag.create_dagrun(
            run_id="manual__1",
            start_date=timezone.utcnow(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING,
            external_trigger=True,
        )

        self.dag.create_dagrun(
            run_id="manual__2",
            start_date=timezone.utcnow(),
            execution_date=timezone.datetime(2016, 1, 1, 12),
            state=State.RUNNING,
            external_trigger=True,
        )

        self.dag.create_dagrun(
            run_id="manual__3",
            start_date=timezone.utcnow(),
            execution_date=END_DATE,
            state=State.RUNNING,
            external_trigger=True,
        )

        latest_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task2.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        latest_instances = get_task_instances('latest')
        exec_date_to_latest_state = {
            ti.execution_date: ti.state
            for ti in latest_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'success',
                timezone.datetime(2016, 1, 1, 12): 'success',
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_latest_state)

        downstream_instances = get_task_instances('downstream')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'success',
                timezone.datetime(2016, 1, 1, 12): 'success',
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_downstream_state)

        downstream_instances = get_task_instances('downstream_2')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'success',
                timezone.datetime(2016, 1, 1, 12): 'success',
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_downstream_state)
Example #20

from airflow.models.dag import DAG
from datetime import datetime
from airflow.operators.bash_operator import BashOperator

DAG_NAME = "infinite_dag"
default_args = {
    "owner": "airflow",
    "start_date": datetime(2020, 1, 1),
}
with DAG(DAG_NAME, schedule_interval="*/10 * * * *", default_args=default_args) as dag:
    echo_success = BashOperator(task_id="echo_success", bash_command="echo success")
Example #21
    def test_external_task_sensor_fn_multiple_execution_dates(self):
        bash_command_code = """
{% set s=execution_date.time().second %}
echo "second is {{ s }}"
if [[ $(( {{ s }} % 60 )) == 1 ]]
    then
        exit 1
fi
exit 0
"""
        dag_external_id = TEST_DAG_ID + '_external'
        dag_external = DAG(dag_external_id,
                           default_args=self.args,
                           schedule_interval=timedelta(seconds=1))
        task_external_with_failure = BashOperator(
            task_id="task_external_with_failure",
            bash_command=bash_command_code,
            retries=0,
            dag=dag_external)
        task_external_without_failure = DummyOperator(
            task_id="task_external_without_failure",
            retries=0,
            dag=dag_external)

        task_external_without_failure.run(start_date=DEFAULT_DATE,
                                          end_date=DEFAULT_DATE +
                                          timedelta(seconds=1),
                                          ignore_ti_state=True)

        session = settings.Session()
        TI = TaskInstance
        try:
            task_external_with_failure.run(start_date=DEFAULT_DATE,
                                           end_date=DEFAULT_DATE +
                                           timedelta(seconds=1),
                                           ignore_ti_state=True)
            # The task_external_with_failure task is expected to fail
            # once per minute (on the run that lands on the first second
            # of each minute).
        except Exception as e:  # pylint: disable=broad-except
            failed_tis = session.query(TI).filter(
                TI.dag_id == dag_external_id, TI.state == State.FAILED,
                TI.execution_date == DEFAULT_DATE +
                timedelta(seconds=1)).all()
            if (len(failed_tis) == 1
                    and failed_tis[0].task_id == 'task_external_with_failure'):
                pass
            else:
                raise e

        dag_id = TEST_DAG_ID
        dag = DAG(dag_id,
                  default_args=self.args,
                  schedule_interval=timedelta(minutes=1))
        task_without_failure = ExternalTaskSensor(
            task_id='task_without_failure',
            external_dag_id=dag_external_id,
            external_task_id='task_external_without_failure',
            execution_date_fn=lambda dt: [dt + timedelta(seconds=i) for i in range(2)],
            allowed_states=['success'],
            retries=0,
            timeout=1,
            poke_interval=1,
            dag=dag)
        task_with_failure = ExternalTaskSensor(
            task_id='task_with_failure',
            external_dag_id=dag_external_id,
            external_task_id='task_external_with_failure',
            execution_date_fn=lambda dt: [dt + timedelta(seconds=i) for i in range(2)],
            allowed_states=['success'],
            retries=0,
            timeout=1,
            poke_interval=1,
            dag=dag)

        task_without_failure.run(start_date=DEFAULT_DATE,
                                 end_date=DEFAULT_DATE,
                                 ignore_ti_state=True)

        with self.assertRaises(AirflowSensorTimeout):
            task_with_failure.run(start_date=DEFAULT_DATE,
                                  end_date=DEFAULT_DATE,
                                  ignore_ti_state=True)
Example #22
"""
This dag only runs some simple tasks to test Airflow's task execution.
"""
from airflow.models.dag import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.bash_operator import BashOperator
from airflow import utils

DAG_ID = 'test_dag'

DEFAULT_ARGS = {
    'owner': 'airflow',
    'depends_on_past': True,
    'start_date': days_ago(2)
}

dag = DAG(
    dag_id=DAG_ID,
    schedule_interval='*/10 * * * *',  # https://crontab.guru/#*/10_*_*_*_*
    default_args=DEFAULT_ARGS,
)

with dag:
    run_this_1 = DummyOperator(task_id='run_this_1')
    run_this_2 = DummyOperator(task_id='run_this_2')
    run_this_2.set_upstream(run_this_1)
    run_this_3 = DummyOperator(task_id='run_this_3')
    run_this_3.set_upstream(run_this_2)
    BashOperator(task_id='say_hi', bash_command='echo hi')
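
The same chain can be wired with Airflow's bitshift operators instead of set_upstream; a sketch reusing the tasks defined above:

# identical dependencies to the set_upstream calls above
run_this_1 >> run_this_2 >> run_this_3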
Example #23
    def setUp(self):
        self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=True)
        self.args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        self.dag = DAG(TEST_DAG_ID, default_args=self.args)
Example #24
from datetime import datetime
from os import environ

from airflow.models.dag import DAG
from airflow.providers.amazon.aws.operators.eks import EKSCreateClusterOperator
from airflow.providers.amazon.aws.sensors.eks import EKSClusterStateSensor

# CLUSTER_NAME is referenced below but was cut from this excerpt; the env var
# name and default used here are assumed placeholders.
CLUSTER_NAME = environ.get('EKS_DEMO_CLUSTER_NAME', 'example-eks-cluster')
NODEGROUP_SUFFIX = '-nodegroup'
NODEGROUP_NAME = CLUSTER_NAME + NODEGROUP_SUFFIX
ROLE_ARN = environ.get('EKS_DEMO_ROLE_ARN',
                       'arn:aws:iam::123456789012:role/role_name')
SUBNETS = environ.get('EKS_DEMO_SUBNETS',
                      'subnet-12345ab subnet-67890cd').split(' ')
VPC_CONFIG = {
    'subnetIds': SUBNETS,
    'endpointPublicAccess': True,
    'endpointPrivateAccess': False,
}

with DAG(
        dag_id='example_eks_with_nodegroups_dag',
        default_args={'cluster_name': CLUSTER_NAME},
        schedule_interval=None,
        start_date=datetime(2021, 1, 1),
        max_active_runs=1,
        tags=['example'],
) as dag:

    # [START howto_operator_eks_create_cluster]
    # Create an Amazon EKS Cluster control plane without attaching a compute service.
    create_cluster = EKSCreateClusterOperator(
        task_id='create_eks_cluster',
        cluster_role_arn=ROLE_ARN,
        resources_vpc_config=VPC_CONFIG,
        compute=None,
    )
    # [END howto_operator_eks_create_cluster]

    await_create_cluster = EKSClusterStateSensor(
Example #25

from datetime import datetime

from airflow.models.dag import DAG
from airflow.providers.influxdb.operators.influxdb import InfluxDBOperator

dag = DAG(
    'example_influxdb_operator',
    start_date=datetime(2021, 1, 1),
    tags=['example'],
    catchup=False,
)

# [START howto_operator_influxdb]

query_influxdb_task = InfluxDBOperator(
    influxdb_conn_id='influxdb_conn_id',
    task_id='query_influxdb',
    sql='from(bucket:"test-influx") |> range(start: -10m, stop: {{ds}})',
    dag=dag,
)

# [END howto_operator_influxdb]
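
Since sql is a templated field, {{ds}} in the Flux query above is substituted per run; a minimal sketch of that substitution using Jinja directly, outside any real task run:

from jinja2 import Template

sql = 'from(bucket:"test-influx") |> range(start: -10m, stop: {{ds}})'
print(Template(sql).render(ds='2022-01-01'))
# from(bucket:"test-influx") |> range(start: -10m, stop: 2022-01-01)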
Example #26
class TestCore(unittest.TestCase):
    default_scheduler_args = {"num_runs": 1}

    def setUp(self):
        self.dagbag = DagBag(dag_folder=DEV_NULL,
                             include_examples=True,
                             read_dags_from_db=False)
        self.args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        self.dag = DAG(TEST_DAG_ID, default_args=self.args)
        self.dag_bash = self.dagbag.dags['example_bash_operator']
        self.runme_0 = self.dag_bash.get_task('runme_0')
        self.run_after_loop = self.dag_bash.get_task('run_after_loop')
        self.run_this_last = self.dag_bash.get_task('run_this_last')

    def tearDown(self):
        session = Session()
        session.query(DagRun).filter(DagRun.dag_id == TEST_DAG_ID).delete(
            synchronize_session=False)
        session.query(TaskInstance).filter(
            TaskInstance.dag_id == TEST_DAG_ID).delete(
                synchronize_session=False)
        session.query(TaskFail).filter(TaskFail.dag_id == TEST_DAG_ID).delete(
            synchronize_session=False)
        session.commit()
        session.close()
        clear_db_dags()
        clear_db_runs()

    def test_check_operators(self):
        conn_id = "sqlite_default"

        captain_hook = BaseHook.get_hook(conn_id=conn_id)  # quite funny :D
        captain_hook.run("CREATE TABLE operator_test_table (a, b)")
        captain_hook.run("insert into operator_test_table values (1,2)")

        self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                               state=State.RUNNING,
                               execution_date=DEFAULT_DATE)
        op = CheckOperator(task_id='check',
                           sql="select count(*) from operator_test_table",
                           conn_id=conn_id,
                           dag=self.dag)

        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)

        op = ValueCheckOperator(
            task_id='value_check',
            pass_value=95,
            tolerance=0.1,
            conn_id=conn_id,
            sql="SELECT 100",
            dag=self.dag,
        )
        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)

        captain_hook.run("drop table operator_test_table")

    def test_clear_api(self):
        task = self.dag_bash.tasks[0]
        task.clear(start_date=DEFAULT_DATE,
                   end_date=DEFAULT_DATE,
                   upstream=True,
                   downstream=True)
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        ti.are_dependents_done()

    def test_illegal_args(self):
        """
        Tests that Operators reject illegal arguments
        """
        msg = 'Invalid arguments were passed to BashOperator (task_id: test_illegal_args).'
        with conf_vars({('operators', 'allow_illegal_arguments'): 'True'}):
            with pytest.warns(PendingDeprecationWarning) as warnings:
                BashOperator(
                    task_id='test_illegal_args',
                    bash_command='echo success',
                    dag=self.dag,
                    illegal_argument_1234='hello?',
                )
                assert any(msg in str(w) for w in warnings)

    def test_illegal_args_forbidden(self):
        """
        Tests that operators raise exceptions on illegal arguments when
        illegal arguments are not allowed.
        """
        with pytest.raises(AirflowException) as ctx:
            BashOperator(
                task_id='test_illegal_args',
                bash_command='echo success',
                dag=self.dag,
                illegal_argument_1234='hello?',
            )
        assert 'Invalid arguments were passed to BashOperator (task_id: test_illegal_args).' in str(
            ctx.value)

    def test_bash_operator(self):
        op = BashOperator(task_id='test_bash_operator',
                          bash_command="echo success",
                          dag=self.dag)
        self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                               state=State.RUNNING,
                               execution_date=DEFAULT_DATE)

        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)

    def test_bash_operator_multi_byte_output(self):
        op = BashOperator(
            task_id='test_multi_byte_bash_operator',
            bash_command="echo \u2600",
            dag=self.dag,
            output_encoding='utf-8',
        )
        self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                               state=State.RUNNING,
                               execution_date=DEFAULT_DATE)
        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)

    def test_bash_operator_kill(self):
        import psutil

        sleep_time = "100%d" % os.getpid()
        op = BashOperator(
            task_id='test_bash_operator_kill',
            execution_timeout=timedelta(seconds=1),
            bash_command=f"/bin/bash -c 'sleep {sleep_time}'",
            dag=self.dag,
        )
        with pytest.raises(AirflowTaskTimeout):
            op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        sleep(2)
        pid = -1
        for proc in psutil.process_iter():
            if proc.cmdline() == ['sleep', sleep_time]:
                pid = proc.pid
        if pid != -1:
            os.kill(pid, signal.SIGTERM)
            self.fail(
                "BashOperator's subprocess still running after stopping on timeout!"
            )

    def test_on_failure_callback(self):
        mock_failure_callback = MagicMock()

        op = BashOperator(
            task_id='check_on_failure_callback',
            bash_command="exit 1",
            dag=self.dag,
            on_failure_callback=mock_failure_callback,
        )
        with pytest.raises(AirflowException):
            op.run(start_date=DEFAULT_DATE,
                   end_date=DEFAULT_DATE,
                   ignore_ti_state=True)
        mock_failure_callback.assert_called_once()

    def test_dryrun(self):
        op = BashOperator(task_id='test_dryrun',
                          bash_command="echo success",
                          dag=self.dag)
        op.dry_run()

    def test_sqlite(self):
        import airflow.providers.sqlite.operators.sqlite

        op = airflow.providers.sqlite.operators.sqlite.SqliteOperator(
            task_id='time_sqlite',
            sql="CREATE TABLE IF NOT EXISTS unitest (dummy VARCHAR(20))",
            dag=self.dag)
        self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                               state=State.RUNNING,
                               execution_date=DEFAULT_DATE)
        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)

    def test_timeout(self):
        op = PythonOperator(
            task_id='test_timeout',
            execution_timeout=timedelta(seconds=1),
            python_callable=lambda: sleep(5),
            dag=self.dag,
        )
        with pytest.raises(AirflowTaskTimeout):
            op.run(start_date=DEFAULT_DATE,
                   end_date=DEFAULT_DATE,
                   ignore_ti_state=True)

    def test_python_op(self):
        def test_py_op(templates_dict, ds, **kwargs):
            if templates_dict['ds'] != ds:
                raise Exception("failure")

        op = PythonOperator(task_id='test_py_op',
                            python_callable=test_py_op,
                            templates_dict={'ds': "{{ ds }}"},
                            dag=self.dag)
        self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                               state=State.RUNNING,
                               execution_date=DEFAULT_DATE)
        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)

    def test_complex_template(self):
        def verify_templated_field(context):
            assert context['ti'].task.some_templated_field['bar'][
                1] == context['ds']

        op = OperatorSubclass(
            task_id='test_complex_template',
            some_templated_field={
                'foo': '123',
                'bar': ['baz', '{{ ds }}']
            },
            dag=self.dag,
        )
        op.execute = verify_templated_field
        self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                               state=State.RUNNING,
                               execution_date=DEFAULT_DATE)
        op.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)

    def test_template_non_bool(self):
        """
        Test templates can handle objects with no sense of truthiness
        """
        class NonBoolObject:
            def __len__(self):  # pylint: disable=invalid-length-returned
                return NotImplemented

            def __bool__(self):  # pylint: disable=invalid-bool-returned, bad-option-value
                return NotImplemented

        op = OperatorSubclass(task_id='test_bad_template_obj',
                              some_templated_field=NonBoolObject(),
                              dag=self.dag)
        op.resolve_template_files()

    def test_task_get_template(self):
        TI = TaskInstance
        ti = TI(task=self.runme_0, execution_date=DEFAULT_DATE)
        ti.dag = self.dag_bash
        self.dag_bash.create_dagrun(run_type=DagRunType.MANUAL,
                                    state=State.RUNNING,
                                    execution_date=DEFAULT_DATE)
        ti.run(ignore_ti_state=True)
        context = ti.get_template_context()

        # DEFAULT_DATE is 2015-01-01
        assert context['ds'] == '2015-01-01'
        assert context['ds_nodash'] == '20150101'

        # next_ds is 2015-01-02 as the dag interval is daily
        assert context['next_ds'] == '2015-01-02'
        assert context['next_ds_nodash'] == '20150102'

        # prev_ds is 2014-12-31 as the dag interval is daily
        assert context['prev_ds'] == '2014-12-31'
        assert context['prev_ds_nodash'] == '20141231'

        assert context['ts'] == '2015-01-01T00:00:00+00:00'
        assert context['ts_nodash'] == '20150101T000000'
        assert context['ts_nodash_with_tz'] == '20150101T000000+0000'

        assert context['yesterday_ds'] == '2014-12-31'
        assert context['yesterday_ds_nodash'] == '20141231'

        assert context['tomorrow_ds'] == '2015-01-02'
        assert context['tomorrow_ds_nodash'] == '20150102'

    def test_local_task_job(self):
        TI = TaskInstance
        ti = TI(task=self.runme_0, execution_date=DEFAULT_DATE)
        job = LocalTaskJob(task_instance=ti, ignore_ti_state=True)
        job.run()

    def test_raw_job(self):
        TI = TaskInstance
        ti = TI(task=self.runme_0, execution_date=DEFAULT_DATE)
        ti.dag = self.dag_bash
        self.dag_bash.create_dagrun(run_type=DagRunType.MANUAL,
                                    state=State.RUNNING,
                                    execution_date=DEFAULT_DATE)
        ti.run(ignore_ti_state=True)

    def test_bad_trigger_rule(self):
        with pytest.raises(AirflowException):
            DummyOperator(task_id='test_bad_trigger',
                          trigger_rule="non_existent",
                          dag=self.dag)

    def test_terminate_task(self):
        """If a task instance's db state get deleted, it should fail"""
        from airflow.executors.sequential_executor import SequentialExecutor

        TI = TaskInstance
        dag = self.dagbag.dags.get('test_utils')
        task = dag.task_dict.get('sleeps_forever')

        ti = TI(task=task, execution_date=DEFAULT_DATE)
        job = LocalTaskJob(task_instance=ti,
                           ignore_ti_state=True,
                           executor=SequentialExecutor())

        # Running task instance asynchronously
        proc = multiprocessing.Process(target=job.run)
        proc.start()
        sleep(5)
        settings.engine.dispose()
        session = settings.Session()
        ti.refresh_from_db(session=session)
        # making sure it's actually running
        assert State.RUNNING == ti.state
        ti = (session.query(TI).filter_by(dag_id=task.dag_id,
                                          task_id=task.task_id,
                                          execution_date=DEFAULT_DATE).one())

        # deleting the instance should result in a failure
        session.delete(ti)
        session.commit()
        # waiting for the async task to finish
        proc.join()

        # making sure that the task ended up as failed
        ti.refresh_from_db(session=session)
        assert State.FAILED == ti.state
        session.close()

    def test_task_fail_duration(self):
        """If a task fails, the duration should be recorded in TaskFail"""

        op1 = BashOperator(task_id='pass_sleepy',
                           bash_command='sleep 3',
                           dag=self.dag)
        op2 = BashOperator(
            task_id='fail_sleepy',
            bash_command='sleep 5',
            execution_timeout=timedelta(seconds=3),
            retry_delay=timedelta(seconds=0),
            dag=self.dag,
        )
        session = settings.Session()
        try:
            op1.run(start_date=DEFAULT_DATE,
                    end_date=DEFAULT_DATE,
                    ignore_ti_state=True)
        except Exception:  # pylint: disable=broad-except
            pass
        try:
            op2.run(start_date=DEFAULT_DATE,
                    end_date=DEFAULT_DATE,
                    ignore_ti_state=True)
        except Exception:  # pylint: disable=broad-except
            pass
        op1_fails = (session.query(TaskFail).filter_by(
            task_id='pass_sleepy',
            dag_id=self.dag.dag_id,
            execution_date=DEFAULT_DATE).all())
        op2_fails = (session.query(TaskFail).filter_by(
            task_id='fail_sleepy',
            dag_id=self.dag.dag_id,
            execution_date=DEFAULT_DATE).all())

        assert 0 == len(op1_fails)
        assert 1 == len(op2_fails)
        assert sum([f.duration for f in op2_fails]) >= 3

    def test_externally_triggered_dagrun(self):
        TI = TaskInstance

        # Create the dagrun between two "scheduled" execution dates of the DAG
        execution_date = DEFAULT_DATE + timedelta(days=2)
        execution_ds = execution_date.strftime('%Y-%m-%d')
        execution_ds_nodash = execution_ds.replace('-', '')

        dag = DAG(TEST_DAG_ID,
                  default_args=self.args,
                  schedule_interval=timedelta(weeks=1),
                  start_date=DEFAULT_DATE)
        task = DummyOperator(task_id='test_externally_triggered_dag_context',
                             dag=dag)
        dag.create_dagrun(
            run_type=DagRunType.SCHEDULED,
            execution_date=execution_date,
            state=State.RUNNING,
            external_trigger=True,
        )
        task.run(start_date=execution_date, end_date=execution_date)

        ti = TI(task=task, execution_date=execution_date)
        context = ti.get_template_context()

        # next_ds/prev_ds should be the execution date for manually triggered runs
        assert context['next_ds'] == execution_ds
        assert context['next_ds_nodash'] == execution_ds_nodash

        assert context['prev_ds'] == execution_ds
        assert context['prev_ds_nodash'] == execution_ds_nodash

    def test_dag_params_and_task_params(self):
        # This test case guards how params of DAG and Operator work together.
        # - If any key exists in either DAG's or Operator's params,
        #   it is guaranteed to be available eventually.
        # - If any key exists in both DAG's params and Operator's params,
        #   the latter has precedence.
        TI = TaskInstance

        dag = DAG(
            TEST_DAG_ID,
            default_args=self.args,
            schedule_interval=timedelta(weeks=1),
            start_date=DEFAULT_DATE,
            params={
                'key_1': 'value_1',
                'key_2': 'value_2_old'
            },
        )
        task1 = DummyOperator(
            task_id='task1',
            dag=dag,
            params={
                'key_2': 'value_2_new',
                'key_3': 'value_3'
            },
        )
        task2 = DummyOperator(task_id='task2', dag=dag)
        dag.create_dagrun(
            run_type=DagRunType.SCHEDULED,
            execution_date=DEFAULT_DATE,
            state=State.RUNNING,
            external_trigger=True,
        )
        task1.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        task2.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        ti1 = TI(task=task1, execution_date=DEFAULT_DATE)
        ti2 = TI(task=task2, execution_date=DEFAULT_DATE)
        context1 = ti1.get_template_context()
        context2 = ti2.get_template_context()

        assert context1['params'] == {
            'key_1': 'value_1',
            'key_2': 'value_2_new',
            'key_3': 'value_3'
        }
        assert context2['params'] == {
            'key_1': 'value_1',
            'key_2': 'value_2_old'
        }
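
The precedence described in the comments above behaves like a dict merge in which task-level params overwrite DAG-level keys; a standalone sketch:

dag_params = {'key_1': 'value_1', 'key_2': 'value_2_old'}
task_params = {'key_2': 'value_2_new', 'key_3': 'value_3'}
merged = {**dag_params, **task_params}  # task-level params win on conflicts
assert merged == {'key_1': 'value_1', 'key_2': 'value_2_new', 'key_3': 'value_3'}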
Example #27
    def setUp(self):
        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        self.dag = DAG('test_dag_id', default_args=args)
Example #28
# [START default_args]
default_args = {
    # (earlier keys in this dict were truncated from the excerpt)
    # 'dag': dag,
    # 'sla': timedelta(hours=2),
    # 'execution_timeout': timedelta(seconds=300),
    # 'on_failure_callback': some_function,
    # 'on_success_callback': some_other_function,
    # 'on_retry_callback': another_function,
    # 'sla_miss_callback': yet_another_function,
    # 'trigger_rule': 'all_success'
}
# [END default_args]

# [START instantiate_dag]
dag = DAG(
    'tutorial',
    default_args=default_args,
    description='A simple tutorial DAG',
    schedule_interval=timedelta(days=1),
    tags=['example'],
)
# [END instantiate_dag]

# t1, t2 and t3 are examples of tasks created by instantiating operators
# [START basic_task]
t1 = BashOperator(
    task_id='print_date',
    bash_command='date',
    dag=dag,
)

t2 = BashOperator(
    task_id='sleep',
    # remaining arguments assumed from the stock Airflow tutorial; the
    # original excerpt was cut off at this point
    depends_on_past=False,
    bash_command='sleep 5',
    retries=3,
    dag=dag,
)
Example #29

    def setUp(self):
        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        self.dag = DAG(TEST_DAG_ID, default_args=args)
Example #30
    def test_render_task_group(self):
        with DAG(dag_id="example_task_group", start_date=START_DATE) as dag:
            start = DummyOperator(task_id="start")

            with TaskGroup("section_1", tooltip="Tasks for section_1") as section_1:
                task_1 = DummyOperator(task_id="task_1")
                task_2 = BashOperator(task_id="task_2", bash_command='echo 1')
                task_3 = DummyOperator(task_id="task_3")

                task_1 >> [task_2, task_3]

            with TaskGroup("section_2", tooltip="Tasks for section_2") as section_2:
                task_1 = DummyOperator(task_id="task_1")

                with TaskGroup("inner_section_2", tooltip="Tasks for inner_section2"):
                    task_2 = BashOperator(task_id="task_2", bash_command='echo 1')
                    task_3 = DummyOperator(task_id="task_3")
                    task_4 = DummyOperator(task_id="task_4")

                    [task_2, task_3] >> task_4

            end = DummyOperator(task_id='end')

            start >> section_1 >> section_2 >> end

        dot = dot_renderer.render_dag(dag)

        assert dot.source == '\n'.join(
            [
                'digraph example_task_group {',
                '\tgraph [label=example_task_group labelloc=t rankdir=LR]',
                '\tend [color="#000000" fillcolor="#e8f7e4" label=end shape=rectangle '
                'style="filled,rounded"]',
                '\tsubgraph cluster_section_1 {',
                '\t\tcolor="#000000" fillcolor="#6495ed7f" label=section_1 shape=rectangle style=filled',
                '\t\t"section_1.upstream_join_id" [color="#000000" fillcolor=CornflowerBlue height=0.2 '
                'label="" shape=circle style="filled,rounded" width=0.2]',
                '\t\t"section_1.downstream_join_id" [color="#000000" fillcolor=CornflowerBlue height=0.2 '
                'label="" shape=circle style="filled,rounded" width=0.2]',
                '\t\t"section_1.task_1" [color="#000000" fillcolor="#e8f7e4" label=task_1 shape=rectangle '
                'style="filled,rounded"]',
                '\t\t"section_1.task_2" [color="#000000" fillcolor="#f0ede4" label=task_2 shape=rectangle '
                'style="filled,rounded"]',
                '\t\t"section_1.task_3" [color="#000000" fillcolor="#e8f7e4" label=task_3 shape=rectangle '
                'style="filled,rounded"]',
                '\t}',
                '\tsubgraph cluster_section_2 {',
                '\t\tcolor="#000000" fillcolor="#6495ed7f" label=section_2 shape=rectangle style=filled',
                '\t\t"section_2.upstream_join_id" [color="#000000" fillcolor=CornflowerBlue height=0.2 '
                'label="" shape=circle style="filled,rounded" width=0.2]',
                '\t\t"section_2.downstream_join_id" [color="#000000" fillcolor=CornflowerBlue height=0.2 '
                'label="" shape=circle style="filled,rounded" width=0.2]',
                '\t\tsubgraph "cluster_section_2.inner_section_2" {',
                '\t\t\tcolor="#000000" fillcolor="#6495ed7f" label=inner_section_2 shape=rectangle '
                'style=filled',
                '\t\t\t"section_2.inner_section_2.task_2" [color="#000000" fillcolor="#f0ede4" label=task_2 '
                'shape=rectangle style="filled,rounded"]',
                '\t\t\t"section_2.inner_section_2.task_3" [color="#000000" fillcolor="#e8f7e4" label=task_3 '
                'shape=rectangle style="filled,rounded"]',
                '\t\t\t"section_2.inner_section_2.task_4" [color="#000000" fillcolor="#e8f7e4" label=task_4 '
                'shape=rectangle style="filled,rounded"]',
                '\t\t}',
                '\t\t"section_2.task_1" [color="#000000" fillcolor="#e8f7e4" label=task_1 shape=rectangle '
                'style="filled,rounded"]',
                '\t}',
                '\tstart [color="#000000" fillcolor="#e8f7e4" label=start shape=rectangle '
                'style="filled,rounded"]',
                '\t"section_1.downstream_join_id" -> "section_2.upstream_join_id"',
                '\t"section_1.task_1" -> "section_1.task_2"',
                '\t"section_1.task_1" -> "section_1.task_3"',
                '\t"section_1.task_2" -> "section_1.downstream_join_id"',
                '\t"section_1.task_3" -> "section_1.downstream_join_id"',
                '\t"section_1.upstream_join_id" -> "section_1.task_1"',
                '\t"section_2.downstream_join_id" -> end',
                '\t"section_2.inner_section_2.task_2" -> "section_2.inner_section_2.task_4"',
                '\t"section_2.inner_section_2.task_3" -> "section_2.inner_section_2.task_4"',
                '\t"section_2.inner_section_2.task_4" -> "section_2.downstream_join_id"',
                '\t"section_2.task_1" -> "section_2.downstream_join_id"',
                '\t"section_2.upstream_join_id" -> "section_2.inner_section_2.task_2"',
                '\t"section_2.upstream_join_id" -> "section_2.inner_section_2.task_3"',
                '\t"section_2.upstream_join_id" -> "section_2.task_1"',
                '\tstart -> "section_1.upstream_join_id"',
                '}',
            ]
        )
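
render_dag returns a graphviz Digraph, so beyond asserting on dot.source the graph can also be written out for visual inspection; a one-line sketch (filename assumed):

        dot.save('example_task_group.dot')  # writes the DOT source to a file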