Пример #1
0
    def test_try_adopt_task_instances_none(self):
        """A fresh executor adopts nothing: every TI is handed straight back."""
        execution_date = datetime.utcnow()
        two_days_ago = datetime.utcnow() - timedelta(days=2)

        with DAG("test_try_adopt_task_instances_none"):
            task = BaseOperator(task_id="task_1", start_date=two_days_ago)

        instances = [TaskInstance(task=task, execution_date=execution_date)]
        executor = celery_executor.CeleryExecutor()

        # With no external executor ids set, nothing can be adopted, so the
        # full list must be returned untouched.
        self.assertEqual(executor.try_adopt_task_instances(instances), instances)
Пример #2
0
 def _get_task_instance(self,
                        trigger_rule=TriggerRule.ALL_SUCCESS,
                        state=None,
                        upstream_task_ids=None):
     """Build a TaskInstance around a throwaway test task.

     ``upstream_task_ids``, when given, is merged into the task's private
     upstream set so trigger-rule logic sees the desired dependencies.
     """
     task = BaseOperator(
         task_id='test_task',
         trigger_rule=trigger_rule,
         start_date=datetime(2015, 1, 1),
     )
     if upstream_task_ids:
         task._upstream_task_ids.update(upstream_task_ids)
     return TaskInstance(task=task, state=state, execution_date=task.start_date)
    def test_try_adopt_task_instances(self):
        """TIs carrying an ``external_executor_id`` are adopted: registered as
        running, given an adoption deadline, and not handed back."""
        exec_date = timezone.utcnow() - timedelta(minutes=2)
        start_date = timezone.utcnow() - timedelta(days=2)
        queued_dttm = timezone.utcnow() - timedelta(minutes=1)

        try_number = 1

        with DAG("test_try_adopt_task_instances_none") as dag:
            task_1 = BaseOperator(task_id="task_1", start_date=start_date)
            task_2 = BaseOperator(task_id="task_2", start_date=start_date)

        # Give both TIs an external executor id so they qualify for adoption.
        ti1 = TaskInstance(task=task_1, execution_date=exec_date)
        ti1.external_executor_id = '231'
        ti1.queued_dttm = queued_dttm
        ti2 = TaskInstance(task=task_2, execution_date=exec_date)
        ti2.external_executor_id = '232'
        ti2.queued_dttm = queued_dttm

        tis = [ti1, ti2]
        executor = celery_executor.CeleryExecutor()
        # Sanity-check the executor starts with no state of its own.
        assert executor.running == set()
        assert executor.adopted_task_timeouts == {}
        assert executor.tasks == {}

        not_adopted_tis = executor.try_adopt_task_instances(tis)

        key_1 = TaskInstanceKey(dag.dag_id, task_1.task_id, exec_date,
                                try_number)
        key_2 = TaskInstanceKey(dag.dag_id, task_2.task_id, exec_date,
                                try_number)
        assert executor.running == {key_1, key_2}
        # Adoption deadline = queue time plus the executor's timeout window.
        assert dict(executor.adopted_task_timeouts) == {
            key_1: queued_dttm + executor.task_adoption_timeout,
            key_2: queued_dttm + executor.task_adoption_timeout,
        }
        assert executor.tasks == {
            key_1: AsyncResult("231"),
            key_2: AsyncResult("232")
        }
        # Everything was adoptable, so nothing is returned to the caller.
        assert not_adopted_tis == []
Пример #4
0
 def test_no_new_fields_added_to_base_operator(self):
     """
     This test verifies that there are no new fields added to BaseOperator. And reminds that
     tests should be added for it.
     """
     base_operator = BaseOperator(task_id="10")
     fields = base_operator.__dict__
     assert {
         '_BaseOperator__instantiated': True,
         '_dag': None,
         '_downstream_task_ids': set(),
         '_inlets': [],
         '_log': base_operator.log,
         '_outlets': [],
         '_upstream_task_ids': set(),
         'depends_on_past': False,
         'do_xcom_push': True,
         'email': None,
         'email_on_failure': True,
         'email_on_retry': True,
         'end_date': None,
         'execution_timeout': None,
         'executor_config': {},
         'inlets': [],
         'label': '10',
         'max_retry_delay': None,
         'on_execute_callback': None,
         'on_failure_callback': None,
         'on_retry_callback': None,
         'on_success_callback': None,
         'outlets': [],
         'owner': 'airflow',
         'params': {},
         'pool': 'default_pool',
         'pool_slots': 1,
         'priority_weight': 1,
         'queue': 'default',
         'resources': None,
         'retries': 0,
         'retry_delay': timedelta(0, 300),
         'retry_exponential_backoff': False,
         'run_as_user': None,
         'sla': None,
         'start_date': None,
         'subdag': None,
         'task_concurrency': None,
         'task_id': '10',
         'trigger_rule': 'all_success',
         'wait_for_downstream': False,
         'weight_rule': 'downstream',
     } == fields, """
Пример #5
0
def make_simple_dag():
    """Build a minimal DAG used to verify serialization output."""
    default_args = {
        "retries": 1,
        "retry_delay": timedelta(minutes=5),
        "depends_on_past": False,
    }
    dag = DAG(
        dag_id='simple_dag',
        default_args=default_args,
        start_date=datetime(2019, 8, 1),
    )
    # Tasks only need to exist on the DAG; the serializer picks them up.
    BaseOperator(task_id='simple_task', dag=dag, owner='airflow')
    CustomBaseOperator(task_id='custom_task', dag=dag)
    return {'simple_dag': dag}
Пример #6
0
def make_simple_dag():
    """Build a minimal DAG (with access control) to verify serialization."""
    access = {"test": {"can_dag_read", "can_dag_edit"}}
    dag = DAG(
        dag_id='simple_dag',
        default_args={
            "retries": 1,
            "retry_delay": timedelta(minutes=5),
            "depends_on_past": False,
        },
        start_date=datetime(2019, 8, 1),
        is_paused_upon_creation=False,
        access_control=access,
    )
    BaseOperator(task_id='simple_task', dag=dag, owner='airflow')
    CustomOperator(task_id='custom_task', dag=dag)
    return {'simple_dag': dag}
Пример #7
0
    def test_deserialization_start_date(self, dag_start_date, task_start_date, expected_task_start_date):
        """Round-trip a DAG and check the task ``start_date`` after (de)serialization."""
        dag = DAG(dag_id='simple_dag', start_date=dag_start_date)
        BaseOperator(task_id='simple_task', dag=dag, start_date=task_start_date)

        serialized_dag = SerializedDAG.to_dict(dag)
        task_blob = serialized_dag["dag"]["tasks"][0]
        if task_start_date and dag_start_date < task_start_date:
            # dag.add_task() clamps task.start_date up to dag.start_date, so a
            # strictly later task start_date is the only case that serializes.
            self.assertIn("start_date", task_blob)
        else:
            self.assertNotIn("start_date", task_blob)

        round_tripped = SerializedDAG.from_dict(serialized_dag)
        self.assertEqual(round_tripped.task_dict["simple_task"].start_date,
                         expected_task_start_date)
Пример #8
0
    def test_check_for_stalled_adopted_tasks(self):
        """Adopted tasks past their adoption deadline are failed and dropped."""
        exec_date = timezone.utcnow() - timedelta(minutes=40)
        start_date = timezone.utcnow() - timedelta(days=2)
        queued_dttm = timezone.utcnow() - timedelta(minutes=30)
        try_number = 1

        with DAG("test_check_for_stalled_adopted_tasks") as dag:
            task_1 = BaseOperator(task_id="task_1", start_date=start_date)
            task_2 = BaseOperator(task_id="task_2", start_date=start_date)

        keys = [
            TaskInstanceKey(dag.dag_id, task.task_id, exec_date, try_number)
            for task in (task_1, task_2)
        ]

        executor = celery_executor.CeleryExecutor()
        # Pretend both tasks were adopted 30 minutes ago — past the timeout.
        executor.adopted_task_timeouts = {
            key: queued_dttm + executor.task_adoption_timeout for key in keys
        }
        executor.tasks = {keys[0]: AsyncResult("231"), keys[1]: AsyncResult("232")}

        executor.sync()

        # sync() must mark both as failed and purge the bookkeeping dicts.
        self.assertEqual(
            executor.event_buffer,
            {keys[0]: (State.FAILED, None), keys[1]: (State.FAILED, None)},
        )
        self.assertEqual(executor.tasks, {})
        self.assertEqual(executor.adopted_task_timeouts, {})
Пример #9
0
    def test_deserialization_end_date(self, dag_end_date, task_end_date, expected_task_end_date):
        """Round-trip a DAG and check the task ``end_date`` after (de)serialization."""
        dag = DAG(dag_id='simple_dag', start_date=datetime(2019, 8, 1), end_date=dag_end_date)
        BaseOperator(task_id='simple_task', dag=dag, end_date=task_end_date)

        serialized_dag = SerializedDAG.to_dict(dag)
        task_blob = serialized_dag["dag"]["tasks"][0]
        if task_end_date and dag_end_date > task_end_date:
            # dag.add_task() clamps task.end_date down to dag.end_date, so only
            # a strictly earlier task end_date is serialized on the task.
            assert "end_date" in task_blob
        else:
            assert "end_date" not in task_blob

        round_tripped = SerializedDAG.from_dict(serialized_dag)
        assert round_tripped.task_dict["simple_task"].end_date == expected_task_end_date
Пример #10
0
    def test_task_params_roundtrip(self, val, expected_val):
        """
        Test that params work both on Serialized DAGs & Tasks
        """
        dag = DAG(dag_id='simple_dag')
        BaseOperator(task_id='simple_task', dag=dag, params=val, start_date=datetime(2019, 8, 1))

        serialized_dag = SerializedDAG.to_dict(dag)
        task_blob = serialized_dag["dag"]["tasks"][0]
        # Falsy params are omitted from the serialized task entirely.
        if val:
            assert "params" in task_blob
        else:
            assert "params" not in task_blob

        round_tripped = SerializedDAG.from_dict(serialized_dag)
        assert expected_val == round_tripped.task_dict["simple_task"].params
Пример #11
0
    def test_operator_subclass_changing_base_defaults(self):
        """A subclass overriding a BaseOperator default must round-trip intact."""
        assert BaseOperator(task_id='dummy').do_xcom_push is True, \
            "Precondition check! If this fails the test won't make sense"

        class MyOperator(BaseOperator):
            # Flips the BaseOperator default of do_xcom_push=True.
            def __init__(self, do_xcom_push=False, **kwargs):
                super().__init__(**kwargs)
                self.do_xcom_push = do_xcom_push

        op = MyOperator(task_id='dummy')
        assert op.do_xcom_push is False

        # Serialize then deserialize; the overridden default must survive.
        round_tripped = SerializedBaseOperator.deserialize_operator(
            SerializedBaseOperator.serialize_operator(op)
        )
        assert round_tripped.do_xcom_push is False
    def test_dag_params_roundtrip(self, val, expected_val):
        """
        Test that params work both on Serialized DAGs & Tasks
        """
        dag = DAG(dag_id='simple_dag', params=val)
        BaseOperator(task_id='simple_task', dag=dag, start_date=datetime(2019, 8, 1))

        serialized_dag = SerializedDAG.to_dict(dag)
        # Falsy params are dropped from the serialized blob entirely.
        if val:
            self.assertIn("params", serialized_dag["dag"])
        else:
            self.assertNotIn("params", serialized_dag["dag"])

        round_tripped = SerializedDAG.from_dict(serialized_dag)
        task = round_tripped.task_dict["simple_task"]
        # DAG-level params propagate to both the DAG and its tasks.
        self.assertEqual(expected_val, round_tripped.params)
        self.assertEqual(expected_val, task.params)
Пример #13
0
    def test_schedule_dag_once(self):
        """
        Tests scheduling a dag scheduled for @once - should be scheduled the first time
        it is called, and not scheduled the second.
        """
        dag = DAG(self.TEST_SCHEDULE_ONCE_DAG_ID)
        dag.schedule_interval = '@once'
        dag.add_task(
            BaseOperator(
                task_id="faketastic",
                owner='Also fake',
                start_date=datetime(2015, 1, 2, 0, 0),
            )
        )

        first_run = DagFileProcessor(dag_ids=[], log=mock.MagicMock()).create_dag_run(dag)
        second_run = DagFileProcessor(dag_ids=[], log=mock.MagicMock()).create_dag_run(dag)

        # Only the first scheduling attempt for an @once DAG creates a run.
        self.assertIsNotNone(first_run)
        self.assertIsNone(second_run)
        dag.clear()
Пример #14
0
    def test_schedule_dag_once(self):
        """
        Tests scheduling a dag scheduled for @once - should be scheduled the first time
        it is called, and not scheduled the second.
        """
        dag = DAG(self.TEST_SCHEDULE_ONCE_DAG_ID)
        dag.schedule_interval = '@once'
        dag.add_task(
            BaseOperator(
                task_id="faketastic",
                owner='Also fake',
                start_date=datetime(2015, 1, 2, 0, 0),
            )
        )

        first_run = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag)
        second_run = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag)

        # Only the first scheduling attempt for an @once DAG creates a run.
        self.assertIsNotNone(first_run)
        self.assertIsNone(second_run)
        dag.clear()
Пример #15
0
    def test_schedule_dag_relativedelta(self):
        """
        Tests scheduling a dag with a relativedelta schedule_interval
        """
        dag_id = "test_schedule_dag_relativedelta"
        delta = relativedelta(hours=+1)
        dag = DAG(dag_id=dag_id,
                  schedule_interval=delta)
        dag.add_task(BaseOperator(
            task_id="faketastic",
            owner='Also fake',
            start_date=datetime_tz(2015, 1, 2, 0, 0)))

        dag_file_processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())
        # First run: execution date equals the task's start date.
        dag_run = dag_file_processor.create_dag_run(dag)
        self.assertIsNotNone(dag_run)
        self.assertEqual(dag.dag_id, dag_run.dag_id)
        self.assertIsNotNone(dag_run.run_id)
        self.assertNotEqual('', dag_run.run_id)
        self.assertEqual(
            datetime_tz(2015, 1, 2, 0, 0),
            dag_run.execution_date,
            msg='dag_run.execution_date did not match expectation: {0}'
            .format(dag_run.execution_date)
        )
        self.assertEqual(State.RUNNING, dag_run.state)
        self.assertFalse(dag_run.external_trigger)
        # Second run: advanced by exactly one relativedelta interval (+1h).
        dag_run2 = dag_file_processor.create_dag_run(dag)
        self.assertIsNotNone(dag_run2)
        self.assertEqual(dag.dag_id, dag_run2.dag_id)
        self.assertIsNotNone(dag_run2.run_id)
        self.assertNotEqual('', dag_run2.run_id)
        self.assertEqual(
            datetime_tz(2015, 1, 2, 0, 0) + delta,
            dag_run2.execution_date,
            msg='dag_run2.execution_date did not match expectation: {0}'
            .format(dag_run2.execution_date)
        )
        self.assertEqual(State.RUNNING, dag_run2.state)
        self.assertFalse(dag_run2.external_trigger)
        dag.clear()
        self._clean_up(dag_id)
Пример #16
0
    def test_dag_on_success_callback_roundtrip(self, passed_success_callback, expected_value):
        """
        Test that when on_success_callback is passed to the DAG, has_on_success_callback is stored
        in Serialized JSON blob. And when it is de-serialized dag.has_on_success_callback is set to True.

        When the callback is not set, has_on_success_callback should not be stored in Serialized blob
        and so default to False on de-serialization
        """
        dag = DAG(dag_id='test_dag_on_success_callback_roundtrip', **passed_success_callback)
        BaseOperator(task_id='simple_task', dag=dag, start_date=datetime(2019, 8, 1))

        blob = SerializedDAG.to_dict(dag)
        # The flag only appears in the blob when a callback was supplied.
        if expected_value:
            assert "has_on_success_callback" in blob["dag"]
        else:
            assert "has_on_success_callback" not in blob["dag"]

        assert SerializedDAG.from_dict(blob).has_on_success_callback is expected_value
Пример #17
0
    def test_schedule_dag_no_end_date_up_to_today_only(self):
        """
        Tests that a Dag created without an end_date can only be scheduled up
        to and including the current datetime.

        For example, if today is 2016-01-01 and we are scheduling from a
        start_date of 2015-01-01, only jobs up to, but not including
        2016-01-01 should be scheduled.
        """
        session = settings.Session()
        # NOTE(review): this file appears to import the ``datetime`` module
        # itself (``datetime.timedelta``), unlike the class-import style used
        # elsewhere on this page.
        delta = datetime.timedelta(days=1)
        now = utcnow()
        start_date = now.subtract(weeks=1)

        # One run per elapsed day between start_date and now.
        runs = (now - start_date).days
        dag_id = "test_schedule_dag_no_end_date_up_to_today_only"
        dag = DAG(dag_id=dag_id,
                  start_date=start_date,
                  schedule_interval=delta)
        dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake'))

        dag_file_processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())
        dag_runs = []
        for _ in range(runs):
            dag_run = dag_file_processor.create_dag_run(dag)
            dag_runs.append(dag_run)

            # Mark the DagRun as complete
            dag_run.state = State.SUCCESS
            session.merge(dag_run)
            session.commit()

        # Attempt to schedule an additional dag run (for 2016-01-01)
        additional_dag_run = dag_file_processor.create_dag_run(dag)

        for dag_run in dag_runs:
            self.assertIsNotNone(dag_run)

        # The run beyond "today" must not be created.
        self.assertIsNone(additional_dag_run)
        self._clean_up(dag_id)
Пример #18
0
    def test_schedule_dag_relativedelta(self):
        """
        Tests scheduling a dag with a relativedelta schedule_interval
        """
        delta = relativedelta(hours=+1)
        dag = DAG(self.TEST_SCHEDULE_RELATIVEDELTA_DAG_ID,
                  schedule_interval=delta)
        dag.add_task(BaseOperator(
            task_id="faketastic",
            owner='Also fake',
            start_date=datetime(2015, 1, 2, 0, 0)))

        # First run: execution date equals the task's start date.
        dag_run = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag)
        self.assertIsNotNone(dag_run)
        self.assertEqual(dag.dag_id, dag_run.dag_id)
        self.assertIsNotNone(dag_run.run_id)
        self.assertNotEqual('', dag_run.run_id)
        self.assertEqual(
            datetime(2015, 1, 2, 0, 0),
            dag_run.execution_date,
            msg='dag_run.execution_date did not match expectation: {0}'
            .format(dag_run.execution_date)
        )
        self.assertEqual(State.RUNNING, dag_run.state)
        self.assertFalse(dag_run.external_trigger)
        # Second run: advanced by exactly one relativedelta interval (+1h).
        dag_run2 = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag)
        self.assertIsNotNone(dag_run2)
        self.assertEqual(dag.dag_id, dag_run2.dag_id)
        self.assertIsNotNone(dag_run2.run_id)
        self.assertNotEqual('', dag_run2.run_id)
        self.assertEqual(
            datetime(2015, 1, 2, 0, 0) + delta,
            dag_run2.execution_date,
            msg='dag_run2.execution_date did not match expectation: {0}'
            .format(dag_run2.execution_date)
        )
        self.assertEqual(State.RUNNING, dag_run2.state)
        self.assertFalse(dag_run2.external_trigger)
        dag.clear()
Пример #19
0
    def test_schedule_dag_no_end_date_up_to_today_only(self):
        """
        Tests that a Dag created without an end_date can only be scheduled up
        to and including the current datetime.

        For example, if today is 2016-01-01 and we are scheduling from a
        start_date of 2015-01-01, only jobs up to, but not including
        2016-01-01 should be scheduled.
        """
        session = settings.Session()
        delta = timedelta(days=1)
        now = utcnow()
        start_date = now.subtract(weeks=1)

        # One run per elapsed day between start_date and now.
        runs = (now - start_date).days

        dag = DAG(self.TEST_SCHEDULE_DAG_NO_END_DATE_UP_TO_TODAY_ONLY_DAG_ID,
                  start_date=start_date,
                  schedule_interval=delta)
        dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake'))

        dag_runs = []
        scheduler = jobs.SchedulerJob(**self.default_scheduler_args)
        for _ in range(runs):
            dag_run = scheduler.create_dag_run(dag)
            dag_runs.append(dag_run)

            # Mark the DagRun as complete
            dag_run.state = State.SUCCESS
            session.merge(dag_run)
            session.commit()

        # Attempt to schedule an additional dag run (for 2016-01-01)
        additional_dag_run = scheduler.create_dag_run(dag)

        for dag_run in dag_runs:
            self.assertIsNotNone(dag_run)

        # The run beyond "today" must not be created.
        self.assertIsNone(additional_dag_run)
Пример #20
0
    def test_schedule_dag_no_previous_runs(self):
        """
        Tests scheduling a dag with no previous runs
        """
        dag = DAG(self.TEST_SCHEDULE_WITH_NO_PREVIOUS_RUNS_DAG_ID)
        dag.add_task(
            BaseOperator(
                task_id="faketastic",
                owner='Also fake',
                start_date=datetime(2015, 1, 2, 0, 0),
            )
        )

        dag_run = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag)

        self.assertIsNotNone(dag_run)
        self.assertEqual(dag.dag_id, dag_run.dag_id)
        self.assertIsNotNone(dag_run.run_id)
        self.assertNotEqual('', dag_run.run_id)
        # With no history, the first run lands on the task's start date.
        self.assertEqual(
            datetime(2015, 1, 2, 0, 0),
            dag_run.execution_date,
            msg='dag_run.execution_date did not match expectation: {0}'
            .format(dag_run.execution_date)
        )
        self.assertEqual(State.RUNNING, dag_run.state)
        self.assertFalse(dag_run.external_trigger)
        dag.clear()
 def _get_task(self, **kwargs):
     """Return a throwaway test operator attached to a fresh DAG."""
     dag = DAG('test_dag')
     return BaseOperator(task_id='test_task', dag=dag, **kwargs)
Пример #22
0
def retemplate_query(query, context):
    """Render the untemplated query from ``context`` via a scratch operator.

    NOTE(review): the ``query`` parameter is unused; the content comes from
    ``context['params']['untemplated_query']`` — confirm against callers.
    """
    untemplated = context['params']['untemplated_query']
    scratch_op = BaseOperator(
        task_id='tmp',
        template_fields=untemplated,
        context=context,
    )
    return scratch_op.render_template(content=untemplated, context=context)
Пример #23
0
from airflow.models.dag import DAG
# Fix: BaseOperator was imported twice; the duplicate line is removed.
from airflow.models.baseoperator import BaseOperator

# Minimal two-task DAG: thywtersh runs before rtwhsrtjhtwr.
gwdgwdgougeqfouqegehflqehfl = DAG(**{"dag_id": "gwdgwdgougeqfouqegehflqehfl"})

thywtersh = BaseOperator(**{"task_id": "thywtersh"},
                         dag=gwdgwdgougeqfouqegehflqehfl)
rtwhsrtjhtwr = BaseOperator(**{"task_id": "rtwhsrtjhtwr"},
                            dag=gwdgwdgougeqfouqegehflqehfl)

thywtersh >> rtwhsrtjhtwr
Пример #24
0
from airflow.models.dag import DAG
# Fix: BaseOperator was imported twice; the duplicate line is removed.
from airflow.models.baseoperator import BaseOperator

# Minimal two-task DAG: rtwhsrtjhtwr runs before thywtersh.
gwdgwdg = DAG(**{"dag_id": "gwdgwdg"})

thywtersh = BaseOperator(**{"task_id": "thywtersh"}, dag=gwdgwdg)
rtwhsrtjhtwr = BaseOperator(**{"task_id": "rtwhsrtjhtwr"}, dag=gwdgwdg)

rtwhsrtjhtwr >> thywtersh
Пример #25
0
    def test_no_new_fields_added_to_base_operator(self):
        """
        This test verifies that there are no new fields added to BaseOperator. And reminds that
        tests should be added for it.
        """
        base_operator = BaseOperator(task_id="10")
        # Snapshot of every instance attribute a default-constructed operator carries.
        fields = base_operator.__dict__
        self.assertEqual(
            {
                '_BaseOperator__instantiated': True,
                '_dag': None,
                '_downstream_task_ids': set(),
                '_inlets': [],
                '_log': base_operator.log,
                '_outlets': [],
                '_upstream_task_ids': set(),
                'depends_on_past': False,
                'do_xcom_push': True,
                'email': None,
                'email_on_failure': True,
                'email_on_retry': True,
                'end_date': None,
                'execution_timeout': None,
                'executor_config': {},
                'inlets': [],
                'label': '10',
                'max_retry_delay': None,
                'on_execute_callback': None,
                'on_failure_callback': None,
                'on_retry_callback': None,
                'on_success_callback': None,
                'outlets': [],
                'owner': 'airflow',
                'params': {},
                'pool': 'default_pool',
                'pool_slots': 1,
                'priority_weight': 1,
                'queue': 'default',
                'resources': None,
                'retries': 0,
                'retry_delay': timedelta(0, 300),
                'retry_exponential_backoff': False,
                'run_as_user': None,
                'sla': None,
                'start_date': None,
                'subdag': None,
                'task_concurrency': None,
                'task_id': '10',
                'trigger_rule': 'all_success',
                'wait_for_downstream': False,
                'weight_rule': 'downstream'
            }, fields, """
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

     ACTION NEEDED! PLEASE READ THIS CAREFULLY AND CORRECT TESTS CAREFULLY

 Some fields were added to the BaseOperator! Please add them to the list above and make sure that
 you add support for DAG serialization - you should add the field to
 `airflow/serialization/schema.json` - they should have correct type defined there.

 Note that we do not support versioning yet so you should only add optional fields to BaseOperator.

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                         """)
Пример #26
0
 def test_deserialization_with_dag_context(self):
     """Serializing while the DAG context manager is still open must work."""
     start = datetime(2019, 8, 1, tzinfo=timezone.utc)
     with DAG(dag_id='simple_dag', start_date=start) as dag:
         BaseOperator(task_id='simple_task')
         # should not raise RuntimeError: dictionary changed size during iteration
         SerializedDAG.to_dict(dag)