示例#1
0
    def test_backfill_rerun_failed_tasks(self):
        dag = DAG(dag_id='test_backfill_rerun_failed',
                  start_date=DEFAULT_DATE,
                  schedule_interval='@daily')

        with dag:
            DummyOperator(task_id='test_backfill_rerun_failed_task-1', dag=dag)

        dag.clear()

        executor = MockExecutor()

        job = BackfillJob(
            dag=dag,
            executor=executor,
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE + datetime.timedelta(days=2),
        )
        job.run()

        ti = TI(task=dag.get_task('test_backfill_rerun_failed_task-1'),
                execution_date=DEFAULT_DATE)
        ti.refresh_from_db()
        ti.set_state(State.FAILED)

        job = BackfillJob(dag=dag,
                          executor=executor,
                          start_date=DEFAULT_DATE,
                          end_date=DEFAULT_DATE + datetime.timedelta(days=2),
                          rerun_failed_tasks=True)
        job.run()
        ti = TI(task=dag.get_task('test_backfill_rerun_failed_task-1'),
                execution_date=DEFAULT_DATE)
        ti.refresh_from_db()
        self.assertEqual(ti.state, State.SUCCESS)
示例#2
0
    def test_backfill_rerun_failed_tasks_without_flag(self):
        dag = DAG(dag_id='test_backfill_rerun_failed',
                  start_date=DEFAULT_DATE,
                  schedule_interval='@daily')

        with dag:
            DummyOperator(task_id='test_backfill_rerun_failed_task-1', dag=dag)

        dag.clear()

        executor = MockExecutor()

        job = BackfillJob(
            dag=dag,
            executor=executor,
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE + datetime.timedelta(days=2),
        )
        job.run()

        ti = TI(task=dag.get_task('test_backfill_rerun_failed_task-1'),
                execution_date=DEFAULT_DATE)
        ti.refresh_from_db()
        ti.set_state(State.FAILED)

        job = BackfillJob(dag=dag,
                          executor=executor,
                          start_date=DEFAULT_DATE,
                          end_date=DEFAULT_DATE + datetime.timedelta(days=2),
                          rerun_failed_tasks=False)

        with self.assertRaises(AirflowException):
            job.run()
示例#3
0
    def test_task_states_for_dag_run(self):

        dag2 = DagBag().dags['example_python_operator']
        task2 = dag2.get_task(task_id='print_the_context')
        default_date2 = timezone.make_aware(datetime(2016, 1, 9))
        dag2.clear()

        ti2 = TaskInstance(task2, default_date2)

        ti2.set_state(State.SUCCESS)
        ti_start = ti2.start_date
        ti_end = ti2.end_date

        with redirect_stdout(io.StringIO()) as stdout:
            task_command.task_states_for_dag_run(
                self.parser.parse_args([
                    'tasks',
                    'states-for-dag-run',
                    'example_python_operator',
                    default_date2.isoformat(),
                    '--output',
                    "json",
                ]))
        actual_out = json.loads(stdout.getvalue())

        assert len(actual_out) == 1
        assert actual_out[0] == {
            'dag_id': 'example_python_operator',
            'execution_date': '2016-01-09T00:00:00+00:00',
            'task_id': 'print_the_context',
            'state': 'success',
            'start_date': ti_start.isoformat(),
            'end_date': ti_end.isoformat(),
        }
示例#4
0
    def test_task_states_for_dag_run(self):

        dag2 = DagBag().dags['example_python_operator']

        task2 = dag2.get_task(task_id='print_the_context')
        defaut_date2 = timezone.make_aware(datetime(2016, 1, 9))
        ti2 = TaskInstance(task2, defaut_date2)

        ti2.set_state(State.SUCCESS)
        ti_start = ti2.start_date
        ti_end = ti2.end_date

        with redirect_stdout(io.StringIO()) as stdout:
            task_command.task_states_for_dag_run(
                self.parser.parse_args([
                    'tasks', 'states_for_dag_run', 'example_python_operator',
                    defaut_date2.isoformat()
                ]))
        actual_out = stdout.getvalue()

        formatted_rows = [
            ('example_python_operator', '2016-01-09 00:00:00+00:00',
             'print_the_context', 'success', ti_start, ti_end)
        ]

        expected = tabulate(
            formatted_rows,
            ['dag', 'exec_date', 'task', 'state', 'start_date', 'end_date'],
            tablefmt="fancy_grid")

        # Check that prints, and log messages, are shown
        self.assertEqual(expected.replace("\n", ""),
                         actual_out.replace("\n", ""))
示例#5
0
    def test_wait_for_downstream(self, prev_ti_state, is_ti_success):
        dag_id = 'test_wait_for_downstream'
        dag = self.dagbag.get_dag(dag_id)
        upstream, downstream = dag.tasks

        # For ti.set_state() to work, the DagRun has to exist,
        # Otherwise ti.previous_ti returns an unpersisted TI
        self.create_dag_run(dag,
                            execution_date=timezone.datetime(
                                2016, 1, 1, 0, 0, 0))
        self.create_dag_run(dag,
                            execution_date=timezone.datetime(
                                2016, 1, 2, 0, 0, 0))

        prev_ti_downstream = TI(task=downstream,
                                execution_date=timezone.datetime(
                                    2016, 1, 1, 0, 0, 0))
        ti = TI(task=upstream,
                execution_date=timezone.datetime(2016, 1, 2, 0, 0, 0))
        prev_ti = ti.get_previous_ti()
        prev_ti.set_state(State.SUCCESS)
        assert prev_ti.state == State.SUCCESS

        prev_ti_downstream.set_state(prev_ti_state)
        ti.set_state(State.QUEUED)
        ti.run()
        assert (ti.state == State.SUCCESS) == is_ti_success
示例#6
0
    def test_get_states_count_upstream_ti(self):
        """
        this test tests the helper function '_get_states_count_upstream_ti' as a unit and inside update_state
        """
        from airflow.ti_deps.dep_context import DepContext

        get_states_count_upstream_ti = TriggerRuleDep._get_states_count_upstream_ti
        session = settings.Session()
        now = timezone.utcnow()
        dag = DAG(
            'test_dagrun_with_pre_tis',
            start_date=DEFAULT_DATE,
            default_args={'owner': 'owner1'})

        with dag:
            op1 = DummyOperator(task_id='A')
            op2 = DummyOperator(task_id='B')
            op3 = DummyOperator(task_id='C')
            op4 = DummyOperator(task_id='D')
            op5 = DummyOperator(task_id='E', trigger_rule=TriggerRule.ONE_FAILED)

            op1.set_downstream([op2, op3])  # op1 >> op2, op3
            op4.set_upstream([op3, op2])  # op3, op2 >> op4
            op5.set_upstream([op2, op3, op4])  # (op2, op3, op4) >> op5

        clear_db_runs()
        dag.clear()
        dr = dag.create_dagrun(run_id='test_dagrun_with_pre_tis',
                               state=State.RUNNING,
                               execution_date=now,
                               start_date=now)

        ti_op1 = TaskInstance(task=dag.get_task(op1.task_id), execution_date=dr.execution_date)
        ti_op2 = TaskInstance(task=dag.get_task(op2.task_id), execution_date=dr.execution_date)
        ti_op3 = TaskInstance(task=dag.get_task(op3.task_id), execution_date=dr.execution_date)
        ti_op4 = TaskInstance(task=dag.get_task(op4.task_id), execution_date=dr.execution_date)
        ti_op5 = TaskInstance(task=dag.get_task(op5.task_id), execution_date=dr.execution_date)

        ti_op1.set_state(state=State.SUCCESS, session=session)
        ti_op2.set_state(state=State.FAILED, session=session)
        ti_op3.set_state(state=State.SUCCESS, session=session)
        ti_op4.set_state(state=State.SUCCESS, session=session)
        ti_op5.set_state(state=State.SUCCESS, session=session)

        session.commit()

        # check handling with cases that tasks are triggered from backfill with no finished tasks
        finished_tasks = DepContext().ensure_finished_tasks(ti_op2.task.dag, ti_op2.execution_date, session)
        self.assertEqual(get_states_count_upstream_ti(finished_tasks=finished_tasks, ti=ti_op2),
                         (1, 0, 0, 0, 1))
        finished_tasks = dr.get_task_instances(state=State.finished() + [State.UPSTREAM_FAILED],
                                               session=session)
        self.assertEqual(get_states_count_upstream_ti(finished_tasks=finished_tasks, ti=ti_op4),
                         (1, 0, 1, 0, 2))
        self.assertEqual(get_states_count_upstream_ti(finished_tasks=finished_tasks, ti=ti_op5),
                         (2, 0, 1, 0, 3))

        dr.update_state()
        self.assertEqual(State.SUCCESS, dr.state)
 def get_test_ti(session, execution_date: pendulum.datetime, state: str) -> TI:
     dag.create_dagrun(
         run_id='scheduled__{}'.format(execution_date.to_iso8601_string()),
         state=state,
         execution_date=execution_date,
         start_date=pendulum.utcnow(),
         session=session
     )
     ti = TI(task=task, execution_date=execution_date)
     ti.set_state(state=State.SUCCESS, session=session)
     return ti
示例#8
0
def resetTasksStatus(task_id):
    dag_folder = conf.get('core', 'DAGS_FOLDER')
    dagbag = DagBag(dag_folder)
    check_dag = dagbag.dags[main_dag_id]
    session = settings.Session()
    execution_date = datetime.now()
    my_task = check_dag.get_task(task_id)
    ti = TaskInstance(my_task, execution_date)
    state = ti.current_state()
    logging.info("Current state of " + task_id + " is " + str(state))
    ti.set_state(None, session)
    state = ti.current_state()
    logging.info("Updated state of " + task_id + " is " + str(state))
示例#9
0
    def test_retry_handling(self, mock_pool_full):
        """
        Test that task retries are handled properly
        """
        # Mock the pool with a pool with slots open since the pool doesn't actually exist
        mock_pool_full.return_value = False

        dag = models.DAG(dag_id='test_retry_handling')
        task = BashOperator(
            task_id='test_retry_handling_op',
            bash_command='exit 1',
            retries=1,
            retry_delay=datetime.timedelta(seconds=0),
            dag=dag,
            owner='airflow',
            start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))

        def run_with_error(ti):
            try:
                ti.run()
            except AirflowException:
                pass

        ti = TI(
            task=task, execution_date=datetime.datetime.now())

        # first run -- up for retry
        run_with_error(ti)
        self.assertEqual(ti.state, State.UP_FOR_RETRY)
        self.assertEqual(ti.try_number, 1)

        # second run -- fail
        run_with_error(ti)
        self.assertEqual(ti.state, State.FAILED)
        self.assertEqual(ti.try_number, 2)

        # Clear the TI state since you can't run a task with a FAILED state without
        # clearing it first
        ti.set_state(None, settings.Session())

        # third run -- up for retry
        run_with_error(ti)
        self.assertEqual(ti.state, State.UP_FOR_RETRY)
        self.assertEqual(ti.try_number, 3)

        # fourth run -- fail
        run_with_error(ti)
        self.assertEqual(ti.state, State.FAILED)
        self.assertEqual(ti.try_number, 4)
示例#10
0
    def test_retry_handling(self, mock_pool_full):
        """
        Test that task retries are handled properly
        """
        # Mock the pool with a pool with slots open since the pool doesn't actually exist
        mock_pool_full.return_value = False

        dag = models.DAG(dag_id='test_retry_handling')
        task = BashOperator(
            task_id='test_retry_handling_op',
            bash_command='exit 1',
            retries=1,
            retry_delay=datetime.timedelta(seconds=0),
            dag=dag,
            owner='airflow',
            start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))

        def run_with_error(ti):
            try:
                ti.run()
            except AirflowException:
                pass

        ti = TI(
            task=task, execution_date=datetime.datetime.now())

        # first run -- up for retry
        run_with_error(ti)
        self.assertEqual(ti.state, State.UP_FOR_RETRY)
        self.assertEqual(ti.try_number, 1)

        # second run -- fail
        run_with_error(ti)
        self.assertEqual(ti.state, State.FAILED)
        self.assertEqual(ti.try_number, 2)

        # Clear the TI state since you can't run a task with a FAILED state without
        # clearing it first
        ti.set_state(None, settings.Session())

        # third run -- up for retry
        run_with_error(ti)
        self.assertEqual(ti.state, State.UP_FOR_RETRY)
        self.assertEqual(ti.try_number, 3)

        # fourth run -- fail
        run_with_error(ti)
        self.assertEqual(ti.state, State.FAILED)
        self.assertEqual(ti.try_number, 4)
 def _run_task(self, ti: TaskInstance) -> bool:
     self.log.debug("Executing task: %s", ti)
     key = ti.key
     try:
         params = self.tasks_params.pop(ti.key, {})
         _run_raw_task(ti, job_id=ti.job_id, **params)  # pylint: disable=protected-access
         self.change_state(key, State.SUCCESS)
         _run_finished_callback(ti)  # pylint: disable=protected-access
         return True
     except Exception as e:  # pylint: disable=broad-except
         ti.set_state(State.FAILED)
         self.change_state(key, State.FAILED)
         _run_finished_callback(ti)  # pylint: disable=protected-access
         self.log.exception("Failed to execute task: %s.", str(e))
         return False
示例#12
0
    def test_depends_on_past(self, prev_ti_state, is_ti_success):
        dag_id = 'test_depends_on_past'

        dag = self.dagbag.get_dag(dag_id)
        task = dag.tasks[0]

        self.create_dag_run(dag, execution_date=timezone.datetime(2016, 1, 1, 0, 0, 0))
        self.create_dag_run(dag, execution_date=timezone.datetime(2016, 1, 2, 0, 0, 0))

        prev_ti = TI(task, timezone.datetime(2016, 1, 1, 0, 0, 0))
        ti = TI(task, timezone.datetime(2016, 1, 2, 0, 0, 0))

        prev_ti.set_state(prev_ti_state)
        ti.set_state(State.QUEUED)
        ti.run()
        assert (ti.state == State.SUCCESS) == is_ti_success
示例#13
0
    def test_update_counters(self):
        dag = DAG(dag_id='test_manage_executor_state', start_date=DEFAULT_DATE)

        task1 = DummyOperator(task_id='dummy', dag=dag, owner='airflow')

        job = BackfillJob(dag=dag)

        session = settings.Session()
        dr = dag.create_dagrun(run_id=DagRun.ID_PREFIX,
                               state=State.RUNNING,
                               execution_date=DEFAULT_DATE,
                               start_date=DEFAULT_DATE,
                               session=session)
        ti = TI(task1, dr.execution_date)
        ti.refresh_from_db()

        ti_status = BackfillJob._DagRunTaskStatus()

        # test for success
        ti.set_state(State.SUCCESS, session)
        ti_status.running[ti.key] = ti
        job._update_counters(ti_status=ti_status)
        self.assertTrue(len(ti_status.running) == 0)
        self.assertTrue(len(ti_status.succeeded) == 1)
        self.assertTrue(len(ti_status.skipped) == 0)
        self.assertTrue(len(ti_status.failed) == 0)
        self.assertTrue(len(ti_status.to_run) == 0)

        ti_status.succeeded.clear()

        # test for skipped
        ti.set_state(State.SKIPPED, session)
        ti_status.running[ti.key] = ti
        job._update_counters(ti_status=ti_status)
        self.assertTrue(len(ti_status.running) == 0)
        self.assertTrue(len(ti_status.succeeded) == 0)
        self.assertTrue(len(ti_status.skipped) == 1)
        self.assertTrue(len(ti_status.failed) == 0)
        self.assertTrue(len(ti_status.to_run) == 0)

        ti_status.skipped.clear()

        # test for failed
        ti.set_state(State.FAILED, session)
        ti_status.running[ti.key] = ti
        job._update_counters(ti_status=ti_status)
        self.assertTrue(len(ti_status.running) == 0)
        self.assertTrue(len(ti_status.succeeded) == 0)
        self.assertTrue(len(ti_status.skipped) == 0)
        self.assertTrue(len(ti_status.failed) == 1)
        self.assertTrue(len(ti_status.to_run) == 0)

        ti_status.failed.clear()

        # test for retry
        ti.set_state(State.UP_FOR_RETRY, session)
        ti_status.running[ti.key] = ti
        job._update_counters(ti_status=ti_status)
        self.assertTrue(len(ti_status.running) == 0)
        self.assertTrue(len(ti_status.succeeded) == 0)
        self.assertTrue(len(ti_status.skipped) == 0)
        self.assertTrue(len(ti_status.failed) == 0)
        self.assertTrue(len(ti_status.to_run) == 1)

        ti_status.to_run.clear()

        # test for reschedule
        ti.set_state(State.UP_FOR_RESCHEDULE, session)
        ti_status.running[ti.key] = ti
        job._update_counters(ti_status=ti_status)
        self.assertTrue(len(ti_status.running) == 0)
        self.assertTrue(len(ti_status.succeeded) == 0)
        self.assertTrue(len(ti_status.skipped) == 0)
        self.assertTrue(len(ti_status.failed) == 0)
        self.assertTrue(len(ti_status.to_run) == 1)

        ti_status.to_run.clear()

        # test for none
        ti.set_state(State.NONE, session)
        ti_status.running[ti.key] = ti
        job._update_counters(ti_status=ti_status)
        self.assertTrue(len(ti_status.running) == 0)
        self.assertTrue(len(ti_status.succeeded) == 0)
        self.assertTrue(len(ti_status.skipped) == 0)
        self.assertTrue(len(ti_status.failed) == 0)
        self.assertTrue(len(ti_status.to_run) == 1)

        ti_status.to_run.clear()

        session.close()