def test_backfill_rerun_failed_tasks(self):
    """With rerun_failed_tasks=True, a backfill re-runs a TI that was set FAILED."""
    dag = DAG(
        dag_id='test_backfill_rerun_failed',
        start_date=DEFAULT_DATE,
        schedule_interval='@daily',
    )
    with dag:
        DummyOperator(task_id='test_backfill_rerun_failed_task-1', dag=dag)
    dag.clear()

    executor = MockExecutor()
    end = DEFAULT_DATE + datetime.timedelta(days=2)

    # First backfill over the window: the task runs normally.
    BackfillJob(
        dag=dag,
        executor=executor,
        start_date=DEFAULT_DATE,
        end_date=end,
    ).run()

    # Force the first run's TI into FAILED so the second backfill has
    # something to pick up.
    ti = TI(
        task=dag.get_task('test_backfill_rerun_failed_task-1'),
        execution_date=DEFAULT_DATE,
    )
    ti.refresh_from_db()
    ti.set_state(State.FAILED)

    # Second backfill with the flag: the failed TI must be rerun to SUCCESS.
    BackfillJob(
        dag=dag,
        executor=executor,
        start_date=DEFAULT_DATE,
        end_date=end,
        rerun_failed_tasks=True,
    ).run()

    ti = TI(
        task=dag.get_task('test_backfill_rerun_failed_task-1'),
        execution_date=DEFAULT_DATE,
    )
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.SUCCESS)
def test_backfill_rerun_failed_tasks_without_flag(self):
    """Without rerun_failed_tasks, backfilling over a FAILED TI raises AirflowException."""
    dag = DAG(
        dag_id='test_backfill_rerun_failed',
        start_date=DEFAULT_DATE,
        schedule_interval='@daily',
    )
    with dag:
        DummyOperator(task_id='test_backfill_rerun_failed_task-1', dag=dag)
    dag.clear()

    executor = MockExecutor()
    end = DEFAULT_DATE + datetime.timedelta(days=2)

    # First backfill runs the task cleanly over the window.
    BackfillJob(
        dag=dag,
        executor=executor,
        start_date=DEFAULT_DATE,
        end_date=end,
    ).run()

    # Manually fail the first run's TI.
    ti = TI(
        task=dag.get_task('test_backfill_rerun_failed_task-1'),
        execution_date=DEFAULT_DATE,
    )
    ti.refresh_from_db()
    ti.set_state(State.FAILED)

    # Second backfill without the flag must refuse to proceed over the
    # failed task instance.
    job = BackfillJob(
        dag=dag,
        executor=executor,
        start_date=DEFAULT_DATE,
        end_date=end,
        rerun_failed_tasks=False,
    )
    with self.assertRaises(AirflowException):
        job.run()
def test_task_states_for_dag_run(self):
    """`tasks states-for-dag-run --output json` emits one record per TI with its fields."""
    dag2 = DagBag().dags['example_python_operator']
    task2 = dag2.get_task(task_id='print_the_context')
    default_date2 = timezone.make_aware(datetime(2016, 1, 9))
    dag2.clear()

    # Persist a SUCCESS state so the CLI has something to report.
    ti2 = TaskInstance(task2, default_date2)
    ti2.set_state(State.SUCCESS)
    ti_start = ti2.start_date
    ti_end = ti2.end_date

    with redirect_stdout(io.StringIO()) as stdout:
        args = self.parser.parse_args([
            'tasks',
            'states-for-dag-run',
            'example_python_operator',
            default_date2.isoformat(),
            '--output',
            "json",
        ])
        task_command.task_states_for_dag_run(args)
    actual_out = json.loads(stdout.getvalue())

    assert len(actual_out) == 1
    assert actual_out[0] == {
        'dag_id': 'example_python_operator',
        'execution_date': '2016-01-09T00:00:00+00:00',
        'task_id': 'print_the_context',
        'state': 'success',
        'start_date': ti_start.isoformat(),
        'end_date': ti_end.isoformat(),
    }
def test_task_states_for_dag_run(self):
    """`tasks states_for_dag_run` prints a fancy_grid table of TI states.

    Marks one TI SUCCESS, invokes the CLI command, and compares its stdout
    against the table tabulate would render for that row.
    """
    dag2 = DagBag().dags['example_python_operator']
    task2 = dag2.get_task(task_id='print_the_context')
    # Fixed: local variable was misspelled `defaut_date2`.
    default_date2 = timezone.make_aware(datetime(2016, 1, 9))
    ti2 = TaskInstance(task2, default_date2)
    ti2.set_state(State.SUCCESS)
    ti_start = ti2.start_date
    ti_end = ti2.end_date
    with redirect_stdout(io.StringIO()) as stdout:
        task_command.task_states_for_dag_run(
            self.parser.parse_args([
                'tasks', 'states_for_dag_run', 'example_python_operator',
                default_date2.isoformat()]))
    actual_out = stdout.getvalue()
    formatted_rows = [
        ('example_python_operator',
         '2016-01-09 00:00:00+00:00',
         'print_the_context',
         'success', ti_start, ti_end)
    ]
    expected = tabulate(
        formatted_rows,
        ['dag', 'exec_date', 'task', 'state', 'start_date', 'end_date'],
        tablefmt="fancy_grid")
    # Check that prints, and log messages, are shown
    self.assertEqual(expected.replace("\n", ""), actual_out.replace("\n", ""))
def test_wait_for_downstream(self, prev_ti_state, is_ti_success):
    """wait_for_downstream: run 2's upstream task succeeds iff run 1's
    downstream task finished in the state that allows it."""
    dag = self.dagbag.get_dag('test_wait_for_downstream')
    upstream, downstream = dag.tasks

    # For ti.set_state() to work, the DagRun has to exist,
    # Otherwise ti.previous_ti returns an unpersisted TI
    run1_date = timezone.datetime(2016, 1, 1, 0, 0, 0)
    run2_date = timezone.datetime(2016, 1, 2, 0, 0, 0)
    self.create_dag_run(dag, execution_date=run1_date)
    self.create_dag_run(dag, execution_date=run2_date)

    prev_ti_downstream = TI(task=downstream, execution_date=run1_date)
    ti = TI(task=upstream, execution_date=run2_date)

    # The previous run's upstream TI must be SUCCESS so only the
    # downstream TI's state (the parametrized input) decides the outcome.
    prev_ti = ti.get_previous_ti()
    prev_ti.set_state(State.SUCCESS)
    assert prev_ti.state == State.SUCCESS

    prev_ti_downstream.set_state(prev_ti_state)
    ti.set_state(State.QUEUED)
    ti.run()
    assert (ti.state == State.SUCCESS) == is_ti_success
def test_get_states_count_upstream_ti(self):
    """
    Exercise TriggerRuleDep._get_states_count_upstream_ti both as a unit
    and indirectly through DagRun.update_state.

    Builds a diamond DAG (A >> [B, C], [B, C] >> D, [B, C, D] >> E where E
    uses TriggerRule.ONE_FAILED), persists a fixed state for each TI, then
    checks the (success, skipped, failed, upstream_failed, done) tuples
    computed for tasks with upstream dependencies.
    """
    from airflow.ti_deps.dep_context import DepContext
    # Shorthand for the static helper under test.
    get_states_count_upstream_ti = TriggerRuleDep._get_states_count_upstream_ti
    session = settings.Session()
    now = timezone.utcnow()
    dag = DAG(
        'test_dagrun_with_pre_tis',
        start_date=DEFAULT_DATE,
        default_args={'owner': 'owner1'})
    with dag:
        op1 = DummyOperator(task_id='A')
        op2 = DummyOperator(task_id='B')
        op3 = DummyOperator(task_id='C')
        op4 = DummyOperator(task_id='D')
        # E fires as soon as any one of its upstream tasks fails.
        op5 = DummyOperator(task_id='E', trigger_rule=TriggerRule.ONE_FAILED)
        op1.set_downstream([op2, op3])  # op1 >> op2, op3
        op4.set_upstream([op3, op2])  # op3, op2 >> op4
        op5.set_upstream([op2, op3, op4])  # (op2, op3, op4) >> op5
    clear_db_runs()
    dag.clear()
    dr = dag.create_dagrun(run_id='test_dagrun_with_pre_tis',
                           state=State.RUNNING,
                           execution_date=now,
                           start_date=now)
    ti_op1 = TaskInstance(task=dag.get_task(op1.task_id),
                          execution_date=dr.execution_date)
    ti_op2 = TaskInstance(task=dag.get_task(op2.task_id),
                          execution_date=dr.execution_date)
    ti_op3 = TaskInstance(task=dag.get_task(op3.task_id),
                          execution_date=dr.execution_date)
    ti_op4 = TaskInstance(task=dag.get_task(op4.task_id),
                          execution_date=dr.execution_date)
    ti_op5 = TaskInstance(task=dag.get_task(op5.task_id),
                          execution_date=dr.execution_date)
    # Persist a fixed outcome for every TI: only B failed.
    ti_op1.set_state(state=State.SUCCESS, session=session)
    ti_op2.set_state(state=State.FAILED, session=session)
    ti_op3.set_state(state=State.SUCCESS, session=session)
    ti_op4.set_state(state=State.SUCCESS, session=session)
    ti_op5.set_state(state=State.SUCCESS, session=session)
    session.commit()

    # check handling with cases that tasks are triggered from backfill with no finished tasks
    finished_tasks = DepContext().ensure_finished_tasks(
        ti_op2.task.dag, ti_op2.execution_date, session)
    # B's only upstream is A (SUCCESS): 1 success, 0 skipped/failed/upstream_failed, 1 done.
    self.assertEqual(
        get_states_count_upstream_ti(finished_tasks=finished_tasks, ti=ti_op2),
        (1, 0, 0, 0, 1))
    finished_tasks = dr.get_task_instances(
        state=State.finished() + [State.UPSTREAM_FAILED], session=session)
    # D's upstreams are B (FAILED) and C (SUCCESS).
    self.assertEqual(
        get_states_count_upstream_ti(finished_tasks=finished_tasks, ti=ti_op4),
        (1, 0, 1, 0, 2))
    # E's upstreams are B (FAILED) plus C and D (SUCCESS).
    self.assertEqual(
        get_states_count_upstream_ti(finished_tasks=finished_tasks, ti=ti_op5),
        (2, 0, 1, 0, 3))
    dr.update_state()
    self.assertEqual(State.SUCCESS, dr.state)
def get_test_ti(session, execution_date: pendulum.DateTime, state: str) -> TI:
    """Create a scheduled DagRun in *state* for *execution_date* and return
    a TaskInstance for it that has been persisted as SUCCESS.

    Uses ``dag`` and ``task`` from the enclosing scope.

    NOTE(review): the ``state`` argument only sets the DagRun's state; the
    returned TaskInstance is always forced to SUCCESS — confirm that this
    asymmetry is intended.
    """
    dag.create_dagrun(
        run_id='scheduled__{}'.format(execution_date.to_iso8601_string()),
        state=state,
        execution_date=execution_date,
        start_date=pendulum.utcnow(),
        session=session
    )
    ti = TI(task=task, execution_date=execution_date)
    ti.set_state(state=State.SUCCESS, session=session)
    return ti
def resetTasksStatus(task_id):
    """Clear the state of *task_id*'s TaskInstance (for "now") back to None,
    logging the state before and after."""
    dag_folder = conf.get('core', 'DAGS_FOLDER')
    check_dag = DagBag(dag_folder).dags[main_dag_id]
    session = settings.Session()

    # NOTE: datetime.now() is naive and changes every call — this only
    # matches a TI whose execution_date is exactly this instant.
    execution_date = datetime.now()
    ti = TaskInstance(check_dag.get_task(task_id), execution_date)

    logging.info("Current state of " + task_id + " is " + str(ti.current_state()))
    ti.set_state(None, session)
    logging.info("Updated state of " + task_id + " is " + str(ti.current_state()))
def test_retry_handling(self, mock_pool_full):
    """
    Test that task retries are handled properly
    """
    # Mock the pool with a pool with slots open since the pool doesn't actually exist
    mock_pool_full.return_value = False

    dag = models.DAG(dag_id='test_retry_handling')
    task = BashOperator(
        task_id='test_retry_handling_op',
        bash_command='exit 1',  # always fails, driving the retry machinery
        retries=1,
        retry_delay=datetime.timedelta(seconds=0),
        dag=dag,
        owner='airflow',
        start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))

    ti = TI(task=task, execution_date=datetime.datetime.now())

    def run_and_swallow_error():
        # The task raises on failure; we only care about the resulting state.
        try:
            ti.run()
        except AirflowException:
            pass

    # first run -- up for retry
    run_and_swallow_error()
    self.assertEqual(ti.state, State.UP_FOR_RETRY)
    self.assertEqual(ti.try_number, 1)

    # second run -- fail
    run_and_swallow_error()
    self.assertEqual(ti.state, State.FAILED)
    self.assertEqual(ti.try_number, 2)

    # Clear the TI state since you can't run a task with a FAILED state without
    # clearing it first
    ti.set_state(None, settings.Session())

    # third run -- up for retry
    run_and_swallow_error()
    self.assertEqual(ti.state, State.UP_FOR_RETRY)
    self.assertEqual(ti.try_number, 3)

    # fourth run -- fail
    run_and_swallow_error()
    self.assertEqual(ti.state, State.FAILED)
    self.assertEqual(ti.try_number, 4)
def test_retry_handling(self, mock_pool_full):
    """
    Test that task retries are handled properly
    """
    # Mock the pool with a pool with slots open since the pool doesn't actually exist
    mock_pool_full.return_value = False

    dag = models.DAG(dag_id='test_retry_handling')
    task = BashOperator(
        task_id='test_retry_handling_op',
        bash_command='exit 1',  # always fails, so every run exercises retries
        retries=1,
        retry_delay=datetime.timedelta(seconds=0),
        dag=dag,
        owner='airflow',
        start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))

    def run_with_error(task_instance):
        try:
            task_instance.run()
        except AirflowException:
            pass

    ti = TI(task=task, execution_date=datetime.datetime.now())

    # Runs 1-2: retry then terminal failure; after clearing, runs 3-4
    # repeat the same cycle with the try counter continuing to climb.
    for expected_try, expected_state in [
            (1, State.UP_FOR_RETRY),
            (2, State.FAILED),
            (3, State.UP_FOR_RETRY),
            (4, State.FAILED)]:
        if expected_try == 3:
            # Clear the TI state since you can't run a task with a FAILED
            # state without clearing it first
            ti.set_state(None, settings.Session())
        run_with_error(ti)
        self.assertEqual(ti.state, expected_state)
        self.assertEqual(ti.try_number, expected_try)
def _run_task(self, ti: TaskInstance) -> bool:
    """Execute one task instance inline and record its terminal state.

    Returns True if the task ran to success, False on any failure.
    """
    self.log.debug("Executing task: %s", ti)
    key = ti.key
    try:
        run_kwargs = self.tasks_params.pop(ti.key, {})
        _run_raw_task(ti, job_id=ti.job_id, **run_kwargs)  # pylint: disable=protected-access
        self.change_state(key, State.SUCCESS)
        _run_finished_callback(ti)  # pylint: disable=protected-access
        return True
    except Exception as e:  # pylint: disable=broad-except
        # Executor boundary: any failure is recorded, never propagated.
        ti.set_state(State.FAILED)
        self.change_state(key, State.FAILED)
        _run_finished_callback(ti)  # pylint: disable=protected-access
        self.log.exception("Failed to execute task: %s.", str(e))
        return False
def test_depends_on_past(self, prev_ti_state, is_ti_success):
    """depends_on_past: run 2 of the task succeeds iff run 1 ended in a
    state that satisfies the dependency (parametrized)."""
    dag = self.dagbag.get_dag('test_depends_on_past')
    task = dag.tasks[0]

    first_date = timezone.datetime(2016, 1, 1, 0, 0, 0)
    second_date = timezone.datetime(2016, 1, 2, 0, 0, 0)
    self.create_dag_run(dag, execution_date=first_date)
    self.create_dag_run(dag, execution_date=second_date)

    prev_ti = TI(task, first_date)
    ti = TI(task, second_date)

    prev_ti.set_state(prev_ti_state)
    ti.set_state(State.QUEUED)
    ti.run()
    assert (ti.state == State.SUCCESS) == is_ti_success
def test_update_counters(self):
    """BackfillJob._update_counters moves a running TI into exactly the
    bucket matching its state (succeeded / skipped / failed / to_run).

    Improvements over the original: assertTrue(len(x) == n) replaced with
    assertEqual for informative failure messages, and the six copy-pasted
    state cases factored into two local helpers.
    """
    dag = DAG(dag_id='test_manage_executor_state', start_date=DEFAULT_DATE)
    task1 = DummyOperator(task_id='dummy', dag=dag, owner='airflow')
    job = BackfillJob(dag=dag)
    session = settings.Session()
    dr = dag.create_dagrun(run_id=DagRun.ID_PREFIX,
                           state=State.RUNNING,
                           execution_date=DEFAULT_DATE,
                           start_date=DEFAULT_DATE,
                           session=session)
    ti = TI(task1, dr.execution_date)
    ti.refresh_from_db()
    ti_status = BackfillJob._DagRunTaskStatus()

    def _run_case(state):
        # Put the TI in `state`, register it as running, then let the job
        # re-bucket it.
        ti.set_state(state, session)
        ti_status.running[ti.key] = ti
        job._update_counters(ti_status=ti_status)

    def _check_buckets(succeeded=0, skipped=0, failed=0, to_run=0):
        # The TI must always leave `running` and land in exactly one bucket.
        self.assertEqual(len(ti_status.running), 0)
        self.assertEqual(len(ti_status.succeeded), succeeded)
        self.assertEqual(len(ti_status.skipped), skipped)
        self.assertEqual(len(ti_status.failed), failed)
        self.assertEqual(len(ti_status.to_run), to_run)

    # test for success
    _run_case(State.SUCCESS)
    _check_buckets(succeeded=1)
    ti_status.succeeded.clear()

    # test for skipped
    _run_case(State.SKIPPED)
    _check_buckets(skipped=1)
    ti_status.skipped.clear()

    # test for failed
    _run_case(State.FAILED)
    _check_buckets(failed=1)
    ti_status.failed.clear()

    # test for retry -- goes back into to_run
    _run_case(State.UP_FOR_RETRY)
    _check_buckets(to_run=1)
    ti_status.to_run.clear()

    # test for reschedule -- goes back into to_run
    _run_case(State.UP_FOR_RESCHEDULE)
    _check_buckets(to_run=1)
    ti_status.to_run.clear()

    # test for none -- e.g. externally cleared; goes back into to_run
    _run_case(State.NONE)
    _check_buckets(to_run=1)
    ti_status.to_run.clear()

    session.close()