def test_mark_failure_on_failure_callback(self):
    """
    Check that a running task whose state is flipped to FAILED from outside
    (as mark_failure in the UI does) ends up FAILED and has its
    on_failure_callback invoked exactly once.
    """
    # Both counters are shared-memory values so that an update made from a
    # different process is still visible to the assertions below.
    callback_hits = Value('i', 0)
    killed_before_finishing = Value('i', 1)

    def check_failure(context):
        with callback_hits.get_lock():
            callback_hits.value += 1
        assert context['dag_run'].dag_id == 'test_mark_failure'
        assert context['exception'] == "task marked as failed externally"

    def task_function(ti):
        with create_session() as session:
            assert ti.state == State.RUNNING
            ti.log.info("Marking TI as failed 'externally'")
            ti.state = State.FAILED
            session.merge(ti)
            session.commit()

        time.sleep(10)
        # Getting past the sleep means nobody noticed the state change and
        # killed the task; record that so the test can fail on it.
        with killed_before_finishing.get_lock():
            killed_before_finishing.value = 0

    dag = DAG(dag_id='test_mark_failure', start_date=DEFAULT_DATE)
    task = PythonOperator(
        task_id='test_state_succeeded1',
        python_callable=task_function,
        on_failure_callback=check_failure,
        dag=dag,
    )

    dag.clear()
    with create_session() as session:
        dag.create_dagrun(
            run_id="test",
            state=State.RUNNING,
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            session=session,
        )

    ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
    ti.refresh_from_db()
    job = LocalTaskJob(task_instance=ti, ignore_ti_state=True, executor=SequentialExecutor())

    # The run is expected to finish far sooner than this limit. If the limit
    # is changed, keep the sleep inside task_function consistent with it.
    with timeout(30):
        job.run()

    ti.refresh_from_db()
    assert ti.state == State.FAILED
    assert callback_hits.value == 1
    assert killed_before_finishing.value == 1
def test_mark_failure_on_failure_callback(self):
    """
    Test that ensures that mark_failure in the UI fails
    the task, and executes on_failure_callback
    """
    # Mutable container the nested functions write into so the outcome can be
    # inspected after the job has run.
    # NOTE(review): a plain dict is only shared within one process. If the
    # task runner executes `task_function` in a child process, the
    # 'reached_end_of_sleep' write below is invisible here and the final
    # assertNotIn cannot fail -- confirm against the task runner in use.
    data = {'called': False}

    def check_failure(context):
        self.assertEqual(context['dag_run'].dag_id, 'test_mark_failure')
        data['called'] = True

    def task_function(ti):
        # print() does not interpolate %-style arguments, so format explicitly.
        print(f"python_callable run in pid {os.getpid()}")
        with create_session() as session:
            self.assertEqual(State.RUNNING, ti.state)
            ti.log.info("Marking TI as failed 'externally'")
            ti.state = State.FAILED
            session.merge(ti)
            session.commit()

        time.sleep(60)
        # This should not happen -- the state change should be noticed and the task should get killed
        data['reached_end_of_sleep'] = True

    with DAG(dag_id='test_mark_failure', start_date=DEFAULT_DATE) as dag:
        task = PythonOperator(
            task_id='test_state_succeeded1',
            python_callable=task_function,
            on_failure_callback=check_failure,
        )

    dag.clear()
    # Use the context manager so the session is always released, instead of
    # leaving a bare settings.Session() open for the rest of the test.
    with create_session() as session:
        dag.create_dagrun(
            run_id="test",
            state=State.RUNNING,
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            session=session,
        )

    ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
    ti.refresh_from_db()

    job1 = LocalTaskJob(task_instance=ti, ignore_ti_state=True, executor=SequentialExecutor())
    with timeout(30):
        # This should be _much_ shorter to run.
        # If you change this limit, make the sleep in the callable above bigger
        job1.run()

    ti.refresh_from_db()
    self.assertEqual(ti.state, State.FAILED)
    self.assertTrue(data['called'])
    self.assertNotIn(
        'reached_end_of_sleep', data, 'Task should not have been allowed to run to completion'
    )
def _run_task_by_local_task_job(args, ti):
    """Build a LocalTaskJob for ``ti`` from the options carried on ``args`` and run it."""
    # Map the attributes read from ``args`` onto LocalTaskJob keyword names.
    job_options = {
        'mark_success': args.mark_success,
        'pickle_id': args.pickle,
        'ignore_all_deps': args.ignore_all_dependencies,
        'ignore_depends_on_past': args.ignore_depends_on_past,
        'ignore_task_deps': args.ignore_dependencies,
        'ignore_ti_state': args.force,
        'pool': args.pool,
    }
    LocalTaskJob(task_instance=ti, **job_options).run()
def test_localtaskjob_maintain_heart_rate(self):
    """
    Run a LocalTaskJob whose task runner reports "still running" once and
    then exit code 0, and check that the job heartbeats (sleeps) exactly
    once, finishes in SUCCESS, and takes less wall time than one heart rate.
    """
    # Imported before the timer starts so import cost is not counted
    # against the heart-rate bound asserted at the end.
    from airflow.task.task_runner.standard_task_runner import StandardTaskRunner

    dagbag = DagBag(
        dag_folder=TEST_DAG_FOLDER,
        include_examples=False,
    )
    dag = dagbag.dags.get('test_localtaskjob_double_trigger')
    task = dag.get_task('test_localtaskjob_double_trigger_task')

    session = settings.Session()
    try:
        dag.clear()
        dag.create_dagrun(
            run_id="test",
            state=State.SUCCESS,
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            session=session,
        )

        ti_run = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        ti_run.refresh_from_db()
        job1 = LocalTaskJob(task_instance=ti_run, executor=SequentialExecutor())

        # this should make sure we only heartbeat once and exit at the second
        # loop in _execute()
        return_codes = [None, 0]

        def multi_return_code():
            return return_codes.pop(0)

        time_start = time.time()
        with patch.object(StandardTaskRunner, 'start', return_value=None) as mock_start, patch.object(
            StandardTaskRunner, 'return_code'
        ) as mock_ret_code:
            mock_ret_code.side_effect = multi_return_code
            job1.run()
            self.assertEqual(mock_start.call_count, 1)
            self.assertEqual(mock_ret_code.call_count, 2)
        time_end = time.time()

        self.assertEqual(self.mock_base_job_sleep.call_count, 1)
        self.assertEqual(job1.state, State.SUCCESS)

        # Consider we have patched sleep call, it should not be sleeping to
        # keep up with the heart rate in other unpatched places
        #
        # We already make sure patched sleep call is only called once
        self.assertLess(time_end - time_start, job1.heartrate)
    finally:
        # Release the session even when an assertion above fails.
        session.close()
def test_number_of_queries_single_loop(self, mock_get_task_runner, return_codes):
    """Run a LocalTaskJob for a DummyOperator task and check that the run issues 13 queries."""
    # A random prefix gives this invocation its own dag id and run id.
    run_prefix = str(uuid.uuid4())
    dag = DAG(dag_id=f'{run_prefix}_test_number_of_queries', start_date=DEFAULT_DATE)
    task = DummyOperator(task_id='test_state_succeeded1', dag=dag)
    dag.clear()
    dag.create_dagrun(run_id=run_prefix, state=State.NONE)

    # NOTE(review): `side_effects` (plural) is not the attribute that
    # unittest.mock consults -- that is `side_effect`. If
    # `mock_get_task_runner` is a Mock, this assignment does not influence
    # what `return_code()` returns, so `return_codes` is effectively unused.
    # Correcting the name may change how often the job loops and therefore
    # the query count asserted below -- confirm before changing.
    runner_mock = mock_get_task_runner.return_value
    runner_mock.return_code.side_effects = return_codes

    ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
    job = LocalTaskJob(task_instance=ti, executor=MockExecutor())
    with assert_queries_count(13):
        job.run()
def _run_task_by_local_task_job(args, ti):
    """
    Build a LocalTaskJob for ``ti`` from the options carried on ``args`` and
    run it; afterwards shut logging down if ``args.shut_down_logging`` is set.
    """
    # Map the attributes read from ``args`` onto LocalTaskJob keyword names.
    job_options = {
        'mark_success': args.mark_success,
        'pickle_id': args.pickle,
        'ignore_all_deps': args.ignore_all_dependencies,
        'ignore_depends_on_past': args.ignore_depends_on_past,
        'ignore_task_deps': args.ignore_dependencies,
        'ignore_ti_state': args.force,
        'pool': args.pool,
        'server_uri': args.server_uri,
    }
    local_job = LocalTaskJob(task_instance=ti, **job_options)
    try:
        local_job.run()
    finally:
        # Runs whether or not the job raised.
        if args.shut_down_logging:
            logging.shutdown()
def test_localtaskjob_double_trigger(self):
    """
    Check that running a LocalTaskJob for a task instance already recorded
    as RUNNING (on this host, pid 1) does not start the task runner and
    leaves the stored pid and state unchanged.
    """
    from airflow.task.task_runner.standard_task_runner import StandardTaskRunner

    dagbag = DagBag(
        dag_folder=TEST_DAG_FOLDER,
        include_examples=False,
    )
    dag = dagbag.dags.get('test_localtaskjob_double_trigger')
    task = dag.get_task('test_localtaskjob_double_trigger_task')

    session = settings.Session()
    try:
        dag.clear()
        dr = dag.create_dagrun(
            run_id="test",
            state=State.SUCCESS,
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            session=session,
        )

        # Persist a task instance that looks like it is already being run.
        ti = dr.get_task_instance(task_id=task.task_id, session=session)
        ti.state = State.RUNNING
        ti.hostname = get_hostname()
        ti.pid = 1
        session.merge(ti)
        session.commit()

        ti_run = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        ti_run.refresh_from_db()
        job1 = LocalTaskJob(task_instance=ti_run, executor=SequentialExecutor())

        with patch.object(StandardTaskRunner, 'start', return_value=None) as mock_method:
            job1.run()
            mock_method.assert_not_called()

        ti = dr.get_task_instance(task_id=task.task_id, session=session)
        self.assertEqual(ti.pid, 1)
        self.assertEqual(ti.state, State.RUNNING)
    finally:
        # Release the session even when an assertion above fails.
        session.close()
def test_local_task_job(self):
    """Run a LocalTaskJob for ``self.runme_0`` with ``ignore_ti_state=True``; passes if nothing raises."""
    task_instance = TaskInstance(task=self.runme_0, execution_date=DEFAULT_DATE)
    LocalTaskJob(task_instance=task_instance, ignore_ti_state=True).run()
def test_failure_callback_only_called_once(self, mock_return_code, _check_call):
    """
    Check that when a task fails on its own, the failure callback runs
    exactly once.
    """
    # The counter is a shared-memory value so that an increment made from a
    # different process is still visible to the final assertion.
    callback_hits = Value('i', 0)
    callback_count_lock = Lock()

    def failure_callback(context):
        with callback_count_lock:
            callback_hits.value += 1
        assert context['dag_run'].dag_id == 'test_failure_callback_race'
        assert isinstance(context['exception'], AirflowFailException)

    def task_function(ti):
        raise AirflowFailException()

    with DAG(dag_id='test_failure_callback_race', start_date=DEFAULT_DATE) as dag:
        task = PythonOperator(
            task_id='test_exit_on_failure',
            python_callable=task_function,
            on_failure_callback=failure_callback,
        )

    dag.clear()
    with create_session() as session:
        dag.create_dagrun(
            run_id="test",
            state=State.RUNNING,
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            session=session,
        )

    ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
    ti.refresh_from_db()
    job = LocalTaskJob(task_instance=ti, ignore_ti_state=True, executor=SequentialExecutor())

    # Simulate the race where the job's heartbeat runs right after
    # ti.handle_failure has set the task state to failed but before the task
    # process has fully exited (see the _execute loop in
    # airflow/jobs/local_task_job.py). At that moment:
    #   * task_runner.return_code() is None
    #   * ti.state == State.FAILED
    #
    # Once job.terminating becomes True the mock must return a real int,
    # otherwise the _execute loop would never end.
    def dummy_return_code(*args, **kwargs):
        if job.terminating:
            return -9
        return None

    mock_return_code.side_effect = dummy_return_code

    # Generous upper bound; the run is expected to finish far sooner.
    with timeout(10):
        job.run()

    ti.refresh_from_db()
    assert ti.state == State.FAILED  # task exits with failure state
    assert callback_hits.value == 1