def test_backfill_rerun_failed_tasks_without_flag(self):
    """A second backfill must raise when a FAILED TI exists and rerun_failed_tasks is off."""
    dag = DAG(dag_id='test_backfill_rerun_failed',
              start_date=DEFAULT_DATE,
              schedule_interval='@daily')
    with dag:
        DummyOperator(task_id='test_backfill_rerun_failed_task-1', dag=dag)
    dag.clear()

    executor = TestExecutor()
    first_backfill = BackfillJob(
        dag=dag,
        executor=executor,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE + datetime.timedelta(days=2),
    )
    first_backfill.run()

    # Force one finished task instance into FAILED so the retry backfill
    # trips over it.
    failed_ti = TI(task=dag.get_task('test_backfill_rerun_failed_task-1'),
                   execution_date=DEFAULT_DATE)
    failed_ti.refresh_from_db()
    failed_ti.set_state(State.FAILED)

    retry_backfill = BackfillJob(
        dag=dag,
        executor=executor,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE + datetime.timedelta(days=2),
        rerun_failed_tasks=False,
    )
    with self.assertRaises(AirflowException):
        retry_backfill.run()
def test_cli_backfill_depends_on_past(self):
    """
    Test that CLI respects -I argument
    """
    dag_id = 'test_dagrun_states_deadlock'
    run_date = DEFAULT_DATE + datetime.timedelta(days=1)
    args = [
        'backfill',
        dag_id,
        '-l',
        '-s',
        run_date.isoformat(),
    ]
    dag = self.dagbag.get_dag(dag_id)
    dag.clear()

    # Without -I the depends_on_past chain has no prior run, so the
    # backfill deadlocks.  assertRaisesRegexp is a deprecated alias;
    # use assertRaisesRegex (consistent with other tests in this file).
    self.assertRaisesRegex(
        AirflowException,
        'BackfillJob is deadlocked',
        cli.backfill,
        self.parser.parse_args(args))

    # -I (ignore_first_depends_on_past) lets the first run proceed.
    cli.backfill(self.parser.parse_args(args + ['-I']))
    ti = TI(dag.get_task('test_depends_on_past'), run_date)
    ti.refresh_from_db()
    # task ran
    self.assertEqual(ti.state, State.SUCCESS)
    dag.clear()
def test_backfill_depends_on_past(self):
    """
    Test that backfill respects ignore_depends_on_past
    """
    dag = self.dagbag.get_dag('test_depends_on_past')
    dag.clear()
    run_date = DEFAULT_DATE + datetime.timedelta(days=5)

    # With no prior run, depends_on_past deadlocks the backfill.
    self.assertRaisesRegex(
        AirflowException,
        'BackfillJob is deadlocked',
        BackfillJob(dag=dag, start_date=run_date, end_date=run_date).run)

    # Ignoring depends_on_past for the first run unblocks it.
    BackfillJob(
        dag=dag,
        start_date=run_date,
        end_date=run_date,
        executor=MockExecutor(),
        ignore_first_depends_on_past=True).run()

    # ti should have succeeded
    backfilled_ti = TI(dag.tasks[0], run_date)
    backfilled_ti.refresh_from_db()
    self.assertEqual(backfilled_ti.state, State.SUCCESS)
def test_scheduler_pooled_tasks(self):
    """
    Test that the scheduler handles queued tasks correctly

    See issue #1299
    """
    session = settings.Session()
    existing_pool = session.query(Pool).filter(
        Pool.pool == 'test_queued_pool').first()
    if not existing_pool:
        # Create the pool only on the first run of this test.
        session.merge(Pool(pool='test_queued_pool', slots=5))
        session.commit()
    session.close()

    dag_id = 'test_scheduled_queued_tasks'
    dag = self.dagbag.get_dag(dag_id)
    dag.clear()

    scheduler = SchedulerJob(dag_id, num_runs=10)
    scheduler.run()

    first_task = dag.tasks[0]
    ti = TI(first_task, dag.start_date)
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.FAILED)
    dag.clear()
def test_clear_task_instances_without_task(self):
    """Clearing TIs whose task was removed from the DAG still updates max_tries."""
    dag = DAG('test_clear_task_instances_without_task',
              start_date=DEFAULT_DATE,
              end_date=DEFAULT_DATE + datetime.timedelta(days=10))
    task0 = DummyOperator(task_id='task0', owner='test', dag=dag)
    task1 = DummyOperator(task_id='task1', owner='test', dag=dag, retries=2)
    ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
    ti1 = TI(task=task1, execution_date=DEFAULT_DATE)
    ti0.run()
    ti1.run()

    # Remove the task from dag.
    dag.task_dict = {}
    self.assertFalse(dag.has_task(task0.task_id))
    self.assertFalse(dag.has_task(task1.task_id))

    session = settings.Session()
    instances = session.query(TI).filter(TI.dag_id == dag.dag_id).all()
    clear_task_instances(instances, session)
    session.commit()

    # When dag is None, max_tries will be maximum of original max_tries or try_number.
    ti0.refresh_from_db()
    ti1.refresh_from_db()
    # Next try to run will be try 2
    self.assertEqual(ti0.try_number, 2)
    self.assertEqual(ti0.max_tries, 1)
    self.assertEqual(ti1.try_number, 2)
    self.assertEqual(ti1.max_tries, 2)
def test_clear_task_instances_without_dag(self):
    """Clearing TIs without passing the DAG caps max_tries at max(original, tries)."""
    dag = DAG('test_clear_task_instances_without_dag',
              start_date=DEFAULT_DATE,
              end_date=DEFAULT_DATE + datetime.timedelta(days=10))
    task0 = DummyOperator(task_id='task_0', owner='test', dag=dag)
    task1 = DummyOperator(task_id='task_1', owner='test', dag=dag, retries=2)
    ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
    ti1 = TI(task=task1, execution_date=DEFAULT_DATE)
    ti0.run()
    ti1.run()

    with create_session() as session:
        instances = session.query(TI).filter(TI.dag_id == dag.dag_id).all()
        clear_task_instances(instances, session)

    # When dag is None, max_tries will be maximum of original max_tries or try_number.
    ti0.refresh_from_db()
    ti1.refresh_from_db()
    # Next try to run will be try 2
    self.assertEqual(ti0.try_number, 2)
    self.assertEqual(ti0.max_tries, 1)
    self.assertEqual(ti1.try_number, 2)
    self.assertEqual(ti1.max_tries, 2)
def test_clear_task_instances(self):
    """clear_task_instances with the DAG bumps max_tries by the task's retries."""
    dag = DAG(
        'test_clear_task_instances',
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE + datetime.timedelta(days=10),
    )
    task0 = DummyOperator(task_id='0', owner='test', dag=dag)
    task1 = DummyOperator(task_id='1', owner='test', dag=dag, retries=2)
    ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
    ti1 = TI(task=task1, execution_date=DEFAULT_DATE)
    dag.create_dagrun(
        execution_date=ti0.execution_date,
        state=State.RUNNING,
        run_type=DagRunType.SCHEDULED,
    )
    ti0.run()
    ti1.run()

    with create_session() as session:
        rows = session.query(TI).filter(TI.dag_id == dag.dag_id).all()
        clear_task_instances(rows, session, dag=dag)

    ti0.refresh_from_db()
    ti1.refresh_from_db()
    # Next try to run will be try 2
    assert ti0.try_number == 2
    assert ti0.max_tries == 1
    assert ti1.try_number == 2
    assert ti1.max_tries == 3
def test_log_file_template_with_run_task(self):
    """Verify that the taskinstance has the right context for log_filename_template"""
    # Patch out actual task execution; we only care about the log file
    # path the run command resolves for the TI.
    with mock.patch.object(task_command, "_run_task_by_selected_method"):
        with conf_vars({('core', 'dags_folder'): self.dag_path}):
            # increment the try_number of the task to be run
            dag = DagBag().get_dag(self.dag_id)
            task = dag.get_task(self.task_id)
            with create_session() as session:
                dag.create_dagrun(
                    execution_date=self.execution_date,
                    start_date=timezone.utcnow(),
                    state=State.RUNNING,
                    run_type=DagRunType.MANUAL,
                    session=session,
                )
                ti = TaskInstance(task, self.execution_date)
                ti.refresh_from_db(session=session, lock_for_update=True)
                ti.try_number = 1  # not running, so starts at 0
                session.merge(ti)
            # With try_number=1 the template should resolve to "2.log"
            # next to the configured TI log path.
            log_file_path = os.path.join(
                os.path.dirname(self.ti_log_file_path), "2.log")
            try:
                task_command.task_run(
                    self.parser.parse_args(self.task_args))
                assert os.path.exists(log_file_path)
            finally:
                # Best-effort cleanup of the generated log file.
                try:
                    os.remove(log_file_path)
                except OSError:
                    pass
def test_backfill_rerun_failed_tasks(self):
    """With rerun_failed_tasks=True a second backfill re-runs FAILED TIs to SUCCESS."""
    dag = DAG(dag_id='test_backfill_rerun_failed',
              start_date=DEFAULT_DATE,
              schedule_interval='@daily')
    with dag:
        DummyOperator(task_id='test_backfill_rerun_failed_task-1', dag=dag)
    dag.clear()

    executor = TestExecutor()
    first_backfill = BackfillJob(
        dag=dag,
        executor=executor,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE + datetime.timedelta(days=2),
    )
    first_backfill.run()

    # Fail one of the finished task instances by hand.
    failed_ti = TI(task=dag.get_task('test_backfill_rerun_failed_task-1'),
                   execution_date=DEFAULT_DATE)
    failed_ti.refresh_from_db()
    failed_ti.set_state(State.FAILED)

    second_backfill = BackfillJob(
        dag=dag,
        executor=executor,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE + datetime.timedelta(days=2),
        rerun_failed_tasks=True,
    )
    second_backfill.run()

    rerun_ti = TI(task=dag.get_task('test_backfill_rerun_failed_task-1'),
                  execution_date=DEFAULT_DATE)
    rerun_ti.refresh_from_db()
    self.assertEqual(rerun_ti.state, State.SUCCESS)
def test_cli_backfill_depends_on_past(self):
    """-I makes the CLI backfill run a depends_on_past task that has no prior run."""
    dag_id = 'test_dagrun_states_deadlock'
    run_date = DEFAULT_DATE + datetime.timedelta(days=1)
    cli_args = [
        'backfill',
        dag_id,
        '-l',
        '-sd',
        TEST_DAGS_FOLDER,
        '-s',
        run_date.isoformat(),
    ]
    dag = get_dag(dag_id, TEST_DAGS_FOLDER)

    cli.backfill(self.parser.parse_args(cli_args))
    ti = TI(dag.get_task('test_depends_on_past'), run_date)
    ti.refresh_from_db()
    # task did not run
    self.assertEqual(ti.state, State.NONE)

    cli.backfill(self.parser.parse_args(cli_args + ['-I']))
    ti = TI(dag.get_task('test_depends_on_past'), run_date)
    ti.refresh_from_db()
    # task ran
    self.assertEqual(ti.state, State.SUCCESS)
def test_backfill_pooled_tasks(self):
    """
    Test that queued tasks are executed by BackfillJob
    """
    session = settings.Session()
    session.add(Pool(pool='test_backfill_pooled_task_pool', slots=1))
    session.commit()
    session.close()

    dag = self.dagbag.get_dag('test_backfill_pooled_task_dag')
    dag.clear()

    executor = TestExecutor(do_update=True)
    backfill = BackfillJob(
        dag=dag,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        executor=executor)

    # run with timeout because this creates an infinite loop if not
    # caught
    try:
        with timeout(seconds=5):
            backfill.run()
    except AirflowTaskTimeout:
        pass

    pooled_ti = TI(
        task=dag.get_task('test_backfill_pooled_task'),
        execution_date=DEFAULT_DATE)
    pooled_ti.refresh_from_db()
    self.assertEqual(pooled_ti.state, State.SUCCESS)
def test_backfill_pooled_tasks(self):
    """
    Test that queued tasks are executed by BackfillJob

    Test for https://github.com/airbnb/airflow/pull/1225
    """
    session = settings.Session()
    session.add(Pool(pool='test_backfill_pooled_task_pool', slots=1))
    session.commit()

    dag = self.dagbag.get_dag('test_backfill_pooled_task_dag')
    dag.clear()

    backfill = BackfillJob(
        dag=dag,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE)

    # run with timeout because this creates an infinite loop if not
    # caught
    with timeout(seconds=30):
        backfill.run()

    pooled_ti = TI(
        task=dag.get_task('test_backfill_pooled_task'),
        execution_date=DEFAULT_DATE)
    pooled_ti.refresh_from_db()
    self.assertEqual(pooled_ti.state, State.SUCCESS)
def test_backfill_pooled_tasks(self):
    """
    Test that queued tasks are executed by BackfillJob

    Test for https://github.com/airbnb/airflow/pull/1225
    """
    session = settings.Session()
    test_pool = Pool(pool='test_backfill_pooled_task_pool', slots=1)
    session.add(test_pool)
    session.commit()

    dag = self.dagbag.get_dag('test_backfill_pooled_task_dag')
    dag.clear()

    backfill_job = BackfillJob(dag=dag,
                               start_date=DEFAULT_DATE,
                               end_date=DEFAULT_DATE)

    # run with timeout because this creates an infinite loop if not
    # caught
    with timeout(seconds=30):
        backfill_job.run()

    ti = TI(task=dag.get_task('test_backfill_pooled_task'),
            execution_date=DEFAULT_DATE)
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.SUCCESS)
def test_cli_backfill_depends_on_past(self):
    """
    Test that CLI respects -I argument
    """
    dag_id = 'test_dagrun_states_deadlock'
    run_date = DEFAULT_DATE + datetime.timedelta(days=1)
    args = [
        'backfill',
        dag_id,
        '-l',
        '-s',
        run_date.isoformat(),
    ]
    dag = self.dagbag.get_dag(dag_id)
    dag.clear()

    # Without -I the depends_on_past chain has no prior run, so the
    # backfill deadlocks.  assertRaisesRegexp is a deprecated alias;
    # use assertRaisesRegex (consistent with other tests in this file).
    self.assertRaisesRegex(AirflowException,
                           'BackfillJob is deadlocked',
                           cli.backfill,
                           self.parser.parse_args(args))

    # -I (ignore_first_depends_on_past) lets the first run proceed.
    cli.backfill(self.parser.parse_args(args + ['-I']))
    ti = TI(dag.get_task('test_depends_on_past'), run_date)
    ti.refresh_from_db()
    # task ran
    self.assertEqual(ti.state, State.SUCCESS)
    dag.clear()
def test_depends_on_past(self):
    """depends_on_past blocks a run with no prior run unless explicitly ignored."""
    dag = DAG(dag_id='test_depends_on_past', start_date=DEFAULT_DATE)
    task = DummyOperator(
        task_id='test_dop_task',
        dag=dag,
        depends_on_past=True,
    )
    dag.clear()
    run_date = task.start_date + datetime.timedelta(days=5)
    ti = TI(task, run_date)

    # depends_on_past prevents the run
    task.run(start_date=run_date, end_date=run_date)
    ti.refresh_from_db()
    self.assertIs(ti.state, None)

    # ignore first depends_on_past to allow the run
    task.run(start_date=run_date,
             end_date=run_date,
             ignore_first_depends_on_past=True)
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.SUCCESS)
def test_scheduler_pooled_tasks(self):
    """
    Test that the scheduler handles queued tasks correctly

    See issue #1299
    """
    session = settings.Session()
    pool_exists = session.query(Pool).filter(
        Pool.pool == 'test_queued_pool').first()
    if not pool_exists:
        # Create the pool only on the first run of this test.
        session.merge(Pool(pool='test_queued_pool', slots=5))
        session.commit()
    session.close()

    dag_id = 'test_scheduled_queued_tasks'
    dag = self.dagbag.get_dag(dag_id)
    dag.clear()

    scheduler = SchedulerJob(dag_id, num_runs=10)
    scheduler.run()

    ti = TI(dag.tasks[0], dag.start_date)
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.FAILED)
    dag.clear()
def test_success_callbak_no_race_condition(self):
    """on_success_callback must observe the TI as RUNNING in the DB (no race)."""
    class CallbackWrapper(object):
        # Records the TI identity and, inside the callback, the state
        # the database reports for that TI at callback time.
        def wrap_task_instance(self, ti):
            self.task_id = ti.task_id
            self.dag_id = ti.dag_id
            self.execution_date = ti.execution_date
            self.task_state_in_callback = ""
            self.callback_ran = False

        def success_handler(self, context):
            self.callback_ran = True
            session = settings.Session()
            temp_instance = (
                session.query(TI)
                .filter(TI.task_id == self.task_id)
                .filter(TI.dag_id == self.dag_id)
                .filter(TI.execution_date == self.execution_date)
                .one())
            self.task_state_in_callback = temp_instance.state

    cw = CallbackWrapper()
    dag = DAG('test_success_callbak_no_race_condition',
              start_date=DEFAULT_DATE,
              end_date=DEFAULT_DATE + datetime.timedelta(days=10))
    task = DummyOperator(task_id='op',
                         email='*****@*****.**',
                         on_success_callback=cw.success_handler,
                         dag=dag)
    ti = TI(task=task, execution_date=datetime.datetime.now())
    ti.state = State.RUNNING
    session = settings.Session()
    session.merge(ti)
    session.commit()

    cw.wrap_task_instance(ti)
    ti._run_raw_task()
    self.assertTrue(cw.callback_ran)
    # The callback must see RUNNING, not SUCCESS.
    self.assertEqual(cw.task_state_in_callback, State.RUNNING)
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.SUCCESS)
def test_success_callbak_no_race_condition(self):
    """on_success_callback must observe the TI as RUNNING in the DB (no race)."""
    class CallbackWrapper:
        # Records the TI identity and, inside the callback, the state
        # the database reports for that TI at callback time.
        def wrap_task_instance(self, ti):
            self.task_id = ti.task_id
            self.dag_id = ti.dag_id
            self.execution_date = ti.execution_date
            self.task_state_in_callback = ""
            self.callback_ran = False

        def success_handler(self, context):
            self.callback_ran = True
            session = settings.Session()
            temp_instance = (
                session.query(TI)
                .filter(TI.task_id == self.task_id)
                .filter(TI.dag_id == self.dag_id)
                .filter(TI.execution_date == self.execution_date)
                .one())
            self.task_state_in_callback = temp_instance.state

    cw = CallbackWrapper()
    dag = DAG('test_success_callbak_no_race_condition',
              start_date=DEFAULT_DATE,
              end_date=DEFAULT_DATE + datetime.timedelta(days=10))
    task = DummyOperator(task_id='op',
                         email='*****@*****.**',
                         on_success_callback=cw.success_handler,
                         dag=dag)
    ti = TI(task=task, execution_date=datetime.datetime.now())
    ti.state = State.RUNNING
    session = settings.Session()
    session.merge(ti)
    session.commit()

    cw.wrap_task_instance(ti)
    ti._run_raw_task()
    self.assertTrue(cw.callback_ran)
    # The callback must see RUNNING, not SUCCESS.
    self.assertEqual(cw.task_state_in_callback, State.RUNNING)
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.SUCCESS)
def test_retry_handling(self, mock_pool_full): """ Test that task retries are handled properly """ # Mock the pool with a pool with slots open since the pool doesn't actually exist mock_pool_full.return_value = False dag = models.DAG(dag_id='test_retry_handling') task = BashOperator( task_id='test_retry_handling_op', bash_command='exit 1', retries=1, retry_delay=datetime.timedelta(seconds=0), dag=dag, owner='airflow', start_date=timezone.datetime(2016, 2, 1, 0, 0, 0)) def run_with_error(ti): try: ti.run() except AirflowException: pass ti = TI( task=task, execution_date=timezone.utcnow()) self.assertEqual(ti.try_number, 1) # first run -- up for retry run_with_error(ti) self.assertEqual(ti.state, State.UP_FOR_RETRY) self.assertEqual(ti._try_number, 1) self.assertEqual(ti.try_number, 2) # second run -- fail run_with_error(ti) self.assertEqual(ti.state, State.FAILED) self.assertEqual(ti._try_number, 2) self.assertEqual(ti.try_number, 3) # Clear the TI state since you can't run a task with a FAILED state without # clearing it first dag.clear() # third run -- up for retry run_with_error(ti) self.assertEqual(ti.state, State.UP_FOR_RETRY) self.assertEqual(ti._try_number, 3) self.assertEqual(ti.try_number, 4) # fourth run -- fail run_with_error(ti) ti.refresh_from_db() self.assertEqual(ti.state, State.FAILED) self.assertEqual(ti._try_number, 4) self.assertEqual(ti.try_number, 5)
def evaluate_dagrun(
        self,
        dag_id,
        first_task_state,
        second_task_state,
        dagrun_state,
        run_kwargs=None,
        advance_execution_date=False,
        session=None):
    """
    Helper for testing DagRun states with simple two-task DAGS

    Runs the dagrun's tasks via dag.run, then asserts the two task
    states and the final dagrun state.

    NOTE(review): `session` defaults to None but is dereferenced below;
    callers must supply one -- presumably a decorator upstream provides
    it.  Confirm before relying on the default.
    """
    if run_kwargs is None:
        run_kwargs = {}

    scheduler = SchedulerJob()
    dag = get_dag(dag_id, TEST_DAGS_FOLDER)
    dr = scheduler.schedule_dag(dag)
    if advance_execution_date:
        # run a second time to schedule a dagrun after the start_date
        dr = scheduler.schedule_dag(dag)
    ex_date = dr.execution_date

    # A deadlock is an expected outcome for some scenarios; swallow it.
    try:
        dag.run(start_date=ex_date, end_date=ex_date, **run_kwargs)
    except AirflowException:
        pass

    # test tasks
    task_1, task_2 = dag.tasks
    ti = TI(task_1, ex_date)
    ti.refresh_from_db()
    self.assertEqual(ti.state, first_task_state)
    ti = TI(task_2, ex_date)
    ti.refresh_from_db()
    self.assertEqual(ti.state, second_task_state)

    # load dagrun
    dr = session.query(DagRun).filter(
        DagRun.dag_id == dag.dag_id,
        DagRun.execution_date == ex_date
    ).first()

    # dagrun is running
    self.assertEqual(dr.state, State.RUNNING)

    dag.get_active_runs()

    # dagrun failed
    self.assertEqual(dr.state, dagrun_state)
def test_mark_failure_on_failure_callback(self):
    """
    Test that ensures that mark_failure in the UI fails
    the task, and executes on_failure_callback
    """
    # Shared mutable state lets the callbacks report back to the test.
    data = {'called': False}

    def check_failure(context):
        self.assertEqual(context['dag_run'].dag_id, 'test_mark_failure')
        data['called'] = True

    def task_function(ti, **context):
        # Simulate an "external" failure: flip the TI to FAILED in the
        # DB while the task is still running, then block.
        with create_session() as session:
            self.assertEqual(State.RUNNING, ti.state)
            ti.log.info("Marking TI as failed 'externally'")
            ti.state = State.FAILED
            session.merge(ti)
            session.commit()

        time.sleep(60)
        # This should not happen -- the state change should be noticed and the task should get killed
        data['reached_end_of_sleep'] = True

    with DAG(dag_id='test_mark_failure', start_date=DEFAULT_DATE) as dag:
        task = PythonOperator(
            task_id='test_state_succeeded1',
            python_callable=task_function,
            provide_context=True,
            on_failure_callback=check_failure)

    session = settings.Session()

    dag.clear()
    dag.create_dagrun(run_id="test",
                      state=State.RUNNING,
                      execution_date=DEFAULT_DATE,
                      start_date=DEFAULT_DATE,
                      session=session)
    ti = TI(task=task, execution_date=DEFAULT_DATE)
    ti.refresh_from_db()
    job1 = LocalTaskJob(task_instance=ti,
                        ignore_ti_state=True,
                        executor=SequentialExecutor())
    with timeout(30):
        # This should be _much_ shorter to run.
        # If you change this limit, make the timeout in the callbable above bigger
        job1.run()

    ti.refresh_from_db()
    self.assertEqual(ti.state, State.FAILED)
    self.assertTrue(data['called'])
    self.assertNotIn(
        'reached_end_of_sleep', data,
        'Task should not have been allowed to run to completion')
def test_submit_task_instance_to_dask_cluster(self):
    """
    Test that the DaskExecutor properly submits tasks to the cluster
    """
    cluster = LocalCluster(nanny=False)
    executor = DaskExecutor(cluster_address=cluster.scheduler_address)

    args = dict(start_date=DEFAULT_DATE)

    def fail():
        raise ValueError('Intentional failure.')

    with DAG('test-dag', default_args=args) as dag:
        # queue should be allowed, but ignored
        success_operator = PythonOperator(
            task_id='success',
            python_callable=lambda: True,
            queue='queue')
        fail_operator = PythonOperator(
            task_id='fail',
            python_callable=fail)

    success_ti = TaskInstance(success_operator, execution_date=DEFAULT_DATE)
    fail_ti = TaskInstance(fail_operator, execution_date=DEFAULT_DATE)

    # queue the tasks
    executor.queue_task_instance(success_ti)
    executor.queue_task_instance(fail_ti)

    # the tasks haven't been submitted to the cluster yet
    self.assertTrue(len(executor.futures) == 0)

    # after the heartbeat, they have been submitted
    executor.heartbeat()
    self.assertTrue(len(executor.futures) == 2)

    # wait a reasonable amount of time for the tasks to complete
    # NOTE(review): this waits only ~0.5s total (2 x 0.25s); may be
    # flaky on a slow cluster -- confirm the intended wait budget.
    for _ in range(2):
        time.sleep(0.25)
        executor.heartbeat()

    # check that the futures were completed
    if len(executor.futures) == 2:
        raise ValueError('Failed to reach cluster before timeout.')
    self.assertTrue(len(executor.futures) == 0)

    # check that the taskinstances were updated
    success_ti.refresh_from_db()
    self.assertTrue(success_ti.state == State.SUCCESS)
    fail_ti.refresh_from_db()
    self.assertTrue(fail_ti.state == State.FAILED)

    cluster.close()
def test_subdag_clear_parentdag_downstream_clear(self):
    """Clearing inside a subdag with include_parentdag also clears the
    parent DAG's downstream tasks of the subdag operator."""
    dag = self.dagbag.get_dag('example_subdag_operator')
    subdag_op_task = dag.get_task('section-1')

    subdag = subdag_op_task.subdag
    subdag.schedule_interval = '@daily'

    executor = TestExecutor()
    job = BackfillJob(dag=subdag,
                      start_date=DEFAULT_DATE,
                      end_date=DEFAULT_DATE,
                      executor=executor,
                      donot_pickle=True)

    with timeout(seconds=30):
        job.run()

    ti0 = TI(
        task=subdag.get_task('section-1-task-1'),
        execution_date=DEFAULT_DATE)
    ti0.refresh_from_db()
    self.assertEqual(ti0.state, State.SUCCESS)

    # Clear one task inside the subdag, propagating to the parent dag
    # via include_parentdag.
    sdag = subdag.sub_dag(
        task_regex='section-1-task-1',
        include_downstream=True,
        include_upstream=False)

    sdag.clear(
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        include_parentdag=True)

    ti0.refresh_from_db()
    self.assertEqual(State.NONE, ti0.state)

    ti1 = TI(
        task=dag.get_task('some-other-task'),
        execution_date=DEFAULT_DATE)
    self.assertEqual(State.NONE, ti1.state)

    # Checks that all the Downstream tasks for Parent DAG
    # have been cleared
    for task in subdag_op_task.downstream_list:
        ti = TI(
            task=dag.get_task(task.task_id),
            execution_date=DEFAULT_DATE
        )
        self.assertEqual(State.NONE, ti.state)

    subdag.clear()
    dag.clear()
def task_run(args, dag=None):
    """Runs a single task instance"""
    # When invoked via DAG.cli() the dag is passed in directly.
    if dag:
        args.dag_id = dag.dag_id

    log = LoggingMixin().log

    # Load custom airflow config
    if args.cfg_path:
        with open(args.cfg_path, 'r') as conf_file:
            conf_dict = json.load(conf_file)

        # The temp config file is one-shot; remove it once read.
        if os.path.exists(args.cfg_path):
            os.remove(args.cfg_path)

        conf.read_dict(conf_dict, source=args.cfg_path)
        settings.configure_vars()

    # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave
    # behind multiple open sleeping connections while heartbeating, which could
    # easily exceed the database connection limit when
    # processing hundreds of simultaneous tasks.
    settings.configure_orm(disable_connection_pool=True)

    if not args.pickle and not dag:
        dag = get_dag(args)
    elif not dag:
        # Resolve the DAG from a pickle stored in the database.
        with db.create_session() as session:
            log.info('Loading pickle id %s', args.pickle)
            dag_pickle = session.query(DagPickle).filter(
                DagPickle.id == args.pickle).first()
            if not dag_pickle:
                raise AirflowException("Who hid the pickle!? [missing pickle]")
            dag = dag_pickle.pickle

    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.refresh_from_db()
    ti.init_run_context(raw=args.raw)

    hostname = get_hostname()
    log.info("Running %s on host %s", ti, hostname)

    if args.interactive:
        _run(args, dag, ti)
    else:
        # Redirect the task's stdout/stderr into the TI's log handlers.
        with redirect_stdout(ti.log, logging.INFO), redirect_stderr(
                ti.log, logging.WARN):
            _run(args, dag, ti)
    logging.shutdown()
def evaluate_dagrun(
        self,
        dag_id,
        expected_task_states,  # dict of task_id: state
        dagrun_state,
        run_kwargs=None,
        advance_execution_date=False,
        session=None):
    """
    Helper for testing DagRun states with simple two-task DAGS.
    This is hackish: a dag run is created but its tasks are run
    by a backfill.
    """
    if run_kwargs is None:
        run_kwargs = {}

    scheduler = SchedulerJob(**self.default_scheduler_args)
    dag = self.dagbag.get_dag(dag_id)
    dag.clear()
    dag_run = scheduler.create_dag_run(dag)
    if advance_execution_date:
        # run a second time to schedule a dagrun after the start_date
        dag_run = scheduler.create_dag_run(dag)
    exec_date = dag_run.execution_date

    # Some scenarios intentionally deadlock; swallow the exception.
    try:
        dag.run(start_date=exec_date, end_date=exec_date, **run_kwargs)
    except AirflowException:
        pass

    # verify each task ended in its expected state
    for task_id, expected_state in expected_task_states.items():
        ti = TI(dag.get_task(task_id), exec_date)
        ti.refresh_from_db()
        self.assertEqual(ti.state, expected_state)

    # reload the dagrun and evaluate its state transition
    dag_run = DagRun.find(dag_id=dag_id, execution_date=exec_date)[0]
    dag_run.dag = dag
    self.assertEqual(dag_run.state, State.RUNNING)
    dag_run.update_state()
    self.assertEqual(dag_run.state, dagrun_state)
def evaluate_dagrun(
        self,
        dag_id,
        expected_task_states,  # dict of task_id: state
        dagrun_state,
        run_kwargs=None,
        advance_execution_date=False,
        session=None):
    """
    Helper for testing DagRun states with simple two-task DAGS.
    This is hackish: a dag run is created but its tasks are run
    by a backfill.
    """
    if run_kwargs is None:
        run_kwargs = {}

    scheduler = SchedulerJob()
    dag = self.dagbag.get_dag(dag_id)
    dag.clear()
    dag_run = scheduler.schedule_dag(dag)
    if advance_execution_date:
        # run a second time to schedule a dagrun after the start_date
        dag_run = scheduler.schedule_dag(dag)
    exec_date = dag_run.execution_date

    # Some scenarios intentionally deadlock; swallow the exception.
    try:
        dag.run(start_date=exec_date, end_date=exec_date, **run_kwargs)
    except AirflowException:
        pass

    # verify each task ended in its expected state
    for task_id, expected_state in expected_task_states.items():
        ti = TI(dag.get_task(task_id), exec_date)
        ti.refresh_from_db()
        self.assertEqual(ti.state, expected_state)

    # reload the dagrun and evaluate its state transition
    dag_run = DagRun.find(dag_id=dag_id, execution_date=exec_date)[0]
    dag_run.dag = dag
    self.assertEqual(dag_run.state, State.RUNNING)
    dag_run.update_state()
    self.assertEqual(dag_run.state, dagrun_state)
def _get_ti(task, exec_date_or_run_id):
    """Get the task instance through DagRun.run_id, if that fails, get the TI the old way"""
    dag_run = task.dag.get_dagrun(run_id=exec_date_or_run_id)
    if not dag_run:
        # Not a known run_id -- fall back to treating the argument as an
        # execution date.
        try:
            execution_date = timezone.parse(exec_date_or_run_id)
            ti = TaskInstance(task, execution_date)
            ti.refresh_from_db()
            return ti
        except (ParserError, TypeError):
            raise AirflowException(f"DagRun with run_id: {exec_date_or_run_id} not found")
    ti = dag_run.get_task_instance(task.task_id)
    if ti is None:
        # get_task_instance can return None; fail with a clear error
        # instead of an AttributeError on `ti.task` below.
        raise AirflowException(
            f"TaskInstance for task {task.task_id} not found in DagRun {exec_date_or_run_id}")
    ti.task = task
    return ti
def test_localtaskjob_maintain_heart_rate(self):
    """LocalTaskJob should heartbeat once and exit when the runner's
    return_code becomes available on the second poll."""
    dagbag = models.DagBag(
        dag_folder=TEST_DAG_FOLDER,
        include_examples=False,
    )
    dag = dagbag.dags.get('test_localtaskjob_double_trigger')
    task = dag.get_task('test_localtaskjob_double_trigger_task')

    session = settings.Session()

    dag.clear()
    dag.create_dagrun(run_id="test",
                      state=State.SUCCESS,
                      execution_date=DEFAULT_DATE,
                      start_date=DEFAULT_DATE,
                      session=session)

    ti_run = TI(task=task, execution_date=DEFAULT_DATE)
    ti_run.refresh_from_db()
    job1 = LocalTaskJob(task_instance=ti_run,
                        executor=SequentialExecutor())

    # this should make sure we only heartbeat once and exit at the second
    # loop in _execute()
    return_codes = [None, 0]

    def multi_return_code():
        # Pops None on the first poll, then 0 (task finished).
        return return_codes.pop(0)

    time_start = time.time()
    from airflow.task.task_runner.standard_task_runner import StandardTaskRunner
    with patch.object(StandardTaskRunner, 'start', return_value=None) as mock_start:
        with patch.object(StandardTaskRunner, 'return_code') as mock_ret_code:
            mock_ret_code.side_effect = multi_return_code
            job1.run()
            self.assertEqual(mock_start.call_count, 1)
            self.assertEqual(mock_ret_code.call_count, 2)
    time_end = time.time()

    self.assertEqual(self.mock_base_job_sleep.call_count, 1)
    self.assertEqual(job1.state, State.SUCCESS)

    # Consider we have patched sleep call, it should not be sleeping to
    # keep up with the heart rate in other unpatched places
    #
    # We already make sure patched sleep call is only called once
    self.assertLess(time_end - time_start, job1.heartrate)
    session.close()
def evaluate_dagrun(self,
                    dag_id,
                    first_task_state,
                    second_task_state,
                    dagrun_state,
                    run_kwargs=None,
                    advance_execution_date=False,
                    session=None):
    """
    Helper for testing DagRun states with simple two-task DAGS
    """
    if run_kwargs is None:
        run_kwargs = {}

    scheduler = SchedulerJob()
    dag = self.dagbag.get_dag(dag_id)
    dag.clear()
    dag_run = scheduler.schedule_dag(dag)
    if advance_execution_date:
        # run a second time to schedule a dagrun after the start_date
        dag_run = scheduler.schedule_dag(dag)
    exec_date = dag_run.execution_date

    try:
        dag.run(start_date=exec_date, end_date=exec_date, **run_kwargs)
    except AirflowException:
        # deadlocks are an expected outcome for some inputs
        pass

    # verify both task states
    first_task, second_task = dag.tasks
    ti = TI(first_task, exec_date)
    ti.refresh_from_db()
    self.assertEqual(ti.state, first_task_state)
    ti = TI(second_task, exec_date)
    ti.refresh_from_db()
    self.assertEqual(ti.state, second_task_state)

    # load dagrun
    dag_run = session.query(DagRun).filter(
        DagRun.dag_id == dag.dag_id,
        DagRun.execution_date == exec_date).first()

    # dagrun is running
    self.assertEqual(dag_run.state, State.RUNNING)

    dag.get_active_runs()

    # dagrun failed
    self.assertEqual(dag_run.state, dagrun_state)
def evaluate_dagrun(
        self,
        dag_id,
        expected_task_states,  # dict of task_id: state
        dagrun_state,
        run_kwargs=None,
        advance_execution_date=False,
        session=None):
    """
    Helper for testing DagRun states with simple two-task DAGS
    """
    if run_kwargs is None:
        run_kwargs = {}

    scheduler = SchedulerJob()
    dag = self.dagbag.get_dag(dag_id)
    dag.clear()
    dag_run = scheduler.schedule_dag(dag)
    if advance_execution_date:
        # run a second time to schedule a dagrun after the start_date
        dag_run = scheduler.schedule_dag(dag)
    exec_date = dag_run.execution_date

    try:
        dag.run(start_date=exec_date, end_date=exec_date, **run_kwargs)
    except AirflowException:
        # deadlocks are an expected outcome for some inputs
        pass

    # verify each task ended in its expected state
    for task_id, expected_state in expected_task_states.items():
        ti = TI(dag.get_task(task_id), exec_date)
        ti.refresh_from_db()
        self.assertEqual(ti.state, expected_state)

    # load dagrun
    dag_run = session.query(DagRun).filter(
        DagRun.dag_id == dag.dag_id,
        DagRun.execution_date == exec_date
    ).first()

    # dagrun is running
    self.assertEqual(dag_run.state, State.RUNNING)

    dag.get_active_runs()

    # dagrun failed
    self.assertEqual(dag_run.state, dagrun_state)
def task_run(args, dag=None): """Runs a single task instance""" # Load custom airflow config if args.cfg_path: with open(args.cfg_path, 'r') as conf_file: conf_dict = json.load(conf_file) if os.path.exists(args.cfg_path): os.remove(args.cfg_path) conf.read_dict(conf_dict, source=args.cfg_path) settings.configure_vars() # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave # behind multiple open sleeping connections while heartbeating, which could # easily exceed the database connection limit when # processing hundreds of simultaneous tasks. settings.configure_orm(disable_connection_pool=True) if dag and args.pickle: raise AirflowException( "You cannot use the --pickle option when using DAG.cli() method.") elif args.pickle: print(f'Loading pickle id: {args.pickle}') dag = get_dag_by_pickle(args.pickle) elif not dag: dag = get_dag(args.subdir, args.dag_id) else: # Use DAG from parameter pass task = dag.get_task(task_id=args.task_id) ti = TaskInstance(task, args.execution_date) ti.refresh_from_db() ti.init_run_context(raw=args.raw) hostname = get_hostname() print(f"Running {ti} on host {hostname}") if args.interactive: _run_task_by_selected_method(args, dag, ti) else: with redirect_stdout(StreamLogWriter(ti.log, logging.INFO)), \ redirect_stderr(StreamLogWriter(ti.log, logging.WARN)): _run_task_by_selected_method(args, dag, ti) logging.shutdown()
def test_local_run(self):
    """`tasks run --interactive` executes the task and leaves it SUCCESS."""
    args = self.parser.parse_args([
        'tasks',
        'run',
        'example_python_operator',
        'print_the_context',
        '2018-04-27T08:39:51.298439+00:00',
        '--interactive',
        '--subdir',
        '/root/dags/example_python_operator.py'
    ])

    dag = get_dag(args.subdir, args.dag_id)
    reset(dag.dag_id)

    task_command.task_run(args)

    ti = TaskInstance(dag.get_task(task_id=args.task_id), args.execution_date)
    ti.refresh_from_db()
    self.assertEqual(ti.current_state(), State.SUCCESS)
def test_local_run(self):
    """task_run with interactive mock args executes the task to SUCCESS."""
    args = create_mock_args(
        task_id='print_the_context',
        dag_id='example_python_operator',
        subdir='/root/dags/example_python_operator.py',
        interactive=True,
        execution_date=timezone.parse('2018-04-27T08:39:51.298439+00:00'))

    dag = get_dag(args)
    reset(dag.dag_id)

    task_command.task_run(args)

    ti = TaskInstance(dag.get_task(task_id=args.task_id), args.execution_date)
    ti.refresh_from_db()
    self.assertEqual(ti.current_state(), State.SUCCESS)
def test_depends_on_past(self):
    """depends_on_past blocks the first run unless explicitly ignored."""
    dagbag = models.DagBag()
    dag = dagbag.get_dag("test_depends_on_past")
    dag.clear()
    task = dag.tasks[0]
    run_date = task.start_date + datetime.timedelta(days=5)
    ti = TI(task, run_date)

    # depends_on_past prevents the run
    task.run(start_date=run_date, end_date=run_date)
    ti.refresh_from_db()
    self.assertIs(ti.state, None)

    # ignore first depends_on_past to allow the run
    task.run(start_date=run_date,
             end_date=run_date,
             ignore_first_depends_on_past=True)
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.SUCCESS)
def test_clear_task_instances_for_backfill_dagrun(self):
    """Clearing TIs of a backfill dagrun leaves the run in RUNNING state."""
    now = timezone.utcnow()
    session = settings.Session()
    dag_id = 'test_clear_task_instances_for_backfill_dagrun'
    dag = DAG(dag_id=dag_id, start_date=now)
    self.create_dag_run(dag, execution_date=now, is_backfill=True)

    task0 = DummyOperator(task_id='backfill_task_0', owner='test', dag=dag)
    ti0 = TI(task=task0, execution_date=now)
    ti0.run()

    instances = session.query(TI).filter(TI.dag_id == dag.dag_id).all()
    clear_task_instances(instances, session)
    session.commit()
    ti0.refresh_from_db()

    dr0 = session.query(DagRun).filter(
        DagRun.dag_id == dag_id,
        DagRun.execution_date == now).first()
    assert dr0.state == State.RUNNING
def test_heartbeat_failed_fast(self, mock_getpid):
    """
    Test that task heartbeat will sleep when it fails fast
    """
    mock_getpid.return_value = 1
    beats = []

    def heartbeat_recorder(**kwargs):
        # Timestamp every heartbeat callback invocation.
        beats.append(timezone.utcnow())

    with create_session() as session:
        dagbag = models.DagBag(
            dag_folder=TEST_DAG_FOLDER,
            include_examples=False,
        )
        dag_id = 'test_heartbeat_failed_fast'
        task_id = 'test_heartbeat_failed_fast_op'
        target_dag = dagbag.get_dag(dag_id)
        target_task = target_dag.get_task(task_id)

        target_dag.create_dagrun(run_id="test_heartbeat_failed_fast_run",
                                 state=State.RUNNING,
                                 execution_date=DEFAULT_DATE,
                                 start_date=DEFAULT_DATE,
                                 session=session)
        instance = TI(task=target_task, execution_date=DEFAULT_DATE)
        instance.refresh_from_db()
        instance.state = State.RUNNING
        instance.hostname = get_hostname()
        instance.pid = 1
        session.commit()

        job = LocalTaskJob(task_instance=instance,
                           executor=MockExecutor(do_update=False))
        job.heartrate = 2
        job.heartbeat_callback = heartbeat_recorder
        job._execute()

        self.assertGreater(len(beats), 1)
        # Consecutive heartbeats should be spaced by roughly the heartrate.
        for earlier, later in zip(beats, beats[1:]):
            gap = (later - earlier).total_seconds()
            self.assertAlmostEqual(gap, job.heartrate, delta=0.05)
def test_depends_on_past(self):
    """A depends_on_past task stays unrun until the first check is ignored."""
    target_dag = get_dag('test_depends_on_past', TEST_DAGS_FOLDER)
    first_task = target_dag.tasks[0]
    exec_date = first_task.start_date + datetime.timedelta(days=5)
    instance = TI(first_task, exec_date)

    # depends_on_past prevents the run
    first_task.run(start_date=exec_date, end_date=exec_date)
    instance.refresh_from_db()
    self.assertIs(instance.state, None)

    # ignore first depends_on_past to allow the run
    first_task.run(
        start_date=exec_date,
        end_date=exec_date,
        ignore_first_depends_on_past=True)
    instance.refresh_from_db()
    self.assertEqual(instance.state, State.SUCCESS)
def test_scheduler_pooled_tasks(self):
    """
    Test that the scheduler handles queued tasks correctly
    See issue #1299
    """
    session = settings.Session()
    existing_pool = (session.query(Pool)
                     .filter(Pool.pool == 'test_queued_pool')
                     .first())
    if not existing_pool:
        # Create the pool the test DAG's tasks are assigned to.
        session.merge(Pool(pool='test_queued_pool', slots=5))
        session.commit()
    session.close()

    dag_id = 'test_scheduled_queued_tasks'
    target_dag = self.dagbag.get_dag(dag_id)
    target_dag.clear()

    first_scheduler = SchedulerJob(dag_id,
                                   num_runs=1,
                                   executor=TestExecutor(),
                                   **self.default_scheduler_args)
    first_scheduler.run()

    first_task = target_dag.tasks[0]
    logging.info("Trying to find task {}".format(first_task))
    instance = TI(first_task, target_dag.start_date)
    instance.refresh_from_db()
    logging.error("TI is: {}".format(instance))
    self.assertEqual(instance.state, State.QUEUED)

    # now we use a DIFFERENT scheduler and executor
    # to simulate the num-runs CLI arg
    second_scheduler = SchedulerJob(
        dag_id,
        num_runs=5,
        executor=DEFAULT_EXECUTOR.__class__(),
        **self.default_scheduler_args)
    second_scheduler.run()

    instance.refresh_from_db()
    self.assertEqual(instance.state, State.FAILED)
    target_dag.clear()
def test_local_run(self):
    """Patch argparse.Namespace with mocked args, run the task, expect SUCCESS."""
    cli_args = create_mock_args(
        task_id='print_the_context',
        dag_id='example_python_operator',
        subdir='/root/dags/example_python_operator.py',
        interactive=True,
        execution_date=timezone.parse('2018-04-27T08:39:51.298439+00:00')
    )
    # Clear prior state for this DAG before running.
    reset(cli_args.dag_id)

    with patch('argparse.Namespace', cli_args) as mock_args:
        run(mock_args)
        target_dag = get_dag(mock_args)
        target_task = target_dag.get_task(task_id=cli_args.task_id)
        instance = TaskInstance(target_task, cli_args.execution_date)
        instance.refresh_from_db()
        self.assertEqual(instance.current_state(), State.SUCCESS)
def test_backfill_execute_subdag_with_removed_task(self):
    """
    Ensure that subdag operators execute properly in the case where
    an associated task of the subdag has been removed from the dag
    definition, but has instances in the database from previous runs.
    """
    parent_dag = self.dagbag.get_dag('example_subdag_operator')
    subdag = parent_dag.get_task('section-1').subdag

    backfill = BackfillJob(dag=subdag,
                           start_date=DEFAULT_DATE,
                           end_date=DEFAULT_DATE,
                           executor=MockExecutor(),
                           donot_pickle=True)

    # Seed the DB with a TI for a task no longer present in the subdag.
    stale_ti = TI(
        task=DummyOperator(task_id='removed_task'),
        execution_date=DEFAULT_DATE,
        state=State.REMOVED)
    stale_ti.dag_id = subdag.dag_id
    session = settings.Session()
    session.merge(stale_ti)
    session.commit()

    with timeout(seconds=30):
        backfill.run()

    # Every current subdag task must have run to SUCCESS.
    for subdag_task in subdag.tasks:
        record = session.query(TI).filter(
            TI.dag_id == subdag.dag_id,
            TI.task_id == subdag_task.task_id,
            TI.execution_date == DEFAULT_DATE).first()
        self.assertIsNotNone(record)
        self.assertEqual(record.state, State.SUCCESS)

    # The removed task's instance must stay REMOVED, untouched by the backfill.
    stale_ti.refresh_from_db()
    self.assertEqual(stale_ti.state, State.REMOVED)

    subdag.clear()
    parent_dag.clear()
def test_depends_on_past(self):
    """A task with depends_on_past only runs once the first check is ignored."""
    target_dag = models.DagBag().get_dag('test_depends_on_past')
    target_dag.clear()
    first_task = target_dag.tasks[0]
    exec_date = first_task.start_date + datetime.timedelta(days=5)
    instance = TI(first_task, exec_date)

    # depends_on_past prevents the run
    first_task.run(start_date=exec_date, end_date=exec_date)
    instance.refresh_from_db()
    self.assertIs(instance.state, None)

    # ignore first depends_on_past to allow the run
    first_task.run(start_date=exec_date, end_date=exec_date,
                   ignore_first_depends_on_past=True)
    instance.refresh_from_db()
    self.assertEqual(instance.state, State.SUCCESS)
def test_heartbeat_failed_fast(self, mock_getpid):
    """
    Test that task heartbeat will sleep when it fails fast
    """
    mock_getpid.return_value = 1
    # Make the mocked BaseJob sleep behave like a real sleep.
    self.mock_base_job_sleep.side_effect = time.sleep

    with create_session() as session:
        dagbag = models.DagBag(
            dag_folder=TEST_DAG_FOLDER,
            include_examples=False,
        )
        dag_id = 'test_heartbeat_failed_fast'
        task_id = 'test_heartbeat_failed_fast_op'
        target_dag = dagbag.get_dag(dag_id)
        target_task = target_dag.get_task(task_id)

        target_dag.create_dagrun(run_id="test_heartbeat_failed_fast_run",
                                 state=State.RUNNING,
                                 execution_date=DEFAULT_DATE,
                                 start_date=DEFAULT_DATE,
                                 session=session)
        instance = TI(task=target_task, execution_date=DEFAULT_DATE)
        instance.refresh_from_db()
        instance.state = State.RUNNING
        instance.hostname = get_hostname()
        instance.pid = 1
        session.commit()

        job = LocalTaskJob(task_instance=instance,
                           executor=MockExecutor(do_update=False))
        job.heartrate = 2
        beats = []

        def record_beat(session):
            # Capture the job's latest heartbeat time on each callback.
            beats.append(job.latest_heartbeat)

        job.heartbeat_callback = record_beat
        job._execute()

        self.assertGreater(len(beats), 2)
        # Heartbeats must be spaced at least one heartrate apart.
        for earlier, later in zip(beats, beats[1:]):
            self.assertGreaterEqual((later - earlier).total_seconds(),
                                    job.heartrate)
def test_backfill_depends_on_past(self):
    """
    Test that a backfill of a depends_on_past task does not run on the
    first attempt, and does run once ignore_first_depends_on_past is set.
    """
    dag = get_dag('test_depends_on_past', TEST_DAGS_FOLDER)
    run_date = dag.start_date + datetime.timedelta(days=5)

    BackfillJob(dag=dag, start_date=run_date, end_date=run_date).run()

    # ti should not have run: depends_on_past blocks the first run
    ti = TI(dag.tasks[0], run_date)
    ti.refresh_from_db()
    self.assertIs(ti.state, None)

    BackfillJob(
        dag=dag,
        start_date=run_date,
        end_date=run_date,
        ignore_first_depends_on_past=True).run()

    # ti should have run
    ti = TI(dag.tasks[0], run_date)
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.SUCCESS)
def test_clear_task_instances(self):
    """Clearing TIs increments max_tries by the task's retries."""
    test_dag = DAG('test_clear_task_instances',
                   start_date=DEFAULT_DATE,
                   end_date=DEFAULT_DATE + datetime.timedelta(days=10))
    op_no_retry = DummyOperator(task_id='0', owner='test', dag=test_dag)
    op_retries = DummyOperator(task_id='1', owner='test', dag=test_dag,
                               retries=2)
    instance_a = TI(task=op_no_retry, execution_date=DEFAULT_DATE)
    instance_b = TI(task=op_retries, execution_date=DEFAULT_DATE)
    instance_a.run()
    instance_b.run()

    session = settings.Session()
    tis = session.query(TI).filter(TI.dag_id == test_dag.dag_id).all()
    clear_task_instances(tis, session, dag=test_dag)
    session.commit()

    instance_a.refresh_from_db()
    instance_b.refresh_from_db()
    # Clearing keeps try_number but extends max_tries by the retries count.
    self.assertEqual(instance_a.try_number, 1)
    self.assertEqual(instance_a.max_tries, 1)
    self.assertEqual(instance_b.try_number, 1)
    self.assertEqual(instance_b.max_tries, 3)
def test_dag_clear(self):
    """dag.clear() resets state and extends max_tries for each task instance."""
    test_dag = DAG('test_dag_clear',
                   start_date=DEFAULT_DATE,
                   end_date=DEFAULT_DATE + datetime.timedelta(days=10))
    op0 = DummyOperator(task_id='test_dag_clear_task_0', owner='test',
                        dag=test_dag)
    ti0 = TI(task=op0, execution_date=DEFAULT_DATE)

    # Before any run, the next attempt is try 1.
    self.assertEqual(ti0.try_number, 1)
    ti0.run()
    self.assertEqual(ti0.try_number, 2)

    test_dag.clear()
    ti0.refresh_from_db()
    self.assertEqual(ti0.try_number, 2)
    self.assertEqual(ti0.state, State.NONE)
    self.assertEqual(ti0.max_tries, 1)

    op1 = DummyOperator(task_id='test_dag_clear_task_1', owner='test',
                        dag=test_dag, retries=2)
    ti1 = TI(task=op1, execution_date=DEFAULT_DATE)
    self.assertEqual(ti1.max_tries, 2)
    # Pretend one attempt already happened; the next run is attempt 2.
    ti1.try_number = 1
    ti1.run()
    self.assertEqual(ti1.try_number, 3)
    self.assertEqual(ti1.max_tries, 2)

    test_dag.clear()
    ti0.refresh_from_db()
    ti1.refresh_from_db()
    # After clearing, ti1 should show attempt 3 of 5.
    self.assertEqual(ti1.max_tries, 4)
    self.assertEqual(ti1.try_number, 3)
    # After clearing, ti0 should show attempt 2 of 2.
    self.assertEqual(ti0.try_number, 2)
    self.assertEqual(ti0.max_tries, 1)
def test_backfill_depends_on_past(self):
    """
    Test that backfill respects ignore_depends_on_past
    """
    dag = self.dagbag.get_dag('test_depends_on_past')
    dag.clear()
    run_date = DEFAULT_DATE + datetime.timedelta(days=5)

    # backfill should deadlock: depends_on_past blocks the only runnable TI
    self.assertRaisesRegex(
        AirflowException,
        'BackfillJob is deadlocked',
        BackfillJob(dag=dag, start_date=run_date, end_date=run_date).run)

    BackfillJob(
        dag=dag,
        start_date=run_date,
        end_date=run_date,
        ignore_first_depends_on_past=True).run()

    # ti should have succeeded
    ti = TI(dag.tasks[0], run_date)
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.SUCCESS)
def test_reschedule_handling(self, mock_pool_full):
    """
    Test that task reschedules are handled properly: reschedules keep the
    try number and start date, while retries reset them.
    """
    # Mock the pool with a pool with slots open since the pool doesn't actually exist
    mock_pool_full.return_value = False

    # Return values of the python sensor callable, modified during tests
    done = False
    fail = False

    def poke_func():
        # Renamed from `callable` to avoid shadowing the builtin.
        if fail:
            raise AirflowException()
        return done

    dag = models.DAG(dag_id='test_reschedule_handling')
    task = PythonSensor(
        task_id='test_reschedule_handling_sensor',
        poke_interval=0,
        mode='reschedule',
        python_callable=poke_func,
        retries=1,
        retry_delay=datetime.timedelta(seconds=0),
        dag=dag,
        owner='airflow',
        start_date=timezone.datetime(2016, 2, 1, 0, 0, 0))

    ti = TI(task=task, execution_date=timezone.utcnow())
    self.assertEqual(ti._try_number, 0)
    self.assertEqual(ti.try_number, 1)

    def run_ti_and_assert(run_date, expected_start_date, expected_end_date,
                          expected_duration, expected_state,
                          expected_try_number,
                          expected_task_reschedule_count):
        # Run the TI at a frozen clock and assert its recorded state/dates.
        with freeze_time(run_date):
            try:
                ti.run()
            except AirflowException:
                # Only swallow the failure the callable was told to raise.
                if not fail:
                    raise
        ti.refresh_from_db()
        self.assertEqual(ti.state, expected_state)
        self.assertEqual(ti._try_number, expected_try_number)
        self.assertEqual(ti.try_number, expected_try_number + 1)
        self.assertEqual(ti.start_date, expected_start_date)
        self.assertEqual(ti.end_date, expected_end_date)
        self.assertEqual(ti.duration, expected_duration)
        trs = TaskReschedule.find_for_task_instance(ti)
        self.assertEqual(len(trs), expected_task_reschedule_count)

    date1 = timezone.utcnow()
    date2 = date1 + datetime.timedelta(minutes=1)
    date3 = date2 + datetime.timedelta(minutes=1)
    date4 = date3 + datetime.timedelta(minutes=1)

    # Run with multiple reschedules.
    # During reschedule the try number remains the same, but each reschedule is recorded.
    # The start date is expected to remain the initial date, hence the duration increases.
    # When finished the try number is incremented and there is no reschedule expected
    # for this try.
    done, fail = False, False
    run_ti_and_assert(date1, date1, date1, 0, State.UP_FOR_RESCHEDULE, 0, 1)

    done, fail = False, False
    run_ti_and_assert(date2, date1, date2, 60, State.UP_FOR_RESCHEDULE, 0, 2)

    done, fail = False, False
    run_ti_and_assert(date3, date1, date3, 120, State.UP_FOR_RESCHEDULE, 0, 3)

    done, fail = True, False
    run_ti_and_assert(date4, date1, date4, 180, State.SUCCESS, 1, 0)

    # Clear the task instance.
    dag.clear()
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.NONE)
    self.assertEqual(ti._try_number, 1)

    # Run again after clearing with reschedules and a retry.
    # The retry increments the try number, and for that try no reschedule is expected.
    # After the retry the start date is reset, hence the duration is also reset.
    done, fail = False, False
    run_ti_and_assert(date1, date1, date1, 0, State.UP_FOR_RESCHEDULE, 1, 1)

    done, fail = False, True
    run_ti_and_assert(date2, date1, date2, 60, State.UP_FOR_RETRY, 2, 0)

    done, fail = False, False
    run_ti_and_assert(date3, date3, date3, 0, State.UP_FOR_RESCHEDULE, 2, 1)

    done, fail = True, False
    run_ti_and_assert(date4, date3, date4, 60, State.SUCCESS, 3, 0)