def test_sub_set_subdag(self):
    """
    Backfill a sub-DAG selected with the ``leave*`` regex and verify that
    only the two ``leave`` tasks succeed while every other task instance
    of the run keeps state ``NONE``.
    """
    dag = DAG(
        'test_sub_set_subdag',
        start_date=DEFAULT_DATE,
        default_args={'owner': 'owner1'},
    )

    with dag:
        leaf_a = DummyOperator(task_id='leave1')
        leaf_b = DummyOperator(task_id='leave2')
        level_1 = DummyOperator(task_id='upstream_level_1')
        level_2 = DummyOperator(task_id='upstream_level_2')
        level_3 = DummyOperator(task_id='upstream_level_3')
        # the dependencies are intentionally declared in a shuffled order
        leaf_b.set_downstream(level_1)
        leaf_a.set_downstream(level_1)
        level_2.set_downstream(level_3)
        level_1.set_downstream(level_2)

    dag.clear()
    original_run = dag.create_dagrun(
        run_id="test",
        state=State.RUNNING,
        execution_date=DEFAULT_DATE,
        start_date=DEFAULT_DATE,
    )

    executor = MockExecutor()
    leaves_only = dag.sub_dag(
        task_regex="leave*",
        include_downstream=False,
        include_upstream=False,
    )
    backfill = BackfillJob(
        dag=leaves_only,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        executor=executor,
    )
    backfill.run()

    # the run_id should have changed, so a refresh won't work
    self.assertRaises(
        sqlalchemy.orm.exc.NoResultFound, original_run.refresh_from_db)

    found_runs = DagRun.find(dag_id=dag.dag_id, execution_date=DEFAULT_DATE)
    backfill_run = found_runs[0]

    self.assertEqual(
        DagRun.generate_run_id(DagRunType.BACKFILL_JOB, DEFAULT_DATE),
        backfill_run.run_id,
    )
    for ti in backfill_run.get_task_instances():
        expected_state = (
            State.SUCCESS if ti.task_id in ('leave1', 'leave2') else State.NONE
        )
        self.assertEqual(expected_state, ti.state)
def test_backfill_multi_dates(self):
    """
    Backfill ``example_bash_operator`` over two consecutive days and verify
    both the order reported by the executor and the final state of the two
    resulting DagRuns.
    """
    dag = self.dagbag.get_dag('example_bash_operator')

    end_date = DEFAULT_DATE + datetime.timedelta(days=1)

    executor = MockExecutor(parallelism=16)
    job = BackfillJob(
        dag=dag,
        start_date=DEFAULT_DATE,
        end_date=end_date,
        executor=executor,
        ignore_first_depends_on_past=True,
    )
    job.run()

    expected_execution_order = [
        ("runme_0", DEFAULT_DATE),
        ("runme_1", DEFAULT_DATE),
        ("runme_2", DEFAULT_DATE),
        ("runme_0", end_date),
        ("runme_1", end_date),
        ("runme_2", end_date),

        ("also_run_this", DEFAULT_DATE),
        ("also_run_this", end_date),

        ("run_after_loop", DEFAULT_DATE),
        ("run_after_loop", end_date),

        ("run_this_last", DEFAULT_DATE),
        ("run_this_last", end_date),
    ]
    self.assertListEqual(
        [((dag.dag_id, task_id, when, 1), (State.SUCCESS, None))
         for (task_id, when) in expected_execution_order],
        executor.sorted_tasks,
    )

    session = settings.Session()
    try:
        drs = session.query(DagRun).filter(
            DagRun.dag_id == dag.dag_id
        ).order_by(DagRun.execution_date).all()

        # assertEqual (rather than assertTrue(a == b)) reports both values
        # when the comparison fails.
        self.assertEqual(drs[0].execution_date, DEFAULT_DATE)
        self.assertEqual(drs[0].state, State.SUCCESS)
        self.assertEqual(drs[1].execution_date, end_date)
        self.assertEqual(drs[1].state, State.SUCCESS)

        dag.clear()
    finally:
        # Release the session even when one of the assertions above fails.
        session.close()
def test_backfill_max_limit_check_within_limit(self):
    """
    With ``max_active_runs=16`` a backfill over a one-hour window is
    expected to produce exactly two DagRuns, both in state SUCCESS.
    """
    dag = self._get_dag_test_max_active_limits(
        'test_backfill_max_limit_check_within_limit',
        max_active_runs=16,
    )

    window_start = DEFAULT_DATE - datetime.timedelta(hours=1)
    window_end = DEFAULT_DATE

    backfill = BackfillJob(
        dag=dag,
        start_date=window_start,
        end_date=window_end,
        executor=MockExecutor(),
        donot_pickle=True,
    )
    backfill.run()

    found_runs = DagRun.find(dag_id=dag.dag_id)
    self.assertEqual(2, len(found_runs))

    every_run_succeeded = all(
        found.state == State.SUCCESS for found in found_runs)
    self.assertTrue(every_run_succeeded)
def test_unfinished_dag_runs_set_to_failed(self):
    """
    A RUNNING DagRun handed to ``_set_unfinished_dag_runs_to_failed`` must
    be FAILED once it is reloaded from the database.
    """
    dag = self._get_dummy_dag('dummy_dag')
    running_run = dag.create_dagrun(run_id='test', state=State.RUNNING)

    backfill_end = DEFAULT_DATE + datetime.timedelta(days=8)
    backfill = BackfillJob(
        dag=dag,
        start_date=DEFAULT_DATE,
        end_date=backfill_end,
        ignore_first_depends_on_past=True,
    )
    backfill._set_unfinished_dag_runs_to_failed([running_run])

    running_run.refresh_from_db()
    self.assertEqual(State.FAILED, running_run.state)
def test_backfill_execute_subdag(self):
    """
    Backfill the ``section-1`` subdag of ``example_subdag_operator`` for a
    single date, then drive the subdag operator's pre/execute/post hooks
    and check the executor history and the resulting DagRun.
    """
    parent_dag = self.dagbag.get_dag('example_subdag_operator')
    subdag_operator = parent_dag.get_task('section-1')

    subdag = subdag_operator.subdag
    subdag.schedule_interval = '@daily'

    run_date = timezone.utcnow()
    executor = MockExecutor()
    backfill = BackfillJob(
        dag=subdag,
        start_date=run_date,
        end_date=run_date,
        executor=executor,
        donot_pickle=True,
    )
    backfill.run()

    # Invoke the three operator hooks in order, each with a fresh context.
    for hook_name in ('pre_execute', 'execute', 'post_execute'):
        getattr(subdag_operator, hook_name)(
            context={'execution_date': run_date})

    first_batch = executor.history[0]

    # check that all 5 task instances of the subdag 'section-1' were executed
    self.assertEqual(5, len(first_batch))
    for entry in first_batch:
        executed_ti = entry[3]
        self.assertIn('section-1-task-', executed_ti.task_id)

    with create_session() as session:
        run_query = session.query(DagRun)
        run_query = run_query.filter(DagRun.dag_id == subdag.dag_id)
        run_query = run_query.filter(DagRun.execution_date == run_date)
        # pylint: disable=comparison-with-callable
        run_query = run_query.filter(DagRun.state == State.SUCCESS)
        successful_subdag_runs = run_query.count()

        self.assertEqual(1, successful_subdag_runs)

    subdag.clear()
    parent_dag.clear()
def test_backfill_execute_subdag_with_removed_task(self):
    """
    Ensure that subdag operators execute properly in the case where
    an associated task of the subdag has been removed from the dag
    definition, but has instances in the database from previous runs.

    A task instance in state REMOVED is stored for the subdag before the
    backfill runs; afterwards every task still defined on the subdag must
    be SUCCESS and the removed instance must still be REMOVED.
    """
    dag = self.dagbag.get_dag('example_subdag_operator')
    subdag = dag.get_task('section-1').subdag

    executor = MockExecutor()
    job = BackfillJob(
        dag=subdag,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        executor=executor,
        donot_pickle=True,
    )

    removed_task_ti = TI(
        task=DummyOperator(task_id='removed_task'),
        execution_date=DEFAULT_DATE,
        state=State.REMOVED,
    )
    removed_task_ti.dag_id = subdag.dag_id

    session = settings.Session()
    try:
        session.merge(removed_task_ti)
        session.commit()

        # Guard against the backfill hanging on the removed task instance.
        with timeout(seconds=30):
            job.run()

        for task in subdag.tasks:
            instance = session.query(TI).filter(
                TI.dag_id == subdag.dag_id,
                TI.task_id == task.task_id,
                TI.execution_date == DEFAULT_DATE,
            ).first()

            self.assertIsNotNone(instance)
            self.assertEqual(instance.state, State.SUCCESS)

        removed_task_ti.refresh_from_db()
        self.assertEqual(removed_task_ti.state, State.REMOVED)

        subdag.clear()
        dag.clear()
    finally:
        # The session was previously never closed; release it on every path.
        session.close()
def test_backfill_pool_not_found(self):
    """
    Running a backfill for a dummy DAG assigned to pool ``king_pool`` must
    raise ``AirflowException``.

    NOTE(review): going by the test name, ``king_pool`` is expected not to
    exist in the test database - confirm against the fixtures.
    """
    dag = self._get_dummy_dag(
        dag_id='test_backfill_pool_not_found',
        pool='king_pool',
    )

    executor = MockExecutor()

    job = BackfillJob(
        dag=dag,
        executor=executor,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE + datetime.timedelta(days=7),
    )

    # assertRaises fails with a message naming the expected exception,
    # unlike a bare self.fail() after try/except.
    with self.assertRaises(AirflowException):
        job.run()
def test_dag_run_with_finished_tasks_set_to_success(self):
    """
    A RUNNING DagRun whose task instances have all been set to SUCCESS must
    be SUCCESS after ``_set_unfinished_dag_runs_to_failed`` and a reload.
    """
    dag = self._get_dummy_dag('dummy_dag')
    running_run = dag.create_dagrun(run_id='test', state=State.RUNNING)

    for task_instance in running_run.get_task_instances():
        task_instance.set_state(State.SUCCESS)

    backfill_end = DEFAULT_DATE + datetime.timedelta(days=8)
    backfill = BackfillJob(
        dag=dag,
        start_date=DEFAULT_DATE,
        end_date=backfill_end,
        ignore_first_depends_on_past=True,
    )
    backfill._set_unfinished_dag_runs_to_failed([running_run])

    running_run.refresh_from_db()
    self.assertEqual(State.SUCCESS, running_run.state)
def test_backfill_max_limit_check_complete_loop(self):
    """
    Backfill a one-hour window with the helper's default active-run limit
    and verify that two runs succeed and none is left RUNNING.
    """
    dag = self._get_dag_test_max_active_limits(
        'test_backfill_max_limit_check_complete_loop')

    window_end = DEFAULT_DATE
    window_start = DEFAULT_DATE - datetime.timedelta(hours=1)

    # Given the max limit to be 1 in active dag runs, we need to run the
    # backfill job 3 times
    # NOTE(review): the limit comes from the helper's default, which is not
    # visible here - confirm.
    success_expected = 2

    backfill = BackfillJob(
        dag=dag,
        start_date=window_start,
        end_date=window_end,
        executor=MockExecutor(),
        donot_pickle=True,
    )
    backfill.run()

    def count_runs(state):
        return len(DagRun.find(dag_id=dag.dag_id, state=state))

    success_dagruns = count_runs(State.SUCCESS)
    running_dagruns = count_runs(State.RUNNING)

    self.assertEqual(success_expected, success_dagruns)
    # no dag_runs in running state are left
    self.assertEqual(0, running_dagruns)
def test_backfill_run_backwards(self):
    """
    Backfill ``test_start_date_scheduling`` over two days with
    ``run_backwards=True`` and verify that, ordered by execution date, the
    ``dummy`` task instances were queued in reverse order and all succeeded.
    """
    dag = self.dagbag.get_dag("test_start_date_scheduling")
    dag.clear()

    job = BackfillJob(
        dag=dag,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE + datetime.timedelta(days=1),
        run_backwards=True,
    )
    job.run()

    session = settings.Session()
    try:
        # Pass the criteria as separate arguments so both are ANDed in SQL.
        # Joining them with Python's `and` evaluates the truthiness of the
        # first expression instead of building a SQL conjunction, so only
        # one of the two criteria would reach the query.
        tis = session.query(TI).filter(
            TI.dag_id == 'test_start_date_scheduling',
            TI.task_id == 'dummy',
        ).order_by(TI.execution_date).all()

        queued_times = [ti.queued_dttm for ti in tis]
        # Later execution dates must have been queued first.
        self.assertEqual(queued_times, sorted(queued_times, reverse=True))
        self.assertTrue(all(ti.state == State.SUCCESS for ti in tis))

        dag.clear()
    finally:
        # Release the session even when one of the assertions above fails.
        session.close()
def test_trigger_controller_dag(self):
    """
    Processing the trigger-target DAG must collect no task instances before
    the controller DAG is backfilled, and at least one afterwards.
    """
    controller_dag = self.dagbag.get_dag('example_trigger_controller_dag')
    target_dag = self.dagbag.get_dag('example_trigger_target_dag')
    target_dag.sync_to_db()

    processor = DagFileProcessor(dag_ids=[], log=Mock())
    collected = Mock()

    def process_target():
        # Task instances found for the target DAG are appended to the mock.
        processor._process_task_instances(
            target_dag, task_instances_list=collected)

    process_target()
    self.assertFalse(collected.append.called)

    backfill = BackfillJob(
        dag=controller_dag,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        ignore_first_depends_on_past=True,
    )
    backfill.run()

    process_target()
    self.assertTrue(collected.append.called)
def run_backfill(self, dag_id, task_id):
    """
    Clear the DAG ``dag_id``, backfill it for ``DEFAULT_DATE`` only and
    assert that the task instance of ``task_id`` for that date is SUCCESS.
    """
    dag = self.dagbag.get_dag(dag_id)
    dag.clear()

    backfill = BackfillJob(
        dag=dag, start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
    backfill.run()

    task = dag.get_task(task_id)
    task_instance = models.TaskInstance(
        task=task, execution_date=DEFAULT_DATE)
    task_instance.refresh_from_db()
    self.assertEqual(task_instance.state, State.SUCCESS)
def test_backfill_examples(self, dag_id, expected_execution_order):
    """
    Backfill one of the example DAGs for ``DEFAULT_DATE`` and compare the
    executor's sorted tasks with ``expected_execution_order``.

    Only some example DAGs are suitable: a DAG that sleeps forever or has
    no schedule cannot be backfilled, so it must not be passed in here.
    """
    dag = self.dagbag.get_dag(dag_id)

    logger.info('*** Running example DAG: %s', dag.dag_id)
    executor = MockExecutor()
    backfill = BackfillJob(
        dag=dag,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        executor=executor,
        ignore_first_depends_on_past=True,
    )
    backfill.run()

    expected_tasks = []
    for task_id in expected_execution_order:
        task_key = (dag_id, task_id, DEFAULT_DATE, 1)
        expected_tasks.append((task_key, State.SUCCESS))

    self.assertListEqual(expected_tasks, executor.sorted_tasks)
def test_backfill_depends_on_past(self):
    """
    Backfilling ``test_depends_on_past`` for a single date deadlocks unless
    ``ignore_first_depends_on_past=True`` is passed, in which case the
    DAG's first task succeeds.
    """
    dag = self.dagbag.get_dag('test_depends_on_past')
    dag.clear()
    run_date = DEFAULT_DATE + datetime.timedelta(days=5)

    # backfill should deadlock
    deadlocking_job = BackfillJob(
        dag=dag, start_date=run_date, end_date=run_date)
    with self.assertRaisesRegex(AirflowException, 'BackfillJob is deadlocked'):
        deadlocking_job.run()

    unblocked_job = BackfillJob(
        dag=dag,
        start_date=run_date,
        end_date=run_date,
        executor=MockExecutor(),
        ignore_first_depends_on_past=True,
    )
    unblocked_job.run()

    # ti should have succeeded
    first_task_instance = TI(dag.tasks[0], run_date)
    first_task_instance.refresh_from_db()
    self.assertEqual(first_task_instance.state, State.SUCCESS)
def test_backfill_rerun_upstream_failed_tasks(self):
    """
    After a first backfill, mark one task instance UPSTREAM_FAILED and
    verify that a second backfill with ``rerun_failed_tasks=True`` brings
    it to SUCCESS.
    """
    rerun_task_id = 'test_backfill_rerun_upstream_failed_task-1'

    dag = DAG(
        dag_id='test_backfill_rerun_upstream_failed',
        start_date=DEFAULT_DATE,
        schedule_interval='@daily',
    )

    with dag:
        downstream_op = DummyOperator(task_id=rerun_task_id, dag=dag)
        upstream_op = DummyOperator(
            task_id='test_backfill_rerun_upstream_failed_task-2', dag=dag)
        downstream_op.set_upstream(upstream_op)

    dag.clear()
    executor = MockExecutor()
    last_day = DEFAULT_DATE + datetime.timedelta(days=2)

    def load_task_instance():
        # Fresh TaskInstance for DEFAULT_DATE, synced with the database.
        instance = TI(
            task=dag.get_task(rerun_task_id), execution_date=DEFAULT_DATE)
        instance.refresh_from_db()
        return instance

    first_pass = BackfillJob(
        dag=dag,
        executor=executor,
        start_date=DEFAULT_DATE,
        end_date=last_day,
    )
    first_pass.run()

    load_task_instance().set_state(State.UPSTREAM_FAILED)

    second_pass = BackfillJob(
        dag=dag,
        executor=executor,
        start_date=DEFAULT_DATE,
        end_date=last_day,
        rerun_failed_tasks=True,
    )
    second_pass.run()

    self.assertEqual(load_task_instance().state, State.SUCCESS)
def test_backfill_depends_on_past_backwards(self):
    """
    Backfilling ``test_depends_on_past`` forwards succeeds, while the same
    date range with ``run_backwards=True`` raises ``AirflowException``
    naming ``test_dop_task`` as a task that depends on past.
    """
    dop_task_id = 'test_dop_task'

    first_day = DEFAULT_DATE + datetime.timedelta(days=1)
    second_day = first_day + datetime.timedelta(days=1)
    date_range = {'start_date': first_day, 'end_date': second_day}

    dag = self.dagbag.get_dag('test_depends_on_past')
    dag.clear()

    forward_job = BackfillJob(
        dag=dag,
        executor=MockExecutor(),
        ignore_first_depends_on_past=True,
        **date_range
    )
    forward_job.run()

    task_instance = TI(dag.get_task(dop_task_id), second_day)
    task_instance.refresh_from_db()
    # runs fine forwards
    self.assertEqual(task_instance.state, State.SUCCESS)

    # raises backwards
    expected_msg = 'You cannot backfill backwards because one or more tasks depend_on_past: {}'.format(
        dop_task_id)
    with self.assertRaisesRegex(AirflowException, expected_msg):
        # Constructed inside the context: either the constructor or run()
        # may be the call that raises.
        backward_job = BackfillJob(
            dag=dag,
            executor=MockExecutor(),
            run_backwards=True,
            **date_range
        )
        backward_job.run()
def test_backfill_rerun_failed_tasks_without_flag(self):
    """
    After a first backfill, mark the task instance FAILED and verify that
    a second backfill with ``rerun_failed_tasks=False`` raises
    ``AirflowException``.
    """
    only_task_id = 'test_backfill_rerun_failed_task-1'

    dag = DAG(
        dag_id='test_backfill_rerun_failed',
        start_date=DEFAULT_DATE,
        schedule_interval='@daily',
    )

    with dag:
        DummyOperator(task_id=only_task_id, dag=dag)

    dag.clear()

    executor = MockExecutor()
    last_day = DEFAULT_DATE + datetime.timedelta(days=2)

    first_pass = BackfillJob(
        dag=dag,
        executor=executor,
        start_date=DEFAULT_DATE,
        end_date=last_day,
    )
    first_pass.run()

    task_instance = TI(
        task=dag.get_task(only_task_id), execution_date=DEFAULT_DATE)
    task_instance.refresh_from_db()
    task_instance.set_state(State.FAILED)

    second_pass = BackfillJob(
        dag=dag,
        executor=executor,
        start_date=DEFAULT_DATE,
        end_date=last_day,
        rerun_failed_tasks=False,
    )

    with self.assertRaises(AirflowException):
        second_pass.run()
def test_update_counters(self):
    """
    Verify that ``BackfillJob._update_counters`` moves a running task
    instance into the bucket matching its state.

    For each state the task instance is registered as running, the
    counters are updated, and exactly one bucket is expected to hold it:
    SUCCESS -> succeeded, SKIPPED -> skipped, FAILED -> failed, and
    UP_FOR_RETRY / UP_FOR_RESCHEDULE / NONE -> to_run.
    """
    dag = DAG(
        dag_id='test_manage_executor_state',
        start_date=DEFAULT_DATE)

    task1 = DummyOperator(
        task_id='dummy',
        dag=dag,
        owner='airflow')

    job = BackfillJob(dag=dag)

    # (state given to the task instance, bucket expected to receive it)
    cases = (
        (State.SUCCESS, 'succeeded'),
        (State.SKIPPED, 'skipped'),
        (State.FAILED, 'failed'),
        (State.UP_FOR_RETRY, 'to_run'),
        (State.UP_FOR_RESCHEDULE, 'to_run'),
        (State.NONE, 'to_run'),
    )
    buckets = ('succeeded', 'skipped', 'failed', 'to_run')

    session = settings.Session()
    try:
        dr = dag.create_dagrun(
            run_type=DagRunType.SCHEDULED,
            state=State.RUNNING,
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            session=session)
        ti = TI(task1, dr.execution_date)
        ti.refresh_from_db()

        ti_status = BackfillJob._DagRunTaskStatus()

        for state, expected_bucket in cases:
            ti.set_state(state, session)
            ti_status.running[ti.key] = ti
            job._update_counters(ti_status=ti_status)

            self.assertEqual(
                0, len(ti_status.running),
                'running not emptied for state {}'.format(state))
            for bucket in buckets:
                expected_size = 1 if bucket == expected_bucket else 0
                self.assertEqual(
                    expected_size, len(getattr(ti_status, bucket)),
                    'unexpected size of {} for state {}'.format(bucket, state))

            # Reset the bucket so the next case starts from empty counters.
            getattr(ti_status, expected_bucket).clear()
    finally:
        # Release the session even when one of the assertions above fails.
        session.close()
def test_backfill_fill_blanks(self):
    """
    Seed a DagRun with task instances in assorted states, backfill it and
    verify the job raises 'Some task instances failed', the run ends up
    FAILED, and each task instance has the expected final state.
    """
    dag = DAG(
        'test_backfill_fill_blanks',
        start_date=DEFAULT_DATE,
        default_args={'owner': 'owner1'},
    )

    with dag:
        op1 = DummyOperator(task_id='op1')
        op2 = DummyOperator(task_id='op2')
        op3 = DummyOperator(task_id='op3')
        op4 = DummyOperator(task_id='op4')
        op5 = DummyOperator(task_id='op5')
        op6 = DummyOperator(task_id='op6')

    dag.clear()
    seeded_run = dag.create_dagrun(
        run_id='test',
        state=State.RUNNING,
        execution_date=DEFAULT_DATE,
        start_date=DEFAULT_DATE,
    )
    executor = MockExecutor()

    # States applied before the backfill; op6 is deliberately left untouched.
    seeded_states = {
        op1.task_id: State.UP_FOR_RETRY,
        op2.task_id: State.FAILED,
        op3.task_id: State.SKIPPED,
        op4.task_id: State.SCHEDULED,
        op5.task_id: State.UPSTREAM_FAILED,
    }

    session = settings.Session()
    for seeded_ti in seeded_run.get_task_instances():
        if seeded_ti.task_id in seeded_states:
            seeded_ti.state = seeded_states[seeded_ti.task_id]
            if seeded_ti.task_id == op1.task_id:
                seeded_ti.end_date = DEFAULT_DATE
        session.merge(seeded_ti)
    session.commit()
    session.close()

    backfill = BackfillJob(
        dag=dag,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        executor=executor,
    )
    self.assertRaisesRegex(
        AirflowException, 'Some task instances failed', backfill.run)

    # the run_id should have changed, so a refresh won't work
    self.assertRaises(
        sqlalchemy.orm.exc.NoResultFound, seeded_run.refresh_from_db)

    found_runs = DagRun.find(dag_id=dag.dag_id, execution_date=DEFAULT_DATE)
    final_run = found_runs[0]

    self.assertEqual(final_run.state, State.FAILED)

    final_states = {
        op1.task_id: State.SUCCESS,
        op4.task_id: State.SUCCESS,
        op6.task_id: State.SUCCESS,
        op2.task_id: State.FAILED,
        op3.task_id: State.SKIPPED,
        op5.task_id: State.UPSTREAM_FAILED,
    }
    for final_ti in final_run.get_task_instances():
        if final_ti.task_id in final_states:
            self.assertEqual(final_ti.state, final_states[final_ti.task_id])
def _execute(self, session=None):
    """
    Initializes all components required to run a dag for a specified date range and
    calls helper method to execute the tasks.

    A single dag run is obtained via ``_get_dag_run``, linked to this job
    through its ``conf``, and its task instances are processed with the
    job's executor. The executor is always ended afterwards, and a dag run
    that is still RUNNING at that point is marked FAILED.

    :param session: database session used for all merges and commits
    :raises DatabandSystemError: if no dag run could be obtained or it has
        no task instances to run
    :raises DatabandRunError: if ``_collect_errors`` reports an error
    """
    self._clean_zombie_dagruns_if_required()

    ti_status = BackfillJob._DagRunTaskStatus()

    # picklin'
    pickle_id = self.dag.pickle_id
    # We don't need to pickle our dag again as it already pickled on job creation
    # also this will save it into databand table, that have no use for the airflow
    # (the former airflow_pickle() call for non-local executors was dropped)

    self._workaround_db_disconnection_in_forks()

    executor = self.executor
    executor.start()

    ti_status.total_runs = 1  # total dag runs in backfill

    dag_run = None
    try:
        dag_run = self._get_dag_run(session=session)
        # Validate before the first attribute access; otherwise a missing
        # run surfaces as AttributeError instead of the intended error.
        if dag_run is None:
            raise DatabandSystemError("Can't build dagrun")

        # Create relation DagRun <> Job
        dag_run.conf = {"job_id": self.id}
        session.merge(dag_run)
        session.commit()

        run_date = dag_run.execution_date

        tis_map = self._task_instances_for_dag_run(dag_run, session=session)
        if not tis_map:
            raise DatabandSystemError("There are no task instances to run!")

        ti_status.active_runs.append(dag_run)
        ti_status.to_run.update(tis_map)

        processed_dag_run_dates = self._process_dag_task_instances(
            ti_status=ti_status,
            executor=executor,
            pickle_id=pickle_id,
            session=session,
        )
        ti_status.executed_dag_run_dates.update(processed_dag_run_dates)

        err = self._collect_errors(ti_status=ti_status, session=session)
        if err:
            raise DatabandRunError("Airflow executor has failed to run the run")

        if run_date not in ti_status.executed_dag_run_dates:
            self.log.warning(
                "Dag %s is not marked as completed! %s not found in %s",
                self.dag_id,
                run_date,
                ti_status.executed_dag_run_dates,
            )
    finally:
        # in sequential executor a keyboard interrupt would reach here and
        # then executor.end() -> heartbeat() -> sync() will cause the queued commands
        # to be run again before exiting
        if hasattr(executor, "commands_to_run"):
            executor.commands_to_run = []
        try:
            executor.end()
        except Exception:
            # Best effort: a failing shutdown must not mask the original error.
            logger.exception("Failed to terminate executor")

        session.commit()
        try:
            if dag_run and dag_run.state == State.RUNNING:
                # use clean SQL session
                fix_zombie_dagrun_task_instances(dag_run)

                # A run still RUNNING here did not complete; record it as failed.
                dag_run.state = State.FAILED
                session.merge(dag_run)
                session.commit()
        except Exception:
            logger.exception("Failed to clean dag_run task instances")

    self.log.info("Run is completed. Exiting.")