def test_update_counters(self):
    dag = DAG(dag_id='test_manage_executor_state', start_date=DEFAULT_DATE)
    task1 = DummyOperator(task_id='dummy', dag=dag, owner='airflow')
    job = BackfillJob(dag=dag)
    session = settings.Session()
    dr = dag.create_dagrun(run_id=DagRun.ID_PREFIX,
                           state=State.RUNNING,
                           execution_date=DEFAULT_DATE,
                           start_date=DEFAULT_DATE,
                           session=session)
    ti = TI(task1, dr.execution_date)
    ti.refresh_from_db()

    ti_status = BackfillJob._DagRunTaskStatus()

    # test for success
    ti.set_state(State.SUCCESS, session)
    ti_status.running[ti.key] = ti
    job._update_counters(ti_status=ti_status)
    self.assertEqual(len(ti_status.running), 0)
    self.assertEqual(len(ti_status.succeeded), 1)
    self.assertEqual(len(ti_status.skipped), 0)
    self.assertEqual(len(ti_status.failed), 0)
    self.assertEqual(len(ti_status.to_run), 0)

    ti_status.succeeded.clear()

    # test for skipped
    ti.set_state(State.SKIPPED, session)
    ti_status.running[ti.key] = ti
    job._update_counters(ti_status=ti_status)
    self.assertEqual(len(ti_status.running), 0)
    self.assertEqual(len(ti_status.succeeded), 0)
    self.assertEqual(len(ti_status.skipped), 1)
    self.assertEqual(len(ti_status.failed), 0)
    self.assertEqual(len(ti_status.to_run), 0)

    ti_status.skipped.clear()

    # test for failed
    ti.set_state(State.FAILED, session)
    ti_status.running[ti.key] = ti
    job._update_counters(ti_status=ti_status)
    self.assertEqual(len(ti_status.running), 0)
    self.assertEqual(len(ti_status.succeeded), 0)
    self.assertEqual(len(ti_status.skipped), 0)
    self.assertEqual(len(ti_status.failed), 1)
    self.assertEqual(len(ti_status.to_run), 0)

    ti_status.failed.clear()

    # test for retry
    ti.set_state(State.UP_FOR_RETRY, session)
    ti_status.running[ti.key] = ti
    job._update_counters(ti_status=ti_status)
    self.assertEqual(len(ti_status.running), 0)
    self.assertEqual(len(ti_status.succeeded), 0)
    self.assertEqual(len(ti_status.skipped), 0)
    self.assertEqual(len(ti_status.failed), 0)
    self.assertEqual(len(ti_status.to_run), 1)

    ti_status.to_run.clear()

    # test for reschedule
    ti.set_state(State.UP_FOR_RESCHEDULE, session)
    ti_status.running[ti.key] = ti
    job._update_counters(ti_status=ti_status)
    self.assertEqual(len(ti_status.running), 0)
    self.assertEqual(len(ti_status.succeeded), 0)
    self.assertEqual(len(ti_status.skipped), 0)
    self.assertEqual(len(ti_status.failed), 0)
    self.assertEqual(len(ti_status.to_run), 1)

    ti_status.to_run.clear()

    # test for none
    ti.set_state(State.NONE, session)
    ti_status.running[ti.key] = ti
    job._update_counters(ti_status=ti_status)
    self.assertEqual(len(ti_status.running), 0)
    self.assertEqual(len(ti_status.succeeded), 0)
    self.assertEqual(len(ti_status.skipped), 0)
    self.assertEqual(len(ti_status.failed), 0)
    self.assertEqual(len(ti_status.to_run), 1)

    ti_status.to_run.clear()

    session.close()
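
# The assertions above pin down a simple contract: _update_counters drains
# ti_status.running and re-buckets each task instance by its current state
# (SUCCESS -> succeeded, SKIPPED -> skipped, FAILED -> failed, and
# UP_FOR_RETRY / UP_FOR_RESCHEDULE / NONE back into to_run). The sketch below
# only illustrates that contract; it assumes succeeded/skipped/failed are sets
# of TI keys and to_run is a dict keyed by TI key, and it is not the actual
# BackfillJob._update_counters implementation.
def _update_counters_sketch(ti_status):
    from airflow.utils.state import State

    for key, ti in list(ti_status.running.items()):
        ti.refresh_from_db()  # pick up the state the test just wrote
        if ti.state == State.SUCCESS:
            ti_status.succeeded.add(key)
        elif ti.state == State.SKIPPED:
            ti_status.skipped.add(key)
        elif ti.state == State.FAILED:
            ti_status.failed.add(key)
        elif ti.state in (State.UP_FOR_RETRY, State.UP_FOR_RESCHEDULE, State.NONE):
            # retried, rescheduled, or reset tasks are queued to run again
            ti_status.to_run[key] = ti
        else:
            continue  # still running: leave it where it is
        ti_status.running.pop(key)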
def _execute(self, session=None):
    """
    Initializes all components required to run a dag for a specified date range
    and calls the helper method to execute the tasks.
    """
    # Trigger zombie cleanup before the run if configured to do so
    if self.airflow_config.clean_zombies_during_backfill:
        ClearZombieJob().run()

    ti_status = BackfillJob._DagRunTaskStatus()

    # pickling
    pickle_id = self.dag.pickle_id
    # We don't need to pickle our dag again, as it was already pickled on job
    # creation; re-pickling would only save it into a Databand table that
    # Airflow has no use for.
    # if not self.donot_pickle and self.executor.__class__ not in (
    #     executors.LocalExecutor,
    #     executors.SequentialExecutor,
    # ):
    #     pickle_id = airflow_pickle(self.dag, session=session)

    executor = self.executor
    executor.start()

    ti_status.total_runs = 1  # total dag runs in backfill

    dag_run = None
    try:
        dag_run = self._get_dag_run(session=session)
        if dag_run is None:
            raise DatabandSystemError("Can't build dagrun")

        # Create relation DagRun <> Job
        dag_run.conf = {"job_id": self.id}
        session.merge(dag_run)
        session.commit()

        run_date = dag_run.execution_date

        tis_map = self._task_instances_for_dag_run(dag_run, session=session)
        if not tis_map:
            raise DatabandSystemError("There are no task instances to run!")

        ti_status.active_runs.append(dag_run)
        ti_status.to_run.update(tis_map or {})

        processed_dag_run_dates = self._process_dag_task_instances(
            ti_status=ti_status,
            executor=executor,
            pickle_id=pickle_id,
            session=session,
        )
        ti_status.executed_dag_run_dates.update(processed_dag_run_dates)

        err = self._collect_errors(ti_status=ti_status, session=session)
        if err:
            raise DatabandRunError("Airflow executor failed to complete the run")

        if run_date not in ti_status.executed_dag_run_dates:
            self.log.warning(
                "Dag %s is not marked as completed! %s not found in %s",
                self.dag_id,
                run_date,
                ti_status.executed_dag_run_dates,
            )
    finally:
        # In the sequential executor a keyboard interrupt would reach here, and
        # executor.end() -> heartbeat() -> sync() would then run the queued
        # commands again before exiting, so drop them first.
        if hasattr(executor, "commands_to_run"):
            executor.commands_to_run = []
        try:
            executor.end()
        except Exception:
            logger.exception("Failed to terminate executor")
        if dag_run and dag_run.state == State.RUNNING:
            _kill_dag_run_zombi(dag_run, session)
        session.commit()

    self.log.info("Run is completed. Exiting.")
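
# A minimal sketch of what _kill_dag_run_zombi (called in the finally block
# above) is assumed to do: force a dag run that is still RUNNING after the job
# has finished into a terminal state, so it is not later picked up as an
# orphaned (zombie) run. The body below is an assumption inferred from the
# call site, not the actual Databand implementation.
def _kill_dag_run_zombi_sketch(dag_run, session):
    from airflow.utils import timezone
    from airflow.utils.state import State

    dag_run.state = State.FAILED  # assumed terminal state for an abandoned run
    dag_run.end_date = timezone.utcnow()
    session.merge(dag_run)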