def _get_dag_run(self, run_date: datetime, dag: DAG, session: Session = None):
    """
    Return the dag run to backfill for the given run date.

    An existing dag run for ``run_date`` is reused when one is found; otherwise
    a new dag run is created. If the max_active_runs limit is reached, this
    function returns None.

    :param run_date: the execution date for the dag run
    :param dag: DAG
    :param session: the database session object
    :return: a DagRun in state RUNNING or None
    """
    run_id = f"{DagRunType.BACKFILL_JOB.value}__{run_date.isoformat()}"

    # consider max_active_runs but ignore when running subdags
    respect_dag_max_active_limit = bool(dag.schedule_interval and not dag.is_subdag)

    current_active_dag_count = dag.get_num_active_runs(external_trigger=False)

    # check if we are scheduling on top of an already existing dag_run
    # we could find a "scheduled" run instead of a "backfill"
    runs = DagRun.find(dag_id=dag.dag_id, execution_date=run_date, session=session)
    run = runs[0] if runs else None

    if run is not None:
        # a run that is already RUNNING is already counted among the active
        # runs, so the limit must not block picking it up again
        if run.state == State.RUNNING:
            respect_dag_max_active_limit = False
        # apply this job's conf to the pre-existing run as well; otherwise the
        # conf passed to the backfill is silently ignored for existing runs
        run.conf = self.conf or {}

    # enforce max_active_runs limit for dag, special cases already
    # handled by respect_dag_max_active_limit
    if respect_dag_max_active_limit and current_active_dag_count >= dag.max_active_runs:
        return None

    if run is None:
        run = dag.create_dagrun(
            run_id=run_id,
            execution_date=run_date,
            start_date=timezone.utcnow(),
            state=State.RUNNING,
            external_trigger=False,
            session=session,
            conf=self.conf,
        )

    # set required transient field
    run.dag = dag

    # explicitly mark as backfill and running
    run.state = State.RUNNING
    run.run_id = run_id
    run.verify_integrity(session=session)
    return run
def _get_dag_run(self, dagrun_info: DagRunInfo, dag: DAG, session: Session = None):
    """
    Fetch or create the backfill dag run described by ``dagrun_info``.

    A dag run already stored for the logical date is reused; when none is
    found, a new one is created. None is returned instead when the dag's
    max_active_runs limit has been reached.

    :param dagrun_info: Schedule information for the dag run
    :param dag: DAG
    :param session: the database session object
    :return: a DagRun in state RUNNING or None
    """
    logical_date = dagrun_info.logical_date

    # the max_active_runs limit is skipped for subdags and whenever
    # dag.timetable.can_run is falsy
    enforce_active_limit = bool(dag.timetable.can_run and not dag.is_subdag)

    active_run_count = dag.get_num_active_runs(external_trigger=False)

    # the run date may already carry a dag run; it could be a "scheduled"
    # run rather than a "backfill" one
    existing_runs = DagRun.find(dag_id=dag.dag_id, execution_date=logical_date, session=session)
    dag_run: Optional[DagRun] = None
    if existing_runs:
        dag_run = existing_runs[0]
        if dag_run.state == DagRunState.RUNNING:
            enforce_active_limit = False
        # Fixes --conf overwrite for backfills with already existing DagRuns
        dag_run.conf = self.conf or {}

    # apply the max_active_runs limit; the exempt cases were already folded
    # into enforce_active_limit above
    limit_reached = active_run_count >= dag.max_active_runs
    if enforce_active_limit and limit_reached:
        return None

    if not dag_run:
        dag_run = dag.create_dagrun(
            execution_date=logical_date,
            data_interval=dagrun_info.data_interval,
            start_date=timezone.utcnow(),
            state=DagRunState.RUNNING,
            external_trigger=False,
            session=session,
            conf=self.conf,
            run_type=DagRunType.BACKFILL_JOB,
            creating_job_id=self.id,
        )

    # transient field, has to be set on the run by hand
    dag_run.dag = dag

    # force the run into the backfill type and the running state
    dag_run.state = DagRunState.RUNNING
    dag_run.run_type = DagRunType.BACKFILL_JOB
    dag_run.verify_integrity(session=session)
    return dag_run