def get_all_dag_task_query(
    dag: DAG,
    session: SASession,
    state: TaskInstanceState,
    task_ids: Union[List[str], List[Tuple[str, int]]],
    confirmed_dates: Iterable[datetime],
):
    """Get all tasks of the main dag that will be affected by a state change.

    Builds (without executing) a query over ``TaskInstance`` joined to its
    ``dag_run``, restricted to the given DAG, the given execution dates and
    the given tasks, and excluding task instances already in ``state``.

    :param dag: DAG whose ``dag_id`` the task instances must match.
    :param session: session the query is built on.
    :param state: the target state; task instances whose state already equals
        it are left out, while instances with no state (NULL) are kept.
    :param task_ids: either a list of task id strings, or a list of
        ``(task_id, map_index)`` tuples. The shape is detected from the first
        element. An empty list produces a query that matches no rows.
    :param confirmed_dates: execution dates of the dag runs to consider.
    :return: the query object, with ``TaskInstance.dag_run`` populated from
        the join via ``contains_eager``.
    """
    # An empty list has no first element to inspect; treat it as the plain
    # string form so that ``in_([])`` yields a query matching nothing instead
    # of raising IndexError.
    is_string_list = not task_ids or isinstance(task_ids[0], str)

    qry_dag = (
        session.query(TaskInstance)
        .join(TaskInstance.dag_run)
        .filter(
            TaskInstance.dag_id == dag.dag_id,
            DagRun.execution_date.in_(confirmed_dates),
        )
    )

    if is_string_list:
        qry_dag = qry_dag.filter(TaskInstance.task_id.in_(task_ids))
    else:
        # Tuples address individual mapped task instances.
        qry_dag = qry_dag.filter(
            tuple_in_condition((TaskInstance.task_id, TaskInstance.map_index), task_ids)
        )

    # ``state != X`` is not true for NULL in SQL, so NULL states are matched
    # explicitly to keep never-run task instances in the result.
    qry_dag = qry_dag.filter(
        or_(TaskInstance.state.is_(None), TaskInstance.state != state)
    ).options(contains_eager(TaskInstance.dag_run))
    return qry_dag
def schedule_tis(self, schedulable_tis: Iterable[TI], session: Session = NEW_SESSION) -> int:
    """
    Set the given task instances into the scheduled state.

    Each element of ``schedulable_tis`` should have its ``task`` attribute already set.

    Any EmptyOperator without callbacks is instead set straight to the success state.

    All the TIs should belong to this DagRun, but this code is in the hot-path, this is not checked -- it
    is the caller's responsibility to call this function only with TIs from a single dag run.

    :param schedulable_tis: task instances to move forward; each is matched in
        the database by its ``(task_id, map_index)`` within this run.
    :param session: session used to issue the bulk UPDATE statements.
    :return: the sum of the row counts reported by the UPDATE statements.
    """
    # Split the TIs into those that do not need to be executed (tasks using
    # EmptyOperator without on_execute_callback / on_success_callback) and
    # those that must really be scheduled. Both lists hold
    # (task_id, map_index) pairs so that only the exact instances passed in
    # are touched -- matching on task_id alone would also update every other
    # map index of a mapped task.
    dummy_ti_ids = []
    schedulable_ti_ids = []
    for ti in schedulable_tis:
        ti_key = (ti.task_id, ti.map_index)
        if (
            ti.task.inherits_from_empty_operator
            and not ti.task.on_execute_callback
            and not ti.task.on_success_callback
        ):
            dummy_ti_ids.append(ti_key)
        else:
            schedulable_ti_ids.append(ti_key)

    count = 0

    if schedulable_ti_ids:
        count += (
            session.query(TI)
            .filter(
                TI.dag_id == self.dag_id,
                TI.run_id == self.run_id,
                tuple_in_condition((TI.task_id, TI.map_index), schedulable_ti_ids),
            )
            .update({TI.state: State.SCHEDULED}, synchronize_session=False)
        )

    # Tasks using EmptyOperator should not be executed, mark them as success
    if dummy_ti_ids:
        # One timestamp for both columns keeps start_date == end_date,
        # consistent with the zero duration recorded below.
        now = timezone.utcnow()
        count += (
            session.query(TI)
            .filter(
                TI.dag_id == self.dag_id,
                TI.run_id == self.run_id,
                tuple_in_condition((TI.task_id, TI.map_index), dummy_ti_ids),
            )
            .update(
                {
                    TI.state: State.SUCCESS,
                    TI.start_date: now,
                    TI.end_date: now,
                    TI.duration: 0,
                },
                synchronize_session=False,
            )
        )

    return count