def poke(self, context, session=None):
    """Check whether the external task (or DAG) reached an allowed state.

    Resolves the execution date(s) to watch — via ``execution_delta``,
    ``execution_date_fn``, or the context's ``execution_date`` — optionally
    verifies once that the external DAG/task exists, then counts matching
    ``TaskInstance`` rows (or ``DagRun`` rows when no ``external_task_id``
    is set) whose state is in ``allowed_states``.

    :param context: Airflow task context; ``execution_date`` is read from it.
    :param session: SQLAlchemy session (normally injected by a decorator).
    :raises AirflowException: when ``check_existence`` is set and the
        external DAG or task cannot be found.
    :return: True when every watched execution date has a match.
    """
    if self.execution_delta:
        dttm = context['execution_date'] - self.execution_delta
    elif self.execution_date_fn:
        dttm = self.execution_date_fn(context['execution_date'])
    else:
        dttm = context['execution_date']

    dttm_filter = dttm if isinstance(dttm, list) else [dttm]
    # Loop variable renamed from `datetime`, which shadowed the datetime
    # module name.
    serialized_dttm_filter = ','.join(dt.isoformat() for dt in dttm_filter)
    self.log.info('Poking for %s.%s on %s ... ',
                  self.external_dag_id, self.external_task_id,
                  serialized_dttm_filter)

    DM = DagModel
    TI = TaskInstance
    DR = DagRun

    # we only do the check for 1st time, no need for subsequent poke
    if self.check_existence and not self.has_checked_existence:
        dag_to_wait = session.query(DM).filter(
            DM.dag_id == self.external_dag_id).first()
        if not dag_to_wait:
            raise AirflowException('The external DAG '
                                   '{} does not exist.'.format(
                                       self.external_dag_id))
        if not os.path.exists(dag_to_wait.fileloc):
            raise AirflowException('The external DAG '
                                   '{} was deleted.'.format(
                                       self.external_dag_id))
        if self.external_task_id:
            refreshed_dag_info = DagBag(dag_to_wait.fileloc).get_dag(
                self.external_dag_id)
            if not refreshed_dag_info.has_task(self.external_task_id):
                # Message fix: the original concatenated
                # 'The external task' + '{} ...' with no separating space.
                raise AirflowException(
                    'The external task '
                    '{} in DAG {} does not exist.'.format(
                        self.external_task_id, self.external_dag_id))
        self.has_checked_existence = True

    if self.external_task_id:
        count = session.query(TI).filter(
            TI.dag_id == self.external_dag_id,
            TI.task_id == self.external_task_id,
            TI.state.in_(self.allowed_states),
            TI.execution_date.in_(dttm_filter),
        ).count()
    else:
        count = session.query(DR).filter(
            DR.dag_id == self.external_dag_id,
            DR.state.in_(self.allowed_states),
            DR.execution_date.in_(dttm_filter),
        ).count()
    session.commit()
    return count == len(dttm_filter)
def poke(self, context, session=None):
    """Check whether the external task (or DAG) reached an allowed state.

    Resolves the execution date(s) to watch — via ``execution_delta``,
    ``execution_date_fn``, or the context's ``execution_date`` — optionally
    verifies once that the external DAG/task exists, then counts matching
    ``TaskInstance`` rows (or ``DagRun`` rows when no ``external_task_id``
    is set) whose state is in ``allowed_states``.

    :param context: Airflow task context; ``execution_date`` is read from it.
    :param session: SQLAlchemy session (normally injected by a decorator).
    :raises AirflowException: when ``check_existence`` is set and the
        external DAG or task cannot be found.
    :return: True when every watched execution date has a match.
    """
    if self.execution_delta:
        dttm = context['execution_date'] - self.execution_delta
    elif self.execution_date_fn:
        dttm = self.execution_date_fn(context['execution_date'])
    else:
        dttm = context['execution_date']

    dttm_filter = dttm if isinstance(dttm, list) else [dttm]
    # Loop variable renamed from `datetime`, which shadowed the datetime
    # module name.
    serialized_dttm_filter = ','.join(dt.isoformat() for dt in dttm_filter)
    # Lazy %-style logger args replace the previous
    # ``'...{self.external_dag_id}...'.format(..., **locals())`` call, which
    # formatted eagerly on every poke and exposed all locals to the template.
    # The rendered message text is unchanged.
    self.log.info('Poking for %s.%s on %s ... ',
                  self.external_dag_id, self.external_task_id,
                  serialized_dttm_filter)

    DM = DagModel
    TI = TaskInstance
    DR = DagRun

    # we only do the check for 1st time, no need for subsequent poke
    if self.check_existence and not self.has_checked_existence:
        dag_to_wait = session.query(DM).filter(
            DM.dag_id == self.external_dag_id
        ).first()
        if not dag_to_wait:
            raise AirflowException('The external DAG '
                                   '{} does not exist.'.format(
                                       self.external_dag_id))
        if not os.path.exists(dag_to_wait.fileloc):
            raise AirflowException('The external DAG '
                                   '{} was deleted.'.format(
                                       self.external_dag_id))
        if self.external_task_id:
            refreshed_dag_info = DagBag(dag_to_wait.fileloc).get_dag(
                self.external_dag_id)
            if not refreshed_dag_info.has_task(self.external_task_id):
                # Message fix: the original concatenated
                # 'The external task' + '{} ...' with no separating space.
                raise AirflowException(
                    'The external task '
                    '{} in DAG {} does not exist.'.format(
                        self.external_task_id, self.external_dag_id))
        self.has_checked_existence = True

    if self.external_task_id:
        count = session.query(TI).filter(
            TI.dag_id == self.external_dag_id,
            TI.task_id == self.external_task_id,
            TI.state.in_(self.allowed_states),
            TI.execution_date.in_(dttm_filter),
        ).count()
    else:
        count = session.query(DR).filter(
            DR.dag_id == self.external_dag_id,
            DR.state.in_(self.allowed_states),
            DR.execution_date.in_(dttm_filter),
        ).count()
    session.commit()
    return count == len(dttm_filter)
def poke(self, context, session=None):
    """Poke for a matching run strictly *after* the computed reference date.

    Variation on the upstream sensor: the execution-date filter is a single
    reference datetime compared with a strict ``>``, the row count is taken
    with ``func.count()`` (``Query.count()`` wraps the statement in a
    subquery, which is less efficient), and the existence check runs on
    every poke (no ``has_checked_existence`` short-circuit here).

    :param context: Airflow task context; ``execution_date`` is read from it.
    :param session: SQLAlchemy session (normally injected by a decorator).
    :raises AirflowException: when ``check_existence`` is set and the
        external DAG or task cannot be found.
    :return: True when exactly one matching row exists.
    """
    if self.execution_delta:
        dttm = context['execution_date'] - self.execution_delta
    elif self.execution_date_fn:
        dttm = self._handle_execution_date_fn(context=context)
    else:
        dttm = context['execution_date']

    # Always a single-element list in this variant.
    dttm_filter = [dttm]
    self.log.info(
        'Poking for %s.%s on %s ... ',
        self.external_dag_id, self.external_task_id, dttm_filter
    )

    DM = DagModel
    TI = TaskInstance
    DR = DagRun

    if self.check_existence:
        dag_to_wait = session.query(DM).filter(
            DM.dag_id == self.external_dag_id
        ).first()
        if not dag_to_wait:
            raise AirflowException('The external DAG '
                                   '{} does not exist.'.format(
                                       self.external_dag_id))
        if not os.path.exists(dag_to_wait.fileloc):
            raise AirflowException('The external DAG '
                                   '{} was deleted.'.format(
                                       self.external_dag_id))
        if self.external_task_id:
            refreshed_dag_info = DagBag(dag_to_wait.fileloc).get_dag(
                self.external_dag_id)
            if not refreshed_dag_info.has_task(self.external_task_id):
                # Message fix: the original concatenated
                # 'The external task' + '{} ...' with no separating space.
                raise AirflowException(
                    'The external task '
                    '{} in DAG {} does not exist.'.format(
                        self.external_task_id, self.external_dag_id))

    if self.external_task_id:
        count = session.query(func.count()).filter(
            TI.dag_id == self.external_dag_id,
            TI.task_id == self.external_task_id,
            TI.state.in_(self.allowed_states),
            TI.execution_date > dttm_filter[0],
        ).scalar()
    else:
        count = session.query(func.count()).filter(
            DR.dag_id == self.external_dag_id,
            DR.state.in_(self.allowed_states),
            DR.execution_date > dttm_filter[0],
        ).scalar()
    session.commit()
    # NOTE(review): with a strict ``>`` filter, a second later run in an
    # allowed state makes count == 2 and this returns False again — confirm
    # ``count >= len(dttm_filter)`` wasn't intended.
    return count == len(dttm_filter)
def poke(self, context, session=None):
    """Poke for allowed (and optionally failed) states of the external runs.

    Computes the execution-date filter, optionally validates once that the
    external DAG (and task) exist, then compares — via ``self.get_count`` —
    the number of matches in ``allowed_states`` (and, when configured,
    ``failed_states``) against the number of watched dates.

    :param context: Airflow task context; ``execution_date`` is read from it.
    :param session: SQLAlchemy session (normally injected by a decorator).
    :raises AirflowException: when the external DAG/task does not exist, or
        when every watched date is in a failed state.
    :return: True when every watched execution date is in an allowed state.
    """
    if self.execution_delta:
        dttm = context['execution_date'] - self.execution_delta
    elif self.execution_date_fn:
        dttm = self._handle_execution_date_fn(context=context)
    else:
        dttm = context['execution_date']

    dttm_filter = dttm if isinstance(dttm, list) else [dttm]
    # Loop variable renamed from `datetime`, which shadowed the datetime
    # module name.
    serialized_dttm_filter = ','.join(dt.isoformat() for dt in dttm_filter)
    self.log.info('Poking for %s.%s on %s ... ',
                  self.external_dag_id, self.external_task_id,
                  serialized_dttm_filter)

    # we only do the check for 1st time, no need for subsequent poke
    if self.check_existence and not self.has_checked_existence:
        # DagModel referenced only inside this branch (the unconditional
        # `DM = DagModel` alias was dropped; it was used nowhere else).
        dag_to_wait = session.query(DagModel).filter(
            DagModel.dag_id == self.external_dag_id).first()
        if not dag_to_wait:
            raise AirflowException(
                f'The external DAG {self.external_dag_id} does not exist.')
        elif not os.path.exists(dag_to_wait.fileloc):
            raise AirflowException(
                f'The external DAG {self.external_dag_id} was deleted.')
        if self.external_task_id:
            refreshed_dag_info = DagBag(dag_to_wait.fileloc).get_dag(
                self.external_dag_id)
            if not refreshed_dag_info.has_task(self.external_task_id):
                raise AirflowException(
                    f'The external task {self.external_task_id} in '
                    f'DAG {self.external_dag_id} does not exist.')
        self.has_checked_existence = True

    count_allowed = self.get_count(dttm_filter, session, self.allowed_states)
    # -1 is a sentinel that can never equal len(dttm_filter) (always >= 1).
    count_failed = -1
    if self.failed_states:  # truthiness instead of len(...) > 0
        count_failed = self.get_count(dttm_filter, session, self.failed_states)
    session.commit()

    if count_failed == len(dttm_filter):
        if self.external_task_id:
            raise AirflowException(
                f'The external task {self.external_task_id} in DAG {self.external_dag_id} failed.'
            )
        else:
            raise AirflowException(
                f'The external DAG {self.external_dag_id} failed.')
    return count_allowed == len(dttm_filter)
def _check_for_existence(self, session) -> None:
    """Validate that the external DAG (and its watched tasks) exist.

    Raises ``AirflowException`` when the DAG is unknown to the metastore,
    when its file has been removed from disk, or when any id in
    ``external_task_ids`` is missing from the re-parsed DAG. On success,
    records that the check has been performed.
    """
    external_dag = (
        session.query(DagModel)
        .filter(DagModel.dag_id == self.external_dag_id)
        .first()
    )
    if not external_dag:
        raise AirflowException(f'The external DAG {self.external_dag_id} does not exist.')

    if not os.path.exists(external_dag.fileloc):
        raise AirflowException(f'The external DAG {self.external_dag_id} was deleted.')

    if self.external_task_ids:
        # Re-parse the DAG file so task lookups reflect its current contents.
        parsed_dag = DagBag(external_dag.fileloc).get_dag(self.external_dag_id)
        for task_id in self.external_task_ids:
            if parsed_dag.has_task(task_id):
                continue
            raise AirflowException(
                f'The external task {task_id} in DAG {self.external_dag_id} does not exist.'
            )

    self._has_checked_existence = True
def poke(self, context, session=None):
    """Check if the external task/DAG completed within a time window.

    Variation of the upstream sensor's ``poke``: instead of matching exact
    execution dates, it succeeds when at least one ``TaskInstance`` (or
    ``DagRun`` when no ``external_task_id`` is set) in ``allowed_states``
    has an execution date between a computed minimum datetime and now.

    The minimum datetime is ``now - execution_delta``, or
    ``now - execution_date_fn(execution_date)``, or the context's
    ``execution_date`` when neither is configured.

    :param context: Airflow task context; ``execution_date`` is read from it.
    :param session: SQLAlchemy session (normally injected by a decorator).
    :raises AirflowException: when ``check_existence`` is set and the
        external DAG or task cannot be found.
    :return: True when at least one matching row exists.
    """
    curr_datetime = timezone.utcnow()
    if self.execution_delta:
        min_datetime = curr_datetime - self.execution_delta
    elif self.execution_date_fn:
        min_datetime = curr_datetime - self.execution_date_fn(
            context['execution_date'])
    else:
        min_datetime = context['execution_date']

    self.log.info('Poking for %s.%s in state %s on timedelta %s-%s ... ',
                  self.external_dag_id, self.external_task_id,
                  self.allowed_states, min_datetime, curr_datetime)

    DM = DagModel
    TI = TaskInstance
    DR = DagRun

    # NOTE: v 1.10.3 introduced "check_existence" and "has_checked_existence"
    # we only do the check for 1st time, no need for subsequent poke
    if self.check_existence and not self.has_checked_existence:
        dag_to_wait = session.query(DM).filter(
            DM.dag_id == self.external_dag_id).first()
        if not dag_to_wait:
            raise AirflowException('The external DAG '
                                   '{} does not exist.'.format(
                                       self.external_dag_id))
        if not os.path.exists(dag_to_wait.fileloc):
            raise AirflowException('The external DAG '
                                   '{} was deleted.'.format(
                                       self.external_dag_id))
        if self.external_task_id:
            refreshed_dag_info = DagBag(dag_to_wait.fileloc).get_dag(
                self.external_dag_id)
            if not refreshed_dag_info.has_task(self.external_task_id):
                # Message fix: the original concatenated
                # 'The external task' + '{} ...' with no separating space.
                raise AirflowException(
                    'The external task '
                    '{} in DAG {} does not exist.'.format(
                        self.external_task_id, self.external_dag_id))
        self.has_checked_existence = True

    if self.external_task_id:
        # BUG FIX: this branch previously filtered on DR.execution_date,
        # pulling the DagRun table into a TaskInstance query without a join
        # (implicit cartesian product → wrong counts). Filter on the
        # TaskInstance's own execution_date instead.
        count = session.query(TI).filter(
            TI.dag_id == self.external_dag_id,
            TI.task_id == self.external_task_id,
            TI.state.in_(self.allowed_states),
            TI.execution_date.between(min_datetime, curr_datetime),
        ).count()
    else:
        count = session.query(DR).filter(
            DR.dag_id == self.external_dag_id,
            DR.state.in_(self.allowed_states),
            DR.execution_date.between(min_datetime, curr_datetime),
        ).count()

    self.log.info("found %s tasks for the requested query", count)
    session.commit()
    return bool(count)