def get_many(
    cls,
    execution_date: Optional[datetime.datetime] = None,
    key: Optional[str] = None,
    task_ids: Optional[Union[str, Iterable[str]]] = None,
    dag_ids: Optional[Union[str, Iterable[str]]] = None,
    map_indexes: Union[int, Iterable[int], None] = None,
    include_prior_dates: bool = False,
    limit: Optional[int] = None,
    session: Session = NEW_SESSION,
    *,
    run_id: Optional[str] = None,
) -> Query:
    """:sphinx-autoapi-skip:"""
    # Composes (but does not execute) a query over ``cls`` joined to its DAG
    # run. Exactly one of ``execution_date`` / ``run_id`` must be supplied,
    # otherwise ValueError is raised. Rows come back newest first.
    from airflow.models.dagrun import DagRun

    by_date = execution_date is not None
    if not exactly_one(by_date, run_id is not None):
        raise ValueError(
            f"Exactly one of run_id or execution_date must be passed. "
            f"Passed execution_date={execution_date}, run_id={run_id}"
        )
    if by_date:
        # Must be emitted from this frame: stacklevel is relative to it.
        warnings.warn(
            "Passing 'execution_date' to 'XCom.get_many()' is deprecated. Use 'run_id' instead.",
            PendingDeprecationWarning,
            stacklevel=3,
        )

    query = session.query(cls).join(cls.dag_run)
    if key:
        query = query.filter(cls.key == key)

    # Each of these accepts either a single value or a container of values;
    # None means "do not filter on this column".
    for column, wanted in (
        (cls.task_id, task_ids),
        (cls.dag_id, dag_ids),
        (cls.map_index, map_indexes),
    ):
        if is_container(wanted):
            query = query.filter(column.in_(wanted))
        elif wanted is not None:
            query = query.filter(column == wanted)

    if include_prior_dates and by_date:
        query = query.filter(DagRun.execution_date <= execution_date)
    elif include_prior_dates:
        # Only a run_id is known: look up that run's execution date in a
        # subquery and use it as the upper bound.
        run_date = session.query(DagRun.execution_date).filter(DagRun.run_id == run_id).subquery()
        query = query.filter(cls.execution_date <= run_date.c.execution_date)
    elif by_date:
        query = query.filter(DagRun.execution_date == execution_date)
    else:
        query = query.filter(cls.run_id == run_id)

    query = query.order_by(DagRun.execution_date.desc(), cls.timestamp.desc())
    # A falsy limit (None or 0) leaves the query unbounded.
    return query.limit(limit) if limit else query
def find(
    cls,
    dag_id: Optional[Union[str, List[str]]] = None,
    run_id: Optional[Iterable[str]] = None,
    execution_date: Optional[Union[datetime, Iterable[datetime]]] = None,
    state: Optional[DagRunState] = None,
    external_trigger: Optional[bool] = None,
    no_backfills: bool = False,
    run_type: Optional[DagRunType] = None,
    session: Session = NEW_SESSION,
    execution_start_date: Optional[datetime] = None,
    execution_end_date: Optional[datetime] = None,
) -> List["DagRun"]:
    """
    Fetch the dag runs matching the given criteria, ordered by execution date.

    Every criterion is optional; the ones that are supplied are combined
    with AND.

    :param dag_id: a single dag_id or a list of dag_ids to restrict to
    :param run_id: run id to match; a container of run ids matches any of them
    :param run_type: only return runs of this type
    :param execution_date: execution date to match; a container of dates
        matches any of them
    :param state: only return runs in this state
    :param external_trigger: only return runs whose ``external_trigger``
        flag equals this value
    :param no_backfills: when True, exclude runs of type ``BACKFILL_JOB``
    :param session: database session
    :param execution_start_date: lower bound (inclusive) on execution date
    :param execution_end_date: upper bound (inclusive) on execution date
    """
    criteria = []

    # Normalise a lone dag_id into a one-element list so that ``IN`` works
    # for both call styles.
    wanted_dag_ids = [dag_id] if isinstance(dag_id, str) else dag_id
    if wanted_dag_ids:
        criteria.append(cls.dag_id.in_(wanted_dag_ids))

    # Scalar -> equality, container -> IN, None -> no filter.
    for column, wanted in ((cls.run_id, run_id), (cls.execution_date, execution_date)):
        if is_container(wanted):
            criteria.append(column.in_(wanted))
        elif wanted is not None:
            criteria.append(column == wanted)

    if execution_start_date and execution_end_date:
        criteria.append(cls.execution_date.between(execution_start_date, execution_end_date))
    elif execution_start_date:
        criteria.append(cls.execution_date >= execution_start_date)
    elif execution_end_date:
        criteria.append(cls.execution_date <= execution_end_date)

    if state:
        criteria.append(cls.state == state)
    if external_trigger is not None:
        criteria.append(cls.external_trigger == external_trigger)
    if run_type:
        criteria.append(cls.run_type == run_type)
    if no_backfills:
        criteria.append(cls.run_type != DagRunType.BACKFILL_JOB)

    qry = session.query(cls)
    for criterion in criteria:
        qry = qry.filter(criterion)
    return qry.order_by(cls.execution_date).all()
def test_is_container(self):
    """Strings and ints are not containers; lists are."""
    # (assertion, candidate) pairs, checked in this order.
    checks = (
        (self.assertFalse, "a string is not a container"),
        (self.assertTrue, ["a", "list", "is", "a", "container"]),
        (self.assertTrue, ['test_list']),
        (self.assertFalse, 'test_str_not_iterable'),
        # An object that is neither iterable nor a string.
        (self.assertFalse, 10),
    )
    for expect, candidate in checks:
        expect(helpers.is_container(candidate))
def test_is_container(self):
    """Strings and ints are not containers; lists are."""
    # (candidate, should_be_container) pairs, checked in this order.
    cases = (
        ("a string is not a container", False),
        (["a", "list", "is", "a", "container"], True),
        (['test_list'], True),
        ('test_str_not_iterable', False),
        # An object that is neither iterable nor a string.
        (10, False),
    )
    for candidate, should_be_container in cases:
        verdict = helpers.is_container(candidate)
        if should_be_container:
            assert verdict
        else:
            assert not verdict
def get_many(
    cls,
    execution_date: Optional[pendulum.DateTime] = None,
    key: Optional[str] = None,
    task_ids: Optional[Union[str, Iterable[str]]] = None,
    dag_ids: Optional[Union[str, Iterable[str]]] = None,
    include_prior_dates: bool = False,
    limit: Optional[int] = None,
    session: Session = NEW_SESSION,
    *,
    run_id: Optional[str] = None,
) -> Query:
    """:sphinx-autoapi-skip:"""
    # Composes (but does not execute) a query over ``cls``.
    #
    # Exactly one of ``execution_date`` / ``run_id`` must be given, else
    # ValueError. ``include_prior_dates`` is only supported together with
    # ``execution_date`` and raises ValueError otherwise. Falsy ``key``,
    # ``task_ids``, ``dag_ids`` and ``limit`` mean "no restriction".
    # Results are ordered newest first.
    if (execution_date is None) == (run_id is None):
        raise ValueError(
            "Exactly one of execution_date or run_id must be passed")

    filters = []
    if key:
        filters.append(cls.key == key)

    if task_ids:
        if is_container(task_ids):
            filters.append(cls.task_id.in_(task_ids))
        else:
            filters.append(cls.task_id == task_ids)

    if dag_ids:
        if is_container(dag_ids):
            filters.append(cls.dag_id.in_(dag_ids))
        else:
            filters.append(cls.dag_id == dag_ids)

    if include_prior_dates:
        if execution_date is None:
            # In theory it would be possible to build a subquery that joins to DagRun and then gets the
            # execution dates. Lets do that for 2.3
            raise ValueError(
                "Using include_prior_dates needs an execution_date to be passed"
            )
        filters.append(cls.execution_date <= execution_date)
    elif execution_date is not None:
        filters.append(cls.execution_date == execution_date)

    query = session.query(cls).filter(*filters)

    # Test against None, not truthiness: an empty-string run_id passes the
    # exactly-one check above and must still restrict the query to that
    # run. Otherwise no run/date filter would be applied at all and rows
    # from every run would be returned.
    if run_id is not None:
        from airflow.models.dagrun import DagRun

        query = query.join(cls.dag_run).filter(DagRun.run_id == run_id)

    query = query.order_by(cls.execution_date.desc(), cls.timestamp.desc())

    if limit:
        return query.limit(limit)
    return query
def get_many(
    cls,
    execution_date: Optional[pendulum.DateTime] = None,
    key: Optional[str] = None,
    task_ids: Optional[Union[str, Iterable[str]]] = None,
    dag_ids: Optional[Union[str, Iterable[str]]] = None,
    include_prior_dates: bool = False,
    limit: Optional[int] = None,
    session: Session = NEW_SESSION,
    *,
    run_id: Optional[str] = None,
) -> Query:
    """:sphinx-autoapi-skip:"""
    # Composes (but does not execute) a query over ``cls``. Exactly one of
    # ``execution_date`` / ``run_id`` must be supplied, otherwise ValueError
    # is raised. Rows come back newest first.
    from airflow.models.dagrun import DagRun

    by_date = execution_date is not None
    if not exactly_one(by_date, run_id is not None):
        raise ValueError(
            "Exactly one of execution_date or run_id must be passed")
    if by_date:
        # Must be emitted from this frame: stacklevel is relative to it.
        warnings.warn(
            "Passing 'execution_date' to 'XCom.get_many()' is deprecated. Use 'run_id' instead.",
            PendingDeprecationWarning,
            stacklevel=3,
        )

    query = session.query(cls)
    if key:
        query = query.filter(cls.key == key)

    # Scalar -> equality, container -> IN, None -> no filter.
    for column, wanted in ((cls.task_id, task_ids), (cls.dag_id, dag_ids)):
        if is_container(wanted):
            query = query.filter(column.in_(wanted))
        elif wanted is not None:
            query = query.filter(column == wanted)

    if include_prior_dates and by_date:
        query = query.filter(cls.execution_date <= execution_date)
    elif include_prior_dates:
        # For IN_MEMORY_DAGRUN_ID this subquery matches no DagRun row, so
        # the overall result is empty; the original author notes that case
        # cannot be implemented.
        run_date = session.query(DagRun.execution_date).filter(
            DagRun.run_id == run_id).subquery()
        query = query.filter(cls.execution_date <= run_date.c.execution_date)
    elif by_date:
        query = query.filter(cls.execution_date == execution_date)
    elif run_id == IN_MEMORY_DAGRUN_ID:
        query = query.filter(cls.execution_date == _DISTANT_FUTURE)
    else:
        query = query.join(cls.dag_run).filter(DagRun.run_id == run_id)

    query = query.order_by(cls.execution_date.desc(), cls.timestamp.desc())
    # A falsy limit (None or 0) leaves the query unbounded.
    return query.limit(limit) if limit else query
def ensure_xcomarg_return_value(arg: Any) -> None:
    """Reject any ``XComArg`` in *arg* that does not use the return-value key.

    *arg* is inspected recursively: the values of mappings and the items of
    other iterable containers are checked in turn. Anything ``is_container``
    does not consider a container is accepted as-is.

    :raises ValueError: if an ``XComArg`` with a key other than
        ``XCOM_RETURN_KEY`` is found.
    """
    from airflow.models.xcom_arg import XCOM_RETURN_KEY, XComArg

    if isinstance(arg, XComArg):
        if arg.key != XCOM_RETURN_KEY:
            raise ValueError(f"cannot map over XCom with custom key {arg.key!r} from {arg.operator}")
        return

    if not is_container(arg):
        return

    # Pick what to descend into; mappings are walked by value only.
    if isinstance(arg, collections.abc.Mapping):
        children = arg.values()
    elif isinstance(arg, collections.abc.Iterable):
        children = arg
    else:
        return

    for child in children:
        ensure_xcomarg_return_value(child)
def unknown_to_array(value):
    """Coerce *value* into an array-like object.

    * ``None`` or anything of length zero -> ``None``
    * a string wrapped in ``[`` ... ``]`` -> the result of
      ``ast.literal_eval`` on it
    * any other string -> a one-element list holding that string
    * a container (per ``is_container``) -> returned unchanged

    :raises ValueError: if *value* is neither a string nor a container.
    """
    if value is None or len(value) == 0:
        return None

    if isinstance(value, basestring):
        bracketed = value[0] == '[' and value[-1] == ']'
        return ast.literal_eval(value) if bracketed else [value]

    if is_container(value):
        return value

    raise ValueError(
        'input was not array or string or string representing an array'
    )
def try_xcom_pull(context, task_ids, dag_id=None, key=XCOM_RETURN_KEY, include_prior_dates=False):
    """
    Pull XComs for one or several tasks via ``try_get_one``.

    The execution date is always taken from ``context['ti']``. With the
    default ``key`` only XComs returned by tasks are considered (as opposed
    to ones pushed manually); pass ``key=None`` (or another key) to change
    that.

    When ``task_ids`` is a single id, the ``try_get_one`` result for that
    task is returned directly. When it is a container of ids, a tuple with
    one ``try_get_one`` result per id, in iteration order, is returned.

    :param context: task context; ``context['ti']`` supplies the execution
        date and, when ``dag_id`` is None, the dag id
    :param task_ids: task id, or container of task ids, to pull from
    :type task_ids: string or iterable of strings (representing task_ids)
    :param dag_id: DAG to pull from. If None (default), the DAG of the
        calling task is used.
    :type dag_id: string
    :param key: XCom key to match. Defaults to XCOM_RETURN_KEY
        ('return_value'), the key automatically given to XComs returned by
        tasks.
    :type key: string
    :param include_prior_dates: forwarded to ``try_get_one``. If False, only
        XComs from the current execution_date are returned. If True, XComs
        from previous dates are returned as well.
    :type include_prior_dates: bool
    """
    task_instance = context['ti']
    effective_dag_id = task_instance.dag_id if dag_id is None else dag_id

    # Arguments shared by every lookup; built once, up front.
    shared_kwargs = dict(
        execution_date=context['ti'].execution_date,
        key=key,
        dag_id=effective_dag_id,
        include_prior_dates=include_prior_dates,
    )

    if not is_container(task_ids):
        return try_get_one(**shared_kwargs, task_id=task_ids)
    return tuple(try_get_one(**shared_kwargs, task_id=one_id) for one_id in task_ids)
def test_is_container(self):
    """A plain string is not a container, a list of strings is."""
    is_container = helpers.is_container

    plain_string = "a string is not a container"
    self.assertFalse(is_container(plain_string))

    list_of_strings = ["a", "list", "is", "a", "container"]
    self.assertTrue(is_container(list_of_strings))
def test_is_container(self):
    """A list is a container; a string and an int are not."""
    # (assertion, candidate) pairs, checked in this order.
    checks = (
        (self.assertTrue, ['test_list']),
        (self.assertFalse, 'test_str_not_iterable'),
        # An object that is neither iterable nor a string.
        (self.assertFalse, 10),
    )
    for expect, candidate in checks:
        expect(helpers.is_container(candidate))
def test_is_container(self):
    """A plain string is not a container, a list of strings is."""
    # (assertion, candidate) pairs, checked in this order.
    checks = (
        (self.assertFalse, "a string is not a container"),
        (self.assertTrue, ["a", "list", "is", "a", "container"]),
    )
    for expect, candidate in checks:
        expect(helpers.is_container(candidate))
def get_many(cls,
             execution_date: pendulum.DateTime,
             key: Optional[str] = None,
             task_ids: Optional[Union[str, Iterable[str]]] = None,
             dag_ids: Optional[Union[str, Iterable[str]]] = None,
             include_prior_dates: bool = False,
             limit: Optional[int] = None,
             session: Session = None) -> Query:
    """
    Build a query that selects rows from the xcom table.

    The query is returned unexecuted, ordered by execution date and then
    timestamp, both descending.

    :param execution_date: Execution date to match (or the upper bound when
        ``include_prior_dates`` is set)
    :type execution_date: pendulum.datetime
    :param key: Only XComs with this key are selected. Pass key=None to
        remove the filter.
    :type key: str
    :param task_ids: Only XComs from these task ids are selected. Pass None
        to remove the filter.
    :type task_ids: str or iterable of strings (representing task_ids)
    :param dag_ids: Only XComs from these DAG ids are selected. Pass None to
        remove the filter.
    :type dag_ids: str or iterable of strings
    :param include_prior_dates: If False, only XComs from exactly
        ``execution_date`` are selected. If True, XComs from earlier dates
        are selected as well.
    :type include_prior_dates: bool
    :param limit: Maximum number of rows to return; a falsy value leaves the
        query unbounded. XCom objects can be quite big, so limiting the row
        count may be desirable.
    :type limit: int
    :param session: database session
    :type session: sqlalchemy.orm.session.Session
    """
    criteria = []

    if key:
        criteria.append(cls.key == key)

    # Falsy -> no filter, container -> IN, anything else -> equality.
    for column, wanted in ((cls.task_id, task_ids), (cls.dag_id, dag_ids)):
        if not wanted:
            continue
        if is_container(wanted):
            criteria.append(column.in_(wanted))
        else:
            criteria.append(column == wanted)

    criteria.append(
        cls.execution_date <= execution_date
        if include_prior_dates
        else cls.execution_date == execution_date
    )

    newest_first = (cls.execution_date.desc(), cls.timestamp.desc())
    query = session.query(cls).filter(and_(*criteria)).order_by(*newest_first)

    return query.limit(limit) if limit else query
def get_many(
    cls,
    execution_date: Optional[pendulum.DateTime] = None,
    run_id: Optional[str] = None,
    key: Optional[str] = None,
    task_ids: Optional[Union[str, Iterable[str]]] = None,
    dag_ids: Optional[Union[str, Iterable[str]]] = None,
    include_prior_dates: bool = False,
    limit: Optional[int] = None,
    session: Session = None,
) -> Query:
    """
    Composes a query to get one or more values from the xcom table.

    ``run_id`` and ``execution_date`` are mutually exclusive, and exactly
    one of them must be given. The query is returned unexecuted, ordered by
    execution date and then timestamp, both descending.

    :param execution_date: Execution date for the task
    :type execution_date: pendulum.datetime
    :param run_id: Dag run id for the task
    :type run_id: str
    :param key: A key for the XCom. If provided, only XComs with matching
        keys will be returned. To remove the filter, pass key=None.
    :type key: str
    :param task_ids: Only XComs from tasks with matching ids will be pulled.
        Can pass None to remove the filter.
    :type task_ids: str or iterable of strings (representing task_ids)
    :param dag_ids: If provided, only pulls XComs from these DAGs. Can pass
        None to remove the filter.
    :type dag_ids: str or iterable of strings
    :param include_prior_dates: If False, only XComs from the current
        execution_date are returned. If True, XComs from previous dates are
        returned as well. Requires ``execution_date``.
    :type include_prior_dates: bool
    :param limit: If required, limit the number of returned objects.
        XCom objects can be quite big and you might want to limit the
        number of rows.
    :type limit: int
    :param session: database session
    :type session: sqlalchemy.orm.session.Session
    :raises ValueError: if not exactly one of ``execution_date`` and
        ``run_id`` is passed, or if ``include_prior_dates`` is used without
        ``execution_date``.
    """
    if (execution_date is None) == (run_id is None):
        raise ValueError("Exactly one of execution_date or run_id must be passed")

    filters = []
    if key:
        filters.append(cls.key == key)

    if task_ids:
        if is_container(task_ids):
            filters.append(cls.task_id.in_(task_ids))
        else:
            filters.append(cls.task_id == task_ids)

    if dag_ids:
        if is_container(dag_ids):
            filters.append(cls.dag_id.in_(dag_ids))
        else:
            filters.append(cls.dag_id == dag_ids)

    if include_prior_dates:
        if execution_date is None:
            # In theory it would be possible to build a subquery that joins to DagRun and then gets the
            # execution dates. Lets do that for 2.3
            raise ValueError("Using include_prior_dates needs an execution_date to be passed")
        filters.append(cls.execution_date <= execution_date)
    elif execution_date is not None:
        filters.append(cls.execution_date == execution_date)

    query = session.query(cls).filter(*filters)

    # Test against None, not truthiness: an empty-string run_id passes the
    # exactly-one check above and must still restrict the query to that
    # run. Otherwise no run/date filter would be applied at all and rows
    # from every run would be returned.
    if run_id is not None:
        from airflow.models.dagrun import DagRun

        query = query.join(cls.dag_run).filter(DagRun.run_id == run_id)

    query = query.order_by(cls.execution_date.desc(), cls.timestamp.desc())

    if limit:
        return query.limit(limit)
    return query
def test_is_container(self):
    """Exercise ``helpers.is_container`` with the ``HelpersTest`` fixture classes."""
    fixtures = HelpersTest
    is_container = helpers.is_container

    iterable_instance = fixtures.TestObjIter()
    self.assertTrue(is_container(iterable_instance))

    # NOTE(review): the next two checks pass the classes themselves, not
    # instances - confirm that is intended.
    self.assertFalse(is_container(fixtures.TestObjNoIter))
    self.assertFalse(is_container(fixtures.TestObjIterStr))

    str_no_iter_instance = fixtures.TestObjStrNoIter()
    self.assertFalse(is_container(str_no_iter_instance))