def get_lineage(dag_id: str, execution_date: datetime.datetime, session=None) -> Dict[str, Dict[str, Any]]:
    """
    Gets the lineage information for dag specified

    :param dag_id: id of the DAG whose lineage is requested
    :param execution_date: execution date used to select the DAG run and its XCom rows
    :param session: db session forwarded to ``XCom.get_many``
    :return: ``{'task_ids': {<task_id>: {'inlets': ..., 'outlets': ...}}}``; a task only
        carries the keys for which an XCom row was found
    """
    dag = check_and_get_dag(dag_id)
    # Return value is unused here; presumably this raises when the run does not exist.
    check_and_get_dagrun(dag, execution_date)

    inlets: List[XCom] = XCom.get_many(
        dag_ids=dag_id, execution_date=execution_date, key=PIPELINE_INLETS, session=session
    ).all()
    outlets: List[XCom] = XCom.get_many(
        dag_ids=dag_id, execution_date=execution_date, key=PIPELINE_OUTLETS, session=session
    ).all()

    lineage: Dict[str, Dict[str, Any]] = {}
    for meta in inlets:
        lineage[meta.task_id] = {'inlets': meta.value}

    for meta in outlets:
        # A task may have outlets recorded without any inlets; create its entry
        # on demand instead of failing with KeyError.
        lineage.setdefault(meta.task_id, {})['outlets'] = meta.value

    return {'task_ids': lineage}
def test_xcom_get_one_enable_pickle_type(self):
    """Store a dict in XCom and read it back via ``get_one`` and via a direct query."""
    payload = {"key": "value"}
    when = timezone.utcnow()
    key, dag_id, task_id = "xcom_test3", "test_dag", "test_task3"

    XCom.set(key=key, value=payload, dag_id=dag_id, task_id=task_id, execution_date=when)

    # Read through the XCom API.
    fetched = XCom.get_one(key=key, dag_id=dag_id, task_id=task_id, execution_date=when)
    assert fetched == payload

    # Read the stored row directly through a session.
    session = settings.Session()
    query = session.query(XCom).filter(
        XCom.key == key,
        XCom.dag_id == dag_id,
        XCom.task_id == task_id,
        XCom.execution_date == when,
    )
    stored = query.first().value
    assert stored == payload
def test_xcom_get_many(self):
    """Set the same key for two dag/task pairs and check every row ``get_many`` yields."""
    payload = {"key": "value"}
    when = timezone.utcnow()
    key = "xcom_test4"
    targets = (
        ("test_dag4", "test_task4"),
        ("test_dag5", "test_task5"),
    )

    for dag_id, task_id in targets:
        XCom.set(key=key, value=payload, dag_id=dag_id, task_id=task_id, execution_date=when)

    for entry in XCom.get_many(key=key, execution_date=when):
        self.assertEqual(entry.value, payload)
def test_xcom_get_one_disable_pickle_type(self):
    """Store a dict in XCom and read it back via ``get_one`` and via a direct query."""
    payload = {"key": "value"}
    when = timezone.utcnow()
    key, dag_id, task_id = "xcom_test1", "test_dag1", "test_task1"

    XCom.set(key=key, value=payload, dag_id=dag_id, task_id=task_id, execution_date=when)

    # Read through the XCom API.
    fetched = XCom.get_one(key=key, dag_id=dag_id, task_id=task_id, execution_date=when)
    self.assertEqual(fetched, payload)

    # Read the stored row directly through a session.
    session = settings.Session()
    query = session.query(XCom).filter(
        XCom.key == key,
        XCom.dag_id == dag_id,
        XCom.task_id == task_id,
        XCom.execution_date == when,
    )
    stored = query.first().value
    self.assertEqual(stored, payload)
def skip(
    self,
    dag_run: "DagRun",
    execution_date: "DateTime",
    tasks: Sequence["BaseOperator"],
    session: "Session" = NEW_SESSION,
):
    """
    Mark the given tasks of one dag run as skipped.

    When this object exposes a ``task_id`` attribute, the ids of the skipped tasks
    are also written to XCom so that NotPreviouslySkippedDep knows these tasks
    should be skipped again when they are cleared.

    :param dag_run: the DagRun for which to set the tasks to skipped
    :param execution_date: execution_date (deprecated way to identify the run)
    :param tasks: tasks to skip (not task_ids)
    :param session: db session to use
    :raises ValueError: if ``execution_date`` disagrees with ``dag_run.execution_date``
        or no dag run is available
    """
    if not tasks:
        return

    if execution_date:
        if not dag_run:
            from airflow.models.dagrun import DagRun

            warnings.warn(
                "Passing an execution_date to `skip()` is deprecated in favour of passing a dag_run",
                DeprecationWarning,
                stacklevel=2,
            )

            # Resolve the run from the first task's dag and the given date.
            run_query = session.query(DagRun).filter(
                DagRun.dag_id == tasks[0].dag_id,
                DagRun.execution_date == execution_date,
            )
            dag_run = run_query.one()
        elif execution_date != dag_run.execution_date:
            raise ValueError(
                "execution_date has a different value to dag_run.execution_date -- please only pass dag_run"
            )

    if dag_run is None:
        raise ValueError("dag_run is required")

    self._set_state_to_skipped(dag_run, tasks, session)
    session.commit()

    # SkipMixin users are not guaranteed to have a task_id; without one there is
    # nothing to record in XCom.
    own_task_id: Optional[str] = getattr(self, "task_id", None)
    if own_task_id is None:
        return

    from airflow.models.xcom import XCom

    skipped_ids = [task.task_id for task in tasks]
    XCom.set(
        key=XCOM_SKIPMIXIN_KEY,
        value={XCOM_SKIPMIXIN_SKIPPED: skipped_ids},
        task_id=own_task_id,
        dag_id=dag_run.dag_id,
        run_id=dag_run.run_id,
        session=session,
    )
def test_xcom_disable_pickle_type_fail_on_non_json(self):
    """``XCom.set`` is expected to raise ``TypeError`` for an object with a custom ``__reduce__``."""

    class PickleRce:
        # Unpickling an instance would invoke ``os.system`` with this command.
        def __reduce__(self):
            return (os.system, ("ls -alt",))

    with pytest.raises(TypeError):
        unserializable = PickleRce()
        XCom.set(
            key="xcom_test3",
            value=unserializable,
            dag_id="test_dag3",
            task_id="test_task3",
            execution_date=timezone.utcnow(),
        )
def test_should_continue_with_cp(load_dag):
    """Run the ``continue_if_data_staging.users`` branch task with a ``has_data`` XCom in place."""
    dag_name = 'bq_to_wrench'
    dag = load_dag(dag_name).get_dag(dag_name)

    table = 'staging.users'
    task = dag.get_task(f'continue_if_data_{table}')
    assert isinstance(task, BranchPythonOperator)

    ti = TaskInstance(task=task, execution_date=datetime.now())

    # Seed the XCom the task is given before it executes.
    XCom.set(
        key=table,
        value={'has_data': True},
        task_id=task.task_id,
        dag_id=dag.dag_id,
        execution_date=ti.execution_date,
    )

    context = ti.get_template_context()
    task.execute(context)
def get_lineage(dag_id: str, execution_date: datetime.datetime, *, session) -> Dict[str, Dict[str, Any]]:
    """
    Gets the lineage information for dag specified.

    :param dag_id: id of the DAG whose lineage is requested
    :param execution_date: execution date used to look up the DAG run
    :param session: db session forwarded to ``XCom.get_many`` (keyword-only)
    :return: ``{"task_ids": {<task_id>: {"inlets": ..., "outlets": ...}}}``; a task only
        carries the keys for which an XCom row was found
    """
    dag = check_and_get_dag(dag_id)
    dagrun = check_and_get_dagrun(dag, execution_date)

    inlets = XCom.get_many(dag_ids=dag_id, run_id=dagrun.run_id, key=PIPELINE_INLETS, session=session)
    outlets = XCom.get_many(dag_ids=dag_id, run_id=dagrun.run_id, key=PIPELINE_OUTLETS, session=session)

    # defaultdict lets inlets and outlets be filled in any order without
    # first checking whether the task already has an entry.
    lineage: Dict[str, Dict[str, Any]] = collections.defaultdict(dict)
    for meta in inlets:
        lineage[meta.task_id]["inlets"] = meta.value
    for meta in outlets:
        lineage[meta.task_id]["outlets"] = meta.value

    # Hand back a plain dict rather than the defaultdict.
    return {"task_ids": dict(lineage)}
def test_should_response_200(self):
    """The links endpoint returns 200 and one BigQuery link built from the stored job id."""
    XCom.set(
        key="job_id",
        value="TEST_JOB_ID",
        execution_date=self.default_time,
        task_id="TEST_SINGLE_QUERY",
        dag_id=self.dag.dag_id,
    )

    url = "/api/v1/dags/TEST_DAG_ID/dagRuns/TEST_DAG_RUN_ID/taskInstances/TEST_SINGLE_QUERY/links"
    response = self.client.get(url, environ_overrides={'REMOTE_USER': "******"})

    self.assertEqual(200, response.status_code, response.data)
    expected_links = {"BigQuery Console": "https://console.cloud.google.com/bigquery?j=TEST_JOB_ID"}
    self.assertEqual(expected_links, response.json)
def get_link(self, operator: BaseOperator, dttm: datetime):
    """
    Build the Data Fusion pipelines link for a task instance.

    :param operator: operator whose dag id and task id select the XCom entry
    :param dttm: execution date of the task instance
    :return: the formatted link, or an empty string when no XCom entry is found
    """
    stored = XCom.get_one(
        dag_id=operator.dag.dag_id,
        task_id=operator.task_id,
        execution_date=dttm,
        key=DataFusionPipelinesLink.key,
    )
    if not stored:
        return ""
    return DATAFUSION_PIPELINES_LINK.format(uri=stored["uri"])
def get_link(self, operator, dttm):
    """
    Build the URL of the triggered dag.

    :param operator: the triggering operator; supplies ``task_id``, ``dag_id`` and ``trigger_dag_id``
    :param dttm: execution date of the triggering task instance
    :return: URL built by ``build_airflow_url_with_query``
    """
    # The triggerING task stores the triggerED dag's execution date in XCom
    # while it runs; read it back to use as the base date.
    base_date = XCom.get_one(
        execution_date=dttm,
        key=XCOM_EXECUTION_DATE_ISO,
        task_id=operator.task_id,
        dag_id=operator.dag_id,
    )
    return build_airflow_url_with_query(
        {"dag_id": operator.trigger_dag_id, "base_date": base_date}
    )
def get_xcoms(task_instance):
    """
    Collect the XCom entries of one task instance.

    :param task_instance: object providing ``execution_date``, ``task_id`` and ``dag_id``
    :return: list of ``(key, str(value))`` tuples, one per XCom row returned
    """
    from airflow.models.xcom import XCom

    rows = XCom.get_many(
        task_instance.execution_date,
        task_ids=task_instance.task_id,
        dag_ids=task_instance.dag_id,
    )
    return [(row.key, str(row.value)) for row in rows]
def test_should_respond_200_multiple_links(self):
    """The links endpoint returns 200 and one numbered BigQuery link per stored job id."""
    XCom.set(
        key="job_id",
        value=["TEST_JOB_ID_1", "TEST_JOB_ID_2"],
        execution_date=self.default_time,
        task_id="TEST_MULTIPLE_QUERY",
        dag_id=self.dag.dag_id,
    )

    url = "/api/v1/dags/TEST_DAG_ID/dagRuns/TEST_DAG_RUN_ID/taskInstances/TEST_MULTIPLE_QUERY/links"
    response = self.client.get(url, environ_overrides={'REMOTE_USER': "******"})

    assert 200 == response.status_code, response.data
    expected_links = {
        "BigQuery Console #1": "https://console.cloud.google.com/bigquery?j=TEST_JOB_ID_1",
        "BigQuery Console #2": "https://console.cloud.google.com/bigquery?j=TEST_JOB_ID_2",
    }
    assert expected_links == response.json
def skip(
    self,
    dag_run,
    execution_date,
    tasks,
    session=None,
):
    """
    Mark the given tasks of one dag run as skipped.

    When this object exposes a ``task_id`` attribute, the ids of the skipped tasks
    are also written to XCom so that NotPreviouslySkippedDep knows these tasks
    should be skipped again when they are cleared.

    :param dag_run: the DagRun for which to set the tasks to skipped
    :param execution_date: execution_date
    :param tasks: tasks to skip (not task_ids)
    :param session: db session to use
    """
    if not tasks:
        return

    self._set_state_to_skipped(dag_run, execution_date, tasks, session)
    session.commit()

    # SkipMixin users are not guaranteed to have a task_id; without one there is
    # nothing to record in XCom.
    own_task_id = getattr(self, "task_id", None)
    if own_task_id is None:
        return

    from airflow.models.xcom import XCom

    skipped_ids = [task.task_id for task in tasks]
    XCom.set(
        key=XCOM_SKIPMIXIN_KEY,
        value={XCOM_SKIPMIXIN_SKIPPED: skipped_ids},
        task_id=own_task_id,
        dag_id=dag_run.dag_id,
        execution_date=dag_run.execution_date,
        session=session,
    )
def get_link(self, operator: BaseOperator, dttm: datetime):
    """
    Build the GCS storage link for a task instance.

    :param operator: operator whose dag id and task id select the XCom entry
    :param dttm: execution date of the task instance
    :return: the formatted link, or an empty string when no XCom entry is found
    """
    stored = XCom.get_one(
        dag_id=operator.dag.dag_id,
        task_id=operator.task_id,
        execution_date=dttm,
        key=StorageLink.key,
    )
    if not stored:
        return ""
    return GCS_STORAGE_LINK.format(
        uri=stored["uri"],
        project_id=stored["project_id"],
    )
def get_link(
    self,
    operator,
    dttm: Optional[datetime] = None,
    ti_key: Optional["TaskInstanceKey"] = None,
) -> str:
    """
    Build the link from the XCom entry stored under ``self.key``.

    The entry is looked up by ``ti_key`` when one is given, otherwise by the
    operator's dag id / task id together with ``dttm``.

    :param operator: operator whose dag id and task id are used for the ``dttm`` lookup
    :param dttm: execution date; must be truthy when ``ti_key`` is not given
    :param ti_key: task instance key; takes precedence over ``dttm``
    :return: the stored ``url`` template filled with the stored region, service id
        and project id, or an empty string when no XCom entry is found
    """
    if ti_key:
        stored = XCom.get_one(key=self.key, ti_key=ti_key)
    else:
        assert dttm
        stored = XCom.get_one(
            dag_id=operator.dag.dag_id,
            task_id=operator.task_id,
            execution_date=dttm,
            key=self.key,
        )

    if not stored:
        return ""

    url_template = stored["url"]
    return url_template.format(
        region=stored["region"],
        service_id=stored["service_id"],
        project_id=stored["project_id"],
    )
def get_link(self, operator: BaseOperator, dttm: datetime):
    """
    Build the Data Fusion instance link for a task instance.

    :param operator: operator whose dag id and task id select the XCom entry
    :param dttm: execution date of the task instance
    :return: the formatted link, or an empty string when no XCom entry is found
    """
    stored = XCom.get_one(
        dag_id=operator.dag.dag_id,
        task_id=operator.task_id,
        execution_date=dttm,
        key=DataFusionInstanceLink.key,
    )
    if not stored:
        return ""
    return DATAFUSION_INSTANCE_LINK.format(
        region=stored["region"],
        instance_name=stored["instance_name"],
        project_id=stored["project_id"],
    )
def test_xcom_deserialize_with_pickle_to_json_switch(self):
    """A value written with ``enable_xcom_pickling`` True is still readable after it is set to False."""
    payload = {"key": "value"}
    when = timezone.utcnow()
    key, dag_id, task_id = "xcom_test3", "test_dag", "test_task3"
    pickling_option = ("core", "enable_xcom_pickling")

    # Write with pickling switched on ...
    with conf_vars({pickling_option: "True"}):
        XCom.set(key=key, value=payload, dag_id=dag_id, task_id=task_id, execution_date=when)

    # ... and read with pickling switched off.
    with conf_vars({pickling_option: "False"}):
        fetched = XCom.get_one(key=key, dag_id=dag_id, task_id=task_id, execution_date=when)

    assert fetched == payload
def test_should_response_200_multiple_links(self):
    """The links endpoint returns 200 and one numbered BigQuery link per stored job id."""
    XCom.set(
        key="job_id",
        value=["TEST_JOB_ID_1", "TEST_JOB_ID_2"],
        execution_date=self.default_time,
        task_id="TEST_MULTIPLE_QUERY",
        dag_id=self.dag.dag_id,
    )

    url = "/api/v1/dags/TEST_DAG_ID/dagRuns/TEST_DAG_RUN_ID/taskInstances/TEST_MULTIPLE_QUERY/links"
    response = self.client.get(url)

    self.assertEqual(200, response.status_code, response.data)
    expected_links = {
        "BigQuery Console #1": "https://console.cloud.google.com/bigquery?j=TEST_JOB_ID_1",
        "BigQuery Console #2": "https://console.cloud.google.com/bigquery?j=TEST_JOB_ID_2",
    }
    self.assertEqual(expected_links, response.json)
def get_link(
    self,
    operator: "AbstractOperator",
    *,
    ti_key: "TaskInstanceKey",
) -> str:
    """
    Build the URL of the triggered dag.

    :param operator: the triggering operator; treated as a ``TriggerDagRunOperator``
        to read ``trigger_dag_id``
    :param ti_key: key of the triggering task instance, used for the XCom lookup
    :return: URL built by ``build_airflow_url_with_query``
    """
    # The triggerING task stores the triggerED dag's execution date in XCom
    # while it runs; read it back to use as the base date.
    base_date = XCom.get_value(ti_key=ti_key, key=XCOM_EXECUTION_DATE_ISO)
    trigger_operator = cast(TriggerDagRunOperator, operator)
    return build_airflow_url_with_query(
        {"dag_id": trigger_operator.trigger_dag_id, "base_date": base_date}
    )
def get_link(self, operator: BaseOperator, dttm: datetime):
    """
    Build the Vertex AI training pipelines link for a task instance.

    :param operator: operator whose dag id and task id select the XCom entry
    :param dttm: execution date of the task instance
    :return: the formatted link, or an empty string when no XCom entry is found
    """
    stored = XCom.get_one(
        key=VertexAITrainingPipelinesLink.key,
        dag_id=operator.dag.dag_id,
        task_id=operator.task_id,
        execution_date=dttm,
    )
    if not stored:
        return ""
    return VERTEX_AI_TRAINING_PIPELINES_LINK.format(project_id=stored["project_id"])
def get_link(self, operator: BaseOperator, dttm: datetime):
    """
    Build the Vertex AI dataset list link for a task instance.

    :param operator: operator whose dag id and task id select the XCom entry
    :param dttm: execution date of the task instance
    :return: the formatted link, or an empty string when no XCom entry is found
    """
    stored = XCom.get_one(
        key=VertexAIDatasetListLink.key,
        dag_id=operator.dag.dag_id,
        task_id=operator.task_id,
        execution_date=dttm,
    )
    if not stored:
        return ""
    return VERTEX_AI_DATASET_LIST_LINK.format(project_id=stored["project_id"])
def get_link(self, operator: BaseOperator, dttm: datetime):
    """
    Build the Vertex AI model link for a task instance.

    :param operator: operator whose dag id and task id select the XCom entry
    :param dttm: execution date of the task instance
    :return: the formatted link, or an empty string when no XCom entry is found
    """
    stored = XCom.get_one(
        key=VertexAIModelLink.key,
        dag_id=operator.dag.dag_id,
        task_id=operator.task_id,
        execution_date=dttm,
    )
    if not stored:
        return ""
    return VERTEX_AI_MODEL_LINK.format(
        region=stored["region"],
        model_id=stored["model_id"],
        project_id=stored["project_id"],
    )
def get_link(self, operator: BaseOperator, dttm: datetime):
    """
    Build the Dataproc Metastore link for a task instance.

    :param operator: operator whose dag id and task id select the XCom entry
    :param dttm: execution date of the task instance
    :return: the stored ``url`` template filled with the stored region, service id
        and project id, or an empty string when no XCom entry is found
    """
    stored = XCom.get_one(
        dag_id=operator.dag.dag_id,
        task_id=operator.task_id,
        execution_date=dttm,
        key=DataprocMetastoreLink.key,
    )
    if not stored:
        return ""

    url_template = stored["url"]
    return url_template.format(
        region=stored["region"],
        service_id=stored["service_id"],
        project_id=stored["project_id"],
    )