Пример #1
0
def get_lineage(dag_id: str,
                execution_date: datetime.datetime,
                session=None) -> Dict[str, Dict[str, Any]]:
    """
    Gets the lineage information for dag specified.

    :param dag_id: id of the DAG whose lineage XComs are read
    :param execution_date: execution date used to select the XCom entries
    :param session: db session forwarded to ``XCom.get_many``
    :return: ``{'task_ids': {task_id: {...}}}`` where each per-task dict holds
        an ``'inlets'`` and/or ``'outlets'`` entry, depending on which XCom
        rows were found for that task
    """
    dag = check_and_get_dag(dag_id)
    # The returned dag run is not used here; presumably this call only
    # validates that a run exists for the given date.
    check_and_get_dagrun(dag, execution_date)

    inlets: List[XCom] = XCom.get_many(dag_ids=dag_id,
                                       execution_date=execution_date,
                                       key=PIPELINE_INLETS,
                                       session=session).all()
    outlets: List[XCom] = XCom.get_many(dag_ids=dag_id,
                                        execution_date=execution_date,
                                        key=PIPELINE_OUTLETS,
                                        session=session).all()

    lineage: Dict[str, Dict[str, Any]] = {}
    for meta in inlets:
        lineage.setdefault(meta.task_id, {})['inlets'] = meta.value

    # A task may have outlets without any inlets, so the per-task dict must be
    # created on demand rather than assumed to exist (previously a KeyError).
    for meta in outlets:
        lineage.setdefault(meta.task_id, {})['outlets'] = meta.value

    return {'task_ids': lineage}
Пример #2
0
    def test_xcom_get_one_enable_pickle_type(self):
        """A dict stored with XCom.set is read back equal via get_one and via a direct query."""
        payload = {"key": "value"}
        when = timezone.utcnow()
        xcom_key, dag, task = "xcom_test3", "test_dag", "test_task3"

        XCom.set(key=xcom_key, value=payload, dag_id=dag, task_id=task, execution_date=when)

        # First read path: the XCom helper API.
        fetched = XCom.get_one(key=xcom_key, dag_id=dag, task_id=task, execution_date=when)
        assert fetched == payload

        # Second read path: query the model through a fresh session.
        session = settings.Session()
        query = session.query(XCom).filter(
            XCom.key == xcom_key,
            XCom.dag_id == dag,
            XCom.task_id == task,
            XCom.execution_date == when,
        )
        stored = query.first().value
        assert stored == payload
Пример #3
0
    def test_xcom_get_many(self):
        """Values stored under one key for two dag/task pairs all come back equal from get_many."""
        payload = {"key": "value"}
        when = timezone.utcnow()
        xcom_key = "xcom_test4"
        targets = (
            ("test_dag4", "test_task4"),
            ("test_dag5", "test_task5"),
        )

        for dag, task in targets:
            XCom.set(key=xcom_key,
                     value=payload,
                     dag_id=dag,
                     task_id=task,
                     execution_date=when)

        found = XCom.get_many(key=xcom_key, execution_date=when)

        for entry in found:
            self.assertEqual(entry.value, payload)
Пример #4
0
    def test_xcom_get_one_disable_pickle_type(self):
        """A dict stored with XCom.set is read back equal via get_one and via a direct query."""
        payload = {"key": "value"}
        when = timezone.utcnow()
        xcom_key, dag, task = "xcom_test1", "test_dag1", "test_task1"

        XCom.set(key=xcom_key, value=payload, dag_id=dag, task_id=task, execution_date=when)

        # Read through the XCom helper API.
        fetched = XCom.get_one(key=xcom_key, dag_id=dag, task_id=task, execution_date=when)
        self.assertEqual(fetched, payload)

        # Read the row directly through a fresh session.
        session = settings.Session()
        query = session.query(XCom).filter(
            XCom.key == xcom_key,
            XCom.dag_id == dag,
            XCom.task_id == task,
            XCom.execution_date == when,
        )
        stored = query.first().value
        self.assertEqual(stored, payload)
Пример #5
0
    def skip(
        self,
        dag_run: "DagRun",
        execution_date: "DateTime",
        tasks: Sequence["BaseOperator"],
        session: "Session" = NEW_SESSION,
    ):
        """
        Mark the given tasks of one dag run as skipped.

        When this instance exposes a `task_id` attribute, the ids of the skipped tasks are
        also written to XCom so that NotPreviouslySkippedDep knows these tasks should be
        skipped when they are cleared.

        :param dag_run: the DagRun for which to set the tasks to skipped
        :param execution_date: deprecated; only used to look up the DagRun when
            ``dag_run`` is not given
        :param tasks: tasks to skip (not task_ids)
        :param session: db session to use
        :raises ValueError: if ``execution_date`` disagrees with ``dag_run.execution_date``,
            or if no dag run is available after the lookup
        """
        if not tasks:
            return

        if execution_date:
            if not dag_run:
                from airflow.models.dagrun import DagRun

                # Emitted from this frame so that stacklevel=2 points at the caller of skip().
                warnings.warn(
                    "Passing an execution_date to `skip()` is deprecated in favour of passing a dag_run",
                    DeprecationWarning,
                    stacklevel=2,
                )

                lookup = session.query(DagRun).filter(
                    DagRun.dag_id == tasks[0].dag_id,
                    DagRun.execution_date == execution_date,
                )
                dag_run = lookup.one()
            elif execution_date != dag_run.execution_date:
                raise ValueError(
                    "execution_date has a different value to  dag_run.execution_date -- please only pass dag_run"
                )

        if dag_run is None:
            raise ValueError("dag_run is required")

        self._set_state_to_skipped(dag_run, tasks, session)
        session.commit()

        # SkipMixin may not necessarily have a task_id attribute. Only store to XCom if one is available.
        task_id: Optional[str] = getattr(self, "task_id", None)
        if task_id is None:
            return

        from airflow.models.xcom import XCom

        skipped_ids = [d.task_id for d in tasks]
        XCom.set(
            key=XCOM_SKIPMIXIN_KEY,
            value={XCOM_SKIPMIXIN_SKIPPED: skipped_ids},
            task_id=task_id,
            dag_id=dag_run.dag_id,
            run_id=dag_run.run_id,
            session=session,
        )
Пример #6
0
    def test_xcom_disable_pickle_type_fail_on_non_json(self):
        """XCom.set is expected to raise TypeError for an object that only defines __reduce__."""
        class PickleRce:
            def __reduce__(self):
                return os.system, ("ls -alt", )

        payload = PickleRce()

        with pytest.raises(TypeError):
            XCom.set(
                key="xcom_test3",
                value=payload,
                dag_id="test_dag3",
                task_id="test_task3",
                execution_date=timezone.utcnow(),
            )
Пример #7
0
def test_should_continue_with_cp(load_dag):
    """Run the 'continue_if_data_staging.users' branch task with has_data=True stored in XCom."""
    dag_name = 'bq_to_wrench'
    table = 'staging.users'

    dag = load_dag(dag_name).get_dag(dag_name)
    task = dag.get_task(f'continue_if_data_{table}')
    assert isinstance(task, BranchPythonOperator)

    ti = TaskInstance(task=task, execution_date=datetime.now())
    # Store the XCom entry under the table name before the task is executed.
    XCom.set(
        key=table,
        value={'has_data': True},
        task_id=task.task_id,
        dag_id=dag.dag_id,
        execution_date=ti.execution_date,
    )

    context = ti.get_template_context()
    task.execute(context)
Пример #8
0
def get_lineage(dag_id: str, execution_date: datetime.datetime, *, session) -> Dict[str, Dict[str, Any]]:
    """Collect inlet/outlet XCom values per task for the dag run at ``execution_date``.

    Returns ``{"task_ids": {task_id: {"inlets": ..., "outlets": ...}}}``; a task
    only carries the keys for which an XCom entry was yielded.
    """
    dag = check_and_get_dag(dag_id)
    run_id = check_and_get_dagrun(dag, execution_date).run_id

    # Build both queries up front, in the same order as they are consumed below.
    queries = {
        direction: XCom.get_many(dag_ids=dag_id, run_id=run_id, key=xcom_key, session=session)
        for direction, xcom_key in (("inlets", PIPELINE_INLETS), ("outlets", PIPELINE_OUTLETS))
    }

    per_task: Dict[str, Dict[str, Any]] = collections.defaultdict(dict)
    for direction, entries in queries.items():
        for meta in entries:
            per_task[meta.task_id][direction] = meta.value

    # Hand back a plain dict rather than the defaultdict.
    return {"task_ids": dict(per_task)}
Пример #9
0
    def test_should_response_200(self):
        """The links endpoint returns 200 and one BigQuery Console link for a single job id."""
        XCom.set(
            key="job_id",
            value="TEST_JOB_ID",
            execution_date=self.default_time,
            task_id="TEST_SINGLE_QUERY",
            dag_id=self.dag.dag_id,
        )

        url = "/api/v1/dags/TEST_DAG_ID/dagRuns/TEST_DAG_RUN_ID/taskInstances/TEST_SINGLE_QUERY/links"
        response = self.client.get(url, environ_overrides={'REMOTE_USER': "******"})

        self.assertEqual(200, response.status_code, response.data)

        expected_links = {"BigQuery Console": "https://console.cloud.google.com/bigquery?j=TEST_JOB_ID"}
        self.assertEqual(expected_links, response.json)
Пример #10
0
 def get_link(self, operator: BaseOperator, dttm: datetime):
     """Format DATAFUSION_PIPELINES_LINK from this task's XCom entry.

     Reads the XCom stored under ``DataFusionPipelinesLink.key`` for the
     operator's dag/task at ``dttm``; returns "" when the lookup is falsy.
     """
     stored = XCom.get_one(
         dag_id=operator.dag.dag_id,
         task_id=operator.task_id,
         execution_date=dttm,
         key=DataFusionPipelinesLink.key,
     )
     if not stored:
         return ""
     return DATAFUSION_PIPELINES_LINK.format(uri=stored["uri"])
Пример #11
0
    def get_link(self, operator, dttm):
        """Build the URL for the triggered dag, using the execution date saved in XCom."""
        # The triggerING task stores the triggerED dag's execution date in XCom
        # while it runs; read it back for this task at ``dttm``.
        base_date = XCom.get_one(
            execution_date=dttm,
            key=XCOM_EXECUTION_DATE_ISO,
            task_id=operator.task_id,
            dag_id=operator.dag_id,
        )
        return build_airflow_url_with_query(
            {"dag_id": operator.trigger_dag_id, "base_date": base_date}
        )
Пример #12
0
def get_xcoms(task_instance):
    """Return ``(key, str(value))`` pairs for the XComs that get_many yields for this task instance."""
    from airflow.models.xcom import XCom

    entries = XCom.get_many(
        task_instance.execution_date,
        task_ids=task_instance.task_id,
        dag_ids=task_instance.dag_id,
    )
    return [(entry.key, str(entry.value)) for entry in entries]
Пример #13
0
    def test_should_respond_200_multiple_links(self):
        """The links endpoint returns 200 and one numbered BigQuery Console link per job id."""
        job_ids = ["TEST_JOB_ID_1", "TEST_JOB_ID_2"]
        XCom.set(
            key="job_id",
            value=job_ids,
            execution_date=self.default_time,
            task_id="TEST_MULTIPLE_QUERY",
            dag_id=self.dag.dag_id,
        )

        url = "/api/v1/dags/TEST_DAG_ID/dagRuns/TEST_DAG_RUN_ID/taskInstances/TEST_MULTIPLE_QUERY/links"
        response = self.client.get(url, environ_overrides={'REMOTE_USER': "******"})

        assert 200 == response.status_code, response.data

        expected_links = {
            "BigQuery Console #1": "https://console.cloud.google.com/bigquery?j=TEST_JOB_ID_1",
            "BigQuery Console #2": "https://console.cloud.google.com/bigquery?j=TEST_JOB_ID_2",
        }
        assert expected_links == response.json
Пример #14
0
    def skip(
        self,
        dag_run,
        execution_date,
        tasks,
        session=None,
    ):
        """
        Mark the given tasks of one dag run as skipped.

        When this instance exposes a `task_id` attribute, the ids of the skipped tasks are
        also written to XCom so that NotPreviouslySkippedDep knows these tasks should be
        skipped when they are cleared.

        :param dag_run: the DagRun for which to set the tasks to skipped
        :param execution_date: execution_date
        :param tasks: tasks to skip (not task_ids)
        :param session: db session to use
        """
        if not tasks:
            return

        self._set_state_to_skipped(dag_run, execution_date, tasks, session)
        session.commit()

        # SkipMixin may not necessarily have a task_id attribute. Only store to XCom if one is available.
        own_task_id = getattr(self, "task_id", None)
        if own_task_id is None:
            return

        from airflow.models.xcom import XCom

        skipped_ids = [d.task_id for d in tasks]
        XCom.set(
            key=XCOM_SKIPMIXIN_KEY,
            value={XCOM_SKIPMIXIN_SKIPPED: skipped_ids},
            task_id=own_task_id,
            dag_id=dag_run.dag_id,
            execution_date=dag_run.execution_date,
            session=session,
        )
Пример #15
0
 def get_link(self, operator: BaseOperator, dttm: datetime):
     """Format GCS_STORAGE_LINK from this task's XCom entry.

     Reads the XCom stored under ``StorageLink.key`` for the operator's
     dag/task at ``dttm``; returns "" when the lookup is falsy.
     """
     stored = XCom.get_one(
         dag_id=operator.dag.dag_id,
         task_id=operator.task_id,
         execution_date=dttm,
         key=StorageLink.key,
     )
     if not stored:
         return ""
     return GCS_STORAGE_LINK.format(
         uri=stored["uri"],
         project_id=stored["project_id"],
     )
Пример #16
0
 def get_link(
     self,
     operator,
     dttm: Optional[datetime] = None,
     ti_key: Optional["TaskInstanceKey"] = None,
 ) -> str:
     """Format the URL template held in this task's XCom entry.

     The entry is looked up by ``ti_key`` when one is given; otherwise by the
     operator's dag/task at ``dttm``. Returns "" when the lookup is falsy.
     """
     if not ti_key:
         # Without a ti_key the lookup needs an execution date.
         assert dttm
         stored = XCom.get_one(
             dag_id=operator.dag.dag_id,
             task_id=operator.task_id,
             execution_date=dttm,
             key=self.key,
         )
     else:
         stored = XCom.get_one(key=self.key, ti_key=ti_key)

     if not stored:
         return ""
     return stored["url"].format(
         region=stored["region"],
         service_id=stored["service_id"],
         project_id=stored["project_id"],
     )
Пример #17
0
 def get_link(self, operator: BaseOperator, dttm: datetime):
     """Format DATAFUSION_INSTANCE_LINK from this task's XCom entry.

     Reads the XCom stored under ``DataFusionInstanceLink.key`` for the
     operator's dag/task at ``dttm``; returns "" when the lookup is falsy.
     """
     stored = XCom.get_one(
         dag_id=operator.dag.dag_id,
         task_id=operator.task_id,
         execution_date=dttm,
         key=DataFusionInstanceLink.key,
     )
     if not stored:
         return ""
     return DATAFUSION_INSTANCE_LINK.format(
         region=stored["region"],
         instance_name=stored["instance_name"],
         project_id=stored["project_id"],
     )
Пример #18
0
    def test_xcom_deserialize_with_pickle_to_json_switch(self):
        """A value written with enable_xcom_pickling=True is read back equal with it set to False."""
        payload = {"key": "value"}
        when = timezone.utcnow()
        xcom_key, dag, task = "xcom_test3", "test_dag", "test_task3"
        pickling_option = ("core", "enable_xcom_pickling")

        # Write while pickling is switched on ...
        with conf_vars({pickling_option: "True"}):
            XCom.set(key=xcom_key, value=payload, dag_id=dag, task_id=task, execution_date=when)

        # ... and read while it is switched off.
        with conf_vars({pickling_option: "False"}):
            fetched = XCom.get_one(key=xcom_key, dag_id=dag, task_id=task, execution_date=when)

        assert fetched == payload
    def test_should_response_200_multiple_links(self):
        """The links endpoint returns 200 and one numbered BigQuery Console link per job id."""
        job_ids = ["TEST_JOB_ID_1", "TEST_JOB_ID_2"]
        XCom.set(
            key="job_id",
            value=job_ids,
            execution_date=self.default_time,
            task_id="TEST_MULTIPLE_QUERY",
            dag_id=self.dag.dag_id,
        )

        url = "/api/v1/dags/TEST_DAG_ID/dagRuns/TEST_DAG_RUN_ID/taskInstances/TEST_MULTIPLE_QUERY/links"
        response = self.client.get(url)

        self.assertEqual(200, response.status_code, response.data)

        expected_links = {
            "BigQuery Console #1": "https://console.cloud.google.com/bigquery?j=TEST_JOB_ID_1",
            "BigQuery Console #2": "https://console.cloud.google.com/bigquery?j=TEST_JOB_ID_2",
        }
        self.assertEqual(expected_links, response.json)
Пример #20
0
 def get_link(
     self,
     operator: "AbstractOperator",
     *,
     ti_key: "TaskInstanceKey",
 ) -> str:
     """Build the URL for the triggered dag, using the execution date saved in XCom."""
     # The triggerING task stores the triggerED dag's execution date in XCom
     # while it runs; read it back for the given task instance key.
     base_date = XCom.get_value(ti_key=ti_key, key=XCOM_EXECUTION_DATE_ISO)
     target_dag_id = cast(TriggerDagRunOperator, operator).trigger_dag_id
     return build_airflow_url_with_query({"dag_id": target_dag_id, "base_date": base_date})
Пример #21
0
 def get_link(self, operator: BaseOperator, dttm: datetime):
     """Format VERTEX_AI_TRAINING_PIPELINES_LINK from this task's XCom entry.

     Reads the XCom stored under ``VertexAITrainingPipelinesLink.key`` for the
     operator's dag/task at ``dttm``; returns "" when the lookup is falsy.
     """
     stored = XCom.get_one(
         key=VertexAITrainingPipelinesLink.key,
         dag_id=operator.dag.dag_id,
         task_id=operator.task_id,
         execution_date=dttm,
     )
     if not stored:
         return ""
     return VERTEX_AI_TRAINING_PIPELINES_LINK.format(project_id=stored["project_id"])
Пример #22
0
 def get_link(self, operator: BaseOperator, dttm: datetime):
     """Format VERTEX_AI_DATASET_LIST_LINK from this task's XCom entry.

     Reads the XCom stored under ``VertexAIDatasetListLink.key`` for the
     operator's dag/task at ``dttm``; returns "" when the lookup is falsy.
     """
     stored = XCom.get_one(
         key=VertexAIDatasetListLink.key,
         dag_id=operator.dag.dag_id,
         task_id=operator.task_id,
         execution_date=dttm,
     )
     if not stored:
         return ""
     return VERTEX_AI_DATASET_LIST_LINK.format(project_id=stored["project_id"])
Пример #23
0
 def get_link(self, operator: BaseOperator, dttm: datetime):
     """Format VERTEX_AI_MODEL_LINK from this task's XCom entry.

     Reads the XCom stored under ``VertexAIModelLink.key`` for the operator's
     dag/task at ``dttm``; returns "" when the lookup is falsy.
     """
     stored = XCom.get_one(
         key=VertexAIModelLink.key,
         dag_id=operator.dag.dag_id,
         task_id=operator.task_id,
         execution_date=dttm,
     )
     if not stored:
         return ""
     return VERTEX_AI_MODEL_LINK.format(
         region=stored["region"],
         model_id=stored["model_id"],
         project_id=stored["project_id"],
     )
Пример #24
0
 def get_link(self, operator: BaseOperator, dttm: datetime):
     """Format the URL template held in this task's XCom entry.

     Reads the XCom stored under ``DataprocMetastoreLink.key`` for the
     operator's dag/task at ``dttm``; returns "" when the lookup is falsy.
     """
     stored = XCom.get_one(
         dag_id=operator.dag.dag_id,
         task_id=operator.task_id,
         execution_date=dttm,
         key=DataprocMetastoreLink.key,
     )
     if not stored:
         return ""
     return stored["url"].format(
         region=stored["region"],
         service_id=stored["service_id"],
         project_id=stored["project_id"],
     )