示例#1
0
    def test_lineage_is_sent_to_backend(self, mock_get_backend):
        """The prepare/apply lineage hooks must hand exactly one inlet and
        one outlet to the configured lineage backend."""

        class _AssertingBackend(LineageBackend):
            # The backend itself performs the verification: apply_lineage must
            # deliver exactly one inlet and one outlet.
            def send_lineage(self,
                             operator,
                             inlets=None,
                             outlets=None,
                             context=None):
                assert len(inlets) == 1
                assert len(outlets) == 1

        mock_get_backend.return_value = _AssertingBackend()

        func = mock.Mock()
        func.__name__ = 'foo'

        dag = DAG(dag_id='test_lineage_is_sent_to_backend',
                  start_date=DEFAULT_DATE)
        with dag:
            op1 = DummyOperator(task_id='task1')

        # The same file acts as both the inlet and the outlet of the task.
        shared_file = File("/tmp/some_file")
        op1.inlets.append(shared_file)
        op1.outlets.append(shared_file)

        ctx1 = {
            "ti": TI(task=op1, execution_date=DEFAULT_DATE),
            "execution_date": DEFAULT_DATE,
        }

        # Drive both lineage hooks; the asserting backend does the checking.
        prepare_lineage(func)(op1, ctx1)
        apply_lineage(func)(op1, ctx1)
示例#2
0
def test_lineage_backend(mock_emit, inlets, outlets):
    """End-to-end check that the DataHub lineage backend registers the
    task's inlets/outlets as Datasets and emits one batch of four MCEs."""
    DEFAULT_DATE = days_ago(2)

    lineage_env = {
        "AIRFLOW__LINEAGE__BACKEND":
        "datahub_provider.lineage.datahub.DatahubLineageBackend",
        "AIRFLOW__LINEAGE__DATAHUB_CONN_ID":
        datahub_rest_connection_config.conn_id,
        # Fail the test loudly instead of swallowing backend errors.
        "AIRFLOW__LINEAGE__DATAHUB_KWARGS":
        json.dumps({"graceful_exceptions": False}),
    }

    with mock.patch.dict(os.environ, lineage_env), \
            mock.patch("airflow.models.BaseOperator.xcom_pull",
                       autospec=True), \
            mock.patch("airflow.models.BaseOperator.xcom_push",
                       autospec=True), \
            patch_airflow_connection(datahub_rest_connection_config):
        func = mock.Mock()
        func.__name__ = "foo"

        dag = DAG(dag_id="test_lineage_is_sent_to_backend",
                  start_date=DEFAULT_DATE)
        with dag:
            op1 = DummyOperator(task_id="task1",
                                inlets=inlets,
                                outlets=outlets)

        ti = TI(task=op1, execution_date=DEFAULT_DATE)
        ctx1 = {
            "dag": dag,
            "task": op1,
            "ti": ti,
            "task_instance": ti,
            "execution_date": DEFAULT_DATE,
            "ts": "2021-04-08T00:54:25.771575+00:00",
        }

        # Run the pre- and post-execution lineage hooks in order, exactly as
        # the task runner would.
        prepare_lineage(func)(op1, ctx1)
        apply_lineage(func)(op1, ctx1)

        # Verify that the inlets and outlets are registered and recognized by
        # Airflow correctly, or that our lineage backend forces it to.
        assert len(op1.inlets) == 1
        assert len(op1.outlets) == 1
        assert all(isinstance(entity, Dataset) for entity in op1.inlets)
        assert all(isinstance(entity, Dataset) for entity in op1.outlets)

        # Exactly one emit call carrying four structurally valid MCEs.
        mock_emit.assert_called_once()
        emitted_mces = mock_emit.call_args[0][0]
        assert len(emitted_mces) == 4
        assert all(mce.validate() for mce in emitted_mces)
示例#3
0
def test_lineage_backend(mock_emit, mock_xcom_push, inlets, outlets):
    """Exercise the DataHub Airflow lineage backend and verify both the xcom
    pushes and the emitted MCE batch."""
    DEFAULT_DATE = days_ago(2)

    lineage_env = {
        "AIRFLOW__LINEAGE__BACKEND":
        "datahub.integrations.airflow.DatahubAirflowLineageBackend",
        "AIRFLOW__LINEAGE__DATAHUB_CONN_ID":
        datahub_rest_connection_config.conn_id,
    }
    with mock.patch.dict(os.environ, lineage_env), \
            patch_airflow_connection(datahub_rest_connection_config):
        func = mock.Mock()
        func.__name__ = "foo"

        dag = DAG(dag_id="test_lineage_is_sent_to_backend",
                  start_date=DEFAULT_DATE)
        with dag:
            op1 = DummyOperator(task_id="task1",
                                inlets=inlets,
                                outlets=outlets)

        ti = TI(task=op1, execution_date=DEFAULT_DATE)
        ctx1 = {
            "dag": dag,
            "task": op1,
            "ti": ti,
            "task_instance": ti,
            "execution_date": DEFAULT_DATE,
            "ts": "2021-04-08T00:54:25.771575+00:00",
        }

        # Run the pre- and post-execution lineage hooks in order.
        prepare_lineage(func)(op1, ctx1)
        apply_lineage(func)(op1, ctx1)

        # Verify that the inlets and outlets are registered and recognized by
        # Airflow correctly, or that our lineage backend forces it to.
        assert len(op1.inlets) == 1
        assert len(op1.outlets) == 1
        assert all(isinstance(entity, Dataset) for entity in op1.inlets)
        assert all(isinstance(entity, Dataset) for entity in op1.outlets)

        # Verify xcom push calls are correct.
        # Two calls, one for inlets and the other for outlets.
        assert mock_xcom_push.call_count == 2

        # Exactly one emit call carrying four structurally valid MCEs.
        mock_emit.assert_called_once()
        emitted_mces = mock_emit.call_args[0][0]
        assert len(emitted_mces) == 4
        assert all(mce.validate() for mce in emitted_mces)
示例#4
0
def test_lineage_backend(mock_emit):
    """Attach a consumed/produced snowflake table pair to a task, run the
    lineage hooks, and check that one batch of four valid MCEs is emitted."""
    # Airflow 2.x does not have lineage backend support merged back in yet.
    # As such, we must protect these imports.
    from airflow.lineage import apply_lineage, prepare_lineage

    from datahub.integrations.airflow.entities import Dataset

    DEFAULT_DATE = days_ago(2)

    lineage_env = {
        "AIRFLOW__LINEAGE__BACKEND":
        "datahub.integrations.airflow.DatahubAirflowLineageBackend",
        "AIRFLOW__LINEAGE__DATAHUB_CONN_ID":
        datahub_rest_connection_config.conn_id,
    }
    with mock.patch.dict(os.environ, lineage_env), \
            patch_airflow_connection(datahub_rest_connection_config):
        func = mock.Mock()
        func.__name__ = "foo"

        dag = DAG(dag_id="test_lineage_is_sent_to_backend",
                  start_date=DEFAULT_DATE)
        with dag:
            op1 = DummyOperator(task_id="task1")

        # One table consumed upstream, one produced downstream.
        consumed = Dataset("snowflake", "mydb.schema.tableConsumed")
        produced = Dataset("snowflake", "mydb.schema.tableProduced")
        op1.inlets.append(consumed)
        op1.outlets.append(produced)

        ti = TI(task=op1, execution_date=DEFAULT_DATE)
        ctx1 = {
            "dag": dag,
            "task": op1,
            "ti": ti,
            "task_instance": ti,
            "execution_date": DEFAULT_DATE,
            "ts": "2021-04-08T00:54:25.771575+00:00",
        }

        # Run the pre- and post-execution lineage hooks in order.
        prepare_lineage(func)(op1, ctx1)
        apply_lineage(func)(op1, ctx1)

        # Exactly one emit call carrying four structurally valid MCEs.
        mock_emit.assert_called_once()
        emitted_mces = mock_emit.call_args[0][0]
        assert len(emitted_mces) == 4
        assert all(mce.validate() for mce in emitted_mces)
    def test_lineage(self, _get_backend):
        """Exercise the three inlet declaration styles ("datasets", "auto",
        "task_ids") on a small linear DAG and check that prepare_lineage
        resolves the expected inlets/outlets and that apply_lineage sends
        exactly one payload per task to the (mocked) lineage backend.
        """
        # Stand-in for the configured backend; send_mock records every call.
        backend = mock.Mock()
        send_mock = mock.Mock()
        backend.send_lineage = send_mock

        _get_backend.return_value = backend

        dag = DAG(
            dag_id='test_prepare_lineage',
            start_date=DEFAULT_DATE
        )

        f1 = File("/tmp/does_not_exist_1")
        f2 = File("/tmp/does_not_exist_2")
        f3 = File("/tmp/does_not_exist_3")

        # DAG shape: op1, op2 -> op3 -> op4 -> op5.
        with dag:
            op1 = DummyOperator(task_id='leave1',
                                inlets={"datasets": [f1, ]},
                                outlets={"datasets": [f2, ]})
            op2 = DummyOperator(task_id='leave2')
            op3 = DummyOperator(task_id='upstream_level_1',
                                inlets={"auto": True},
                                outlets={"datasets": [f3, ]})
            op4 = DummyOperator(task_id='upstream_level_2')
            op5 = DummyOperator(task_id='upstream_level_3',
                                inlets={"task_ids": ["leave1", "upstream_level_1"]})

            op1.set_downstream(op3)
            op2.set_downstream(op3)
            op3.set_downstream(op4)
            op4.set_downstream(op5)

        ctx1 = {"ti": TI(task=op1, execution_date=DEFAULT_DATE)}
        ctx2 = {"ti": TI(task=op2, execution_date=DEFAULT_DATE)}
        ctx3 = {"ti": TI(task=op3, execution_date=DEFAULT_DATE)}
        ctx5 = {"ti": TI(task=op5, execution_date=DEFAULT_DATE)}

        func = mock.Mock()
        func.__name__ = 'foo'

        # prepare with manual inlets and outlets
        prep = prepare_lineage(func)
        prep(op1, ctx1)

        self.assertEqual(len(op1.inlets), 1)
        self.assertEqual(op1.inlets[0], f1)

        self.assertEqual(len(op1.outlets), 1)
        self.assertEqual(op1.outlets[0], f2)

        # post process: the mocked backend must receive exactly one payload
        post = apply_lineage(func)
        post(op1, ctx1)
        self.assertEqual(send_mock.call_count, 1)
        send_mock.reset_mock()

        # op2 declares nothing, so it resolves no inlets (but still reports).
        prep(op2, ctx2)
        self.assertEqual(len(op2.inlets), 0)
        post(op2, ctx2)
        self.assertEqual(send_mock.call_count, 1)
        send_mock.reset_mock()

        # "auto" resolves op3's inlet to the upstream outlet f2 (from op1).
        prep(op3, ctx3)
        self.assertEqual(len(op3.inlets), 1)
        self.assertEqual(op3.inlets[0].qualified_name, f2.qualified_name)
        post(op3, ctx3)
        self.assertEqual(send_mock.call_count, 1)
        send_mock.reset_mock()

        # skip 4

        # "task_ids" picks the outlets of the two named tasks (f2 and f3).
        prep(op5, ctx5)
        self.assertEqual(len(op5.inlets), 2)
        post(op5, ctx5)
        self.assertEqual(send_mock.call_count, 1)
        send_mock.reset_mock()
    def test_lineage_auto_branching(self, _get_backend):
        """Check that {"auto": True} inlet resolution skips upstream operators
        that declare no outlets: op4 must still find f1, which only op2
        declares.
        """
        # Tests the ability for the auto feature to skip non state affecting operators
        # DAG diagram:
        #  1--->2---->4
        #       ▼     ▲
        #       3-----+
        backend = mock.Mock()
        send_mock = mock.Mock()
        backend.send_lineage = send_mock

        _get_backend.return_value = backend

        dag = DAG(
            dag_id='test_prepare_lineage_auto_branching',
            start_date=DEFAULT_DATE
        )

        f1 = File("/tmp/does_not_exist_1")

        with dag:
            op1 = DummyOperator(task_id='leave1')
            # Only op2 declares an outlet; op4 must discover it via "auto".
            op2 = DummyOperator(task_id='branch_1', outlets={"datasets": [f1, ]})
            op3 = DummyOperator(task_id='branch_2')
            op4 = DummyOperator(task_id='upstream_level_2', inlets={"auto": True})

            op1.set_downstream(op2)
            op2.set_downstream(op3)
            op2.set_downstream(op4)
            op3.set_downstream(op4)

        ctx1 = {"ti": TI(task=op1, execution_date=DEFAULT_DATE)}
        ctx2 = {"ti": TI(task=op2, execution_date=DEFAULT_DATE)}
        ctx3 = {"ti": TI(task=op3, execution_date=DEFAULT_DATE)}
        ctx4 = {"ti": TI(task=op4, execution_date=DEFAULT_DATE)}

        func = mock.Mock()
        func.__name__ = 'foo'

        # prepare with manual inlets and outlets
        prep = prepare_lineage(func)
        prep(op1, ctx1)

        self.assertEqual(len(op1.inlets), 0)

        # post process with no backend
        post = apply_lineage(func)
        post(op1, ctx1)
        send_mock.reset_mock()

        prep(op2, ctx2)
        self.assertEqual(len(op2.inlets), 0)
        post(op2, ctx2)
        self.assertEqual(send_mock.call_count, 1)
        send_mock.reset_mock()

        prep(op3, ctx3)
        self.assertEqual(len(op3.inlets), 0)
        post(op3, ctx3)
        self.assertEqual(send_mock.call_count, 1)
        send_mock.reset_mock()

        # "auto" resolves op4's inlet to f1 even though op3 (a direct
        # upstream) declares no outlets -- resolution reaches op2's outlet.
        prep(op4, ctx4)
        self.assertEqual(len(op4.inlets), 1)
        self.assertEqual(op4.inlets[0].name, f1.name)
        post(op4, ctx4)
        self.assertEqual(send_mock.call_count, 1)
        send_mock.reset_mock()
    def test_lineage_complicated_dag(self, _get_backend):
        """Verify "auto" inlet resolution on a multi-branch DAG: operators
        without declared outlets are traversed through, and inlets are
        collected from every reachable upstream outlet source.
        """
        # Tests the ability for the auto feature to skip non state affecting operators, while still
        # retrieving data from multiple outlet sources. Notice how if outlets are not specified,
        # that the auto feature continues to traverse down the dag until no input sources are found.

        # DAG diagram:
        # 1-----------+
        #             |
        #             ▼
        #             4 ----------+
        #             ▲           ▼
        #             |           5+-------->6
        # 2-----------+           ▲
        #                         |
        #                         |
        #                         |
        # 3-----------------------+

        backend = mock.Mock()
        send_mock = mock.Mock()
        backend.send_lineage = send_mock

        _get_backend.return_value = backend

        dag = DAG(
            dag_id='test_prepare_lineage_auto_complicated_dag',
            start_date=DEFAULT_DATE
        )

        f1 = File("/tmp/does_not_exist_1")
        f2 = File("/tmp/does_not_exist_2")
        f3 = File("/tmp/does_not_exist_3")

        with dag:
            op1 = DummyOperator(task_id='leave1',
                                outlets={"datasets": [f1, ]},
                                inlets={"auto": True})
            op2 = DummyOperator(task_id='leave2',
                                outlets={"datasets": [f2, ]})
            op3 = DummyOperator(task_id='leave3',
                                outlets={"datasets": [f3, ]})
            op4 = DummyOperator(task_id='upstream_level_1')
            op5 = DummyOperator(task_id='upstream_level_2', inlets={"auto": True})
            op6 = DummyOperator(task_id='upstream_level_3', inlets={"auto": True})

            op1.set_downstream(op4)
            op2.set_downstream(op4)
            op3.set_downstream(op5)
            op4.set_downstream(op5)
            op5.set_downstream(op6)

        ctx1 = {"ti": TI(task=op1, execution_date=DEFAULT_DATE)}
        ctx2 = {"ti": TI(task=op2, execution_date=DEFAULT_DATE)}
        ctx3 = {"ti": TI(task=op3, execution_date=DEFAULT_DATE)}
        ctx4 = {"ti": TI(task=op4, execution_date=DEFAULT_DATE)}
        ctx5 = {"ti": TI(task=op5, execution_date=DEFAULT_DATE)}
        ctx6 = {"ti": TI(task=op6, execution_date=DEFAULT_DATE)}

        func = mock.Mock()
        func.__name__ = 'foo'

        # prepare with manual inlets and outlets
        prep = prepare_lineage(func)
        prep(op1, ctx1)

        # op1 is a leaf: its declared outlet sticks, "auto" finds nothing.
        self.assertEqual(len(op1.outlets), 1)
        self.assertEqual(op1.outlets[0], f1)
        self.assertEqual(len(op1.inlets), 0)

        # post process with no backend
        post = apply_lineage(func)
        post(op1, ctx1)

        prep(op2, ctx2)
        self.assertEqual(len(op2.outlets), 1)
        post(op2, ctx2)

        prep(op3, ctx3)
        self.assertEqual(len(op3.outlets), 1)
        post(op3, ctx3)

        # op4 declares nothing, so it resolves no inlets of its own.
        prep(op4, ctx4)
        self.assertEqual(len(op4.inlets), 0)
        post(op4, ctx4)

        # op5's "auto" gathers f3 (direct upstream op3) plus f1/f2 by
        # traversing through outlet-less op4 to op1 and op2.
        prep(op5, ctx5)
        self.assertEqual(len(op5.inlets), 3)
        self.assertEqual({file.qualified_name for file in op5.inlets}, {'file:///tmp/does_not_exist_1',
                                                                        'file:///tmp/does_not_exist_2',
                                                                        'file:///tmp/does_not_exist_3'})
        post(op5, ctx5)

        # op6 likewise resolves all three files via "auto".
        prep(op6, ctx6)
        self.assertEqual(len(op6.inlets), 3)
        self.assertEqual({file.qualified_name for file in op6.inlets}, {'file:///tmp/does_not_exist_1',
                                                                        'file:///tmp/does_not_exist_2',
                                                                        'file:///tmp/does_not_exist_3'})
        post(op6, ctx6)
示例#8
0
def test_lineage_backend(mock_emit, inlets, outlets):
    """Run the prepare/apply lineage hooks on the downstream task of a
    two-task DAG and verify that the DataHub backend normalizes the
    inlets/outlets to Datasets and emits one batch of four valid MCEs.
    """
    DEFAULT_DATE = days_ago(2)

    # Using autospec on xcom_pull and xcom_push methods fails on Python 3.6.
    with mock.patch.dict(
            os.environ,
        {
            "AIRFLOW__LINEAGE__BACKEND":
            "datahub_provider.lineage.datahub.DatahubLineageBackend",
            "AIRFLOW__LINEAGE__DATAHUB_CONN_ID":
            datahub_rest_connection_config.conn_id,
            # Fail the test loudly instead of logging-and-continuing.
            "AIRFLOW__LINEAGE__DATAHUB_KWARGS":
            json.dumps({"graceful_exceptions": False}),
        },
    ), mock.patch("airflow.models.BaseOperator.xcom_pull"), mock.patch(
            "airflow.models.BaseOperator.xcom_push"), patch_airflow_connection(
                datahub_rest_connection_config):
        func = mock.Mock()
        func.__name__ = "foo"

        dag = DAG(dag_id="test_lineage_is_sent_to_backend",
                  start_date=DEFAULT_DATE)

        with dag:
            op1 = DummyOperator(
                task_id="task1_upstream",
                inlets=inlets,
                outlets=outlets,
            )
            op2 = DummyOperator(
                task_id="task2",
                inlets=inlets,
                outlets=outlets,
            )
            op1 >> op2

        # Airflow <= 2.1 requires the execution_date parameter. Newer Airflow
        # versions do not require it, but will attempt to find the associated
        # run_id in the database if execution_date is provided. As such, we
        # must fake the run_id parameter for newer Airflow versions.
        # Compare the parsed (major, minor) pair instead of string prefixes:
        # startswith("2.1") would also (wrongly) match 2.10, 2.11, ...
        major_minor = tuple(
            int(part) for part in airflow.version.version.split(".")[:2])
        if major_minor < (2, 2):
            ti = TaskInstance(task=op2, execution_date=DEFAULT_DATE)
        else:
            ti = TaskInstance(task=op2, run_id=f"test_airflow-{DEFAULT_DATE}")
        ctx1 = {
            "dag": dag,
            "task": op2,
            "ti": ti,
            "task_instance": ti,
            "execution_date": DEFAULT_DATE,
            "ts": "2021-04-08T00:54:25.771575+00:00",
        }

        # Run the pre- and post-execution lineage hooks in order.
        prep = prepare_lineage(func)
        prep(op2, ctx1)
        post = apply_lineage(func)
        post(op2, ctx1)

        # Verify that the inlets and outlets are registered and recognized by Airflow correctly,
        # or that our lineage backend forces it to.
        assert len(op2.inlets) == 1
        assert len(op2.outlets) == 1
        assert all(isinstance(let, Dataset) for let in op2.inlets)
        assert all(isinstance(let, Dataset) for let in op2.outlets)

        # Check that the right things were emitted.
        mock_emit.assert_called_once()
        assert len(mock_emit.call_args[0][0]) == 4
        assert all(mce.validate() for mce in mock_emit.call_args[0][0])
    def test_lineage(self, _get_backend):
        """Cover the "datasets", "auto" and "task_ids" inlet declaration
        styles on a linear DAG, asserting the resolved inlets/outlets and
        that the mocked backend gets exactly one send_lineage call per task.
        """
        # Mocked lineage backend; send_mock captures send_lineage calls.
        backend = mock.Mock()
        send_mock = mock.Mock()
        backend.send_lineage = send_mock

        _get_backend.return_value = backend

        dag = DAG(
            dag_id='test_prepare_lineage',
            start_date=DEFAULT_DATE
        )

        f1 = File("/tmp/does_not_exist_1")
        f2 = File("/tmp/does_not_exist_2")
        f3 = File("/tmp/does_not_exist_3")

        # DAG shape: op1, op2 -> op3 -> op4 -> op5.
        with dag:
            op1 = DummyOperator(task_id='leave1',
                                inlets={"datasets": [f1, ]},
                                outlets={"datasets": [f2, ]})
            op2 = DummyOperator(task_id='leave2')
            op3 = DummyOperator(task_id='upstream_level_1',
                                inlets={"auto": True},
                                outlets={"datasets": [f3, ]})
            op4 = DummyOperator(task_id='upstream_level_2')
            op5 = DummyOperator(task_id='upstream_level_3',
                                inlets={"task_ids": ["leave1", "upstream_level_1"]})

            op1.set_downstream(op3)
            op2.set_downstream(op3)
            op3.set_downstream(op4)
            op4.set_downstream(op5)

        ctx1 = {"ti": TI(task=op1, execution_date=DEFAULT_DATE)}
        ctx2 = {"ti": TI(task=op2, execution_date=DEFAULT_DATE)}
        ctx3 = {"ti": TI(task=op3, execution_date=DEFAULT_DATE)}
        ctx5 = {"ti": TI(task=op5, execution_date=DEFAULT_DATE)}

        func = mock.Mock()
        func.__name__ = 'foo'

        # prepare with manual inlets and outlets
        prep = prepare_lineage(func)
        prep(op1, ctx1)

        self.assertEqual(len(op1.inlets), 1)
        self.assertEqual(op1.inlets[0], f1)

        self.assertEqual(len(op1.outlets), 1)
        self.assertEqual(op1.outlets[0], f2)

        # post process: the mocked backend must be called exactly once
        post = apply_lineage(func)
        post(op1, ctx1)
        self.assertEqual(send_mock.call_count, 1)
        send_mock.reset_mock()

        # op2 declares nothing, so no inlets resolve (it still reports).
        prep(op2, ctx2)
        self.assertEqual(len(op2.inlets), 0)
        post(op2, ctx2)
        self.assertEqual(send_mock.call_count, 1)
        send_mock.reset_mock()

        # "auto" resolves op3's inlet to op1's declared outlet f2.
        prep(op3, ctx3)
        self.assertEqual(len(op3.inlets), 1)
        self.assertEqual(op3.inlets[0].qualified_name, f2.qualified_name)
        post(op3, ctx3)
        self.assertEqual(send_mock.call_count, 1)
        send_mock.reset_mock()

        # skip 4

        # "task_ids" collects the outlets of the two named tasks (f2 and f3).
        prep(op5, ctx5)
        self.assertEqual(len(op5.inlets), 2)
        post(op5, ctx5)
        self.assertEqual(send_mock.call_count, 1)
        send_mock.reset_mock()
示例#10
0
def test_lineage_backend_capture_executions(mock_emit, inlets, outlets):
    """With capture_executions enabled, the backend must emit the full
    ordered sequence of flow, job, dataset and data-process-instance
    aspects (17 in total) for a completed task run.
    """
    DEFAULT_DATE = datetime.datetime(2020, 5, 17)
    mock_emitter = Mock()
    mock_emit.return_value = mock_emitter
    # Using autospec on xcom_pull and xcom_push methods fails on Python 3.6.
    with mock.patch.dict(
            os.environ,
        {
            "AIRFLOW__LINEAGE__BACKEND":
            "datahub_provider.lineage.datahub.DatahubLineageBackend",
            "AIRFLOW__LINEAGE__DATAHUB_CONN_ID":
            datahub_rest_connection_config.conn_id,
            "AIRFLOW__LINEAGE__DATAHUB_KWARGS":
            json.dumps({
                "graceful_exceptions": False,
                "capture_executions": True
            }),
        },
    ), mock.patch("airflow.models.BaseOperator.xcom_pull"), mock.patch(
            "airflow.models.BaseOperator.xcom_push"), patch_airflow_connection(
                datahub_rest_connection_config):
        func = mock.Mock()
        func.__name__ = "foo"

        dag = DAG(dag_id="test_lineage_is_sent_to_backend",
                  start_date=DEFAULT_DATE)

        with dag:
            op1 = DummyOperator(
                task_id="task1_upstream",
                inlets=inlets,
                outlets=outlets,
            )
            op2 = DummyOperator(
                task_id="task2",
                inlets=inlets,
                outlets=outlets,
            )
            op1 >> op2

        # Airflow < 2.2 requires the execution_date parameter. Newer Airflow
        # versions do not require it, but will attempt to find the associated
        # run_id in the database if execution_date is provided. As such, we
        # must fake the run_id parameter for newer Airflow versions.
        if AIRFLOW_VERSION < packaging.version.parse("2.2.0"):
            ti = TaskInstance(task=op2, execution_date=DEFAULT_DATE)
            # Ignoring type here because DagRun state is just a string at Airflow 1
            dag_run = DagRun(
                state="success",
                run_id=f"scheduled_{DEFAULT_DATE}")  # type: ignore
        else:
            from airflow.utils.state import DagRunState

            ti = TaskInstance(task=op2, run_id=f"test_airflow-{DEFAULT_DATE}")
            dag_run = DagRun(state=DagRunState.SUCCESS,
                             run_id=f"scheduled_{DEFAULT_DATE}")
        # Common task-instance wiring regardless of Airflow generation.
        ti.dag_run = dag_run
        ti.start_date = datetime.datetime.utcnow()
        ti.execution_date = DEFAULT_DATE

        ctx1 = {
            "dag": dag,
            "task": op2,
            "ti": ti,
            "dag_run": dag_run,
            "task_instance": ti,
            "execution_date": DEFAULT_DATE,
            "ts": "2021-04-08T00:54:25.771575+00:00",
        }

        # Run the pre- and post-execution lineage hooks in order.
        prep = prepare_lineage(func)
        prep(op2, ctx1)
        post = apply_lineage(func)
        post(op2, ctx1)

        # Verify that the inlets and outlets are registered and recognized by Airflow correctly,
        # or that our lineage backend forces it to.
        assert len(op2.inlets) == 1
        assert len(op2.outlets) == 1
        assert all(isinstance(let, Dataset) for let in op2.inlets)
        assert all(isinstance(let, Dataset) for let in op2.outlets)

        # Check that the right things were emitted.
        assert mock_emitter.emit.call_count == 17
        # mock call objects only gained .args/.kwargs in Python 3.8; the old
        # guard `sys.version_info[:3] > (3, 7)` wrongly admitted 3.7.x patch
        # releases (e.g. (3, 7, 5) > (3, 7)), where these asserts would break.
        if sys.version_info >= (3, 8):
            flow_urn = (
                "urn:li:dataFlow:(airflow,test_lineage_is_sent_to_backend,prod)"
            )
            job_urn = f"urn:li:dataJob:({flow_urn},task2)"
            upstream_job_urn = f"urn:li:dataJob:({flow_urn},task1_upstream)"
            consumed_urn = (
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableConsumed,PROD)"
            )
            produced_urn = (
                "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableProduced,PROD)"
            )
            dpi_urn = (
                "urn:li:dataProcessInstance:b6375e5f5faeb543cfb5d7d8a47661fb")

            # (aspectName, entityUrn) for each emitted call, in emission order.
            expected_aspects = [
                ("dataFlowInfo", flow_urn),
                ("ownership", flow_urn),
                ("globalTags", flow_urn),
                ("dataJobInfo", job_urn),
                ("dataJobInputOutput", job_urn),
                ("status", consumed_urn),
                ("status", produced_urn),
                ("ownership", job_urn),
                ("globalTags", job_urn),
                ("dataProcessInstanceProperties", dpi_urn),
                ("dataProcessInstanceRelationships", dpi_urn),
                ("dataProcessInstanceInput", dpi_urn),
                ("dataProcessInstanceOutput", dpi_urn),
                ("status", consumed_urn),
                ("status", produced_urn),
                ("dataProcessInstanceRunEvent", dpi_urn),
                ("dataProcessInstanceRunEvent", dpi_urn),
            ]
            for index, (aspect_name, entity_urn) in enumerate(expected_aspects):
                emitted = mock_emitter.method_calls[index].args[0]
                assert emitted.aspectName == aspect_name, f"call #{index}"
                assert emitted.entityUrn == entity_urn, f"call #{index}"

            # The dataJobInputOutput aspect (call #4) must wire task2 to its
            # upstream job and to the consumed/produced snowflake tables.
            io_aspect = mock_emitter.method_calls[4].args[0].aspect
            assert io_aspect.inputDatajobs[0] == upstream_job_urn
            assert io_aspect.inputDatasets[0] == consumed_urn
            assert io_aspect.outputDatasets[0] == produced_urn