Example #1
    def test_kafka_sink_config(self, mock_producer, mock_context):
        kafka_sink = DatahubKafkaSink.create(
            {"connection": {"bootstrap": "foobar:9092"}}, mock_context
        )
        kafka_sink.close()
        assert mock_producer.call_count == 1  # constructor should be called
Example #2
    def test_kafka_sink_write(self, mock_k_callback, mock_producer, mock_context):
        mock_producer_instance = mock_producer.return_value
        mock_k_callback_instance = mock_k_callback.return_value
        callback = MagicMock(spec=WriteCallback)
        kafka_sink = DatahubKafkaSink.create(
            {"connection": {
                "bootstrap": "foobar:9092"
            }}, mock_context)
        mce = builder.make_lineage_mce(
            [
                builder.make_dataset_urn("bigquery", "upstream1"),
                builder.make_dataset_urn("bigquery", "upstream2"),
            ],
            builder.make_dataset_urn("bigquery", "downstream1"),
        )

        re = RecordEnvelope(record=mce, metadata={})
        kafka_sink.write_record_async(re, callback)

        mock_producer_instance.poll.assert_called_once()  # producer should call poll() first
        self.validate_kafka_callback(
            mock_k_callback, re, callback
        )  # validate kafka callback was constructed appropriately

        # validate that confluent_kafka.Producer.produce was called with the right arguments
        mock_producer_instance.produce.assert_called_once()
        args, kwargs = mock_producer_instance.produce.call_args
        assert kwargs["value"] == mce
        assert kwargs["key"]  # produce call should include a Kafka key
        created_callback = kwargs["on_delivery"]
        assert created_callback == mock_k_callback_instance.kafka_callback
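
The test methods above are shown without their surrounding scaffolding: mock_producer, mock_k_callback, and mock_context are injected by unittest.mock.patch decorators on a test class, and validate_kafka_callback is a helper method of that class. A minimal sketch of what that scaffolding could look like follows; the patch targets and the helper's body are assumptions for illustration, not taken from the examples.

import unittest
from unittest.mock import patch


class KafkaSinkTest(unittest.TestCase):
    # The patch targets are illustrative assumptions; point them at the modules
    # where DatahubKafkaSink actually looks up SerializingProducer / KafkaCallback.
    @patch("datahub.ingestion.api.common.PipelineContext", autospec=True)
    @patch("datahub.emitter.kafka_emitter.SerializingProducer", autospec=True)
    @patch("datahub.ingestion.sink.datahub_kafka.KafkaCallback", autospec=True)
    def test_kafka_sink_write(self, mock_k_callback, mock_producer, mock_context):
        ...  # body as in the example above; mocks arrive bottom-up from the decorators

    def validate_kafka_callback(self, mock_k_callback, record_envelope, write_callback):
        # Hypothetical helper: the KafkaCallback class should have been
        # instantiated exactly once for this record/callback pair.
        assert mock_k_callback.call_count == 1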
Example #3
    def test_kafka_sink_write(self, mock_k_callback, mock_producer, mock_context):
        mock_producer_instance = mock_producer.return_value
        mock_k_callback_instance = mock_k_callback.return_value
        callback = MagicMock(spec=WriteCallback)
        kafka_sink = DatahubKafkaSink.create(
            {"connection": {"bootstrap": "foobar:9092"}}, mock_context
        )
        re = RecordEnvelope(record=sentinel, metadata={})
        kafka_sink.write_record_async(re, callback)
        assert mock_producer_instance.poll.call_count == 1  # poll() called once
        self.validate_kafka_callback(
            mock_k_callback, re, callback
        )  # validate kafka callback was constructed appropriately

        # validate that confluent_kafka.Producer.produce was called with the right arguments
        args, kwargs = mock_producer_instance.produce.call_args
        created_callback = kwargs["on_delivery"]
        assert created_callback == mock_k_callback_instance.kafka_callback
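
In this variant the record is the unittest.mock sentinel object rather than a real MCE: it serves as an opaque placeholder, which is enough here because the mocked producer never serializes the record. A quick illustration of how sentinel behaves:

from unittest.mock import sentinel

# sentinel attributes are unique named singletons, handy as opaque stand-ins
assert sentinel.record is sentinel.record
assert sentinel.record is not sentinel.other_record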
Example #4
    def test_kafka_sink_mcp(self, mock_producer, mock_callback):
        from datahub.emitter.mcp import MetadataChangeProposalWrapper

        mcp = MetadataChangeProposalWrapper(
            entityType="dataset",
            entityUrn="urn:li:dataset:(urn:li:dataPlatform:mysql,User.UserAccount,PROD)",
            changeType=models.ChangeTypeClass.UPSERT,
            aspectName="datasetProfile",
            aspect=models.DatasetProfileClass(
                rowCount=2000,
                columnCount=15,
                timestampMillis=1626995099686,
            ),
        )
        kafka_sink = DatahubKafkaSink.create(
            {"connection": {"bootstrap": "localhost:9092"}},
            PipelineContext(run_id="test"),
        )
        kafka_sink.write_record_async(
            RecordEnvelope(record=mcp, metadata={}), mock_callback
        )
        kafka_sink.close()
        assert mock_producer.call_count == 2  # two producers are constructed (one per topic route)
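
The expected count of 2 comes from the sink's emitter keeping one producer per topic route (MCEs and MCPs go to different topics), so creating the sink constructs two producers. Below is a hedged sketch of a config with explicit topic routing; the topic_routes keys and topic names are assumptions modeled on DataHub's defaults and should be checked against the installed version.

# Assumed config shape with explicit per-record-type topic routing.
sink_config = {
    "connection": {"bootstrap": "localhost:9092"},
    "topic_routes": {
        "MetadataChangeEvent": "MetadataChangeEvent_v4",        # MCE topic (assumed default)
        "MetadataChangeProposal": "MetadataChangeProposal_v1",  # MCP topic (assumed default)
    },
}
kafka_sink = DatahubKafkaSink.create(sink_config, PipelineContext(run_id="test"))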
Example #5
    def test_kafka_sink_close(self, mock_producer, mock_context):
        mock_producer_instance = mock_producer.return_value
        kafka_sink = DatahubKafkaSink.create({}, mock_context)
        kafka_sink.close()
        mock_producer_instance.flush.assert_called_once()
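
Outside of a mocked test, close() is what flushes the underlying producer(s) so that records queued by write_record_async are actually delivered before the process exits. A minimal end-to-end sketch is shown below; the broker address, run_id, and LoggingCallback are illustrative placeholders, and a reachable Kafka broker plus schema registry are assumed.

import datahub.emitter.mce_builder as builder
from datahub.ingestion.api.common import PipelineContext, RecordEnvelope
from datahub.ingestion.api.sink import WriteCallback
from datahub.ingestion.sink.datahub_kafka import DatahubKafkaSink


class LoggingCallback(WriteCallback):
    # Hypothetical callback: just report delivery results.
    def on_success(self, record_envelope, success_metadata):
        print(f"delivered: {record_envelope.record}")

    def on_failure(self, record_envelope, failure_exception, failure_metadata):
        print(f"failed: {failure_exception}")


sink = DatahubKafkaSink.create(
    {"connection": {"bootstrap": "localhost:9092"}},  # placeholder broker; schema registry also required in practice
    PipelineContext(run_id="demo-run"),
)
mce = builder.make_lineage_mce(
    [builder.make_dataset_urn("bigquery", "upstream1")],
    builder.make_dataset_urn("bigquery", "downstream1"),
)
sink.write_record_async(RecordEnvelope(record=mce, metadata={}), LoggingCallback())
sink.close()  # flush pending messages; without this, buffered records may be lost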