def test_datahub_rest_emitter(requests_mock, mce, endpoint, snapshot):
    """Emit ``mce`` through the REST emitter and check the posted JSON body.

    A mocked POST is registered for ``{endpoint}?action=ingest`` on the mock
    GMS server; the request body is compared against ``snapshot`` inside the
    matcher so a mismatch fails with both payloads in the message.
    """

    def _body_equals_snapshot(request: requests.Request) -> bool:
        """Assert the request's JSON body equals ``snapshot``; always match."""
        requested_snapshot = request.json()
        assert (
            requested_snapshot == snapshot
        ), f"Expected snapshot to be {json.dumps(snapshot)}, got {json.dumps(requested_snapshot)}"
        return True

    ingest_url = f"{MOCK_GMS_ENDPOINT}/{endpoint}?action=ingest"
    expected_headers = {"X-RestLi-Protocol-Version": "2.0.0"}
    requests_mock.post(
        ingest_url,
        request_headers=expected_headers,
        additional_matcher=_body_equals_snapshot,
    )

    DatahubRestEmitter(MOCK_GMS_ENDPOINT).emit_mce(mce)
class DatahubRestSink(Sink):
    """Sink that pushes each record to a server through ``DatahubRestEmitter``.

    The emitter is built from the ``server`` and ``token`` fields of the sink
    config, and every outcome is recorded on a ``SinkReport``.
    """

    config: DatahubRestSinkConfig
    emitter: DatahubRestEmitter
    report: SinkReport

    def __init__(self, ctx: PipelineContext, config: DatahubRestSinkConfig):
        """Store the config, start an empty report and build the emitter."""
        super().__init__(ctx)
        self.config = config
        self.report = SinkReport()
        self.emitter = DatahubRestEmitter(
            self.config.server,
            self.config.token,
        )

    @classmethod
    def create(cls, config_dict: dict, ctx: PipelineContext) -> "DatahubRestSink":
        """Build a sink from a raw config dict parsed as ``DatahubRestSinkConfig``."""
        return cls(ctx, DatahubRestSinkConfig.parse_obj(config_dict))

    def handle_work_unit_start(self, workunit: WorkUnit) -> None:
        """No-op: nothing is done when a work unit starts."""

    def handle_work_unit_end(self, workunit: WorkUnit) -> None:
        """No-op: nothing is done when a work unit ends."""

    def write_record_async(
        self,
        record_envelope: RecordEnvelope[MetadataChangeEvent],
        write_callback: WriteCallback,
    ) -> None:
        """Emit the envelope's record and notify ``write_callback`` of the result.

        The emitter is called directly in this method. On success the record
        is counted on the report and ``on_success`` is invoked; any exception
        raised along the way is recorded as a failure and passed to
        ``on_failure`` rather than propagated.
        """
        try:
            self.emitter.emit_mce(record_envelope.record)
            # Success bookkeeping stays inside the try block so that an error
            # raised here is also routed to the failure handlers below.
            self.report.report_record_written(record_envelope)
            write_callback.on_success(record_envelope, {})
        except OperationalError as op_err:
            # OperationalError exposes message/info; both go into the report
            # and the info is forwarded to the callback.
            failure = {"error": op_err.message, "info": op_err.info}
            self.report.report_failure(failure)
            write_callback.on_failure(record_envelope, op_err, op_err.info)
        except Exception as err:
            self.report.report_failure({"e": err})
            write_callback.on_failure(record_envelope, err, {})

    def get_report(self) -> SinkReport:
        """Return the report accumulated by this sink."""
        return self.report

    def close(self) -> None:
        """No-op: this sink does nothing on close."""
import datahub.emitter.mce_builder as builder
from datahub.emitter.rest_emitter import DatahubRestEmitter

# Example: declare that one BigQuery dataset is derived from two others and
# send that lineage to a GMS REST endpoint.

# URNs of the datasets involved (upstreams first, then the downstream).
upstream_urns = [
    builder.make_dataset_urn("bigquery", "upstream1"),
    builder.make_dataset_urn("bigquery", "upstream2"),
]
downstream_urn = builder.make_dataset_urn("bigquery", "downstream")

# Build the lineage event from the upstream URNs and the downstream URN.
lineage_mce = builder.make_lineage_mce(upstream_urns, downstream_urn)

# Emitter pointed at the GMS REST API.
emitter = DatahubRestEmitter("http://localhost:8080")

# Send the lineage event.
emitter.emit_mce(lineage_mce)