def get_workunits(self) -> Iterable[MetadataWorkUnit]:
    """Yield one metadata work unit per node loaded from the manifest and catalog.

    For every node returned by ``loadManifestAndCatalog`` a dataset snapshot is
    assembled with, in this order: a properties aspect, an upstream-lineage
    aspect (only when the lineage helper returns one), and a schema-metadata
    aspect. The snapshot is wrapped in a ``MetadataChangeEvent``, the resulting
    work unit is registered on ``self.report`` and then yielded.

    Yields:
        A ``MetadataWorkUnit`` whose id is the node's ``datahub_urn``.
    """
    target_platform = self.platform
    # The environment is fixed to "PROD" for every node.
    environment: str = "PROD"

    for dbt_node in loadManifestAndCatalog(
        self.config.manifest_path,
        self.config.catalog_path,
        target_platform,
        environment,
    ):
        change_event = MetadataChangeEvent()

        snapshot = DatasetSnapshot()
        snapshot.urn = dbt_node.datahub_urn

        # Properties aspect: the node's dbt_name is used as the description.
        node_custom_props = get_custom_properties(dbt_node)
        properties = DatasetPropertiesClass()
        properties.description = dbt_node.dbt_name
        properties.customProperties = node_custom_props
        snapshot.aspects.append(properties)

        # Lineage aspect is optional; skip it when the helper returns None.
        lineage = get_upstream_lineage(dbt_node.upstream_urns)
        if lineage is not None:
            snapshot.aspects.append(lineage)

        snapshot.aspects.append(
            get_schema_metadata(self.report, dbt_node, target_platform)
        )

        change_event.proposedSnapshot = snapshot
        work_unit = MetadataWorkUnit(id=snapshot.urn, mce=change_event)
        self.report.report_workunit(work_unit)
        yield work_unit
def get_workunits(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
    """Yield the parent's work units, adding table lineage when enabled.

    ``self.inspect_version()`` is called first; if it raises, the failure is
    recorded on ``self.report`` under the key ``"version"`` and the generator
    ends without yielding anything.

    For each ``SqlWorkUnit`` from ``super().get_workunits()`` that carries a
    ``MetadataChangeEvent`` with a ``DatasetSnapshot``, and when
    ``self.config.include_table_lineage`` is set:

    * a separate lineage work unit is reported and yielded if
      ``self.get_lineage_mcp`` returns a proposal, and
    * the custom properties of the returned lineage properties aspect are
      merged into the snapshot's ``DatasetPropertiesClass`` aspect (the
      lineage values win on key collisions).

    The parent's work unit is yielded only *after* it has been enriched, so
    the consumer never receives a snapshot that is mutated afterwards.

    Yields:
        Lineage ``MetadataWorkUnit`` objects (when available) followed by the
        corresponding work unit from the parent source.
    """
    try:
        self.inspect_version()
    except Exception as e:
        # Top-level boundary: report the failure and stop producing work units.
        self.report.report_failure("version", f"Error: {e}")
        return

    for wu in super().get_workunits():
        if (
            isinstance(wu, SqlWorkUnit)
            and isinstance(wu.metadata, MetadataChangeEvent)
            and isinstance(wu.metadata.proposedSnapshot, DatasetSnapshot)
        ):
            dataset_snapshot: DatasetSnapshotClass = wu.metadata.proposedSnapshot
            assert dataset_snapshot

            lineage_mcp = None
            lineage_properties_aspect: Optional[DatasetPropertiesClass] = None
            if self.config.include_table_lineage:
                lineage_mcp, lineage_properties_aspect = self.get_lineage_mcp(
                    dataset_snapshot.urn
                )

            if lineage_mcp is not None:
                lineage_wu = MetadataWorkUnit(
                    id=f"redshift-{lineage_mcp.entityUrn}-{lineage_mcp.aspectName}",
                    mcp=lineage_mcp,
                )
                self.report.report_workunit(lineage_wu)
                yield lineage_wu

            if lineage_properties_aspect:
                # Reuse the existing aspects list and properties aspect so
                # whatever the parent already populated is preserved.
                aspects = dataset_snapshot.aspects
                if aspects is None:
                    aspects = []

                dataset_properties_aspect: Optional[DatasetPropertiesClass] = None
                for aspect in aspects:
                    # No break: the last matching aspect is the one updated.
                    if isinstance(aspect, DatasetPropertiesClass):
                        dataset_properties_aspect = aspect

                if dataset_properties_aspect is None:
                    dataset_properties_aspect = DatasetPropertiesClass()
                    aspects.append(dataset_properties_aspect)

                custom_properties = (
                    {
                        **dataset_properties_aspect.customProperties,
                        **lineage_properties_aspect.customProperties,
                    }
                    if dataset_properties_aspect.customProperties
                    else lineage_properties_aspect.customProperties
                )
                dataset_properties_aspect.customProperties = custom_properties
                # The properties aspect is already a member of ``aspects`` at
                # this point; appending it again would duplicate it.
                dataset_snapshot.aspects = aspects

        # Emit the work unit from super once any enrichment is complete.
        yield wu
def get_workunits(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
    """Yield the parent's work units, interleaving upstream-lineage work units.

    When ``self.config.include_table_lineage`` is set and a work unit from
    ``super().get_workunits()`` is a ``MetadataWorkUnit`` carrying a
    ``MetadataChangeEvent`` with a ``DatasetSnapshot``, lineage is looked up by
    the snapshot's urn via ``self._get_upstream_lineage_info``. If lineage is
    found:

    * an ``upstreamLineage`` proposal is wrapped in its own work unit, reported
      on ``self.report`` and yielded first, and
    * the accompanying column properties are merged into the custom properties
      of the snapshot's ``DatasetPropertiesClass`` aspect (lineage values win
      on key collisions).

    The parent's work unit is always yielded last.
    """
    for work_unit in super().get_workunits():
        eligible = (
            self.config.include_table_lineage
            and isinstance(work_unit, MetadataWorkUnit)
            and isinstance(work_unit.metadata, MetadataChangeEvent)
            and isinstance(work_unit.metadata.proposedSnapshot, DatasetSnapshot)
        )

        lineage_info = None
        if eligible:
            snapshot: DatasetSnapshot = work_unit.metadata.proposedSnapshot
            assert snapshot
            # The urn is the join key between super's stream and lineage data.
            lineage_info = self._get_upstream_lineage_info(snapshot.urn)

        if lineage_info is not None:
            upstream_lineage, upstream_column_props = lineage_info

            # Lineage goes out as a separate work unit ahead of super's one.
            lineage_mcpw = MetadataChangeProposalWrapper(
                entityType="dataset",
                changeType=ChangeTypeClass.UPSERT,
                entityUrn=snapshot.urn,
                aspectName="upstreamLineage",
                aspect=upstream_lineage,
            )
            lineage_unit = MetadataWorkUnit(
                id=f"{self.platform}-{lineage_mcpw.entityUrn}-{lineage_mcpw.aspectName}",
                mcp=lineage_mcpw,
            )
            self.report.report_workunit(lineage_unit)
            yield lineage_unit

            # Fold the column lineage into super's work unit. The existing
            # aspects list and properties aspect are reused so that anything
            # super already set is kept.
            aspect_list = snapshot.aspects
            if aspect_list is None:
                aspect_list = []

            property_aspects = [
                candidate
                for candidate in aspect_list
                if isinstance(candidate, DatasetPropertiesClass)
            ]
            if property_aspects:
                # When several are present, the last one is the one updated.
                properties = property_aspects[-1]
            else:
                properties = DatasetPropertiesClass()
                aspect_list.append(properties)

            current_props = properties.customProperties
            if current_props:
                merged_props = {**current_props, **upstream_column_props}
            else:
                merged_props = upstream_column_props
            properties.customProperties = merged_props
            snapshot.aspects = aspect_list

        # Emit the work unit from super.
        yield work_unit
def get_workunits(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
    """Yield the parent's work units, adding table lineage when enabled.

    When ``self.config.include_table_lineage`` is set and a work unit from
    ``super().get_workunits()`` is a ``SqlWorkUnit`` carrying a
    ``MetadataChangeEvent`` with a ``DatasetSnapshot``:

    * a separate lineage work unit is reported and yielded if
      ``self.get_lineage_mcp`` returns a proposal, and
    * the custom properties of the returned lineage properties aspect are
      merged into the snapshot's ``DatasetPropertiesClass`` aspect (the
      lineage values win on key collisions).

    The parent's work unit is always yielded last, after any enrichment.

    Yields:
        Lineage ``MetadataWorkUnit`` objects (when available) followed by the
        corresponding work unit from the parent source.
    """
    for wu in super().get_workunits():
        if (
            self.config.include_table_lineage
            and isinstance(wu, SqlWorkUnit)
            and isinstance(wu.metadata, MetadataChangeEvent)
            and isinstance(wu.metadata.proposedSnapshot, DatasetSnapshot)
        ):
            dataset_snapshot: DatasetSnapshotClass = wu.metadata.proposedSnapshot
            assert dataset_snapshot

            lineage_mcp, lineage_properties_aspect = self.get_lineage_mcp(
                dataset_snapshot.urn
            )

            if lineage_mcp is not None:
                lineage_wu = MetadataWorkUnit(
                    id=f"{self.platform}-{lineage_mcp.entityUrn}-{lineage_mcp.aspectName}",
                    mcp=lineage_mcp,
                )
                self.report.report_workunit(lineage_wu)
                yield lineage_wu

            if lineage_properties_aspect:
                # Reuse the existing aspects list and properties aspect so
                # whatever the parent already populated is preserved.
                aspects = dataset_snapshot.aspects
                if aspects is None:
                    aspects = []

                dataset_properties_aspect: Optional[DatasetPropertiesClass] = None
                for aspect in aspects:
                    # No break: the last matching aspect is the one updated.
                    if isinstance(aspect, DatasetPropertiesClass):
                        dataset_properties_aspect = aspect

                if dataset_properties_aspect is None:
                    dataset_properties_aspect = DatasetPropertiesClass()
                    aspects.append(dataset_properties_aspect)

                custom_properties = (
                    {
                        **dataset_properties_aspect.customProperties,
                        **lineage_properties_aspect.customProperties,
                    }
                    if dataset_properties_aspect.customProperties
                    else lineage_properties_aspect.customProperties
                )
                dataset_properties_aspect.customProperties = custom_properties
                # The properties aspect is already a member of ``aspects`` at
                # this point; appending it again would duplicate it.
                dataset_snapshot.aspects = aspects

        # Emit the work unit from super.
        yield wu