Пример #1
0
    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
        env: str = "PROD"
        platform = self.platform
        nodes = loadManifestAndCatalog(
            self.config.manifest_path, self.config.catalog_path, platform, env
        )

        for node in nodes:
            mce = MetadataChangeEvent()

            dataset_snapshot = DatasetSnapshot()
            dataset_snapshot.urn = node.datahub_urn
            custom_properties = get_custom_properties(node)

            dbt_properties = DatasetPropertiesClass()
            dbt_properties.description = node.dbt_name
            dbt_properties.customProperties = custom_properties

            dataset_snapshot.aspects.append(dbt_properties)

            upstreams = get_upstream_lineage(node.upstream_urns)
            if upstreams is not None:
                dataset_snapshot.aspects.append(upstreams)

            schema_metadata = get_schema_metadata(self.report, node, platform)
            dataset_snapshot.aspects.append(schema_metadata)

            mce.proposedSnapshot = dataset_snapshot
            wu = MetadataWorkUnit(id=dataset_snapshot.urn, mce=mce)
            self.report.report_workunit(wu)

            yield wu
Пример #2
0
    def get_workunits(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
        try:
            self.inspect_version()
        except Exception as e:
            self.report.report_failure("version", f"Error: {e}")
            return

        for wu in super().get_workunits():
            yield wu
            if (isinstance(wu, SqlWorkUnit)
                    and isinstance(wu.metadata, MetadataChangeEvent) and
                    isinstance(wu.metadata.proposedSnapshot, DatasetSnapshot)):
                lineage_mcp = None
                lineage_properties_aspect: Optional[
                    DatasetPropertiesClass] = None

                dataset_snapshot: DatasetSnapshotClass = wu.metadata.proposedSnapshot
                assert dataset_snapshot

                if self.config.include_table_lineage:
                    lineage_mcp, lineage_properties_aspect = self.get_lineage_mcp(
                        wu.metadata.proposedSnapshot.urn)

                if lineage_mcp is not None:
                    lineage_wu = MetadataWorkUnit(
                        id=
                        f"redshift-{lineage_mcp.entityUrn}-{lineage_mcp.aspectName}",
                        mcp=lineage_mcp,
                    )
                    self.report.report_workunit(lineage_wu)

                    yield lineage_wu

                if lineage_properties_aspect:
                    aspects = dataset_snapshot.aspects
                    if aspects is None:
                        aspects = []

                    dataset_properties_aspect: Optional[
                        DatasetPropertiesClass] = None

                    for aspect in aspects:
                        if isinstance(aspect, DatasetPropertiesClass):
                            dataset_properties_aspect = aspect

                    if dataset_properties_aspect is None:
                        dataset_properties_aspect = DatasetPropertiesClass()
                        aspects.append(dataset_properties_aspect)

                    custom_properties = (
                        {
                            **dataset_properties_aspect.customProperties,
                            **lineage_properties_aspect.customProperties,
                        } if dataset_properties_aspect.customProperties else
                        lineage_properties_aspect.customProperties)
                    dataset_properties_aspect.customProperties = custom_properties
                    dataset_snapshot.aspects = aspects

                    dataset_snapshot.aspects.append(dataset_properties_aspect)
Пример #3
0
    def get_workunits(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
        for wu in super().get_workunits():
            if (self.config.include_table_lineage
                    and isinstance(wu, MetadataWorkUnit)
                    and isinstance(wu.metadata, MetadataChangeEvent) and
                    isinstance(wu.metadata.proposedSnapshot, DatasetSnapshot)):
                dataset_snapshot: DatasetSnapshot = wu.metadata.proposedSnapshot
                assert dataset_snapshot
                # Join the workunit stream from super with the lineage info using the urn.
                lineage_info = self._get_upstream_lineage_info(
                    dataset_snapshot.urn)
                if lineage_info is not None:
                    # Emit the lineage work unit
                    upstream_lineage, upstream_column_props = lineage_info
                    lineage_mcpw = MetadataChangeProposalWrapper(
                        entityType="dataset",
                        changeType=ChangeTypeClass.UPSERT,
                        entityUrn=dataset_snapshot.urn,
                        aspectName="upstreamLineage",
                        aspect=upstream_lineage,
                    )
                    lineage_wu = MetadataWorkUnit(
                        id=
                        f"{self.platform}-{lineage_mcpw.entityUrn}-{lineage_mcpw.aspectName}",
                        mcp=lineage_mcpw,
                    )
                    self.report.report_workunit(lineage_wu)
                    yield lineage_wu

                    # Update the super's workunit to include the column-lineage in the custom properties. We need to follow
                    # the RCU semantics for both the aspects & customProperties in order to preserve the changes made by super.
                    aspects = dataset_snapshot.aspects
                    if aspects is None:
                        aspects = []
                    dataset_properties_aspect: Optional[
                        DatasetPropertiesClass] = None
                    for aspect in aspects:
                        if isinstance(aspect, DatasetPropertiesClass):
                            dataset_properties_aspect = aspect
                    if dataset_properties_aspect is None:
                        dataset_properties_aspect = DatasetPropertiesClass()
                        aspects.append(dataset_properties_aspect)

                    custom_properties = ({
                        **dataset_properties_aspect.customProperties,
                        **upstream_column_props,
                    } if dataset_properties_aspect.customProperties else
                                         upstream_column_props)
                    dataset_properties_aspect.customProperties = custom_properties
                    dataset_snapshot.aspects = aspects

            # Emit the work unit from super.
            yield wu
Пример #4
0
    def get_workunits(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
        for wu in super().get_workunits():
            if (self.config.include_table_lineage
                    and isinstance(wu, SqlWorkUnit)
                    and isinstance(wu.metadata, MetadataChangeEvent) and
                    isinstance(wu.metadata.proposedSnapshot, DatasetSnapshot)):
                dataset_snapshot: DatasetSnapshotClass = wu.metadata.proposedSnapshot
                assert dataset_snapshot

                lineage_mcp, lineage_properties_aspect = self.get_lineage_mcp(
                    wu.metadata.proposedSnapshot.urn)

                if lineage_mcp is not None:
                    lineage_wu = MetadataWorkUnit(
                        id=
                        f"{self.platform}-{lineage_mcp.entityUrn}-{lineage_mcp.aspectName}",
                        mcp=lineage_mcp,
                    )
                    self.report.report_workunit(lineage_wu)

                    yield lineage_wu

                if lineage_properties_aspect:
                    aspects = dataset_snapshot.aspects
                    if aspects is None:
                        aspects = []

                    dataset_properties_aspect: Optional[
                        DatasetPropertiesClass] = None

                    for aspect in aspects:
                        if isinstance(aspect, DatasetPropertiesClass):
                            dataset_properties_aspect = aspect

                    if dataset_properties_aspect is None:
                        dataset_properties_aspect = DatasetPropertiesClass()
                        aspects.append(dataset_properties_aspect)

                    custom_properties = (
                        {
                            **dataset_properties_aspect.customProperties,
                            **lineage_properties_aspect.customProperties,
                        } if dataset_properties_aspect.customProperties else
                        lineage_properties_aspect.customProperties)
                    dataset_properties_aspect.customProperties = custom_properties
                    dataset_snapshot.aspects = aspects

                    dataset_snapshot.aspects.append(dataset_properties_aspect)

            # Emit the work unit from super.
            yield wu