def _get_ownership(self, creator_id: int) -> Optional[OwnershipClass]:
    """Build the ownership aspect for the user who created an entity.

    Requests ``{connect_uri}/api/user/{creator_id}`` through ``self.session``
    and turns the ``email`` field of the JSON response into a single
    ``DATAOWNER`` entry.

    Args:
        creator_id: Identifier of the creating user, interpolated into the
            user-info URL.

    Returns:
        An ``OwnershipClass`` holding one owner, or ``None`` when the request
        raises ``HTTPError`` (the failure is recorded on ``self.report``) or
        the user record carries no usable email.
    """
    user_info_url = f"{self.config.connect_uri}/api/user/{creator_id}"
    try:
        user_info_response = self.session.get(user_info_url)
        user_info_response.raise_for_status()
        user_details = user_info_response.json()
    except HTTPError as http_error:
        self.report.report_failure(
            key=f"metabase-user-{creator_id}",
            reason=f"Unable to retrieve User info. Reason: {str(http_error)}",
        )
        return None

    email = user_details.get("email")
    if not email:
        # A missing or empty email would otherwise yield a URN with an empty
        # user name; emit no ownership rather than a bogus owner.
        return None

    owner_urn = builder.make_user_urn(email)
    if owner_urn is None:
        return None

    return OwnershipClass(
        owners=[
            OwnerClass(
                owner=owner_urn,
                type=OwnershipTypeClass.DATAOWNER,
            )
        ]
    )
def _get_owners_aspect(self, node: DBTNode) -> OwnershipClass:
    """Return an ownership aspect whose only entry is ``node.owner`` as a data owner."""
    node_owner = OwnerClass(
        owner=f"urn:li:corpuser:{node.owner}",
        type=OwnershipTypeClass.DATAOWNER,
    )
    return OwnershipClass(owners=[node_owner])
def create_metadata_work_unit(timestamp):
    """Assemble the work unit for the ``datalake_grilled.Barbeque`` dataset.

    The snapshot carries, in order, a status, an ownership, a dataset
    properties and a schema metadata aspect. ``timestamp`` is used as the
    ``time`` of every audit stamp created here.
    """
    status_aspect = Status(removed=False)

    ownership_aspect = OwnershipClass(
        owners=[
            OwnerClass(
                owner="urn:li:corpuser:Susan", type=OwnershipTypeClass.DATAOWNER
            )
        ],
        lastModified=AuditStampClass(time=timestamp, actor="urn:li:corpuser:datahub"),
    )

    properties_aspect = DatasetPropertiesClass(
        description="Grilled Food",
        customProperties={},
        uri=None,
        tags=[],
    )

    size_field = SchemaField(
        fieldPath="Size",
        nativeDataType="int",
        type=SchemaFieldDataType(type=NumberTypeClass()),
        description="Maximum attendees permitted",
        nullable=True,
        recursive=False,
    )
    schema_aspect = SchemaMetadata(
        schemaName="datalake_grilled.Barbeque",
        version=0,
        fields=[size_field],
        platform="urn:li:dataPlatform:glue",
        # Two distinct stamp objects, one per field.
        created=AuditStamp(time=timestamp, actor="urn:li:corpuser:etl"),
        lastModified=AuditStamp(time=timestamp, actor="urn:li:corpuser:etl"),
        hash="",
        platformSchema=MySqlDDL(tableSchema=""),
    )

    snapshot = DatasetSnapshot(
        urn="urn:li:dataset:(urn:li:dataPlatform:glue,datalake_grilled.Barbeque,PROD)",
        aspects=[],
    )
    snapshot.aspects.extend(
        [status_aspect, ownership_aspect, properties_aspect, schema_aspect]
    )

    return MetadataWorkUnit(
        id="glue-datalake_grilled.Barbeque",
        mce=MetadataChangeEvent(proposedSnapshot=snapshot),
    )
def __init__(self, config: PatternDatasetOwnershipConfig, ctx: PipelineContext):
    """Set up the parent transformer with owners looked up from a pattern.

    The owners for an entity are whatever ``config.owner_pattern`` returns
    for that entity's ``urn``; each becomes a ``DATAOWNER`` entry.
    """
    pattern = config.owner_pattern

    def owners_for(entity):
        # Evaluated per entity by the parent transformer.
        return [
            OwnerClass(owner=matched_urn, type=OwnershipTypeClass.DATAOWNER)
            for matched_urn in pattern.value(entity.urn)
        ]

    super().__init__(
        AddDatasetOwnershipConfig(
            get_owners_to_add=owners_for,
            default_actor=config.default_actor,
        ),
        ctx,
    )
def __init__(self, config: AddCustomOwnershipConfig, ctx: PipelineContext):
    """Load owner URNs from the JSON file named by ``config.owners_json``.

    Stores ``ctx`` and ``config`` on the instance and builds ``self.owners``,
    one ``DATAOWNER`` entry per element of the loaded JSON value.

    Args:
        config: Transformer configuration; ``owners_json`` is the path of the
            file to read.
        ctx: Pipeline context, kept on the instance as ``self.ctx``.
    """
    self.ctx = ctx
    self.config = config

    # JSON interchange is UTF-8; do not depend on the platform's default
    # text encoding when reading the file.
    with open(self.config.owners_json, "r", encoding="utf-8") as f:
        raw_owner_urns = json.load(f)

    self.owners = [
        OwnerClass(owner=owner, type=OwnershipTypeClass.DATAOWNER)
        for owner in raw_owner_urns
    ]
def get_owner() -> Optional[OwnershipClass]:
    """Return ownership built from ``table["Owner"]``, or ``None`` if it is absent or empty.

    ``table`` is not a parameter; it is resolved from the enclosing scope.
    """
    table_owner = table.get("Owner")
    if not table_owner:
        return None

    data_owner = OwnerClass(
        owner=f"urn:li:corpuser:{table_owner}",
        type=OwnershipTypeClass.DATAOWNER,
    )
    return OwnershipClass(owners=[data_owner])
def _get_tag_mce_for_urn(tag_urn: str) -> MetadataChangeEvent:
    """Wrap a known tag definition in a change event owned by ``urn:li:corpuser:datahub``.

    ``tag_urn`` must be a key of ``LookerUtil.tag_definitions``; this is
    asserted before anything is built.
    """
    assert tag_urn in LookerUtil.tag_definitions

    datahub_owner = OwnerClass(
        owner="urn:li:corpuser:datahub",
        type=OwnershipTypeClass.DATAOWNER,
    )
    ownership = OwnershipClass(owners=[datahub_owner])
    tag_snapshot = TagSnapshotClass(
        urn=tag_urn,
        aspects=[ownership, LookerUtil.tag_definitions[tag_urn]],
    )
    return MetadataChangeEvent(proposedSnapshot=tag_snapshot)
def _get_ownership(self, user: str) -> Optional[OwnershipClass]:
    """Return a single-owner ``DATAOWNER`` aspect for ``user``, or ``None`` when ``user`` is ``None``."""
    if user is None:
        return None

    data_owner = OwnerClass(
        owner=builder.make_user_urn(user),
        type=OwnershipTypeClass.DATAOWNER,
    )
    return OwnershipClass(owners=[data_owner])
def __init__(self, config: SimpleDatasetOwnershipConfig, ctx: PipelineContext):
    """Set up the parent transformer with a fixed owner list.

    Every URN in ``config.owner_urns`` becomes a ``DATAOWNER`` entry; the
    same list object is handed back for every entity.
    """
    fixed_owners = []
    for owner_urn in config.owner_urns:
        fixed_owners.append(
            OwnerClass(owner=owner_urn, type=OwnershipTypeClass.DATAOWNER)
        )

    def constant_owners(_entity):
        # The entity is ignored: owners do not depend on it.
        return fixed_owners

    super().__init__(
        AddDatasetOwnershipConfig(
            get_owners_to_add=constant_owners,
            default_actor=config.default_actor,
        ),
        ctx,
    )
def get_owner() -> OwnershipClass:
    """Return ownership built from ``table["Owner"]``; the owner list is empty if it is absent or empty.

    ``table`` is not a parameter; it is resolved from the enclosing scope.
    """
    table_owner = table.get("Owner")
    owner_entries = (
        [
            OwnerClass(
                owner=f"urn:li:corpuser:{table_owner}",
                type=OwnershipTypeClass.DATAOWNER,
            )
        ]
        if table_owner
        else []
    )
    return OwnershipClass(owners=owner_entries)
def create_owners_list_from_urn_list(
    owner_urns: List[str], source_type: str
) -> List[OwnerClass]:
    """Turn owner URNs into ``DATAOWNER`` entries sharing one optional source.

    When ``source_type`` is truthy a single ``OwnershipSourceClass`` is built
    from it and attached to every entry; otherwise each entry's ``source`` is
    ``None``.
    """
    shared_source: Union[None, OwnershipSourceClass] = (
        OwnershipSourceClass(type=source_type) if source_type else None
    )

    result: List[OwnerClass] = []
    for urn in owner_urns:
        result.append(
            OwnerClass(
                owner=urn,
                type=OwnershipTypeClass.DATAOWNER,
                source=shared_source,
            )
        )
    return result
def getOwners(
    self,
    key: str,
    owner_pattern: KeyValuePattern,
    ownership_type: Optional[str] = None,
) -> List[OwnerClass]:
    """Return one owner entry per value that ``owner_pattern`` yields for ``key``.

    Each entry's type is ``builder.validate_ownership_type(ownership_type)``,
    evaluated once per matched owner.
    """
    matched: List[OwnerClass] = []
    for owner_urn in owner_pattern.value(key):
        matched.append(
            OwnerClass(
                owner=owner_urn,
                type=builder.validate_ownership_type(ownership_type),
            )
        )
    return matched
def get_group_wu(
    self, group_details: "DescribeModelPackageGroupOutputTypeDef"
) -> MetadataWorkUnit:
    """
    Build the work unit describing one model group.

    Records the group's ARN-to-name mapping on ``self.group_arn_to_name`` and
    emits a snapshot with properties, ownership and browse-path aspects.
    """
    arn = group_details["ModelPackageGroupArn"]
    name = group_details["ModelPackageGroupName"]
    self.group_arn_to_name[arn] = name

    # The creator's profile name, when present, becomes the only owner.
    creator = group_details.get("CreatedBy", {}).get("UserProfileName")
    owners = []
    if creator is not None:
        owners.append(
            OwnerClass(
                owner=f"urn:li:corpuser:{creator}",
                type=OwnershipTypeClass.DATAOWNER,
            )
        )

    # These keys are surfaced through dedicated fields, so they are left out
    # of the custom properties.
    already_extracted = {"ModelPackageGroupName", "CreationTime"}
    custom_properties = {}
    for field_name, field_value in group_details.items():
        if field_name not in already_extracted:
            custom_properties[field_name] = str(field_value)

    # Falls back to the current time when no creation time is reported.
    created_at_ms = int(
        group_details.get("CreationTime", datetime.now()).timestamp() * 1000
    )

    properties = MLModelGroupPropertiesClass(
        createdAt=created_at_ms,
        description=group_details.get("ModelPackageGroupDescription"),
        customProperties=custom_properties,
    )
    snapshot = MLModelGroupSnapshot(
        urn=builder.make_ml_model_group_urn("sagemaker", name, self.env),
        aspects=[
            properties,
            OwnershipClass(owners),
            BrowsePathsClass(paths=[f"/sagemaker/{name}"]),
        ],
    )

    return MetadataWorkUnit(
        id=name, mce=MetadataChangeEvent(proposedSnapshot=snapshot)
    )
def _aggregate_owners(
    self, node: DBTNode, meta_owner_aspects: Any
) -> List[OwnerClass]:
    """Collect the node's owner and meta-mapped owners, sorted by owner URN.

    ``node.owner`` contributes a ``DATAOWNER`` entry when truthy. The owners
    of ``meta_owner_aspects`` are added only when that argument is truthy and
    ``self.config.enable_meta_mapping`` is set.
    """
    collected: List[OwnerClass] = []

    if node.owner:
        node_owner = OwnerClass(
            owner=f"urn:li:corpuser:{node.owner}",
            type=OwnershipTypeClass.DATAOWNER,
        )
        collected.append(node_owner)

    if meta_owner_aspects and self.config.enable_meta_mapping:
        collected.extend(meta_owner_aspects.owners)

    collected.sort(key=lambda entry: entry.owner)
    return collected
def get_ownership(
    self, looker_dashboard: LookerDashboard
) -> Optional[OwnershipClass]:
    """Return a single-owner ``DATAOWNER`` aspect for the dashboard's owner.

    Yields ``None`` when the dashboard has no owner or the owner's
    ``_get_urn`` call returns ``None``.
    """
    dashboard_owner = looker_dashboard.owner
    if dashboard_owner is None:
        return None

    owner_urn = dashboard_owner._get_urn(
        self.source_config.strip_user_ids_from_email
    )
    if owner_urn is None:
        return None

    return OwnershipClass(
        owners=[OwnerClass(owner=owner_urn, type=OwnershipTypeClass.DATAOWNER)]
    )
def __init__(self, config: SimpleDatasetOwnershipConfig, ctx: PipelineContext):
    """Set up the parent transformer with a fixed owner list of a configured type.

    ``config.ownership_type`` is passed through
    ``builder.validate_ownership_type`` once, and the result is used as the
    type of every entry built from ``config.owner_urns``.
    """
    validated_type = builder.validate_ownership_type(config.ownership_type)

    fixed_owners = []
    for owner_urn in config.owner_urns:
        fixed_owners.append(OwnerClass(owner=owner_urn, type=validated_type))

    def constant_owners(_entity):
        # The entity is ignored: owners do not depend on it.
        return fixed_owners

    super().__init__(
        AddDatasetOwnershipConfig(
            get_owners_to_add=constant_owners,
            default_actor=config.default_actor,
        ),
        ctx,
    )
def add_owner_to_entity_wu(
    entity_type: str, entity_urn: str, owner_urn: str
) -> Iterable[MetadataWorkUnit]:
    """Yield one work unit that upserts ``owner_urn`` as data owner of ``entity_urn``."""
    ownership = OwnershipClass(
        owners=[OwnerClass(owner=owner_urn, type=OwnershipTypeClass.DATAOWNER)]
    )
    proposal = MetadataChangeProposalWrapper(
        entityType=entity_type,
        changeType=ChangeTypeClass.UPSERT,
        entityUrn=f"{entity_urn}",
        aspectName="ownership",
        aspect=ownership,
    )
    yield MetadataWorkUnit(id=f"{owner_urn}-to-{entity_urn}", mcp=proposal)
def _make_dashboard_and_chart_mces(
    self, looker_dashboard: LookerDashboard
) -> List[MetadataChangeEvent]:
    """Return the chart change events of a dashboard followed by the dashboard's own.

    The dashboard snapshot carries info, ownership (the configured actor as
    data owner) and status aspects, in that order.
    """
    actor = self.source_config.actor
    sys_time = get_sys_time()

    chart_mces = []
    for element in looker_dashboard.dashboard_elements:
        chart_mces.append(self._make_chart_mce(element))

    dashboard_urn = f"urn:li:dashboard:({self.source_config.platform_name},{looker_dashboard.get_urn_dashboard_id()})"
    dashboard_snapshot = DashboardSnapshot(
        urn=dashboard_urn,
        aspects=[],
    )

    # A missing description is stored as an empty string.
    description = looker_dashboard.description
    if description is None:
        description = ""

    info_aspect = DashboardInfoClass(
        description=description,
        title=looker_dashboard.title,
        charts=[chart_mce.proposedSnapshot.urn for chart_mce in chart_mces],
        lastModified=ChangeAuditStamps(
            created=AuditStamp(time=sys_time, actor=actor),
            lastModified=AuditStamp(time=sys_time, actor=actor),
        ),
        dashboardUrl=looker_dashboard.url(self.source_config.base_url),
    )
    dashboard_snapshot.aspects.append(info_aspect)

    ownership_aspect = OwnershipClass(
        owners=[OwnerClass(owner=actor, type=OwnershipTypeClass.DATAOWNER)],
        lastModified=AuditStampClass(
            time=sys_time, actor=self.source_config.actor
        ),
    )
    dashboard_snapshot.aspects.append(ownership_aspect)

    dashboard_snapshot.aspects.append(Status(removed=looker_dashboard.is_deleted))

    dashboard_mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot)
    return [*chart_mces, dashboard_mce]
def generate_ownership_aspect(self):
    """Return a one-element list holding the ownership aspect for ``self.owners``.

    Each owner becomes a ``DEVELOPER`` entry with a ``SERVICE`` source; a
    falsy ``self.owners`` gives an empty owner list. The audit stamp uses
    time ``0`` and the user URN of ``self.orchestrator``.
    """
    developer_owners = []
    for owner_name in self.owners or []:
        developer_owners.append(
            OwnerClass(
                owner=builder.make_user_urn(owner_name),
                type=OwnershipTypeClass.DEVELOPER,
                source=OwnershipSourceClass(
                    type=OwnershipSourceTypeClass.SERVICE,
                    # url=dag.filepath,
                ),
            )
        )

    modified_stamp = AuditStampClass(
        time=0, actor=builder.make_user_urn(self.orchestrator)
    )
    return [OwnershipClass(owners=developer_owners, lastModified=modified_stamp)]
def get_owner(time: int) -> OwnershipClass:
    """Return ownership built from ``table["Owner"]``, stamped with ``time``.

    The owner list is empty when the entry is absent or empty. ``table`` is
    not a parameter; it is resolved from the enclosing scope.
    """
    table_owner = table.get("Owner")
    owner_entries = []
    if table_owner:
        owner_entries.append(
            OwnerClass(
                owner=f"urn:li:corpuser:{table_owner}",
                type=OwnershipTypeClass.DATAOWNER,
            )
        )

    modified_stamp = AuditStampClass(
        time=time,
        actor="urn:li:corpuser:datahub",
    )
    return OwnershipClass(owners=owner_entries, lastModified=modified_stamp)
def create_ownership_aspect_mce(directive: Directive) -> MetadataChangeEventClass:
    """Build a change event assigning ``directive.owners`` as data owners of ``directive.table``.

    Owner names are cleaned and converted to URNs; the audit stamp records
    the current wall-clock time in milliseconds.
    """
    data_owners = []
    for raw_owner in directive.owners:
        data_owners.append(
            OwnerClass(
                owner=owner_name_to_urn(clean_owner_name(raw_owner)),
                type=OwnershipTypeClass.DATAOWNER,
            )
        )

    ownership = OwnershipClass(
        owners=data_owners,
        lastModified=AuditStampClass(
            time=int(time.time() * 1000),
            actor="urn:li:corpuser:datahub",
        ),
    )
    snapshot = DatasetSnapshotClass(
        urn=dataset_name_to_urn(directive.table),
        aspects=[ownership],
    )
    return MetadataChangeEventClass(proposedSnapshot=snapshot)
def make_ownership_aspect_from_urn_list(
    owner_urns: List[str],
    source_type: Optional[Union[str, OwnershipSourceTypeClass]],
) -> OwnershipClass:
    """Build an ownership aspect from user and group URNs.

    Args:
        owner_urns: URNs that must each start with ``urn:li:corpuser:`` or
            ``urn:li:corpGroup:``.
        source_type: When truthy, wrapped in one ``OwnershipSourceClass`` that
            is attached to every owner; otherwise owners carry no source.

    Returns:
        An ``OwnershipClass`` with one ``DATAOWNER`` entry per URN.

    Raises:
        AssertionError: If any URN has neither accepted prefix.
    """
    # str.startswith accepts a tuple, so both prefixes are tested in one call.
    accepted_prefixes = ("urn:li:corpuser:", "urn:li:corpGroup:")
    for owner_urn in owner_urns:
        assert owner_urn.startswith(
            accepted_prefixes
        ), f"owner URN must be a corpuser or corpGroup URN: {owner_urn}"

    ownership_source_type: Union[None, OwnershipSourceClass] = None
    if source_type:
        ownership_source_type = OwnershipSourceClass(type=source_type)

    owners_list = [
        OwnerClass(
            owner=owner_urn,
            type=OwnershipTypeClass.DATAOWNER,
            source=ownership_source_type,
        )
        for owner_urn in owner_urns
    ]
    return OwnershipClass(owners=owners_list)
def __to_datahub_dashboard(
    self,
    dashboard: PowerBiAPI.Dashboard,
    chart_mcps: List[MetadataChangeProposalWrapper],
    user_mcps: List[MetadataChangeProposalWrapper],
) -> List[MetadataChangeProposalWrapper]:
    """
    Convert a PowerBi dashboard into its Datahub change proposals.

    Returns the browse-path, info, status, key and ownership proposals, in
    that order.
    """
    dashboard_urn = builder.make_dashboard_urn(
        self.__config.platform_name, dashboard.get_urn_part()
    )
    chart_urn_list: List[str] = self.to_urn_set(chart_mcps)
    user_urn_list: List[str] = self.to_urn_set(user_mcps)

    def dashboard_mcp(aspect_name, aspect):
        # Every proposal below targets the same dashboard entity.
        return self.new_mcp(
            entity_type=Constant.DASHBOARD,
            entity_urn=dashboard_urn,
            aspect_name=aspect_name,
            aspect=aspect,
        )

    # Info aspect: the display name doubles as title and description.
    info_mcp = dashboard_mcp(
        Constant.DASHBOARD_INFO,
        DashboardInfoClass(
            description=dashboard.displayName or "",
            title=dashboard.displayName or "",
            charts=chart_urn_list,
            lastModified=ChangeAuditStamps(),
            dashboardUrl=dashboard.webUrl,
            customProperties={
                "chartCount": str(len(dashboard.tiles)),
                "workspaceName": dashboard.workspace_name,
                "workspaceId": dashboard.id,
            },
        ),
    )

    # Status aspect: the dashboard is marked as not removed.
    removed_status_mcp = dashboard_mcp(Constant.STATUS, StatusClass(removed=False))

    # Key aspect.
    dashboard_key_mcp = dashboard_mcp(
        Constant.DASHBOARD_KEY,
        DashboardKeyClass(
            dashboardTool=self.__config.platform_name,
            dashboardId=Constant.DASHBOARD_ID.format(dashboard.id),
        ),
    )

    # Ownership aspect: every non-None user URN is recorded as a consumer.
    consumers = []
    for user_urn in user_urn_list:
        if user_urn is not None:
            consumers.append(
                OwnerClass(owner=user_urn, type=OwnershipTypeClass.CONSUMER)
            )
    owner_mcp = dashboard_mcp(Constant.OWNERSHIP, OwnershipClass(owners=consumers))

    # Browse-path aspect, keyed by the configured workspace id.
    browse_path_mcp = dashboard_mcp(
        Constant.BROWSERPATH,
        BrowsePathsClass(paths=["/powerbi/{}".format(self.__config.workspace_id)]),
    )

    return [
        browse_path_mcp,
        info_mcp,
        removed_status_mcp,
        dashboard_key_mcp,
        owner_mcp,
    ]
def get_workunits(self) -> Iterable[MetadataWorkUnit]:
    """Yield one dataset work unit per node loaded from the manifest and catalog.

    Each snapshot gets a properties aspect, then, when applicable, ownership,
    global tags, upstream lineage and schema metadata. Every work unit is
    reported on ``self.report`` before it is yielded.
    """
    (
        nodes,
        manifest_schema,
        manifest_version,
        catalog_schema,
        catalog_version,
    ) = loadManifestAndCatalog(
        self.config.manifest_path,
        self.config.catalog_path,
        self.config.sources_path,
        self.config.load_schemas,
        self.config.use_identifiers,
        self.config.tag_prefix,
        self.config.target_platform,
        self.config.env,
        self.config.node_type_pattern,
        self.report,
    )

    # Schema/version details shared by every node; unset ones are dropped.
    shared_props = {}
    for prop_name, prop_value in (
        ("manifest_schema", manifest_schema),
        ("manifest_version", manifest_version),
        ("catalog_schema", catalog_schema),
        ("catalog_version", catalog_version),
    ):
        if prop_value is not None:
            shared_props[prop_name] = prop_value

    for node in nodes:
        dataset_snapshot = DatasetSnapshot(urn=node.datahub_urn, aspects=[])
        aspects = dataset_snapshot.aspects

        # Prefer showing both texts when the comment and the description
        # are both present and differ; otherwise whichever one exists.
        if node.comment and node.description and node.comment != node.description:
            description = f"{self.config.target_platform} comment: {node.comment}\n\ndbt model description: {node.description}"
        elif node.comment:
            description = node.comment
        elif node.description:
            description = node.description
        else:
            description = None

        # Shared properties override node properties on key collision.
        custom_props = dict(get_custom_properties(node))
        custom_props.update(shared_props)

        aspects.append(
            DatasetPropertiesClass(
                description=description,
                customProperties=custom_props,
                tags=node.tags,
            )
        )

        if node.owner:
            node_owner = OwnerClass(
                owner=f"urn:li:corpuser:{node.owner}",
                type=OwnershipTypeClass.DATAOWNER,
            )
            aspects.append(OwnershipClass(owners=[node_owner]))

        if node.tags:
            tag_associations = []
            for tag in node.tags:
                tag_associations.append(TagAssociationClass(f"urn:li:tag:{tag}"))
            aspects.append(GlobalTagsClass(tags=tag_associations))

        upstreams = get_upstream_lineage(node.upstream_urns)
        if upstreams is not None:
            aspects.append(upstreams)

        if self.config.load_schemas:
            aspects.append(
                get_schema_metadata(self.report, node, self.config.target_platform)
            )

        wu = MetadataWorkUnit(
            id=dataset_snapshot.urn,
            mce=MetadataChangeEvent(proposedSnapshot=dataset_snapshot),
        )
        self.report.report_workunit(wu)
        yield wu
OwnershipClass, OwnershipTypeClass, ) log = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) # Inputs -> owner, ownership_type, dataset owner_to_add = make_user_urn("jdoe") ownership_type = OwnershipTypeClass.DATAOWNER dataset_urn = make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD") # Some objects to help with conditional pathways later owner_class_to_add = OwnerClass(owner=owner_to_add, type=ownership_type) ownership_to_add = OwnershipClass(owners=[owner_class_to_add]) # First we get the current owners gms_endpoint = "http://localhost:8080" graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint)) current_owners: Optional[OwnershipClass] = graph.get_aspect_v2( entity_urn=dataset_urn, aspect="ownership", aspect_type=OwnershipClass, ) need_write = False if current_owners: if (owner_to_add, ownership_type) not in [(x.owner, x.type)
def getOwners(self, key, owner_pattern):
    """Return one ``DATAOWNER`` entry per owner that ``owner_pattern`` yields for ``key``."""
    matched_owners = []
    for owner_urn in owner_pattern.value(key):
        matched_owners.append(
            OwnerClass(owner=owner_urn, type=OwnershipTypeClass.DATAOWNER)
        )
    return matched_owners