def _make_chart_mce(
    self, dashboard_element: LookerDashboardElement, dashboard: LookerDashboard
) -> MetadataChangeEvent:
    """Wrap one dashboard element in a chart MetadataChangeEvent.

    The snapshot always carries a ChartInfo aspect. When
    ``self.get_ownership(dashboard)`` returns something other than ``None``,
    that aspect is attached as well.
    """
    config = self.source_config

    snapshot = ChartSnapshot(
        urn=builder.make_chart_urn(
            config.platform_name, dashboard_element.get_urn_element_id()
        ),
        aspects=[],
    )

    chart_type = self._get_chart_type(dashboard_element)
    chart_url = dashboard_element.url(config.base_url)
    view_urns = dashboard_element.get_view_urns(config)

    # Deduplicated, sorted, comma-separated field list; empty string when the
    # element reports no upstream fields.
    if dashboard_element.upstream_fields:
        upstream_fields_csv = ",".join(
            sorted(set(dashboard_element.upstream_fields))
        )
    else:
        upstream_fields_csv = ""

    snapshot.aspects.append(
        ChartInfoClass(
            type=chart_type,
            description=dashboard_element.description or "",
            title=dashboard_element.title or "",
            lastModified=ChangeAuditStamps(),
            chartUrl=chart_url,
            inputs=view_urns,
            customProperties={"upstream_fields": upstream_fields_csv},
        )
    )

    # The chart takes its ownership from the dashboard passed in.
    dashboard_ownership = self.get_ownership(dashboard)
    if dashboard_ownership is not None:
        snapshot.aspects.append(dashboard_ownership)

    return MetadataChangeEvent(proposedSnapshot=snapshot)
def _make_chart_mce(
    self, dashboard_element: LookerDashboardElement
) -> MetadataChangeEvent:
    """Wrap one dashboard element in a chart MetadataChangeEvent.

    The snapshot carries a single ChartInfo aspect built from the element's
    title, description, URL and view URNs.
    """
    config = self.source_config

    snapshot = ChartSnapshot(
        urn=builder.make_chart_urn(
            config.platform_name, dashboard_element.get_urn_element_id()
        ),
        aspects=[],
    )

    chart_type = self._get_chart_type(dashboard_element)
    chart_url = dashboard_element.url(config.base_url)
    view_urns = dashboard_element.get_view_urns(config.platform_name, config.env)

    snapshot.aspects.append(
        ChartInfoClass(
            type=chart_type,
            description=dashboard_element.description or "",
            title=dashboard_element.title or "",
            lastModified=ChangeAuditStamps(),
            chartUrl=chart_url,
            inputs=view_urns,
        )
    )

    return MetadataChangeEvent(proposedSnapshot=snapshot)
def construct_dashboard_from_api_data(
    self, dashboard_info: dict
) -> Optional[DashboardSnapshot]:
    """Fetch one dashboard's details and turn them into a DashboardSnapshot.

    Args:
        dashboard_info: Dashboard summary; only its ``"id"`` is read, to build
            the ``/api/dashboard/{id}`` request URL.

    Returns:
        The populated snapshot, or ``None`` when the request raises
        ``HTTPError`` (the failure is recorded on ``self.report``).
    """
    dashboard_id = dashboard_info.get("id", "")
    dashboard_url = f"{self.config.connect_uri}/api/dashboard/{dashboard_id}"
    try:
        dashboard_response = self.session.get(dashboard_url)
        dashboard_response.raise_for_status()
        dashboard_details = dashboard_response.json()
    except HTTPError as http_error:
        self.report.report_failure(
            key=f"metabase-dashboard-{dashboard_id}",
            reason=f"Unable to retrieve dashboard. Reason: {str(http_error)}",
        )
        return None

    dashboard_urn = builder.make_dashboard_urn(
        self.platform, dashboard_details.get("id", "")
    )
    dashboard_snapshot = DashboardSnapshot(
        urn=dashboard_urn,
        aspects=[],
    )

    # "last-edit-info" may be absent or null; fall back to an empty mapping.
    last_edit_by = dashboard_details.get("last-edit-info") or {}
    modified_actor = builder.make_user_urn(last_edit_by.get("email", "unknown"))
    # NOTE(review): a missing timestamp is passed on as the string "None";
    # confirm get_timestamp_millis_from_ts_string tolerates that.
    modified_ts = self.get_timestamp_millis_from_ts_string(
        f"{last_edit_by.get('timestamp')}"
    )
    title = dashboard_details.get("name", "") or ""
    description = dashboard_details.get("description", "") or ""
    # Only the last edit is known here, so it stands in for creation as well.
    last_modified = ChangeAuditStamps(
        created=AuditStamp(time=modified_ts, actor=modified_actor),
        lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
    )

    # The fallback must be an empty list: the previous "{}" string default was
    # iterated character by character and failed on `str.get`, and an explicit
    # null value was not iterable at all.
    # NOTE(review): each entry's "id" is used as the chart id; confirm it is
    # the id of the card itself rather than of the dashboard/card link.
    cards_data = dashboard_details.get("ordered_cards") or []
    chart_urns = [
        builder.make_chart_urn(self.platform, card_info.get("id", ""))
        for card_info in cards_data
    ]

    dashboard_info_class = DashboardInfoClass(
        description=description,
        title=title,
        charts=chart_urns,
        lastModified=last_modified,
        dashboardUrl=f"{self.config.connect_uri}/dashboard/{dashboard_id}",
        customProperties={},
    )
    dashboard_snapshot.aspects.append(dashboard_info_class)

    # Ownership
    ownership = self._get_ownership(dashboard_details.get("creator_id", ""))
    if ownership is not None:
        dashboard_snapshot.aspects.append(ownership)

    return dashboard_snapshot
def emit_dashboards(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
    """Yield work units for each dashboard listed under ``workbook``.

    For every dashboard this yields the metadata change event for its
    snapshot, followed by whatever ``add_entity_to_container`` yields to attach
    the dashboard to the workbook container.
    """
    for dashboard in workbook.get("dashboards", []):
        snapshot = DashboardSnapshot(
            urn=builder.make_dashboard_urn(self.platform, dashboard["id"]),
            aspects=[],
        )

        # Audit stamps: the workbook owner is used as the actor, and missing
        # timestamps default to the current time.
        creator = workbook.get("owner", {}).get("username", "")
        created_at = dashboard.get("createdAt", datetime.now())
        updated_at = dashboard.get("updatedAt", datetime.now())
        last_modified = self.get_last_modified(creator, created_at, updated_at)

        site_part = f"/site/{self.config.site}" if self.config.site else ""
        dashboard_external_url = f"{self.config.connect_uri}/#{site_part}/views/{dashboard.get('path', '')}"

        # Slashes in the name would otherwise split the browse path below.
        raw_name = dashboard.get("name")
        title = raw_name.replace("/", REPLACE_SLASH_CHAR) if raw_name else ""

        sheet_chart_urns = []
        for sheet in dashboard.get("sheets", []):
            sheet_chart_urns.append(
                builder.make_chart_urn(self.platform, sheet.get("id"))
            )

        snapshot.aspects.append(
            DashboardInfoClass(
                description="",
                title=title,
                charts=sheet_chart_urns,
                lastModified=last_modified,
                dashboardUrl=dashboard_external_url,
                customProperties={},
            )
        )

        # Browse path: only set when both project and workbook names exist.
        if workbook.get("projectName") and workbook.get("name"):
            project_segment = workbook["projectName"].replace("/", REPLACE_SLASH_CHAR)
            workbook_segment = workbook["name"].replace("/", REPLACE_SLASH_CHAR)
            # Fall back to the dashboard id when there is no usable title.
            dashboard_segment = title or dashboard["id"]
            snapshot.aspects.append(
                BrowsePathsClass(
                    paths=[
                        f"/{self.platform}/{project_segment}"
                        f"/{workbook_segment}"
                        f"/{dashboard_segment}"
                    ]
                )
            )
        else:
            logger.debug(f"Browse path not set for dashboard {dashboard['id']}")

        # Ownership
        ownership = self._get_ownership(creator)
        if ownership is not None:
            snapshot.aspects.append(ownership)

        yield self.get_metadata_change_event(snapshot)

        yield from add_entity_to_container(
            self.gen_workbook_key(workbook), "dashboard", snapshot.urn
        )
def _get_chart_urns(self, report_token: str) -> list:
    """Return the chart URNs for every chart of every query in a report.

    Queries come from ``self._get_queries(report_token)``; for each one the
    charts come from ``self._get_charts(report_token, <query token>)``. URNs
    are returned in that nested iteration order.
    """
    return [
        builder.make_chart_urn(self.platform, chart.get("token", ""))
        for query in self._get_queries(report_token)
        for chart in self._get_charts(report_token, query.get("token", ""))
    ]
def construct_card_from_api_data(
    self, card_data: dict
) -> Optional[ChartSnapshot]:
    """Fetch one card's details and turn them into a ChartSnapshot.

    Args:
        card_data: Card summary; only its ``"id"`` is read, to build the
            ``/api/card/{id}`` request URL and the chart URN.

    Returns:
        The populated snapshot, or ``None`` when the request raises
        ``HTTPError`` (the failure is recorded on ``self.report``).
    """
    card_id = card_data.get("id", "")
    base_uri = self.config.connect_uri

    try:
        response = self.session.get(f"{base_uri}/api/card/{card_id}")
        response.raise_for_status()
        card_details = response.json()
    except HTTPError as http_error:
        self.report.report_failure(
            key=f"metabase-card-{card_id}",
            reason=f"Unable to retrieve Card info. Reason: {str(http_error)}",
        )
        return None

    snapshot = ChartSnapshot(
        urn=builder.make_chart_urn(self.platform, card_id),
        aspects=[],
    )

    # Audit stamps: the last edit stands in for both creation and modification.
    edit_info = card_details.get("last-edit-info") or {}
    actor_urn = builder.make_user_urn(edit_info.get("email", "unknown"))
    edited_at_ms = self.get_timestamp_millis_from_ts_string(
        f"{edit_info.get('timestamp')}"
    )
    audit_stamps = ChangeAuditStamps(
        created=AuditStamp(time=edited_at_ms, actor=actor_urn),
        lastModified=AuditStamp(time=edited_at_ms, actor=actor_urn),
    )

    chart_type = self._get_chart_type(
        card_details.get("id", ""), card_details.get("display")
    )
    datasource_urn = self.get_datasource_urn(card_details)
    custom_properties = self.construct_card_custom_properties(card_details)

    snapshot.aspects.append(
        ChartInfoClass(
            type=chart_type,
            description=card_details.get("description") or "",
            title=card_details.get("name") or "",
            lastModified=audit_stamps,
            chartUrl=f"{base_uri}/card/{card_id}",
            inputs=datasource_urn,
            customProperties=custom_properties,
        )
    )

    # Cards whose query type is "native" also get their raw query attached.
    if card_details.get("query_type", "") == "native":
        native_query = card_details.get("dataset_query", {}).get("native", {})
        snapshot.aspects.append(
            ChartQueryClass(
                rawQuery=native_query.get("query", ""),
                type=ChartQueryTypeClass.SQL,
            )
        )

    # Ownership
    card_ownership = self._get_ownership(card_details.get("creator_id", ""))
    if card_ownership is not None:
        snapshot.aspects.append(card_ownership)

    return snapshot
def construct_chart_from_api_data(
    self, chart_data: dict, query: dict, path: str
) -> ChartSnapshot:
    """Build a ChartSnapshot from one chart payload and the query behind it.

    Args:
        chart_data: Chart payload; read for ``token``, ``_links``,
            ``created_at``, ``updated_at``, ``view`` and ``view_vegas``.
        query: Query payload; read for ``data_source_id`` and ``raw_query``.
        path: Browse path recorded verbatim on the chart.

    Returns:
        A snapshot holding ChartInfo, BrowsePaths and ChartQuery aspects, plus
        an ownership aspect when ``self._get_ownership`` returns one.
    """
    chart_token = chart_data.get("token", "")
    links = chart_data.get("_links", {})

    chart_snapshot = ChartSnapshot(
        urn=builder.make_chart_urn(self.platform, chart_token),
        aspects=[],
    )

    # Resolve the creator once and reuse it for both the audit stamps and the
    # ownership aspect; the lookup used to be repeated with the same href.
    # NOTE(review): assumes _get_creator returns the same value for the same
    # href within one call -- confirm.
    creator = self._get_creator(links.get("creator", {}).get("href", ""))

    last_modified = ChangeAuditStamps()
    if creator is not None:
        modified_actor = builder.make_user_urn(creator)
        # NOTE(review): the "now" fallback is handed to dp.parse unchanged;
        # confirm the parser accepts that literal when a timestamp is missing.
        created_ts = int(
            dp.parse(chart_data.get("created_at", "now")).timestamp() * 1000
        )
        modified_ts = int(
            dp.parse(chart_data.get("updated_at", "now")).timestamp() * 1000
        )
        last_modified = ChangeAuditStamps(
            created=AuditStamp(time=created_ts, actor=modified_actor),
            lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
        )

    # Prefer "view"; fall back to "view_vegas" when it is missing, empty or
    # null (a null "view" previously raised TypeError inside len()).
    chart_detail = chart_data.get("view") or chart_data.get("view_vegas") or {}

    mode_chart_type = chart_detail.get("chartType", "") or chart_detail.get(
        "selectedChart", ""
    )
    chart_type = self._get_chart_type(chart_token, mode_chart_type)
    description = (
        chart_detail.get("description")
        or chart_detail.get("chartDescription")
        or ""
    )
    title = chart_detail.get("title") or chart_detail.get("chartTitle") or ""

    # create datasource urn
    platform, db_name = self._get_platform_and_dbname(query.get("data_source_id"))
    source_tables = self._get_source_from_query(query.get("raw_query"))
    datasource_urn = self._get_datasource_urn(platform, db_name, source_tables)
    custom_properties = self.construct_chart_custom_properties(
        chart_detail, mode_chart_type
    )

    # Chart Info
    chart_info = ChartInfoClass(
        type=chart_type,
        description=description,
        title=title,
        lastModified=last_modified,
        chartUrl=f"{self.config.connect_uri}"
        f"{links.get('report_viz_web', {}).get('href', '')}",
        inputs=datasource_urn,
        customProperties=custom_properties,
    )
    chart_snapshot.aspects.append(chart_info)

    # Browse Path
    chart_snapshot.aspects.append(BrowsePathsClass(paths=[path]))

    # Query
    chart_query = ChartQueryClass(
        rawQuery=query.get("raw_query", ""),
        type=ChartQueryTypeClass.SQL,
    )
    chart_snapshot.aspects.append(chart_query)

    # Ownership (creator may be None; _get_ownership decides what to return)
    ownership = self._get_ownership(creator)
    if ownership is not None:
        chart_snapshot.aspects.append(ownership)

    return chart_snapshot
def emit_sheets_as_charts(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
    """Yield chart work units for each sheet listed under ``workbook``.

    For every sheet this yields the metadata change event for its chart
    snapshot (ChartInfo, BrowsePaths, optional ownership and tags), followed
    by whatever ``add_entity_to_container`` yields to attach the chart to the
    workbook container. Upstream datasource ids seen along the way are
    recorded in ``self.datasource_ids_being_used``.
    """
    sheet_upstream_datasources = self.get_sheetwise_upstream_datasources(workbook)

    # Browse-path prefix shared by all sheets. Slashes are replaced in the
    # workbook name too, matching emit_dashboards; an unescaped "/" would
    # split the name into extra path segments. Null names are treated as "".
    project_segment = (workbook.get("projectName") or "").replace(
        "/", REPLACE_SLASH_CHAR
    )
    workbook_segment = (workbook.get("name") or "").replace("/", REPLACE_SLASH_CHAR)
    browse_prefix = f"/{self.platform}/{project_segment}/{workbook_segment}"

    for sheet in workbook.get("sheets", []):
        chart_snapshot = ChartSnapshot(
            urn=builder.make_chart_urn(self.platform, sheet.get("id")),
            aspects=[],
        )

        # Audit stamps: the workbook owner is used as the actor, and missing
        # timestamps default to the current time.
        creator = workbook.get("owner", {}).get("username", "")
        created_at = sheet.get("createdAt", datetime.now())
        updated_at = sheet.get("updatedAt", datetime.now())
        last_modified = self.get_last_modified(creator, created_at, updated_at)

        if sheet.get("path"):
            site_part = f"/site/{self.config.site}" if self.config.site else ""
            sheet_external_url = (
                f"{self.config.connect_uri}/#{site_part}/views/{sheet.get('path')}"
            )
        elif sheet.get("containedInDashboards"):
            # sheet contained in dashboard
            site_part = f"/t/{self.config.site}" if self.config.site else ""
            dashboard_path = sheet.get("containedInDashboards")[0].get("path", "")
            sheet_external_url = f"{self.config.connect_uri}{site_part}/authoring/{dashboard_path}/{sheet.get('name', '')}"
        else:
            # hidden or viz-in-tooltip sheet
            sheet_external_url = None

        # Field name -> description, exposed as the chart's custom properties.
        # A null "datasourceFields" is treated the same as a missing one.
        fields = {}
        for field in sheet.get("datasourceFields") or []:
            description = make_description_from_params(
                get_field_value_in_sheet(field, "description"),
                get_field_value_in_sheet(field, "formula"),
            )
            fields[get_field_value_in_sheet(field, "name")] = description

        # datasource urn
        datasource_urn = []
        data_sources = sheet_upstream_datasources.get(sheet.get("id"), set())
        for ds_id in data_sources:
            # Skip None and empty ids.
            if not ds_id:
                continue
            ds_urn = builder.make_dataset_urn(self.platform, ds_id, self.config.env)
            datasource_urn.append(ds_urn)
            if ds_id not in self.datasource_ids_being_used:
                self.datasource_ids_being_used.append(ds_id)

        # Chart Info
        chart_info = ChartInfoClass(
            description="",
            title=sheet.get("name", ""),
            lastModified=last_modified,
            externalUrl=sheet_external_url,
            # Sorted so the emitted lineage does not depend on set ordering.
            inputs=sorted(datasource_urn),
            customProperties=fields,
        )
        chart_snapshot.aspects.append(chart_info)

        # Browse path
        sheet_segment = (sheet.get("name") or "").replace("/", REPLACE_SLASH_CHAR)
        browse_path = BrowsePathsClass(paths=[f"{browse_prefix}/{sheet_segment}"])
        chart_snapshot.aspects.append(browse_path)

        # Ownership
        owner = self._get_ownership(creator)
        if owner is not None:
            chart_snapshot.aspects.append(owner)

        # Tags (only when tag ingestion is enabled in the config)
        tag_list = sheet.get("tags", [])
        if tag_list and self.config.ingest_tags:
            tag_list_str = [
                t.get("name", "").upper() for t in tag_list if t is not None
            ]
            chart_snapshot.aspects.append(
                builder.make_global_tag_aspect_with_tag_list(tag_list_str)
            )

        yield self.get_metadata_change_event(chart_snapshot)

        yield from add_entity_to_container(
            self.gen_workbook_key(workbook), "chart", chart_snapshot.urn
        )
def __to_datahub_chart(
    self, tile: PowerBiAPI.Tile, ds_mcps: List[MetadataChangeProposalWrapper]
) -> List[MetadataChangeProposalWrapper]:
    """
    Translate a PowerBi tile into the proposals describing it as a chart.

    Returns three MCPs for the chart URN, in this order: chart info,
    status (``removed=False``) and chart key.
    """
    LOGGER.info("Converting tile {}(id={}) to chart".format(tile.title, tile.id))

    platform_name = self.__config.platform_name

    # URN identifying this tile as a chart
    chart_urn = builder.make_chart_urn(platform_name, tile.get_urn_part())
    LOGGER.info("{}={}".format(Constant.CHART_URN, chart_urn))

    input_urns: List[str] = self.to_urn_set(ds_mcps)

    # The external URL is only available when the tile has a report.
    external_url = tile.report.webUrl if tile.report else None

    # Missing dataset/report references are recorded as empty strings.
    custom_properties = {
        "datasetId": tile.dataset.id if tile.dataset else "",
        "reportId": tile.report.id if tile.report else "",
        "datasetWebUrl": tile.dataset.webUrl if tile.dataset is not None else "",
        "createdFrom": tile.createdFrom.value,
    }

    # The tile title doubles as the chart description.
    chart_info = ChartInfoClass(
        title=tile.title or "",
        description=tile.title or "",
        lastModified=ChangeAuditStamps(),
        inputs=input_urns,
        externalUrl=external_url,
        customProperties=custom_properties,
    )

    # (aspect name, aspect) pairs in the order the proposals are returned.
    aspects = [
        (Constant.CHART_INFO, chart_info),
        (Constant.STATUS, StatusClass(removed=False)),
        (
            Constant.CHART_KEY,
            ChartKeyClass(
                dashboardTool=platform_name,
                chartId=Constant.CHART_ID.format(tile.id),
            ),
        ),
    ]

    return [
        self.new_mcp(
            entity_type=Constant.CHART,
            entity_urn=chart_urn,
            aspect_name=aspect_name,
            aspect=aspect,
        )
        for aspect_name, aspect in aspects
    ]
# Datasets that feed the chart; they become the `inputs` lineage of the
# ChartInfo aspect below.
input_datasets: List[str] = [
    builder.make_dataset_urn(platform="hdfs", name=dataset_name, env="PROD")
    for dataset_name in ("dataset1", "dataset2")
]

last_modified = ChangeAuditStampsClass()

chart_info = ChartInfoClass(
    title="Baz Chart 1",
    description="Sample Baz chart",
    lastModified=last_modified,
    inputs=input_datasets,
)

# Wrap the aspect in a MetadataChangeProposalWrapper targeting the chart.
# NOTE: This will overwrite all of the existing chartInfo aspect information
# associated with this chart.
chart_info_mcp = MetadataChangeProposalWrapper(
    entityType="chart",
    changeType=ChangeTypeClass.UPSERT,
    entityUrn=builder.make_chart_urn(platform="looker", name="my_chart_1"),
    aspectName="chartInfo",
    aspect=chart_info,
)

# REST emitter pointed at the GMS endpoint.
emitter = DatahubRestEmitter("http://localhost:8080")

# Send the proposal.
emitter.emit_mcp(chart_info_mcp)