Пример #1
0
    def emit_dashboards(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
        """Yield work units for every dashboard listed in ``workbook``.

        For each entry under ``workbook["dashboards"]`` a ``DashboardSnapshot``
        is assembled from a dashboard-info aspect, a browse-path aspect (only
        when the workbook has both a project name and a name) and an ownership
        aspect (only when ``_get_ownership`` returns one).  The snapshot is
        yielded as a metadata change event, followed by whatever
        ``add_entity_to_container`` produces for the workbook's container key.
        """
        for dashboard in workbook.get("dashboards", []):
            snapshot = DashboardSnapshot(
                urn=builder.make_dashboard_urn(self.platform, dashboard["id"]),
                aspects=[],
            )

            # The workbook owner is the actor used for the audit stamp; missing
            # timestamps fall back to the current time.
            creator = workbook.get("owner", {}).get("username", "")
            last_modified = self.get_last_modified(
                creator,
                dashboard.get("createdAt", datetime.now()),
                dashboard.get("updatedAt", datetime.now()),
            )

            # External link to the dashboard view, optionally scoped to a site.
            if self.config.site:
                site_segment = f"/site/{self.config.site}"
            else:
                site_segment = ""
            external_url = f"{self.config.connect_uri}/#{site_segment}/views/{dashboard.get('path', '')}"

            # "/" would be read as a path separator in browse paths, so it is
            # replaced in the title.
            raw_name = dashboard.get("name")
            title = raw_name.replace("/", REPLACE_SLASH_CHAR) if raw_name else ""

            chart_urns = []
            for sheet in dashboard.get("sheets", []):
                chart_urns.append(
                    builder.make_chart_urn(self.platform, sheet.get("id"))
                )

            snapshot.aspects.append(
                DashboardInfoClass(
                    description="",
                    title=title,
                    charts=chart_urns,
                    lastModified=last_modified,
                    dashboardUrl=external_url,
                    customProperties={},
                )
            )

            # Browse path: /<platform>/<project>/<workbook>/<dashboard>
            project_name = workbook.get("projectName")
            workbook_name = workbook.get("name")
            if project_name and workbook_name:
                project_segment = project_name.replace("/", REPLACE_SLASH_CHAR)
                workbook_segment = workbook_name.replace("/", REPLACE_SLASH_CHAR)
                # Fall back to the dashboard id when there is no usable title.
                leaf_segment = title or dashboard["id"]
                snapshot.aspects.append(
                    BrowsePathsClass(
                        paths=[
                            f"/{self.platform}/{project_segment}"
                            f"/{workbook_segment}"
                            f"/{leaf_segment}"
                        ]
                    )
                )
            else:
                logger.debug(f"Browse path not set for dashboard {dashboard['id']}")

            # Ownership
            ownership = self._get_ownership(creator)
            if ownership is not None:
                snapshot.aspects.append(ownership)

            yield self.get_metadata_change_event(snapshot)

            # Attach the dashboard to its workbook container.
            container_key = self.gen_workbook_key(workbook)
            yield from add_entity_to_container(
                container_key, "dashboard", snapshot.urn
            )
Пример #2
0
    def emit_sheets_as_charts(self,
                              workbook: Dict) -> Iterable[MetadataWorkUnit]:
        """Yield chart work units for every sheet of ``workbook``.

        Each entry of ``workbook["sheets"]`` becomes a ``ChartSnapshot``
        carrying a chart-info aspect and a browse-path aspect, plus ownership
        and tag aspects when they are available.  The snapshot is yielded as a
        metadata change event, followed by the work units produced by
        ``add_entity_to_container`` for the workbook's container key.

        Side effect: every non-empty upstream datasource id found for a sheet
        is recorded in ``self.datasource_ids_being_used``.

        All three name segments of the browse path (project, workbook, sheet)
        have "/" replaced by ``REPLACE_SLASH_CHAR``, matching the escaping
        ``emit_dashboards`` applies, so charts and dashboards of one workbook
        share the same browse folder.
        """
        sheet_upstream_datasources = self.get_sheetwise_upstream_datasources(
            workbook)
        for sheet in workbook.get("sheets", []):
            chart_snapshot = ChartSnapshot(
                urn=builder.make_chart_urn(self.platform, sheet.get("id")),
                aspects=[],
            )

            # The workbook owner is the actor used for the audit stamp; missing
            # timestamps fall back to the current time.
            creator = workbook.get("owner", {}).get("username", "")
            created_at = sheet.get("createdAt", datetime.now())
            updated_at = sheet.get("updatedAt", datetime.now())
            last_modified = self.get_last_modified(creator, created_at,
                                                   updated_at)

            if sheet.get("path"):
                # Sheet with its own view path.
                site_part = f"/site/{self.config.site}" if self.config.site else ""
                sheet_external_url = (
                    f"{self.config.connect_uri}/#{site_part}/views/{sheet.get('path')}"
                )
            elif sheet.get("containedInDashboards"):
                # Sheet contained in a dashboard: link to the authoring view of
                # the first dashboard that contains it.
                site_part = f"/t/{self.config.site}" if self.config.site else ""
                dashboard_path = sheet.get("containedInDashboards")[0].get(
                    "path", "")
                sheet_external_url = f"{self.config.connect_uri}{site_part}/authoring/{dashboard_path}/{sheet.get('name', '')}"
            else:
                # hidden or viz-in-tooltip sheet
                sheet_external_url = None

            # Custom properties: field name -> description built from the
            # field's description and formula.  ``or []`` also covers an
            # explicit ``None`` value, which would otherwise break the loop.
            fields = {}
            for field in sheet.get("datasourceFields") or []:
                description = make_description_from_params(
                    get_field_value_in_sheet(field, "description"),
                    get_field_value_in_sheet(field, "formula"),
                )
                fields[get_field_value_in_sheet(field, "name")] = description

            # Upstream datasource URNs (chart inputs).
            datasource_urn = []
            data_sources = sheet_upstream_datasources.get(
                sheet.get("id"), set())

            for ds_id in data_sources:
                if not ds_id:
                    continue
                ds_urn = builder.make_dataset_urn(self.platform, ds_id,
                                                  self.config.env)
                datasource_urn.append(ds_urn)
                if ds_id not in self.datasource_ids_being_used:
                    self.datasource_ids_being_used.append(ds_id)

            # Chart Info; inputs are sorted so the emitted aspect does not
            # depend on the iteration order of ``data_sources``.
            chart_info = ChartInfoClass(
                description="",
                title=sheet.get("name", ""),
                lastModified=last_modified,
                externalUrl=sheet_external_url,
                inputs=sorted(datasource_urn),
                customProperties=fields,
            )
            chart_snapshot.aspects.append(chart_info)

            # Browse path: /<platform>/<project>/<workbook>/<sheet>.
            # Every segment is slash-escaped and tolerates a missing or
            # ``None`` value.
            project_segment = (workbook.get("projectName") or "").replace(
                "/", REPLACE_SLASH_CHAR)
            workbook_segment = (workbook.get("name") or "").replace(
                "/", REPLACE_SLASH_CHAR)
            sheet_segment = (sheet.get("name") or "").replace(
                "/", REPLACE_SLASH_CHAR)
            browse_path = BrowsePathsClass(paths=[
                f"/{self.platform}/{project_segment}"
                f"/{workbook_segment}"
                f"/{sheet_segment}"
            ])
            chart_snapshot.aspects.append(browse_path)

            # Ownership
            owner = self._get_ownership(creator)
            if owner is not None:
                chart_snapshot.aspects.append(owner)

            # Tags (only when tag ingestion is enabled); names are upper-cased.
            tag_list = sheet.get("tags", [])
            if tag_list and self.config.ingest_tags:
                tag_list_str = [
                    t.get("name", "").upper() for t in tag_list
                    if t is not None
                ]
                chart_snapshot.aspects.append(
                    builder.make_global_tag_aspect_with_tag_list(tag_list_str))

            yield self.get_metadata_change_event(chart_snapshot)

            # Attach the chart to its workbook container.
            yield from add_entity_to_container(self.gen_workbook_key(workbook),
                                               "chart", chart_snapshot.urn)
Пример #3
0
    def emit_datasource(self,
                        datasource: dict,
                        workbook: dict = None) -> Iterable[MetadataWorkUnit]:
        """Yield the work units describing a single datasource as a dataset.

        Project name and owner are read from ``workbook`` when one is given,
        otherwise from ``datasource`` itself.  In order, the method yields:

        1. an ``upstreamLineage`` change proposal, when the datasource has
           ``upstreamTables`` and lineage could be built from them;
        2. the dataset snapshot (browse path, optional ownership, dataset
           properties, optional schema metadata) as a metadata change event;
        3. a ``subTypes`` change proposal marking it as a "Data Source";
        4. for an ``EmbeddedDatasource``, the work units that attach it to the
           workbook container.

        Side effect: the datasource id is recorded in
        ``self.datasource_ids_being_used``.
        """
        info_source = datasource if workbook is None else workbook

        if info_source:
            project = info_source.get("projectName", "").replace(
                "/", REPLACE_SLASH_CHAR)
        else:
            project = ""

        ds_id = datasource.get("id", "")
        qualified_name = f"{datasource.get('name')}.{ds_id}"
        ds_urn = builder.make_dataset_urn(self.platform, ds_id,
                                          self.config.env)
        if ds_id not in self.datasource_ids_being_used:
            self.datasource_ids_being_used.append(ds_id)

        snapshot = DatasetSnapshot(urn=ds_urn, aspects=[])

        # Browse path
        snapshot.aspects.append(
            BrowsePathsClass(paths=[
                f"/{self.config.env.lower()}/{self.platform}/{project}/{datasource.get('name', '')}/{qualified_name}"
            ]))

        # Ownership
        owner = None
        if info_source:
            owner = self._get_ownership(
                info_source.get("owner", {}).get("username", ""))
        if owner is not None:
            snapshot.aspects.append(owner)

        # Dataset properties; extract timestamps that are missing or falsy are
        # stored as empty strings.
        custom_properties = {
            "hasExtracts":
            str(datasource.get("hasExtracts", "")),
            "extractLastRefreshTime":
            datasource.get("extractLastRefreshTime", "") or "",
            "extractLastIncrementalUpdateTime":
            datasource.get("extractLastIncrementalUpdateTime", "") or "",
            "extractLastUpdateTime":
            datasource.get("extractLastUpdateTime", "") or "",
            "type":
            datasource.get("__typename", ""),
        }
        snapshot.aspects.append(
            DatasetPropertiesClass(
                name=datasource.get("name"),
                description=datasource.get("description"),
                customProperties=custom_properties,
            ))

        # Upstream tables: datasource -> db table relations
        if datasource.get("upstreamTables") is not None:
            upstream_tables = self._create_upstream_table_lineage(
                datasource, project)
            if upstream_tables:
                yield self.get_metadata_change_proposal(
                    ds_urn,
                    aspect_name="upstreamLineage",
                    aspect=UpstreamLineage(upstreams=upstream_tables),
                )

        # Datasource fields
        schema_metadata = self._get_schema_metadata_for_embedded_datasource(
            datasource.get("fields", []))
        if schema_metadata is not None:
            snapshot.aspects.append(schema_metadata)

        yield self.get_metadata_change_event(snapshot)
        yield self.get_metadata_change_proposal(
            snapshot.urn,
            aspect_name="subTypes",
            aspect=SubTypesClass(typeNames=["Data Source"]),
        )

        # Only embedded datasources are attached to the workbook container.
        if datasource.get("__typename") == "EmbeddedDatasource":
            container_key = self.gen_workbook_key(workbook)
            yield from add_entity_to_container(container_key, "dataset",
                                               snapshot.urn)
Пример #4
0
    def emit_custom_sql_datasources(self) -> Iterable[MetadataWorkUnit]:
        """Yield work units for the custom SQL tables in ``self.custom_sql_ids_being_used``.

        The custom SQL tables are fetched page by page through
        ``get_connection_object`` using an ``idWithin`` filter.  For every
        unique table returned, the method yields (in this order):

        * lineage work units towards the datasources that reference it, and,
          for an embedded datasource with a workbook, the work units that
          attach the table to that workbook's container;
        * lineage work units towards its upstream tables;
        * a dataset snapshot (optional schema metadata, optional browse path,
          dataset properties, view properties) as a metadata change event;
        * a ``subTypes`` change proposal with "View" and "Custom SQL".
        """
        # Page size: the number of ids being requested.
        count_on_query = len(self.custom_sql_ids_being_used)
        custom_sql_filter = "idWithin: {}".format(
            json.dumps(self.custom_sql_ids_being_used)
        )
        # Initial query without explicit count/offset arguments.  Only
        # ``total_count`` and ``has_next_page`` from this call are used: the
        # connection object is overwritten on the first loop iteration.
        # NOTE(review): presumably a zero-row probe - confirm against the
        # defaults of ``get_connection_object``.
        custom_sql_connection, total_count, has_next_page = self.get_connection_object(
            custom_sql_graphql_query, "customSQLTablesConnection", custom_sql_filter
        )

        current_count = 0
        while has_next_page:
            # Request a full page, or only the remainder on the last page.
            count = (
                count_on_query
                if current_count + count_on_query < total_count
                else total_count - current_count
            )
            (
                custom_sql_connection,
                total_count,
                has_next_page,
            ) = self.get_connection_object(
                custom_sql_graphql_query,
                "customSQLTablesConnection",
                custom_sql_filter,
                count,
                current_count,
            )
            current_count += count

            unique_custom_sql = get_unique_custom_sql(
                custom_sql_connection.get("nodes", [])
            )
            for csql in unique_custom_sql:
                csql_id: str = csql["id"]
                csql_urn = builder.make_dataset_urn(
                    self.platform, csql_id, self.config.env
                )
                dataset_snapshot = DatasetSnapshot(
                    urn=csql_urn,
                    aspects=[],
                )

                # Reset per table; both stay None when no datasource
                # references the table, which disables the browse path below.
                datasource_name = None
                project = None
                if len(csql["datasources"]) > 0:
                    yield from self._create_lineage_from_csql_datasource(
                        csql_urn, csql["datasources"]
                    )

                    # CustomSQLTable id owned by exactly one tableau data source
                    logger.debug(
                        f"Number of datasources referencing CustomSQLTable: {len(csql['datasources'])}"
                    )

                    # Only the first referencing datasource is used for naming,
                    # container membership and project lookup.
                    datasource = csql["datasources"][0]
                    datasource_name = datasource.get("name")
                    if datasource.get(
                        "__typename"
                    ) == "EmbeddedDatasource" and datasource.get("workbook"):
                        # Qualify the name with the workbook name; it becomes
                        # None when either name is missing.
                        datasource_name = (
                            f"{datasource.get('workbook').get('name')}/{datasource_name}"
                            if datasource_name
                            and datasource.get("workbook").get("name")
                            else None
                        )
                        yield from add_entity_to_container(
                            self.gen_workbook_key(datasource["workbook"]),
                            "dataset",
                            dataset_snapshot.urn,
                        )
                    project = self._get_project(datasource)

                # lineage from custom sql -> datasets/tables #
                columns = csql.get("columns", [])
                yield from self._create_lineage_to_upstream_tables(csql_urn, columns)

                #  Schema Metadata
                schema_metadata = self.get_schema_metadata_for_custom_sql(columns)
                if schema_metadata is not None:
                    dataset_snapshot.aspects.append(schema_metadata)

                # Browse path; the table id is used when the table has no name.
                csql_name = csql.get("name") if csql.get("name") else csql_id

                # NOTE(review): ``datasource_name`` only gates this branch; the
                # path itself uses the unqualified ``datasource['name']`` -
                # confirm that this is intended.
                if project and datasource_name:
                    browse_paths = BrowsePathsClass(
                        paths=[
                            f"/{self.config.env.lower()}/{self.platform}/{project}/{datasource['name']}/{csql_name}"
                        ]
                    )
                    dataset_snapshot.aspects.append(browse_paths)
                else:
                    logger.debug(f"Browse path not set for Custom SQL table {csql_id}")

                dataset_properties = DatasetPropertiesClass(
                    name=csql.get("name"), description=csql.get("description")
                )

                dataset_snapshot.aspects.append(dataset_properties)

                # The custom SQL text is exposed as the (non-materialized)
                # view logic after passing through ``clean_query``.
                view_properties = ViewPropertiesClass(
                    materialized=False,
                    viewLanguage="SQL",
                    viewLogic=clean_query(csql.get("query", "")),
                )
                dataset_snapshot.aspects.append(view_properties)

                yield self.get_metadata_change_event(dataset_snapshot)
                yield self.get_metadata_change_proposal(
                    dataset_snapshot.urn,
                    aspect_name="subTypes",
                    aspect=SubTypesClass(typeNames=["View", "Custom SQL"]),
                )