Python Server примеры использования

Язык программирования: Python

Пространство имен/Пакет: tableauserverclient

Класс/Тип: Server

Примеров на hotexamples.com: 7

Python Server - 7 примеров найдено. Это лучшие примеры Python кода для tableauserverclient.Server, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Server(4)

add_http_options(2)

use_server_version(2)

is_signed_in(1)

Пример #1

Показать файл

    def _authenticate(self):
        """Build credentials from the config and sign in to Tableau Server.

        A username/password pair is tried first; a personal access token is
        used when no such pair is configured.

        Sign-in errors are not propagated: they are logged and recorded on
        ``self.report`` under the ``tableau-login`` key.

        :raises ConfigurationError: if neither credential pair is configured.
        """
        # https://tableau.github.io/server-client-python/docs/api-ref#authentication
        if self.config.username and self.config.password:
            authentication = TableauAuth(
                username=self.config.username,
                password=self.config.password,
                site_id=self.config.site,
            )
        elif self.config.token_name and self.config.token_value:
            authentication = PersonalAccessTokenAuth(
                self.config.token_name,
                self.config.token_value,
                self.config.site,
            )
        else:
            raise ConfigurationError(
                "Tableau Source: Either username/password or token_name/token_value must be set"
            )

        try:
            self.server = Server(self.config.connect_uri,
                                 use_server_version=True)
            self.server.auth.sign_in(authentication)
        except ServerResponseError as e:
            logger.error(e)
            # The message parts used to be glued together with no separator
            # ("...providedReason: ..."); keep them readable.
            self.report.report_failure(
                key="tableau-login",
                reason=f"Unable to Login with credentials provided. Reason: {str(e)}",
            )
        except Exception as e:
            logger.error(e)
            self.report.report_failure(
                key="tableau-login",
                reason=f"Unable to Login. Reason: {str(e)}",
            )

Пример #2

Показать файл

Файл: tableau.py Проект: phoenix-elite1050/airflow

 def __init__(self, site_id: Optional[str] = None, tableau_conn_id: str = 'tableau_default') -> None:
     """
     Resolve the connection and create the ``Server`` client; nothing is signed in here.

     :param site_id: site to use; when falsy, the ``site_id`` entry of the
         connection extras is used (empty string if absent).
     :param tableau_conn_id: id of the connection to look up.
     """
     super().__init__()
     self.tableau_conn_id = tableau_conn_id
     self.conn = self.get_connection(self.tableau_conn_id)
     if site_id:
         self.site_id = site_id
     else:
         # Only consult the connection extras when no explicit site was given.
         self.site_id = self.conn.extra_dejson.get('site_id', '')
     host = self.conn.host
     self.server = Server(host, use_server_version=True)
     self.tableau_conn = None

Пример #3

Показать файл

Файл: tableau.py Проект: leahecole/airflow

 def __init__(self,
              site_id: Optional[str] = None,
              tableau_conn_id: str = default_conn_name) -> None:
     """
     Resolve the connection, create the ``Server`` client and apply HTTP options.

     :param site_id: site to use; when falsy, the ``site_id`` entry of the
         connection extras is used (empty string if absent).
     :param tableau_conn_id: id of the connection to look up.
     """
     super().__init__()
     self.tableau_conn_id = tableau_conn_id
     self.conn = self.get_connection(self.tableau_conn_id)
     if site_id:
         self.site_id = site_id
     else:
         self.site_id = self.conn.extra_dejson.get('site_id', '')
     self.server = Server(self.conn.host)

     # 'verify' may arrive as a real boolean or as a string; only strings are parsed.
     verify: Any = self.conn.extra_dejson.get('verify', True)
     verify = parse_boolean(verify) if isinstance(verify, str) else verify
     http_options = {
         'verify': verify,
         'cert': self.conn.extra_dejson.get('cert', None),
     }
     self.server.add_http_options(options_dict=http_options)
     self.server.use_server_version()
     self.tableau_conn = None

Пример #4

Показать файл

 def __init__(self,
              site_id: Optional[str] = None,
              tableau_conn_id: str = default_conn_name) -> None:
     """
     Resolve the connection, create the ``Server`` client and apply HTTP options.

     :param site_id: site to use; when falsy, the ``site_id`` entry of the
         connection extras is used (empty string if absent).
     :param tableau_conn_id: id of the connection to look up.
     """
     super().__init__()
     self.tableau_conn_id = tableau_conn_id
     self.conn = self.get_connection(self.tableau_conn_id)
     self.site_id = site_id or self.conn.extra_dejson.get('site_id', '')
     self.server = Server(self.conn.host)
     verify = self.conn.extra_dejson.get('verify', 'True')
     # strtobool() only understands strings. A real JSON boolean in the extras
     # (``"verify": false``) would raise AttributeError, which the ValueError
     # handler below does not catch, so non-string values are passed through.
     if isinstance(verify, str):
         try:
             verify = bool(strtobool(verify))
         except ValueError:
             # Not a boolean word: keep the raw string.
             # NOTE(review): presumably a CA bundle path - confirm.
             pass
     self.server.add_http_options(
         options_dict={
             'verify': verify,
             'cert': self.conn.extra_dejson.get('cert', None)
         })
     self.server.use_server_version()
     self.tableau_conn = None

Пример #5

Показать файл

Файл: tableau.py Проект: leahecole/airflow

class TableauHook(BaseHook):
    """
    Connects to the Tableau Server Instance and allows to communicate with it.

    Can be used as a context manager: automatically authenticates the connection
    when opened and signs out when closed.

    .. seealso:: https://tableau.github.io/server-client-python/docs/

    :param site_id: The id of the site where the workbook belongs to.
        It will connect to the default site if you don't provide an id.
    :param tableau_conn_id: The :ref:`Tableau Connection id <howto/connection:tableau>`
        containing the credentials to authenticate to the Tableau Server.
    """

    conn_name_attr = 'tableau_conn_id'
    default_conn_name = 'tableau_default'
    conn_type = 'tableau'
    hook_name = 'Tableau'

    def __init__(self,
                 site_id: Optional[str] = None,
                 tableau_conn_id: str = default_conn_name) -> None:
        super().__init__()
        self.tableau_conn_id = tableau_conn_id
        self.conn = self.get_connection(self.tableau_conn_id)
        self.site_id = site_id or self.conn.extra_dejson.get('site_id', '')
        self.server = Server(self.conn.host)
        # 'verify' may be stored as a boolean or as a string; only strings are parsed.
        verify: Any = self.conn.extra_dejson.get('verify', True)
        if isinstance(verify, str):
            verify = parse_boolean(verify)
        self.server.add_http_options(
            options_dict={
                'verify': verify,
                'cert': self.conn.extra_dejson.get('cert', None)
            })
        self.server.use_server_version()
        self.tableau_conn = None

    def __enter__(self):
        """Sign in (unless a sign-in result is already stored) and return the hook itself."""
        if not self.tableau_conn:
            self.tableau_conn = self.get_conn()
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Sign out of the Tableau Server when the ``with`` block is left."""
        self.server.auth.sign_out()

    def get_conn(self) -> Auth.contextmgr:
        """
        Sign in to the Tableau Server.

        Login/password credentials take precedence over a personal access token.

        :return: an authorized Tableau Server Context Manager object.
        :rtype: tableauserverclient.server.Auth.contextmgr
        :raises NotImplementedError: if the connection holds neither a
            login/password pair nor token credentials.
        """
        if self.conn.login and self.conn.password:
            return self._auth_via_password()
        if 'token_name' in self.conn.extra_dejson and 'personal_access_token' in self.conn.extra_dejson:
            return self._auth_via_token()
        raise NotImplementedError(
            'No Authentication method found for given Credentials!')

    def _auth_via_password(self) -> Auth.contextmgr:
        """Sign in with the login and password stored on the connection."""
        tableau_auth = TableauAuth(username=self.conn.login,
                                   password=self.conn.password,
                                   site_id=self.site_id)
        return self.server.auth.sign_in(tableau_auth)

    def _auth_via_token(self) -> Auth.contextmgr:
        """The method is deprecated. Please, use the authentication via password instead."""
        warnings.warn(
            "Authentication via personal access token is deprecated. "
            "Please, use the password authentication to avoid inconsistencies.",
            DeprecationWarning,
        )
        extras = self.conn.extra_dejson
        tableau_auth = PersonalAccessTokenAuth(
            token_name=extras['token_name'],
            personal_access_token=extras['personal_access_token'],
            site_id=self.site_id,
        )
        return self.server.auth.sign_in_with_personal_access_token(
            tableau_auth)

    def get_all(self, resource_name: str) -> Pager:
        """
        Get all items of the given resource.

        .. seealso:: https://tableau.github.io/server-client-python/docs/page-through-results

        :param resource_name: The name of the resource to paginate.
            For example: jobs or workbooks.
        :return: all items by returning a Pager.
        :rtype: tableauserverclient.Pager
        :raises ValueError: if the server object has no attribute named ``resource_name``.
        """
        try:
            resource = getattr(self.server, resource_name)
        except AttributeError as err:
            # Chain the original error so the traceback shows the failed lookup.
            raise ValueError(
                f"Resource name {resource_name} is not found.") from err
        return Pager(resource.get)

    def get_job_status(self, job_id: str) -> TableauJobFinishCode:
        """
        Get the current state of a defined Tableau Job.

        .. seealso:: https://tableau.github.io/server-client-python/docs/api-ref#jobs

        :param job_id: The id of the job to check.
        :return: An Enum that describe the Tableau job's return code
        :rtype: TableauJobFinishCode
        """
        return TableauJobFinishCode(
            int(self.server.jobs.get_by_id(job_id).finish_code))

    def wait_for_state(self, job_id: str, target_state: TableauJobFinishCode,
                       check_interval: float) -> bool:
        """
        Wait until the current state of a defined Tableau Job is equal
        to target_state or different from PENDING.

        :param job_id: The id of the job to check.
        :param target_state: Enum that describe the Tableau job's target state
        :param check_interval: time in seconds that the job should wait in
            between each instance state checks until operation is completed
        :return: return True if the job is equal to the target_status, False otherwise.
        :rtype: bool
        """
        finish_code = self.get_job_status(job_id=job_id)
        # Keep polling only while the job is still pending and has not reached the target.
        while finish_code == TableauJobFinishCode.PENDING and finish_code != target_state:
            self.log.info("job state: %s", finish_code)
            time.sleep(check_interval)
            finish_code = self.get_job_status(job_id=job_id)

        return finish_code == target_state

Пример #6

Показать файл

class TableauSource(Source):
    config: TableauConfig
    report: SourceReport
    platform = "tableau"
    server: Server
    upstream_tables: Dict[str, Tuple[Any, str]] = {}

    def __hash__(self):
        return id(self)

    def __init__(self, ctx: PipelineContext, config: TableauConfig):
        """Store the config, reset the per-run bookkeeping and sign in.

        :param ctx: pipeline context forwarded to the base class.
        :param config: Tableau source configuration.
        """
        super().__init__(ctx)

        self.config = config
        self.report = SourceReport()
        self.server = None
        # Give every instance its own mapping. The class-level default is a
        # single dict object, so without this all instances would share it
        # and accumulate each other's upstream tables.
        self.upstream_tables = {}
        # This list keeps track of datasource being actively used by workbooks so that we only retrieve those
        # when emitting published data sources.
        self.datasource_ids_being_used: List[str] = []
        # This list keeps track of custom SQL tables referenced by datasource fields so that we only
        # retrieve those when emitting custom SQL data sources.
        self.custom_sql_ids_being_used: List[str] = []

        self._authenticate()

    def close(self) -> None:
        if self.server is not None:
            self.server.auth.sign_out()

    def _authenticate(self):
        """Build credentials from the config and sign in to Tableau Server.

        A username/password pair is tried first; a personal access token is
        used when no such pair is configured.

        Sign-in errors are not propagated: they are logged and recorded on
        ``self.report`` under the ``tableau-login`` key.

        :raises ConfigurationError: if neither credential pair is configured.
        """
        # https://tableau.github.io/server-client-python/docs/api-ref#authentication
        if self.config.username and self.config.password:
            authentication = TableauAuth(
                username=self.config.username,
                password=self.config.password,
                site_id=self.config.site,
            )
        elif self.config.token_name and self.config.token_value:
            authentication = PersonalAccessTokenAuth(
                self.config.token_name,
                self.config.token_value,
                self.config.site,
            )
        else:
            raise ConfigurationError(
                "Tableau Source: Either username/password or token_name/token_value must be set"
            )

        try:
            self.server = Server(self.config.connect_uri,
                                 use_server_version=True)
            self.server.auth.sign_in(authentication)
        except ServerResponseError as e:
            logger.error(e)
            # The message parts used to be glued together with no separator
            # ("...providedReason: ..."); keep them readable.
            self.report.report_failure(
                key="tableau-login",
                reason=f"Unable to Login with credentials provided. Reason: {str(e)}",
            )
        except Exception as e:
            logger.error(e)
            self.report.report_failure(
                key="tableau-login",
                reason=f"Unable to Login. Reason: {str(e)}",
            )

    def get_connection_object(
        self,
        query: str,
        connection_type: str,
        query_filter: str,
        count: int = 0,
        current_count: int = 0,
    ) -> Tuple[dict, int, int]:
        """
        Run a metadata query and unpack the requested connection from the response.

        Errors present in the response are reported as a ``tableau-metadata``
        warning; processing continues with whatever data came back.

        :return: ``(connection_object, total_count, has_next_page)``; an empty
            dict, ``0`` and ``False`` respectively when the data is missing.
        """
        response = query_metadata(self.server, query, connection_type, count,
                                  current_count, query_filter)

        if "errors" in response:
            self.report.report_warning(
                key="tableau-metadata",
                reason=
                f"Connection: {connection_type} Error: {response['errors']}",
            )

        if response.get("data"):
            connection = response.get("data").get(connection_type, {})
        else:
            connection = {}

        total = connection.get("totalCount", 0)
        page_info = connection.get("pageInfo", {})
        more_pages = page_info.get("hasNextPage", False)
        return connection, total, more_pages

    def emit_workbooks(self,
                       workbooks_page_size: int) -> Iterable[MetadataWorkUnit]:
        """
        Page through the workbooks and yield the work units of each one.

        :param workbooks_page_size: maximum number of workbooks requested per query.
        """
        if self.config.projects:
            projects = f"projectNameWithin: {json.dumps(self.config.projects)}"
        else:
            projects = ""

        # Initial query with the default count/offset: only the total and the
        # next-page flag are used, the returned nodes are never read.
        workbook_connection, total_count, has_next_page = self.get_connection_object(
            workbook_graphql_query, "workbooksConnection", projects)

        offset = 0
        while has_next_page:
            # Full page, or whatever is left on the last page.
            page_len = min(workbooks_page_size, total_count - offset)
            workbook_connection, total_count, has_next_page = self.get_connection_object(
                workbook_graphql_query,
                "workbooksConnection",
                projects,
                page_len,
                offset,
            )
            offset += page_len

            for workbook in workbook_connection.get("nodes", []):
                yield from self.emit_workbook_as_container(workbook)
                yield from self.emit_sheets_as_charts(workbook)
                yield from self.emit_dashboards(workbook)
                yield from self.emit_embedded_datasource(workbook)
                yield from self.emit_upstream_tables()

    def _track_custom_sql_ids(self, field: dict) -> None:
        # Tableau shows custom sql datasource as a table in ColumnField.
        if field.get("__typename", "") == "ColumnField":
            for column in field.get("columns", []):
                table_id = column.get("table", {}).get("id")

                if (table_id is not None
                        and table_id not in self.custom_sql_ids_being_used):
                    self.custom_sql_ids_being_used.append(table_id)

    def _create_upstream_table_lineage(
            self,
            datasource: dict,
            project: str,
            is_custom_sql: bool = False) -> List[UpstreamClass]:
        """
        Build the upstream lineage entries of a datasource.

        Every usable upstream table is also recorded in ``self.upstream_tables``
        (urn -> (columns, browse path)). Upstream datasources are added to the
        result as well, after the tables.

        :param datasource: datasource dict, read for ``upstreamTables``,
            ``upstreamDatasources`` and ``name``.
        :param project: project name used in the table browse path.
        :param is_custom_sql: when True, tables without column info are kept.
        :return: the upstream entries, tables first.
        """
        upstream_tables = []

        for table in datasource.get("upstreamTables") or []:
            # skip upstream tables when there is no column info when retrieving embedded datasource
            # and when table name is None
            # Schema details for these will be taken care in self.emit_custom_sql_ds()
            if not is_custom_sql and not table.get("columns"):
                continue
            # A missing name is treated like a null one instead of raising KeyError.
            if table.get("name") is None:
                continue

            # "database" may be present but null.
            upstream_db = (table.get("database") or {}).get("name", "")
            schema = self._get_schema(table.get("schema", ""), upstream_db)
            table_urn = make_table_urn(
                self.config.env,
                upstream_db,
                table.get("connectionType", ""),
                schema,
                table.get("name", ""),
            )

            upstream_tables.append(
                UpstreamClass(
                    dataset=table_urn,
                    type=DatasetLineageTypeClass.TRANSFORMED,
                ))
            table_path = f"{project.replace('/', REPLACE_SLASH_CHAR)}/{datasource.get('name', '')}/{table.get('name', '')}"
            self.upstream_tables[table_urn] = (
                table.get("columns", []),
                table_path,
            )

        # Separate loop variable: the original reused the name of the
        # ``datasource`` parameter here.
        for upstream_ds in datasource.get("upstreamDatasources") or []:
            datasource_urn = builder.make_dataset_urn(self.platform,
                                                      upstream_ds["id"],
                                                      self.config.env)
            upstream_tables.append(
                UpstreamClass(
                    dataset=datasource_urn,
                    type=DatasetLineageTypeClass.TRANSFORMED,
                ))

        return upstream_tables

    def emit_custom_sql_datasources(self) -> Iterable[MetadataWorkUnit]:
        """
        Emit work units for the custom SQL tables collected while processing datasources.

        Only the ids stored in ``self.custom_sql_ids_being_used`` are queried
        (``idWithin`` filter). For every unique custom SQL node the generator
        yields its lineage proposals first, then the dataset snapshot event and
        finally a ``subTypes`` proposal.
        """
        # The number of tracked ids is used as the page size of every query.
        count_on_query = len(self.custom_sql_ids_being_used)
        custom_sql_filter = "idWithin: {}".format(
            json.dumps(self.custom_sql_ids_being_used))
        # Initial query with the default count/offset: only total_count and
        # has_next_page are used, the returned nodes are never read.
        custom_sql_connection, total_count, has_next_page = self.get_connection_object(
            custom_sql_graphql_query, "customSQLTablesConnection",
            custom_sql_filter)

        current_count = 0
        while has_next_page:
            # Full page, or the remainder when fewer items are left.
            count = (count_on_query if current_count +
                     count_on_query < total_count else total_count -
                     current_count)
            (
                custom_sql_connection,
                total_count,
                has_next_page,
            ) = self.get_connection_object(
                custom_sql_graphql_query,
                "customSQLTablesConnection",
                custom_sql_filter,
                count,
                current_count,
            )
            current_count += count

            unique_custom_sql = get_unique_custom_sql(
                custom_sql_connection.get("nodes", []))
            for csql in unique_custom_sql:
                csql_id: str = csql.get("id", "")
                csql_urn = builder.make_dataset_urn(self.platform, csql_id,
                                                    self.config.env)
                # Aspects are appended below in a fixed order before the snapshot is emitted.
                dataset_snapshot = DatasetSnapshot(
                    urn=csql_urn,
                    aspects=[],
                )

                # lineage from datasource -> custom sql source #
                yield from self._create_lineage_from_csql_datasource(
                    csql_urn, csql.get("datasources", []))

                # lineage from custom sql -> datasets/tables #
                columns = csql.get("columns", [])
                yield from self._create_lineage_to_upstream_tables(
                    csql_urn, columns)

                # Schema metadata (skipped when the helper returns None)
                schema_metadata = self.get_schema_metadata_for_custom_sql(
                    columns)
                if schema_metadata is not None:
                    dataset_snapshot.aspects.append(schema_metadata)

                # Browse path
                browse_paths = BrowsePathsClass(paths=[
                    f"/{self.config.env.lower()}/{self.platform}/Custom SQL/{csql.get('name', '')}/{csql_id}"
                ])
                dataset_snapshot.aspects.append(browse_paths)

                # Dataset properties
                dataset_properties = DatasetPropertiesClass(
                    name=csql.get("name"), description=csql.get("description"))

                dataset_snapshot.aspects.append(dataset_properties)

                # View properties: the cleaned query text is stored as the view logic
                view_properties = ViewPropertiesClass(
                    materialized=False,
                    viewLanguage="SQL",
                    viewLogic=clean_query(csql.get("query", "")),
                )
                dataset_snapshot.aspects.append(view_properties)

                yield self.get_metadata_change_event(dataset_snapshot)
                yield self.get_metadata_change_proposal(
                    dataset_snapshot.urn,
                    aspect_name="subTypes",
                    aspect=SubTypesClass(typeNames=["View", "Custom SQL"]),
                )

    def get_schema_metadata_for_custom_sql(
            self, columns: List[dict]) -> Optional[SchemaMetadata]:
        """
        Build the schema metadata of a custom SQL table from its columns.

        :param columns: column dicts, read for ``name``, ``remoteType`` and
            ``description``.
        :return: schema metadata holding one field per column, or ``None`` when
            ``columns`` is empty.
        """
        # The list must live outside the loop: resetting it per column (as the
        # previous version did) left only the last column in the schema.
        fields = []
        for field in columns:
            # Datasource fields
            nativeDataType = field.get("remoteType", "UNKNOWN")
            TypeClass = FIELD_TYPE_MAPPING.get(nativeDataType, NullTypeClass)
            fields.append(
                SchemaField(
                    fieldPath=field.get("name", ""),
                    type=SchemaFieldDataType(type=TypeClass()),
                    nativeDataType=nativeDataType,
                    description=field.get("description", ""),
                ))

        if not fields:
            return None

        return SchemaMetadata(
            schemaName="test",
            platform=f"urn:li:dataPlatform:{self.platform}",
            version=0,
            fields=fields,
            hash="",
            platformSchema=OtherSchema(rawSchema=""),
        )

    def _create_lineage_from_csql_datasource(
            self, csql_urn: str,
            csql_datasource: List[dict]) -> Iterable[MetadataWorkUnit]:
        """
        Yield one ``upstreamLineage`` proposal per datasource, each naming the
        custom SQL dataset as that datasource's only upstream.

        :param csql_urn: urn of the custom SQL dataset.
        :param csql_datasource: datasource dicts, read for ``id``.
        """
        for ds in csql_datasource:
            downstream_urn = builder.make_dataset_urn(self.platform,
                                                      ds.get("id", ""),
                                                      self.config.env)
            lineage = UpstreamLineage(upstreams=[
                UpstreamClass(
                    dataset=csql_urn,
                    type=DatasetLineageTypeClass.TRANSFORMED,
                )
            ])
            yield self.get_metadata_change_proposal(
                downstream_urn,
                aspect_name="upstreamLineage",
                aspect=lineage)

    def _create_lineage_to_upstream_tables(
            self, csql_urn: str,
            columns: List[dict]) -> Iterable[MetadataWorkUnit]:
        """
        Yield ``upstreamLineage`` proposals linking the custom SQL dataset to
        the upstream tables of the datasources that reference its columns.

        Each datasource id is processed once; datasources without upstream
        entries produce no proposal.

        :param csql_urn: urn of the custom SQL dataset.
        :param columns: column dicts, read for ``referencedByFields``.
        """
        seen_ids = []
        for column in columns:
            # Datasources reachable through the fields that reference this column.
            referencing = [
                ref.get("datasource")
                for ref in column.get("referencedByFields", {})
                if ref.get("datasource") is not None
            ]

            for ds in referencing:
                if ds.get("id", "") in seen_ids:
                    continue
                seen_ids.append(ds.get("id", ""))

                project_name = ds.get("workbook", {}).get("projectName", "")
                upstreams = self._create_upstream_table_lineage(
                    ds, project_name, True)
                if not upstreams:
                    continue

                yield self.get_metadata_change_proposal(
                    csql_urn,
                    aspect_name="upstreamLineage",
                    aspect=UpstreamLineage(upstreams=upstreams),
                )

    def _get_schema_metadata_for_embedded_datasource(
            self, datasource_fields: List[dict]) -> Optional[SchemaMetadata]:
        """
        Build schema metadata from datasource fields.

        As a side effect every field is passed to ``_track_custom_sql_ids``.

        :param datasource_fields: field dicts; ``name`` is required on each.
        :return: schema metadata, or ``None`` when there are no fields.
        """
        schema_fields = []
        for ds_field in datasource_fields:
            # check datasource - custom sql relations from a field being referenced
            self._track_custom_sql_ids(ds_field)

            native_type = ds_field.get("dataType", "UNKNOWN")
            type_cls = FIELD_TYPE_MAPPING.get(native_type, NullTypeClass)

            field_path = ds_field["name"]
            field_type = SchemaFieldDataType(type=type_cls())
            description = make_description_from_params(
                ds_field.get("description", ""), ds_field.get("formula"))
            # Tags are only derived when tag ingestion is enabled.
            if self.config.ingest_tags:
                tags = get_tags_from_params([
                    ds_field.get("role", ""),
                    ds_field.get("__typename", ""),
                    ds_field.get("aggregation", ""),
                ])
            else:
                tags = None

            schema_fields.append(
                SchemaField(
                    fieldPath=field_path,
                    type=field_type,
                    description=description,
                    nativeDataType=native_type,
                    globalTags=tags,
                ))

        if not schema_fields:
            return None

        return SchemaMetadata(
            schemaName="test",
            platform=f"urn:li:dataPlatform:{self.platform}",
            version=0,
            fields=schema_fields,
            hash="",
            platformSchema=OtherSchema(rawSchema=""),
        )

    def get_metadata_change_event(
        self, snap_shot: Union["DatasetSnapshot", "DashboardSnapshot",
                               "ChartSnapshot"]
    ) -> MetadataWorkUnit:
        """
        Wrap a snapshot in a change event work unit and record it on the report.

        :param snap_shot: snapshot to emit; its urn becomes the work unit id.
        :return: the reported work unit.
        """
        event = MetadataChangeEvent(proposedSnapshot=snap_shot)
        unit = MetadataWorkUnit(id=snap_shot.urn, mce=event)
        self.report.report_workunit(unit)
        return unit

    def get_metadata_change_proposal(
        self,
        urn: str,
        aspect_name: str,
        aspect: Union["UpstreamLineage", "SubTypesClass"],
    ) -> MetadataWorkUnit:
        """
        Build an UPSERT proposal for a dataset aspect, wrap it in a work unit
        and record that unit on the report.

        :param urn: urn of the dataset the aspect belongs to.
        :param aspect_name: name of the aspect being written.
        :param aspect: the aspect payload.
        :return: the reported work unit (errors are treated as warnings).
        """
        proposal = MetadataChangeProposalWrapper(
            entityType="dataset",
            changeType=ChangeTypeClass.UPSERT,
            entityUrn=urn,
            aspectName=aspect_name,
            aspect=aspect,
        )
        unit_id = f"tableau-{proposal.entityUrn}-{proposal.aspectName}"
        unit = MetadataWorkUnit(
            id=unit_id,
            mcp=proposal,
            treat_errors_as_warnings=True,
        )
        self.report.report_workunit(unit)
        return unit

    def emit_datasource(self,
                        datasource: dict,
                        workbook: Optional[dict] = None) -> Iterable[MetadataWorkUnit]:
        """
        Emit the work units describing a single datasource.

        Yields, in order: an ``upstreamLineage`` proposal (only when upstream
        entries were found), the dataset snapshot event, a ``subTypes``
        proposal and, for an ``EmbeddedDatasource``, the container link units.

        :param datasource: datasource dict as returned by the metadata query.
        :param workbook: workbook the datasource belongs to; when ``None`` the
            project and owner are read from ``datasource`` itself.
        """
        # Project name and owner come from the workbook when one is given.
        datasource_info = workbook
        if workbook is None:
            datasource_info = datasource

        project = (datasource_info.get("projectName", "").replace(
            "/", REPLACE_SLASH_CHAR) if datasource_info else "")
        datasource_id = datasource.get("id", "")
        datasource_name = f"{datasource.get('name')}.{datasource_id}"
        datasource_urn = builder.make_dataset_urn(self.platform, datasource_id,
                                                  self.config.env)
        # Remember the id so that emit_published_datasources() can filter on it.
        if datasource_id not in self.datasource_ids_being_used:
            self.datasource_ids_being_used.append(datasource_id)

        dataset_snapshot = DatasetSnapshot(
            urn=datasource_urn,
            aspects=[],
        )

        # Browse path
        browse_paths = BrowsePathsClass(paths=[
            f"/{self.config.env.lower()}/{self.platform}/{project}/{datasource.get('name', '')}/{datasource_name}"
        ])
        dataset_snapshot.aspects.append(browse_paths)

        # Ownership
        owner = (self._get_ownership(
            datasource_info.get("owner", {}).get("username", ""))
                 if datasource_info else None)
        if owner is not None:
            dataset_snapshot.aspects.append(owner)

        # Dataset properties
        # Null extract timestamps are replaced by empty strings.
        dataset_props = DatasetPropertiesClass(
            name=datasource.get("name"),
            description=datasource.get("description"),
            customProperties={
                "hasExtracts":
                str(datasource.get("hasExtracts", "")),
                "extractLastRefreshTime":
                datasource.get("extractLastRefreshTime", "") or "",
                "extractLastIncrementalUpdateTime":
                datasource.get("extractLastIncrementalUpdateTime", "") or "",
                "extractLastUpdateTime":
                datasource.get("extractLastUpdateTime", "") or "",
                "type":
                datasource.get("__typename", ""),
            },
        )
        dataset_snapshot.aspects.append(dataset_props)

        # Upstream Tables
        if datasource.get("upstreamTables") is not None:
            # datasource -> db table relations
            upstream_tables = self._create_upstream_table_lineage(
                datasource, project)

            if upstream_tables:
                upstream_lineage = UpstreamLineage(upstreams=upstream_tables)
                yield self.get_metadata_change_proposal(
                    datasource_urn,
                    aspect_name="upstreamLineage",
                    aspect=upstream_lineage,
                )

        # Datasource Fields
        schema_metadata = self._get_schema_metadata_for_embedded_datasource(
            datasource.get("fields", []))
        if schema_metadata is not None:
            dataset_snapshot.aspects.append(schema_metadata)

        yield self.get_metadata_change_event(dataset_snapshot)
        yield self.get_metadata_change_proposal(
            dataset_snapshot.urn,
            aspect_name="subTypes",
            aspect=SubTypesClass(typeNames=["Data Source"]),
        )

        # NOTE(review): workbook is passed on unchecked; presumably embedded
        # datasources are always emitted together with their workbook - confirm.
        if datasource.get("__typename") == "EmbeddedDatasource":
            yield from add_entity_to_container(self.gen_workbook_key(workbook),
                                               "dataset", dataset_snapshot.urn)

    def emit_published_datasources(self) -> Iterable[MetadataWorkUnit]:
        """
        Page through the published datasources whose ids are listed in
        ``self.datasource_ids_being_used`` and emit each of them.
        """
        # The number of tracked ids is used as the page size of every query.
        page_size = len(self.datasource_ids_being_used)
        datasource_filter = "idWithin: {}".format(
            json.dumps(self.datasource_ids_being_used))

        # Initial query with the default count/offset: only the total and the
        # next-page flag are used, the returned nodes are never read.
        published_datasource_conn, total_count, has_next_page = self.get_connection_object(
            published_datasource_graphql_query,
            "publishedDatasourcesConnection",
            datasource_filter,
        )

        offset = 0
        while has_next_page:
            # Full page, or whatever is left on the last page.
            page_len = min(page_size, total_count - offset)
            published_datasource_conn, total_count, has_next_page = self.get_connection_object(
                published_datasource_graphql_query,
                "publishedDatasourcesConnection",
                datasource_filter,
                page_len,
                offset,
            )
            offset += page_len

            for datasource in published_datasource_conn.get("nodes", []):
                yield from self.emit_datasource(datasource)

    def emit_upstream_tables(self) -> Iterable[MetadataWorkUnit]:
        """
        Emit one dataset snapshot event per entry of ``self.upstream_tables``,
        carrying a browse path and the schema built from the stored columns.
        """
        for table_urn, table_info in self.upstream_tables.items():
            columns, path = table_info
            snapshot = DatasetSnapshot(
                urn=table_urn,
                aspects=[],
            )

            # Browse path
            snapshot.aspects.append(
                BrowsePathsClass(
                    paths=[f"/{self.config.env.lower()}/{self.platform}/{path}"]))

            # One schema field per stored column.
            schema_fields = []
            for column in columns:
                native_type = column.get("remoteType", "UNKNOWN")
                type_cls = FIELD_TYPE_MAPPING.get(native_type, NullTypeClass)
                schema_fields.append(
                    SchemaField(
                        fieldPath=column["name"],
                        type=SchemaFieldDataType(type=type_cls()),
                        description="",
                        nativeDataType=native_type,
                    ))

            snapshot.aspects.append(
                SchemaMetadata(
                    schemaName="test",
                    platform=f"urn:li:dataPlatform:{self.platform}",
                    version=0,
                    fields=schema_fields,
                    hash="",
                    platformSchema=OtherSchema(rawSchema=""),
                ))

            yield self.get_metadata_change_event(snapshot)

    # Older tableau versions do not support fetching sheet's upstreamDatasources,
    # This achieves the same effect by using datasource's downstreamSheets
    def get_sheetwise_upstream_datasources(self, workbook: dict) -> dict:
        """Map each sheet id to the set of datasource ids that feed it.

        The mapping is derived from the ``downstreamSheets`` of the workbook's
        embedded datasources first, then of its upstream (published) ones.

        :param workbook: workbook dict; the ``embeddedDatasources`` and
            ``upstreamDatasources`` keys are read and may be absent.
        :return: dict of sheet id -> set of datasource ids.
        """
        by_sheet: dict = {}

        for source_key in ("embeddedDatasources", "upstreamDatasources"):
            for datasource in workbook.get(source_key, []):
                for sheet in datasource.get("downstreamSheets", []):
                    by_sheet.setdefault(sheet.get("id"),
                                        set()).add(datasource.get("id"))

        return by_sheet

    def emit_sheets_as_charts(self,
                              workbook: Dict) -> Iterable[MetadataWorkUnit]:
        """Emit every sheet of ``workbook`` as a chart entity.

        For each sheet this yields the chart's metadata change event, followed
        by the work units that attach the chart to the workbook container.
        Ids of the datasources feeding the sheets are appended to
        ``self.datasource_ids_being_used``.

        :param workbook: workbook dict; the ``sheets``, ``owner``,
            ``projectName`` and ``name`` keys are read here.
        :return: generator of work units.
        """
        sheet_upstream_datasources = self.get_sheetwise_upstream_datasources(
            workbook)
        for sheet in workbook.get("sheets", []):
            chart_snapshot = ChartSnapshot(
                urn=builder.make_chart_urn(self.platform, sheet.get("id")),
                aspects=[],
            )

            creator = workbook.get("owner", {}).get("username", "")
            created_at = sheet.get("createdAt", datetime.now())
            updated_at = sheet.get("updatedAt", datetime.now())
            last_modified = self.get_last_modified(creator, created_at,
                                                   updated_at)

            if sheet.get("path"):
                site_part = f"/site/{self.config.site}" if self.config.site else ""
                sheet_external_url = (
                    f"{self.config.connect_uri}/#{site_part}/views/{sheet.get('path')}"
                )
            elif sheet.get("containedInDashboards"):
                # sheet contained in dashboard: link to the authoring view of
                # the first dashboard that contains it
                site_part = f"/t/{self.config.site}" if self.config.site else ""
                dashboard_path = sheet.get("containedInDashboards")[0].get(
                    "path", "")
                sheet_external_url = f"{self.config.connect_uri}{site_part}/authoring/{dashboard_path}/{sheet.get('name', '')}"
            else:
                # hidden or viz-in-tooltip sheet
                sheet_external_url = None

            # Custom properties: field name -> description built from the
            # field's description and formula.
            fields = {}
            for field in sheet.get("datasourceFields", []):
                description = make_description_from_params(
                    get_field_value_in_sheet(field, "description"),
                    get_field_value_in_sheet(field, "formula"),
                )
                fields[get_field_value_in_sheet(field, "name")] = description

            # datasource urn
            datasource_urn = []
            data_sources = sheet_upstream_datasources.get(
                sheet.get("id"), set())

            for ds_id in data_sources:
                if not ds_id:
                    # skip missing / empty datasource ids
                    continue
                ds_urn = builder.make_dataset_urn(self.platform, ds_id,
                                                  self.config.env)
                datasource_urn.append(ds_urn)
                if ds_id not in self.datasource_ids_being_used:
                    self.datasource_ids_being_used.append(ds_id)

            # Chart Info
            chart_info = ChartInfoClass(
                description="",
                title=sheet.get("name", ""),
                lastModified=last_modified,
                externalUrl=sheet_external_url,
                # sorted so the emitted inputs do not depend on set ordering
                inputs=sorted(datasource_urn),
                customProperties=fields,
            )
            chart_snapshot.aspects.append(chart_info)

            # Browse path. Every component has "/" replaced so that a slash in
            # a name cannot introduce an extra path level; the workbook name
            # is escaped too, matching the dashboard browse path.
            browse_path = BrowsePathsClass(paths=[
                f"/{self.platform}/{workbook.get('projectName', '').replace('/', REPLACE_SLASH_CHAR)}"
                f"/{workbook.get('name', '').replace('/', REPLACE_SLASH_CHAR)}"
                f"/{sheet.get('name', '').replace('/', REPLACE_SLASH_CHAR)}"
            ])
            chart_snapshot.aspects.append(browse_path)

            # Ownership
            owner = self._get_ownership(creator)
            if owner is not None:
                chart_snapshot.aspects.append(owner)

            #  Tags
            tag_list = sheet.get("tags", [])
            if tag_list and self.config.ingest_tags:
                tag_list_str = [
                    t.get("name", "").upper() for t in tag_list
                    if t is not None
                ]
                chart_snapshot.aspects.append(
                    builder.make_global_tag_aspect_with_tag_list(tag_list_str))

            yield self.get_metadata_change_event(chart_snapshot)

            yield from add_entity_to_container(self.gen_workbook_key(workbook),
                                               "chart", chart_snapshot.urn)

    def emit_workbook_as_container(
            self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
        """Emit ``workbook`` as a container entity.

        Each generated work unit is registered with ``self.report`` before it
        is yielded.

        :param workbook: workbook dict; the ``owner``, ``uri``, ``tags``,
            ``name`` and ``description`` keys are read here (plus whatever
            ``gen_workbook_key`` needs).
        :return: generator of the container work units.
        """
        workbook_container_key = self.gen_workbook_key(workbook)
        creator = workbook.get("owner", {}).get("username")

        owner_urn = (builder.make_user_urn(creator) if
                     (creator and self.config.ingest_owner) else None)

        site_part = f"/site/{self.config.site}" if self.config.site else ""

        # The external URL reuses the uri from its "/workbooks/" segment
        # onwards. A missing/empty uri, or one without that segment, yields no
        # external URL instead of raising ValueError.
        workbook_uri = workbook.get("uri") or ""
        marker_pos = workbook_uri.find("/workbooks/")
        workbook_part = workbook_uri[marker_pos:] if marker_pos >= 0 else None
        workbook_external_url = (
            f"{self.config.connect_uri}/#{site_part}{workbook_part}"
            if workbook_part else None)

        tag_list = workbook.get("tags", [])
        tag_list_str = (
            [t.get("name", "").upper() for t in tag_list if t is not None] if
            (tag_list and self.config.ingest_tags) else None)

        container_workunits = gen_containers(
            container_key=workbook_container_key,
            name=workbook.get("name", ""),
            sub_types=["Workbook"],
            description=workbook.get("description"),
            owner_urn=owner_urn,
            external_url=workbook_external_url,
            tags=tag_list_str,
        )

        for wu in container_workunits:
            self.report.report_workunit(wu)
            yield wu

    def gen_workbook_key(self, workbook):
        """Build the ``WorkbookKey`` for ``workbook`` from its ``id``."""
        key_fields = {
            "platform": self.platform,
            "instance": None,
            "workbook_id": workbook["id"],
        }
        return WorkbookKey(**key_fields)

    def emit_dashboards(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
        """Emit every dashboard of ``workbook`` as a dashboard entity.

        For each dashboard this yields its metadata change event, followed by
        the work units that attach it to the workbook container.

        :param workbook: workbook dict; the ``dashboards``, ``owner``,
            ``projectName`` and ``name`` keys are read here.
        :return: generator of work units.
        """
        dashboards = workbook.get("dashboards", [])
        for dashboard in dashboards:
            dashboard_urn = builder.make_dashboard_urn(
                self.platform, dashboard.get("id", ""))
            dashboard_snapshot = DashboardSnapshot(urn=dashboard_urn,
                                                   aspects=[])

            creator = workbook.get("owner", {}).get("username", "")
            last_modified = self.get_last_modified(
                creator,
                dashboard.get("createdAt", datetime.now()),
                dashboard.get("updatedAt", datetime.now()),
            )

            if self.config.site:
                site_part = f"/site/{self.config.site}"
            else:
                site_part = ""
            dashboard_external_url = f"{self.config.connect_uri}/#{site_part}/views/{dashboard.get('path', '')}"

            # "/" would add a browse-path level, so it is replaced in the title
            title = dashboard.get("name", "").replace("/", REPLACE_SLASH_CHAR)

            # One chart urn per sheet shown on the dashboard
            chart_urns = []
            for sheet in dashboard.get("sheets", []):
                chart_urns.append(
                    builder.make_chart_urn(self.platform, sheet.get("id")))

            dashboard_snapshot.aspects.append(
                DashboardInfoClass(
                    description="",
                    title=title,
                    charts=chart_urns,
                    lastModified=last_modified,
                    dashboardUrl=dashboard_external_url,
                    customProperties={},
                ))

            # browse path
            dashboard_snapshot.aspects.append(
                BrowsePathsClass(paths=[
                    f"/{self.platform}/{workbook.get('projectName', '').replace('/', REPLACE_SLASH_CHAR)}"
                    f"/{workbook.get('name', '').replace('/', REPLACE_SLASH_CHAR)}"
                    f"/{title}"
                ]))

            # Ownership
            ownership_aspect = self._get_ownership(creator)
            if ownership_aspect is not None:
                dashboard_snapshot.aspects.append(ownership_aspect)

            yield self.get_metadata_change_event(dashboard_snapshot)

            container_key = self.gen_workbook_key(workbook)
            yield from add_entity_to_container(container_key, "dashboard",
                                               dashboard_snapshot.urn)

    def emit_embedded_datasource(self,
                                 workbook: Dict) -> Iterable[MetadataWorkUnit]:
        """Emit each entry of ``workbook["embeddedDatasources"]`` as a datasource.

        Delegates to ``self.emit_datasource``, passing the workbook along.
        """
        embedded_datasources = workbook.get("embeddedDatasources", [])
        for embedded in embedded_datasources:
            yield from self.emit_datasource(embedded, workbook)

    @lru_cache(maxsize=None)
    def _get_schema(self, schema_provided: str, database: str) -> str:
        schema = schema_provided
        if not schema_provided and database in self.config.default_schema_map:
            schema = self.config.default_schema_map[database]

        return schema

    @lru_cache(maxsize=None)
    def get_last_modified(self, creator: str, created_at: bytes,
                          updated_at: bytes) -> ChangeAuditStamps:
        """Build the created / last-modified audit stamps of an entity.

        :param creator: username used as the actor of both stamps; when it is
            empty an empty ``ChangeAuditStamps`` is returned.
        :param created_at: creation time, either a value ``dp.parse`` can read
            or an already-built ``datetime`` (callers in this class fall back
            to ``datetime.now()`` when the timestamp is missing).
        :param updated_at: last update time, same accepted forms.
        :return: audit stamps whose times are epoch milliseconds.

        NOTE(review): the ``bytes`` annotations are kept for interface
        stability but look inaccurate given what callers pass; confirm.
        """
        if not creator:
            return ChangeAuditStamps()

        def to_epoch_millis(moment) -> int:
            # A datetime fallback must bypass the parser: ``dp`` is presumably
            # dateutil's parser, which only accepts strings or character
            # streams and raises TypeError for a datetime.
            parsed = moment if isinstance(moment,
                                          datetime) else dp.parse(moment)
            return int(parsed.timestamp() * 1000)

        modified_actor = builder.make_user_urn(creator)
        created_ts = to_epoch_millis(created_at)
        modified_ts = to_epoch_millis(updated_at)
        return ChangeAuditStamps(
            created=AuditStamp(time=created_ts, actor=modified_actor),
            lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
        )

    @lru_cache(maxsize=None)
    def _get_ownership(self, user: str) -> Optional[OwnershipClass]:
        """Return an ownership aspect naming ``user`` as data owner.

        Returns ``None`` when owner ingestion is disabled in the config or
        when ``user`` is empty.
        """
        if not (self.config.ingest_owner and user):
            return None

        data_owner = OwnerClass(
            owner=builder.make_user_urn(user),
            type=OwnershipTypeClass.DATAOWNER,
        )
        return OwnershipClass(owners=[data_owner])

    @classmethod
    def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
        """Alternate constructor: parse ``config_dict`` into a ``TableauConfig``
        and build the source from it and the pipeline context."""
        return cls(ctx, TableauConfig.parse_obj(config_dict))

    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
        """Yield all work units of this source.

        Nothing is produced unless a server is set and signed in. Workbooks
        are emitted first; published and custom-SQL datasources follow only
        when ids for them have been collected. A ``MetadataQueryException``
        ends the run and is recorded as a failure on the report.
        """
        server = self.server
        signed_in = server is not None and server.is_signed_in()
        if not signed_in:
            return

        try:
            yield from self.emit_workbooks(self.config.workbooks_page_size)

            if self.datasource_ids_being_used:
                yield from self.emit_published_datasources()

            if self.custom_sql_ids_being_used:
                yield from self.emit_custom_sql_datasources()
        except MetadataQueryException as md_exception:
            failure_reason = f"Unable to retrieve metadata from tableau. Information: {str(md_exception)}"
            self.report.report_failure(key="tableau-metadata",
                                       reason=failure_reason)

    def get_report(self) -> SourceReport:
        """Return the report object held in ``self.report``."""
        return self.report

Пример #7

Показать файл

class TableauHook(BaseHook):
    """
    Connects to the Tableau Server Instance and allows to communicate with it.
    .. see also:: https://tableau.github.io/server-client-python/docs/

    :param site_id: The id of the site where the workbook belongs to.
        It will connect to the default site if you don't provide an id.
    :type site_id: Optional[str]
    :param tableau_conn_id: The :ref:`Tableau Connection id <howto/connection:tableau>`
        containing the credentials to authenticate to the Tableau Server.
    :type tableau_conn_id: str
    """

    conn_name_attr = 'tableau_conn_id'
    default_conn_name = 'tableau_default'
    conn_type = 'tableau'
    hook_name = 'Tableau'

    def __init__(self,
                 site_id: Optional[str] = None,
                 tableau_conn_id: str = default_conn_name) -> None:
        super().__init__()
        self.tableau_conn_id = tableau_conn_id
        self.conn = self.get_connection(self.tableau_conn_id)
        # An explicit site_id wins over the one stored in the connection extras.
        self.site_id = site_id or self.conn.extra_dejson.get('site_id', '')
        self.server = Server(self.conn.host)
        verify = self.conn.extra_dejson.get('verify', 'True')
        # Only strings go through strtobool: a non-string value (e.g. a JSON
        # boolean in the extras) is passed through unchanged rather than
        # failing inside strtobool.
        if isinstance(verify, str):
            try:
                verify = bool(strtobool(verify))
            except ValueError:
                # Not a boolean-like string: keep it as given (presumably a
                # path to a CA bundle; confirm against the HTTP client used).
                pass
        self.server.add_http_options(
            options_dict={
                'verify': verify,
                'cert': self.conn.extra_dejson.get('cert', None)
            })
        self.server.use_server_version()
        self.tableau_conn = None

    def __enter__(self):
        """Sign in (once) when entering the context and return the hook itself."""
        if not self.tableau_conn:
            self.tableau_conn = self.get_conn()
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Sign out from the server when leaving the context."""
        self.server.auth.sign_out()

    def get_conn(self) -> Auth.contextmgr:
        """
        Signs in to the Tableau Server and automatically signs out if used as ContextManager.
        :return: an authorized Tableau Server Context Manager object.
        :rtype: tableauserverclient.server.Auth.contextmgr
        :raises NotImplementedError: if the connection holds neither login/password
            nor a token name with a personal access token.
        """
        # Login/password takes precedence over a personal access token.
        if self.conn.login and self.conn.password:
            return self._auth_via_password()
        if 'token_name' in self.conn.extra_dejson and 'personal_access_token' in self.conn.extra_dejson:
            return self._auth_via_token()
        raise NotImplementedError(
            'No Authentication method found for given Credentials!')

    def _auth_via_password(self) -> Auth.contextmgr:
        """Sign in with the connection's login and password."""
        tableau_auth = TableauAuth(username=self.conn.login,
                                   password=self.conn.password,
                                   site_id=self.site_id)
        return self.server.auth.sign_in(tableau_auth)

    def _auth_via_token(self) -> Auth.contextmgr:
        """Sign in with the token name and personal access token from the extras."""
        tableau_auth = PersonalAccessTokenAuth(
            token_name=self.conn.extra_dejson['token_name'],
            personal_access_token=self.conn.
            extra_dejson['personal_access_token'],
            site_id=self.site_id,
        )
        return self.server.auth.sign_in_with_personal_access_token(
            tableau_auth)

    def get_all(self, resource_name: str) -> Pager:
        """
        Get all items of the given resource.
        .. see also:: https://tableau.github.io/server-client-python/docs/page-through-results

        :param resource_name: The name of the resource to paginate.
            For example: jobs or workbooks
        :type resource_name: str
        :return: all items by returning a Pager.
        :rtype: tableauserverclient.Pager
        """
        resource = getattr(self.server, resource_name)
        return Pager(resource.get)