def _iterate_over_cols(self,
                       parent: str,
                       column: Dict,
                       cols: List[ColumnMetadata],
                       total_cols: int) -> int:
    # `column` is a schema field dict (indexed by 'name', 'type' and, for
    # RECORD types, 'fields'), not a string.
    if len(parent) > 0:
        col_name = '{parent}.{field}'.format(parent=parent, field=column['name'])
    else:
        col_name = column['name']

    if column['type'] == 'RECORD':
        col = ColumnMetadata(name=col_name,
                             description=column.get('description', ''),
                             col_type=column['type'],
                             sort_order=total_cols)
        cols.append(col)
        total_cols += 1
        for field in column['fields']:
            total_cols = self._iterate_over_cols(col_name, field, cols, total_cols)
        return total_cols
    else:
        col = ColumnMetadata(name=col_name,
                             description=column.get('description', ''),
                             col_type=column['type'],
                             sort_order=total_cols)
        cols.append(col)
        return total_cols + 1
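# A minimal standalone sketch (names and schema shape assumed, not from the
# source) of the flattening that _iterate_over_cols performs: a RECORD field
# gets its own entry and each child field is emitted under a dotted name.
nested_field = {
    'name': 'address',
    'type': 'RECORD',
    'fields': [
        {'name': 'city', 'type': 'STRING'},
        {'name': 'zip', 'type': 'STRING'},
    ],
}

def flatten(parent, column, out, order):
    # Simplified re-implementation of the traversal above, for illustration.
    name = '{}.{}'.format(parent, column['name']) if parent else column['name']
    out.append((name, column['type'], order))
    order += 1
    for field in column.get('fields', []):
        order = flatten(name, field, out, order)
    return order

cols = []
flatten('', nested_field, cols, 0)
# cols == [('address', 'RECORD', 0), ('address.city', 'STRING', 1),
#          ('address.zip', 'STRING', 2)]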
def test_multiple_results(self, mock_client):
    col1_name = "col1"
    col2_name = "col2"
    col1_type = "int"
    col2_type = "char"
    col1_sort_order = "1"
    col2_sort_order = "2"

    mock_client.return_value.instance.return_value.database.return_value \
        .snapshot.return_value.__enter__.return_value \
        .execute_sql.return_value = [
            [col1_name, col1_type, col1_sort_order, self.schema, self.table],
            [col2_name, col2_type, col2_sort_order, self.schema, self.table],
        ]

    extractor = SpannerMetadataExtractor()
    extractor.init(
        Scoped.get_scoped_conf(conf=self.conf, scope=extractor.get_scope())
    )
    result = extractor.extract()

    assert result.database == self.connection_name
    assert result.cluster == self.project_id
    assert result.schema == f"{self.instance_id}.{self.database_id}"
    assert result.name == self.table
    self.assertEqual(
        result.columns[0].__repr__(),
        ColumnMetadata(
            col1_name, None, col1_type, col1_sort_order, None
        ).__repr__(),
    )
    self.assertEqual(
        result.columns[1].__repr__(),
        ColumnMetadata(
            col2_name, None, col2_type, col2_sort_order, None
        ).__repr__(),
    )
def test_get_all_table_metadata_from_information_schema(
        self, mock_settings) -> None:
    self.engine.init(self.conf)
    self.engine.execute = MagicMock(
        side_effect=presto_engine_execute_side_effect)
    mock_columns = [
        ColumnMetadata(
            name=MOCK_INFORMATION_SCHEMA_RESULT_1['col_name'],
            description=MOCK_INFORMATION_SCHEMA_RESULT_1['col_description'],
            col_type=MOCK_INFORMATION_SCHEMA_RESULT_1['col_type'],
            sort_order=MOCK_INFORMATION_SCHEMA_RESULT_1['col_sort_order'],
            is_partition_column=None),
        ColumnMetadata(
            name=MOCK_INFORMATION_SCHEMA_RESULT_2['col_name'],
            description=MOCK_INFORMATION_SCHEMA_RESULT_2['col_description'],
            col_type=MOCK_INFORMATION_SCHEMA_RESULT_2['col_type'],
            sort_order=MOCK_INFORMATION_SCHEMA_RESULT_2['col_sort_order'],
            is_partition_column=None),
    ]
    expected = TableMetadata(
        database=MOCK_DATABASE_NAME,
        cluster=MOCK_CLUSTER_NAME,
        schema=MOCK_SCHEMA_NAME,
        name=MOCK_TABLE_NAME,
        columns=mock_columns,
        is_view=bool(MOCK_INFORMATION_SCHEMA_RESULT_1['is_view']),
    )

    results = self.engine.get_all_table_metadata_from_information_schema(
        cluster=MOCK_CLUSTER_NAME)
    result = next(results)

    self.maxDiff = None
    self.assertEqual(result.__repr__(), expected.__repr__())
def _get_extract_iter(self) -> Iterator[TableMetadata]:
    for row in self._get_raw_extract_iter():
        columns = []
        # Partition keys are appended after the regular columns so that sort
        # order stays contiguous.
        all_columns = row["StorageDescriptor"]["Columns"] + row.get("PartitionKeys", [])
        for i, column in enumerate(all_columns):
            columns.append(
                ColumnMetadata(
                    column["Name"],
                    column.get("Comment"),
                    column["Type"],
                    i,
                ))

        catalog, schema, table = self._parse_location(
            location=row["StorageDescriptor"]["Location"], name=row["Name"])

        if self._connection_name:
            database = self._connection_name + "/" + row["DatabaseName"]
        else:
            database = row["DatabaseName"]

        yield TableMetadata(
            database,
            catalog,
            schema,
            table,
            row.get("Description") or row.get("Parameters", {}).get("comment"),
            columns,
            row.get("TableType") == "VIRTUAL_VIEW",
        )
def test_table_metadata_extraction_with_single_result(self, mock1, mock2) -> None:
    extractor = PrestoLoopExtractor()
    conf = self.conf.copy()
    conf.put("is_table_metadata_enabled", True)
    extractor.init(conf)
    extractor.execute = MagicMock(
        side_effect=presto_engine_execute_side_effect)
    results = extractor.extract()

    is_partition_column = MOCK_COLUMN_RESULT[2] == "partition key"
    expected = TableMetadata(
        database=extractor._database,
        cluster=None,
        schema=MOCK_SCHEMA_NAME,
        name=MOCK_TABLE_NAME,
        columns=[
            ColumnMetadata(
                name=MOCK_COLUMN_RESULT[0],
                description=MOCK_COLUMN_RESULT[3],
                col_type=MOCK_COLUMN_RESULT[1],
                sort_order=0,
                is_partition_column=is_partition_column,
            )
        ],
    )
    self.assertEqual(results.__repr__(), expected.__repr__())
def test_transformed_record_contains_components(self):
    """Verify the transformed markdown blob contains every record component."""
    column = ColumnMetadata(
        name=COLUMN,
        col_type="Integer",
        sort_order=0,
        description=COLUMN_DESCRIPTION,
    )
    record = TableMetadata(
        database=DATABASE,
        cluster=CLUSTER,
        schema=SCHEMA,
        name=TABLE,
        columns=[column],
    )
    components = [
        DATABASE,
        CLUSTER,
        SCHEMA,
        TABLE,
        COLUMN,
        COLUMN_DESCRIPTION,
    ]

    transformer = MarkdownTransformer()
    transformer.init(self._conf)
    transformed_record = transformer.transform(record)
    markdown_blob = transformed_record.markdown_blob
    transformer.close()

    has_components = all(x in markdown_blob for x in components)
    self.assertTrue(has_components)
def _get_extract_iter(self) -> Iterator[TableMetadata]:
    """
    Using itertools.groupby and raw level iterator, it groups to table
    and yields TableMetadata
    :return:
    """
    for _, group in groupby(self._get_raw_extract_iter(), self._get_table_key):
        columns = []
        for row in group:
            column_description = (
                unidecode(row['col_description'])
                if row['col_description'] else None
            )
            last_row = row
            columns.append(
                ColumnMetadata(name=row['col_name'],
                               description=column_description,
                               col_type=row['col_type'],
                               sort_order=row['col_sort_order']))

        description = (
            unidecode(last_row['description'])
            if last_row['description'] else None
        )

        yield TableMetadata(database=self._database,
                            cluster=last_row['cluster'],
                            schema=last_row['schema'],
                            name=last_row['name'],
                            description=description,
                            columns=columns,
                            is_view=last_row['is_view'] == 'true')
def _get_extract_iter(self) -> Iterator[TableMetadata]:
    """
    Using itertools.groupby and raw level iterator, it groups to table
    and yields TableMetadata
    :return:
    """
    for _, group in groupby(self._get_raw_extract_iter(), self._get_table_key):
        columns = []
        for row in group:
            last_row = row
            columns.append(
                ColumnMetadata(
                    row["col_name"],
                    row["col_description"],
                    row["data_type"],
                    row["col_sort_order"],
                )
            )

        yield TableMetadata(
            self._database,
            last_row["cluster"],
            last_row["schema"],
            last_row["name"],
            last_row["description"],
            columns,
        )
def _get_extract_iter(self) -> Iterator[TableMetadata]:
    """
    Using itertools.groupby and raw level iterator, it groups to table
    and yields TableMetadata
    :return:
    """
    for _, group in groupby(self._get_raw_extract_iter(), self._get_table_key):
        columns = []
        for row in group:
            last_row = row
            columns.append(
                ColumnMetadata(
                    name=row["column_name"],
                    description=None,
                    col_type=row["column_type"],
                    sort_order=row["column_sort_order"],
                ))

        yield TableMetadata(
            database=self._database,
            cluster=None,
            schema=last_row["schema_name"],
            name=last_row["table_name"],
            description=None,
            columns=columns,
            is_view=last_row["table_type"] == "V",
        )
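# A self-contained sketch (row shape and key function are assumptions) of the
# itertools.groupby pattern the extract iterators above rely on: rows arrive
# ordered by table, so consecutive rows sharing a table key form one group,
# and each group becomes one TableMetadata.
from itertools import groupby

rows = [
    {"table": "schema1.orders", "col_name": "id", "col_sort_order": 0},
    {"table": "schema1.orders", "col_name": "amount", "col_sort_order": 1},
    {"table": "schema1.users", "col_name": "id", "col_sort_order": 0},
]

for table_key, group in groupby(rows, key=lambda row: row["table"]):
    print(table_key, [row["col_name"] for row in group])
# schema1.orders ['id', 'amount']
# schema1.users ['id']

# Note that groupby only merges *adjacent* rows, which is why the extraction
# SQL must ORDER BY the table key; unsorted input would split one table into
# several TableMetadata records.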
def _iterate_over_cols(
    self,
    tags_dict: dict,
    parent: str,
    column: dict,
    cols: List[ColumnMetadata],
    total_cols: int,
) -> int:
    # `column` is a schema field dict, not a string.
    if len(parent) > 0:
        col_name = "{parent}.{field}".format(parent=parent, field=column["name"])
    else:
        col_name = column["name"]

    tags = None
    if tags_dict and "tags" in tags_dict:
        for tag in tags_dict["tags"]:
            if "column" in tag and tag["column"] == col_name:
                tags = tag

    if column["type"] == "RECORD":
        col = ColumnMetadata(
            name=col_name,
            description=column.get("description", ""),
            col_type=column["type"],
            sort_order=total_cols,
            tags=tags,
        )
        cols.append(col)
        total_cols += 1
        for field in column["fields"]:
            total_cols = self._iterate_over_cols(
                tags_dict, col_name, field, cols, total_cols)
        return total_cols
    else:
        col = ColumnMetadata(
            name=col_name,
            description=column.get("description", ""),
            col_type=column["type"],
            sort_order=total_cols,
            tags=tags,
        )
        cols.append(col)
        return total_cols + 1
def get_table_metadata(
    self,
    schema: str,
    table: str,
    cluster: Optional[str] = None,
    is_view_query_enabled: Optional[bool] = False,
) -> TableMetadata:
    # Format table and schema addresses for queries.
    full_schema_address = self._get_full_schema_address(cluster, schema)
    full_table_address = "{}.{}".format(full_schema_address, table)

    # Execute query that gets column type + partition information.
    columns_query = "show columns in {}".format(full_table_address)
    column_query_results = self.execute(columns_query, has_header=True)
    column_query_field_names = next(column_query_results)
    columns = []
    for i, column_query_result in enumerate(column_query_results):
        column_dict = dict(zip(column_query_field_names, column_query_result))
        columns.append(
            ColumnMetadata(
                name=column_dict["Column"],
                description=column_dict["Comment"],
                col_type=column_dict["Type"],
                sort_order=i,
                is_partition_column=column_dict["Extra"] == "partition key",
            )
        )

    if is_view_query_enabled:
        # Execute query that returns if table is a view.
        view_query = """
            select table_type
            from information_schema.tables
            where table_schema='{table_schema}'
            and table_name='{table_name}'
        """.format(table_schema=schema, table_name=table)
        view_query_results = self.execute(view_query, has_header=False)
        is_view = next(view_query_results)[0] == "VIEW"
    else:
        is_view = False

    return TableMetadata(
        database=self._database,
        cluster=cluster,
        schema=schema,
        name=table,
        description=None,
        columns=columns,
        is_view=is_view,
    )
def _get_extract_iter(self):
    with self.driver.session() as session:
        if not hasattr(self, "results"):
            self.results = session.read_transaction(self._execute_query)
        for result in self.results:
            # Parse watermark information.
            partition_columns = []
            for watermark in result["watermarks"]:
                partition_columns.append(watermark["partition_key"])

            # Parse column information.
            column_names = result["column_names"]
            column_descriptions = result["column_descriptions"]
            column_types = result["column_types"]
            column_sort_orders = result["column_sort_orders"]
            zipped_columns = zip_longest(
                column_names, column_descriptions, column_types, column_sort_orders)

            column_metadatas = []
            for (
                column_name,
                column_description,
                column_type,
                column_sort_order,
            ) in zipped_columns:
                is_partition_column = column_name in partition_columns
                column_metadatas.append(
                    ColumnMetadata(
                        name=column_name,
                        description=column_description,
                        col_type=column_type,
                        sort_order=column_sort_order,
                        is_partition_column=is_partition_column,
                    ))

            yield TableMetadata(
                database=result["database"],
                cluster=result["cluster"],
                schema=result["schema"],
                name=result["name"],
                description=result["description"],
                columns=column_metadatas,
                is_view=result["is_view"],
                tags=result["tags"],
            )
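# A small sketch (inputs assumed) of the zip_longest behavior the extractor
# above depends on: if the description list is shorter than the other column
# lists, missing entries are padded with None rather than truncating the
# columns, as plain zip() would.
from itertools import zip_longest

names = ["id", "amount", "ds"]
descriptions = ["primary key"]  # only the first column is documented
types = ["bigint", "double", "varchar"]
sort_orders = [0, 1, 2]

for name, description, col_type, sort_order in zip_longest(
        names, descriptions, types, sort_orders):
    print(name, description, col_type, sort_order)
# id primary key bigint 0
# amount None double 1
# ds None varchar 2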
def test_extraction_with_single_result(self):
    with patch.object(splice_machine_metadata_extractor,
                      "splice_connect") as mock_connect:
        column = ColumnMetadata("column1", None, "int", 0)
        table = TableMetadata(
            self.DATABASE,
            self.CLUSTER,
            "test_schema",
            "test_table",
            None,
            [column],
        )

        # Connection returns a cursor.
        mock_cursor = MagicMock()
        mock_execute = MagicMock()
        mock_fetchall = MagicMock()

        # self.connection = splice_connect(...)
        mock_connection = MagicMock()
        mock_connect.return_value = mock_connection

        # self.cursor = self.connection.cursor()
        mock_connection.cursor.return_value = mock_cursor

        # self.cursor.execute(...)
        mock_cursor.execute = mock_execute

        # for row in self.cursor.fetchall()
        mock_cursor.fetchall = mock_fetchall
        mock_fetchall.return_value = [[
            table.schema,
            table.name,
            "not-a-view",
            column.name,
            column.sort_order,
            column.type,
        ]]

        extractor = self.Extractor()
        extractor.init(self.conf)
        actual = extractor.extract()
        expected = table

        self.assertEqual(expected.__repr__(), actual.__repr__())
        self.assertIsNone(extractor.extract())
def _get_extract_iter(self):
    # type: () -> Iterator[TableMetadata]
    """
    Using itertools.groupby and raw level iterator, it groups to table
    and yields TableMetadata
    :return:
    """
    with self.database.snapshot() as snapshot:
        results = snapshot.execute_sql(self.sql_stmt)
        header = SpannerMetadataExtractor.HEADER
        headered_results = [dict(zip(header, result)) for result in results]

        for _, group in groupby(headered_results, self._get_table_key):
            columns = []
            for row in group:
                last_row = row
                columns.append(
                    ColumnMetadata(
                        row["col_name"],
                        None,
                        row["col_type"],
                        row["col_sort_order"],
                    ))

            schema = "{}.{}".format(self._instance_id, self._database_id)
            yield TableMetadata(
                database=self._connection_name or "spanner",
                cluster=self._project_id,
                schema=schema,
                name=last_row["name"],
                description=None,
                columns=columns,
            )
def get_all_table_metadata_from_information_schema(
    self,
    cluster: Optional[str] = None,
    where_clause_suffix: str = "",
):
    unformatted_query = """
    SELECT
      a.table_catalog AS cluster
    , a.table_schema AS schema
    , a.table_name AS name
    , NULL AS description
    , a.column_name AS col_name
    , a.ordinal_position AS col_sort_order
    , IF(a.extra_info = 'partition key', 1, 0) AS is_partition_col
    , a.comment AS col_description
    , a.data_type AS col_type
    , IF(b.table_name IS NOT NULL, 1, 0) AS is_view
    FROM {cluster_prefix}information_schema.columns a
    LEFT JOIN {cluster_prefix}information_schema.views b
        ON a.table_catalog = b.table_catalog
        AND a.table_schema = b.table_schema
        AND a.table_name = b.table_name
    {where_clause_suffix}
    """

    LOGGER.info(
        "Pulling all table metadata in bulk from "
        "information_schema in cluster name: {}".format(cluster)
    )

    if cluster is not None:
        cluster_prefix = cluster + "."
    else:
        cluster_prefix = ""

    formatted_query = unformatted_query.format(
        cluster_prefix=cluster_prefix, where_clause_suffix=where_clause_suffix
    )
    LOGGER.info("SQL for presto: {}".format(formatted_query))

    query_results = self.execute(formatted_query, is_dict_return_enabled=True)
    for _, group in groupby(query_results, self._get_table_key):
        columns = []
        for row in group:
            last_row = row
            columns.append(
                ColumnMetadata(
                    row["col_name"],
                    row["col_description"],
                    row["col_type"],
                    row["col_sort_order"],
                )
            )
        yield TableMetadata(
            self._database,
            cluster or self._default_cluster_name,
            last_row["schema"],
            last_row["name"],
            last_row["description"],
            columns,
            is_view=bool(last_row["is_view"]),
        )
def test_extraction_with_single_result(self):
    # type: () -> None
    with patch.object(SQLAlchemyExtractor, "_get_connection") as mock_connection:
        connection = MagicMock()
        mock_connection.return_value = connection
        sql_execute = MagicMock()
        connection.execute = sql_execute
        table = {
            "schema": "test_schema",
            "name": "test_table",
            "description": "a table for testing",
            "cluster": self.conf[SnowflakeMetadataExtractor.CLUSTER_KEY],
            "is_view": "false",
        }

        sql_execute.return_value = [
            self._union(
                {"col_name": "col_id1",
                 "col_type": "number",
                 "col_description": "description of id1",
                 "col_sort_order": 0}, table),
            self._union(
                {"col_name": "col_id2",
                 "col_type": "number",
                 "col_description": "description of id2",
                 "col_sort_order": 1}, table),
            self._union(
                {"col_name": "is_active",
                 "col_type": "boolean",
                 "col_description": None,
                 "col_sort_order": 2}, table),
            self._union(
                {"col_name": "source",
                 "col_type": "varchar",
                 "col_description": "description of source",
                 "col_sort_order": 3}, table),
            self._union(
                {"col_name": "etl_created_at",
                 "col_type": "timestamp_ltz",
                 "col_description": "description of etl_created_at",
                 "col_sort_order": 4}, table),
            self._union(
                {"col_name": "ds",
                 "col_type": "varchar",
                 "col_description": None,
                 "col_sort_order": 5}, table),
        ]

        extractor = SnowflakeMetadataExtractor()
        extractor.init(self.conf)
        actual = extractor.extract()
        expected = TableMetadata(
            "prod",
            "MY_CLUSTER",
            "test_schema",
            "test_table",
            "a table for testing",
            [
                ColumnMetadata("col_id1", "description of id1", "number", 0),
                ColumnMetadata("col_id2", "description of id2", "number", 1),
                ColumnMetadata("is_active", None, "boolean", 2),
                ColumnMetadata("source", "description of source", "varchar", 3),
                ColumnMetadata("etl_created_at",
                               "description of etl_created_at",
                               "timestamp_ltz", 4),
                ColumnMetadata("ds", None, "varchar", 5),
            ],
        )

        self.assertEqual(expected.__repr__(), actual.__repr__())
        self.assertIsNone(extractor.extract())
def test_extraction_with_single_result(self) -> None:
    with patch.object(GlueExtractor, "_search_tables") as mock_search:
        mock_search.return_value = [{
            "Name": "test_catalog_test_schema_test_table",
            "DatabaseName": "test_database",
            "Description": "a table for testing",
            "StorageDescriptor": {
                "Columns": [
                    {
                        "Name": "col_id1",
                        "Type": "bigint",
                        "Comment": "description of id1",
                    },
                    {
                        "Name": "col_id2",
                        "Type": "bigint",
                        "Comment": "description of id2",
                    },
                    {"Name": "is_active", "Type": "boolean"},
                    {
                        "Name": "source",
                        "Type": "varchar",
                        "Comment": "description of source",
                    },
                    {
                        "Name": "etl_created_at",
                        "Type": "timestamp",
                        "Comment": "description of etl_created_at",
                    },
                    {"Name": "ds", "Type": "varchar"},
                ],
                "Location": "test_catalog.test_schema.test_table",
            },
            "PartitionKeys": [
                {
                    "Name": "partition_key1",
                    "Type": "string",
                    "Comment": "description of partition_key1",
                },
            ],
            "TableType": "EXTERNAL_TABLE",
        }]

        extractor = GlueExtractor()
        extractor.init(self.conf)
        actual = extractor.extract()
        expected = TableMetadata(
            "test_database",
            None,
            None,
            "test_catalog_test_schema_test_table",
            "a table for testing",
            [
                ColumnMetadata("col_id1", "description of id1", "bigint", 0),
                ColumnMetadata("col_id2", "description of id2", "bigint", 1),
                ColumnMetadata("is_active", None, "boolean", 2),
                ColumnMetadata("source", "description of source", "varchar", 3),
                ColumnMetadata("etl_created_at",
                               "description of etl_created_at",
                               "timestamp", 4),
                ColumnMetadata("ds", None, "varchar", 5),
                ColumnMetadata("partition_key1",
                               "description of partition_key1",
                               "string", 6),
            ],
            False,
        )

        self.assertEqual(expected.__repr__(), actual.__repr__())
        self.assertIsNone(extractor.extract())