Пример #1
0
def main(args=None) -> None:
    """
    The command line interface entry point.

    :param args: the command line arguments for sqllineage command
    """
    parser = argparse.ArgumentParser(prog="sqllineage",
                                     description="SQL Lineage Parser.")
    parser.add_argument("-e",
                        metavar="<quoted-query-string>",
                        help="SQL from command line")
    parser.add_argument("-f", metavar="<filename>", help="SQL from files")
    parser.add_argument(
        "-v",
        "--verbose",
        help="increase output verbosity, show statement level lineage result",
        action="store_true",
    )
    parser.add_argument(
        "-g",
        "--graph-visualization",
        help="show graph visualization of the lineage within a webserver",
        action="store_true",
    )
    parser.add_argument(
        "-p",
        help="the port visualization webserver will be listening on",
        type=int,
        default=DEFAULT_PORT,
        metavar="<port_number>{0..65536}",
    )
    args = parser.parse_args(args)
    if args.e and args.f:
        logging.warning(
            "Both -e and -f options are specified. -e option will be ignored")
    if args.f or args.e:
        sql = extract_sql_from_args(args)
        runner = LineageRunner(
            sql,
            verbose=args.verbose,
            draw_options={
                "p": args.p,
                "f": args.f if args.f else None
            },
        )
        if args.graph_visualization:
            runner.draw()
        else:
            print(runner)
    elif args.graph_visualization:
        return draw_lineage_graph(
            **{
                "p":
                args.p,
                "f":
                os.path.join(os.path.dirname(__file__),
                             "data/tpcds/query01.sql"),
            })
    else:
        parser.print_help()
Пример #2
0
def test_runner_dummy():
    runner = LineageRunner(
        """insert into tab2 select col1, col2, col3, col4, col5, col6 from tab1;
insert overwrite table tab3 select * from tab2""",
        verbose=True,
    )
    assert str(runner)
    assert runner.to_cytoscape() is not None
Пример #3
0
def lineage():
    # this is to avoid circular import
    from sqllineage.runner import LineageRunner

    req_args = Namespace(**request.get_json())
    sql = extract_sql_from_args(req_args)
    lr = LineageRunner(sql, verbose=True)
    resp = {"verbose": str(lr), "dag": lr.to_cytoscape()}
    return jsonify(resp)
Пример #4
0
def lineage():
    # this is to avoid circular import
    from sqllineage.runner import LineageRunner

    req_args = Namespace(**request.get_json())
    sql = extract_sql_from_args(req_args)
    resp = LineageRunner(sql).to_cytoscape()
    return jsonify(resp)
Пример #5
0
def helper(sql, source_tables=None, target_tables=None):
    lp = LineageRunner(sql)
    assert set(lp.source_tables) == (
        set() if source_tables is None else {Table(t) for t in source_tables}
    )
    assert set(lp.target_tables) == (
        set() if target_tables is None else {Table(t) for t in target_tables}
    )
Пример #6
0
def test_dummy():
    assert str(
        LineageRunner(
            """insert into tab2 select col1, col2, col3, col4, col5, col6 from tab1;
insert overwrite table tab3 select * from tab2""",
            verbose=True,
        ))
    with pytest.raises(SystemExit) as e:
        main()
    assert e.value.code == 2
Пример #7
0
    def get_datasource_urn(self, card_details):
        platform, database_name = self.get_datasource_from_id(
            card_details.get("database_id", "")
        )
        query_type = card_details.get("dataset_query", {}).get("type", {})
        source_paths = set()

        if query_type == "query":
            source_table_id = (
                card_details.get("dataset_query", {})
                .get("query", {})
                .get("source-table")
            )
            if source_table_id is not None:
                schema_name, table_name = self.get_source_table_from_id(source_table_id)
                if table_name:
                    source_paths.add(
                        f"{schema_name + '.' if schema_name else ''}{table_name}"
                    )
        else:
            try:
                raw_query = (
                    card_details.get("dataset_query", {})
                    .get("native", {})
                    .get("query", "")
                )
                parser = LineageRunner(raw_query)

                for table in parser.source_tables:
                    sources = str(table).split(".")
                    source_schema, source_table = sources[-2], sources[-1]
                    if source_schema == "<default>":
                        source_schema = str(self.config.default_schema)

                    source_paths.add(f"{source_schema}.{source_table}")
            except Exception as e:
                self.report.report_failure(
                    key="metabase-query",
                    reason=f"Unable to retrieve lineage from query. "
                    f"Query: {raw_query} "
                    f"Reason: {str(e)} ",
                )
                return None

        # Create dataset URNs
        dataset_urn = []
        dbname = f"{database_name + '.' if database_name else ''}"
        source_tables = list(map(lambda tbl: f"{dbname}{tbl}", source_paths))
        dataset_urn = [
            builder.make_dataset_urn(platform, name, self.config.env)
            for name in source_tables
        ]

        return dataset_urn
Пример #8
0
def test_dummy(_):
    assert str(
        LineageRunner(
            """insert into tab2 select col1, col2, col3, col4, col5, col6 from tab1;
insert overwrite table tab3 select * from tab2""",
            verbose=True,
        ))
    main([])
    main(["-e", "select * from dual"])
    main(["-f", __file__])
    main(["-e", "select * from dual", "-f", __file__])
    main([
        "-e",
        "insert overwrite table tab1 select * from tab1 union select * from tab2",
        "-g",
    ])
Пример #9
0
    def _get_sources_from_query(self, db_name: str, query: str) -> List[LineageDataset]:
        sources = list()

        parser = LineageRunner(query)

        for table in parser.source_tables:
            source_schema, source_table = str(table).split(".")
            if source_schema == "<default>":
                source_schema = str(self.config.default_schema)

            source = LineageDataset(
                platform=LineageDatasetPlatform.REDSHIFT,
                path=f"{db_name}.{source_schema}.{source_table}",
            )
            sources.append(source)

        return sources
Пример #10
0
    def _get_source_from_query(self, raw_query: str) -> set:
        query = self._replace_definitions(raw_query)
        parser = LineageRunner(query)
        source_paths = set()
        try:
            for table in parser.source_tables:
                sources = str(table).split(".")
                source_schema, source_table = sources[-2], sources[-1]
                if source_schema == "<default>":
                    source_schema = str(self.config.default_schema)

                source_paths.add(f"{source_schema}.{source_table}")
        except Exception as e:
            self.report.report_failure(
                key="mode-query",
                reason=f"Unable to retrieve lineage from query. "
                f"Query: {raw_query} "
                f"Reason: {str(e)} ",
            )

        return source_paths
Пример #11
0
def test_insert_without_table():
    with pytest.raises(SQLLineageException):
        LineageRunner("insert into select * from foo")
Пример #12
0
def test_split_statements_with_desc():
    sql = """SELECT 1;

DESC tab1;"""
    assert len(LineageRunner(sql).statements()) == 2
Пример #13
0
from sqllineage.runner import LineageRunner
'''
数据血缘关系梳理工具:
1.开源工具sqllineage
    pip install sqllineage
    写代码
2.在线工具
https://sqlflow.gudusoft.com/#/
'''

sql = """

"""
result = LineageRunner(sql)
print(result)
# 打印result,会产出下面的信息
# Statements(#): 2
# Source Tables:
#    db1.table12
#    db2.table21
#    db2.table22
# Target Tables:
#    db3.table3
# Intermediate Tables:
#    db1.table11

# 也可以直接获取各个源表
for tbl in result.source_tables:
    print(tbl)
# db1.table12
# db2.table21
Пример #14
0
def test_split_statements_with_show_create_table():
    sql = """SELECT 1;

SHOW CREATE TABLE tab1;"""
    assert len(LineageRunner(sql).statements()) == 2
Пример #15
0
def test_statements_trim_comment():
    comment = "------------------\n"
    sql = "select * from dual;"
    assert LineageRunner(comment +
                         sql).statements(strip_comments=True)[0] == sql
Пример #16
0
def test_split_statements_with_comment():
    sql = """SELECT 1;

-- SELECT 2;"""
    assert len(LineageRunner(sql).statements()) == 1
Пример #17
0
def test_split_statements_with_heading_and_ending_new_line():
    sql = "\nSELECT * FROM tab1;\nSELECT * FROM tab2;\n"
    assert len(LineageRunner(sql).statements()) == 2
Пример #18
0
def test_with_cte_without_table():
    with pytest.raises(SQLLineageException):
        LineageRunner(
            "with as select * from foo insert into table bar select * from foo"
        )
Пример #19
0
def test_select_without_table():
    with pytest.raises(SQLLineageException):
        LineageRunner("select * from where foo='bar'")
Пример #20
0
def test_split_statements():
    sql = "SELECT * FROM tab1; SELECT * FROM tab2;"
    assert len(LineageRunner(sql).statements()) == 2