def _handle_target_table_token(self, sub_token: TokenList) -> None:
    """Register the table named by ``sub_token`` as a write target.

    Three token shapes are accepted:

    * ``Function`` -- its first non-comment child must be an ``Identifier``,
      which is added to the write set.
    * ``Comparison`` -- both sides must be ``Identifier``; the left side is
      added to the write set and the right side to the read set.
    * ``Identifier`` -- added to the write set directly.

    :param sub_token: the token to interpret as a target table.
    :raises SQLLineageException: if the token (or the relevant child of it)
        is not an ``Identifier``.
    """

    def unexpected() -> SQLLineageException:
        # The message always describes sub_token itself, even when the
        # offending piece is one of its children.
        return SQLLineageException(
            "An Identifier is expected, got %s[value: %s] instead"
            % (type(sub_token).__name__, sub_token)
        )

    if isinstance(sub_token, Function):
        # "insert into tab (col1, col2) values (...)": sqlparse groups
        # "tab (col1, col2)" as a Function, see
        # https://github.com/andialbrecht/sqlparse/issues/483
        head = sub_token.token_first(skip_cm=True)
        if not isinstance(head, Identifier):
            raise unexpected()
        self._lineage_result.write.add(Table.create(head))
        return

    if isinstance(sub_token, Comparison):
        # "create table tab1 like tab2": sqlparse groups "tab1 like tab2"
        # as a Comparison, see
        # https://github.com/andialbrecht/sqlparse/issues/543
        lhs, rhs = sub_token.left, sub_token.right
        if not isinstance(lhs, Identifier) or not isinstance(rhs, Identifier):
            raise unexpected()
        self._lineage_result.write.add(Table.create(lhs))
        self._lineage_result.read.add(Table.create(rhs))
        return

    if not isinstance(sub_token, Identifier):
        raise unexpected()
    self._lineage_result.write.add(Table.create(sub_token))
def _handle_temp_table_token(self, sub_token: TokenList) -> None:
    """Register intermediate table(s) found in ``sub_token``.

    ``sub_token`` may be a single ``Identifier`` or an ``IdentifierList``.
    Every ``Identifier`` found is added to the intermediate set and then
    passed to ``self._extract_from_dml``. Non-``Identifier`` members of an
    ``IdentifierList`` are skipped silently.

    :param sub_token: the token to interpret as temp table definition(s).
    :raises SQLLineageException: if ``sub_token`` is neither an
        ``Identifier`` nor an ``IdentifierList``.
    """
    if isinstance(sub_token, Identifier):
        candidates = (sub_token,)
    elif isinstance(sub_token, IdentifierList):
        # Lazy filter, so members are still visited one at a time.
        candidates = (
            member for member in sub_token if isinstance(member, Identifier)
        )
    else:
        raise SQLLineageException(
            "An Identifier or IdentifierList is expected, got %s[value: %s] instead"
            % (type(sub_token).__name__, sub_token)
        )

    for temp_table in candidates:
        self._lineage_result.intermediate.add(Table.create(temp_table))
        # NOTE(review): this calls the lowercase `_extract_from_dml`; confirm
        # that method exists on the class alongside `_extract_from_DML`.
        self._extract_from_dml(temp_table)
def _extract_from_ddl_alter(self, stmt: Statement) -> None:
    """Record a table rename described by an ALTER statement.

    A ``(first, second)`` pair is added to the rename set only when the
    statement's top-level tokens contain a keyword normalized to ``RENAME``
    and exactly two ``Identifier`` tokens. Otherwise nothing is recorded.

    :param stmt: the parsed statement to inspect.
    """
    tables = []
    for tok in stmt.tokens:
        if isinstance(tok, Identifier):
            tables.append(Table.create(tok))

    keyword_tokens = [tok for tok in stmt.tokens if tok.is_keyword]
    mentions_rename = any(kw.normalized == "RENAME" for kw in keyword_tokens)
    if not mentions_rename or len(tables) != 2:
        return

    first, second = tables
    self._lineage_result.rename.add((first, second))
def _handle_source_table_token(self, sub_token: TokenList) -> None:
    """Register the table(s) named by ``sub_token`` as read sources.

    * ``Identifier`` -- added to the read set unless its first non-comment
      child is a ``Parenthesis`` (an aliased subquery).
    * ``IdentifierList`` -- every ``Identifier`` member is added to the read
      set; other members are ignored.
    * ``Parenthesis`` -- ignored (an un-aliased subquery).

    :param sub_token: the token to interpret as a source table.
    :raises SQLLineageException: for any other token type.
    """
    if isinstance(sub_token, Identifier):
        # "SELECT col1 FROM (SELECT col2 FROM tab1) dt": the subquery is
        # parsed as an Identifier whose real name would be the alias "dt",
        # so it must not be recorded as a table. See
        # https://github.com/andialbrecht/sqlparse/issues/218
        wraps_subquery = isinstance(
            sub_token.token_first(skip_cm=True), Parenthesis
        )
        if not wraps_subquery:
            self._lineage_result.read.add(Table.create(sub_token))
        return

    if isinstance(sub_token, IdentifierList):
        # Comma-separated tables, i.e. a join written in ANSI-89 syntax.
        for member in sub_token.tokens:
            if isinstance(member, Identifier):
                self._lineage_result.read.add(Table.create(member))
        return

    if isinstance(sub_token, Parenthesis):
        # "SELECT col1 FROM (SELECT col2 FROM tab1)": a subquery without an
        # alias is parsed as a Parenthesis. Invalid in MySQL, but valid in
        # SparkSQL.
        return

    raise SQLLineageException(
        "An Identifier is expected, got %s[value: %s] instead"
        % (type(sub_token).__name__, sub_token)
    )
def _extract_from_ddl_drop(self, stmt: Statement) -> None:
    """Record every table named at the top level of a DROP statement.

    Each ``Identifier`` among ``stmt.tokens`` is converted with
    ``Table.create``; the resulting tables are de-duplicated and then added
    to the drop set.

    :param stmt: the parsed statement to inspect.
    """
    dropped = set()
    for tok in stmt.tokens:
        if isinstance(tok, Identifier):
            dropped.add(Table.create(tok))

    for table in dropped:
        self._lineage_result.drop.add(table)
def _extract_from_DML(self, token: Token) -> None:
    """Walk ``token``'s children and collect read/write/intermediate tables.

    The children are scanned left to right. A keyword token may arm one of
    three "table expected" flags:

    * source -- ``normalized`` matches any regex in ``SOURCE_TABLE_TOKENS``
    * target -- ``normalized`` is in ``TARGET_TABLE_TOKENS``
    * temp   -- ``normalized`` is in ``TEMP_TABLE_TOKENS``

    While a flag is armed, the next child that is not negligible is taken
    as the table and the flag is cleared. If several flags are armed at
    once, source is served first, then target, then temp. Every
    ``TokenList`` child is also descended into recursively.

    :param token: the token whose ``tokens`` are to be scanned.
    :raises SQLLineageException: if the token found where a table is
        expected has an unsupported type.
    """

    def unexpected(offender) -> SQLLineageException:
        return SQLLineageException(
            "An Identifier is expected, got %s[value: %s] instead"
            % (type(offender).__name__, offender)
        )

    expect_source = expect_target = expect_temp = False

    for sub_token in token.tokens:
        if isinstance(sub_token, TokenList):
            self._extract_from_DML(sub_token)

        if sub_token.ttype in Keyword:
            keyword = sub_token.normalized
            if any(re.match(pattern, keyword) for pattern in SOURCE_TABLE_TOKENS):
                expect_source = True
            elif keyword in TARGET_TABLE_TOKENS:
                expect_target = True
            elif keyword in TEMP_TABLE_TOKENS:
                expect_temp = True
            continue

        if not (expect_source or expect_target or expect_temp):
            # No table is awaited, so there is nothing to do for this token.
            continue

        # Every armed state first skips tokens that may sit between the
        # keyword and the table name.
        if self.__token_negligible_before_tablename(sub_token):
            continue

        if expect_source:
            if isinstance(sub_token, Identifier):
                # "SELECT col1 FROM (SELECT col2 FROM tab1) dt": the subquery
                # is parsed as an Identifier whose real name would be the
                # alias "dt", so it is not recorded. See
                # https://github.com/andialbrecht/sqlparse/issues/218
                if not isinstance(sub_token.token_first(skip_cm=True), Parenthesis):
                    self._lineage_result.read.add(Table.create(sub_token))
            elif not isinstance(sub_token, Parenthesis):
                # A bare Parenthesis is a subquery without an alias (invalid
                # in MySQL, valid in SparkSQL) and is ignored; anything else
                # is an error.
                raise unexpected(sub_token)
            expect_source = False

        elif expect_target:
            if isinstance(sub_token, Function):
                # "insert into tab (col1, col2) values (...)": sqlparse groups
                # "tab (col1, col2)" as a Function, see
                # https://github.com/andialbrecht/sqlparse/issues/483
                head = sub_token.token_first(skip_cm=True)
                if not isinstance(head, Identifier):
                    raise unexpected(sub_token)
                self._lineage_result.write.add(Table.create(head))
            elif isinstance(sub_token, Comparison):
                # "create table tab1 like tab2": sqlparse groups
                # "tab1 like tab2" as a Comparison, see
                # https://github.com/andialbrecht/sqlparse/issues/543
                lhs, rhs = sub_token.left, sub_token.right
                if not isinstance(lhs, Identifier) or not isinstance(rhs, Identifier):
                    raise unexpected(sub_token)
                self._lineage_result.write.add(Table.create(lhs))
                self._lineage_result.read.add(Table.create(rhs))
            elif isinstance(sub_token, Identifier):
                self._lineage_result.write.add(Table.create(sub_token))
            else:
                raise unexpected(sub_token)
            expect_target = False

        else:
            # Only the temp flag can be armed here.
            if not isinstance(sub_token, Identifier):
                raise unexpected(sub_token)
            self._lineage_result.intermediate.add(Table.create(sub_token))
            # The identifier was already descended into at the top of this
            # iteration; it is traversed a second time here.
            self._extract_from_DML(sub_token)
            expect_temp = False