def parse(sql: str) -> SqlMeta:
    """Tokenize *sql* and gather the tables it reads from and writes to.

    :param sql: a single SQL statement
    :returns: SqlMeta holding the input tables and output tables found
    :raises ValueError: if *sql* is None
    """
    if sql is None:
        raise ValueError("A sql statement must be provided.")

    # Tokenize the SQL statement
    parsed = sqlparse.parse(sql)

    # We assume only one statement in SQL
    tokens = TokenList(parsed[0].tokens)
    log.debug(f"Successfully tokenized sql statement: {tokens}")

    sources = []
    targets = []

    idx, token = tokens.token_next_by(t=T.Keyword)
    while token:
        if _is_in_table(token):
            idx, table = _get_table(tokens, idx)
            sources.append(table)
        elif _is_out_table(token):
            idx, table = _get_table(tokens, idx)
            targets.append(table)
        idx, token = tokens.token_next_by(t=T.Keyword, idx=idx)

    return SqlMeta(sources, targets)
def recurse(self, tokens: TokenList) -> SqlMeta:
    """Walk *tokens* keyword-by-keyword and collect input/output tables.

    Tables whose names appear in ``self.ctes`` are treated as CTE
    references and excluded from the input set.

    :param tokens: token list for one SQL statement
    :returns: SqlMeta with de-duplicated input and output tables
    """
    in_tables, out_tables = set(), set()
    idx, token = tokens.token_next_by(t=T.Keyword)
    while token:
        # Main parser switch
        if self.is_cte(token):
            # Only the CTE's referenced tables are needed here; its name
            # is unused, so it is deliberately discarded.
            _, cte_intables = self.parse_cte(idx, tokens)
            for intable in cte_intables:
                if intable not in self.ctes:
                    in_tables.add(intable)
        elif _is_in_table(token):
            idx, extracted_tables = _get_tables(tokens, idx, self.default_schema)
            for table in extracted_tables:
                if table.name not in self.ctes:
                    in_tables.add(table)
        elif _is_out_table(token):
            # assuming only one out_table
            idx, extracted_tables = _get_tables(tokens, idx, self.default_schema)
            out_tables.add(extracted_tables[0])
        idx, token = tokens.token_next_by(t=T.Keyword, idx=idx)
    return SqlMeta(list(in_tables), list(out_tables))
def __get_full_name(tlist: TokenList) -> Optional[str]:
    """
    Return the full unquoted table name if valid, i.e., conforms to the
    following [[cluster.]schema.]table construct.

    :param tlist: The SQL tokens
    :returns: The valid full table name
    """
    # Strip the alias if present: everything from the first whitespace on
    # belongs to the alias, not the name.
    cutoff = len(tlist.tokens)
    if tlist.has_alias():
        ws_idx, _ = tlist.token_next_by(t=Whitespace)
        if ws_idx != -1:
            cutoff = ws_idx
    parts = tlist.tokens[:cutoff]

    # A well-formed name has 1, 3 or 5 tokens: name-like tokens at even
    # positions and "." punctuation at odd positions.
    if len(parts) not in (1, 3, 5):
        return None
    name_tokens = parts[0::2]
    dot_tokens = parts[1::2]
    if not all(imt(tok, t=[Name, String]) for tok in name_tokens):
        return None
    if not all(imt(tok, m=(Punctuation, ".")) for tok in dot_tokens):
        return None
    return ".".join(remove_quotes(tok.value) for tok in name_tokens)
def _get_table(tlist: TokenList) -> Optional[Table]: """ Return the table if valid, i.e., conforms to the [[catalog.]schema.]table construct. :param tlist: The SQL tokens :returns: The table if the name conforms """ # Strip the alias if present. idx = len(tlist.tokens) if tlist.has_alias(): ws_idx, _ = tlist.token_next_by(t=Whitespace) if ws_idx != -1: idx = ws_idx tokens = tlist.tokens[:idx] if (len(tokens) in (1, 3, 5) and all(imt(token, t=[Name, String]) for token in tokens[::2]) and all( imt(token, m=(Punctuation, ".")) for token in tokens[1::2])): return Table( *[remove_quotes(token.value) for token in tokens[::-2]]) return None
def _define_primary_key(
    metadata: AllFieldMetadata,
    column_definition_group: token_groups.TokenList,
) -> typing.Optional[AllFieldMetadata]:
    """Apply a PRIMARY KEY table-constraint clause to the field metadata.

    :param metadata: existing per-field metadata
    :param column_definition_group: tokens for one column-definition clause
    :returns: an updated copy of *metadata*, or ``None`` when the clause is
        not a stand-alone PRIMARY KEY constraint
    :raises exceptions.NotSupportedError: for CONSTRAINT clauses that are not
        PRIMARY KEY constraints
    """
    idx, constraint_kw = column_definition_group.token_next_by(
        m=(token_types.Keyword, "CONSTRAINT"))
    idx, primary_kw = column_definition_group.token_next_by(
        m=(token_types.Keyword, "PRIMARY"), idx=(idx or -1))

    if primary_kw is None:
        if constraint_kw is not None:
            raise exceptions.NotSupportedError(
                "When a column definition clause begins with CONSTRAINT, "
                "only a PRIMARY KEY constraint is supported")
        return None

    # If the keyword isn't followed by column name(s), then it's part of
    # a regular column definition and should be handled by _define_column
    if not _contains_column_name(column_definition_group, idx):
        return None

    updated: AllFieldMetadata = deepcopy(metadata)
    while True:
        idx, key_column = column_definition_group.token_next_by(
            t=token_types.Name, idx=idx)
        # 'id' is defined and managed by Fauna, so we ignore any attempts
        # to manage it from SQLAlchemy
        if key_column is None or key_column.value == "id":
            break
        field_name = key_column.value
        updated[field_name] = {
            **DEFAULT_FIELD_METADATA,  # type: ignore
            **updated.get(field_name, {}),  # type: ignore
            "unique": True,
            "not_null": True,
        }
    return updated
def _define_unique_constraint(
    metadata: AllFieldMetadata,
    column_definition_group: token_groups.TokenList,
) -> typing.Optional[AllFieldMetadata]:
    """Apply a UNIQUE table-constraint clause to the field metadata.

    :param metadata: existing per-field metadata
    :param column_definition_group: tokens for one column-definition clause
    :returns: an updated copy of *metadata*, or ``None`` when the clause is
        not a stand-alone UNIQUE constraint
    """
    idx, unique_kw = column_definition_group.token_next_by(
        m=(token_types.Keyword, "UNIQUE"))
    if unique_kw is None:
        return None

    # If the keyword isn't followed by column name(s), then it's part of
    # a regular column definition and should be handled by _define_column
    if not _contains_column_name(column_definition_group, idx):
        return None

    updated = deepcopy(metadata)
    while True:
        idx, unique_column = column_definition_group.token_next_by(
            t=token_types.Name, idx=idx)
        # 'id' is defined and managed by Fauna, so we ignore any attempts
        # to manage it from SQLAlchemy
        if unique_column is None or unique_column.value == "id":
            break
        field_name = unique_column.value
        updated[field_name] = {
            **DEFAULT_FIELD_METADATA,  # type: ignore
            **updated.get(field_name, {}),  # type: ignore
            "unique": True,
        }
    return updated
def _define_column(
    metadata: AllFieldMetadata,
    column_definition_group: token_groups.TokenList,
) -> AllFieldMetadata:
    """Build field metadata for a single regular column definition.

    Reads the column name, its data type, and any NOT NULL / UNIQUE /
    PRIMARY KEY / DEFAULT modifiers from the clause, then returns a new
    metadata mapping with this column's entry merged in.

    :param metadata: existing per-field metadata
    :param column_definition_group: tokens for one column-definition clause
    :returns: a new metadata mapping including this column
    :raises exceptions.NotSupportedError: if the clause contains CHECK
    """
    # First Name token is the column name; second is its data type.
    idx, column = column_definition_group.token_next_by(t=token_types.Name)
    column_name = column.value
    # "id" is auto-generated by Fauna, so we ignore it in SQL column definitions
    if column_name == "id":
        return metadata
    idx, data_type = column_definition_group.token_next_by(t=token_types.Name,
                                                           idx=idx)
    # Modifier keywords are searched over the whole clause, not positionally.
    _, not_null_keyword = column_definition_group.token_next_by(
        m=(token_types.Keyword, "NOT NULL"))
    _, unique_keyword = column_definition_group.token_next_by(
        m=(token_types.Keyword, "UNIQUE"))
    _, primary_key_keyword = column_definition_group.token_next_by(
        m=(token_types.Keyword, "PRIMARY KEY"))
    _, default_keyword = column_definition_group.token_next_by(
        m=(token_types.Keyword, "DEFAULT"))
    _, check_keyword = column_definition_group.token_next_by(
        m=(token_types.Keyword, "CHECK"))

    if check_keyword is not None:
        raise exceptions.NotSupportedError("CHECK keyword is not supported.")

    column_metadata: typing.Union[FieldMetadata, typing.Dict[str, str]] = metadata.get(
        column_name, {})

    # PRIMARY KEY implies both NOT NULL and UNIQUE; previously recorded
    # metadata for this column is also honored.
    is_primary_key = primary_key_keyword is not None
    is_not_null = (not_null_keyword is not None or is_primary_key
                   or column_metadata.get("not_null") or False)
    is_unique = (unique_keyword is not None or is_primary_key
                 or column_metadata.get("unique") or False)
    # NOTE(review): this passes the matched DEFAULT keyword token's own text
    # to sql.extract_value, not the literal that follows it — looks like it
    # would extract from the string "DEFAULT" rather than the default value;
    # confirm against sql.extract_value and the tokenization of this clause.
    default_value = (default_keyword if default_keyword is None else
                     sql.extract_value(default_keyword.value))

    return {
        **metadata,
        column_name: {
            **DEFAULT_FIELD_METADATA,  # type: ignore
            **metadata.get(column_name, EMPTY_DICT),  # type: ignore
            "unique": is_unique,
            "not_null": is_not_null,
            "default": default_value,
            "type": DATA_TYPE_MAP[data_type.value],
        },
    }
def _extract_limit_from_query(statement: TokenList) -> Optional[int]:
    """
    Extract limit clause from SQL statement.

    :param statement: SQL statement
    :return: Limit extracted from query, None if no limit present in statement
    """
    limit_idx, _ = statement.token_next_by(m=(Keyword, "LIMIT"))
    if limit_idx is None:
        return None

    _, value_token = statement.token_next(idx=limit_idx)
    if not value_token:
        return None

    if isinstance(value_token, IdentifierList):
        # In case of "LIMIT <offset>, <limit>", find comma and extract
        # first succeeding non-whitespace token
        comma_idx, _ = value_token.token_next_by(
            m=(sqlparse.tokens.Punctuation, ","))
        _, value_token = value_token.token_next(idx=comma_idx)

    if value_token and value_token.ttype == sqlparse.tokens.Literal.Number.Integer:
        return int(value_token.value)
    return None
def _define_foreign_key_constraint(
    metadata: AllFieldMetadata, column_definition_group: token_groups.TokenList
) -> typing.Optional[AllFieldMetadata]:
    """Apply a FOREIGN KEY ... REFERENCES clause to the field metadata.

    :param metadata: existing per-field metadata
    :param column_definition_group: tokens for one column-definition clause
    :returns: a new metadata mapping with the reference recorded, or ``None``
        when the clause contains no FOREIGN keyword
    :raises exceptions.NotSupportedError: for a column that already has a
        reference, or for references to a column other than "id"
    """
    idx, foreign_kw = column_definition_group.token_next_by(
        m=(token_types.Keyword, "FOREIGN"))
    if foreign_kw is None:
        return None

    # Walk the clause in order: KEY <column> REFERENCES <table> (<column>)
    idx, _ = column_definition_group.token_next_by(m=(token_types.Name, "KEY"),
                                                   idx=idx)
    idx, key_column = column_definition_group.token_next_by(
        t=token_types.Name, idx=idx)
    field_name = key_column.value

    idx, _ = column_definition_group.token_next_by(m=(token_types.Keyword,
                                                      "REFERENCES"),
                                                   idx=idx)
    idx, ref_table = column_definition_group.token_next_by(
        t=token_types.Name, idx=idx)
    ref_table_name = ref_table.value
    idx, ref_column = column_definition_group.token_next_by(
        t=token_types.Name, idx=idx)
    ref_column_name = ref_column.value

    existing_references = metadata.get(field_name, EMPTY_DICT).get(
        "references", EMPTY_DICT)
    if any(existing_references):
        raise exceptions.NotSupportedError(
            "Foreign keys with multiple references are not currently supported."
        )
    if ref_column_name != "id":
        raise exceptions.NotSupportedError(
            "Foreign keys referring to fields other than ID are not currently supported."
        )

    return {
        **metadata,
        field_name: {
            **DEFAULT_FIELD_METADATA,  # type: ignore
            **metadata.get(field_name, EMPTY_DICT),
            "references": {
                ref_table_name: ref_column_name
            },
        },
    }