def extract_ignore_mask_source(
    cls,
    source: str,
    inline_comment_regex: RegexLexer,
    rule_codes: List[str],
) -> Tuple[List[NoQaDirective], List[SQLBaseError]]:
    """Look for inline ignore comments and return NoQaDirectives.

    Very similar to extract_ignore_mask_tree(), but can be run on raw
    source (i.e. does not require the code to have parsed successfully).
    """
    ignore_buff: List[NoQaDirective] = []
    violations: List[SQLBaseError] = []
    # Scan every raw line for an inline comment; line numbers are 1-based.
    for line_no, raw_line in enumerate(source.split("\n"), start=1):
        span = inline_comment_regex.search(raw_line) if raw_line else None
        if not span:
            continue
        # The matcher reports a (start, end) span; hand just the comment
        # text to the noqa parser.
        parsed = cls.parse_noqa(raw_line[span[0]:span[1]], line_no, rule_codes)
        if isinstance(parsed, SQLParseError):
            violations.append(parsed)  # pragma: no cover
        elif parsed:
            ignore_buff.append(parsed)
    if ignore_buff:
        linter_logger.info("Parsed noqa directives from file: %r", ignore_buff)
    return ignore_buff, violations
    # NOTE(review): span truncated — the opening of this import list lies
    # outside the visible chunk.
    Anything,
    RegexLexer,
    CodeSegment,
    Indent,
    Dedent,
    OptionallyBracketed,
)
from sqlfluff.core.dialects import load_raw_dialect

# Teradata is defined as a set of patches on top of the ANSI dialect.
ansi_dialect = load_raw_dialect("ansi")
teradata_dialect = ansi_dialect.copy_as("teradata")

teradata_dialect.patch_lexer_matchers([
    # Widened numeric literal so it also matches a bare trailing-dot form
    # such as "1." (integer part, optional fractional digits).
    RegexLexer("numeric_literal", r"([0-9]+(\.[0-9]*)?)", CodeSegment),
])

# Remove unused keywords from the dialect.
teradata_dialect.sets("unreserved_keywords").difference_update([
    # 'auto_increment',
    # The following are moved to being reserved keywords
    "UNION",
    "TIMESTAMP",
    "DATE",
])

# Teradata-specific additions to the unreserved keyword set.
teradata_dialect.sets("unreserved_keywords").update([
    "AUTOINCREMENT",
    "ACTIVITYCOUNT",
    "CASESPECIFIC",
    # NOTE(review): span truncated — the keyword list continues beyond
    # this chunk.
# Extra lexer matchers for the Exasol function/script dialect.
exasol_fs_dialect.insert_lexer_matchers(
    [
        # ":=" assignment operator, tagged with its own segment type.
        StringLexer(
            "walrus_operator",
            ":=",
            CodeSegment,
            segment_kwargs={"type": "walrus_operator"},
        ),
        # Terminator for function/script bodies: "/" after a ";" or at end
        # of line. The "(?!\*)" lookahead stops it from matching the start
        # of a block comment ("/*").
        RegexLexer(
            "function_script_terminator",
            r";\s+\/(?!\*)|\s+\/$",
            CodeSegment,
            segment_kwargs={"type": "statement_terminator"},
            # Split off any leading semicolon as its own segment...
            subdivider=StringLexer(
                "semicolon", ";", CodeSegment, segment_kwargs={"type": "semicolon"}
            ),
            # ...and strip newline runs left after subdividing.
            trim_post_subdivide=RegexLexer(
                "newline",
                r"(\n|\r\n)+",
                NewlineSegment,
            ),
        ),
        # "@name" and "$name" style literals.
        RegexLexer("atsign_literal", r"@[a-zA-Z_][\w]*", CodeSegment),
        RegexLexer("dollar_literal", r"[$][a-zA-Z0-9_.]*", CodeSegment),
    ],
    before="not_equal",
)

# NOTE(review): span truncated — this dialect.add() call continues beyond
# the visible chunk.
exasol_fs_dialect.add(
    FunctionScriptTerminatorSegment=NamedParser(
        "function_script_terminator",
from sqlfluff.core.dialects import load_raw_dialect

# BigQuery is defined as a set of patches over the ANSI dialect.
ansi_dialect = load_raw_dialect("ansi")
bigquery_dialect = ansi_dialect.copy_as("bigquery")

bigquery_dialect.patch_lexer_matchers([
    # Quoted literals can have r or b (case insensitive) prefixes, in any order, to
    # indicate a raw/regex string or byte sequence, respectively. Allow escaped quote
    # characters inside strings by allowing \" with an optional even multiple of
    # backslashes in front of it.
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#string_and_bytes_literals
    # Triple quoted variant first, then single quoted
    RegexLexer(
        "single_quote",
        r"([rR]?[bB]?|[bB]?[rR]?)?('''((?<!\\)(\\{2})*\\'|'{,2}(?!')|[^'])*(?<!\\)(\\{2})*'''|'((?<!\\)(\\{2})*\\'|[^'])*(?<!\\)(\\{2})*')",
        CodeSegment,
    ),
    # Same pattern, but for double quotes.
    RegexLexer(
        "double_quote",
        r'([rR]?[bB]?|[bB]?[rR]?)?(\"\"\"((?<!\\)(\\{2})*\\\"|\"{,2}(?!\")|[^\"])*(?<!\\)(\\{2})*\"\"\"|"((?<!\\)(\\{2})*\\"|[^"])*(?<!\\)(\\{2})*")',
        CodeSegment,
    ),
])

# NOTE(review): span truncated — this dialect.add() call continues beyond
# the visible chunk.
bigquery_dialect.add(
    # Double-quoted strings parse as literals (not identifiers) here.
    DoubleQuotedLiteralSegment=NamedParser(
        "double_quote",
        CodeSegment,
        name="quoted_literal",
        type="literal",
# Extra lexer matchers for the Exasol function/script dialect.
exasol_fs_dialect.insert_lexer_matchers(
    [
        # ":=" assignment operator, tagged with its own segment type.
        StringLexer(
            "walrus_operator",
            ":=",
            CodeSegment,
            segment_kwargs={"type": "walrus_operator"},
        ),
        # Terminator for function/script bodies: "/" after a ";" or at end
        # of line. The "(?!\*)" lookahead stops it from matching the start
        # of a block comment ("/*").
        RegexLexer(
            "function_script_terminator",
            r";\s+\/(?!\*)|\s+\/$",
            CodeSegment,
            segment_kwargs={"type": "statement_terminator"},
            # Split off any leading semicolon as its own segment...
            subdivider=StringLexer(
                "semicolon", ";", CodeSegment, segment_kwargs={"type": "semicolon"}
            ),
            # ...and strip newline runs left after subdividing.
            trim_post_subdivide=RegexLexer(
                "newline",
                r"(\n|\r\n)+",
                NewlineSegment,
            ),
        ),
    ],
    before="not_equal",
)

# NOTE(review): span truncated — this dialect.add() call continues beyond
# the visible chunk.
exasol_fs_dialect.add(
    FunctionScriptTerminatorSegment=NamedParser(
        "function_script_terminator", CodeSegment, type="statement_terminator"
    ),
    # NOTE(review): span truncated — the opening of this import list lies
    # outside the visible chunk.
    StringLexer,
    CodeSegment,
    StringParser,
    NamedParser,
    RegexParser,
)

# Snowflake is defined as a set of patches over the Postgres dialect
# (which itself derives from ANSI).
ansi_dialect = load_raw_dialect("ansi")
postgres_dialect = load_raw_dialect("postgres")
snowflake_dialect = postgres_dialect.copy_as("snowflake")

snowflake_dialect.patch_lexer_matchers([
    # In snowflake, a double single quote resolves as a single quote in the string.
    # https://docs.snowflake.com/en/sql-reference/data-types-text.html#single-quoted-string-constants
    RegexLexer("single_quote", r"'([^']|'')*'", CodeSegment),
])

snowflake_dialect.insert_lexer_matchers(
    [
        # Keyword assigner needed for keyword functions.
        StringLexer("parameter_assigner", "=>", CodeSegment),
        # Column selector
        # https://docs.snowflake.com/en/sql-reference/sql/select.html#parameters
        RegexLexer("column_selector", r"\$[0-9]+", CodeSegment),
    ],
    before="not_equal",
)

# Snowflake-specific additions to the unreserved keyword set.
snowflake_dialect.sets("unreserved_keywords").update([
    "API",
    # NOTE(review): span truncated — the keyword list continues beyond
    # this chunk.
    # NOTE(review): span truncated — the opening of this import list lies
    # outside the visible chunk.
    CodeSegment,
    StringParser,
    SymbolSegment,
    Delimited,
    RegexParser,
)
from sqlfluff.core.dialects import load_raw_dialect

# MySQL is defined as a set of patches over the ANSI dialect.
ansi_dialect = load_raw_dialect("ansi")
mysql_dialect = ansi_dialect.copy_as("mysql")

mysql_dialect.patch_lexer_matchers(
    [
        # MySQL also allows "#" (as well as "-- ") to open an inline
        # comment; trim either opener from the matched segment.
        RegexLexer(
            "inline_comment",
            r"(-- |#)[^\n]*",
            CommentSegment,
            segment_kwargs={"trim_start": ("-- ", "#")},
        )
    ]
)

# Reserve USE, FORCE & IGNORE
mysql_dialect.sets("unreserved_keywords").difference_update(
    [
        "FORCE",
        "IGNORE",
        "USE",
        "SQL_BUFFER_RESULT",
        "SQL_NO_CACHE",
        "SQL_CACHE",
        "DUMPFILE",
        # NOTE(review): span truncated — the keyword list continues beyond
        # this chunk.
    # NOTE(review): span truncated — the opening of this import list lies
    # outside the visible chunk.
    NamedParser,
    SymbolSegment,
)
from sqlfluff.core.dialects import load_raw_dialect

# Postgres is defined as a set of patches over the ANSI dialect.
ansi_dialect = load_raw_dialect("ansi")
postgres_dialect = ansi_dialect.copy_as("postgres")

postgres_dialect.insert_lexer_matchers(
    # JSON Operators: https://www.postgresql.org/docs/9.5/functions-json.html
    [
        RegexLexer(
            "json_operator",
            r"->>|#>>|->|#>|@>|<@|\?\||\?|\?&|#-",
            CodeSegment,
        )
    ],
    before="not_equal",
)

# https://www.postgresql.org/docs/current/sql-keywords-appendix.html
# SPACE has special status in some SQL dialects, but not Postgres.
postgres_dialect.sets("unreserved_keywords").remove("SPACE")

# Reserve WITHIN (required for the WithinGroupClauseSegment)
postgres_dialect.sets("unreserved_keywords").remove("WITHIN")

postgres_dialect.sets("unreserved_keywords").update([
    # NOTE(review): "WITHIN" appearing here looks like it undoes the
    # remove() above — confirm whether this entry should instead be added
    # to the reserved keyword set.
    "WITHIN",
    "ANALYZE",
    "VERBOSE",
    # NOTE(review): span truncated — the keyword list continues beyond
    # this chunk.
from sqlfluff.core.parser.segments.base import BracketedSegment
from sqlfluff.dialects import dialect_ansi as ansi

# Oracle is defined as a set of patches over the ANSI dialect.
ansi_dialect = load_raw_dialect("ansi")
oracle_dialect = ansi_dialect.copy_as("oracle")

# Move COMMENT from unreserved to reserved, and reserve the other words
# it is used alongside.
oracle_dialect.sets("unreserved_keywords").difference_update(["COMMENT"])
oracle_dialect.sets("reserved_keywords").update(
    ["COMMENT", "ON", "UPDATE", "INDEXTYPE", "PROMPT"]
)

oracle_dialect.insert_lexer_matchers(
    [
        # A PROMPT command runs to the end of its line and is lexed as a
        # comment rather than code.
        RegexLexer(
            "prompt_command",
            r"PROMPT([^(\r\n)])*((?=\n)|(?=\r\n))?",
            CommentSegment,
        ),
        StringLexer("at_sign", "@", CodeSegment),
    ],
    before="code",
)

oracle_dialect.add(
    AtSignSegment=StringParser("@", SymbolSegment, type="at_sign"),
)


# NOTE(review): span truncated — this class definition continues beyond
# the visible chunk (its docstring is cut off mid-line).
class AlterTableStatementSegment(ansi.AlterTableStatementSegment):
    """An `ALTER TABLE` statement.
    # NOTE(review): span truncated — the opening of this import list lies
    # outside the visible chunk.
    SymbolSegment,
)
from sqlfluff.core.dialects import load_raw_dialect

# Cockroach is defined as a set of patches over the Postgres dialect.
ansi_dialect = load_raw_dialect("ansi")
postgres_dialect = load_raw_dialect("postgres")
cockroach_dialect = postgres_dialect.copy_as("cockroach")

cockroach_dialect.insert_lexer_matchers(
    # this is needed for Cockroach's hideous index optimiser hint syntax,
    # "tablename@indexname"
    [
        # The lookbehind/lookahead stop a bare "@" from matching inside
        # the "<@" and "@>" JSON operators (which are lexed separately).
        RegexLexer(
            "at",
            r"(?<![<])@(?![>])",
            CodeSegment,
        )
    ],
    before="json_operator",
)
# r"[_A-Z]*[_A-Z0-9]@[_A-Z]*[_A-Z0-9]",

cockroach_dialect.add(
    AtSegment=StringParser("@", SymbolSegment, name="at", type="at"),
)


# NOTE(review): span truncated — this class definition continues beyond
# the visible chunk.
@cockroach_dialect.segment(replace=True)
class DropIndexStatementSegment(BaseSegment):
    # NOTE(review): span truncated — the opening of this import list lies
    # outside the visible chunk.
)
from sqlfluff.core.parser.segments.base import BracketedSegment
from sqlfluff.dialects.dialect_bigquery_keywords import (
    bigquery_reserved_keywords,
    bigquery_unreserved_keywords,
)

# BigQuery is defined as a set of patches over the ANSI dialect.
ansi_dialect = load_raw_dialect("ansi")
bigquery_dialect = ansi_dialect.copy_as("bigquery")

bigquery_dialect.insert_lexer_matchers(
    # NOTE(review): the original "JSON Operators" comment here appears to
    # have been copied from the Postgres dialect — these matchers are for
    # "=>", "?" and "@name" tokens, not JSON operators. Confirm and fix.
    [
        StringLexer("right_arrow", "=>", CodeSegment),
        StringLexer("question_mark", "?", CodeSegment),
        RegexLexer("atsign_literal", r"@[a-zA-Z_][\w]*", CodeSegment),
    ],
    before="equals",
)

bigquery_dialect.patch_lexer_matchers(
    [
        # Quoted literals can have r or b (case insensitive) prefixes, in any order, to
        # indicate a raw/regex string or byte sequence, respectively. Allow escaped
        # quote characters inside strings by allowing \" with an optional even multiple
        # of backslashes in front of it.
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#string_and_bytes_literals
        # Triple quoted variant first, then single quoted
        # NOTE(review): span truncated — this regex is the first part of an
        # implicitly concatenated string that continues beyond this chunk.
        RegexLexer(
            "single_quote",
            r"([rR]?[bB]?|[bB]?[rR]?)?('''((?<!\\)(\\{2})*\\'|'{,2}(?!')|[^'])"
    # NOTE(review): span truncated — the opening of this import list lies
    # outside the visible chunk.
    Bracketed,
    RegexLexer,
    CommentSegment,
    NamedParser,
    CodeSegment,
)
from sqlfluff.core.dialects import load_raw_dialect

# MySQL is defined as a set of patches over the ANSI dialect.
ansi_dialect = load_raw_dialect("ansi")
mysql_dialect = ansi_dialect.copy_as("mysql")

mysql_dialect.patch_lexer_matchers(
    [
        # MySQL also allows "#" (as well as "-- ") to open an inline
        # comment; trim either opener from the matched segment.
        RegexLexer(
            "inline_comment",
            r"(-- |#)[^\n]*",
            CommentSegment,
            segment_kwargs={"trim_start": ("-- ", "#")},
        )
    ]
)

# Reserve USE, FORCE & IGNORE
mysql_dialect.sets("unreserved_keywords").difference_update(["FORCE", "IGNORE", "USE"])
mysql_dialect.sets("reserved_keywords").update(["FORCE", "IGNORE", "USE"])

# NOTE(review): span truncated — this replace() call continues beyond the
# visible chunk.
mysql_dialect.replace(
    # Identifiers are back-quoted in MySQL.
    QuotedIdentifierSegment=NamedParser(
        "back_quote",
        CodeSegment,
        name="quoted_identifier",
        type="identifier",
from sqlfluff.dialects.tsql_keywords import RESERVED_KEYWORDS, UNRESERVED_KEYWORDS

# T-SQL is defined as a set of patches over the ANSI dialect.
ansi_dialect = load_raw_dialect("ansi")
tsql_dialect = ansi_dialect.copy_as("tsql")

# Should really clear down the old keywords but some are needed by certain segments
# tsql_dialect.sets("reserved_keywords").clear()
# tsql_dialect.sets("unreserved_keywords").clear()
tsql_dialect.sets("reserved_keywords").update(RESERVED_KEYWORDS)
tsql_dialect.sets("unreserved_keywords").update(UNRESERVED_KEYWORDS)

tsql_dialect.insert_lexer_matchers(
    [
        # "@name" style tokens (variables, presumably — confirm).
        RegexLexer(
            "atsign",
            r"[@][a-zA-Z0-9_]+",
            CodeSegment,
        ),
        # "[bracketed]" tokens; the pattern disallows nested square
        # brackets inside.
        RegexLexer(
            "square_quote",
            r"\[([a-zA-Z0-9][^\[\]]*)*\]",
            CodeSegment,
        ),
        # T-SQL unicode strings
        RegexLexer("single_quote_with_n", r"N'([^'\\]|\\.)*'", CodeSegment),
    ],
    before="back_quote",
)

# NOTE(review): span truncated — this dialect.add() call continues beyond
# the visible chunk.
tsql_dialect.add(
    BracketedIdentifierSegment=NamedParser(
        "square_quote",