def extract_ignore_mask_source(
    cls,
    source: str,
    inline_comment_regex: RegexLexer,
    rule_codes: List[str],
) -> Tuple[List[NoQaDirective], List[SQLBaseError]]:
    """Look for inline ignore comments and return NoQaDirectives.

    Very similar to extract_ignore_mask_tree(), but can be run on raw
    source (i.e. does not require the code to have parsed successfully).
    """
    ignore_buff: List[NoQaDirective] = []
    violations: List[SQLBaseError] = []
    # Scan every raw line for an inline comment; line numbers are 1-based.
    for line_no, raw_line in enumerate(source.split("\n"), start=1):
        span = inline_comment_regex.search(raw_line) if raw_line else None
        if not span:
            continue
        # The matcher reports a (start, end) span; hand just the comment
        # text to the noqa parser.
        parsed = cls.parse_noqa(raw_line[span[0]:span[1]], line_no, rule_codes)
        if isinstance(parsed, SQLParseError):
            violations.append(parsed)  # pragma: no cover
        elif parsed:
            ignore_buff.append(parsed)
    if ignore_buff:
        linter_logger.info("Parsed noqa directives from file: %r", ignore_buff)
    return ignore_buff, violations
    # NOTE(review): span truncated — the opening of this import list lies
    # outside the visible chunk.
    Anything,
    RegexLexer,
    CodeSegment,
    Indent,
    Dedent,
    OptionallyBracketed,
)
from sqlfluff.core.dialects import load_raw_dialect

# Teradata is defined as a set of patches on top of the ANSI dialect.
ansi_dialect = load_raw_dialect("ansi")
teradata_dialect = ansi_dialect.copy_as("teradata")

teradata_dialect.patch_lexer_matchers([
    # Widened numeric literal so it also matches a bare trailing-dot form
    # such as "1." (integer part, optional fractional digits).
    RegexLexer("numeric_literal", r"([0-9]+(\.[0-9]*)?)", CodeSegment),
])

# Remove unused keywords from the dialect.
teradata_dialect.sets("unreserved_keywords").difference_update([
    # 'auto_increment',
    # The following are moved to being reserved keywords
    "UNION",
    "TIMESTAMP",
    "DATE",
])

# Teradata-specific additions to the unreserved keyword set.
teradata_dialect.sets("unreserved_keywords").update([
    "AUTOINCREMENT",
    "ACTIVITYCOUNT",
    "CASESPECIFIC",
    # NOTE(review): span truncated — the keyword list continues beyond
    # this chunk.
# Extra lexer matchers for the Exasol function/script dialect.
exasol_fs_dialect.insert_lexer_matchers(
    [
        # ":=" assignment operator, tagged with its own segment type.
        StringLexer(
            "walrus_operator",
            ":=",
            CodeSegment,
            segment_kwargs={"type": "walrus_operator"},
        ),
        # Terminator for function/script bodies: "/" after a ";" or at end
        # of line. The "(?!\*)" lookahead stops it from matching the start
        # of a block comment ("/*").
        RegexLexer(
            "function_script_terminator",
            r";\s+\/(?!\*)|\s+\/$",
            CodeSegment,
            segment_kwargs={"type": "statement_terminator"},
            # Split off any leading semicolon as its own segment...
            subdivider=StringLexer(
                "semicolon", ";", CodeSegment, segment_kwargs={"type": "semicolon"}
            ),
            # ...and strip newline runs left after subdividing.
            trim_post_subdivide=RegexLexer(
                "newline",
                r"(\n|\r\n)+",
                NewlineSegment,
            ),
        ),
        # "@name" and "$name" style literals.
        RegexLexer("atsign_literal", r"@[a-zA-Z_][\w]*", CodeSegment),
        RegexLexer("dollar_literal", r"[$][a-zA-Z0-9_.]*", CodeSegment),
    ],
    before="not_equal",
)

# NOTE(review): span truncated — this dialect.add() call continues beyond
# the visible chunk.
exasol_fs_dialect.add(
    FunctionScriptTerminatorSegment=NamedParser(
        "function_script_terminator",
from sqlfluff.core.dialects import load_raw_dialect

# BigQuery is defined as a set of patches over the ANSI dialect.
ansi_dialect = load_raw_dialect("ansi")
bigquery_dialect = ansi_dialect.copy_as("bigquery")

bigquery_dialect.patch_lexer_matchers([
    # Quoted literals can have r or b (case insensitive) prefixes, in any order, to
    # indicate a raw/regex string or byte sequence, respectively. Allow escaped quote
    # characters inside strings by allowing \" with an optional even multiple of
    # backslashes in front of it.
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#string_and_bytes_literals
    # Triple quoted variant first, then single quoted
    RegexLexer(
        "single_quote",
        r"([rR]?[bB]?|[bB]?[rR]?)?('''((?<!\\)(\\{2})*\\'|'{,2}(?!')|[^'])*(?<!\\)(\\{2})*'''|'((?<!\\)(\\{2})*\\'|[^'])*(?<!\\)(\\{2})*')",
        CodeSegment,
    ),
    # Same pattern, but for double quotes.
    RegexLexer(
        "double_quote",
        r'([rR]?[bB]?|[bB]?[rR]?)?(\"\"\"((?<!\\)(\\{2})*\\\"|\"{,2}(?!\")|[^\"])*(?<!\\)(\\{2})*\"\"\"|"((?<!\\)(\\{2})*\\"|[^"])*(?<!\\)(\\{2})*")',
        CodeSegment,
    ),
])

# NOTE(review): span truncated — this dialect.add() call continues beyond
# the visible chunk.
bigquery_dialect.add(
    # Double-quoted strings parse as literals (not identifiers) here.
    DoubleQuotedLiteralSegment=NamedParser(
        "double_quote",
        CodeSegment,
        name="quoted_literal",
        type="literal",
# Extra lexer matchers for the Exasol function/script dialect.
exasol_fs_dialect.insert_lexer_matchers(
    [
        # ":=" assignment operator, tagged with its own segment type.
        StringLexer(
            "walrus_operator",
            ":=",
            CodeSegment,
            segment_kwargs={"type": "walrus_operator"},
        ),
        # Terminator for function/script bodies: "/" after a ";" or at end
        # of line. The "(?!\*)" lookahead stops it from matching the start
        # of a block comment ("/*").
        RegexLexer(
            "function_script_terminator",
            r";\s+\/(?!\*)|\s+\/$",
            CodeSegment,
            segment_kwargs={"type": "statement_terminator"},
            # Split off any leading semicolon as its own segment...
            subdivider=StringLexer(
                "semicolon", ";", CodeSegment, segment_kwargs={"type": "semicolon"}
            ),
            # ...and strip newline runs left after subdividing.
            trim_post_subdivide=RegexLexer(
                "newline",
                r"(\n|\r\n)+",
                NewlineSegment,
            ),
        ),
    ],
    before="not_equal",
)

# NOTE(review): span truncated — this dialect.add() call continues beyond
# the visible chunk.
exasol_fs_dialect.add(
    FunctionScriptTerminatorSegment=NamedParser(
        "function_script_terminator", CodeSegment, type="statement_terminator"
    ),
    # NOTE(review): span truncated — the opening of this import list lies
    # outside the visible chunk.
    StringLexer,
    CodeSegment,
    StringParser,
    NamedParser,
    RegexParser,
)

# Snowflake is defined as a set of patches over the Postgres dialect
# (which itself derives from ANSI).
ansi_dialect = load_raw_dialect("ansi")
postgres_dialect = load_raw_dialect("postgres")
snowflake_dialect = postgres_dialect.copy_as("snowflake")

snowflake_dialect.patch_lexer_matchers([
    # In snowflake, a double single quote resolves as a single quote in the string.
    # https://docs.snowflake.com/en/sql-reference/data-types-text.html#single-quoted-string-constants
    RegexLexer("single_quote", r"'([^']|'')*'", CodeSegment),
])

snowflake_dialect.insert_lexer_matchers(
    [
        # Keyword assigner needed for keyword functions.
        StringLexer("parameter_assigner", "=>", CodeSegment),
        # Column selector
        # https://docs.snowflake.com/en/sql-reference/sql/select.html#parameters
        RegexLexer("column_selector", r"\$[0-9]+", CodeSegment),
    ],
    before="not_equal",
)

# Snowflake-specific additions to the unreserved keyword set.
snowflake_dialect.sets("unreserved_keywords").update([
    "API",
    # NOTE(review): span truncated — the keyword list continues beyond
    # this chunk.
    # NOTE(review): span truncated — the opening of this import list lies
    # outside the visible chunk.
    CodeSegment,
    StringParser,
    SymbolSegment,
    Delimited,
    RegexParser,
)
from sqlfluff.core.dialects import load_raw_dialect

# MySQL is defined as a set of patches over the ANSI dialect.
ansi_dialect = load_raw_dialect("ansi")
mysql_dialect = ansi_dialect.copy_as("mysql")

mysql_dialect.patch_lexer_matchers(
    [
        # MySQL also allows "#" (as well as "-- ") to open an inline
        # comment; trim either opener from the matched segment.
        RegexLexer(
            "inline_comment",
            r"(-- |#)[^\n]*",
            CommentSegment,
            segment_kwargs={"trim_start": ("-- ", "#")},
        )
    ]
)

# Reserve USE, FORCE & IGNORE
mysql_dialect.sets("unreserved_keywords").difference_update(
    [
        "FORCE",
        "IGNORE",
        "USE",
        "SQL_BUFFER_RESULT",
        "SQL_NO_CACHE",
        "SQL_CACHE",
        "DUMPFILE",
        # NOTE(review): span truncated — the keyword list continues beyond
        # this chunk.
    # NOTE(review): span truncated — the opening of this import list lies
    # outside the visible chunk.
    NamedParser,
    SymbolSegment,
)
from sqlfluff.core.dialects import load_raw_dialect

# Postgres is defined as a set of patches over the ANSI dialect.
ansi_dialect = load_raw_dialect("ansi")
postgres_dialect = ansi_dialect.copy_as("postgres")

postgres_dialect.insert_lexer_matchers(
    # JSON Operators: https://www.postgresql.org/docs/9.5/functions-json.html
    [
        RegexLexer(
            "json_operator",
            r"->>|#>>|->|#>|@>|<@|\?\||\?|\?&|#-",
            CodeSegment,
        )
    ],
    before="not_equal",
)

# https://www.postgresql.org/docs/current/sql-keywords-appendix.html
# SPACE has special status in some SQL dialects, but not Postgres.
postgres_dialect.sets("unreserved_keywords").remove("SPACE")

# Reserve WITHIN (required for the WithinGroupClauseSegment)
postgres_dialect.sets("unreserved_keywords").remove("WITHIN")

postgres_dialect.sets("unreserved_keywords").update([
    # NOTE(review): "WITHIN" appearing here looks like it undoes the
    # remove() above — confirm whether this entry should instead be added
    # to the reserved keyword set.
    "WITHIN",
    "ANALYZE",
    "VERBOSE",
    # NOTE(review): span truncated — the keyword list continues beyond
    # this chunk.
from sqlfluff.core.parser.segments.base import BracketedSegment
from sqlfluff.dialects import dialect_ansi as ansi

# Oracle is defined as a set of patches over the ANSI dialect.
ansi_dialect = load_raw_dialect("ansi")
oracle_dialect = ansi_dialect.copy_as("oracle")

# Move COMMENT from unreserved to reserved, and reserve the other words
# it is used alongside.
oracle_dialect.sets("unreserved_keywords").difference_update(["COMMENT"])
oracle_dialect.sets("reserved_keywords").update(
    ["COMMENT", "ON", "UPDATE", "INDEXTYPE", "PROMPT"]
)

oracle_dialect.insert_lexer_matchers(
    [
        # A PROMPT command runs to the end of its line and is lexed as a
        # comment rather than code.
        RegexLexer(
            "prompt_command",
            r"PROMPT([^(\r\n)])*((?=\n)|(?=\r\n))?",
            CommentSegment,
        ),
        StringLexer("at_sign", "@", CodeSegment),
    ],
    before="code",
)

oracle_dialect.add(
    AtSignSegment=StringParser("@", SymbolSegment, type="at_sign"),
)


# NOTE(review): span truncated — this class definition continues beyond
# the visible chunk (its docstring is cut off mid-line).
class AlterTableStatementSegment(ansi.AlterTableStatementSegment):
    """An `ALTER TABLE` statement.
    # NOTE(review): span truncated — the opening of this import list lies
    # outside the visible chunk.
    SymbolSegment,
)
from sqlfluff.core.dialects import load_raw_dialect

# Cockroach is defined as a set of patches over the Postgres dialect.
ansi_dialect = load_raw_dialect("ansi")
postgres_dialect = load_raw_dialect("postgres")
cockroach_dialect = postgres_dialect.copy_as("cockroach")

cockroach_dialect.insert_lexer_matchers(
    # this is needed for Cockroach's hideous index optimiser hint syntax,
    # "tablename@indexname"
    [
        # The lookbehind/lookahead stop a bare "@" from matching inside
        # the "<@" and "@>" JSON operators (which are lexed separately).
        RegexLexer(
            "at",
            r"(?<![<])@(?![>])",
            CodeSegment,
        )
    ],
    before="json_operator",
)
# r"[_A-Z]*[_A-Z0-9]@[_A-Z]*[_A-Z0-9]",

cockroach_dialect.add(
    AtSegment=StringParser("@", SymbolSegment, name="at", type="at"),
)


# NOTE(review): span truncated — this class definition continues beyond
# the visible chunk.
@cockroach_dialect.segment(replace=True)
class DropIndexStatementSegment(BaseSegment):
    # NOTE(review): span truncated — the opening of this import list lies
    # outside the visible chunk.
)
from sqlfluff.core.parser.segments.base import BracketedSegment
from sqlfluff.dialects.dialect_bigquery_keywords import (
    bigquery_reserved_keywords,
    bigquery_unreserved_keywords,
)

# BigQuery is defined as a set of patches over the ANSI dialect.
ansi_dialect = load_raw_dialect("ansi")
bigquery_dialect = ansi_dialect.copy_as("bigquery")

bigquery_dialect.insert_lexer_matchers(
    # NOTE(review): the original "JSON Operators" comment here appears to
    # have been copied from the Postgres dialect — these matchers are for
    # "=>", "?" and "@name" tokens, not JSON operators. Confirm and fix.
    [
        StringLexer("right_arrow", "=>", CodeSegment),
        StringLexer("question_mark", "?", CodeSegment),
        RegexLexer("atsign_literal", r"@[a-zA-Z_][\w]*", CodeSegment),
    ],
    before="equals",
)

bigquery_dialect.patch_lexer_matchers(
    [
        # Quoted literals can have r or b (case insensitive) prefixes, in any order, to
        # indicate a raw/regex string or byte sequence, respectively. Allow escaped
        # quote characters inside strings by allowing \" with an optional even multiple
        # of backslashes in front of it.
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#string_and_bytes_literals
        # Triple quoted variant first, then single quoted
        # NOTE(review): span truncated — this regex is the first part of an
        # implicitly concatenated string that continues beyond this chunk.
        RegexLexer(
            "single_quote",
            r"([rR]?[bB]?|[bB]?[rR]?)?('''((?<!\\)(\\{2})*\\'|'{,2}(?!')|[^'])"
    # NOTE(review): span truncated — the opening of this import list lies
    # outside the visible chunk.
    Bracketed,
    RegexLexer,
    CommentSegment,
    NamedParser,
    CodeSegment,
)
from sqlfluff.core.dialects import load_raw_dialect

# MySQL is defined as a set of patches over the ANSI dialect.
ansi_dialect = load_raw_dialect("ansi")
mysql_dialect = ansi_dialect.copy_as("mysql")

mysql_dialect.patch_lexer_matchers(
    [
        # MySQL also allows "#" (as well as "-- ") to open an inline
        # comment; trim either opener from the matched segment.
        RegexLexer(
            "inline_comment",
            r"(-- |#)[^\n]*",
            CommentSegment,
            segment_kwargs={"trim_start": ("-- ", "#")},
        )
    ]
)

# Reserve USE, FORCE & IGNORE
mysql_dialect.sets("unreserved_keywords").difference_update(["FORCE", "IGNORE", "USE"])
mysql_dialect.sets("reserved_keywords").update(["FORCE", "IGNORE", "USE"])

# NOTE(review): span truncated — this replace() call continues beyond the
# visible chunk.
mysql_dialect.replace(
    # Identifiers are back-quoted in MySQL.
    QuotedIdentifierSegment=NamedParser(
        "back_quote",
        CodeSegment,
        name="quoted_identifier",
        type="identifier",
from sqlfluff.dialects.tsql_keywords import RESERVED_KEYWORDS, UNRESERVED_KEYWORDS

# T-SQL is defined as a set of patches over the ANSI dialect.
ansi_dialect = load_raw_dialect("ansi")
tsql_dialect = ansi_dialect.copy_as("tsql")

# Should really clear down the old keywords but some are needed by certain segments
# tsql_dialect.sets("reserved_keywords").clear()
# tsql_dialect.sets("unreserved_keywords").clear()
tsql_dialect.sets("reserved_keywords").update(RESERVED_KEYWORDS)
tsql_dialect.sets("unreserved_keywords").update(UNRESERVED_KEYWORDS)

tsql_dialect.insert_lexer_matchers(
    [
        # "@name" style tokens (variables, presumably — confirm).
        RegexLexer(
            "atsign",
            r"[@][a-zA-Z0-9_]+",
            CodeSegment,
        ),
        # "[bracketed]" tokens; the pattern disallows nested square
        # brackets inside.
        RegexLexer(
            "square_quote",
            r"\[([a-zA-Z0-9][^\[\]]*)*\]",
            CodeSegment,
        ),
        # T-SQL unicode strings
        RegexLexer("single_quote_with_n", r"N'([^'\\]|\\.)*'", CodeSegment),
    ],
    before="back_quote",
)

# NOTE(review): span truncated — this dialect.add() call continues beyond
# the visible chunk.
tsql_dialect.add(
    BracketedIdentifierSegment=NamedParser(
        "square_quote",