Example #1
def select_parsing(query):
    selectStmt = Forward()
    selectStmt <<= (SELECT + ("*" | columnNameList)("columnsToShow") + FROM +
                    tableNameList("tables") +
                    Optional(Group(WHERE + whereExpression), "")("where"))
    selectStmt.ignore(oracleSqlComment)
    return selectStmt.parseString(query)
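The snippet relies on grammar elements defined elsewhere in its module (SELECT, FROM, WHERE, columnNameList, tableNameList, whereExpression, oracleSqlComment). A minimal sketch of plausible stand-ins is shown below so the function can be tried in isolation; the token shapes are assumptions, not the original definitions:

from pyparsing import (CaselessKeyword, Forward, Group, Optional, Word, alphas,
                       alphanums, delimitedList, oneOf, quotedString, restOfLine)

SELECT, FROM, WHERE = map(CaselessKeyword, "SELECT FROM WHERE".split())
ident = Word(alphas, alphanums + "_$")          # assumed identifier shape
columnNameList = Group(delimitedList(ident))
tableNameList = Group(delimitedList(ident))
whereExpression = Group(ident + oneOf("= != < > >= <=") +
                        (quotedString | Word(alphanums + "._")))
oracleSqlComment = "--" + restOfLine

# Example run:
print(select_parsing("SELECT name, age FROM users WHERE age >= 21  -- adults"))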
Example #2
def build_parser(root_directory, path, fake_root=os.getcwd(), file_reader=None):
    from pyparsing import (nestedExpr, QuotedString, Group, restOfLine, Word,
                           alphanums, cStyleComment, OneOrMore, ZeroOrMore,
                           Optional, Forward, Literal, Keyword)

    root = Forward()

    include_handler = IncludeHandler(
        root_directory,
        path,
        root,
        fake_root=fake_root,
        file_reader=file_reader)

    # relaxed grammar
    identifier = Word(alphanums + "-_.:/")

    comment = ("//" + restOfLine).suppress() \
        | ("#" + restOfLine).suppress() \
        | cStyleComment

    endstmt = Literal(";").suppress()

    argument = QuotedString('"') \
        | identifier

    arguments = ZeroOrMore(argument)

    statements = Forward()

    section = nestedExpr("{", "}", statements)

    include = Keyword("include").suppress() + QuotedString('"')

    regular = identifier + Group(arguments) + Optional(section, default=[])

    statement = include.setParseAction(include_handler.pyparsing_call) \
        | regular.setParseAction(include_handler.pyparsing_mark)

    statements << OneOrMore(statement + endstmt)

    root << Optional(statements)

    root.ignore(comment)

    setattr(
        root, 'parse_file',
        lambda f, root=root: root.parseFile(f, parseAll=True))

    return root
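build_parser expects an IncludeHandler object supplying the two parse actions it attaches: pyparsing_call for include directives and pyparsing_mark for regular statements. A minimal stand-in and a sample call are sketched below; the real handler resolves the included files, which this stub deliberately skips:

class IncludeHandler:                      # hypothetical stand-in, not the real class
    def __init__(self, root_directory, path, root, fake_root=None, file_reader=None):
        self.root_directory = root_directory
        self.path = path
        self.root = root

    def pyparsing_call(self, tokens):
        return []                          # drop include directives in this sketch

    def pyparsing_mark(self, tokens):
        return tokens                      # pass regular statements through unchanged


parser = build_parser("/etc/bind", "named.conf")
print(parser.parseString('options { directory "/var/cache/bind"; };', parseAll=True))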
Example #3
def delete_query_parse(query):
    deleteStmt = Forward()
    # define the grammar, i.e. the rules to parse an SQL DELETE query
    deleteStmt <<= (DELETE + FROM + tableName("tableName") +
                    Optional(Group(WHERE + whereExpression), "")("where"))
    deleteStmt.ignore(oracleSqlComment)
    return deleteStmt.parseString(query)
Example #4
def create_db_parse(query):
    # Forward declaration of createDBStmt to define it later
    createDBStmt = Forward()
    # define the grammar, i.e. the rules to parse a CREATE DATABASE statement
    createDBStmt <<= (CREATE + DATABASE +
                      Optional(IF_NOT_EXISTS)("existence_clause") +
                      ident("dbName"))
    createDBStmt.ignore(oracleSqlComment)
    return createDBStmt.parseString(query)
Example #5
def insert_query_parse(query):
    insertStmt = Forward()
    value = (realNum | intNum | quotedString | dblQuotedString)
    values = Group(delimitedList(value))
    valuesWithParenthesis = "(" + values + ")"
    valuesList = Group(delimitedList(valuesWithParenthesis))
    # define the grammar
    insertStmt <<= (INSERT + INTO + tableName("tableName") + VALUES +
                    valuesList("valuesList"))
    insertStmt.ignore(oracleSqlComment)
    return insertStmt.parseString(query)
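Here too the keyword and token objects (INSERT, INTO, VALUES, tableName, realNum, intNum, oracleSqlComment) come from the surrounding module, while quotedString and dblQuotedString are pyparsing built-ins. A sketch with assumed stand-ins so the function can be exercised:

from pyparsing import (CaselessKeyword, Forward, Group, Regex, Word, alphas,
                       alphanums, delimitedList, quotedString, dblQuotedString,
                       restOfLine)

INSERT, INTO, VALUES = map(CaselessKeyword, "INSERT INTO VALUES".split())
intNum = Regex(r"[+-]?\d+")                  # assumed numeric token shapes
realNum = Regex(r"[+-]?\d+\.\d*")
tableName = Word(alphas, alphanums + "_$")
oracleSqlComment = "--" + restOfLine

print(insert_query_parse("INSERT INTO users VALUES (1, 'bob'), (2, 'alice')"))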
Example #6
def create_table_query_parse(query):
    createTableStmt = Forward()
    charType = Combine(CHAR + "(" + intNum + ")")
    varcharType = Combine(VARCHAR + "(" + intNum + ")")

    columnTypeName = (INT | FLOAT | charType | varcharType | DATE | DATETIME
                      | TIME | YEAR)

    column = Group(columnName + columnTypeName + Optional(NOT_NULL | NULL))
    # Token for a comma-delimited list of column definitions
    columnList = Group(delimitedList(column))
    # define the grammar, i.e. the rules to parse a CREATE TABLE statement
    createTableStmt <<= (CREATE + TABLE + tableName("tableName") + "(" +
                         columnList("columnList") + ")")
    createTableStmt.ignore(oracleSqlComment)
    return createTableStmt.parseString(query)
Example #7
    def _get_parser(cls):
        if cls._parser is not None:
            return cls._parser

        ParserElement.enablePackrat()

        LPAR, RPAR, COMMA, LBRACKET, RBRACKET, LT, GT = map(Literal, "(),[]<>")
        ungrouped_select_stmt = Forward().setName("select statement")

        # keywords
        (
            UNION,
            ALL,
            AND,
            INTERSECT,
            EXCEPT,
            COLLATE,
            ASC,
            DESC,
            ON,
            USING,
            NATURAL,
            INNER,
            CROSS,
            LEFT,
            RIGHT,
            OUTER,
            FULL,
            JOIN,
            AS,
            INDEXED,
            NOT,
            SELECT,
            DISTINCT,
            FROM,
            WHERE,
            GROUP,
            BY,
            HAVING,
            ORDER,
            BY,
            LIMIT,
            OFFSET,
            OR,
            CAST,
            ISNULL,
            NOTNULL,
            NULL,
            IS,
            BETWEEN,
            ELSE,
            END,
            CASE,
            WHEN,
            THEN,
            EXISTS,
            COLLATE,
            IN,
            LIKE,
            GLOB,
            REGEXP,
            MATCH,
            ESCAPE,
            CURRENT_TIME,
            CURRENT_DATE,
            CURRENT_TIMESTAMP,
            WITH,
            EXTRACT,
            PARTITION,
            ROWS,
            RANGE,
            UNBOUNDED,
            PRECEDING,
            CURRENT,
            ROW,
            FOLLOWING,
            OVER,
            INTERVAL,
            DATE_ADD,
            DATE_SUB,
            ADDDATE,
            SUBDATE,
            REGEXP_EXTRACT,
            SPLIT,
            ORDINAL,
            FIRST_VALUE,
            LAST_VALUE,
            NTH_VALUE,
            LEAD,
            LAG,
            PERCENTILE_CONT,
            PRECENTILE_DISC,
            RANK,
            DENSE_RANK,
            PERCENT_RANK,
            CUME_DIST,
            NTILE,
            ROW_NUMBER,
            DATE,
            TIME,
            DATETIME,
            TIMESTAMP,
            UNNEST,
            INT64,
            NUMERIC,
            FLOAT64,
            BOOL,
            BYTES,
            GEOGRAPHY,
            ARRAY,
            STRUCT,
            SAFE_CAST,
            ANY_VALUE,
            ARRAY_AGG,
            ARRAY_CONCAT_AGG,
            AVG,
            BIT_AND,
            BIT_OR,
            BIT_XOR,
            COUNT,
            COUNTIF,
            LOGICAL_AND,
            LOGICAL_OR,
            MAX,
            MIN,
            STRING_AGG,
            SUM,
            CORR,
            COVAR_POP,
            COVAR_SAMP,
            STDDEV_POP,
            STDDEV_SAMP,
            STDDEV,
            VAR_POP,
            VAR_SAMP,
            VARIANCE,
            TIMESTAMP_ADD,
            TIMESTAMP_SUB,
            GENERATE_ARRAY,
            GENERATE_DATE_ARRAY,
            GENERATE_TIMESTAMP_ARRAY,
            FOR,
            SYSTEMTIME,
            AS,
            OF,
            WINDOW,
            RESPECT,
            IGNORE,
            NULLS,
        ) = map(
            CaselessKeyword,
            """
            UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING,
            NATURAL, INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED,
            NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY,
            LIMIT, OFFSET, OR, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE,
            END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP,
            MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP, WITH,
            EXTRACT, PARTITION, ROWS, RANGE, UNBOUNDED, PRECEDING, CURRENT,
            ROW, FOLLOWING, OVER, INTERVAL, DATE_ADD, DATE_SUB, ADDDATE,
            SUBDATE, REGEXP_EXTRACT, SPLIT, ORDINAL, FIRST_VALUE, LAST_VALUE,
            NTH_VALUE, LEAD, LAG, PERCENTILE_CONT, PRECENTILE_DISC, RANK,
            DENSE_RANK, PERCENT_RANK, CUME_DIST, NTILE, ROW_NUMBER, DATE, TIME,
            DATETIME, TIMESTAMP, UNNEST, INT64, NUMERIC, FLOAT64, BOOL, BYTES,
            GEOGRAPHY, ARRAY, STRUCT, SAFE_CAST, ANY_VALUE, ARRAY_AGG,
            ARRAY_CONCAT_AGG, AVG, BIT_AND, BIT_OR, BIT_XOR, COUNT, COUNTIF,
            LOGICAL_AND, LOGICAL_OR, MAX, MIN, STRING_AGG, SUM, CORR,
            COVAR_POP, COVAR_SAMP, STDDEV_POP, STDDEV_SAMP, STDDEV, VAR_POP,
            VAR_SAMP, VARIANCE, TIMESTAMP_ADD, TIMESTAMP_SUB, GENERATE_ARRAY,
            GENERATE_DATE_ARRAY, GENERATE_TIMESTAMP_ARRAY, FOR, SYSTEMTIME, AS,
            OF, WINDOW, RESPECT, IGNORE, NULLS
                 """.replace(",", "").split(),
        )

        keyword_nonfunctions = MatchFirst((
            UNION,
            ALL,
            INTERSECT,
            EXCEPT,
            COLLATE,
            ASC,
            DESC,
            ON,
            USING,
            NATURAL,
            INNER,
            CROSS,
            LEFT,
            RIGHT,
            OUTER,
            FULL,
            JOIN,
            AS,
            INDEXED,
            NOT,
            SELECT,
            DISTINCT,
            FROM,
            WHERE,
            GROUP,
            BY,
            HAVING,
            ORDER,
            BY,
            LIMIT,
            OFFSET,
            CAST,
            ISNULL,
            NOTNULL,
            NULL,
            IS,
            BETWEEN,
            ELSE,
            END,
            CASE,
            WHEN,
            THEN,
            EXISTS,
            COLLATE,
            IN,
            LIKE,
            GLOB,
            REGEXP,
            MATCH,
            STRUCT,
            WINDOW,
        ))

        keyword = keyword_nonfunctions | MatchFirst((
            ESCAPE,
            CURRENT_TIME,
            CURRENT_DATE,
            CURRENT_TIMESTAMP,
            DATE_ADD,
            DATE_SUB,
            ADDDATE,
            SUBDATE,
            INTERVAL,
            STRING_AGG,
            REGEXP_EXTRACT,
            SPLIT,
            ORDINAL,
            UNNEST,
            SAFE_CAST,
            PARTITION,
            TIMESTAMP_ADD,
            TIMESTAMP_SUB,
            ARRAY,
            GENERATE_ARRAY,
            GENERATE_DATE_ARRAY,
            GENERATE_TIMESTAMP_ARRAY,
        ))

        identifier_word = Word(alphas + "_@#", alphanums + "@$#_")
        identifier = ~keyword + identifier_word.copy()
        collation_name = identifier.copy()
        # NOTE: Column names can be keywords.  Doc says they cannot, but in practice it seems to work.
        column_name = identifier.copy()
        cast_to = identifier.copy()
        qualified_column_name = Group(
            delimitedList(column_name, delim=".") + Optional(
                Suppress("::") + delimitedList(cast_to("cast"), delim="::")))
        # NOTE: As with column names, column aliases can be keywords, e.g. functions like `current_time`.  Other
        # keywords, e.g. `from` make parsing pretty difficult (e.g. "SELECT a from from b" is confusing.)
        column_alias = ~keyword_nonfunctions + column_name.copy()
        table_name = identifier.copy()
        table_alias = identifier.copy()
        index_name = identifier.copy()
        function_name = identifier.copy()
        parameter_name = identifier.copy()
        # NOTE: The expression in a CASE statement can be an integer.  E.g. this is valid SQL:
        # select CASE 1 WHEN 1 THEN -1 ELSE -2 END from test_table
        unquoted_case_identifier = ~keyword + Word(alphanums + "$_")
        quoted_case_identifier = ~keyword + (QuotedString('"') ^ Suppress("`")
                                             + CharsNotIn("`") + Suppress("`"))
        case_identifier = quoted_case_identifier | unquoted_case_identifier
        case_expr = (Optional(case_identifier + Suppress(".")) +
                     Optional(case_identifier + Suppress(".")) +
                     case_identifier)

        # expression
        expr = Forward().setName("expression")

        integer = Regex(r"[+-]?\d+")
        numeric_literal = Regex(r"[+-]?\d*\.?\d+([eE][+-]?\d+)?")
        string_literal = QuotedString("'") | QuotedString('"') | QuotedString(
            "`")
        regex_literal = "r" + string_literal
        blob_literal = Regex(r"[xX]'[0-9A-Fa-f]+'")
        date_or_time_literal = (DATE | TIME | DATETIME
                                | TIMESTAMP) + string_literal
        literal_value = (
            numeric_literal
            | string_literal
            | regex_literal
            | blob_literal
            | date_or_time_literal
            | NULL
            | CURRENT_TIME + Optional(LPAR + Optional(string_literal) + RPAR)
            | CURRENT_DATE + Optional(LPAR + Optional(string_literal) + RPAR)
            | CURRENT_TIMESTAMP +
            Optional(LPAR + Optional(string_literal) + RPAR))
        bind_parameter = Word("?",
                              nums) | Combine(oneOf(": @ $") + parameter_name)
        type_name = oneOf(
            """TEXT REAL INTEGER BLOB NULL TIMESTAMP STRING DATE
            INT64 NUMERIC FLOAT64 BOOL BYTES DATETIME GEOGRAPHY TIME ARRAY
            STRUCT""",
            caseless=True,
        )
        date_part = oneOf(
            """DAY DAY_HOUR DAY_MICROSECOND DAY_MINUTE DAY_SECOND
            HOUR HOUR_MICROSECOND HOUR_MINUTE HOUR_SECOND MICROSECOND MINUTE
            MINUTE_MICROSECOND MINUTE_SECOND MONTH QUARTER SECOND
            SECOND_MICROSECOND WEEK YEAR YEAR_MONTH""",
            caseless=True,
        )
        datetime_operators = (DATE_ADD | DATE_SUB | ADDDATE | SUBDATE
                              | TIMESTAMP_ADD | TIMESTAMP_SUB)

        def invalid_date_add(s, loc, tokens):
            prev_newline = s[:loc].rfind('\n')
            prev_prev_newline = s[:prev_newline].rfind('\n')
            if '--ignore' in s[prev_prev_newline:prev_newline]:
                pass
            else:
                raise RuntimeError(
                    "{} is not valid, did you mean 'date_add'".format(
                        tokens[0]))

        #bad_datetime_operators = (
        #    CaselessKeyword('dateadd').setParseAction(invalid_date_add)
        #)

        grouping_term = expr.copy()
        ordering_term = Group(
            expr("order_key") + Optional(COLLATE + collation_name("collate")) +
            Optional(ASC | DESC)("direction"))("ordering_term")

        function_arg = expr.copy()("function_arg")
        function_args = Optional(
            "*"
            | Optional(DISTINCT) + delimitedList(function_arg) +
            Optional((RESPECT | IGNORE) + NULLS))("function_args")
        function_call = ((function_name | keyword)("function_name") + LPAR +
                         Group(function_args)("function_args_group") +
                         RPAR)('function')

        navigation_function_name = (FIRST_VALUE
                                    | LAST_VALUE
                                    | NTH_VALUE
                                    | LEAD
                                    | LAG
                                    | PERCENTILE_CONT
                                    | PRECENTILE_DISC)
        aggregate_function_name = (ANY_VALUE
                                   | ARRAY_AGG
                                   | ARRAY_CONCAT_AGG
                                   | AVG
                                   | BIT_AND
                                   | BIT_OR
                                   | BIT_XOR
                                   | COUNT
                                   | COUNTIF
                                   | LOGICAL_AND
                                   | LOGICAL_OR
                                   | MAX
                                   | MIN
                                   | STRING_AGG
                                   | SUM)
        statistical_aggregate_function_name = (CORR
                                               | COVAR_POP
                                               | COVAR_SAMP
                                               | STDDEV_POP
                                               | STDDEV_SAMP
                                               | STDDEV
                                               | VAR_POP
                                               | VAR_SAMP
                                               | VARIANCE)
        numbering_function_name = (RANK | DENSE_RANK | PERCENT_RANK | CUME_DIST
                                   | NTILE | ROW_NUMBER)
        analytic_function_name = (
            navigation_function_name
            | aggregate_function_name
            | statistical_aggregate_function_name
            | numbering_function_name)("analytic_function_name")
        partition_expression_list = delimitedList(grouping_term)(
            "partition_expression_list")
        window_frame_boundary_start = (UNBOUNDED + PRECEDING
                                       | numeric_literal +
                                       (PRECEDING | FOLLOWING)
                                       | CURRENT + ROW)
        window_frame_boundary_end = (UNBOUNDED + FOLLOWING
                                     | numeric_literal +
                                     (PRECEDING | FOLLOWING)
                                     | CURRENT + ROW)
        window_frame_clause = (ROWS | RANGE) + (
            ((UNBOUNDED + PRECEDING) | (numeric_literal + PRECEDING) |
             (CURRENT + ROW))
            | (BETWEEN + window_frame_boundary_start + AND +
               window_frame_boundary_end))
        window_name = identifier.copy()("window_name")
        window_specification = (
            Optional(window_name) +
            Optional(PARTITION + BY + partition_expression_list) +
            Optional(ORDER + BY + delimitedList(ordering_term)) +
            Optional(window_frame_clause)("window_specification"))
        analytic_function = (
            analytic_function_name + LPAR +
            function_args.setParseAction(debug) + RPAR + OVER +
            (window_name | LPAR + Optional(window_specification)
             ('window') + RPAR))("analytic_function")

        string_agg_term = (STRING_AGG + LPAR +
                           Optional(DISTINCT)('has_distinct') +
                           expr('string_agg_expr') +
                           Optional(COMMA + string_literal('delimiter')) +
                           Optional(ORDER + BY + expr + Optional(ASC | DESC) +
                                    Optional(LIMIT + integer)) +
                           RPAR)("string_agg")
        array_literal = (
            Optional(ARRAY + Optional(LT + delimitedList(type_name) + GT)) +
            LBRACKET + delimitedList(expr) + RBRACKET)
        interval = INTERVAL + expr + date_part
        array_generator = (GENERATE_ARRAY + LPAR + numeric_literal + COMMA +
                           numeric_literal + COMMA + numeric_literal + RPAR)
        date_array_generator = (
            (GENERATE_DATE_ARRAY | GENERATE_TIMESTAMP_ARRAY) + LPAR +
            expr("start_date") + COMMA + expr("end_date") +
            Optional(COMMA + interval) + RPAR)

        explicit_struct = (
            STRUCT + Optional(LT + delimitedList(type_name) + GT) + LPAR +
            Optional(delimitedList(expr + Optional(AS + identifier))) + RPAR)

        case_when = WHEN + expr.copy()("when")
        case_then = THEN + expr.copy()("then")
        case_clauses = Group(ZeroOrMore(case_when + case_then))
        case_else = ELSE + expr.copy()("_else")
        case_stmt = (CASE + Optional(case_expr.copy()) +
                     case_clauses("case_clauses") + Optional(case_else) +
                     END)("case")

        class SelectStatement(SemanticToken):
            def __init__(self, tokens):
                self.tokens = tokens

            def getName(self):
                return 'select'

            @classmethod
            def parse(cls, tokens):
                return SelectStatement(tokens)

        class Function(SemanticToken):
            def __init__(self, func, tokens):
                self.func = func
                self.tokens = tokens

            def getName(self):
                return 'function'

            @classmethod
            def parse(cls, tokens):
                method = tokens[0]
                args = tokens[2:-1]
                return Function(method, args)

            def __repr__(self):
                return "func:{}({})".format(self.func, self.tokens)

        class WindowFunction(Function):
            def __init__(self, func, tokens, func_args, partition_args,
                         order_args, window_args):
                self.func = func
                self.tokens = tokens
                self.func_args = func_args
                self.partition_args = partition_args
                self.order_args = order_args
                self.window_args = window_args

            def getName(self):
                return 'window function'

            @classmethod
            def parse(cls, tokens):
                return WindowFunction(tokens.analytic_function_name, tokens,
                                      tokens.function_args,
                                      tokens.partition_expression_list,
                                      tokens.ordering_term,
                                      tokens.window_specification)

            def __repr__(self):
                return "window:{}({})over({}, {}, {})".format(
                    self.func, self.func_args, self.partition_args,
                    self.order_args, self.window_args)

        class CaseStatement(SemanticToken):
            def __init__(self, tokens, whens, _else):
                self.tokens = tokens
                self.whens = whens
                self._else = _else

            def getName(self):
                return 'case'

            @classmethod
            def parse_whens(self, tokens):
                whens = []
                while len(tokens) > 0:
                    _, when, _, then, *tokens = tokens
                    whens.append({"when": when, "then": then})
                return whens

            @classmethod
            def parse(cls, tokens):
                whens = tokens[1]
                _else = tokens[3]
                return CaseStatement(tokens, cls.parse_whens(whens), _else)

            def __repr__(self):
                return "<case statement ({}, {})>".format(
                    len(self.whens), self._else)

        expr_term = (
            (analytic_function)("analytic_function").setParseAction(
                WindowFunction.parse)
            | (CAST + LPAR + expr + AS + type_name + RPAR)("cast")
            | (SAFE_CAST + LPAR + expr + AS + type_name + RPAR)("safe_cast")
            | (Optional(EXISTS) + LPAR + ungrouped_select_stmt +
               RPAR)("subselect")
            | (literal_value)("literal")
            | (bind_parameter)("bind_parameter")
            | (EXTRACT + LPAR + expr + FROM + expr + RPAR)("extract")
            | case_stmt.setParseAction(CaseStatement.parse)
            | (datetime_operators + LPAR + expr + COMMA + interval +
               RPAR)("date_operation")
            #| (bad_datetime_operators + LPAR + expr + COMMA + interval + RPAR)
            | string_agg_term("string_agg_term")
            | array_literal("array_literal")
            | array_generator("array_generator")
            | date_array_generator("date_array_generator")
            | explicit_struct("explicit_struct")
            | function_call("function_call").setParseAction(Function.parse)
            | qualified_column_name("column").setParseAction(
                lambda x: ".".join([str(i) for i in x[0]]))
        ).setParseAction(debug) + Optional(LBRACKET +
                                           (OFFSET | ORDINAL) + LPAR + expr +
                                           RPAR + RBRACKET)("offset_ordinal")

        struct_term = (LPAR + delimitedList(expr_term) + RPAR)

        KNOWN_OPS = [(BETWEEN, AND),
                     Literal("||").setName("concat"),
                     Literal("*").setName("mul"),
                     Literal("/").setName("div"),
                     Literal("+").setName("add"),
                     Literal("-").setName("sub"),
                     Literal("<>").setName("neq"),
                     Literal(">").setName("gt"),
                     Literal("<").setName("lt"),
                     Literal(">=").setName("gte"),
                     Literal("<=").setName("lte"),
                     Literal("=").setName("eq"),
                     Literal("==").setName("eq"),
                     Literal("!=").setName("neq"),
                     IN.setName("in"),
                     IS.setName("is"),
                     LIKE.setName("like"),
                     OR.setName("or"),
                     AND.setName("and"),
                     NOT.setName('not')]

        class Operator(SemanticToken):
            def __init__(self, op, assoc, name, tokens):
                self.op = op
                self.assoc = assoc
                self.name = name
                self.tokens = tokens

            def getName(self):
                return 'operator'

            @classmethod
            def parse(cls, tokens):
                # ARRANGE INTO {op: params} FORMAT
                toks = tokens[0]
                if toks[1] in KNOWN_OPS:
                    op = KNOWN_OPS[KNOWN_OPS.index(toks[1])]
                    if toks.subselect:
                        import ipdb
                        ipdb.set_trace()
                    return Operator(op, 'binary', op.name, [toks[0], toks[2:]])
                else:
                    import ipdb
                    ipdb.set_trace()
                    return tokens

            @classmethod
            def parse_unary(cls, tokens):
                toks = tokens[0]
                if toks[0] in KNOWN_OPS:
                    op = KNOWN_OPS[KNOWN_OPS.index(toks[0])]
                else:
                    import ipdb
                    ipdb.set_trace()
                return Operator(op, 'unary', op.name, [toks[1:]])

            @classmethod
            def parse_ternary(cls, tokens):
                import ipdb
                ipdb.set_trace()

            def __repr__(self):
                return "<operator({}, {}, {})>".format(self.op, self.assoc,
                                                       self.tokens)

        UNARY, BINARY, TERNARY = 1, 2, 3
        expr << infixNotation(
            (expr_term | struct_term),
            [
                (oneOf("- + ~") | NOT, UNARY, opAssoc.RIGHT,
                 Operator.parse_unary),
                (ISNULL | NOTNULL | NOT + NULL, UNARY, opAssoc.LEFT,
                 Operator.parse_unary),
                ("||", BINARY, opAssoc.LEFT, Operator.parse),
                (oneOf("* / %"), BINARY, opAssoc.LEFT, Operator.parse),
                (oneOf("+ -"), BINARY, opAssoc.LEFT, Operator.parse),
                (oneOf("<< >> & |"), BINARY, opAssoc.LEFT, Operator.parse),
                (oneOf("= > < >= <= <> != !< !>"), BINARY, opAssoc.LEFT,
                 Operator.parse),
                (IS + Optional(NOT)
                 | Optional(NOT) + IN
                 | Optional(NOT) + LIKE
                 | GLOB
                 | MATCH
                 | REGEXP, BINARY, opAssoc.LEFT, Operator.parse),
                ((BETWEEN, AND), TERNARY, opAssoc.LEFT,
                 Operator.parse_ternary),
                (Optional(NOT) + IN + LPAR +
                 Group(ungrouped_select_stmt | delimitedList(expr)) + RPAR,
                 UNARY, opAssoc.LEFT, Operator.parse_unary),
                (AND, BINARY, opAssoc.LEFT, Operator.parse),
                (OR, BINARY, opAssoc.LEFT, Operator.parse),
            ],
            lpar=Literal('('),
            rpar=Literal(')'),
        )
        quoted_expr = (expr ^ Suppress('"') + expr + Suppress('"')
                       ^ Suppress("'") + expr + Suppress("'")
                       ^ Suppress("`") + expr + Suppress("`"))("quoted_expr")

        compound_operator = (UNION + Optional(ALL | DISTINCT)
                             | INTERSECT + DISTINCT
                             | EXCEPT + DISTINCT
                             | INTERSECT
                             | EXCEPT)("compound_operator")

        join_constraint = Group(
            Optional(ON + expr
                     | USING + LPAR +
                     Group(delimitedList(qualified_column_name)) +
                     RPAR))("join_constraint")

        join_op = (COMMA
                   | Group(
                       Optional(NATURAL) + Optional(INNER
                                                    | CROSS
                                                    | LEFT + OUTER
                                                    | LEFT
                                                    | RIGHT + OUTER
                                                    | RIGHT
                                                    | FULL + OUTER
                                                    | OUTER
                                                    | FULL) + JOIN))("join_op")

        join_source = Forward()

        # We support three kinds of table identifiers.
        #
        # First, dot delimited info like project.dataset.table, where
        # each component follows the rules described in the BigQuery
        # docs, namely:
        #  Contain letters (upper or lower case), numbers, and underscores
        #
        # Second, a dot delimited quoted string.  Since it's quoted, we'll be
        # liberal w.r.t. what characters we allow.  E.g.:
        #  `project.dataset.name-with-dashes`
        #
        # Third, a series of quoted strings, delimited by dots, e.g.:
        #  `project`.`dataset`.`name-with-dashes`
        #
        # We won't attempt to support combinations, like:
        #  project.dataset.`name-with-dashes`
        #  `project`.`dataset.name-with-dashes`

        def record_table_identifier(t):
            identifier_list = t.asList()
            padded_list = [None] * (3 - len(identifier_list)) + identifier_list
            cls._table_identifiers.add(tuple(padded_list))

        standard_table_part = ~keyword + Word(alphanums + "_")
        standard_table_identifier = (
            Optional(standard_table_part("project") + Suppress(".")) +
            Optional(standard_table_part("dataset") + Suppress(".")) +
            standard_table_part("table")
        ).setParseAction(lambda t: record_table_identifier(t))

        quoted_project_part = (
            Suppress('"') + CharsNotIn('"') + Suppress('"')
            | Suppress("'") + CharsNotIn("'") + Suppress("'")
            | Suppress("`") + CharsNotIn("`") + Suppress("`"))
        quoted_table_part = (Suppress('"') + CharsNotIn('".') + Suppress('"')
                             | Suppress("'") + CharsNotIn("'.") + Suppress("'")
                             |
                             Suppress("`") + CharsNotIn("`.") + Suppress("`"))
        quoted_table_parts_identifier = (
            Optional(quoted_project_part("project") + Suppress(".")) +
            Optional(quoted_table_part("dataset") + Suppress(".")) +
            quoted_table_part("table")
        ).setParseAction(lambda t: record_table_identifier(t))

        def record_quoted_table_identifier(t):
            identifier_list = t.asList()[0].split(".")
            first = ".".join(identifier_list[0:-2]) or None
            second = identifier_list[-2]
            third = identifier_list[-1]
            identifier_list = [first, second, third]
            padded_list = [None] * (3 - len(identifier_list)) + identifier_list
            cls._table_identifiers.add(tuple(padded_list))

        quotable_table_parts_identifier = (
            Suppress('"') + CharsNotIn('"') + Suppress('"')
            | Suppress("'") + CharsNotIn("'") + Suppress("'")
            | Suppress("`") + CharsNotIn("`") + Suppress("`")
        ).setParseAction(lambda t: record_quoted_table_identifier(t))

        table_identifier = (standard_table_identifier
                            | quoted_table_parts_identifier
                            | quotable_table_parts_identifier)

        def record_ref(t):
            lol = [t.op] + t.ref_target.asList()
            cls._with_aliases.add(tuple(lol))
            cls._table_identifiers.add(tuple(lol))

        ref_target = identifier.copy()
        single_source = (
            # ref + source statements
            ((Suppress('{{') +
              (CaselessKeyword('ref') | CaselessKeyword("source"))("op") + LPAR
              + delimitedList((Suppress("'") | Suppress('"')) + ref_target +
                              (Suppress("'") | Suppress('"')))("ref_target") +
              RPAR + Suppress("}}")).setParseAction(record_ref)
             | table_identifier) +
            Optional(Optional(AS) + table_alias("table_alias*")) +
            Optional(FOR + SYSTEMTIME + AS + OF + string_literal) +
            Optional(INDEXED + BY + index_name("name") | NOT + INDEXED)
            ("index")
            | (LPAR + ungrouped_select_stmt + RPAR +
               Optional(Optional(AS) + table_alias))('subquery')
            | (LPAR + join_source + RPAR)
            | (UNNEST + LPAR + expr + RPAR) +
            Optional(Optional(AS) + column_alias))

        join_source << (Group(single_source + OneOrMore(
            Group(join_op + single_source + join_constraint)('joins*')))
                        | single_source)('sources*')

        over_partition = (
            PARTITION + BY +
            delimitedList(partition_expression_list))("over_partition")
        over_order = ORDER + BY + delimitedList(ordering_term)
        over_unsigned_value_specification = expr
        over_window_frame_preceding = (
            UNBOUNDED + PRECEDING
            | over_unsigned_value_specification + PRECEDING
            | CURRENT + ROW)
        over_window_frame_following = (
            UNBOUNDED + FOLLOWING
            | over_unsigned_value_specification + FOLLOWING
            | CURRENT + ROW)
        over_window_frame_bound = (over_window_frame_preceding
                                   | over_window_frame_following)
        over_window_frame_between = (BETWEEN + over_window_frame_bound + AND +
                                     over_window_frame_bound)
        over_window_frame_extent = (over_window_frame_preceding
                                    | over_window_frame_between)
        over_row_or_range = (ROWS | RANGE) + over_window_frame_extent
        over = (OVER + LPAR + Optional(over_partition) + Optional(over_order) +
                Optional(over_row_or_range) + RPAR)("over")

        result_column = (
            Optional(table_name + ".") + "*" +
            Optional(EXCEPT + LPAR + delimitedList(column_name) + RPAR)
            | Group(quoted_expr + Optional(over) +
                    Optional(Optional(AS) + column_alias('alias'))))

        window_select_clause = (WINDOW + identifier + AS + LPAR +
                                window_specification + RPAR)

        select_core = (
            SELECT + Optional(DISTINCT | ALL) +
            Group(delimitedList(result_column))("columns") +
            Optional(FROM - join_source("from*")) +
            Optional(WHERE + expr('where')) +
            Optional(GROUP + BY +
                     Group(delimitedList(grouping_term))("group_by_terms")) +
            Optional(HAVING + expr("having_expr")) +
            Optional(ORDER + BY +
                     Group(delimitedList(ordering_term))("order_by_terms")) +
            Optional(delimitedList(window_select_clause)))
        grouped_select_core = select_core | (LPAR + select_core + RPAR)

        ungrouped_select_stmt << (
            grouped_select_core +
            ZeroOrMore(compound_operator + grouped_select_core) +
            Optional(LIMIT +
                     (Group(expr + OFFSET + expr) | Group(expr + COMMA + expr)
                      | expr)("limit")))("select")
        select_stmt = ungrouped_select_stmt | (LPAR + ungrouped_select_stmt +
                                               RPAR)

        # define comment format, and ignore them
        sql_comment = oneOf("-- #") + restOfLine | cStyleComment
        select_stmt.ignore(sql_comment)

        def record_with_alias(t):
            identifier_list = t.asList()
            padded_list = [None] * (3 - len(identifier_list)) + identifier_list
            cls._with_aliases.add(tuple(padded_list))

        with_stmt = Forward().setName("with statement")
        with_clause = Group(
            identifier.setParseAction(lambda t: record_with_alias(t))
            ('cte_name') - AS - LPAR + (select_stmt | with_stmt) - RPAR)
        with_core = WITH + delimitedList(with_clause)('ctes')
        with_stmt << (with_core - ~Literal(',') + ungrouped_select_stmt)
        with_stmt.ignore(sql_comment)

        select_or_with = select_stmt | with_stmt
        select_or_with_parens = LPAR + select_or_with - RPAR

        cls._parser = select_or_with | select_or_with_parens
        return cls._parser
Example #8
    def build(self):

        # ------------------------------------------
        #   C. building blocks
        # ------------------------------------------
        self.termop = Regex(
            "|".join(self.neighbourhood_symbols),
            re.IGNORECASE).setParseAction(upcaseTokens).setName("termop")
        termword = Word(self.unicode_printables + self.separators +
                        self.wildcards).setName("term")
        termword_termop = (termword + OneOrMore(self.termop + termword))

        # ------------------------------------------
        #   D. triple
        # ------------------------------------------

        index = Word(alphanums).setName("index")

        #index = Word(indexchars).setName("index")
        #SolrProximitySuffix = Suppress(Optional(Word('~') + Word(nums)))

        binop = oneOf(self.binop_symbols, caseless=True).setName("binop")
        term = (

            # Attempt to parse {!complexphrase}text:"((aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*))"~6 ...
            # ... but failed.
            #Combine(quotedString.setParseAction(removeQuotes) + SolrProximitySuffix).setName("term") ^

            # term is a quoted string, easy peasy
            quotedString.setName("term") ^

            # term is just a termword, easy too
            termword.setName("term") ^

            # term contains neighbourhood operators, so should have been wrapped in parenthesis
            Combine('(' + Suppress(ZeroOrMore(' ')) + termword_termop +
                    Suppress(ZeroOrMore(' ')) + ')').setName("term") ^

            # convenience/gracefulness: we also allow terms containing
            # neighbourhood operators without being wrapped in parenthesis
            Combine(termword_termop).setName("term"))

        # ------------------------------------------
        #   E. condition
        # ------------------------------------------
        cqlStatement = Forward()

        # Parse regular cql condition notation 'index=term'.
        cqlConditionBase = Group(

            # a regular triple
            (index + binop + term).setResultsName("triple") |

            # a regular subquery
            ("(" + cqlStatement + ")").setResultsName("subquery"))

        # Parse value shortcut notations like 'index=(term)' or 'index=(term1 and term2 or term3)'.
        cqlConditionShortcut = Group(

            # a triple in value shortcut notation (contains only the single term)
            # "term + NotAny(binop)" helps giving proper error messages like
            # "ParseException: Expected term (at char 4)" for erroneous queries like "foo="
            (term + NotAny(binop)).setResultsName("triple-short") |

            # a subquery containing values in shortcut notation
            (index + binop + "(" + cqlStatement +
             ")").setResultsName("subquery-short"))

        #cqlCondition = cqlConditionBase
        cqlCondition = cqlConditionBase | cqlConditionShortcut

        # ------------------------------------------
        #   F. statement
        # ------------------------------------------

        cqlStatement << cqlCondition + ZeroOrMore(self.booleans_or +
                                                  cqlStatement)

        # apply SQL comment format
        cqlComment = "--" + restOfLine
        cqlStatement.ignore(cqlComment)

        self.parser = cqlStatement
Example #9
term << operatorPrecedence(
    number | predicate | variable,
    [
        (oneOf("+ -"), 1, opAssoc.RIGHT, FOLUnOp),
        (oneOf("^"), 2, opAssoc.LEFT, FOLBinOp),
        (oneOf("* /"), 2, opAssoc.LEFT, FOLBinOp),
        (oneOf("+ -"), 2, opAssoc.LEFT, FOLBinOp),
        (oneOf("< <= > >= "), 2, opAssoc.LEFT, FOLBinOp),
    ],
)


# main parser for FOL formula
formula = Forward()
formula.ignore(comment)

forall_expression = Group(
    forall.setResultsName("quantifier")
    + delimitedList(variable).setResultsName("vars")
    + colon
    + formula.setResultsName("args")
).setParseAction(FOLQuant)
exists_expression = Group(
    exists.setResultsName("quantifier")
    + delimitedList(variable).setResultsName("vars")
    + colon
    + formula.setResultsName("args")
).setParseAction(FOLQuant)

operand = forall_expression | exists_expression | boolean | term
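
The fragment stops after operand, leaving the formula Forward open. A plausible continuation in the same style is sketched below; the logical-connective symbols are assumptions, and only operatorPrecedence, FOLUnOp and FOLBinOp are carried over from the code above:

# Sketch only: close the formula Forward over assumed logical connectives.
formula << operatorPrecedence(
    operand,
    [
        (oneOf("~ !"), 1, opAssoc.RIGHT, FOLUnOp),      # negation
        (oneOf("& and"), 2, opAssoc.LEFT, FOLBinOp),    # conjunction
        (oneOf("| or"), 2, opAssoc.LEFT, FOLBinOp),     # disjunction
        (oneOf("-> <->"), 2, opAssoc.RIGHT, FOLBinOp),  # (bi)implication
    ],
)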
Example #10
def performIPOperatorSanityCheck(componentName, propagationDimension, operatorCodeSlice, codeBlock):
    """
    Check that the user hasn't tried to use an IP operator where an IP operator cannot be used.
    
    IP operators must be diagonal, so one cannot have expressions of the form ``dy_dt = L[x];`` for IP operators.
    This is valid for EX operators, but not for IP. This is a common mistake for users to make, and so we should
    do our best to spot it and report the error. Another mistake users make is trying to multiply the operator,
    for example ``dy_dt = i*L[y];``. This code does a sophisticated validation by constructing a parse tree for
    each statement in the code taking into account operator precedence. This sanity checking is even able to pick
    up problems such as ``dphi_dt = i*(V*phi + U*mod2(phi)*phi + T[phi]);``.
    If the user's code passes this test, then it is a reasonable assumption that they are using IP operators safely.
    """
    
    operatorString = codeBlock.codeString[operatorCodeSlice]
    
    expr = Forward()
    
    operatorKeyword = Keyword(operatorString).setResultsName('targetOperator')
    
    operand = operatorKeyword \
                | (identifier + Group('(' + delimitedList(expr) + ')')) \
                | (identifier + Group(OneOrMore('[' + expr + ']'))) \
                | quotedString.copy() \
                | identifier \
                | numericConstant
    operand.ignore(cppStyleComment.copy())
    
    expr << operatorPrecedence(
        operand,
        [
            (oneOf('++ --'), 1, opAssoc.LEFT),
            (oneOf('. ->'), 2, opAssoc.LEFT),
            (~oneOf('-> -= += *= &= |=') + oneOf('+ - ! ~ * & ++ --'), 1, opAssoc.RIGHT),
            (~oneOf('*= /= %=') + oneOf('* / %'), 2, opAssoc.LEFT),
            (~oneOf('++ -- -> -= +=') + oneOf('+ -'), 2, opAssoc.LEFT),
# Although the operators below don't all have the same precedence, as we don't actually
# care about them as they are all invalid uses of the IP operator, we can cheat and lump
# them together
            (~oneOf('<<= >>= &= |=') + oneOf('<< >> < <= > >= == != & ^ | && ||'), 2, opAssoc.LEFT),
# Correct ordering
            # (~oneOf('<<= >>=') + oneOf('<< >>'), 2, opAssoc.LEFT),
            # (~oneOf('<< >> <<= >>=') + oneOf('< <= > >='), 2, opAssoc.LEFT),
            # (oneOf('== !='), 2, opAssoc.LEFT),
            # (~oneOf('&& &=') + '&', 2, opAssoc.LEFT),
            # ('^', 2, opAssoc.LEFT),
            # (~oneOf('|| |=') + '|', 2, opAssoc.LEFT),
            # ('&&', 2, opAssoc.LEFT),
            # ('||', 2, opAssoc.LEFT),
            (('?',':'), 3, opAssoc.RIGHT),
            (~Literal('==') + oneOf('= += -= *= /= %= <<= >>= &= ^= |= =>'), 2, opAssoc.RIGHT),
            (',', 2, opAssoc.LEFT),
        ]
    )
    expr.ignore(cppStyleComment.copy())
    
    statement = expr + Suppress(';')
    
    stack = []
    expectedAssignmentVariable = 'd%(componentName)s_d%(propagationDimension)s' % locals()
    
    def validateStack():
        """
        It is the job of this function to validate the operations that the located operator is involved in.
        The stack describes the part of the parse tree in which the operator was found. The first element in the stack
        is the outermost operation, and the last the innermost. The last element is guaranteed to be the operator itself.
        """
        # Reverse the stack as we want to search the parse tree from inner-most expression to outer-most.
        stack.reverse()
        assignmentHit = False
        errorMessageCommon = "Due to the way IP operators work, they can only contribute to the derivative of the variable " \
            "they act on, i.e. dx_dt = L[x]; not dy_dt = L[x];\n\n"
        
        # We don't need to check the first element of the stack
        # as we are guaranteed that it is the operator itself. This will be useful for determining
        # which part of the parse tree we should be looking at.
        for idx, node in enumerate(stack[1:]):
            if len(node) == 1: continue
            # idx is the index in the stack of the next element *deeper* in the parse tree.
            previousStackEntry = stack[idx]
            if not isinstance(stack[idx], basestring):
                previousStackEntry = previousStackEntry.asList()
            binaryOpIdx = node.asList().index(previousStackEntry) - 1
            if binaryOpIdx < 0: binaryOpIdx = 1
            # Unary '+' is safe.
            if node[0] == '+': continue
            # Binary '+' is safe.
            if node[binaryOpIdx] == '+': continue
            # Binary '-' is safe if the operator is the first argument.
            if node[binaryOpIdx] == '-' and node.asList().index(previousStackEntry) == 0: continue
            # Assignment is safe if there is only one, and if it's to the right variable
            if node[binaryOpIdx] in ['=', '+=']:
                if node[0] == expectedAssignmentVariable:
                    assignmentHit = True
                    continue
                else:
                    return errorMessageCommon + "In this case, you should probably use an EX operator instead of an "\
                            "IP operator."
            else:
                return errorMessageCommon + "You appear to be using the IP operator in an unsafe operation. " \
                        "The most likely cause is trying to multiply it by something, e.g. dphi_dt = 0.5*L[phi]; "\
                        "If this is the cause and you are multiplying by a constant, just move the constant into the "\
                        "definition of the operator itself. i.e. L = -0.5*kx*kx; If you are multiplying by something "\
                        "that isn't constant e.g. dphi_dt = x*L[phi]; where x is a dimension, you must use an EX operator "\
                        "instead."
        if not assignmentHit:
            return errorMessageCommon + "You appear to be missing the assignment for this particular operator."
        return True
    
    class FoundTargetException(Exception): pass
    
    def findOperatorInParseTree(results):
        stack.append(results)
        if 'targetOperator' in results:
            stack.append(results.targetOperator)
            raise FoundTargetException()
        for item in results:
            if isinstance(item, basestring): continue
            findOperatorInParseTree(item)
        del stack[-1]
    
    try:
        foundOperator = False
        for tokens, start, end in statement.scanString(codeBlock.codeString):
            if start > operatorCodeSlice.stop or end < operatorCodeSlice.start: continue
            try:
                findOperatorInParseTree(tokens)
            except FoundTargetException:
                foundOperator = True
                result = validateStack()
                if result is not True:
                    raise CodeParserException(
                        codeBlock,
                        operatorCodeSlice.start,
                        result + ("\n\nThe conflict was caused by the operator '%s'." \
                        % operatorString)
                    )
        if not foundOperator:
            parserWarning(
                codeBlock.xmlElement,
                "Unable to check the safety of your IP operator '%s' because the containing expression could not be found. "
                "Please send a copy of your script to [email protected] so this problem can be investigated." \
                % operatorString
            )
    except RuntimeError:
        parserWarning(
            codeBlock.xmlElement,
            "Unable to check the safety of your IP operator because your code is too deeply nested."
        )
Example #11
identifier = Word(alphas + '_', alphanums + '_')
numericConstant = Regex(r'\b((0(x|X)[0-9a-fA-F]*)|(([0-9]+\.?[0-9]*)|(\.[0-9]+))((e|E)(\+|-)?[0-9]+)?)(L|l|UL|ul|u|U|F|f|ll|LL|ull|ULL)?\b')

ignoreExpr = cppStyleComment.copy() | quotedString.copy()

baseExpr = Forward()

arrayAccess = originalTextFor(nestedExpr('[', ']', baseExpr, ignoreExpr))
parenthisedExpression = originalTextFor(nestedExpr('(', ')', baseExpr, ignoreExpr))
functionCall = nestedExpr('(', ')', delimitedList(baseExpr), ignoreExpr)
alphaNumPlusSafePunctuation = alphanums + '!#$%&\\*+-./:;<=>@^_`{|}~'

baseExpr << OneOrMore(originalTextFor(identifier + functionCall) | quotedString.copy() \
                | identifier | numericConstant | arrayAccess | parenthisedExpression \
                | Word(alphaNumPlusSafePunctuation))
baseExpr.ignore(cppStyleComment.copy())


def targetComponentsForOperatorsInString(operatorNames, codeBlock):
    """
    Return a list of pairs of operator names and their targets that are in `codeString`.
    The valid operator names searched for are `operatorNames`. For example, if 'L' is in `operatorNames`,
    then in the code ``L[phi]`` the return value would be ``('L', 'phi', slice(firstCharacterIndex, lastCharacterIndex))``.
    """
    parser = MatchFirst(Keyword(operatorName) for operatorName in operatorNames).setResultsName('name') \
                + Optional(nestedExpr('[', ']', baseExpr, ignoreExpr).setResultsName('target'))
    parser.ignore(cppStyleComment.copy())
    parser.ignore(quotedString.copy())
    results = []
    for tokens, start, end in parser.scanString(codeBlock.codeString):
        if 'target' in tokens:
Example #12
from pyparsing import (Regex, OneOrMore, Forward, delimitedList, restOfLine, Group as Grp,
                       Suppress)
from musicobject import Tone, Group, Transformed


musicobject = Forward()

comment = '#' + restOfLine
musicobject.ignore(comment)

#fraction = Regex(r'(\d*[./]?\d*)')
number = Regex(r'[\d./]+')
number.setParseAction(lambda s, l, t: [float(eval(t[0]))])

frequency_symbol = Regex(r'[abcdefg_]\d?[#-]?')
frequency_number = number
frequency = frequency_number ^ frequency_symbol

duration = number

tone = frequency ^ (Suppress('(') + frequency + Suppress(',') + duration + Suppress(')'))
tone.setParseAction(lambda s, l, t: Tone(*t))

group = Suppress('{') + delimitedList(Grp(OneOrMore(musicobject)), ',') + Suppress('}')
group.setParseAction(lambda s, l, t: Group(t))

transformed = tone + '*' + musicobject
transformed.setParseAction(lambda s, l, t: Transformed(t[0], t[2]))
musicobject << (tone ^ group ^ transformed)
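
A hypothetical usage line, assuming the project's musicobject module (providing Tone, Group and Transformed) is importable; the input string is made up for illustration:

chord = musicobject.parseString("{(440, 1), (550, 1), (660, 1/2)}", parseAll=True)[0]
print(chord)   # a Group built from three Tone objects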

Example #13
    def getParseTree(self, PassedString):
        """This method is responsible for parsing the given string and returning a parse tree to the caller."""

        # Uhh...?
        copyStatement = Forward()

        # Define literals.
        colon = Literal(':').suppress()
        leftBracket = Literal('[').suppress()
        rightBracket = Literal(']').suppress()
        quote = Literal('"').suppress()
        copyKeyword = Keyword("copy", caseless=True)
        nowKeyword = Keyword("now", caseless=True)
        onKeyword = Keyword("on", caseless=True)
        reverseKeyword = Keyword("reverse", caseless=True)
        singleKeyword = Keyword("single", caseless=True)
        threadsKeyword = Keyword("threads", caseless=True)
        whereKeyword = Keyword("where", caseless=True)

        # Basic server group and attribute parsing logic.
        identifier = Word(alphas + '/', alphanums + '_').setName("identifier")
        filename = Word(string.letters +
                        string.punctuation).setName("filename")
        attribStr = delimitedList(identifier)
        attribStrList = Group(attribStr)
        groupStr = identifier + ZeroOrMore(
            leftBracket + attribStrList.setResultsName("attribs") +
            rightBracket)
        groupStrList = Group(delimitedList(groupStr))
        localfileStr = filename
        remotefileStr = filename

        # Extended server group and attribute parsing logic.
        whereExpression = Forward()
        and_ = Keyword("and", caseless=True)
        or_ = Keyword("or", caseless=True)

        binaryOpStr = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
        integerStr = Word(nums)
        rightValue = integerStr | quotedString

        whereCondition = Group(identifier + binaryOpStr + rightValue)
        whereExpression << whereCondition + ZeroOrMore(
            (and_ | or_) + whereExpression)

        # define the grammar
        copyStatement << (
            copyKeyword + localfileStr.setResultsName("local_filename") +
            Optional(groupStrList.setResultsName("groups") + colon) +
            remotefileStr.setResultsName("remote_filename") +
            Optional(Group(whereKeyword + whereExpression),
                     "").setResultsName("where") + Optional(nowKeyword) +
            Optional(reverseKeyword) + Optional(singleKeyword) +
            Optional(threadsKeyword))

        copyStatement.ignore(pythonStyleComment)

        CopyParser = copyStatement
        myTokens = CopyParser.parseString(PassedString)

        print "Output: tokens = ", myTokens
        print "Output: tokens.attribs =", myTokens.attribs
        print "Output: tokens.local_filename =", myTokens.local_filename
        print "Output: tokens.groups =", myTokens.groups
        print "Output: tokens.remote_filename = ", myTokens.remote_filename
        print "Output: tokens.where =", myTokens.where

        return myTokens
Example #14
index_kw = Keyword('index', caseless=True)
_index_stmt = Forward()
_index_stmt << (Optional(create_kw) + index_kw +
                index_source.setResultsName('source') + '(' +
                column_name_list.setResultsName('columns') + ')')

# Examples:
# index = index_stmt.parseString('INDEX partition1 (col1, col2, col3);')
# print(index.source)
# 'partition1'
# print(index.columns)
# ['col1', 'col2', 'col3']

# define Oracle comment format, and ignore them
oracle_sql_comment = '--' + restOfLine
_view_stmt.ignore(oracle_sql_comment)
_index_stmt.ignore(oracle_sql_comment)


def substitute_vids(library, statement):
    """ Replace all of the references to tables and partitions with their vids.

    This is a bit of a hack -- it ought to work with the parser, but instead it just looks for
    common SQL tokens that indicate an identifier.

    :param statement: an sqlstatement. String.
    :return: tuple: new_statement, set of table vids, set of partition vids.
    """
    from ambry.identity import ObjectNumber, TableNumber, NotObjectNumberError
    from ambry.orm.exc import NotFoundError
Example #15
def parser(text):
    cvtTuple = lambda toks: tuple(toks.asList())
    cvtRaw = lambda toks: RawString(' '.join(map(str, toks.asList())))
    #cvtDict = lambda toks: dict(toks.asList())
    cvtGlobDict = lambda toks: GlobDict(toks.asList())
    cvtDict = cvtGlobDict
    extractText = lambda s, l, t: RawString(s[t._original_start:t._original_end])

    def pythonize(toks):
        s = toks[0]
        if s == 'true':
            return True
        elif s == 'false':
            return False
        elif s == 'none':
            return [None]
        elif s.isdigit():
            return int(s)
        elif re.match('(?i)^-?(\d+\.?e\d+|\d+\.\d*|\.\d+)$', s):
            return float(s)
        return toks[0]

    def noneDefault(s, loc, t):
        return t if len(t) else [RawEOL]

    # define punctuation as suppressed literals
    lbrace, rbrace = map(Suppress, "{}")

    identifier = Word(printables, excludeChars='{}"\'')
    quotedStr = QuotedString('"', escChar='\\', multiline=True) | \
                QuotedString('\'', escChar='\\', multiline=True)
    quotedIdentifier = QuotedString('"', escChar='\\', unquoteResults=False) | \
                       QuotedString('\'', escChar='\\', unquoteResults=False)
    dictStr = Forward()
    setStr = Forward()
    objStr = Forward()

    #anyIdentifier = identifier | quotedIdentifier
    oddIdentifier = identifier + quotedIdentifier
    dictKey = dictStr | quotedStr | \
              Combine(oddIdentifier).setParseAction(cvtRaw)
    dictKey.setParseAction(cvtRaw)

    dictValue = quotedStr | dictStr | setStr | \
                Combine(oddIdentifier).setParseAction(cvtRaw)

    if OLD_STYLE_KEYS:
        dictKey |= Combine(identifier + ZeroOrMore(White(' ') + (identifier + ~FollowedBy(Optional(White(' ')) + LineEnd()))))
        dictValue |= identifier.setParseAction(pythonize)
    else:
        dictKey |= identifier
        dictValue |= delimitedList(identifier | quotedIdentifier, delim=White(' '), combine=True).setParseAction(pythonize)

    ParserElement.setDefaultWhitespaceChars(' \t')
    #dictEntry = Group(Combine(OneOrMore(identifier | quotedIdentifier)).setParseAction(cvtRaw) +
    dictEntry = Group(dictKey +
                      Optional(White(' ').suppress() + dictValue).setParseAction(noneDefault) +
                      Optional(White(' ').suppress()) +
                      LineEnd().suppress())
    #dictEntry = Group(SkipTo(dictKey + LineEnd() + dictKey))
    dictStr << (lbrace + ZeroOrMore(dictEntry) + rbrace)
    dictStr.setParseAction(cvtDict)
    ParserElement.setDefaultWhitespaceChars(' \t\r\n')

    setEntry = identifier.setParseAction(pythonize) | quotedString.setParseAction(removeQuotes)
    setStr << (lbrace + delimitedList(setEntry, delim=White()) + rbrace)
    setStr.setParseAction(cvtTuple)

    # TODO: take other literals as arguments
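    # blobObj matches an "ltm rule <name>" or "rule <name>" header and keeps
    # its braced body as raw, unparsed text via originalTextFor/nestedExpr.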
    blobObj = Group(((Literal('ltm') + Literal('rule') + identifier) | \
                     (Literal('rule') + identifier)).setParseAction(cvtRaw) +
                    originalTextFor(nestedExpr('{', '}')).setParseAction(extractText))

    objEntry = Group(OneOrMore(identifier | quotedIdentifier).setParseAction(cvtRaw) +
                     Optional(dictStr).setParseAction(noneDefault))
    objStr << (Optional(delimitedList(blobObj | objEntry, delim=LineEnd())))
    objStr.setParseAction(cvtGlobDict)
    #objStr.setParseAction(cvtTuple)
    objStr.ignore(pythonStyleComment)

    return objStr.parseString(text)[0]
Example #17
def braces_parser(text, opener=BLOB_OPENER, closer=BLOB_CLOSER):
    cvtTuple = lambda toks: tuple(toks.asList())  # @IgnorePep8
    cvtRaw = lambda toks: RawString(' '.join(map(str, toks.asList())))  # @IgnorePep8
    cvtDict = lambda toks: GlobDict(toks.asList())  # @IgnorePep8
    extractText = lambda s, l, t: RawString(s[t._original_start:t._original_end])  # @IgnorePep8

    def pythonize(toks):
        s = toks[0]
        if s == 'true':
            return True
        elif s == 'false':
            return False
        elif s == 'none':
            return [None]
        elif s.isdigit():
            return int(s)
        elif re.match(r'(?i)^-?(\d+\.?e\d+|\d+\.\d*|\.\d+)$', s):
            return float(s)
        return toks[0]

    def noneDefault(s, loc, t):
        return t if len(t) else [RawEOL]

    # define punctuation as suppressed literals
    lbrace, rbrace = map(Suppress, "{}")

    identifier = Word(printables, excludeChars='{}"\'')
    quotedStr = QuotedString('"', escChar='\\', multiline=True) | \
        QuotedString('\'', escChar='\\', multiline=True)
    quotedIdentifier = QuotedString('"', escChar='\\', unquoteResults=False) | \
        QuotedString('\'', escChar='\\', unquoteResults=False)
    dictStr = Forward()
    setStr = Forward()
    objStr = Forward()

    oddIdentifier = identifier + quotedIdentifier
    dictKey = quotedIdentifier | \
        Combine(oddIdentifier).setParseAction(cvtRaw)
    dictKey.setParseAction(cvtRaw)

    dictValue = quotedStr | dictStr | setStr | \
        Combine(oddIdentifier).setParseAction(cvtRaw)

    if OLD_STYLE_KEYS:
        dictKey |= Combine(identifier + ZeroOrMore(
            White(' ') +
            (identifier + ~FollowedBy(Optional(White(' ')) + LineEnd()))))
        dictValue |= identifier.setParseAction(pythonize)
    else:
        dictKey |= identifier
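        # Unlike parser() above, a space-delimited value here may also be
        # followed by a braced blob, kept verbatim via originalTextFor.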
        dictValue |= Or([
            delimitedList(identifier | quotedIdentifier,
                          delim=White(' '),
                          combine=True),
            Combine(
                delimitedList(identifier | quotedIdentifier,
                              delim=White(' '),
                              combine=True) +
                Optional(
                    White(' ') + originalTextFor(nestedExpr('{', '}')).
                    setParseAction(extractText))).setParseAction(cvtRaw)
        ])

    ParserElement.setDefaultWhitespaceChars(' \t')
    dictEntry = Group(dictKey +
                      Optional(White(' ').suppress() +
                               dictValue).setParseAction(noneDefault) +
                      Optional(White(' ').suppress()) + LineEnd().suppress())
    dictStr << (lbrace + ZeroOrMore(dictEntry) + rbrace)
    dictStr.setParseAction(cvtDict)
    ParserElement.setDefaultWhitespaceChars(' \t\r\n')

    setEntry = identifier.setParseAction(
        pythonize) | quotedString.setParseAction(removeQuotes) | dictStr
    setStr << (lbrace + delimitedList(setEntry, delim=White()) + rbrace)
    setStr.setParseAction(cvtTuple)

    objEntry = dictStr.ignore(pythonStyleComment)
    objStr << delimitedList(objEntry, delim=LineEnd())

    return objStr.parseString(text)[0]
Example #18
ID = ~MatchFirst([Keyword(w) for w in _keywords]) + Regex(r"[a-zA-Z_][a-zA-Z0-9_$]*")("id")
LP, RP, LB, RB, LC, RC, COLON, SEMICOLON, CAMMA, PERIOD, SHARP, EQUAL, AT, ASTA, Q, PLUS, MINUS, USC, APS = map(
    Suppress, ("()[]{}:;,.#=@*?+-_'")
)

DSLASH = Suppress(Literal("//"))

for k in _keywords:
    setattr(this_mod, k.swapcase(), Keyword(k)("keyword"))
    # setattr(sys.modules[__name__],k,Literal(k))

with open(_non_terminal_symbols_file, "r") as f:
    for name in (line.strip() for line in f):
        sym = Forward()(name)
        sym.enablePackrat()
        sym.ignore(cStyleComment)
        # print("sym={0}".format(name))
        setattr(this_mod, name, sym)
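
# Every non-terminal named in the symbols file now exists on this module as a
# Forward placeholder (with C-style comments ignored); the real productions
# are presumably filled in elsewhere. Note that enablePackrat() switches
# packrat parsing on globally, not per symbol.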


def alias(grammar, name):
    if name:
        return Group(grammar)(name)
    else:
        return Group(grammar)
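
# A small, standalone illustration (assumed usage, not from the original
# module) of alias(): it wraps a sub-grammar in a Group and, when a name is
# given, exposes it as a named result on the parse output.
from pyparsing import Keyword, Word, alphanums  # presumably already imported in the real module

_ident_demo = Word(alphanums + "_")
_assign_demo = alias(Keyword("assign") + _ident_demo + _ident_demo, "assignment")
print(_assign_demo.parseString("assign clk net1").assignment)
# -> ['assign', 'clk', 'net1']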


class ErrorReportException(ParseException):
    pass