def __init__(self):
    """
    Build the arithmetic grammar and operator/function lookup tables.

    Grammar (note: this dialect uses 'x', not '*', for multiplication):
        expop   :: '^'
        multop  :: 'x' | '/'
        addop   :: '+' | '-'
        integer :: ['+' | '-'] '0'..'9'+
        atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
        factor  :: atom [ expop factor ]*
        term    :: factor [ multop factor ]*
        expr    :: term [ addop term ]*
    """
    point = Literal(".")
    e = CaselessLiteral("E")
    fnumber = Combine(Word("+-" + nums, nums) +
                      Optional(point + Optional(Word(nums))) +
                      Optional(e + Word("+-" + nums, nums)))
    ident = Word(alphas, alphas + nums + "_$")
    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("x")
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    expop = Literal("^")
    pi = CaselessLiteral("PI")
    expr = Forward()
    atom = ((Optional(oneOf("- +")) +
             (pi | e | fnumber | ident + lpar + expr + rpar).setParseAction(self.pushFirst))
            | Optional(oneOf("- +")) + Group(lpar + expr + rpar)
            ).setParseAction(self.pushUMinus)
    # "atom [ ^ factor ]..." (not "atom [ ^ atom ]...") gives right-to-left
    # exponents: 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor << atom + ZeroOrMore((expop + factor).setParseAction(self.pushFirst))
    term = factor + ZeroOrMore((multop + factor).setParseAction(self.pushFirst))
    expr << term + ZeroOrMore((addop + term).setParseAction(self.pushFirst))
    self.bnf = expr
    # map operator symbols to corresponding arithmetic operations
    epsilon = 1e-12
    self.opn = {"+": operator.add,
                "-": operator.sub,
                "x": operator.mul,
                "/": operator.truediv,
                "^": operator.pow}
    self.fn = {"sin": math.sin,
               "cos": math.cos,
               "tan": math.tan,
               "abs": abs,
               "trunc": lambda a: int(a),
               "round": round,
               # BUG FIX: cmp() was removed in Python 3; compute the sign
               # directly (values within +/-epsilon of zero count as 0).
               "sgn": lambda a: 0 if abs(a) <= epsilon else (1 if a > 0 else -1)}
def bnf(exprStack):
    """
    Build the arithmetic grammar; matched tokens are appended to *exprStack*
    in RPN order by the parse actions below.

    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    """
    def pushFirst(strg, loc, toks):
        # push the leading token of the matched production
        exprStack.append(toks[0])

    def pushUMinus(strg, loc, toks):
        # leading '-' signs become explicit unary-minus markers
        for t in toks:
            if t == '-':
                exprStack.append('unary -')
            else:
                break

    point = Literal('.')
    e = CaselessLiteral('E')
    # BUG FIX: the original pattern used "(:?" (a group containing an
    # optional literal colon) where the non-capturing group "(?:" was meant.
    fnumber = Regex(r' [+-]? \d+ (?: \. \d* )? (?: [eE] [+-]? \d+)?', re.X)
    # BUG FIX: under re.X, whitespace *inside* a character class is literal,
    # so "[0-9 a-f A-F]" also matched spaces; use a compact class.
    xnumber = Regex(r'0 [xX] [0-9a-fA-F]+', re.X)
    ident = Word(alphas, alphas + nums + '_$')
    plus = Literal('+')
    minus = Literal('-')
    mult = Literal('*')
    div = Literal('/')
    lpar = Literal('(').suppress()
    rpar = Literal(')').suppress()
    addop = plus | minus
    multop = mult | div
    expop = Literal('^')
    pi = CaselessLiteral('PI')
    expr = Forward()
    # constants, numbers, function calls, or bare identifiers
    atom_parts = pi | e | fnumber | xnumber | ident + lpar + expr + rpar | ident
    atom_action = atom_parts.setParseAction(pushFirst)
    group = Group(lpar + expr + rpar)
    # (0, None) * minus == any number of leading minus signs
    atom = ((0, None) * minus + atom_action | group).setParseAction(pushUMinus)
    # by defining exponentiation as 'atom [ ^ factor ]...' instead of
    # 'atom [ ^ atom ]...', we get right-to-left exponents:
    # 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor << atom + ZeroOrMore((expop + factor).setParseAction(pushFirst))
    term = factor + ZeroOrMore((multop + factor).setParseAction(pushFirst))
    expr << term + ZeroOrMore((addop + term).setParseAction(pushFirst))
    return expr
def _BNF(self):
    """
    Build (once) and cache the expression grammar in self.bnf.

    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    """
    if not self.bnf:
        point = Literal(".")
        e = CaselessLiteral("E")
        fnumber = Combine(
            Word("+-" + nums, nums) +
            Optional(point + Optional(Word(nums))) +
            Optional(e + Word("+-" + nums, nums)))
        ident = Word(alphas, alphas + nums + "_$")
        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        div = Literal("/")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        # the comma is deliberately NOT suppressed (see arg_func below)
        # comma = Literal( "," ).suppress()
        comma = Literal(",")
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        pi = CaselessLiteral("PI")
        # one Literal per known variable name; any of them counts as an atom
        var_list = [Literal(i) for i in self.var_names]
        expr = Forward()
        arg_func = Forward()
        or_vars = MatchFirst(var_list)
        atom = ((Optional("-") + (
            pi | e | fnumber | ident + lpar + arg_func + rpar | or_vars
            ).setParseAction(self._pushFirst)) |
            (Optional("-") + (lpar + arg_func.suppress() + rpar))
            ).setParseAction(self._pushUMinus)
        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents:
        # 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << atom + ZeroOrMore(
            (expop + factor).setParseAction(self._pushFirst))
        term = factor + ZeroOrMore(
            (multop + factor).setParseAction(self._pushFirst))
        expr << term + ZeroOrMore(
            (addop + term).setParseAction(self._pushFirst))
        # function argument list: expr [ ',' expr ]*
        arg_func << expr + ZeroOrMore(
            (comma + expr).setParseAction(self._pushFirst))
        self.bnf = expr
    return self.bnf
def parse(str):
    """
    Parse a simple SQL SELECT statement, printing a caret-annotated
    diagnostic on failure.

    :param str: SQL text to parse (NOTE: parameter shadows the builtin).
    :return: None; the parse result is bound to the local ``tokens``.
    """
    tokens = ''
    # define SQL tokens
    selectStmt = Forward()
    selectToken = Keyword("select", caseless=True)
    fromToken = Keyword("from", caseless=True)
    ident = Word(alphas, alphanums + "_$").setName("identifier")
    columnName = Upcase(delimitedList(ident, ".", combine=True))
    columnNameList = Group(delimitedList(columnName))
    tableName = Upcase(delimitedList(ident, ".", combine=True))
    tableNameList = Group(delimitedList(tableName))
    whereExpression = Forward()
    and_ = Keyword("and", caseless=True)
    or_ = Keyword("or", caseless=True)
    in_ = Keyword("in", caseless=True)
    E = CaselessLiteral("E")
    binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    arithSign = Word("+-", exact=1)
    realNum = Combine(
        Optional(arithSign) +
        (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) +
        Optional(E + Optional(arithSign) + Word(nums)))
    intNum = Combine(
        Optional(arithSign) + Word(nums) +
        Optional(E + Optional("+") + Word(nums)))
    # need to add support for alg expressions
    columnRval = realNum | intNum | quotedString | columnName
    whereCondition = Group(
        (columnName + binop + columnRval) |
        (columnName + in_ + "(" + delimitedList(columnRval) + ")") |
        (columnName + in_ + "(" + selectStmt + ")") |
        ("(" + whereExpression + ")"))
    whereExpression << whereCondition + ZeroOrMore(
        (and_ | or_) + whereExpression)
    # define the grammar
    selectStmt << (selectToken +
                   ('*' | columnNameList).setResultsName("columns") +
                   fromToken +
                   tableNameList.setResultsName("tables") +
                   Optional(Group(CaselessLiteral("where") + whereExpression),
                            "").setResultsName("where"))
    simpleSQL = selectStmt
    # define Oracle comment format, and ignore them
    oracleSqlComment = "--" + restOfLine
    simpleSQL.ignore(oracleSqlComment)
    try:
        tokens = simpleSQL.parseString(str)
    # BUG FIX: the Python 2 forms `except ParseException, err:` and the
    # print statements are SyntaxErrors on Python 3.
    except ParseException as err:
        print(" " * err.loc + "^\n" + err.msg)
        print(err)
def _get_bnf(self):
    """
    Returns the `Backus–Naur Form` for the parser.

    Builds the grammar once and caches it in ``self.bnf``. Operands may be
    numbers, $-variables, booleans, strings, None, [list] literals, or
    fn(...) calls; parse actions push tokens for later RPN evaluation.
    """
    if not self.bnf:
        # Operators
        exponent_operator = Literal("^")
        # negate_operator = Literal("!")  # TODO: Implement this so we can write `!True`
        multiply_operator = oneOf("* / %")
        add_operator = oneOf("+ -")
        comparison_operator = oneOf("== != < <= > >= & |") ^ Keyword("in")
        # Functions
        e = CaselessLiteral("E")
        pi = CaselessLiteral("PI")
        lparen, rparen, lbrack, rbrack = map(Suppress, "()[]")
        ident = Word(alphas, alphas + nums + "_$")
        # variables are written $name
        variable = Combine(Literal("$") + Word(alphanums + "_"))
        boolean = Keyword("True") ^ Keyword("False")
        string = quotedString.setParseAction(removeQuotes)
        numeric = Combine(
            Word("+-" + nums, nums) +
            Optional(Literal(".") + Optional(Word(nums))) +
            Optional(e + Word("+-" + nums, nums)))
        none = Keyword("None")
        expression = Forward()
        # list literal: [scalar, scalar, ...]
        lists = Forward()
        lists << (lbrack + Optional(
            delimitedList(numeric ^ variable ^ boolean ^ string)) + rbrack)
        atom = (Optional("-") + (
            pi | e | numeric | ident + lparen + expression + rparen
            ).setParseAction(self.push_stack) | (
            variable | none | boolean | string | Group(lists)
            ).setParseAction(self.push_stack) | (
            lparen + expression.suppress() + rparen)).setParseAction(
            self.push_unary_stack)
        # NOTE(review): contrary to the original comment, "atom [^factor]"
        # makes '^' RIGHT-associative: 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << atom + ZeroOrMore(
            (exponent_operator + factor).setParseAction(self.push_stack))
        # NOTE(review): `boolean` is rebound here, shadowing the True/False
        # keyword parser defined above — confirm this is intentional.
        boolean = factor + ZeroOrMore(
            (comparison_operator + factor).setParseAction(self.push_stack))
        term = boolean + ZeroOrMore(
            (multiply_operator + boolean).setParseAction(self.push_stack))
        self.bnf = expression << term + ZeroOrMore(
            (add_operator + term).setParseAction(self.push_stack))
    return self.bnf
def __init__(self, slack=None):
    """Hello-world bot: triggers on !mycommand and !othercommand [name]."""
    super(SimpleBot, self).__init__(slack=slack)
    # Bot will trigger on messages starting with !mycommand
    self.name = 'Hello World'
    self.expr = CaselessLiteral('mycommand')
    # Bot will trigger on messages of the form
    # !othercommand [alphanumeric argument]
    self.other_name = 'Greeter'
    username = Word(alphanums).setResultsName('username')
    self.second_expr = (CaselessLiteral('othercommand')
                        + Optional(username)
                        + StringEnd())
    self.second_doc = 'Greet a user\n\tothercommand [name]'
def __init__(self):
    """
    Assemble the SQL grammar on top of the shared `tokens` definitions and
    wire it into the Forward placeholders tokens.whereExpression,
    tokens.joinExpression, tokens.selectStmt and tokens.insertStmt.
    """
    # numeric literals: reals (with optional exponent) and integers
    self.realNum = Combine(
        Optional(tokens.arithSign) +
        (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) +
        Optional(tokens.E + Optional(tokens.arithSign) + Word(nums)))
    self.intNum = Combine(
        Optional(tokens.arithSign) + Word(nums) +
        Optional(tokens.E + Optional("+") + Word(nums)))
    # need to add support for alg expressions
    self.columnRval = self.realNum | self.intNum | quotedString | tokens.columnName
    # a single WHERE condition: comparison, IN-list, IN-subselect, or parens
    self.whereCondition = Group(
        (tokens.columnName + tokens.binop + self.columnRval) |
        (tokens.columnName + tokens.in_ + tokens.LPAREN +
         delimitedList(self.columnRval) + tokens.RPAREN) |
        (tokens.columnName + tokens.in_ + tokens.LPAREN +
         tokens.selectStmt + tokens.RPAREN) |
        (tokens.LPAREN + tokens.whereExpression + tokens.RPAREN))
    tokens.whereExpression << (self.whereCondition + Optional(
        ZeroOrMore(tokens.and_ | tokens.or_) + tokens.whereExpression))
    # JOIN <table> ON <condition>
    self.joinCondition = Group(
        (tokens.tableName + tokens.on_ + tokens.whereExpression))
    tokens.joinExpression << (self.joinCondition)
    # define the grammar
    tokens.selectStmt << (
        tokens.selectToken.setResultsName("command") +
        ('*' | tokens.columnNameList).setResultsName("columns") +
        tokens.fromToken + tokens.tableNameList.setResultsName("tables") +
        Optional(Group(CaselessLiteral("join") + tokens.joinExpression),
                 "").setResultsName("join") +
        Optional(
            Group(
                Suppress(CaselessLiteral("where")) +
                tokens.whereExpression),
            "").setResultsName("where"))
    #self.valuesIter = ( self.columnRval | "," + self.columnRval)
    # INSERT INTO <cols> VALUES ( v1, v2, ... )
    tokens.insertStmt << (
        tokens.insertToken.setResultsName("command") +
        tokens.intoToken.setResultsName("middle") +
        tokens.columnNameList.setResultsName("tables") +
        tokens.valuesToken.setResultsName("val") + tokens.LPAREN +
        Group(delimitedList(self.columnRval,
                            delim=r', ')).setResultsName("insValues") +
        tokens.RPAREN)
    self.simpleSQL = tokens.selectStmt | tokens.insertStmt
    # define Oracle comment format, and ignore them
    self.oracleSqlComment = "--" + restOfLine
    self.simpleSQL.ignore(self.oracleSqlComment)
def build_parser():
    """
    Build a pyparsing parser for our custom topology description language.

    :return: A pyparsing parser.
    :rtype: pyparsing.MatchFirst
    """
    ParserElement.setDefaultWhitespaceChars(' \t')
    nl = Suppress(LineEnd())
    inumber = Word(nums).setParseAction(lambda l, s, t: int(t[0]))
    # BUG FIX: the exponent was written "Optional('E' | 'e' + ...)", which by
    # operator precedence parsed as "'E' | ('e' + ...)": a bare 'E' with no
    # digits was accepted (breaking float()) while 'E<digits>' was rejected.
    fnumber = (Combine(
        Optional('-') + Word(nums) + '.' + Word(nums) +
        Optional(CaselessLiteral('e') + Optional('-') + Word(nums)))
    ).setParseAction(lambda toks: float(toks[0]))
    boolean = (CaselessLiteral('true') | CaselessLiteral('false')
               ).setParseAction(lambda l, s, t: t[0].casefold() == 'true')
    comment = Literal('#') + restOfLine + nl
    text = QuotedString('"')
    identifier = Word(alphas, alphanums + '_')
    empty_line = LineStart() + LineEnd()
    # one scalar inside a custom list; commas and newlines are optional
    item_list = ((text | fnumber | inumber | boolean) +
                 Optional(Suppress(',')) + Optional(nl))
    custom_list = (Suppress('(') + Optional(nl) +
                   Group(OneOrMore(item_list)) + Optional(nl) +
                   Suppress(')')).setParseAction(lambda tok: tok.asList())
    # key=value attribute; value may be a list, scalar, or identifier
    attribute = Group(
        identifier('key') + Suppress(Literal('=')) +
        (custom_list | text | fnumber | inumber |
         boolean | identifier)('value') + Optional(nl))
    attributes = (Suppress(Literal('[')) + Optional(nl) +
                  OneOrMore(attribute) + Suppress(Literal(']')))
    node = identifier('node')
    port = Group(node + Suppress(Literal(':')) +
                 (identifier | inumber)('port'))
    link = Group(port('endpoint_a') + Suppress(Literal('--')) +
                 port('endpoint_b'))
    # the four statement kinds, each accumulated with listAllMatches
    environment_spec = (attributes + nl).setResultsName(
        'env_spec', listAllMatches=True)
    nodes_spec = (Group(
        Optional(attributes)('attributes') +
        Group(OneOrMore(node))('nodes')) + nl).setResultsName(
        'node_spec', listAllMatches=True)
    ports_spec = (Group(
        Optional(attributes)('attributes') +
        Group(OneOrMore(port))('ports')) + nl).setResultsName(
        'port_spec', listAllMatches=True)
    link_spec = (Group(
        Optional(attributes)('attributes') +
        link('links')) + nl).setResultsName(
        'link_spec', listAllMatches=True)
    statements = OneOrMore(comment | link_spec | ports_spec |
                           nodes_spec | environment_spec | empty_line)
    return statements
def _parse_line(delimiters=' \t'): """Parse a single data line that may contain string or numerical data. Float and Int 'words' are converted to their appropriate type. Exponentiation is supported, as are NaN and Inf.""" # Somewhat of a hack, but we can only use printables if the delimiter is # just whitespace. Otherwise, some seprators (like ',' or '=') potentially # get parsed into the general string text. So, if we have non whitespace # delimiters, we need to fall back to just alphanums, and then add in any # missing but important symbols to parse. if delimiters.isspace(): textchars = printables else: textchars = alphanums symbols = [ '.', '/', '+', '*', '^', '(', ')', '[', ']', '=', ':', ';', '?', '%', '&', '!', '#', '|', '<', '>', '{', '}', '-', '_', '@', '$', '~' ] for symbol in symbols: if symbol not in delimiters: textchars = textchars + symbol string_text = Word(textchars) digits = Word(nums) dot = "." sign = oneOf("+ -") ee = CaselessLiteral('E') | CaselessLiteral('D') num_int = ToInteger(Combine(Optional(sign) + digits)) num_float = ToFloat( Combine( Optional(sign) + ((digits + dot + Optional(digits)) | (dot + digits)) + Optional(ee + Optional(sign) + digits))) # special case for a float written like "3e5" mixed_exp = ToFloat(Combine(digits + ee + Optional(sign) + digits)) nan = ToInf(oneOf("Inf -Inf")) | \ ToNan(oneOf("NaN nan NaN% NaNQ NaNS qNaN sNaN " + \ "1.#SNAN 1.#QNAN -1.#IND")) # sep = Literal(" ") | Literal("\n") data = (OneOrMore((nan | num_float | mixed_exp | num_int | string_text))) return data
def check_boolean_exprs(exprs=None, operand=(), send_error=True):
    '''
    Check whether a boolean expression is properly formed.

    :param exprs: The string to evaluate.
    :param operand: The name of the operands.
    :param send_error: Whether to throw an error if expression is malformed.
    :return: A boolean.

    :Example:

    >>> from pygtftk.utils import check_boolean_exprs
    >>> assert check_boolean_exprs('s > 1 and (s < 2 or y < 2.5)', operand=['s', 'y'])
    '''
    lparen = Literal("(")
    rparen = Literal(")")
    and_operator = CaselessLiteral("and")
    or_operator = CaselessLiteral("or")
    comparison_operator = oneOf(['==', '!=', '>', '>=', '<', '<='])
    point = Literal('.')
    exponent = CaselessLiteral('E')
    plusorminus = Literal('+') | Literal('-')
    number = Word(nums)
    integer = Combine(Optional(plusorminus) + number)
    float_nb = Combine(integer +
                       Optional(point + Optional(number)) +
                       Optional(exponent + integer))
    value = float_nb
    identifier = oneOf(operand, caseless=False)  # .setParseAction(_embed)
    # a comparison may have the identifier on either side of the operator
    group_1 = identifier + comparison_operator + value
    group_2 = value + comparison_operator + identifier
    comparison = group_1 | group_2
    boolean_expr = operatorPrecedence(comparison,
                                      [(and_operator, 2, opAssoc.LEFT),
                                       (or_operator, 2, opAssoc.LEFT)])
    boolean_expr_par = lparen + boolean_expr + rparen
    expression = Forward()
    # BUG FIX: `expression << boolean_expr | boolean_expr_par` bound as
    # `(expression << boolean_expr) | boolean_expr_par` because `<<` has
    # higher precedence than `|`, so the parenthesised alternative was
    # silently dropped. Parenthesise the right-hand side.
    expression << (boolean_expr | boolean_expr_par)
    try:
        expression.parseString(exprs, parseAll=True)
        return True
    except ParseException as err:
        if send_error:
            message(err.msg, force=True)
            message('Operand should be one of: ' + ", ".join(operand))
            message("Boolean expression not supported.", type="ERROR")
        return False
def BNF():
    """
    expop   :: '^'
    multop  :: '*' | '/' | '>>' | '<<' | '|' | '&'
    addop   :: '+' | '-'
    hex     :: '0x' + integer
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    """
    global bnf
    # build the grammar only once; cache in the module-global `bnf`
    if not bnf:
        point = Literal( "." )
        e = CaselessLiteral( "E" )
        # hex literals are rewritten to their decimal string equivalent
        hexnum = CaselessLiteral("0x") + OneOrMore(
            oneOf(nums + 'a b c d e f A B C D E F'))
        hexnum.setParseAction(lambda s,l,t:str(int(''.join(t),16)))
        fnumber = Combine( Word( "+-"+nums, nums ) +
                           Optional( point + Optional( Word( nums ) ) ) +
                           Optional( e + Word( "+-"+nums, nums ) ) )
        ident = Word(alphas, alphas+nums+"_$")
        plus = Literal( "+" )
        minus = Literal( "-" )
        mult = Literal( "*" )
        div = Literal( "/" )
        lshift = Literal( "<<" )
        rshift = Literal( ">>" )
        or_ = Literal( "|" )
        and_ = Literal( "&" )
        lpar = Literal( "(" ).suppress()
        rpar = Literal( ")" ).suppress()
        addop = plus | minus
        # note: shifts and bitwise ops share multiplicative precedence here
        multop = mult | div | lshift | rshift | or_ | and_
        expop = Literal( "^" )
        pi = CaselessLiteral( "PI" )
        expr = Forward()
        atom = (Optional("-") + (
            pi | e | hexnum | fnumber | ident + lpar + expr + rpar
            ).setParseAction( pushFirst ) |
            ( lpar + expr.suppress() + rpar )).setParseAction(pushUMinus)
        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents:
        # 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( pushFirst ) )
        term = factor + ZeroOrMore( ( multop + factor ).setParseAction( pushFirst ) )
        expr << term + ZeroOrMore( ( addop + term ).setParseAction( pushFirst ) )
        bnf = expr
    return bnf
def create_pre_timedelta_literal(tok):
    """Detects <number> <timescale> <preposition>"""
    # numeric quantity with optional thousands separators / decimal part,
    # or the articles "a"/"an"
    numeric = (Word(nums) +
               ZeroOrMore(',' + Word(nums + ',')) +
               ZeroOrMore('.' + Word(nums)))
    quantity = originalTextFor(Or([numeric,
                                   CaselessLiteral('an'),
                                   CaselessLiteral('a')]))
    delta = (quantity + CaselessLiteral(tok) +
             DateParser.get_preposition_literals())
    delta.setName('pre' + tok)
    delta.setParseAction(DateParser.generate_pre_timedelta)
    return delta
def bnf(self):
    '''
    The BNF grammar is defined bellow.

    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    '''
    # build once and cache in self._bnf
    if not self._bnf:
        point = Literal(".")
        e = CaselessLiteral("E")
        fnumber = Combine(
            Word("+-" + nums, nums) +
            Optional(point + Optional(Word(nums))) +
            Optional(e + Word("+-" + nums, nums)))
        ident = Word(alphas, alphas + nums + "_$")
        minus = Literal("-")
        plus = Literal("+")
        div = Literal("/")
        mult = Literal("*")
        rpar = Literal(")").suppress()
        lpar = Literal("(").suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        pi = CaselessLiteral("PI")
        expr = Forward()
        # function calls accept a comma-separated argument list
        atom = (Optional("-") + (
            pi | e | fnumber |
            ident + lpar + delimitedList(expr) + rpar
            ).setParseAction(self.push_first) |
            (lpar + expr.suppress() + rpar)).setParseAction(
            self.push_minus)
        # The right way to define exponentiation is -> 2^3^2 = 2^(3^2),
        # not (2^3)^2.
        factor = Forward()
        factor << atom + ZeroOrMore(
            (expop + factor).setParseAction(self.push_first))
        term = factor + ZeroOrMore(
            (multop + factor).setParseAction(self.push_first))
        expr << term + ZeroOrMore(
            (addop + term).setParseAction(self.push_first))
        self._bnf = expr
    return self._bnf
def __init__(self):
    """
    Build the four-function expression grammar plus the operator and
    function lookup tables used by the evaluator.
    """
    point = Literal(".")
    e = CaselessLiteral("E")
    fnumber = Combine(
        Word("+-" + nums, nums) +
        Optional(point + Optional(Word(nums))) +
        Optional(e + Word("+-" + nums, nums)))
    ident = Word(alphas, alphas + nums + "_$")
    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("*")
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    expop = Literal("^")
    pi = CaselessLiteral("PI")
    expr = Forward()
    atom = ((Optional(oneOf("- +")) +
             (ident + lpar + expr + rpar | pi | e | fnumber).setParseAction(self.pushFirst))
            | Optional(oneOf("- +")) + Group(lpar + expr + rpar)
            ).setParseAction(self.pushUMinus)
    # right-to-left exponents: 2^3^2 = 2^(3^2), not (2^3)^2
    factor = Forward()
    factor << atom + \
        ZeroOrMore((expop + factor).setParseAction(self.pushFirst))
    term = factor + \
        ZeroOrMore((multop + factor).setParseAction(self.pushFirst))
    expr << term + \
        ZeroOrMore((addop + term).setParseAction(self.pushFirst))
    self.bnf = expr
    # values within +/-epsilon of zero count as 0 for sgn()
    epsilon = 1e-12
    self.opn = {
        "+": operator.add,
        "-": operator.sub,
        "*": operator.mul,
        "/": operator.truediv,
        "^": operator.pow
    }
    self.fn = {
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "exp": math.exp,
        "abs": abs,
        "trunc": lambda a: int(a),
        "round": round,
        # BUG FIX: cmp() was removed in Python 3; compute the sign directly.
        "sgn": lambda a: 0 if abs(a) <= epsilon else (1 if a > 0 else -1)
    }
def gen_partitions_parser(cls):
    """Build the parser matching a table's PARTITION BY clause."""
    # Init full parts matcher only on demand
    # invalid_partition_prefix - used to detect any invalid prefix
    # attached to the number of partitions. The prefix is used
    # later on to flag invalid schemas.
    return (
        # optional MySQL version-conditional comment opener, e.g. "/*!50100"
        Combine(Optional(Literal("/*!") + Word(nums))).suppress() +
        CaselessLiteral("PARTITION") + CaselessLiteral("BY") +
        (cls.PTYPE_HASH | cls.PTYPE_KEY | cls.PTYPE_RANGE | cls.PTYPE_LIST) +
        Optional(
            CaselessLiteral("PARTITIONS") +
            Optional(Combine(Regex("[^0-9]")))
            ("invalid_partition_prefix") +
            Word(nums)("num_partitions")) +
        Optional(cls.LEFT_PARENTHESES + cls.PART_DEFS("part_defs") +
                 cls.RIGHT_PARENTHESES))
def create_post_timedelta_literal(tok):
    """Detects <plus/minus> <number> <timescale>"""
    # sign may be symbolic or spelled out
    sign_words = ['+', '-', 'plus', 'minus']
    sign = Or([CaselessLiteral(w) for w in sign_words])
    # numeric quantity with optional thousands separators / decimal part,
    # or the articles "a"/"an"
    numeric = (Word(nums) +
               ZeroOrMore(',' + Word(nums + ',')) +
               ZeroOrMore('.' + Word(nums)))
    quantity = originalTextFor(Or([numeric,
                                   CaselessLiteral('an'),
                                   CaselessLiteral('a')]))
    delta = sign + quantity + CaselessLiteral(tok) + StringEnd()
    delta.setName('post' + tok)
    delta.setParseAction(DateParser.generate_post_timedelta)
    return delta
def dice_sides():
    """
    Return the Backus-Naur form (BNF) of the grammar describing the number
    of sides for a die.

    :return: The BNF grammar for the number of sides of a dice, e.g. 6.
    """
    # either an integer count, or the Fudge/FATE markers "fate"/"f"
    fate_long = CaselessLiteral("fate")
    fate_short = CaselessLiteral("f")
    return integer() | fate_long | fate_short
def BNF():
    """
    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    """
    global bnf
    # build the grammar only once; cache in the module-global `bnf`
    if not bnf:
        point = Literal(".")
        e = CaselessLiteral("E")
        fnumber = Combine(
            Word("+-" + nums, nums) +
            Optional(point + Optional(Word(nums))) +
            Optional(e + Word("+-" + nums, nums)))
        ident = Word(alphas, alphas + nums + "_$")
        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        div = Literal("/")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        pi = CaselessLiteral("PI")
        expr = Forward()
        atom = ((Optional("-") + (
            pi | e | fnumber |
            ident + lpar + expr + rpar).setParseAction(pushFirst) |
            (lpar + expr.suppress() + rpar)).setParseAction(pushUMinus))
        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
        # left-to-right
        # that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << atom + ZeroOrMore((expop + factor).setParseAction(pushFirst))
        term = factor + ZeroOrMore((multop + factor).setParseAction(pushFirst))
        expr << term + ZeroOrMore((addop + term).setParseAction(pushFirst))
        bnf = expr
    return bnf
def _reset_tokens(self): """ Set up the tokens for pyparsing. """ # Somewhat of a hack, but we can only use printables if the delimiter is # just whitespace. Otherwise, some seprators (like ',' or '=') potentially # get parsed into the general string text. So, if we have non whitespace # delimiters, we need to fall back to just alphanums, and then add in any # missing but important symbols to parse. if self._delimiter.isspace(): textchars = printables else: textchars = alphanums symbols = [ '.', '/', '+', '*', '^', '(', ')', '[', ']', '=', ':', ';', '?', '%', '&', '!', '#', '|', '<', '>', '{', '}', '-', '_', '@', '$', '~' ] for symbol in symbols: if symbol not in self._delimiter: textchars = textchars + symbol digits = Word(nums) dot = "." sign = oneOf("+ -") ee = CaselessLiteral('E') | CaselessLiteral('D') num_int = _ToInteger(Combine(Optional(sign) + digits)) num_float = _ToFloat( Combine( Optional(sign) + ((digits + dot + Optional(digits)) | (dot + digits)) + Optional(ee + Optional(sign) + digits))) # special case for a float written like "3e5" mixed_exp = _ToFloat(Combine(digits + ee + Optional(sign) + digits)) nan = (_ToInf(oneOf("Inf -Inf")) | _ToNan( oneOf("NaN nan NaN% NaNQ NaNS qNaN sNaN 1.#SNAN 1.#QNAN -1.#IND")) ) string_text = Word(textchars) self.line_parse_token = (OneOrMore( (nan | num_float | mixed_exp | num_int | string_text)))
def define_cluster():
    '''
    Algorithm Definition of Cluster Keyword

    :returns pyparsing object
    '''
    # "algorithm =" prefix (suppressed); currently only kmeans is offered
    algoPhrase = (Literal("algorithm") + Literal("=")).suppress()
    kmeansd = kmeans.define_kmeans()
    algo = algoPhrase + MatchFirst([kmeansd]).setResultsName("algorithm")
    # Grammar for Feature Selection
    feature_prefix = Optional(CaselessLiteral("feature") + Literal("=")).suppress()
    feature_value = oneOf(["False", "AUTO", "RFE"]).setResultsName("feature")
    feature = feature_prefix + feature_value
    # NOTE(review): `feature` is built but never added to `options` below —
    # confirm whether it should be part of the option list.
    #define so that there can be multiple verisions of Cluster
    clusterKeyword = Keyword("cluster", caseless=True).setResultsName("cluster")
    #define predictor word to specify column numbers
    predPhrase = (Literal("predictors") + Literal("=")).suppress()
    predictorsDef = choice_columns.setResultsName("predictors")
    preds = predPhrase + predictorsDef
    # "label = <columns>" option
    labelPhrase = (Literal("label") + Literal("=")).suppress()
    labelDef = choice_columns.setResultsName("label")
    labels = labelPhrase + labelDef
    # comma-separated list of any of the recognised options
    option = MatchFirst([preds, labels, algo])
    options = delimitedList(option, delim=',')
    cluster = clusterKeyword + openParen + Optional(options) + closeParen
    return cluster
def __init__(self):
    """Build the unit/quantity grammar; the result is kept in self.sentence."""
    #----------------------------------------
    # number ::= [0-9]+
    # point ::= .
    # exp ::= [eE]
    # integer ::= [-]<number>
    # float_number ::= <integer>[<point>[<number]][<exp><integer>][f]
    # unit_symbol ::= [a-zA-Z]+[0-9]?
    #----------------------------------------
    number = Word(nums)
    point = Literal('.')
    exp = CaselessLiteral('E')
    integer = Combine(Optional('-') + number)
    float_number = Combine(integer +
                           Optional(point + Optional(number)) +
                           Optional(exp + integer))
    # unicode degree / prime / double-prime marks are accepted as units
    degree_symbol = Literal(u'\u00b0')
    prime = Literal("'") ^ Literal(u'\u2032')
    double_prime = Literal('"') ^ Literal(u'\u2033')
    unit_symbol = Regex(r'[a-zA-Z]+[a-zA-Z_]*[\^]?[0-9]*') ^ degree_symbol ^ prime ^ double_prime
    #----------------------------------------
    # per ::= /
    # unit_combination ::= <unit_symbol> [[/ ] <unit_symbol>]*
    # unit ::= [<float_number>] <unit_combination>
    #----------------------------------------
    per = Literal('/')
    mult = Literal('.')
    at = Literal('@')
    operators = per ^ mult ^ at
    unit_or_float = unit_symbol ^ float_number
    operation = unit_or_float + Optional(operators) + unit_or_float
    expression = OneOrMore(operation)
    parenthetical_expression = '(' + expression + ')'
    ordered_expression = parenthetical_expression ^ expression ^ unit_or_float
    equation = ordered_expression + Optional(per) + ordered_expression
    # longest-match alternation: full equation, expression, or single token
    self.sentence = equation ^ ordered_expression ^ unit_or_float
def parseString(s):
    """
    Parse a "goto(...)" command string into element references and target
    names.

    :param s: input text, e.g. "goto(Fe2,5;target_name)".
    :return: the pyparsing result, or the string "error" on failure.
    """
    goto = CaselessLiteral("goto")
    letters = "ABCDEFGHIJKLMNOPRSTUVZYXWQ0123456789_-."
    lowerLet = letters.lower()
    caps = "ABCSXYZ"
    lowerCaps = caps.lower()
    digits = "0123456789"
    parenthesesL = Suppress("(")
    parenthesesR = Suppress(")")
    dot = "."
    minus = "-"
    # decimal commas are normalised to decimal points
    comma = Literal(",").setParseAction(replaceWith("."))
    semicolon = Suppress(";")
    element = Word(caps, max=1) | Word(lowerCaps, max=1)
    number = Word(digits)
    integer = Optional(minus) + number
    floa = Combine(integer + Optional(comma + number) + Optional(dot + number))
    elementRef = element + Optional(floa)
    targetName = Word(letters, max=13) | Word(lowerLet, max=13)
    attribute = elementRef | targetName
    go = Group(goto + parenthesesL +
               OneOrMore(attribute + Optional(semicolon)) + parenthesesR)
    command = go + ZeroOrMore(go)
    # BUG FIX: the original `while True:` loop was dead (the body always
    # broke or returned on its first pass) and caught only ValueError,
    # while parseString raises pyparsing.ParseException on bad input —
    # so "error" was never returned for actual parse failures.
    try:
        return command.parseString(s)
    except (ParseException, ValueError):
        return "error"
def get_ips(headers):
    "Return a list of IP address from mail headers"
    ips = []
    # a header line starts with "Name-Like-This:" — used as the end anchor
    end = Word(alphas, alphas + "-") + ":" + restOfLine
    begin = Or(
        (Suppress("Received: "), Suppress(
            CaselessLiteral("X-Originating-IP: ")))) + restOfLine
    token = Group(begin + SkipTo(end, False))

    def parse_headers():
        "parse headers"
        for msg_hdr in token.searchString(headers):
            for hdr in msg_hdr:
                fullhdr = ' '.join(hdr)
                fullhdr = fullhdr.replace('\n', '')
                yield fullhdr

    hdrs = parse_headers()

    def extract_ips(header):
        "Extract headers"
        match = FIND_IPS_RE.findall(header)
        match.reverse()
        ips.extend(
            (address[0] or address[1] or address[2] for address in match))

    def dedup(seq):
        "remove duplicate IP's"
        seen = set()
        seen_add = seen.add
        return [val for val in seq if val not in seen and not seen_add(val)]

    # BUG FIX: map() is lazy in Python 3, so the original
    # `map(extract_ips, hdrs)` never executed and `ips` stayed empty.
    for hdr in hdrs:
        extract_ips(hdr)
    return dedup(ips)
def __init__(self):
    """Build an infix-notation integer-expression parser (C-like precedence)."""
    ParserElement.enablePackrat()

    # int() accepts the '0x' prefix when given base 16, so the combined
    # match can be converted directly.
    hexadecimal_integer = (Combine(CaselessLiteral('0x') + Word(hexnums))
                           .setName('hexadecimal integer')
                           .setParseAction(lambda t: int(t[0], 16)))
    decimal_integer = (Word(nums)
                       .setName('decimal integer')
                       .setParseAction(lambda t: int(''.join(t))))
    identifier = Word(alphanums + '_$').setName('identifier')
    baseExpr = hexadecimal_integer | decimal_integer | identifier

    # precedence levels, tightest first, all folded pairwise
    nest = self.nest_operand_pairs
    unary_level = [(oneOf('+ - ~ !'), 1, opAssoc.RIGHT, nest)]
    binary_levels = ['* /', '+ -', '<< >>', '<= < > >=',
                     '== !=', '&', '^', '|']
    operators = unary_level + [(oneOf(symbols), 2, opAssoc.LEFT, nest)
                               for symbols in binary_levels]
    self.expr = infixNotation(baseExpr, operators) + StringEnd()
def _construct_grammar():
    """Assemble the full query grammar from the sub-expression builders."""
    logical_operator = get_logical_operator()
    logical_expression = get_logical_expression()
    facets_expression = get_facet_expression()
    highlight_expression = get_highlight_expression()
    sort_expression = get_sort_expression()
    aggs_expression = get_aggregations_expression()
    nested_expression = get_nested_expression()
    # The below line describes how the type expression should be.
    # NOTE(review): Word('type') matches any token built from the letters
    # t/y/p/e (e.g. "pet"); Keyword('type') may be what was intended —
    # confirm before changing.
    type_expression = Word('type')\
        + Word(':').suppress()\
        + Word(srange("[a-zA-Z0-9_]"))\
        + Optional(CaselessLiteral('AND')).suppress()
    type_expression.setParseAction(parse_type_expression)
    # optional highlight/sort/type prefixes, then any number of clauses
    # joined by optional logical operators
    base_expression = Optional(highlight_expression)\
        + Optional(sort_expression)\
        + Optional(type_expression)\
        + ZeroOrMore(
            (facets_expression
             | aggs_expression
             | nested_expression
             | logical_expression) + Optional(logical_operator)
        ).setParseAction(parse_one_or_more_logical_expressions)
    base_expression.setParseAction(parse_type_logical_facets_expression)
    return base_expression
def __init__(self):
    """Build an expression grammar over variables with '+' (addition of
    groups) and '>' (nesting/cross) operators plus parentheses.

    NOTE(review): `ungroup`, `ParseResults` and `product` come from
    module-level imports not visible here — presumably pyparsing and
    itertools.product; confirm against the file header.
    """
    _by = CaselessLiteral('by')
    _add = Literal('+').suppress()
    _nest = Literal('>').suppress()
    _lpar = Literal('(').suppress()
    _rpar = Literal(')').suppress()
    # A variable is any word of alphanumerics/underscore/dot that is not
    # the keyword 'by' (negative lookahead).
    _variable = ~_by + Word(alphanums + '_' + '.')

    def _p_act(s, l, t):
        # dirty hack to unpack group permutations:
        # if the result contains nested ParseResults, it is a
        # multi-dimensional cross whose levels must be expanded into
        # their cartesian product.
        levels_array = list(map(lambda v: isinstance(v, ParseResults), t))
        if sum(levels_array):
            # find where the second level of the cross starts
            first_cross_starts_at = levels_array.index(True)
            t = t.asList()
            levels = [t[:first_cross_starts_at]]
            levels.extend(t[first_cross_starts_at:])
            return list(map(lambda v: list(v), product(*levels)))
        return t

    self.expr = Forward()
    atom = _variable | (_lpar + ungroup(self.expr) + _rpar)
    cross = Forward()
    # '>'-separated atoms; the parse action flattens/crosses the groups.
    cross << (atom + ZeroOrMore(Group(_nest + atom))).setParseAction(_p_act)
    add = Forward()
    # '+'-separated crosses.
    add << cross + ZeroOrMore(_add + cross)
    self.expr << Group(add + ZeroOrMore(add))
def __parser(expression):
    """
    Parse *expression* and return its tokens as a postfix (RPN) stack.

    adopted from Paul McGuire example.
    http://pyparsing.wikispaces.com/file/view/fourFn.py

    Grammar (informal):
        atom   :: 'X' | PI | E | number | ident '(' expr ')' | '(' expr ')'
        factor :: atom [ '^' factor ]*     -- right-associative exponent
        term   :: factor [ ('*'|'/') factor ]*
        expr   :: term [ ('+'|'-') term ]*

    Returns:
        list: operands/operators in evaluation order; a leading minus is
        recorded as the marker string 'unary -'.
    """
    expr_stack = []

    def push_first(strg, loc, toks):
        # Parse action: push the first matched token onto the RPN stack.
        expr_stack.append(toks[0])

    def push_u_minus(strg, loc, toks):
        # Parse action: record a leading '-' as a unary-minus marker.
        if toks and toks[0] == '-':
            expr_stack.append('unary -')

    point = Literal('.')
    _e = CaselessLiteral('E')
    # Float: optional sign, digits, optional fraction and exponent.
    fnumber = Combine(
        Word('+-' + nums, nums) +
        Optional(point + Optional(Word(nums))) +
        Optional(_e + Word('+-' + nums, nums)))
    ident = Word(alphas, alphas + nums + '_$')
    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("*")
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    expop = Literal("^")
    _pi = CaselessLiteral("PI")
    x = CaselessLiteral("X")
    expr = Forward()
    # The parenthesized branch suppresses its tokens; the inner expr still
    # fires its parse actions, so the stack is built as a side effect.
    atom = (Optional("-") +
            (x | _pi | _e | fnumber |
             ident + lpar + expr + rpar).setParseAction(push_first) |
            (lpar + expr.suppress() + rpar)).setParseAction(push_u_minus)
    factor = Forward()
    # 'atom [^ factor]*' (not 'atom [^ atom]*') gives right-to-left
    # exponentiation: 2^3^2 == 2^(3^2).
    factor << atom + ZeroOrMore(
        (expop + factor).setParseAction(push_first))
    term = factor + ZeroOrMore(
        (multop + factor).setParseAction(push_first))
    expr << term + ZeroOrMore((addop + term).setParseAction(push_first))
    # Parsing is done only for its side effect on expr_stack.
    expr.parseString(expression)
    return expr_stack
def create_bnf(term_descs):
    """Create the BNF grammar for parsing equations of named terms.

    term_descs .. list of TermParse objects (sign, term_name,
    term_arg_names), where sign can be real or complex multiplier.

    NOTE(review): `rhs` and `collect_term` are parse-action factories
    defined elsewhere; they mutate `term_descs` and the `lc` sign context
    as a side effect of parsing.
    """
    lc = ['+'] # Linear combination context.
    # '=' flips the sign context for the right-hand side.
    equal = Literal("=").setParseAction(rhs(lc))
    zero = Literal("0").suppress()

    # Numbers: signed integers and floats with optional exponent, plus an
    # optional 'j' suffix for complex multipliers.
    point = Literal(".")
    e = CaselessLiteral("E")
    inumber = Word("+-" + nums, nums)
    fnumber = Combine(Word("+-" + nums, nums) +
                      Optional(point + Optional(Word(nums))) +
                      Optional(e + Word("+-" + nums, nums)))
    number = fnumber + Optional(Literal('j'), default='')
    add_op = oneOf('+ -')
    # Recursive arithmetic over numbers, with (unbalanced-tolerant) parens.
    number_expr = Forward()
    number_expr << ZeroOrMore('(') + number \
                + ZeroOrMore(add_op + number_expr) \
                + ZeroOrMore(')')

    ident = Word(alphas, alphanums + "_")
    # Integral specification: 'i<name>', bare 'i', 'a', or a number.
    integral = Combine((Literal('i') + Word(alphanums)) | Literal('i')
                       | Literal('a') | Word(nums))("integral")
    # Optional time-history index, e.g. '[-1]'.
    history = Optional('[' + inumber + ']', default='')("history")
    variable = Combine(Word(alphas, alphanums + '._') + history)

    # Generalized variables: time derivative 'd<var>/dt' or trace 'tr(<var>)'.
    derivative = Combine(Literal('d') + variable \
                         + Literal('/') + Literal('dt'))
    trace = Combine(Literal('tr') + '(' + variable + ')')

    generalized_var = derivative | trace | variable
    args = Group(delimitedList(generalized_var))
    flag = Literal('a')

    # One term: [sign] [multiplier *] name[.integral][.region][.flag](args)
    term = Optional(Literal('+') | Literal('-'), default='+')("sign") \
           + Optional(number_expr + Literal('*').suppress(),
                      default=['1.0', ''])("mul") \
           + Combine(ident("name") \
                     + Optional("." + (integral + "." + ident("region")
                                       + "." + flag("flag")
                                       | integral + "." + ident("region")
                                       | ident("region")
                                       )))("term_desc") + "(" \
           + Optional(args, default=[''])("args") + ")"
    term.setParseAction(collect_term(term_descs, lc))

    rhs1 = equal + OneOrMore(term)
    rhs2 = equal + zero
    # Whole equation: terms, optionally '=' more terms or '= 0'.
    equation = StringStart() + OneOrMore(term) \
               + Optional(rhs1 | rhs2) + StringEnd()

    ## term.setDebug()
    return equation
def _define_encode_options():
    """Grammar for the parenthesized option list of an encode clause.

    Matches ``(strategy="...", persist="...")``-style options, tagging the
    matched values with the result names ``encodeStrategy`` and
    ``encodePersist``.
    """
    # strategy = "<one of the known strategies>"
    strategy_option = (
        (CaselessLiteral('strategy') + Literal('=')).suppress()
        + Quote
        + MatchFirst(_define_encode_strategies()).setResultsName('encodeStrategy')
        + Quote
    )

    # persist = "<value>"  (the whole clause is optional)
    persist_option = Optional(
        (CaselessLiteral('persist') + Literal('=')).suppress()
        + Quote
        + Word(everythingWOQuotes).setResultsName('encodePersist')
        + Quote
    )

    any_option = MatchFirst([strategy_option, persist_option])
    return openParen + delimitedList(any_option, delim=',') + closeParen
def __init__(self):
    """Build the free-text search grammar (keywords, quoted phrases,
    negation, OR-expressions and parentheses) and store it on
    ``self._grammar``.

    Does nothing when pyparsing is unavailable (``ParserElement`` falsy).

    NOTE(review): ALLCHARS / WORDCHARS / ALLWORDCHARS are module-level
    character sets defined elsewhere in this file.
    """
    if not ParserElement:
        return
    with warnings.catch_warnings():
        # In Python 2.6, pyparsing throws warnings on its own code.
        warnings.simplefilter("ignore")
        orOperator = Suppress(
            CaselessLiteral("OR")).setResultsName("OR_OPERATOR")
        # Anything between double quotes, whitespace preserved.
        quoteContents = Group(Word(ALLCHARS.replace("\"", "")))
        quoteContents.leaveWhitespace()
        quotedWord = Group(Suppress('"') + quoteContents +
                           Suppress('"')).setResultsName("QUOTES")
        # A bare word that is not the OR keyword and does not start with '-'.
        plainWord = Group(
            NotAny(CaselessLiteral("OR")) +
            Word(WORDCHARS.replace("-", ""), WORDCHARS)).setResultsName(
                "PLAINWORD")
        # Fallback token: any word not adjacent to parentheses.
        anyWord = Group(
            NotAny('(') + ~FollowedBy(')') +
            Word(ALLWORDCHARS)).setResultsName("ANYWORD")
        # 'key:value' (optionally negated), value possibly quoted.
        keyWord = Group(
            Combine(
                Optional("-") + Word(string.ascii_letters) + Literal(":") +
                (Word(WORDCHARS) | quotedWord))).setResultsName("KEYWORD")
        # '-word' / '-"phrase"' negation; the '-' must be glued to the term.
        notExpr = Group(
            Suppress("-") + NotAny(string.whitespace) +
            (quotedWord | plainWord)).setResultsName("NOT")
        word = Group(keyWord | notExpr | quotedWord |
                     plainWord).setResultsName("WORD")
        grammar = Forward()
        parens = Forward()
        orOperand = Group(word | parens | notExpr |
                          anyWord).setResultsName("OR_OPERAND")
        # Lookahead guarantees at least 'operand OR operand' before
        # committing to an OR expression.
        orExpr = Group(
            FollowedBy(orOperand + orOperator + orOperand) +
            Group(orOperand + OneOrMore(orOperator + orOperand))
        ).setResultsName("OR_EXPRESSION")
        oneExpr = Group(orExpr | parens | word |
                        anyWord).setResultsName("ONE EXPRESSION")
        # Optionally negated parenthesized group of sub-expressions.
        parens <<= Group(
            Group(Optional("-")).setResultsName("NOT_PARENTHESIS") +
            Suppress("(") + ZeroOrMore(parens | grammar) +
            Suppress(")")).setResultsName("PARENTHESIS")
        grammar <<= ((oneExpr + grammar) | oneExpr).setResultsName("GRAMMAR")
        self._grammar = grammar
def __init__(self, slack=None):
    """Set up the poll command: display name, trigger grammar, help text
    and the numbered reaction emoji for up to ten options."""
    self.name = "Simple Poll"
    # Trigger: the word 'poll' followed by a comma-separated option list,
    # consuming the whole input line.
    self.expr = (CaselessLiteral('poll')
                 + symbols.comma_list
                 + StringEnd())
    self.doc = ("Basic poll (up to 10 options)\n"
                "\tpoll <option1>, <option2> [, ... <option10>])")
    # One reaction emoji per option slot, in order.
    slots = ['one', 'two', 'three', 'four', 'five',
             'six', 'seven', 'eight', 'nine', 'keycap_ten']
    self.emoji = [':%s:' % s for s in slots]
def __init__(self):
    """Build the search-DSL grammar and bind its parse actions.

    Parsing is side-effect driven: the parse actions below mutate
    ``self.frame`` and ``self.json_query`` and call frame helpers
    (``push_frame`` / ``and_frame`` / ``or_frame`` / ``range_frame`` /
    ``time_frame`` / ``time_bounds_frame`` / ``vertical_bounds_frame``)
    that are defined elsewhere in this class.  The entry point is
    ``self.sentence``.
    """
    # Accumulated query state, filled in during parsing.
    self.json_query = {'query':{}, 'and': [], 'or': []}
    self.tokens = None
    #--------------------------------------------------------------------------------------
    # <integer> ::= 0-9
    # <double> ::= 0-9 ('.' 0-9)
    # <number> ::= <integer> | <double>
    #--------------------------------------------------------------------------------------
    integer = Regex(r'-?[0-9]+') # Word matches space for some reason
    double = Regex(r'-?[0-9]+.?[0-9]*')
    number = double | integer
    #--------------------------------------------------------------------------------------
    # <python-string> ::= (String surrounded by double-quotes)
    # <wildcard-string> ::= <python-string>
    # <limited-string> ::= '"' a..z A..Z 9..9 _ . '"' (alpha nums and ._ surrounded by double quotes)
    # <field> ::= <limited-string> | "*"
    # <coords> ::= "LAT" <number> "LON" <number>
    # <units> ::= ('km' | 'mi' | 'nm')
    # <distance> ::= REGEX(([0-9]*\.?[0-9]*)(km|mi|nm)?)
    #--------------------------------------------------------------------------------------
    python_string = quotedString.setParseAction(removeQuotes)
    wildcard_string = python_string
    limited_string = Regex(r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(removeQuotes)
    field = limited_string ^ CaselessLiteral('"*"').setParseAction(removeQuotes)
    coords = CaselessLiteral("LAT") + number + CaselessLiteral("LON") + number
    # NOTE(review): only km/mi are accepted here although the BNF above
    # also mentions 'nm' — confirm whether that is intentional.
    units = CaselessLiteral('km') | CaselessLiteral('mi')
    distance = number + units
    distance.setParseAction( lambda x : self.frame.update({'dist' : float(x[0]), 'units' : x[1]}))
    #--------------------------------------------------------------------------------------
    # Date
    #--------------------------------------------------------------------------------------
    date = python_string
    #--------------------------------------------------------------------------------------
    # <query-filter> ::= "FILTER" <python-string>
    # <index-name> ::= <python-string>
    # <resource-id> ::= '"' a..z A..Z 0..9 $ _ -'"' (alpha nums surrounded by double quotes)
    # <collection-id> ::= <resource-id>
    #--------------------------------------------------------------------------------------
    query_filter = CaselessLiteral("FILTER") + python_string
    # Add the filter to the frame object
    query_filter.setParseAction(lambda x : self.frame.update({'filter' : x[1]}))
    index_name = MatchFirst(python_string)
    # Add the index to the frame object
    index_name.setParseAction(lambda x : self.frame.update({'index' : x[0]}))
    resource_id = Regex(r'("(?:[a-zA-Z0-9\$_-])*"|\'(?:[a-zA-Z0-9\$_-]*)\')').setParseAction(removeQuotes)
    collection_id = resource_id
    #--------------------------------------------------------------------------------------
    # <from-statement> ::= "FROM" <number>
    # <to-statement> ::= "TO" <number>
    #--------------------------------------------------------------------------------------
    from_statement = CaselessLiteral("FROM") + number
    from_statement.setParseAction(lambda x : self.frame.update({'from' : x[1]}))
    to_statement = CaselessLiteral("TO") + number
    to_statement.setParseAction(lambda x : self.frame.update({'to' : x[1]}))
    #--------------------------------------------------------------------------------------
    # <date-from-statement> ::= "FROM" <date>
    # <date-to-statement> ::= "TO" <date>
    #--------------------------------------------------------------------------------------
    date_from_statement = CaselessLiteral("FROM") + date
    date_from_statement.setParseAction(lambda x : self.frame.update({'from' : x[1]}))
    date_to_statement = CaselessLiteral("TO") + date
    date_to_statement.setParseAction(lambda x : self.frame.update({'to' : x[1]}))
    #--------------------------------------------------------------------------------------
    # <time-query> ::= "TIME FROM" <date> "TO" <date>
    #--------------------------------------------------------------------------------------
    time_query = CaselessLiteral("TIME") + Optional(date_from_statement) + Optional(date_to_statement)
    time_query.setParseAction(lambda x : self.time_frame())
    # Leftover from an earlier inline implementation:
    # time.mktime(dateutil.parser.parse(x[2])), 'to':time.mktime(dateutil.parser.parse(x[4]))}}))
    #--------------------------------------------------------------------------------------
    # <time-bounds> ::= "TIMEBOUNDS" <from-statement> <to-statement>
    #--------------------------------------------------------------------------------------
    time_bounds = CaselessLiteral("TIMEBOUNDS") + date_from_statement + date_to_statement
    time_bounds.setParseAction(lambda x : self.time_bounds_frame())
    #--------------------------------------------------------------------------------------
    # <vertical-bounds> ::= "VERTICAL" <from-statement> <to-statement>
    #--------------------------------------------------------------------------------------
    vertical_bounds = CaselessLiteral("VERTICAL") + from_statement + to_statement
    vertical_bounds.setParseAction(lambda x : self.vertical_bounds_frame())
    #--------------------------------------------------------------------------------------
    # <range-query> ::= "VALUES" [<from-statement>] [<to-statement>]
    #--------------------------------------------------------------------------------------
    range_query = CaselessLiteral("VALUES") + Optional(from_statement) + Optional(to_statement)
    # Add the range to the frame object
    range_query.setParseAction(lambda x : self.range_frame())
    #--------------------------------------------------------------------------------------
    # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords>
    # <geo-bbox> ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords>
    #--------------------------------------------------------------------------------------
    geo_distance = CaselessLiteral("DISTANCE") + distance + CaselessLiteral("FROM") + coords
    geo_distance.setParseAction(lambda x : self.frame.update({'lat': float(x[5]), 'lon':float(x[7])}))
    geo_bbox = CaselessLiteral("BOX") + CaselessLiteral("TOP-LEFT") + coords + CaselessLiteral("BOTTOM-RIGHT") + coords
    geo_bbox.setParseAction(lambda x : self.frame.update({'top_left':[float(x[5]),float(x[3])], 'bottom_right':[float(x[10]),float(x[8])]}))
    #--------------------------------------------------------------------------------------
    # <field-query> ::= <wildcard-string>
    # <term-query> ::= "IS" <field-query>
    # <fuzzy-query> ::= "LIKE" <field-query>
    # <match-query> ::= "MATCH" <field-query>
    # <geo-query> ::= "GEO" ( <geo-distance> | <geo-bbox> )
    #--------------------------------------------------------------------------------------
    field_query = wildcard_string
    term_query = CaselessLiteral("IS") + field_query
    term_query.setParseAction(lambda x : self.frame.update({'value':x[1]}))
    geo_query = CaselessLiteral("GEO") + ( geo_distance | geo_bbox )
    fuzzy_query = CaselessLiteral("LIKE") + field_query
    fuzzy_query.setParseAction(lambda x : self.frame.update({'fuzzy':x[1]}))
    match_query = CaselessLiteral("MATCH") + field_query
    match_query.setParseAction(lambda x : self.frame.update({'match':x[1]}))
    #--------------------------------------------------------------------------------------
    # <limit-parameter> ::= "LIMIT" <integer>
    # <depth-parameter> ::= "DEPTH" <integer>
    # <order-parameter> ::= "ORDER" "BY" <limited-string>
    # <offset-parameter> ::= "SKIP" <integer>
    # <query-parameter> ::= <order-paramater> | <limit-parameter>
    #--------------------------------------------------------------------------------------
    # NOTE(review): limit/order/skip write to json_query directly while
    # depth writes to the current frame — confirm that asymmetry is wanted.
    limit_parameter = CaselessLiteral("LIMIT") + integer
    limit_parameter.setParseAction(lambda x: self.json_query.update({'limit' : int(x[1])}))
    depth_parameter = CaselessLiteral("DEPTH") + integer
    depth_parameter.setParseAction(lambda x: self.frame.update({'depth' : int(x[1])}))
    order_parameter = CaselessLiteral("ORDER") + CaselessLiteral("BY") + limited_string
    order_parameter.setParseAction(lambda x: self.json_query.update({'order' : {x[2] : 'asc'}}))
    offset_parameter = CaselessLiteral("SKIP") + integer
    offset_parameter.setParseAction(lambda x : self.json_query.update({'skip' : int(x[1])}))
    query_parameter = limit_parameter | order_parameter | offset_parameter
    #--------------------------------------------------------------------------------------
    # <search-query> ::= "SEARCH" <field> (<range-query> | <term-query> | <fuzzy-query> | <match-query> | <time-query> | <time-bounds> | <vertical-bounds> | <geo-query>) "FROM" <index-name> [<query-parameter>]*
    # <collection-query> ::= "IN <collection-id>"
    # <association-query> ::= "BELONGS TO" <resource-id> [ <depth-parameter> ]
    # <owner-query> ::= "HAS" <resource-id> [ <depth-parameter> ]
    # <query> ::= <search-query> | <association-query> | <collection-query> | <owner-query>
    #--------------------------------------------------------------------------------------
    search_query = CaselessLiteral("SEARCH") + field + (range_query | term_query | fuzzy_query | match_query | vertical_bounds | time_bounds | time_query | geo_query) + CaselessLiteral("FROM") + index_name
    # Add the field to the frame object
    search_query.setParseAction(lambda x : self.frame.update({'field' : x[1]}))
    collection_query = CaselessLiteral("IN") + collection_id
    collection_query.setParseAction(lambda x : self.frame.update({'collection': x[1]}))
    association_query = CaselessLiteral("BELONGS") + CaselessLiteral("TO") + resource_id + Optional(depth_parameter)
    # Add the association to the frame object
    association_query.setParseAction(lambda x : self.frame.update({'association':x[2]}))
    owner_query = CaselessLiteral("HAS") + resource_id + Optional(depth_parameter)
    owner_query.setParseAction(lambda x : self.frame.update({'owner':x[1]}))
    query = search_query | association_query | collection_query | owner_query
    #--------------------------------------------------------------------------------------
    # <primary-query> ::= <query> [<query-filter>]
    # <atom> ::= <query>
    # <intersection> ::= "AND" <atom>
    # <union> ::= "OR" <atom>
    # <sentence> ::= <primary-query> [<intersection>]* [<union>]*
    #--------------------------------------------------------------------------------------
    primary_query = query + Optional(query_filter)
    # Set the primary query on the json_query to the frame and clear the frame
    primary_query.setParseAction(lambda x : self.push_frame())
    atom = query
    intersection = CaselessLiteral("AND") + atom
    # Add an AND operation to the json_query and clear the frame
    intersection.setParseAction(lambda x : self.and_frame())
    union = CaselessLiteral("OR") + atom
    # Add an OR operation to the json_query and clear the frame
    union.setParseAction(lambda x : self.or_frame())
    self.sentence = primary_query + (intersection ^ union)*(0,None) + query_parameter*(0,None)
# Grammars for chat-style commands (exits/say/tell/listen/shout/emote).
# NOTE(review): `objref`, `replaceWith` and the pyparsing names used here
# are defined elsewhere in this module.

# 'exits' on a line by itself.
exits = CaselessLiteral('exits')('verb') + LineEnd()

# 'say' may be spelled out or abbreviated as a leading double-quote;
# either spelling is normalized to the verb token 'say'.
say_verb = oneOf('say "', caseless=True)('verb')
say_verb.setParseAction(replaceWith('say'))

# Free-text message: the remaining words, re-joined with single spaces.
words = Combine(OneOrMore(Word(printables)), joinString=' ', adjacent=False)
msg = words('message')
say = say_verb + msg

tell_verb = CaselessLiteral('tell')('verb')
tell = tell_verb + msg

# Optional 'to ' particle (note the trailing space in the literal),
# normalized and then suppressed from the result.
to_to_ = CaselessLiteral('to ')
to_to_.setParseAction(replaceWith('to'))
to_ = Suppress(to_to_)
# An object name: words up to, but not including, a 'to' particle.
words_without_to = Combine(OneOrMore(~to_ + Word(alphanums)),
                           joinString=' ', adjacent=False)('objname')

listen_verb = CaselessLiteral('listen')('verb')
listen = listen_verb + Optional(to_) + objref

shout_verb = CaselessLiteral('shout')('verb')
shout = shout_verb + msg

# 'emote' or its ':' shorthand, normalized to 'emote'.
emote_verb = oneOf('emote :', caseless=True)('verb')
emote_verb.setParseAction(replaceWith('emote'))
emote = emote_verb + msg
bool = CaselessLiteral("true") | CaselessLiteral("false") columnRval = "None" | bool | realNum | intNum | quotedString | columnName # need to add support for alg expressions assignment = (columnName + setOpToken + columnRval) whereCondition = Group( ( columnName + binop + columnRval ) | ( columnName + in_ + "(" + delimitedList( columnRval, ',', combine=True) + ")" ) | ( columnName + in_ + "(" + selectStmt + ")" ) | ( columnName + between_ + columnRval + and_ + columnRval ) | ( "(" + whereExpression + ")" ).setResultsName("whereExpr") | ( columnName + CaselessLiteral('is') + CaselessLiteral('null')) ) whereExpression << whereCondition + ZeroOrMore( ( and_ | or_ ) + whereExpression ) orderByToken = CaselessLiteral("order by") limitToken = CaselessLiteral("limit") offsetToken = CaselessLiteral("offset") asc_ = CaselessLiteral('ASC') desc_ = CaselessLiteral('DESC') # define the grammar selectStmt << (selectToken + (columnNameList.setResultsName( "columns" ) + fromToken + (collection | rootEntity)) + Optional( Group( CaselessLiteral("where") + whereExpression ).setResultsName("where") , "" ) + Optional(orderByToken.suppress() +
import operator
from functools import reduce

from pyparsing import CaselessLiteral, Literal, Optional, Word, ZeroOrMore

from dice import Die

# A run of decimal digits, converted to int.
NUMBER = Word('0123456789')
NUMBER.setParseAction(lambda toks: int(toks[0]))

# 'd<sides>' -> a single Die with that many sides.
DIE = CaselessLiteral('d').suppress() + NUMBER('sides')
DIE.setParseAction(lambda toks: Die(toks.sides))

# '<count>d<sides>' -> that many dice.
DICE = NUMBER('multiplier') + DIE('die')
DICE.setParseAction(lambda toks: toks.die * toks.multiplier)

# Any single term of a dice expression, tried in order.
_TERM = DIE | DICE | NUMBER

# One or more terms joined by '+', folded together with addition.
DICE_GROUP = _TERM + ZeroOrMore(Literal('+').suppress() + _TERM)
DICE_GROUP.setParseAction(lambda toks: reduce(operator.add, toks))


def parse(string):
    """Parse a dice expression such as '2d6 + d4 + 3' and return its value."""
    return DICE_GROUP.parseString(string)[0]
def __init__(self):
    """Construct the search-DSL grammar and bind its parse actions.

    Parsing is side-effect driven: the parse actions mutate ``self.frame``
    and call ``self.push_frame`` / ``self.and_frame`` / ``self.or_frame``,
    which are defined elsewhere in this class.  The entry point is
    ``self.sentence``.
    """
    # Accumulated query state, filled in during parsing.
    self.json_query = {"query": {}, "and": [], "or": []}
    self.tokens = None
    # --------------------------------------------------------------------------------------
    # <integer> ::= 0-9
    # <double> ::= 0-9 ('.' 0-9)
    # <number> ::= <integer> | <double>
    # --------------------------------------------------------------------------------------
    integer = Regex(r"-?[0-9]+")  # Word matches space for some reason
    double = Regex(r"-?[0-9]+.?[0-9]*")
    number = double | integer
    # --------------------------------------------------------------------------------------
    # <python-string> ::= (String surrounded by double-quotes)
    # <wildcard-string> ::= <python-string>
    # <limited-string> ::= '"' a..z A..Z 9..9 _ . '"' (alpha nums and ._ surrounded by double quotes)
    # <field> ::= <limited-string> | "*"
    # <coords> ::= "LAT" <number> "LON" <number>
    # <units> ::= ('km' | 'mi' | 'nm')
    # <distance> ::= REGEX(([0-9]*\.?[0-9]*)(km|mi|nm)?)
    # --------------------------------------------------------------------------------------
    python_string = quotedString.setParseAction(removeQuotes)
    wildcard_string = python_string
    limited_string = Regex(r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(removeQuotes)
    field = limited_string ^ CaselessLiteral('"*"').setParseAction(removeQuotes)
    coords = CaselessLiteral("LAT") + number + CaselessLiteral("LON") + number
    units = CaselessLiteral("km") | CaselessLiteral("nm") | CaselessLiteral("mi")
    distance = number + units
    distance.setParseAction(lambda x: self.frame.update({"dist": float(x[0]), "units": x[1]}))
    # --------------------------------------------------------------------------------------
    # <query-filter> ::= "FILTER" <python-string>
    # <index-name> ::= <python-string>
    # <resource-id> ::= '"' a..z A..Z 0..9 '"' (alpha nums surrounded by double quotes)
    # <collection-id> ::= <resource-id>
    # --------------------------------------------------------------------------------------
    query_filter = CaselessLiteral("FILTER") + python_string
    # Add the filter to the frame object
    query_filter.setParseAction(lambda x: self.frame.update({"filter": x[1]}))
    index_name = MatchFirst(python_string)
    # Add the index to the frame object
    index_name.setParseAction(lambda x: self.frame.update({"index": x[0]}))
    resource_id = Regex(r'("(?:[a-zA-Z0-9])*"|\'(?:[a-zA-Z0-9]*)\')').setParseAction(removeQuotes)
    collection_id = resource_id
    # --------------------------------------------------------------------------------------
    # <range-query> ::= "VALUES FROM" <number> "TO" <number>
    # --------------------------------------------------------------------------------------
    range_query = CaselessLiteral("VALUES") + CaselessLiteral("FROM") + number + CaselessLiteral("TO") + number
    # Add the range to the frame object
    range_query.setParseAction(lambda x: self.frame.update({"range": {"from": float(x[2]), "to": float(x[4])}}))
    # --------------------------------------------------------------------------------------
    # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords>
    # <geo-bbox> ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords>
    # --------------------------------------------------------------------------------------
    geo_distance = CaselessLiteral("DISTANCE") + distance + CaselessLiteral("FROM") + coords
    geo_distance.setParseAction(lambda x: self.frame.update({"lat": float(x[5]), "lon": float(x[7])}))
    geo_bbox = (
        CaselessLiteral("BOX")
        + CaselessLiteral("TOP-LEFT")
        + coords
        + CaselessLiteral("BOTTOM-RIGHT")
        + coords
    )
    geo_bbox.setParseAction(
        lambda x: self.frame.update(
            {"top_left": [float(x[5]), float(x[3])], "bottom_right": [float(x[10]), float(x[8])]}
        )
    )
    # --------------------------------------------------------------------------------------
    # <field-query> ::= <wildcard-string>
    # <term-query> ::= "IS" <field-query>
    # <geo-query> ::= "GEO" ( <geo-distance> | <geo-bbox> )
    # --------------------------------------------------------------------------------------
    field_query = wildcard_string
    term_query = CaselessLiteral("IS") + field_query
    # Add the term to the frame object
    term_query.setParseAction(lambda x: self.frame.update({"value": x[1]}))
    geo_query = CaselessLiteral("GEO") + (geo_distance | geo_bbox)
    # --------------------------------------------------------------------------------------
    # <limit-parameter> ::= "LIMIT" <integer>
    # <depth-parameter> ::= "DEPTH" <integer>
    # <order-parameter> ::= "ORDER" "BY" <limited-string>
    # <offset-parameter> ::= "SKIP" <integer>
    # <query-parameter> ::= <order-paramater> | <limit-parameter>
    # --------------------------------------------------------------------------------------
    limit_parameter = CaselessLiteral("LIMIT") + integer
    limit_parameter.setParseAction(lambda x: self.frame.update({"limit": int(x[1])}))
    depth_parameter = CaselessLiteral("DEPTH") + integer
    depth_parameter.setParseAction(lambda x: self.frame.update({"depth": int(x[1])}))
    order_parameter = CaselessLiteral("ORDER") + CaselessLiteral("BY") + limited_string
    order_parameter.setParseAction(lambda x: self.frame.update({"order": {x[2]: "asc"}}))
    offset_parameter = CaselessLiteral("SKIP") + integer
    offset_parameter.setParseAction(lambda x: self.frame.update({"offset": int(x[1])}))
    query_parameter = limit_parameter | order_parameter | offset_parameter
    # --------------------------------------------------------------------------------------
    # <search-query> ::= "SEARCH" <field> (<range-query> | <term-query> | <geo-query>) "FROM" <index-name> [<query-parameter>]*
    # <collection-query> ::= "IN <collection-id>"
    # <association-query> ::= "BELONGS TO" <resource-id> [ <depth-parameter> ]
    # <query> ::= <search-query> | <association-query> | <collection-query>
    # --------------------------------------------------------------------------------------
    search_query = (
        CaselessLiteral("SEARCH")
        + field
        + (range_query | term_query | geo_query)
        + CaselessLiteral("FROM")
        + index_name
        + query_parameter * (0, None)
    )
    # Add the field to the frame object
    search_query.setParseAction(lambda x: self.frame.update({"field": x[1]}))
    collection_query = CaselessLiteral("IN") + collection_id
    collection_query.setParseAction(lambda x: self.frame.update({"collection": x[1]}))
    association_query = CaselessLiteral("BELONGS") + CaselessLiteral("TO") + resource_id + Optional(depth_parameter)
    # Add the association to the frame object
    association_query.setParseAction(lambda x: self.frame.update({"association": x[2]}))
    query = search_query | association_query | collection_query
    # --------------------------------------------------------------------------------------
    # <primary-query> ::= <query> [<query-filter>]
    # <atom> ::= <query>
    # <intersection> ::= "AND" <atom>
    # <union> ::= "OR" <atom>
    # <sentence> ::= <primary-query> [<intersection>]* [<union>]*
    # --------------------------------------------------------------------------------------
    primary_query = query + Optional(query_filter)
    # Set the primary query on the json_query to the frame and clear the frame
    primary_query.setParseAction(lambda x: self.push_frame())
    atom = query
    intersection = CaselessLiteral("AND") + atom
    # Add an AND operation to the json_query and clear the frame
    intersection.setParseAction(lambda x: self.and_frame())
    union = CaselessLiteral("OR") + atom
    # Add an OR operation to the json_query and clear the frame
    union.setParseAction(lambda x: self.or_frame())
    self.sentence = primary_query + (intersection ^ union) * (0, None)