def _parse_expr(text, ldelim="(", rdelim=")"): """Parse mathematical expression using PyParsing.""" var = pyparsing.Word(pyparsing.alphas + "_", pyparsing.alphanums + "_") point = pyparsing.Literal(".") exp = pyparsing.CaselessLiteral("E") number = pyparsing.Combine( pyparsing.Word("+-" + pyparsing.nums, pyparsing.nums) + pyparsing.Optional(point + pyparsing.Optional(pyparsing.Word(pyparsing.nums))) + pyparsing.Optional( exp + pyparsing.Word("+-" + pyparsing.nums, pyparsing.nums) ) ) atom = var | number oplist = [ (pyparsing.Literal("**"), 2, pyparsing.opAssoc.RIGHT), (pyparsing.oneOf("+ - ~"), 1, pyparsing.opAssoc.RIGHT), (pyparsing.oneOf("* / // %"), 2, pyparsing.opAssoc.LEFT), (pyparsing.oneOf("+ -"), 2, pyparsing.opAssoc.LEFT), (pyparsing.oneOf("<< >>"), 2, pyparsing.opAssoc.LEFT), (pyparsing.Literal("&"), 2, pyparsing.opAssoc.LEFT), (pyparsing.Literal("^"), 2, pyparsing.opAssoc.LEFT), (pyparsing.Literal("|"), 2, pyparsing.opAssoc.LEFT), ] # Get functions expr = pyparsing.infixNotation( atom, oplist, lpar=pyparsing.Suppress(ldelim), rpar=pyparsing.Suppress(rdelim) ) return expr.parseString(text)[0]
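# Usage sketch (added; not part of the original snippet).  Assuming the module
# imports pyparsing, the parser accepts arithmetic over identifiers and numeric
# literals, and the delimiter arguments let callers swap the grouping characters.
print(_parse_expr("x + 2 * (y - 3.5)"))                    # nested ParseResults tree
print(_parse_expr("a * [b + c]", ldelim="[", rdelim="]"))  # alternative delimiters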
def get_idl_line_parser(): """ Based on http://pyparsing.wikispaces.com/file/view/parsePythonValue.py """ from pyparsing import \ Word, ZeroOrMore, OneOrMore, Optional, oneOf, StringEnd, Suppress, Group, Combine, \ nums, dblQuotedString, removeQuotes s = Suppress int_number = Combine(Optional(oneOf("+ -")) + Word(nums)).setParseAction(lambda tokens: int(tokens[0])).setName("integer") float_number = \ Combine(Optional(oneOf("+ -")) + Word(nums) + Optional("." + Optional(Word(nums)) + Optional(oneOf("e E")+Optional(oneOf("+ -")) +Word(nums)))) \ .setName("float") \ .setParseAction( lambda tokens: float(tokens[0]) ) bounding_box = s('(') + OneOrMore( int_number | s(',') ) + s(')') bounding_box_with_score = Group(bounding_box + Optional( ( s(":") | s("~") ) + float_number )) #filename = s('"') + Word(alphanums + "/_.~") + s('"') quoted = dblQuotedString.setParseAction(removeQuotes) filename = quoted idl_line = filename + Optional(s(':') + ZeroOrMore(bounding_box_with_score | s(','))) + ( s(";") | s(".") ) + StringEnd() #print( filename.parseString("\"left/image_00000004_0.png\"") ) #print( bounding_box.parseString("(221, 183, 261, 289)") ) return idl_line.parseString
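# Hedged usage sketch (added; not in the original source).  The factory returns
# the bound parseString method, so a single detection line parses like:
parse_idl_line = get_idl_line_parser()
print(parse_idl_line('"left/image_00000004_0.png": (221, 183, 261, 289):0.95;'))
# -> roughly ['left/image_00000004_0.png', [221, 183, 261, 289, 0.95]]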
def parse(formula):
    """Parse formula string and create abstract syntax tree (AST)."""
    # LTL expression
    _ltl_expr = pp.operatorPrecedence(
        _proposition,
        [
            ("'", 1, pp.opAssoc.LEFT, ASTUnTempOp),
            ("!", 1, pp.opAssoc.RIGHT, ASTNot),
            (_UnaryTempOps, 1, pp.opAssoc.RIGHT, ASTUnTempOp),
            (pp.oneOf("& &&"), 2, pp.opAssoc.LEFT, ASTAnd),
            (pp.oneOf("| ||"), 2, pp.opAssoc.LEFT, ASTOr),
            (pp.oneOf("xor ^"), 2, pp.opAssoc.LEFT, ASTXor),
            ("->", 2, pp.opAssoc.RIGHT, ASTImp),
            ("<->", 2, pp.opAssoc.RIGHT, ASTBiImp),
            (pp.oneOf("= == !="), 2, pp.opAssoc.RIGHT, ASTComparator),
            (pp.oneOf("U V R"), 2, pp.opAssoc.RIGHT, ASTBiTempOp),
        ],
    )
    _ltl_expr.ignore(pp.LineStart() + "--" + pp.restOfLine)
    # Increase recursion limit for complex formulae
    sys.setrecursionlimit(2000)
    try:
        return _ltl_expr.parseString(formula, parseAll=True)[0]
    except RuntimeError:
        raise pp.ParseException("Maximum recursion depth exceeded, "
                                "could not parse")
def check_in_parsing(s):
    update = ((Literal('@') + Group(OneOrMore(Word(alphas))) + StringEnd()) |
              (Literal('@') + SkipTo(oneOf("4 for For FOR")) +
               oneOf('4 for For FOR') + Word(nums)))
    # for s in smstestlist:
    #     print s
    try:
        u = update.parseString(s)
        print(u)
        if len(u) == 2:
            u[1] = ' '.join(u[1])
            if u[1] in safezones:
                print("user in safezone " + u[1])
                combo = {'location': u[1], 'hours': '24'}
                return combo
            else:
                print("Error: " + u[1] + " is not a safezone!")
                return ("Error: " + u[1] + " is not a safezone! "
                        "Syntax for checking in: @ location 4 numberofhours")
        else:
            print("user at " + u[1] + " for " + u[3] + " hours.")
            combo = {'location': u[1], 'hours': str(u[3])}
            return combo
    except:
        print("Error: Unable to understand!")
def interpretLine(self,l): #Construct parsing rules natural=pyparsing.Word(pyparsing.nums) natural_n=pyparsing.Word(pyparsing.nums) natural_n.setParseAction(lambda t: int(t[0])) integer=pyparsing.Optional(pyparsing.oneOf(['-', '+']))+natural decimal=pyparsing.Word('.',pyparsing.nums) exponent=pyparsing.Literal('e')+integer number=pyparsing.Combine(integer+pyparsing.Optional(decimal)+pyparsing.Optional(exponent)) number.setParseAction(lambda t: float(t[0])) cmd=pyparsing.Group(pyparsing.oneOf(CMDS)+natural_n) coord=pyparsing.Group(pyparsing.oneOf(AXES)+number) line=cmd+pyparsing.Group(pyparsing.ZeroOrMore(coord)) if len(l.strip())==0: return False res=line.parseString(l).asList() letter=res[0][0] num=res[0][1] if letter=='G': return self.interpretG(num,res[1]) if letter=='M': return self.interpretM(num) if letter=='T': return self.interpretT(num)
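# Illustrative input (added; CMDS and AXES are defined elsewhere in this module
# and are assumed to include the usual G-code command letters and axis words).
# A call such as
#
#     machine.interpretLine("G1 X12.5 Y-3.0")
#
# would dispatch to self.interpretG(1, [['X', 12.5], ['Y', -3.0]]).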
def __init__(self):
    """
    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    """
    point = Literal(".")
    e = CaselessLiteral("E")
    fnumber = Combine(Word("+-" + alphanums + "_", alphanums + "_") +
                      Optional(point + Optional(Word(alphanums + "_"))) +
                      Optional(e + Word("+-" + alphanums + "_", alphanums + "_")))
    ident = Word(alphas, alphanums + "_")
    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("*")
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    expop = Literal("^")
    pi = CaselessLiteral("PI")
    expr = Forward()
    function = ident + lpar + expr + ZeroOrMore("," + expr) + rpar
    atom = ((Optional(oneOf("- +")) +
             (pi | e | function | fnumber).setParseAction(self.pushFirst))
            | Optional(oneOf("- +")) + Group(lpar + expr + rpar)
            ).setParseAction(self.pushUMinus)

    # by defining exponentiation as "atom [ ^ factor ]..." instead of
    # "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
    # that is, 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor << atom + ZeroOrMore((expop + factor).setParseAction(self.pushFirst))
    term = factor + ZeroOrMore((multop + factor).setParseAction(self.pushFirst))
    expr << term + ZeroOrMore((addop + term).setParseAction(self.pushFirst))
    # addop_term = ( addop + term ).setParseAction( self.pushFirst )
    # general_term = term + ZeroOrMore( addop_term ) | OneOrMore( addop_term)
    # expr << general_term
    self.bnf = expr

    # map operator symbols to corresponding arithmetic operations
    epsilon = 1e-12
    self.opn = {"+": operator.add,
                "-": operator.sub,
                "*": operator.mul,
                "/": operator.truediv,
                "^": operator.pow}
    self.fn = {"sin": math.sin,
               "cos": math.cos,
               "tan": math.tan,
               "abs": abs,
               "trunc": lambda a: int(a),
               "round": round,
               # Python 3 has no cmp(); this keeps the original sign semantics.
               "sgn": lambda a: (a > epsilon) - (a < -epsilon)}
def _parseFeatureSignature(sig):
    """This function parses a given feature-signature."""
    mal = set()

    def _rewriteOne(p):
        return ""

    def _rewriteTwo(p):
        return ""

    def _addIdentifier2Mal(p):
        mal.add(p[0])

    operand = (__string | __hexadec | __function | __integer |
               __identifier.setParseAction(_addIdentifier2Mal))
    compoperator = pypa.oneOf("< > <= >= == !=")
    calcoperator = pypa.oneOf("+ - * / % & | << >>")
    expr = pypa.operatorPrecedence(
        operand,
        [
            ("defined", 1, pypa.opAssoc.RIGHT, _rewriteOne),
            ("!", 1, pypa.opAssoc.RIGHT, _rewriteOne),
            (calcoperator, 2, pypa.opAssoc.LEFT, _rewriteTwo),
            (compoperator, 2, pypa.opAssoc.LEFT, _rewriteTwo),
            ("&&", 2, pypa.opAssoc.LEFT, _rewriteTwo),
            ("||", 2, pypa.opAssoc.LEFT, _rewriteTwo),
        ],
    )
    try:
        rsig = expr.parseString(sig)[0]
    except pypa.ParseException as e:
        print("ERROR (parse): cannot parse sig (%s) -- (%s)" % (sig, e.col))
    return sig
def __init__(self): from pyparsing import (ParserElement, StringEnd, LineEnd, Literal, pythonStyleComment, ZeroOrMore, Suppress, Optional, Combine, OneOrMore, Regex, oneOf, QuotedString, Group, ParseException) ParserElement.setDefaultWhitespaceChars("\t ") EOF = StringEnd() EOL = ~EOF + LineEnd() # EOL must not match on EOF escape = Literal("\\") comment = pythonStyleComment junk = ZeroOrMore(comment | EOL).suppress() ## word (i.e: single argument string) word = Suppress(escape + EOL + Optional(comment)) \ | Combine(OneOrMore( escape.suppress() + Regex(".") | QuotedString("'", escChar='\\', multiline=True) | QuotedString('"', escChar='\\', multiline=True) | Regex("[^ \t\r\n\f\v\\\\$&<>();\|\'\"`]+") | Suppress(escape + EOL) )) ## redirector (aka bash file redirectors, such as "2>&1" sequences) fd_src = Regex("[0-2]").setParseAction(lambda t: int(t[0])) fd_dst = Suppress("&") + fd_src # "[n]<word" || "[n]<&word" || "[n]<&digit-" fd_redir = (Optional(fd_src, 0) + Literal("<") |Optional(fd_src, 1) + Literal(">"))\ +(word | (fd_dst + Optional("-"))) # "&>word" || ">&word" full_redir = (oneOf("&> >&") + word)\ .setParseAction(lambda t:("&" ,">", t[-1])) # "<<<word" || "<<[-]word" here_doc = Regex("<<(<|-?)") + word # "[n]>>word" add_to_file = Optional(fd_src | Literal("&"), 1) + \ Literal(">>") + word # "[n]<>word" fd_bind = Optional(fd_src, 0) + Literal("<>") + word redirector = (fd_redir | full_redir | here_doc | add_to_file | fd_bind)\ .setParseAction(lambda token: tuple(token)) ## single command (args/redir list) command = Group(OneOrMore(redirector | word)) ## logical operators (section splits) semicolon = Suppress(";") + junk connector = (oneOf("&& || |") + junk) | semicolon ## pipeline, aka logical block of interconnected commands pipeline = junk + Group(command + ZeroOrMore(connector + command) + Optional(semicolon)) # define object attributes self.LEXER = pipeline.ignore(comment) + EOF self.parseException = ParseException
def _parse_line(): """Parse a single data line that may contain string or numerical data. Float and Int 'words' are converted to their appropriate type. Exponentiation is supported, as are NaN and Inf.""" digits = Word(nums) dot = "." sign = oneOf("+ -") ee = CaselessLiteral('E') | CaselessLiteral('D') num_int = ToInteger(Combine( Optional(sign) + digits )) num_float = ToFloat(Combine( Optional(sign) + ((digits + dot + Optional(digits)) | (dot + digits)) + Optional(ee + Optional(sign) + digits) )) # special case for a float written like "3e5" mixed_exp = ToFloat(Combine( digits + ee + Optional(sign) + digits )) nan = ToInf(oneOf("Inf -Inf")) | \ ToNan(oneOf("NaN nan NaN% NaNQ NaNS qNaN sNaN " + \ "1.#SNAN 1.#QNAN -1.#IND")) # sep = Literal(" ") | Literal("\n") data = ( OneOrMore( (nan | num_float | mixed_exp | num_int | Word(printables)) ) ) return data
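# Hedged example (added): assuming ToInteger/ToFloat/ToInf/ToNan are helper
# wrappers that attach type-converting parse actions (they are not shown here),
# a mixed data line would tokenise roughly as:
#
#     _parse_line().parseString("1 2.5 -3.5e2 NaN label_7")
#     # -> [1, 2.5, -350.0, nan, 'label_7']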
def __init__(self, ffilter, queue_out): FuzzQueue.__init__(self, queue_out) Thread.__init__(self) self.setName('filter_thread') self.queue_out = queue_out if PYPARSING: element = oneOf("c l w h") digits = "XB0123456789" integer = Word( digits )#.setParseAction( self.__convertIntegers ) elementRef = Group(element + oneOf("= != < > >= <=") + integer) operator = oneOf("and or") definition = elementRef + ZeroOrMore( operator + elementRef) nestedformula = Group(Suppress(Optional(Literal("("))) + definition + Suppress(Optional(Literal(")")))) self.finalformula = nestedformula + ZeroOrMore( operator + nestedformula) elementRef.setParseAction(self.__compute_element) nestedformula.setParseAction(self.__compute_formula) self.finalformula.setParseAction(self.__myreduce) self.res = None self.hideparams = ffilter if "XXX" in self.hideparams['codes']: self.hideparams['codes'].append("0") self.baseline = None
def _parse_template(self, options, template): """Parse a template string.""" variable_name = Word(alphas + " ") variable_prefix = Optional(Word(alphas) + ":") variable = "{" + variable_prefix + variable_name + "}" variable.setParseAction(self._replace_variable(options)) block_name = oneOf("Title Description PreviousPage NextPage") block_start = "{block:" + block_name + "}" block_end = "{/block:" + block_name + "}" block = block_start + SkipTo(block_end) + block_end block.setParseAction(self._replace_block(options)) block_type_name = oneOf("Text Photo Panorama Photoset Quote Link Chat Video Audio") block_type_start = "{block:" + block_type_name + "}" block_type_end = "{/block:" + block_type_name + "}" block_type = block_type_start + SkipTo(block_type_end) + block_type_end block_type.setParseAction(self._replace_block_type(options)) block_cond_name = Word(alphas) block_cond_start = "{block:If" + Optional("Not") + block_cond_name + "}" block_cond_end = "{/block:If" + Optional("Not") + block_cond_name + "}" block_cond = block_cond_start + SkipTo(block_cond_end) + block_cond_end block_cond.setParseAction(self._replace_block_cond(options)) block_iter_name = oneOf("Posts") block_iter_start = "{block:" + block_iter_name + "}" block_iter_end = "{/block:" + block_iter_name + "}" block_iter = block_iter_start + SkipTo(block_iter_end) + block_iter_end block_iter.setParseAction(self._replace_block_iter(options)) parser = (block | block_type | block_cond | block_iter | variable) return parser.transformString(template)
def parser(): global _parser if _parser is None: ParserElement.setDefaultWhitespaceChars("") lbrack, rbrack, lbrace, rbrace, lparen, rparen = map(Literal, "[]{}()") reMacro = Combine("\\" + oneOf(list("dws"))) escapedChar = ~ reMacro + Combine("\\" + oneOf(list(printables))) reLiteralChar = "".join(c for c in printables if c not in r"\[]{}().*?+|") + " \t" reRange = Combine(lbrack + SkipTo(rbrack, ignore=escapedChar) + rbrack) reLiteral = (escapedChar | oneOf(list(reLiteralChar))) reDot = Literal(".") repetition = ( (lbrace + Word(nums).setResultsName("count") + rbrace) | (lbrace + Word(nums).setResultsName("minCount") + "," + Word(nums).setResultsName("maxCount") + rbrace) | oneOf(list("*+?")) ) reRange.setParseAction(handle_range) reLiteral.setParseAction(handle_literal) reMacro.setParseAction(handle_macro) reDot.setParseAction(handle_dot) reTerm = (reLiteral | reRange | reMacro | reDot) reExpr = operatorPrecedence(reTerm, [ (repetition, 1, opAssoc.LEFT, handle_repetition), (None, 2, opAssoc.LEFT, handle_sequence), (Suppress('|'), 2, opAssoc.LEFT, handle_alternative), ]) _parser = reExpr return _parser
def defineConditionSyntax(self): num = Combine(Optional(oneOf("+ -")) + Word(nums) + "." + Optional(Word(nums)) + Optional(oneOf("e E")+Optional(oneOf("+ -")) +Word(nums))) op = oneOf("< == > >= <= !=") expr = Word(alphanums) + op + num return expr
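# Added note/example: `num` above requires a decimal point, so conditions must
# use float literals, e.g. (self is an instance of the enclosing class):
#
#     self.defineConditionSyntax().parseString("temperature >= 25.0")
#     # -> ['temperature', '>=', '25.0']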
def __init__(self): """ Setup the Backus Normal Form (BNF) parser logic. """ # Set an empty formula attribute self.formula = None # Instantiate blank parser for BNF construction self.bnf = Forward() # Expression for parenthesis, which are suppressed in the atoms # after matching. lpar = Literal(const.LPAR).suppress() rpar = Literal(const.RPAR).suppress() # Expression for mathematical constants: Euler number and Pi e = Keyword(const.EULER) pi = Keyword(const.PI) null = Keyword(const.NULL) _true = Keyword(const.TRUE) _false = Keyword(const.FALSE) # Prepare operator expressions addop = oneOf(const.ADDOP) multop = oneOf(const.MULTOP) powop = oneOf(const.POWOP) unary = reduce(operator.add, (Optional(x) for x in const.UNOP)) # Expression for floating point numbers, allowing for scientific notation. number = Regex(const.NUMBER) # Variables are alphanumeric strings that represent keys in the input # data dictionary. variable = delimitedList(Word(alphanums), delim=const.VARIABLE_NAME_SEPARATOR, combine=True) # Functional calls function = Word(alphanums) + lpar + self.bnf + rpar # Atom core - a single element is either a math constant, # a function or a variable. atom_core = function | pi | e | null | _true | _false | number | variable # Atom subelement between parenthesis atom_subelement = lpar + self.bnf.suppress() + rpar # In atoms, pi and e need to be before the letters for it to be found atom = ( unary + atom_core.setParseAction(self.push_first) | atom_subelement ).setParseAction(self.push_unary_operator) # By defining exponentiation as "atom [ ^ factor ]..." instead of # "atom [ ^ atom ]...", we get right-to-left exponents, instead of # left-to-right that is, 2^3^2 = 2^(3^2), not (2^3)^2. factor = Forward() factor << atom + ZeroOrMore((powop + factor).setParseAction(self.push_first)) term = factor + ZeroOrMore((multop + factor).setParseAction(self.push_first)) self.bnf << term + ZeroOrMore((addop + term).setParseAction(self.push_first))
def _string_to_ast(self, input_string): """ Parse a smart search string and return it in an AST like form """ # simple words # we need to use a regex to match on words because the regular # Word(alphanums) will only match on American ASCII alphanums and since # we try to be Unicode / internationally friendly we need to match much # much more. Trying to expand a word class to catch it all seems futile # so we match on everything *except* a few things, like our operators comp_word = Regex("[^*\s=><~!]+") word = Regex("[^*\s=><~!]+").setResultsName('word') # numbers comp_number = Word(nums) number = Word(nums).setResultsName('number') # IPv4 address ipv4_oct = Regex("((2(5[0-5]|[0-4][0-9])|[01]?[0-9][0-9]?))") comp_ipv4_address = Combine(ipv4_oct + ('.' + ipv4_oct*3)) ipv4_address = Combine(ipv4_oct + ('.' + ipv4_oct*3)).setResultsName('ipv4_address') # IPv6 address ipv6_address = Regex("((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?").setResultsName('ipv6_address') ipv6_prefix = Combine(ipv6_address + Regex("/(12[0-8]|1[01][0-9]|[0-9][0-9]?)")).setResultsName('ipv6_prefix') # VRF RTs of the form number:number vrf_rt = Combine((comp_ipv4_address | comp_number) + Literal(':') + comp_number).setResultsName('vrf_rt') # tags tags = Combine( Literal('#') + comp_word).setResultsName('tag') # operators for matching match_op = oneOf(' '.join(self.match_operators)).setResultsName('operator') boolean_op = oneOf(' '.join(self.boolean_operators)).setResultsName('boolean') # quoted string d_quoted_string = QuotedString('"', unquoteResults=True, escChar='\\') s_quoted_string = QuotedString('\'', unquoteResults=True, escChar='\\') quoted_string = (s_quoted_string | d_quoted_string).setResultsName('quoted_string') # expression to match a certain value for an attribute expression = Group(word + match_op + (quoted_string | vrf_rt | word | number)).setResultsName('expression') # we work on atoms, which are single quoted strings, match expressions, # tags, VRF RT or simple words. # NOTE: Place them in order of most exact match first! atom = Group(ipv6_prefix | ipv6_address | quoted_string | expression | tags | vrf_rt | boolean_op | word) enclosed = Forward() parens = nestedExpr('(', ')', content=enclosed) enclosed << ( parens | atom ).setResultsName('nested') content = Forward() content << ( ZeroOrMore(enclosed) ) res = content.parseString(input_string) return res
def expression(self): from pyparsing import Suppress,Combine,Optional,oneOf,OneOrMore,Word,nums,Group,alphas,alphanums,Literal,SkipTo,empty,lineEnd cvtInt = lambda toks: int(toks[0]) cvtReal = lambda toks: float(toks[0]) cvtTuple = lambda toks : tuple(toks.asList()) nameJoin = lambda toks : "".join([tok.replace("#","") for tok in toks[0]]) #lambda toks: " ".join([str(t) for t in toks[0]]) # define punctuation as suppressed literals lparen,rparen,lbrack,rbrack,lbrace,rbrace,colon = map(Suppress,"()[]{}:") integer = Combine(Optional(oneOf("+ -")) + Word(nums))\ .setName("integer")\ .setParseAction( cvtInt ) real = Combine(Optional(oneOf("+ -")) + Word(nums) + "." + Optional(Word(nums)) + Optional(oneOf("e E")+Optional(oneOf("+ -")) +Word(nums))).setName("real").setParseAction( cvtReal ) # TREE DEFINITION # ((seq2: 0.537243, seq1: 0.000004): 0.255741, seq3: 0.281503); tree_w_branches = ( OneOrMore(Word("():,."+alphas+nums))+Literal(";") ).setParseAction(lambda tokens: " ".join(tokens[:-1])+";") # SITE PROBABILITIES # site Freq Data: # 1 1 AAA: A(0.978) A(1.000) site_prob = ( integer.setResultsName("site",listAllMatches=True) + integer.setResultsName("freq",listAllMatches=True) + Word(alphas+"-").setResultsName("extant",listAllMatches=True) + colon + Group(OneOrMore(Group(Word(alphas,exact=1)+lparen+real+rparen))).setResultsName("probability",listAllMatches=True) + lineEnd ) # ANCESTRAL SEQUENCES # seq1 ACC # node #4 ACC # Optional # character with node # needs to be joined into a single name sequence = ( Group(Word(alphanums)+ Optional(Combine(Literal("#")+Word(nums)))).setParseAction(nameJoin).setResultsName("name",listAllMatches=True)+ Word(alphas+"- ").setResultsName("sequence", listAllMatches=True)+lineEnd ) return (SkipTo(Literal("Ancestral reconstruction by AAML."),include=True).suppress() + tree_w_branches.setResultsName("tree") + SkipTo(Literal("site")+Literal("Freq")+Literal("Data:"), include=True,).suppress()+ Group(OneOrMore(site_prob)).setResultsName("sites")+ SkipTo(Literal("List of extant and reconstructed sequences")+Word(nums)+Word(nums), include=True).suppress()+ Group(OneOrMore(sequence)).setResultsName("sequences")+ SkipTo(Literal("for a site."),include=True).suppress()+ Group(OneOrMore(real)).setResultsName("probability")+ empty )
def _getPattern(self): arith_expr = Forward() comp_expr = Forward() logic_expr = Forward() LPAR, RPAR, SEMI = map(Suppress, "();") identifier = Word(alphas+"_", alphanums+"_") multop = oneOf('* /') plusop = oneOf('+ -') expop = Literal( "^" ) compop = oneOf('> < >= <= != ==') andop = Literal("AND") orop = Literal("OR") current_value = Literal( "." ) assign = Literal( "=" ) # notop = Literal('NOT') function = oneOf(' '.join(self.FUNCTIONS)) function_call = Group(function.setResultsName('fn') + LPAR + Optional(delimitedList(arith_expr)) + RPAR) aggregate_column = QuotedString(quoteChar='{', endQuoteChar='}') single_column = QuotedString(quoteChar='[', endQuoteChar=']') integer = Regex(r"-?\d+") real = Regex(r"-?\d+\.\d*") # quotedString enables strings without quotes to pass operand = \ function_call.setParseAction(self.__evalFunction) | \ aggregate_column.setParseAction(self.__evalAggregateColumn) | \ single_column.setParseAction(self.__evalSingleColumn) | \ ((real | integer).setParseAction(self.__evalConstant)) | \ quotedString.setParseAction(self.__evalString).addParseAction(removeQuotes) | \ current_value.setParseAction(self.__evalCurrentValue) | \ identifier.setParseAction(self.__evalString) arith_expr << operatorPrecedence(operand, [ (expop, 2, opAssoc.LEFT, self.__expOp), (multop, 2, opAssoc.LEFT, self.__multOp), (plusop, 2, opAssoc.LEFT, self.__addOp), ]) # comp_expr = Group(arith_expr + compop + arith_expr) comp_expr << operatorPrecedence(arith_expr, [ (compop, 2, opAssoc.LEFT, self.__evalComparisonOp), ]) logic_expr << operatorPrecedence(comp_expr, [ (andop, 2, opAssoc.LEFT, self.__evalLogicOp), (orop, 2, opAssoc.LEFT, self.__evalLogicOp) ]) pattern = logic_expr + StringEnd() return pattern
def getrule(): """ Using pyparsing, get rule out of a string. """ arrow = pp.Literal("==>") buff = pp.Word(pp.alphas, "".join([pp.alphanums, "_"])) special_valueLHS = pp.oneOf([x for x in _LHSCONVENTIONS.keys()]) end_buffer = pp.Literal(">") special_valueRHS = pp.oneOf([x for x in _RHSCONVENTIONS.keys()]) chunk = getchunk() rule_reader = pp.Group(pp.OneOrMore(pp.Group(special_valueLHS + buff + end_buffer + pp.Group(pp.Optional(chunk))))) + arrow + pp.Group(pp.OneOrMore(pp.Group(special_valueRHS + buff + end_buffer + pp.Group(pp.Optional(chunk))))) return rule_reader
def _makeGrammar(): """ Define the simple string selector grammar using PyParsing """ #float definition point = Literal('.') plusmin = Literal('+') | Literal('-') number = Word(nums) integer = Combine(Optional(plusmin) + number) floatn = Combine(integer + Optional(point + Optional(number))) #vector definition lbracket = Literal('(') rbracket = Literal(')') comma = Literal(',') vector = Combine(lbracket + floatn('x') + comma + \ floatn('y') + comma + floatn('z') + rbracket) #direction definition simple_dir = oneOf(['X','Y','Z','XY','XZ','YZ']) direction = simple_dir('simple_dir') | vector('vector_dir') #CQ type definition cqtype = oneOf(['Plane','Cylinder','Sphere','Cone','Line','Circle','Arc'], caseless=True) cqtype = cqtype.setParseAction(upcaseTokens) #type operator type_op = Literal('%') #direction operator direction_op = oneOf(['>','<']) #index definition ix_number = Group(Optional('-')+Word(nums)) lsqbracket = Literal('[').suppress() rsqbracket = Literal(']').suppress() index = lsqbracket + ix_number('index') + rsqbracket #other operators other_op = oneOf(['|','#','+','-']) #named view named_view = oneOf(['front','back','left','right','top','bottom']) return direction('only_dir') | \ (type_op('type_op') + cqtype('cq_type')) | \ (direction_op('dir_op') + direction('dir') + Optional(index)) | \ (other_op('other_op') + direction('dir')) | \ named_view('named_view')
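# Usage sketch (added; not from the original source).  The grammar returns the
# matched pieces under named results such as 'dir_op', 'cq_type' and 'index':
_selector = _makeGrammar()
print(_selector.parseString(">Z"))      # direction operator + simple direction
print(_selector.parseString("%Plane"))  # type operator; cqtype is upper-cased
print(_selector.parseString(">X[-2]"))  # direction operator with an index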
def __init__(self, EvaluateVariableChild=None, EvaluateNumberChild=None):
    EvaluateVariableChild = EvaluateVariableChild or EvaluateVariable
    EvaluateNumberChild = EvaluateNumberChild or EvaluateNumber
    # what is a float number
    floatNumber = Regex(r'[-]?\d+(\.\d*)?([eE][-+]?\d+)?')
    # a variable is a combination of letters, numbers, and underscores
    variable = Word(alphanums + "_")
    # a sign is plus or minus
    signOp = oneOf('+ -')
    # an operand is a variable or a floating point number
    operand = floatNumber ^ variable
    # when a floatNumber is found, parse it with EvaluateNumberChild
    floatNumber.setParseAction(EvaluateNumberChild)
    # when a variable is found, parse it with the EvaluateVariableChild
    # or EvaluateVariable
    variable.setParseAction(EvaluateVariableChild)
    # comparisons include lt, le, gt, ge, eq, ne
    comparisonOp = oneOf("< <= > >= == !=")
    # negation of the boolean is !
    notOp = oneOf("!")
    # an expression is either a comparison or
    # a NOT operation (where NOT a is essentially (a == False))
    comparisonExpression = operatorPrecedence(operand, [
        (comparisonOp, 2, opAssoc.LEFT, EvaluateComparison),
        (notOp, 1, opAssoc.RIGHT, EvaluateNot),
    ])
    # boolean logic of AND or OR
    boolOp = oneOf("& |")
    # a bool expression contains a nested bool expression or a comparison,
    # joined with a boolean operation
    boolExpression = Forward()
    boolPossible = boolExpression | comparisonExpression
    self.boolExpression = operatorPrecedence(boolPossible, [
        (boolOp, 2, opAssoc.RIGHT, EvaluateOrAnd),
    ])
    return
def __init__(self):
    """
    Create a parser that parses arithmetic expressions. They can contain
    variable identifiers or raw numbers. The meaning of the identifiers is
    left to the caller.
    """
    number = p.Regex(r'\d+(\.\d*)?([eE]\d+)?')
    identifier = p.Word(p.alphas)
    terminal = identifier | number
    self._expr = p.infixNotation(terminal, [
        (p.oneOf('* /'), 2, p.opAssoc.LEFT),
        (p.oneOf('+ -'), 2, p.opAssoc.LEFT)
    ]) + p.stringEnd()
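# Added sketch (the class name used here is hypothetical; only the attribute
# names above are taken from the snippet):
#
#     calc = ArithmeticExpressionParser()
#     tree = calc._expr.parseString("a * 2 + b")
#     # tree[0] is a ParseResults tree nested by operator precedence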
def _parse_line(delimiters=' \t'): """Parse a single data line that may contain string or numerical data. Float and Int 'words' are converted to their appropriate type. Exponentiation is supported, as are NaN and Inf.""" # Somewhat of a hack, but we can only use printables if the delimiter is # just whitespace. Otherwise, some seprators (like ',' or '=') potentially # get parsed into the general string text. So, if we have non whitespace # delimiters, we need to fall back to just alphanums, and then add in any # missing but important symbols to parse. if delimiters.isspace(): textchars = printables else: textchars = alphanums symbols = ['.', '/', '+', '*', '^', '(', ')', '[', ']', '=', ':', ';', '?', '%', '&', '!', '#', '|', '<', '>', '{', '}', '-', '_', '@', '$', '~'] for symbol in symbols: if symbol not in delimiters: textchars = textchars + symbol string_text = Word(textchars) digits = Word(nums) dot = "." sign = oneOf("+ -") ee = CaselessLiteral('E') | CaselessLiteral('D') num_int = ToInteger(Combine( Optional(sign) + digits )) num_float = ToFloat(Combine( Optional(sign) + ((digits + dot + Optional(digits)) | (dot + digits)) + Optional(ee + Optional(sign) + digits) )) # special case for a float written like "3e5" mixed_exp = ToFloat(Combine( digits + ee + Optional(sign) + digits )) nan = ToInf(oneOf("Inf -Inf")) | \ ToNan(oneOf("NaN nan NaN% NaNQ NaNS qNaN sNaN " + \ "1.#SNAN 1.#QNAN -1.#IND")) # sep = Literal(" ") | Literal("\n") data = ( OneOrMore( (nan | num_float | mixed_exp | num_int | string_text) ) ) return data
def get_parsing_expr(): S = Suppress O = Optional identifier_expression = Combine(oneOf(list(alphas)) + O(Word('_' + alphanums))) expr = S(Literal('$')) - identifier_expression expr = expr ^ oneOf(list(alphas)) expr.setName('variable') def my_parse_action(s, loc, tokens): # @UnusedVariable name = tokens[0] return Variable(name) expr.addParseAction(wrap_parse_action(my_parse_action)) return True, expr
def parse_date_range(x, hyphen=u"- — –", comma=u","):
    """Parse Date Ranges

    Parse date ranges like

        January 1-5, 1900
        January 1-March 1, 1900
        January 1 1900 - March 5, 1900
    """
    months = {'April': 4, 'August': 8, 'December': 12, 'February': 2,
              'January': 1, 'July': 7, 'June': 6, 'March': 3, 'May': 5,
              'November': 11, 'October': 10, 'September': 9}
    comma = pp.oneOf(comma).suppress()
    hyphen = pp.oneOf(hyphen).suppress()
    month = pp.oneOf([k for k in months.keys()])
    month = month.setResultsName('m')
    month.addParseAction(lambda s, l, t: [months[t[0]]])
    day = pp.Word(pp.nums).setResultsName('d')
    day.addParseAction(lambda s, l, t: [int(t[0])])
    year = pp.Word(pp.nums).setResultsName('year')
    year.addParseAction(lambda s, l, t: [int(t[0])])
    start_date = (month + day + pp.Optional(comma + year)).setResultsName('start')
    end_date = (pp.Optional(month) + day + comma + year).setResultsName('end')
    date_range = start_date + pp.Optional(hyphen + end_date)
    # date_range needs to be first or one_date will pick up
    # start_dates with a year
    grammar = date_range
    toks = grammar.parseString(x).asDict()
    d0 = toks['start']
    d1 = toks['end'] if 'end' in toks else d0
    if 'year' not in d0:
        d0['year'] = d1['year']
    if 'm' not in d1:
        d1['m'] = d0['m']
    d0date = datetime.date(*(d0[k] for k in ('year', 'm', 'd')))
    d1date = datetime.date(*(d1[k] for k in ('year', 'm', 'd')))
    return (d0date, d1date)
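# Examples from the docstring, added as a runnable sketch (assumes `datetime`
# and `pyparsing as pp` are imported at module level):
print(parse_date_range("January 1-5, 1900"))
# -> (datetime.date(1900, 1, 1), datetime.date(1900, 1, 5))
print(parse_date_range("January 1-March 1, 1900"))
# -> (datetime.date(1900, 1, 1), datetime.date(1900, 3, 1))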
def _parse_filter(): op = pyparsing.oneOf('! & |') lpar = pyparsing.Literal('(').suppress() rpar = pyparsing.Literal(')').suppress() k = pyparsing.Word(pyparsing.alphanums) v = pyparsing.Word(pyparsing.alphanums + "*@.\\") rel = pyparsing.oneOf("= ~= >= <=") expr = pyparsing.Forward() atom = pyparsing.Group(lpar + op + expr + rpar) \ | pyparsing.Combine(lpar + k + rel + v + rpar) expr << atom + pyparsing.ZeroOrMore( expr ) return expr
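# Hedged usage sketch (added): the prefix operators nest sub-filters, while
# simple (key<op>value) terms are collapsed into single strings by Combine:
print(_parse_filter().parseString("(&(cn=admin)(mail=*@example.org))"))
# -> roughly [['&', 'cn=admin', 'mail=*@example.org']]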
def logicParse(inStr,pm = None): variable = oneOf('a b c d e f g h i j k l m n o p q r s t u w x y z ' + trueConstants + falseConstants) expr = operatorPrecedence(variable, [ (oneOf(notOps), 1, opAssoc.RIGHT), (oneOf(orOps), 2, opAssoc.LEFT), (oneOf(andOps), 2, opAssoc.LEFT), (oneOf(impOps), 2, opAssoc.LEFT)#, # (oneOf(bimpOps),2,opAssoc.LEFT), # (oneOf(xorOps), 2, opAssoc.LEFT) ]) parse = expr.parseString(inStr)[0] if pm == None: return parseToStatement(parse, propMap()) else: return parseToStatement(parse, pm)
def parser(): global _parser if _parser is None: ParserElement.setDefaultWhitespaceChars("") lbrack = Literal("[") rbrack = Literal("]") lbrace = Literal("{") rbrace = Literal("}") lparen = Literal("(") rparen = Literal(")") reMacro = Suppress("\\") + oneOf(list("dwsZ")) escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables))) reLiteralChar = "".join(c for c in string.printable if c not in r"\[]{}().*?+|") reRange = Combine(lbrack.suppress() + SkipTo(rbrack,ignore=escapedChar) + rbrack.suppress()) reLiteral = ( escapedChar | oneOf(list(reLiteralChar)) ) reDot = Literal(".") repetition = ( ( lbrace + Word(nums).setResultsName("count") + rbrace ) | ( lbrace + Word(nums).setResultsName("minCount")+","+ Word(nums).setResultsName("maxCount") + rbrace ) | oneOf(list("*+?")) ) reExpr = Forward() reGroup = (lparen.suppress() + Optional(Literal("?").suppress() + oneOf(list(":P"))).setResultsName("option") + reExpr.setResultsName("expr") + rparen.suppress()) reTerm = ( reLiteral | reRange | reMacro | reDot | reGroup ) reExpr << operatorPrecedence( reTerm, [ (repetition, 1, opAssoc.LEFT, create(Repetition)), (None, 2, opAssoc.LEFT, create(Sequence)), (Suppress('|'), 2, opAssoc.LEFT, create(Alternation)), ] ) reGroup.setParseAction(create(Group)) reRange.setParseAction(create(Range)) reLiteral.setParseAction(create(Character)) reMacro.setParseAction(create(Macro)) reDot.setParseAction(create(Dot)) _parser = reExpr return _parser
def __init__(self): self.select_stmt = Forward().setName("select statement") self.itemName = MatchFirst(Keyword("itemName()")).setParseAction(self.ItemName) self.count = MatchFirst(Keyword("count(*)")).setParseAction(self.Count) self.identifier = ((~keyword + Word(alphas, alphanums+"_")) | QuotedString("`")) self.column_name = (self.itemName | self.identifier.copy()) self.table_name = self.identifier.copy() self.function_name = self.identifier.copy() # expression self.expr = Forward().setName("expression") self.integer = Regex(r"[+-]?\d+") self.string_literal = QuotedString("'") self.literal_value = self.string_literal self.expr_term = ( self.itemName | self.function_name + LPAR + Optional(delimitedList(self.expr)) + RPAR | self.literal_value.setParseAction(self.Literal) | NULL.setParseAction(self.Null) | self.identifier.setParseAction(self.Identifier) | (EVERY + LPAR + self.identifier.setParseAction(self.Identifier) + RPAR).setParseAction(self.EveryIdentifier) | (LPAR + Optional(delimitedList(self.literal_value.setParseAction(self.Literal))) + RPAR).setParseAction(self.ValueList) ) self.expr << (operatorPrecedence(self.expr_term, [ (NOT, UNARY, opAssoc.RIGHT, self.BoolNot), (oneOf('< <= > >='), BINARY, opAssoc.LEFT, self.BinaryComparisonOperator), (oneOf('= == != <>') | Group(IS + NOT) | IS | IN | LIKE, BINARY, opAssoc.LEFT, self.BinaryComparisonOperator), ((BETWEEN,AND), TERNARY, opAssoc.LEFT, self.BetweenXAndY), (OR, BINARY, opAssoc.LEFT, self.BoolOr), (AND, BINARY, opAssoc.LEFT, self.BoolAnd), (INTERSECTION, BINARY, opAssoc.LEFT, self.Intersection), ])).setParseAction(self.dont_allow_non_comparing_terms) self.ordering_term = (self.itemName | self.identifier) + Optional(ASC | DESC) self.single_source = self.table_name("table") self.result_column = Group("*" | self.count | delimitedList(self.column_name))("columns") self.select_core = (SELECT + self.result_column + FROM + self.single_source + Optional(WHERE + self.expr("where_expr"))) self.select_stmt << (self.select_core + Optional(ORDER + BY + Group(delimitedList(self.ordering_term))).setParseAction(self.OrderByTerms)("order_by_terms") + Optional(LIMIT + self.integer)("limit_terms"))
def parseKip(self, fin): # parser for .kip files operator = oneOf("ADD SUB MUL DIV NONE") integer = Word(nums) lbrack = Suppress('[') rbrack = Suppress(']') cage = Group( operator("oper") + integer("value") +\ lbrack + OneOrMore(integer)("cells") + rbrack +\ integer("color") ) cages = OneOrMore(cage)("cages") update = Group( integer("coords") + integer("answer") +integer("candidates") ) annal = "checkpoint" ^ update history = "History" + OneOrMore(annal)("history") dimension ="dim" + integer("dim") solution = "Solution" + OneOrMore(integer)("soln") answer = "Answers" + OneOrMore(integer)("answer") candidates = "Candidates" + OneOrMore(integer)("candidates") time = "Time" + integer("time") puzzle = dimension + cages + solution + answer + candidates + history + time puzzle.ignore(pythonStyleComment) return puzzle.parseFile(fin, parseAll = True)
def lookup(values, name=None):
    """
    Creates the grammar for a Lookup (L) field, accepting only values from a list.

    Like in the Alphanumeric field, the result will be stripped of all leading
    and trailing whitespace.

    :param values: values allowed
    :param name: name for the field
    :return: grammar for the lookup field
    """
    if name is None:
        name = 'Lookup Field'

    if values is None:
        raise ValueError('The values can not be None')

    # TODO: This should not be needed, it is just a patch. Fix this.
    try:
        v = values.asList()
        values = v
    except AttributeError:
        values = values

    # Only the specified values are allowed
    lookup_field = pp.oneOf(values)

    lookup_field.setName(name)
    lookup_field.setParseAction(lambda s: s[0].strip())
    lookup_field.leaveWhitespace()

    return lookup_field
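# Added usage example: only the listed values are accepted and the matched
# value is returned stripped of surrounding whitespace.
currency = lookup(['USD', 'EUR', 'GBP'], name='Currency')
print(currency.parseString('EUR')[0])  # -> 'EUR'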
(Minor updates by Paul McGuire, June, 2012) ''' from pyparsing import Word, ZeroOrMore, printables, Suppress, OneOrMore, Group, \ LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword, \ cStyleComment, Regex, Forward, MatchFirst, And, srange, oneOf, alphas, alphanums, \ delimitedList # http://www.antlr.org/grammar/ANTLR/ANTLRv3.g # Tokens EOL = Suppress(LineEnd()) # $ singleTextString = originalTextFor( ZeroOrMore(~EOL + (White(" \t") | Word(printables)))).leaveWhitespace() XDIGIT = hexnums INT = Word(nums) ESC = Literal('\\') + (oneOf(list(r'nrtbf\">' + "'")) | ('u' + Word(hexnums, exact=4)) | Word(printables, exact=1)) LITERAL_CHAR = ESC | ~(Literal("'") | Literal('\\')) + Word(printables, exact=1) CHAR_LITERAL = Suppress("'") + LITERAL_CHAR + Suppress("'") STRING_LITERAL = Suppress("'") + Combine( OneOrMore(LITERAL_CHAR)) + Suppress("'") DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"' DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(Word(printables, exact=1)) + '>>' TOKEN_REF = Word(alphas.upper(), alphanums + '_') RULE_REF = Word(alphas.lower(), alphanums + '_') ACTION_ESC = (Suppress("\\") + Suppress("'")) | Suppress('\\"') | Suppress( '\\') + (~(Literal("'") | Literal('"')) + Word(printables, exact=1)) ACTION_CHAR_LITERAL = Suppress("'") + (
date = Group(year + slash + month + slash + day + hour + colon + minute + colon + second).setResultsName("date") minecraft_username = Word(alphanums + "_", min=3, max=16) buyer = minecraft_username.setResultsName("buyer") #This is fast but it'll break if NullCase makes an admin shop that #contains ' at ' in its name seller = SkipTo(' at ').setResultsName("seller") item_amount = number.setResultsName("item_amount") #Breaks if an item contains a ' for ' item = SkipTo(' for ').setResultsName("item") cash_amount = number.setResultsName("cash_amount") world = (oneOf("world_nether world_the_end world")).setResultsName("world") coordinates = Group( delimitedList(signed_integer)).setResultsName("coordinates") buy_transaction = date + buyer + Suppress( "bought") + item_amount + item + Suppress("for") + cash_amount + Suppress( "from") + seller + Suppress("at") + Suppress("[") + world + Suppress( "]") + coordinates #Small test #print(buy_transaction.parseString('2018/09/01 00:05:58 NullCase bought 3 White Wool for 25.00 from LightsChaos at [world] 29, 69, 38'))
The AtomicTermSymbol class, representing an atomic term symbol, with methods for parsing a string into quantum numbers and labels, creating an HTML representation of the term symbol, etc. """ import pyparsing as pp from .state import State, StateParseError from .utils import parse_fraction, float_to_fraction atom_L_symbols = ('S', 'P', 'D', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'O', 'Q', 'R', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z') integer = pp.Word(pp.nums) atom_Smult = integer.setResultsName('Smult') atom_Lletter = pp.oneOf(atom_L_symbols).setResultsName('Lletter') atom_Jstr = (integer + pp.Optional(pp.Suppress('/') + '2') + pp.StringEnd()).setResultsName('Jstr') atom_parity = pp.Literal('o').setResultsName('parity') atom_term = (atom_Smult + atom_Lletter + pp.Optional(atom_parity) + pp.Optional(pp.Suppress('_') + atom_Jstr) + pp.StringEnd()) class AtomicTermSymbolError(StateParseError): pass class AtomicTermSymbol(State): multiple_allowed = False
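# Added illustration (kept as a comment because the surrounding class is shown
# only partially): the module-level grammar above parses a term symbol such as
#
#     res = atom_term.parseString('2P_3/2')
#     res['Smult'], res['Lletter']   # -> ('2', 'P'); res['Jstr'] holds the J tokens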
def header_parser(): identifier = Regex("[a-zA-Z_][a-zA-Z0-9_\$]*") comment = cppStyleComment.suppress() size = Group( Optional( Suppress('[') + SkipTo(']') + Suppress(']') ) ) # Params end_param = Literal(',') + 'parameter' | Literal(')') + '(' ptype = Optional( oneOf('integer real realtime time') ) # NOTE: this isn't completely right, good enough for parsing valid Verilog param = Group( 'parameter' + ptype + size + identifier + Suppress('=') + SkipTo(end_param) ) list_of_params = Group( Suppress('#(') + delimitedList( param ) + Suppress(')') ) # Ports dir_ = Optional( oneOf('input output inout') ) type_ = Optional( oneOf('wire reg') ) port = Group( dir_ + type_ + size + identifier ) list_of_ports = Group( Suppress('(') + delimitedList( port ) + Suppress(')') ) # Module module_identifier = identifier module = Group( Suppress('module') + module_identifier('module_name') + Optional( list_of_params('params') ) + Optional( list_of_ports ('ports' ) ) + Suppress(';') + SkipTo('endmodule') + Suppress('endmodule') ) # Debug #print #module_identifier.setParseAction( dbg('modname') )#.setDebug() #param .setParseAction( dbg('param') )#.setDebug() #port .setParseAction( dbg('port' ) )#.setDebug() #module .setParseAction( dbg('module', 1) )#.setDebug() file_ = SkipTo('module', ignore=comment ).suppress() + \ OneOrMore( module ).ignore( comment ) + \ SkipTo( StringEnd() ).suppress() return file_
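# Hedged usage sketch (added): for a minimal Verilog header the parser yields
# one group per module, with 'module_name', 'params' and 'ports' results, e.g.
#
#     hdr = header_parser().parseString(
#         "module top #(parameter W = 8) (input [W-1:0] a, output b); endmodule")
#     hdr[0].module_name   # -> 'top'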
def __init__(self): """ expop :: '^' multop :: '*' | '/' addop :: '+' | '-' integer :: ['+' | '-'] '0'..'9'+ atom :: PI | E | real | fn '(' expr ')' | '(' expr ')' factor :: atom [ expop factor ]* term :: factor [ multop factor ]* expr :: term [ addop term ]* """ self.kwargs = {} self.expr_stack = [] self.assignment_stack = [] self.expression_string = None self.results = None self.container = None self.opn = { "+": self.add, "-": self.subtract, "*": self.multiply, "/": self.divide, "^": self.pow, } self.fn = {"exp": self.exp, "clamp": self.clamp} self.conditionals = ["==", "!=", ">", ">=", "<", "<="] # use CaselessKeyword for e and pi, to avoid accidentally matching # functions that start with 'e' or 'pi' (such as 'exp'); Keyword # and CaselessKeyword only match whole words e = CaselessKeyword("E") pi = CaselessKeyword("PI") # fnumber = Combine(Word("+-"+nums, nums) + # Optional("." + Optional(Word(nums))) + # Optional(e + Word("+-"+nums, nums))) # or use provided pyparsing_common.number, but convert back to str: # fnumber = ppc.number().addParseAction(lambda t: str(t[0])) fnumber = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?") ident = Word(alphas, alphanums + "_$") plus, minus, mult, div = map(Literal, "+-*/") lpar, rpar = map(Suppress, "()") addop = plus | minus multop = mult | div expop = Literal("^") comparison_op = oneOf(" ".join(self.conditionals)) qm, colon = map(Literal, "?:") assignment = Literal("=") assignment_op = ident + assignment + ~FollowedBy(assignment) expr = Forward() expr_list = delimitedList(Group(expr)) # add parse action that replaces the function identifier with a (name, number of args) tuple fn_call = (ident + lpar - Group(expr_list) + rpar).setParseAction( lambda t: t.insert(0, (t.pop(0), len(t[0]))) ) atom = ( addop[...] + ( (fn_call | pi | e | fnumber | ident).setParseAction(self.push_first) | Group(lpar + expr + rpar) ) ).setParseAction(self.push_unary_minus) # by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left # exponents, instead of left-to-right that is, 2^3^2 = 2^(3^2), not (2^3)^2. factor = Forward() factor <<= atom + (expop + factor).setParseAction(self.push_first)[...] term = factor + (multop + factor).setParseAction(self.push_first)[...] expr <<= term + (addop + term).setParseAction(self.push_first)[...] comparison = expr + (comparison_op + expr).setParseAction(self.push_first)[...] ternary = ( comparison + (qm + expr + colon + expr).setParseAction(self.push_first)[...] ) # self.bnf = ternary assignment = Optional(assignment_op).setParseAction(self.push_last) + ternary self.bnf = assignment
else: self.val = tokens def __str__(self): return "<Not({})>".format(self.val) def __iter__(self): return iter((self.val,)) def to_expr(self): return pyeda.Not(self.val.to_expr()) columnRval = pyparsing_common.number.copy() | quotedString() condition = Group(column + oneOf(operators, caseless=True) + columnRval) boolExpr = infixNotation(condition, [ (not_, 1, opAssoc.RIGHT, BoolNot,), (and_, 2, opAssoc.LEFT, BoolAnd,), (or_, 2, opAssoc.LEFT, BoolOr,), ]) condition.setParseAction(Cond) def str_to_query_tokens(query: str): return boolExpr.parseString(query).asList()[0]
def __call__(self, s): return self.eval(s) def __getstate__(self): return self.name, self.grammar, self.calculator def __setstate__(self, state): self.name, self.grammar, self.calculator = state arithOpTable = [{ 'token': '^', 'assoc': 'right' }, { 'token': pp.oneOf('+ -'), 'arity': 1 }, pp.oneOf('* /'), pp.oneOf('+ -'), { 'token': pp.oneOf('== != < > <= >='), 'action': CompareAction }] logicOpTable = [{ 'token': '~', 'arity': 1, 'action': UnaryOperatorAction }, { 'token': '&', 'action': BinaryOperatorAction }, {
import pyparsing variable_names = pyparsing.Combine( pyparsing.Literal('$') + pyparsing.Word(pyparsing.alphanums + '_')) #integer = pyparsing.Word(pyparsing.nums) integer = pyparsing.pyparsing_common.signed_integer double = pyparsing.Combine( pyparsing.Word(pyparsing.nums) + '.' + pyparsing.Word(pyparsing.nums)) parser = pyparsing.operatorPrecedence( variable_names | double | integer, [('**', 2, pyparsing.opAssoc.RIGHT), ('-', 1, pyparsing.opAssoc.RIGHT), (pyparsing.oneOf('* / // %'), 2, pyparsing.opAssoc.LEFT), (pyparsing.oneOf('+ -'), 2, pyparsing.opAssoc.LEFT), (pyparsing.oneOf('> >= < <= == !='), 2, pyparsing.opAssoc.LEFT), ('not', 1, pyparsing.opAssoc.RIGHT), ('and', 2, pyparsing.opAssoc.LEFT), ('or', 2, pyparsing.opAssoc.LEFT)]) examples = [ "5 * 10 ** -2", "5 * 10 * -2", "5 * 10 ** (-2)", "5 * -10 ** 2", "5 * (-10) ** 2", "5 and not 8", "5 and -8", "1 ** -2", "-1 ** 2",
test = instring[ loc:loc+self.matchLen ] if test.upper() == self.match: return loc+self.matchLen, test #~ raise ParseException( instring, loc, self.errmsg ) exc = self.myException exc.loc = loc exc.pstr = instring raise exc def Sequence(token): """ A sequence of the token""" return OneOrMore(token+maybeComma) digit_sequence = Word(nums) sign = oneOf("+ -") def convertToFloat(s, loc, toks): try: return float(toks[0]) except: raise ParseException(loc, "invalid float format %s"%toks[0]) exponent = CaselessLiteral("e")+Optional(sign)+Word(nums) #note that almost all these fields are optional, #and this can match almost anything. We rely on Pythons built-in #float() function to clear out invalid values - loosely matching like this #speeds up parsing quite a lot floatingPointConstant = Combine( Optional(sign) +
def logicterm(constant=DIGIT, variable=IDEN, function=IDEN, lambdaterm=False): # f(x,y...) | const | x if lambdaterm: function = function | lambdaterm(variable, lambdaKeyword='lambda') t = pp.Forward() t <<= (function('function') + LPAREN + pp.delimitedList(t)('args') + RPAREN).setParseAction(FunctionAction) | (constant | variable).setParseAction(AtomAction) return t def lambdaterm(variable=IDEN, lambdaKeyword='lambda'): # lambda variable: expression t = pp.Forward() t <<= pp.Suppress(lambdaKeyword) + pp.delimitedList(variable)('args') + (t | logicterm(constant=DIGIT, variable=IDEN, function=None))('term') t.setParseAction(LambdaAction) return t integer = pp.pyparsing_common.signed_integer varname = pp.pyparsing_common.identifier arithOplist = [('-', 1, pp.opAssoc.RIGHT), (pp.oneOf('* /'), 2, pp.opAssoc.LEFT), (pp.oneOf('+ -'), 2, pp.opAssoc.LEFT)] def func(EXP): return pp.Group('<' + EXP + pp.Suppress(',') + EXP +'>')| pp.Group('||' + EXP + '||') | pp.Group('|' + EXP + '|') | pp.Group(IDEN + '(' + pp.delimitedList(EXP) + ')') baseExpr = integer | varname EXP = mixedExpression(baseExpr, func=func, opList=arithOplist) a = EXP.parseString('5*5+<4,5>') print(a)
def detectChemicalType(formula): """detectChemicalType: utility routine for detecting chemical type Arguments ---------- formula : text version of formula Returns ------- type: string "element"|"ionic"|"covalent"|"acid"|"water" """ verbose = False #if verbose: print(f"detect formula:{formula}") if formula in ['H2O', 'HOH']: return "water" if formula == 'O2': return "oxygen" integer = pp.Word(pp.nums) element = pp.StringStart() + pp.oneOf(element_symbols) + pp.Optional( integer) + pp.StringEnd() try: parts = element.parseString(formula) return "element" except pp.ParseException: pass ammonium_formulas = [ammonium_formula, '(' + ammonium_formula + ')'] cation = pp.StringStart() + pp.oneOf(ionic_metal_symbols) try: parts = cation.parseString(formula) return "ionic" except pp.ParseException: pass hydrocarbon_formula = [ammonium_formula, '(' + ammonium_formula + ')'] integer_or_hco = pp.Word("HC", "CHO1234567890") hydrocarbon = pp.StringStart() + integer_or_hco + pp.StringEnd() if formula in ["CO2", "CO"]: return "covalent" if formula in ["H2CO3"]: return "ionic" try: parts = hydrocarbon.parseString(formula) return "hydrocarbon" except pp.ParseException: pass ammonium_formulas = [ammonium_formula, '(' + ammonium_formula + ')'] polycation = pp.StringStart() + pp.oneOf(ammonium_formulas) try: parts = polycation.parseString(formula) return "ionic" except pp.ParseException: pass acid = pp.StringStart() + pp.Char('H') + pp.NotAny(pp.oneOf('e o f g s')) try: parts = acid.parseString(formula) return "acid" except pp.ParseException: pass return "covalent"
def create_bnf(term_descs): """term_descs .. list of TermParse objects (sign, term_name, term_arg_names), where sign can be real or complex multiplier""" lc = ['+'] # Linear combination context. equal = Literal("=").setParseAction(rhs(lc)) zero = Literal("0").suppress() point = Literal(".") e = CaselessLiteral("E") inumber = Word("+-" + nums, nums) fnumber = Combine( Word("+-" + nums, nums) + Optional(point + Optional(Word(nums))) + Optional(e + Word("+-" + nums, nums))) number = fnumber + Optional(Literal('j'), default='') add_op = oneOf('+ -') number_expr = Forward() number_expr << Optional(add_op) + ZeroOrMore('(') + number \ + ZeroOrMore(add_op + number_expr) \ + ZeroOrMore(')') ident = Word(alphas, alphanums + "_") integral = Combine((Literal('i') + Word(alphanums)) | Literal('i') | Literal('a') | Word(nums))("integral") history = Optional('[' + inumber + ']', default='')("history") variable = Combine(Word(alphas, alphanums + '._') + history) derivative = Combine(Literal('d') + variable \ + Literal('/') + Literal('dt')) trace = Combine(Literal('tr') + '(' + Optional(ident + Literal(',')) + variable + ')', adjacent=False) generalized_var = derivative | trace | variable args = Group(delimitedList(generalized_var)) flag = Literal('a') term = ((Optional(Literal('+') | Literal('-'), default='+')("sign") ^ Optional(number_expr + Literal('*').suppress(), default=['1.0', ''])("mul")) + Combine( ident("name") + Optional("." + (integral + "." + ident("region") + "." + flag("flag") | integral + "." + ident("region") | ident("region"))))("term_desc") + "(" + Optional(args, default=[''])("args") + ")") term.setParseAction(collect_term(term_descs, lc)) rhs1 = equal + OneOrMore(term) rhs2 = equal + zero equation = StringStart() + OneOrMore(term) \ + Optional(rhs1 | rhs2) + StringEnd() ## term.setDebug() return equation
) ParserElement.enablePackrat() # faster ParserElement.setDefaultWhitespaceChars(" \t") # use significant newlines # Data types # ------------------------------------------------------------------------------------------ # integer integer = Word(nums + "+-") # integer # floating point fp = Combine(Word(nums + "+-") + Literal(".") + Word(nums)) # fortran real exp = oneOf("E e D d") real = Combine(fp("base") + exp.setParseAction(lambda x: "e") + integer("exponent")) # C type char = Word(printables) # Decks of data # ------------------------------------------------------------------------------------------ # prelim data_type = oneOf("R I C") name_of_deck = LineStart() + OneOrMore( Word(printables), stopOn=White(min=3) + data_type ).setParseAction(" ".join) # single value decks ival_deck = name_of_deck("key") + Literal("I")("type") + integer("value")
def __init__(self): """ A program is a list of statements. Statements can be 'set' or 'select' statements. """ statement = Forward() SELECT, FROM, WHERE, SET, AS = map(CaselessKeyword, "select from where set as".split()) ident = Word( "$" + alphas, alphanums + "_$" ).setName("identifier") columnName = delimitedList(ident, ".", combine=True).setName("column name") columnNameList = Group( delimitedList(columnName)) tableName = delimitedList(ident, ".", combine=True).setName("column name") tableNameList = Group(delimitedList(tableName)) SEMI,COLON,LPAR,RPAR,LBRACE,RBRACE,LBRACK,RBRACK,DOT,COMMA,EQ = map(Literal,";:(){}[].,=") arrow = Literal ("->") t_expr = Group(ident + LPAR + Word("$" + alphas, alphanums + "_$") + RPAR + ZeroOrMore(LineEnd())).setName("t_expr") | \ Word(alphas, alphanums + "_$") + ZeroOrMore(LineEnd()) t_expr_chain = t_expr + ZeroOrMore(arrow + t_expr) whereExpression = Forward() and_, or_, in_ = map(CaselessKeyword, "and or in".split()) binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) realNum = ppc.real() intNum = ppc.signed_integer() columnRval = realNum | intNum | quotedString | columnName # need to add support for alg expressions whereCondition = Group( ( columnName + binop + (columnRval | Word(printables) ) ) | ( columnName + in_ + "(" + delimitedList( columnRval ) + ")" ) | ( columnName + in_ + "(" + statement + ")" ) | ( "(" + whereExpression + ")" ) ) whereExpression << whereCondition + ZeroOrMore( ( and_ | or_ ) + whereExpression ) ''' Assignment for handoff. ''' setExpression = Forward () setStatement = Group( ( ident ) | ( quotedString("json_path") + AS + ident("name") ) | ( "(" + setExpression + ")" ) ) setExpression << setStatement + ZeroOrMore( ( and_ | or_ ) + setExpression ) optWhite = ZeroOrMore(LineEnd() | White()) """ Define the statement grammar. """ statement <<= ( Group( Group(SELECT + t_expr_chain)("concepts") + optWhite + Group(FROM + tableNameList) + optWhite + Group(Optional(WHERE + whereExpression("where"), "")) + optWhite + Group(Optional(SET + setExpression("set"), ""))("select") ) | Group( SET + (columnName + EQ + ( quotedString | intNum | realNum )) )("set") )("statement") """ Make a program a series of statements. """ self.program = statement + ZeroOrMore(statement) """ Make rest-of-line comments. """ comment = "--" + restOfLine self.program.ignore (comment)
LBRACE, RBRACE, LBRACK, RBRACK, LPAR, RPAR, EQ, SEMI = map( Suppress, "{}[]()=;") kwds = """message required optional repeated enum extensions extends extend to package service rpc returns true false option import""" for kw in kwds.split(): exec("{0}_ = Keyword('{1}')".format(kw.upper(), kw)) messageBody = Forward() messageDefn = MESSAGE_ - ident("messageId") + LBRACE + messageBody( "body") + RBRACE typespec = oneOf("""double float int32 int64 uint32 uint64 sint32 sint64 fixed32 fixed64 sfixed32 sfixed64 bool string bytes""" ) | ident rvalue = integer | TRUE_ | FALSE_ | ident fieldDirective = LBRACK + Group(ident + EQ + rvalue) + RBRACK fieldDefn = ((REQUIRED_ | OPTIONAL_ | REPEATED_)("fieldQualifier") - typespec("typespec") + ident("ident") + EQ + integer("fieldint") + ZeroOrMore(fieldDirective) + SEMI) # enumDefn ::= 'enum' ident '{' { ident '=' integer ';' }* '}' enumDefn = ENUM_("typespec") - ident('name') + LBRACE + Dict( ZeroOrMore(Group(ident + EQ + integer + SEMI)))('values') + RBRACE # extensionsDefn ::= 'extensions' integer 'to' integer ';' extensionsDefn = EXTENSIONS_ - integer + TO_ + integer + SEMI # messageExtension ::= 'extend' ident '{' messageBody '}'
CITATION_TYPE, CITATION_TYPES, CITATION_TYPE_PUBMED, EVIDENCE, ) from ..utils import valid_date __all__ = ['ControlParser'] log = logging.getLogger(__name__) set_tag = Suppress(BEL_KEYWORD_SET) unset_tag = Suppress(BEL_KEYWORD_UNSET) unset_all = Suppress(BEL_KEYWORD_ALL) supporting_text_tags = oneOf([BEL_KEYWORD_EVIDENCE, BEL_KEYWORD_SUPPORT]) set_statement_group_stub = And( [Suppress(BEL_KEYWORD_STATEMENT_GROUP), Suppress('='), qid('group')]) set_citation_stub = And([ Suppress(BEL_KEYWORD_CITATION), Suppress('='), delimited_quoted_list('values') ]) set_evidence_stub = And( [Suppress(supporting_text_tags), Suppress('='), quote('value')])
def create_bnf( stack ): point = Literal( "." ) comma = Literal( "," ) e = CaselessLiteral( "E" ) inumber = Word( nums ) fnumber = Combine( Word( "+-"+nums, nums ) + Optional( point + Optional( Word( nums ) ) ) + Optional( e + Word( "+-"+nums, nums ) ) ) _of = Literal( 'of' ) _in = Literal( 'in' ) _by = Literal( 'by' ) _copy = Literal( 'copy' ) _mn = Literal( '-n' ).setParseAction( replace( 'OA_SubN' ) ) _me = Literal( '-e' ).setParseAction( replace( 'OA_SubE' ) ) _pn = Literal( '+n' ).setParseAction( replace( 'OA_AddN' ) ) _pe = Literal( '+e' ).setParseAction( replace( 'OA_AddE' ) ) _inn = Literal( '*n' ).setParseAction( replace( 'OA_IntersectN' ) ) _ine = Literal( '*e' ).setParseAction( replace( 'OA_IntersectE' ) ) regop = (_mn | _me | _pn | _pe | _inn | _ine) lpar = Literal( "(" ).suppress() rpar = Literal( ")" ).suppress() _all = Literal( 'all' ).setParseAction( replace( 'KW_All' ) ) node = Literal( 'node' ) nodes = Literal( 'nodes' ) element = Literal( 'element' ) elements = Literal( 'elements' ) group = Literal( 'group' ) surface = Literal( 'surface' ) variable = Word( 'xyz', max = 1 ) | Literal( 'domain' ) any_var = Word( alphas + '_', alphanums + '_' ) | fnumber ident = Word(alphas, alphanums + "_") function = Word( alphas, alphanums + '_' ) function = Group( function ).setParseAction( join_tokens ) region = Combine( Literal( 'r.' ) + Word( alphas, '_' + alphas + nums ) ) region = Group( Optional( _copy, default = 'nocopy' ) + region ) region.setParseAction( replace( 'KW_Region', keep = True ) ) coor = oneOf( 'x y z' ) boolop = oneOf( '& |' ) relop = oneOf( '< > <= >= != ==' ) bool_term = ZeroOrMore( '(' ) + (coor | fnumber ) + relop + (coor | fnumber)\ + ZeroOrMore( ')' ) relation = Forward() relation << ZeroOrMore( '(' )\ + bool_term + ZeroOrMore( boolop + relation )\ + ZeroOrMore( ')' ) relation = Group( relation ).setParseAction( join_tokens ) nos = Group( nodes + _of + surface ).setParseAction( replace( 'E_NOS' ) ) nir = Group( nodes + _in + relation ).setParseAction( \ replace( 'E_NIR', keep = True ) ) nbf = Group( nodes + _by + function ).setParseAction( \ replace( 'E_NBF', keep = True ) ) ebf = Group( elements + _by + function ).setParseAction( \ replace( 'E_EBF', keep = True ) ) eog = Group( elements + _of + group + Word( nums ) ).setParseAction( \ replace( 'E_EOG', keep = True ) ) nog = Group( nodes + _of + group + (Word(nums) | ident) ).setParseAction( \ replace( 'E_NOG', keep = True ) ) onir = Group( node + _in + region ).setParseAction( \ replace_with_region( 'E_ONIR', 2 ) ) ni = Group( node + delimitedList( inumber ) ).setParseAction( \ replace( 'E_NI', keep = True ) ) ei1 = Group( element + delimitedList( inumber ) ).setParseAction( \ replace( 'E_EI1', keep = True ) ) etuple = lpar.suppress() + inumber + comma.suppress() \ + inumber + rpar.suppress() ei2 = Group( element + delimitedList( etuple ) ).setParseAction( \ replace( 'E_EI2', keep = True ) ) region_expression = Forward() atom1 = (_all | region | ni | onir | nos | nir | nbf | ei1 | ei2 | ebf | eog | nog) atom1.setParseAction( to_stack( stack ) ) atom2 = (lpar + region_expression.suppress() + rpar) atom = (atom1 | atom2) aux = (regop + region_expression) aux.setParseAction( to_stack( stack ) ) region_expression << atom + ZeroOrMore( aux ) region_expression = StringStart() + region_expression + StringEnd() # region.set_debug() # relation.set_debug() # region_expression.set_debug() return region_expression
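# Hedged sketch of the "push matches onto a caller-supplied stack" pattern
# used by create_bnf(). The real replace()/to_stack() helpers live elsewhere
# in that module; this toy version only illustrates the idea.
from pyparsing import Word, ZeroOrMore, alphas, oneOf

def _to_stack(stack):
    """Parse action factory: append the matched tokens to `stack`."""
    def _action(tokens):
        stack.append(tokens.asList())
    return _action

_stack = []
_atom = Word(alphas + '_').setParseAction(_to_stack(_stack))
_regop = oneOf('+n -n *n +e -e *e').setParseAction(_to_stack(_stack))
_expr = _atom + ZeroOrMore(_regop + _atom)

_expr.parseString("nodes_a +n nodes_b *e elems_c")
print(_stack)  # tokens pushed in match order, e.g. [['nodes_a'], ['+n'], ...]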
:type tokens: list """ return float(tokens[0]) # Suppressed Literals LPAREN = Literal("(").suppress() RPAREN = Literal(")").suppress() COLON = Literal(":").suppress() EXP_START = Literal("[").suppress() EXP_END = Literal("]").suppress() # Grammar Tokens # DataFrame Accessor INTEGER = Combine(Optional(oneOf("+ -")) + Word(nums))\ .setParseAction(parse_num) REAL = Combine(Optional(oneOf("+ -")) + Word(nums) + "." + Optional(Word(nums)) + Optional(oneOf("e E") + Optional(oneOf("+ -")) + Word(nums)))\ .setParseAction(parse_num) # Generic Identifier IDENTIFIER = Word(alphas + '_', alphanums + '_') # Python Like Function Name FUNC_NAME = delimitedList(IDENTIFIER, delim=".", combine=True) # Exponentiation operators EXPONENTIATION_OPS = "**" # Unary Operators UNARY_OPS = oneOf("+ -") # Multiplication/Division Operators
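# Quick, hedged check of the numeric tokens defined above (the exact values
# depend on parse_num, whose body is only partially shown in this excerpt):
print(REAL.parseString("-2.5e3"))
print(INTEGER.parseString("+42"))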
.. seealso:: - BEL 2.0 specification on `cellular location (2.2.4) <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_cellular_location>`_ - PyBEL module :py:class:`pybel.parser.modifiers.LocationParser` """ from pyparsing import Group, Suppress, oneOf from ..baseparser import BaseParser from ..parse_identifier import IdentifierParser from ..utils import nest from ...constants import LOCATION __all__ = [ 'location_tag', 'LocationParser', ] location_tag = Suppress(oneOf(['loc', 'location'])) class LocationParser(BaseParser): def __init__(self, identifier_parser=None): """ :param IdentifierParser identifier_parser: An identifier parser for checking the 3P and 5P partners """ identifier_parser = identifier_parser if identifier_parser is not None else IdentifierParser( ) super(LocationParser, self).__init__( Group(location_tag + nest(identifier_parser.language))(LOCATION))
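# Hedged, self-contained sketch of the same pattern used by LocationParser
# (a suppressed 'loc'/'location' tag followed by a nested, parenthesised
# argument), without PyBEL's IdentifierParser. The argument grammar below is
# a stand-in, not PyBEL's identifier language.
from pyparsing import Group, Suppress, Word, alphanums, alphas, oneOf

_identifier = Word(alphas, alphanums + "_")
_nested = Suppress('(') + _identifier + Suppress(')')
_location = Group(Suppress(oneOf(['loc', 'location'])) + _nested)('location')

result = _location.parseString('loc(intracellular)')
print(result.dump())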
Rec. ITU-T X.680 (08/2015)
12.5 Module references
Page 17
'''
modulereference = typereference

'''
Rec. ITU-T X.680 (08/2015)
12.6 Comments
Page 17
'''
# NOTE: block comments delimited by '/*' and '*/' are not handled here.
# A one-line comment starts with "--" and ends with either "--" or a newline.
comment = hyphen_minus + \
          hyphen_minus + \
          ZeroOrMore(Word(allowed_character_set, allowed_character_set)) + \
          oneOf([hyphen_minus + hyphen_minus, newline])

'''
Rec. ITU-T X.680 (08/2015)
12.7 Empty lexical item
Page 18
'''
empty = Empty().suppress()

'''
Rec. ITU-T X.680 (08/2015)
12.8 Numbers
Page 18
'''
# number = Word(digits.replace('0', ''), digits, min=2) | \
#          Word(digits, max=1)
number = Regex(r'\d+')
":": "substring", ":=": "iexact", ":<": "lt", ":<=": "lte", ":>": "gt", ":>=": "gte", } # Parsing grammar AND = CaselessKeyword("AND") OR = Optional(CaselessKeyword("OR")) NOT = CaselessKeyword("NOT") # Search operator OPERATOR = oneOf(OPERATOR_MAP.keys()) # Field name, explicitely exlude URL like patters FIELD = Regex(r"""(?!http|ftp|https|mailto)[a-zA-Z_]+""") # Match token WORD = Regex(r"""[^ \(\)]([^ '"]*[^ '"\)])?""") DATE = Word("0123456789:.-T") # Date range RANGE = "[" + DATE + "to" + DATE + "]" # Match value REGEX_STRING = "r" + RawQuotedString('"') STRING = REGEX_STRING | RawQuotedString("'") | RawQuotedString('"') | WORD
#!/usr/bin/env python3 from pyparsing import Combine, Group, Literal, oneOf, OneOrMore, Optional, \ Token, White, Word, ZeroOrMore, \ alphas, alphas8bit, alphanums, hexnums, nums, printables from testbin.parser import parseurl octet = [chr(i) for i in range(0, 256)] OCTET = oneOf(octet) ctl = [chr(i) for i in range(0, 32)] ctl.append(chr(127)) CTL = oneOf(ctl) CR = "\r" LF = "\n" CRLF = CR + LF SP = ' ' HTAB = '\t' WSP = Literal(HTAB) ^ Literal(SP) WSP.leaveWhitespace() LWS = Optional(ZeroOrMore(WSP) + CRLF) + OneOrMore(WSP) LWS.leaveWhitespace() SWS = Optional(LWS) SWS.leaveWhitespace() ICAP_Version = Literal("ICAP/1.0") Token = Word(alphas) Extension_Method = Token Method = Literal("REQMOD") ^ Literal("RESPMOD") ^ Literal("OPTIONS") ^ \ Extension_Method Scheme = Literal("icap") Host = parseurl.host Port = parseurl.port
class DpkgParser(text_parser.PyparsingSingleLineTextParser): """Parser for Debian package manager log (dpkg.log) files.""" NAME = 'dpkg' DATA_FORMAT = 'Debian package manager log (dpkg.log) file' _ENCODING = 'utf-8' _DPKG_STARTUP = 'startup' _DPKG_STATUS = 'status' _DPKG_CONFFILE = 'conffile' _DPKG_ACTIONS = [ 'install', 'upgrade', 'configure', 'trigproc', 'disappear', 'remove', 'purge' ] _DPKG_STARTUP_TYPES = ['archives', 'packages'] _DPKG_STARTUP_COMMANDS = [ 'unpack', 'install', 'configure', 'triggers-only', 'remove', 'purge' ] _DPKG_CONFFILE_DECISIONS = ['install', 'keep'] _DPKG_STARTUP_BODY = pyparsing.Combine( pyparsing.Literal(_DPKG_STARTUP) + pyparsing.oneOf(_DPKG_STARTUP_TYPES) + pyparsing.oneOf(_DPKG_STARTUP_COMMANDS), joinString=' ', adjacent=False) _DPKG_STATUS_BODY = pyparsing.Combine( pyparsing.Literal(_DPKG_STATUS) + pyparsing.Word(pyparsing.printables) + pyparsing.Word(pyparsing.printables) + pyparsing.Word(pyparsing.printables), joinString=' ', adjacent=False) _DPKG_ACTION_BODY = pyparsing.Combine( pyparsing.oneOf(_DPKG_ACTIONS) + pyparsing.Word(pyparsing.printables) + pyparsing.Word(pyparsing.printables) + pyparsing.Word(pyparsing.printables), joinString=' ', adjacent=False) _DPKG_CONFFILE_BODY = pyparsing.Combine( pyparsing.Literal(_DPKG_CONFFILE) + pyparsing.Word(pyparsing.printables) + pyparsing.oneOf(_DPKG_CONFFILE_DECISIONS), joinString=' ', adjacent=False) _DPKG_LOG_LINE = ( text_parser.PyparsingConstants.DATE_TIME.setResultsName('date_time') + pyparsing.MatchFirst([ _DPKG_STARTUP_BODY, _DPKG_STATUS_BODY, _DPKG_ACTION_BODY, _DPKG_CONFFILE_BODY ]).setResultsName('body')) LINE_STRUCTURES = [('line', _DPKG_LOG_LINE)] def ParseRecord(self, parser_mediator, key, structure): """Parses a structure of tokens derived from a line of a text file. Args: parser_mediator (ParserMediator): parser mediator. key (str): identifier of the structure of tokens. structure (pyparsing.ParseResults): structure of tokens derived from a line of a text file. Raises: ParseError: when the structure type is unknown. """ if key != 'line': raise errors.ParseError( 'Unable to parse record, unknown structure: {0:s}'.format(key)) time_elements_tuple = self._GetValueFromStructure( structure, 'date_time') try: date_time = dfdatetime_time_elements.TimeElements( time_elements_tuple=time_elements_tuple) except ValueError: parser_mediator.ProduceExtractionWarning( 'invalid date time value: {0!s}'.format(time_elements_tuple)) return body_text = self._GetValueFromStructure(structure, 'body') if not body_text: parser_mediator.ProduceExtractionWarning('missing body text') return event_data = DpkgEventData() event_data.body = body_text event = time_events.DateTimeValuesEvent( date_time, definitions.TIME_DESCRIPTION_ADDED) parser_mediator.ProduceEventWithEventData(event, event_data) def VerifyStructure(self, parser_mediator, line): """Verifies if a line from a text file is in the expected format. Args: parser_mediator (ParserMediator): parser mediator. line (str): line from a text file. Returns: bool: True if the line is in the expected format, False if not. """ try: structure = self._DPKG_LOG_LINE.parseString(line) except pyparsing.ParseException as exception: logger.debug( 'Unable to parse Debian dpkg.log file with error: {0!s}'. format(exception)) return False return 'date_time' in structure and 'body' in structure
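# Hedged, standalone sketch of the action-body grammar above (no plaso
# imports), run against a typical dpkg.log body string (the sample package
# name and version are made up):
import pyparsing

_actions = ['install', 'upgrade', 'configure', 'trigproc', 'disappear',
            'remove', 'purge']
_action_body = pyparsing.Combine(
    pyparsing.oneOf(_actions) +
    pyparsing.Word(pyparsing.printables) +
    pyparsing.Word(pyparsing.printables) +
    pyparsing.Word(pyparsing.printables),
    joinString=' ', adjacent=False)

print(_action_body.parseString('install libfoo:amd64 <none> 1.0-1'))
# ['install libfoo:amd64 <none> 1.0-1']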
def parse(s): equals = pp.Suppress('=') colon = pp.Suppress(':') comment = pp.Suppress( pp.Optional(pp.Literal('#') - pp.ZeroOrMore(pp.Word(pp.printables)))) # set up multiple grammars # single str value strkeys = pp.oneOf(' '.join(strkeylist), caseless=True) string = pp.Word(pp.alphanums + punctuation) strgram = strkeys - equals - string - comment # single num value numkeys = pp.oneOf(' '.join(numkeys_int + numkeys_float), caseless=True) point = pp.Literal(".") e = pp.CaselessLiteral("E") num = pp.Combine( pp.Word("+-" + pp.nums, pp.nums) + pp.Optional(point + pp.Optional(pp.Word(pp.nums))) + pp.Optional(e + pp.Word("+-" + pp.nums, pp.nums))) numgram = numkeys - equals - num - comment # variable definition grammar strnumkeys = pp.oneOf(' '.join(var_def_keys + b_var_def_keys), caseless=True) bng_parameter = pp.Word(pp.alphas, pp.alphanums + "_") varnums = bng_parameter - num - num - pp.Optional(pp.Word("ubBU")) strnumgram = strnumkeys - equals - varnums - comment # multiple string value grammar multstrkey = pp.oneOf(' '.join(multstrkeys), caseless=True) multstrgram = multstrkey - equals - pp.OneOrMore(string) # var and logvar alt grammar (only one number given) varkeys = pp.oneOf(' '.join(var_def_keys_1or2nums), caseless=True) vargram = varkeys - equals - bng_parameter - num - pp.Optional( num) - comment # multiple num value multnumkey = pp.oneOf(' '.join(multnumkeys), caseless=True) multnumgram = multnumkey - equals - pp.OneOrMore(num) - comment # model-data mapping grammar mdmkey = pp.CaselessLiteral("model") nonetoken = pp.Suppress(pp.CaselessLiteral("none")) model_file = pp.Regex(".*?\.(bngl|xml)") exp_file = pp.Regex(".*?\.(exp|con)") mdmgram = mdmkey - equals - model_file - colon - ( pp.delimitedList(exp_file) ^ nonetoken) - comment # normalization mapping grammar normkey = pp.CaselessLiteral("normalization") anything = pp.Word(pp.alphanums + punctuation + ' ') normgram = normkey - equals - anything # The set of legal grammars for normalization is too complicated, # Will handle with separate code. # Grammar for dictionary-like specification of simulation actions # We are intentionally over-permissive here, because the Action class will be able to give more helpful error # messages than a failed parse. dict_entry = pp.Word( pp.alphas) - colon - pp.Word(pp.alphanums + punctuation_safe) dict_key = pp.oneOf(' '.join(dictkeys), caseless=True) dictgram = dict_key - equals - pp.delimitedList(dict_entry) - comment # mutant model grammar mutkey = pp.CaselessLiteral('mutant') mut_op = pp.Group( pp.Word(pp.alphas + '_', pp.alphanums + '_') - pp.oneOf('+ - * / =') - num) mutgram = mutkey - equals - string - string - pp.Group(pp.OneOrMore(mut_op)) - \ pp.Group(colon - (pp.delimitedList(exp_file) ^ nonetoken)) - comment # check each grammar and output somewhat legible error message line = (mdmgram | strgram | numgram | strnumgram | multnumgram | multstrgram | vargram | normgram | dictgram | mutgram).parseString(s, parseAll=True).asList() return line
def __init__(self, *studies): # import all parameter names self.studies = studies if len(self.studies) == 0: raise ConfigurationError( 'Parser instance takes at least one Study instance as argument.' ) self.names = [] for study in studies: self.names.extend(study.observationModel.parameterNames) try: # OnlineStudy: loop over all transition models for names in study.hyperParameterNames: self.names.extend(names) except AttributeError: try: # Hyper/ChangepointStudy: only one transition model self.names.extend(study.flatHyperParameterNames) except AttributeError: pass if not len(np.unique(self.names)) == len(self.names): raise ConfigurationError( 'Specified study objects contain duplicate parameter names.') # define arithmetic operators self.arith = { '+': operator.add, '-': operator.sub, '*': operator.mul, '/': operator.truediv, '^': operator.pow } # initialize symbols for parsing parameter = pp.oneOf(self.names) point = pp.Literal(".") e = pp.CaselessLiteral("E") fnumber = pp.Combine( pp.Word("+-" + pp.nums, pp.nums) + pp.Optional(point + pp.Optional(pp.Word(pp.nums))) + pp.Optional(e + pp.Word("+-" + pp.nums, pp.nums))) # initialize list of all numpy functions, remove functions that collide with (hyper-)parameter names self.functions = dir(np) for name in self.names: try: self.functions.remove(name) print( '! WARNING: Numpy function "{}" will not be available in parser, as it collides with ' '(hyper-)parameter names.'.format(name)) except ValueError: pass # initialize operators for parsing funcop = pp.oneOf(self.functions) atop = pp.Literal('@') expop = pp.Literal('^') signop = pp.oneOf('+ -') multop = pp.oneOf('* /') plusop = pp.oneOf('+ -') # minimal symbol atom = (parameter | fnumber) # expression based on operator precedence self.expr = pp.operatorPrecedence(atom, [(funcop, 1, pp.opAssoc.RIGHT), (atop, 2, pp.opAssoc.LEFT), (expop, 2, pp.opAssoc.RIGHT), (signop, 1, pp.opAssoc.RIGHT), (multop, 2, pp.opAssoc.LEFT), (plusop, 2, pp.opAssoc.LEFT)])
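# Hedged sketch of the operator-precedence setup above, using a toy parameter
# list instead of study-derived names and only a subset of the operators
# (no '@' operator, no collision filtering). All names here are assumptions.
import pyparsing as pp

_parameter = pp.oneOf(['mu', 'sigma'])
_fnumber = pp.pyparsing_common.fnumber
_funcop = pp.oneOf(['exp', 'sqrt'])

_expr = pp.operatorPrecedence(_parameter | _fnumber, [
    (_funcop, 1, pp.opAssoc.RIGHT),
    (pp.Literal('^'), 2, pp.opAssoc.RIGHT),
    (pp.oneOf('+ -'), 1, pp.opAssoc.RIGHT),
    (pp.oneOf('* /'), 2, pp.opAssoc.LEFT),
    (pp.oneOf('+ -'), 2, pp.opAssoc.LEFT),
])

print(_expr.parseString('sqrt sigma * 2 + mu'))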
EQ, LPAR, RPAR, COLON, COMMA = map(Suppress, "=():,") EXCL, DOLLAR = map(Literal, "!$") sheetRef = Word(alphas, alphanums) | QuotedString("'", escQuote="''") colRef = Optional(DOLLAR) + Word(alphas, max=2) rowRef = Optional(DOLLAR) + Word(nums) cellRef = Combine( Group(Optional(sheetRef + EXCL)("sheet") + colRef("col") + rowRef("row"))) cellRange = (Group(cellRef("start") + COLON + cellRef("end"))("range") | cellRef | Word(alphas, alphanums)) expr = Forward() COMPARISON_OP = oneOf("< = > >= <= != <>") condExpr = expr + COMPARISON_OP + expr ifFunc = (CaselessKeyword("if") - LPAR + Group(condExpr)("condition") + COMMA + Group(expr)("if_true") + COMMA + Group(expr)("if_false") + RPAR) def stat_function(name): return Group( CaselessKeyword(name) + Group(LPAR + delimitedList(expr) + RPAR)) sumFunc = stat_function("sum") minFunc = stat_function("min") maxFunc = stat_function("max") aveFunc = stat_function("ave")
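# Hedged check of the cell-reference pieces defined above. Within this
# excerpt `expr` is still an unfilled Forward, so only cellRef/cellRange are
# exercised; the sample references are made up.
print(cellRef.parseString("Sheet1!$A$1"))   # e.g. ['Sheet1!$A$1']
print(cellRange.parseString("$B$2:C10"))    # e.g. [['$B$2', 'C10']]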
def _parseFeatureSignatureAndRewrite(sig):
    """This function parses a given feature-signature and rewrites
    the signature according to the given __pt mapping.
    """
    # this dictionary holds all transformations of operators from
    # the origin (cpp) to the compare (language)
    # e.g. in cpp && stands for the 'and'-operator.
    # the equivalent in maple (which is used for comparison)
    # is '&and'
    # if no equivalence can be found a name rewriting is done
    # e.g. 'defined'
    __pt = {
        #'defined' : 'defined_',
        'defined': '',
        '!': '&not',
        '&&': '&and',
        '||': '&or',
        '<': '<',
        '>': '>',
        '<=': '<=',
        '>=': '>=',
        '==': '=',
        '!=': '!=',
        '*': '*',    # needs rewriting with parenthesis
        '/': '/',
        '%': '',     # needs rewriting a % b => modp(a, b)
        '+': '+',
        '-': '-',
        '&': '',     # needs rewriting a & b => BitAnd(a, b)
        '|': '',     # needs rewriting a | b => BitOr(a, b)
        '>>': '>>',  # needs rewriting a >> b => a / (2^b)
        '<<': '<<',  # needs rewriting a << b => a * (2^b)
    }

    def _rewriteOne(param):
        """This function returns each one parameter function
        representation for maple."""
        if param[0][0] == '!':
            ret = __pt[param[0][0]] + '(' + str(param[0][1]) + ')'
        if param[0][0] == 'defined':
            ret = __pt[param[0][0]] + str(param[0][1])
        return ret

    def _rewriteTwo(param):
        """This function returns each two parameter function
        representation for maple."""
        # rewriting rules
        if param[0][1] == '%':
            return 'modp(' + param[0][0] + ',' + param[0][2] + ')'

        ret = ' ' + __pt[param[0][1]] + ' '
        ret = '(' + ret.join(map(str, param[0][0::2])) + ')'

        if param[0][1] in ['<', '>', '<=', '>=', '!=', '==']:
            ret = '(true &and ' + ret + ')'
        return ret

    operand = __string | __hexadec | __integer | \
        __function | __identifier
    compoperator = pypa.oneOf('< > <= >= == !=')
    calcoperator = pypa.oneOf('+ - * / & | << >> %')
    expr = pypa.operatorPrecedence(operand, [
        ('defined', 1, pypa.opAssoc.RIGHT, _rewriteOne),
        ('!', 1, pypa.opAssoc.RIGHT, _rewriteOne),
        (calcoperator, 2, pypa.opAssoc.LEFT, _rewriteTwo),
        (compoperator, 2, pypa.opAssoc.LEFT, _rewriteTwo),
        ('&&', 2, pypa.opAssoc.LEFT, _rewriteTwo),
        ('||', 2, pypa.opAssoc.LEFT, _rewriteTwo),
    ])

    try:
        rsig = expr.parseString(sig)[0]
    except pypa.ParseException as e:
        print('ERROR (parse): cannot parse sig (%s) -- (%s)' % (sig, e.col))
        return sig
from pyparsing import infixNotation, oneOf, opAssoc, pyparsing_common, ParseResults

expression_strs = open("inputs/day18.txt", 'r').read().splitlines()

# Part a: '+' and '*' share one precedence level, so they are applied left to
# right. Part b: '+' is listed first in the operator list, so it binds more
# tightly than '*'.
parser_a = infixNotation(pyparsing_common.integer,
                         [(oneOf('* +'), 2, opAssoc.LEFT)])
parser_b = infixNotation(pyparsing_common.integer,
                         [(oneOf('+'), 2, opAssoc.LEFT),
                          (oneOf("*"), 2, opAssoc.LEFT)])


def apply(expression):
    """Evaluate a (possibly nested) flat token list left to right."""
    result = 0
    op = "+"
    for i in expression:
        if isinstance(i, ParseResults):
            result = eval(f"{result}{op}{apply(i)}")
        elif isinstance(i, str):
            op = i
        else:
            result = eval(f"{result}{op}{i}")
    return result


print(
    f"part a: {sum(apply(i) for i in (parser_a.parseString(e) for e in expression_strs))}"
)
print(
    f"part b: {sum(apply(i) for i in (parser_b.parseString(e) for e in expression_strs))}"
)
Sup = P.Suppress

# commands

botname = P.Forward()


def setBotName(newname):
    botname << CL(newname)


identifier = P.Word(P.alphas + "_", P.alphanums + "_").setResultsName('identifier')
command_leader = L(".")
hail = (botname + P.oneOf(": ,")) | (botname + P.White())
command_args = P.restOfLine.setResultsName('command_args')

command = (
    P.StringStart() +
    Sup(command_leader | hail) +
    identifier.setResultsName('command_identifier') +
    Sup(P.Optional(P.White())) +
    command_args.setResultsName('command_args')).setResultsName('command')

_test_commands = [
    (".hello", "['hello', '']"),
    (".foo bar", "['foo', 'bar']"),
    (". foo", "['foo', '']"),
    ("..foo", P.ParseException),
    ("TestBot:foo", "['foo', '']"),
    ("tesTBot,foo", "['foo', '']"),
class TestCommonHelperExpressions(PyparsingExpressionTestCase): tests = [ PpTestSpec( desc="A comma-delimited list of words", expr=pp.delimitedList(pp.Word(pp.alphas)), text="this, that, blah,foo, bar", expected_list=["this", "that", "blah", "foo", "bar"], ), PpTestSpec( desc="A counted array of words", expr=pp.Group(pp.countedArray(pp.Word("ab")))[...], text="2 aaa bbb 0 3 abab bbaa abbab", expected_list=[["aaa", "bbb"], [], ["abab", "bbaa", "abbab"]], ), PpTestSpec( desc="skipping comments with ignore", expr=( pp.pyparsing_common.identifier("lhs") + "=" + pp.pyparsing_common.fnumber("rhs") ).ignore(pp.cppStyleComment), text="abc_100 = /* value to be tested */ 3.1416", expected_list=["abc_100", "=", 3.1416], expected_dict={"lhs": "abc_100", "rhs": 3.1416}, ), PpTestSpec( desc="some pre-defined expressions in pyparsing_common, and building a dotted identifier with delimted_list", expr=( pp.pyparsing_common.number("id_num") + pp.delimitedList(pp.pyparsing_common.identifier, ".", combine=True)( "name" ) + pp.pyparsing_common.ipv4_address("ip_address") ), text="1001 www.google.com 192.168.10.199", expected_list=[1001, "www.google.com", "192.168.10.199"], expected_dict={ "id_num": 1001, "name": "www.google.com", "ip_address": "192.168.10.199", }, ), PpTestSpec( desc="using oneOf (shortcut for Literal('a') | Literal('b') | Literal('c'))", expr=pp.oneOf("a b c")[...], text="a b a b b a c c a b b", expected_list=["a", "b", "a", "b", "b", "a", "c", "c", "a", "b", "b"], ), PpTestSpec( desc="parsing nested parentheses", expr=pp.nestedExpr(), text="(a b (c) d (e f g ()))", expected_list=[["a", "b", ["c"], "d", ["e", "f", "g", []]]], ), PpTestSpec( desc="parsing nested braces", expr=( pp.Keyword("if") + pp.nestedExpr()("condition") + pp.nestedExpr("{", "}")("body") ), text='if ((x == y) || !z) {printf("{}");}', expected_list=[ "if", [["x", "==", "y"], "||", "!z"], ["printf(", '"{}"', ");"], ], expected_dict={ "condition": [[["x", "==", "y"], "||", "!z"]], "body": [["printf(", '"{}"', ");"]], }, ), ]