def np(words, fn=gr_opt_quoted_string, action=None):
    """Build a parser for ``<word> = <value>``.

    Any entry of *words* is accepted (case-insensitively) as the
    keyword; *fn* matches the value and *action* becomes the parse
    action of the whole expression.
    """
    keyword = Keyword(words[0], caseless=True)
    for alternative in words[1:]:
        keyword = keyword | Keyword(alternative, caseless=True)
    parser = keyword + gr_eq + fn
    parser.setParseAction(action)
    return parser
def build_parser(root_directory, path, fake_root=os.getcwd(), file_reader=None):
    """Build a pyparsing grammar for a brace-delimited config format.

    Statements look like ``name arg... [{ nested; statements; }];`` and
    ``include "file";`` directives are resolved through an
    IncludeHandler rooted at *root_directory*.

    NOTE(review): ``fake_root=os.getcwd()`` is evaluated once at
    function-definition time, not per call — confirm that is intended.

    Returns the root parser, augmented with a ``parse_file`` helper.
    """
    from pyparsing import nestedExpr
    from pyparsing import QuotedString
    from pyparsing import Group
    from pyparsing import restOfLine
    from pyparsing import Word
    from pyparsing import alphanums
    from pyparsing import cStyleComment
    from pyparsing import OneOrMore
    from pyparsing import ZeroOrMore
    from pyparsing import Optional
    from pyparsing import Forward
    from pyparsing import Literal
    from pyparsing import Keyword

    root = Forward()
    # The handler receives the (still-empty) root parser so that
    # included files can be parsed recursively with the same grammar.
    include_handler = IncludeHandler(
        root_directory, path, root, fake_root=fake_root, file_reader=file_reader)

    # relaxed grammar
    identifier = Word(alphanums + "-_.:/")
    # //-style and #-style line comments plus /* ... */ blocks.
    comment = ("//" + restOfLine).suppress() \
        | ("#" + restOfLine).suppress() \
        | cStyleComment
    endstmt = Literal(";").suppress()
    argument = QuotedString('"') \
        | identifier
    arguments = ZeroOrMore(argument)
    statements = Forward()
    section = nestedExpr("{", "}", statements)
    include = Keyword("include").suppress() + QuotedString('"')
    regular = identifier + Group(arguments) + Optional(section, default=[])
    # 'include' must be tried before the generic statement form so the
    # include handler fires instead of a plain mark.
    statement = include.setParseAction(include_handler.pyparsing_call) \
        | regular.setParseAction(include_handler.pyparsing_mark)
    statements << OneOrMore(statement + endstmt)
    root << Optional(statements)
    root.ignore(comment)
    # Convenience entry point: root.parse_file(f) parses a whole file.
    setattr(
        root, 'parse_file',
        lambda f, root=root: root.parseFile(f, parseAll=True))
    return root
def np(words, fn=gr_opt_quoted_string, action=nullDebugAction):
    """Build a caseless ``<word> = <value>`` parser with debug tracing.

    Every keyword alternative honours the ``bacula_tools.DEBUG`` flag
    via ``setDebug()``; *fn* matches the value and *action* is attached
    as the parse action of the combined expression.
    """
    debug = bacula_tools.DEBUG
    alternatives = [Keyword(word, caseless=True).setDebug(debug)
                    for word in words]
    combined = alternatives[0]
    for extra in alternatives[1:]:
        combined = combined | extra
    combined = combined + gr_eq + fn
    combined.setParseAction(action)
    return combined
def contains_keyword(text, query_keyword, atStart=False):
    """Test presence of the keyword query_keyword with regard to
    surrounding unicode characters.

    If atStart=True, this function succeeds only if text starts with
    query_keyword.
    """
    matcher = Keyword(query_keyword)
    for _tokens, start, end in matcher.scanString(text):
        if atStart and start:
            return False
        # Slicing (not indexing) yields '' at the string boundaries,
        # and ''.isalnum() is False — so edges count as non-word chars.
        before = text[start - 1:start]
        after = text[end:end + 1]
        if not before.isalnum() and not after.isalnum():
            return True
    return False
def parse (input):
    """Parse *input* into an element of the abstract representation.

    Raises a pyparsing ParseException on malformed input.
    """
    # Grammar:
    #
    # <expr> ::= <integer>
    #            true
    #            false
    #            <identifier>
    #            ( if <expr> <expr> <expr> )
    #            ( let ( ( <name> <expr> ) ) <expr> )
    #            ( + <expr> <expr> )
    #            ( * <expr> <expr> )

    idChars = alphas+"_+*-?!=<>"

    pIDENTIFIER = Word(idChars, idChars+"0123456789")
    pIDENTIFIER.setParseAction(lambda result: EId(result[0]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars,idChars+"0123456789")

    pINTEGER = Word("-0123456789","0123456789")
    pINTEGER.setParseAction(lambda result: EInteger(int(result[0])))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(lambda result: EBoolean(result[0]=="true"))

    pEXPR = Forward()

    # Indices below count the literal tokens too: result[0] is '(',
    # result[1] the keyword, etc.
    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2],result[3],result[4]))

    pBINDING = "(" + pNAME + pEXPR + ")"
    pBINDING.setParseAction(lambda result: (result[1],result[2]))

    pLET = "(" + Keyword("let") + "(" + pBINDING + ")" + pEXPR + ")"
    pLET.setParseAction(lambda result: ELet([result[3]],result[5]))

    pPLUS = "(" + Keyword("+") + pEXPR + pEXPR + ")"
    pPLUS.setParseAction(lambda result: ECall("+",[result[2],result[3]]))

    pTIMES = "(" + Keyword("*") + pEXPR + pEXPR + ")"
    pTIMES.setParseAction(lambda result: ECall("*",[result[2],result[3]]))

    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pLET | pPLUS | pTIMES)

    result = pEXPR.parseString(input)[0]
    return result    # the first element of the result is the expression
def create_grammar():
    """Create the pyparsing grammar for the class-diagram DSL.

    Relies on the module-level ``arrows`` and ``stereotypes`` mappings,
    which must be populated before this is called.

    Returns a parser matching zero or more class definitions and
    relations, with C- and C++-style comments ignored.
    """
    global arrows
    global stereotypes
    assert len(arrows) > 0
    assert len(stereotypes) > 0

    # Character classes: everything printable except the delimiter we
    # need to stop at, plus inline whitespace.
    linechars = ''.join((c for c in printables if c not in '}\n')) + ' \t'
    norbracket = ''.join((c for c in printables if c != ']')) + ' \t'
    nogt = ''.join((c for c in printables if c != '>')) + ' \t'
    norparen = ''.join((c for c in printables if c != ')')) + ' \t'

    line = Word(linechars)
    cls_body = Group(ZeroOrMore(line))
    classkeyword = Keyword('class').setResultsName('type')

    # BUGFIX: dict.keys() is a view in Python 3 and cannot be indexed;
    # materialize it as a list first.
    st_names = list(stereotypes.keys())
    st = Literal(st_names[0])
    for s in st_names[1:]:
        st = st | Literal(s)
    stereotype = Group(Optional(Literal('<<').suppress() + st +
                                Literal('>>').suppress()))

    identifier_list = Word(alphas) + ZeroOrMore(Literal(',').suppress() +
                                                Word(alphas))
    baseclasses = Group(Optional(Literal(':').suppress() + identifier_list))

    cls = Group(stereotype + classkeyword + Word(alphas) + baseclasses +
                Literal('{').suppress() + cls_body + Literal('}').suppress())

    # Longest arrow tokens first so e.g. '<->' is tried before '->'.
    # BUGFIX: Python 3 removed cmp-style sort arguments; the key-based
    # descending-length sort is equivalent to the old
    # sort(lambda x, y: -cmp(len(x), len(y))).
    arrow_names = sorted(arrows.keys(), key=len, reverse=True)
    arrow = Keyword(arrow_names[0])
    for ar in arrow_names[1:]:
        arrow = arrow | Keyword(ar)

    relation_caption = Literal('(').suppress() + Word(norparen) + \
        Literal(')').suppress()

    quantifier = Literal('[').suppress() + Word(norbracket) + \
        Literal(']').suppress()

    relation = Group(Word(alphas) + Group(Optional(quantifier)) +
                     arrow.setResultsName('type') +
                     Word(alphas) + Group(Optional(quantifier)) +
                     Group(Optional(relation_caption)))

    grammar = ZeroOrMore(cls | relation)
    grammar.ignore(cStyleComment)
    grammar.ignore("//" + restOfLine)
    return grammar
def __createGram(self):
    """Build the propositional-logic grammar and store it in ``self.final``."""
    # Keyword enforces word boundaries; Literal matches anywhere.
    # Which one 'not' gets depends on self.notNeedSpace — presumably a
    # configuration of whether 'not' must stand alone; TODO confirm.
    if self.notNeedSpace:
        lNot = Keyword(self.operators['not'])
    else:
        lNot = Literal(self.operators['not'])

    lAnd = Literal(self.operators['and'])
    lOr = Literal(self.operators['or'])
    lImp = Literal(self.operators['impL'])
    lEqu = Literal(self.operators['equ'])

    lTrue = Keyword(self.constants['true'])
    lFalse = Keyword(self.constants['false'])

    # A variable: a letter followed by letters, digits or underscores.
    lVar = Word(alphas, alphanums+'_')

    # Parse actions delegate AST construction to the formula factory.
    lVar.setParseAction(self.ffactory.createLogicVariable)
    lTrue.setParseAction(self.ffactory.createLogicTruth)
    lFalse.setParseAction(self.ffactory.createLogicFalse)

    factor = lTrue | lFalse | lVar

    # Precedence (tightest first): not, and, or, implication,
    # equivalence; three interchangeable bracket pairs.
    expression = myparsing.operatorPrecedence(factor, [
        (lNot, 1, opAssoc.RIGHT, self.ffactory.createNotOperation),
        (lAnd, 2, opAssoc.LEFT, self.ffactory.createAndOperation),
        (lOr, 2, opAssoc.LEFT, self.ffactory.createOrOperation),
        (lImp, 2, opAssoc.LEFT, self.ffactory.createImpicationOperation),
        (lEqu, 2, opAssoc.LEFT, self.ffactory.createEquvalenceOperation)
        ], [('(', ')'), ('[', ']'), ('{', '}')])

    # Require the whole input to be consumed.
    self.final = expression + StringEnd()
def parse_string(self, string):
    '''Populate a new object from a string.

    Parsing is hard, so we're going to call out to the pyparsing
    library here.  I hope you installed it!
    '''
    from pyparsing import Suppress, Regex, quotedString, restOfLine, Keyword, nestedExpr, Group, OneOrMore, Word, Literal, alphanums, removeQuotes, replaceWith
    gr_eq = Literal('=')
    gr_stripped_string = quotedString.copy().setParseAction( removeQuotes )
    gr_opt_quoted_string = gr_stripped_string | restOfLine
    # name = <value>  → stores the value on this object.
    gr_name = Keyword('name', caseless=True) + gr_eq + gr_opt_quoted_string
    gr_name.setParseAction(lambda x, y=self: y._set_name(x[2]))
    # Booleans are normalised to '1'/'0' strings.
    gr_yn = Keyword('yes', caseless=True).setParseAction(replaceWith('1')) | Keyword('no', caseless=True).setParseAction(replaceWith('0'))
    gr_phrase = Group(OneOrMore(gr_stripped_string | Word(alphanums)) + gr_eq + gr_opt_quoted_string)

    # Helper: match any of *words* (caselessly) followed by '= value'.
    # NOTE(review): the default action=print looks like a debugging
    # leftover — every call below passes an explicit action; confirm.
    def np(words, fn = gr_opt_quoted_string, action=print):
        p = Keyword(words[0], caseless=True)
        for w in words[1:]:
            p = p | Keyword(w, caseless=True)
        p = p + gr_eq + fn
        p.setParseAction(action)
        return p

    gr_ifsc = np(PList('Ignore File Set Changes'), gr_yn, action=self._parse_setter(IGNORECHANGES))
    gr_evss = np(PList('Enable VSS'), gr_yn, action=self._parse_setter(VSSENABLED))

    # Options blocks are captured wholesale as brace-delimited text.
    gr_i_option = Group(Keyword(OPTIONS, caseless=True) + nestedExpr('{','}', Regex('[^\}]+', re.MULTILINE)))
    gr_e_option = gr_i_option.copy()
    gr_i_file = gr_phrase.copy()
    gr_e_file = gr_phrase.copy()

    gr_inc = Keyword('include', caseless=True) + nestedExpr('{','}', OneOrMore(gr_i_option | gr_i_file))
    gr_inc.addParseAction(self._parse_add_entry)
    gr_exc = Keyword('exclude', caseless=True) + nestedExpr('{','}', OneOrMore(gr_e_option | gr_e_file))
    gr_exc.addParseAction(self._parse_add_entry)

    gr_res = OneOrMore(gr_name | gr_inc | gr_exc | gr_ifsc | gr_evss)
    result = gr_res.parseString(string, parseAll=True)
    return 'Fileset: ' + self[NAME]
def _create_filter_parser():
    """Build a parser for triple-pattern filter expressions.

    A filter is one or more ``<subj pred obj>`` triples combined with
    AND/OR and parenthesised sub-expressions.  Note the use of ``^``
    (Or): alternatives are tried longest-match, not first-match.
    """
    and_kw = Keyword('AND')
    or_kw = Keyword('OR')
    # A variable is '?' immediately followed by an identifier word.
    variable = Literal('?') + Word(alphanums + '_').leaveWhitespace()
    # A URI term: any non-quote run of printables excluding '>' and '*'.
    uri_term = NotAny(Literal('"')) + Word(printables, excludeChars='>*')
    uri_part = Keyword('*') ^ uri_term ^ variable
    literal_term = QuotedString(quoteChar='"', escChar='\\')
    # <subj pred obj>  — object may be a URI part or a quoted literal,
    # captured under distinct result names.
    triple = Group(Literal('<').suppress() +
                   uri_part.setResultsName('subj') +
                   uri_part.setResultsName('pred') +
                   (Group(uri_part).setResultsName('obj') ^
                    Group(literal_term).setResultsName('objlit')) +
                   Literal('>').suppress())
    expr = Forward()
    atom = (triple.setResultsName('triple') |
            Literal('(').suppress() + expr + Literal(')').suppress())
    and_group = Group(atom + ZeroOrMore(and_kw.suppress() + atom))
    or_group = Group(atom + ZeroOrMore(or_kw.suppress() + atom))
    expr << (and_group.setResultsName('and') ^
             or_group.setResultsName('or'))
    return expr
def check_if_case_arg(code):
    """Return True if *code* contains the ``case`` keyword, else False.

    Uses pyparsing's Keyword so that identifiers merely containing the
    substring (e.g. ``lowercase``) do not count as matches.
    """
    # searchString returns a list-like of all matches; comparing its
    # length replaces the original if/else returning True/False.
    return len(Keyword('case').searchString(code)) > 0
pythonKeywords = """and as assert break class continue def del elif else except exec finally for from global if import in is lambda None not or pass print raise return try while with yield True False""" pythonKeywords = set(pythonKeywords.split()) def no_keywords_allowed(s,l,t): wd = t[0] if wd in pythonKeywords: errmsg = "cannot not use keyword '%s' " \ "as an identifier" % wd raise ParseException(s,l,errmsg) ident.setParseAction(no_keywords_allowed) stateTransition = ident("fromState") + "->" + ident("toState") stateMachine = Keyword("statemachine") + \ ident("name") + ":" + \ OneOrMore(Group(stateTransition))("transitions") namedStateTransition = (ident("fromState") + \ "-(" + ident("transition") + ")->" + \ ident("toState")) namedStateMachine = Keyword("statemachine") + \ ident("name") + ":" + \ OneOrMore(Group(namedStateTransition))("transitions") def expand_state_definition(source, loc, tokens): indent = " " * (col(loc,source)-1) statedef = [] # build list of states states = set()
def parse (input):
    """Parse *input* into an abstract-representation element.

    Returns a dict: ``{"result": "expression", "expr": ...}`` for a
    top-level expression, or ``{"result": "function", "name": ...,
    "params": ..., "body": ...}`` for a ``defun`` form.
    """
    # Grammar:
    #
    # <expr> ::= <integer>
    #            true
    #            false
    #            <identifier>
    #            ( if <expr> <expr> <expr> )
    #            ( let ( ( <name> <expr> ) ) <expr> )
    #            ( <name> <expr> ... )

    idChars = alphas+"_+*-?!=<>"

    pIDENTIFIER = Word(idChars, idChars+"0123456789")
    pIDENTIFIER.setParseAction(lambda result: EId(result[0]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars,idChars+"0123456789")

    # Wrapping in an extra list keeps the name sequence as one token.
    pNAMES = ZeroOrMore(pNAME)
    pNAMES.setParseAction(lambda result: [result])

    pINTEGER = Word("-0123456789","0123456789")
    pINTEGER.setParseAction(lambda result: EInteger(int(result[0])))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(lambda result: EBoolean(result[0]=="true"))

    pEXPR = Forward()

    # Indices below count the literal tokens too: result[0] is '(',
    # result[1] the keyword, etc.
    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2],result[3],result[4]))

    pBINDING = "(" + pNAME + pEXPR + ")"
    pBINDING.setParseAction(lambda result: (result[1],result[2]))

    pBINDINGS = OneOrMore(pBINDING)
    pBINDINGS.setParseAction(lambda result: [ result ])

    pLET = "(" + Keyword("let") + "(" + pBINDINGS + ")" + pEXPR + ")"
    pLET.setParseAction(lambda result: ELet(result[3],result[5]))

    pEXPRS = ZeroOrMore(pEXPR)
    pEXPRS.setParseAction(lambda result: [result])

    pCALL = "(" + pNAME + pEXPRS + ")"
    pCALL.setParseAction(lambda result: ECall(result[1],result[2]))

    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pLET | pCALL)

    # can't attach a parse action to pEXPR because of recursion, so
    # let's duplicate the parser
    pTOPEXPR = pEXPR.copy()
    pTOPEXPR.setParseAction(lambda result: {"result":"expression","expr":result[0]})

    pDEFUN = "(" + Keyword("defun") + pNAME + "(" + pNAMES + ")" + pEXPR + ")"
    pDEFUN.setParseAction(lambda result: {"result":"function",
                                          "name":result[2],
                                          "params":result[4],
                                          "body":result[6]})

    pTOP = (pDEFUN | pTOPEXPR)

    result = pTOP.parseString(input)[0]
    return result    # the first element of the result is the expression
from bind9_parser.isc_view import view_statements_set
from bind9_parser.isc_managed_keys import managed_keys_statement_standalone
from bind9_parser.isc_optview import optview_statements_set
from bind9_parser.isc_optviewserver import optviewserver_statements_set
from bind9_parser.isc_optviewzone import optviewzone_statements_set
from bind9_parser.isc_optviewzoneserver import optviewzoneserver_statements_set
from bind9_parser.isc_viewzone import viewzone_statements_set
from bind9_parser.isc_clause_zone import clause_stmt_zone_standalone
from bind9_parser.isc_clause_trusted_keys import clause_stmt_trusted_keys_standalone

# Union of every statement that may appear inside a 'view' block:
# view-specific statements plus those shared with options/zone scopes.
view_all_statements_set = (
    view_statements_set
    | optview_statements_set
    | optviewserver_statements_set
    | optviewzone_statements_set
    | optviewzoneserver_statements_set
    | viewzone_statements_set
    | clause_stmt_zone_standalone
    | clause_stmt_trusted_keys_standalone
    | managed_keys_statement_standalone
)

view_all_statements_series = ZeroOrMore(view_all_statements_set)

# view <name> [<class>] { <statements> };
# The '-' operator makes the tail mandatory once 'view' is seen, so
# errors are reported instead of silently backtracking.
clause_stmt_view_standalone = (
    Keyword('view').suppress()
    - Group(
        view_name('view_name')
        - Optional(rr_class_set('rr_class'))
        - Group(lbrack + view_all_statements_series + rbrack)('configs'))('')
    + semicolon
)('view')

# Zero or more view clauses, all collected under the 'view' key.
clause_stmt_view_series = (ZeroOrMore(clause_stmt_view_standalone))('view')
return int(n) except ValueError: return float(n.replace('d', 'e').replace('D', 'E')) # Basic tags eos = LineStart().suppress() eol = LineEnd().suppress() eol.setDebug(False) Number = Regex( '[-+]?(?:(?:\d+(?:\.\d*)?)|(?:\.\d+))(?:[eEdD][-+]\d+)?') # number Number.setParseAction(convertnum) NaN = Keyword('NaN').setParseAction(lambda t: np.nan_to_num(np.nan)) Str = Word(alphas) Str.setParseAction(ppc.upcaseTokens) StrNum = Regex('[a-zA-Z]') + Word(alphanums) # Literals Lminus = Literal('-') Lplus = Literal('+') Lslash = Literal('/') Lbslash = Literal('\\') Leq = Literal('=') Lden = Literal('g/cm^3') Llpar = Literal('(') Lrpar = Literal(')')
class BoolNot(object):
    """Wrap a parsed ``not <operand>`` so it evaluates lazily via bool()."""

    def __init__(self, t):
        # t[0] is the [op, operand] pair; keep only the operand.
        self.arg = t[0][1]

    def __bool__(self):
        v = bool(self.arg)
        return not v

    def __str__(self):
        return "~" + str(self.arg)

    __repr__ = __str__
    # Python 2 spelling of __bool__.
    __nonzero__ = __bool__


TRUE = Keyword("True")
FALSE = Keyword("False")
# An operand is a literal True/False or a single-letter variable name.
boolOperand = TRUE | FALSE | Word(alphas, max=1)
boolOperand.setParseAction(BoolOperand)

# define expression, based on expression operand and
# list of operations in precedence order
boolExpr = infixNotation(boolOperand, [
    ("not", 1, opAssoc.RIGHT, BoolNot),
    ("and", 2, opAssoc.LEFT, BoolAnd),
    ("or", 2, opAssoc.LEFT, BoolOr),
])

if __name__ == "__main__":
    p = True
    q = False
#!/usr/bin/env python3 # -*- coding: utf8 -*- from __future__ import absolute_import, print_function, unicode_literals from pyparsing import alphanums, alphas, Forward, Group, Keyword, OneOrMore, printables, quotedString, removeQuotes, stringEnd, stringStart, Word RULE = Forward() labracket = Keyword('{').suppress() rabracket = Keyword('}').suppress() RULE_ELEMENT = Group( Keyword('with-interface') + labracket + Group(OneOrMore(Word(alphanums + ':'))) + rabracket | Word(alphas + '-') + quotedString.setParseAction(removeQuotes) | Word(alphas + '-') + Word(printables)) RULE << stringStart.suppress() + Word(printables) + Group( OneOrMore(RULE_ELEMENT)) + stringEnd.suppress() if __name__ == "__main__": """Test implementation and dump results.""" # parsed_rule = RULE.parseString('allow id 1d6b:0002 serial "0000:00:14.0" name "xHCI Host Controller" hash "Miigb8mx72Z0q6L+YMai0mDZSlYC8qiSMctoUjByF2o=" parent-hash "G1ehGQdrl3dJ9HvW9w2HdC//pk87pKzFE1WY25bq8k4=" with-interface 09:00:00') parsed_rule = RULE.parseString( 'block id 04f2:b2ea serial "" name "Integrated Camera" hash "18xYrZpFsIyYEyw3SqedfmQFkrnVcPmbyLZIVLeFPPs=" with-interface { 0e:01:00 0e:02:00 0e:02:00 0e:02:00 0e:02:00 0e:02:00 0e:02:00 }' ) print(parsed_rule.dump())
import re

from pyparsing import (
    Word, Keyword, NotAny, alphanums, nums, alphas, OneOrMore, srange,
    ZeroOrMore, Regex
)

from whispy_lispy import ast

# Integer literal: a digit run not followed by '.' (which would make
# it the start of a float).
int_literal = Word(nums) + NotAny('.')
int_literal.setParseAction(ast.Int.from_parsed_result)

# Float literal: digit run, dot(s), digit run.
float_literal = Word(nums) + Word('.') + Word(nums)
float_literal.setParseAction(ast.Float.from_parsed_result)

# Scheme-style boolean literals.
bool_literal = Keyword('#t') | Keyword('#f')
bool_literal.setParseAction(ast.Bool.from_parsed_result)

# Double-quoted string; DOTALL lets it span lines, and the negative
# lookbehind keeps escaped quotes inside the literal.
string_literal = Regex(r'\".*?(?<!\\)\"', re.DOTALL)
string_literal.setParseAction(ast.String.from_parse_result)

# Floats are tried before ints so '12.3' is not claimed by int_literal.
grammar = OneOrMore(float_literal | int_literal | bool_literal |
                    string_literal)
def Verilog_BNF():
    """Return the (memoized) pyparsing grammar for Verilog source.

    The grammar is built once and cached in the module-level
    ``verilogbnf``; subsequent calls return the cached parser.
    """
    global verilogbnf
    if verilogbnf is None:
        # compiler directives
        compilerDirective = Combine(
            "`" + \
            oneOf("define undef ifdef else endif default_nettype "
                  "include resetall timescale unconnected_drive "
                  "nounconnected_drive celldefine endcelldefine") + \
            restOfLine).setName("compilerDirective")

        # primitives
        SEMI, COLON, LPAR, RPAR, LBRACE, RBRACE, LBRACK, RBRACK, DOT, COMMA, EQ = map(
            Literal, ";:(){}[].,=")

        identLead = alphas + "$_"
        identBody = alphanums + "$_"
        identifier1 = Regex(r"\.?[" + identLead + "][" + identBody + r"]*(\.[" +
                            identLead + "][" + identBody + "]*)*").setName("baseIdent")
        # Escaped identifiers start with '\' which is stripped here.
        identifier2 = Regex(r"\\\S+").setParseAction(
            lambda t: t[0][1:]).setName("escapedIdent")  #.setDebug()
        identifier = identifier1 | identifier2
        # ParserElement == str performs a trial match (sanity check).
        assert (identifier2 == r'\abc')

        hexnums = nums + "abcdefABCDEF" + "_?"
        base = Regex("'[bBoOdDhH]").setName("base")
        basedNumber = Combine(Optional(Word(nums + "_")) + base +
                              Word(hexnums + "xXzZ"),
                              joinString=" ", adjacent=False).setName("basedNumber")
        #~ number = ( basedNumber | Combine( Word( "+-"+spacedNums, spacedNums ) +
        #~ Optional( DOT + Optional( Word( spacedNums ) ) ) +
        #~ Optional( e + Word( "+-"+spacedNums, spacedNums ) ) ).setName("numeric") )
        number = (basedNumber | \
                  Regex(r"[+-]?[0-9_]+(\.[0-9_]*)?([Ee][+-]?[0-9_]+)?") \
                  ).setName("numeric")
        #~ decnums = nums + "_"
        #~ octnums = "01234567" + "_"

        # expressions
        expr = Forward().setName("expr")
        concat = Group(LBRACE + delimitedList(expr) + RBRACE)
        multiConcat = Group("{" + expr + concat + "}").setName("multiConcat")
        funcCall = Group(identifier + LPAR + Optional(delimitedList(expr)) +
                         RPAR).setName("funcCall")

        subscrRef = Group(LBRACK + delimitedList(expr, COLON) + RBRACK)
        subscrIdentifier = Group(identifier + Optional(subscrRef))
        #~ scalarConst = "0" | (( FollowedBy('1') + oneOf("1'b0 1'b1 1'bx 1'bX 1'B0 1'B1 1'Bx 1'BX 1") ))
        scalarConst = Regex("0|1('[Bb][01xX])?")
        mintypmaxExpr = Group(expr + COLON + expr + COLON + expr).setName("mintypmax")
        primary = (number |
                   (LPAR + mintypmaxExpr + RPAR) |
                   (LPAR + Group(expr) + RPAR).setName("nestedExpr") |
                   multiConcat |
                   concat |
                   dblQuotedString |
                   funcCall |
                   subscrIdentifier)

        unop = oneOf("+ - ! ~ & ~& | ^| ^ ~^").setName("unop")
        binop = oneOf("+ - * / % == != === !== && "
                      "|| < <= > >= & | ^ ^~ >> << ** <<< >>>").setName("binop")
        expr << ((unop + expr) |  # must be first!
                 (primary + "?" + expr + COLON + expr) |
                 (primary + Optional(binop + expr)))

        lvalue = subscrIdentifier | concat

        # keywords
        if_ = Keyword("if")
        else_ = Keyword("else")
        edge = Keyword("edge")
        posedge = Keyword("posedge")
        negedge = Keyword("negedge")
        specify = Keyword("specify")
        endspecify = Keyword("endspecify")
        fork = Keyword("fork")
        join = Keyword("join")
        begin = Keyword("begin")
        end = Keyword("end")
        default = Keyword("default")
        forever = Keyword("forever")
        repeat = Keyword("repeat")
        while_ = Keyword("while")
        for_ = Keyword("for")
        case = oneOf("case casez casex")
        endcase = Keyword("endcase")
        wait = Keyword("wait")
        disable = Keyword("disable")
        deassign = Keyword("deassign")
        force = Keyword("force")
        release = Keyword("release")
        assign = Keyword("assign")

        # event control: @(posedge clk), @identifier, @*
        eventExpr = Forward()
        eventTerm = (posedge + expr) | (negedge + expr) | expr | (
            LPAR + eventExpr + RPAR)
        eventExpr << (Group(delimitedList(eventTerm, Keyword("or"))))
        eventControl = Group("@" + (
            (LPAR + eventExpr + RPAR) | identifier | "*")).setName("eventCtrl")

        delayArg = (number |
                    Word(alphanums + "$_") |  #identifier |
                    (LPAR + Group(delimitedList(mintypmaxExpr | expr)) +
                     RPAR)).setName("delayArg")  #.setDebug()
        delay = Group("#" + delayArg).setName("delay")  #.setDebug()
        delayOrEventControl = delay | eventControl

        assgnmt = Group(lvalue + EQ + Optional(delayOrEventControl) +
                        expr).setName("assgnmt")
        nbAssgnmt = Group((lvalue + "<=" + Optional(delay) + expr) |
                          (lvalue + "<=" + Optional(eventControl) +
                           expr)).setName("nbassgnmt")

        range = LBRACK + expr + COLON + expr + RBRACK

        # declarations
        paramAssgnmt = Group(identifier + EQ + expr).setName("paramAssgnmt")
        parameterDecl = Group("parameter" + Optional(range) +
                              delimitedList(paramAssgnmt) +
                              SEMI).setName("paramDecl")

        inputDecl = Group("input" + Optional(range) +
                          delimitedList(identifier) + SEMI)
        outputDecl = Group("output" + Optional(range) +
                           delimitedList(identifier) + SEMI)
        inoutDecl = Group("inout" + Optional(range) +
                          delimitedList(identifier) + SEMI)

        regIdentifier = Group(identifier +
                              Optional(LBRACK + expr + COLON + expr + RBRACK))
        regDecl = Group("reg" + Optional("signed") + Optional(range) +
                        delimitedList(regIdentifier) + SEMI).setName("regDecl")
        timeDecl = Group("time" + delimitedList(regIdentifier) + SEMI)
        integerDecl = Group("integer" + delimitedList(regIdentifier) + SEMI)

        strength0 = oneOf("supply0 strong0 pull0 weak0 highz0")
        strength1 = oneOf("supply1 strong1 pull1 weak1 highz1")
        driveStrength = Group(LPAR + ((strength0 + COMMA + strength1) |
                                      (strength1 + COMMA + strength0)) +
                              RPAR).setName("driveStrength")
        nettype = oneOf(
            "wire tri tri1 supply0 wand triand tri0 supply1 wor trior trireg")
        expandRange = Optional(oneOf("scalared vectored")) + range

        realDecl = Group("real" + delimitedList(identifier) + SEMI)

        eventDecl = Group("event" + delimitedList(identifier) + SEMI)

        blockDecl = (parameterDecl | regDecl | integerDecl | realDecl |
                     timeDecl | eventDecl)

        # statements
        stmt = Forward().setName("stmt")  #.setDebug()
        stmtOrNull = stmt | SEMI
        caseItem = ( delimitedList( expr ) + COLON + stmtOrNull ) | \
                   ( default + Optional(":") + stmtOrNull )
        stmt << Group(
            (begin + Group(ZeroOrMore(stmt)) + end).setName("begin-end") |
            (if_ + Group(LPAR + expr + RPAR) + stmtOrNull +
             Optional(else_ + stmtOrNull)).setName("if") |
            (delayOrEventControl + stmtOrNull) |
            (case + LPAR + expr + RPAR + OneOrMore(caseItem) + endcase) |
            (forever + stmt) |
            (repeat + LPAR + expr + RPAR + stmt) |
            (while_ + LPAR + expr + RPAR + stmt) |
            (for_ + LPAR + assgnmt + SEMI + Group(expr) + SEMI + assgnmt +
             RPAR + stmt) |
            (fork + ZeroOrMore(stmt) + join) |
            (fork + COLON + identifier + ZeroOrMore(blockDecl) +
             ZeroOrMore(stmt) + end) |
            (wait + LPAR + expr + RPAR + stmtOrNull) |
            ("->" + identifier + SEMI) |
            (disable + identifier + SEMI) |
            (assign + assgnmt + SEMI) |
            (deassign + lvalue + SEMI) |
            (force + assgnmt + SEMI) |
            (release + lvalue + SEMI) |
            (begin + COLON + identifier + ZeroOrMore(blockDecl) +
             ZeroOrMore(stmt) + end).setName("begin:label-end") |
            # these *have* to go at the end of the list!!!
            (assgnmt + SEMI) |
            (nbAssgnmt + SEMI) |
            (Combine(Optional("$") + identifier) +
             Optional(LPAR + delimitedList(expr | empty) + RPAR) +
             SEMI)).setName("stmtBody")
        """
        x::=<blocking_assignment> ;
        x||= <non_blocking_assignment> ;
        x||= if ( <expression> ) <statement_or_null>
        x||= if ( <expression> ) <statement_or_null> else <statement_or_null>
        x||= case ( <expression> ) <case_item>+ endcase
        x||= casez ( <expression> ) <case_item>+ endcase
        x||= casex ( <expression> ) <case_item>+ endcase
        x||= forever <statement>
        x||= repeat ( <expression> ) <statement>
        x||= while ( <expression> ) <statement>
        x||= for ( <assignment> ; <expression> ; <assignment> ) <statement>
        x||= <delay_or_event_control> <statement_or_null>
        x||= wait ( <expression> ) <statement_or_null>
        x||= -> <name_of_event> ;
        x||= <seq_block>
        x||= <par_block>
        x||= <task_enable>
        x||= <system_task_enable>
        x||= disable <name_of_task> ;
        x||= disable <name_of_block> ;
        x||= assign <assignment> ;
        x||= deassign <lvalue> ;
        x||= force <assignment> ;
        x||= release <lvalue> ;
        """
        alwaysStmt = Group("always" + Optional(eventControl) +
                           stmt).setName("alwaysStmt")
        initialStmt = Group("initial" + stmt).setName("initialStmt")

        chargeStrength = Group(LPAR + oneOf("small medium large") +
                               RPAR).setName("chargeStrength")

        continuousAssign = Group(assign + Optional(driveStrength) +
                                 Optional(delay) + delimitedList(assgnmt) +
                                 SEMI).setName("continuousAssign")

        tfDecl = (parameterDecl | inputDecl | outputDecl | inoutDecl |
                  regDecl | timeDecl | integerDecl | realDecl)

        functionDecl = Group("function" + Optional(range | "integer" | "real") +
                             identifier + SEMI + Group(OneOrMore(tfDecl)) +
                             Group(ZeroOrMore(stmt)) + "endfunction")

        # net declarations
        inputOutput = oneOf("input output")
        netDecl1Arg = (nettype + Optional(expandRange) + Optional(delay) +
                       Group(delimitedList(~inputOutput + identifier)))
        netDecl2Arg = ("trireg" + Optional(chargeStrength) +
                       Optional(expandRange) + Optional(delay) +
                       Group(delimitedList(~inputOutput + identifier)))
        netDecl3Arg = (nettype + Optional(driveStrength) +
                       Optional(expandRange) + Optional(delay) +
                       Group(delimitedList(assgnmt)))
        netDecl1 = Group(netDecl1Arg + SEMI).setName("netDecl1")
        netDecl2 = Group(netDecl2Arg + SEMI).setName("netDecl2")
        netDecl3 = Group(netDecl3Arg + SEMI).setName("netDecl3")

        # gates
        gateType = oneOf("and nand or nor xor xnor buf bufif0 bufif1 "
                         "not notif0 notif1 pulldown pullup nmos rnmos "
                         "pmos rpmos cmos rcmos tran rtran tranif0 "
                         "rtranif0 tranif1 rtranif1")
        gateInstance = Optional( Group( identifier + Optional( range ) ) ) + \
                       LPAR + Group( delimitedList( expr ) ) + RPAR
        gateDecl = Group(gateType + Optional(driveStrength) + Optional(delay) +
                         delimitedList(gateInstance) + SEMI)

        udpInstance = Group(
            Group(identifier + Optional(range | subscrRef)) +
            LPAR + Group(delimitedList(expr)) + RPAR)
        udpInstantiation = Group(identifier - Optional(driveStrength) +
                                 Optional(delay) +
                                 delimitedList(udpInstance) +
                                 SEMI).setName("udpInstantiation")

        # module instantiation
        parameterValueAssignment = Group(
            Literal("#") + LPAR + Group(delimitedList(expr)) + RPAR)
        namedPortConnection = Group(DOT + identifier + LPAR + expr + RPAR).setName(
            "namedPortConnection")  #.setDebug()
        # Sanity check: trial match via ParserElement == str.
        assert (r'.\abc (abc )' == namedPortConnection)
        modulePortConnection = expr | empty
        #~ moduleInstance = Group( Group ( identifier + Optional(range) ) +
        #~ ( delimitedList( modulePortConnection ) |
        #~ delimitedList( namedPortConnection ) ) )
        inst_args = Group(LPAR + (delimitedList(namedPortConnection) |
                                  delimitedList(modulePortConnection)) +
                          RPAR).setName("inst_args")
        moduleInstance = Group(
            Group(identifier + Optional(range)) + inst_args).setName(
            "moduleInstance")  #.setDebug()

        moduleInstantiation = Group(
            identifier + Optional(parameterValueAssignment) +
            delimitedList(moduleInstance).setName("moduleInstanceList") +
            SEMI).setName("moduleInstantiation")

        parameterOverride = Group("defparam" + delimitedList(paramAssgnmt) +
                                  SEMI)
        task = Group("task" + identifier + SEMI + ZeroOrMore(tfDecl) +
                     stmtOrNull + "endtask")

        # specify blocks
        specparamDecl = Group("specparam" + delimitedList(paramAssgnmt) + SEMI)

        pathDescr1 = Group(LPAR + subscrIdentifier + "=>" +
                           subscrIdentifier + RPAR)
        pathDescr2 = Group(LPAR + Group(delimitedList(subscrIdentifier)) +
                           "*>" + Group(delimitedList(subscrIdentifier)) +
                           RPAR)
        pathDescr3 = Group(LPAR + Group(delimitedList(subscrIdentifier)) +
                           "=>" + Group(delimitedList(subscrIdentifier)) +
                           RPAR)
        pathDelayValue = Group((LPAR +
                                Group(delimitedList(mintypmaxExpr | expr)) +
                                RPAR) |
                               mintypmaxExpr |
                               expr)
        pathDecl = Group((pathDescr1 | pathDescr2 | pathDescr3) + EQ +
                         pathDelayValue + SEMI).setName("pathDecl")

        portConditionExpr = Forward()
        portConditionTerm = Optional(unop) + subscrIdentifier
        portConditionExpr << portConditionTerm + Optional(binop +
                                                          portConditionExpr)
        polarityOp = oneOf("+ -")
        levelSensitivePathDecl1 = Group(
            if_ + Group(LPAR + portConditionExpr + RPAR) +
            subscrIdentifier + Optional(polarityOp) + "=>" +
            subscrIdentifier + EQ + pathDelayValue + SEMI)
        levelSensitivePathDecl2 = Group(
            if_ + Group(LPAR + portConditionExpr + RPAR) + LPAR +
            Group(delimitedList(subscrIdentifier)) + Optional(polarityOp) +
            "*>" + Group(delimitedList(subscrIdentifier)) + RPAR + EQ +
            pathDelayValue + SEMI)
        levelSensitivePathDecl = levelSensitivePathDecl1 | levelSensitivePathDecl2

        edgeIdentifier = posedge | negedge
        edgeSensitivePathDecl1 = Group(
            Optional(if_ + Group(LPAR + expr + RPAR)) + LPAR +
            Optional(edgeIdentifier) + subscrIdentifier + "=>" + LPAR +
            subscrIdentifier + Optional(polarityOp) + COLON + expr + RPAR +
            RPAR + EQ + pathDelayValue + SEMI)
        edgeSensitivePathDecl2 = Group(
            Optional(if_ + Group(LPAR + expr + RPAR)) + LPAR +
            Optional(edgeIdentifier) + subscrIdentifier + "*>" + LPAR +
            delimitedList(subscrIdentifier) + Optional(polarityOp) + COLON +
            expr + RPAR + RPAR + EQ + pathDelayValue + SEMI)
        edgeSensitivePathDecl = edgeSensitivePathDecl1 | edgeSensitivePathDecl2

        edgeDescr = oneOf("01 10 0x x1 1x x0").setName("edgeDescr")

        # timing checks
        timCheckEventControl = Group(posedge | negedge | (edge + LBRACK +
                                     delimitedList(edgeDescr) + RBRACK))
        timCheckCond = Forward()
        timCondBinop = oneOf("== === != !==")
        timCheckCondTerm = (expr + timCondBinop + scalarConst) | (
            Optional("~") + expr)
        timCheckCond << ((LPAR + timCheckCond + RPAR) | timCheckCondTerm)
        timCheckEvent = Group(Optional(timCheckEventControl) +
                              subscrIdentifier +
                              Optional("&&&" + timCheckCond))
        timCheckLimit = expr
        controlledTimingCheckEvent = Group(timCheckEventControl +
                                           subscrIdentifier +
                                           Optional("&&&" + timCheckCond))
        notifyRegister = identifier

        systemTimingCheck1 = Group("$setup" + LPAR + timCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck2 = Group("$hold" + LPAR + timCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck3 = Group("$period" + LPAR +
                                   controlledTimingCheckEvent + COMMA +
                                   timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck4 = Group("$width" + LPAR +
                                   controlledTimingCheckEvent + COMMA +
                                   timCheckLimit +
                                   Optional(COMMA + expr + COMMA +
                                            notifyRegister) + RPAR + SEMI)
        systemTimingCheck5 = Group("$skew" + LPAR + timCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck6 = Group("$recovery" + LPAR +
                                   controlledTimingCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck7 = Group("$setuphold" + LPAR + timCheckEvent +
                                   COMMA + timCheckEvent + COMMA +
                                   timCheckLimit + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck = (FollowedBy('$') + (
            systemTimingCheck1 | systemTimingCheck2 | systemTimingCheck3 |
            systemTimingCheck4 | systemTimingCheck5 | systemTimingCheck6 |
            systemTimingCheck7)).setName("systemTimingCheck")
        sdpd = if_ + Group(LPAR + expr + RPAR) + \
            ( pathDescr1 | pathDescr2 ) + EQ + pathDelayValue + SEMI

        specifyItem = ~Keyword("endspecify") + (
            specparamDecl | pathDecl | levelSensitivePathDecl |
            edgeSensitivePathDecl | systemTimingCheck | sdpd)
        """
        x::= <specparam_declaration>
        x||= <path_declaration>
        x||= <level_sensitive_path_declaration>
        x||= <edge_sensitive_path_declaration>
        x||= <system_timing_check>
        x||= <sdpd>
        """
        specifyBlock = Group("specify" + ZeroOrMore(specifyItem) +
                             "endspecify").setName("specifyBlock")

        moduleItem = ~Keyword("endmodule") + (
            parameterDecl | inputDecl | outputDecl | inoutDecl | regDecl |
            netDecl3 | netDecl1 | netDecl2 | timeDecl | integerDecl |
            realDecl | eventDecl | gateDecl | parameterOverride |
            continuousAssign | specifyBlock | initialStmt | alwaysStmt |
            task | functionDecl |
            # these have to be at the end - they start with identifiers
            moduleInstantiation | udpInstantiation)
        """ All possible moduleItems, from Verilog grammar spec
        x::= <parameter_declaration>
        x||= <input_declaration>
        x||= <output_declaration>
        x||= <inout_declaration>
        ?||= <net_declaration>  (spec does not seem consistent for this item)
        x||= <reg_declaration>
        x||= <time_declaration>
        x||= <integer_declaration>
        x||= <real_declaration>
        x||= <event_declaration>
        x||= <gate_declaration>
        x||= <UDP_instantiation>
        x||= <module_instantiation>
        x||= <parameter_override>
        x||= <continuous_assign>
        x||= <specify_block>
        x||= <initial_statement>
        x||= <always_statement>
        x||= <task>
        x||= <function>
        """
        portRef = subscrIdentifier
        portExpr = portRef | Group(LBRACE + delimitedList(portRef) + RBRACE)
        port = portExpr | Group(DOT + identifier + LPAR + portExpr + RPAR)

        moduleHdr = Group(
            oneOf("module macromodule") + identifier +
            Optional(LPAR + Group(Optional(delimitedList(
                Group(oneOf("input output") +
                      (netDecl1Arg | netDecl2Arg | netDecl3Arg)) |
                port))) + RPAR) + SEMI).setName("moduleHdr")

        module = Group(moduleHdr + Group(ZeroOrMore(moduleItem)) +
                       "endmodule").setName("module")  #.setDebug()

        # user-defined primitives (UDPs)
        udpDecl = outputDecl | inputDecl | regDecl
        #~ udpInitVal = oneOf("1'b0 1'b1 1'bx 1'bX 1'B0 1'B1 1'Bx 1'BX 1 0 x X")
        udpInitVal = (Regex("1'[bB][01xX]") |
                      Regex("[01xX]")).setName("udpInitVal")
        udpInitialStmt = Group("initial" + identifier + EQ + udpInitVal +
                               SEMI).setName("udpInitialStmt")
        levelSymbol = oneOf("0 1 x X ? b B")
        levelInputList = Group(OneOrMore(levelSymbol).setName("levelInpList"))
        outputSymbol = oneOf("0 1 x X")
        combEntry = Group(levelInputList + COLON + outputSymbol + SEMI)
        edgeSymbol = oneOf("r R f F p P n N *")
        edge = Group( LPAR + levelSymbol + levelSymbol + RPAR ) | \
               Group( edgeSymbol )
        edgeInputList = Group(ZeroOrMore(levelSymbol) + edge +
                              ZeroOrMore(levelSymbol))
        inputList = levelInputList | edgeInputList
        seqEntry = Group(inputList + COLON + levelSymbol + COLON +
                         (outputSymbol | "-") + SEMI).setName("seqEntry")
        udpTableDefn = Group("table" + OneOrMore(combEntry | seqEntry) +
                             "endtable").setName("table")
        """
        <UDP>
        ::= primitive <name_of_UDP> ( <name_of_variable> <,<name_of_variable>>* ) ;
        <UDP_declaration>+
        <UDP_initial_statement>?
        <table_definition>
        endprimitive
        """
        udp = Group("primitive" + identifier + LPAR +
                    Group(delimitedList(identifier)) + RPAR + SEMI +
                    OneOrMore(udpDecl) + Optional(udpInitialStmt) +
                    udpTableDefn + "endprimitive")

        verilogbnf = OneOrMore(module | udp) + StringEnd()
        verilogbnf.ignore(cppStyleComment)
        verilogbnf.ignore(compilerDirective)
    return verilogbnf
def pyparsing_parse(text):
    """Parse a first-order-logic formula into a nested-list AST.

    Binary operators yield ``[lhs, op, rhs]``, unary ``~`` yields
    ``[op, operand]``, and quantifiers yield ``[quantifier, var, body]``.
    On a syntax error the offending line and a caret are printed and an
    empty list is returned.

    >>> formula = "a = b"
    >>> print(pyparsing_parse(formula))
    ['a', '=', 'b']
    >>> formula = "forall x: a = b"
    >>> print(pyparsing_parse(formula))
    ['forall', 'x', ['a', '=', 'b']]
    >>> formula = "a & b"
    >>> print(pyparsing_parse(formula))
    ['a', '&', 'b']
    >>> formula = "~true -> ~b = c"
    >>> print(pyparsing_parse(formula))
    [['~', 'true'], '->', ['~', ['b', '=', 'c']]]
    >>> formula = "~true -> ~(b = c)"
    >>> print(pyparsing_parse(formula))
    [['~', 'true'], '->', ['~', ['b', '=', 'c']]]
    >>> formula = "exists y: a -> b"
    >>> print(pyparsing_parse(formula))
    ['exists', 'y', ['a', '->', 'b']]
    >>> formula = "forall x: exists y: a = b"
    >>> print(pyparsing_parse(formula))
    ['forall', 'x', ['exists', 'y', ['a', '=', 'b']]]
    >>> formula = "forall x: exists y: a = b -> a = b & ~ a = b -> a = b"
    >>> print(pyparsing_parse(formula))
    ['forall', 'x', ['exists', 'y', [['a', '=', 'b'], '->', [[['a', '=', 'b'], '&', ['~', ['a', '=', 'b']]], '->', ['a', '=', 'b']]]]]
    >>> formula = "(forall x: exists y: a = b) -> a = b & ~ a = b -> a = b"
    >>> print(pyparsing_parse(formula))
    [['forall', 'x', ['exists', 'y', ['a', '=', 'b']]], '->', [[['a', '=', 'b'], '&', ['~', ['a', '=', 'b']]], '->', ['a', '=', 'b']]]
    >>> formula = "(forall x: exists y: true) -> true & ~ true -> true"
    >>> print(pyparsing_parse(formula))
    [['forall', 'x', ['exists', 'y', 'true']], '->', [['true', '&', ['~', 'true']], '->', 'true']]
    >>> formula = "a = b -> c = d & e = f"
    >>> result1 = pyparsing_parse(formula)
    >>> formula = "(a = b) -> (c = d & e = f)"
    >>> result2 = pyparsing_parse(formula)
    >>> result1 == result2
    True
    >>> result1
    [['a', '=', 'b'], '->', [['c', '=', 'd'], '&', ['e', '=', 'f']]]
    >>> formula = "forall x: exists y: true -> true & true | ~ true"
    >>> print(pyparsing_parse(formula))
    ['forall', 'x', ['exists', 'y', ['true', '->', [['true', '&', 'true'], '|', ['~', 'true']]]]]
    >>> formula = "~ true | true & true -> forall x: exists y: true"
    >>> print(pyparsing_parse(formula))
    [[['~', 'true'], '|', ['true', '&', 'true']], '->', ['forall', 'x', ['exists', 'y', 'true']]]
    >>> formula = "true & forall x: x = x"
    >>> print(pyparsing_parse(formula))
    ['true', '&', ['forall', 'x', ['x', '=', 'x']]]
    >>> formula = "true & (forall x: x = x)"  # same as previous
    >>> print(pyparsing_parse(formula))
    ['true', '&', ['forall', 'x', ['x', '=', 'x']]]
    >>> formula = "forall x: x = x & true"
    >>> print(pyparsing_parse(formula))
    ['forall', 'x', [['x', '=', 'x'], '&', 'true']]
    >>> formula = "(forall x: x = x) & true"  # different to previous
    >>> print(pyparsing_parse(formula))
    [['forall', 'x', ['x', '=', 'x']], '&', 'true']
    >>> formula = "forall x: = x & true"
    >>> print(pyparsing_parse(formula))
    Syntax error:
    forall x: = x & true
              ^
    []
    """
    # Punctuation is suppressed so it never appears in the result lists.
    left_parenthesis, right_parenthesis, colon = map(Suppress, "():")
    forall = Keyword("forall")
    exists = Keyword("exists")
    implies = Literal("->")
    or_ = Literal("|")
    and_ = Literal("&")
    not_ = Literal("~")
    equals = Literal("=")
    boolean = Keyword("false") | Keyword("true")
    symbol = Word(alphas, alphanums)
    # A term is either a function application f(t1, t2, ...) or a bare symbol.
    term = Forward()
    term << (Group(symbol + Group(left_parenthesis +
                                  delimitedList(term) +
                                  right_parenthesis)) | symbol)
    formula = Forward()
    # Quantifier body extends as far right as possible (binds the whole rest).
    forall_expression = Group(forall + symbol + colon + formula)
    exists_expression = Group(exists + symbol + colon + formula)
    operand = forall_expression | exists_expression | boolean | term
    # Precedence, tightest first: = then ~ then & then | then ->
    # (-> is right-associative, the rest of the binary ops left-associative).
    formula << operatorPrecedence(operand, [
        (equals, 2, opAssoc.LEFT),
        (not_, 1, opAssoc.RIGHT),
        (and_, 2, opAssoc.LEFT),
        (or_, 2, opAssoc.LEFT),
        (implies, 2, opAssoc.RIGHT)])
    try:
        result = formula.parseString(text, parseAll=True)
        # operatorPrecedence wraps everything in a single top-level group.
        assert len(result) == 1
        return result[0].asList()
    except (ParseException, ParseSyntaxException) as err:
        print("Syntax error:\n{0.line}\n{1}^".format(err,
              " " * (err.column - 1)))
        return []
if DEBUG: debug = (None, None, None) else: debug = (nothing, nothing, record_exception) keywords = [ "and", "as", "asc", "between", "case", "collate nocase", "cross join", "desc", "else", "end", "from", "group by", "having", "in", "inner join", "is", "join", "left join", "limit", "offset", "like", "on", "or", "order by", "select", "then", "union", "when", "where", "with" ] locs = locals() reserved = [] for k in keywords: name = k.upper().replace(" ", "") locs[name] = value = Keyword(k, caseless=True).setName( k.lower()).setDebugActions(*debug) reserved.append(value) RESERVED = MatchFirst(reserved) KNOWN_OPS = [(BETWEEN, AND), Literal("||").setName("concat").setDebugActions(*debug), Literal("*").setName("mul").setDebugActions(*debug), Literal("/").setName("div").setDebugActions(*debug), Literal("+").setName("add").setDebugActions(*debug), Literal("-").setName("sub").setDebugActions(*debug), Literal("<>").setName("neq").setDebugActions(*debug), Literal(">").setName("gt").setDebugActions(*debug), Literal("<").setName("lt").setDebugActions(*debug), Literal(">=").setName("gte").setDebugActions(*debug), Literal("<=").setName("lte").setDebugActions(*debug), Literal("=").setName("eq").setDebugActions(*debug),
from __future__ import division from __future__ import print_function from pyparsing import Keyword from pyparsing import Literal from undebt.pattern.common import SEMICOLON from undebt.pattern.common import NL from undebt.pattern.common import NAME from undebt.pattern.util import tokens_as_dict from undebt.haxecleanup.common import before_grammar from undebt.haxecleanup.common import add_mocks_to_before mock_definition = (Keyword("@Mock") + NL.suppress() + Keyword("public").suppress() + Keyword("var").suppress() + NAME("name") + Literal(":").suppress() + NAME("type") + SEMICOLON.suppress()) mocked_things = [] @tokens_as_dict(assert_keys=["name", "type"]) def store_mocks(tokens): """ @Mock public var something:SomeType; -> private var something:SomeType; """
def __init__(self, t): super(Exclude, self).__init__() self.matcher = t[0][1] def matches(self, metadata): return not self.matcher.matches(metadata) word = Word(alphanums + "._,-=/:") matcher = Literal("tag:") + ZeroOrMore(" ") + word | word matcher.setParseAction(Include) bool_expr = infixNotation( matcher, [ (Keyword("not"), 1, opAssoc.RIGHT, Exclude), ("and", 2, opAssoc.LEFT, AndMatching.from_tokens), ("or", 2, opAssoc.LEFT, OrMatching.from_tokens), ], ) class Matcher(object): def __init__(self, pattern): super(Matcher, self).__init__() self._matcher = bool_expr.parseString(pattern)[0] def __repr__(self): return repr(self._matcher) def matches(self, metadata):
def create_ace_grammer():
    """Create the pyparsing grammer for ace configuration parsing.

    :return: a pyparsing ``Group`` matching any single supported ACE
        configuration section (logging, access-list, probe, rserver,
        parameter-map, sticky, class-map, policy-map, interface, routes,
        snmp, serverfarm, ssl-proxy, crypto, aaa, action-list).
    """
    # Pyparsing grammer starts here :excitement :-O
    # Grammer Global -- generic tokens shared by the section grammers below.
    name = Word(printables)
    ipaddress = Combine(Word(nums) + ('.' + Word(nums)) * 3)
    num = Word(nums)

    # Grammer 1:
    # logging enable
    # logging timestamp
    # logging trap 9
    # logging buffered 9
    # logging host 127.0.0.1 udp/619
    log = Keyword('logging')
    single_key = Keyword('enable') | Keyword('timestamp')
    double_key = (Keyword('trap') | Keyword('buffered')) + num
    triple_key = Keyword('host') + ipaddress + name
    grammer_1 = Group(log + (single_key | double_key | triple_key))

    # Grammer 2:
    # eg : access-list FROM_INSIDE line 11 extended permit ip <ip> 255.255.255.0 any
    access = Keyword('access-list')
    in_out = Keyword('FROM_INSIDE') | Keyword('FROM_OUTSIDE')
    line = Keyword('line')
    extend = Keyword('extended')
    permit = Keyword('permit')
    ip_key = Keyword('ip')
    any_key = Keyword('any')
    ip_any = ipaddress | any_key
    grammer_2 = Group(access + in_out + line + num + extend + permit +
                      ip_key + ip_any + ip_any + ip_any)

    # Grammer 3:
    # eg: probe http prb_HTTP-1234
    #     port 1234 / receive 5 / interval 10 / expect status 200 200
    #     expect regex "(200|302)" / ssl version all
    #     request method get url /test/test:ping
    probe = Keyword('probe')
    type_key = Keyword('http') | Keyword('icmp') | Keyword('https') | Keyword('tcp')
    grammer_3_1 = Group(probe + type_key + name)
    grammer_3_2 = Group(Keyword('port') + Word(nums))
    grammer_3_3 = Group(Keyword('receive') + Word(nums))
    grammer_3_4 = Group(Keyword('interval') + Word(nums))
    grammer_3_5 = Group((Keyword('expect') + Keyword('status') + Word(nums) + Word(nums)) |
                        (Keyword('expect') + Keyword('regex') + Word(printables)))
    grammer_3_6 = Group(Keyword('ssl') + Keyword('version') + Keyword('all'))
    grammer_3_7 = Group(Keyword('request') + Keyword('method') + Keyword('get') +
                        Keyword('url') + Word(printables))
    grammer_3_8 = Group(Keyword('request') + Keyword('method') + Word(printables))
    grammer_3_9 = Group(Keyword('header') + Keyword('Host') +
                        Keyword('header-value') + Word(printables))
    grammer_3 = Group(grammer_3_1 + ZeroOrMore(grammer_3_2 | grammer_3_3 |
                                               grammer_3_4 | grammer_3_5 |
                                               grammer_3_6 | grammer_3_7 |
                                               grammer_3_8 | grammer_3_9))

    # grammer 4:
    # rserver host rs_Test123
    #   description TEST_DESC
    #   ip address 127.0.0.1
    #   webhost-redirection https://www.google.com/test/1234/ 301
    #   probe prb_HTTP-1234
    #   inservice
    rserver_key = Keyword('rserver')
    host = Keyword('host')
    rserver_name = Word(printables)
    grammer_4_1 = Group(rserver_key + host + rserver_name)
    grammer_4_2 = Group(Keyword('description') + restOfLine)
    # NOTE(review): Keyword('ip address') matches the literal two-word string.
    grammer_4_3 = Group(Keyword('ip address') + ipaddress)
    grammer_4_4 = Group(Keyword('probe') + Word(printables))
    grammer_4_5 = Group(Keyword('inservice'))
    grammer_4_6 = Group(Keyword('webhost-redirection') + Word(printables) + num)
    grammer_4 = Group(grammer_4_1 + ZeroOrMore(grammer_4_2 | grammer_4_3 |
                                               grammer_4_4 | grammer_4_5 |
                                               grammer_4_6))

    # grammer 5:
    # parameter-map type <connection|http|ssl> ALLOW_TEST
    #   tcp-options selective-ack allow / persistence-rebalance strict
    #   set timeout inactivity 9999 / session-cache timeout 300
    #   set header-maxparse-length 65535 / cipher RSA_EXPORT1024_WITH_RC4_56_SHA
    param_key = Keyword('parameter-map')
    type_key = Word('type')  # NOTE(review): Word (char-set), not Keyword; also reused by grammer_8_1
    connection = Word('connection') | Word('http') | Word('ssl')
    param_name = Word(printables)
    tcp_key = Word('tcp-options')
    tcp_type = Keyword('timestamp') | Keyword('window-scale') | Keyword('selective-ack')
    allow = Word('allow')
    sess_queue = Keyword('session-cache') | Keyword('queue-delay')
    timeout = Keyword('timeout')
    set_key = Keyword('set')  # renamed from `set` to stop shadowing the builtin
    length = Keyword('header-maxparse-length') | Keyword('content-maxparse-length')
    grammer_5_1 = Group(param_key + type_key + connection + param_name)
    grammer_5_2 = Group(tcp_key + tcp_type + allow)
    grammer_5_3 = Group(Keyword('persistence-rebalance') + Keyword('strict'))
    grammer_5_4 = Group(Keyword('set') + Keyword('timeout') +
                        Keyword('inactivity') + Word(nums))
    grammer_5_5 = Group(set_key + length + num)
    grammer_5_6 = Group(sess_queue + timeout + num)
    grammer_5_7 = Group(Keyword('cipher') + name)
    grammer_5_8 = Keyword('case-insensitive')
    grammer_5_9 = Group(Keyword('parsing') + name)
    grammer_5_10 = Group(Keyword('exceed-mss') + name)
    # Fix: grammer_5_5 was defined but missing from this alternation, so
    # documented "set header-maxparse-length 65535" lines never parsed.
    grammer_5 = Group(grammer_5_1 + ZeroOrMore(grammer_5_2 | grammer_5_3 |
                                               grammer_5_4 | grammer_5_5 |
                                               grammer_5_6 | grammer_5_7 |
                                               grammer_5_8 | grammer_5_9 |
                                               grammer_5_10))

    # Grammer 6:
    # sticky ip-netmask 255.255.255.255 address source test-adfdas-$5D
    # sticky http-cookie TEST TEST_COOKIE
    #   serverfarm sf_TEST / timeout 1000 / replicate sticky
    #   cookie insert browser-expire
    #   8 static cookie-value "ONETXEIS" rserver ESC20_TXEIS_APP_1 443
    sticky = Keyword('sticky')
    ipnetmask = Keyword('ip-netmask')
    http_cookie = Keyword('http-cookie')
    address = Keyword('address')
    source = Keyword('source')
    sticky_name = Word(printables)
    cookie = Keyword('cookie')
    insert = Keyword('insert')
    browser_expire = Keyword('browser-expire')
    static = Keyword('static')
    cookie_val = Keyword('cookie-value')
    grammer_6_1 = Group(sticky + ipnetmask + ipaddress + address + source +
                        sticky_name) | Group(sticky + http_cookie + name + name)
    grammer_6_2 = Group(Keyword('serverfarm') + Word(printables))
    grammer_6_3 = Group(Keyword('timeout') + Word(nums))
    grammer_6_4 = Group(Keyword('replicate') + sticky)
    grammer_6_5 = Group(cookie + insert + browser_expire)
    grammer_6_6 = Group(num + static + cookie_val + name + rserver_key + name + num)
    grammer_6 = Group(grammer_6_1 + ZeroOrMore(grammer_6_2 | grammer_6_3 |
                                               grammer_6_4 | grammer_6_5 |
                                               grammer_6_6))

    # grammer7:
    # class-map type management match-any TEST-PROTOCOLS
    # class-map match-any TEST_TEST_123
    #   2 match protocol icmp source-address 127.0.0.1 255.0.0.0
    #   2 match virtual-address 127.0.0.1 tcp eq 1234
    #   2 match http url .*
    classmap = Keyword('class-map')
    classmap_type = Keyword('type')
    mgmt = Keyword('management') | (Keyword('http') + Keyword('loadbalance'))
    type_key_att = classmap_type + mgmt
    match_key = Keyword('match-any') | Keyword('match-all')
    # Fix: the "type management" / "type http loadbalance" prefix shown in the
    # examples above was built (type_key_att) but never used; it is accepted
    # here as an Optional so plain "class-map match-any NAME" still parses.
    grammer7_1 = Group(classmap + Optional(type_key_att) + match_key + name)
    match_key = Keyword('match')
    proto_key = Keyword('protocol')
    grammer_url = Group(num + match_key + Keyword('http') + Keyword('url') + name)
    proto_type = Keyword('tcp') | Keyword('icmp') | Keyword('snmp') | \
        Keyword('http') | Keyword('https') | Keyword('udp')
    proto = proto_key + proto_type
    source_dest = Keyword('source-address') | Keyword('destination-address')
    virtual_add = Keyword('virtual-address')
    eq_key = Keyword('eq')
    eq_val = Keyword('https') | Keyword('www') | Keyword('http') | num
    any_key = Keyword('any')
    add_att = Optional(proto) + source_dest + ipaddress + ipaddress
    virt_att = virtual_add + ipaddress + proto_type + ((eq_key + eq_val) | any_key)
    grammer7_2 = Group(num + match_key + (add_att | virt_att)) | grammer_url
    grammer_7 = Group(grammer7_1 + ZeroOrMore(grammer7_2))

    # grammer8:
    # policy-map type loadbalance first-match LB_TEST_MAP_1235
    #   class class-default
    #     serverfarm TEST_FARM_2 / sticky-serverfarm TEST_FARM_2
    #     connection advanced-options TEST_CONN123
    #     loadbalance vip inservice / loadbalance policy LB_TEST_123
    #     inspect ftp / ssl-proxy server ssl_name
    #     nat dynamic 5 vlan 2100 / appl-parameter http advanced-options ADV-HTTP
    #     action test_rewrite
    policy_key = Keyword('policy-map')
    lb_key = Keyword('loadbalance')
    match = Keyword('first-match') | Keyword('multi-match')
    grammer_8_1 = Group(policy_key + Optional(type_key + lb_key) + match + name)
    grammer_8_2_1 = Group(Keyword('class') + name)
    grammer_8_2_2 = Group(((Keyword('serverfarm') | Keyword('action') |
                            Keyword('sticky-serverfarm')) + name) |
                          Keyword('drop') |
                          Keyword('insert-http') + restOfLine)
    grammer_8_2_3 = Group(Keyword('connection') + Keyword('advanced-option') + name)
    lb_vip = Keyword('vip') + (Keyword('inservice') |
                               Keyword('icmp-reply') +
                               ZeroOrMore(Keyword('active') +
                                          ZeroOrMore(Keyword('primary-inservice'))) |
                               Keyword('inservice'))
    lb_policy = Keyword('policy') + name
    grammer_8_2_4 = Group(Keyword('loadbalance') + (lb_vip | lb_policy))
    grammer_8_2_5 = Group(Keyword('inspect') + Keyword('ftp'))
    grammer_8_2_6 = Group(Keyword('ssl-proxy') + Keyword('server') + name)
    grammer_8_2_7 = Group(Keyword('nat') + Keyword('dynamic') + num +
                          Keyword('vlan') + num)
    grammer_8_2_8 = Group(Keyword('appl-parameter') + Keyword('http') +
                          Keyword('advanced-options') + name)
    grammer_8_2_9 = Group(Keyword('connection') + Keyword('advanced-options') + name)
    grammer_8_2_10 = Group(Keyword('action') + name)
    grammer_8_3 = Group(Keyword('description') + restOfLine)
    grammer_8_2 = Group(grammer_8_2_1 + ZeroOrMore(grammer_8_2_2 | grammer_8_2_3 |
                                                   grammer_8_2_4 | grammer_8_2_5 |
                                                   grammer_8_2_6 | grammer_8_2_7 |
                                                   grammer_8_2_8 | grammer_8_2_9 |
                                                   grammer_8_2_10))
    grammer_8 = Group(grammer_8_1 + ZeroOrMore(grammer_8_3) + ZeroOrMore(grammer_8_2))

    # grammer9:
    # interface vlan 1011
    #   ip address 127.0.0.1 255.255.255.0
    #   alias 127.0.0.1 255.255.255.0
    #   peer ip address 127.0.0.1 255.255.255.0
    #   access-group input FROM_TEST / service-policy input TEST_ACCESS
    #   no shutdown
    #   nat-pool 1 127.0.0.1 127.0.0.1 netmask 255.255.255.255 pat
    grammer_9_1 = Group(Keyword('interface') + Keyword('vlan') + num)
    grammer_9_2 = Group(ip_key + address + ipaddress + ipaddress)
    grammer_9_3 = Group(Keyword('alias') + ipaddress + ipaddress)
    grammer_9_4 = Group(Keyword('peer') + ip_key + address + ipaddress + ipaddress)
    grammer_9_5 = Group(Keyword('access-group') + Keyword('input') + name)
    grammer_9_6 = Group(Keyword('service-policy') + Keyword('input') + name)
    grammer_9_7 = Group(Keyword('no') + Keyword('shutdown'))
    grammer_9_8 = Group(Keyword('nat-pool') + num + ipaddress + ipaddress +
                        Keyword('netmask') + ipaddress + Keyword('pat'))
    grammer_9 = Group(grammer_9_1 + ZeroOrMore(grammer_9_2 | grammer_9_3 |
                                               grammer_9_4 | grammer_9_5 |
                                               grammer_9_6 | grammer_9_7 |
                                               grammer_9_8))

    # grammer 10:
    # ip route 0.0.0.0 0.0.0.0 127.0.0.1
    # Fix: the example has destination, mask AND gateway, but only two
    # addresses were accepted; the gateway is Optional for backward compat.
    grammer_10 = Group(ip_key + Keyword('route') + ipaddress + ipaddress +
                       Optional(ipaddress))

    # grammer 11:
    # snmp-server host 127.0.0.1 traps version 2c ********
    # snmp-server enable traps slb k7server
    snmp = Keyword('snmp-server')
    host = Keyword('host')
    traps = Keyword('traps')
    slb = Keyword('slb')
    version = Keyword('version')
    enable = Keyword('enable')
    host_att = host + ipaddress + traps + version + name + name
    ord_att = enable + traps + slb + name
    grammer_11 = Group(snmp + (host_att | ord_att))

    # grammer 12:
    # serverfarm host TEST_TEST_79
    #   probe probe_TEST_123 / inband-health check count
    #   predictor leastconns slowstart 30
    #   rserver RS_TEST123
    #     inservice
    serverfarm = Keyword('serverfarm')
    host = Keyword('host')
    grammer_12_1 = Group(serverfarm + host + name)
    grammer_12_2 = Group(Keyword('probe') + name)
    grammer_12_3 = Group(Keyword('inband-health') + Keyword('check') + name)
    grammer_12_4_1 = Keyword('rserver') + ~Word('host') + name + ZeroOrMore(num)
    grammer_12_4_2 = Keyword('inservice')
    grammer_12_4 = Group(grammer_12_4_1 + ZeroOrMore(grammer_12_4_2))
    grammer_12_5 = Group(Keyword('predictor') + Keyword('leastconns') +
                         Keyword('slowstart') + num)
    # Fix: earlier dead definitions of grammer_12_6/grammer_12_7 (built with
    # the `printables` character string instead of a parser element) were
    # immediately overwritten by the two below and have been removed, as has
    # the duplicated grammer_12_2 alternative in the ZeroOrMore.
    grammer_12_6 = Group(Keyword('description') + restOfLine)
    grammer_12_7 = Group(Keyword('predictor') + restOfLine)
    grammer_12 = Group(grammer_12_1 + ZeroOrMore(grammer_12_2 | grammer_12_3 |
                                                 grammer_12_4 | grammer_12_5 |
                                                 grammer_12_6 | grammer_12_7))

    # grammer ssl:
    # ssl-proxy service SSL_CLIENT
    #   key KEY12.PEM
    #   cert CERT12.PEM
    #   ssl advanced-options PM1
    grammer_ssl = Group(Keyword('ssl-proxy') + Keyword('service') + name)
    grammer_ssl_key = Group(Keyword('key') + name)
    grammer_ssl_cert = Group(Keyword('cert') + name)
    grammer_ssl_chaingroup = Group(Keyword('chaingroup') + name)
    grammer_ssl_opt = Group(Keyword('ssl') + Keyword('advanced-options') + name)
    grammer_ssl_comp = Group(grammer_ssl + ZeroOrMore(grammer_ssl_key |
                                                      grammer_ssl_cert |
                                                      grammer_ssl_chaingroup |
                                                      grammer_ssl_opt))

    # Grammer crypto:
    # crypto chaingroup ACME-PROD-CA_CHAINGROUP
    #   cert acme-prod-root-ca_24092044.crt
    # crypto csr-params llprd-frontend-csr
    #   country DK / state Sealand / organization-name ACME
    #   organization-unit ACME Input Management
    #   common-name tcpwebprod.prod.acmeintern.dk
    grammer_crypto_1 = Group(Keyword('crypto') + Keyword('chaingroup') + name)
    grammer_crypto_2 = Group(Keyword('cert') + name)
    grammer_crypto_3 = Group(grammer_crypto_1 + ZeroOrMore(grammer_crypto_2))
    grammer_crypto_4 = Group(Keyword('crypto') + Keyword('csr-params') + name)
    grammer_crypto_5 = Group(Keyword('country') + name)
    grammer_crypto_6 = Group(Keyword('state') + name)
    grammer_crypto_7 = Group(Keyword('organization-name') + restOfLine)
    grammer_crypto_8 = Group(Keyword('organization-unit') + name)
    grammer_crypto_9 = Group(Keyword('common-name') + name)
    grammer_crypto_10 = Group(grammer_crypto_4 + ZeroOrMore(grammer_crypto_5 |
                                                            grammer_crypto_6 |
                                                            grammer_crypto_7 |
                                                            grammer_crypto_8 |
                                                            grammer_crypto_9))

    # aaa authentication login default group TAC_PLUS local
    # aaa accounting default group TAC_PLUS
    grammer_aaa_1 = Keyword('aaa')
    grammer_aaa_2 = Keyword('authentication login') | Keyword('accounting')
    grammer_aaa_3 = Keyword('default')
    grammer_aaa_4 = Keyword('group')
    grammer_aaa_5 = Keyword('local')
    grammer_aaa = Group(grammer_aaa_1 + grammer_aaa_2 + grammer_aaa_3 +
                        grammer_aaa_4 + (name | grammer_aaa_5))

    # action-list type modify http test-ssl-rewrite
    #   ssl url rewrite location ".*"
    #   header rewrite request Host header-value "(.*)" replace "%1\/"
    grammer_al_1 = Keyword('action-list')
    grammer_al_2 = Keyword('type')
    grammer_al_3 = Keyword('modify')
    grammer_al_4 = Keyword('http')
    grammer_al_5 = Keyword('ssl')
    grammer_al_6 = Keyword('url')
    grammer_al_7 = Keyword('rewrite')
    grammer_al_8 = Keyword('location')
    grammer_al_9 = Keyword('header')
    grammer_al_10 = Keyword('request')
    grammer_al_11 = Keyword('Host')
    grammer_al_12 = Keyword('header-value')
    grammer_al_13 = Keyword('replace')
    grammer_al_1_1 = Group(grammer_al_5 + grammer_al_6 + grammer_al_7 +
                           grammer_al_8 + name)
    grammer_al_1_2 = Group(grammer_al_9 + grammer_al_7 + grammer_al_10 +
                           grammer_al_11 + grammer_al_12 + name +
                           grammer_al_13 + name)
    grammer_al = Group(Group(grammer_al_1 + grammer_al_2 + grammer_al_3 +
                             grammer_al_4 + name) +
                       ZeroOrMore(grammer_al_1_1 | grammer_al_1_2))

    # Overall Grammer
    grammer = Group(grammer_1 | grammer_2 | grammer_3 | grammer_4 | grammer_5 |
                    grammer_6 | grammer_7 | grammer_8 | grammer_9 | grammer_10 |
                    grammer_11 | grammer_12 | grammer_ssl_comp | grammer_aaa |
                    grammer_crypto_3 | grammer_crypto_10 | grammer_al)
    # Fix: was a Python-2-only `print` statement; a single-argument call
    # prints identically under both Python 2 and 3.
    print("Grammer created for ace config parser.")
    LOG.info("Grammer created for ace config parser.")
    return grammer
from __future__ import division
from __future__ import print_function

from pyparsing import Keyword
from pyparsing import Literal

from undebt.pattern.common import SEMICOLON
from undebt.pattern.common import PARENS
from undebt.pattern.common import NL
from undebt.pattern.common import NAME
from undebt.pattern.util import tokens_as_dict
from undebt.haxecleanup.common import before_grammar
from undebt.haxecleanup.common import add_mocks_to_before

# `@Mock(<type>)` annotation line, followed by `public var <name>:<type>;`.
# Only the mock type, variable name and declared type are kept as results.
_mock_annotation = Keyword("@Mock") + PARENS("mockType") + NL.suppress()
_mock_declaration = (
    Keyword("public").suppress()
    + Keyword("var").suppress()
    + NAME("name")
    + Literal(":").suppress()
    + NAME("type")
    + SEMICOLON.suppress()
)
mock_definition = _mock_annotation + _mock_declaration

# Accumulates every mock seen, for later processing by other patterns.
mocked_things = []


@tokens_as_dict(assert_keys=["name", "type", "mockType"])
def store_mocks(tokens):
    """Record one parsed @Mock declaration and return its replacement text."""
    mocked_things.append(tokens)
    replacement = "private var {0} : {1};".format(tokens["name"], tokens["type"])
    return replacement
def script(self):
    """Build and return the pyparsing grammar for nginx configuration text.

    Returns `sub_block`: one-or-more of if/location/hash/generic blocks,
    `include` lines, plain directives, `# configuration file <path>:`
    delimiters, comments, or unparsed brace blocks.
    """
    # constants
    left_bracket = Suppress("{")
    right_bracket = Suppress("}")
    semicolon = Suppress(";")
    space = White().suppress()
    keyword = Word(alphanums + ".+-_/")
    path = Word(alphanums + ".-_/")
    variable = Word("$_-" + alphanums)
    # unquoted value: parenthesized runs, ${var} references, or any chars
    # that are not whitespace/semicolon/braces/parens
    value_wq = Regex(r'(?:\([^\s;]*\)|\$\{\w+\}|[^\s;(){}])+')
    value_sq = NginxQuotedString(quoteChar="'")
    value_dq = NginxQuotedString(quoteChar='"')
    # double-quoted tried first so quoted forms win over the unquoted regex
    value = (value_dq | value_sq | value_wq)

    # modifier for location uri [ = | ~ | ~* | ^~ ]
    # NOTE: "~*" must precede "~" so the longer alternative matches first
    location_modifier = (
        Keyword("=") |
        Keyword("~*") | Keyword("~") |
        Keyword("^~"))

    # modifier for if statement
    if_modifier = Combine(Optional("!") + (
        Keyword("=") |
        Keyword("~*") | Keyword("~") |
        (Literal("-") + (Literal("f") | Literal("d") | Literal("e") | Literal("x")))))

    # either "<modifier> <value>" or "<$variable> [<modifier> <value>]"
    condition_body = (
        (if_modifier + Optional(space) + value) |
        (variable + Optional(space + if_modifier + Optional(space) + value)))

    # This ugly workaround needed to parse unquoted regex with nested parentheses
    # pyparsing.nestedExpr doesn't work in some rare cases like: ($http_user_agent ~* \( )
    # so we capture all content between parentheses and then parse it:)
    # TODO(buglloc): may be use something better?
    condition = Regex(r'\(.*\)').setParseAction(
        lambda s, l, t: condition_body.parseString(t[0][1:-1]))

    # rules
    include = (
        Keyword("include") +
        space +
        value +
        semicolon
    )("include")

    directive = (
        keyword +
        ZeroOrMore(space + value) +
        semicolon
    )("directive")

    # marker emitted by `nginx -T` style dumps between concatenated files
    file_delimiter = (
        Suppress("# configuration file ") +
        path +
        Suppress(":")
    )("file_delimiter")

    comment = (
        Regex(r"#.*")
    )("comment").setParseAction(_fix_comment)

    hash_value = Group(
        value +
        ZeroOrMore(space + value) +
        semicolon
    )("hash_value")

    # forward declarations: block kinds are mutually recursive via sub_block
    generic_block = Forward()
    if_block = Forward()
    location_block = Forward()
    hash_block = Forward()
    unparsed_block = Forward()

    # alternation order matters: specific block forms before generic ones,
    # unparsed_block as the last resort
    sub_block = OneOrMore(Group(if_block |
                                location_block |
                                hash_block |
                                generic_block |
                                include |
                                directive |
                                file_delimiter |
                                comment |
                                unparsed_block))

    if_block << (
        Keyword("if") +
        Group(condition) +
        Group(
            left_bracket +
            Optional(sub_block) +
            right_bracket)
    )("block")

    location_block << (
        Keyword("location") +
        Group(
            Optional(space + location_modifier) +
            Optional(space) +
            value) +
        Group(
            left_bracket +
            Optional(sub_block) +
            right_bracket)
    )("block")

    # e.g. map/geo style blocks: keyword $var { value...; }
    hash_block << (
        keyword +
        Group(OneOrMore(space + variable)) +
        Group(
            left_bracket +
            Optional(OneOrMore(hash_value)) +
            right_bracket)
    )("block")

    generic_block << (
        keyword +
        Group(ZeroOrMore(space + variable)) +
        Group(
            left_bracket +
            Optional(sub_block) +
            right_bracket)
    )("block")

    # fallback: keep unrecognized brace contents as a raw nested expression
    unparsed_block << (
        keyword +
        Group(ZeroOrMore(space + variable)) +
        nestedExpr(opener="{", closer="}")
    )("unparsed_block")

    return sub_block
alphas, alphas8bit, alphanums, Keyword, ) word_free = Word(alphas8bit + "_-/.+**" + alphanums) word_strict = Word(alphas8bit + alphas, alphas8bit + alphanums + "_") (lparen, rparen, lbrack, rbrack, lbrace, rbrace, colon) = map(Suppress, "()[]{}:") integer = Combine(Optional(oneOf("+ -")) + Word(nums)).setName("integer") cvt_int = lambda toks: int(toks[0]) integer.setParseAction(cvt_int) boolean = Keyword("False", caseless=True) cvt_bool = lambda toks: toks[0].lower == "true" boolean.setParseAction(cvt_bool) none = Keyword("None", caseless=True) cvt_none = lambda toks: [None] none.setParseAction(cvt_none) real = Combine( Optional(oneOf("+ -")) + Word(nums) + "." + Optional(Word(nums)) + Optional("e" + Optional(oneOf("+ -")) + Word(nums)) ).setName("real")
    int as int_,
    lcurly,
    quote,
    rcurly,
    time,
)
from ctx_parser.common.nonterminal_symbols import (
    COMMENT,
    NAME,
    POSITIVE_NUMBER,
    UINT_VALUE,
)

# Terminal symbols
# Keywords naming the individual date/time fields of the ctx grammar.
date = Keyword('date')
day = Keyword('day')
month = Keyword('month')
year = Keyword('year')
hour = Keyword('hour')
min_ = Keyword('min')  # trailing underscore avoids shadowing builtin min()
sec = Keyword('sec')


# Custom parse actions
def parseTimestamp(tokens):
    """Convert an epoch-seconds token into a datetime.

    NOTE(review): fromtimestamp() interprets the value in the *local*
    timezone and returns a naive datetime -- confirm source timestamps
    are not meant to be UTC.
    """
    timestamp = tokens[0]
    return datetime.datetime.fromtimestamp(int(timestamp))


def parseFlag(tokens):
def make_grammar():
    """Build the C-like expression grammar and return its top-level parser.

    Precedence (tightest first): unary ops, * // / %, + -, << >>,
    relational, equality, & ^ |, logical and, logical or.  Parse actions
    wrap matches in the project AST nodes (Function, Number, String,
    Variable, Operator, UnaryOperator).
    """
    from pyparsing import (ParserElement, Literal, Word, Forward,
                           Optional, QuotedString, Combine,
                           ZeroOrMore, Keyword, alphas, alphanums, nums)

    # memoizing parser -- needed for acceptable speed on this deep grammar
    ParserElement.enablePackrat()

    plus = Literal("+")
    minus = Literal("-")
    mul = Literal("*")
    div = Literal("/")
    floordiv = Literal("//")
    mod = Literal("%")
    lt = Literal("<")
    le = Literal("<=")
    gt = Literal(">")
    ge = Literal(">=")
    lshift = Literal("<<")
    rshift = Literal(">>")
    equal = Literal("==") | Literal("=") | Literal("!=")
    bitwise_not = Literal("~")
    bitwise_and = Literal("&")
    bitwise_or = Literal("|")
    bitwise_xor = Literal("^")
    logical_not = Literal("!") | Keyword("not")
    # Fix: "and" was a Literal, which also matched the *prefix* of longer
    # identifiers (e.g. "x andy"); Keyword enforces a word boundary, matching
    # the treatment of "not"/"or"/"AND"/"OR".
    logical_and = Literal("&&") | Keyword("and") | Keyword("AND")
    logical_or = Literal("||") | Keyword("or") | Keyword("OR")
    ident = Word(alphas + "_", alphanums + "_")
    functionname = Word(alphas + "_", alphanums + "_")
    unit = Word(alphas)
    int_number = Word(nums)
    float_number = Combine(Word(nums) + Optional(Literal(".") + Word(nums)))
    number = (float_number | int_number) + Optional(unit)

    lparent = Literal("(").suppress()
    rparent = Literal(")").suppress()

    # Fix: MatchFirst tries alternatives in order, so the longer operators
    # must come first; with (lt | le | gt | ge) the "<" of "<=" matched
    # alone and "a <= b" / "a >= b" could never parse.
    relational_op = (le | lt | ge | gt)
    shift = (lshift | rshift)
    add_op = (plus | minus)
    mul_op = (mul | floordiv | div | mod)  # "//" before "/" (longest first)

    expr = Forward()
    string = (QuotedString('"') | QuotedString("'"))
    primary_expr = ident | number | string | (lparent + expr + rparent)

    def make_op(s, loc, toks):
        # Fold "a op b op c ..." into left-nested Operator nodes.
        if len(toks) == 1:
            return toks[0]
        else:
            def loop(lhs, rest):
                if len(rest) == 0:
                    return lhs
                else:
                    return loop(Operator(rest[0], lhs, rest[1]), rest[2:])
            return loop(Operator(toks[1], toks[0], toks[2]), toks[3:])

    def make_unary(s, loc, toks):
        # Fold stacked prefixes ("!~x") into nested UnaryOperator nodes.
        if len(toks) == 1:
            return toks[0]
        else:
            return UnaryOperator(toks[0], make_unary(s, loc, toks[1:]))

    argument_expression_list = expr + ZeroOrMore(Literal(",").suppress() + expr)
    function_expression = (functionname + lparent + argument_expression_list +
                           rparent)
    postfix_expression = function_expression | primary_expr
    unary_expr = ZeroOrMore(bitwise_not | logical_not | minus |
                            plus) + postfix_expression
    # (typo fixed: was `cast_expresion`)
    cast_expression = unary_expr | postfix_expression
    mult_expr = cast_expression + ZeroOrMore(mul_op + cast_expression)
    add_expr = mult_expr + ZeroOrMore(add_op + mult_expr)
    shift_expr = add_expr + ZeroOrMore(shift + add_expr)
    relational_expr = shift_expr + ZeroOrMore(relational_op + shift_expr)
    equality_expr = relational_expr + ZeroOrMore(equal + relational_expr)
    bitwise_and_expr = equality_expr + ZeroOrMore(bitwise_and + equality_expr)
    bitwise_xor_expr = bitwise_and_expr + ZeroOrMore(bitwise_xor + bitwise_and_expr)
    bitwise_or_expr = bitwise_xor_expr + ZeroOrMore(bitwise_or + bitwise_xor_expr)
    logical_and_expr = bitwise_or_expr + ZeroOrMore(logical_and + bitwise_or_expr)
    logical_or_expr = logical_and_expr + ZeroOrMore(logical_or + logical_and_expr)
    expr <<= logical_or_expr

    # Attach AST-building parse actions.
    function_expression.setParseAction(Function)
    int_number.setParseAction(lambda s, loc, toks: int(toks[0]))
    float_number.setParseAction(lambda s, loc, toks: float(toks[0]))
    number.setParseAction(Number)
    string.setParseAction(String)
    ident.setParseAction(Variable)
    unary_expr.setParseAction(make_unary)
    mult_expr.setParseAction(make_op)
    add_expr.setParseAction(make_op)
    shift_expr.setParseAction(make_op)
    relational_expr.setParseAction(make_op)
    equality_expr.setParseAction(make_op)
    bitwise_and_expr.setParseAction(make_op)
    bitwise_xor_expr.setParseAction(make_op)
    bitwise_or_expr.setParseAction(make_op)
    logical_and_expr.setParseAction(make_op)
    logical_or_expr.setParseAction(make_op)

    return expr
def simple_option(name):
    """Return a parser for a ``<name> = <value>;`` option line.

    The value is a single word of letters, digits, '-' or '_'; matches are
    routed to ``self.set_simple_option`` from the enclosing scope.
    """
    value_word = Word(alphas + nums + '-_')
    option = Keyword(name) + EQUALS + value_word + SEMICOLON
    option.setParseAction(self.set_simple_option)
    return option
# read block default parameters if key == 'BlockParameterDefaults' : ParsedModel.block_defaults = get_block_defaults(value) if key == 'System' : ParsedModel.system = get_system(value) key_value = Forward() # key key = Word (alphanums + '$+-_*\\.') # value value = Forward() off = Keyword( 'off' ) on = Keyword( 'on' ) string_value = quotedString other_value = Word (alphanums + '$+-_*\\.') normal_value = on | off | string_value | other_value block_block = nestedExpr('{', '}', content = key_value) list_block = nestedExpr('[', ']', content = value) value << (normal_value | block_block | list_block | ';' | ',') key_value << Group(key + value).setParseAction(processNode) parser = OneOrMore(key_value)
def _parse_formula(text):
    """Parse a first-order-logic formula string into a nested list form.

    >>> formula = "p(a,b)"
    >>> print(parse_string(formula))
    ['p', (['a', 'b'], {})]
    >>> formula = "~p(a,b)"
    >>> print(parse_string(formula))
    ['~','p', (['a', 'b'], {})]
    >>> formula = "=(a,b)"
    >>> print(parse_string(formula))
    ['=', (['a', 'b'], {})]
    >>> formula = "<(a,b)"
    >>> print(parse_string(formula))
    ['<', (['a', 'b'], {})]
    >>> formula = "~p(a)"
    >>> print(parse_string(formula))
    ['~', 'p', (['a'], {})]
    >>> formula = "~p(a)|a(p)"
    >>> print(parse_string(formula))
    [(['~', 'p', (['a'], {})], {}), '|', (['a', (['p'], {})], {})]
    >>> formula = "p(a) | p(b)"
    >>> print(parse_string(formula))
    [(['p', (['a'], {})], {}), '|', (['p', (['b'], {})], {})]
    >>> formula = "~p(a) | p(b)"
    >>> print(parse_string(formula))
    [(['~', 'p', (['a'], {})], {}), '|', (['p', (['b'], {})], {})]
    >>> formula = "p(f(a)) | p(b)"
    >>> print(parse_string(formula))
    [(['p', ([(['f', (['a'], {})], {})], {})], {}), '|', (['p', (['b'], {})], {})]
    >>> formula = "p(a) | p(b) | p(c)"
    >>> print(parse_string(formula))
    [(['p', (['a'], {})], {}), '|', (['p', (['b'], {})], {}), '|', (['p', (['c'], {})], {})]
    """
    # BUG FIX (docstring only): the last doctest's expected output was a
    # copy-paste of the previous "p(f(a)) | p(b)" example; the left-associative
    # '|' operator flattens the three disjuncts into one group.

    # punctuation is suppressed so it never shows up in the results
    left_parenthesis, right_parenthesis, colon = map(Suppress, "():")

    # quantifiers and connectives
    exists = Keyword("exists")
    forall = Keyword("forall")
    implies = Literal("->")
    or_ = Literal("|")
    and_ = Literal("&")
    not_ = Literal("~")
    equiv_ = Literal("%")

    symbol = Word(alphas + "_" + "?" + ".", alphanums + "_" + "?" + "." + "-")

    # terms may nest arbitrarily: f(g(a), b)
    term = Forward()
    term << (Group(symbol + Group(left_parenthesis +
                                  delimitedList(term) + right_parenthesis)) | symbol)

    pred_symbol = Word(alphas + "_" + ".", alphanums + "_" + "." + "-") \
        | Literal("=") | Literal("<")

    # a literal is an optionally negated predicate applied to terms
    literal = Forward()
    literal << (Group(pred_symbol + Group(left_parenthesis +
                                          delimitedList(term) + right_parenthesis)) |
                Group(not_ + pred_symbol + Group(left_parenthesis +
                                                 delimitedList(term) + right_parenthesis)))

    formula = Forward()
    forall_expression = Group(forall + delimitedList(symbol) + colon + formula)
    exists_expression = Group(exists + delimitedList(symbol) + colon + formula)
    operand = forall_expression | exists_expression | literal

    # binding strength (tightest first): ~, &, |, %, ->
    formula << operatorPrecedence(operand, [(not_, 1, opAssoc.RIGHT),
                                            (and_, 2, opAssoc.LEFT),
                                            (or_, 2, opAssoc.LEFT),
                                            (equiv_, 2, opAssoc.RIGHT),
                                            (implies, 2, opAssoc.RIGHT)])
    result = formula.parseString(text, parseAll=True)
    return result.asList()[0]
)) CONST, VIRTUAL, CLASS, STATIC, PAIR, TEMPLATE, TYPEDEF, INCLUDE = map( Keyword, [ "const", "virtual", "class", "static", "pair", "template", "typedef", "#include", ], ) ENUM = Keyword("enum") ^ Keyword("enum class") ^ Keyword("enum struct") NAMESPACE = Keyword("namespace") BASIS_TYPES = map( Keyword, [ "void", "bool", "unsigned char", "char", "int", "size_t", "double", "float", ], )
def parse_element(cls, indent_stack):
    """Set ``extensions`` attribute to the rule definition."""
    # one quoted import path (quotes stripped), plus anything trailing it
    entry = quotedString.setParseAction(removeQuotes) + restOfLine
    header = Keyword('extensions:').suppress()
    body = indentedBlock(OneOrMore(entry), indent_stack)
    return (header + body).setResultsName('extensions')
def prefixed_line(starts_with):
    """Match a line beginning with *starts_with*; capture the stripped remainder as "text"."""
    prefix = Suppress(Keyword(starts_with))
    body = SkipTo(EOL)
    body = body.setParseAction(lambda toks: [toks[0].strip()]).setResultsName("text")
    return prefix + body + EOL
def parse_imp (input):
    """Parse one top-level input string of the toy imperative language and
    return either an AST node wrapper dict or a directive dict.

    Returns a dict with a "result" key: "statement", "declaration",
    "abstract", or "quit" (see pTOP actions at the bottom).
    """
    # parse a string into an element of the abstract representation

    # Grammar:
    #
    # <expr> ::= <integer>
    #            true
    #            false
    #            <identifier>
    #            ( if <expr> <expr> <expr> )
    #            ( function ( <name ... ) <expr> )
    #            ( <expr> <expr> ... )
    #
    # <decl> ::= var name = expr ;
    #
    # <stmt> ::= if <expr> <stmt> else <stmt>
    #            while <expr> <stmt>
    #            name <- <expr> ;
    #            print <expr> ;
    #            <block>
    #
    # <block> ::= { <decl> ... <stmt> ... }
    #
    # <toplevel> ::= <decl>
    #                <stmt>
    #

    idChars = alphas+"_+*-?!=<>"

    pIDENTIFIER = Word(idChars, idChars+"0123456789")
    #### NOTE THE DIFFERENCE
    # An identifier is immediately dereferenced (read through its ref cell).
    pIDENTIFIER.setParseAction(lambda result: EPrimCall(oper_deref,[EId(result[0])]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars,idChars+"0123456789")

    pNAMES = ZeroOrMore(pNAME)
    pNAMES.setParseAction(lambda result: [result])

    pINTEGER = Word("0123456789")
    pINTEGER.setParseAction(lambda result: EValue(VInteger(int(result[0]))))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(lambda result: EValue(VBoolean(result[0]=="true")))

    pEXPR = Forward()

    pEXPRS = ZeroOrMore(pEXPR)
    pEXPRS.setParseAction(lambda result: [result])

    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2],result[3],result[4]))

    # Wrap each parameter in a ref cell so assignment works inside the body.
    def mkFunBody (params,body):
        bindings = [ (p,ERefCell(EId(p))) for p in params ]
        return ELet(bindings,body)

    pFUN = "(" + Keyword("function") + "(" + pNAMES + ")" + pEXPR + ")"
    pFUN.setParseAction(lambda result: EFunction(result[3],mkFunBody(result[3],result[5])))

    pCALL = "(" + pEXPR + pEXPRS + ")"
    pCALL.setParseAction(lambda result: ECall(result[1],result[2]))

    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pFUN | pCALL)

    # A declaration parses to a (name, initializer-expression) pair.
    pDECL_VAR = "var" + pNAME + "=" + pEXPR + ";"
    pDECL_VAR.setParseAction(lambda result: (result[1],result[3]))

    # hack to get pDECL to match only PDECL_VAR (but still leave room
    # to add to pDECL later)
    pDECL = ( pDECL_VAR | NoMatch() )

    pDECLS = ZeroOrMore(pDECL)
    pDECLS.setParseAction(lambda result: [result])

    pSTMT = Forward()

    pSTMT_IF_1 = "if" + pEXPR + pSTMT + "else" + pSTMT
    pSTMT_IF_1.setParseAction(lambda result: EIf(result[1],result[2],result[4]))

    # if-without-else: the else branch defaults to the value true
    pSTMT_IF_2 = "if" + pEXPR + pSTMT
    pSTMT_IF_2.setParseAction(lambda result: EIf(result[1],result[2],EValue(VBoolean(True))))

    pSTMT_WHILE = "while" + pEXPR + pSTMT
    pSTMT_WHILE.setParseAction(lambda result: EWhile(result[1],result[2]))

    pSTMT_PRINT = "print" + pEXPR + ";"
    pSTMT_PRINT.setParseAction(lambda result: EPrimCall(oper_print,[result[1]]));

    pSTMT_UPDATE = pNAME + "<-" + pEXPR + ";"
    pSTMT_UPDATE.setParseAction(lambda result: EPrimCall(oper_update,[EId(result[0]),result[2]]))

    pSTMTS = ZeroOrMore(pSTMT)
    pSTMTS.setParseAction(lambda result: [result])

    # A block introduces its declarations as ref-cell bindings around the body.
    def mkBlock (decls,stmts):
        bindings = [ (n,ERefCell(expr)) for (n,expr) in decls ]
        return ELet(bindings,EDo(stmts))

    pSTMT_BLOCK = "{" + pDECLS + pSTMTS + "}"
    pSTMT_BLOCK.setParseAction(lambda result: mkBlock(result[1],result[2]))

    pSTMT << ( pSTMT_IF_1 | pSTMT_IF_2 | pSTMT_WHILE | pSTMT_PRINT | pSTMT_UPDATE | pSTMT_BLOCK )

    # can't attach a parse action to pSTMT because of recursion, so let's duplicate the parser
    pTOP_STMT = pSTMT.copy()
    pTOP_STMT.setParseAction(lambda result: {"result":"statement",
                                             "stmt":result[0]})

    pTOP_DECL = pDECL.copy()
    pTOP_DECL.setParseAction(lambda result: {"result":"declaration",
                                             "decl":result[0]})

    pABSTRACT = "#abs" + pSTMT
    pABSTRACT.setParseAction(lambda result: {"result":"abstract",
                                             "stmt":result[1]})

    pQUIT = Keyword("#quit")
    pQUIT.setParseAction(lambda result: {"result":"quit"})

    pTOP = (pQUIT | pABSTRACT | pTOP_DECL | pTOP_STMT )

    result = pTOP.parseString(input)[0]
    return result    # the first element of the result is the expression
class DnstestParser:
    """
    Parses natural-language-like grammar describing DNS changes
    """

    # human-readable descriptions of the supported command forms,
    # appended alongside each cmd_* parser below
    grammar_strings = []

    # implement my grammar
    word = Word(alphas)
    value = Word(alphanums).setResultsName("value")
    # each operation keyword tags the result dict's "operation" key
    add_op = Keyword("add").setResultsName("operation")
    rm_op = Keyword("remove").setResultsName("operation")
    rename_op = Keyword("rename").setResultsName("operation")
    change_op = Keyword("change").setResultsName("operation")
    confirm_op = Keyword("confirm").setResultsName("operation")
    # optional filler words: "record"/"entry"/"name", "with value" etc.
    rec_op = Or([Keyword("record"), Keyword("entry"), Keyword("name")])
    val_op = Optional(Keyword("with")) + Or(
        [Keyword("value"), Keyword("address"), Keyword("target")])

    fqdn = Regex(
        "(([a-zA-Z0-9_\-]{0,62}[a-zA-Z0-9])(\.([a-zA-Z0-9_\-]{0,62}[a-zA-Z0-9]))*)"
    )
    ipaddr = Regex(
        "((([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}(1[0-9]{2}|2[0-4][0-9]|25[0-5]|[1-9][0-9]|[0-9]))"
    )
    hostname = Regex("([a-zA-Z0-9_\-]{0,62}[a-zA-Z0-9])")
    # NotAny guard rejects bare IPs where a hostname is required
    hostname_or_fqdn = And([NotAny(ipaddr), MatchFirst([fqdn, hostname])])
    hostname_fqdn_or_ip = MatchFirst([ipaddr, fqdn, hostname])

    grammar_strings.append(
        'add (record|name|entry)? <hostname_or_fqdn> (with ?)(value|address|target)? <hostname_fqdn_or_ip>'
    )
    cmd_add = add_op + Optional(rec_op) + hostname_or_fqdn.setResultsName(
        "hostname") + Suppress(val_op) + hostname_fqdn_or_ip.setResultsName(
            'value')

    grammar_strings.append('remove (record|name|entry)? <hostname_or_fqdn>')
    cmd_remove = rm_op + Optional(rec_op) + hostname_fqdn_or_ip.setResultsName(
        "hostname")

    grammar_strings.append(
        'rename (record|name|entry)? <hostname_or_fqdn> (with ?)(value ?) <value> to <hostname_or_fqdn>'
    )
    cmd_rename = rename_op + Suppress(Optional(
        rec_op)) + hostname_or_fqdn.setResultsName("hostname") + Suppress(
            Optional(val_op)) + hostname_fqdn_or_ip.setResultsName(
                'value') + Suppress(
                    Keyword("to")) + hostname_or_fqdn.setResultsName('newname')

    grammar_strings.append(
        'change (record|name|entry)? <hostname_or_fqdn> to <hostname_fqdn_or_ip>'
    )
    cmd_change = change_op + Suppress(Optional(
        rec_op)) + hostname_or_fqdn.setResultsName("hostname") + Suppress(
            Keyword("to")) + hostname_fqdn_or_ip.setResultsName('value')

    grammar_strings.append('confirm (record|name|entry)? <hostname_or_fqdn>')
    cmd_confirm = confirm_op + Suppress(
        Optional(rec_op)) + hostname_or_fqdn.setResultsName("hostname")

    # one parser covering every supported command form
    line_parser = Or(
        [cmd_confirm, cmd_add, cmd_remove, cmd_rename, cmd_change])

    def __init__(self):
        pass

    def parse_line(self, line):
        """Parse one command line; return a dict with string values.

        Raises pyparsing's ParseException if the line matches no command.
        """
        res = self.line_parser.parseString(line, parseAll=True)
        d = res.asDict()
        # hostname_or_fqdn using And and NotAny now returns a ParseResults object instead of a string,
        # we need to convert that to a string to just take the first value
        for i in d:
            if isinstance(d[i], ParseResults):
                d[i] = d[i][0]
        return d

    def get_grammar(self):
        """ return a list of possible grammar options """
        return self.grammar_strings
__repr__ = __str__ def to_expr(self): return Expr(self.op, *self.args) # code nicked from the book Programming in Python 3 (kindle) # optimisation -- before creating any parsing elements ParserElement.enablePackrat() # allow python style comments comment = (Literal("#") + restOfLine).suppress() LP, RP, colon = map(Suppress, "():") forall = Keyword("forall") | Literal("\u2200") exists = Keyword("exists") | Literal("\u2203") implies = Keyword("==>") | Keyword("implies") | Literal("\u2192") | Literal("->") implied = Keyword("<==") | Keyword("impliedby") | Literal("\u2190") | Literal("<-") iff = Keyword("<=>") | Keyword("iff") | Literal("\u2194") | Keyword("<->") or_ = Keyword("\\/") | Literal("|") | Keyword("or") | Literal("\u2228") and_ = Keyword("/\\") | Literal("&") | Keyword("and") | Literal("\u2227") not_ = Literal("~") | Keyword("not") | Literal("\u00AC") equals = Literal("=") | Keyword("equals") notequals = Literal("=/=") | Literal("!=") | Keyword("notequals") | Literal("\u2260") boolean = CaselessKeyword("FALSE") | CaselessKeyword("TRUE") variable = ~(and_ | or_ | not_ | forall | exists | implied | implies | iff) + Combine( Optional("?") + Word(alphas, alphanums + "'") ) constant = ~(and_ | or_ | not_ | forall | exists | implied | implies | iff) + Word(alphas, alphanums + "'-_")
return val def __repr__(self): return 'SubstituteVal(%s)' % self._path # Grammar definition pathDelimiter = '.' # match gcloud's variable identifier = Combine( Optional('${') + Optional('_') + Word(alphas, alphanums + "_") + Optional('}')) # identifier = Word(alphas, alphanums + "_") propertyPath = delimitedList(identifier, pathDelimiter, combine=True) and_ = Keyword("and", caseless=True) or_ = Keyword("or", caseless=True) in_ = Keyword("in", caseless=True) lparen = Suppress('(') rparen = Suppress(')') binaryOp = oneOf("== != < > >= <= in notin", caseless=True)('operator') E = CaselessLiteral("E") numberSign = Word("+-", exact=1) realNumber = Combine( Optional(numberSign) + (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) + Optional(E + Optional(numberSign) + Word(nums)))
def parse_morphology(filename, filename_toparse):
    """Translate a NEURON hoc morphology file (*filename_toparse*) into a
    Python ``shape_3D`` function written to *filename*.

    All output is produced as a side effect of the print_* parse actions,
    which write to the converted file while parseString runs.

    NOTE(review): neither ``converted_file`` nor ``file_to_parse`` is
    closed here — possible resource leak; confirm whether a later (unseen)
    part of this function or its caller closes them.
    """
    global current_section_name
    current_section_name = ''
    converted_file = open(filename, 'w')
    put_string = 'from neuron import h\ndef shape_3D(self):\n'
    converted_file.write(put_string)
    ntabs = 1
    # from here on, add a tab to all lines
    # define lists of characters for a..z and 1..9
    uppercase = lowercase.upper()
    lowercaseplus = lowercase+('_')
    lowercaseplus = lowercaseplus+(uppercase)
    nonzero = ''.join([str(i) for i in range(1, 10)])
    COMMA = Literal(',')
    EQUALS = Literal('=')
    MINUS = Literal('-')
    PERIOD = Literal('.')
    LCURL = Literal('{')
    RCURL = Literal('}')
    LBRACK = Literal('(')
    RBRACK = Literal(')')
    LSQUARE = Literal('[')
    RSQUARE = Literal(']')
    PTSCLEAR = Literal('{pt3dclear()').suppress()
    PTSCLEARNL = Literal('{\npt3dclear()\n').suppress()
    integer = Word(nums)
    single_section = Word(lowercaseplus, min = 2)
    # NOTE(review): setResultsName returns a copy, so this call is a no-op
    # as written; the named result is discarded.
    single_section.setResultsName('SINGLE')
    integer_var = Word(lowercase, exact = 1)
    # signed decimal number, e.g. -47.5
    double = Group(Optional(MINUS) + integer + Optional(PERIOD + integer))
    operand = integer ^ integer_var
    operator = Word('+-*/', exact=1)
    unaryoperation = operand
    binaryoperation = operand + operator + operand
    operation = unaryoperation ^ binaryoperation
    array_section = Group(single_section + LSQUARE.suppress() + operation + RSQUARE.suppress())
    # NOTE(review): same no-op setResultsName as above.
    array_section.setResultsName('ARRAY')
    section = single_section ^ array_section
    section_location = Group(section + LBRACK.suppress() + double + RBRACK.suppress())
    # each statement kind writes its translation via a parse-action factory
    create = Keyword('create').suppress() + section + ZeroOrMore(COMMA.suppress() + section)
    create.setParseAction(print_create(converted_file, ntabs))
    connect = Keyword('connect').suppress() + section_location + COMMA.suppress() + section_location
    connect.setParseAction(print_connect(converted_file, ntabs))
    for_loop = Keyword('for').suppress() + integer_var + EQUALS.suppress() + integer + COMMA.suppress() + integer
    # NOTE TO FUTURE SELF: for loops can only have one line of code in this implementation
    for_loop.setParseAction(print_for_loop(converted_file, ntabs))
    point_add = Literal('pt3dadd(').suppress() + double + COMMA.suppress() + double + COMMA.suppress() + double + COMMA.suppress() + double + RBRACK.suppress()
    point_add.setParseAction(print_point_add(converted_file, ntabs))
    point_style = Literal('pt3dstyle(').suppress() + double + COMMA.suppress() + double + COMMA.suppress() + double + COMMA.suppress() + double + RBRACK.suppress()
    point_style.setParseAction(print_point_style(converted_file, ntabs))
    geom_define_pre = section + (PTSCLEAR ^ PTSCLEARNL)
    geom_define_body = OneOrMore(point_add ^ point_style) + RCURL.suppress()
    geom_define_pre.setParseAction(update_current_section(converted_file, ntabs))
    geom_define = geom_define_pre + geom_define_body
    expression = (connect ^ for_loop ^ geom_define ^ create)
    codeblock = OneOrMore(expression)
    test_str = 'Ia_node[0] {\npt3dclear()\n pt3dadd( 47, 76, 92.5, 3.6) }'
    #file_to_parse = open('../../tempdata/Ia_geometry')
    file_to_parse = open(filename_toparse)
    tokens = codeblock.parseString(file_to_parse.read())
def __init__(self, query):
    """Build the boolean query parser and the log-line parser.

    query: free-text search query (lowercased; may be None/empty, in
    which case ``self._query_parser`` is set to False).
    """
    self._methods = {
        'and': self.evaluate_and,
        'or': self.evaluate_or,
        'not': self.evaluate_not,
        'parenthesis': self.evaluate_parenthesis,
        'quotes': self.evaluate_quotes,
        'word': self.evaluate_word,
    }

    self.line = ''
    self.query = query.lower() if query else ''

    if self.query:
        # TODO: Cleanup
        operator_or = Forward()
        operator_word = Group(Word(alphanums)).setResultsName('word')

        operator_quotes_content = Forward()
        operator_quotes_content << (
            (operator_word + operator_quotes_content) | operator_word)

        operator_quotes = Group(
            Suppress('"') + operator_quotes_content +
            Suppress('"')).setResultsName('quotes') | operator_word

        # BUG FIX: was setResultsName0(), which does not exist on pyparsing
        # elements and raised AttributeError for any non-empty query.
        operator_parenthesis = Group(
            (Suppress('(') + operator_or + Suppress(")")
             )).setResultsName('parenthesis') | operator_quotes

        operator_not = Forward()
        # BUG FIX: the keyword was 'no', which never matches the intended
        # "not <term>" syntax handled by self._methods['not'].
        operator_not << (
            Group(Suppress(Keyword('not', caseless=True)) +
                  operator_not).setResultsName('not') | operator_parenthesis)

        operator_and = Forward()
        operator_and << (
            Group(operator_not + Suppress(Keyword('and', caseless=True)) +
                  operator_and).setResultsName('and') |
            Group(operator_not + OneOrMore(~oneOf('and or') + operator_and)
                  ).setResultsName('and') | operator_not)

        operator_or << (
            Group(operator_and + Suppress(Keyword('or', caseless=True)) +
                  operator_or).setResultsName('or') | operator_and)

        self._query_parser = operator_or.parseString(self.query)[0]
    else:
        self._query_parser = False

    # log line: "Y-M-D H:M LEVEL PLUGIN TASK MESSAGE"
    integer = Word(nums).setParseAction(lambda t: int(t[0]))
    date = Combine((integer + '-' + integer + '-' + integer) + ' ' +
                   integer + ':' + integer)
    word = Word(printables)
    self._log_parser = (
        date.setResultsName('timestamp') +
        word.setResultsName('log_level') +
        word.setResultsName('plugin') +
        (White(min=16).setParseAction(
            lambda s, l, t: [t[0].strip()]).setResultsName('task') |
         (White(min=1).suppress() & word.setResultsName('task'))) +
        restOfLine.setResultsName('message'))
direction = None name = None edge = None vertex = None property = None def to_string(self): # graph.schema().vertexLabel('reviewer').buildEdgeIndex('ratedByStars', rated).direction(OUT). byPropertyKey('stars').add() s = self.schema s += "edge_label = schema.edgeLabel('{}')\n".format(self.edge) s += "schema.vertexLabel('{vertex}').buildEdgeIndex('{name}', edge_label).direction({direction}).byPropertyKey('{property}').add()".format(vertex=self.vertex, name=self.name, direction=self.direction, property=self.property) return s create = Keyword('create', caseless=True) property = Keyword('property', caseless=True) vertex = Keyword('vertex', caseless=True) edge = Keyword('edge', caseless=True) graph = Keyword('graph', caseless=True) graphs = Keyword('graphs', caseless=True) show = Keyword('show', caseless=True) drop = Keyword("drop", caseless=True) index = Keyword('index', caseless=True) label = Keyword('label', caseless=True) on_ = Keyword("on", caseless=True).suppress() use = Keyword('use', caseless=True).suppress() describe = Keyword("desc", caseless=True) | \ Keyword("describe", caseless=True) direction = Keyword("OUT", caseless=True) | Keyword("IN", caseless=True)
# ip46_addr is just plain addressing (without subnet suffix) for IPv4 and IPv6
ip46_addr = (ip4_addr | ip6_addr)
ip46_addr.setName('<ip46_addr>')

# ip46_addr_or_prefix is just about every possible IP addressing methods out there
ip46_addr_or_prefix = (
    ip4s_prefix  # Lookahead via '/'
    | ip4_addr   # Lookahead via 'non-hex'
    | ip6_addr)
ip46_addr_or_prefix.setName('ip4^ip6^ip4/s')

# BUG FIX: max=5 was previously passed to setParseAction(), where pyparsing
# silently ignores it; the digit-count cap belongs on Word() itself.
ip_port = Word(nums, max=5).setParseAction(lambda toks: int(toks[0]))
ip_port.setName('<ip_port>')

inet_ip_port_keyword_and_number_element = (
    Keyword('port').suppress()
    - ip_port('ip_port')
    # No semicolon here
)('')

ip46_addr_and_port_list = ((ip46_addr('addr')
                            + Optional(inet_ip_port_keyword_and_number_element)
                            + semicolon)('ip46_addr_port'))('')

inet_ip_port_keyword_and_wildcard_element = (Keyword('port').suppress()
                                             - (ip_port('ip_port_w')
                                                | Literal('*')('ip_port_w'))('')
                                             )  # ('')  # ('ip_port_w')

# BUG FIX: same misplaced max= keyword as ip_port above (DSCP is 0-63, so
# at most 3 characters were intended).
dscp_port = Word(nums, max=3).setParseAction(lambda toks: int(toks[0]))
dscp_port.setName('<dscp_port>')
def create_bnf(allow_tuple=False, free_word=False):
    """Build a pyparsing grammar for Python-literal-like configuration values.

    allow_tuple: if True, the returned parser also accepts a bare tuple at
        top level (``dict_inner | tuple_inner``); otherwise only dict entries.
    free_word: if True, bare words may contain '-', '/', '.', '+' and '*'.
    Returns the pyparsing parser element.
    """
    cvt_int = lambda toks: int(toks[0])
    cvt_real = lambda toks: float(toks[0])
    # BUG FIX: was ``toks[0].lower == 'true'`` — comparing the bound method
    # itself, which is always False; the method must be called.
    cvt_bool = lambda toks: toks[0].lower() == 'true'
    cvt_none = lambda toks: [None]
    cvt_tuple = lambda toks: tuple(toks.asList())
    cvt_dict = lambda toks: dict(toks.asList())

    # define punctuation as suppressed literals
    (lparen, rparen, lbrack, rbrack,
     lbrace, rbrace, colon) = map(Suppress, "()[]{}:")

    integer = Combine(Optional(oneOf("+ -")) + Word(nums)).setName("integer")
    integer.setParseAction(cvt_int)

    # BUG FIX: only "False" was recognized before, so a True literal could
    # never be parsed (and cvt_bool could never yield True).
    boolean = Keyword("True", caseless=True) | Keyword("False", caseless=True)
    boolean.setParseAction(cvt_bool)

    none = Keyword("None", caseless=True)
    none.setParseAction(cvt_none)

    real = Combine(Optional(oneOf("+ -")) + Word(nums) + "."
                   + Optional(Word(nums))
                   + Optional("e" + Optional(oneOf("+ -"))
                              + Word(nums))).setName("real")
    real.setParseAction(cvt_real)

    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()

    if free_word:
        string = Word(alphas8bit + "_-/.+**" + alphanums)
    else:
        string = Word(alphas8bit + alphas, alphas8bit + alphanums + "_")

    # order matters: none/boolean/real must be tried before the bare word
    list_item = (none | boolean | real | integer | list_str | tuple_str
                 | dict_str
                 | quotedString.setParseAction(removeQuotes)
                 | string)
    # a dict value may be empty, mapping to None
    list_item2 = list_item | Empty().setParseAction(lambda: [None])

    tuple_inner = Optional(delimitedList(list_item)) + Optional(Suppress(","))
    tuple_inner.setParseAction(cvt_tuple)
    tuple_str << (Suppress("(") + tuple_inner + Suppress(")"))

    list_inner = Optional(delimitedList(list_item) + Optional(Suppress(",")))
    list_inner.setParseAction(lambda toks: list(toks))
    list_str << (lbrack + list_inner + rbrack)

    dict_entry = Group(list_item + colon + list_item2)
    dict_inner = delimitedList(dict_entry) + Optional(Suppress(","))
    dict_inner.setParseAction(cvt_dict)
    dict_str << (lbrace + Optional(dict_inner) + rbrace)

    dict_or_tuple = dict_inner | tuple_inner

    if allow_tuple:
        return dict_or_tuple
    else:
        return dict_inner
# -*- coding: utf-8 -*- from __future__ import absolute_import from __future__ import division from __future__ import print_function from pyparsing import Keyword from pyparsing import Literal from undebt.pattern.common import LPAREN from undebt.pattern.common import RPAREN from undebt.pattern.common import NAME from undebt.pattern.common import INDENT from undebt.pattern.util import tokens_as_dict # mock getters dataprovider_test_grammar = (INDENT("indent") + Keyword("@Test")("test") + LPAREN.suppress() + Keyword("dataProvider").suppress() + Literal("=").suppress() + Literal('"').suppress() + NAME("name") + Literal('"').suppress() + RPAREN.suppress()) @tokens_as_dict(assert_keys=["indent", "test", "name"]) def dataprovider_test_replace(tokens): ''' @Test(dataprovider="someDataProvider") -> @Test @DataProvider("someDataProvider") '''
ivar = NAME.setResultsName('ivar') istart = orig_test.setResultsName('istart') eqsign = p.Literal('=').setParseAction(lambda s,loc,toks: [' = ']) comma = p.Literal(',').setParseAction(lambda s,loc,toks: [', ']) iend = orig_test.setResultsName('iend') istep = orig_test.setResultsName('istep') do_stmt = do_kwd + ivar + eqsign + istart + comma + iend + Optional(comma + istep)\ + EOLL do_stmt.setParseAction(lambda s,loc,toks: [''.join(toks), '', 'enddo']) if_expr = (Suppress('(') + orig_test + Suppress(')')) | orig_test if_expr.setParseAction(lambda s,loc,toks: [' (' + ''.join(toks) + ')' ]) if_expr_2 = if_expr.copy() if_expr_2.setParseAction(lambda s,loc,toks: [' (' + ''.join(toks) + ')' + ' then' ]) if_stmt = Keyword('if') + if_expr_2 + Optional(Suppress('then')) + EOLL if_stmt.setParseAction(lambda s,loc,toks: [''.join(toks), '', 'endif']) elseif_stmt = Keyword('elseif') + if_expr_2 + Optional(Suppress('then')) + EOLL elseif_stmt.setParseAction(lambda s,loc,toks: [''.join(toks), '']) while_kwd = Keyword('while').setParseAction(lambda s,loc,toks: [' while']) dowhile_stmt = Keyword('do') + while_kwd + if_expr + EOLL dowhile_stmt.setParseAction(lambda s,loc,toks: [''.join(toks), '', 'enddo']) selectcase_kwd = Keyword('select') + Keyword('case') selectcase_kwd.setParseAction(lambda s,loc,toks: [' '.join(toks)]) selectcase_stmt = selectcase_kwd + if_expr + EOLL selectcase_stmt.setParseAction(lambda s,loc,toks: [''.join(toks), '', 'end select']) where_stmt = Keyword('where') + if_expr + EOLL
# BUG FIX: restOfLine appeared twice in this import list; the duplicate
# is removed.  NOTE(review): ``Upcase`` was removed from modern pyparsing
# releases — confirm the pinned pyparsing version still provides it.
from pyparsing import Literal, CaselessLiteral, Word, Upcase, delimitedList, \
    Optional, Combine, Group, alphas, nums, alphanums, ParseException, Forward, \
    oneOf, quotedString, ZeroOrMore, restOfLine, Keyword, QuotedString, \
    OnlyOnce, Or, removeQuotes, Regex, Suppress, OneOrMore

# forward declaration so subqueries can reference the full SELECT grammar
sql_select = Forward()

# punctuation / operator tokens
tok_sql_open_paren = Literal("(")
tok_sql_close_paren = Literal(")")
tok_sql_equals = Literal("=")
tok_sql_plus = Literal("+")
tok_sql_comment = Literal("#")
tok_sql_op = oneOf("= + * / % < > in", caseless=True)

# SQL keywords (case-insensitive)
tok_sql_literal_insert = Keyword("insert", caseless=True)
tok_sql_literal_select = Keyword("select", caseless=True)
tok_sql_literal_update = Keyword("update", caseless=True)
tok_sql_literal_delete = Keyword("delete", caseless=True)
tok_sql_literal_begin = Keyword("begin", caseless=True)
tok_sql_literal_use = Keyword("use", caseless=True)
tok_sql_literal_as = Keyword("as", caseless=True)
tok_sql_literal_set = Keyword("set", caseless=True)
tok_sql_literal_from = Keyword("from", caseless=True)
tok_sql_literal_commit = Keyword("commit", caseless=True)
tok_sql_literal_rollback = Keyword("rollback", caseless=True)
tok_sql_literal_into = Keyword("into", caseless=True)
tok_sql_literal_order = Keyword("order", caseless=True)
tok_sql_literal_group = Keyword("group", caseless=True)
tok_sql_literal_having = Keyword("having", caseless=True)
tok_sql_literal_by = Keyword("by", caseless=True)
def _build_join(t):
    """ Populates join token fields.

    Copies the parsed name onto the source token and flattens the parsed
    alias (empty string when no alias was given).  Returns the tokens.
    """
    t.source.name = t.source.parsed_name
    t.source.alias = t.source.parsed_alias[0] if t.source.parsed_alias else ''
    return t


# define SQL tokens
comma_token = Suppress(',')
select_kw = Keyword('select', caseless=True)
update_kw = Keyword('update', caseless=True)
volatile_kw = Keyword('volatile', caseless=True)
create_kw = Keyword('create', caseless=True)
table_kw = Keyword('table', caseless=True)
as_kw = Keyword('as', caseless=True)
from_kw = Keyword('from', caseless=True)
where_kw = Keyword('where', caseless=True)
join_kw = Keyword('join', caseless=True)
on_kw = Keyword('on', caseless=True)
left_kw = Keyword('left', caseless=True)
right_kw = Keyword('right', caseless=True)
cross_kw = Keyword('cross', caseless=True)
outer_kw = Keyword('outer', caseless=True)
inner_kw = Keyword('inner', caseless=True)
natural_kw = Keyword('natural', caseless=True)
# BUG FIX: on_kw was defined a second time here (identical, redundant
# binding); the duplicate is removed.
insert_kw = Keyword('insert', caseless=True)
into_kw = Keyword('into', caseless=True)
def SPICE_BNF():
    """Build (once) and return the pyparsing grammar for SPICE protocol
    definition files; results are converted to ptypes.* objects by the
    attached parse actions.  The built grammar is cached in the module
    global ``bnf``.
    """
    global bnf

    if not bnf:
        # punctuation
        colon = Literal(":").suppress()
        lbrace = Literal("{").suppress()
        rbrace = Literal("}").suppress()
        lbrack = Literal("[").suppress()
        rbrack = Literal("]").suppress()
        lparen = Literal("(").suppress()
        rparen = Literal(")").suppress()
        equals = Literal("=").suppress()
        comma = Literal(",").suppress()
        semi = Literal(";").suppress()

        # primitive types
        int8_ = Keyword("int8").setParseAction(replaceWith(ptypes.int8))
        uint8_ = Keyword("uint8").setParseAction(replaceWith(ptypes.uint8))
        int16_ = Keyword("int16").setParseAction(replaceWith(ptypes.int16))
        uint16_ = Keyword("uint16").setParseAction(replaceWith(ptypes.uint16))
        int32_ = Keyword("int32").setParseAction(replaceWith(ptypes.int32))
        uint32_ = Keyword("uint32").setParseAction(replaceWith(ptypes.uint32))
        int64_ = Keyword("int64").setParseAction(replaceWith(ptypes.int64))
        uint64_ = Keyword("uint64").setParseAction(replaceWith(ptypes.uint64))

        # keywords; enumNN/flagsNN parse to their bit width
        enum32_ = Keyword("enum32").setParseAction(replaceWith(32))
        enum16_ = Keyword("enum16").setParseAction(replaceWith(16))
        enum8_ = Keyword("enum8").setParseAction(replaceWith(8))
        flags32_ = Keyword("flags32").setParseAction(replaceWith(32))
        flags16_ = Keyword("flags16").setParseAction(replaceWith(16))
        flags8_ = Keyword("flags8").setParseAction(replaceWith(8))
        channel_ = Keyword("channel")
        server_ = Keyword("server")
        client_ = Keyword("client")
        protocol_ = Keyword("protocol")
        typedef_ = Keyword("typedef")
        struct_ = Keyword("struct")
        message_ = Keyword("message")
        image_size_ = Keyword("image_size")
        bytes_ = Keyword("bytes")
        cstring_ = Keyword("cstring")
        switch_ = Keyword("switch")
        default_ = Keyword("default")
        case_ = Keyword("case")

        identifier = Word(alphas, alphanums + "_")
        enumname = Word(alphanums + "_")

        # hex (0x...) or signed decimal integer
        integer = (
            (Combine(CaselessLiteral("0x") + Word(nums + "abcdefABCDEF")) | Word(nums + "+-", nums))
            .setName("int")
            .setParseAction(cvtInt)
        )

        typename = identifier.copy().setParseAction(lambda toks: ptypes.TypeRef(str(toks[0])))

        # This is just normal "types", i.e. not channels or messages
        typeSpec = Forward()

        attributeValue = integer ^ identifier
        # @name or @name(arg, ...)
        attribute = Group(Combine("@" + identifier) + Optional(lparen + delimitedList(attributeValue) + rparen))
        attributes = Group(ZeroOrMore(attribute))
        arraySizeSpecImage = Group(image_size_ + lparen + integer + comma + identifier + comma + identifier + rparen)
        arraySizeSpecBytes = Group(bytes_ + lparen + identifier + comma + identifier + rparen)
        arraySizeSpecCString = Group(cstring_ + lparen + rparen)
        arraySizeSpec = (
            lbrack
            + Optional(
                identifier ^ integer ^ arraySizeSpecImage ^ arraySizeSpecBytes ^ arraySizeSpecCString,
                default="",
            )
            + rbrack
        )
        variableDef = Group(
            typeSpec + Optional("*", default=None) + identifier + Optional(arraySizeSpec, default=None) + attributes - semi
        ).setParseAction(parseVariableDef)

        switchCase = Group(
            Group(
                OneOrMore(
                    default_.setParseAction(replaceWith(None)) + colon
                    | Group(case_.suppress() + Optional("!", default="") + identifier) + colon
                )
            )
            + variableDef
        ).setParseAction(lambda toks: ptypes.SwitchCase(toks[0][0], toks[0][1]))
        switchBody = Group(
            switch_
            + lparen
            + delimitedList(identifier, delim=".", combine=True)
            + rparen
            + lbrace
            + Group(OneOrMore(switchCase))
            + rbrace
            + identifier
            + attributes
            - semi
        ).setParseAction(lambda toks: ptypes.Switch(toks[0][1], toks[0][2], toks[0][3], toks[0][4]))

        messageBody = structBody = Group(lbrace + ZeroOrMore(variableDef | switchBody) + rbrace)
        structSpec = Group(struct_ + identifier + structBody + attributes).setParseAction(
            lambda toks: ptypes.StructType(toks[0][1], toks[0][2], toks[0][3])
        )

        # have to use longest match for type, in case a user-defined type name starts with a keyword type, like "channel_type"
        typeSpec << (
            structSpec ^ int8_ ^ uint8_ ^ int16_ ^ uint16_ ^ int32_ ^ uint32_ ^ int64_ ^ uint64_ ^ typename
        ).setName("type")

        flagsBody = enumBody = Group(
            lbrace + delimitedList(Group(enumname + Optional(equals + integer))) + Optional(comma) + rbrace
        )

        messageSpec = (
            Group(message_ + messageBody + attributes).setParseAction(
                lambda toks: ptypes.MessageType(None, toks[0][1], toks[0][2])
            )
            | typename
        )

        channelParent = Optional(colon + typename, default=None)
        channelMessage = Group(
            messageSpec + identifier + Optional(equals + integer, default=None) + semi
        ).setParseAction(lambda toks: ptypes.ChannelMember(toks[0][1], toks[0][0], toks[0][2]))
        # "server:" / "client:" section labels interleave with messages
        channelBody = channelParent + Group(
            lbrace + ZeroOrMore(server_ + colon | client_ + colon | channelMessage) + rbrace
        )

        enum_ = enum32_ | enum16_ | enum8_
        flags_ = flags32_ | flags16_ | flags8_
        enumDef = Group(enum_ + identifier + enumBody + attributes - semi).setParseAction(
            lambda toks: ptypes.EnumType(toks[0][0], toks[0][1], toks[0][2], toks[0][3])
        )
        flagsDef = Group(flags_ + identifier + flagsBody + attributes - semi).setParseAction(
            lambda toks: ptypes.FlagsType(toks[0][0], toks[0][1], toks[0][2], toks[0][3])
        )
        messageDef = Group(message_ + identifier + messageBody + attributes - semi).setParseAction(
            lambda toks: ptypes.MessageType(toks[0][1], toks[0][2], toks[0][3])
        )
        channelDef = Group(channel_ + identifier + channelBody + attributes - semi).setParseAction(
            lambda toks: ptypes.ChannelType(toks[0][1], toks[0][2], toks[0][3], toks[0][4])
        )
        structDef = Group(struct_ + identifier + structBody + attributes - semi).setParseAction(
            lambda toks: ptypes.StructType(toks[0][1], toks[0][2], toks[0][3])
        )
        typedefDef = Group(typedef_ + identifier + typeSpec + attributes - semi).setParseAction(
            lambda toks: ptypes.TypeAlias(toks[0][1], toks[0][2], toks[0][3])
        )

        definitions = typedefDef | structDef | enumDef | flagsDef | messageDef | channelDef

        protocolChannel = Group(typename + identifier + Optional(equals + integer, default=None) + semi).setParseAction(
            lambda toks: ptypes.ProtocolMember(toks[0][1], toks[0][0], toks[0][2])
        )
        protocolDef = Group(
            protocol_ + identifier + Group(lbrace + ZeroOrMore(protocolChannel) + rbrace) + semi
        ).setParseAction(lambda toks: ptypes.ProtocolType(toks[0][1], toks[0][2]))

        # a file is any number of definitions followed by exactly one protocol
        bnf = ZeroOrMore(definitions) + protocolDef + StringEnd()

        singleLineComment = "//" + restOfLine
        bnf.ignore(singleLineComment)
        bnf.ignore(cStyleComment)

    return bnf
def _create_grammar_6_0():
    """Create the SYM 6.0 grammar.

    Returns a pyparsing element matching a complete SYM 6.0 file:
    a FormatVersion line, a Title line, then one or more sections
    ({ENUMS}, {SIGNALS}, {SEND}, {RECEIVE}, {SENDRECEIVE}).
    """
    # Basic tokens.  ';' and ':' are excluded from generic words because
    # they act as delimiters elsewhere in the format.
    word = Word(printables.replace(';', '').replace(':', ''))
    positive_integer = Word(nums)
    # Loose numeric token: covers ints, floats and exponent notation.
    number = Word(nums + '.Ee-+')
    lp = Suppress(Literal('('))
    rp = Suppress(Literal(')'))
    lb = Suppress(Literal('['))
    rb = Suppress(Literal(']'))
    # Names must stay on one line: only spaces count as skippable whitespace.
    name = Word(alphas + nums + '_-').setWhitespaceChars(' ')
    assign = Suppress(Literal('='))
    comma = Suppress(Literal(','))
    type_ = name

    # Header.  The '-' operator disables backtracking, so a malformed
    # header produces an informative parse error instead of a misparse.
    version = Group(Keyword('FormatVersion') - assign - Keyword('6.0'))
    title = Group(Keyword('Title') - assign - QuotedString('"'))

    # Enum definition: Enum=<name>(<number>="<label>", ...)
    enum_value = Group(number + assign + QuotedString('"'))
    enum = Group(Suppress(Keyword('Enum')) - assign - name - Suppress(lp) +
                 Group(delimitedList(enum_value)) -
                 Suppress(rp))

    # Optional per-signal attributes (unit, factor, offset, range, ...).
    sig_unit = Group(Literal('/u:') + word)
    sig_factor = Group(Literal('/f:') + word)
    sig_offset = Group(Literal('/o:') + word)
    sig_min = Group(Literal('/min:') + word)
    sig_max = Group(Literal('/max:') + word)
    sig_default = Group(Literal('/d:') + word)
    sig_long_name = Group(Literal('/ln:') + word)
    sig_enum = Group(Literal('/e:') + word)

    # Signal definition: Sig=<name> <type> [<size>] [-m] [<attributes>...]
    signal = Group(Suppress(Keyword('Sig')) - Suppress(assign) - name - type_ +
                   Group(Optional(positive_integer)) +
                   Group(Optional(Keyword('-m'))) +
                   Group(Optional(sig_unit) +
                         Optional(sig_factor) +
                         Optional(sig_offset) +
                         Optional(sig_min) +
                         Optional(sig_max) +
                         Optional(sig_default) +
                         Optional(sig_long_name) +
                         Optional(sig_enum)))

    # Message (symbol) definition: [<name>] followed by its properties and
    # the signals it carries.
    symbol = Group(Suppress(lb) - name - Suppress(rb) -
                   Group(Optional(Keyword('ID') + assign + word)) -
                   Group(Keyword('Len') + assign + positive_integer) +
                   Group(Optional(Keyword('Mux') + assign + word +
                                  positive_integer + comma +
                                  positive_integer + positive_integer)) +
                   Group(Optional(Keyword('CycleTime') + assign +
                                  positive_integer)) +
                   Group(Optional(Keyword('Timeout') + assign +
                                  positive_integer)) +
                   Group(Optional(Keyword('MinInterval') + assign +
                                  positive_integer)) +
                   Group(ZeroOrMore(Group(Keyword('Sig') + assign + name +
                                          positive_integer))))

    # File sections; each holds zero or more of the matching definitions.
    enums = Group(Keyword('{ENUMS}') + Group(ZeroOrMore(enum)))
    signals = Group(Keyword('{SIGNALS}') + Group(ZeroOrMore(signal)))
    send = Group(Keyword('{SEND}') + Group(ZeroOrMore(symbol)))
    receive = Group(Keyword('{RECEIVE}') + Group(ZeroOrMore(symbol)))
    sendreceive = Group(Keyword('{SENDRECEIVE}') + Group(ZeroOrMore(symbol)))
    section = (enums | signals | send | receive | sendreceive)

    grammar = (version - title + Group(OneOrMore(section)) + StringEnd())
    # '//' comments may appear anywhere and are skipped.
    grammar.ignore(dblSlashComment)

    return grammar
def parse_element(cls, indent_stack):
    """Set ``memoize`` attribute to the rule."""
    # Build the pieces separately for readability; each literal part of
    # the decorator syntax is suppressed so only the argument text
    # (everything up to the closing paren) remains in the result.
    decorator = Keyword("@memoize").suppress()
    open_paren = Literal('(').suppress()
    close_paren = Literal(')').suppress()
    expression = decorator + open_paren + SkipTo(')') + close_paren
    return expression.setResultsName("memoize")
def parse (input):
    """Parse a string into an element of the abstract representation.

    Returns a dict describing the top-level form: an expression,
    a define/defun, an #abs request, or #quit.
    """
    # parse a string into an element of the abstract representation

    # Grammar:
    #
    # <expr> ::= <integer>
    #            true
    #            false
    #            <identifier>
    #            ( if <expr> <expr> <expr> )
    #            ( let ( ( <name> <expr> ) ... ) <expr )
    #            ( function ( <name> ... ) <expr> )
    #            ( <expr> <expr> ... )
    #            ( call/cc <expr>)
    #

    idChars = alphas+"_+*-?!=<>"

    pIDENTIFIER = Word(idChars, idChars+"0123456789")
    pIDENTIFIER.setParseAction(lambda result: EId(result[0]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars,idChars+"0123456789")

    pNAMES = ZeroOrMore(pNAME)
    pNAMES.setParseAction(lambda result: [result])

    pINTEGER = Word("0123456789")
    pINTEGER.setParseAction(lambda result: EValue(VInteger(int(result[0]))))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(lambda result: EValue(VBoolean(result[0]=="true")))

    pEXPR = Forward()

    pEXPRS = ZeroOrMore(pEXPR)
    pEXPRS.setParseAction(lambda result: [result])

    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2],result[3],result[4]))

    pBINDING = "(" + pNAME + pEXPR + ")"
    pBINDING.setParseAction(lambda result: (result[1],result[2]))

    pBINDINGS = ZeroOrMore(pBINDING)
    pBINDINGS.setParseAction(lambda result: [ result ])

    # let is desugared into an immediately-applied function.
    def makeLet (bindings,body):
        params = [ param for (param,exp) in bindings ]
        args = [ exp for (param,exp) in bindings ]
        return ECall(EFunction(params,body),args)

    pLET = "(" + Keyword("let") + "(" + pBINDINGS + ")" + pEXPR + ")"
    pLET.setParseAction(lambda result: makeLet(result[3],result[5]))

    pCALL = "(" + pEXPR + pEXPRS + ")"
    pCALL.setParseAction(lambda result: ECall(result[1],result[2]))

    pFUN = "(" + Keyword("function") + "(" + pNAMES + ")" + pEXPR + ")"
    pFUN.setParseAction(lambda result: EFunction(result[3],result[5]))

    # Named (recursive) function form: ( function <name> ( <names> ) <expr> )
    pFUNrec = "(" + Keyword("function") + pNAME + "(" + pNAMES + ")" + pEXPR + ")"
    pFUNrec.setParseAction(lambda result: EFunction(result[4],result[6],name=result[2]))

    # do is desugared into nested lets whose bound values are ignored;
    # evaluation order of the sub-expressions is preserved.
    def makeDo (exprs):
        result = exprs[-1]
        for e in reversed(exprs[:-1]):
            # space is not an allowed identifier in the syntax!
            result = makeLet([(" ",e)],result)
        return result

    pDO = "(" + Keyword("do") + pEXPRS + ")"
    pDO.setParseAction(lambda result: makeDo(result[2]))

    # while is desugared into a recursive zero-argument function " while".
    def makeWhile (cond,body):
        return makeLet([(" while", EFunction([],EIf(cond,makeLet([(" ",body)],ECall(EId(" while"),[])),EValue(VNone())),name=" while"))], ECall(EId(" while"),[]))

    pWHILE = "(" + Keyword("while") + pEXPR + pEXPR + ")"
    pWHILE.setParseAction(lambda result: makeWhile(result[2],result[3]))

    pCALLCC = "(" + Keyword("call/cc") + pEXPR + ")"
    pCALLCC.setParseAction(lambda result: ECallCC(result[2]))

    # NOTE: alternation order matters — keyword forms must precede pCALL,
    # which would otherwise swallow any parenthesized form.
    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pLET | pFUN | pFUNrec| pDO | pWHILE | pCALLCC | pCALL)

    # can't attach a parse action to pEXPR because of recursion, so let's duplicate the parser
    pTOPEXPR = pEXPR.copy()
    pTOPEXPR.setParseAction(lambda result: {"result":"expression","expr":result[0]})

    pDEFINE = "(" + Keyword("define") + pNAME + pEXPR + ")"
    pDEFINE.setParseAction(lambda result: {"result":"value", "name":result[2], "expr":result[3]})

    pDEFUN = "(" + Keyword("defun") + pNAME + "(" + pNAMES + ")" + pEXPR + ")"
    pDEFUN.setParseAction(lambda result: {"result":"function", "name":result[2], "params":result[4], "body":result[6]})

    pABSTRACT = "#abs" + pEXPR
    pABSTRACT.setParseAction(lambda result: {"result":"abstract", "expr":result[1]})

    pQUIT = Keyword("#quit")
    pQUIT.setParseAction(lambda result: {"result":"quit"})

    pTOP = (pDEFUN | pDEFINE | pQUIT | pABSTRACT | pTOPEXPR)

    result = pTOP.parseString(input)[0]
    return result    # the first element of the result is the expression
from pyparsing import Keyword, Word, infixNotation, opAssoc # <constant> ::= False | True # <variable> ::= 'p' | 'q' | 'r' # <or> ::= 'or' # <and> ::= 'and' # <not> ::= 'not' # <expression> ::= <term> { <or><term> } # <term> ::= <factor> { <and><factor> } # <factor> ::= <constant> | <not><factor> | (<expression>) constant = Keyword('True') | Keyword('False') variable = Word('pqr', exact=1) operand = constant | variable expr = infixNotation(operand, [ ( "not", 1, opAssoc.RIGHT, ), ( "and", 2, opAssoc.LEFT, ), ( "or", 2, opAssoc.LEFT, ),
from pyparsing import (Word, Group, Suppress, Combine, Optional, Forward, Empty, quotedString, oneOf, removeQuotes, delimitedList, nums, alphas, alphanums, Keyword, CaselessLiteral) word_free = Word(alphas + '][@_-/.+**' + alphanums) word_strict = Word(alphas, alphas + alphanums + '_' ) (lparen, rparen, lbrack, rbrack, lbrace, rbrace, colon, equal_sign) = map(Suppress, '()[]{}:=') integer = Combine(Optional(oneOf('+ -')) + Word(nums)).setName('integer') cvt_int = lambda toks: int(toks[0]) integer.setParseAction(cvt_int) boolean_true = Keyword('True', caseless=True) boolean_true.setParseAction(lambda x: True) boolean_false = Keyword('False', caseless=True) boolean_false.setParseAction(lambda x: False) boolean = boolean_true | boolean_false none = Keyword('None', caseless=True) cvt_none = lambda toks: [None] none.setParseAction(cvt_none) e = CaselessLiteral("e") real = (Combine(Optional(oneOf('+ -')) + Word(nums) + '.' + Optional(Word(nums)) + Optional(e + Optional(oneOf('+ -')) + Word(nums)))
# [162] WS ::= #x20 | #x9 | #xD | #xA # Not needed? # WS = #x20 | #x9 | #xD | #xA # [163] ANON ::= '[' WS* ']' ANON = Literal('[') + ']' ANON.setParseAction(lambda x: rdflib.BNode()) # A = CaseSensitiveKeyword('a') A = Literal('a') A.setParseAction(lambda x: rdflib.RDF.type) # ------ NON-TERMINALS -------------- # [5] BaseDecl ::= 'BASE' IRIREF BaseDecl = Comp('Base', Keyword('BASE') + Param('iri', IRIREF)) # [6] PrefixDecl ::= 'PREFIX' PNAME_NS IRIREF PrefixDecl = Comp( 'PrefixDecl', Keyword('PREFIX') + PNAME_NS + Param('iri', IRIREF)) # [4] Prologue ::= ( BaseDecl | PrefixDecl )* Prologue = Group(ZeroOrMore(BaseDecl | PrefixDecl)) # [108] Var ::= VAR1 | VAR2 Var = VAR1 | VAR2 Var.setParseAction(lambda x: rdflib.term.Variable(x[0])) # [137] PrefixedName ::= PNAME_LN | PNAME_NS PrefixedName = Comp('pname', PNAME_LN | PNAME_NS)
#
# cLibHeader.py
#
# A simple parser to extract API doc info from a C header file
#
# Copyright, 2012 - Paul McGuire
#
from pyparsing import Word, alphas, alphanums, Combine, oneOf, Optional, delimitedList, Group, Keyword

# Sample input: two C prototypes to scan for name/argument information.
testdata = """
int func1(float *vec, int len, double arg1);
int func2(float **arr, float *vec, int len, double arg1, double arg2);
"""

# C identifier: letter first, then letters/digits/underscores.
ident = Word(alphas, alphanums + "_")
# A base type optionally followed by pointer stars; adjacent=False lets
# Combine join "float" and "*" across intervening whitespace.
vartype = Combine(oneOf("float double int char") + Optional(Word("*")), adjacent=False)
arglist = delimitedList(Group(vartype("type") + ident("name")))

# Prototype with an int return type: int <name>(<args>);
functionCall = Keyword("int") + ident("name") + "(" + arglist("args") + ")" + ";"

for fn, s, e in functionCall.scanString(testdata):
    # Fixed: the original used Python 2 print statements, which are a
    # SyntaxError under Python 3.  Single-argument print(...) calls
    # produce identical output on both Python 2 and 3.
    print(fn.name)
    for a in fn.args:
        print(" - %(name)s (%(type)s)" % a)
def parse(input):
    """Parse a string into an element of the abstract representation.

    Returns a dict describing the top-level form: an expression,
    a define/defun, an #abs request, or #quit.
    """
    # parse a string into an element of the abstract representation

    # Grammar:
    #
    # <expr> ::= <integer>
    #            true
    #            false
    #            <identifier>
    #            ( if <expr> <expr> <expr> )
    #            ( let ( ( <name> <expr> ) ) <expr )
    #            ( function ( <name> ... ) <expr> )
    #            ( ref <expr> )
    #            ( <expr> <expr> ... )
    #
    # NOTE(review): the grammar comment lists ( ref <expr> ) but no pREF
    # parser is defined below — a ref form will parse as a pCALL. Confirm
    # whether that is intentional.

    idChars = alphas + "_+*-?!=<>"

    pIDENTIFIER = Word(idChars, idChars + "0123456789")
    pIDENTIFIER.setParseAction(lambda result: EId(result[0]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars, idChars + "0123456789")

    pNAMES = ZeroOrMore(pNAME)
    pNAMES.setParseAction(lambda result: [result])

    pINTEGER = Word("0123456789")
    pINTEGER.setParseAction(lambda result: EValue(VInteger(int(result[0]))))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(
        lambda result: EValue(VBoolean(result[0] == "true")))

    pEXPR = Forward()

    pEXPRS = ZeroOrMore(pEXPR)
    pEXPRS.setParseAction(lambda result: [result])

    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2], result[3], result[4]))

    pBINDING = "(" + pNAME + pEXPR + ")"
    pBINDING.setParseAction(lambda result: (result[1], result[2]))

    pBINDINGS = ZeroOrMore(pBINDING)
    pBINDINGS.setParseAction(lambda result: [result])

    # let is desugared into an immediately-applied function.
    def makeLet(bindings, body):
        params = [param for (param, exp) in bindings]
        args = [exp for (param, exp) in bindings]
        return ECall(EFunction(params, body), args)

    pLET = "(" + Keyword("let") + "(" + pBINDINGS + ")" + pEXPR + ")"
    pLET.setParseAction(lambda result: makeLet(result[3], result[5]))

    pCALL = "(" + pEXPR + pEXPRS + ")"
    pCALL.setParseAction(lambda result: ECall(result[1], result[2]))

    pFUN = "(" + Keyword("function") + "(" + pNAMES + ")" + pEXPR + ")"
    pFUN.setParseAction(lambda result: EFunction(result[3], result[5]))

    # Named (recursive) function form: ( function <name> ( <names> ) <expr> )
    pFUNrec = "(" + Keyword(
        "function") + pNAME + "(" + pNAMES + ")" + pEXPR + ")"
    pFUNrec.setParseAction(
        lambda result: EFunction(result[4], result[6], name=result[2]))

    # do is desugared into nested lets whose bound values are ignored;
    # evaluation order of the sub-expressions is preserved.
    def makeDo(exprs):
        result = exprs[-1]
        for e in reversed(exprs[:-1]):
            # space is not an allowed identifier in the syntax!
            result = makeLet([(" ", e)], result)
        return result

    pDO = "(" + Keyword("do") + pEXPRS + ")"
    pDO.setParseAction(lambda result: makeDo(result[2]))

    # while is desugared into a recursive zero-argument function " while".
    def makeWhile(cond, body):
        return makeLet(
            [(" while",
              EFunction([],
                        EIf(cond,
                            makeLet([(" ", body)],
                                    ECall(EId(" while"), [])),
                            EValue(VNone())),
                        name=" while"))],
            ECall(EId(" while"), []))

    pWHILE = "(" + Keyword("while") + pEXPR + pEXPR + ")"
    pWHILE.setParseAction(lambda result: makeWhile(result[2], result[3]))

    # NOTE: alternation order matters — keyword forms must precede pCALL,
    # which would otherwise swallow any parenthesized form.
    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pLET | pFUN
              | pFUNrec | pDO | pWHILE | pCALL)

    # can't attach a parse action to pEXPR because of recursion, so let's duplicate the parser
    pTOPEXPR = pEXPR.copy()
    pTOPEXPR.setParseAction(lambda result: {
        "result": "expression",
        "expr": result[0]
    })

    pDEFINE = "(" + Keyword("define") + pNAME + pEXPR + ")"
    pDEFINE.setParseAction(lambda result: {
        "result": "value",
        "name": result[2],
        "expr": result[3]
    })

    pDEFUN = "(" + Keyword("defun") + pNAME + "(" + pNAMES + ")" + pEXPR + ")"
    pDEFUN.setParseAction(
        lambda result: {
            "result": "function",
            "name": result[2],
            "params": result[4],
            "body": result[6]
        })

    pABSTRACT = "#abs" + pEXPR
    pABSTRACT.setParseAction(lambda result: {
        "result": "abstract",
        "expr": result[1]
    })

    pQUIT = Keyword("#quit")
    pQUIT.setParseAction(lambda result: {"result": "quit"})

    pTOP = (pDEFUN | pDEFINE | pQUIT | pABSTRACT | pTOPEXPR)

    result = pTOP.parseString(input)[0]
    return result    # the first element of the result is the expression
def init_grammar(self):
    """Set up the parsing classes.

    Any changes to the grammar of the config file should be done here.
    Builds ``self.config_parser``; parse actions are bound to handler
    methods on ``self`` (set_alignment, define_scheme, ...), which run
    as the config file is parsed.
    """
    # Some syntax that we need, but don't care about
    SEMICOLON = (Suppress(";"))
    EQUALS = Suppress("=")

    # Top Section
    FILE_NAME = Word(alphas + nums + '-_.')
    alignment_def = Keyword('alignment') + EQUALS\
        + FILE_NAME + SEMICOLON
    alignment_def.setParseAction(self.set_alignment)
    tree_def = Keyword('user_tree_topology') + EQUALS\
        + FILE_NAME + SEMICOLON
    tree_def.setParseAction(self.set_user_tree)

    # Helper: a "<name> = <value>;" option wired to set_simple_option.
    def simple_option(name):
        opt = Keyword(name) + EQUALS +\
            Word(alphas + nums + '-_') + SEMICOLON
        opt.setParseAction(self.set_simple_option)
        return opt

    branch_def = simple_option('branchlengths')

    MODEL_NAME = Word(alphas + nums + '+' + ' ' + '_')
    model_list = delimitedList(MODEL_NAME)
    model_def = 'models' + EQUALS + model_list + SEMICOLON
    model_def.setParseAction(self.set_models)
    model_selection_def = simple_option("model_selection")
    top_section = alignment_def + Optional(tree_def) + branch_def + \
        model_def + model_selection_def

    # Data Block Parsing
    column = Word(nums)
    block_name = Word(alphas + '_-' + nums)
    # A range like "1-100\3": start, optional end, optional step.
    block_def = column("start") +\
        Optional(Suppress("-") + column("end")) +\
        Optional(Suppress("\\") + column("step"))
    block_def.setParseAction(self.define_range)
    block_list_def = Group(OneOrMore(Group(block_def)))
    user_subset_def = Optional("charset") + block_name("name") + \
        EQUALS + block_list_def("parts") + SEMICOLON
    user_subset_def.setParseAction(self.define_user_subset)
    block_def_list = OneOrMore(Group(user_subset_def))
    block_section = Suppress("[data_blocks]") + block_def_list
    block_def_list.setParseAction(self.check_blocks)

    # Scheme Parsing
    scheme_name = Word(alphas + '_-' + nums)
    # Make a copy, cos we set a different action on it
    user_subset_ref = block_name.copy()
    user_subset_ref.setParseAction(self.check_block_exists)
    subset = Group(Suppress("(") +
                   delimitedList(user_subset_ref("name")) +
                   Suppress(")"))
    subset.setParseAction(self.define_subset_grouping)
    scheme = Group(OneOrMore(subset))
    scheme_def = scheme_name("name") + \
        EQUALS + scheme("scheme") + SEMICOLON
    scheme_def.setParseAction(self.define_scheme)
    scheme_list = OneOrMore(Group(scheme_def))

    scheme_algo = simple_option("search")
    scheme_section = \
        Suppress("[schemes]") + scheme_algo + Optional(scheme_list)

    # We've defined the grammar for each section.
    # Here we just put it all together
    self.config_parser = (
        top_section + block_section + scheme_section + stringEnd)
# [162] WS ::= #x20 | #x9 | #xD | #xA # Not needed? # WS = #x20 | #x9 | #xD | #xA # [163] ANON ::= '[' WS* ']' ANON = Literal('[') + ']' ANON.setParseAction(lambda x: rdflib.BNode()) # A = CaseSensitiveKeyword('a') A = Literal('a') A.setParseAction(lambda x: rdflib.RDF.type) # ------ NON-TERMINALS -------------- # [5] BaseDecl ::= 'BASE' IRIREF BaseDecl = Comp('Base', Keyword('BASE') + Param('iri', IRIREF)) # [6] PrefixDecl ::= 'PREFIX' PNAME_NS IRIREF PrefixDecl = Comp('PrefixDecl', Keyword('PREFIX') + PNAME_NS + Param('iri', IRIREF)) # [4] Prologue ::= ( BaseDecl | PrefixDecl )* Prologue = Group(ZeroOrMore(BaseDecl | PrefixDecl)) # [108] Var ::= VAR1 | VAR2 Var = VAR1 | VAR2 Var.setParseAction(lambda x: rdflib.term.Variable(x[0])) # [137] PrefixedName ::= PNAME_LN | PNAME_NS PrefixedName = Comp('pname', PNAME_LN | PNAME_NS)
def _parse_line(self):
    """
    Parses a single line, and returns a node representing the active context
    Further lines processed are expected to be children of the active context, or children of its ancestors.

    ------------------------------------------------

    Basic grammar is as follows:

    line = <mako>|<nemo>|<string>

    <mako>
    We don't parse normally parse tags, so the following info is sketchy.
    Mako tags are recognized as anything that starts with:
        - <%
        - %>
        - %CLOSETEXT
        - </%

    Mako Control tags however are parsed, and required to adhere to the same
    indentation rules as Nemo tags.
    mako_control = <start>|<middle>|<end>
    start = (for|if|while) <inner>:
    middle = (else|elif):
    end = endfor|endwhile

    nemo = % ( <mako_control>|<nemo_statement> )
    nemo_statement = .<quote><string><quote>|#<quote><string><quote>|<words>
    <quote> = '|"

    Notes: Quotes are required to be balanced.
           Quotes preceded by a \ are ignored.
    <string> = *
    words = \w+
    """
    #if self.debug: print '\t ' + str(self._current_node)

    # PyParser setParseAction's actually execute during parsing,
    # So we need closures in order to change the current scope

    def depth_from_indentation(function):
        """ Set the depth as the start of the match """
        def wrap(start, values):
            #print 'Depth %d | %d %s' %(self._depth, start, values)
            #self._depth = start
            self._current_node = function(values)
            #print self._current_node
            return ''

        return wrap

    def depth_from_match(function):
        """ Set the depth as the start of the match """
        def wrap(start, values):
            #print 'Depth %d | %d %s' %(self._depth, start, values)
            #print self._current_node
            self._depth = start
            self._current_node = function(values)
            #print self._current_node
            return ''

        return wrap

    def depth_from_nemo_tag(function):
        """ Start of the match is where the nemo tag is. Pass the other values to the wrapped function """
        def wrap(start, values):
            # print 'Depth %d | %d %s' %(self._depth, start, values)
            self._depth = start
            tokens = values[1]
            self._current_node = function(tokens)
            #print self._current_node
            return ''

        return wrap

    # Match HTML
    from pyparsing import NotAny, MatchFirst
    html = restOfLine
    html.setParseAction(depth_from_indentation(self._add_html_node))

    # Match Mako control tags
    nemo_tag = Literal('%')

    begin = Keyword('for') | Keyword('if') | Keyword('while')
    middle = Keyword('else') | Keyword('elif')
    end = Keyword('endfor') | Keyword('endif') | Keyword('endwhile')
    control = nemo_tag + (begin | middle | end)

    # Parse actions are attached to the keyword alternatives themselves,
    # so they fire whenever 'control' matches.
    begin.setParseAction(depth_from_indentation(self._add_nesting_mako_control_node) )
    middle.setParseAction(depth_from_indentation(self._add_mako_middle_node))
    end.setParseAction(depth_from_indentation(self._add_mako_control_leaf))

    # Match Nemo tags
    argument_name = Word(alphas,alphanums+"_-:")
    argument_value = quotedString
    regular_argument = argument_name + Literal('=') + argument_value

    # '.' and '#' are shorthand for class= and id= respectively.
    class_name = Literal('.').setParseAction(lambda x: 'class=')
    id_name = Literal('#').setParseAction(lambda x: 'id=')
    special_argument = (class_name | id_name) + argument_value
    argument = Combine(special_argument) | Combine(regular_argument)

    # Match single Nemo statement (Part of a multi-line)
    inline_nemo_html = Word(alphas) + Group(ZeroOrMore(argument))
    inline_nemo_html.setParseAction(depth_from_match(self._add_nemo_node))

    # Match first nemo tag on the line (the one that may begin a multi-statement expression)
    nemo_html = nemo_tag + Group(Word(alphanums+"_-:") + Group(ZeroOrMore(argument)))
    nemo_html.setParseAction(depth_from_nemo_tag(self._add_nemo_node))

    # Match a multi-statement expression. Nemo statements are seperated by |. Anything after || is treated as html
    separator = Literal('|').suppress()
    html_separator = Literal('||') # | Literal('|>')
    nemo_list = nemo_html + ZeroOrMore( separator + inline_nemo_html )
    inline_html = html.copy()
    inline_html.setParseAction(depth_from_match(self._add_inline_html_node))
    nemo_multi = nemo_list + Optional(html_separator + inline_html)

    # Match empty Nemo statement
    empty = nemo_tag + Empty()
    empty.setParseAction(depth_from_indentation(self._add_blank_nemo_node))

    # Match unused Mako tags
    mako_tags = Literal('<%') | Literal('%>') | Literal('%CLOSETEXT') | Literal('</%')
    mako = mako_tags
    mako_tags.setParseAction(depth_from_indentation(self._add_html_node))

    # Matches General
    nemo = (control | nemo_multi | empty)
    line = mako_tags | nemo | html

    # Depth Calculation (deprecated?)
    self._depth = len(self._c) - len(self._c.strip())

    #try:
    line.parseString(self._c)
def parser(self):
    """
    This function returns a parser.
    The grammar should be like most full text search engines (Google, Tsearch, Lucene).

    Grammar:
    - a query consists of alphanumeric words, with an optional '*'
      wildcard at the end of a word
    - a sequence of words between quotes is a literal string
    - words can be used together by using operators ('and' or 'or')
    - words with operators can be grouped with parenthesis
    - a word or group of words can be preceded by a 'not' operator
    - the 'and' operator precedes an 'or' operator
    - if an operator is missing, use an 'and' operator
    """
    # Forward declaration: the top-level rule is recursive.
    operatorOr = Forward()

    operatorWord = Word(wordchars).setResultsName('value')

    # One or more words inside quotes (quotes themselves are suppressed).
    operatorQuotesContent = Forward()
    operatorQuotesContent << (
        (operatorWord + operatorQuotesContent) | operatorWord)

    operatorQuotes = Group(
        Suppress('"') + operatorQuotesContent + Suppress('"')
    ).setResultsName("quotes") | operatorWord

    # Optional "index=" prefix restricting a term to a named index.
    prefix = (Word(alphanums).setResultsName('index') +
              Word('=').setResultsName('binop'))
    operatorParenthesis = Group(
        Optional(prefix) +
        (Suppress("(") + operatorOr + Suppress(")"))
    ).setResultsName("parenthesis") | Group(
        prefix + operatorQuotes).setResultsName(
            'term') | operatorQuotes

    operatorNot = Forward()
    operatorNot << (Group(
        Suppress(Keyword("not", caseless=True)) +
        operatorNot).setResultsName("not") | operatorParenthesis)

    # 'and' binds tighter than 'or'; the second alternative makes 'and'
    # implicit between adjacent terms that are not operators.
    operatorAnd = Forward()
    operatorAnd << (Group(
        operatorNot + Suppress(Keyword("and", caseless=True)) +
        operatorAnd).setResultsName("and") | Group(
            operatorNot + OneOrMore(
                ~oneOf("and or", caseless=True) + operatorAnd)
        ).setResultsName("and") | operatorNot)

    # Proximity operators: "near,<distance>" and "span,<distance>" between
    # two parenthesized/quoted terms.
    operatorProximity = Forward()
    operatorProximity << (Group(
        operatorParenthesis + Suppress(Literal("near,")) +
        Word(nums).setResultsName('distance') +
        operatorParenthesis).setResultsName("near") | Group(
            operatorParenthesis + Suppress(Literal("span,")) +
            Word(nums).setResultsName('distance') +
            operatorParenthesis).setResultsName("span") | operatorAnd)

    operatorOr << (Group(
        operatorProximity + Suppress(Keyword("or", caseless=True)) +
        operatorOr).setResultsName("or") | operatorProximity)

    # Returns the bound parseString method so callers do parser()(text).
    return operatorOr.parseString