def parser():
    """Return the shared grammar for regular-expression text, creating it once.

    The grammar is built on the first call and stored in the module-level
    ``_parser``; every later call returns that same object.  Building it
    also sets pyparsing's default whitespace characters to the empty string,
    so the elements created here do not skip blanks or tabs.
    """
    global _parser
    if _parser is not None:
        return _parser

    ParserElement.setDefaultWhitespaceChars("")
    (open_bracket, close_bracket, open_brace,
     close_brace, open_paren, close_paren) = [Literal(ch) for ch in "[]{}()"]

    # \d, \w and \s are macros; any other backslash + printable is an
    # escaped literal character.
    macro = Combine("\\" + oneOf("d w s")).setParseAction(handleMacro)
    escaped = ~macro + Combine("\\" + oneOf(list(printables)))

    # Characters that stand for themselves: every printable that has no
    # special meaning in the grammar, plus blank and tab.
    specials = r"\[]{}().*?+|"
    plain_chars = [ch for ch in printables if ch not in specials] + [" ", "\t"]

    char_class = Combine(
        open_bracket + SkipTo(close_bracket, ignore=escaped) + close_bracket
    ).setParseAction(handleRange)
    literal = (escaped | oneOf(plain_chars)).setParseAction(handleLiteral)
    any_char = Literal(".").setParseAction(handleDot)

    # {n}, {m,n}, or one of the single-character quantifiers.
    exact_count = open_brace + Word(nums).setResultsName("count") + close_brace
    bounded_count = (
        open_brace
        + Word(nums).setResultsName("minCount")
        + ","
        + Word(nums).setResultsName("maxCount")
        + close_brace
    )
    repetition = exact_count | bounded_count | oneOf("* + ?")

    term = literal | char_class | macro | any_char
    grammar = operatorPrecedence(
        term,
        [
            (repetition, 1, opAssoc.LEFT, handleRepetition),
            (None, 2, opAssoc.LEFT, handleSequence),
            (Suppress('|'), 2, opAssoc.LEFT, handleAlternative),
        ],
    )

    _parser = grammar
    return _parser
def _build_regex_grammar():
    """Construct and return a fresh pyparsing grammar for regex text."""
    # Elements created from here on do not skip any whitespace.
    ParserElement.setDefaultWhitespaceChars("")
    lbrack, rbrack, lbrace, rbrace, lparen, rparen = (
        Literal(symbol) for symbol in "[]{}()"
    )

    # Macros (\d, \w, \s) are tried before generic escaped characters.
    reMacro = Combine("\\" + oneOf(list("dws")))
    escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables)))

    reserved = r"\[]{}().*?+|"
    reLiteralChar = "".join(filter(lambda c: c not in reserved, printables))
    reLiteralChar += " \t"

    reRange = Combine(lbrack + SkipTo(rbrack, ignore=escapedChar) + rbrack)
    reLiteral = escapedChar | oneOf(list(reLiteralChar))
    reDot = Literal(".")

    countSpec = lbrace + Word(nums).setResultsName("count") + rbrace
    minMaxSpec = (
        lbrace
        + Word(nums).setResultsName("minCount")
        + ","
        + Word(nums).setResultsName("maxCount")
        + rbrace
    )
    repetition = countSpec | minMaxSpec | oneOf(list("*+?"))

    # Attach the handlers that turn matched tokens into result objects.
    for element, action in (
        (reRange, handleRange),
        (reLiteral, handleLiteral),
        (reMacro, handleMacro),
        (reDot, handleDot),
    ):
        element.setParseAction(action)

    reTerm = reLiteral | reRange | reMacro | reDot
    precedence = [
        (repetition, 1, opAssoc.LEFT, handleRepetition),
        (None, 2, opAssoc.LEFT, handleSequence),
        (Suppress('|'), 2, opAssoc.LEFT, handleAlternative),
    ]
    return operatorPrecedence(reTerm, precedence)


def parser():
    """Return the cached regex grammar, building it on the first call.

    The result is stored in the module-level ``_parser`` so the grammar is
    only constructed once.
    """
    global _parser
    if _parser is None:
        _parser = _build_regex_grammar()
    return _parser
def parser(self):
    """Build the search-query grammar and return its ``parseString`` method.

    The grammar should be like most full text search engines
    (Google, Tsearch, Lucene).

    Grammar:
    - a query consists of alphanumeric words, with an optional '*'
      wildcard at the end of a word
    - a sequence of words between quotes is a literal string
    - words can be used together by using operators ('and' or 'or')
    - words with operators can be grouped with parenthesis
    - a word or group of words can be preceded by a 'not' operator
    - the 'and' operator precedes an 'or' operator
    - if an operator is missing, use an 'and' operator

    Returns:
        The bound ``parseString`` method of the top-level (``or``)
        expression; call it with the query text to get the parse results.
    """
    # Operator keywords are matched case-insensitively and only as whole
    # words, both where they are consumed and where they are looked ahead.
    andKeyword = Keyword("and", caseless=True)
    orKeyword = Keyword("or", caseless=True)
    notKeyword = Keyword("not", caseless=True)

    operatorOr = Forward()

    operatorWord = (
        Group(Combine(Word(alphanums) + Suppress('*'))).setResultsName('wordwildcard')
        | Group(Word(alphanums)).setResultsName('word')
    )

    operatorQuotesContent = Forward()
    operatorQuotesContent << (
        (operatorWord + operatorQuotesContent) | operatorWord
    )

    operatorQuotes = (
        Group(
            Suppress('"') + operatorQuotesContent + Suppress('"')
        ).setResultsName("quotes")
        | operatorWord
    )

    operatorParenthesis = (
        Group(
            Suppress("(") + operatorOr + Suppress(")")
        ).setResultsName("parenthesis")
        | operatorQuotes
    )

    operatorNot = Forward()
    operatorNot << (
        Group(Suppress(notKeyword) + operatorNot).setResultsName("not")
        | operatorParenthesis
    )

    operatorAnd = Forward()
    operatorAnd << (
        Group(
            operatorNot + Suppress(andKeyword) + operatorAnd
        ).setResultsName("and")
        # Implicit 'and': adjacent terms with no operator between them.
        # The lookahead must use the same caseless, whole-word keywords as
        # the explicit operators; a plain oneOf("and or") lets "OR" through
        # as an ordinary word and rejects words that merely start with
        # "and"/"or" (e.g. "order").
        | Group(
            operatorNot + OneOrMore(~(andKeyword | orKeyword) + operatorAnd)
        ).setResultsName("and")
        | operatorNot
    )

    operatorOr << (
        Group(
            operatorAnd + Suppress(orKeyword) + operatorOr
        ).setResultsName("or")
        | operatorAnd
    )

    return operatorOr.parseString
#
# Copyright 2004, by Alberto Santini http://www.albertosantini.it/chess/
#
from pyparsingOD import alphanums, nums, quotedString
from pyparsingOD import Combine, Forward, Group, Literal, oneOf, OneOrMore, Optional, Suppress, ZeroOrMore, White, Word
from pyparsingOD import ParseException

#
# define pgn grammar
#

# Header tag pair, e.g. [Event "..."]; the brackets are dropped.
tag = Suppress("[") + Word(alphanums) + Combine(quotedString) + Suppress("]")
# Brace-delimited comment containing only letters, digits and blanks.
comment = Suppress("{") + Word(alphanums + " ") + Suppress("}")

dot = Literal(".")
piece = oneOf("K Q B N R")
file_coord = oneOf("a b c d e f g h")
rank_coord = oneOf("1 2 3 4 5 6 7 8")
capture = oneOf("x :")
promote = Literal("=")
# Promotion happens on the last rank of either side: 8 for white, 1 for black.
promotion_rank = oneOf("1 8")
castle_queenside = Literal("O-O-O") | Literal("0-0-0") | Literal("o-o-o")
castle_kingside = Literal("O-O") | Literal("0-0") | Literal("o-o")

move_number = Optional(comment) + Word(nums) + dot
m1 = file_coord + rank_coord  # pawn move e.g. d4
m2 = file_coord + capture + file_coord + rank_coord  # pawn capture move e.g. dxe5
m3 = file_coord + promotion_rank + promote + piece  # pawn promotion e.g. e8=Q or e1=Q
m4 = piece + file_coord + rank_coord  # piece move e.g. Be6
m5 = piece + file_coord + file_coord + rank_coord  # piece move e.g. Nbd2
m6 = piece + rank_coord + file_coord + rank_coord  # piece move e.g. R4a7
m7 = piece + capture + file_coord + rank_coord  # piece capture move e.g. Bxh7
# Case-insensitive SQL keywords.
selectToken = Keyword("select", caseless=True)
fromToken = Keyword("from", caseless=True)
and_ = Keyword("and", caseless=True)
or_ = Keyword("or", caseless=True)
in_ = Keyword("in", caseless=True)

# Identifiers, and dotted column / table names (upper-cased by Upcase).
ident = Word(alphas, alphanums + "_$").setName("identifier")
columnName = Upcase(delimitedList(ident, ".", combine=True))
columnNameList = Group(delimitedList(columnName))
tableName = Upcase(delimitedList(ident, ".", combine=True))
tableNameList = Group(delimitedList(tableName))

# Defined further down, once whereCondition exists.
whereExpression = Forward()

# Numeric literals.
E = CaselessLiteral("E")
binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
arithSign = Word("+-", exact=1)
realNum = Combine(
    Optional(arithSign)
    + (
        (Word(nums) + "." + Optional(Word(nums)))
        | ("." + Word(nums))
    )
    + Optional(E + Optional(arithSign) + Word(nums))
)
intNum = Combine(
    Optional(arithSign)
    + Word(nums)
    + Optional(E + Optional("+") + Word(nums))
)

columnRval = realNum | intNum | quotedString | columnName  # need to add support for alg expressions

# A single condition: comparison, IN (list), IN (subselect) or a
# parenthesised where-expression.
whereCondition = Group(
    (columnName + binop + columnRval)
    | (columnName + in_ + "(" + delimitedList(columnRval) + ")")
    | (columnName + in_ + "(" + selectStmt + ")")
    | ("(" + whereExpression + ")")
)
whereExpression << (whereCondition + ZeroOrMore((and_ | or_) + whereExpression))
selectToken = Keyword("select", caseless=True)
fromToken = Keyword("from", caseless=True)

# Column and table names share the same shape: dot-separated identifiers
# combined into one token and passed through Upcase.
ident = Word(alphas, alphanums + "_$").setName("identifier")
columnName = Upcase(delimitedList(ident, ".", combine=True))
columnNameList = Group(delimitedList(columnName))
tableName = Upcase(delimitedList(ident, ".", combine=True))
tableNameList = Group(delimitedList(tableName))

# Forward declaration: the where-expression is recursive.
whereExpression = Forward()
and_ = Keyword("and", caseless=True)
or_ = Keyword("or", caseless=True)
in_ = Keyword("in", caseless=True)

E = CaselessLiteral("E")
binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
arithSign = Word("+-", exact=1)

# [sign] (digits "." [digits] | "." digits) [E [sign] digits]
realNum = Combine(
    Optional(arithSign) +
    ((Word(nums) + "." + Optional(Word(nums))) | ("." + Word(nums))) +
    Optional(E + Optional(arithSign) + Word(nums))
)
# [sign] digits [E ["+"] digits]
intNum = Combine(
    Optional(arithSign) +
    Word(nums) +
    Optional(E + Optional("+") + Word(nums))
)

# need to add support for alg expressions
columnRval = realNum | intNum | quotedString | columnName

whereCondition = Group(
    (columnName + binop + columnRval) |
    (columnName + in_ + "(" + delimitedList(columnRval) + ")") |
    (columnName + in_ + "(" + selectStmt + ")") |
    ("(" + whereExpression + ")")
)
whereExpression << (
    whereCondition + ZeroOrMore((and_ | or_) + whereExpression)
)
OneOrMore(CONCAT | pound_char | unquoted_sglQuotedString) , adjacent=False ).setParseAction(lambda s, l, t: "'%s'" % t[0]) string_value = delphi_string | base16_value list_value = LBRACE + Optional(Group(delimitedList(identifier | number_value | string_value))) + RBRACE paren_list_value = LPAREN + ZeroOrMore(identifier | number_value | string_value) + RPAREN item_list_entry = ITEM + ZeroOrMore(attribute_value_pair) + END item_list = LANGLE + ZeroOrMore(item_list_entry) + RANGLE generic_value = identifier value = item_list | number_value | string_value | list_value | paren_list_value | generic_value category_attribute = CATEGORIES + PERIOD + oneOf("strings itemsvisibles visibles", True) event_attribute = oneOf("onactivate onclosequery onclose oncreate ondeactivate onhide onshow", True) font_attribute = FONT + PERIOD + oneOf("charset color height name style", True) hint_attribute = HINT layout_attribute = oneOf("left top width height", True) generic_attribute = identifier attribute = (category_attribute | event_attribute | font_attribute | hint_attribute | layout_attribute | generic_attribute) category_attribute_value_pair = category_attribute + EQUALS + paren_list_value event_attribute_value_pair = event_attribute + EQUALS + value font_attribute_value_pair = font_attribute + EQUALS + value hint_attribute_value_pair = hint_attribute + EQUALS + value layout_attribute_value_pair = layout_attribute + EQUALS + value generic_attribute_value_pair = attribute + EQUALS + value attribute_value_pair << Group( category_attribute_value_pair
#
# copyright 2006, Paul McGuire
#
from pyparsingOD import oneOf, OneOrMore, printables, StringEnd

test = "The quick brown fox named 'Aloysius' lives at 123 Main Street (and jumps over lazy dogs in his spare time)."

# Every printable character that is not a letter.
nonAlphas = list(filter(lambda c: not c.isalpha(), printables))

print("Extract vowels, consonants, and special characters from this test string:")
print("'%s'" % test)
print()

# First pass: plain results names keep only the last match for each name.
print("Define grammar using normal results names")
print("(only last matching symbol is saved)")
vowels = oneOf(list("aeiouy"), caseless=True).setResultsName("vowels")
cons = oneOf(list("bcdfghjklmnpqrstvwxz"), caseless=True).setResultsName("cons")
other = oneOf(list(nonAlphas)).setResultsName("others")
letters = OneOrMore(cons | vowels | other) + StringEnd()

results = letters.parseString(test)
print(results)
print(results.vowels)
print(results.cons)
print(results.others)
print()

# Second pass: listAllMatches=True accumulates every match under the name.
print("Define grammar using results names, with listAllMatches=True")
print("(all matching symbols are saved)")
vowels = oneOf(list("aeiouy"), caseless=True).setResultsName(
    "vowels", listAllMatches=True
)
(Minor updates by Paul McGuire, June, 2012) ''' from pyparsingOD import Word, ZeroOrMore, printables, Suppress, OneOrMore, Group, \ LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword, \ cStyleComment, Regex, Forward, MatchFirst, And, srange, oneOf, alphas, alphanums, \ delimitedList # http://www.antlr.org/grammar/ANTLR/ANTLRv3.g # Tokens EOL = Suppress(LineEnd()) # $ singleTextString = originalTextFor(ZeroOrMore(~EOL + (White(" \t") | Word(printables)))).leaveWhitespace() XDIGIT = hexnums INT = Word(nums) ESC = Literal('\\') + (oneOf(list(r'nrtbf\">'+"'")) | ('u' + Word(hexnums, exact=4)) | Word(printables, exact=1)) LITERAL_CHAR = ESC | ~(Literal("'") | Literal('\\')) + Word(printables, exact=1) CHAR_LITERAL = Suppress("'") + LITERAL_CHAR + Suppress("'") STRING_LITERAL = Suppress("'") + Combine(OneOrMore(LITERAL_CHAR)) + Suppress("'") DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"' DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(Word(printables, exact=1)) + '>>' TOKEN_REF = Word(alphas.upper(), alphanums+'_') RULE_REF = Word(alphas.lower(), alphanums+'_') ACTION_ESC = (Suppress("\\") + Suppress("'")) | Suppress('\\"') | Suppress('\\') + (~(Literal("'") | Literal('"')) + Word(printables, exact=1)) ACTION_CHAR_LITERAL = Suppress("'") + (ACTION_ESC | ~(Literal('\\') | Literal("'")) + Word(printables, exact=1)) + Suppress("'") ACTION_STRING_LITERAL = Suppress('"') + ZeroOrMore(ACTION_ESC | ~(Literal('\\') | Literal('"')) + Word(printables, exact=1)) + Suppress('"') SRC = Suppress('src') + ACTION_STRING_LITERAL("file") + INT("line") id = TOKEN_REF | RULE_REF SL_COMMENT = Suppress('//') + Suppress('$ANTLR') + SRC | ZeroOrMore(~EOL + Word(printables)) + EOL ML_COMMENT = cStyleComment WS = OneOrMore(Suppress(' ') | Suppress('\t') | (Optional(Suppress('\r')) + Literal('\n')))
#
# cLibHeader.py
#
# A simple parser to extract API doc info from a C header file
#
# Copyright, 2012 - Paul McGuire
#

from pyparsingOD import Word, alphas, alphanums, Combine, oneOf, Optional, delimitedList, Group, Keyword

testdata = """ int func1(float *vec, int len, double arg1); int func2(float **arr, float *vec, int len, double arg1, double arg2); """

# C identifier: a letter followed by letters, digits or underscores.
ident = Word(alphas, alphanums + "_")

# Base type plus any run of '*'; adjacent=False lets whitespace separate
# the type from the stars while still combining them into one token.
vartype = Combine(
    oneOf("float double int char") + Optional(Word("*")),
    adjacent=False,
)

# Comma-separated "type name" pairs, each grouped with named fields.
arglist = delimitedList(Group(vartype("type") + ident("name")))

# Only functions declared as returning int are recognised.
functionCall = (
    Keyword("int")
    + ident("name")
    + "("
    + arglist("args")
    + ")"
    + ";"
)

# scanString yields (tokens, start, end) for every match found in the text.
for fn, s, e in functionCall.scanString(testdata):
    print(fn.name)
    for a in fn.args:
        print(" - %(name)s (%(type)s)" % a)
# Optionally signed run of decimal digits.
integer = Regex(r"[+-]?\d+")

# Punctuation is matched but suppressed from the parse results.
LBRACE, RBRACE, LBRACK, RBRACK, LPAR, RPAR, EQ, SEMI = [
    Suppress(ch) for ch in "{}[]()=;"
]

# For each keyword "foo" a Keyword element named FOO_ is created.
kwds = (
    "message required optional repeated enum extensions extends extend "
    "to package service rpc returns true false option import"
)
for kw in kwds.split():
    exec("%s_ = Keyword('%s')" % (kw.upper(), kw))

# The message body is recursive, so it is declared before it is defined.
messageBody = Forward()

messageDefn = (
    MESSAGE_ - ident("messageId") + LBRACE + messageBody("body") + RBRACE
)

# A built-in scalar type name, or any identifier.
typespec = (
    oneOf(
        "double float int32 int64 uint32 uint64 sint32 sint64 fixed32 "
        "fixed64 sfixed32 sfixed64 bool string bytes"
    )
    | ident
)

rvalue = integer | TRUE_ | FALSE_ | ident

# Bracketed "name = value" directive that may follow a field number.
fieldDirective = LBRACK + Group(ident + EQ + rvalue) + RBRACK

fieldDefn = (
    (REQUIRED_ | OPTIONAL_ | REPEATED_)("fieldQualifier")
    - typespec("typespec")
    + ident("ident")
    + EQ
    + integer("fieldint")
    + ZeroOrMore(fieldDirective)
    + SEMI
)
# Suppressed punctuation tokens.
(LBRACE, RBRACE, LBRACK, RBRACK,
 LPAR, RPAR, EQ, SEMI) = (Suppress(symbol) for symbol in "{}[]()=;")

# Each keyword "foo" becomes a Keyword element bound to the name FOO_.
kwds = ("message required optional repeated enum extensions extends extend "
        "to package service rpc returns true false option import")
for kw in kwds.split():
    exec("%s_ = Keyword('%s')" % (kw.upper(), kw))

# Declared up front because message bodies nest.
messageBody = Forward()

messageDefn = (MESSAGE_ - ident("messageId")
               + LBRACE + messageBody("body") + RBRACE)

typespec = oneOf("double float int32 int64 uint32 uint64 sint32 sint64 "
                 "fixed32 fixed64 sfixed32 sfixed64 bool string bytes") | ident

rvalue = integer | TRUE_ | FALSE_ | ident

# '[' ident '=' rvalue ']'
fieldDirective = LBRACK + Group(ident + EQ + rvalue) + RBRACK

fieldDefn = ((REQUIRED_ | OPTIONAL_ | REPEATED_)("fieldQualifier")
             - typespec("typespec")
             + ident("ident")
             + EQ
             + integer("fieldint")
             + ZeroOrMore(fieldDirective)
             + SEMI)

# enumDefn ::= 'enum' ident '{' { ident '=' integer ';' }* '}'
enumDefn = (ENUM_ - ident
            + LBRACE
            + ZeroOrMore(Group(ident + EQ + integer + SEMI))
            + RBRACE)

# extensionsDefn ::= 'extensions' integer 'to' integer ';'
extensionsDefn = EXTENSIONS_ - integer + TO_ + integer + SEMI

# messageExtension ::= 'extend' ident '{' messageBody '}'
(even binary content) inside the structure. This is done by pre- sizing the data with the NUMBER similar to Dan Bernstein's netstrings setup. SPACE White space is basically ignored. This is interesting because since Stackish is serialized consistently this means you can use \n as the separation character and perform reasonable diffs on two structures. """ from pyparsingOD import Suppress,Word,nums,alphas,alphanums,Combine,oneOf,\ Optional,QuotedString,Forward,Group,ZeroOrMore,printables,srange MARK, UNMARK, AT, COLON, QUOTE = map(Suppress, "[]@:'") NUMBER = Word(nums) NUMBER.setParseAction(lambda t: int(t[0])) FLOAT = Combine(oneOf("+ -") + Word(nums) + "." + Optional(Word(nums))) FLOAT.setParseAction(lambda t: float(t[0])) STRING = QuotedString('"', multiline=True) WORD = Word(alphas, alphanums + "_:") ATTRIBUTE = Combine(AT + WORD) strBody = Forward() def setBodyLength(tokens): strBody << Word(srange(r'[\0x00-\0xffff]'), exact=int(tokens[0])) return "" BLOB = Combine(QUOTE + Word(nums).setParseAction(setBodyLength) + COLON + strBody + QUOTE)
# from pyparsingOD import oneOf, OneOrMore, printables, StringEnd test = "The quick brown fox named 'Aloysius' lives at 123 Main Street (and jumps over lazy dogs in his spare time)." nonAlphas = [c for c in printables if not c.isalpha()] print( "Extract vowels, consonants, and special characters from this test string:" ) print("'" + test + "'") print() print("Define grammar using normal results names") print("(only last matching symbol is saved)") vowels = oneOf(list("aeiouy"), caseless=True).setResultsName("vowels") cons = oneOf(list("bcdfghjklmnpqrstvwxz"), caseless=True).setResultsName("cons") other = oneOf(list(nonAlphas)).setResultsName("others") letters = OneOrMore(cons | vowels | other) + StringEnd() results = letters.parseString(test) print(results) print(results.vowels) print(results.cons) print(results.others) print() print("Define grammar using results names, with listAllMatches=True") print("(all matching symbols are saved)") vowels = oneOf(list("aeiouy"),
def Verilog_BNF():
    """Return the pyparsing grammar for Verilog source, building it once.

    The grammar is constructed on the first call and cached in the
    module-level ``verilogbnf``; later calls return the cached object.
    The top-level expression matches one or more ``module`` or
    ``primitive`` (UDP) definitions followed by end of input, and ignores
    C++-style comments and backquote compiler directives.

    Returns:
        The cached top-level parser element.
    """
    global verilogbnf

    if verilogbnf is None:

        # compiler directives
        compilerDirective = Combine(
            "`"
            + oneOf("define undef ifdef else endif default_nettype "
                    "include resetall timescale unconnected_drive "
                    "nounconnected_drive celldefine endcelldefine")
            + restOfLine
        ).setName("compilerDirective")

        # primitives
        (SEMI, COLON, LPAR, RPAR, LBRACE, RBRACE,
         LBRACK, RBRACK, DOT, COMMA, EQ) = map(Literal, ";:(){}[].,=")

        identLead = alphas + "$_"
        identBody = alphanums + "$_"
        # Every fragment containing a backslash is a raw string so that
        # "\." reaches the regex engine unchanged.
        identifier1 = Regex(
            r"\.?[" + identLead + "][" + identBody
            + r"]*(\.[" + identLead + "][" + identBody + "]*)*"
        ).setName("baseIdent")
        # Escaped identifier: backslash followed by non-blank characters;
        # the parse action strips the leading backslash.
        identifier2 = Regex(r"\\\S+").setParseAction(
            lambda t: t[0][1:]).setName("escapedIdent")
        identifier = identifier1 | identifier2
        # Self-check: comparing an element with a string tests for a match.
        assert(identifier2 == r'\abc')

        hexnums = nums + "abcdefABCDEF" + "_?"
        base = Regex("'[bBoOdDhH]").setName("base")
        basedNumber = Combine(
            Optional(Word(nums + "_")) + base + Word(hexnums + "xXzZ"),
            joinString=" ", adjacent=False
        ).setName("basedNumber")
        number = (
            basedNumber
            | Regex(r"[+-]?[0-9_]+(\.[0-9_]*)?([Ee][+-]?[0-9_]+)?")
        ).setName("numeric")

        expr = Forward().setName("expr")
        concat = Group(LBRACE + delimitedList(expr) + RBRACE)
        multiConcat = Group("{" + expr + concat + "}").setName("multiConcat")
        funcCall = Group(
            identifier + LPAR + Optional(delimitedList(expr)) + RPAR
        ).setName("funcCall")

        subscrRef = Group(LBRACK + delimitedList(expr, COLON) + RBRACK)
        subscrIdentifier = Group(identifier + Optional(subscrRef))
        scalarConst = Regex("0|1('[Bb][01xX])?")
        mintypmaxExpr = Group(
            expr + COLON + expr + COLON + expr).setName("mintypmax")
        primary = (
            number
            | (LPAR + mintypmaxExpr + RPAR)
            | (LPAR + Group(expr) + RPAR).setName("nestedExpr")
            | multiConcat
            | concat
            | dblQuotedString
            | funcCall
            | subscrIdentifier
        )

        unop = oneOf("+ - ! ~ & ~& | ^| ^ ~^").setName("unop")
        binop = oneOf("+ - * / % == != === !== && "
                      "|| < <= > >= & | ^ ^~ >> << ** <<< >>>").setName("binop")

        expr << (
            (unop + expr)  # must be first!
            | (primary + "?" + expr + COLON + expr)
            | (primary + Optional(binop + expr))
        )

        lvalue = subscrIdentifier | concat

        # keywords
        if_ = Keyword("if")
        else_ = Keyword("else")
        edge = Keyword("edge")
        posedge = Keyword("posedge")
        negedge = Keyword("negedge")
        specify = Keyword("specify")
        endspecify = Keyword("endspecify")
        fork = Keyword("fork")
        join = Keyword("join")
        begin = Keyword("begin")
        end = Keyword("end")
        default = Keyword("default")
        forever = Keyword("forever")
        repeat = Keyword("repeat")
        while_ = Keyword("while")
        for_ = Keyword("for")
        case = oneOf("case casez casex")
        endcase = Keyword("endcase")
        wait = Keyword("wait")
        disable = Keyword("disable")
        deassign = Keyword("deassign")
        force = Keyword("force")
        release = Keyword("release")
        assign = Keyword("assign")

        eventExpr = Forward()
        eventTerm = (
            (posedge + expr)
            | (negedge + expr)
            | expr
            | (LPAR + eventExpr + RPAR)
        )
        eventExpr << (
            Group(delimitedList(eventTerm, Keyword("or")))
        )
        eventControl = Group(
            "@" + ((LPAR + eventExpr + RPAR) | identifier | "*")
        ).setName("eventCtrl")

        delayArg = (
            number
            | Word(alphanums + "$_")
            | (LPAR + Group(delimitedList(mintypmaxExpr | expr)) + RPAR)
        ).setName("delayArg")
        delay = Group("#" + delayArg).setName("delay")
        delayOrEventControl = delay | eventControl

        assgnmt = Group(
            lvalue + EQ + Optional(delayOrEventControl) + expr
        ).setName("assgnmt")
        nbAssgnmt = Group(
            (lvalue + "<=" + Optional(delay) + expr)
            | (lvalue + "<=" + Optional(eventControl) + expr)
        ).setName("nbassgnmt")

        # NOTE: this local name shadows the builtin ``range`` for the rest
        # of this function; the builtin is not used below.
        range = LBRACK + expr + COLON + expr + RBRACK

        paramAssgnmt = Group(identifier + EQ + expr).setName("paramAssgnmt")
        parameterDecl = Group(
            "parameter" + Optional(range) + delimitedList(paramAssgnmt) + SEMI
        ).setName("paramDecl")

        inputDecl = Group(
            "input" + Optional(range) + delimitedList(identifier) + SEMI)
        outputDecl = Group(
            "output" + Optional(range) + delimitedList(identifier) + SEMI)
        inoutDecl = Group(
            "inout" + Optional(range) + delimitedList(identifier) + SEMI)

        regIdentifier = Group(
            identifier + Optional(LBRACK + expr + COLON + expr + RBRACK))
        regDecl = Group(
            "reg" + Optional("signed") + Optional(range)
            + delimitedList(regIdentifier) + SEMI
        ).setName("regDecl")
        timeDecl = Group("time" + delimitedList(regIdentifier) + SEMI)
        integerDecl = Group("integer" + delimitedList(regIdentifier) + SEMI)

        strength0 = oneOf("supply0 strong0 pull0 weak0 highz0")
        strength1 = oneOf("supply1 strong1 pull1 weak1 highz1")
        driveStrength = Group(
            LPAR
            + ((strength0 + COMMA + strength1)
               | (strength1 + COMMA + strength0))
            + RPAR
        ).setName("driveStrength")
        nettype = oneOf("wire tri tri1 supply0 wand triand tri0 supply1 "
                        "wor trior trireg")
        expandRange = Optional(oneOf("scalared vectored")) + range
        realDecl = Group("real" + delimitedList(identifier) + SEMI)

        eventDecl = Group("event" + delimitedList(identifier) + SEMI)

        blockDecl = (
            parameterDecl
            | regDecl
            | integerDecl
            | realDecl
            | timeDecl
            | eventDecl
        )

        stmt = Forward().setName("stmt")
        stmtOrNull = stmt | SEMI
        caseItem = (
            (delimitedList(expr) + COLON + stmtOrNull)
            | (default + Optional(":") + stmtOrNull)
        )
        stmt << Group(
            (begin + Group(ZeroOrMore(stmt)) + end).setName("begin-end")
            | (if_ + Group(LPAR + expr + RPAR) + stmtOrNull
               + Optional(else_ + stmtOrNull)).setName("if")
            | (delayOrEventControl + stmtOrNull)
            | (case + LPAR + expr + RPAR + OneOrMore(caseItem) + endcase)
            | (forever + stmt)
            | (repeat + LPAR + expr + RPAR + stmt)
            | (while_ + LPAR + expr + RPAR + stmt)
            | (for_ + LPAR + assgnmt + SEMI + Group(expr) + SEMI
               + assgnmt + RPAR + stmt)
            | (fork + ZeroOrMore(stmt) + join)
            # A named parallel block is closed by 'join', exactly like the
            # unnamed form above ('end' only closes begin-blocks).
            | (fork + COLON + identifier + ZeroOrMore(blockDecl)
               + ZeroOrMore(stmt) + join)
            | (wait + LPAR + expr + RPAR + stmtOrNull)
            | ("->" + identifier + SEMI)
            | (disable + identifier + SEMI)
            | (assign + assgnmt + SEMI)
            | (deassign + lvalue + SEMI)
            | (force + assgnmt + SEMI)
            | (release + lvalue + SEMI)
            | (begin + COLON + identifier + ZeroOrMore(blockDecl)
               + ZeroOrMore(stmt) + end).setName("begin:label-end")
            # these *have* to go at the end of the list!!!
            | (assgnmt + SEMI)
            | (nbAssgnmt + SEMI)
            | (Combine(Optional("$") + identifier)
               + Optional(LPAR + delimitedList(expr | empty) + RPAR) + SEMI)
        ).setName("stmtBody")
        """
        x::=<blocking_assignment> ;
        x||= <non_blocking_assignment> ;
        x||= if ( <expression> ) <statement_or_null>
        x||= if ( <expression> ) <statement_or_null> else <statement_or_null>
        x||= case ( <expression> ) <case_item>+ endcase
        x||= casez ( <expression> ) <case_item>+ endcase
        x||= casex ( <expression> ) <case_item>+ endcase
        x||= forever <statement>
        x||= repeat ( <expression> ) <statement>
        x||= while ( <expression> ) <statement>
        x||= for ( <assignment> ; <expression> ; <assignment> ) <statement>
        x||= <delay_or_event_control> <statement_or_null>
        x||= wait ( <expression> ) <statement_or_null>
        x||= -> <name_of_event> ;
        x||= <seq_block>
        x||= <par_block>
        x||= <task_enable>
        x||= <system_task_enable>
        x||= disable <name_of_task> ;
        x||= disable <name_of_block> ;
        x||= assign <assignment> ;
        x||= deassign <lvalue> ;
        x||= force <assignment> ;
        x||= release <lvalue> ;
        """
        alwaysStmt = Group(
            "always" + Optional(eventControl) + stmt).setName("alwaysStmt")
        initialStmt = Group("initial" + stmt).setName("initialStmt")

        chargeStrength = Group(
            LPAR + oneOf("small medium large") + RPAR
        ).setName("chargeStrength")

        continuousAssign = Group(
            assign + Optional(driveStrength) + Optional(delay)
            + delimitedList(assgnmt) + SEMI
        ).setName("continuousAssign")

        tfDecl = (
            parameterDecl
            | inputDecl
            | outputDecl
            | inoutDecl
            | regDecl
            | timeDecl
            | integerDecl
            | realDecl
        )

        functionDecl = Group(
            "function" + Optional(range | "integer" | "real")
            + identifier + SEMI
            + Group(OneOrMore(tfDecl))
            + Group(ZeroOrMore(stmt))
            + "endfunction"
        )

        inputOutput = oneOf("input output")
        netDecl1Arg = (
            nettype
            + Optional(expandRange)
            + Optional(delay)
            + Group(delimitedList(~inputOutput + identifier))
        )
        netDecl2Arg = (
            "trireg"
            + Optional(chargeStrength)
            + Optional(expandRange)
            + Optional(delay)
            + Group(delimitedList(~inputOutput + identifier))
        )
        netDecl3Arg = (
            nettype
            + Optional(driveStrength)
            + Optional(expandRange)
            + Optional(delay)
            + Group(delimitedList(assgnmt))
        )
        netDecl1 = Group(netDecl1Arg + SEMI).setName("netDecl1")
        netDecl2 = Group(netDecl2Arg + SEMI).setName("netDecl2")
        netDecl3 = Group(netDecl3Arg + SEMI).setName("netDecl3")

        gateType = oneOf("and nand or nor xor xnor buf bufif0 bufif1 "
                         "not notif0 notif1 pulldown pullup nmos rnmos "
                         "pmos rpmos cmos rcmos tran rtran tranif0 "
                         "rtranif0 tranif1 rtranif1")
        gateInstance = (
            Optional(Group(identifier + Optional(range)))
            + LPAR + Group(delimitedList(expr)) + RPAR
        )
        gateDecl = Group(
            gateType
            + Optional(driveStrength)
            + Optional(delay)
            + delimitedList(gateInstance)
            + SEMI
        )

        udpInstance = Group(
            Group(identifier + Optional(range | subscrRef))
            + LPAR + Group(delimitedList(expr)) + RPAR
        )
        udpInstantiation = Group(
            identifier
            - Optional(driveStrength)
            + Optional(delay)
            + delimitedList(udpInstance)
            + SEMI
        ).setName("udpInstantiation")

        parameterValueAssignment = Group(
            Literal("#") + LPAR + Group(delimitedList(expr)) + RPAR)
        namedPortConnection = Group(
            DOT + identifier + LPAR + expr + RPAR
        ).setName("namedPortConnection")
        # Self-check: an escaped identifier works as a named port.
        assert(r'.\abc (abc )' == namedPortConnection)
        modulePortConnection = expr | empty
        inst_args = Group(
            LPAR
            + (delimitedList(namedPortConnection)
               | delimitedList(modulePortConnection))
            + RPAR
        ).setName("inst_args")
        moduleInstance = Group(
            Group(identifier + Optional(range)) + inst_args
        ).setName("moduleInstance")

        moduleInstantiation = Group(
            identifier
            + Optional(parameterValueAssignment)
            + delimitedList(moduleInstance).setName("moduleInstanceList")
            + SEMI
        ).setName("moduleInstantiation")

        parameterOverride = Group(
            "defparam" + delimitedList(paramAssgnmt) + SEMI)
        task = Group(
            "task" + identifier + SEMI
            + ZeroOrMore(tfDecl)
            + stmtOrNull
            + "endtask"
        )

        specparamDecl = Group(
            "specparam" + delimitedList(paramAssgnmt) + SEMI)

        pathDescr1 = Group(
            LPAR + subscrIdentifier + "=>" + subscrIdentifier + RPAR)
        pathDescr2 = Group(
            LPAR + Group(delimitedList(subscrIdentifier)) + "*>"
            + Group(delimitedList(subscrIdentifier)) + RPAR)
        pathDescr3 = Group(
            LPAR + Group(delimitedList(subscrIdentifier)) + "=>"
            + Group(delimitedList(subscrIdentifier)) + RPAR)
        pathDelayValue = Group(
            (LPAR + Group(delimitedList(mintypmaxExpr | expr)) + RPAR)
            | mintypmaxExpr
            | expr
        )
        pathDecl = Group(
            (pathDescr1 | pathDescr2 | pathDescr3)
            + EQ + pathDelayValue + SEMI
        ).setName("pathDecl")

        portConditionExpr = Forward()
        portConditionTerm = Optional(unop) + subscrIdentifier
        portConditionExpr << (
            portConditionTerm + Optional(binop + portConditionExpr))
        polarityOp = oneOf("+ -")
        levelSensitivePathDecl1 = Group(
            if_ + Group(LPAR + portConditionExpr + RPAR)
            + subscrIdentifier + Optional(polarityOp) + "=>"
            + subscrIdentifier + EQ
            + pathDelayValue
            + SEMI
        )
        levelSensitivePathDecl2 = Group(
            if_ + Group(LPAR + portConditionExpr + RPAR)
            + LPAR + Group(delimitedList(subscrIdentifier))
            + Optional(polarityOp) + "*>"
            + Group(delimitedList(subscrIdentifier)) + RPAR + EQ
            + pathDelayValue
            + SEMI
        )
        levelSensitivePathDecl = (
            levelSensitivePathDecl1 | levelSensitivePathDecl2)

        edgeIdentifier = posedge | negedge
        edgeSensitivePathDecl1 = Group(
            Optional(if_ + Group(LPAR + expr + RPAR))
            + LPAR + Optional(edgeIdentifier)
            + subscrIdentifier + "=>"
            + LPAR + subscrIdentifier + Optional(polarityOp) + COLON
            + expr + RPAR + RPAR
            + EQ
            + pathDelayValue
            + SEMI
        )
        edgeSensitivePathDecl2 = Group(
            Optional(if_ + Group(LPAR + expr + RPAR))
            + LPAR + Optional(edgeIdentifier)
            + subscrIdentifier + "*>"
            + LPAR + delimitedList(subscrIdentifier)
            + Optional(polarityOp) + COLON + expr + RPAR + RPAR
            + EQ
            + pathDelayValue
            + SEMI
        )
        edgeSensitivePathDecl = (
            edgeSensitivePathDecl1 | edgeSensitivePathDecl2)

        edgeDescr = oneOf("01 10 0x x1 1x x0").setName("edgeDescr")

        timCheckEventControl = Group(
            posedge
            | negedge
            | (edge + LBRACK + delimitedList(edgeDescr) + RBRACK)
        )
        timCheckCond = Forward()
        timCondBinop = oneOf("== === != !==")
        timCheckCondTerm = (
            (expr + timCondBinop + scalarConst)
            | (Optional("~") + expr)
        )
        timCheckCond << (
            (LPAR + timCheckCond + RPAR)
            | timCheckCondTerm
        )
        timCheckEvent = Group(
            Optional(timCheckEventControl)
            + subscrIdentifier
            + Optional("&&&" + timCheckCond)
        )
        timCheckLimit = expr
        controlledTimingCheckEvent = Group(
            timCheckEventControl
            + subscrIdentifier
            + Optional("&&&" + timCheckCond)
        )
        notifyRegister = identifier

        systemTimingCheck1 = Group(
            "$setup"
            + LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA
            + timCheckLimit
            + Optional(COMMA + notifyRegister) + RPAR
            + SEMI
        )
        systemTimingCheck2 = Group(
            "$hold"
            + LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA
            + timCheckLimit
            + Optional(COMMA + notifyRegister) + RPAR
            + SEMI
        )
        systemTimingCheck3 = Group(
            "$period"
            + LPAR + controlledTimingCheckEvent + COMMA + timCheckLimit
            + Optional(COMMA + notifyRegister) + RPAR
            + SEMI
        )
        systemTimingCheck4 = Group(
            "$width"
            + LPAR + controlledTimingCheckEvent + COMMA + timCheckLimit
            + Optional(COMMA + expr + COMMA + notifyRegister) + RPAR
            + SEMI
        )
        systemTimingCheck5 = Group(
            "$skew"
            + LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA
            + timCheckLimit
            + Optional(COMMA + notifyRegister) + RPAR
            + SEMI
        )
        systemTimingCheck6 = Group(
            "$recovery"
            + LPAR + controlledTimingCheckEvent + COMMA
            + timCheckEvent + COMMA + timCheckLimit
            + Optional(COMMA + notifyRegister) + RPAR
            + SEMI
        )
        systemTimingCheck7 = Group(
            "$setuphold"
            + LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA
            + timCheckLimit + COMMA + timCheckLimit
            + Optional(COMMA + notifyRegister) + RPAR
            + SEMI
        )
        # The '$' lookahead rejects non-system-task input before the seven
        # alternatives are tried.
        systemTimingCheck = (
            FollowedBy('$')
            + (systemTimingCheck1 | systemTimingCheck2 | systemTimingCheck3
               | systemTimingCheck4 | systemTimingCheck5 | systemTimingCheck6
               | systemTimingCheck7)
        ).setName("systemTimingCheck")

        sdpd = (
            if_ + Group(LPAR + expr + RPAR)
            + (pathDescr1 | pathDescr2) + EQ + pathDelayValue + SEMI
        )

        specifyItem = ~Keyword("endspecify") + (
            specparamDecl
            | pathDecl
            | levelSensitivePathDecl
            | edgeSensitivePathDecl
            | systemTimingCheck
            | sdpd
        )
        """
        x::= <specparam_declaration>
        x||= <path_declaration>
        x||= <level_sensitive_path_declaration>
        x||= <edge_sensitive_path_declaration>
        x||= <system_timing_check>
        x||= <sdpd>
        """
        specifyBlock = Group(
            "specify" + ZeroOrMore(specifyItem) + "endspecify"
        ).setName("specifyBlock")

        moduleItem = ~Keyword("endmodule") + (
            parameterDecl
            | inputDecl
            | outputDecl
            | inoutDecl
            | regDecl
            | netDecl3
            | netDecl1
            | netDecl2
            | timeDecl
            | integerDecl
            | realDecl
            | eventDecl
            | gateDecl
            | parameterOverride
            | continuousAssign
            | specifyBlock
            | initialStmt
            | alwaysStmt
            | task
            | functionDecl
            # these have to be at the end - they start with identifiers
            | moduleInstantiation
            | udpInstantiation
        )
        """
        All possible moduleItems, from Verilog grammar spec
        x::= <parameter_declaration>
        x||= <input_declaration>
        x||= <output_declaration>
        x||= <inout_declaration>
        ?||= <net_declaration>  (spec does not seem consistent for this item)
        x||= <reg_declaration>
        x||= <time_declaration>
        x||= <integer_declaration>
        x||= <real_declaration>
        x||= <event_declaration>
        x||= <gate_declaration>
        x||= <UDP_instantiation>
        x||= <module_instantiation>
        x||= <parameter_override>
        x||= <continuous_assign>
        x||= <specify_block>
        x||= <initial_statement>
        x||= <always_statement>
        x||= <task>
        x||= <function>
        """
        portRef = subscrIdentifier
        portExpr = portRef | Group(LBRACE + delimitedList(portRef) + RBRACE)
        port = portExpr | Group((DOT + identifier + LPAR + portExpr + RPAR))

        moduleHdr = Group(
            oneOf("module macromodule") + identifier
            + Optional(
                LPAR
                + Group(Optional(delimitedList(
                    Group(oneOf("input output")
                          + (netDecl1Arg | netDecl2Arg | netDecl3Arg))
                    | port)))
                + RPAR)
            + SEMI
        ).setName("moduleHdr")

        module = Group(
            moduleHdr
            + Group(ZeroOrMore(moduleItem))
            + "endmodule"
        ).setName("module")

        udpDecl = outputDecl | inputDecl | regDecl
        udpInitVal = (
            Regex("1'[bB][01xX]") | Regex("[01xX]")).setName("udpInitVal")
        udpInitialStmt = Group(
            "initial" + identifier + EQ + udpInitVal + SEMI
        ).setName("udpInitialStmt")

        levelSymbol = oneOf("0 1 x X ? b B")
        levelInputList = Group(
            OneOrMore(levelSymbol).setName("levelInpList"))
        outputSymbol = oneOf("0 1 x X")
        combEntry = Group(levelInputList + COLON + outputSymbol + SEMI)
        edgeSymbol = oneOf("r R f F p P n N *")
        # From here on ``edge`` is the UDP table edge entry; the 'edge'
        # keyword element above has already been used where it is needed.
        edge = (
            Group(LPAR + levelSymbol + levelSymbol + RPAR)
            | Group(edgeSymbol)
        )
        edgeInputList = Group(
            ZeroOrMore(levelSymbol) + edge + ZeroOrMore(levelSymbol))
        inputList = levelInputList | edgeInputList
        seqEntry = Group(
            inputList + COLON + levelSymbol + COLON
            + (outputSymbol | "-") + SEMI
        ).setName("seqEntry")
        udpTableDefn = Group(
            "table" + OneOrMore(combEntry | seqEntry) + "endtable"
        ).setName("table")

        """
        <UDP>
        ::= primitive <name_of_UDP> ( <name_of_variable> <,<name_of_variable>>* ) ;
                <UDP_declaration>+
                <UDP_initial_statement>?
                <table_definition>
                endprimitive
        """
        udp = Group(
            "primitive" + identifier
            + LPAR + Group(delimitedList(identifier)) + RPAR + SEMI
            + OneOrMore(udpDecl)
            + Optional(udpInitialStmt)
            + udpTableDefn
            + "endprimitive"
        )

        verilogbnf = OneOrMore(module | udp) + StringEnd()

        verilogbnf.ignore(cppStyleComment)
        verilogbnf.ignore(compilerDirective)

    return verilogbnf
Combine, Literal, QuotedString) EQ,EXCL,LPAR,RPAR,COLON,COMMA = map(Suppress, '=!():,') EXCL, DOLLAR = map(Literal,"!$") sheetRef = Word(alphas, alphanums) | QuotedString("'",escQuote="''") colRef = Optional(DOLLAR) + Word(alphas,max=2) rowRef = Optional(DOLLAR) + Word(nums) cellRef = Combine(Group(Optional(sheetRef + EXCL)("sheet") + colRef("col") + rowRef("row"))) cellRange = (Group(cellRef("start") + COLON + cellRef("end"))("range") | cellRef | Word(alphas,alphanums)) expr = Forward() COMPARISON_OP = oneOf("< = > >= <= != <>") condExpr = expr + COMPARISON_OP + expr ifFunc = (CaselessKeyword("if") + LPAR + Group(condExpr)("condition") + COMMA + expr("if_true") + COMMA + expr("if_false") + RPAR) statFunc = lambda name : CaselessKeyword(name) + LPAR + delimitedList(expr) + RPAR sumFunc = statFunc("sum") minFunc = statFunc("min") maxFunc = statFunc("max") aveFunc = statFunc("ave") funcCall = ifFunc | sumFunc | minFunc | maxFunc | aveFunc
BEGIN = Literal("BEGIN:").suppress() END = Literal("END:").suppress() str = printables + "\xe4\xf6\xe5\xd6\xc4\xc5" valstr = str + " " EQ = Literal("=").suppress() SEMI = Literal(";").suppress() COLON = Literal(":").suppress() EVENT = Literal("VEVENT").suppress() CALENDAR = Literal("VCALENDAR").suppress() ALARM = Literal("VALARM").suppress() # TOKENS CALPROP = oneOf("VERSION PRODID METHOD") ALMPROP = oneOf("TRIGGER") EVTPROP = oneOf("X-MOZILLA-RECUR-DEFAULT-INTERVAL \ X-MOZILLA-RECUR-DEFAULT-UNITS \ UID DTSTAMP LAST-MODIFIED X RRULE EXDATE") propval = Word(valstr) typeval = Word(valstr) typename = oneOf("VALUE MEMBER FREQ UNTIL INTERVAL") proptype = Group(SEMI + typename + EQ + typeval).suppress() calprop = Group(CALPROP + ZeroOrMore(proptype) + COLON + propval) almprop = Group(ALMPROP + ZeroOrMore(proptype) + COLON + propval) evtprop = Group(EVTPROP + ZeroOrMore(proptype) + COLON + propval).suppress() \ | "CATEGORIES" + COLON + propval.setResultsName("categories") \
(even binary content) inside the structure. This is done by pre- sizing the data with the NUMBER similar to Dan Bernstein's netstrings setup. SPACE White space is basically ignored. This is interesting because since Stackish is serialized consistently this means you can use \n as the separation character and perform reasonable diffs on two structures. """ from pyparsingOD import Suppress,Word,nums,alphas,alphanums,Combine,oneOf,\ Optional,QuotedString,Forward,Group,ZeroOrMore,printables,srange MARK,UNMARK,AT,COLON,QUOTE = map(Suppress,"[]@:'") NUMBER = Word(nums) NUMBER.setParseAction(lambda t:int(t[0])) FLOAT = Combine(oneOf("+ -") + Word(nums) + "." + Optional(Word(nums))) FLOAT.setParseAction(lambda t:float(t[0])) STRING = QuotedString('"', multiline=True) WORD = Word(alphas,alphanums+"_:") ATTRIBUTE = Combine(AT + WORD) strBody = Forward() def setBodyLength(tokens): strBody << Word(srange(r'[\0x00-\0xffff]'), exact=int(tokens[0])) return "" BLOB = Combine(QUOTE + Word(nums).setParseAction(setBodyLength) + COLON + strBody + QUOTE) item = Forward() def assignUsing(s): def assignPA(tokens):
val2 = val.eval() if not fn(val1, val2): break val1 = val2 else: return True return False # define the parser integer = Word(nums) real = Combine(Word(nums) + "." + Word(nums)) variable = Word(alphas, exact=1) operand = real | integer | variable signop = oneOf('+ -') multop = oneOf('* /') plusop = oneOf('+ -') expop = Literal('**') # use parse actions to attach EvalXXX constructors to sub-expressions operand.setParseAction(EvalConstant) arith_expr = operatorPrecedence(operand, [ (signop, 1, opAssoc.RIGHT, EvalSignOp), (expop, 2, opAssoc.LEFT, EvalPowerOp), (multop, 2, opAssoc.LEFT, EvalMultOp), (plusop, 2, opAssoc.LEFT, EvalAddOp), ]) comparisonop = oneOf("< <= > >= != = <> LT GT LE GE EQ NE") comp_expr = operatorPrecedence(arith_expr, [
#
# cLibHeader.py
#
# A simple parser to extract API doc info from a C header file
#
# Copyright, 2012 - Paul McGuire
#
from pyparsingOD import (
    Word, alphas, alphanums, Combine, oneOf, Optional, delimitedList, Group,
    Keyword,
)

# Sample header text that the demo loop at the bottom scans.
testdata = """
  int func1(float *vec, int len, double arg1);
  int func2(float **arr, float *vec, int len, double arg1, double arg2);
  """

# Identifier: starts with a letter, continues with letters, digits or "_".
ident = Word(alphas, alphanums + "_")

# A type is one of four base type names optionally followed by a run of "*".
# Combine joins the pieces into a single token; adjacent=False lets the
# pieces be separated by whitespace in the input (e.g. "float *").
base_type = oneOf("float double int char")
pointer_stars = Optional(Word("*"))
vartype = Combine(base_type + pointer_stars, adjacent=False)

# Each argument is grouped so its "type" and "name" results stay together.
argument = Group(vartype.setResultsName("type") + ident.setResultsName("name"))
arglist = delimitedList(argument)

# Only declarations whose return type is the keyword "int" are matched.
functionCall = (
    Keyword("int")
    + ident.setResultsName("name")
    + "("
    + arglist.setResultsName("args")
    + ")"
    + ";"
)

# scanString yields (tokens, start, end) for every match found in the text;
# only the tokens are used here.
for match, _start, _end in functionCall.scanString(testdata):
    print(match.name)
    for arg in match.args:
        print(" - %(name)s (%(type)s)" % arg)
adjacent=False).setParseAction(lambda s, l, t: "'%s'" % t[0]) string_value = delphi_string | base16_value list_value = LBRACE + Optional( Group(delimitedList(identifier | number_value | string_value))) + RBRACE paren_list_value = LPAREN + ZeroOrMore(identifier | number_value | string_value) + RPAREN item_list_entry = ITEM + ZeroOrMore(attribute_value_pair) + END item_list = LANGLE + ZeroOrMore(item_list_entry) + RANGLE generic_value = identifier value = item_list | number_value | string_value | list_value | paren_list_value | generic_value category_attribute = CATEGORIES + PERIOD + oneOf( "strings itemsvisibles visibles", True) event_attribute = oneOf( "onactivate onclosequery onclose oncreate ondeactivate onhide onshow", True) font_attribute = FONT + PERIOD + oneOf("charset color height name style", True) hint_attribute = HINT layout_attribute = oneOf("left top width height", True) generic_attribute = identifier attribute = (category_attribute | event_attribute | font_attribute | hint_attribute | layout_attribute | generic_attribute) category_attribute_value_pair = category_attribute + EQUALS + paren_list_value event_attribute_value_pair = event_attribute + EQUALS + value font_attribute_value_pair = font_attribute + EQUALS + value hint_attribute_value_pair = hint_attribute + EQUALS + value layout_attribute_value_pair = layout_attribute + EQUALS + value