def KeyValue(key_type, val_type):
    """Build a ``key:value`` expression whose two halves carry results names.

    Args:
        key_type: Parser element for the key, or a string, which is wrapped
            in ``Literal``.
        val_type: Parser element for the value, or a string, which is
            wrapped in ``Literal``.

    Returns:
        ``key_type + ":" + val_type`` where the key part is registered under
        the results name ``"key"`` and the value part under ``"value"``.
    """
    # isinstance() so that str subclasses are wrapped as well.
    if isinstance(key_type, str):
        key_type = Literal(key_type)
    if isinstance(val_type, str):
        val_type = Literal(val_type)
    key_type = key_type.setResultsName("key")
    val_type = val_type.setResultsName("value")
    return key_type + ":" + val_type
def _create_parser(self):
    """Assemble and return the pyparsing grammar for an IDSL document.

    The grammar is a list of ``import "path";`` statements (results name
    ``imports``) followed by one ``module`` (results name ``module``) whose
    body holds struct, enum, exception, dictionary, sequence and interface
    definitions. C++-style comments are ignored.

    NOTE(review): keywords and punctuation are built with ``Word``, which
    matches any run of the given characters rather than the exact text --
    confirm that this leniency is intended.
    """
    def punct(chars):
        # Punctuation is matched but dropped from the results.
        return Suppress(Word(chars))

    semi = punct(";")
    dquote = punct("\"")
    lbrace = punct("{")
    rbrace = punct("}")
    lparen = punct("(")
    rparen = punct(")")
    langle = punct("<")
    rangle = punct(">")
    equals = punct("=")

    name_token = Word(alphas + "_", alphanums + "_")
    type_token = Word(alphas + "_", alphanums + "_:")

    def braced_text(keyword):
        # <keyword> <name> { <raw text> };  -- shared by enum and exception.
        return (
            Word(keyword).setResultsName('type')
            + name_token.setResultsName('name')
            + lbrace
            + CharsNotIn("{}").setResultsName('content')
            + rbrace
            + semi
        )

    # One struct member: <type> <identifier> [=] [<default>] ;
    struct_field = Group(
        type_token.setResultsName('type')
        + name_token.setResultsName('identifier')
        + Optional(equals)
        + Optional(CharsNotIn(";").setResultsName('defaultValue'))
        + semi
    )
    struct_fields = Group(OneOrMore(struct_field))

    # Imports
    import_stmt = (
        Suppress(Word("import"))
        + dquote
        + CharsNotIn("\";").setResultsName('path')
        + dquote
        + semi
    )
    import_list = ZeroOrMore(import_stmt)

    # Type definitions
    struct_def = (
        Word("struct").setResultsName('type')
        + name_token.setResultsName('name')
        + lbrace
        + struct_fields.setResultsName("structIdentifiers")
        + rbrace
        + semi
    )
    dictionary_def = (
        Word("dictionary").setResultsName('type')
        + langle
        + CharsNotIn("<>").setResultsName('content')
        + rangle
        + name_token.setResultsName('name')
        + semi
    )
    sequence_def = (
        Word("sequence").setResultsName('type')
        + langle
        + type_token.setResultsName('typeSequence')
        + rangle
        + name_token.setResultsName('name')
        + semi
    )
    enum_def = braced_text("enum")
    exception_def = braced_text("exception")

    # Interface methods
    throws_clause = (
        Suppress(Word("throws"))
        + type_token
        + ZeroOrMore(Literal(',') + type_token)
    )
    decorator = Literal('idempotent') | Literal('out')
    return_type = type_token.setResultsName('ret')

    param = Group(
        Optional(decorator.setResultsName('decorator'))
        + type_token.setResultsName('type')
        + name_token.setResultsName('name')
    )
    param_list = param + ZeroOrMore(Suppress(Word(',')) + param)

    method = Group(
        Optional(decorator.setResultsName('decorator'))
        + return_type.setResultsName('ret')
        + type_token.setResultsName('name')
        + lparen
        + Optional(param_list).setResultsName('params')
        + rparen
        + Optional(throws_clause.setResultsName('raise'))
        + semi
    )
    interface_def = (
        Word('interface').setResultsName('type')
        + type_token.setResultsName('name')
        + lbrace
        + Group(ZeroOrMore(method)).setResultsName('methods')
        + rbrace
        + semi
    )

    # Module wrapper
    definition = Group(
        struct_def | enum_def | exception_def
        | dictionary_def | sequence_def | interface_def
    )
    module = (
        Suppress(Word("module"))
        + name_token.setResultsName("name")
        + lbrace
        + ZeroOrMore(definition).setResultsName("contents")
        + rbrace
        + semi
    )

    grammar = import_list.setResultsName("imports") + module.setResultsName("module")
    grammar.ignore(cppStyleComment)
    return grammar
def get_parser():
    """Return the shared section parser, building it on first use.

    The parser is cached on ``Section._parser``.
    @see grammar.md for the whole grammar.
    """
    if Section._parser is not None:
        return Section._parser

    # First call: assemble the grammar and cache it.
    section_kind = Literal("V") | Literal("C") | Literal("@")
    header = (
        OPEN_HEAD
        + section_kind.setResultsName("type")
        + INTEGER.setResultsName("id")
        + CLOSE_HEAD
        + EOL
    )
    body = OneOrMore(WORDS + EOL)
    Section._parser = Group(header + body.setResultsName("content"))
    return Section._parser
def fromString(inputText, verbose=False):
    """Parse IDSL source text.

    Args:
        inputText: IDSL source. ``/* ... */`` blocks are stripped before
            parsing, and the grammar ignores C++-style comments.
        verbose: When true, the flag's value is printed.

    Returns:
        The result of ``IDSLParsing.module`` applied to the parse tree.

    NOTE(review): keywords and punctuation are built with ``Word``, which
    matches any run of the given characters rather than the exact text --
    confirm that this leniency is intended.
    """
    if verbose:
        # One pre-formatted argument gives the same output whether ``print``
        # is the Python 2 statement or the Python 3 function.
        print('Verbose: %s' % (verbose,))

    text = nestedExpr("/*", "*/").suppress().transformString(inputText)

    semicolon = Suppress(Word(";"))
    quote = Suppress(Word("\""))
    op = Suppress(Word("{"))
    cl = Suppress(Word("}"))
    opp = Suppress(Word("("))
    clp = Suppress(Word(")"))
    lt = Suppress(Word("<"))
    gt = Suppress(Word(">"))
    identifier = Word(alphas+"_", alphanums+"_")
    typeIdentifier = Word(alphas+"_", alphanums+"_:")

    ## Imports
    idslImport = (Suppress(Word("import")) + quote
                  + CharsNotIn("\";").setResultsName('path') + quote + semicolon)
    idslImports = ZeroOrMore(idslImport)

    # Type definitions: only the name is captured under a results name.
    dictionaryDef = (Word("dictionary") + lt + CharsNotIn("<>;") + gt
                     + identifier.setResultsName('name') + semicolon)
    sequenceDef = (Word("sequence") + lt + CharsNotIn("<>;") + gt
                   + identifier.setResultsName('name') + semicolon)
    enumDef = (Word("enum") + identifier.setResultsName('name')
               + op + CharsNotIn("{}") + cl + semicolon)
    structDef = (Word("struct") + identifier.setResultsName('name')
                 + op + CharsNotIn("{}") + cl + semicolon)
    exceptionDef = (Word("exception") + identifier.setResultsName('name')
                    + op + CharsNotIn("{}") + cl + semicolon)

    # Interface methods
    raiseDef = (Suppress(Word("throws")) + typeIdentifier
                + ZeroOrMore(Literal(',') + typeIdentifier))
    decoratorDef = Literal('idempotent') | Literal('out')
    retValDef = typeIdentifier.setResultsName('ret')

    firstParam = Group(Optional(decoratorDef.setResultsName('decorator'))
                       + typeIdentifier.setResultsName('type')
                       + identifier.setResultsName('name'))
    nextParam = Suppress(Word(',')) + firstParam
    params = firstParam + ZeroOrMore(nextParam)

    remoteMethodDef = Group(Optional(decoratorDef) + retValDef
                            + typeIdentifier.setResultsName('name')
                            + opp + Optional(params).setResultsName('params') + clp
                            + Optional(raiseDef) + semicolon)
    interfaceDef = (Word("interface") + typeIdentifier.setResultsName('name')
                    + op + Group(ZeroOrMore(remoteMethodDef)) + cl + semicolon)

    # Module wrapper
    moduleContent = Group(structDef | enumDef | exceptionDef
                          | dictionaryDef | sequenceDef | interfaceDef)
    module = (Suppress(Word("module")) + identifier.setResultsName("name")
              + op + ZeroOrMore(moduleContent).setResultsName("contents")
              + cl + semicolon)

    IDSL = idslImports.setResultsName("imports") + module.setResultsName("module")
    IDSL.ignore(cppStyleComment)
    tree = IDSL.parseString(text)
    return IDSLParsing.module(tree)
def genParseObject(self, cLocale):
    """Return a parser for a date separator, named ``datesep``.

    Accepts ``-``, ``/`` or ``.``. ``cLocale`` is not used.
    """
    dash, slash, dot = Literal('-'), Literal('/'), Literal('.')
    separator = dash | slash | dot
    return separator.setResultsName('datesep')
# We allow the rhs to be a number or another identifier, so this'll parse: parse_result = c_assignment.parseString('double x= y;') # We're so far from an honest-to-God C parser: try: # This is valid c: valid_c = 'double x = 7, y = 9;' parse_result = c_assignment.parseString(valid_c) except ParseException as e: # Will print: # Expected ";" (at char 12), (line:1, col:13) print(e) # Explicitly tokenize the lexemes by use of 'setResultsName'! c_assignment = tyype.setResultsName("type") + identifier.setResultsName( "new_identifier") + assignment c_assignment += (number | identifier).setResultsName("rhs") + eos parse_result = c_assignment.parseString("double x = 7;") # Will print 'double' print(parse_result.type) # Will print 'x': print(parse_result.new_identifier) # Will print '7': print(parse_result.rhs) # Suppress characters your don't care about with 'Suppress': assignment = Suppress(Literal("="))
class RpkiSetsParser:
    """Parse RPKI set definitions and record them in the network's RPKI graph.

    A definition line has the form ``(<node>) children {<a>, <b>, ...}``, or
    ``{}`` for an empty set. Every member is added to ``network.g_rpki``
    together with an edge from ``<node>`` to the member.
    """

    def __init__(self, network):
        """Build the grammar.

        Args:
            network: Object exposing the ``g_rpki`` graph that
                ``apply_rpki_sets`` populates.
        """
        self.network = network
        attribute_unnamed = Word(alphanums+'_'+".")
        attribute = attribute_unnamed.setResultsName("attribute")
        self.attribute = attribute

        self.nodeQuery = attribute.setResultsName("nodeQuery").setFailAction(parse_fail_action)
        self.children = Literal("children").setResultsName("children")
        self.relation = self.children.setResultsName("relation").setFailAction(parse_fail_action)

        set_values = (Suppress("{")
                      + delimitedList(attribute, delim=',').setResultsName("set_values")
                      + Suppress("}"))
        # An empty set is reported as set() rather than as the "{}" token.
        empty_set = Literal("{}").setResultsName("set_values").setParseAction(lambda x: set())

        self.set_definition = ("(" + self.nodeQuery + ")" + self.relation
                               + (empty_set | set_values))
        self.rpkiSetsLine = self.set_definition.setResultsName("set_definition")
        self.path = {}

    def apply_rpki_sets(self, qstring):
        """Parse one set-definition line and add it to ``network.g_rpki``.

        Args:
            qstring: A single definition line.

        Raises:
            Whatever ``parseString`` (or the installed fail action) raises
            for input that does not match the grammar.
        """
        LOG.debug("Applying RPKI sets %s", qstring)
        result = self.rpkiSetsLine.parseString(qstring)
        parent = result.nodeQuery
        self.network.g_rpki.add_node(parent)
        if 'set_definition' in result:
            # This grammar defines no "set_name" result, so the set is
            # identified in the log by its parent node.
            LOG.debug("Storing set definition %s", parent)
            for member in result.set_values:
                self.network.g_rpki.add_node(member)
                self.network.g_rpki.add_edge(parent, member, relation=result.relation)

    def apply_rpki_file(self, rpki_in_file):
        """Apply every set definition found in an RPKI sets file.

        Blank lines and lines starting with ``#`` are skipped. A line that
        raises ``pyparsing.ParseFatalException`` is logged and skipped.

        Args:
            rpki_in_file: Path of the file to read.
        """
        LOG.debug("Applying policy file %s", rpki_in_file)
        rpki_lines = []
        with open(rpki_in_file, 'r') as f_rpki:
            for raw_line in f_rpki:
                line = raw_line.strip()
                if line.startswith("#"):
                    LOG.debug("Skipping commented line %s", line)
                    continue
                if line == "":
                    continue
                rpki_lines.append(line)

        # Definitions are applied only after the whole file has been read.
        for line in rpki_lines:
            try:
                self.apply_rpki_sets(line)
            except pyparsing.ParseFatalException:
                # NOTE(review): only ParseFatalException is handled here;
                # confirm that parse_fail_action converts ordinary parse
                # failures into it.
                LOG.warning("Unable to parse query line %s", line)
NOUN = Group(OneOrMore(NNPS|NNP|NNS|NN)) #PROPER_NOUN = Group(OneOrMore(NNP|NNPS)) VERB = OneOrMore(VDB|VBG|VBN|VBP|VBZ|VB) ADJECTIVE = OneOrMore(JJ) ADVERB = OneOrMore(RB) ARTICLE_NOUN = Group(DT + NOUN) ADJECTIVE_NOUN = Group(ADJECTIVE + NOUN) VERB_NOUN = Group(VERB + NOUN) POS_NOUN = Group((ARTICLE_NOUN | NOUN) + POS + (ADJECTIVE_NOUN | VERB_NOUN | NOUN)) ARTICLE_ADJECTIVE_NOUN = Group(DT + ADJECTIVE_NOUN) NOUN_AND_NOUN = Group((NOUN | ARTICLE_NOUN) + AND + (POS_NOUN | ARTICLE_NOUN | NOUN)) NOUN_OF_NOUN = Group((NOUN_AND_NOUN | ARTICLE_NOUN | NOUN) + OF + (NOUN_AND_NOUN | ARTICLE_NOUN | NOUN)) #TODO DT.setResultsName('article') NOUN.setResultsName('noun') #TODO expr = Forward() expr << (NOUN_OF_NOUN | NOUN_AND_NOUN | ARTICLE_ADJECTIVE_NOUN | ADJECTIVE_NOUN | POS_NOUN | ARTICLE_NOUN | NOUN) def read_in_file(): """Returns read in csv file(s).""" title_list = []
# coding: utf-8
from pyparsing import Word, Literal, alphas, alphanums, OneOrMore, ZeroOrMore, infixNotation, opAssoc, oneOf

# A node path is a run of names and the separators "/", "." and "..".
node_name = Word(alphanums + '_')
node_path = OneOrMore(node_name | Literal('/') | Literal('.') | Literal('..'))
operator = Literal("==")
status = Literal('complete') | Literal('aborted')
logical_operator = Literal('and') | Literal('or')

# <node_path> == <status>; the path tokens are joined into a single string.
single_trigger_expr = (
    node_path.setResultsName("node_path").setParseAction(lambda t: ''.join(t))
    + operator.setResultsName("operator")
    + status.setResultsName("status")
)

# oneOf's second positional parameter is the ``caseless`` flag, not another
# alternative, so the operators are declared case-insensitive explicitly.
trigger_expr = infixNotation(
    single_trigger_expr.setResultsName("single_expr"),
    [(oneOf("and", caseless=True), 2, opAssoc.LEFT),
     (oneOf("or", caseless=True), 2, opAssoc.RIGHT)]
).setResultsName("complex_filter")


def parse_trigger(trigger):
    """Parse a single ``<node_path> == <status>`` trigger.

    Args:
        trigger: Trigger text.

    Returns:
        The pyparsing results, with ``node_path``, ``operator`` and
        ``status`` available by name.

    NOTE(review): this uses ``single_trigger_expr``; the and/or grammar in
    ``trigger_expr`` is not applied here -- confirm that is intended.
    """
    return single_trigger_expr.parseString(trigger)
We use token.setResultsName("key") to make the key value dictionary for easily fishing results out later, much better thean indexing a list, which might change size if we change things later. Dictionary is not sensitive to that. We also use () for long lines not \ because apparently it's better. """ """ checkout - standard format grammar definition OUT product version pool# user host "isv_def" count cur_use cur_resuse \ server_handle share_handle process_id "project" "requested product" \ "requested version" mm/dd hh:mm:ss example: OUT imarisbase 6.0 9 heisenberg_lab my-workstation-72 "" 1 1 0 26e 26e 410 "" "" "" 06/16 10:57:52 """ rlmRlogCheckoutEntry_std = ( Literal("OUT").setResultsName("checkedOut") + product.setResultsName("product") + version.setResultsName("version") + poolNum + user.setResultsName("user") + host.setResultsName("host") + isvDef.setResultsName("isDef") + count.setResultsName("count") + curUse + curReuse + serverHandle.setResultsName("serverHandle") + shareHandle + processId + project + requestedProduct + requestedVersion + date.setResultsName("date") + timeHHMMSS.setResultsName("time")) """ checkout - small format grammar definition OUT product version user host "isv_def" count server_handle share_handle hh:mm example: OUT imarisbase 7.4 serrmeli my-workstation-72 "" 1 1281 7c1 14:22 """ rlmRlogCheckoutEntry_sml = (Literal("OUT").setResultsName("checkedOut") + product.setResultsName("product") + version.setResultsName("version") + user.setResultsName("user") + host.setResultsName("host") +
1 modern Courier New 02070309020205020404 2 roman Symbol 05050102010706020507 3 roman Times New Roman (Hebrew) 0 """ from pyparsing import Optional, Literal, Word, Group, White from pyparsing import Suppress, Combine, replaceWith from pyparsing import alphas, nums, printables, alphanums from pyparsing import restOfLine, oneOf, OneOrMore, ZeroOrMore from pyparsing import ParseException separator = Literal(';') space = Literal(' ') white = White() leftBracket = Literal('{') rightBracket = Literal('}') bracket = leftBracket | rightBracket.setResultsName('bracket') # basic RTF control codes, ie. "\labelname3434" controlLabel = Combine(Word(alphas + "'") + Optional(Word(nums))) controlValue = Optional(space) + Optional(Word(alphanums + '-')) baseControl = Combine(Literal('\\') + controlLabel + controlValue ).setResultsName('baseControl') # in some cases (color and font table declarations), control has ';' # suffix rtfControl = Combine(baseControl + Optional(separator) ).setResultsName('control') rtfGroup = leftBracket + OneOrMore(rtfControl) + rightBracket # opening controls
def operator():
    """Return a parser for one arithmetic operator: ``+``, ``-``, ``/`` or ``*``.

    The element is labelled ``"operator"`` for error messages and its match
    is stored under the results name ``"operator"``.
    """
    token = Literal("+") | Literal("-") | Literal("/") | Literal("*")
    token.setName("operator")
    # setResultsName returns a named copy instead of modifying ``token``,
    # so the copy is what has to be returned.
    return token.setResultsName("operator")
_p_lease_deleted.setParseAction(lambda s, loc, toks: True) _p_hex_digit = Word(hexnums, exact=2) _p_mac = Combine(_p_hex_digit + ':' + _p_hex_digit + ':' + _p_hex_digit + ':' + _p_hex_digit + ':' + _p_hex_digit + ':' + _p_hex_digit) _p_lease_hardware_ethernet = _ungroup( Keyword("hardware").suppress() + Keyword("ethernet").suppress() + _p_mac) _p_lease_junk = ( Word(alphas) # if we include { } ; here, they become greedy and eat the closing # brace or semicolon + CharsNotIn('{};')).suppress() _p_lease_decl = (_p_lease_deleted.setResultsName('deleted') | _p_lease_hardware_ethernet.setResultsName('mac') | _p_lease_junk) + Literal(';').suppress() _p_lease = (Keyword("lease").suppress() + _p_ip_address.setResultsName('ip') + _ungroup( nestedExpr( opener='{', closer='}', content=_p_lease_decl, ignoreExpr=quotedString, ), )).setParseAction(dictify) def parse(s): g = _p_lease.scanString(s)
# JOIN clause: an optional "left"/"right" qualifier followed by the join
# keyword(s).
join_header = Optional(Literal("left") | Literal("right")) + (Literal("join") | Literal("inner join") | Literal("outer join"))
# Joined table plus its "on <column> = <column>" link; "on" and "=" are
# suppressed from the results.
join_tail = Group(identifier.setResultsName("table") + Suppress("on") + Group(column + Suppress("=") + column).setResultsName("join_link"))
# Column list: either `star`, or one or more `column` separated by `comma`.
columns = Group(star | (ZeroOrMore(column + comma) + column))
join = Group(join_header.setResultsName("join_type") + join_tail)
# One WHERE condition: a column, a comparison operator, then a value or
# another column.
whr_column = Group(column + oneOf([">", "<", ">=", "<=", "=", "!="]) + (values | column))
# WHERE clause: `whr`, then conditions chained with "and"/"or".
whrs = whr + Group(ZeroOrMore(whr_column + oneOf(["and", "or"]))+ whr_column)

# Commands taking a database name: command token (results name "db_cmd")
# followed by an identifier (results name "db_name").
# NOTE(review): cd/dd/ud are defined outside this view -- presumably
# create/drop/use database; confirm.
cds = cd.setResultsName("db_cmd") + identifier.setResultsName("db_name")
dds = dd.setResultsName("db_cmd") + identifier.setResultsName("db_name")
uds = ud.setResultsName("db_cmd") + identifier.setResultsName("db_name")
# Commands taking a table name.
cts = ct.setResultsName("db_cmd") + identifier.setResultsName("table_name")
dts = dt.setResultsName("db_cmd") + identifier.setResultsName("table_name")
# Fragment commands: <cmd> <fragment_name> [<location>] on <table_name>;
# `cfs` additionally takes a bracketed column list.
cfs = cf.setResultsName("db_cmd") + identifier.setResultsName("fragment_name") + \
    location.setResultsName("location") + Suppress("on") + identifier.setResultsName("table_name") + \
    opening_bracket + columns.setResultsName("columns") + closing_bracket
dfs = df.setResultsName("db_cmd") + identifier.setResultsName("fragment_name") + Suppress("on") + \
    identifier.setResultsName("table_name")
# Commands without arguments.
sds = sd.setResultsName("db_cmd")
sts = st.setResultsName("db_cmd")
a valid out is OUT imarisbase 6.0 9 heisenberg_lab heisenberg-8-434 "" 1 1 0 26e 26e 410 "" "" "" 06/16 10:57:52 OUT product version pool# user host "isv_def" count cur_use cur_resuse server_handle share_handle process_id "project" "requested product" "requested version" mm/dd hh:mm:ss deny is DENY product version user host "isv_def" count why last_attempt mm/dd hh:mm We use token.setResultsName("key") to make the key value dictionary for easily fishing results out later, much better thean indexing a list, which might change size if we change things later. Dictionary is not sensetive to that. We also use ( ) for long lines not \ because apparently it's better. """ rlmRlogCheckoutEntry = ( Literal("OUT").setResultsName("checkedOut") + product.setResultsName("product") + version.setResultsName("version") + poolNum + user.setResultsName("user") + host.setResultsName("host") + isvDef.setResultsName("isDef") + count.setResultsName("count") + curUse + curReuse + serverHandle.setResultsName("serverHandle") + shareHandle + processId + project + requestedProduct + requestedVersion + date.setResultsName("date") + timeHHMMSS.setResultsName("time") ) rlmRlogCheckinEntry = ( Literal("IN").setResultsName("checkedIn") + whyIn + product.setResultsName("product") + version.setResultsName("version") + user.setResultsName("user") + host.setResultsName("host") + isvDef.setResultsName("isDef") + count.setResultsName("count") + curUse + curReuse + serverHandle.setResultsName("serverHandle") + date.setResultsName("date") + timeHHMMSS.setResultsName("time")
_p_lease_active.setParseAction(lambda s, loc, toks: True) _p_hex_digit = Word(hexnums, exact=2) _p_mac = Combine(_p_hex_digit + ':' + _p_hex_digit + ':' + _p_hex_digit + ':' + _p_hex_digit + ':' + _p_hex_digit + ':' + _p_hex_digit) _p_lease_hardware_ethernet = _ungroup( Keyword("hardware").suppress() + Keyword("ethernet").suppress() + _p_mac) _p_lease_junk = ( Word(alphas) # if we include { } ; here, they become greedy and eat the closing # brace or semicolon + CharsNotIn('{};')).suppress() _p_lease_decl = (_p_lease_deleted.setResultsName('deleted') | _p_lease_active.setResultsName('active') | _p_lease_hardware_ethernet.setResultsName('mac') | _p_lease_junk) + Literal(';').suppress() _p_lease = (Keyword("lease").suppress() + _p_ip_address.setResultsName('ip') + _ungroup( nestedExpr( opener='{', closer='}', content=_p_lease_decl, ignoreExpr=quotedString, ), )).setParseAction(dictify) def parse(s):
_p_lease_hardware_ethernet = _ungroup( Keyword("hardware").suppress() + Keyword("ethernet").suppress() + _p_mac ) _p_lease_junk = ( Word(alphas) # if we include { } ; here, they become greedy and eat the closing # brace or semicolon + CharsNotIn('{};') ).suppress() _p_lease_decl = ( _p_lease_deleted.setResultsName('deleted') | _p_lease_hardware_ethernet.setResultsName('mac') | _p_lease_junk ) + Literal(';').suppress() _p_lease = ( Keyword("lease").suppress() + _p_ip_address.setResultsName('ip') + _ungroup( nestedExpr( opener='{', closer='}', content=_p_lease_decl, ignoreExpr=quotedString, ), )
# variables, arrays, functions, etc... safe_variable = Combine("_" + Word(ac_chars) + "=" + opQuotedString(Word(ac_chars.replace(";", "")))) var_array = "(" + OneOrMore(opQuotedString(Word(ac_chars + " ="))) + ")" bad_variable = Combine(Word(ac_chars) + "=" + (var_array | opQuotedString(Word(ac_chars)))) generic_function = function_head(Word(alphas + "_", alphanums + "_")) + function_body if_expression = Forward() case_statement = Forward() statement_seperator = Literal(";") statement_block = Forward() bash_functions = oneOf("echo sed awk") + restOfLine # TODO: match all possible PKGBUILDs pkgbuildline = (pkgname.setResultsName("pkgname") | pkgver.setResultsName("pkgver") | pkgrel.setResultsName("pkgrel") | pkgdesc.setResultsName("pkgdesc") | epoch.setResultsName("epoch") | url.setResultsName("url") | license.setResultsName("license") | install.setResultsName("install") | changelog.setResultsName("changelog") | source.setResultsName("source") | noextract.setResultsName("noextract") | chksums.setResultsName("chksums") | groups.setResultsName("groups") | arch.setResultsName("arch") | backup.setResultsName("backup") | depends.setResultsName("depends")
def __init__(self, network):
    """Set up parser state and build the pyparsing grammars for BGP policy.

    The grammars are stored on the instance; ``self.bgpPolicyLine`` is the
    top-level expression combining an application query, a library call
    and a set definition.

    Args:
        network: Network object the policies apply to; stored as
            ``self.network``.
    """
    self.network = network
    self.g_business_relationship = nx.DiGraph()
    self.user_defined_sets = {}
    self.user_library_calls = []
    self.user_defined_functions = {}

    # Grammars
    #TODO: tidy this up
    attribute_unnamed = Word(alphanums+'_'+".")
    attribute = attribute_unnamed.setResultsName("attribute")
    self.attribute = attribute

    lt = Literal("<").setResultsName("<")
    le = Literal("<=").setResultsName("<=")
    eq = Literal("=").setResultsName("=")
    ne = Literal("!=").setResultsName("!=")
    ge = Literal(">=").setResultsName(">=")
    gt = Literal(">").setResultsName(">")
    wildcard = Literal("*").setResultsName("wildcard")
    self.wildcard = wildcard

    self.prefix_lists = {}
    self.tags_to_allocate = set()
    self.allocated_tags = {}

    # Operator token -> function used when evaluating a query.
    self._opn = {
        '<': operator.lt,
        '<=': operator.le,
        '=': operator.eq,
        '!=': operator.ne,
        '>=': operator.ge,
        '>': operator.gt,
        '&': set.intersection,
        '|': set.union,
    }

    # map alphanum chars to alphanum equivalents for use in tags
    self._opn_to_tag = {
        '<': "lt",
        '<=': "le",
        '=': "eq",
        '!=': "ne",
        '>=': "ge",
        '>': "gt",
        '&': "and",
        '|': "or",
    }

    # Both are of comparison to access in same manner when evaluating.
    # "|" takes the first alternative that matches, so "<=" has to be tried
    # before its one-character prefix "<" (as ">=" already is before ">").
    comparison = (le | lt | eq | ne | ge | gt).setResultsName("comparison")
    stringComparison = (eq | ne).setResultsName("comparison")

    # quoted string is already present
    float_string = Word(nums).setResultsName("value").setParseAction(lambda t: float(t[0]))
    integer_string = Word(nums).setResultsName("value").setParseAction(lambda t: int(t[0]))
    #TODO: use numString, and make integer if full stop

    #TODO: allow parentheses? - should be ok as pass to the python parser
    ipField = Word(nums, max=3)
    ipAddress = Combine(ipField + "." + ipField + "." + ipField + "." + ipField).setResultsName("ipAddress")

    boolean_and = Literal("&").setResultsName("&")
    boolean_or = Literal("|").setResultsName("|")
    boolean = (boolean_and | boolean_or).setResultsName("boolean")
    self._boolean = boolean  # need to use in checking

    #TODO fix this matching 2a.ab when that should match a string
    numericQuery = Group(attribute + comparison + float_string).setResultsName("numericQuery")

    # NOTE(review): setParseAction is applied to the shared `quotedString`
    # object itself rather than to a copy -- confirm this is intended.
    stringValues = (attribute_unnamed | quotedString.setParseAction(removeQuotes)
                    ).setResultsName("value")

    stringQuery = Group(attribute + stringComparison + stringValues).setResultsName("stringQuery")
    wildcardQuery = wildcard.setResultsName("wildcardQuery")

    singleQuery = numericQuery | stringQuery | wildcardQuery
    singleQuery.setFailAction(parse_fail_action)
    self.nodeQuery = singleQuery + ZeroOrMore(boolean + singleQuery)

    # Edge direction markers between the two node queries.
    self.u_egress = Literal("egress->").setResultsName("u_egress")
    self.v_ingress = Literal("->ingress").setResultsName("v_ingress")
    self.u_ingress = Literal("ingress<-").setResultsName("u_ingress")
    self.v_egress = Literal("<-egress").setResultsName("v_egress")
    edgeType = (self.u_egress | self.u_ingress | self.v_egress
                | self.v_ingress).setResultsName("edgeType").setFailAction(parse_fail_action)
    self.edgeQuery = ("(" + self.nodeQuery.setResultsName("query_a") + ")"
                      + edgeType
                      + "(" + self.nodeQuery.setResultsName("query_b")
                      + ")").setFailAction(parse_fail_action)

    #start of BGP queries
    originQuery = (Literal("Origin").setResultsName("attribute") +
                   #this is a workaround for the match, comparison, value 3-tuple in processing
                   Literal("(").setResultsName("comparison") +
                   Group(self.nodeQuery).setResultsName("value") + Suppress(")")).setResultsName("originQuery")
    transitQuery = (Literal("Transit").setResultsName("attribute") +
                    #this is a workaround for the match, comparison, value 3-tuple in processing
                    Literal("(").setResultsName("comparison") +
                    Group(self.nodeQuery).setResultsName("value") + Suppress(")")).setResultsName("transitQuery")

    prefixList = Literal("prefix_list")
    matchPl = (prefixList.setResultsName("attribute")
               + comparison
               + attribute.setResultsName("value"))

    matchTag = (Literal("tag").setResultsName("attribute")
                + comparison
                + attribute.setResultsName("value"))

    #tags contain -> tag = aaa
    inTags = (
        Literal("tags").setResultsName("attribute").setParseAction(lambda x: "tag")
        + Literal("contain").setResultsName("comparison").setParseAction(lambda x: "=")
        + attribute_unnamed.setResultsName("value")
    )

    bgpMatchQuery = Group(matchPl | matchTag | inTags | originQuery | transitQuery
                          ).setResultsName("bgpMatchQuery").setFailAction(parse_fail_action)
    self.bgpMatchQuery = bgpMatchQuery

    # Actions
    setLP = (Literal("setLP").setResultsName("attribute")
             + integer_string.setResultsName("value")).setResultsName("setLP")
    setMED = (Literal("setMED").setResultsName("attribute")
              + integer_string.setResultsName("value")).setResultsName("setMED")

    addTag = (Literal("addTag").setResultsName("attribute")
              + attribute.setResultsName("value")).setResultsName("addTag")
    removeTag = (Literal("removeTag").setResultsName("attribute")
                 + attribute.setResultsName("value")).setResultsName("removeTag")
    #TODO: need to set blank value
    reject = Literal("reject")
    #TODO: remove once move quagga output inside module
    self.reject = reject
    rejectAction = (reject.setResultsName("attribute") +
                    Literal("route").setResultsName("value")).setResultsName("reject")
    setNextHop = (Literal("setNextHop").setResultsName("attribute")
                  + ipAddress.setResultsName("value")).setResultsName("setNextHop")

    setOriginAttribute = (Literal("setOriginAttribute").setResultsName("attribute")
                          + (oneOf("IGP BGP None").setResultsName("value"))).setResultsName("setOriginAttribute")

    bgpAction = Group(addTag | setLP | setMED | removeTag |
                      setNextHop | setOriginAttribute | rejectAction).setResultsName("bgpAction")

    # The Clauses
    ifClause = Group(Suppress("if") + bgpMatchQuery
                     + ZeroOrMore(Suppress(boolean_and)
                                  + bgpMatchQuery)).setResultsName("if_clause")

    actionClause = bgpAction + ZeroOrMore(Suppress(boolean_and) + bgpAction)
    thenClause = Group(Suppress("then") + actionClause).setResultsName("then_clause")
    ifThenClause = Group(Suppress("(") + ifClause + thenClause
                         + Suppress(")")).setResultsName("ifThenClause")
    elseActionClause = Group(Suppress("(") + actionClause
                             + Suppress(")")).setResultsName("else_clause")

    # Support actions without a condition (ie no "if")
    unconditionalAction = Group(Suppress("(")
                                + Group(actionClause).setResultsName("unconditionalActionClause")
                                + Suppress(")")).setResultsName("bgpSessionQuery")

    # Query may contain itself (nested)
    bgpSessionQuery = Forward()
    bgpSessionQuery << (
        ifThenClause + Optional(Suppress("else") + (elseActionClause | bgpSessionQuery))
    ).setResultsName("bgpSessionQuery")
    bgpSessionQuery = bgpSessionQuery | unconditionalAction
    self.bgpSessionQuery = bgpSessionQuery

    self.bgpApplicationQuery = self.edgeQuery + Suppress(":") + self.bgpSessionQuery

    # Library stuff
    set_values = (Suppress("{")
                  + delimitedList(attribute, delim=',').setResultsName("set_values")
                  + Suppress("}"))
    #Set to empty set, rather than empty list as empty list is processed differently somewhere in parser
    empty_set = Literal("{}").setResultsName("set_values").setParseAction(lambda x: set())
    self.set_definition = attribute.setResultsName("set_name") + Suppress("=") + (empty_set | set_values)

    library_params = attribute | Group(set_values) | empty_set
    library_function = (attribute.setResultsName("def_name") + Suppress("(")
                        + delimitedList(library_params, delim=',').setResultsName("def_params")
                        + Suppress(")"))
    library_function.setFailAction(parse_fail_action)

    self.library_def = Suppress("define") + library_function

    self.library_call = Suppress("apply") + library_function
    self.library_def.setFailAction(parse_fail_action)
    self.library_edge_query = (self.attribute.setResultsName("query_a")
                               + edgeType + self.attribute.setResultsName("query_b"))
    self.library_edge_query.setFailAction(parse_fail_action)
    library_edge_definition = self.library_edge_query + Suppress(":") + self.bgpSessionQuery
    library_global_definition = ("global tags = {"
                                 + delimitedList(attribute, delim=',').setResultsName("tags")
                                 + "}")
    self.library_entry = (library_global_definition.setResultsName("global_tags")
                          | library_edge_definition.setResultsName("library_edge"))
    self.library_entry.setFailAction(parse_fail_action)

    self.bgpPolicyLine = (
        self.bgpApplicationQuery.setResultsName("bgpApplicationQuery")
        | self.library_call.setResultsName("library_call")
        | self.set_definition.setResultsName("set_definition")
    )
We use token.setResultsName("key") to make the key value dictionary for easily fishing results out later, much better thean indexing a list, which might change size if we change things later. Dictionary is not sensitive to that. We also use () for long lines not \ because apparently it's better. """ """ checkout - standard format grammar definition OUT product version pool# user host "isv_def" count cur_use cur_resuse \ server_handle share_handle process_id "project" "requested product" \ "requested version" mm/dd hh:mm:ss example: OUT imarisbase 6.0 9 heisenberg_lab my-workstation-72 "" 1 1 0 26e 26e 410 "" "" "" 06/16 10:57:52 """ rlmRlogCheckoutEntry_std = (Literal("OUT").setResultsName("checkedOut") + product.setResultsName("product") + version.setResultsName("version") + poolNum + user.setResultsName("user") + host.setResultsName("host") + isvDef.setResultsName("isDef") + count.setResultsName("count") + curUse + curReuse + serverHandle.setResultsName("serverHandle") + shareHandle + processId + project + requestedProduct + requestedVersion + date.setResultsName("date") +
tok_sql_literal_by + \ tok_sql_cols.setResultsName("order_list") ) + \ Optional(tok_sql_literal_limit + \ tok_sql_identifier) + \ Optional(tok_sql_literal_semicol)) sql_delete = tok_sql_literal_delete.setResultsName("op") + restOfLine sql_update = tok_sql_literal_update.setResultsName("op") + restOfLine sql_begin = tok_sql_literal_begin.setResultsName("op") + restOfLine sql_use = tok_sql_literal_use.setResultsName("op") + restOfLine sql_set = tok_sql_literal_set.setResultsName("op") + restOfLine sql_commit = tok_sql_literal_commit.setResultsName("op") + restOfLine sql_rollback = tok_sql_literal_rollback.setResultsName("op") + restOfLine sql_comment = tok_sql_comment.setResultsName("op") + restOfLine statements = [sql_insert, sql_update, sql_delete, sql_begin, sql_use, sql_commit, sql_rollback, sql_comment, sql_set, sql_select] sql_statement = Or(statements) def parse(sql): try: return sql_statement.parseString(sql) except ParseException: raise ValueError # test function adapted from simpleSQL.py : Copyright (c) 2003, Paul McGuire def test( str, op=sql_insert ): print str,"->" try:
gSpace + gName + gSpaces + gParameters).setResultsName("usage") gEmptyLine = gSpaces + gEOL gShortText = Optional( Word(srange("[A-Z]") + srange("[a-z]")) + restOfLine) + gEOL_keep #gText = OneOrMore(gShortText) #gIntroduction = Optional( #Group(Word(srange("[A-Z]") + srange("[a-z]")) + Regex(".*:$")) #.setResultsName("introduction")) gOptionDescriptionText = \ Optional(gRepetition + Literal(':')) + \ OneOrMore( Optional(Regex("\n")) + gSpaces + Optional(Literal('(')) + Word(alphas, alphanums + '_') + restOfLine)\ .setResultsName("description") gOptionDescriptionOption = gSpaces + \ gRawOption.setResultsName("first_option") + Optional(gParameter) + \ ZeroOrMore( Group( Literal(",").suppress() + gSpace.suppress() + gRawOption.setResultsName("option") + Optional(gParameter)))\ .setResultsName("other_options") gOptionDescriptionSwitch = Or([ gOptionDescriptionOption, gInnerParameter, gStdin]) gOptionDescription = (gOptionDescriptionSwitch + gOptionDescriptionText)\ .setResultsName("option_description") #gBidule = gIntroduction + gOptionDescription #gSubNext = Or([gBidule, gShortText.setResultsName("short_text")]) #gNext = Dict(ZeroOrMore(Group(gSubNext))).setResultsName("next") gRest = Regex("(.*\n?)*").setResultsName("rest")
1 modern Courier New 02070309020205020404 2 roman Symbol 05050102010706020507 3 roman Times New Roman (Hebrew) 0 """ from pyparsing import Optional, Literal, Word, Group, White from pyparsing import Suppress, Combine, replaceWith from pyparsing import alphas, nums, printables, alphanums from pyparsing import restOfLine, oneOf, OneOrMore, ZeroOrMore from pyparsing import ParseException separator = Literal(';') space = Literal(' ') white = White() leftBracket = Literal('{') rightBracket = Literal('}') bracket = leftBracket | rightBracket.setResultsName('bracket') # basic RTF control codes, ie. "\labelname3434" controlLabel = Combine(Word(alphas + "'") + Optional(Word(nums))) controlValue = Optional(space) + Optional(Word(alphanums + '-')) baseControl = Combine(Literal('\\') + controlLabel + controlValue).setResultsName('baseControl') # in some cases (color and font table declarations), control has ';' # suffix rtfControl = Combine(baseControl + Optional(separator)).setResultsName('control') rtfGroup = leftBracket + OneOrMore(rtfControl) + rightBracket # opening controls
def parser_bnf():
    """Build the pyparsing grammar for podcast configuration text.

    The grammar is zero or more segment specifications. Each segment is a
    bracketed, colon-delimited list of inputs (audio, video or frame)
    followed by a group of optional time specifications that is stored
    under the results name ``times``. Python-style comments are ignored.

    Returns:
        The top-level ``ZeroOrMore`` parser element.
    """

    def _as_int(text, loc, toks):
        # Turn the matched digit run into an int.
        return int(toks[0])

    def _as_millis(text, loc, toks):
        # Keep at most three fractional digits and right-pad with zeros,
        # e.g. "5" -> 500 and "12345" -> 123.
        digits = toks[0][:3]
        return int(digits.ljust(3, "0"))

    # Punctuation. Everything except the caret is dropped from the results.
    colon = Literal(":").suppress()
    period = Literal(".").suppress()
    at_sign = Literal("@").suppress()
    open_bracket = Literal("[").suppress()
    close_bracket = Literal("]").suppress()
    caret = Literal("^")

    # zero_index ::= [0-9]+
    zero_index = Word(nums)
    zero_index.setParseAction(_as_int)

    # filename ::= alnum [ (alnum | "-" | "_" | "/" | "." | " ")+ ]
    name_head = Word(alphanums, exact=1)
    name_tail = Word(alphanums + "-_/. ")
    filename = Combine(name_head + Optional(name_tail))

    # millisecs ::= [0-9]+   (the leading "." is matched by the caller)
    fraction = Word(nums)
    fraction.setParseAction(_as_millis)
    millisecs = fraction.setResultsName("ms")

    # hours, minutes, seconds ::= zero_index
    hh = zero_index.setResultsName("hh")
    mm = zero_index.setResultsName("mm")
    ss = zero_index.setResultsName("ss")

    # timestamp ::= [[hours ":"] minutes ":"] seconds ["." millisecs]
    #             | [[hours ":"] minutes ":"] "." millisecs
    clock_prefix = (hh + colon + mm + colon) | (mm + colon)
    clock_suffix = (ss + Optional(period + millisecs)) | (period + millisecs)
    timestamp = Optional(clock_prefix) + clock_suffix

    # duration_file ::= "@" filename
    # Two separate elements are built on purpose: the parse action that
    # prepends a zero timestamp must fire only when the file stands alone,
    # and attaching it to a shared element would make it fire everywhere.
    file_after_stamp = at_sign + filename.setResultsName("filename")
    file_alone = at_sign + filename.setResultsName("filename")

    def _with_zero_start(text, loc, toks):
        # A lone "@file" gets an explicit zero timestamp placed before it.
        return [timestamp.parseString("00:00:00.000"), toks]

    # timespecs ::= timestamp [duration_file | {timestamp}]
    # The alternatives are constructed in this order; the last one attaches
    # default_timestamp_fields to the `timestamp` element itself.
    alone_alt = file_alone.setParseAction(_with_zero_start)
    stamp_file_alt = Group(timestamp) + file_after_stamp
    stamps_alt = OneOrMore(
        Group(timestamp.setParseAction(default_timestamp_fields)))
    timespecs = Or([alone_alt, stamp_file_alt, stamps_alt])

    # last_frame ::= "-1" | "last"
    last_frame = oneOf(["-1", "last"]).setParseAction(replaceWith(-1))

    # frame_number ::= ":" (zero_index | last_frame)
    frame_index = (zero_index | last_frame).setResultsName("num")
    frame_number = colon - frame_index

    # stream_number ::= ":" zero_index
    stream_index = zero_index.setResultsName("num")
    stream_number = colon - stream_index

    # input_file ::= ":" [filename]
    optional_name = Optional(filename).setResultsName("filename")
    input_file = colon - optional_name

    # previous_segment ::= ":" "^"
    previous_marker = caret.setResultsName("filename")
    previous_segment = colon - previous_marker

    # frame_input_file ::= input_file | previous_segment
    frame_input_file = Or([input_file, previous_segment])

    # av_trailer ::= input_file [stream_number]
    av_trailer = input_file + Optional(stream_number)

    # frame_type ::= "frame" | "f"
    frame_type = oneOf(["f", "frame"]).setParseAction(replaceWith("frame"))

    # frame_input ::= frame_type [frame_input_file [frame_number]]
    frame_tail = Optional(frame_input_file + Optional(frame_number))
    frame_input = frame_type.setResultsName("type") + frame_tail

    # video_type ::= "video" | "v"
    video_type = oneOf(["v", "video"]).setParseAction(replaceWith("video"))

    # audio_type ::= "audio" | "a"
    audio_type = oneOf(["a", "audio"]).setParseAction(replaceWith("audio"))

    # av_input ::= (audio_type | video_type) [av_trailer]
    av_kind = (audio_type | video_type).setResultsName("type")
    av_input = av_kind + Optional(av_trailer)

    # inputspec ::= "[" (av_input | frame_input) {":" (av_input | frame_input)} "]"
    input_list = delimitedList(av_input | frame_input, delim=":")
    input_list.setParseAction(default_input_fields)
    inputspec = open_bracket + input_list - close_bracket

    # segmentspec ::= inputspec [timespecs]
    times = Group(Optional(timespecs)).setResultsName("times")
    segmentspec = Group(inputspec + times)

    # config ::= {segmentspec}
    config = ZeroOrMore(segmentspec)
    config.ignore(pythonStyleComment)
    return config