def parser():
    """Return the module-level regex-pattern grammar, building it on first use.

    The pyparsing expression tree is constructed lazily and cached in the
    global ``_parser`` so repeated calls share a single instance.
    """
    global _parser
    if _parser is not None:
        return _parser

    # Whitespace is significant inside a regex: disable pyparsing's skipping.
    ParserElement.setDefaultWhitespaceChars("")

    # Punctuation literals.
    open_bracket, close_bracket = Literal("["), Literal("]")
    open_brace, close_brace = Literal("{"), Literal("}")
    open_paren, close_paren = Literal("("), Literal(")")

    # Character-class macros: \d \w \s \Z.
    macro = Suppress("\\") + oneOf(list("dwsZ"))
    # Any other backslash-escaped printable character (but not a macro).
    escaped = ~macro + Combine("\\" + oneOf(list(printables)))
    # Characters that may appear bare, i.e. not regex metacharacters.
    plain_chars = "".join(c for c in string.printable
                          if c not in r"\[]{}().*?+|")

    # "[...]" character range; escaped characters inside do not close it.
    char_range = Combine(open_bracket.suppress()
                         + SkipTo(close_bracket, ignore=escaped)
                         + close_bracket.suppress())
    literal_char = escaped | oneOf(list(plain_chars))
    any_char = Literal(".")

    # Repetition suffixes: {n}, {m,n}, or one of * + ?.
    repeat = ((open_brace + Word(nums).setResultsName("count") + close_brace)
              | (open_brace + Word(nums).setResultsName("minCount") + ","
                 + Word(nums).setResultsName("maxCount") + close_brace)
              | oneOf(list("*+?")))

    expr = Forward()
    # "(...)" group, optionally prefixed with "?:" or "?P".
    group_expr = (open_paren.suppress()
                  + Optional(Literal("?").suppress()
                             + oneOf(list(":P"))).setResultsName("option")
                  + expr.setResultsName("expr")
                  + close_paren.suppress())
    term = literal_char | char_range | macro | any_char | group_expr

    # Precedence: repetition binds tightest, then concatenation, then "|".
    expr << operatorPrecedence(term, [
        (repeat, 1, opAssoc.LEFT, create(Repetition)),
        (None, 2, opAssoc.LEFT, create(Sequence)),
        (Suppress('|'), 2, opAssoc.LEFT, create(Alternation)),
    ])

    # Attach AST-node factory actions.
    group_expr.setParseAction(create(Group))
    char_range.setParseAction(create(Range))
    literal_char.setParseAction(create(Character))
    macro.setParseAction(create(Macro))
    any_char.setParseAction(create(Dot))

    _parser = expr
    return _parser
def parser():
    # Build (once) and return the cached regex-pattern grammar; the result
    # is memoized in the module-level ``_parser``.
    global _parser
    if _parser is None:
        # Regexes are whitespace-sensitive: disable pyparsing's skipping.
        ParserElement.setDefaultWhitespaceChars("")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lparen = Literal("(")
        rparen = Literal(")")

        # Character-class macros: \d \w \s \Z.
        reMacro = Suppress("\\") + oneOf(list("dwsZ"))
        # Any other escaped printable character (not a macro).
        escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables)))
        # Characters allowed bare, i.e. not regex metacharacters.
        reLiteralChar = "".join(c for c in string.printable
                                if c not in r"\[]{}().*?+|")

        # "[...]" character range; escaped chars inside do not close it.
        reRange = Combine(lbrack.suppress()
                          + SkipTo(rbrack, ignore=escapedChar)
                          + rbrack.suppress())
        reLiteral = (escapedChar | oneOf(list(reLiteralChar)))
        reDot = Literal(".")
        # Repetition suffixes: {n}, {m,n} or * + ?.
        repetition = (
            (lbrace + Word(nums).setResultsName("count") + rbrace)
            | (lbrace + Word(nums).setResultsName("minCount") + "," +
               Word(nums).setResultsName("maxCount") + rbrace)
            | oneOf(list("*+?")))

        reExpr = Forward()
        # "(...)" group, optionally prefixed with "?:" or "?P".
        reGroup = (lparen.suppress() +
                   Optional(Literal("?").suppress() +
                            oneOf(list(":P"))).setResultsName("option") +
                   reExpr.setResultsName("expr") +
                   rparen.suppress())
        reTerm = (reLiteral | reRange | reMacro | reDot | reGroup)
        # Precedence: repetition binds tightest, then concatenation, then "|".
        reExpr << operatorPrecedence(reTerm, [
            (repetition, 1, opAssoc.LEFT, create(Repetition)),
            (None, 2, opAssoc.LEFT, create(Sequence)),
            (Suppress('|'), 2, opAssoc.LEFT, create(Alternation)),
        ])
        # Attach AST-node factory actions.
        reGroup.setParseAction(create(Group))
        reRange.setParseAction(create(Range))
        reLiteral.setParseAction(create(Character))
        reMacro.setParseAction(create(Macro))
        reDot.setParseAction(create(Dot))
        _parser = reExpr
    return _parser
def __init__(self):
    # Build a pyparsing-based lexer for (a subset of) bash command lines.
    # Exposes the grammar as ``self.LEXER`` and pyparsing's ParseException
    # as ``self.parseException``.
    from pyparsing import (ParserElement, StringEnd, LineEnd, Literal,
                           pythonStyleComment, ZeroOrMore, Suppress,
                           Optional, Combine, OneOrMore, Regex, oneOf,
                           QuotedString, Group, ParseException)
    # Only tabs/spaces are skippable whitespace; newlines are significant.
    ParserElement.setDefaultWhitespaceChars("\t ")
    EOF = StringEnd()
    EOL = ~EOF + LineEnd()  # EOL must not match on EOF
    escape = Literal("\\")
    comment = pythonStyleComment
    # "junk": comments / blank lines discarded between commands.
    junk = ZeroOrMore(comment | EOL).suppress()
    ## word (i.e: single argument string)
    word = Suppress(escape + EOL + Optional(comment)) \
        | Combine(OneOrMore(
            escape.suppress() + Regex(".") |
            QuotedString("'", escChar='\\', multiline=True) |
            QuotedString('"', escChar='\\', multiline=True) |
            Regex("[^ \t\r\n\f\v\\\\$&<>();\|\'\"`]+") |
            Suppress(escape + EOL)))
    ## redirector (aka bash file redirectors, such as "2>&1" sequences)
    fd_src = Regex("[0-2]").setParseAction(lambda t: int(t[0]))
    fd_dst = Suppress("&") + fd_src
    # "[n]<word" || "[n]<&word" || "[n]<&digit-"
    fd_redir = (Optional(fd_src, 0) + Literal("<")
                | Optional(fd_src, 1) + Literal(">")) \
        + (word | (fd_dst + Optional("-")))
    # "&>word" || ">&word"
    full_redir = (oneOf("&> >&") + word)\
        .setParseAction(lambda t: ("&", ">", t[-1]))
    # "<<<word" || "<<[-]word"
    here_doc = Regex("<<(<|-?)") + word
    # "[n]>>word"
    add_to_file = Optional(fd_src | Literal("&"), 1) + \
        Literal(">>") + word
    # "[n]<>word"
    fd_bind = Optional(fd_src, 0) + Literal("<>") + word
    redirector = (fd_redir | full_redir | here_doc | add_to_file | fd_bind)\
        .setParseAction(lambda token: tuple(token))
    ## single command (args/redir list)
    command = Group(OneOrMore(redirector | word))
    ## logical operators (section splits)
    semicolon = Suppress(";") + junk
    connector = (oneOf("&& || |") + junk) | semicolon
    ## pipeline, aka logical block of interconnected commands
    pipeline = junk + Group(command +
                            ZeroOrMore(connector + command) +
                            Optional(semicolon))
    # define object attributes
    self.LEXER = pipeline.ignore(comment) + EOF
    self.parseException = ParseException
def separated_list(base, separator, allow_term_sep=False):
    """Build a pyparsing expression matching ``base (separator base)*``.

    Args:
        base: pyparsing element matching a single list item.
        separator: separator element, or a plain string (wrapped in
            ``Literal`` when a trailing separator must be matched).
        allow_term_sep: when True, also accept — and suppress — a single
            trailing separator after the final item.

    Returns:
        The composed pyparsing ParserElement.
    """
    result = delimitedList(base, separator)
    if allow_term_sep:
        # isinstance (not ``type(...) ==``) so str subclasses work too.
        if isinstance(separator, str):
            separator = Literal(separator)
        result += Optional(separator.suppress())
    return result
def __init__(self):
    # Build a pyparsing-based lexer for bash command lines; exposes the
    # grammar as ``self.LEXER`` and ParseException as ``self.parseException``.
    from pyparsing import (ParserElement, StringEnd, LineEnd, Literal,
                           pythonStyleComment, ZeroOrMore, Suppress,
                           Optional, Combine, OneOrMore, Regex, oneOf,
                           QuotedString, Group, ParseException)
    # Only tabs/spaces are skippable whitespace; newlines are significant.
    ParserElement.setDefaultWhitespaceChars("\t ")
    EOF = StringEnd()
    EOL = ~EOF + LineEnd()  # EOL must not match on EOF
    escape = Literal("\\")
    comment = pythonStyleComment
    # Comments / blank lines discarded between commands.
    junk = ZeroOrMore(comment | EOL).suppress()
    # word (i.e: single argument string)
    word = Suppress(escape + EOL + Optional(comment)) \
        | Combine(OneOrMore(
            escape.suppress() + Regex(".") |
            QuotedString("'", escChar='\\', multiline=True) |
            QuotedString('"', escChar='\\', multiline=True) |
            Regex("[^ \t\r\n\f\v\\\\$&<>();\|\'\"`]+") |
            Suppress(escape + EOL)))
    # redirector (aka bash file redirectors, such as "2>&1" sequences)
    fd_src = Regex("[0-2]").setParseAction(lambda t: int(t[0]))
    fd_dst = Suppress("&") + fd_src
    # "[n]<word" || "[n]<&word" || "[n]<&digit-"
    fd_redir = (Optional(fd_src, 0) + Literal("<") |
                Optional(fd_src, 1) + Literal(">")) + \
        (word | (fd_dst + Optional("-")))
    # "&>word" || ">&word"
    obj = (oneOf("&> >&") + word)
    full_redir = obj.setParseAction(lambda t: ("&", ">", t[-1]))
    # "<<<word" || "<<[-]word"
    here_doc = Regex("<<(<|-?)") + word
    # "[n]>>word"
    add_to_file = (Optional(fd_src | Literal("&"), 1) +
                   Literal(">>") + word)
    # "[n]<>word"
    fd_bind = Optional(fd_src, 0) + Literal("<>") + word
    # ``obj`` is reused here as a scratch name for the combined redirector.
    obj = (fd_redir | full_redir | here_doc | add_to_file | fd_bind)
    redirector = obj.setParseAction(lambda token: tuple(token))
    # single command (args/redir list)
    command = Group(OneOrMore(redirector | word))
    # logical operators (section splits)
    semicolon = Suppress(";") + junk
    connector = (oneOf("&& || |") + junk) | semicolon
    # pipeline, aka logical block of interconnected commands
    pipeline = junk + Group(command + ZeroOrMore(connector + command) +
                            Optional(semicolon))
    # define object attributes
    self.LEXER = pipeline.ignore(comment) + EOF
    self.parseException = ParseException
def computeVariables(self, text):
    """Scan *text* for ``$name=content`` declarations and store each
    computed value in ``self.descVariables``.

    Content runs until the next declaration, a ``--`` marker, or the end
    of the line; ``==`` never opens a declaration.
    """
    end_of_line = LineEnd().suppress()
    # A declaration opens with "$name=" (only the name token is kept).
    decl_start = (Literal('$').suppress()
                  + Word(alphanums).setResultsName("name")
                  + Literal("=").suppress())
    # One declaration: the opener plus everything up to the next boundary.
    declaration = Group(
        decl_start
        + SkipTo(decl_start | Literal("--") | end_of_line)
          .setResultsName("content")
    ).setResultsName("variables*")
    grammar = OneOrMore(declaration)
    grammar.ignore(Literal("=="))  # "==" is not a declaration opener
    for match in grammar.searchString(text):
        for name, content in match:
            self.descVariables[name] = self.compute(content, verbose=False)
def __init__(self):
    # Build a pyparsing grammar for nginx-style configuration files and
    # expose it as ``self.script``.
    left_bracket = Literal("{").suppress()
    right_bracket = Literal("}").suppress()
    semicolon = Literal(";").suppress()
    space = White().suppress()
    key = Word(alphanums + "+.-_/")
    # A value: any run of non-structural text, with quoted strings
    # handled explicitly so braces/semicolons inside quotes are allowed.
    value = ZeroOrMore(
        CharsNotIn('{};#"\'') | space |
        QuotedString("'", escChar='\\', multiline=True) |
        QuotedString('"', escChar='\\', multiline=True))
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")
    comment = Literal('#').suppress() + Optional(restOfLine)
    # rules
    assignment = Group(
        (key | value) + value + semicolon + Optional(space + comment))
    block = Forward()
    # A block: "<key> [modifier] [args] { ...directives/nested blocks... }"
    block << Group(
        Group(key + Optional(space + modifier) + Optional(space) +
              Optional(value) + Optional(space + value)) +
        left_bracket +
        Group(ZeroOrMore(assignment | block | comment.suppress())) +
        right_bracket)

    def comment_handler(t):
        # Extract structured data from a comment: a "promo" marker and/or
        # an "author: <email>" annotation; falls back to the raw comment
        # text if splitting fails.
        result = []
        if "promo" in t[0]:
            result.append("promo")
        if "author: " in t[0]:
            try:
                email = t[0].split("author: ")[1].strip()
                result.append(email)
            except Exception:
                result.append(t[0])
        return result

    comment.setParseAction(comment_handler)
    self.script = OneOrMore(assignment | block | comment.suppress())
def _get_handbrake_title_pattern(self):
    """Build the pyparsing pattern that extracts the title number, the
    duration and the subtitle tracks from HandBrake scan output."""
    digits = Word("0123456789")
    hh_mm_ss = Combine(digits + ":" + digits + ":" + digits)

    title_marker = Literal("+ title").suppress()
    duration_marker = Literal("+ duration:").suppress()
    subtitle_marker = Literal("+ subtitle tracks:")

    # One subtitle line: "+ <num> ... (iso639-2: <code>) ..."; only the
    # track number and the ISO language code are kept.
    iso_code = Literal('(iso639-2:').suppress() + Word(alphas)
    one_subtitle = (Literal("+").suppress()
                    + Group(digits + SkipTo(iso_code).suppress() + iso_code)
                    + restOfLine.suppress())

    return (title_marker
            + digits.setResultsName("title")
            + SkipTo(duration_marker).suppress()
            + duration_marker
            + hh_mm_ss.setResultsName("duration")
            + SkipTo(subtitle_marker).suppress()
            + subtitle_marker.suppress()
            + Group(ZeroOrMore(one_subtitle)).setResultsName("subtitles"))
def remove_comments(string):
    """Remove comments from the statements

    Args:
        string(str): String to be processed

    Returns:
        result(str): String with comments trimmed
    """
    if string == '':
        return string
    # Strip /* ... */ block comments first, then "--" line comments.
    block_comment = nestedExpr('/*', '*/').suppress()
    without_blocks = block_comment.transformString(string)
    line_comment = (Literal('--') + ZeroOrMore(CharsNotIn('\n'))).suppress()
    return line_comment.transformString(without_blocks)
def create_bnf(stack):
    # Build the BNF grammar for region-selection expressions over
    # nodes/elements; each parsed atom and operator is pushed onto *stack*
    # by the ``to_stack`` parse action.
    point = Literal(".")
    comma = Literal(",")
    e = CaselessLiteral("E")
    inumber = Word(nums)
    # Signed float with optional fraction and exponent.
    fnumber = Combine(
        Word("+-" + nums, nums) +
        Optional(point + Optional(Word(nums))) +
        Optional(e + Word("+-" + nums, nums)))
    _of = Literal('of')
    _in = Literal('in')
    _by = Literal('by')
    _copy = Literal('copy')
    # Region-algebra operators: subtract/add/intersect nodes or elements.
    _mn = Literal('-n').setParseAction(replace('OA_SubN'))
    _me = Literal('-e').setParseAction(replace('OA_SubE'))
    _pn = Literal('+n').setParseAction(replace('OA_AddN'))
    _pe = Literal('+e').setParseAction(replace('OA_AddE'))
    _inn = Literal('*n').setParseAction(replace('OA_IntersectN'))
    _ine = Literal('*e').setParseAction(replace('OA_IntersectE'))
    regop = (_mn | _me | _pn | _pe | _inn | _ine)
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    _all = Literal('all').setParseAction(replace('KW_All'))
    node = Literal('node')
    nodes = Literal('nodes')
    element = Literal('element')
    elements = Literal('elements')
    group = Literal('group')
    _set = Literal('set')
    surface = Literal('surface')
    ident = Word(alphas + '_.', alphanums + '_.')
    set_name = Word(nums) | ident
    function = Word(alphas + '_', alphanums + '_')
    function = Group(function).setParseAction(join_tokens)
    # Region names: "r.<name>", optionally prefixed with "copy".
    region = Combine(
        Literal('r.') + Word(alphas + '_', '_' + alphas + nums + '.'))
    region = Group(Optional(_copy, default='nocopy') + region)
    region.setParseAction(replace('KW_Region', keep=True))
    # Coordinate relations, e.g. "(x < 0.5) & (y > 0)".
    coor = oneOf('x y z')
    boolop = oneOf('& |')
    relop = oneOf('< > <= >= != ==')
    bool_term = (ZeroOrMore('(') + (coor | fnumber) + relop +
                 (coor | fnumber) + ZeroOrMore(')'))
    relation = Forward()
    relation << (ZeroOrMore('(') + bool_term +
                 ZeroOrMore(boolop + relation) + ZeroOrMore(')'))
    relation = Group(relation).setParseAction(join_tokens)
    # Selector atoms ("nodes of surface", "elements by <fn>", ...).
    nos = Group(nodes + _of + surface).setParseAction(replace('E_NOS'))
    nir = Group(nodes + _in + relation).setParseAction(
        replace('E_NIR', keep=True))
    nbf = Group(nodes + _by + function).setParseAction(
        replace('E_NBF', keep=True))
    ebf = Group(elements + _by + function).setParseAction(
        replace('E_EBF', keep=True))
    eog = Group(elements + _of + group + Word(nums)).setParseAction(
        replace('E_EOG', keep=True))
    nog = Group(nodes + _of + group + Word(nums)).setParseAction(
        replace('E_NOG', keep=True))
    onir = Group(node + _in + region).setParseAction(
        replace_with_region('E_ONIR', 2))
    ni = Group(node + delimitedList(inumber)).setParseAction(
        replace('E_NI', keep=True))
    ei1 = Group(element + delimitedList(inumber)).setParseAction(
        replace('E_EI1', keep=True))
    # "(i, j)" element tuples.
    etuple = (lpar.suppress() + inumber + comma.suppress() +
              inumber + rpar.suppress())
    ei2 = Group(element + delimitedList(etuple)).setParseAction(
        replace('E_EI2', keep=True))
    noset = Group(nodes + _of + _set + set_name).setParseAction(
        replace('E_NOSET', keep=True))
    eoset = Group(elements + _of + _set + set_name).setParseAction(
        replace('E_EOSET', keep=True))
    region_expression = Forward()
    atom1 = (_all | region | ni | onir | nos | nir | nbf |
             ei1 | ei2 | ebf | eog | nog | noset | eoset)
    atom1.setParseAction(to_stack(stack))
    atom2 = (lpar + region_expression.suppress() + rpar)
    atom = (atom1 | atom2)
    # Operators chain as "atom (regop expr)*".
    aux = (regop + region_expression)
    aux.setParseAction(to_stack(stack))
    region_expression << atom + ZeroOrMore(aux)
    region_expression = StringStart() + region_expression + StringEnd()
    return region_expression
def __repr__(self):
    # Debug representation, e.g. "<variable x>".
    return "<variable " + str(self.name) + ">"


# Module-level grammar for arithmetic/function expressions.
# Signed int or float literal.
number = Regex(r"[\+\-]?(([0-9]+(\.[0-9]+)?)|(\.[0-9]+))")
comma = Literal(",")
# Function names and variable names share the same lexical form.
name = Regex("[a-z][a-z0-9_]*")
var_name = Regex("[a-z][a-z0-9_]*")
var_name.setParseAction(lambda tokens: Variable(tokens))
element = Forward()
equation = Forward()
# Comma-separated argument list; each argument is a full sub-equation.
arguments = Group(equation) + ZeroOrMore(comma.suppress() + Group(equation))
# Either "name(args...)" (built into a Function node) or a bare element.
function_or_element = (name + Literal("(").suppress() + Group(arguments) +
                       Literal(")").suppress()).setParseAction(
    lambda tokens: Function(tokens)) | element
# An element: variable, number, or parenthesised sub-equation.
element << (var_name | number | (Literal("(").suppress() + Group(equation) +
                                 Literal(")").suppress()))
# NOTE(review): ``infix`` is defined elsewhere in this module.
equation << (function_or_element + ZeroOrMore(infix + function_or_element))


# Now we have the actual evaluate function.
# NOTE(review): mutable default arguments ({}) are shared across calls —
# consider ``None`` sentinels. (Definition continues beyond this chunk.)
def evaluate(text, variables={}, functions={}):
    """
#Variables variable << ( Combine( Word(alphas) + ZeroOrMore( Word(alphas) | integer | Literal(UNDERSCORE) ))) #Array elements arrayElem << ( (variable + Literal(S) + expression + Literal(PIECE)) ) #Identifiers identifier = ( (arrayElem) ^ (variable) ) #Types types = (Literal(LETTER) | Literal(NUMBER) | Literal(SENTENCE)) #Expressions term << Group(functionCall ^ constant ^ identifier ^ Group(lpar.suppress() + expression + rpar.suppress())) notTerm = (ZeroOrMore(bitNot) + term) unary = ZeroOrMore(plusMinus) + notTerm multiplication = Group(unary + (ZeroOrMore(multDivMod + unary ))) addition = Group(multiplication + (ZeroOrMore(plusMinus + multiplication))) andExp = Group(addition + (ZeroOrMore(bitAnd + addition ))) xorExp = Group(andExp + (ZeroOrMore(bitXor + andExp ))) orExp = Group(xorExp + (ZeroOrMore(bitOr + xorExp ))) expression << Group(orExp) #Boolean expressions relationalExp << Group((expression + relationalOp + expression) | ( ZeroOrMore(boolNot) + lpar.suppress() + relationalExp + rpar.suppress())) boolAndExp = Group(relationalExp) + ZeroOrMore(boolAnd + Group(relationalExp))
# "\par" control word becomes a newline in the extracted text.
HTM_CTRL_NEWLINE = HTM_CTRL.suppress() + Literal("\\par").setParseAction(replaceWith("\n"))
# NOTE(review): ``.suppress()`` returns a new element; this bare call
# discards its result and therefore has no effect — confirm intent.
HTM_CTRL_NEWLINE.suppress()
# handle "{\*\htmltag84 }"
HTM_CTRL_EMPTY = HTM_CTRL.suppress() + Word(" ").leaveWhitespace()
HTM_TXT = OneOrMore(Word(htmchars))
HTM_CTRL_CONTENT = HTM_CTRL.suppress() + Optional(BRCKT_R).suppress() + HTM_TXT
# Both opening and closing tags and their contents
HTM_TAG = Combine(Literal("<") + Word(htmchars) + Literal(">"))
HTM_TAG.leaveWhitespace()
HTM_TAG.setName("HtmlTag")
#HTM_TAG_EMPTYCONTENT = Word(" ") + BRCKT_R.suppress()
HTM_TAG_PLUS_CONTENT = HTM_TAG + Optional(BRCKT_R.suppress() + HTM_TXT)
HTM_TAG_PLUS_CONTENT.leaveWhitespace()
# Text content inside HTML
HTM_CONTENT_IND = Suppress("\\htmlrtf0 ")
HTM_CONTENT = HTM_CONTENT_IND + OneOrMore(Word(htmchars))
HTM_CONTENT.setName("Html content")
HTM_CONTENT.leaveWhitespace()
# HYPERLINK "..." — keeps only the quoted link target.
RTFLINK = Suppress("HYPERLINK \"") + Word(htmchars.replace('"', '')) + Literal('"').suppress()
# (Alternation continues beyond this chunk.)
RTF = OneOrMore(
    HTM_CTRL_CONTENT | \
    HTM_TAG_PLUS_CONTENT | \
    HTM_CONTENT | \
# Signed float: optional "+"/"-" sign followed by a positive float; the
# parse action wraps it in a typed dict.
signed_float = (Optional(Or([Literal("+"), Literal("-")]))("sign") +
                positive_float).\
    setParseAction(lambda _s, l, t: {'type': 'float', 'value': float(t[0])})
# NOTE(review): ``bool(t[0])`` on the matched *string* is True for both
# "True" and "False" (any non-empty string is truthy) — confirm whether
# ``t[0] == "True"`` was intended.
boolean = Or([true, false]).setParseAction(lambda t: {'type': 'boolean', 'value': bool(t[0])})
quoted_string = quotedString(r".*")
unquoted_string = Regex(r".*")  # Anything
alphanums_string = Word(alphanums)
code_string = Word(alphanums + "_" + "-")  # For codes in Categories, Code Lists
# "tag[hierarchy.]code" literal code references.
literal_code_string = (tag.suppress() +
                       Optional(simple_ident + dot.suppress())("hierarchy") +
                       code_string("code"))
# References
code_string_reference = hash.suppress() + code_string + hash.suppress()
# RULES - literal ANY string
string = quotedString.setParseAction(
    lambda t: {'type': 'str', 'value': t[0][1:-1]})
# RULES - unit name
# (Definition continues beyond this chunk.)
def parse_action_unit_name(s, l, tt):
def define_dot_parser(self):
    """Define dot grammar

    Based on the grammar http://www.graphviz.org/doc/info/lang.html
    """
    # punctuation
    colon = Literal(":")
    lbrace = Suppress("{")
    rbrace = Suppress("}")
    lbrack = Suppress("[")
    rbrack = Suppress("]")
    lparen = Literal("(")
    rparen = Literal(")")
    equals = Suppress("=")
    comma = Literal(",")
    dot = Literal(".")
    slash = Literal("/")
    bslash = Literal("\\")
    star = Literal("*")
    semi = Suppress(";")
    at = Literal("@")
    minus = Literal("-")
    pluss = Suppress("+")
    # keywords
    strict_ = CaselessLiteral("strict")
    graph_ = CaselessLiteral("graph")
    digraph_ = CaselessLiteral("digraph")
    subgraph_ = CaselessLiteral("subgraph")
    node_ = CaselessLiteral("node")
    edge_ = CaselessLiteral("edge")
    # All punctuation except "_", plus whitespace: delimits bare IDs.
    punctuation_ = "".join([c for c in string.punctuation if c not in '_']) + string.whitespace
    # token definitions
    identifier = Word(alphanums + "_").setName("identifier")
    #double_quoted_string = QuotedString('"', multiline=True,escChar='\\',
    #    unquoteResults=True) # dblQuotedString
    double_quoted_string = Regex(r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE)
    double_quoted_string.setParseAction(removeQuotes)
    # Dot allows "a" + "b" string concatenation.
    quoted_string = Combine(double_quoted_string +
                            Optional(OneOrMore(pluss + double_quoted_string)),
                            adjacent=False)
    alphastring_ = OneOrMore(CharsNotIn(punctuation_))

    def parse_html(s, loc, toks):
        # Re-wrap nested HTML label content in "<<...>>".
        return '<<%s>>' % ''.join(toks[0])

    opener = '<'
    closer = '>'
    # NOTE(review): bare ``except`` also hides unrelated errors; kept as-is.
    try:
        html_text = pyparsing.nestedExpr(
            opener, closer,
            ((CharsNotIn(opener + closer).setParseAction(lambda t: t[0])))
        ).setParseAction(parse_html)
    except:
        log.debug('nestedExpr not available.')
        log.warning('Old version of pyparsing detected. Version 1.4.8 or '
                    'later is recommended. Parsing of html labels may not '
                    'work properly.')
        html_text = Combine(Literal("<<") + OneOrMore(CharsNotIn(",]")))

    ID = (alphastring_ | html_text |
          quoted_string |  #.setParseAction(strip_quotes) |
          identifier).setName("ID")
    float_number = Combine(Optional(minus) +
                           OneOrMore(Word(nums + "."))).setName("float_number")
    righthand_id = (float_number | ID).setName("righthand_id")
    # Node ports: ":port[:angle]" or ":(x,y)" forms.
    port_angle = (at + ID).setName("port_angle")
    port_location = ((OneOrMore(Group(colon + ID)) |
                      Group(colon + lparen + ID + comma + ID + rparen))).setName("port_location")
    port = Combine((Group(port_location + Optional(port_angle)) |
                    Group(port_angle + Optional(port_location)))).setName("port")
    node_id = (ID + Optional(port))
    # Attribute lists: "[a=b, c=d]".
    a_list = OneOrMore(ID + Optional(equals + righthand_id) +
                       Optional(comma.suppress())).setName("a_list")
    attr_list = OneOrMore(lbrack + Optional(a_list) +
                          rbrack).setName("attr_list").setResultsName('attrlist')
    attr_stmt = ((graph_ | node_ | edge_) + attr_list).setName("attr_stmt")
    edgeop = (Literal("--") | Literal("->")).setName("edgeop")
    stmt_list = Forward()
    graph_stmt = (lbrace + Optional(stmt_list) + rbrace +
                  Optional(semi)).setName("graph_stmt")
    edge_point = Forward()
    edgeRHS = OneOrMore(edgeop + edge_point)
    edge_stmt = edge_point + edgeRHS + Optional(attr_list)
    subgraph = (Optional(subgraph_, '') + Optional(ID, '') +
                Group(graph_stmt)).setName("subgraph").setResultsName('ssubgraph')
    edge_point << (subgraph | graph_stmt | node_id)
    node_stmt = (node_id + Optional(attr_list) + Optional(semi)).setName("node_stmt")
    assignment = (ID + equals + righthand_id).setName("assignment")
    stmt = (assignment | edge_stmt | attr_stmt | subgraph |
            graph_stmt | node_stmt).setName("stmt")
    stmt_list << OneOrMore(stmt + Optional(semi))
    # Top-level: "[strict] (graph|digraph) [name] { ... }".
    graphparser = ((Optional(strict_, 'notstrict') + ((graph_ | digraph_)) +
                    Optional(ID, '') + lbrace + Group(Optional(stmt_list)) +
                    rbrace).setResultsName("graph"))
    singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)
    # actions
    graphparser.ignore(singleLineComment)
    graphparser.ignore(cStyleComment)
    # Bind the semantic actions that build the graph data structures.
    node_id.setParseAction(self._proc_node_id)
    assignment.setParseAction(self._proc_attr_assignment)
    a_list.setParseAction(self._proc_attr_list)
    edge_stmt.setParseAction(self._proc_edge_stmt)
    node_stmt.setParseAction(self._proc_node_stmt)
    attr_stmt.setParseAction(self._proc_default_attr_stmt)
    attr_list.setParseAction(self._proc_attr_list_combine)
    subgraph.setParseAction(self._proc_subgraph_stmt)
    #graph_stmt.setParseAction(self._proc_graph_stmt)
    graphparser.setParseAction(self._main_graph_stmt)
    return graphparser
def create_bnf(stack):
    # Build the BNF for region-selection expressions over vertices/cells;
    # parsed atoms and operators are pushed onto *stack* by ``to_stack``.
    point = Literal(".")
    comma = Literal(",")
    e = CaselessLiteral("E")
    inumber = Word(nums)
    # Signed float with optional fraction and exponent.
    fnumber = Combine(Word("+-" + nums, nums) +
                      Optional(point + Optional(Word(nums))) +
                      Optional(e + Word("+-" + nums, nums)))
    _of = Literal('of')
    _in = Literal('in')
    _by = Literal('by')
    _copy = Literal('copy')
    # Region-algebra operators: -/+/* over v(ertices), e(dges), f(aces),
    # c(ells), s(urfaces).
    _mv = Literal('-v').setParseAction(replace('OA_SubV'))
    _me = Literal('-e').setParseAction(replace('OA_SubE'))
    _mf = Literal('-f').setParseAction(replace('OA_SubF'))
    _mc = Literal('-c').setParseAction(replace('OA_SubC'))
    _ms = Literal('-s').setParseAction(replace('OA_SubS'))
    _pv = Literal('+v').setParseAction(replace('OA_AddV'))
    _pe = Literal('+e').setParseAction(replace('OA_AddE'))
    _pf = Literal('+f').setParseAction(replace('OA_AddF'))
    _pc = Literal('+c').setParseAction(replace('OA_AddC'))
    _ps = Literal('+s').setParseAction(replace('OA_AddS'))
    _inv = Literal('*v').setParseAction(replace('OA_IntersectV'))
    _ine = Literal('*e').setParseAction(replace('OA_IntersectE'))
    _inf = Literal('*f').setParseAction(replace('OA_IntersectF'))
    _inc = Literal('*c').setParseAction(replace('OA_IntersectC'))
    _ins = Literal('*s').setParseAction(replace('OA_IntersectS'))
    regop = (_mv | _me | _mf | _mc | _ms |
             _pv | _pe | _pf | _pc | _ps |
             _inv | _ine | _inf | _inc | _ins)
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    _all = Literal('all').setParseAction(replace('KW_All'))
    vertex = Literal('vertex')
    vertices = Literal('vertices')
    cell = Literal('cell')
    cells = Literal('cells')
    group = Literal('group')
    _set = Literal('set')
    surface = Literal('surface')
    ident = Word(alphas + '_.', alphanums + '_.')
    set_name = Word(nums) | ident
    function = Word(alphas + '_', alphanums + '_')
    function = Group(function).setParseAction(join_tokens)
    # Region names: "r.<name>", optionally prefixed with "copy".
    region = Combine(Literal('r.') +
                     Word(alphas + '_', '_' + alphas + nums + '.'))
    region = Group(Optional(_copy, default='nocopy') + region)
    region.setParseAction(replace('KW_Region', keep=True))
    # Coordinate relations, e.g. "(x < 0.5) & (y > 0)".
    coor = oneOf('x y z')
    boolop = oneOf('& |')
    relop = oneOf('< > <= >= != ==')
    bool_term = (ZeroOrMore('(') + (coor | fnumber) + relop +
                 (coor | fnumber) + ZeroOrMore(')'))
    relation = Forward()
    relation << (ZeroOrMore('(') + bool_term +
                 ZeroOrMore(boolop + relation) + ZeroOrMore(')'))
    relation = Group(relation).setParseAction(join_tokens)
    # Selector atoms ("vertices of surface", "cells by <fn>", ...).
    nos = Group(vertices + _of + surface).setParseAction(replace('E_VOS'))
    nir = Group(vertices + _in + relation).setParseAction(
        replace('E_VIR', keep=True))
    nbf = Group(vertices + _by + function).setParseAction(
        replace('E_VBF', keep=True))
    ebf = Group(cells + _by + function).setParseAction(
        replace('E_CBF', keep=True))
    eog = Group(cells + _of + group + Word(nums)).setParseAction(
        replace('E_COG', keep=True))
    nog = Group(vertices + _of + group + Word(nums)).setParseAction(
        replace('E_VOG', keep=True))
    onir = Group(vertex + _in + region).setParseAction(
        replace_with_region('E_OVIR', 2))
    ni = Group(vertex + delimitedList(inumber)).setParseAction(
        replace('E_VI', keep=True))
    ei1 = Group(cell + delimitedList(inumber)).setParseAction(
        replace('E_CI1', keep=True))
    # "(i, j)" cell tuples.
    etuple = (lpar.suppress() + inumber + comma.suppress() +
              inumber + rpar.suppress())
    ei2 = Group(cell + delimitedList(etuple)).setParseAction(
        replace('E_CI2', keep=True))
    noset = Group(vertices + _of + _set + set_name).setParseAction(
        replace('E_VOSET', keep=True))
    eoset = Group(cells + _of + _set + set_name).setParseAction(
        replace('E_COSET', keep=True))
    region_expression = Forward()
    atom1 = (_all | region | ni | onir | nos | nir | nbf |
             ei1 | ei2 | ebf | eog | nog | noset | eoset)
    atom1.setParseAction(to_stack(stack))
    atom2 = (lpar + region_expression.suppress() + rpar)
    atom = (atom1 | atom2)
    # Operators chain as "atom (regop expr)*".
    aux = (regop + region_expression)
    aux.setParseAction(to_stack(stack))
    region_expression << atom + ZeroOrMore(aux)
    region_expression = StringStart() + region_expression + StringEnd()
    return region_expression
from pyparsing import Literal,Suppress,CharsNotIn,CaselessLiteral,\
    Word,dblQuotedString,alphanums
import urllib
import pprint

# NOTE(review): Python 2 script (print statement, urllib.urlopen).
# Define the pyparsing grammar for a URL, that is:
#   URLlink ::= <a href= URL>linkText</a>
#   URL ::= doubleQuotedString | alphanumericWordPath
# Note that whitespace may appear just about anywhere in the link. Note also
# that it is not necessary to explicitly show this in the pyparsing grammar; by default,
# pyparsing skips over whitespace between tokens.
linkOpenTag = (Literal("<") + "a" + "href" + "=").suppress() + \
    (dblQuotedString | Word(alphanums + "/")) + \
    Suppress(">")
linkCloseTag = Literal("<") + "/" + CaselessLiteral("a") + ">"
link = linkOpenTag + CharsNotIn("<") + linkCloseTag.suppress()

# Go get some HTML with some links in it.
serverListPage = urllib.urlopen("http://www.yahoo.com")
htmlText = serverListPage.read()
serverListPage.close()

# scanString is a generator that loops through the input htmlText, and for each
# match yields the tokens and start and end locations (for this application, we are
# not interested in the start and end values).
for toks, strt, end in link.scanString(htmlText):
    print toks.asList()

# Rerun scanString, but this time create a dict of text:URL key-value pairs.
# Need to reverse the tokens returned by link, using a parse action.
link.setParseAction(lambda st, loc, toks: [toks[1], toks[0]])
# nameType = Word(alphanums+"@_:|") real = Combine(Word(nums+"+-", nums) + Optional(dot) + Optional(Word(nums)) + Optional(CaselessLiteral("E") + Word(nums+"+-",nums))).setParseAction(to_float) integer = Word(nums+"+-", nums).setParseAction(to_int) number = integer ^ real number_range = Group(number + minus + number).setParseAction(to_list) color = Color + lparen + Group(delimitedList(number)).setParseAction(to_list) + rparen + Optional(aster + number) acolor = AColor + lparen + Group(delimitedList(number)).setParseAction(to_list) + rparen vector = Vector + lparen + Group(delimitedList(number)).setParseAction(to_list) + rparen matrix = Matrix + lparen + Group(delimitedList(vector)).setParseAction(to_list) + rparen transform = Transform + lparen + Group(matrix + comma.suppress() + vector).setParseAction(to_list) + rparen transformHex = TransformHex + lparen + quotedString.setParseAction(no_quotes) + rparen tm = transform ^ transformHex listStr = List + lparen + Group(Optional(delimitedList(nameType ^ acolor ^ integer ^ number))).setParseAction(to_list) + rparen listInt = ListInt + lparen + Group(Optional(delimitedList(integer))).setParseAction(to_list) + rparen listFloat = ListFloat + lparen + Group(delimitedList(number)).setParseAction(to_list) + rparen listVector = ListVector + lparen + Group(delimitedList(vector)).setParseAction(to_list) + rparen listString = ListString + lparen + Group(quotedString.setParseAction(no_quotes)).setParseAction(to_list) + rparen listIntHex = ListIntHex + lparen + quotedString.setParseAction(no_quotes) + rparen listFloatHex = ListFloatHex + lparen + quotedString.setParseAction(no_quotes) + rparen listVectorHex = ListVectorHex + lparen + quotedString.setParseAction(no_quotes) + rparen listColorHex = ListColorHex + lparen + quotedString.setParseAction(no_quotes) + rparen vectorList = listVector ^ listVectorHex
def __prepare_identifier(cls, name):
    # Normalize a slug into an identifier.
    return identifier_from_slug(name)

@classmethod
def __make_spec(cls, name, op_name):
    # Look up the spec factory for *op_name* and apply it to *name*.
    spec_gen = cls.spec_map[op_name]
    return spec_gen(name)

def convert_junction(seq):
    # Combine the left- and right-most parsed specs of a junction with "&".
    left_spec = seq[0][0]
    right_spec = seq[0][-1]
    return left_spec & right_spec

# An order criterion: "<name>:<direction>".
criterion = Group(identifier('name') + colon.suppress() +
                  direction('operator')).setResultsName('order')
criterion.setParseAction(CriterionConverter.convert)
# Several criteria joined by "~" (left-associative binary operator).
criteria = \
    operatorPrecedence(criterion,
                       [(tilde, BINARY, opAssoc.LEFT, convert_junction)])
order = criteria | criterion

# (Definition continues beyond this chunk.)
def parse_order(criteria_string):
    """
    Parses the given order criteria string.
    """
# Boolean literals.
true = Literal("True")
false = Literal("False")
atom = Forward()
# Expression grammar with standard precedence (tightest first).
infix = infixNotation(atom, [
    ('not', 1, opAssoc.RIGHT, _make_unary),
    (oneOf('* /'), 2, opAssoc.LEFT, _make_binary),
    (oneOf('+ -'), 2, opAssoc.LEFT, _make_binary),
    (oneOf('> gt >= ge < lt <= le != ne == eq'), 2, opAssoc.LEFT, _make_binary),
    ('and', 2, opAssoc.LEFT, _make_binary),
    ('or', 2, opAssoc.LEFT, _make_binary),
    ('in', 2, opAssoc.LEFT, _make_binary),
])
dellist = delimitedList(Optional(atom))
# "[a, b, ...]" list literal.
listing = lbr.suppress() + dellist + rbr.suppress()
# "name(args...)" function call.
function = identifier.setResultsName('name') + lpar.suppress() + Group(
    Optional(delimitedList(atom))).setResultsName("args") + rpar.suppress()
atom << (listing | number | string | variable | true | false | none | function)
_false = Const(False)
_true = Const(True)
# Fold terminals into AST node objects.
number.setParseAction(lambda t: Const(_number(t[0])))
variable.setParseAction(lambda t: Variable(t[0].strip("$")))
string.setParseAction(lambda t: Const(_str(t[0])))
none.setParseAction(lambda t: _false)  # "None" evaluates as false
false.setParseAction(lambda t: _false)
true.setParseAction(lambda t: _true)
dellist.setParseAction(lambda s, l, t: List(t[:]))
function.setParseAction(_make_func)
integer = Word(nums)
single_section = Word(lowercaseplus, min=2)
# NOTE(review): setResultsName returns a copy; the result is discarded here,
# so this call has no effect — confirm whether assignment was intended.
single_section.setResultsName('SINGLE')
integer_var = Word(lowercase, exact=1)
# Signed decimal number, e.g. "-12.5".
double = Group(Optional(MINUS) + integer + Optional(PERIOD + integer))
operand = integer ^ integer_var
operator = Word('+-*/', exact=1)
unaryoperation = operand
binaryoperation = operand + operator + operand
operation = unaryoperation ^ binaryoperation
# "name[expr]" array-section indexing.
array_section = Group(single_section + LSQUARE.suppress() + operation +
                      RSQUARE.suppress())
# NOTE(review): result discarded here as well (see above).
array_section.setResultsName('ARRAY')
section = single_section ^ array_section
# A section with a bracketed location value.
section_location = Group(section + LBRACK.suppress() + double +
                         RBRACK.suppress())
# "create s1, s2, ..." statement.
create = Keyword('create').suppress() + section + ZeroOrMore(COMMA.suppress() + section)
create.setParseAction(print_create(converted_file))
# "connect loc1, loc2" statement.
connect = Keyword('connect').suppress() + section_location + COMMA.suppress(
) + section_location
connect.setParseAction(print_connect(converted_file))
EOL = Suppress(LineEnd()) # $ SGL_PRINTABLE = Char(printables) singleTextString = originalTextFor( ZeroOrMore(~EOL + (White(" \t") | Word(printables)))).leaveWhitespace() XDIGIT = hexnums INT = Word(nums) ESC = BSLASH + (oneOf(list(r'nrtbf\">' + "'")) | ('u' + Word(hexnums, exact=4)) | SGL_PRINTABLE) LITERAL_CHAR = ESC | ~(APOS | BSLASH) + SGL_PRINTABLE CHAR_LITERAL = APOS + LITERAL_CHAR + APOS STRING_LITERAL = APOS + Combine(OneOrMore(LITERAL_CHAR)) + APOS DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"' DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(SGL_PRINTABLE) + '>>' TOKEN_REF = Word(alphas.upper(), alphanums + '_') RULE_REF = Word(alphas.lower(), alphanums + '_') ACTION_ESC = (BSLASH.suppress() + APOS | BSLASH.suppress() | BSLASH.suppress() + (~(APOS | QUOTE) + SGL_PRINTABLE)) ACTION_CHAR_LITERAL = (APOS + (ACTION_ESC | ~(BSLASH | APOS) + SGL_PRINTABLE) + APOS) ACTION_STRING_LITERAL = ( QUOTE + ZeroOrMore(ACTION_ESC | ~(BSLASH | QUOTE) + SGL_PRINTABLE) + QUOTE) SRC = SRC_.suppress() + ACTION_STRING_LITERAL("file") + INT("line") id = TOKEN_REF | RULE_REF SL_COMMENT = Suppress('//') + Suppress('$ANTLR') + SRC | ZeroOrMore( ~EOL + Word(printables)) + EOL ML_COMMENT = cStyleComment WS = OneOrMore( Suppress(' ') | Suppress('\t') | (Optional(Suppress('\r')) + Literal('\n')))
    alphanums,
    WordEnd,
    Combine,
)
IDENTIFIER = pyparsing_common.identifier
# A literal value: quoted string, number, or the None keyword.
VALUE = quotedString ^ pyparsing_common.number ^ Literal("None")
# A path ending in ".py" (may contain spaces, quotes, slashes).
PATH = Combine(OneOrMore(Word(alphanums + '/\\~ "')) + Literal(".py") + WordEnd())
#
ARGS_FLAG = Literal("--args") ^ Literal("-a")
HELP_FLAG = Literal("--help") ^ Literal("-h")
#
EQUAL_SIGN = Literal("=")
DOUBLE_DASH_SIGN = Literal("--")
# Three argument styles: positional, "--key value", "key=value".
args = ARGS_FLAG.suppress() + OneOrMore(Group(VALUE))
fwargs = OneOrMore(Group(DOUBLE_DASH_SIGN.suppress() + IDENTIFIER + VALUE))
kwargs = OneOrMore(Group(IDENTIFIER + EQUAL_SIGN.suppress() + VALUE))
#
# A function invocation: name, optional arguments, optional help flag.
args_func = Group(IDENTIFIER + Group(Optional(args)) + Optional(HELP_FLAG))
kwargs_func = Group(IDENTIFIER + Group(Optional(kwargs)) + Optional(HELP_FLAG))
fwargs_func = Group(IDENTIFIER + Group(Optional(fwargs)) + Optional(HELP_FLAG))
#
paths = Group(ZeroOrMore(PATH))
args_funcs = Group(ZeroOrMore(args_func))
kwargs_funcs = Group(ZeroOrMore(kwargs_func))
fwargs_funcs = Group(ZeroOrMore(fwargs_func))
#
# A command: script paths followed by function invocations (one grammar
# per argument style).
command_1 = paths + args_funcs
command_2 = paths + kwargs_funcs
command_3 = paths + fwargs_funcs
    # Fold the new criterion spec into the accumulated spec with "&".
    # NOTE(review): this fragment starts mid-function; the original nesting
    # depth is not recoverable from this view — verify indentation.
    if spec is None:
        spec = crit_spec
    else:
        spec = spec & crit_spec
    return spec

# Numbers are converted to ints if possible.
cql_number = Combine(
    Optional('-') + ('0' | Word(nonzero_nums, nums)) +
    Optional('.' + Word(nums)) +
    Optional(Word('eE', exact=1) + Word(nums + '+-', nums))).setParseAction(convert_number)
# Dates are parsed as double-quoted ISO8601 strings and converted to datetime
# objects.
cql_date = Combine(dbl_quote.suppress() + Regex(ISO8601_REGEX) +
                   dbl_quote.suppress()).setParseAction(convert_date)
# All double-quoted strings that are not dates are returned with their quotes
# removed.
cql_string = dblQuotedString.setParseAction(removeQuotes)
# URLs
protocol = Literal('http')
domain = Combine(OneOrMore(CharsNotIn('/')))
path = Combine(slash + OneOrMore(CharsNotIn(',~?&')))
cql_url = Combine(protocol + '://' + domain + path)
# Number range.
# FIXME: char ranges are not supported yet
cql_number_range = Group(cql_number + '-' + cql_number).setParseAction(convert_range)
def get_parser(force=False):
    '''
    Build (once) and return the decay-file parser.

    @param force: when True, rebuild the grammar even if a cached parser
        exists in the module-global ``decay_parser``.
    @return: a pyparsing.ParserElement object with the usual parse methods,
    except that pyparsing.ParserElement.parseString overridden to return a
    ProcessGroup object instead of a pyparsing.ParseResults object for convenience.
    '''
    global decay_parser, ID

    ########################################
    ## Supporting parse classes
    ########################################
    class ParsedDecay:
        # Intermediate record for "start -> {end...} [params]" statements.
        def __init__(self, start, end, params):
            self.start = start
            if isinstance(end, ParseResults):
                end = end.asList()
            self.end = end
            self.params = params

    class ParsedParam:
        # Intermediate record for "name = value" statements.
        def __init__(self, name, value):
            self.name = name
            self.value = value

    class ParsedParticle:
        # Intermediate record for "name [params]" node statements.
        def __init__(self, name, params):
            self.name = name
            self.params = params

    class ParsedDefault:
        # Intermediate record for "particle [...]" / "decay [...]" defaults.
        def __init__(self, params, for_particle):
            self.params = params
            self.for_particle = for_particle

    ########################################
    ## Parser action functions
    ########################################
    def push_param_stmt(code_str, loc, toks):
        """ toks will be of the form [param_name, param_value] """
        return ParsedParam(toks[0], toks[1])

    def push_edge_stmt(code_str, loc, toks):
        ''' toks will be a list of the form [start_name, end (, params)] '''
        if len(toks) > 2:  # Check for parameter list
            params = toks[2]
        else:
            params = {}
        end = toks[1]
        if isinstance(end, ParsedParticle):
            end_name = end.name
        else:
            end_name = end
        return ParsedDecay(toks[0], end_name, params)

    def push_node_stmt(code_str, loc, toks):
        """ toks will be a list of the form [name, params] """
        if len(toks) > 1:  # Check for parameter list
            params = toks[1]
        else:
            params = {}
        return ParsedParticle(toks[0], params)

    def push_param_list(code_str, loc, toks):
        """ toks will be a list of the form
            [ name1, name2, '=', val2, name3, ... ]
        """
        params = {}
        i = 0
        l = len(toks)
        while i < l:
            param_name = toks[i]
            # A name followed by '=' consumes three tokens; a bare name gets
            # the default value and consumes one.
            if i + 2 < l and toks[i + 1] == '=':
                param_value = toks[i + 2]
                increment = 3
            else:
                param_value = DEFAULT_PARAM_VALUE
                increment = 1
            params[param_name] = param_value
            i += increment
        return params

    def push_default_stmt(code_str, loc, toks):
        ''' toks will be of the form ["particle", param_dict] or
            ["decay", param_dict]
        '''
        return ParsedDefault(toks[1], toks[0].lower() == 'particle')

    def push_stmt_list(code_str, loc, toks):
        """ toks will be a ParseResults of Particle/ParsedDecay/ParsedParam
            objects.  Assembles them into a ProcessGroup, resolving defaults
            in file order and linking decays to their particles.
        """
        proc_group = ProcessGroup()
        seen_particles = {}
        edges = []
        particle_defaults = {}

        def params_for_object(obj, defaults):
            # Merge the current defaults with the object's own params
            # (object params win).
            params = defaults.copy()
            params.update(obj.params)
            return params

        # Add particle objects we've generated already
        toks = toks.asList()
        for token in toks:
            if isinstance(token, ParsedDefault) and token.for_particle:
                particle_defaults.update(token.params)
            elif isinstance(token, ParsedParticle):
                #print 'Adding ', token.name
                seen_particles[token.name] = Particle(
                    token.params.pop('type', token.name),
                    **params_for_object(token, particle_defaults)
                )

        def find_or_insert_particle(name):
            # FIX: dict.has_key() was removed in Python 3; the `in` operator
            # is equivalent and works on Python 2 as well.
            if name in seen_particles:
                #print 'Using existing particle for %s' % name
                return seen_particles[name]
            else:
                #print 'Creating %s' % name
                seen_particles[name] = Particle(name, **particle_defaults)
                # Type is assumed to be the name of the particle
                return seen_particles[name]

        # Next add decays and any particles they reference that we haven't
        # found already
        particle_defaults = {}  # Reset so that we can use the right defaults at each place in the file
        decay_defaults = {}
        for token in toks:
            if isinstance(token, ParsedDefault):
                if token.for_particle:
                    particle_defaults.update(token.params)
                else:
                    decay_defaults.update(token.params)
            elif isinstance(token, ParsedDecay):
                start = find_or_insert_particle(token.start)
                end = []
                for end_point in token.end:
                    end.append(find_or_insert_particle(end_point))
                params = params_for_object(token, decay_defaults)
                # If a particle was used twice, this should raise an error
                start.add_decay(end, **params)
            elif isinstance(token, ParsedParam):
                proc_group.add_param(token.name, token.value)

        # FIX: materialise as a list so .remove() below is valid on Python 3
        # (dict.values() is a view there); identical behaviour on Python 2.
        seen_particles = list(seen_particles.values())
        # We allow for more than one root particle
        while len(seen_particles) > 0:
            decay_root = seen_particles[0]
            # Find the root of the current tree
            while decay_root.parent:
                decay_root = decay_root.parent
            # Now record everything under that root as dealt with, so we can
            # see if there are more roots
            particles_to_delete = [decay_root]
            while len(particles_to_delete) > 0:
                particle = particles_to_delete.pop()
                seen_particles.remove(particle)
                decays = particle.decays
                for decay in decays:
                    particles_to_delete.extend(decay)
            proc_group.add_root_particle(decay_root)
        return proc_group

    ########################################
    ## Parser grammar definition
    ########################################
    if force or not decay_parser:
        # Literals
        lbrace = Literal("{")
        rbrace = Literal("}")
        lbrack = Literal("[")
        rbrack = Literal("]")
        equals = Literal("=")
        comma = Literal(",")
        semi = Literal(";")
        minus = Literal("-")
        arrow = Combine(arrow_start + arrow_end)

        # keywords
        particle = CaselessKeyword("particle")
        decay = CaselessKeyword("decay")

        # token definitions
        float_number = Combine(Optional(minus) +
                               OneOrMore(Word(nums + "."))).setName("float_number")
        param_list = Forward()
        stmt_list = Forward()
        param_val = (float_number | ID | param_list).setName("param_val")
        # We don't want to suppress the equals, since there may be parameters
        # with no values
        param_sequence = OneOrMore(ID + Optional(equals + param_val) +
                                   Optional(comma.suppress())).setName("param_sequence")
        param_list << (lbrack.suppress() + Optional(param_sequence) +
                       rbrack.suppress()).setName("param_list")
        # Here a parameter statement is required, since there is no point in
        # having a default stmt with no parameters
        default_stmt = ((particle | decay) + param_list).setName("default_stmt")
        node_set = Group(lbrace.suppress() + ZeroOrMore(ID) +
                         rbrace.suppress()).setName("node_set")
        edgeop = arrow.copy().setName('edgeop')
        edgeRHS = edgeop.suppress() + node_set
        edge_stmt = ID + edgeRHS + Optional(param_list)
        node_stmt = (ID + Optional(param_list)).setName("node_stmt")
        param_stmt = (ID + equals.suppress() + param_val).setName('param_stmt')
        ### NOTE: THE ORDER OF THE stmt OPTIONS DETERMINES THE RESOLUTION
        ### ORDER FOR WHEN IT FINDS A NODE NAME!!!
        # Default statements have highest priority, since we want to prevent
        # the use of their keywords as node or param names.
        stmt = (default_stmt | param_stmt | edge_stmt | node_stmt).setName("stmt")
        stmt_list << OneOrMore(stmt + semi.suppress())
        decay_parser = stmt_list + StringEnd()

        # Comments
        singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)
        decay_parser.ignore(singleLineComment)
        decay_parser.ignore(cStyleComment)

        ########################################
        ## Set parse actions
        ########################################
        '''
        def printAction(code_str, loc, toks):
            print toks
            return toks
        '''
        stmt_list.setParseAction(push_stmt_list)
        edge_stmt.setParseAction(push_edge_stmt)
        node_stmt.setParseAction(push_node_stmt)
        param_list.setParseAction(push_param_list)
        param_stmt.setParseAction(push_param_stmt)
        default_stmt.setParseAction(push_default_stmt)

        # Make the top-level parse method return a PyDecay object, not a
        # ParseResults
        decay_parser.parseString = new.instancemethod(
            lambda self, instring, parseAll=False:
                super(self.__class__, self).parseString(instring, parseAll)[0],
            decay_parser, decay_parser.__class__)
    return decay_parser
def graph_definition():
    """Build (once) and return the DOT-language graph parser.

    The grammar follows the Graphviz DOT spec; parse actions (push_* helpers
    defined elsewhere in this module) convert matched fragments into graph
    objects.  The built parser is cached in the module-global ``graphparser``.
    """
    global graphparser
    if not graphparser:
        # punctuation
        colon = Literal(":")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Literal("=")
        comma = Literal(",")
        dot = Literal(".")
        slash = Literal("/")
        bslash = Literal("\\")
        star = Literal("*")
        semi = Literal(";")
        at = Literal("@")
        minus = Literal("-")

        # keywords
        strict_ = Literal("strict")
        graph_ = Literal("graph")
        digraph_ = Literal("digraph")
        subgraph_ = Literal("subgraph")
        node_ = Literal("node")
        edge_ = Literal("edge")

        # token definitions
        identifier = Word(alphanums + "_").setName("identifier")
        double_quoted_string = dblQuotedString
        # Any run of characters outside the module-level _noncomma set.
        alphastring_ = OneOrMore(CharsNotIn(_noncomma))
        ID = (identifier | double_quoted_string.setParseAction(strip_quotes) | \
              alphastring_).setName("ID")
        # HTML-like labels: "<<...>>" up to a ',' or ']'.
        html_text = Combine(Literal("<<") + OneOrMore(CharsNotIn(",]")))
        float_number = Combine(Optional(minus) + \
                               OneOrMore(Word(nums + "."))).setName("float_number")
        righthand_id = (float_number | ID | html_text).setName("righthand_id")
        # Node ports: ":port", ":(x,y)", and/or "@angle".
        port_angle = (at + ID).setName("port_angle")
        port_location = (Group(colon + ID) | \
                         Group(colon + lparen + ID + comma + ID + rparen)).setName("port_location")
        port = (Group(port_location + Optional(port_angle)) | \
                Group(port_angle + Optional(port_location))).setName("port")
        node_id = (ID + Optional(port))
        # Attribute machinery: a_list is "k=v, ..." inside one [..]; attr_list
        # allows several bracketed groups in a row.
        a_list = OneOrMore(ID + Optional(equals.suppress() + righthand_id) + \
                           Optional(comma.suppress())).setName("a_list")
        attr_list = OneOrMore(lbrack.suppress() + Optional(a_list) + \
                              rbrack.suppress()).setName("attr_list")
        attr_stmt = (Group(graph_ | node_ | edge_) + attr_list).setName("attr_stmt")
        edgeop = (Literal("--") | Literal("->")).setName("edgeop")
        stmt_list = Forward()
        graph_stmt = Group(lbrace.suppress() + Optional(stmt_list) + \
                           rbrace.suppress()).setName("graph_stmt")
        subgraph = (Group(Optional(subgraph_ + Optional(ID)) + graph_stmt) | \
                    Group(subgraph_ + ID)).setName("subgraph")
        # Edges may chain: a -> b -> c [attrs]; endpoints may be subgraphs.
        edgeRHS = OneOrMore(edgeop + Group(node_id | subgraph))
        edge_stmt = Group(node_id | subgraph) + edgeRHS + Optional(attr_list)
        node_stmt = (node_id + Optional(attr_list) + Optional(semi.suppress())).setName("node_stmt")
        assignment = (ID + equals.suppress() + righthand_id).setName("assignment")
        # NOTE: alternative order matters — assignment must be tried before
        # node_stmt, and edge_stmt before node_stmt, to resolve ambiguity.
        stmt = (assignment | edge_stmt | attr_stmt | subgraph | node_stmt).setName("stmt")
        stmt_list << OneOrMore(stmt + Optional(semi.suppress()))
        graphparser = (Optional(strict_) + Group((graph_ | digraph_)) + \
                       Optional(ID) + graph_stmt).setResultsName("graph")
        singleLineComment = "//" + restOfLine

        # actions
        graphparser.ignore(singleLineComment)
        graphparser.ignore(cStyleComment)
        assignment.setParseAction(push_attr_list)
        a_list.setParseAction(push_attr_list)
        edge_stmt.setParseAction(push_edge_stmt)
        node_stmt.setParseAction(push_node_stmt)
        attr_stmt.setParseAction(push_default_stmt)
        subgraph.setParseAction(push_subgraph_stmt)
        graph_stmt.setParseAction(push_graph_stmt)
        graphparser.setParseAction(push_top_graph_stmt)
    return graphparser
unquoted = toks[0][1:-1] if len(url_protocol.searchString(unquoted)) > 0: result = [url_to_resource(unquoted)] else: result = [unquoted] return result # Numbers are converted to ints if possible. cql_number = Combine(Optional('-') + ('0' | Word(nonzero_nums, nums)) + Optional('.' + Word(nums)) + Optional(Word('eE', exact=1) + Word(nums + '+-', nums)) ).setParseAction(convert_number) # Dates are parsed as double-quoted ISO8601 strings and converted to datetime # objects. cql_date = Combine(dbl_quote.suppress() + Regex(ISO8601_REGEX) + dbl_quote.suppress() ).setParseAction(convert_date) # All double-quoted strings that are not dates are returned with their quotes # removed. cql_string = (dblQuotedString | sglQuotedString).setParseAction(convert_string) # URLs are detected as strings starting with the http(s) protocol. url_protocol = Combine(Literal('http') + Optional('s')) # Number range. # FIXME: char ranges are not supported yet cql_number_range = Group(cql_number + '-' + cql_number ).setParseAction(convert_range) cql_values = Group(delimitedList(
def define_dot_parser(self):
    """Define dot grammar

    Based on the grammar http://www.graphviz.org/doc/info/lang.html

    Builds and returns the full DOT parser, wiring each production to the
    corresponding ``self._proc_*`` callback.
    """
    # punctuation
    colon = Literal(":")
    lbrace = Suppress("{")
    rbrace = Suppress("}")
    lbrack = Suppress("[")
    rbrack = Suppress("]")
    lparen = Literal("(")
    rparen = Literal(")")
    equals = Suppress("=")
    comma = Literal(",")
    dot = Literal(".")
    slash = Literal("/")
    bslash = Literal("\\")
    star = Literal("*")
    semi = Suppress(";")
    at = Literal("@")
    minus = Literal("-")
    pluss = Suppress("+")

    # keywords
    strict_ = CaselessLiteral("strict")
    graph_ = CaselessLiteral("graph")
    digraph_ = CaselessLiteral("digraph")
    subgraph_ = CaselessLiteral("subgraph")
    node_ = CaselessLiteral("node")
    edge_ = CaselessLiteral("edge")

    # Everything that terminates a bare (unquoted) string: all punctuation
    # except '_', plus whitespace.
    punctuation_ = "".join([c for c in string.punctuation
                            if c not in '_']) + string.whitespace
    # token definitions
    identifier = Word(alphanums + "_").setName("identifier")

    # double_quoted_string = QuotedString('"', multiline=True,escChar='\\',
    #    unquoteResults=True) # dblQuotedString
    double_quoted_string = Regex(r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE)
    double_quoted_string.setParseAction(removeQuotes)
    # DOT allows "a" + "b" string concatenation.
    quoted_string = Combine(
        double_quoted_string +
        Optional(OneOrMore(pluss + double_quoted_string)),
        adjacent=False)
    alphastring_ = OneOrMore(CharsNotIn(punctuation_))

    def parse_html(s, loc, toks):
        return '<<%s>>' % ''.join(toks[0])

    opener = '<'
    closer = '>'
    try:
        html_text = pyparsing.nestedExpr(
            opener, closer,
            ((CharsNotIn(opener + closer).setParseAction(lambda t: t[0]))
             )).setParseAction(parse_html)
    # FIX: was a bare "except:", which also swallows KeyboardInterrupt and
    # SystemExit.  Any real failure here (e.g. nestedExpr missing on old
    # pyparsing) is an ordinary Exception, so catch only that and keep the
    # documented fallback behaviour.
    except Exception:
        log.debug('nestedExpr not available.')
        log.warning('Old version of pyparsing detected. Version 1.4.8 or '
                    'later is recommended. Parsing of html labels may not '
                    'work properly.')
        html_text = Combine(Literal("<<") + OneOrMore(CharsNotIn(",]")))

    ID = (alphastring_ | html_text |
          quoted_string |  # .setParseAction(strip_quotes) |
          identifier).setName("ID")
    float_number = Combine(Optional(minus) +
                           OneOrMore(Word(nums + "."))).setName("float_number")
    righthand_id = (float_number | ID).setName("righthand_id")
    # Node ports: one or more ":id" parts or ":(x,y)", optionally "@angle".
    port_angle = (at + ID).setName("port_angle")
    port_location = ((OneOrMore(Group(colon + ID)) |
                      Group(colon + lparen + ID + comma + ID + rparen))).setName("port_location")
    port = Combine(
        (Group(port_location + Optional(port_angle)) |
         Group(port_angle + Optional(port_location)))).setName("port")
    node_id = (ID + Optional(port))
    a_list = OneOrMore(ID + Optional(equals + righthand_id) +
                       Optional(comma.suppress())).setName("a_list")
    attr_list = OneOrMore(lbrack + Optional(a_list) + rbrack).setName(
        "attr_list").setResultsName('attrlist')
    attr_stmt = ((graph_ | node_ | edge_) + attr_list).setName("attr_stmt")
    edgeop = (Literal("--") | Literal("->")).setName("edgeop")
    stmt_list = Forward()
    graph_stmt = (lbrace + Optional(stmt_list) + rbrace +
                  Optional(semi)).setName("graph_stmt")
    # An edge endpoint may itself be a subgraph or anonymous graph block.
    edge_point = Forward()
    edgeRHS = OneOrMore(edgeop + edge_point)
    edge_stmt = edge_point + edgeRHS + Optional(attr_list)
    subgraph = (Optional(subgraph_, '') + Optional(ID, '') +
                Group(graph_stmt)).setName("subgraph").setResultsName('ssubgraph')
    edge_point <<= (subgraph | graph_stmt | node_id)
    node_stmt = (node_id + Optional(attr_list) +
                 Optional(semi)).setName("node_stmt")
    assignment = (ID + equals + righthand_id).setName("assignment")
    # NOTE: alternative order resolves grammar ambiguity — keep as-is.
    stmt = (assignment | edge_stmt | attr_stmt | subgraph | graph_stmt |
            node_stmt).setName("stmt")
    stmt_list <<= OneOrMore(stmt + Optional(semi))
    graphparser = ((Optional(strict_, 'notstrict') + ((graph_ | digraph_)) +
                    Optional(ID, '') + lbrace + Group(Optional(stmt_list)) +
                    rbrace).setResultsName("graph"))
    singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

    # actions
    graphparser.ignore(singleLineComment)
    graphparser.ignore(cStyleComment)
    node_id.setParseAction(self._proc_node_id)
    assignment.setParseAction(self._proc_attr_assignment)
    a_list.setParseAction(self._proc_attr_list)
    edge_stmt.setParseAction(self._proc_edge_stmt)
    node_stmt.setParseAction(self._proc_node_stmt)
    attr_stmt.setParseAction(self._proc_default_attr_stmt)
    attr_list.setParseAction(self._proc_attr_list_combine)
    subgraph.setParseAction(self._proc_subgraph_stmt)
    # graph_stmt.setParseAction(self._proc_graph_stmt)
    graphparser.setParseAction(self._main_graph_stmt)
    return graphparser
def create_bnf(stack):
    """Build the grammar for region-selection expressions.

    Parse actions push recognised tokens onto *stack* (via ``to_stack``),
    so parsing a string has the side effect of filling the caller's stack;
    the returned element is the anchored top-level expression.
    """
    # Numeric literals: integers and signed floats with optional exponent.
    point = Literal(".")
    comma = Literal(",")
    e = CaselessLiteral("E")
    inumber = Word(nums)
    fnumber = Combine(Word("+-" + nums, nums) +
                      Optional(point + Optional(Word(nums))) +
                      Optional(e + Word("+-" + nums, nums)))

    # Keywords.
    _of = Literal('of')
    _in = Literal('in')
    _by = Literal('by')
    _copy = Literal('copy')

    # Region-algebra operators; each is rewritten to its opcode name.
    _mv = Literal('-v').setParseAction(replace('OA_SubV'))
    _me = Literal('-e').setParseAction(replace('OA_SubE'))
    _mf = Literal('-f').setParseAction(replace('OA_SubF'))
    _mc = Literal('-c').setParseAction(replace('OA_SubC'))
    _ms = Literal('-s').setParseAction(replace('OA_SubS'))
    _pv = Literal('+v').setParseAction(replace('OA_AddV'))
    _pe = Literal('+e').setParseAction(replace('OA_AddE'))
    _pf = Literal('+f').setParseAction(replace('OA_AddF'))
    _pc = Literal('+c').setParseAction(replace('OA_AddC'))
    _ps = Literal('+s').setParseAction(replace('OA_AddS'))
    _inv = Literal('*v').setParseAction(replace('OA_IntersectV'))
    _ine = Literal('*e').setParseAction(replace('OA_IntersectE'))
    _inf = Literal('*f').setParseAction(replace('OA_IntersectF'))
    _inc = Literal('*c').setParseAction(replace('OA_IntersectC'))
    _ins = Literal('*s').setParseAction(replace('OA_IntersectS'))
    regop = (_mv | _me | _mf | _mc | _ms |
             _pv | _pe | _pf | _pc | _ps |
             _inv | _ine | _inf | _inc | _ins)

    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()

    _all = Literal('all').setParseAction(replace('KW_All'))
    vertex = Literal('vertex')
    vertices = Literal('vertices')
    cell = Literal('cell')
    cells = Literal('cells')
    group = Literal('group')
    _set = Literal('set')
    surface = Literal('surface')

    ident = Word(alphas + '_.', alphanums + '_.')
    set_name = Word(nums) | ident

    function = Word(alphas + '_', alphanums + '_')
    function = Group(function).setParseAction(join_tokens)

    # "r.<name>" region references, optionally prefixed by "copy".
    region = Combine(Literal('r.') +
                     Word(alphas + '_', '_' + alphas + nums + '.'))
    region = Group(Optional(_copy, default='nocopy') + region)
    region.setParseAction(replace('KW_Region', keep=True))

    # Coordinate relations, e.g. "(x < 0.5) & (y > 0.1)".
    coor = oneOf('x y z')
    boolop = oneOf('& |')
    relop = oneOf('< > <= >= != ==')
    bool_term = (ZeroOrMore('(') + (coor | fnumber) + relop +
                 (coor | fnumber) + ZeroOrMore(')'))
    relation = Forward()
    relation << (ZeroOrMore('(') + bool_term +
                 ZeroOrMore(boolop + relation) + ZeroOrMore(')'))
    relation = Group(relation).setParseAction(join_tokens)

    # Entity selectors; each alternative is tagged with its opcode.
    nos = Group(vertices + _of + surface).setParseAction(replace('E_VOS'))
    nir = Group(vertices + _in + relation).setParseAction(
        replace('E_VIR', keep=True))
    nbf = Group(vertices + _by + function).setParseAction(
        replace('E_VBF', keep=True))
    ebf = Group(cells + _by + function).setParseAction(
        replace('E_CBF', keep=True))
    eog = Group(cells + _of + group + Word(nums)).setParseAction(
        replace('E_COG', keep=True))
    nog = Group(vertices + _of + group + Word(nums)).setParseAction(
        replace('E_VOG', keep=True))
    onir = Group(vertex + _in + region).setParseAction(
        replace_with_region('E_OVIR', 2))
    ni = Group(vertex + delimitedList(inumber)).setParseAction(
        replace('E_VI', keep=True))
    ei1 = Group(cell + delimitedList(inumber)).setParseAction(
        replace('E_CI1', keep=True))
    etuple = (lpar.suppress() + inumber + comma.suppress() + inumber +
              rpar.suppress())
    ei2 = Group(cell + delimitedList(etuple)).setParseAction(
        replace('E_CI2', keep=True))
    noset = Group(vertices + _of + _set + set_name).setParseAction(
        replace('E_VOSET', keep=True))
    eoset = Group(cells + _of + _set + set_name).setParseAction(
        replace('E_COSET', keep=True))

    # Expression: atoms joined by region operators; atoms and operator
    # applications are pushed onto the caller's stack as they are matched.
    region_expression = Forward()
    atom1 = (_all | region | ni | onir | nos | nir | nbf |
             ei1 | ei2 | ebf | eog | nog | noset | eoset)
    atom1.setParseAction(to_stack(stack))
    atom2 = (lpar + region_expression.suppress() + rpar)
    atom = (atom1 | atom2)
    aux = (regop + region_expression)
    aux.setParseAction(to_stack(stack))
    region_expression << atom + ZeroOrMore(aux)
    region_expression = StringStart() + region_expression + StringEnd()

    return region_expression
def _setup_QASMParser():
    """ Routine to initialise and return parsing blocks

    Registers every operation/routine/block into the module-global ``cops``,
    ``qops`` and ``blocks`` registries as a side effect, and returns the
    top-level parsers: (code, testLine, testKeyword, reservedNames, mathExp).
    """

    class _Op:
        """ Class to set up quantum operations """
        def __init__(self, name, argParser,
                     version="OPENQASM 2.0", qop=False, keyOverride=None):
            global cops
            global qops
            global _reservedKeys
            if name in qops or name in cops:
                raise IOError(dupTokenWarning.format("Operation", name))
            self.operation = name
            if keyOverride is not None:
                # Use a custom keyword parser but still tag results with
                # this operation's name.
                self.parser = (keyOverride + argParser).addParseAction(
                    lambda s, l, t: _override_keyword(t, name))
            else:
                self.parser = CaselessKeyword(name)("keyword") + argParser
            self.version = parse_version(version)
            self.parser.addParseAction(
                lambda s, l, t: _set_version(t, self.version))
            _reservedKeys.append(name)
            if qop:
                qops[name] = self
            else:
                cops[name] = self

    class _Routine():
        """ Class to set up quantum gates, circuits, etc. """
        def __init__(self, name, pargs=False, spargs=False, gargs=False,
                     qargs=False, returnables=False, prefixes=None,
                     version="OPENQASM 2.0"):
            global blocks
            global _reservedKeys
            if name in qops or name in cops:
                raise IOError(dupTokenWarning.format("Routine", name))
            self.operation = name
            self.parser = Keyword(name)("keyword") + validName("gateName")
            if prefixes:
                localPrefixParser = Each(map(Optional, map(
                    Keyword, prefixes))).addParseAction(prefix_setter)
            else:
                localPrefixParser = prefixParser
            self.parser = localPrefixParser + self.parser
            # Handle different args
            req = []
            if pargs:
                req.append(Optional(pargParser)("pargs"))
            if spargs:
                req.append(Optional(spargParser)("spargs"))
            if gargs:
                req.append(Optional(gargParser)("gargs"))
            self.parser = self.parser + Each(req)
            if qargs:
                self.parser = self.parser + qargParser("qargs")
            if returnables:
                self.parser = self.parser + Optional(returnParser)
            self.version = parse_version(version)
            self.parser.addParseAction(
                lambda s, l, t: _set_version(t, self.version))
            _reservedKeys.append(name)
            blocks[name] = self

    class _Block():
        """ Class to set up blocks such as if, for, etc. """
        def __init__(self, name, detParser, version="OPENQASM 2.0"):
            global blocks
            global _reservedKeys
            self.operation = name
            self.parser = Keyword(name)("keyword") + detParser
            self.version = parse_version(version)
            self.parser.addParseAction(
                lambda s, l, t: _set_version(t, self.version))
            _reservedKeys.append(name)
            blocks[name] = self

    # Basic numeric tokens.
    sign = Word("+-", exact=1)
    number = Word(nums)
    expo = Combine(CaselessLiteral("e") + Optional(sign) +
                   number).setResultsName("exponent")
    pi = CaselessKeyword("pi")
    bitstring = Combine(OneOrMore(oneOf("0 1")) + Literal("b"))
    integer = Combine(number + Optional(expo))
    real = Combine(
        Optional(sign) +
        (("." + number) ^ (number + "." + Optional(number))) +
        Optional(expo))
    validName = Forward()
    lineEnd = Literal(";")

    # Keyword/operator tokens.
    _is_ = Keyword("to").suppress()
    _in_ = Keyword("in")
    _to_ = Literal("->").suppress()

    # Comment and directive delimiters.
    commentSyntax = "//"
    commentOpenStr = "/*"
    commentCloseStr = "*/"
    commentOpenSyntax = Literal(commentOpenStr)
    commentCloseSyntax = Literal(commentCloseStr)
    dirSyntax = "***"
    dirOpenStr = f"{dirSyntax} begin"
    dirCloseStr = f"{dirSyntax} end"
    dirSyntax = Keyword(dirSyntax)
    dirOpenSyntax = CaselessLiteral(dirOpenStr)
    dirCloseSyntax = CaselessLiteral(dirCloseStr)

    # Built-in function names by return type.
    intFunc = oneOf("abs powrem countof fllog")
    realFunc = oneOf("abs powrem arcsin arccos arctan sin cos tan exp ln sqrt")
    boolFunc = oneOf("andof orof xorof")

    # Bracket tokens (suppressed in results).
    inL, inS, inR = map(Suppress, "[:]")
    vBar = Suppress("|")
    bSlash = Suppress("\\")
    brL, brR = map(Suppress, "()")

    intExp = Forward()
    realExp = Forward()
    boolExp = Forward()

    # Register indexing and slicing: [i], [a:b], [a:b:c].
    index = intExp.setResultsName("index")
    interval = Optional(intExp.setResultsName("start"), default=None) + inS \
        + Optional(intExp.setResultsName("end"), default=None) \
        + Optional(inS + Optional(intExp.setResultsName("step"), default=1))
    interRef = Group(inL + interval + inR)
    loopRef = Group(
        inL + intExp.setResultsName("start") + inS +
        intExp.setResultsName("end") +
        Optional(inS + Optional(intExp.setResultsName("step"), default=1)) +
        inR)
    ref = inL + Group(delimitedList(index ^ interval))("ref") + inR
    regNoRef = validName("var")
    regRef = Group(validName("var") + Optional(ref))
    regMustRef = Group(validName("var") + ref)
    regListNoRef = Group(delimitedList(regNoRef))
    regListRef = Group(delimitedList(regRef))

    # Alias and classical-register forms: |a, b| and \a, 01b\.
    inPlaceAlias = vBar + regListRef + vBar
    validQarg = regRef | inPlaceAlias
    aliasQarg = Group(regRef) | inPlaceAlias
    inPlaceCreg = bSlash + delimitedList(regRef | bitstring) + bSlash
    validCreg = (regRef | inPlaceCreg)

    def set_maths_type(toks, mathsType):
        """ Set logical or integer or floating point """
        toks["type"] = mathsType

    intVar = integer | regRef
    realVar = real | integer | pi | regRef
    boolVar = interRef | regRef | realExp | intExp | validCreg | bitstring
    intFuncVar = (intFunc + brL +
                  Group(Optional(delimitedList(intVar)))("args") +
                  brR).setParseAction(Function)
    realFuncVar = ((realFunc ^ intFunc) + brL +
                   Group(Optional(delimitedList(realVar)))("args") +
                   brR).setParseAction(Function)
    boolFuncVar = (boolFunc + brL +
                   Group(Optional(delimitedList(boolVar)))("args") +
                   brR).setParseAction(Function)

    # Operator tables for infixNotation (precedence is list order).
    mathOp = [(oneOf("- +"), 1, opAssoc.RIGHT, Binary),
              (oneOf("^"), 2, opAssoc.LEFT, Binary),
              (oneOf("* / div"), 2, opAssoc.LEFT, Binary),
              (oneOf("+ -"), 2, opAssoc.LEFT, Binary)]
    logOp = [(oneOf("! not"), 1, opAssoc.RIGHT, Binary),
             (oneOf("and or xor"), 2, opAssoc.LEFT, Binary),
             (oneOf("< <= == != >= >"), 2, opAssoc.LEFT, Binary),
             (oneOf("in"), 2, opAssoc.LEFT, Binary)]
    intExp <<= infixNotation(
        intFuncVar | intVar,
        mathOp).setParseAction(lambda s, l, t: set_maths_type(t, "int"))
    realExp <<= infixNotation(
        realFuncVar | realVar,
        mathOp).setParseAction(lambda s, l, t: set_maths_type(t, "float"))
    boolExp <<= infixNotation(
        boolFuncVar | boolVar,
        logOp).setParseAction(lambda s, l, t: set_maths_type(t, "bool"))
    mathExp = intExp ^ realExp ^ boolExp
    cregExp = bitstring("bit") ^ validCreg("reg")

    prefixes = ["unitary"]
    callMods = ["CTRL", "INV"]

    def prefix_setter(toks):
        """ Pull out prefixes of gate calls and add them into list """
        for prefix in prefixes:
            toks[prefix] = prefix in toks.asList()

    prefixParser = Each(map(Optional,
                            map(Keyword, prefixes))).addParseAction(prefix_setter)
    # Argument list parsers: (pargs), [spargs], <gargs>, qargs.
    pargParser = brL + delimitedList(validName)("pargs") + brR
    spargParser = inL + delimitedList(validName)("spargs") + inR
    gargParser = ungroup(
        nestedExpr("<", ">", delimitedList(ungroup(validName)), None))
    qargParser = delimitedList(regRef)
    callQargParser = delimitedList(validQarg)
    callPargParser = brL + delimitedList(realExp) + brR
    callSpargParser = inL + delimitedList(intExp) + inR
    fullArgParser = Each(
        (Optional(pargParser("pargs")),
         Optional(spargParser("spargs")),
         Optional(gargParser("gargs"))))
    callArgParser = Each(
        (Optional(callPargParser("pargs")),
         Optional(callSpargParser("spargs")),
         Optional(gargParser("gargs"))))
    returnParser = Optional(_to_ + validCreg("byprod"))
    modifiers = ZeroOrMore(Combine(oneOf(callMods) + Suppress("-")))

    # Comments carry a (0,0,0) version so they never raise the file version.
    commentLine = Literal(commentSyntax).suppress() + restOfLine("comment")
    commentBlock = cStyleComment("comment").addParseAction(
        removeQuotes).addParseAction(removeQuotes)
    comment = commentLine | commentBlock
    comment.addParseAction(lambda s, l, t: _set_version(t, (0, 0, 0)))

    directiveName = Word(alphas).setParseAction(downcaseTokens)
    directiveArgs = CharsNotIn(";")
    _Op("directive",
        directiveName("directive") + Suppress(White() * (1, )) +
        directiveArgs("args"),
        version="REQASM 1.0",
        keyOverride=(~dirOpenSyntax + ~dirCloseSyntax + dirSyntax))

    def split_args(toks):
        """ Split directive arguments out """
        toks[0]["keyword"] = "directive"
        toks[0]["args"] = toks[0]["args"].strip().split(" ")

    directiveStatement = directiveName("directive") + restOfLine("args") + \
        Group(ZeroOrMore(Combine(Optional(White(" ")) + ~dirCloseSyntax +
                                 Word(printables + " "))))("block")
    directiveBlock = ungroup(
        nestedExpr(
            dirOpenSyntax, dirCloseSyntax,
            content=directiveStatement,
            ignoreExpr=(comment | quotedString
                        )).setWhitespaceChars("\n").setParseAction(split_args))
    directiveBlock.addParseAction(lambda s, l, t: _set_version(t, (2, 1, 0)))

    # Programming lines
    _Op("version", Empty(), version=(0, 0, 0),
        keyOverride=Combine(oneOf(versions)("type") + White() +
                            real("versionNumber"))("version"))
    _Op("include", quotedString("file").addParseAction(removeQuotes))

    # Gate-like structures
    _Op("opaque",
        validName("name") + fullArgParser + Optional(qargParser("qargs")) +
        returnParser,
        keyOverride=prefixParser + "opaque")
    _Routine("gate", pargs=True, qargs=True)
    _Routine("circuit", pargs=True, qargs=True, spargs=True,
             returnables=True, version="REQASM 1.0")

    # Variable-like structures
    _Op("creg", regRef("arg"))
    _Op("qreg", regRef("arg"))
    _Op("cbit", Group(regNoRef)("arg"), version="REQASM 1.0")
    _Op("qbit", Group(regNoRef)("arg"), version="REQASM 1.0")
    _Op("defAlias", regMustRef("alias"), keyOverride="alias",
        version="REQASM 1.0")
    # No more on-definition aliases
    _Op("alias", regRef("alias") + _is_ + aliasQarg("target"),
        keyOverride="set", version="REQASM 1.0")
    _Op("val", validName("var") + Literal("=").suppress() + mathExp("val"),
        version="REQASM 1.0")
    _Op("set",
        (Group(regRef)("var") ^ inPlaceCreg("var")) +
        Literal("=").suppress() + cregExp("val"),
        version="REQASM 1.0")

    # Operations-like structures
    _Op("measure", regRef("qreg") + _to_ + regRef("creg"), qop=True)
    _Op("barrier", regListNoRef("args"))
    _Op("output", regRef("value"), qop=True, version="REQASM 1.0")
    _Op("reset", regRef("qreg"))
    _Op("exit", Empty(), version="REQASM 1.0")
    _Op("free", validName("target"), version="REQASM 1.0")
    _Op("next", validName("loopVar"), qop=True, version="REQASM 1.0")
    _Op("finish", (Literal("quantum process") | validName)("loopVar"),
        qop=True, version="REQASM 1.0")
    _Op("end", validName("process"), qop=True, version="REQASM 1.0")

    # Special gate call handler
    callGate = Combine(Group(modifiers)("mods") + validName("gate")) + \
        callArgParser + \
        callQargParser("qargs").addParseAction(
            lambda s, l, t: _override_keyword(t, "call")) + \
        returnParser
    callGate.addParseAction(lambda s, l, t: _set_version(t, (1, 2, 0)))

    # Block structures
    _Block("for", validName("var") + _in_ + loopRef("range"),
           version="REQASM 1.0")
    _Block("if", "(" + boolExp("cond") + ")", version="REQASM 1.0")
    _Block("while", "(" + boolExp("cond") + ")", version="OMEQASM 1.0")

    qopsParsers = list(map(lambda qop: qop.parser, qops.values())) + \
        [callGate, directiveBlock]
    blocksParsers = list(map(lambda block: block.parser, blocks.values()))

    # Single-statement (braceless) block forms.
    _Op("if", blocks["if"].parser +
        Group(Group(Group(Or(qopsParsers))))("block"),
        version="OPENQASM 2.0", keyOverride=Empty())
    _Op("for", blocks["for"].parser +
        Group(Group(Group(Or(qopsParsers))))("block"),
        version="REQASM 1.0", keyOverride=Empty())
    _Op("while", blocks["while"].parser +
        Group(Group(Group(Or(qopsParsers))))("block"),
        version="OMEQASM 1.0", keyOverride=Empty())

    # Set-up line parsers
    reservedNames = Or(map(Keyword, _reservedKeys))
    validName <<= (~reservedNames) + Word(alphas, alphanums + "_")

    copsParsers = list(map(lambda cop: cop.parser, cops.values()))

    operations = (((Or(copsParsers) ^ Or(qopsParsers)) |  # Classical/Quantum Operations
                   callGate |  # Gate parsers
                   White()  # Blank Line
                   ) + lineEnd.suppress()) ^ directiveBlock  # ; or Directives

    validLine = Forward()
    codeBlock = nestedExpr("{", "}", Suppress(White()) ^ Group(validLine),
                           (quotedString))
    validLine <<= (
        ((operations + Optional(comment)) ^
         (Or(blocksParsers) + codeBlock("block") + Optional(lineEnd)) ^
         comment))  # Whole line comment

    testLine = Forward()
    dummyCodeBlock = nestedExpr(
        "{", "}", testLine,
        (directiveBlock | quotedString | comment)) + Optional(lineEnd)
    ignoreSpecialBlocks = (~commentOpenSyntax + ~commentCloseSyntax +
                           ~dirOpenSyntax + ~dirCloseSyntax)
    testLine <<= (
        comment |  # Comments
        directiveBlock |  # Directives
        (ignoreSpecialBlocks + ZeroOrMore(CharsNotIn("{}")) +
         dummyCodeBlock) |  # Block operations
        (ignoreSpecialBlocks + ZeroOrMore(CharsNotIn("{};")) + lineEnd)
    )  # QASM Instructions

    testKeyword = (dirSyntax.setParseAction(
        lambda s, l, t: _override_keyword(t, "directive")) |
        Word(alphas)("keyword"))

    code = (Group(directiveBlock)) | Group(validLine)

    return code, testLine, testKeyword, reservedNames, mathExp
.setParseAction(lambda s, loc, toks: ["-" + toks[0][1:]]) .setName("negative_number_entity") ) space_entity = ( Literal("~.").setParseAction(lambda s, loc, toks: [" "]).setName("space_entity") ) end_entity = Literal("~E") def _expand_entity_parse_action(s, loc, toks): position = Position.from_loc(loc, s) return LinkActionParameter(toks[0], position=position) expand_entity = ( (Literal("~X~").suppress() + parse_query + end_entity.suppress()) .setParseAction(_expand_entity_parse_action) .setName("expand_entity") ) entities = ( tilde_entity | minus_entity | negative_number_entity | space_entity | islash_entity | slash_entity | http_entity | https_entity | file_entity | protocol_entity
def compute(self, text, verbose=True):
    """Expand spell-description markup in *text* and return the result.

    Builds a pyparsing grammar for the markup tokens that appear in spell
    descriptions — icons ``|T...:NN|t``, language choices ``$L...;``, call
    variables ``<$...>``, references ``$@var`` / ``$12345var``, expressions
    ``${...}`` and conditions ``$?...[...]`` — applies each sub-parser in
    turn with ``transformString``, then delegates the final text to the
    parent class ``compute``.

    NOTE(review): the handlers ``self.setReferences``, ``self.setConditions``,
    ``self.callVariables``, ``self.setExpressions``, ``self.setLanguageChoices``,
    ``self.setIcons`` and the ``self.variables`` mapping are defined outside
    this view — their exact semantics are assumed; confirm against the class.

    :param text: raw description string to transform.
    :param verbose: forwarded to the condition/expression handlers and gates
        the hash-placeholder substitution pass below.
    """
    # Literals
    dollar = Literal('$')
    amper = Literal('&')
    at = Literal('@')
    qm = Literal('?')
    em = Literal('!')
    dot = Literal('.')
    colon = Literal(":")
    vbar = Literal("|")
    lbrack = Literal("[")
    rbrack = Literal("]")
    lcurly = Literal("{")
    rcurly = Literal("}")
    lparen = Literal("(")
    rparen = Literal(")")
    lt = Literal("<")
    gt = Literal(">")
    eq = Literal("=")
    deq = Literal("==")

    # Reusables
    # A spell id is 2-6 digits, converted to int; a bare index is one digit.
    spellId = Word(nums, min=2, max=6).addParseAction(
        tokenMap(int)).setResultsName("spellId")
    idx = Word(nums, max=1).addParseAction(tokenMap(int)).setResultsName("id")
    var = Word(alphas).setResultsName("var")

    # Spell References: $@var[spellId] | $spellId var[effectId] | $var[effectId]
    effectId = Optional(
        Word(nums, max=2).addParseAction(
            tokenMap(int)).setResultsName("effectId"))
    references = (dollar.suppress() +
                  ((at.suppress() + var + Optional(spellId)) |
                   (spellId + var + effectId) |
                   (var + effectId))).addParseAction(self.setReferences)

    # Conditions: $?cond[then]...[else]; operands may be negated vars,
    # signed numbers, or 8-char hash placeholders.
    brackets = Suppress(lbrack) + SkipTo(rbrack).setResultsName(
        "statement") + Suppress(rbrack)
    value = Word(nums, max=5).addParseAction(
        tokenMap(int)).setResultsName("value")
    conditionVar = Group(
        Optional(em).setResultsName("not") + Optional(var) + (spellId | idx) |
        Optional("-") + value |
        Word(alphanums, exact=8).setResultsName("hashVariable"))
    conditions = ((dollar + qm).suppress() + OneOrMore(
        Group(
            Optional(Suppress(qm)) + Optional(Suppress(lparen)) + OneOrMore(
                conditionVar.setResultsName("variables*") +
                Optional(Combine(em + eq) | amper | vbar | deq | lt |
                         gt).setResultsName("operators*")) +
            Optional(Suppress(rparen)) +
            brackets).setResultsName("conditions*")) +
        brackets).addParseAction(lambda t: self.setConditions(
            t, verbose=verbose)) + Optional(dot.suppress())

    # Call Variable: <$name> (the "<" and "$" may appear in either order)
    callVariables = (Suppress((lt + dollar) | (dollar + lt)) +
                     SkipTo(gt).setResultsName("name") +
                     Suppress(gt)).addParseAction(self.callVariables)

    # Expressions: ${content}[.mod] — optional single-digit modifier
    expressions = (
        Suppress(dollar + lcurly) + SkipTo(rcurly).setResultsName("content") +
        rcurly + Optional(
            dot.suppress() + Word(nums, exact=1).addParseAction(
                tokenMap(int)).setResultsName("mod"),
        )
    ).addParseAction(lambda t: self.setExpressions(t, verbose=verbose))

    # Language Choices: $Lopt1:opt2:...; ($l accepted too)
    languageChoices = (
        (Literal('$L') | Literal('$l')).suppress() +
        OneOrMore(Word(alphas) + Optional(Literal(":").suppress())
                  ).setResultsName("options*") +
        Literal(';').suppress()).addParseAction(self.setLanguageChoices)

    # Icons: |Tpath:NN|t where NN is a two-digit size
    icons = (Literal("|T").suppress() +
             SkipTo(colon).setResultsName("path") + colon.suppress() +
             Word(nums, exact=2).addParseAction(
                 tokenMap(int)).setResultsName("size") +
             Literal("|t").suppress()).addParseAction(self.setIcons)

    # Parsing layer by layer — order matters: each pass rewrites the text
    # before the next parser sees it.
    parsingOrder = [
        icons, languageChoices, callVariables, references, expressions,
        conditions
    ]
    steps = [text]
    for parser in parsingOrder:
        steps.append(parser.transformString(steps[-1]))
    result = steps[-1]

    # Replace each SHA-1 hash placeholder by its referring value
    # (placeholders were emitted by the handlers above into self.variables).
    if verbose:
        for k, v in self.variables.items():
            result = result.replace(k, str(v))

    # Display fixes — cosmetic cleanups of known artifacts in the output.
    displayFixes = [["*% of", "% of"], ["power)%", "power)"]]
    for bef, aft in displayFixes:
        result = result.replace(bef, aft)
    return super(SpellDescriptionParser, self).compute(result, verbose)
def _create_config_parser():
    """
    Creates a parser using pyparsing that works with bibfield rule definitions

    BNF like grammar:

    rule ::= ([persitent_identifier] json_id ["[0]" | "[n]"] "," aliases":" INDENT body UNDENT) | include
    include ::= "include(" PATH ")"
    body ::=  [inherit_from] (creator | derived | calculated) [checker] [documentation]
    aliases ::= json_id ["[0]" | "[n]"] ["," aliases]

    creator ::= "creator:" INDENT creator_body+ UNDENT
    creator_body ::= [parse_first] [legacy] source_format "," source_tag "," python_allowed_expr
    source_format ::= MASTER_FORMATS
    source_tag ::= QUOTED_STRING

    derived ::= "derived" INDENT derived_calculated_body UNDENT
    calculated ::= "calculated:" INDENT derived_calculated_body UNDENT
    derived_calculated_body ::= [parse_first] [depends_on] [only_if] [do_not_cache] "," python_allowed_exp

    peristent_identfier ::= @persitent_identifier( level )
    inherit_from ::= "@inherit_from()"
    legacy ::= "@legacy(" correspondences+ ")"
    do_not_cache ::= "@do_not_cache"
    correspondences ::= "(" source_tag [ "," tag_name ] "," json_id ")"
    parse_first ::= "@parse_first(" jsonid+ ")"
    depends_on ::= "@depends_on(" json_id+ ")"
    only_if ::= "@only_if(" python_condition+ ")"

    python_allowed_exp ::= ident | list_def | dict_def | list_access | dict_access | function_call

    checker ::= "checker:" INDENT checker_function+ UNDENT

    documentation ::= INDENT doc_string subfield* UNDENT
    doc_string ::= QUOTED_STRING
    subfield ::= "@subfield" json_id["."json_id*] ":" docstring
    """
    # Column stack used to emulate INDENT/UNDENT tokens — pyparsing has no
    # built-in indentation support.  Starts at column 1 (no indentation).
    indent_stack = [1]

    def check_sub_indent(str, location, tokens):
        # Parse action: succeed only when the current column is deeper than
        # the enclosing level, and push it as the new indentation level.
        # (NB: parameter named `str` shadows the builtin — kept for
        # byte-compatibility with the original signature.)
        cur_col = col(location, str)
        if cur_col > indent_stack[-1]:
            indent_stack.append(cur_col)
        else:
            raise ParseException(str, location, "not a subentry")

    def check_unindent(str, location, tokens):
        # Parse action: succeed when the column has dropped back to (or
        # beyond) the previous level; end-of-input always unindents.
        if location >= len(str):
            return
        cur_col = col(location, str)
        if not (cur_col < indent_stack[-1] and cur_col <= indent_stack[-2]):
            raise ParseException(str, location, "not an unindent")

    def do_unindent():
        indent_stack.pop()

    # INDENT consumes the newline and checks the next token is deeper;
    # UNDENT is a pure lookahead (FollowedBy) so it consumes no input.
    INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(
        check_sub_indent)
    UNDENT = FollowedBy(empty).setParseAction(check_unindent)
    UNDENT.setParseAction(do_unindent)

    # json_id / aliases: identifier with an optional "[0]" or "[n]" suffix,
    # re-joined into a single token by the parse action.
    json_id = (Word(alphanums + "_") + Optional(oneOf("[0] [n]")))\
        .setResultsName("json_id", listAllMatches=True)\
        .setParseAction(lambda tokens: "".join(tokens))
    aliases = delimitedList(
        (Word(alphanums + "_") + Optional(oneOf("[0] [n]")))
        .setParseAction(lambda tokens: "".join(tokens)))\
        .setResultsName("aliases")

    # Restricted Python expressions allowed on the right-hand side of rules.
    python_allowed_expr = Forward()
    ident = Word(alphas + "_", alphanums + "_")
    dict_def = originalTextFor(nestedExpr('{', '}'))
    list_def = originalTextFor(nestedExpr('[', ']'))
    dict_access = list_access = originalTextFor(ident + nestedExpr('[', ']'))
    function_call = originalTextFor(
        ZeroOrMore(ident + ".") + ident + nestedExpr('(', ')'))
    python_allowed_expr << (ident ^ dict_def ^ list_def ^ dict_access
                            ^ list_access ^ function_call)\
        .setResultsName("value", listAllMatches=True)

    # @-decorators recognised in rule bodies.
    persistent_identifier = (Suppress("@persistent_identifier") +
                             nestedExpr("(", ")"))\
        .setResultsName("persistent_identifier")
    inherit_from = (Suppress("@inherit_from") +
                    originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("inherit_from")
    legacy = (Suppress("@legacy") + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("legacy", listAllMatches=True)
    only_if = (Suppress("@only_if") + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("only_if")
    depends_on = (Suppress("@depends_on") +
                  originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("depends_on")
    parse_first = (Suppress("@parse_first") +
                   originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("parse_first")
    do_not_cache = (Suppress("@") + "do_not_cache")\
        .setResultsName("do_not_cache")
    master_format = (Suppress("@master_format") +
                     originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("master_format")

    # "derived" / "calculated" share the same body shape.
    derived_calculated_body = Optional(parse_first) + Optional(
        depends_on) + Optional(only_if) + Optional(
        do_not_cache) + python_allowed_expr
    derived = "derived" + Suppress(":") + INDENT + \
        derived_calculated_body + UNDENT
    calculated = "calculated" + Suppress(":") + INDENT + \
        derived_calculated_body + UNDENT

    source_tag = quotedString\
        .setParseAction(removeQuotes)\
        .setResultsName("source_tag", listAllMatches=True)
    source_format = oneOf(CFG_BIBFIELD_MASTER_FORMATS)\
        .setResultsName("source_format", listAllMatches=True)
    creator_body = (Optional(parse_first) + Optional(depends_on) +
                    Optional(only_if) + Optional(legacy) + source_format +
                    Suppress(",") + source_tag + Suppress(",") +
                    python_allowed_expr)\
        .setResultsName("creator_def", listAllMatches=True)
    creator = "creator" + Suppress(":") + INDENT + OneOrMore(
        creator_body) + UNDENT

    checker_function = (Optional(master_format) + ZeroOrMore(ident + ".") +
                        ident + originalTextFor(nestedExpr('(', ')')))\
        .setResultsName("checker_function", listAllMatches=True)
    checker = ("checker" + Suppress(":") + INDENT +
               OneOrMore(checker_function) + UNDENT)

    # Documentation: a triple-quoted or plain quoted string plus @subfield docs.
    doc_string = QuotedString(
        quoteChar='"""',
        multiline=True) | quotedString.setParseAction(removeQuotes)
    subfield = (Suppress("@subfield") + Word(alphanums + "_" + '.') +
                Suppress(":") + Optional(doc_string))\
        .setResultsName("subfields", listAllMatches=True)
    documentation = ("documentation" + Suppress(":") + INDENT +
                     Optional(doc_string).setResultsName("main_doc") +
                     ZeroOrMore(subfield) + UNDENT)\
        .setResultsName("documentation")

    field_def = (creator | derived | calculated)\
        .setResultsName("type_field", listAllMatches=True)
    body = Optional(inherit_from) + Optional(field_def) + Optional(
        checker) + Optional(documentation)
    comment = Literal("#") + restOfLine + LineEnd()
    include = (Suppress("include") + quotedString)\
        .setResultsName("includes", listAllMatches=True)
    rule = (Optional(persistent_identifier) + json_id +
            Optional(Suppress(",") + aliases) + Suppress(":") + INDENT +
            body + UNDENT)\
        .setResultsName("rules", listAllMatches=True)
    # A config file is any number of rules, includes and (ignored) comments.
    return OneOrMore(rule | include | comment.suppress())
ZeroOrMore(~EOL + (White(" \t") | Word(printables))) ).leaveWhitespace() XDIGIT = hexnums INT = Word(nums) ESC = BSLASH + ( oneOf(list(r"nrtbf\">" + "'")) | ("u" + Word(hexnums, exact=4)) | SGL_PRINTABLE ) LITERAL_CHAR = ESC | ~(APOS | BSLASH) + SGL_PRINTABLE CHAR_LITERAL = APOS + LITERAL_CHAR + APOS STRING_LITERAL = APOS + Combine(OneOrMore(LITERAL_CHAR)) + APOS DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"' DOUBLE_ANGLE_STRING_LITERAL = "<<" + ZeroOrMore(SGL_PRINTABLE) + ">>" TOKEN_REF = Word(alphas.upper(), alphanums + "_") RULE_REF = Word(alphas.lower(), alphanums + "_") ACTION_ESC = ( BSLASH.suppress() + APOS | BSLASH.suppress() | BSLASH.suppress() + (~(APOS | QUOTE) + SGL_PRINTABLE) ) ACTION_CHAR_LITERAL = APOS + (ACTION_ESC | ~(BSLASH | APOS) + SGL_PRINTABLE) + APOS ACTION_STRING_LITERAL = ( QUOTE + ZeroOrMore(ACTION_ESC | ~(BSLASH | QUOTE) + SGL_PRINTABLE) + QUOTE ) SRC = SRC_.suppress() + ACTION_STRING_LITERAL("file") + INT("line") id = TOKEN_REF | RULE_REF SL_COMMENT = ( Suppress("//") + Suppress("$ANTLR") + SRC | ZeroOrMore(~EOL + Word(printables)) + EOL ) ML_COMMENT = cStyleComment
from pyparsing import Word, Literal, nums, alphas
import json

# Russian alphabet (lower + upper case) and the extended set including
# digits and punctuation used in the source text.
rus_alphas = 'йцукенгшщзхъфывапролджэячсмитьбюЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ'
rus_alphanums = 'йцукенгшщзхъфывапролджэячсмитьбюЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ1234567890,.:;()«»-–'
# Short (en) dash – is Alt + 0150; the hyphen - is the plain keyboard key.

# Read the whole source file and flatten it to a single line of text.
# `with` guarantees the handle is closed (the original left it open).
f = open('destiny.txt', 'r')
with f:
    lines = f.readlines()
text = ' '.join(lines)
text = text.replace("\r", "")
text = text.replace("\n", "")

Dash = Literal('—')  # long (em) dash — is Alt + 0151
# Entry number: digits followed by an em dash, e.g. "12 — ..."
Number = Word(nums) + Dash.suppress()
# Entry name: the word following "<digits> — ".
Name = Word(nums).suppress() + Dash.suppress() + Word(rus_alphanums)
# Pattern for finding the description
Description = Name.suppress() + Word(rus_alphanums + ' ') + Literal('/').suppress()

# First pass: index entries by their own number.
destinyData = {}
for num in Number.searchString(text):
    key = int(num[0])
    destinyData[key] = {'number': num[0]}  # dict({...}) was a redundant copy

# Second pass: attach names.  Entries are assumed numbered 1..n in order;
# enumerate replaces the original hand-rolled counter.
for key, name in enumerate(Name.searchString(text), start=1):
    destinyData[key].update({'name': name[0]})
rbrace = Literal("}") lbrack = Literal("[") rbrack = Literal("]") lparen = Literal("(") rparen = Literal(")") equals = Literal("=") comma = Literal(",") dot = Literal(".") slash = Literal("/") bslash = Literal("\\") star = Literal("*") semi = Literal(";") at = Literal("@") minus = Literal("-") comma_sep = comma.suppress() #------------------------------------------------------------------------------ # A convenient function for calculating a unique name given a list of # existing names. #------------------------------------------------------------------------------ def make_unique_name(base, existing=[], format="%s_%s"): """ Return a name, unique within a context, based on the specified name. @param base: the desired base name of the generated unique name. @param existing: a sequence of the existing names to avoid returning. @param format: a formatting specification for how the name is made unique. """ count = 2 name = base
raise Exception("Unit name invalid") unit_name = Regex(r".*").setParseAction(parse_action_unit_name) # RULES - Simple hierarchical name # A literal hierarchical name - A non literal hierarchical name could be "processor_name" simple_h_name = (simple_ident + ZeroOrMore(dot.suppress() + simple_ident))\ .setParseAction(lambda _s, l, t: {'type': 'h_var', 'parts': t.asList() } ) # RULES - simple_hierarchical_name [":" simple_hierarchical_name] factor_name = (Optional(simple_h_name.setResultsName("processor")) + Optional( Group(processor_factor_separator.suppress() + simple_h_name).setResultsName("factor"))).setParseAction( lambda _s, l, t: { 'type': 'pf_name', 'processor': t.processor if t.processor else None, 'factor': t.factor[0] if t.factor else None }) # RULES - processor_name # "h{a.c}ola{b}.sdf{c}", # "{a.b.c}.h{c}ola{b}.sdf{c}", # "{a}b", # "{a}b{c}", # "aa", # "aa{b}aa", # "{a}",
sglQuotedString = Combine( _sglQuote + ZeroOrMore( CharsNotIn("\\'\n\r") | _escapedChar | "''" ) + _sglQuote ).streamline().setName("string enclosed in single quotes") quotedArg = ( dblQuotedString | sglQuotedString ) quotedArg.setParseAction(removeQuotes) quotedArg.setName("quotedArg") plainArgChars = printables.replace('#', '').replace('"', '').replace("'", "") plainArg = Word(plainArgChars) plainArg.setName("plainArg") arguments = Group(ZeroOrMore(quotedArg | plainArg)) arguments = arguments.setResultsName('arguments') arguments.setName("arguments") # comment line. comment = Literal('#') + restOfLine comment = comment.suppress() comment.setName('comment') full_command = ( comment | (command + arguments + Optional(comment)) ) full_command.setName('full_command') ### command_list = [] # filled in by namespaces.init_global_dict(). ### command/argument handling. def process_args(args, globals_dict, locals_dict):
def _create_config_parser():
    """
    Creates a parser using pyparsing that works with bibfield rule definitions

    BNF like grammar:

    rule ::= ([persitent_identifier] json_id ["[0]" | "[n]"] "," aliases":" INDENT body UNDENT) | include
    include ::= "include(" PATH ")"
    body ::=  [inherit_from] (creator | derived | calculated) [checker] [documentation]
    aliases ::= json_id ["[0]" | "[n]"] ["," aliases]

    creator ::= "creator:" INDENT creator_body+ UNDENT
    creator_body ::= [parse_first] [legacy] source_format "," source_tag "," python_allowed_expr
    source_format ::= MASTER_FORMATS
    source_tag ::= QUOTED_STRING

    derived ::= "derived" INDENT derived_calculated_body UNDENT
    calculated ::= "calculated:" INDENT derived_calculated_body UNDENT
    derived_calculated_body ::= [parse_first] [depends_on] [only_if] [do_not_cache] "," python_allowed_exp

    peristent_identfier ::= @persitent_identifier( level )
    inherit_from ::= "@inherit_from()"
    legacy ::= "@legacy(" correspondences+ ")"
    do_not_cache ::= "@do_not_cache"
    correspondences ::= "(" source_tag [ "," tag_name ] "," json_id ")"
    parse_first ::= "@parse_first(" jsonid+ ")"
    depends_on ::= "@depends_on(" json_id+ ")"
    only_if ::= "@only_if(" python_condition+ ")"

    python_allowed_exp ::= ident | list_def | dict_def | list_access | dict_access | function_call

    checker ::= "checker:" INDENT checker_function+ UNDENT

    documentation ::= INDENT doc_string subfield* UNDENT
    doc_string ::= QUOTED_STRING
    subfield ::= "@subfield" json_id["."json_id*] ":" docstring
    """
    # Stack of indentation columns driving the INDENT/UNDENT pseudo-tokens;
    # column 1 means "no indentation".
    indent_stack = [1]

    def check_sub_indent(str, location, tokens):
        # Succeeds (and records the new level) only when the match position
        # is indented deeper than the current level.  The `str` parameter
        # name shadows the builtin but is kept byte-identical.
        cur_col = col(location, str)
        if cur_col > indent_stack[-1]:
            indent_stack.append(cur_col)
        else:
            raise ParseException(str, location, "not a subentry")

    def check_unindent(str, location, tokens):
        # Succeeds when indentation has decreased; end of input counts as an
        # unindent so trailing blocks close cleanly.
        if location >= len(str):
            return
        cur_col = col(location, str)
        if not(cur_col < indent_stack[-1] and cur_col <= indent_stack[-2]):
            raise ParseException(str, location, "not an unindent")

    def do_unindent():
        indent_stack.pop()

    # INDENT eats the newline then asserts deeper indentation; UNDENT is a
    # non-consuming lookahead that pops the indentation stack.
    INDENT = lineEnd.suppress() + \
        empty + empty.copy().setParseAction(check_sub_indent)
    UNDENT = FollowedBy(empty).setParseAction(check_unindent)
    UNDENT.setParseAction(do_unindent)

    # Identifier with optional "[0]"/"[n]" suffix, joined into one token.
    json_id = (Word(alphanums + "_") + Optional(oneOf("[0] [n]")))\
        .setResultsName("json_id", listAllMatches=True)\
        .setParseAction(lambda tokens: "".join(tokens))
    aliases = delimitedList(
        (Word(alphanums + "_") + Optional(oneOf("[0] [n]")))
        .setParseAction(lambda tokens: "".join(tokens)))\
        .setResultsName("aliases")

    # Restricted Python expression forms allowed as rule values.
    python_allowed_expr = Forward()
    ident = Word(alphas + "_", alphanums + "_")
    dict_def = originalTextFor(nestedExpr('{', '}'))
    list_def = originalTextFor(nestedExpr('[', ']'))
    dict_access = list_access = originalTextFor(ident + nestedExpr('[', ']'))
    function_call = originalTextFor(ZeroOrMore(ident + ".") + ident +
                                    nestedExpr('(', ')'))
    python_allowed_expr << (ident ^ dict_def ^ list_def ^ dict_access
                            ^ list_access ^ function_call)\
        .setResultsName("value", listAllMatches=True)

    # Supported @-decorators.
    persistent_identifier = (Suppress("@persistent_identifier") +
                             nestedExpr("(", ")"))\
        .setResultsName("persistent_identifier")
    inherit_from = (Suppress("@inherit_from") +
                    originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("inherit_from")
    legacy = (Suppress("@legacy") + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("legacy", listAllMatches=True)
    only_if = (Suppress("@only_if") + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("only_if")
    depends_on = (Suppress("@depends_on") +
                  originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("depends_on")
    parse_first = (Suppress("@parse_first") +
                   originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("parse_first")
    do_not_cache = (Suppress("@") + "do_not_cache")\
        .setResultsName("do_not_cache")
    master_format = (Suppress("@master_format") +
                     originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("master_format")

    # derived/calculated rules share one body grammar.
    derived_calculated_body = Optional(parse_first) + Optional(depends_on) + \
        Optional(only_if) + Optional(do_not_cache) + python_allowed_expr
    derived = "derived" + Suppress(":") + INDENT + \
        derived_calculated_body + UNDENT
    calculated = "calculated" + Suppress(":") + INDENT + \
        derived_calculated_body + UNDENT

    source_tag = quotedString\
        .setParseAction(removeQuotes)\
        .setResultsName("source_tag", listAllMatches=True)
    source_format = oneOf(CFG_BIBFIELD_MASTER_FORMATS)\
        .setResultsName("source_format", listAllMatches=True)
    creator_body = (Optional(parse_first) + Optional(depends_on) +
                    Optional(only_if) + Optional(legacy) + source_format +
                    Suppress(",") + source_tag + Suppress(",") +
                    python_allowed_expr)\
        .setResultsName("creator_def", listAllMatches=True)
    creator = "creator" + Suppress(":") + INDENT + \
        OneOrMore(creator_body) + UNDENT

    checker_function = (Optional(master_format) + ZeroOrMore(ident + ".") +
                        ident + originalTextFor(nestedExpr('(', ')')))\
        .setResultsName("checker_function", listAllMatches=True)
    checker = ("checker" + Suppress(":") + INDENT +
               OneOrMore(checker_function) + UNDENT)

    doc_string = QuotedString(quoteChar='"""', multiline=True) | \
        quotedString.setParseAction(removeQuotes)
    subfield = (Suppress("@subfield") + Word(alphanums + "_" + '.') +
                Suppress(":") + Optional(doc_string))\
        .setResultsName("subfields", listAllMatches=True)
    documentation = ("documentation" + Suppress(":") + INDENT +
                     Optional(doc_string).setResultsName("main_doc") +
                     ZeroOrMore(subfield) + UNDENT)\
        .setResultsName("documentation")

    field_def = (creator | derived | calculated)\
        .setResultsName("type_field", listAllMatches=True)
    body = Optional(inherit_from) + Optional(field_def) + \
        Optional(checker) + Optional(documentation)
    comment = Literal("#") + restOfLine + LineEnd()
    include = (Suppress("include") + quotedString)\
        .setResultsName("includes", listAllMatches=True)
    rule = (Optional(persistent_identifier) + json_id +
            Optional(Suppress(",") + aliases) + Suppress(":") + INDENT +
            body + UNDENT)\
        .setResultsName("rules", listAllMatches=True)
    # Whole file: rules, includes and comments (comments are dropped).
    return OneOrMore(rule | include | comment.suppress())
primary_field ::= para_end field_label primary_input ::= primary_field : field_value """ tab = Literal("\\tab").suppress() para_end = Literal("\\par").suppress() lparen = Literal("(") rparen = Literal(")") field_label_separator = Optional(Literal(":").suppress()) note = (lparen.suppress() + SkipTo(rparen, failOn=(para_end | tab) )('note') + rparen.suppress()) field_value_with_paren = Word(alphas + "-<>=.,+/%()#'& " + nums)('value') field_value_no_paren = Word(alphas + "-<>=.,+/%#'& " + nums)('value') # Very annoying - one field in first section is missing a colon field_label_exception = Literal("Water use <= 125 litres/person/day") # Ideally field_label should be allowed to contain parentheses iff # they are balanced field_label = (field_value_no_paren('label') + Optional(note) + Literal(":") | field_label_exception('label') ) # Ideally field text should be marked as running to end of line?
def __repr__(self): return "<variable " + str(self.name) + ">" number = Regex(r"[\+\-]?(([0-9]+(\.[0-9]+)?)|(\.[0-9]+))") comma = Literal(",") name = Regex("[a-z][a-z0-9_]*") var_name = Regex("[a-z][a-z0-9_]*") var_name.setParseAction(lambda tokens: Variable(tokens)) element = Forward() equation = Forward() arguments = Group(equation) + ZeroOrMore(comma.suppress() + Group(equation)) function_or_element = (name + Literal("(").suppress() + Group(arguments) + Literal(")").suppress()).setParseAction( lambda tokens: Function(tokens)) | element element << (var_name | number | (Literal("(").suppress() + Group(equation) + Literal(")").suppress())) equation << (function_or_element + ZeroOrMore(infix + function_or_element)) # Now we have the actual evaluate function. def evaluate(text, variables={}, functions={}): """ Evaluates the specified text as an arithmetic equation. The decimal module is used to do the actual calculations so that there's quite a bit of
float_number = Combine( integer + Optional( point + Optional(number) ) ).setParseAction(lambda t:float(t[0])) # (originally I had pyparsing pulling out the $(Macro) references from inside names # as well, but the framework doesn't work especially well without whitespace delimiters between # tokens so we just do simple find/replace in a second pass pv_name = Word(alphanums+":._$()") pv_value = (float_number | Word(alphanums)) pv_assignment = pv_name + pv_value comment = Literal("#") + Regex(r".*") macro = Group( Word(alphas) + Literal("=").suppress() + pv_name ) macros = Optional(macro + ZeroOrMore(Word(";,").suppress() + macro) ) #file_include = Literal("file") + pv_name + macros file_include = Literal("file") + \ (file_name | ignored_quote + file_name + ignored_quote) \ + Optional(ignored_comma) + macros def line(contents): return LineStart() + ZeroOrMore(Group(contents)) + LineEnd().suppress() req_line = line( file_include | comment.suppress() | pv_name ) req_file = OneOrMore(req_line) + StringEnd().suppress() sav_line = line( comment.suppress() | Literal("<END>").suppress() | pv_assignment) sav_file = OneOrMore(sav_line) + StringEnd().suppress()
def create_bnf( stack ):
    """Build the pyparsing grammar for region-selection expressions.

    The returned parser recognises expressions such as
    ``nodes of surface``, ``nodes in <relation>``, ``elements by <fn>``,
    ``r.<region>`` references and set operators (+n, -e, *n, ...),
    pushing each matched atom/operator onto *stack* via the ``to_stack``
    parse action so the caller can evaluate it afterwards.

    NOTE(review): the helpers ``replace``, ``replace_with_region``,
    ``join_tokens`` and ``to_stack`` are defined outside this view;
    their exact token rewriting is assumed.

    :param stack: mutable sequence that receives matched tokens.
    :returns: a pyparsing expression anchored to the whole input string.
    """
    # Numeric literals: integers and signed floats with optional exponent.
    point = Literal( "." )
    comma = Literal( "," )
    e = CaselessLiteral( "E" )
    inumber = Word( nums )
    fnumber = Combine( Word( "+-" + nums, nums ) +
                       Optional( point + Optional( Word( nums ) ) ) +
                       Optional( e + Word( "+-" + nums, nums ) ) )

    # Keywords and set operators; each operator is renamed to its opcode.
    _of = Literal( 'of' )
    _in = Literal( 'in' )
    _by = Literal( 'by' )
    _copy = Literal( 'copy' )
    _mn = Literal( '-n' ).setParseAction( replace( 'OA_SubN' ) )
    _me = Literal( '-e' ).setParseAction( replace( 'OA_SubE' ) )
    _pn = Literal( '+n' ).setParseAction( replace( 'OA_AddN' ) )
    _pe = Literal( '+e' ).setParseAction( replace( 'OA_AddE' ) )
    _inn = Literal( '*n' ).setParseAction( replace( 'OA_IntersectN' ) )
    _ine = Literal( '*e' ).setParseAction( replace( 'OA_IntersectE' ) )
    regop = (_mn | _me | _pn | _pe | _inn | _ine)
    lpar = Literal( "(" ).suppress()
    rpar = Literal( ")" ).suppress()

    _all = Literal( 'all' ).setParseAction( replace( 'KW_All' ) )
    node = Literal( 'node' )
    nodes = Literal( 'nodes' )
    element = Literal( 'element' )
    elements = Literal( 'elements' )
    group = Literal( 'group' )
    surface = Literal( 'surface' )

    variable = Word( 'xyz', max = 1 ) | Literal( 'domain' )
    any_var = Word( alphas + '_', alphanums + '_' ) | fnumber
    function = Word( alphas, alphanums + '_' )
    function = Group( function ).setParseAction( join_tokens )

    # Region reference: optional "copy" prefix + "r.<name>".
    region = Combine( Literal( 'r.' ) +
                      Word( alphas, '_' + alphas + nums ) )
    region = Group( Optional( _copy, default = 'nocopy' ) + region )
    region.setParseAction( replace( 'KW_Region', keep = True ) )

    # Boolean relations over coordinates, e.g. "(x < 0.1) & (y > 0)".
    coor = oneOf( 'x y z' )
    boolop = oneOf( '& |' )
    relop = oneOf( '< > <= >= != ==' )
    bool_term = ZeroOrMore( '(' ) + (coor | fnumber ) + relop + (coor | fnumber)\
                + ZeroOrMore( ')' )
    relation = Forward()
    relation << ZeroOrMore( '(' )\
             + bool_term + ZeroOrMore( boolop + relation )\
             + ZeroOrMore( ')' )
    relation = Group( relation ).setParseAction( join_tokens )

    # Atomic selectors; each is rewritten to its E_* opcode.
    nos = Group( nodes + _of + surface ).setParseAction( replace( 'E_NOS' ) )
    nir = Group( nodes + _in + relation ).setParseAction( \
        replace( 'E_NIR', keep = True ) )
    nbf = Group( nodes + _by + function ).setParseAction( \
        replace( 'E_NBF', keep = True ) )
    ebf = Group( elements + _by + function ).setParseAction( \
        replace( 'E_EBF', keep = True ) )
    eog = Group( elements + _of + group + Word( nums ) ).setParseAction( \
        replace( 'E_EOG', keep = True ) )
    nog = Group( nodes + _of + group + Word( nums ) ).setParseAction( \
        replace( 'E_NOG', keep = True ) )
    onir = Group( node + _in + region ).setParseAction( \
        replace_with_region( 'E_ONIR', 2 ) )
    ni = Group( node + delimitedList( inumber ) ).setParseAction( \
        replace( 'E_NI', keep = True ) )
    ei1 = Group( element + delimitedList( inumber ) ).setParseAction( \
        replace( 'E_EI1', keep = True ) )
    etuple = lpar.suppress() + inumber + comma.suppress() \
             + inumber + rpar.suppress()
    ei2 = Group( element + delimitedList( etuple ) ).setParseAction( \
        replace( 'E_EI2', keep = True ) )

    # Full expression: atoms combined by set operators; every atom and
    # operator application is pushed onto the evaluation stack.
    region_expression = Forward()
    atom1 = (_all | region | ni | onir | nos | nir | nbf
             | ei1 | ei2 | ebf | eog | nog)
    atom1.setParseAction( to_stack( stack ) )
    atom2 = (lpar + region_expression.suppress() + rpar)
    atom = (atom1 | atom2)
    aux = (regop + region_expression)
    aux.setParseAction( to_stack( stack ) )
    region_expression << atom + ZeroOrMore( aux )
    region_expression = StringStart() + region_expression + StringEnd()

    # region.set_debug()
    # relation.set_debug()
    # region_expression.set_debug()
    return region_expression
def __str__(self):
    # Render the object as its space-joined terms.
    # NOTE(review): this is the tail of a class whose header is outside
    # this view; it reads self.terms, an iterable of printable terms.
    return " ".join(str(term) for term in self.terms)

# Grammar for traffic-routing rules of the form
#   <traffic-spec> ; <path> => <path>
ARROW = Literal("=>")
EQUAL = Literal("=")
LPAREN = Literal("(")
RPAREN = Literal(")")
COMMA = Literal(",")
COLON = Literal(";")  # NB: named COLON but actually matches ";"
MATCH = Literal("match")
HEADER_WORDS = Word(alphanums + "_.")
# match(header=value): exact header match.
EXACT_MATCH = Group(MATCH.suppress() + LPAREN.suppress() + HEADER_WORDS +
                    Literal("=") + HEADER_WORDS + RPAREN.suppress())
# match(header,prefix): prefix header match.
PREFIX_MATCH = Group(MATCH.suppress() + LPAREN.suppress() + HEADER_WORDS +
                     Literal(",") + HEADER_WORDS + RPAREN.suppress())
TRAFFIC_CORE = (Literal("true") | EXACT_MATCH | PREFIX_MATCH)
# TODO: encode conjunction as (ts&ts) or ts&ts?
# A traffic spec: conjunction, negation, or a single core term.
TRAFFIC = Group(TRAFFIC_CORE + Literal("&") + TRAFFIC_CORE |
                Literal("not") + TRAFFIC_CORE |
                TRAFFIC_CORE)
PATH = Group(Word(alphanums + "()_.*- "))
# Whole rule: traffic ; path => path (";" and "=>" are suppressed).
grammar = (TRAFFIC + COLON.suppress() + PATH + ARROW.suppress() + PATH)
Word(srange("[A-Z]") + srange("[a-z]")) + restOfLine) + gEOL_keep #gText = OneOrMore(gShortText) #gIntroduction = Optional( #Group(Word(srange("[A-Z]") + srange("[a-z]")) + Regex(".*:$")) #.setResultsName("introduction")) gOptionDescriptionText = \ Optional(gRepetition + Literal(':')) + \ OneOrMore( Optional(Regex("\n")) + gSpaces + Optional(Literal('(')) + Word(alphas, alphanums + '_') + restOfLine)\ .setResultsName("description") gOptionDescriptionOption = gSpaces + \ gRawOption.setResultsName("first_option") + Optional(gParameter) + \ ZeroOrMore( Group( Literal(",").suppress() + gSpace.suppress() + gRawOption.setResultsName("option") + Optional(gParameter)))\ .setResultsName("other_options") gOptionDescriptionSwitch = Or([ gOptionDescriptionOption, gInnerParameter, gStdin]) gOptionDescription = (gOptionDescriptionSwitch + gOptionDescriptionText)\ .setResultsName("option_description") #gBidule = gIntroduction + gOptionDescription #gSubNext = Or([gBidule, gShortText.setResultsName("short_text")]) #gNext = Dict(ZeroOrMore(Group(gSubNext))).setResultsName("next") gRest = Regex("(.*\n?)*").setResultsName("rest") gHelp = Optional(gEmptyLine) + gUsage + gRest
def graph_definition():
    """Build (once) and return the pyparsing grammar for DOT graph files.

    The parser is cached in the module-global ``graphparser``; parse actions
    (``push_*``) convert matched productions into graph objects.
    """
    global graphparser
    if not graphparser:
        # punctuation
        colon = Literal(":")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Literal("=")
        comma = Literal(",")
        dot = Literal(".")
        slash = Literal("/")
        bslash = Literal("\\")
        star = Literal("*")
        semi = Literal(";")
        at = Literal("@")
        minus = Literal("-")
        # keywords (DOT keywords are case-insensitive)
        strict_ = CaselessLiteral("strict")
        graph_ = CaselessLiteral("graph")
        digraph_ = CaselessLiteral("digraph")
        subgraph_ = CaselessLiteral("subgraph")
        node_ = CaselessLiteral("node")
        edge_ = CaselessLiteral("edge")
        # token definitions
        identifier = Word(alphanums + "_.").setName("identifier")
        # quotes are kept on the token so downstream code can distinguish them
        double_quoted_string = QuotedString(
            '"', multiline=True, unquoteResults=False)  # dblQuotedString
        alphastring_ = OneOrMore(CharsNotIn(_noncomma + ' '))

        def parse_html(s, loc, toks):
            # re-wrap the nested token list back into a literal <...> string
            return '<%s>' % ''.join(toks[0])

        opener = '<'
        closer = '>'
        # HTML-like labels: angle-bracket nested expressions, whitespace kept
        html_text = nestedExpr(
            opener, closer, (CharsNotIn(opener + closer))).setParseAction(parse_html).leaveWhitespace()
        ID = (
            identifier | html_text | double_quoted_string | #.setParseAction(strip_quotes) |
            alphastring_).setName("ID")
        float_number = Combine(Optional(minus) + OneOrMore(Word(nums + "."))).setName("float_number")
        righthand_id = (float_number | ID).setName("righthand_id")
        # node ports:  :port[:compass]  or  :(x,y), optionally with @angle
        port_angle = (at + ID).setName("port_angle")
        port_location = (OneOrMore(Group(colon + ID)) | Group(colon + lparen + ID + comma + ID + rparen)).setName("port_location")
        port = (Group(port_location + Optional(port_angle)) | Group(port_angle + Optional(port_location))).setName("port")
        node_id = (ID + Optional(port))
        # attribute lists:  [key=value, key, ...]  (possibly several [...] runs)
        a_list = OneOrMore(ID + Optional(equals + righthand_id) + Optional(comma.suppress())).setName("a_list")
        attr_list = OneOrMore(lbrack.suppress() + Optional(a_list) + rbrack.suppress()).setName("attr_list")
        attr_stmt = (Group(graph_ | node_ | edge_) + attr_list).setName("attr_stmt")
        edgeop = (Literal("--") | Literal("->")).setName("edgeop")
        stmt_list = Forward()
        graph_stmt = Group(lbrace.suppress() + Optional(stmt_list) + rbrace.suppress() + Optional(semi.suppress())).setName("graph_stmt")
        edge_point = Forward()
        # an edge: point (-- or ->) point [-> point ...] [attrs]
        edgeRHS = OneOrMore(edgeop + edge_point)
        edge_stmt = edge_point + edgeRHS + Optional(attr_list)
        subgraph = Group(subgraph_ + Optional(ID) + graph_stmt).setName("subgraph")
        edge_point << Group(subgraph | graph_stmt | node_id).setName('edge_point')
        node_stmt = (node_id + Optional(attr_list) + Optional(semi.suppress())).setName("node_stmt")
        assignment = (ID + equals + righthand_id).setName("assignment")
        # NOTE: alternation order matters — assignment must precede node_stmt
        stmt = (assignment | edge_stmt | attr_stmt | subgraph | graph_stmt | node_stmt).setName("stmt")
        stmt_list << OneOrMore(stmt + Optional(semi.suppress()))
        graphparser = OneOrMore((Optional(strict_) + Group(
            (graph_ | digraph_)) + Optional(ID) + graph_stmt).setResultsName("graph"))
        singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)
        # actions
        graphparser.ignore(singleLineComment)
        graphparser.ignore(cStyleComment)
        assignment.setParseAction(push_attr_list)
        a_list.setParseAction(push_attr_list)
        edge_stmt.setParseAction(push_edge_stmt)
        node_stmt.setParseAction(push_node_stmt)
        attr_stmt.setParseAction(push_default_stmt)
        subgraph.setParseAction(push_subgraph_stmt)
        graph_stmt.setParseAction(push_graph_stmt)
        graphparser.setParseAction(push_top_graph_stmt)
    return graphparser
# NOTE: continuation of a command-line grammar whose opening lines are above
# this chunk — the fragment below completes an expression started earlier.
    _sglQuote).streamline().setName("string enclosed in single quotes")
quotedArg = (dblQuotedString | sglQuotedString)
quotedArg.setParseAction(removeQuotes)
quotedArg.setName("quotedArg")
# plain (unquoted) arguments: any printable except comment/quote characters
plainArgChars = printables.replace('#', '').replace('"', '').replace("'", "")
plainArg = Word(plainArgChars)
plainArg.setName("plainArg")
# a command's argument vector: any mix of quoted and plain words
arguments = Group(ZeroOrMore(quotedArg | plainArg))
arguments = arguments.setResultsName('arguments')
arguments.setName("arguments")
# comment line.
comment = Literal('#') + restOfLine
comment = comment.suppress()
comment.setName('comment')
# a full line: pure comment, or command + args with optional trailing comment
full_command = (comment | (command + arguments + Optional(comment)))
full_command.setName('full_command')
###
command_list = []  # filled in by namespaces.init_global_dict().
# command/argument handling.
def process_args(args, globals_dict, locals_dict):
    """
    Take a list of string arguments parsed via pyparsing and evaluate
# --- expression grammar: literals, infix operators, lists, function calls ---
none = Literal("None")
true = Literal("True")
false = Literal("False")
atom = Forward()
# precedence table, tightest first: not, * /, + -, comparisons, and, or, in
infix = infixNotation(atom, [
    ('not', 1, opAssoc.RIGHT, _make_unary),
    (oneOf('* /'), 2, opAssoc.LEFT, _make_binary),
    (oneOf('+ -'), 2, opAssoc.LEFT, _make_binary),
    (oneOf('> gt >= ge < lt <= le != ne == eq'), 2, opAssoc.LEFT, _make_binary),
    ('and', 2, opAssoc.LEFT, _make_binary),
    ('or', 2, opAssoc.LEFT, _make_binary),
    ('in', 2, opAssoc.LEFT, _make_binary),
])
# [a, b, ...] — elements optional, so empty slots are permitted
dellist = delimitedList(Optional(atom))
listing = lbr.suppress() + dellist + rbr.suppress()
# name(arg, ...) — arguments captured as a group under "args"
function = identifier.setResultsName('name') + lpar.suppress() + Group(
    Optional(delimitedList(atom))).setResultsName("args") + rpar.suppress()
atom << (listing | number | string | variable | true | false | none | function)
# parse actions wrap matched tokens into AST node objects
_false = Const(False)
_true = Const(True)
number.setParseAction(lambda t: Const(_number(t[0])))
variable.setParseAction(lambda t: Variable(t[0].strip("$")))
string.setParseAction(lambda t: Const(_str(t[0])))
# NOTE(review): 'None' is mapped to Const(False), not a null value — confirm intended
none.setParseAction(lambda t: _false)
false.setParseAction(lambda t: _false)
true.setParseAction(lambda t: _true)
dellist.setParseAction(lambda s, l, t: List(t[:]))
function.setParseAction(_make_func)
# --- character classes and word-list productions for a bind-style config ---
BASE_STRINGS = alphanums + "-" + "_"
NETWORK_STRINGS = alphanums + "-" + "_" + '.'  # adjust as needed
BASE_WORDS = Word(BASE_STRINGS)
QUOTED_WORDS = quotedString.addParseAction(removeQuotes)
END_OF_WORDS = WordEnd(BASE_STRINGS)
# statements are ';'-terminated; the token itself is suppressed
LineSeparator = Literal(';').suppress().setResultsName('separator_token')
Comments = Optional(cppStyleComment.setResultsName('comment'))
opener, closer = Literal('{'), Literal('}')
# ex: {1.1.1.1; 2.2.2.2; ...}
WORD_LIST = (
    opener.suppress() +
    delimitedList(Word(NETWORK_STRINGS), delim=';') +
    LineSeparator +
    closer.suppress()
).setParseAction(valuelists_detection)
# same shape but quoted members: {"a"; "b"; ...}
QUOTED_WORD_LIST = (
    opener.suppress() +
    delimitedList(QUOTED_WORDS, delim=';') +
    LineSeparator +
    closer.suppress()
).setParseAction(quoted_valuelists_detection)
NameDefinitions = BASE_WORDS.setResultsName('node_type')
# (originally I had pyparsing pulling out the $(Macro) references from inside names # as well, but the framework doesn't work especially well without whitespace delimiters between # tokens so we just do simple find/replace in a second pass pv_name = Word(alphanums + ":._$()") pv_value = (float_number | Word(alphanums)) pv_assignment = pv_name + pv_value comment = Literal("#") + Regex(r".*") macro = Group(Word(alphas) + Literal("=").suppress() + pv_name) macros = Optional(macro + ZeroOrMore(Word(";,").suppress() + macro)) #file_include = Literal("file") + pv_name + macros file_include = Literal("file") + \ (file_name | ignored_quote + file_name + ignored_quote) \ + Optional(ignored_comma) + macros def line(contents): return LineStart() + ZeroOrMore(Group(contents)) + LineEnd().suppress() req_line = line(file_include | comment.suppress() | pv_name) req_file = OneOrMore(req_line) + StringEnd().suppress() sav_line = line(comment.suppress() | Literal("<END>").suppress() | pv_assignment) sav_file = OneOrMore(sav_line) + StringEnd().suppress()
class ObfuscateBNF(object):
    # Abstract base holding the pyparsing grammar used to scan source
    # statements and feed identifier names into the obfuscation tables.
    # NOTE(review): __metaclass__ is the Python 2 spelling; under Python 3 it
    # has no effect — confirm target interpreter.
    __metaclass__ = abc.ABCMeta

    def __init__(self, get_obfuscated):
        """BNF grammar for source statements.

        Parameters
        ----------
        get_obfuscated : function
            Function to return the obfuscated name for an identifier.
        """
        self.get_obfuscated = get_obfuscated
        # '#' or ':' prefix marks a directive line; comments are any other
        # python-style comment
        self.directive = oneOf("#:")
        self.comment = ~self.directive + pythonStyleComment
        # runs of punctuation, at most two characters long
        self.separator = Word("~!@$%^&*()+`-={}|[]:;<>?,/.", max=2)
        # single- and triple-quoted strings, quotes preserved
        self.string = \
            QuotedString(quoteChar='"', escChar='\\', multiline=False,
                         unquoteResults=False) |\
            QuotedString(quoteChar="'", escChar='\\', multiline=False,
                         unquoteResults=False)
        self.doc_string = \
            QuotedString(quoteChar='"""', escChar='\\', multiline=True,
                         unquoteResults=False) |\
            QuotedString(quoteChar="'''", escChar='\\', multiline=True,
                         unquoteResults=False)
        self.string_or_doc = self.doc_string | self.string
        self.triple_quote = Literal("'''") | Literal('"""')
        # numeric literal pieces: [+-]digits[.digits][E[+-]digits]
        self.e = Literal('E') | Literal('e')
        self.point = Literal('.')
        self.plusorminus = Literal('+') | Literal('-')
        self.number = Word(nums)
        self.integer = Combine(Optional(self.plusorminus) + self.number)
        self.fnumber = Combine(
            self.integer +
            Optional(self.point + Optional(self.number)) +
            Optional(self.e + self.integer))
        self.tab = Literal(' ')  # a single space token
        self.ident = Word(alphas+'_', alphanums+'_')
        self.conseq_idents_numbs = OneOrMore(self.ident | self.fnumber)
        # dotted attribute chains: ident('.' ident)+
        self.attrib = self.ident + OneOrMore('.'+self.ident)
        # a whole statement, comment stripped
        self.statement = (
            ZeroOrMore(
                (self.directive | self.tab | self.conseq_idents_numbs |
                 self.separator | self.string_or_doc | self.triple_quote)
            ) + Optional(self.comment).suppress()
        )
        # statement view keeping only dotted attribute chains
        self.attribs = (
            ZeroOrMore(
                (self.directive.suppress() | self.tab.suppress() |
                 self.attrib | self.ident.suppress() |
                 self.separator.suppress() | self.fnumber.suppress() |
                 self.string_or_doc.suppress() | self.triple_quote.suppress())
            ) + Optional(self.comment).suppress()
        )
        # statement view keeping only bare identifiers (to be obfuscated)
        self.conseq_idents = (
            ZeroOrMore(
                (self.directive.suppress() | self.tab.suppress() |
                 self.ident | self.separator.suppress() |
                 self.fnumber.suppress() | self.string.suppress())
            ) + Optional(self.comment).suppress()
        )
        # identifiers that must be recorded but NOT obfuscated
        self.conseq_idents_no_obfuscate = (
            ZeroOrMore(
                (self.directive.suppress() | self.tab.suppress() |
                 self.ident | self.separator.suppress() |
                 self.fnumber.suppress() | self.string_or_doc.suppress() |
                 self.triple_quote.suppress())
            ) + Optional(self.comment).suppress()
        )
        # wire grammar productions to their parse actions below
        self.attribs.setParseAction(self.add_attribs_reserveds)
        self.conseq_idents.setParseAction(self.add_conseq_idents)
        self.conseq_idents_no_obfuscate.setParseAction(
            self.add_conseq_idents_no_obfuscate)
        self.conseq_idents_numbs.setParseAction(
            self.transform_conseq_ident_numbs)
        self.directive.setParseAction(self.transform_directive)

    ###############
    # Parse actions
    ###############

    def add_conseq_idents(self, conseq_idents_list):
        """Add names to obfuscate to identifiers table.

        Parameters
        ----------
        conseq_idents_list : list
        """
        # import/except statements are skipped wholesale
        if 'import' not in conseq_idents_list[:] and \
           'except' not in conseq_idents_list[:]:
            add_identifiers(set(conseq_idents_list))

    def add_conseq_idents_no_obfuscate(
            self, conseq_idents_no_obfuscate_list):
        """Add names that are not obfuscated to identifiers table.

        Parameters
        ----------
        conseq_idents_no_obfuscate_list : list
        """
        # If an except error was not added to reserved list, don't obfuscate it
        if 'import' not in conseq_idents_no_obfuscate_list[:] and \
           'except' not in conseq_idents_no_obfuscate_list[:]:
            add_identifiers(set(conseq_idents_no_obfuscate_list),
                            do_obfuscate=False)

    def add_attribs_reserveds(self, attribs_list):
        """Add attributes of reserved names to reserved list.

        Take a list of attributes strings from a source statement, break it
        into lists of objects with their attributes, and add attributes that
        follow a reserved name to the reserved list.

        Example
        ------
        If r is reserved, then a.r.c + d.r.e would add c and e to reserveds.

        Parameters
        ----------
        attribs_list : list
        """
        if attribs_list:
            # Create an ordered list of attribute parents
            # Ex. a.b.c => [a, b]
            _attrib_list = [attribs_list[0]]
            is_last_token_an_attrib = True
            # tokens alternate between names and '.'; a name directly after a
            # name means the previous chain ended and a new one starts
            for token in attribs_list[1:]:
                if is_last_token_an_attrib and token != '.':
                    # End of attrib list reached. Process list.
                    add_attribs_reserveds_list(_attrib_list)
                    # Start new attrib list
                    _attrib_list = [token]
                    is_last_token_an_attrib = True
                elif is_last_token_an_attrib and token == '.':
                    is_last_token_an_attrib = False
                elif not is_last_token_an_attrib and token == '.':
                    continue  # Multiple dots, continue attrib list
                elif not is_last_token_an_attrib and token != '.':
                    _attrib_list.append(token)
                    is_last_token_an_attrib = True
            else:
                # Process last list (for/else: runs after normal completion)
                if _attrib_list:
                    add_attribs_reserveds_list(_attrib_list)

    def transform_conseq_ident_numbs(self, conseq_ident_list):
        """Allow for non-name tokens in a statement.

        Names start with an alpha or underscore. Obfuscate these name tokens
        and simply copy unchanged other tokens.

        Parameters
        ----------
        conseq_ident_list : list

        Returns
        -------
        statement : str
        """
        return ' '.join([
            self.get_obfuscated(ident)
            if (ident[0].isalpha() or ident[0] == '_')
            else ident
            for ident in conseq_ident_list
        ])

    def transform_directive(self, directive_list):
        """Create a directive statement."""
        return ''.join([directive_list[0], ' '])
def _create_field_parser():
    """
    Creates a parser using pyparsing that works with bibfield rule definitions

    BNF like grammar:

    rule ::= ([persitent_identifier] json_id ["[0]" | "[n]"] "," aliases":" INDENT body UNDENT) | include | python_comment
    include ::= "include(" PATH ")"
    body ::=  [inherit_from] (creator | derived | calculated) [checker] [documentation] [producer]
    aliases ::= json_id ["[0]" | "[n]"] ["," aliases]

    creator ::= "creator:" INDENT creator_body+ UNDENT
    creator_body ::= [decorators] source_format "," source_tag "," python_allowed_expr
    source_format ::= MASTER_FORMATS
    source_tag ::= QUOTED_STRING

    derived ::= "derived" INDENT derived_calculated_body UNDENT
    calculated ::= "calculated:" INDENT derived_calculated_body UNDENT
    derived_calculated_body ::= [decorators] "," python_allowed_exp

    decorators ::= (peristent_identfier | legacy | do_not_cache | parse_first | depends_on | only_if | only_if_master_value)*
    peristent_identfier ::= @persitent_identifier( level )
    legacy ::= "@legacy(" correspondences+ ")"
    correspondences ::= "(" source_tag [ "," tag_name ] "," json_id ")"
    parse_first ::= "@parse_first(" jsonid+ ")"
    depends_on ::= "@depends_on(" json_id+ ")"
    only_if ::= "@only_if(" python_condition+ ")"
    only_if_master_value ::= "@only_if_master_value(" python_condition+  ")"

    inherit_from ::= "@inherit_from()"
    python_allowed_exp ::= ident | list_def | dict_def | list_access | dict_access | function_call

    checker ::= "checker:" INDENT checker_function+ UNDENT

    documentation ::= INDENT doc_string subfield* UNDENT
    doc_string ::= QUOTED_STRING
    subfield ::= "@subfield" json_id["."json_id*] ":" docstring

    producer ::= "producer:" INDENT producer_body UNDENT
    producer_body ::= producer_code "," python_dictionary
    producer_code ::= ident
    """
    # indentation-tracking state shared by the INDENT/UNDENT helpers below
    indent_stack = [1]

    def check_sub_indent(str, location, tokens):
        # accept only a strictly deeper column; push it on the stack
        cur_col = col(location, str)
        if cur_col > indent_stack[-1]:
            indent_stack.append(cur_col)
        else:
            raise ParseException(str, location, "not a subentry")

    def check_unindent(str, location, tokens):
        if location >= len(str):
            return
        # must be shallower than current level and at most the parent level
        cur_col = col(location, str)
        if not(cur_col < indent_stack[-1] and cur_col <= indent_stack[-2]):
            raise ParseException(str, location, "not an unindent")

    def do_unindent():
        indent_stack.pop()

    INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(check_sub_indent)
    UNDENT = FollowedBy(empty).setParseAction(check_unindent)
    # NOTE(review): setParseAction REPLACES the previous action, so
    # check_unindent above never runs — addParseAction may have been intended.
    UNDENT.setParseAction(do_unindent)

    # json_id with optional cardinality marker, joined into a single token
    json_id = (Word(alphas + "_", alphanums + "_") + Optional(oneOf("[0] [n]")))\
        .setResultsName("json_id", listAllMatches=True)\
        .setParseAction(lambda tokens: "".join(tokens))
    aliases = delimitedList((Word(alphanums + "_") + Optional(oneOf("[0] [n]")))
                            .setParseAction(lambda tokens: "".join(tokens)))\
        .setResultsName("aliases")
    ident = Word(alphas + "_", alphanums + "_")
    # raw-text captures of python-ish sub-expressions
    dict_def = originalTextFor(nestedExpr('{', '}'))
    list_def = originalTextFor(nestedExpr('[', ']'))
    dict_access = list_access = originalTextFor(ident + nestedExpr('[', ']'))
    function_call = originalTextFor(ZeroOrMore(ident + ".") + ident + nestedExpr('(', ')'))

    python_allowed_expr = (dict_def ^ list_def ^ dict_access ^ \
                           list_access ^ function_call ^ restOfLine)\
        .setResultsName("value", listAllMatches=True)

    # field decorators ("@name(...)" forms)
    persistent_identifier = (Suppress("@persistent_identifier") + \
                             nestedExpr("(", ")"))\
        .setResultsName("persistent_identifier")
    legacy = (Suppress("@legacy") + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("legacy", listAllMatches=True)
    only_if = (Suppress("@only_if") + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("only_if")
    only_if_master_value = (Suppress("@only_if_value") + \
                            originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("only_if_master_value")
    depends_on = (Suppress("@depends_on") + \
                  originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("depends_on")
    parse_first = (Suppress("@parse_first") + \
                   originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("parse_first")
    memoize = (Suppress("@memoize") + nestedExpr("(", ")"))\
        .setResultsName("memoize")
    field_decorator = parse_first ^ depends_on ^ only_if ^ \
        only_if_master_value ^ memoize ^ legacy

    # Independent decorators
    inherit_from = (Suppress("@inherit_from") + \
                    originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("inherit_from")
    override = (Suppress("@") + "override")\
        .setResultsName("override")
    extend = (Suppress("@") + "extend")\
        .setResultsName("extend")
    master_format = (Suppress("@master_format") + \
                     originalTextFor(nestedExpr("(", ")")))\
        .setResultsName("master_format") \
        .setParseAction(lambda toks: toks[0])

    # rule bodies: derived / calculated / creator
    derived_calculated_body = (ZeroOrMore(field_decorator) + python_allowed_expr)\
        .setResultsName('derived_calculated_def')

    derived = "derived" + Suppress(":") + \
        INDENT + derived_calculated_body + UNDENT
    calculated = "calculated" + Suppress(":") + \
        INDENT + derived_calculated_body + UNDENT

    source_tag = quotedString\
        .setParseAction(removeQuotes)\
        .setResultsName("source_tag", listAllMatches=True)
    source_format = Word(alphas, alphanums + "_")\
        .setResultsName("source_format", listAllMatches=True)
    creator_body = (ZeroOrMore(field_decorator) + source_format + \
                    Suppress(",") + source_tag + Suppress(",") + python_allowed_expr)\
        .setResultsName("creator_def", listAllMatches=True)
    creator = "creator" + Suppress(":") + \
        INDENT + OneOrMore(creator_body) + UNDENT

    field_def = (creator | derived | calculated)\
        .setResultsName("type_field", listAllMatches=True)

    # JsonExtra
    json_dumps = (Suppress('dumps') + Suppress(',') + python_allowed_expr)\
        .setResultsName("dumps")\
        .setParseAction(lambda toks: toks.value[0])
    json_loads = (Suppress("loads") + Suppress(",") + python_allowed_expr)\
        .setResultsName("loads")\
        .setParseAction(lambda toks: toks.value[0])
    json_extra = (Suppress('json:') + \
                  INDENT + Each((json_dumps, json_loads)) + UNDENT)\
        .setResultsName('json_ext')

    # Checker
    checker_function = (Optional(master_format) + ZeroOrMore(ident + ".") +
                        ident + originalTextFor(nestedExpr('(', ')')))\
        .setResultsName("checker", listAllMatches=True)
    checker = ("checker" + Suppress(":") + INDENT + OneOrMore(checker_function) + UNDENT)

    # Description/Documentation
    doc_double = QuotedString(quoteChar='"""', multiline=True)
    doc_single = QuotedString(quoteChar="'''", multiline=True)
    doc_string = INDENT + (doc_double | doc_single) + UNDENT
    description_body = (Suppress('description:') + doc_string).\
        setParseAction(lambda toks: toks[0][0])
    description = (description_body | doc_double | doc_single)\
        .setResultsName('description')

    # Producer
    producer_code = (Word(alphas, alphanums + "_")\
                     + originalTextFor(nestedExpr("(", ")")))\
        .setResultsName('producer_code', listAllMatches=True)
    producer_body = (producer_code + Suppress(",") + python_allowed_expr)\
        .setResultsName("producer_rule", listAllMatches=True)
    producer = Suppress("producer:") + INDENT + OneOrMore(producer_body) + UNDENT

    schema = (Suppress('schema:') + INDENT + dict_def + UNDENT)\
        .setParseAction(lambda toks: toks[0])\
        .setResultsName('schema')

    # a rule body is any combination of these sections, in any order (&)
    body = Optional(field_def) & Optional(checker) & Optional(json_extra) \
        & Optional(description) & Optional(producer) & Optional(schema)
    comment = Literal("#") + restOfLine + LineEnd()
    include = (Suppress("include") + quotedString)\
        .setResultsName("includes", listAllMatches=True)
    rule = (Optional(persistent_identifier) + Optional(inherit_from) + \
            Optional(override) + Optional(extend) + json_id + \
            Optional(Suppress(",") + aliases) + Suppress(":") + \
            INDENT + body + UNDENT)\
        .setResultsName("rules", listAllMatches=True)

    return OneOrMore(rule | include | comment.suppress())
    def __init__(self, expr_str: str, args: dict, backend: tp.Any, **kwargs) -> None:
        """Instantiates expression parser.

        Splits ``expr_str`` into lhs/rhs, registers callable args as backend
        ops, and (lazily) builds the pyparsing algebra whose parse actions
        push tokens onto ``self.expr_stack`` in evaluation order.
        """
        # call super init
        #################
        super().__init__()

        # bind attributes to instance
        #############################

        # input arguments
        self.vars = args.copy()
        self.var_map = {}
        self.backend = backend
        self.parser_kwargs = kwargs
        self.lhs, self.rhs, self._diff_eq, self._assign_type, self.lhs_key = self._preprocess_expr_str(
            expr_str)

        # add functions from args dictionary to backend, if passed
        for key, val in args.items():
            if callable(val):
                self.backend.ops[key] = val

        # additional attributes
        self.expr_str = expr_str
        self.expr = None
        self.expr_stack = []
        self.expr_list = []
        self.op = None
        self._finished_rhs = False
        self._instantaneous = kwargs.pop('instantaneous', False)

        # define algebra
        ################

        if not self.expr:

            # general symbols
            point = Literal(".")
            comma = Literal(",")
            colon = Literal(":")
            e = CaselessLiteral("E")
            pi = CaselessLiteral("PI")

            # parentheses
            par_l = Literal("(")
            par_r = Literal(")").setParseAction(self._push_first)
            idx_l = Literal("[")
            idx_r = Literal("]")

            # basic mathematical operations
            plus = Literal("+")
            minus = Literal("-")
            mult = Literal("*")
            div = Literal("/")
            mod = Literal("%")
            dot = Literal("@")
            exp_1 = Literal("^")
            exp_2 = Combine(mult + mult)      # '**'
            transp = Combine(point + Literal("T"))  # '.T' transpose suffix
            inv = Combine(point + Literal("I"))     # '.I' inverse suffix

            # numeric types
            num_float = Combine(
                Word("-" + nums, nums) +
                Optional(point + Optional(Word(nums))) +
                Optional(e + Word("-" + nums, nums)))
            num_int = Word("-" + nums, nums)

            # variables and functions
            name = Word(alphas, alphas + nums + "_$")
            # function name must be immediately followed by '(' (no whitespace)
            func_name = Combine(name + par_l, adjacent=True)

            # math operation groups
            op_add = plus | minus
            op_mult = mult | div | dot | mod
            op_exp = exp_1 | exp_2 | inv | transp

            # logical operations
            greater = Literal(">")
            less = Literal("<")
            equal = Combine(Literal("=") + Literal("="))
            unequal = Combine(Literal("!") + Literal("="))
            greater_equal = Combine(Literal(">") + Literal("="))
            less_equal = Combine(Literal("<") + Literal("="))

            # logical operations group (longest alternatives first)
            op_logical = greater_equal | less_equal | unequal | equal | less | greater

            # pre-allocations
            self.expr = Forward()
            exponential = Forward()
            index_multiples = Forward()

            # basic organization units
            index_start = idx_l.setParseAction(self._push_first)
            index_end = idx_r.setParseAction(self._push_first)
            index_comb = colon.setParseAction(self._push_first)
            arg_comb = comma.setParseAction(self._push_first)

            # function-call argument tuples
            arg_tuple = par_l + ZeroOrMore(self.expr.suppress() + Optional(arg_comb)) + par_r
            func_arg = arg_tuple | self.expr.suppress()

            # basic computation unit
            atom = (func_name + Optional(func_arg.suppress()) +
                    ZeroOrMore(arg_comb.suppress() + func_arg.suppress()) +
                    par_r.suppress() | name | pi | e | num_float | num_int
                    ).setParseAction(self._push_neg_or_first) | \
                (par_l.setParseAction(self._push_last) +
                 self.expr.suppress() + par_r).setParseAction(self._push_neg)

            # apply indexing to atoms
            indexed = (Optional(minus) + atom).setParseAction(self._push_neg) + \
                ZeroOrMore((index_start + index_multiples + index_end))
            index_base = (self.expr.suppress() | index_comb)
            index_full = index_base + ZeroOrMore(
                (index_comb + index_base)) + ZeroOrMore(index_comb)
            index_multiples << index_full + ZeroOrMore((arg_comb + index_full))

            # hierarchical relationships between mathematical and logical operations
            boolean = indexed + Optional(
                (op_logical + indexed).setParseAction(self._push_first))
            exponential << boolean + ZeroOrMore(
                (op_exp + Optional(exponential)).setParseAction(
                    self._push_first))
            factor = exponential + ZeroOrMore(
                (op_mult + exponential).setParseAction(self._push_first))
            expr = factor + ZeroOrMore(
                (op_add + factor).setParseAction(self._push_first))
            self.expr << expr #(Optional(minus) + expr).setParseAction(self._push_neg)
    def build_MEASURE(self):
        """Build the pyparsing grammar for the MEASURE DSL.

        The DSL has three sections — ``measurements { ... }``,
        ``zones { ... }`` and ``actions { ... }`` — and this method also
        populates the known function-signature tables used for validation.
        """
        ## Grammar definition
        # literals
        self.var_list = dict()
        period = Literal(".")
        variable = Word(alphas, alphanums + "." + "_" + "-").setName("variable").setDebug(self.dbgLiterals)
        number = Word(nums+".").setName("number").setDebug(self.dbgLiterals)
        integer = Word(nums).setName("integer").setDebug(self.dbgLiterals)
        # NOTE(review): 'float' shadows the builtin within this method
        float = Combine(integer + "." + integer).setName("float").setDebug(self.dbgLiterals)
        ipAddress = Combine(integer + ('.' + integer)*3).setName("ipAddress").setDebug(self.dbgLiterals)
        quote = (Literal("\"").suppress()|Literal("'").suppress()).setName("quote").setDebug(self.dbgLiterals)
        # quoted string allowing "" and \x.. escapes inside
        string = (quote + Regex(r'(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*') + quote).setName("string").setDebug(self.dbgLiterals)
        # special characters (all suppressed from the parse results)
        oparen = Literal("(").suppress().setName("opening parenthesis").setDebug(self.dbgLiterals)
        eparen = Literal(")").suppress().setName("closing parenthesis").setDebug(self.dbgLiterals)
        semicolon = Literal(";").suppress().setName("semicolon").setDebug(self.dbgLiterals)
        comma = Literal(",").suppress().setName("comma").setDebug(self.dbgLiterals)
        obrace = Literal("{").suppress().setName("opening brace").setDebug(self.dbgLiterals)
        ebrace = Literal("}").suppress().setName("closing brace").setDebug(self.dbgLiterals)
        to = Literal("->").setName("right-arrow").setDebug(self.dbgLiterals)
        # section literals
        measurements = Literal("measurements").suppress().setDebug(self.dbgLiterals)
        zoneTok = Literal("zones").suppress().setDebug(self.dbgLiterals)
        actionTok = Literal("actions").suppress().setDebug(self.dbgLiterals)
        # arithmetic literals
        eq = Literal("=").setName("equal sign").setDebug(self.dbgLiterals)
        geq = Literal(">=").setName("greater or equal sign").setDebug(self.dbgLiterals)
        leq = Literal("<=").setName("less or equal sign").setDebug(self.dbgLiterals)
        gt = Literal(">").setName("greater than sign").setDebug(self.dbgLiterals)
        lt = Literal("<").setName("less than sign").setDebug(self.dbgLiterals)
        minus = Literal("-").setName("minus sign").setDebug(self.dbgLiterals)
        plus = Literal("+").setName("plus sign").setDebug(self.dbgLiterals)
        _and = (Literal("&&")|Literal("and")).setName("and sign").setDebug(self.dbgLiterals)
        _or = (Literal("||")|Literal("or")).setName("or sign").setDebug(self.dbgLiterals)
        _not = (Literal("!")|Literal("not")).setName("not sign").setDebug(self.dbgLiterals)

        # Productions for measurement definitions
        # paramExpr = Group(Optional(((variable)("pname") + eq.suppress() + (number|variable|dblQuotedString)("pval")) + ZeroOrMore(comma + (number|variable|dblQuotedString)("p"))))
        # name=value parameters; value alternatives ordered most-specific first
        namedParam = Group((variable)("pname") + eq.suppress() +
                           (ipAddress("pipaddr")|float("pfloat")|integer("pint")|variable("pvar")) +
                           Optional(comma))("param").setDebug(self.dbgMeasurement)
        paramExpr = Group(ZeroOrMore(namedParam))("params").setDebug(self.dbgMeasurement)
        functionExpr = Group(variable("fname") + oparen + paramExpr + eparen )("function").setDebug(self.dbgMeasurement)
        # var = func(...);
        measurementExpr = Group(variable("mvar") + eq.suppress() + (functionExpr) + semicolon)("measure").setDebug(self.dbgMeasurement)
        measurementList = OneOrMore(measurementExpr).setDebug(self.dbgMeasurement)
        measure = Group(measurements + obrace + measurementList + ebrace)("measurements").setDebug(self.dbgMeasurement)

        # Productions for zone definitions
        arithParamExpr = Group(Optional((number|variable|string)("param") +
                               ZeroOrMore(comma + (number|variable|string)("param")))).setDebug(self.dbgZones)
        arithNamedParam = Group((variable)("pname") + eq.suppress() +
                                (ipAddress("pipaddr")|float("pfloat")|integer("pint")|variable("pvar")|dblQuotedString("pstr")) +
                                Optional(comma))("param").setDebug(self.dbgZones)
        # NOTE: rebinding arithParamExpr — the positional form above is unused
        arithParamExpr = Group(ZeroOrMore(arithNamedParam))("params").setDebug(self.dbgZones)
        arithFuncExpr = Group(variable("fname") + oparen + arithParamExpr("params") + eparen + Optional(comma))("function").setDebug(self.dbgZones)
        arithNestFuncExpr = Group(OneOrMore(arithFuncExpr))("params").setDebug(self.dbgZones)
        # func(func(...), ...): one level of nested calls
        arithFuncExpr2 = Group(variable("fname") + oparen + arithNestFuncExpr + eparen)("function").setDebug(self.dbgZones)
        arithTok = (arithFuncExpr|arithFuncExpr2|number("num")|variable("var")).setDebug(self.dbgZones)
        opExpr = (eq|geq|leq|gt|lt|minus|plus|_and|_or).setDebug(self.dbgZones)
        arithExpr = Forward().setDebug(self.dbgZones)
        # fully-parenthesised binary expressions: ( l op r )
        arithExpr << Group(oparen + Group((arithTok|arithExpr))("l") + opExpr("op") +
                           Group((arithTok|arithExpr))("r") + eparen)("expression").setDebug(self.dbgZones)
        zoneExpr = Group(variable("zname") + eq.suppress() + arithExpr + semicolon)("zone").setName("ZoneExpr").setDebug(self.dbgZones)
        zones = Group(zoneTok + obrace + OneOrMore(zoneExpr) + ebrace)("zones").setName("Zones").setDebug(self.dbgZones)

        # Productions for action definitions
        actNamedParam = Group((variable)("pname") + eq.suppress() +
                              (ipAddress("pipaddr")|float("pfloat")|integer("pint")|variable("pvar")|dblQuotedString("pstr")) +
                              Optional(comma))("param").setDebug(self.dbgActions)
        actParamExpr = Group(ZeroOrMore(actNamedParam))("params").setDebug(self.dbgActions)
        actFunExpr = Group(variable("fname") + oparen + actParamExpr + eparen + semicolon)("function").setDebug(self.dbgActions)
        # statevariable doesn't allow "-", because its confused with "->"
        statevariable = Word(alphas, alphanums + "." + "_").setName("statevariable")
        state = statevariable("state").setDebug(self.dbgActions)
        # FSM triggers: transition, enter-edge, leave-edge, or bare state
        statetrans = Group(statevariable("from") + to.suppress() + statevariable("to"))("trans").setDebug(self.dbgActions)
        stateenter = Group(to.suppress() + statevariable("enter"))("edge").setDebug(self.dbgActions)
        stateleave = Group(statevariable("leave") + to.suppress())("edge").setDebug(self.dbgActions)
        fsm = (statetrans|stateleave | stateenter|state).setDebug(self.dbgActions)
        action = Group(fsm + eq.suppress() + Group(OneOrMore(actFunExpr))("functions"))("action").setDebug(self.dbgActions)
        actions = Group(actionTok + obrace + OneOrMore(action) + ebrace)("actions").setDebug(self.dbgActions)

        # full document: measurements, then zones, then actions
        self.MEASURE = measure + zones + actions

        # known function signatures used to validate parsed definitions
        self.actionFunctions = [
            {"fname":"Publish",
             "parameters": [
                 {"pname":"topic","type":"pstr"},
                 {"pname":"message","type":"pstr"},
             ]},
            {"fname":"Notify",
             "parameters": [
                 {"pname":"target","type":"pstr"},
                 {"pname":"message","type":"pstr"},
             ]}
        ]
        self.zoneFunctions = [
            {"fname":"AVG",
             "parameters": [
                 {"pname":"val","type":"pvar"},
                 {"pname":"max_age","type":"pstr"},
             ]}
        ]
        self.measureFunctions = [
            {"fname":"delay.twoway.icmp.us.mean",
             "parameters": [
                 {"pname":"source.ipv4","type":"pipaddr"},
                 {"pname":"destination.ipv4","type":"pipaddr"},
                 {"pname":"count","type":"pint"}
             ]},
            {"fname":"overload.risk.rx",
             "parameters": [
                 {"pname":"interface","type":"pvar"}
             ]}
        ]
"""Extract ``<a href=...>text</a>`` links from a web page with pyparsing.

Grammar:
    URLlink ::= <a href= URL>linkText</a>
    URL     ::= doubleQuotedString | alphanumericWordPath

Note that whitespace may appear just about anywhere in the link. Note also
that it is not necessary to explicitly show this in the pyparsing grammar; by
default, pyparsing skips over whitespace between tokens.

Fixed over the original: Python 2-only constructs (``urllib.urlopen`` and the
``print`` statement) replaced with their Python 3 equivalents, the HTTP
response is closed deterministically, and the response bytes are decoded
before parsing.
"""
from pyparsing import Literal, Suppress, CharsNotIn, CaselessLiteral, \
    Word, dblQuotedString, alphanums, SkipTo
import urllib.request
import pprint

# <a href="..."> — tag punctuation suppressed, only the URL token is kept
linkOpenTag = (Literal("<") + "a" + "href" + "=").suppress() + \
    (dblQuotedString | Word(alphanums + "/")) + \
    Suppress(">")
linkCloseTag = Literal("<") + "/" + CaselessLiteral("a") + ">"
link = linkOpenTag + SkipTo(linkCloseTag) + linkCloseTag.suppress()

# Go get some HTML with some links in it; the context manager guarantees the
# connection is released even if read() raises.
with urllib.request.urlopen("http://www.yahoo.com") as serverListPage:
    htmlText = serverListPage.read().decode("utf-8", errors="replace")

# scanString is a generator that loops through the input htmlText, and for each
# match yields the tokens and start and end locations (for this application, we
# are not interested in the start and end values).
for toks, strt, end in link.scanString(htmlText):
    print(toks.asList())

# Rerun scanString, but this time create a dict of text:URL key-value pairs.
# Need to reverse the tokens returned by link, using a parse action.
link.setParseAction(lambda st, loc, toks: [toks[1], toks[0]])
    @classmethod
    def __convert_ref(cls, rf):
        # Convert one parsed "attr.path:OPTION" reference into a representer
        # configuration dict keyed by tuples of attribute names.
        names = [
            identifier_from_slug(token) for token in rf.attribute.split('.')
        ]
        config = {}
        # First, make sure that all parent attributes are configured INLINE.
        for idx in range(len(names[:-1])):
            key = tuple(names[:idx + 1])
            config[key] = {IGNORE_OPTION: False, WRITE_AS_LINK_OPTION: False}
        is_off = rf.option == OFF_PAT
        opts = {IGNORE_OPTION: is_off}
        if not is_off:
            # non-OFF references are written either as a link (URL) or inline
            opts[WRITE_AS_LINK_OPTION] = rf.option == URL_PAT
        config[tuple(names)] = opts
        return config

# module-level grammar for "attr.sub:OPTION ~ attr2:OPTION ..." lists
colon = Literal(':')
attribute = Word(alphas, alphanums + '-')
identifier = \
    Combine(attribute + ZeroOrMore('.' + attribute)).setName('identifier')
option = (URL | INLINE | OFF)
ref = Group(identifier('attribute') + colon.suppress() + option('option'))
refs = delimitedList(ref, delim=TILDE_PAT)
refs.setParseAction(RefsConverter.convert)

def parse_refs(refs_string):
    # Parse a reference list; the parse action has already converted the
    # tokens, so the first result is the finished configuration.
    return refs.parseString(refs_string)[0]
def parse_morphology(filename, filename_toparse):
    """Translate a NEURON hoc morphology file into a Python script.

    Parses hoc statements (``create``, ``connect``, ``for`` loops and
    ``pt3dclear``/``pt3dadd``/``pt3dstyle`` geometry blocks) from
    `filename_toparse`, and writes an equivalent ``shape_3D`` function to
    `filename` via the ``print_*`` parse-action factories defined elsewhere
    in this module.

    Returns the pyparsing tokens produced by the parse.
    """
    global current_section_name
    current_section_name = ''
    # try/finally so both files are closed even if parsing raises
    # (the original leaked both file handles).
    converted_file = open(filename, 'w')
    try:
        converted_file.write('from neuron import h\ndef shape_3D(self):\n')
        ntabs = 1  # from here on, add a tab to all lines

        # Character classes for section names: a..z plus '_' plus A..Z.
        # NOTE(review): `lowercase` comes from outside this function
        # (presumably Python 2's string.lowercase) -- confirm at file top.
        uppercase = lowercase.upper()
        lowercaseplus = lowercase + '_' + uppercase

        COMMA = Literal(',')
        EQUALS = Literal('=')
        MINUS = Literal('-')
        PERIOD = Literal('.')
        LCURL = Literal('{')
        RCURL = Literal('}')
        LBRACK = Literal('(')   # hoc uses parentheses for section locations
        RBRACK = Literal(')')
        LSQUARE = Literal('[')
        RSQUARE = Literal(']')
        PTSCLEAR = Literal('{pt3dclear()').suppress()
        PTSCLEARNL = Literal('{\npt3dclear()\n').suppress()

        integer = Word(nums)
        # BUGFIX: setResultsName returns a renamed copy; the original
        # discarded it, so the 'SINGLE'/'ARRAY' names never took effect.
        single_section = Word(lowercaseplus, min=2).setResultsName('SINGLE')
        integer_var = Word(lowercase, exact=1)
        # Signed decimal number, e.g. -47 or 92.5.
        double = Group(Optional(MINUS) + integer + Optional(PERIOD + integer))

        # Simple arithmetic used inside array indices, e.g. foo[i+1].
        operand = integer ^ integer_var
        operator = Word('+-*/', exact=1)
        unaryoperation = operand
        binaryoperation = operand + operator + operand
        operation = unaryoperation ^ binaryoperation

        array_section = Group(single_section + LSQUARE.suppress() +
                              operation + RSQUARE.suppress()).setResultsName('ARRAY')
        section = single_section ^ array_section
        # A position along a section, e.g. soma(0.5).
        section_location = Group(section + LBRACK.suppress() + double + RBRACK.suppress())

        create = Keyword('create').suppress() + section + \
                 ZeroOrMore(COMMA.suppress() + section)
        create.setParseAction(print_create(converted_file, ntabs))
        connect = Keyword('connect').suppress() + section_location + \
                  COMMA.suppress() + section_location
        connect.setParseAction(print_connect(converted_file, ntabs))
        for_loop = Keyword('for').suppress() + integer_var + EQUALS.suppress() + \
                   integer + COMMA.suppress() + integer
        # NOTE TO FUTURE SELF: for loops can only have one line of code in this implementation
        for_loop.setParseAction(print_for_loop(converted_file, ntabs))

        point_add = Literal('pt3dadd(').suppress() + double + COMMA.suppress() + \
                    double + COMMA.suppress() + double + COMMA.suppress() + \
                    double + RBRACK.suppress()
        point_add.setParseAction(print_point_add(converted_file, ntabs))
        point_style = Literal('pt3dstyle(').suppress() + double + COMMA.suppress() + \
                      double + COMMA.suppress() + double + COMMA.suppress() + \
                      double + RBRACK.suppress()
        point_style.setParseAction(print_point_style(converted_file, ntabs))

        # A geometry block: "<section> {pt3dclear() pt3dadd(...) ... }".
        geom_define_pre = section + (PTSCLEAR ^ PTSCLEARNL)
        geom_define_body = OneOrMore(point_add ^ point_style) + RCURL.suppress()
        geom_define_pre.setParseAction(update_current_section(converted_file, ntabs))
        geom_define = geom_define_pre + geom_define_body

        expression = (connect ^ for_loop ^ geom_define ^ create)
        codeblock = OneOrMore(expression)

        file_to_parse = open(filename_toparse)
        try:
            tokens = codeblock.parseString(file_to_parse.read())
        finally:
            file_to_parse.close()
    finally:
        converted_file.close()
    return tokens
def graph_definition():
    """Build (once) and return the pyparsing grammar for the DOT graph language.

    The constructed parser is cached in the module-global ``graphparser``;
    the ``push_*`` parse actions (defined elsewhere in this module) build
    graph objects as input is matched.
    """
    global graphparser

    if not graphparser:
        # punctuation
        colon = Literal(":")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Literal("=")
        comma = Literal(",")
        dot = Literal(".")
        slash = Literal("/")
        bslash = Literal("\\")
        star = Literal("*")
        semi = Literal(";")
        at = Literal("@")
        minus = Literal("-")

        # keywords
        strict_ = CaselessLiteral("strict")
        graph_ = CaselessLiteral("graph")
        digraph_ = CaselessLiteral("digraph")
        subgraph_ = CaselessLiteral("subgraph")
        node_ = CaselessLiteral("node")
        edge_ = CaselessLiteral("edge")

        # token definitions
        identifier = Word(alphanums + "_." ).setName("identifier")
        # Quotes are kept (unquoteResults=False) so attribute values
        # round-trip exactly.
        double_quoted_string = QuotedString('"', multiline=True, unquoteResults=False) # dblQuotedString

        _noncomma = "".join([c for c in printables if c != ","])
        alphastring_ = OneOrMore(CharsNotIn(_noncomma + ' '))

        def parse_html(s, loc, toks):
            # Re-wrap the nested content in angle brackets to restore the
            # original HTML-label text.
            return '<%s>' % ''.join(toks[0])

        opener = '<'
        closer = '>'
        # DOT HTML-like labels: balanced <...> with arbitrary nesting.
        html_text = nestedExpr( opener, closer,
            ( CharsNotIn( opener + closer ) )
                ).setParseAction(parse_html).leaveWhitespace()

        # An ID is any of: bare identifier, HTML label, quoted string, or a
        # run of "other" characters.
        ID = ( identifier | html_text |
            double_quoted_string | #.setParseAction(strip_quotes) |
            alphastring_ ).setName("ID")

        float_number = Combine(Optional(minus) +
            OneOrMore(Word(nums + "."))).setName("float_number")

        righthand_id = (float_number | ID ).setName("righthand_id")

        # Node ports: ":port[:compass]" or ":(x,y)", optionally "@angle".
        port_angle = (at + ID).setName("port_angle")

        port_location = (OneOrMore(Group(colon + ID)) |
            Group(colon + lparen + ID + comma + ID + rparen)).setName("port_location")

        port = (Group(port_location + Optional(port_angle)) |
            Group(port_angle + Optional(port_location))).setName("port")

        node_id = (ID + Optional(port))
        # Attribute list contents: "key=value, key, ..." (commas optional).
        a_list = OneOrMore(ID + Optional(equals + righthand_id) +
            Optional(comma.suppress())).setName("a_list")

        attr_list = OneOrMore(lbrack.suppress() + Optional(a_list) +
            rbrack.suppress()).setName("attr_list")

        # "graph [...]", "node [...]", "edge [...]" default-attribute stmts.
        attr_stmt = (Group(graph_ | node_ | edge_) + attr_list).setName("attr_stmt")

        edgeop = (Literal("--") | Literal("->")).setName("edgeop")

        stmt_list = Forward()
        graph_stmt = Group(lbrace.suppress() + Optional(stmt_list) +
            rbrace.suppress() + Optional(semi.suppress()) ).setName("graph_stmt")

        edge_point = Forward()

        # Edges chain: point (-> point)+, endpoints may be subgraphs.
        edgeRHS = OneOrMore(edgeop + edge_point)
        edge_stmt = edge_point + edgeRHS + Optional(attr_list)

        subgraph = Group(subgraph_ + Optional(ID) + graph_stmt).setName("subgraph")

        edge_point << Group(subgraph | graph_stmt | node_id ).setName('edge_point')

        node_stmt = (node_id + Optional(attr_list) + Optional(semi.suppress())).setName("node_stmt")

        assignment = (ID + equals + righthand_id).setName("assignment")

        # NOTE: alternative order matters -- assignment and edge_stmt must be
        # tried before node_stmt, which would also match their prefixes.
        stmt = (assignment | edge_stmt | attr_stmt | subgraph | graph_stmt | node_stmt).setName("stmt")
        stmt_list << OneOrMore(stmt + Optional(semi.suppress()))

        # Top level: one or more (possibly "strict") graph/digraph bodies.
        graphparser = OneOrMore( (Optional(strict_) + Group((graph_ | digraph_)) +
            Optional(ID) + graph_stmt).setResultsName("graph") )

        singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

        # actions
        graphparser.ignore(singleLineComment)
        graphparser.ignore(cStyleComment)

        assignment.setParseAction(push_attr_list)
        a_list.setParseAction(push_attr_list)
        edge_stmt.setParseAction(push_edge_stmt)
        node_stmt.setParseAction(push_node_stmt)
        attr_stmt.setParseAction(push_default_stmt)
        subgraph.setParseAction(push_subgraph_stmt)
        graph_stmt.setParseAction(push_graph_stmt)
        graphparser.setParseAction(push_top_graph_stmt)

    return graphparser
from pyparsing import Literal,Suppress,CharsNotIn,CaselessLiteral,\ Word,dblQuotedString,alphanums,SkipTo import urllib import pprint # Define the pyparsing grammar for a URL, that is: # URLlink ::= <a href= URL>linkText</a> # URL ::= doubleQuotedString | alphanumericWordPath # Note that whitespace may appear just about anywhere in the link. Note also # that it is not necessary to explicitly show this in the pyparsing grammar; by default, # pyparsing skips over whitespace between tokens. linkOpenTag = (Literal("<") + "a" + "href" + "=").suppress() + \ ( dblQuotedString | Word(alphanums+"/") ) + \ Suppress(">") linkCloseTag = Literal("<") + "/" + CaselessLiteral("a") + ">" link = linkOpenTag + SkipTo(linkCloseTag) + linkCloseTag.suppress() # Go get some HTML with some links in it. serverListPage = urllib.urlopen( "http://www.yahoo.com" ) htmlText = serverListPage.read() serverListPage.close() # scanString is a generator that loops through the input htmlText, and for each # match yields the tokens and start and end locations (for this application, we are # not interested in the start and end values). for toks,strt,end in link.scanString(htmlText): print toks.asList() # Rerun scanString, but this time create a dict of text:URL key-value pairs. # Need to reverse the tokens returned by link, using a parse action. link.setParseAction( lambda st,loc,toks: [ toks[1], toks[0] ] )