def parse_mapping(data):
    '''
    Takes a list of strings and returns the (character, configuration)
    mapping contained in it.

    The `data` passed is the metadata at the start of the file, which
    specifies characters that have to be replaced with configurations.
    For example, 'u sp 5d' indicates that 'u' is to be replaced with 'sp 5d'.

    Different mappings on a line are separated by a gap of at least two
    spaces; the grammars below rely on that to split the individual
    mappings, and the part after the character is joined to form the
    configuration.

    Returns a dict mapping each character to its configuration string.
    '''
    mapping = {}
    # Defined two grammars, as Optional() wasn't working as expected:
    # two-level configuration, e.g. 'u sp 5d'
    map_parse1 = (CharsNotIn(' ') + White() + Word(alphanums) + White(exact=1)
                  + Word(alphanums) + White(min=2) + restOfLine)
    # single-level configuration, e.g. 'u 5d'
    map_parse2 = (CharsNotIn(' ') + White() + Word(alphanums) + White(min=2)
                  + restOfLine)
    for row in data:
        row_copy = row
        while len(row_copy) > 0:
            # If it's a two-level configuration:
            try:
                temp = map_parse1.parseString(row_copy)
                mapping[temp[0]] = ' '.join([temp[2], temp[4]])
                row_copy = temp[-1]
            except Exception:
                # If it's a single-level configuration:
                try:
                    temp = map_parse2.parseString(row_copy)
                    mapping[temp[0]] = temp[2]
                    row_copy = temp[-1]
                except Exception:
                    # Parsing error, so mappings must be complete
                    return mapping
    # BUG FIX: the original fell off the end (returning None) when every
    # row was consumed cleanly; always return the collected mapping.
    return mapping
def param_substitude(lines, param_dict):
    """Substitute {{param}} placeholders in every non-blank line, in place.

    Each line is right-stripped before transformation; blank lines are left
    untouched.  The (mutated) `lines` list is returned.
    """
    # A line is one or more "<plain text> {{word}} <plain text>" groups,
    # optionally followed by a single trailing newline.
    grammar = (CharsNotIn('{{}}')[0, ] + '{{' + WRD_p + '}}'
               + CharsNotIn('{{}}')[0, ])[1, ] + Char('\n')[0, 1]
    grammar.setParseAction(lambda toks: param_set(toks, param_dict))

    for idx in range(len(lines)):
        stripped = lines[idx].rstrip()
        if not stripped:
            continue
        lines[idx] = grammar.transformString(stripped)
    return lines
def init_latex_parser(self):
    """Build the pyparsing grammar used to tokenise LaTeX-like text.

    Stores the entry point (a parseString callable) in ``self.parser``.
    """
    # these variables will be used to define valid lists of characters
    lowers = 'qwertyuiopasdfghjklzxcvbnm'
    uppers = lowers.upper()
    alphas = lowers + uppers
    digits = '1234567890'
    other_word_symbols = '-_'
    word_chars = alphas + other_word_symbols + digits
    punctuation_symbols = '.!?:;…'
    command_symbol = '\\'
    white_characters = CharsNotIn(word_chars + punctuation_symbols + command_symbol)

    # define grammar
    word = Word(word_chars)
    # As far as I can tell, only alphas are allowed in a LaTeX command name
    # (bar the special commands such as \&).
    # BUG FIX: OneOrMore(alphas) turned the whole alphabet string into a
    # single Literal; a command name is a run of alphabetic characters,
    # i.e. Word(alphas).
    command = command_symbol + Word(alphas)
    punc = Word(punctuation_symbols)
    white_and_word = Group(Suppress(Optional(white_characters)) + word)
    white_and_command = Group(Suppress(Optional(white_characters)) + command)
    white_and_punc = Group(Suppress(Optional(white_characters)) + punc)
    sentence = ZeroOrMore(white_and_word) + white_and_punc
    pure_piece = ZeroOrMore(sentence)
    bracketed_piece = '{' + pure_piece + Suppress(
        ZeroOrMore(white_characters)) + '}'

    # NOTE(review): `content` is not defined anywhere in this method, so the
    # line below raises NameError at runtime — presumably one of the rules
    # above (e.g. pure_piece) was meant to be the entry point.  TODO confirm.
    self.parser = content.parseString
class NLPyParser(NLBaseParser):
    """pyparsing-based implementation of the NLBaseParser.

    Parses lines of the form ``ts=... event=... key=value ...`` where the
    fields may appear in any order.
    """
    # Grammar, built once at class-definition time.
    # A value is either a quoted string (backslash-escaped, quotes kept in
    # the result) or a run of non-space characters.
    notSpace = CharsNotIn(" \n")
    eq = Literal('=').suppress()
    value = (QuotedString('"', escChar=chr(92), unquoteResults=False) \
        ^ OneOrMore(notSpace))
    # The two mandatory fields.
    ts = Group(Literal('ts') + eq + value)
    event = Group(Literal('event') + eq + value)
    # Any other name=value pair; 'ts'/'event' are excluded by the negative
    # lookahead so they are only matched by their dedicated rules above.
    name = ~oneOf("ts event") + Word(alphanums + '-_.')
    nv = ZeroOrMore(Group(name + eq + value))
    # Each(): ts, event and the free-form pairs may appear in any order;
    # the line must end with a newline followed by end-of-string.
    nvp = Each([ts, event, nv]) + White('\n').suppress() + StringEnd()

    def parseLine(self, line):
        """Parse one line into a {name: value} dict.

        Raises ValueError (wrapping the ParseException) on malformed input.
        When ``self.parse_date`` is set, the 'ts' value is converted via
        ``parse_ts``.
        """
        try:
            rlist = self.nvp.parseString(line).asList()
        except ParseException as E:
            raise ValueError(E)
        result = {}
        for a in rlist:
            if self.parse_date and a[0] == 'ts':
                result[a[0]] = parse_ts(a[1])
            else:
                result[a[0]] = a[1]
        return result
def PIs():
    """Parses Processing Instructions

    PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'

    >>> tests=(
    ...     '''<?foo?>''',
    ...     '''<?foo ?>''',
    ...     '''<?foo abc def ghi ?>''',
    ...     '''<?xml-stylesheet foo="bla" bar="x" ?>''',
    ...     '''<?xml-stylesheet
    ...  foo="bla"
    ...  foo="x"
    ...  ?>''',
    ... )
    >>> p=PIs()
    >>> for t in tests:
    ...     result=p.parseString(t)
    ...     print result
    ['foo']
    ['foo', ' ']
    ['foo', ' abc def ghi ']
    ['xml-stylesheet', ' foo="bla" bar="x" ']
    ['xml-stylesheet', '\\n foo="bla"\\n foo="x"\\n ']
    """
    # NOTE(review): CharsNotIn('?>') excludes the characters '?' and '>'
    # individually, so PI contents may not contain either character at all —
    # stricter than the ABNF above, which only forbids the '?>' sequence.
    pi=Suppress('<?') + \
        Word(alphas, alphanums+'-_')('pitarget') + \
        ZeroOrMore(CharsNotIn('?>'))('picontents') + \
        Suppress('?>')
    return pi
def _parser_piece_text():
    """Return a PyParsing element matching the text of a markdown link."""
    # Markdown link text may not contain a blank line: two line breaks
    # separated by nothing but spaces/tabs.
    blank_line = (Word("\n\r", exact=1) + Optional(Word(" \t")) +
                  Word("\n\r", exact=1))

    # Escaped square brackets do not count towards bracket balancing.
    escaped_bracket = Literal("\\[") | Literal("\\]")

    # nestedExpr matches balanced brackets.  Because it sets whitespaceChars
    # to '', the content expression is applied one character at a time, so a
    # single non-bracket character (not starting a blank line) suffices.
    #
    # If that behaviour ever changes, the content would need to become
    # something like Combine(OneOrMore(~escaped_bracket + one_char)).
    one_char = ~FollowedBy(blank_line) + CharsNotIn("[]", exact=1)

    text = originalTextFor(
        nestedExpr(
            opener="[",
            closer="]",
            content=one_char,
            ignoreExpr=escaped_bracket,
        )).setResultsName("text")
    # Strip the enclosing brackets from the matched text.
    text.addParseAction(lambda s, l, toks: toks[0][1:-1])
    return text
def pyparse_blk(text):
    """Parse .blk block-diagram text into a tree rooted at a root Block.

    Raises ValueError on any syntax error in `text`.
    """
    def create_add_block(tokens):
        # Default the color to 'white' when none was given.
        return Block.Block(tokens.title,
                           tokens.color if tokens.color else 'white')
    left_bracket, right_bracket, equal_sign = map(Suppress, '[]=')
    # Either '#rrggbb' or a named color.
    # NOTE(review): Word(alphanums, alphas) means the name may *start* with a
    # digit but continue only with letters — possibly the arguments are
    # swapped; TODO confirm against the color-name inputs.
    color = (Word('#', hexnums, exact=7) | Word(alphanums, alphas))('color')
    empty_block = (
        left_bracket + right_bracket)('empty_block').setParseAction(lambda: EmptyBlock)
    # Runs of '/' mark new lines; converted to their count.
    new_lines = Word('/')('new_lines').setParseAction(
        lambda tokens: len(tokens.new_lines))
    title = CharsNotIn('[]/\n')('title').setParseAction(
        lambda tokens: tokens.title.strip())
    block_data = Optional(color + Suppress(':')) + Optional(title)
    block_data.addParseAction(create_add_block)
    # Recursive rule: a block contains nested blocks.
    blocks = Forward()
    block = left_bracket + block_data + blocks + right_bracket
    blocks << Group(
        ZeroOrMore(Optional(new_lines) + OneOrMore(empty_block | block)))
    stack = [Block.create_root_block()]
    try:
        result = blocks.parseString(text, parseAll=True)
        assert len(result) == 1
        blocks_list = result.asList()[0]
        populate_children(blocks_list, stack)
    except (ParseSyntaxException, ParseException) as parse_err:
        # NOTE(review): the doubled braces leave a literal '{0}' in the
        # message (only the second placeholder is filled) — this mirrors the
        # sibling pyparsing_parse(), whose doctests expect exactly that text.
        raise ValueError('Error {{0}}: {0}'.format(parse_err.lineno))
    return stack[0]
def func_tokens(dictionary, parse_action):
    """Build grammar pieces for ``$func(...)`` calls, ``$N`` substitutions
    and comma-separated argument lists.

    Returns the tuple (func_tok, arglist, rx_tok).
    """
    # $name(...) — the parenthesised args are captured verbatim.
    ident = Word(alphas + '_', alphanums + '_')
    func_ident = Combine('$' + ident.copy()('funcname'))
    func_tok = func_ident + originalTextFor(nestedExpr())('args')
    func_tok.leaveWhitespace()
    func_tok.setParseAction(parse_action)
    func_tok.enablePackrat()

    # $N — replaced by dictionary[N], or '' when absent.
    rx_tok = Combine(Literal('$').suppress() + Word(nums)('num'))

    def replace_token(tokens):
        return dictionary.get(int(tokens.num), '')

    rx_tok.setParseAction(replace_token)

    # Bare text between commas, stripped of surrounding whitespace.
    def strip_text(s, l, tok):
        return tok[0].strip()

    text_tok = CharsNotIn(',').setParseAction(strip_text)
    quote_tok = QuotedString('"')

    # $N substitution only participates when there is a dictionary to
    # draw replacements from.
    if dictionary:
        arglist = Optional(delimitedList(quote_tok | rx_tok | text_tok))
    else:
        arglist = Optional(delimitedList(quote_tok | text_tok))
    return func_tok, arglist, rx_tok
def pyparsing_parse(text):
    """
    >>> import os
    >>> dirname = os.path.join(os.path.dirname(__file__), "data")
    >>> filename = os.path.join(dirname, "error1.blk")
    >>> pyparsing_parse(open(filename, encoding="utf8").read())
    Traceback (most recent call last):
    ...
    ValueError: Error {0}: syntax error, line 8
    >>> filename = os.path.join(dirname, "error2.blk")
    >>> pyparsing_parse(open(filename, encoding="utf8").read())
    Traceback (most recent call last):
    ...
    ValueError: Error {0}: syntax error, line 1
    >>> filename = os.path.join(dirname, "error3.blk")
    >>> pyparsing_parse(open(filename, encoding="utf8").read())
    Traceback (most recent call last):
    ...
    ValueError: Error {0}: syntax error, line 4
    >>> expected = "[white: ]\\n[lightblue: Director]\\n/\\n/\\n[white: ]\\n[lightgreen: Secretary]\\n/\\n/\\n[white: Minion #1]\\n[white: ]\\n[white: Minion #2]"
    >>> filename = os.path.join(dirname, "hierarchy.blk")
    >>> blocks = pyparsing_parse(open(filename, encoding="utf8").read())
    >>> str(blocks).strip() == expected
    True
    >>> expected = "[#00CCDE: MessageBox Window\\n[lightgray: Frame\\n[white: ]\\n[white: Message text]\\n/\\n/\\n[goldenrod: OK Button]\\n[white: ]\\n[#ff0505: Cancel Button]\\n/\\n[white: ]\\n]\\n]"
    >>> filename = os.path.join(dirname, "messagebox.blk")
    >>> blocks = pyparsing_parse(open(filename, encoding="utf8").read())
    >>> str(blocks).strip() == expected
    True
    """
    def add_block(tokens):
        # Default the color to "white" when none was given.
        return Block.Block(tokens.name,
                           tokens.color if tokens.color else "white")
    left_bracket, right_bracket = map(Suppress, "[]")
    # Runs of '/' mark new rows; converted to their count.
    new_rows = Word("/")("new_rows").setParseAction(
        lambda tokens: len(tokens.new_rows))
    name = CharsNotIn("[]/\n")("name").setParseAction(
        lambda tokens: tokens.name.strip())
    # Either '#rrggbb' or a named color.
    color = (Word("#", hexnums, exact=7) | Word(alphas, alphanums))("color")
    empty_node = (left_bracket +
                  right_bracket).setParseAction(lambda: EmptyBlock)
    # Recursive rule: a node contains nested nodes.
    nodes = Forward()
    node_data = Optional(color + Suppress(":")) + Optional(name)
    node_data.setParseAction(add_block)
    # '-' (And with error stop) makes a failure after '[' raise
    # ParseSyntaxException rather than backtracking silently.
    node = left_bracket - node_data + nodes + right_bracket
    nodes << Group(
        ZeroOrMore(Optional(new_rows) + OneOrMore(node | empty_node)))
    stack = [Block.get_root_block()]
    try:
        results = nodes.parseString(text, parseAll=True)
        assert len(results) == 1
        items = results.asList()[0]
        populate_children(items, stack)
    except (ParseException, ParseSyntaxException) as err:
        # NOTE: the doubled braces deliberately leave a literal '{0}' in the
        # message — the doctests above expect exactly that text.
        raise ValueError("Error {{0}}: syntax error, line "
                         "{0}".format(err.lineno))
    return stack[0]
def fromString(inputText):
    """Parse IDSL source text and return the pyparsing parse tree.

    C-style /* */ comments are stripped first; // comments are ignored via
    cppStyleComment on the top-level rule.
    """
    # Strip /* ... */ block comments before parsing proper.
    text = nestedExpr("/*", "*/").suppress().transformString(inputText)

    # Punctuation.
    # NOTE(review): Word(";") etc. match *runs* of the character, not a
    # single occurrence — kept as written.
    semicolon = Suppress(Word(";"))
    quote = Suppress(Word("\""))
    op = Suppress(Word("{"))
    cl = Suppress(Word("}"))
    opp = Suppress(Word("("))
    clp = Suppress(Word(")"))
    lt = Suppress(Word("<"))
    gt = Suppress(Word(">"))
    eq = Suppress(Word("="))

    identifier = Word(alphas+"_",alphanums+"_")
    typeIdentifier = Word(alphas+"_",alphanums+"_:")

    # One struct field: <type> <name> [= <default>] ;
    structIdentifer = Group(typeIdentifier.setResultsName('type') + identifier.setResultsName('identifier') + Optional(eq) + Optional(CharsNotIn(";").setResultsName('defaultValue')) + semicolon)
    structIdentifers = Group(OneOrMore(structIdentifer))

    ## Imports: import "path";
    idslImport = Suppress(Word("import")) + quote + CharsNotIn("\";").setResultsName('path') + quote + semicolon
    idslImports = ZeroOrMore(idslImport)

    # Type definitions.
    structDef = Word("struct").setResultsName('type') + identifier.setResultsName('name') + op + structIdentifers.setResultsName("structIdentifiers") + cl + semicolon
    dictionaryDef = Word("dictionary").setResultsName('type') + lt + CharsNotIn("<>").setResultsName('content') + gt + identifier.setResultsName('name') + semicolon
    sequenceDef = Word("sequence").setResultsName('type') + lt + typeIdentifier.setResultsName('typeSequence') + gt + identifier.setResultsName('name') + semicolon
    enumDef = Word("enum").setResultsName('type') + identifier.setResultsName('name') + op + CharsNotIn("{}").setResultsName('content') + cl + semicolon
    exceptionDef = Word("exception").setResultsName('type') + identifier.setResultsName('name') + op + CharsNotIn("{}").setResultsName('content') + cl + semicolon

    # Interface methods: [idempotent|out] ret name(params) [throws T, ...];
    raiseDef = Suppress(Word("throws")) + typeIdentifier + ZeroOrMore( Literal(',') + typeIdentifier )
    decoratorDef = Literal('idempotent') | Literal('out')
    retValDef = typeIdentifier.setResultsName('ret')
    firstParam = Group( Optional(decoratorDef.setResultsName('decorator')) + typeIdentifier.setResultsName('type') + identifier.setResultsName('name'))
    nextParam = Suppress(Word(',')) + firstParam
    params = firstParam + ZeroOrMore(nextParam)
    remoteMethodDef = Group(Optional(decoratorDef.setResultsName('decorator')) + retValDef.setResultsName('ret') + typeIdentifier.setResultsName('name') + opp + Optional( params).setResultsName('params') + clp + Optional(raiseDef.setResultsName('raise')) + semicolon )
    interfaceDef = Word('interface').setResultsName('type') + typeIdentifier.setResultsName('name') + op + Group(ZeroOrMore(remoteMethodDef)).setResultsName('methods') + cl + semicolon

    # module <name> { ...contents... };
    moduleContent = Group(structDef | enumDef | exceptionDef | dictionaryDef | sequenceDef | interfaceDef)
    module = Suppress(Word("module")) + identifier.setResultsName("name") + op + ZeroOrMore(moduleContent).setResultsName("contents") + cl + semicolon

    # A file is the imports followed by exactly one module.
    IDSL = idslImports.setResultsName("imports") + module.setResultsName("module")
    IDSL.ignore( cppStyleComment )
    tree = IDSL.parseString(text)
    return tree
class Include(object):
    """Represents a single ``#include <header>`` directive."""

    # Grammar: INCLUDE '<' header '>'; the parse action wraps the captured
    # header text in an Include instance.
    rule = (INCLUDE + LOPBRACK + CharsNotIn('>')("header")
            + ROPBRACK).setParseAction(lambda toks: Include(toks.header))

    def __init__(self, header, parent=''):
        self.header = header
        self.parent = parent

    def __repr__(self):
        return "#include <{}>".format(self.header)
class NginxParser(object): """ A class that parses nginx configuration with pyparsing """ # constants left_bracket = Literal("{").suppress() right_bracket = Literal("}").suppress() semicolon = Literal(";").suppress() space = White().suppress() key = Word(alphanums + "_/") value = CharsNotIn("{};,") location = CharsNotIn("{};," + string.whitespace) # modifier for location uri [ = | ~ | ~* | ^~ ] modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~") # rules assignment = (key + Optional(space + value) + semicolon) block = Forward() block << Group( Group(key + Optional(space + modifier) + Optional(space + location)) + left_bracket + Group(ZeroOrMore(Group(assignment) | block)) + right_bracket) script = OneOrMore(Group(assignment) | block).ignore(pythonStyleComment) def __init__(self, source): self.source = source def parse(self): """ Returns the parsed tree. """ return self.script.parseString(self.source) def as_list(self): """ Returns the list of tree. """ return self.parse().asList()
class RawNginxParser(object):
    # pylint: disable=expression-not-assigned
    """A class that parses nginx configuration with pyparsing."""

    # constants
    space = Optional(White())
    nonspace = Regex(r"\S+")
    left_bracket = Literal("{").suppress()
    right_bracket = space.leaveWhitespace() + Literal("}").suppress()
    semicolon = Literal(";").suppress()
    key = Word(alphanums + "_/+-.")
    # e.g. $host, $request_uri
    dollar_var = Combine(Literal('$') + nonspace)
    # an 'if' condition: anything inside one pair of parentheses
    condition = Regex(r"\(.+\)")
    # Matches anything that is not a special character AND any chars in single
    # or double quotes
    value = Regex(r"((\".*\")?(\'.*\')?[^\{\};,]?)+")
    location = CharsNotIn("{};," + string.whitespace)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    comment = space + Literal('#') + restOfLine()
    # key [value] ;   (value defaults to None when absent)
    assignment = space + key + Optional(space + value, default=None) + semicolon
    location_statement = space + Optional(modifier) + Optional(space + location + space)
    if_statement = space + Literal("if") + space + condition + space
    map_statement = space + Literal(
        "map") + space + nonspace + space + dollar_var + space
    block = Forward()
    block << Group(
        # key could for instance be "server" or "http", or "location" (in which case
        # location_statement needs to have a non-empty location)
        (Group(space + key + location_statement) ^ Group(if_statement)
         ^ Group(map_statement)).leaveWhitespace()
        + left_bracket
        + Group(ZeroOrMore(Group(comment | assignment) | block)
                + space).leaveWhitespace()
        + right_bracket)
    script = OneOrMore(Group(comment | assignment) ^ block) + space + stringEnd
    # Keep tabs intact (nginx configs often use them for indentation).
    script.parseWithTabs()

    def __init__(self, source):
        self.source = source

    def parse(self):
        """Returns the parsed tree."""
        return self.script.parseString(self.source)

    def as_list(self):
        """Returns the parsed tree as a list."""
        return self.parse().asList()
def grammar(self, value):
    """(Re)build the token grammar from the separators configured on *value*.

    `value` must expose ``list_sep``, ``range_sep`` and ``step_sep``;
    duplicate separators are collapsed.  The resulting name -> parser-element
    mapping is stored in ``self.tokens``.
    """
    # Deduplicate the separators; Or() and CharsNotIn() are insensitive to
    # the (arbitrary) set ordering.
    seps = list({value.list_sep, value.range_sep, value.step_sep})
    quotedstr = pp.quotedString.setParseAction(pp.removeQuotes)
    # Pass explicit pairs so insertion order is guaranteed on every Python
    # version (a dict literal only preserves order from 3.7 onwards, which
    # would defeat the point of using an OrderedDict).
    self.tokens = OrderedDict([
        ('regex', Literal('r').suppress() + quotedstr),
        ('quoted', quotedstr),
        ('colnum', Grammar.integer),
        ('sep', Or(seps)),
        ('name', CharsNotIn(seps)),
    ])
def initGrammar(self):
    """Build the wiki-markup grammar; the entry point is stored in self.N_S."""
    L_Equals = Word("=")
    N_comment = htmlComment()
    N_name = CharsNotIn("{}|[]")
    # Plain text: everything up to the next markup token.
    N_simpleText = SkipTo(
        oneOf(["{{", "|", "[[", "]]", "}}", "'''", "<ref"]))
    N_elements = Forward()
    # '''bold/italic''' spans.
    N_apostrofs = QuotedString("'''").setParseAction(
        lambda s, l, t: {'APOSTROFS': t})
    # [[target|label|...]] links.
    N_link = nestedExpr(
        opener="[[",
        closer="]]",
        content=N_name +
        Optional("|" + delimitedList(CharsNotIn("[]"), delim="|"))
    ).setParseAction(self.genLink)
    # == heading ==
    N_header = Group(L_Equals + SkipTo("=") + L_Equals).setParseAction(
        lambda s, l, t: {'HEADER': t})
    N_template = Forward()
    N_key = CharsNotIn("{}|=")
    # N_value = ZeroOrMore(CharsNotIn("{}|")) + ZeroOrMore(N_template + ZeroOrMore(CharsNotIn("{}|"))).setResultsName('VALUE')
    # Template parameters: |key=value|key=value|...  (key and value optional)
    N_keyValues = "|" + delimitedList(
        Group(Optional(N_key) + Optional("=" + N_elements)), delim="|")
    N_label_content = N_template | ("{{" + OneOrMore("!") + "}}") | CharsNotIn("{}|")
    N_label = nestedExpr(opener="{", closer="}", content=N_label_content)
    # {{name|params}} templates (recursive via N_elements in the values).
    N_template << nestedExpr(
        opener="{{",
        closer="}}",
        content=N_name + Optional(N_keyValues)).setParseAction(self.genTemplate)
    # <ref>...</ref> citations.
    ref_start, ref_end = makeHTMLTags("ref")
    N_named_ref = ref_start + SkipTo(ref_end) + ref_end
    N_named_ref.setParseAction(lambda s, l, t: {'REF': t})
    N_element = N_comment | N_simpleText | N_named_ref | N_apostrofs | N_link | N_header | N_template | N_label
    # N_ref = nestedExpr( opener="<ref>", closer="</ref>", content=N_elements).setParseAction( lambda s,l,t: {'REF' : t} )
    N_elements << ZeroOrMore(N_element)
    self.N_S = N_elements
def initGrammar(self):
    """Build the wiki-markup grammar; the entry point is stored in self.N_S."""
    N_comment = htmlComment().setParseAction(self.genComment)
    N_name = CharsNotIn("{}|[]")
    # [[target|label|...]] links.
    N_link = nestedExpr(
        opener="[[",
        closer="]]",
        content=N_name +
        Optional("|" + delimitedList(CharsNotIn("[]"), delim="|"))
    ).setParseAction(self.genLink).setDebug(True)
    L_Equals = Word("=")
    # == heading ==
    N_header = Group(L_Equals + SkipTo("=") + L_Equals).setParseAction(
        self.genHeader)
    N_element = Forward()
    N_template = Forward().setDebug(True)
    N_key = CharsNotIn("{}|=")
    N_internalText = CharsNotIn("{}|=<[") + SkipTo(
        Literal("{{") | Literal("[[") | Literal("<!--") | Literal("<ref")
        | Literal("|") | Literal("}}"))  #CharsNotIn("{}|[]<")
    N_insideElements = OneOrMore(N_element | N_internalText).setDebug(True)
    # Template parameter: [key][=value-elements]
    N_keyValue = Group(
        Optional(N_key) +
        Optional(Literal("=") + N_insideElements)).setDebug(True)
    N_keyValues = "|" + delimitedList(N_keyValue, delim="|")
    N_keyValues.setDebug(True)
    #N_label_content = N_template | ("{{"+OneOrMore("!")+"}}") | CharsNotIn("{}|")
    #N_label = nestedExpr( opener="{", closer="}", content = N_label_content)
    # {{name|params}} templates (recursive via N_insideElements above).
    N_template << nestedExpr(
        opener="{{",
        closer="}}",
        content=N_name + Optional(N_keyValues)).setParseAction(self.genTemplate)
    #ref_start, ref_end = makeHTMLTags("ref")
    #N_named_ref = ref_start + SkipTo(ref_end) + ref_end
    #N_named_ref.setParseAction( lambda s,l,t: {'REF' : t} )
    # BUG FIX: the original *rebound* the name N_element to a new MatchFirst
    # here, which left the Forward referenced by N_insideElements empty
    # forever (so nothing could ever match inside a template value).
    # Filling the Forward with << keeps the recursive reference intact.
    N_element << (N_comment | N_link | N_header | N_template)
    N_element.setDebug(True)
    self.N_S = N_element
def pattern():
    """pyparsing pattern """
    def remember_location(s, loc, tocs):
        """pyparsing callback. Saves path position in the original string"""
        return [(loc, tocs[0])]

    any_path = CharsNotIn(" \t")("path")
    any_path.setParseAction(remember_location)

    long_path = CharsNotIn(" \t", min=2)("path")
    long_path.setParseAction(remember_location)

    slash_path = Combine(Literal('/') + Optional(CharsNotIn(" \t")))("path")
    slash_path.setParseAction(remember_location)

    # Either an explicit "f <path>", or a path that is long enough (>= 2
    # chars) or starts with '/'; optionally followed by a line number.
    command = (Literal('f ') + Optional(White()) + Optional(any_path)) \
        ^ long_path \
        ^ slash_path
    pat = command + Optional(White() + Word(nums)("line"))
    pat.leaveWhitespace()
    pat.setParseAction(CommandOpen.create)
    return pat
def get_parser():
    """Build a parser that flattens attribute/index paths into a single
    PERIOD-joined string (e.g. ``a[b].c`` -> ``a.b.c``)."""
    from pyparsing import CharsNotIn, ParserElement, Suppress, ZeroOrMore

    ParserElement.enablePackrat()
    # A segment is any run of characters that is not a separator.
    segment = CharsNotIn(PERIOD + LBRACK + RBRACK)
    indexed = Suppress(LBRACK) + segment + Suppress(RBRACK)
    dotted = Suppress(PERIOD) + segment
    parser = segment + ZeroOrMore(dotted ^ indexed)
    parser.setParseAction(PERIOD.join)
    return parser
def build_parser():
    """Construct the grammar for a simple INI file: ``[section]`` headers
    followed by ``key=value`` lines; '#' comments are ignored."""
    def strip_value(string, location, tokens):
        return tokens[0].strip()

    key = Word(alphanums).setResultsName('key')
    value = restOfLine.setParseAction(strip_value).setResultsName('value')
    entry = Group(key + Suppress(Literal('=')) + value)
    entries = Group(OneOrMore(entry)).setResultsName('properties')
    header = (Suppress('[') + OneOrMore(CharsNotIn(']'))
              + Suppress(']')).setResultsName('section')
    section = Group(header + entries)
    ini_file = ZeroOrMore(section).setResultsName('sections')
    ini_file.ignore(pythonStyleComment)
    return ini_file
def _getControls(self, index=1):
    """Extract fields from the comma-separated lines in self.doc[1:].

    With a truthy *index*, return that column (stripped) from each line;
    with index=0, return all columns of every line.
    """
    field = QuotedString('"') | CharsNotIn(',')
    fields = delimitedList(field)
    lines = self.doc[1:]
    if not index:
        return [[part.strip() for part in fields.parseString(line)]
                for line in lines]
    result = []
    for line in lines:
        result.append(fields.parseString(line)[index].strip())
    return result
def query_from_string(cls, filter_string):
    """
    Parse *filter_string* into a boolean filter expression and evaluate it.

    The grammar accepts ``key=value`` comparisons (key/value optionally
    double-quoted) combined with ``and`` / ``or``, both left-associative.
    Raises RuntimeError when the string cannot be parsed.

    TODO:
    * handle values with " via: a.b.c.d="hello\"world"
    * handle keys with " via: a.\"b.c="yeah"
    * handle key with __ in it
    """
    filter_string_raw = filter_string
    filter_string = str(filter_string)
    # Every distinct whitespace character actually present in the input.
    unicode_spaces = list(set(
        str(c) for c in filter_string if c.isspace()))
    unicode_spaces_other = unicode_spaces + [u'(', u')', u'=', u'"']
    # A bare atom may not contain whitespace, parens, '=' or quotes.
    atom = CharsNotIn(unicode_spaces_other)
    atom_inside_quotes = CharsNotIn(u'"')
    atom_quoted = Literal('"') + Optional(atom_inside_quotes) + Literal(
        '"')
    EQUAL = Literal('=')
    # key = [value]   (the value is optional)
    grammar = (atom_quoted | atom) + EQUAL + Optional((atom_quoted | atom))
    grammar.setParseAction(cls.BoolOperand)
    boolExpr = infixNotation(
        grammar,
        [
            ("and", 2, opAssoc.LEFT, cls.BoolAnd),
            ("or", 2, opAssoc.LEFT, cls.BoolOr),
        ],
    )
    try:
        # Wrap in parens so a bare comparison still parses as a group.
        res = boolExpr.parseString('(' + filter_string + ')')
    except ParseException:
        raise RuntimeError(u"Invalid query %s" % filter_string_raw)
    if len(res) > 0:
        return res[0].result
    raise RuntimeError("Parsing the filter_string %s went terribly wrong" % filter_string)
def __init__(self):
    """Build the pyparsing extractors for RAxML info/result output.

    The instance attributes (self.alpha, self.lnl, ...) each skip ahead to
    their label and collect the values that follow; self._dash_f_e_parser
    handles the per-partition "-f e" output format.
    """
    # Labels marking the values of interest in the main output.
    self.ALPHA_LABEL = Regex(r'alpha\[\d+\]:')
    self.LNL_LABEL = Literal('Final GAMMA-based Score of best tree')
    self.FRQ_LABEL = Regex(r'Base frequencies: (?=\d+)') ^ Regex(
        r'ML estimate base freqs\[\d+\]:')
    self.NAMES_LABEL = Regex(r'Partition: \d+ with name:\s+')
    self.RATES_LABEL = Regex(r'rates\[\d+\].+?:')
    self.MODEL_LABEL = Literal('Substitution Matrix:')
    # One alpha value per partition.
    self.alpha = OneOrMore(
        Suppress(SkipTo(self.ALPHA_LABEL)) + Suppress(self.ALPHA_LABEL) +
        FLOAT)
    self.lnl = Suppress(SkipTo(self.LNL_LABEL)) + Suppress(
        self.LNL_LABEL) + FLOAT
    # Base-frequency vectors, one group per partition.
    self.frq = OneOrMore(
        Group(
            Suppress(SkipTo(self.FRQ_LABEL)) + Suppress(self.FRQ_LABEL) +
            OneOrMore(FLOAT)))
    # Partition names: the rest of each label's line.
    self.names = OneOrMore(
        Suppress(SkipTo(self.NAMES_LABEL)) + Suppress(self.NAMES_LABEL) +
        CharsNotIn('\n') + Suppress(LineEnd()))
    self.rates = OneOrMore(
        Group(
            Suppress(SkipTo(self.RATES_LABEL)) +
            Suppress(self.RATES_LABEL) + OneOrMore(FLOAT)))
    self.model = Suppress(SkipTo(self.MODEL_LABEL)) + Suppress(
        self.MODEL_LABEL) + WORD

    # Labels for the "-f e" (model evaluation) output format.
    MODEL_LABEL = Literal('Substitution Matrix:')
    SCORE_LABEL = Literal('Final GAMMA likelihood:')
    DESC_LABEL = Literal('Model Parameters of Partition')
    NAME_LEADIN = Literal(', Name:')
    DATATYPE_LEADIN = Literal(', Type of Data:')
    ALPHA_LEADIN = Literal('alpha:')
    TREELENGTH_LEADIN = Literal('Tree-Length:')
    RATES_LABEL = Regex(r'rate \w <-> \w:')
    FREQS_LABEL = Regex(r'freq pi\(\w\):')
    model = Suppress(SkipTo(MODEL_LABEL)) + Suppress(MODEL_LABEL) + WORD
    likelihood = Suppress(
        SkipTo(SCORE_LABEL)) + Suppress(SCORE_LABEL) + FLOAT
    description = Suppress(
        SkipTo(DESC_LABEL)) + Suppress(DESC_LABEL) + INT + Suppress(
            NAME_LEADIN) + SPACEDWORD + Suppress(DATATYPE_LEADIN) + WORD
    alpha = Suppress(ALPHA_LEADIN) + FLOAT
    rates = Suppress(RATES_LABEL) + FLOAT
    freqs = Suppress(FREQS_LABEL) + FLOAT
    # Models, overall likelihood, then one description group per partition
    # (tree length itself is discarded).
    self._dash_f_e_parser = (Group(OneOrMore(model)) + likelihood + Group(
        OneOrMore(
            Group(description + alpha + Suppress(TREELENGTH_LEADIN) +
                  Suppress(FLOAT) + Group(OneOrMore(rates)) +
                  Group(OneOrMore(freqs))))))
def grammar():
    """Build the grammar used to split an SQL dump into CREATE TABLE,
    ALTER TABLE ... FOREIGN KEY, comment, and other statements.

    Each alternative triggers its *_act parse action.
    """
    # Balanced parentheses (needed for nested type/constraint definitions).
    parenthesis = Forward()
    parenthesis <<= "(" + ZeroOrMore(CharsNotIn("()") | parenthesis) + ")"
    # One column/constraint definition.
    field_def = OneOrMore(Word(alphanums + "_\"'`:-") | parenthesis)
    field_def.setParseAction(field_act)
    tablename_def = (Word(alphas + "`_") | QuotedString("\""))
    field_list_def = field_def + ZeroOrMore(Suppress(",") + field_def)
    field_list_def.setParseAction(field_list_act)
    create_table_def = Literal(
        "CREATE") + "TABLE" + tablename_def.setResultsName(
            "tableName") + "(" + field_list_def.setResultsName(
                "fields") + ")" + ";"
    create_table_def.setParseAction(create_table_act)
    # ALTER TABLE ONLY <t> ADD CONSTRAINT <c> FOREIGN KEY (<k>)
    #     REFERENCES <t2> (<col>) [DEFERRABLE];
    add_fkey_def = Literal(
        "ALTER") + "TABLE" + "ONLY" + tablename_def.setResultsName(
            "tableName") + "ADD" + "CONSTRAINT" + Word(
                alphanums + "_") + "FOREIGN" + "KEY" + "(" + Word(
                    alphanums + "_").setResultsName("keyName") + ")" + "REFERENCES" + Word(
                        alphanums + "_").setResultsName("fkTable") + "(" + Word(
                            alphanums + "_").setResultsName("fkCol") + ")" + Optional(
                                Literal("DEFERRABLE")) + ";"
    add_fkey_def.setParseAction(add_fkey_act)
    # Any other statement, up to its terminating ';'.
    other_statement_def = OneOrMore(CharsNotIn(";")) + ";"
    other_statement_def.setParseAction(other_statement_act)
    # '--' line comments are treated like other statements.
    comment_def = "--" + ZeroOrMore(CharsNotIn("\n"))
    comment_def.setParseAction(other_statement_act)
    return OneOrMore(comment_def | create_table_def | add_fkey_def |
                     other_statement_def)
class Include:
    """
    Rule to parse #include directives.
    """
    # Grammar: INCLUDE '<' header '>'; the parse action wraps the captured
    # header text in an Include instance.
    rule = (INCLUDE + LOPBRACK + CharsNotIn('>')("header") +
            ROPBRACK).setParseAction(lambda t: Include(t.header))

    # NOTE: `header` is the text captured by CharsNotIn('>') — a plain
    # string, not a parser element; the original `CharsNotIn` annotation
    # was misleading.
    def __init__(self, header: str, parent: str = ''):
        self.header = header
        self.parent = parent

    def __repr__(self) -> str:
        return "#include <{}>".format(self.header)
def getSignatures(signatureFile):
    """Parse a method-signature database file.

    Each entry looks like
    ``[#tag ...] return_type class_name:method_name(params) { body }``;
    // comments are ignored.  Returns a list of Signature objects.
    """
    # Read the whole file; the with-statement guarantees the handle is
    # closed even if reading fails (the original leaked it on error).
    with open(signatureFile, "rb") as fp:
        content = fp.read()

    litteral = Word(alphas + nums + "_")
    # A regex-style method name: anything up to the opening paren.
    regex_pattern = CharsNotIn("(")
    tags = (OneOrMore(Group('#' + litteral))).setResultsName("tags")
    # '<=' subtype-of, '=' exact type.
    hierarchy_modifier = oneOf("<= =")
    java_type = Group(
        Optional(hierarchy_modifier) +
        Word(alphas + nums + "_" + "." + "[" + "]" + "$"))
    return_type = (java_type | "*").setResultsName("return_type")
    method_name = (litteral | "<init>" | regex_pattern).setResultsName("method_name")
    parameter = Group((java_type + litteral))
    parameter_list = (delimitedList(parameter) | "*").setResultsName("parameters")
    body_instruction = (CharsNotIn("{;}"))
    signature_body = (delimitedList(body_instruction, ";")).setResultsName("signature_body")
    class_name = (java_type | Group(Optional(hierarchy_modifier) +
                                    CharsNotIn(":"))).setResultsName("class_name")
    signature_stmt = Group((Optional(tags) + return_type + class_name + ":" + method_name +
                            "(" + Optional(parameter_list) + ")" +
                            "{" + Optional(signature_body) + "}"))
    grammar = OneOrMore(signature_stmt)
    grammar.ignore(dblSlashComment)
    result = grammar.parseString(content)
    return [Signature(sig) for sig in result]
def __init__(self, debug=False):
    """Build the OFC document parser and store it in self.parser.

    When `debug` is true, the ofxtools debug actions are attached.
    """
    # Root aggregate — recursive: an aggregate contains aggregates and/or
    # leaf content elements.
    aggregate = Forward().setResultsName("OFC")
    aggregate_open_tag, aggregate_close_tag = self._tag()
    content_open_tag = self._tag(closed=False)
    # Leaf: <TAG>value  (value runs to the next tag or end of line)
    content = Group(content_open_tag + CharsNotIn("<\r\n"))
    aggregate << Group(aggregate_open_tag \
        + Dict(OneOrMore(aggregate | content)) \
        + aggregate_close_tag)
    self.parser = Group(aggregate).setResultsName("document")
    if (debug):
        self.parser.setDebugActions(ofxtools._ofxtoolsStartDebugAction,
                                    ofxtools._ofxtoolsSuccessDebugAction,
                                    ofxtools._ofxtoolsExceptionDebugAction)
def __init__(self, debug=False):
    """Build the OFX file parser (headers + SGML-ish body); stores it in
    self.parser.  When `debug` is true, debug actions are attached.
    """
    # Parser definition for headers: one "Name: value" per line; the value
    # may be empty.
    header = Group(
        Word(alphas) + Literal(":").suppress() +
        Optional(CharsNotIn("\r\n")))
    headers = Dict(OneOrMore(header)).setResultsName("header")

    # Parser definition for OFX body — recursive: an aggregate contains
    # aggregates and/or leaf content elements.
    aggregate = Forward().setResultsName("OFX")
    aggregate_open_tag, aggregate_close_tag = self._tag()
    content_open_tag = self._tag(closed=False)
    # Leaf: <TAG>value  (value runs to the next tag or end of line)
    content = Group(content_open_tag + CharsNotIn("<\r\n"))
    aggregate << Group(aggregate_open_tag \
        + Dict(ZeroOrMore(aggregate | content)) \
        + aggregate_close_tag)
    body = Group(aggregate).setResultsName("body")

    # The parser as a whole
    self.parser = headers + body
    if (debug):
        self.parser.setDebugActions(_ofxStartDebugAction,
                                    _ofxSuccessDebugAction,
                                    _ofxExceptionDebugAction)
def Syntax():
    """Build the grammar for '<name> = <value>' assignments, where a value
    is true/false, a number, a double-quoted string, or a braced dict of
    further assignments.  Returns the top-level Dict of assignments.
    """
    # Local helper: comma-separated list with an optional trailing comma.
    # NOTE: this deliberately shadows pyparsing's delimitedList inside this
    # function.
    delimitedList = (lambda x: x + ZeroOrMore(Suppress(",") + x) +
                     Optional(Suppress(",")))
    dbl_quoted = Suppress('"') + Optional(CharsNotIn('"')) + Suppress('"')
    lelem = Word(alphanums + "-_")
    relem = Literal("true") | Literal("false") | Word(nums + ".") | dbl_quoted
    dict_ = Forward()
    assignment = lelem + Suppress("=") + (relem | (dict_))
    # A dict is { assignments }, { "string", number }, or { dict, dict, ... }.
    dict_ << Suppress("{") + Group(
        Dict(delimitedList(Group(assignment)))
        | (dbl_quoted + Suppress(",") + Word(nums + "."))
        | delimitedList(dict_)) + Suppress("}")
    return Dict(delimitedList(Group(assignment)))
def parse_final():
    '''
    Takes the parsed data (list of lists) and performs substitutions
    according to the mappings specified at the start of the file.

    Applies the first mapping to the first half of the document, and the
    second mapping to the second half.  The substitutions are performed by
    trying to match every field against the grammar below, and substituting
    whenever the grammar's rule is satisfied.

    Returns the substituted data, or False when the input could not be
    parsed at all.
    '''
    first_mapping = []
    second_mapping = []
    parsed_data = []
    if not parse_data(first_mapping, second_mapping, parsed_data):
        return False
    # A field looks like "<prefix>(<key>)<rest>"; <key> is looked up in the
    # applicable mapping.
    SUBS = CharsNotIn('(') + "(" + Word(alphanums) + ")" + restOfLine
    for row in parsed_data:
        # Rows longer than 6 fields belong to the first half of the data,
        # so the first mapping applies; otherwise the second one does.
        mapping = first_mapping if len(row) > 6 else second_mapping
        for i in range(len(row)):
            try:
                parsed = SUBS.parseString(row[i])
                parsed[0] = mapping[parsed[0]]
                row[i] = ''.join(parsed)
            except Exception:
                # Field doesn't match the pattern (or has no mapping entry):
                # leave it unchanged.  (Was a bare except; narrowed so that
                # KeyboardInterrupt/SystemExit are not swallowed.)
                pass
    return parsed_data
def makeNewickParser():
    """Build a parser for Newick-formatted trees.

    Returns the grammar's parseString callable.
    """
    # pyparsing
    from pyparsing import Combine, Optional, Literal, CaselessLiteral, \
        Word, alphanums, \
        nums, oneOf, Group, Dict, Forward, \
        ParseResults, CharsNotIn, ZeroOrMore

    # literals
    lparen = Literal("(").suppress()
    rparen = Literal(")").suppress()
    colon = Literal(":").suppress()
    # BUG FIX: this was Literal(":") — a semicolon is ";".  (The top-level
    # rule below still matches the terminator with Word(";"), which accepts
    # runs of semicolons, exactly as before.)
    semicolon = Literal(";").suppress()
    comma = Literal(",").suppress()
    point = Literal(".")
    e = CaselessLiteral("E")

    # terminal rules
    name = Word(alphanums + "_" + "-" + "." + "+")
    # Signed decimal with optional fraction and exponent.
    fnumber = Combine(Word("+-" + nums, nums) +
                      Optional(point + Optional(Word(nums))) +
                      Optional(e + Word("+-" + nums, nums)))
    dist = fnumber
    bootstrap = fnumber

    # recursive rules: a subtree is either "(children)" or a leaf name,
    # optionally followed by attached data (e.g. ":dist").
    subtree = Forward()
    subtreelist = Forward()
    subtree << \
        Group(
            (
                (lparen + subtreelist + rparen).setResultsName("subtree") |
                name.setResultsName("name")
            ) +
            Optional(
                CharsNotIn(",);").setResultsName("data")
            )
        )
    subtreelist << subtree + Optional(comma + subtreelist)

    # top level rule
    tree = subtree + Word(";").suppress()
    return tree.parseString
def parse_variadic_templates(txt):
    """Scan *txt* for template declarations containing a variadic parameter
    pack and return the pyparsing results (original text of each match).
    """
    template_param_type = Word(alphas)
    template_variadic = Literal('...')
    template_id = Word(alphas)
    # e.g. "typename ... Args"
    template_variadic_param = Group(template_param_type + template_variadic +
                                    template_id)
    template_param = Group(template_param_type + template_id)
    # template_params = Group ( delimitedList( template_variadic_param | Optional(template_param) ) )
    # Parameter list containing exactly one variadic pack, anywhere in it.
    template_params = (Optional(OneOrMore(template_param + ',')) +
                       template_variadic_param +
                       Optional(OneOrMore(',' + template_param)))
    template_params_no_variadic = (template_param +
                                   Optional(OneOrMore(',' + template_param)))
    # Optional outer template<...> (member of a class template) followed by
    # the variadic template<...>.
    template_decl = Optional("template" + Literal("<") +
                             template_params_no_variadic +
                             Literal(">")) + "template" + Literal(
                                 "<") + template_params + Literal(">")
    block_content = Forward()
    # A balanced brace block, optionally followed by a single ';'.
    block = nestedExpr('{', '}', content=block_content) + Literal(';') * (0, 1)
    block_content << (CharsNotIn('{}') | block)
    # The declaration plus everything up to and including its body.
    decl = originalTextFor(template_decl + CharsNotIn('{') + block)
    template_file = Forward()
    code_block = decl | White() | Word(printables)
    # NOTE(review): the self-referencing alternative can never be reached,
    # since Optional(OneOrMore(code_block)) always succeeds — TODO confirm
    # the intended recursion.
    template_file << (Optional(OneOrMore(code_block)) | template_file)
    parsed = template_file.parseString(txt)
    return parsed
def process_task_lists(content: str) -> str:
    """Rewrite markdown task lists in *content* via replace_list.

    A task list is one or more lines of the form
    ``<indent><marker> [ ]|[x] <text>`` appearing at the start of the
    string or immediately after a blank line.
    """
    # One item line: everything up to (and including) newline or end.
    line_rest = Group(CharsNotIn('\n') + (StringEnd() | '\n')).leaveWhitespace()
    checkbox = oneOf(['[ ]', '[x]'])
    # Bullet ('+', '-', '*') or ordered ('1.') list marker.
    bullet = Suppress(oneOf(['+', '-', '*']) | Word(nums) + '.')
    # Leading indentation (spaces/tabs), kept as a group.
    leading_ws = Group(ZeroOrMore(oneOf([' ', '\t']).leaveWhitespace()))
    task_item = Group(leading_ws + bullet + checkbox + line_rest)
    # The list must open the document or follow a blank line.
    lead_in = Suppress(StringStart() | Literal('\n\n')).leaveWhitespace()
    task_list = lead_in + OneOrMore(task_item)
    task_list.setParseAction(replace_list)
    return task_list.transformString(content)
def pattern():
    """pyparsing pattern of the command """
    # delayed import, performance optimization
    from pyparsing import CharsNotIn, Literal, Optional, White

    def remember_location(s, loc, tocs):
        # Save the path's position in the original string alongside its text.
        return [(loc, tocs[0])]

    file_path = CharsNotIn(" \t")("path")
    file_path.setParseAction(remember_location)
    # "s " optionally followed by whitespace and a path.
    pat = Literal('s ') + Optional(White()) + Optional(file_path)
    pat.leaveWhitespace()
    pat.setParseAction(CommandSaveAs.create)
    return pat
SUPPORT_MISSING_VALUES = True

# Grammar for RFC 5424 syslog messages, transcribed from the RFC's ABNF.
nilvalue = Word("-")
digit = Regex("[0-9]{1}")
nonzero_digit = Regex("[1-9]{1}")
printusascii = printables
sp = White(" ", exact=1)
octet = Regex("[\x00-\xFF]")
utf_8_string = Regex("[\x00-\xFF]*")
# UTF-8 byte-order mark; a MSG may optionally start with it.
BOM = "\xef\xbb\xbf"
bom = Regex(BOM)
msg_utf8 = bom + utf_8_string
msg_any = utf_8_string
msg = Combine(Or([msg_utf8, msg_any])).setResultsName("MSG")
# SD-NAME: 1..32 characters, none of '=', ' ', ']', '"'.
sd_name = CharsNotIn('= ]"', 1, 32)
param_name = sd_name.setResultsName("SD_PARAM_NAME")
param_value = QuotedString(quoteChar='"', escChar="\\", multiline=True)
param_value = param_value.setResultsName("SD_PARAM_VALUE")
sd_id = sd_name.setResultsName("SD_ID")
sd_param = Group(param_name + Regex("=") + param_value)
sd_params = Group(ZeroOrMore(Group(sp + sd_param.setResultsName("SD_PARAM"))))
# SD-ELEMENT: [id param="value" ...]
sd_element = Group("[" + sd_id + sd_params.setResultsName("SD_PARAMS") + "]")
sd_element = sd_element.setResultsName("SD_ELEMENT")
sd_elements = Group(OneOrMore(sd_element))
# STRUCTURED-DATA is either the nil value '-' or one or more SD elements.
structured_data = Or([nilvalue, sd_elements.setResultsName("SD_ELEMENTS")])
structured_data = structured_data.setResultsName("STRUCTURED_DATA")
time_hour = Regex("0[0-9]|1[0-9]|2[0-3]")
time_minute = Regex("[0-5][0-9]")
time_second = time_minute
# BUG FIX: raw string — "\." is an invalid escape sequence in a normal
# string literal (SyntaxWarning on modern Python, future SyntaxError).
time_secfrac = Regex(r"\.[0-9]{1,6}")
SUPPORT_MISSING_VALUES = True

# Grammar for RFC 5424 syslog messages, transcribed from the RFC's ABNF.
nilvalue = Word("-")
digit = Regex("[0-9]{1}")
nonzero_digit = Regex("[1-9]{1}")
printusascii = printables
sp = White(" ", exact=1)
octet = Regex('[\x00-\xFF]')
utf_8_string = Regex('[\x00-\xFF]*')
# UTF-8 byte-order mark; a MSG may optionally start with it.
BOM = '\xef\xbb\xbf'
bom = Regex(BOM)
msg_utf8 = bom + utf_8_string
msg_any = utf_8_string
msg = Combine(Or([msg_utf8, msg_any])).setResultsName('MSG')
# SD-NAME: 1..32 characters, none of '=', ' ', ']', '"'.
sd_name = CharsNotIn('= ]"', 1, 32)
param_name = sd_name.setResultsName('SD_PARAM_NAME')
param_value = QuotedString(quoteChar='"', escChar='\\', multiline=True)
param_value = param_value.setResultsName('SD_PARAM_VALUE')
sd_id = sd_name.setResultsName('SD_ID')
sd_param = Group(param_name + Regex('=') + param_value)
sd_params = Group(ZeroOrMore(Group(sp+sd_param.setResultsName('SD_PARAM'))))
# SD-ELEMENT: [id param="value" ...]
sd_element = Group('['+sd_id+sd_params.setResultsName('SD_PARAMS')+']')
sd_element = sd_element.setResultsName('SD_ELEMENT')
sd_elements = Group(OneOrMore(sd_element))
# STRUCTURED-DATA is either the nil value '-' or one or more SD elements.
structured_data = Or([nilvalue, sd_elements.setResultsName('SD_ELEMENTS')])
structured_data = structured_data.setResultsName('STRUCTURED_DATA')
time_hour = Regex('0[0-9]|1[0-9]|2[0-3]')
time_minute = Regex('[0-5][0-9]')
time_second = time_minute
# BUG FIX: raw string — '\.' is an invalid escape sequence in a normal
# string literal (SyntaxWarning on modern Python, future SyntaxError).
time_secfrac = Regex(r'\.[0-9]{1,6}')