def init_parser(self):
    INTEGER = Word(nums)
    INTEGER.setParseAction(lambda x: int(x[0]))
    header = INTEGER("species_count") + INTEGER("sequence_length") + \
        Suppress(restOfLine)
    header.setParseAction(self.set_header)
    sequence_name = Word(
        alphas + nums + "!#$%&\'*+-./;<=>?@[\\]^_`{|}~", max=100)

    # Take a copy and disallow line breaks in the bases
    bases = self.BASES.copy()
    bases.setWhitespaceChars(" \t")
    seq_start = sequence_name("species") + bases(
        "sequence") + Suppress(LineEnd())
    seq_start.setParseAction(self.set_seq_start)
    seq_start_block = OneOrMore(seq_start)
    seq_start_block.setParseAction(self.set_start_block)

    seq_continue = bases("sequence") + Suppress(LineEnd())
    seq_continue.setParseAction(self.set_seq_continue)
    seq_continue_block = Suppress(LineEnd()) + OneOrMore(seq_continue)
    seq_continue_block.setParseAction(self.set_continue_block)

    return header + seq_start_block + ZeroOrMore(seq_continue_block)

def parse_choices():
    indent_stack = [1]
    stmt = Forward()

    identifier = Word(alphas, alphanums + "_")
    number = Word(nums)
    string = QuotedString('"', escChar="\\")
    value = number | string

    comment = Group("#" + restOfLine)
    command = Group("@" + identifier + OneOrMore((identifier | value), stopOn=LineEnd()))

    choice_param = ((identifier + identifier) | (identifier + value))
    choice_params = Group(ZeroOrMore(choice_param, stopOn=LineEnd()))
    choice_value = value
    choice_label = value
    choice = Group(choice_value + choice_label + choice_params)

    list_name = identifier
    list_def = "list" + list_name + choice_params + Suppress(":")
    list_body = indentedBlock(ungroup(stmt), indent_stack)
    list_block = Group(list_def + list_body)

    stmt <<= (comment | command | list_block | choice)
    stmts = OneOrMore(stmt)
    return stmts

def _compile_grammar(self):  # type: () -> ParserElement
    """
    Takes the individual grammars from each registered directive and compiles
    them into a full test fixture grammar whose callback methods are the bound
    methods on this class instance.

    :return: The full PyParsing grammar for test fixture files.
    """
    grammars = [
        (LineEnd().suppress()).setParseAction(
            functools.partial(self._finalize_test_case)
        )
    ]

    # directives
    for directive_class in get_all_directives():
        grammars.append(
            LineStart() +
            directive_class.get_full_grammar().setParseAction(
                functools.partial(self._ingest_directive, directive_class)
            ) +
            LineEnd()
        )

    return StringStart() + OneOrMore(MatchFirst(grammars)) + StringEnd()

def __init_parser(self):
    varname = Word(alphas + nums + T_US + T_MIN + T_DOT)(P_VARNAME)
    sname = Word(alphas + nums + T_I + T_S + T_US)(P_SNAME)
    boolvalue = Literal(T_TRUE) | Literal(T_FALSE)
    ivalue = Word(nums)
    hexvalue = Word(nums + T_A + T_B + T_C + T_D + T_E + T_F)
    bvvalue = Combine(ivalue + T_US + ivalue)
    value = (boolvalue | bvvalue | hexvalue)(P_VALUE)

    comment = (T_COM + restOfLine + LineEnd())(P_COMMENT)
    emptyline = (ZeroOrMore(White(' \t')) + LineEnd())(P_EMPTY)

    init = (Literal(T_I)(P_TYPE) + Literal(T_CL) +
            ((varname + Literal(T_EQ) + value) | (boolvalue)(P_VALUE)))(P_INIT)
    inits = OneOrMore(init)

    state = (Literal(T_S)(P_TYPE) + ((varname)(P_ID)) + Literal(T_CL) +
             ((varname + Literal(T_EQ) + value) | (boolvalue)(P_VALUE)))(P_STATE)
    states = OneOrMore(state)

    trans = (sname(P_START) + Literal(T_ARROW) + sname(P_END) + restOfLine)(P_TRANS)
    transs = OneOrMore(trans)

    ets = OneOrMore(comment | inits | states | transs | emptyline)
    return ets

def rename_completed_tests():
    settings_path = pathlib.Path("settings.py")
    vardict = {}
    with open(settings_path) as f:
        code = compile(f.read(), settings_path.name, 'exec')
        exec(code, vardict)
    returned_exam_folder = vardict["returned_exam_folder"]

    cw = os.getcwd()
    os.chdir(returned_exam_folder)
    lst = []
    for filename in sorted(os.listdir(".")):
        f, e = os.path.splitext(filename)
        if e.lower() != ".txt":
            continue
        handle = codecs.open(filename, "rb", "latin-1")
        exam = handle.read()
        handle.close()
        md5_ = md5(exam.encode("latin-1")).hexdigest()

        name = (Literal("Nome") + SkipTo(LineEnd()).setResultsName("name"))
        id = (Literal("(mec)") + SkipTo(LineEnd()).setResultsName("mec"))
        for data, dataStart, dataEnd in name.scanString(exam):
            parsed_student_name = data["name"].strip()
        for data, dataStart, dataEnd in id.scanString(exam):
            parsed_mec = data["mec"].strip().lower()

        mec_in_filename = re.search(r"[\d|A|a|e|E]\d{4,5}", filename)
        if mec_in_filename:
            mec_in_filename = mec_in_filename.group()
        if parsed_mec.upper().startswith("NA"):
            parsed_mec = mec_in_filename.lower()

        new_name = parsed_student_name.replace(" ", "_") + "_" + parsed_mec + "_" + md5_ + ".txt"
        #if unicode(filename,"utf-8") != new_name:
        if not re.search(r"_([a-fA-F\d]{32})\.(txt|TXT)", filename):
            print("rename", filename, end=' ')
            print("to", new_name)
            lst.append((filename, new_name))

    for filename, newname in lst:
        os.rename(filename, newname)
    os.chdir(cw)

def dom_document_setup():
    crn_DWC = "".join(
        [x for x in ParseElementEnhance.DEFAULT_WHITE_CHARS if x != "\n"])
    ParseElementEnhance.setDefaultWhitespaceChars(crn_DWC)

    W = Word
    G = Group
    S = Suppress
    O = Optional
    L = Literal

    identifier = W(alphas, alphanums + "_")
    number = W(nums, nums)
    domain = G(OneOrMore((G(identifier + O("*")) | "?" | "+")))
    dotparen = G(OneOrMore(W("().+", max=1)))
    structure = domain + OneOrMore(LineEnd().suppress()) + dotparen
    sequence = G(
        S("sequence") + identifier + S(":") + number +
        OneOrMore(LineEnd().suppress()))
    molecule = G(identifier + S(":") + OneOrMore(LineEnd().suppress()) + structure)
    document = StringStart() + ZeroOrMore(LineEnd().suppress()) + G(
        ZeroOrMore(sequence)) + G(
        OneOrMore(molecule + OneOrMore(LineEnd().suppress()))) + StringEnd()
    document.ignore(pythonStyleComment)
    return document

def getGrammar(self):
    if self.grammar is None:
        value = self.getValueGrammar()
        identifier = self.getIdentifier()
        idlink = Group(
            identifier + identifier + identifier
        ).setParseAction(self.convertToTriple)("idLink")
        islink = Group(
            identifier + Suppress('is') + identifier + Suppress('of') + identifier
        ).setParseAction(self.convertToTriple)("isLink")
        haslink = Group(
            identifier + Suppress('has') + identifier + identifier
        ).setParseAction(self.convertToTriple)("hasLink")
        link = Group(islink | haslink | idlink).setParseAction(self.handleGroup)("link")
        comment = Group(
            Suppress("#") + ZeroOrMore(Word(printables)) + LineEnd() | LineEnd()
        ).setParseAction(self.handleComment)('comment*')
        line = Group(
            value | link
        ).setParseAction(self.handleGroup)('line')
        links = Group(
            OneOrMore(line + LineEnd() | comment)
        ).setParseAction(self.handleLines)('links*')
        self.grammar = links
    return self.grammar

def __init_parser(self):
    varname = (Combine(
        Literal("'") +
        Word(alphas + nums + T_US + T_MIN + T_DOT + "$" + "[" + "]" + ":") +
        Literal("'")) |
        Word(alphas + nums + T_US + T_MIN + T_DOT))(P_VARNAME)
    comment = Group(T_COM + restOfLine + LineEnd())(P_COMMENT)
    emptyline = Group(ZeroOrMore(White(' \t')) + LineEnd())(P_EMPTY)
    varsize = (Word(nums))(P_VARSIZE)
    parlist = (ZeroOrMore(varname) + ZeroOrMore((Literal(T_CM) + varname)))
    modtype = (Word(alphas + T_US + nums) + Literal(T_OP) + parlist +
               Literal(T_CP))(P_MODTYPE)

    basictype = Forward()
    basictype << (
        Combine(Literal(T_BV) + Literal(T_OP) + varsize + Literal(T_CP)) |
        Combine(Literal(T_BOOL)) |
        Combine(Literal(T_ARRAY) + Literal(T_OP) + basictype + Literal(T_CM) +
                basictype + Literal(T_CP)))

    vartype = (basictype | modtype)(P_VARTYPE)
    vartypedef = (vartype)(P_VARTYPEDEF)
    vardef = varname + Literal(T_CL) + vartypedef + Literal(T_SC)
    basicvardef = (varname + Literal(T_CL) + basictype)(P_VARTYPEDEF)
    parlistdef = (ZeroOrMore(basicvardef) + ZeroOrMore(
        (Literal(T_CM) + basicvardef)))(P_PARDEF)
    moddef = (Literal(T_DEF) + Word(alphas + T_US + nums) + Literal(T_OP) +
              parlistdef + Literal(T_CP) + Literal(T_CL))(P_MODDEF)

    operators = (T_NEG + T_MIN + T_PLUS + T_EQ + T_NEQ + T_LT + T_LTE +
                 T_IMPL + T_BOOLSYM + T_ITE)
    formula = (Word(alphas + nums + T_US + T_SP + T_DOT + T_OP + T_CP +
                    T_OB + T_CB + "'" + operators) + Literal(T_SC))(P_FORMULA)

    vardefs = (Literal(T_VAR) + (OneOrMore(vardef)(P_VARDEFS)))(P_VARS)
    statedefs = (Literal(T_STATE) + (OneOrMore(vardef)(P_STATEDEFS)))(P_STATES)
    inputdefs = (Literal(T_INPUT) + (OneOrMore(vardef)(P_INPUTDEFS)))(P_INPUTS)
    outputdefs = (Literal(T_OUTPUT) + (OneOrMore(vardef)(P_OUTPUTDEFS)))(P_OUTPUTS)

    inits = (Literal(T_INIT) + (OneOrMore(formula))(P_FORMULAE))(P_INIT)
    transs = (Literal(T_TRANS) + (OneOrMore(formula))(P_FORMULAE))(P_TRANS)
    invars = (Literal(T_INVAR) + (OneOrMore(formula))(P_FORMULAE))(P_INVAR)

    sts = Group((Optional(moddef) +
                 OneOrMore(vardefs | statedefs | inputdefs | outputdefs |
                           inits | transs | invars | emptyline)))(P_STS)

    return (OneOrMore(sts))(P_STSS)

def build_parser():
    """
    Build a pyparsing parser for our custom topology description language.

    :return: A pyparsing parser.
    :rtype: pyparsing.MatchFirst
    """
    ParserElement.setDefaultWhitespaceChars(' \t')
    nl = Suppress(LineEnd())
    inumber = Word(nums).setParseAction(lambda l, s, t: int(t[0]))
    fnumber = (Combine(
        Optional('-') + Word(nums) + '.' + Word(nums) +
        Optional('E' | 'e' + Optional('-') + Word(nums))
    )).setParseAction(lambda toks: float(toks[0]))
    boolean = (CaselessLiteral('true') | CaselessLiteral('false')
               ).setParseAction(lambda l, s, t: t[0].casefold() == 'true')
    comment = Literal('#') + restOfLine + nl
    text = QuotedString('"')
    identifier = Word(alphas, alphanums + '_')
    empty_line = LineStart() + LineEnd()

    item_list = ((text | fnumber | inumber | boolean) +
                 Optional(Suppress(',')) + Optional(nl))
    custom_list = (Suppress('(') + Optional(nl) +
                   Group(OneOrMore(item_list)) + Optional(nl) +
                   Suppress(')')).setParseAction(lambda tok: tok.asList())

    attribute = Group(
        identifier('key') + Suppress(Literal('=')) +
        (custom_list | text | fnumber | inumber | boolean | identifier)('value') +
        Optional(nl))
    attributes = (Suppress(Literal('[')) + Optional(nl) +
                  OneOrMore(attribute) + Suppress(Literal(']')))

    node = identifier('node')
    port = Group(node + Suppress(Literal(':')) + (identifier | inumber)('port'))
    link = Group(
        port('endpoint_a') + Suppress(Literal('--')) + port('endpoint_b'))

    environment_spec = (attributes + nl).setResultsName(
        'env_spec', listAllMatches=True)
    nodes_spec = (Group(
        Optional(attributes)('attributes') +
        Group(OneOrMore(node))('nodes')) + nl).setResultsName(
            'node_spec', listAllMatches=True)
    ports_spec = (Group(
        Optional(attributes)('attributes') +
        Group(OneOrMore(port))('ports')) + nl).setResultsName(
            'port_spec', listAllMatches=True)
    link_spec = (Group(
        Optional(attributes)('attributes') +
        link('links')) + nl).setResultsName(
            'link_spec', listAllMatches=True)

    statements = OneOrMore(
        comment |
        link_spec |
        ports_spec |
        nodes_spec |
        environment_spec |
        empty_line)

    return statements

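# Usage sketch for build_parser() above (illustrative only): the topology text
# below is an assumption inferred from the grammar, not an example taken from
# the original project. It shows an attribute block in brackets, a node spec,
# a port spec, and a link spec joined with '--'.
def _demo_build_parser():
    description = (
        "[shell=vtysh] sw1 sw2\n"   # node spec: attributes + two nodes
        "[up=True] sw1:1\n"         # port spec: attributes + one port
        "sw1:1 -- sw2:1\n"          # link spec: two ports joined by '--'
    )
    return build_parser().parseString(description)
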
def getToken(self):
    tableCell = Regex(r"(?P<text>(.|(\\\n))*?)\|\|")
    tableCell.setParseAction(self.__convertTableCell)

    tableRow = LineStart() + Literal("||") + OneOrMore(
        tableCell).leaveWhitespace() + Optional(LineEnd())
    tableRow.setParseAction(self.__convertTableRow)

    table = LineStart() + Regex(
        r"\|\| *(?P<params>.+)?") + LineEnd() + OneOrMore(tableRow)
    table = table.setParseAction(self.__convertTable)("table")

    return table

def init_parser(self):
    sequence_name = Word(
        alphas + nums + "!#$%&\'*+-./;?@[\\]^_`{|}~", max=100)
    sequence_name.setParseAction(self.set_name)
    name_block = Suppress(">") + sequence_name("name") + Suppress(LineEnd())

    # Take a copy and disallow line breaks in the bases
    bases = self.BASES.copy()

    seq = bases("sequence") + Suppress(LineEnd())
    seq.setParseAction(self.set_sequence)

    name_and_seq = name_block + seq
    return OneOrMore(name_and_seq)

def parse_survey():
    indent_stack = [1]
    stmt = Forward()

    identifier = Word(alphas, alphanums + "_")
    number = Word(nums)
    doublequoted_string = QuotedString('"', escChar="\\")
    singlequoted_string = QuotedString("'", escChar="\\")
    string = doublequoted_string | singlequoted_string
    value = number | string

    # TODO Parse expressions properly.
    # op = oneOf("+ - * /")
    comp_op = oneOf("= != >= > <= <")

    comment = Group("#" + restOfLine)
    command = Group("@" + identifier + OneOrMore((identifier | value), stopOn=LineEnd()))

    variable = Group("${" + identifier + "}")
    dot = Literal(".")
    expr = Group((variable | dot) + comp_op + value)
    if_cond = "if" + (expr | value)
    if_body = indentedBlock(pp.ungroup(stmt), indent_stack)
    if_block = Group(if_cond + Suppress(":") + if_body)

    q_name = identifier
    q_type = Group(OneOrMore(identifier))
    q_label = string
    q_param = ((identifier + identifier) | (identifier + value))
    q_params = ZeroOrMore(q_param, stopOn=LineEnd())
    question = Group(q_name + q_type + Suppress(":") + Optional(q_label) + q_params)

    group_params = Group(ZeroOrMore(q_param, stopOn=LineEnd()) + Optional(if_cond))
    group_label = string
    group_def = ("group" + Group(identifier + Optional(group_label)) +
                 group_params + Optional(if_cond) + Suppress(":"))
    group_body = indentedBlock(pp.ungroup(stmt), indent_stack)
    group_block = Group(group_def + group_body)

    repeat_def = ("repeat" + Group(identifier + Optional(group_label)) +
                  group_params + Suppress(":"))
    repeat_block = Group(repeat_def + group_body)

    # TODO Add + Suppress(LineEnd())?
    stmt <<= (comment | command | if_block | group_block | repeat_block | question)
    stmts = OneOrMore(stmt)
    return stmts

def songs_pyparsing(fh):
    r"""
    >>> import os
    >>> filename = os.path.dirname(__file__)
    >>> filename = os.path.join(filename, "data/Various-Pop.m3u")
    >>> with open(filename, "rt", encoding="utf8") as fh:
    ...     songs = songs_pyparsing(fh)
    >>> songs[0].title, songs[0].seconds, songs[0].filename
    ('Various - Two Tribes', 236, 'Various\\Frankie Goes To Hollywood\\02-Two Tribes.ogg')
    >>> songs[-1].title, songs[-1].seconds, songs[-1].filename
    ('The Police - Walking On The Moon', 303, 'Various\\Sting & The Police 1997\\06-Walking On The Moon.ogg')

    >>> lines = []
    >>> lines.append("#EXTM3U")
    >>> lines.append("#EXTINF:140,The Beatles - Love Me Do")
    >>> lines.append("Beatles\\Greatest Hits\\01-Love Me Do.ogg")
    >>> lines.append("#EXTINF:-1,The Beatles - From Me To You")
    >>> lines.append("Beatles\\Greatest Hits\\02-From Me To You.ogg")
    >>> import io
    >>> data = io.StringIO("\n".join(lines))
    >>> songs = songs_ply(data)
    >>> len(songs) == 2
    True
    >>> songs[0].title, songs[0].seconds
    ('The Beatles - Love Me Do', 140)
    >>> songs[1].title, songs[1].seconds
    ('The Beatles - From Me To You', -1)
    """
    def add_song(tokens):
        songs.append(Song(tokens.title, tokens.seconds, tokens.filename))
        #songs.append(Song(**tokens.asDict()))

    songs = []
    title = restOfLine("title")
    filename = restOfLine("filename")
    seconds = Combine(Optional("-") + Word(nums)).setParseAction(
        lambda tokens: int(tokens[0]))("seconds")
    info = Suppress("#EXTINF:") + seconds + Suppress(",") + title
    entry = info + LineEnd() + filename + LineEnd()
    entry.setParseAction(add_song)
    parser = Suppress("#EXTM3U") + OneOrMore(entry)
    try:
        parser.parseFile(fh)
    except ParseException as err:
        print("parse error: {0}".format(err))
        return []
    return songs

def __init__(self):
    self.locator = Empty().setParseAction(self.locator_parse_action)('location')

    import_entry = Group(Suppress(Keyword('import')) - self.locator +
                         SkipTo(LineEnd())('path'))
    imports = ZeroOrMore(import_entry)('imports')

    array = Group(Suppress('[') + Suppress(']'))
    type_ = Group(self.locator +
                  (Word(alphas, alphanums) +
                   ZeroOrMore(Suppress('.') - Word(alphas, alphanums)))('name') -
                  ZeroOrMore(array)('array'))
    identifier = Word(alphas, alphanums)

    param = Group(self.locator + type_('type') + identifier('name'))
    params_list = Group(Optional(param + ZeroOrMore(Suppress(',') + param)))
    method = Group(self.locator + type_('ret_type') + identifier('name') +
                   Suppress('(') + params_list('params') + Suppress(');'))
    method_list = Group(ZeroOrMore(method))

    package_name = Group(identifier + ZeroOrMore(Suppress('.') + identifier))
    bases_list = type_ + ZeroOrMore(Suppress(',') + type_)
    interface = Group(self.locator + Keyword('interface')('kind') - identifier('name') +
                      Optional(Suppress(':') - bases_list)('bases') +
                      Suppress('{') + method_list('methods') + Suppress('}'))

    integer_constant = Word(nums).setParseAction(lambda s, l, t: int(t[0]))
    enum_value = Group(self.locator + identifier('name') -
                       Optional(Suppress('=') + integer_constant('value')))
    enum_values_list = Group(Optional(enum_value + ZeroOrMore(Suppress(',') + enum_value)))
    enum = Group(self.locator + Keyword('enum')('kind') - identifier('name') +
                 Suppress('{') + enum_values_list('values') + Suppress('}'))

    struct_member = Group(self.locator + type_('type') + identifier('name') + Suppress(';'))
    struct_members_list = Group(ZeroOrMore(struct_member))
    struct = Group(self.locator + Keyword('struct')('kind') - identifier('name') +
                   Suppress('{') + struct_members_list('members') + Suppress('}'))

    package = (Suppress(Keyword('package')) + package_name('package') + Suppress('{') +
               Group(ZeroOrMore(interface | enum | struct))('types') + Suppress('}'))

    self.grammar = imports + package
    self.grammar.ignore(cppStyleComment)
    self.grammar.parseWithTabs()

def compute(self, text, verbose=True):
    vbar = Literal("|")
    eol = LineEnd().suppress()

    # Colors
    result = text
    endTag = ((vbar + (Literal("r") | Literal("R")) | eol))
    parser = (Suppress(vbar + (Literal("c") | Literal("C"))) +
              Word(hexnums, exact=8).setResultsName("hex") +
              SkipTo(endTag).setResultsName("content") +
              Suppress(endTag)).addParseAction(self.colorize)
    new_result = parser.transformString(result)
    result = parser.transformString(new_result)
    while (new_result != result):
        new_result = result
        result = parser.transformString(new_result)

    # Normalize line breakers
    result = result.replace("|n", "\n")
    result = result.replace("\n\n\n", "\n\n").strip("\n")

    if self.formatType == "html" and verbose:
        result = result.replace("$bullet;", "• ")
        result = result.replace("\r\n", "<br>").replace("\n", "<br>")

    return result

def construct_bnf(self, assignment="=", commentChar="#"):
    """ The EBNF for a normal Dune style ini file. """
    # A comment starts with the comment literal and affects the rest of the line
    comment = Literal(commentChar).suppress() + Optional(restOfLine).suppress()

    # A section is guarded by square brackets
    section = Literal("[") + Word(alphanums + "._").setParseAction(self.setGroup) + Literal("]")

    # A key can consist of anything that is not an equal sign
    key = Word(alphanums + "_.")

    # define a command
    command = Group(Literal("|").suppress() + oneOf(self._commands) +
                    ZeroOrMore(Word(alphanums + "_{}", excludeChars=[commentChar, "|"])))

    # A value may contain virtually anything
    value = Combine(OneOrMore(
        QuotedString(quoteChar='"', escChar='\\').setParseAction(self.escapeQuoted) |
        Word(printables + " ", excludeChars=[commentChar, '"', "|"])))

    # A key/value pair is a concatenation of those three parts
    keyval = (key + Literal(assignment).suppress() + value +
              ZeroOrMore(command)).setParseAction(self.setKeyValuePair)

    # We also allow reading data that is not in key/value pair form.
    # We do lose the embeddedness of our language at this point.
    # An alternative would be to place commands behind a ## directive.
    nonkeyval = (value + OneOrMore(command)).setParseAction(self.setNonKeyValueLine)

    # Introduce the include statement here, although I do not like it anymore.
    include = oneOf("include import") + Word(
        printables, excludeChars=commentChar).setParseAction(self.processInclude)

    # Define the priority between the different sorts of lines. Important: keyval >> nonkeyval
    content = keyval | section | include | nonkeyval
    line = Optional(content) + Optional(comment) + LineEnd()

    return line

class Scheme(Fragment):
    """
    Fragment which defines where the input sections defined in a Sections fragment
    are going to end up: the target. The targets are markers in a linker script
    template (see LinkerScript in linker_script.py).

    [scheme:<name>]
    entries:
        sections1 -> target1
        ...
    """

    ENTRY = Fragment.IDENTIFIER + Suppress('->') + Fragment.IDENTIFIER + LineEnd().suppress()

    @staticmethod
    def parse_entry(toks):
        # section, target
        return toks[0], toks[1]

    @staticmethod
    def parse(s, loc, toks):
        this = toks[0]

        name = this[0]
        entries = {entry for entry in this[1] if entry}

        if not entries:
            raise ParseFatalException(s, loc, 'Scheme entries shouldn\'t be empty')

        return Scheme(name, entries)

def __init__(self):
    ints = Word(nums)
    EOL = LineEnd().suppress()

    # ip address of device
    ipAddress = Optional(
        delimitedList(ints, ".", combine=True) + Suppress(":"))

    # priority
    priority = Suppress("<") + ints + Suppress(">")

    # timestamp
    month = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3)
    day = ints
    hour = Combine(ints + ":" + ints + ":" + ints)
    timestamp = month + day + hour

    # hostname
    hostname = Word(alphas + nums + "_" + "-" + ".")

    # daemon
    daemon = Word(alphas + nums + "/" + "-" + "_" + ".") + Optional(
        Suppress("[") + ints + Suppress("]")) + Suppress(":")

    # message
    message = Regex(".*")

    # pattern build
    self.__pattern = ipAddress + priority + timestamp + \
        hostname + daemon + message + StringEnd() | EOL

    self.__pattern_without_daemon = ipAddress + priority + \
        timestamp + hostname + message + StringEnd() | EOL

def __init__(self):
    from pyparsing import (ParserElement, StringEnd, LineEnd, Literal,
                           pythonStyleComment, ZeroOrMore, Suppress, Optional,
                           Combine, OneOrMore, Regex, oneOf, QuotedString,
                           Group, ParseException)

    ParserElement.setDefaultWhitespaceChars("\t ")

    EOF = StringEnd()
    EOL = ~EOF + LineEnd()  # EOL must not match on EOF

    escape = Literal("\\")
    comment = pythonStyleComment
    junk = ZeroOrMore(comment | EOL).suppress()

    # word (i.e: single argument string)
    word = Suppress(escape + EOL + Optional(comment)) \
        | Combine(OneOrMore(
            escape.suppress() + Regex(".") |
            QuotedString("'", escChar='\\', multiline=True) |
            QuotedString('"', escChar='\\', multiline=True) |
            Regex("[^ \t\r\n\f\v\\\\$&<>();\|\'\"`]+") |
            Suppress(escape + EOL)))

    # redirector (aka bash file redirectors, such as "2>&1" sequences)
    fd_src = Regex("[0-2]").setParseAction(lambda t: int(t[0]))
    fd_dst = Suppress("&") + fd_src
    # "[n]<word" || "[n]<&word" || "[n]<&digit-"
    fd_redir = (Optional(fd_src, 0) + Literal("<") |
                Optional(fd_src, 1) + Literal(">")) + \
        (word | (fd_dst + Optional("-")))
    # "&>word" || ">&word"
    obj = (oneOf("&> >&") + word)
    full_redir = obj.setParseAction(lambda t: ("&", ">", t[-1]))
    # "<<<word" || "<<[-]word"
    here_doc = Regex("<<(<|-?)") + word
    # "[n]>>word"
    add_to_file = (Optional(fd_src | Literal("&"), 1) + Literal(">>") + word)
    # "[n]<>word"
    fd_bind = Optional(fd_src, 0) + Literal("<>") + word

    obj = (fd_redir | full_redir | here_doc | add_to_file | fd_bind)
    redirector = obj.setParseAction(lambda token: tuple(token))

    # single command (args/redir list)
    command = Group(OneOrMore(redirector | word))

    # logical operators (section splits)
    semicolon = Suppress(";") + junk
    connector = (oneOf("&& || |") + junk) | semicolon

    # pipeline, aka logical block of interconnected commands
    pipeline = junk + Group(command + ZeroOrMore(connector + command) +
                            Optional(semicolon))

    # define object attributes
    self.LEXER = pipeline.ignore(comment) + EOF
    self.parseException = ParseException

def parser(cls, width, height):
    """Parse a BZW file.

    For now, we're only supporting a subset of BZW's allobjects.
    """
    comment = '#' + SkipTo(LineEnd())
    bzw = ZeroOrMore(Box.parser() | Base.parser()).ignore(comment)
    bzw.setParseAction(lambda toks: cls(width, height, toks))
    return bzw

def _parse_atat_lattice(lattice_in):
    """Parse an ATAT-style `lat.in` string.

    The parsed string will be in three groups: (Coordinate system) (lattice) (atoms)
    where the atom group is split up into subgroups, each describing the position
    and atom name.
    """
    float_number = Regex(r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?'
                         ).setParseAction(lambda t: [float(t[0])])
    vector = Group(float_number + float_number + float_number)
    angles = vector
    vector_line = vector + Suppress(LineEnd())
    coord_sys = Group((vector_line + vector_line + vector_line) |
                      (vector + angles + Suppress(LineEnd())))
    lattice = Group(vector + vector + vector)
    atom = Group(vector + Group(OneOrMore(Word(alphas, alphanums + '_'))))
    atat_lattice_grammar = coord_sys + lattice + Group(OneOrMore(atom))
    # parse the input string and convert it to a POSCAR string
    return atat_lattice_grammar.parseString(lattice_in)

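# Usage sketch for _parse_atat_lattice() above (illustrative only): the lat.in
# string and species names below are assumptions, not data from the original
# project, and the sketch assumes pyparsing's default whitespace settings.
def _demo_parse_atat_lattice():
    example_lat_in = (
        "3.6 3.6 3.6 90 90 90\n"   # coordinate system as a b c alpha beta gamma
        "1 0 0\n"                  # three lattice vectors
        "0 1 0\n"
        "0 0 1\n"
        "0 0 0 Cu\n"               # atoms: fractional position + species name
        "0.5 0.5 0.5 Au\n"
    )
    # Returns three groups: coordinate system, lattice vectors, and atoms.
    return _parse_atat_lattice(example_lat_in)
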
def _items(self, items, name="Transaction"):
    item_list = []
    for (code, label) in items.items():
        item = self._item(code, label)
        item_list.append(item)
    return Group(OneOrMore(Or(item_list)) +
                 oneOf('^EUR ^').setResultsName('Currency') +
                 LineEnd().suppress()
                 ).setResultsName(name)

def __init__(self):
    ints = Word(nums)
    word = Word(alphas)
    EOL = LineEnd().suppress()
    SOL = LineStart().leaveWhitespace()
    blankline = SOL + LineEnd()

    # ip address of device
    ipAddress = Optional(
        delimitedList(ints, ".", combine=True) + Suppress(":"))

    # Received message
    rec_msg = Suppress(OneOrMore(word)) + Suppress(Literal("'"))

    # priority
    priority = Suppress("<") + ints + Suppress(">")

    # timestamp
    month = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3)
    day = ints
    hour = Combine(ints + ":" + ints + ":" + ints)
    timestamp = month + day + hour

    # hostname
    hostname = Word(alphas + nums + "_" + "-" + ".")

    # appname
    appname = Word(alphas + "/" + "-" + "_" + ".") + Optional(
        Suppress("[") + ints + Suppress("]")) + Suppress(":")

    # message
    message = Regex(".*")

    # pattern build
    self.__pattern = ipAddress + priority + timestamp + \
        hostname + appname + message + StringEnd() | EOL

    self.__pattern_without_appname = ipAddress + priority + \
        timestamp + hostname + message + StringEnd() | EOL

def instance():
    lit_e = CaselessLiteral('E')
    plusorminus = Literal('+') | Literal('-')
    number = Word(nums)
    integer = Combine(Optional(plusorminus) + number).setParseAction(lambda t: int(t[0]))
    index = integer.copy().addParseAction(index_check(0))
    floatnumber = Combine(integer + Optional(Literal('.') + Optional(number)) +
                          Optional(lit_e + integer)).setParseAction(
                              lambda t: float(t[0]))

    #comment = Suppress("%") + Word(alphanums + " ")
    comment = Regex(r"%.*").setName("comment").suppress()
    linend = Or([comment, LineEnd()]).suppress()
    section_end = (Literal('#') + LineEnd()).suppress()

    vertex = (Group(
        OneOrMore(floatnumber('point') + OneOrMore(White()).suppress())) +
        linend)('vertex')
    vertex_header = (Keyword('VERTEX') + linend).suppress()
    vertex_section = (vertex_header + Group(OneOrMore(vertex))('vertices') +
                      section_end)

    simplex = (Group(
        OneOrMore(index('index') + OneOrMore(White()).suppress())) +
        linend)('simplex')
    simplex_header = (Keyword('SIMPLEX') + linend).suppress()
    simplex_section = (simplex_header + Group(OneOrMore(simplex))('simplices') +
                       section_end)

    boundarysegment = (Group(
        index('id') + OneOrMore(index('index') + OneOrMore(White()).suppress())) +
        linend)('boundarysegment')
    boundarysegment_header = (Keyword('BOUNDARYSEGMENTS') + linend).suppress()
    boundarysegment_section = (
        boundarysegment_header +
        Dict(OneOrMore(boundarysegment))('boundarysegments') + section_end)

    sections = Each([vertex_section, simplex_section, boundarysegment_section])

    dgf_header = (Keyword('DGF') + linend).suppress()
    dgf = (dgf_header + Dict(sections) + OneOrMore(section_end))('dgf')

    return dgf

def get_full_grammar(cls):
    return (
        super(ActionExpectsNotPresentDirective, cls).get_full_grammar() +
        Literal('expect not present') +
        ':' +
        Literal('attribute value') +
        ':' +
        VarNameGrammar +
        Optional(~Suppress(LineEnd()) + ':')
    )

class NginxParser(object):
    """
    A class that parses nginx configuration with pyparsing
    """

    # constants
    left_bracket = Literal("{").suppress()
    right_bracket = Literal("}").suppress()
    semicolon = Literal(";").suppress()
    space = White().suppress()
    key = Word(alphanums + "_/")
    value = CharsNotIn("{};")
    value2 = CharsNotIn(";")
    location = CharsNotIn("{};," + string.whitespace)
    ifword = Literal("if")
    setword = Literal("set")
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    assignment = key + Optional(space + value) + semicolon
    comment = pythonStyleComment + LineEnd()
    setblock = setword + OneOrMore(space + value2) + semicolon
    block = Forward()
    ifblock = Forward()
    subblock = Forward()

    ifblock << (Group(ifword + Optional(space) + Optional(value) + SkipTo('{')) +
                left_bracket +
                Group(subblock) +
                right_bracket)

    subblock << ZeroOrMore(
        Group(comment) | Group(assignment) | block | Group(ifblock) | setblock)

    block << Group(
        Group(key + Optional(space + modifier) + Optional(space) + Optional(location)) +
        left_bracket +
        Group(subblock) +
        right_bracket)

    # script = OneOrMore(Group(assignment) | block).ignore(pythonStyleComment)
    script = OneOrMore(Group(comment) | Group(assignment) | block)

    def __init__(self, source):
        self.source = source

    def parse(self):
        """
        Returns the parsed tree.
        """
        return self.script.parseString(self.source)

    def as_list(self):
        """
        Returns the parsed tree as a list.
        """
        return self.parse().asList()

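# Usage sketch for NginxParser above (illustrative only): the configuration
# snippet is an assumption, not taken from the original project.
def _demo_nginx_parser():
    config = (
        "server {\n"
        "    listen 80;\n"
        "    # static assets\n"
        "    location /static {\n"
        "        root /var/www;\n"
        "    }\n"
        "}\n"
    )
    return NginxParser(config).as_list()
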
def expr(self) -> ParserElement:
    NL = LineEnd().suppress()
    SEP = (Literal("||") | Literal("|")).suppress()
    ROW_BREAK = NL + SEP | NL + NL | StringEnd()
    IGNORE = (Link(**self.init_kwargs).expr |
              MailTo(**self.init_kwargs).expr |
              Image(**self.init_kwargs).expr |
              Mention(**self.init_kwargs).expr)

    ROW = SEP + ZeroOrMore(
        SkipTo(SEP | ROW_BREAK, ignore=IGNORE) + Optional(SEP),
        stopOn=ROW_BREAK | NL + ~SEP,
    )
    EMPTY_LINE = Combine("\n" + White(" \t", min=0) + "\n")

    return (((StringStart() + Optional("\n")) ^ Optional(EMPTY_LINE, default="\n")) +
            OneOrMore(LineStart() + Group(ROW) + NL).setParseAction(self.action) +
            (StringEnd() | Optional(LineEnd(), default="\n")))

def load_symbols_file(self, file):
    file = self.xkb_basedir / file
    try:
        return self.loaded[file]
    except KeyError:
        pass

    sections = []

    def quoted(name):
        return QuotedString(quoteChar='"', unquoteResults=True)

    # Callback, toks[0] is "foo" for xkb_symbols "foo"
    def new_symbols_section(name, loc, toks):
        assert len(toks) == 1
        sections.append(XkbSymbols(file, toks[0]))

    # Callback, toks[0] is "foo(bar)" for include "foo(bar)"
    def append_includes(name, loc, toks):
        assert len(toks) == 1
        sections[-1].includes.append(toks[0])

    EOL = LineEnd().suppress()
    SECTIONTYPE = (
        "default", "partial", "hidden",
        "alphanumeric_keys", "modifier_keys", "keypad_keys",
        "function_keys", "alternate_group",
    )
    NAME = quoted("name").setParseAction(new_symbols_section)
    INCLUDE = (lit("include") +
               quoted("include").setParseAction(append_includes) + EOL)
    # We only care about includes
    OTHERLINE = And([~lit("};"), ~lit("include") + Regex(".*")]) + EOL

    with open(file) as fd:
        types = OneOrMore(oneOf(SECTIONTYPE)).suppress()
        include_or_other = Or([INCLUDE, OTHERLINE.suppress()])
        section = (types + lit("xkb_symbols") + NAME + lit("{") +
                   OneOrMore(include_or_other) + lit("};"))
        grammar = OneOrMore(section)
        grammar.ignore(cppStyleComment)
        try:
            result = grammar.parseFile(fd)
        except ParseException as e:
            raise XkbLoader.XkbParserException(str(e))

    self.loaded[file] = sections
    return sections

def initBNF(self):
    constdecl = (CONST + NAME + VALUE).setParseAction(self.const_action)
    vardecl = (VAR + NAME + VALUE +
               Optional(COMMA + Regex("[^#\n]*"))).setParseAction(self.var_action)
    insertdecl = (INSERT + dblQuotedString +
                  LineEnd().suppress()).setParseAction(self.insert_action)

    LABEL = IDENTIFIER + COLON
    COMMANDEXP = (IDENTIFIER.setWhitespaceChars(" \t") +
                  Regex("[^#\n]*").setWhitespaceChars(" \t") +
                  LineEnd().suppress())
    COMMAND = COMMANDEXP.setParseAction(self.command_action)
    LABELEDCOMMAND = (LABEL + COMMANDEXP).setParseAction(
        self.label_command_action)

    decl = constdecl | vardecl | insertdecl | LABELEDCOMMAND | COMMAND

    self.program = ZeroOrMore(decl)
    self.program.ignore(pythonStyleComment)

def get_full_grammar(cls):
    return (
        super(ActionExpectsFieldValueDirective, cls).get_full_grammar() +
        Literal('expect') +
        CaselessLiteral('None')('data_type') +
        ':' +
        Literal('attribute value') +
        ':' +
        VarNameGrammar +
        Optional(~Suppress(LineEnd()) + ':')
    )

# htmlStripper.py
#
# Sample code for stripping HTML markup tags and scripts from
# HTML source files.
#
# Copyright (c) 2006, 2016, Paul McGuire
#
from contextlib import closing
import urllib.request, urllib.parse, urllib.error
from pyparsing import (makeHTMLTags, SkipTo, commonHTMLEntity, replaceHTMLEntity,
                       htmlComment, anyOpenTag, anyCloseTag, LineEnd, OneOrMore,
                       replaceWith)

scriptOpen, scriptClose = makeHTMLTags("script")
scriptBody = scriptOpen + SkipTo(scriptClose) + scriptClose
commonHTMLEntity.setParseAction(replaceHTMLEntity)

# get some HTML
targetURL = "http://wiki.python.org/moin/PythonDecoratorLibrary"
with closing(urllib.request.urlopen(targetURL)) as targetPage:
    targetHTML = targetPage.read().decode("UTF-8")

# first pass, strip out tags and translate entities
firstPass = (htmlComment | scriptBody | commonHTMLEntity |
             anyOpenTag | anyCloseTag).suppress().transformString(targetHTML)

# first pass leaves many blank lines, collapse these down
repeatedNewlines = LineEnd() + OneOrMore(LineEnd())
repeatedNewlines.setParseAction(replaceWith("\n\n"))
secondPass = repeatedNewlines.transformString(firstPass)

print(secondPass)