def _build_csv_parser():
    separator = pp.Suppress(':')
    key = pp.Word(pp.printables, excludeChars=':')
    value = pp.Regex(r'[^\n\r]*') + pp.LineEnd().suppress()
    block_name = key + separator + pp.LineEnd().suppress()
    key_value = key + separator + value

    header = (pp.LineStart().suppress() + pp.Word(pp.nums) +
              pp.ZeroOrMore(pp.White().suppress() + pp.Word(pp.nums)) +
              pp.LineEnd().suppress())
    csv_header = (pp.delimitedList(pp.Word(pp.printables, excludeChars=',')) +
                  pp.LineEnd().suppress())
    csv_row = (pp.delimitedList(pp.Word(pp.nums + ';.+-e_') | pp.Literal('custom')) +
               pp.LineEnd().suppress())

    indent_stack = [1]
    block = pp.Forward()
    block_body = (block | key_value)

    indented_block = pp.Dict(pp.ungroup(pp.indentedBlock(block_body, indent_stack)))
    block << (block_name + indented_block | key_value)

    return (pp.Optional(header) +
            pp.ZeroOrMore(pp.Dict(pp.Group(block))).setResultsName('meta') +
            csv_header.setResultsName('csvHeader') +
            pp.Group(pp.OneOrMore(pp.Group(csv_row))).setResultsName('csvValues'))
def _parse(mystr):
    LBRACE, RBRACE, EQUAL = map(pp.Suppress, "{}=")
    field = pp.Word(pp.printables + ' ', excludeChars='[]=')
    field.addParseAction(pp.tokenMap(str.rstrip))
    string = pp.dblQuotedString().setParseAction(pp.removeQuotes)
    number = pp.pyparsing_common.number()
    date_expr = pp.Regex(r'\d\d\d\d-\d\d-\d\d')
    time_expr = pp.Regex(r'\d\d:\d\d:\d\d\.\d\d\d')
    scalar_value = (string | date_expr | time_expr | number)

    list_marker = pp.Suppress("[]")
    value_list = pp.Forward()
    jobject = pp.Forward()

    memberDef1 = pp.Group(field + EQUAL + scalar_value)
    memberDef2 = pp.Group(field + EQUAL + jobject)
    memberDef3 = pp.Group(field + list_marker + EQUAL + LBRACE + value_list + RBRACE)
    memberDef = memberDef1 | memberDef2 | memberDef3

    value_list <<= (pp.delimitedList(scalar_value, ",") |
                    pp.ZeroOrMore(pp.Group(pp.Dict(memberDef2))))
    value_list.setParseAction(lambda t: [pp.ParseResults(t[:])])

    jobject <<= pp.Dict(LBRACE + pp.ZeroOrMore(memberDef) + RBRACE)
    # force an empty jobject to parse as an (empty) dict
    jobject.setParseAction(lambda t: t or {})

    parser = pp.OneOrMore(pp.Group(pp.Dict(memberDef)))
    return parser.parseString(mystr)
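A quick sanity check of the grammar above (input string hypothetical); each member should come back as its own group with a named key:

result = _parse('name = "widget" count = 7')
print(result[0]['name'])   # expected: widget
print(result[1]['count'])  # expected: 7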
class TestGroups(PyparsingExpressionTestCase):
    EQ = pp.Suppress('=')
    tests = [
        PpTestSpec(
            desc = "Define multiple results names in groups",
            expr = pp.OneOrMore(pp.Group(pp.Word(pp.alphas)("key")
                                         + EQ
                                         + pp.pyparsing_common.number("value"))),
            text = "range=5280 long=-138.52 lat=46.91",
            expected_list = [['range', 5280], ['long', -138.52], ['lat', 46.91]],
        ),
        PpTestSpec(
            desc = "Define multiple results names in groups - use Dict to define results names using parsed keys",
            expr = pp.Dict(pp.OneOrMore(pp.Group(pp.Word(pp.alphas)
                                                 + EQ
                                                 + pp.pyparsing_common.number))),
            text = "range=5280 long=-138.52 lat=46.91",
            expected_list = [['range', 5280], ['long', -138.52], ['lat', 46.91]],
            expected_dict = {'lat': 46.91, 'long': -138.52, 'range': 5280}
        ),
        PpTestSpec(
            desc = "Define multiple value types",
            expr = pp.Dict(pp.OneOrMore(pp.Group(pp.Word(pp.alphas)
                                                 + EQ
                                                 + (pp.pyparsing_common.number
                                                    | pp.oneOf("True False")
                                                    | pp.QuotedString("'"))))),
            text = "long=-122.47 lat=37.82 public=True name='Golden Gate Bridge'",
            expected_list = [['long', -122.47], ['lat', 37.82], ['public', 'True'],
                             ['name', 'Golden Gate Bridge']],
            expected_dict = {'long': -122.47, 'lat': 37.82, 'public': 'True',
                             'name': 'Golden Gate Bridge'}
        ),
    ]
def _build_ana_rcp_parser():
    separator = pp.Suppress(':')
    key = pp.Word(pp.printables, excludeChars=':')
    value = pp.Regex(r'[^\n\r]*') + pp.LineEnd().suppress()
    block_name = key + separator + pp.LineEnd().suppress()

    platemap_keylist = (pp.Literal('platemap_comp4plot_keylist') + separator +
                        pp.delimitedList(pp.Word(pp.alphas)))
    run_ids = (pp.Literal('run_ids') + separator +
               pp.delimitedList(pyparsing_common.integer))
    plate_id = ((pp.Literal('plate_ids') | pp.Literal('plate_id')) + separator +
                pyparsing_common.integer)
    key_value = (platemap_keylist | run_ids | plate_id | key + separator + value)

    indent_stack = [1]
    block = pp.Forward()
    block_body = (block | key_value)

    indented_block = pp.Dict(
        pp.ungroup(pp.indentedBlock(block_body, indent_stack)))
    block << (block_name + indented_block | key_value)

    return pp.OneOrMore(pp.Dict(pp.Group(block)))
class TestGroups(PyparsingExpressionTestCase):
    EQ = pp.Suppress("=")
    tests = [
        PpTestSpec(
            desc="Define multiple results names in groups",
            expr=pp.Group(
                pp.Word(pp.alphas)("key") + EQ + pp.pyparsing_common.number("value")
            )[...],
            text="range=5280 long=-138.52 lat=46.91",
            expected_list=[["range", 5280], ["long", -138.52], ["lat", 46.91]],
        ),
        PpTestSpec(
            desc="Define multiple results names in groups - use Dict to define results names using parsed keys",
            expr=pp.Dict(
                pp.Group(pp.Word(pp.alphas) + EQ + pp.pyparsing_common.number)[...]
            ),
            text="range=5280 long=-138.52 lat=46.91",
            expected_list=[["range", 5280], ["long", -138.52], ["lat", 46.91]],
            expected_dict={"lat": 46.91, "long": -138.52, "range": 5280},
        ),
        PpTestSpec(
            desc="Define multiple value types",
            expr=pp.Dict(
                pp.Group(
                    pp.Word(pp.alphas)
                    + EQ
                    + (
                        pp.pyparsing_common.number
                        | pp.oneOf("True False")
                        | pp.QuotedString("'")
                    )
                )[...]
            ),
            text="long=-122.47 lat=37.82 public=True name='Golden Gate Bridge'",
            expected_list=[
                ["long", -122.47],
                ["lat", 37.82],
                ["public", "True"],
                ["name", "Golden Gate Bridge"],
            ],
            expected_dict={
                "long": -122.47,
                "lat": 37.82,
                "public": "True",
                "name": "Golden Gate Bridge",
            },
        ),
    ]
def compile():
    LBRACE, RBRACE, LBRACK, RBRACK, COLON = map(pp.Suppress, '{}[]:')

    value = pp.Forward()

    true = pp.Keyword('true').setParseAction(pp.replaceWith(True))
    false = pp.Keyword('false').setParseAction(pp.replaceWith(False))
    null = pp.Keyword('null').setParseAction(pp.replaceWith(None))

    number = (pp.Regex(r'-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?')
              .setParseAction(pp.tokenMap(float)))
    string = (pp.Regex(r'"([ !#-\[\]-\U0010ffff]+'
                       r'|\\(?:["\\/bfnrt]|u[0-9A-Fa-f]{4}))*"')
              .setParseAction(pp.tokenMap(json_unescape)))

    items = pp.delimitedList(value)
    array = (pp.Group(LBRACK - pp.Optional(items) + RBRACK)
             .setParseAction(lambda t: t.asList()))

    member = pp.Group(string + COLON + value)
    members = pp.delimitedList(member)
    object = (pp.Dict(LBRACE - pp.Optional(members) + RBRACE)
              .setParseAction(lambda t: t.asDict()))

    value << (object | array | string | number | true | false | null)

    json = value('top') + pp.StringEnd()
    json.setDefaultWhitespaceChars(' \t\n\r')
    json.parseWithTabs()

    return lambda s: json.parseString(s)['top']
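A usage sketch for the compiled parser. `json_unescape` is defined elsewhere in the real module, so a stand-in that merely strips the surrounding quotes is substituted here to make the example self-contained; note all numbers come back as floats because of the tokenMap(float) action:

json_unescape = lambda s: s[1:-1]  # stand-in: strip quotes, ignore escape sequences
parse = compile()
print(parse('{"a": [1, 2], "ok": true}'))  # expected: {'a': [1.0, 2.0], 'ok': True}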
def __parse(self):
    lbrace, rbrace, semi, quote = map(pyparsing.Suppress, '{};"')
    ip_address = pyparsing.Combine(pyparsing.Word(pyparsing.nums)
                                   + ('.' + pyparsing.Word(pyparsing.nums)) * 3)
    hex_int = pyparsing.Word(pyparsing.hexnums, exact=2)
    mac_address = pyparsing.Combine(hex_int + (':' + hex_int) * 5)
    hdw_type = pyparsing.Word(pyparsing.alphanums)

    yyyy_mm_dd = pyparsing.Combine(
        (pyparsing.Word(pyparsing.nums, exact=4)
         | pyparsing.Word(pyparsing.nums, exact=2))
        + ('/' + pyparsing.Word(pyparsing.nums, exact=2)) * 2)
    hh_mm_ss = pyparsing.Combine(
        pyparsing.Word(pyparsing.nums, exact=2)
        + (':' + pyparsing.Word(pyparsing.nums, exact=2)) * 2)
    date_ref = (pyparsing.oneOf(list("0123456"))("weekday")
                + yyyy_mm_dd("date")
                + hh_mm_ss("time"))

    def to_datetime(tokens):
        tokens["datetime"] = datetime.strptime("%(date)s %(time)s" % tokens,
                                               "%Y/%m/%d %H:%M:%S")

    date_ref.setParseAction(to_datetime)

    starts_stmt = "starts" + date_ref + semi
    ends_stmt = "ends" + (date_ref | "never") + semi
    tstp_stmt = "tstp" + date_ref + semi
    tsfp_stmt = "tsfp" + date_ref + semi
    hdw_stmt = "hardware" + hdw_type("type") + mac_address("mac") + semi
    uid_stmt = "uid" + pyparsing.QuotedString('"')("uid") + semi
    binding_stmt = ("binding" + pyparsing.Word(pyparsing.alphanums)
                    + pyparsing.Word(pyparsing.alphanums) + semi)

    lease_statement = (starts_stmt | ends_stmt | tstp_stmt | tsfp_stmt
                       | hdw_stmt | uid_stmt | binding_stmt)
    lease_def = ("lease" + ip_address("ipaddress") + lbrace
                 + pyparsing.Dict(pyparsing.ZeroOrMore(pyparsing.Group(lease_statement)))
                 + rbrace)

    with open(self.lease_file, 'r') as file:
        parsed = lease_def.scanString(file.read())

    return parsed
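The `to_datetime` parse action is the notable trick here: `%`-style mapping formatting works directly against a `ParseResults` object because it implements `__getitem__`. A minimal stand-alone check (sample timestamp hypothetical):

from datetime import datetime
import pyparsing

yyyy_mm_dd = pyparsing.Combine(
    pyparsing.Word(pyparsing.nums, exact=4)
    + ('/' + pyparsing.Word(pyparsing.nums, exact=2)) * 2)
hh_mm_ss = pyparsing.Combine(
    pyparsing.Word(pyparsing.nums, exact=2)
    + (':' + pyparsing.Word(pyparsing.nums, exact=2)) * 2)
date_ref = yyyy_mm_dd("date") + hh_mm_ss("time")

def to_datetime(tokens):
    # ParseResults supports __getitem__, so %-formatting with a mapping works
    tokens["datetime"] = datetime.strptime(
        "%(date)s %(time)s" % tokens, "%Y/%m/%d %H:%M:%S")

date_ref.setParseAction(to_datetime)
print(date_ref.parseString("2016/03/12 01:05:30")["datetime"])
# expected: 2016-03-12 01:05:30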
def grammar(*, has_complex: bool = False) -> pp.ParserElement:
    """The Getkw recursive grammar.

    Parameters
    ----------
    has_complex : bool
        Whether to include complex numbers. Defaults to `False`.

    Returns
    -------
    A parsing grammar.
    """
    EQ, COMMA = map(pp.Suppress, "=,")
    LBRACE, RBRACE = map(pp.Suppress, "{}")

    # Define key
    key = pp.Word(pp.alphas + "_<>", pp.alphanums + "_<>")

    # A scalar value (bool, int, float, str)
    if has_complex:
        scalar = quoted_str_t ^ complex_t ^ float_t ^ int_t ^ bool_t ^ unquoted_str_t
    else:
        scalar = quoted_str_t ^ float_t ^ int_t ^ bool_t ^ unquoted_str_t

    # Coerce lists to be lists
    list_t = make_list_t(scalar)
    list_t.set_parse_action(lambda t: [t])

    # Define key-value pairs, i.e. our keywords
    pair = pp.Group(key + EQ + list_t) | pp.Group(key + EQ + scalar)

    # Define values and section recursively
    section = pp.Forward()
    values = pp.Forward()
    section << pp.Group(key + LBRACE + values + RBRACE)
    values << pp.Dict(pp.OneOrMore(section | data_t | pair))

    # Define input
    retval = pp.Dict(pp.OneOrMore(section) | pp.OneOrMore(values))
    # Ignore Python (#), C/C++ (/* */ and //), and Fortran (!) style comments
    comment = pp.cpp_style_comment | pp.python_style_comment | fortran_style_comment
    retval.ignore(comment)

    return retval
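The token set (`quoted_str_t`, `int_t`, `data_t`, `make_list_t`, ...) is defined elsewhere in the module, so here is a cut-down, self-contained sketch of just the recursive section/`pp.Dict` pattern, with a bare-bones scalar standing in:

import pyparsing as pp

EQ, LBRACE, RBRACE = map(pp.Suppress, "={}")
key = pp.Word(pp.alphas + "_<>", pp.alphanums + "_<>")
scalar = pp.pyparsing_common.number | pp.Word(pp.alphanums + "_")
pair = pp.Group(key + EQ + scalar)

section = pp.Forward()
values = pp.Forward()
section <<= pp.Group(key + LBRACE + values + RBRACE)
values <<= pp.Dict(pp.OneOrMore(section | pair))

inp = pp.Dict(pp.OneOrMore(section))
res = inp.parseString("scf { max_iter = 30 thresholds { energy = 1e-7 } }")
print(res.asDict())
# expected: {'scf': {'max_iter': 30, 'thresholds': {'energy': 1e-07}}}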
class QueryParser(collections.OrderedDict):
    TAG_TOKEN = (pp.Keyword("tag") + pp.Suppress(":")
                 + pp.Word(pp.alphas, pp.alphanums + "_"))
    SORT_TOKEN = (pp.Keyword("sort") + pp.Suppress(":")
                  + (pp.Keyword("name") | pp.Keyword("date") | TAG_TOKEN)
                  + pp.Optional(
                      pp.Suppress(":") + pp.oneOf("s n d"),
                      default="s",
                  )
                  + pp.Optional(
                      pp.Suppress(pp.Keyword("order")) + pp.Suppress(":")
                      + (pp.Keyword("asc") | pp.Keyword("desc")),
                      default="desc",
                  ))
    SEARCH_TOKEN = (
        ((pp.Keyword("name") | pp.Keyword("date")) + pp.Suppress(":")
         + (pp.Word(pp.printables)
            | pp.dblQuotedString().setParseAction(pp.removeQuotes)
            | pp.sglQuotedString().setParseAction(pp.removeQuotes)))
        | (TAG_TOKEN + pp.Optional(
            pp.Suppress(":")
            + (pp.Word(pp.printables)
               | pp.dblQuotedString().setParseAction(pp.removeQuotes)
               | pp.sglQuotedString().setParseAction(pp.removeQuotes)))))

    # TODO: date, from, to should be able to grab dates in EXIF tags
    # TODO: support quoted %c datetimes
    # TODO: support quoted datetimes with hour/minute/second individually
    DATETIME = (Date("%Y/%m/%d", DateHints.YEAR | DateHints.MONTH | DateHints.DAY)
                | Date("%Y/%m", DateHints.YEAR | DateHints.MONTH)
                | Date("%Y", DateHints.YEAR))
    FROM_TOKEN = (pp.Keyword("from") + pp.Suppress(":") + DATETIME)
    TO_TOKEN = (pp.Keyword("to") + pp.Suppress(":") + DATETIME)

    TYPE_TOKEN = (pp.Keyword("type") + pp.Suppress(":")
                  + (pp.Keyword("image") | pp.Keyword("video")))

    QUERY_TOKEN = pp.Group(SORT_TOKEN | SEARCH_TOKEN | FROM_TOKEN
                           | TO_TOKEN | TYPE_TOKEN)
    GRAMMAR = pp.Dict(pp.OneOrMore(QUERY_TOKEN))

    def __init__(self, s, grammar=GRAMMAR):
        try:
            r = grammar.setDebug(logging.getLogger().isEnabledFor(
                logging.DEBUG)).parseString(s, parseAll=True)
            logging.debug("search query parse results: %s", r)
            self.update(collections.OrderedDict(r))
        except Exception as e:
            # Exception already covers every pyparsing error
            # (ParseException, ParseFatalException, etc.), so the
            # individual classes need not be listed.
            raise QueryError("unable to parse query: %s" % e)
def load_steam_folders(self):
    if self.loaded_steam:
        return

    hkey = reg.OpenKey(reg.HKEY_CURRENT_USER, "Software\\Valve\\Steam")
    steam_path = reg.QueryValueEx(hkey, "SteamPath")[0]
    dl_folder = steam_path + "/steamapps/downloading"
    if os.path.isdir(dl_folder):
        self.directories.add(dl_folder)

    # Read the Steam vdf file that contains path strings to all
    # game install directories.
    try:
        file = open(steam_path + "/steamapps/LibraryFolders.vdf").read()
    except OSError:
        print("Unable to open {}.".format(
            steam_path + "/steamapps/LibraryFolders.vdf"))
        return  # nothing to parse if the file could not be read

    # parse Valve's weird cfg format (a looser cousin of JSON)
    # forward-declare the value of a key
    value = pp.Forward()
    # expression for our dict structure that looks like: ["key1", value]
    key_value = pp.Group(pp.QuotedString('"') + value)
    # create a parse structure for the brace-delimited blocks
    expression = (pp.Suppress('{')
                  + pp.Dict(pp.ZeroOrMore(key_value))
                  + pp.Suppress('}'))
    # a value is either a quoted string or the structure we defined,
    # which looks like this in Python:
    #   ["outer_key", { ["inner_key1", value], ["inner_key2", value] } ]
    # we can access the above as either a dict or an array.
    value <<= pp.QuotedString('"') | expression

    parser = pp.Dict(key_value)
    content = parser.parseString(file)

    # get the last pair's key; this should be the last numbered library
    # folder, so we can use it as our max nr of folders for looping.
    max_folders = int(content["LibraryFolders"][-1][0])

    # loop from 1 to max_folders (inclusive) and use each index as a
    # dictionary key whose value should be a steam library folder path.
    for i in range(1, max_folders + 1):
        libpath = content["LibraryFolders"][str(i)]
        dlpath = libpath + "\\steamapps\\downloading"
        if os.path.isdir(dlpath):
            self.directories.add(dlpath)

    self.loaded_steam = True
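A stand-alone check of the VDF fragment above on a tiny sample (keys and paths hypothetical):

import pyparsing as pp

value = pp.Forward()
key_value = pp.Group(pp.QuotedString('"') + value)
expression = pp.Suppress('{') + pp.Dict(pp.ZeroOrMore(key_value)) + pp.Suppress('}')
value <<= pp.QuotedString('"') | expression

sample = '"LibraryFolders" { "TimeNextStatsReport" "0" "1" "D:/Games" }'
content = pp.Dict(key_value).parseString(sample)
print(content["LibraryFolders"]["1"])  # expected: D:/Games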
def comment_handler(input_string, locn, tokens):
    tokenstr = "\n".join(tokens)
    mod_string = "comment{:d} comment\n(\n{:s}\n)".format(locn, tokenstr)
    mod_def = pp.Dict(
        pp.Group(
            identifier.setResultsName("_name")
            + identifier.setResultsName("_type")
            + LPAREN
            + pp.Group(pp.OneOrMore(pp.dblSlashComment)).setResultsName("text")
            + RPAREN))
    return mod_def.parseString(mod_string)
def register_res_parse_rule(cls):
    begin_function_map = _pp.Keyword("BEGIN_FUNCTION_MAP").suppress()
    begin_data_map = _pp.Keyword("BEGIN_DATA_MAP").suppress()
    end_data_map = _pp.Keyword("END_DATA_MAP").suppress()
    end_function_map = _pp.Keyword("END_FUNCTION_MAP").suppress()
    begin_block = _pp.Keyword("begin").suppress()
    end_block = _pp.Keyword("end").suppress()
    semicolon = _pp.Literal(";").suppress()

    valid_metadata_words = _pp.Word(
        _pp.pyparsing_unicode.Korean.alphas + _pp.alphas + _pp.printables + r" \t",
        excludeChars=",;",
    )
    comma_separated_row = (_pp.delimitedList(valid_metadata_words)
                           + _pp.Optional(semicolon))

    type_tab = {
        "char": str,
        "date": str,
        "long": int,
        "int": int,
        "float": float,
        "double": float,
    }

    def parse_field(x):
        cur_type = type_tab[x[3]]
        return [[
            x[2],
            {
                "desc": x[0],
                "_reserved": x[1],
                "type": cur_type,
                "size": cur_type(x[4]),
            },
        ]]

    function_metadata = comma_separated_row.setResultsName("__FUNC_META")
    data_metadata = comma_separated_row.setResultsName("__DATA_META")
    field_metadata = _pp.Dict(comma_separated_row.setParseAction(parse_field))

    data_block = _pp.Group(data_metadata + begin_block
                           + _pp.ZeroOrMore(~end_block + field_metadata)
                           + end_block)
    data_blocks = _pp.Group(begin_data_map + _pp.OneOrMore(data_block)
                            + end_data_map).setResultsName("DATA_BLOCKS")
    function_block = _pp.Group(begin_function_map + function_metadata
                               + data_blocks
                               + end_function_map).setResultsName("FUNC_BLOCK")
    cls._parser = function_block
    return cls
def aggregatorMetas():
    star = p.Literal('*').suppress()
    metaName = p.Word(p.alphanums)
    metaKeyword = p.Combine(star + metaName).setResultsName('key')
    equals = p.Literal('=').suppress()
    value = p.Word(p.printables + ' ')
    metaValue = (equals + value).setResultsName('value')
    metaDef = p.Dict(
        p.Group(metaKeyword + metaValue) + p.Optional(p.LineEnd().suppress()))
    return metaDef
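Feeding the returned expression a couple of `*key=value` lines (meta names hypothetical) should yield a dict-like result:

metas = p.OneOrMore(aggregatorMetas())
result = metas.parseString("*title=My Feed\n*ttl=60")
print(result.asDict())  # expected: {'title': 'My Feed', 'ttl': '60'}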
def grammer():
    lparen = pp.Suppress("(")
    rparen = pp.Suppress(")")
    equal = pp.Suppress("=")
    nl = pp.Suppress(pp.LineEnd())
    reg = pp.Combine("$" + pp.Optional("cr") + pp.Word(pp.srange("[0-7]"), max=1))
    num = pp.Word(pp.srange("[0-9]")).setParseAction(lambda s, l, t: int(t[0]))
    val = pp.Word(pp.srange("[0-9a-fA-F]")).setParseAction(lambda s, l, t: int(t[0], 16))
    values = pp.Dict(pp.OneOrMore(pp.Group(reg + equal + val)))
    return num + lparen + values + rparen + nl
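A usage sketch, assuming a register dump shaped like `N ($reg=hexval ...)`:

parse = grammer()
result = parse.parseString("3 ($cr1=1f $2=a0)\n")
print(result[0])       # expected: 3
print(result['$cr1'])  # expected: 31  (0x1f)
print(result['$2'])    # expected: 160 (0xa0)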
def craft_parse(text):
    LineComment = pyp.Combine(pyp.Literal('::') + pyp.restOfLine).suppress()
    BlockComment = pyp.Combine(
        pyp.Literal(':>') + pyp.SkipTo(pyp.Literal('<:')) +
        pyp.Literal('<:')).suppress()
    Comment = BlockComment | LineComment
    # NOTE: this rebinds BlockComment; Comment above still refers to the
    # ':>' ... '<:' form captured before the rebinding.
    BlockComment = pyp.Combine(
        pyp.Literal(':<') + pyp.Combine(
            pyp.NotAny(pyp.Literal(':>')) +
            pyp.Word(pyp.printables + ' ')) + pyp.Literal('>:'))

    Identifier = pyp.Word(pyp.alphanums + '!#$%&()*+,./;<=>?@\\^-_`{|}~')
    Value = (Comment | pyp.QuotedString('"') | pyp.QuotedString("'")
             | Identifier.addParseAction(_type_cast_value))

    LBRACKET, RBRACKET, COLON = map(pyp.Suppress, '[]:')

    Function = pyp.Forward()
    List = pyp.Forward()
    Function << pyp.Dict(
        pyp.Group(Identifier + pyp.Literal(':') +
                  pyp.Group(LBRACKET +
                            pyp.ZeroOrMore(Comment | Function | List | Value) +
                            RBRACKET)))
    List << pyp.Group(LBRACKET +
                      pyp.ZeroOrMore(Comment | Function | List | Value) +
                      RBRACKET)
    Program = pyp.OneOrMore(Comment | Function)

    # Validate for syntax error messages:
    validator = SourceValidator()
    Value.setParseAction(validator.validate)
    List.setParseAction(validator.validate)
    Identifier.addParseAction(validator.validate)
    #Comment.setParseAction(validator.validate)
    Function.setParseAction(validator.validate)
    Program.setParseAction(validator.validate)

    syntax_error = None
    try:
        return __walk(Program.parseString(text)[0])
    except Exception as e:
        syntax_error = validator.panic()
    # Now raise the exception with a clean stack trace
    raise syntax_error
def parse_verbs():
    get = pp.Literal("GET")
    post = pp.Literal("POST")
    options = pp.Literal("OPTIONS")
    put = pp.Literal("PUT")
    patch = pp.Literal("PATCH")
    delete = pp.Literal("DELETE")
    connect = pp.Literal("CONNECT")
    trace = pp.Literal("TRACE")
    track = pp.Literal("TRACK")
    head = pp.Literal("HEAD")
    crappaton = pp.Word(pp.printables).setResultsName('bleed')
    headers = pp.ZeroOrMore(get | post | head | options | put | patch | delete
                            | connect | trace | track).setResultsName('headers')
    verb = pp.Dict(
        pp.Group(headers + pp.ZeroOrMore(crappaton)).setResultsName('results'))
    return verb
def metaParser():
    # --- meta parser ---
    metaIndicator = p.LineStart() + p.Suppress(p.Literal('*'))
    metaName = p.Word(p.alphanums).setResultsName('metaname')
    metaSeparator = p.Suppress(p.Literal('='))
    # TODO force case insensitivity in attributeMode keyword match
    # TODO add debug names
    # TODO add a conditional debug flag
    metavalue = p.Combine(p.restOfLine() +
                          p.Suppress(p.LineEnd())).setResultsName('metavalue')
    metaList = p.Dict(
        p.Group(metaIndicator + metaName + metaSeparator + metavalue))
    return metaList
def _construct_fnsuffix_parser():
    atom = pp.Regex(r"[^=,{}()[\]]+")
    value = pp.Forward().setName("value")  # .setDebug()
    key = pp.Regex(r"\w*").setName("key")  # .setDebug()

    item = pp.Dict(pp.Group(key + pp.Suppress("=") + value))
    items = pp.delimitedList(item)
    dict_ = pp.Suppress("{") + items + pp.Suppress("}")

    list_, tuple_, set_ = (o + pp.delimitedList(value, combine=True) + c
                           for o, c in zip(["[", "(", "{"], ["]", ")", "}"]))
    combine_values = [
        pp.Combine(expr) for expr in (list_, tuple_, set_, atom + value)
    ]
    # Caution: brackets are needed because of << operator precedence!
    value << (pp.quotedString | dict_ | pp.Or(combine_values) | atom)

    return dict_ + pp.StringEnd()
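A usage sketch; note that values come back as strings, since the atoms are never type-cast:

parser = _construct_fnsuffix_parser()
result = parser.parseString("{gain=0.5, taps=[1,2,3]}")
print(result.asDict())  # expected: {'gain': '0.5', 'taps': '[1,2,3]'}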
def _build_txt_parser():
    separator = pp.Suppress('=')
    key = pp.Literal('%') + pp.Word(pp.printables, excludeChars='=')
    value = pp.Regex(r'[^\n\r]*') + pp.LineEnd().suppress()
    key_value = key + separator + value

    header = (pp.LineStart().suppress() + pp.Word(pp.nums) +
              pp.ZeroOrMore(pp.White().suppress() + pp.Word(pp.nums)) +
              pp.LineEnd().suppress())
    column_heading = (pp.Literal('%') +
                      pp.Word(pp.printables, excludeChars='=') +
                      separator + value)
    txt_row = (pp.delimitedList(pp.Word(pp.nums + '.+-e_') | pp.Literal('custom')) +
               pp.LineEnd().suppress())

    return (pp.Optional(header) +
            pp.ZeroOrMore(pp.Dict(pp.Group(key_value))).setResultsName('meta') +
            column_heading.setResultsName('columnHeading') +
            pp.Group(pp.OneOrMore(pp.Group(txt_row))).setResultsName('textValues'))
def __init__(self):
    COLON, EQUAL, COMMA = map(pp.Literal, ":=,")
    SCOLON, SEQUAL, SCOMMA = map(pp.Suppress, ":=,")
    LPAREN, RPAREN = map(pp.Suppress, "()")

    word_strict = pp.Regex(r"[^\s'\":=]+")
    sgl_quoted_string = pp.QuotedString("'", escChar="\\")
    dbl_quoted_string = pp.QuotedString('"', escChar="\\")
    word = sgl_quoted_string | dbl_quoted_string | word_strict

    date = pp.pyparsing_common.iso8601_date.copy()
    date.setParseAction(pp.pyparsing_common.convertToDate())
    date_expr = date + SCOMMA + date | COMMA + date | date + COMMA
    date_range = LPAREN + date_expr + RPAREN

    filtr_delim = COLON | EQUAL
    filtr_delim_suppress = SCOLON | SEQUAL
    filtr = (pp.Group(word_strict + filtr_delim_suppress + date_range)
             | pp.Group(word_strict + filtr_delim + word))

    query_patt = pp.Dict(filtr) | word
    self.__expr = query_patt() * (1, )
def get_parser():
    """A Parser that parses the dumped ParamFile attribute by FastPM.

    This must be a result produced by the lua dump module; it must be a
    lua table, e.g.

        { a = 3, b = {0, 1, 2,} }

    (modified from the jsonParser.py example under pyparsing)

    When using the parser, convert the attribute from an array of U1
    to a string first.
    """
    import pyparsing as pp
    from pyparsing import pyparsing_common as ppc

    def make_keyword(kwd_str, kwd_value):
        return pp.Keyword(kwd_str).setParseAction(pp.replaceWith(kwd_value))

    TRUE = make_keyword("true", True)
    FALSE = make_keyword("false", False)
    NULL = make_keyword("nil", None)

    LBRACE, RBRACE, ASSIGN, COMMA = map(pp.Suppress, "{}=,")

    luaName = pp.Word(pp.alphas + "_", pp.alphanums + "_")
    luaString = pp.dblQuotedString().setParseAction(pp.removeQuotes)
    luaNumber = ppc.number()

    luaObject = pp.Forward()
    luaValue = pp.Forward()
    luaElements = pp.delimitedList(luaValue) + pp.Optional(COMMA)
    luaArray = pp.Group(LBRACE + pp.Optional(luaElements, []) + RBRACE)
    luaValue << (luaString | luaNumber | pp.Group(luaObject) | luaArray
                 | TRUE | FALSE | NULL)
    memberDef = pp.Group(luaName + ASSIGN + luaValue)
    luaMembers = pp.delimitedList(memberDef) + pp.Optional(COMMA)
    luaObject << pp.Dict(LBRACE + pp.Optional(luaMembers) + RBRACE)

    return luaObject
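A usage sketch on a small lua table (values hypothetical); trailing commas are tolerated by the `Optional(COMMA)` terms:

parser = get_parser()
result = parser.parseString('{ a = 3, name = "fastpm", }')
print(result.asDict())  # expected: {'a': 3, 'name': 'fastpm'}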
def _build_sample_parser():
    separator = pp.Suppress('=')
    key = (pp.LineStart() + pp.Literal('%').suppress() +
           pp.Word(pp.printables, excludeChars='='))
    value = pp.Regex(r'[^\r%]*') | pp.Empty() + pp.LineEnd().suppress()

    element = pp.Word(pp.alphas)
    elements = pp.Group(pp.LineStart().suppress() + pp.Literal('%').suppress() +
                        pp.Literal('elements') + separator + element +
                        pp.ZeroOrMore(pp.White(ws='\t ').suppress() + element) +
                        pp.LineEnd().suppress())
    compositions = pp.Group(
        pp.LineStart().suppress() + pp.Literal('%').suppress() +
        pp.Literal('compositions') + separator + pyparsing_common.number +
        pp.ZeroOrMore(pp.White(ws='\t ').suppress() + pyparsing_common.number) +
        pp.LineEnd().suppress())
    epoch = pp.Group(pp.LineStart().suppress() + pp.Literal('%').suppress() +
                     pp.Literal('Epoch') + separator + pyparsing_common.number +
                     pp.LineEnd().suppress())
    sample = pp.Group(pp.LineStart().suppress() + pp.Literal('%').suppress() +
                      pp.Literal('Sample') + separator + pyparsing_common.number +
                      pp.LineEnd().suppress())
    key_value = (sample | epoch | elements | compositions |
                 pp.Group(key + separator + value))

    row_separator = pp.White(ws='\t ').suppress()
    row = (pp.LineStart().suppress() + pyparsing_common.number +
           pp.ZeroOrMore(row_separator + pyparsing_common.number) +
           pp.LineEnd().suppress())

    return (pp.OneOrMore(pp.Dict(key_value)).setResultsName('meta') +
            pp.Group(pp.ZeroOrMore(pp.Group(row))).setResultsName('values'))
tor_comment.setParseAction(comment_handler)
tor_comment.setResultsName("_name")
tor_comment.setResultsName("_type")

tor_string = pp.dblQuotedString() | pp.Word(pp.alphas, pp.alphanums + "_-.")
number = pp.pyparsing_common.number()

tor_members = pp.Forward()
tor_value = pp.Forward()

tor_struct = (pp.Literal("struct").setResultsName("_type")
              + LPAREN + pp.Dict(tor_members) + RPAREN)
tor_sequence = (pp.Literal("sequence").setResultsName("_type")
                + LPAREN + pp.delimitedList(tor_value) + RPAREN)
tor_ref = (pp.Literal("ref").setResultsName("_type")
           + LPAREN + identifier + RPAREN)
tor_value << (tor_sequence | tor_ref | tor_struct | tor_string
              | pp.Group(number + identifier) | number)

member_def = pp.Dict(pp.Group(identifier + COLON + tor_value))
tor_members << pp.delimitedList(member_def)

object_def = pp.Group(
    identifier.setResultsName("_name")
    + identifier.setResultsName("_type")
    + pp.Dict(LPAREN + pp.Optional(tor_members) + RPAREN))

tor_object = pp.Dict(object_def | tor_comment)
tor_file = pp.Dict(pp.OneOrMore(tor_object)) + pp.stringEnd
NULL = make_keyword("null", None)

LBRACK, RBRACK, LBRACE, RBRACE, COLON = map(pp.Suppress, "[]{}:")

jsonString = pp.dblQuotedString().setParseAction(pp.removeQuotes)
jsonNumber = ppc.number()

jsonObject = pp.Forward()
jsonValue = pp.Forward()
jsonElements = pp.delimitedList(jsonValue)
jsonArray = pp.Group(LBRACK + pp.Optional(jsonElements, []) + RBRACK)
jsonValue << (jsonString | jsonNumber | pp.Group(jsonObject)
              | jsonArray | TRUE | FALSE | NULL)
memberDef = pp.Group(jsonString + COLON + jsonValue)
jsonMembers = pp.delimitedList(memberDef)
jsonObject << pp.Dict(LBRACE + pp.Optional(jsonMembers) + RBRACE)

jsonComment = pp.cppStyleComment
jsonObject.ignore(jsonComment)

if __name__ == "__main__":
    testdata = """
    {
        "glossary": {
            "title": "example glossary",
            "GlossDiv": {
                "title": "S",
                "GlossList": {
                    "ID": "SGML",
                    "SortAs": "SGML",
def _parse_ios_interfaces(data, acls_as_list=True, auto_cleanup=True,
                          skip_disabled=True):
    """
    Walks through an IOS interface config and returns a dict of parts.

    Intended for use by `~trigger.cmds.NetACLInfo.ios_parse()` but was
    written to be portable.

    :param acls_as_list: Whether you want acl names as list members instead
        of strings, e.g. ['ABC123'] vs. "ABC123". (Default: ``True``)
    :param auto_cleanup: Whether you want to pass results through
        cleanup_results(). (Default: ``True``)
    :param skip_disabled: Whether to skip disabled interfaces.
        (Default: ``True``)
    """
    import pyparsing as pp

    # Setup
    bang = pp.Literal("!").suppress()
    anychar = pp.Word(pp.printables)
    nonbang = pp.Word(''.join([x for x in pp.printables if x != "!"]) + '\n\r\t ')
    comment = bang + pp.restOfLine.suppress()

    # weird things to ignore in foundries
    aaa_line = pp.Literal("aaa").suppress() + pp.restOfLine.suppress()
    module_line = pp.Literal("module").suppress() + pp.restOfLine.suppress()
    startup_line = pp.Literal("Startup").suppress() + pp.restOfLine.suppress()
    ver_line = pp.Literal("ver") + anychar  # + pp.restOfLine.suppress()
    # using SkipTo instead now

    # foundry example:
    # telnet@olse1-dc5#show configuration | include ^(interface | ip address | ip access-group | description|!)
    # !
    # Startup-config data location is flash memory
    # !
    # Startup configuration:
    # !
    # ver 07.5.05hT53
    # !
    # module 1 bi-0-port-m4-management-module
    # module 2 bi-8-port-gig-module
    # there is a lot more that foundry is including in the output that
    # should be ignored

    interface_keyword = pp.Keyword("interface")
    unwanted = pp.SkipTo(interface_keyword, include=False).suppress()
    # unwanted = pp.ZeroOrMore(bang ^ comment ^ aaa_line ^ module_line ^ startup_line ^ ver_line)

    octet = pp.Word(pp.nums, max=3)
    ipaddr = pp.Combine(octet + "." + octet + "." + octet + "." + octet)
    address = ipaddr
    netmask = ipaddr
    cidr = pp.Literal("/").suppress() + pp.Word(pp.nums, max=2)

    # Description
    desc_keyword = pp.Keyword("description")
    description = pp.Dict(pp.Group(desc_keyword + pp.Group(pp.restOfLine)))

    # Addresses
    # cisco example:
    #  ip address 172.29.188.27 255.255.255.224 secondary
    #
    # foundry example:
    #  ip address 10.62.161.187/26
    ipaddr_keyword = pp.Keyword("ip address").suppress()
    secondary = pp.Literal("secondary").suppress()

    # foundry matches on cidr and cisco matches on netmask;
    # netmask converted to cidr in cleanup
    ip_tuple = pp.Group(address + (cidr ^ netmask)).setResultsName(
        'addr', listAllMatches=True)
    negotiated = pp.Literal('negotiated')  # Seen on Cisco 886
    ip_address = ipaddr_keyword + (negotiated ^ ip_tuple) + pp.Optional(secondary)
    addrs = pp.ZeroOrMore(ip_address)

    # ACLs
    acl_keyword = pp.Keyword("ip access-group").suppress()
    # acl_name to be [''] or '' depending on acls_as_list
    acl_name = pp.Group(anychar) if acls_as_list else anychar
    direction = pp.oneOf('in out').suppress()
    acl_in = acl_keyword + pp.FollowedBy(acl_name + pp.Literal('in'))
    acl_in.setParseAction(pp.replaceWith('acl_in'))
    acl_out = acl_keyword + pp.FollowedBy(acl_name + pp.Literal('out'))
    acl_out.setParseAction(pp.replaceWith('acl_out'))
    acl = pp.Dict(pp.Group((acl_in ^ acl_out) + acl_name)) + direction
    acls = pp.ZeroOrMore(acl)

    # Interfaces
    iface_keyword = pp.Keyword("interface").suppress()
    foundry_awesome = pp.Literal(" ").suppress() + anychar
    # foundry example:
    # !
    # interface ethernet 6/6
    #  ip access-group 126 in
    #  ip address 172.18.48.187 255.255.255.255
    #
    # cisco example:
    # !
    # interface Port-channel1
    #  description gear1-mtc : AE1 : iwslbfa1-mtc-sw0 : : 1x1000 : 172.20.166.0/24 : : :
    #  ip address 172.20.166.251 255.255.255.0
    interface = pp.Combine(anychar + pp.Optional(foundry_awesome))

    iface_body = (pp.Optional(description) + pp.Optional(acls)
                  + pp.Optional(addrs) + pp.Optional(acls))
    # foundry's body is acl then ip and cisco's is ip then acl

    iface_info = (pp.Optional(unwanted) + iface_keyword
                  + pp.Dict(pp.Group(interface + iface_body))
                  + pp.Optional(pp.SkipTo(bang)))
    interfaces = pp.Dict(pp.ZeroOrMore(iface_info))

    # This is where the parsing actually happens
    try:
        results = interfaces.parseString(data)
    except Exception:  # (ParseException, ParseFatalException, RecursiveGrammarException)
        results = {}

    if auto_cleanup:
        return _cleanup_interface_results(results, skip_disabled=skip_disabled)
    return results
unquote = lambda s, l, t: UNQUOTE_PAIRS.sub(r"\1", t[0][1:-1])

# https://tools.ietf.org/html/rfc7235#section-1.2
# https://tools.ietf.org/html/rfc7235#appendix-B
tchar = "!#$%&'*+-.^_`|~" + pp.nums + pp.alphas
token = pp.Word(tchar).setName("token")
token68 = pp.Combine(
    pp.Word("-._~+/" + pp.nums + pp.alphas)
    + pp.Optional(pp.Word("=").leaveWhitespace())).setName("token68")
quoted_string = pp.dblQuotedString.copy().setName(
    "quoted-string").setParseAction(unquote)

auth_param_name = token.copy().setName("auth-param-name").addParseAction(
    downcaseTokens)
auth_param = auth_param_name + pp.Suppress("=") + (quoted_string | token)
params = pp.Dict(pp.delimitedList(pp.Group(auth_param)))

scheme = token("scheme")
challenge = scheme + (params("params") | token68("token"))

authentication_info = params.copy()
www_authenticate = pp.delimitedList(pp.Group(challenge))


def _parse_authentication_info(headers, headername="authentication-info"):
    """https://tools.ietf.org/html/rfc7615
    """
    header = headers.get(headername, "").strip()
    if not header:
        return {}
    try:
class SELinuxParser(text_parser.PyparsingSingleLineTextParser):
    """Parser for SELinux audit.log files."""

    NAME = 'selinux'
    DESCRIPTION = 'Parser for SELinux audit.log files.'

    _SELINUX_KEY_VALUE_GROUP = pyparsing.Group(
        pyparsing.Word(pyparsing.alphanums).setResultsName('key') +
        pyparsing.Suppress('=') +
        (pyparsing.QuotedString('"') ^
         pyparsing.Word(pyparsing.printables)).setResultsName('value'))

    _SELINUX_KEY_VALUE_DICT = pyparsing.Dict(
        pyparsing.ZeroOrMore(_SELINUX_KEY_VALUE_GROUP))

    _SELINUX_BODY_GROUP = pyparsing.Group(
        pyparsing.Empty().setResultsName('key') +
        pyparsing.restOfLine.setResultsName('value'))

    _SELINUX_MSG_GROUP = pyparsing.Group(
        pyparsing.Literal('msg').setResultsName('key') +
        pyparsing.Suppress('=audit(') +
        pyparsing.Word(pyparsing.nums).setResultsName('seconds') +
        pyparsing.Suppress('.') +
        pyparsing.Word(pyparsing.nums).setResultsName('milliseconds') +
        pyparsing.Suppress(':') +
        pyparsing.Word(pyparsing.nums).setResultsName('serial') +
        pyparsing.Suppress('):'))

    _SELINUX_TYPE_GROUP = pyparsing.Group(
        pyparsing.Literal('type').setResultsName('key') +
        pyparsing.Suppress('=') +
        (pyparsing.Word(pyparsing.srange('[A-Z_]')) ^
         pyparsing.Regex(r'UNKNOWN\[[0-9]+\]')).setResultsName('value'))

    _SELINUX_TYPE_AVC_GROUP = pyparsing.Group(
        pyparsing.Literal('type').setResultsName('key') +
        pyparsing.Suppress('=') +
        (pyparsing.Word('AVC') ^
         pyparsing.Word('USER_AVC')).setResultsName('value'))

    # A log line is formatted as: type=TYPE msg=audit([0-9]+\.[0-9]+:[0-9]+): .*
    _SELINUX_LOG_LINE = pyparsing.Dict(
        _SELINUX_TYPE_GROUP + _SELINUX_MSG_GROUP + _SELINUX_BODY_GROUP)

    LINE_STRUCTURES = [('line', _SELINUX_LOG_LINE)]

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a structure of tokens derived from a line of a text file.

        Args:
          parser_mediator (ParserMediator): mediates interactions between parsers
              and other components, such as storage and dfvfs.
          key (str): identifier of the structure of tokens.
          structure (pyparsing.ParseResults): structure of tokens derived from
              a line of a text file.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key != 'line':
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

        msg_value = structure.get('msg')
        if not msg_value:
            parser_mediator.ProduceExtractionError(
                'missing msg value: {0!s}'.format(structure))
            return

        try:
            seconds = int(msg_value[0], 10)
        except ValueError:
            parser_mediator.ProduceExtractionError(
                'unsupported number of seconds in msg value: {0!s}'.format(
                    structure))
            return

        try:
            milliseconds = int(msg_value[1], 10)
        except ValueError:
            parser_mediator.ProduceExtractionError(
                'unsupported number of milliseconds in msg value: {0!s}'.format(
                    structure))
            return

        timestamp = ((seconds * 1000) + milliseconds) * 1000
        body_text = structure[2][0]

        try:
            # Try to parse the body text as key value pairs. Note that not
            # all log lines will be properly formatted key value pairs.
            key_value_dict = self._SELINUX_KEY_VALUE_DICT.parseString(body_text)
        except pyparsing.ParseException:
            key_value_dict = {}

        event_data = SELinuxLogEventData()
        event_data.audit_type = structure.get('type', None)
        event_data.body = body_text
        event_data.pid = key_value_dict.get('pid', None)
        # TODO: pass line number to offset or remove.
        event_data.offset = 0

        event = time_events.TimestampEvent(
            timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def VerifyStructure(self, parser_mediator, line):
        """Verifies if a line from a text file is in the expected format.

        Args:
          parser_mediator (ParserMediator): mediates interactions between parsers
              and other components, such as storage and dfvfs.
          line (bytes): line from a text file.

        Returns:
          bool: True if the line is in the expected format, False if not.
        """
        try:
            structure = self._SELINUX_LOG_LINE.parseString(line)
        except pyparsing.ParseException as exception:
            logging.debug(
                'Unable to parse SELinux audit.log file with error: {0:s}'.format(
                    exception))
            return False

        return 'type' in structure and 'msg' in structure
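A stand-alone check of the key=value body grammar used by `_SELINUX_KEY_VALUE_DICT` (sample audit body hypothetical):

import pyparsing

kv_group = pyparsing.Group(
    pyparsing.Word(pyparsing.alphanums)('key') +
    pyparsing.Suppress('=') +
    (pyparsing.QuotedString('"') ^
     pyparsing.Word(pyparsing.printables))('value'))
kv_dict = pyparsing.Dict(pyparsing.ZeroOrMore(kv_group))

result = kv_dict.parseString('pid=123 comm="ls" exe="/bin/ls"')
print(result.get('pid'))  # expected: '123' (still a string at this point)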
dict_key_value = pp.dictOf(string_value + pp.Suppress(":"), value)
dict_value << pp.Group(
    pp.Suppress("{") + pp.delimitedList(dict_key_value) +
    pp.Suppress("}")).setParseAction(_parse_type("dict"))

# Positionals must be at end of line or have a space (or more) afterwards.
# This is to ensure that the parser treats text like "something=" as invalid
# instead of parsing this as positional "something" and leaving the "=" as
# invalid on its own.
positionals = pp.ZeroOrMore(value + (
    pp.StringEnd() ^ pp.Suppress(pp.OneOrMore(pp.White())))).setResultsName(
        "positionals")

key_value = pp.Dict(
    pp.ZeroOrMore(pp.Group(identifier + pp.Suppress("=") +
                           value))).setResultsName("kv")

subcommand = identifier.setResultsName("__subcommand__")

# The subcommand is optional here as it may be missing; in that case we
# still want the parse to succeed, and we handle the missing subcommand
# while validating the arguments.
command_with_subcommand = pp.Optional(subcommand) + key_value + positionals

# Positionals will be passed as the last argument
command = key_value + positionals


def parse(text: str, expect_subcommand: bool) -> pp.ParseResults:
    expected_pattern = command_with_subcommand if expect_subcommand else command