def hwdb_grammar():
    ParserElement.setDefaultWhitespaceChars('')

    prefix = Or(category + ':' + Or(conn) + ':'
                for category, conn in TYPES.items())
    matchline = Combine(prefix + Word(printables + ' ' + '®')) + EOL
    propertyline = (
        White(' ', exact=1).suppress()
        + Combine(UDEV_TAG - '=' - Word(alphanums + '_=:@*.! ') - Optional(pythonStyleComment))
        + EOL
    )
    propertycomment = White(' ', exact=1) + pythonStyleComment + EOL

    group = (
        OneOrMore(matchline('MATCHES*') ^ COMMENTLINE.suppress())
        - OneOrMore(propertyline('PROPERTIES*') ^ propertycomment.suppress())
        - (EMPTYLINE ^ stringEnd()).suppress()
    )
    commentgroup = OneOrMore(COMMENTLINE).suppress() - EMPTYLINE.suppress()

    grammar = OneOrMore(group('GROUPS*') ^ commentgroup) + stringEnd()

    return grammar
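# Usage sketch for hwdb_grammar(), assuming the module-level names it closes
# over (TYPES, UDEV_TAG, EOL, COMMENTLINE, EMPTYLINE) are defined as in
# systemd's parse_hwdb.py. The sample below follows hwdb match/property syntax.
sample = (
    'evdev:input:b0003v05ACp021E*\n'
    ' EVDEV_ABS_00=::100\n'
    '\n'
)
print(hwdb_grammar().parseString(sample).dump())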
def _generate_members(self, template_file):
    lines = template_file.readlines()

    target = Fragment.IDENTIFIER
    reference = Suppress("mapping") + Suppress("[") + target.setResultsName("target") + Suppress("]")
    pattern = White(" \t").setResultsName("indent") + reference

    # Find the markers in the template file line by line. If a line does not
    # match the marker grammar, keep it as a literal to be copied as-is to
    # the output file.
    for line in lines:
        try:
            parsed = pattern.parseString(line)

            indent = parsed.indent
            target = parsed.target

            marker = TemplateModel.Marker(target, indent, [])
            self.members.append(marker)
        except ParseException:
            # does not match marker syntax
            self.members.append(line)
class NginxParser(object):
    # pylint: disable=expression-not-assigned
    """A class that parses nginx configuration with pyparsing."""

    # constants
    space = Optional(White())
    nonspace = Regex(r"\S+")
    left_bracket = Literal("{").suppress()
    right_bracket = space.leaveWhitespace() + Literal("}").suppress()
    semicolon = Literal(";").suppress()
    key = Word(alphanums + "_/+-.")
    dollar_var = Combine(Literal('$') + Regex(r"[^\{\};,\s]+"))
    condition = Regex(r"\(.+\)")
    # Matches anything that is not a special character, and ${SHELL_VARS}, AND
    # any chars in single or double quotes
    # All of these COULD be upgraded to something like
    # https://stackoverflow.com/a/16130746
    dquoted = Regex(r'(\".*\")')
    squoted = Regex(r"(\'.*\')")
    nonspecial = Regex(r"[^\{\};,]")
    varsub = Regex(r"(\$\{\w+\})")
    # nonspecial nibbles one character at a time, but the other objects take
    # precedence. We use ZeroOrMore to allow entries like "break ;" to be
    # parsed as assignments
    value = Combine(ZeroOrMore(dquoted | squoted | varsub | nonspecial))

    location = CharsNotIn("{};," + string.whitespace)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    comment = space + Literal('#') + restOfLine

    assignment = space + key + Optional(space + value, default=None) + semicolon
    location_statement = space + Optional(modifier) + Optional(space + location + space)
    if_statement = space + Literal("if") + space + condition + space
    charset_map_statement = space + Literal(
        "charset_map") + space + value + space + value

    map_statement = space + Literal(
        "map") + space + nonspace + space + dollar_var + space
    # This is NOT an accurate way to parse nginx map entries; it's almost
    # certainly too permissive and may be wrong in other ways, but it should
    # preserve things correctly in most or all cases.
    #
    #    - I can neither prove nor disprove that it is correct wrt all escaped
    #      semicolon situations
    # Addresses https://github.com/fatiherikli/nginxparser/issues/19
    map_pattern = Regex(r'".*"') | Regex(r"'.*'") | nonspace
    map_entry = space + map_pattern + space + value + space + semicolon
    map_block = Group(
        Group(map_statement).leaveWhitespace()
        + left_bracket
        + Group(ZeroOrMore(Group(comment | map_entry)) + space).leaveWhitespace()
        + right_bracket)

    block = Forward()

    # key could for instance be "server" or "http", or "location" (in which case
    # location_statement needs to have a non-empty location)
    block_begin = (Group(space + key + location_statement)
                   ^ Group(if_statement)
                   ^ Group(charset_map_statement)).leaveWhitespace()
    block_innards = Group(
        ZeroOrMore(Group(comment | assignment) | block | map_block)
        + space).leaveWhitespace()
    block << Group(block_begin + left_bracket + block_innards + right_bracket)

    script = OneOrMore(Group(comment | assignment) ^ block ^ map_block) + space + stringEnd
    script.parseWithTabs().leaveWhitespace()

    def __init__(self, source):
        self.source = source

    def parse(self):
        """Returns the parsed tree."""
        return self.script.parseString(self.source)

    def as_list(self):
        """Returns the parsed tree as a list."""
        return self.parse().asList()
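# Usage sketch: parsing a small made-up config with NginxParser (the class is
# self-contained apart from its pyparsing and string imports).
conf = 'server {\n    listen 80;\n    server_name example.com;\n}\n'
print(NginxParser(conf).as_list())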
def usfmTokenNumber(key):
    return Group(
        Suppress(backslash)
        + Literal(key)
        + Suppress(White())
        + Word(nums + '-()')
        + Suppress(White()))
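# Usage sketch for usfmTokenNumber, assuming the module-level `backslash`
# is Literal('\\'), since USFM markers begin with a backslash.
chapter = usfmTokenNumber('c')
print(chapter.parseString('\\c 1 '))  # [['c', '1']]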
Bold = Suppress(Literal('**'))
Italic = Suppress(Literal('__'))
Striked = Suppress(Literal('~~'))
Text = OneOrMore(Word(printables))

StyledText = Forward()
BoldText = (Bold + StyledText + Bold)('is_bold')
ItalicText = (Italic + StyledText + Italic)('is_italic')
StrikedText = (Striked + StyledText + Striked)('is_striked')
StyledText << (BoldText | ItalicText | StrikedText
               | StopOnSuffix(['**', '__', '~~', '!icon=', '<!--', '(see:']))
StyledText.resultsName = 'text'
StyledText.saveAsList = True  # must be done at this point, not before
TextGrammar = StyledText | Text.setResultsName('text', listAllMatches=True)

Checkbox = (Literal('[') + (Literal('x')('is_checked') | White()) + Literal(']'))('has_checkbox')
Icon = Literal('!icon=') + Word(printables).setResultsName('icons', listAllMatches=True)
DestNodeText = QuotedString('"', escChar='\\')
See = Keyword('(see:') + delimitedList(
    DestNodeText, delim=',').setResultsName('see') + Literal(')')
XMLAttrs = Literal('<!--') + StopOnSuffix(
    ['-->']).setResultsName('attrs') + Literal('-->')
Url = CharsNotIn(') ')('url')
ImgDimensions = Word(nums)('img_width') + Literal('x') + Word(nums)('img_height')
class White(White):
    """ Customize whitespace to match the CSS spec values"""
    def __init__(self, ws=" \t\r\n\f", min=1, max=0, exact=0):
        super(White, self).__init__(ws, min, max, exact)


escaped = (
    Literal("\\").suppress() +
    # chr(20)-chr(126) + chr(128)-unichr(sys.maxunicode)
    Regex(u"[\u0020-\u007e\u0080-\uffff]", re.IGNORECASE))


def convertToUnicode(t):
    return unichr(int(t[0], 16))


hex_unicode = (
    Literal("\\").suppress() +
    Regex("[0-9a-f]{1,6}", re.IGNORECASE) +
    Optional(White(exact=1)).suppress()).setParseAction(convertToUnicode)

escape = hex_unicode | escaped

# any unicode literal outside the 0-127 ascii range
nonascii = Regex(u"[^\u0000-\u007f]")

# single character for starting an identifier.
nmstart = Regex(u"[A-Z]", re.IGNORECASE) | nonascii | escape

nmchar = Regex(u"[0-9A-Z-]", re.IGNORECASE) | nonascii | escape

identifier = Combine(nmstart + ZeroOrMore(nmchar))
def _tdb_grammar(): #pylint: disable=R0914
    """
    Convenience function for getting the pyparsing grammar of a TDB file.
    """
    int_number = Word(nums).setParseAction(lambda t: [int(t[0])])
    # matching float w/ regex is ugly but is recommended by pyparsing
    float_number = Regex(r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?') \
        .setParseAction(lambda t: [float(t[0])])
    # symbol name, e.g., phase name, function name
    symbol_name = Word(alphanums + '_:', min=1)
    # species name, e.g., CO2, AL, FE3+
    species_name = Word(alphanums + '+-*', min=1) + Optional(Suppress('%'))
    # constituent arrays are colon-delimited
    # each subarray can be comma- or space-delimited
    constituent_array = Group(
        delimitedList(Group(delimitedList(species_name, ',') & \
                            ZeroOrMore(species_name)
                            ), ':')
        )
    param_types = MatchFirst(
        [TCCommand(param_type) for param_type in TDB_PARAM_TYPES])
    # Let sympy do heavy arithmetic / algebra parsing for us
    # a convenience function will handle the piecewise details
    func_expr = Optional(float_number) + OneOrMore(SkipTo(';') \
        + Suppress(';') + ZeroOrMore(Suppress(',')) + Optional(float_number) + \
        Suppress(Word('YNyn', exact=1)))
    # ELEMENT
    cmd_element = TCCommand('ELEMENT') + Word(alphas + '/-', min=1, max=2)
    # TYPE_DEFINITION
    cmd_typedef = TCCommand('TYPE_DEFINITION') + \
        Suppress(White()) + CharsNotIn(' !', exact=1) + SkipTo(LineEnd())
    # FUNCTION
    cmd_function = TCCommand('FUNCTION') + symbol_name + \
        func_expr.setParseAction(_make_piecewise_ast)
    # ASSESSED_SYSTEMS
    cmd_ass_sys = TCCommand('ASSESSED_SYSTEMS') + SkipTo(LineEnd())
    # DEFINE_SYSTEM_DEFAULT
    cmd_defsysdef = TCCommand('DEFINE_SYSTEM_DEFAULT') + SkipTo(LineEnd())
    # DEFAULT_COMMAND
    cmd_defcmd = TCCommand('DEFAULT_COMMAND') + SkipTo(LineEnd())
    # LIST_OF_REFERENCES
    cmd_lor = TCCommand('LIST_OF_REFERENCES') + SkipTo(LineEnd())
    # PHASE
    cmd_phase = TCCommand('PHASE') + symbol_name + \
        Suppress(White()) + CharsNotIn(' !', min=1) + Suppress(White()) + \
        Suppress(int_number) + Group(OneOrMore(float_number)) + LineEnd()
    # CONSTITUENT
    cmd_constituent = TCCommand('CONSTITUENT') + symbol_name + \
        Suppress(White()) + Suppress(':') + constituent_array + \
        Suppress(':') + LineEnd()
    # PARAMETER
    cmd_parameter = TCCommand('PARAMETER') + param_types + \
        Suppress('(') + symbol_name + Suppress(',') + constituent_array + \
        Optional(Suppress(';') + int_number, default=0) + Suppress(')') + \
        func_expr.setParseAction(_make_piecewise_ast)
    # Now combine the grammar together
    all_commands = cmd_element | \
                   cmd_typedef | \
                   cmd_function | \
                   cmd_ass_sys | \
                   cmd_defsysdef | \
                   cmd_defcmd | \
                   cmd_lor | \
                   cmd_phase | \
                   cmd_constituent | \
                   cmd_parameter
    return all_commands
# Redis PyParsing grammar
quot = Optional(oneOf(('"', "'")))
command = oneOf(
    ('CONFIG', 'DBSIZE', 'DECR', 'DECRBY', 'DEL', 'DUMP', 'ECHO', 'EXISTS', 'EXPIRE',
     'EXPIREAT', 'FLUSHDB', 'GET', 'HDEL', 'HEXISTS', 'HGET', 'HGETALL', 'HINCRBY',
     'HKEYS', 'HLEN', 'HSETNX', 'HVALS', 'INCR', 'INCRBY', 'INFO', 'KEYS', 'LLEN',
     'LPOP', 'LPUSH', 'LPUSHX', 'LRANGE', 'LREM', 'LSET', 'LTRIM', 'MGET', 'MSET',
     'MSETNX', 'OBJECT', 'PERSIST', 'PEXPIRE', 'PEXPIREAT', 'PING', 'PSETEX', 'PTTL',
     'RANDOMKEY', 'RENAME', 'RENAMENX', 'RESTORE', 'RPOP', 'SADD', 'SET', 'SISMEMBER',
     'SMEMBERS', 'SREM', 'TIME', 'TTL', 'TYPE', 'ZADD', 'ZRANGE', 'ZREM'),
    caseless=True).setResultsName('command')
parameters = (OneOrMore(Word(alphanums + '-' + punctuation))).setResultsName('parameters')
redis_grammar = command + Optional(White().suppress() + parameters)

# ################################################################################################################################

class LuaContainer(object):
    """ A class which knows how to add and execute Lua scripts against Redis.
    """
    def __init__(self, kvdb=None, initial_programs=None):
        self.kvdb = kvdb
        self.lua_programs = {}
        self.add_initial_lua_programs(initial_programs or {})

    def add_initial_lua_programs(self, programs):
        # `programs` is a dict, so iterate over its (name, program) pairs
        for name, program in programs.items():
            self.add_lua_program(name, program)
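# Usage sketch: redis_grammar splits a raw command line into the command name
# and its parameters (requires `punctuation` from the stdlib `string` module
# to be in scope, as above).
tokens = redis_grammar.parseString('SET user:1 alice')
print(tokens.command)           # SET
print(list(tokens.parameters))  # ['user:1', 'alice']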
import pyparsing
from pyparsing import Optional, White, Word, Regex, alphas, CaselessLiteral, \
    CaselessKeyword, oneOf, delimitedList, Forward, ZeroOrMore, NotAny, Keyword, Literal
from functools import reduce  # `reduce` is a builtin only on Python 2

# transformString doesn't play nice with suppressed anything, including
# whitespace. So it seems like the only effective way to use it is with
# explicit whitespace.
pyparsing.ParserElement.setDefaultWhitespaceChars('')
W = White()
OW = Optional(White())
CKeyword = CaselessKeyword

comma_list = lambda x: x + ZeroOrMore(OW + ',' + OW + x)

unary_op = oneOf('- + ~', caseless=True)
unary_op |= CKeyword('NOT')
# TODO this does not encode precedence
binary_op = oneOf("|| * / % + - << >> & | < <= > >= = == != <>", caseless=True)
binary_op |= reduce(lambda x, y: x | y,
                    [CKeyword(x) for x in 'IS,IS NOT,IN,LIKE,GLOB,MATCH,REGEXP,AND,OR'.split(',')])

# these direct from the SQLite docs
KEYWORDS = 'ABORT ACTION ADD AFTER ALL ALTER ANALYZE AND AS ASC ATTACH AUTOINCREMENT BEFORE BEGIN BETWEEN BY CASCADE CASE CAST CHECK COLLATE COLUMN COMMIT CONFLICT CONSTRAINT CREATE CROSS CURRENT_DATE CURRENT_TIME CURRENT_TIMESTAMP DATABASE DEFAULT DEFERRABLE DEFERRED DELETE DESC DETACH DISTINCT DROP EACH ELSE END ESCAPE EXCEPT EXCLUSIVE EXISTS EXPLAIN FAIL FOR FOREIGN FROM FULL GLOB GROUP HAVING IF IGNORE IMMEDIATE IN INDEX INDEXED INITIALLY INNER INSERT INSTEAD INTERSECT INTO IS ISNULL JOIN KEY LEFT LIKE LIMIT MATCH NATURAL NO NOT NOTNULL NULL OF OFFSET ON OR ORDER OUTER PLAN PRAGMA PRIMARY QUERY RAISE REFERENCES REGEXP REINDEX RELEASE RENAME REPLACE RESTRICT RIGHT ROLLBACK ROW SAVEPOINT SELECT SET TABLE TEMP TEMPORARY THEN TO TRANSACTION TRIGGER UNION UNIQUE UPDATE USING VACUUM VALUES VIEW VIRTUAL WHEN WHERE'

# TODO probably not right charset & does not account for escaping identifiers
# https://www.sqlite.org/lang_keywords.html
identifier = NotAny(
    reduce(lambda x, y: x | y, [CKeyword(x) for x in KEYWORDS.split(' ')])
) + Regex('[a-zA-Z_][a-zA-Z0-9_]*')
# for the purposes of attaching parse actions to these
def get_logical_operator():
    return CaselessLiteral('AND') | CaselessLiteral('OR') | White().suppress()
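# Usage sketch: AND/OR match case-insensitively; bare whitespace between terms
# is treated as an implicit operator and suppressed.
print(get_logical_operator().parseString('and'))  # ['AND']
print(get_logical_operator().parseString('   '))  # []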
def formula_grammar(table):
    """
    Construct a parser for molecular formulas.

    :Parameters:
        *table* = None : PeriodicTable
            If table is specified, then elements and their associated fields
            will be chosen from that periodic table rather than the default.

    :Returns:
        *parser* : pyparsing.ParserElement.
            The ``parser.parseString()`` method returns a list of
            pairs (*count, fragment*), where fragment is an *isotope*,
            an *element* or a list of pairs (*count, fragment*).
    """
    # Recursive
    composite = Forward()
    mixture = Forward()

    # whitespace and separators
    space = Optional(White().suppress())
    separator = space + Literal('+').suppress() + space

    # Lookup the element in the element table
    symbol = Regex("[A-Z][a-z]*")
    symbol = symbol.setParseAction(lambda s, l, t: table.symbol(t[0]))

    # Translate isotope
    openiso = Literal('[').suppress()
    closeiso = Literal(']').suppress()
    isotope = Optional(~White() + openiso + Regex("[1-9][0-9]*") + closeiso,
                       default='0')
    isotope = isotope.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 0)

    # Translate ion
    openion = Literal('{').suppress()
    closeion = Literal('}').suppress()
    ion = Optional(~White() + openion + Regex("([1-9][0-9]*)?[+-]") + closeion,
                   default='0+')
    ion = ion.setParseAction(
        lambda s, l, t: int(t[0][-1] + (t[0][:-1] if len(t[0]) > 1 else '1')))

    # Translate counts
    fract = Regex("(0|[1-9][0-9]*|)([.][0-9]*)")
    fract = fract.setParseAction(lambda s, l, t: float(t[0]) if t[0] else 1)
    whole = Regex("[1-9][0-9]*")
    whole = whole.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 1)
    count = Optional(~White() + (fract | whole), default=1)

    # Convert symbol, isotope, ion, count to (count, isotope)
    element = symbol + isotope + ion + count
    def convert_element(string, location, tokens):
        """interpret string as element"""
        #print "convert_element received", tokens
        symbol, isotope, ion, count = tokens[0:4]
        if isotope != 0:
            symbol = symbol[isotope]
        if ion != 0:
            symbol = symbol.ion[ion]
        return (count, symbol)
    element = element.setParseAction(convert_element)

    # Convert "count elements" to a pair
    implicit_group = count + OneOrMore(element)
    def convert_implicit(string, location, tokens):
        """convert count followed by fragment"""
        #print "implicit", tokens
        count = tokens[0]
        fragment = tokens[1:]
        return fragment if count == 1 else (count, fragment)
    implicit_group = implicit_group.setParseAction(convert_implicit)

    # Convert "(composite) count" to a pair
    opengrp = space + Literal('(').suppress() + space
    closegrp = space + Literal(')').suppress() + space
    explicit_group = opengrp + composite + closegrp + count
    def convert_explicit(string, location, tokens):
        """convert (fragment)count"""
        #print "explicit", tokens
        count = tokens[-1]
        fragment = tokens[:-1]
        return fragment if count == 1 else (count, fragment)
    explicit_group = explicit_group.setParseAction(convert_explicit)

    # Build composite from a set of groups
    group = implicit_group | explicit_group
    implicit_separator = separator | space
    composite << group + ZeroOrMore(implicit_separator + group)

    density = Literal('@').suppress() + count + Optional(Regex("[ni]"), default='i')
    compound = composite + Optional(density, default=None)
    def convert_compound(string, location, tokens):
        """convert material @ density"""
        #print "compound", tokens
        if tokens[-1] is None:
            return Formula(structure=_immutable(tokens[:-1]))
        elif tokens[-1] == 'n':
            return Formula(structure=_immutable(tokens[:-2]),
                           natural_density=tokens[-2])
        else:
            return Formula(structure=_immutable(tokens[:-2]),
                           density=tokens[-2])
    compound = compound.setParseAction(convert_compound)

    partsep = space + Literal('//').suppress() + space
    percent = Literal('%').suppress()

    weight_percent = Regex("%(w((eigh)?t)?|m(ass)?)").suppress() + space
    by_weight = (count + weight_percent + mixture
                 + ZeroOrMore(partsep + count + (weight_percent | percent) + mixture)
                 + partsep + mixture)
    def convert_by_weight(string, location, tokens):
        """convert mixture by %wt or %mass"""
        #print "by weight", tokens
        piece = tokens[1:-1:2] + [tokens[-1]]
        fract = [float(v) for v in tokens[:-1:2]]
        fract.append(100 - sum(fract))
        #print piece, fract
        if len(piece) != len(fract):
            raise ValueError("Missing base component of mixture")
        if fract[-1] < 0:
            raise ValueError("Formula percentages must sum to less than 100%")
        return _mix_by_weight_pairs(zip(piece, fract))
    mixture_by_weight = by_weight.setParseAction(convert_by_weight)

    volume_percent = Regex("%v(ol(ume)?)?").suppress() + space
    by_volume = (count + volume_percent + mixture
                 + ZeroOrMore(partsep + count + (volume_percent | percent) + mixture)
                 + partsep + mixture)
    def convert_by_volume(string, location, tokens):
        """convert mixture by %vol"""
        #print "by volume", tokens
        piece = tokens[1:-1:2] + [tokens[-1]]
        fract = [float(v) for v in tokens[:-1:2]]
        fract.append(100 - sum(fract))
        #print piece, fract
        if len(piece) != len(fract):
            raise ValueError("Missing base component of mixture " + string)
        if fract[-1] < 0:
            raise ValueError("Formula percentages must sum to less than 100%")
        return _mix_by_volume_pairs(zip(piece, fract))
    mixture_by_volume = by_volume.setParseAction(convert_by_volume)

    mixture_by_layer = Forward()
    layer_thick = Group(count + Regex(LENGTH_RE) + space)
    layer_part = (layer_thick + mixture) | (opengrp + mixture_by_layer + closegrp + count)
    mixture_by_layer << layer_part + ZeroOrMore(partsep + layer_part)
    def convert_by_layer(string, location, tokens):
        """convert layer thickness '# nm material'"""
        if len(tokens) < 2:
            return tokens
        piece = []
        fract = []
        for p1, p2 in zip(tokens[0::2], tokens[1::2]):
            if isinstance(p1, Formula):
                f = p1.absthick * float(p2)
                p = p1
            else:
                f = float(p1[0]) * LENGTH_UNITS[p1[1]]
                p = p2
            piece.append(p)
            fract.append(f)
        total = sum(fract)
        vfract = [(v / total) * 100 for v in fract]
        result = _mix_by_volume_pairs(zip(piece, vfract))
        result.thickness = total
        return result
    mixture_by_layer = mixture_by_layer.setParseAction(convert_by_layer)

    mixture_by_absmass = Forward()
    absmass_mass = Group(count + Regex(MASS_VOLUME_RE) + space)
    absmass_part = (absmass_mass + mixture) | (opengrp + mixture_by_absmass + closegrp + count)
    mixture_by_absmass << absmass_part + ZeroOrMore(partsep + absmass_part)
    def convert_by_absmass(string, location, tokens):
        """convert mass '# mg material'"""
        if len(tokens) < 2:
            return tokens
        piece = []
        fract = []
        for p1, p2 in zip(tokens[0::2], tokens[1::2]):
            if isinstance(p1, Formula):
                p = p1
                f = p1.total_mass * float(p2)
            else:
                p = p2
                value = float(p1[0])
                if p1[1] in VOLUME_UNITS:
                    # convert to volume in liters to mass in grams before mixing
                    if p.density is None:
                        raise ValueError("Need the mass density of " + str(p))
                    f = value * VOLUME_UNITS[p1[1]] * 1000. * p.density
                else:
                    f = value * MASS_UNITS[p1[1]]
            piece.append(p)
            fract.append(f)
        total = sum(fract)
        mfract = [(m / total) * 100 for m in fract]
        result = _mix_by_weight_pairs(zip(piece, mfract))
        result.total_mass = total
        return result
    mixture_by_absmass = mixture_by_absmass.setParseAction(convert_by_absmass)

    ungrouped_mixture = (mixture_by_weight | mixture_by_volume
                         | mixture_by_layer | mixture_by_absmass)
    grouped_mixture = opengrp + ungrouped_mixture + closegrp + Optional(density, default=None)
    def convert_mixture(string, location, tokens):
        """convert (mixture) @ density"""
        formula = tokens[0]
        if tokens[-1] == 'n':
            formula.natural_density = tokens[-2]
        elif tokens[-1] == 'i':
            formula.density = tokens[-2]
        # elif tokens[-1] is None
        return formula
    grouped_mixture = grouped_mixture.setParseAction(convert_mixture)

    mixture << (compound | grouped_mixture)
    formula = (compound | ungrouped_mixture | grouped_mixture)
    grammar = Optional(formula, default=Formula()) + StringEnd()

    grammar.setName('Chemical Formula')
    return grammar
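# Usage sketch for formula_grammar, assuming a periodictable-style `table`
# plus the module's Formula/_immutable/mixture helpers referenced above.
grammar = formula_grammar(table)
water = grammar.parseString('H2O')[0]  # a Formula built via convert_compound
print(water)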
def _tdb_grammar(): #pylint: disable=R0914
    """
    Convenience function for getting the pyparsing grammar of a TDB file.
    """
    int_number = Word(nums).setParseAction(lambda t: [int(t[0])])
    # symbol name, e.g., phase name, function name
    symbol_name = Word(alphanums+'_:', min=1)
    ref_phase_name = symbol_name = Word(alphanums+'_-:()/', min=1)
    # species name, e.g., CO2, AL, FE3+
    species_name = Word(alphanums+'+-*/_.', min=1) + Optional(Suppress('%'))
    # constituent arrays are colon-delimited
    # each subarray can be comma- or space-delimited
    constituent_array = Group(delimitedList(
        Group(OneOrMore(Optional(Suppress(',')) + species_name)), ':'))
    param_types = MatchFirst([TCCommand(param_type) for param_type in TDB_PARAM_TYPES])
    # Let sympy do heavy arithmetic / algebra parsing for us
    # a convenience function will handle the piecewise details
    func_expr = (float_number | ZeroOrMore(',').setParseAction(lambda t: 0.01)) + OneOrMore(SkipTo(';') \
        + Suppress(';') + ZeroOrMore(Suppress(',')) + Optional(float_number) + \
        Suppress(Word('YNyn', exact=1) | White()))
    # ELEMENT
    cmd_element = TCCommand('ELEMENT') + Word(alphas+'/-', min=1, max=2) + ref_phase_name + \
        float_number + float_number + float_number + LineEnd()
    # SPECIES
    cmd_species = TCCommand('SPECIES') + species_name + chemical_formula + LineEnd()
    # TYPE_DEFINITION
    cmd_typedef = TCCommand('TYPE_DEFINITION') + \
        Suppress(White()) + CharsNotIn(' !', exact=1) + SkipTo(LineEnd())
    # FUNCTION
    cmd_function = TCCommand('FUNCTION') + symbol_name + \
        func_expr.setParseAction(_make_piecewise_ast)
    # ASSESSED_SYSTEMS
    cmd_ass_sys = TCCommand('ASSESSED_SYSTEMS') + SkipTo(LineEnd())
    # DEFINE_SYSTEM_DEFAULT
    cmd_defsysdef = TCCommand('DEFINE_SYSTEM_DEFAULT') + SkipTo(LineEnd())
    # DEFAULT_COMMAND
    cmd_defcmd = TCCommand('DEFAULT_COMMAND') + SkipTo(LineEnd())
    # DATABASE_INFO
    cmd_database_info = TCCommand('DATABASE_INFO') + SkipTo(LineEnd())
    # VERSION_DATE
    cmd_version_date = TCCommand('VERSION_DATE') + SkipTo(LineEnd())
    # REFERENCE_FILE
    cmd_reference_file = TCCommand('REFERENCE_FILE') + SkipTo(LineEnd())
    # ADD_REFERENCES
    cmd_add_ref = TCCommand('ADD_REFERENCES') + SkipTo(LineEnd())
    # LIST_OF_REFERENCES
    cmd_lor = TCCommand('LIST_OF_REFERENCES') + SkipTo(LineEnd())
    # TEMPERATURE_LIMITS
    cmd_templim = TCCommand('TEMPERATURE_LIMITS') + SkipTo(LineEnd())
    # PHASE
    cmd_phase = TCCommand('PHASE') + symbol_name + \
        Suppress(White()) + CharsNotIn(' !', min=1) + Suppress(White()) + \
        Suppress(int_number) + Group(OneOrMore(float_number)) + \
        Suppress(SkipTo(LineEnd()))
    # CONSTITUENT
    cmd_constituent = TCCommand('CONSTITUENT') + symbol_name + \
        Suppress(White()) + Suppress(':') + constituent_array + \
        Suppress(':') + LineEnd()
    # PARAMETER
    cmd_parameter = TCCommand('PARAMETER') + param_types + \
        Suppress('(') + symbol_name + \
        Optional(Suppress('&') + Word(alphas+'/-', min=1, max=2), default=None) + \
        Suppress(',') + constituent_array + \
        Optional(Suppress(';') + int_number, default=0) + \
        Suppress(')') + func_expr.setParseAction(_make_piecewise_ast)
    # Now combine the grammar together
    all_commands = cmd_element | \
                   cmd_species | \
                   cmd_typedef | \
                   cmd_function | \
                   cmd_ass_sys | \
                   cmd_defsysdef | \
                   cmd_defcmd | \
                   cmd_database_info | \
                   cmd_version_date | \
                   cmd_reference_file | \
                   cmd_add_ref | \
                   cmd_lor | \
                   cmd_templim | \
                   cmd_phase | \
                   cmd_constituent | \
                   cmd_parameter
    return all_commands
def parser(text):
    cvtTuple = lambda toks: tuple(toks.asList())
    cvtRaw = lambda toks: RawString(' '.join(map(str, toks.asList())))
    #cvtDict = lambda toks: dict(toks.asList())
    cvtGlobDict = lambda toks: GlobDict(toks.asList())
    cvtDict = cvtGlobDict
    extractText = lambda s, l, t: RawString(s[t._original_start:t._original_end])

    def pythonize(toks):
        s = toks[0]
        if s == 'true':
            return True
        elif s == 'false':
            return False
        elif s == 'none':
            return [None]
        elif s.isdigit():
            return int(s)
        elif re.match('(?i)^-?(\d+\.?e\d+|\d+\.\d*|\.\d+)$', s):
            return float(s)
        return toks[0]

    def noneDefault(s, loc, t):
        return t if len(t) else [RawEOL]

    # define punctuation as suppressed literals
    lbrace, rbrace = map(Suppress, "{}")

    identifier = Word(printables, excludeChars='{}"\'')
    quotedStr = QuotedString('"', escChar='\\', multiline=True) | \
        QuotedString('\'', escChar='\\', multiline=True)
    quotedIdentifier = QuotedString('"', escChar='\\', unquoteResults=False) | \
        QuotedString('\'', escChar='\\', unquoteResults=False)

    dictStr = Forward()
    setStr = Forward()
    objStr = Forward()

    #anyIdentifier = identifier | quotedIdentifier
    oddIdentifier = identifier + quotedIdentifier
    dictKey = dictStr | quotedStr | \
        Combine(oddIdentifier).setParseAction(cvtRaw)
    dictKey.setParseAction(cvtRaw)

    dictValue = quotedStr | dictStr | setStr | \
        Combine(oddIdentifier).setParseAction(cvtRaw)

    if OLD_STYLE_KEYS:
        dictKey |= Combine(identifier + ZeroOrMore(
            White(' ') + (identifier + ~FollowedBy(Optional(White(' ')) + LineEnd()))))
        dictValue |= identifier.setParseAction(pythonize)
    else:
        dictKey |= identifier
        dictValue |= delimitedList(identifier | quotedIdentifier,
                                   delim=White(' '), combine=True).setParseAction(pythonize)

    ParserElement.setDefaultWhitespaceChars(' \t')
    #dictEntry = Group(Combine(OneOrMore(identifier | quotedIdentifier)).setParseAction(cvtRaw) +
    dictEntry = Group(dictKey +
                      Optional(White(' ').suppress() + dictValue).setParseAction(noneDefault) +
                      Optional(White(' ').suppress()) +
                      LineEnd().suppress())
    #dictEntry = Group(SkipTo(dictKey + LineEnd() + dictKey))
    dictStr << (lbrace + ZeroOrMore(dictEntry) + rbrace)
    dictStr.setParseAction(cvtDict)
    ParserElement.setDefaultWhitespaceChars(' \t\r\n')

    setEntry = identifier.setParseAction(pythonize) | quotedString.setParseAction(removeQuotes)
    setStr << (lbrace + delimitedList(setEntry, delim=White()) + rbrace)
    setStr.setParseAction(cvtTuple)

    # TODO: take other literals as arguments
    blobObj = Group(((Literal('ltm') + Literal('rule') + identifier) |
                     (Literal('rule') + identifier)).setParseAction(cvtRaw) +
                    originalTextFor(nestedExpr('{', '}')).setParseAction(extractText))

    objEntry = Group(OneOrMore(identifier | quotedIdentifier).setParseAction(cvtRaw) +
                     Optional(dictStr).setParseAction(noneDefault))
    objStr << (Optional(delimitedList(blobObj | objEntry, delim=LineEnd())))
    objStr.setParseAction(cvtGlobDict)
    #objStr.setParseAction(cvtTuple)
    objStr.ignore(pythonStyleComment)

    return objStr.parseString(text)[0]
def _make_default_parser():
    escapechar = "\\"

    #wordchars = printables
    #for specialchar in '*?^():"{}[] ' + escapechar:
    #    wordchars = wordchars.replace(specialchar, "")
    #wordtext = Word(wordchars)

    wordtext = CharsNotIn('\\*?^():"{}[] ')
    escape = Suppress(escapechar) + (Word(printables, exact=1) | White(exact=1))
    wordtoken = Combine(OneOrMore(wordtext | escape))

    # A plain old word.
    plainWord = Group(wordtoken).setResultsName("Word")

    # A wildcard word containing * or ?.
    wildchars = Word("?*")
    # Start with word chars and then have wild chars mixed in
    wildmixed = wordtoken + OneOrMore(wildchars + Optional(wordtoken))
    # Or, start with wildchars, and then either a mixture of word and wild chars,
    # or the next token
    wildstart = wildchars + (OneOrMore(wordtoken + Optional(wildchars)) |
                             FollowedBy(White() | StringEnd()))
    wildcard = Group(Combine(wildmixed | wildstart)).setResultsName("Wildcard")

    # A range of terms
    startfence = Literal("[") | Literal("{")
    endfence = Literal("]") | Literal("}")
    rangeitem = QuotedString('"') | wordtoken
    openstartrange = Group(Empty()) + Suppress(Keyword("TO") + White()) + Group(rangeitem)
    openendrange = Group(rangeitem) + Suppress(White() + Keyword("TO")) + Group(Empty())
    normalrange = Group(rangeitem) + Suppress(White() + Keyword("TO") + White()) + Group(rangeitem)
    range = Group(startfence + (normalrange | openstartrange | openendrange) +
                  endfence).setResultsName("Range")

    # A word-like thing
    generalWord = range | wildcard | plainWord

    # A quoted phrase
    quotedPhrase = Group(QuotedString('"')).setResultsName("Quotes")

    expression = Forward()

    # Parentheses can enclose (group) any expression
    parenthetical = Group((Suppress("(") + expression + Suppress(")"))).setResultsName("Group")

    boostableUnit = generalWord | quotedPhrase
    boostedUnit = Group(boostableUnit + Suppress("^") +
                        Word("0123456789", ".0123456789")).setResultsName("Boost")

    # The user can flag that a parenthetical group, quoted phrase, or word
    # should be searched in a particular field by prepending 'fn:', where fn is
    # the name of the field.
    fieldableUnit = parenthetical | boostedUnit | boostableUnit
    fieldedUnit = Group(Word(alphanums + "_") + Suppress(':') +
                        fieldableUnit).setResultsName("Field")

    # Units of content
    unit = fieldedUnit | fieldableUnit

    # A unit may be "not"-ed.
    operatorNot = Group(Suppress(Keyword("not", caseless=True)) +
                        Suppress(White()) + unit).setResultsName("Not")
    generalUnit = operatorNot | unit

    andToken = Keyword("AND", caseless=False)
    orToken = Keyword("OR", caseless=False)
    andNotToken = Keyword("ANDNOT", caseless=False)

    operatorAnd = Group(generalUnit + Suppress(White()) + Suppress(andToken) +
                        Suppress(White()) + expression).setResultsName("And")
    operatorOr = Group(generalUnit + Suppress(White()) + Suppress(orToken) +
                       Suppress(White()) + expression).setResultsName("Or")
    operatorAndNot = Group(unit + Suppress(White()) + Suppress(andNotToken) +
                           Suppress(White()) + unit).setResultsName("AndNot")

    expression << (OneOrMore(operatorAnd | operatorOr | operatorAndNot |
                             generalUnit | Suppress(White())) | Empty())

    toplevel = Group(expression).setResultsName("Toplevel") + StringEnd()

    return toplevel.parseString
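# Usage sketch: _make_default_parser() returns toplevel.parseString, so the
# result can be applied directly to a query string.
parse = _make_default_parser()
print(parse('title:"hello world" OR body:greeting^2').dump())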
class NetworkParser(object):

    interface = Word(alphanums)
    key = Word(alphanums + "-_")
    space = White().suppress()
    value = CharsNotIn("{}\n#")
    line = Regex("^.*$")
    comment = ("#")
    method = Regex("loopback|manual|dhcp|static")
    stanza = Regex("auto|iface|mapping")
    option_key = Regex("bridge_\w*|post-\w*|up|down|pre-\w*|address"
                       "|network|netmask|gateway|broadcast|dns-\w*|scope|"
                       "pointtopoint|metric|hwaddress|mtu|hostname|"
                       "leasehours|leasetime|vendor|client|bootfile|server"
                       "|mode|endpoint|dstaddr|local|ttl|provider|unit"
                       "|options|frame|netnum|media")
    _eol = Literal("\n").suppress()
    option = Forward()
    option << Group(space
                    #+ Regex("^\s*")
                    + option_key
                    + space
                    + SkipTo(_eol))
    interface_block = Forward()
    interface_block << Group(stanza
                             + space
                             + interface
                             + Optional(space
                                        + Regex("inet")
                                        + method
                                        + Group(ZeroOrMore(option))))
    # + Group(ZeroOrMore(assignment)))

    interface_file = OneOrMore(interface_block).ignore(pythonStyleComment)

    file_header = """# File parsed and saved by privacyidea.\n\n"""

    def __init__(self, infile="/etc/network/interfaces", content=None):
        self.filename = None
        if content:
            self.content = content
        else:
            self.filename = infile
            self._read()
        self.interfaces = self.get_interfaces()

    def _read(self):
        """
        Reread the contents from the disk
        """
        f = codecs.open(self.filename, "r", "utf-8")
        self.content = f.read()
        f.close()

    def get(self):
        """
        return the grouped config
        """
        if self.filename:
            self._read()
        config = self.interface_file.parseString(self.content)
        return config

    def save(self, filename=None):
        if not filename and not self.filename:
            raise Exception("No filename specified")

        # The given filename overrules the own filename
        fname = filename or self.filename
        f = open(fname, "w")
        f.write(self.format())
        f.close()

    def format(self):
        """
        Format the parsed interfaces, e.g. for writing to a file.

        {"eth0": {"auto": True,
                  "method": "static",
                  "options": {"address": "1.1.1.1",
                              "netmask": "255.255.255.0"}}}

        results in

        auto eth0
        iface eth0 inet static
            address 1.1.1.1
            netmask 255.255.255.0

        :return: string
        """
        output = ""
        for iface, iconfig in self.interfaces.items():
            if iconfig.get("auto"):
                output += "auto %s\n" % iface

            output += "iface %s inet %s\n" % (iface,
                                              iconfig.get("method", "manual"))
            # options
            for opt_key, opt_value in iconfig.get("options", {}).items():
                output += "    %s %s\n" % (opt_key, opt_value)
            # add a new line
            output += "\n"
        return output

    def get_interfaces(self):
        """
        return the configuration by interfaces as a dictionary like

        { "eth0": {"auto": True,
                   "method": "static",
                   "options": {"address": "192.168.1.1",
                               "netmask": "255.255.255.0",
                               "gateway": "192.168.1.254",
                               "dns-nameserver": "1.2.3.4"}}}

        :return: dict
        """
        interfaces = {}
        np = self.get()
        for idefinition in np:
            interface = idefinition[1]
            if interface not in interfaces:
                interfaces[interface] = {}
            # auto?
            if idefinition[0] == "auto":
                interfaces[interface]["auto"] = True
            elif idefinition[0] == "iface":
                method = idefinition[3]
                interfaces[interface]["method"] = method
                # check for options
                if len(idefinition) == 5:
                    options = {}
                    for o in idefinition[4]:
                        options[o[0]] = o[1]
                    interfaces[interface]["options"] = options
        return interfaces
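# Usage sketch: parsing an interfaces-style config passed in as a string
# (the made-up content below follows the /etc/network/interfaces format).
content = ("auto eth0\n"
           "iface eth0 inet static\n"
           "    address 192.168.1.1\n"
           "    netmask 255.255.255.0\n")
np = NetworkParser(content=content)
print(np.interfaces)
print(np.format())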
def __init__(self, query):
    self._methods = {
        'and': self.evaluate_and,
        'or': self.evaluate_or,
        'not': self.evaluate_not,
        'parenthesis': self.evaluate_parenthesis,
        'quotes': self.evaluate_quotes,
        'word': self.evaluate_word,
    }

    self.line = ''
    self.query = query.lower() if query else ''

    if self.query:
        # TODO: Cleanup
        operator_or = Forward()
        operator_word = Group(Word(alphanums)).setResultsName('word')

        operator_quotes_content = Forward()
        operator_quotes_content << (
            (operator_word + operator_quotes_content) | operator_word)

        operator_quotes = Group(
            Suppress('"') + operator_quotes_content + Suppress('"')
        ).setResultsName('quotes') | operator_word

        operator_parenthesis = Group(
            (Suppress('(') + operator_or + Suppress(")"))
        ).setResultsName('parenthesis') | operator_quotes

        operator_not = Forward()
        operator_not << (
            Group(Suppress(Keyword('not', caseless=True)) +
                  operator_not).setResultsName('not') | operator_parenthesis)

        operator_and = Forward()
        operator_and << (
            Group(operator_not + Suppress(Keyword('and', caseless=True)) +
                  operator_and).setResultsName('and') |
            Group(operator_not + OneOrMore(~oneOf('and or') + operator_and)
                  ).setResultsName('and') |
            operator_not)

        operator_or << (
            Group(operator_and + Suppress(Keyword('or', caseless=True)) +
                  operator_or).setResultsName('or') | operator_and)

        self._query_parser = operator_or.parseString(self.query)[0]
    else:
        self._query_parser = False

    time_cmpnt = Word(nums).setParseAction(lambda t: t[0].zfill(2))
    date = Combine((time_cmpnt + '-' + time_cmpnt + '-' + time_cmpnt) + ' ' +
                   time_cmpnt + ':' + time_cmpnt)
    word = Word(printables)

    self._log_parser = (
        date.setResultsName('timestamp') +
        word.setResultsName('log_level') +
        word.setResultsName('plugin') +
        (White(min=16).setParseAction(
            lambda s, l, t: [t[0].strip()]).setResultsName('task') |
         (White(min=1).suppress() & word.setResultsName('task'))) +
        restOfLine.setResultsName('message'))
def formula_grammar(table):
    """
    Construct a parser for molecular formulas.

    :Parameters:
        *table* = None : PeriodicTable
            If table is specified, then elements and their associated fields
            will be chosen from that periodic table rather than the default.

    :Returns:
        *parser* : pyparsing.ParserElement.
            The ``parser.parseString()`` method returns a list of
            pairs (*count,fragment*), where fragment is an *isotope*,
            an *element* or a list of pairs (*count,fragment*).
    """
    # Recursive
    formula = Forward()

    # Lookup the element in the element table
    symbol = Regex("[A-Z][a-z]*")
    symbol = symbol.setParseAction(lambda s, l, t: table.symbol(t[0]))

    # Translate isotope
    openiso = Literal('[').suppress()
    closeiso = Literal(']').suppress()
    isotope = Optional(~White() + openiso + Regex("[1-9][0-9]*") + closeiso,
                       default='0')
    isotope = isotope.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 0)

    # Translate counts
    fract = Regex("(0|[1-9][0-9]*|)([.][0-9]*)")
    fract = fract.setParseAction(lambda s, l, t: float(t[0]) if t[0] else 1)
    whole = Regex("[1-9][0-9]*")
    whole = whole.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 1)
    count = Optional(~White() + (fract | whole), default=1)

    # Convert symbol,isotope,count to (count,isotope)
    element = symbol + isotope + count
    def convert_element(string, location, tokens):
        #print "convert_element received",tokens
        symbol, isotope, count = tokens[0:3]
        if isotope != 0:
            symbol = symbol[isotope]
        return (count, symbol)
    element = element.setParseAction(convert_element)

    # Convert "count elements" to a pair
    implicit_group = count + OneOrMore(element)
    def convert_implicit(string, location, tokens):
        #print "convert_implicit received",tokens
        count = tokens[0]
        fragment = tokens[1:]
        return fragment if count == 1 else (count, fragment)
    implicit_group = implicit_group.setParseAction(convert_implicit)

    # Convert "(formula) count" to a pair
    opengrp = Literal('(').suppress()
    closegrp = Literal(')').suppress()
    explicit_group = opengrp + formula + closegrp + count
    def convert_explicit(string, location, tokens):
        #print "convert_group received",tokens
        count = tokens[-1]
        fragment = tokens[:-1]
        return fragment if count == 1 else (count, fragment)
    explicit_group = explicit_group.setParseAction(convert_explicit)

    group = implicit_group | explicit_group
    separator = Optional(Literal('+').suppress()) + Optional(White().suppress())
    formula << group + ZeroOrMore(Optional(White().suppress()) + separator + group)
    grammar = Optional(formula) + StringEnd()

    grammar.setName('Chemical Formula')
    return grammar
def script(self):
    # constants
    left_bracket = Suppress("{")
    right_bracket = Suppress("}")
    semicolon = Suppress(";")
    space = White().suppress()
    keyword = Word(alphanums + ".+-_/")
    path = Word(alphanums + ".-_/")
    variable = Word("$_-" + alphanums)
    value_wq = Regex(r'(?:\([^\s;]*\)|\$\{\w+\}|[^\s;(){}])+')
    value_sq = NginxQuotedString(quoteChar="'")
    value_dq = NginxQuotedString(quoteChar='"')
    value = (value_dq | value_sq | value_wq)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    location_modifier = (Keyword("=") | Keyword("~*") | Keyword("~") | Keyword("^~"))
    # modifier for if statement
    if_modifier = Combine(Optional("!") + (
        Keyword("=") |
        Keyword("~*") |
        Keyword("~") |
        (Literal("-") + (Literal("f") | Literal("d") | Literal("e") | Literal("x")))))
    # This ugly workaround is needed to parse an unquoted regex with nested
    # parentheses, so we capture all content between parentheses and then
    # parse it :(
    # TODO(buglloc): maybe use something better?
    condition_body = (
        (if_modifier + Optional(space) + value) |
        (variable + Optional(space + if_modifier + Optional(space) + value)))
    condition = Regex(r'\((?:[^()\n\r\\]|(?:\(.*\))|(?:\\.))+?\)') \
        .setParseAction(lambda s, l, t: condition_body.parseString(t[0][1:-1]))

    # rules
    include = (Keyword("include") + space + value + semicolon)("include")
    directive = (keyword + ZeroOrMore(space + value) + semicolon)("directive")
    file_delimiter = (Suppress("# configuration file ") + path + Suppress(":"))("file_delimiter")
    comment = (Regex(r"#.*"))("comment").setParseAction(_fix_comment)
    hash_value = Group(value + ZeroOrMore(space + value) + semicolon)("hash_value")

    generic_block = Forward()
    if_block = Forward()
    location_block = Forward()
    hash_block = Forward()
    unparsed_block = Forward()

    sub_block = OneOrMore(Group(if_block |
                                location_block |
                                hash_block |
                                generic_block |
                                include |
                                directive |
                                file_delimiter |
                                comment |
                                unparsed_block))

    if_block << (
        Keyword("if") +
        Group(condition) +
        Group(left_bracket + Optional(sub_block) + right_bracket))("block")

    location_block << (
        Keyword("location") +
        Group(Optional(space + location_modifier) + Optional(space) + value) +
        Group(left_bracket + Optional(sub_block) + right_bracket))("block")

    hash_block << (
        keyword +
        Group(OneOrMore(space + value)) +
        Group(left_bracket + Optional(OneOrMore(hash_value)) + right_bracket))("block")

    generic_block << (
        keyword +
        Group(ZeroOrMore(space + value)) +
        Group(left_bracket + Optional(sub_block) + right_bracket))("block")

    unparsed_block << (
        keyword +
        Group(ZeroOrMore(space + value)) +
        nestedExpr(opener="{", closer="}"))("unparsed_block")

    return sub_block
def all_in(name):
    return (tag('allin{}'.format(name))
            + Suppress(':')
            + Suppress(Optional(White()))
            + OneOrMore(word, stopOn=any_tag | StringEnd())).setParseAction(tag_value)
# ADD and SUB are referenced by the program rule below; the excerpt starts
# after their definitions, so they are assumed here to be the brainfuck
# '+' and '-' commands.
ADD = Literal('+')    # increment the byte at the data pointer.
SUB = Literal('-')    # decrement the byte at the data pointer.
INCP = Literal('>')   # increment the data pointer (to point to the next cell to the right).
DECP = Literal('<')   # decrement the data pointer (to point to the next cell to the left).
INPUT = Literal(',')  # accept one byte of input, storing its value in the byte at the data pointer.
OUTPUT = Literal('.') # output the byte at the data pointer.
OPEN_LOOP = Literal('[')   # if the byte at the data pointer is zero, then instead of moving the
                           # instruction pointer forward to the next command, jump it forward to
                           # the command after the matching ] command.
CLOSE_LOOP = Literal(']')  # if the byte at the data pointer is nonzero, then instead of moving the
                           # instruction pointer forward to the next command, jump it back to the
                           # command after the matching [ command.
COMMENTS = Combine(Word(printables) + White(ws='\n') | Word(printables))

program = ZeroOrMore(ADD | SUB | INCP | DECP | INPUT | OUTPUT
                     | OPEN_LOOP | CLOSE_LOOP | Suppress(COMMENTS))


class LEXER(object):
    def __init__(self, file):
        self.path = file
        self.token_list = []

    def tokenize_file(self):
        try:
            return program.parseFile(self.path)
        except ParseException:
            # surface parse failures to the caller (the original bare
            # `except:` clause was cut off at this point)
            raise
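# Usage sketch: tokenizing a string directly instead of a file. Comment words
# are suppressed; command characters come back as individual tokens.
print(program.parseString('++[>+<-]. done', parseAll=True))
# -> ['+', '+', '[', '>', '+', '<', '-', ']', '.']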
from pyparsing import Word, oneOf, White, OneOrMore, alphanums, LineEnd, \
    Group, Suppress, Literal, printables, ParseException, ungroup

# For tags that have an argument in the form of
# a conditional expression. The reason this is done
# is so that a tag with the ">" operator in the
# arguments will parse correctly.
OPERAND = Word(alphanums + "." + '"' + '/-' + "*:^_![]?$%@)(#=`" + '\\')
OPERATOR = oneOf(["<=", ">=", "==", "!=", "<", ">", "~"], useRegex=False)
EXPRESSION_TAG = OPERAND + White() + OPERATOR + White() + OPERAND

# LITERAL_TAG will match tags that do not have
# a conditional expression. So any other tag
# with arguments that don't contain OPERATORs
LITERAL_TAG = OneOrMore(Word(
    alphanums + '*:' + '/' + '"-' + '.' + " " + "^" + "_" + "!" + "[]?$" + "'" + '\\'
))

# Will match the start of any tag
TAG_START_GRAMMAR = Group(Literal("<")
                          + (EXPRESSION_TAG | LITERAL_TAG)
                          + Literal(">")
                          + LineEnd())

# Will match the end of any tag
TAG_END_GRAMMAR = Group(Literal("</")
                        + Word(alphanums)
                        + Literal(">")
                        + LineEnd())

# Will match any directive. We are performing
# a simple parse by matching the directive on
# the left, and everything else on the right.
ANY_DIRECTIVE = Group(Word(alphanums)
                      + Suppress(White())
                      + Word(printables + " ")
                      + LineEnd())
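# Usage sketch: parsing Apache-style tag lines and a directive line.
print(TAG_START_GRAMMAR.parseString('<VirtualHost *:80>\n'))
print(TAG_END_GRAMMAR.parseString('</VirtualHost>\n'))
print(ANY_DIRECTIVE.parseString('ServerName example.com\n'))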
import re
import sys

from pyparsing import (Word, White, Literal, Regex, LineEnd, SkipTo,
                       ZeroOrMore, OneOrMore, Combine, Optional, Suppress,
                       Group, ParserElement, stringEnd, pythonStyleComment)

EOL = LineEnd().suppress()
NUM1 = Word('0123456789abcdefABCDEF', exact=1)
NUM2 = Word('0123456789abcdefABCDEF', exact=2)
NUM3 = Word('0123456789abcdefABCDEF', exact=3)
NUM4 = Word('0123456789abcdefABCDEF', exact=4)
NUM6 = Word('0123456789abcdefABCDEF', exact=6)
TAB = White('\t', exact=1).suppress()
COMMENTLINE = pythonStyleComment + EOL
EMPTYLINE = LineEnd()
text_eol = lambda name: Regex(r'[^\n]+')(name) + EOL

ParserElement.set_default_whitespace_chars(' \n')

def klass_grammar():
    klass_line = Literal('C ').suppress() + NUM2('klass') + text_eol('text')
    subclass_line = TAB + NUM2('subclass') + text_eol('text')
    protocol_line = TAB + TAB + NUM2('protocol') + text_eol('name')
    subclass = (subclass_line('SUBCLASS')
                - ZeroOrMore(Group(protocol_line)('PROTOCOLS*')
                             ^ COMMENTLINE.suppress()))
    klass = (klass_line('KLASS')
             - ZeroOrMore(Group(subclass)('SUBCLASSES*')
                          ^ COMMENTLINE.suppress()))
# need to add support for alg expressions
columnRval = realNum | intNum | quotedString.addParseAction(removeQuotes) | columnName
whereCondition = Group(
    (columnName + binop + (columnRval | Word(printables))) |
    (columnName + in_ + "(" + delimitedList(columnRval) + ")") |
    (columnName + in_ + "(" + statement + ")") |
    ("(" + whereExpression + ")"))
whereExpression << whereCondition + ZeroOrMore((and_ | or_) + whereExpression)

''' Assignment for handoff. '''
setExpression = Forward()
setStatement = Group(
    (ident) |
    (quotedString("json_path") + AS + ident("name")) |
    ("(" + setExpression + ")"))
setExpression << setStatement + ZeroOrMore((and_ | or_) + setExpression)

optWhite = ZeroOrMore(LineEnd() | White())

""" Define the statement grammar. """
statement <<= (
    Group(
        Group(SELECT + question_graph_expression)("concepts") + optWhite +
        Group(FROM + tableNameList) + optWhite +
        Group(Optional(WHERE + whereExpression("where"), "")) + optWhite +
        Group(Optional(SET + setExpression("set"), ""))("select")) |
    Group(
        SET + (columnName + EQ + (quotedString | ident | intNum | realNum)))("set") |
    Group(
        Group(CREATE + GRAPH + ident) + optWhite +
        Group(AT + (ident | quotedString)) + optWhite +
        Group(AS + (ident | quotedString))))("statement")

""" Make a program a series of statements. """
program_grammar = statement + ZeroOrMore(statement)
from pyparsing import Literal as PPLiteral, OneOrMore, Suppress, White

from .errors import GrammarError
from .expansions import (AlternativeSet, KleeneStar, Literal, NamedRuleRef,
                         NullRef, OptionalGrouping, RequiredGrouping, Repeat,
                         Sequence, VoidRef, SingleChildExpansion)
from .grammars import Grammar, Import
from .references import (optionally_qualified_name, import_name, grammar_name,
                         word, words)
from .rules import Rule

# Define angled brackets that don't appear in the output.
langle, rangle = map(Suppress, "<>")

# Define line endings as either ; or \n. This will also gobble empty lines.
line_delimiter = Suppress(
    OneOrMore((PPLiteral(";") | White("\n")).setName("line end")))


class WrapperExpansion(SingleChildExpansion):
    """ Wrapper expansion class used during the parser's post-processing stage. """


class WeightedExpansion(SingleChildExpansion):
    """ Internal class used during parsing of alternative sets with weights. """

    def __init__(self, expansion, weight):
        super(WeightedExpansion, self).__init__(expansion)
        self.weight = weight
        self._child = expansion
class TypeDocGrammar:
    """
    EOL ::= ["\r"] "\n"
    SOL ::= LINE_START
    line ::= [^EOL]+ EOL
    word ::= alphanums + "_"
    indented_block ::= INDENT (line_indented | any_line)
    line_indented ::= any_line indented_block
    type_definition ::= ":type" [^:]+ ":" [^EOL]+
    rtype_definition ::= ":rtype:" [^EOL]+
    returns_definition ::= (":returns:" | ":return:") [^EOL]+
    param_definition ::= ":param" [^:]+ ":" [^EOL]+ EOL [indented_block]
    response_structure ::= "**Response Structure**" line [indented_block]
    typed_dict_key_line ::= "-" "**" word "**" "*(" word ")" "--*" [^EOL]+ + EOL
    type_line ::= "-" "*(" word ")" "--*" [^EOL]+ + EOL
    any_line ::= typed_dict_key_line | type_line | line
    """

    indent_stack = [1]
    SOL = LineStart().suppress()
    EOL = LineEnd().suppress()
    word = Word(alphanums + "_")
    line = SkipTo(LineEnd()) + EOL
    line_indented = Forward()
    any_line = Forward()
    indented_block = indentedBlock(
        line_indented | any_line, indentStack=indent_stack).setResultsName("indented")
    line_indented <<= any_line + indented_block

    type_definition = (SOL
                       + Literal(":type")
                       + SkipTo(":").setResultsName("name")
                       + Literal(":")
                       + SkipTo(EOL).setResultsName("type_name"))

    rtype_definition = (SOL
                        + Literal(":rtype:")
                        + SkipTo(EOL).setResultsName("type_name"))

    returns_definition = (SOL
                          + (Literal(":returns:") | Literal(":return:"))
                          + SkipTo(EOL).setResultsName("description"))

    param_definition = (SOL
                        + Literal(":param")
                        + SkipTo(":").setResultsName("name")
                        + Literal(":")
                        + SkipTo(EOL).setResultsName("description")
                        + EOL
                        + Optional(indented_block))

    response_structure = Literal("**Response Structure**") + line_indented

    typed_dict_key_line = (Literal("-")
                           + White(ws=" \t")
                           + Literal("**")
                           + word.setResultsName("name")
                           + Literal("**")
                           + White(ws=" \t")
                           + Literal("*(")
                           + word.setResultsName("type_name")
                           + Literal(")")
                           + White(ws=" \t")
                           + Literal("--*")
                           + SkipTo(EOL).setResultsName("description")
                           + EOL)

    type_line = (Literal("-")
                 + White(ws=" \t")
                 + Literal("*(")
                 + word.setResultsName("type_name")
                 + Literal(")")
                 + White(ws=" \t")
                 + Literal("--*")
                 + SkipTo(EOL).setResultsName("description")
                 + EOL)

    any_line <<= (typed_dict_key_line | type_line | line).setResultsName("line")

    @classmethod
    def fail_action(cls, _input_string: str, _chr_index: int, _source: str,
                    error: BaseException) -> None:
        if "found end of text" not in str(error):
            raise error

    @classmethod
    def reset(cls) -> None:
        cls.disable_packrat()
        cls.indented_block.setFailAction(cls.fail_action)
        cls.indent_stack.clear()
        cls.indent_stack.append(1)

    @staticmethod
    def enable_packrat() -> None:
        ParserElement.enablePackrat(cache_size_limit=128)

    @staticmethod
    def disable_packrat() -> None:
        ParserElement.enablePackrat(cache_size_limit=None)
def usfmTokenValue(key, value):
    return Group(
        Suppress(backslash)
        + Literal(key)
        + Suppress(White())
        + Optional(value))
class SmilesPattern:
    def __init__(self):
        pass

    def addRawStr(toks):
        if 'branch' in toks:
            toks['rawStr'] = toks['branch']
        else:
            toks['rawStr'] = ''.join(toks[:])
        return toks

    #whitespace = " \t\n"
    whitespace = White().leaveWhitespace()

    ### ATOM SECTION ###
    # Organic Subset section
    _aliphatic_organic = (
        Literal('Cl').setResultsName('symbol') \
        | Literal('Br').setResultsName('symbol') \
        | Word('BCNOSPFI', exact=1).setResultsName('symbol')
    ).setResultsName('organic')
    _aromatic_organic = (
        Literal('c').setResultsName('symbol') \
        | Word('bnosp', exact=1).setResultsName('symbol')
    ).setResultsName('organic')
    #_aliphatic_organic.setResultsName('organic')
    #_aromatic_organic.setResultsName('organic')

    # Bracketed Atoms section
    _isotope = Word(nums, min=1)
    _element_symbols = Literal('He') | Literal('Li') | Literal('Be') | Literal('Ne') | Literal('Na') | Literal('Mg') \
        | Literal('Al') | Literal('Si') | Literal('Cl') | Literal('Ar') | Literal('Ca') | Literal('Sc') \
        | Literal('Ti') | Literal('Cr') | Literal('Mn') | Literal('Fe') | Literal('Co') | Literal('Ni') \
        | Literal('Cu') | Literal('Zn') | Literal('Ga') | Literal('Ge') | Literal('As') | Literal('Se') \
        | Literal('Br') | Literal('Kr') | Literal('Rb') | Literal('Sr') | Literal('Zr') | Literal('Nb') \
        | Literal('Mo') | Literal('Tc') | Literal('Ru') | Literal('Rh') | Literal('Pd') | Literal('Ag') \
        | Literal('Cd') | Literal('In') | Literal('Sn') | Literal('Sb') | Literal('Te') | Literal('Xe') \
        | Literal('Cs') | Literal('Ba') | Literal('Hf') | Literal('Ta') | Literal('Re') | Literal('Os') \
        | Literal('Ir') | Literal('Pt') | Literal('Au') | Literal('Hg') | Literal('Tl') | Literal('Pb') \
        | Literal('Bi') | Literal('Po') | Literal('At') | Literal('Rn') | Literal('Fr') | Literal('Ra') \
        | Literal('Rf') | Literal('Db') | Literal('Sg') | Literal('Bh') | Literal('Hs') | Literal('Mt') \
        | Literal('Ds') | Literal('Rg') | Literal('La') | Literal('Ce') | Literal('Pr') | Literal('Nd') \
        | Literal('Pm') | Literal('Sm') | Literal('Eu') | Literal('Gd') | Literal('Tb') | Literal('Dy') \
        | Literal('Ho') | Literal('Er') | Literal('Tm') | Literal('Yb') | Literal('Lu') | Literal('Ac') \
        | Literal('Th') | Literal('Pa') | Literal('Np') | Literal('Pu') | Literal('Am') | Literal('Cm') \
        | Literal('Bk') | Literal('Cf') | Literal('Es') | Literal('Fm') | Literal('Md') | Literal('No') \
        | Literal('Lr') \
        | Literal('H') | Literal('B') | Literal('C') | Literal('N') | Literal('O') | Literal('F') | Literal('P') \
        | Literal('S') | Literal('K') | Literal('V') | Literal('Y') | Literal('I') | Literal('W') | Literal('U')
    _aromatic_symbols = Literal('se') | Literal('as') | Word('cnops', exact=1)
    _symbol = _element_symbols | _aromatic_symbols | Literal('*')

    # Chirality section
    _chiral = Literal('@@') | Literal('@')
    #| Literal('@TH1') | Literal('@TH2') \
    #| Literal('@SP1') | Literal('@SP2') | Literal('@SP3') \
    #| Literal('@AL1') | Literal('@AL2') | '@TB'+Word(nums,min=1,max=2) | '@OH'+Word(nums,min=1,max=2)
    _chiral.setParseAction(''.join)

    # Hydrogens section
    _hcount = Literal('H') + (Word('123456789', exact=1) * (0, 1)).setResultsName('nH')
    #_hcount.setParseAction(''.join)

    # Charge section
    _charge = ('-' + Word('123456789', exact=1) * (0, 1)) \
        | ('+' + Word('123456789', exact=1) * (0, 1)) \
        | Literal('--') | Literal('++')
    #_charge.setParseAction(''.join)

    # Atom Class section
    _class = ':' + Word(nums, min=1)

    # Bracketed Atom definition
    _bracket_atom = '[' + _isotope.setResultsName('isotope')*(0,1) \
        + _symbol.setResultsName('symbol') \
        + _chiral.setResultsName('chiral')*(0,1) \
        + _hcount.setResultsName('hcount')*(0,1) \
        + _charge.setResultsName('charge')*(0,1) \
        + _class.setResultsName('_class')*(0,1) \
        + ']'
    #_bracket_atom.setResultsName('bracket_atom')

    # Atom definition
    #_atom = _aliphatic_organic | _aromatic_organic | _bracket_atom | Literal('*').setResultsName('symbol')
    _atom = _aliphatic_organic \
        | _aromatic_organic \
        | _bracket_atom.setResultsName('bracket_atom') \
        | Literal('*').setResultsName('symbol')
    #def addRawStr(toks):
    #    toks['rawStr'] = ''.join(toks)
    #    return toks
    #_atom.setParseAction(addRawStr)
    _atom.leaveWhitespace()
    #_atom.setParseAction(''.join)
    #_atom.setParseAction(lambda locn,tokens: (locn,''.join(tokens[:])))

    ### BOND SECTION ###
    _bond = Word('-=#:\/', exact=1)
    _bond.leaveWhitespace()
    #_bond.setParseAction(addRawStr)

    #_ringbond = _bond*(0,1) + \
    #    (Word(nums,exact=1).setParseAction(lambda tok: [''.join(tok)]) | \
    #     (Literal('%')+Word(nums,exact=2).setResultsName('ringid')).setParseAction(lambda tok: [''.join(tok[:])]))
    _ringbond = (_bond*(0,1)).setResultsName('ringbondtype') + \
        (Word(nums,exact=1).setResultsName('ringid') | \
         Literal('%')+Word(nums,exact=2).setResultsName('ringid'))
    _ringbond.leaveWhitespace()
    #_ringbond.setParseAction(addRawStr)

    _dot = Literal('.')
    #_dot.setParseAction(addRawStr)

    _smilesChar = _ringbond.setResultsName('ringbond') | _bond.setResultsName('bond') \
        | _atom.setResultsName('atom') | _dot.setResultsName('dot')
    _branchContent = _smilesChar * (1, None)
    _branchContent.setParseAction(lambda toks: ''.join(toks))
    _branch = nestedExpr('(', ')', content=_branchContent)
    _branch.setParseAction(lambda toks: '(' + ''.join(
        [str(item) for sublist in toks for item in sublist]) + ')')
    _smilesElement = _smilesChar | _branch.setResultsName('branch')
    _smilesElement.setParseAction(addRawStr)
def usfmToken(key):
    return Group(Suppress(backslash) + Literal(key) + Suppress(White()))
def __init__(self):
    """
    A program is a list of statements.
    Statements can be 'set' or 'select' statements.
    """
    statement = Forward()
    SELECT, FROM, WHERE, SET, AS = map(CaselessKeyword,
                                       "select from where set as".split())
    ident = Word("$" + alphas, alphanums + "_$").setName("identifier")
    columnName = delimitedList(ident, ".", combine=True).setName("column name")
    columnNameList = Group(delimitedList(columnName))
    tableName = delimitedList(ident, ".", combine=True).setName("column name")
    tableNameList = Group(delimitedList(tableName))

    SEMI, COLON, LPAR, RPAR, LBRACE, RBRACE, LBRACK, RBRACK, DOT, COMMA, EQ = map(Literal, ";:(){}[].,=")
    arrow = Literal("->")
    t_expr = Group(ident + LPAR + Word("$" + alphas, alphanums + "_$") + RPAR +
                   ZeroOrMore(LineEnd())).setName("t_expr") | \
        Word(alphas, alphanums + "_$") + ZeroOrMore(LineEnd())
    t_expr_chain = t_expr + ZeroOrMore(arrow + t_expr)

    whereExpression = Forward()
    and_, or_, in_ = map(CaselessKeyword, "and or in".split())

    binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    realNum = ppc.real()
    intNum = ppc.signed_integer()

    columnRval = realNum | intNum | quotedString | columnName  # need to add support for alg expressions
    whereCondition = Group(
        (columnName + binop + (columnRval | Word(printables))) |
        (columnName + in_ + "(" + delimitedList(columnRval) + ")") |
        (columnName + in_ + "(" + statement + ")") |
        ("(" + whereExpression + ")"))
    whereExpression << whereCondition + ZeroOrMore((and_ | or_) + whereExpression)

    ''' Assignment for handoff. '''
    setExpression = Forward()
    setStatement = Group(
        (ident) |
        (quotedString("json_path") + AS + ident("name")) |
        ("(" + setExpression + ")"))
    setExpression << setStatement + ZeroOrMore((and_ | or_) + setExpression)

    optWhite = ZeroOrMore(LineEnd() | White())

    """ Define the statement grammar. """
    statement <<= (
        Group(
            Group(SELECT + t_expr_chain)("concepts") + optWhite +
            Group(FROM + tableNameList) + optWhite +
            Group(Optional(WHERE + whereExpression("where"), "")) + optWhite +
            Group(Optional(SET + setExpression("set"), ""))("select")) |
        Group(
            SET + (columnName + EQ + (quotedString | intNum | realNum)))("set")
    )("statement")

    """ Make a program a series of statements. """
    self.program = statement + ZeroOrMore(statement)

    """ Make rest-of-line comments. """
    comment = "--" + restOfLine
    self.program.ignore(comment)
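# Usage sketch, assuming this __init__ belongs to a parser class, called
# Parser here (hypothetical name), exposing the .program grammar built above.
parser = Parser()
result = parser.program.parseString('select cohort from clinical_db')
print(result.dump())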
import logging

from pyparsing import ParserElement, Empty, Word, CharsNotIn, White, Optional, \
    ZeroOrMore, OneOrMore, StringStart, StringEnd, Combine, Group, Suppress, \
    nums, ParseException

import debug
import modules

logger = None

CTL = ''.join(chr(i) for i in range(0, 32)) + chr(127)
WS = " \t"

ParserElement.setDefaultWhitespaceChars(WS)

_ws = White(WS)

_quoted_pair = Suppress('\\') + CharsNotIn("", exact=1)

_dqtext = CharsNotIn("\"\\" + CTL, exact=1)
_dqstring = Combine(Suppress('"') + ZeroOrMore(_dqtext | _quoted_pair) + Suppress('"'))

_sqtext = CharsNotIn("'\\" + CTL, exact=1)
_sqstring = Combine(Suppress('\'') + ZeroOrMore(_sqtext | _quoted_pair) + Suppress('\''))

_atom = Empty() + CharsNotIn(" '\"\\" + CTL)

_string = Combine(OneOrMore(_dqstring | _sqstring | _quoted_pair))

_word = Combine(OneOrMore(_atom | _string))

_ws_state = ""

def _ws_action(t):
    global _ws_state
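# Usage sketch: _word glues adjacent atoms and quoted strings into one token,
# honoring backslash escapes inside quotes.
print(_word.parseString('"hello world"'))    # ['hello world']
print(_word.parseString('foo"bar baz"qux'))  # ['foobar bazqux']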
# floating point
fp = Combine(Word(nums + "+-") + Literal(".") + Word(nums))

# fortran real (`integer` is defined earlier in the module, outside this excerpt)
exp = oneOf("E e D d")
real = Combine(fp("base") + exp.setParseAction(lambda x: "e") + integer("exponent"))

# C type
char = Word(printables)

# Decks of data
# ------------------------------------------------------------------------------------------
# prelim
data_type = oneOf("R I C")
name_of_deck = LineStart() + OneOrMore(
    Word(printables), stopOn=White(min=3) + data_type
).setParseAction(" ".join)

# single value decks
ival_deck = name_of_deck("key") + Literal("I")("type") + integer("value")
rval_deck = name_of_deck("key") + Literal("R")("type") + real("value")
cval_deck = name_of_deck("key") + Literal("C")("type") + char("value")

# we have to parse this one differently
char_arr_deck = (
    name_of_deck("key")
    + Literal("C")("type")
    + Literal("N=").suppress()
    + integer("size")
    + LineEnd().suppress()
    + Group(SkipTo(LineEnd() + name_of_deck + data_type) | SkipTo(StringEnd()))("value")
)
@author: luca

(Minor updates by Paul McGuire, June, 2012)
'''

from pyparsing import Word, ZeroOrMore, printables, Suppress, OneOrMore, Group, \
    LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword, \
    cStyleComment, Regex, Forward, MatchFirst, And, srange, oneOf, alphas, alphanums, \
    delimitedList

# http://www.antlr.org/grammar/ANTLR/ANTLRv3.g

# Tokens
EOL = Suppress(LineEnd())  # $
singleTextString = originalTextFor(
    ZeroOrMore(~EOL + (White(" \t") | Word(printables)))).leaveWhitespace()
XDIGIT = hexnums
INT = Word(nums)
ESC = Literal('\\') + (oneOf(list(r'nrtbf\">' + "'")) |
                       ('u' + Word(hexnums, exact=4)) |
                       Word(printables, exact=1))
LITERAL_CHAR = ESC | ~(Literal("'") | Literal('\\')) + Word(printables, exact=1)
CHAR_LITERAL = Suppress("'") + LITERAL_CHAR + Suppress("'")
STRING_LITERAL = Suppress("'") + Combine(OneOrMore(LITERAL_CHAR)) + Suppress("'")
DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"'
DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(Word(printables, exact=1)) + '>>'
TOKEN_REF = Word(alphas.upper(), alphanums + '_')
RULE_REF = Word(alphas.lower(), alphanums + '_')
    >>> for font in tokens.fonts:
    ...     print font.fontNumber, font.fontFamily, font.fontName, font.panose or 0
    0 roman Times New Roman 02020603050405020304
    1 modern Courier New 02070309020205020404
    2 roman Symbol 05050102010706020507
    3 roman Times New Roman (Hebrew) 0

"""

from pyparsing import Optional, Literal, Word, Group, White
from pyparsing import Suppress, Combine, replaceWith
from pyparsing import alphas, nums, printables, alphanums
from pyparsing import restOfLine, oneOf, OneOrMore, ZeroOrMore
from pyparsing import ParseException

separator = Literal(';')
space = Literal(' ')
white = White()
leftBracket = Literal('{')
rightBracket = Literal('}')
bracket = (leftBracket | rightBracket).setResultsName('bracket')

# basic RTF control codes, ie. "\labelname3434"
controlLabel = Combine(Word(alphas + "'") + Optional(Word(nums)))
controlValue = Optional(space) + Optional(Word(alphanums + '-'))
baseControl = Combine(Literal('\\') + controlLabel + controlValue).setResultsName('baseControl')

# in some cases (color and font table declarations), control has ';' suffix
rtfControl = Combine(baseControl + Optional(separator)).setResultsName('control')
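# Usage sketch: matching a simple control word, with and without the
# table-declaration semicolon suffix.
print(rtfControl.parseString(r'\f0;'))   # ['\\f0;']
print(rtfControl.parseString(r'\ansi'))  # ['\\ansi']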