def get_plaintext(parse_actions=False):
    """Get plaintext parser element.

    plaintext = { ( any of a-zA-Z0-9 or '!"$%&()+,-./?@\^_`~',
                    any of "[]*#:;='",
                    any Unicode character without "|[]*#:;<>='{}" )
                  | any Unicode character without "|[]*#:;<>='{}" }-;

    :returns: plaintext parser element
    :rtype: ParserElement
    """
    str0 = '!"$%&()+,-./?@\^_`~'
    str1 = "[]*#:;='"
    str2 = "|[]*#:;<>='{}"
    plaintext = pyparsing.Combine(
        pyparsing.OneOrMore(
            (
                pyparsing.oneOf(" ".join(pyparsing.alphanums + str0))
                + pyparsing.oneOf(" ".join(str1))
                + pyparsing.CharsNotIn(str2, max=1)
            )
            ^ pyparsing.CharsNotIn(str2, max=1)
        )
    )
    plaintext.setName("plaintext")
    plaintext.parseWithTabs()
    if parse_actions:
        pass
    return plaintext

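A minimal usage sketch, assuming `import pyparsing` and the `get_plaintext` definition above; the sample strings and expected results are illustrative:

import pyparsing

plaintext = get_plaintext()
print(plaintext.parseString("Hello, world!"))  # -> ['Hello, world!']
print(plaintext.parseString("foo|bar"))        # -> ['foo'] (stops at the excluded '|')
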
class TestTransformStringUsingParseActions(PyparsingExpressionTestCase):
    markup_convert_map = {
        '*': 'B',
        '_': 'U',
        '/': 'I',
    }

    def markup_convert(t):
        htmltag = TestTransformStringUsingParseActions.markup_convert_map[
            t.markup_symbol]
        return "<{0}>{1}</{2}>".format(htmltag, t.body, htmltag)

    tests = [
        PpTestSpec(
            desc="Use transformString to convert simple markup to HTML",
            expr=(pp.oneOf(markup_convert_map)('markup_symbol')
                  + "(" + pp.CharsNotIn(")")('body') + ")").addParseAction(markup_convert),
            text="Show in *(bold), _(underscore), or /(italic) type",
            expected_list=[
                'Show in <B>bold</B>, <U>underscore</U>, or <I>italic</I> type'
            ],
            parse_fn='transformString',
        ),
    ]

def _parse(cls, text, function):
    '''Prep `text` into a list of strs and then pass it to `function`.

    The given text should contain one or more {}s in it, like "foo {bar} buzz",
    and this function will convert it to ['foo', ['bar'], 'buzz'] and then
    pass that to `function`.

    Args:
        text (str): The text to convert into a list (of lists) of strs.
        function (callable[list[str or list[str]]]):
            The function that takes the parsed `text` as input.

    Returns:
        str: The output of function.

    '''
    # 1. We wrap the entire text in {}s, to make it a nested expression
    text = cls._wrap(text)

    pyparsing.ParserElement.setDefaultWhitespaceChars('\n\t')
    _content = pyparsing.CharsNotIn(['{', '}'])
    _curlys = pyparsing.nestedExpr('{', '}', content=_content)

    # 2. Since we made the expression nested a little while ago, lets unpack it
    #    by getting the 0th index
    #
    parsed_text = _curlys.parseString(text).asList()[0]
    result = function(parsed_text)

    # 3. The {}s that we added with `_wrap` need to be removed.
    #    So we just remove the first and last characters, [1:-1]
    #    from the final result
    #
    return result[1:-1]

def _get_simple_ref_parser():
    string = pp.CharsNotIn(_EXCLUDES).setParseAction(_string)
    ref_open = pp.Literal(_REF_OPEN).suppress()
    ref_close = pp.Literal(_REF_CLOSE).suppress()
    reference = (ref_open + pp.Group(string) + ref_close).setParseAction(_reference)
    line = pp.StringStart() + pp.Optional(string) + reference + pp.Optional(string) + pp.StringEnd()
    return line

class Parser(object):
    comment_def = "--" + pyparsing.NotAny(
        '-' + pyparsing.CaselessKeyword('begin')) + pyparsing.ZeroOrMore(
        pyparsing.CharsNotIn("\n"))

    def __init__(self, scanner, retainSeparator=True):
        self.scanner = scanner
        self.scanner.ignore(pyparsing.sglQuotedString)
        self.scanner.ignore(pyparsing.dblQuotedString)
        self.scanner.ignore(self.comment_def)
        self.scanner.ignore(pyparsing.cStyleComment)
        self.retainSeparator = retainSeparator

    def separate(self, txt):
        itms = []
        for (sqlcommand, start, end) in self.scanner.scanString(txt):
            if sqlcommand:
                if type(sqlcommand[0]) == pyparsing.ParseResults:
                    if self.retainSeparator:
                        itms.append("".join(sqlcommand[0]))
                    else:
                        itms.append(sqlcommand[0][0])
                else:
                    if sqlcommand[0]:
                        itms.append(sqlcommand[0])
        return itms

def parse_input(self, s):
    function = (p.oneOf(accepted_functions) + "(" + p.CharsNotIn(")") + ")" +
                p.White()).setParseAction(self.function_action)
    block = ("{block " + p.Word(p.nums) + "}" +
             p.ZeroOrMore(function)).setParseAction(self.block_action)
    cf2text = p.OneOrMore(block)
    cf2text.parseString(s)

class TestTransformStringUsingParseActions(PyparsingExpressionTestCase):
    markup_convert_map = {
        "*": "B",
        "_": "U",
        "/": "I",
    }

    def markup_convert(t):
        htmltag = TestTransformStringUsingParseActions.markup_convert_map[
            t.markup_symbol]
        return "<{}>{}</{}>".format(htmltag, t.body, htmltag)

    tests = [
        PpTestSpec(
            desc="Use transformString to convert simple markup to HTML",
            expr=(pp.oneOf(markup_convert_map)("markup_symbol")
                  + "(" + pp.CharsNotIn(")")("body") + ")").addParseAction(markup_convert),
            text="Show in *(bold), _(underscore), or /(italic) type",
            expected_list=[
                "Show in <B>bold</B>, <U>underscore</U>, or <I>italic</I> type"
            ],
            parse_fn="transformString",
        ),
    ]

def get_parser():
    "Construct and return the parser."
    left = pp.Suppress("(")
    right = pp.Suppress(")")
    value = pp.QuotedString(quote_char='"', esc_char="\\") | pp.CharsNotIn(")")
    identifier = pp.Word(pp.alphas, pp.alphanums).set_parse_action(_Identifier)
    label = (pp.Keyword("label") + left + value + right).set_parse_action(_Label)
    year = (pp.Keyword("year") + left + value + right).set_parse_action(_Year)
    author = (pp.Keyword("author") + left + value + right).set_parse_action(_Author)
    orcid = (pp.Keyword("orcid") + left + value + right).set_parse_action(_Orcid)
    issn = (pp.Keyword("issn") + left + value + right).set_parse_action(_Issn)
    published = (pp.Keyword("published") + left + value + right).set_parse_action(_Published)
    first = (pp.Keyword("first") + left + value + right).set_parse_action(_First)
    online = (pp.Keyword("online") + left + value + right).set_parse_action(_Online)
    modified = (pp.Keyword("modified") + left + value + right).set_parse_action(_Modified)
    no_pmid = (pp.Keyword("no_pmid") + left + right).set_parse_action(_NoPmid)
    no_doi = (pp.Keyword("no_doi") + left + right).set_parse_action(_NoDoi)
    no_label = (pp.Keyword("no_label") + left + right).set_parse_action(_NoLabel)
    function = (label | year | author | orcid | issn | published | first |
                online | modified | no_pmid | no_doi | no_label)
    if settings["TEMPORAL_LABELS"]:
        current = (pp.Keyword("active") + left + right).set_parse_action(_Active)
        active = (pp.Keyword("active") + left + value + right).set_parse_action(_Active)
        function = function | current | active
    union = pp.Literal("+").set_parse_action(_Union)
    symdifference = pp.Literal("^").set_parse_action(_Symdifference)
    intersection = pp.Literal("#").set_parse_action(_Intersection)
    difference = pp.Literal("-").set_parse_action(_Difference)
    operator = union | symdifference | difference | intersection
    expression = pp.Forward()
    atom = function | identifier | pp.Group(left + expression + right)
    expression <<= atom + (operator + atom)[...]
    expression.set_parse_action(_Expression)
    expression.ignore("!" + pp.rest_of_line)
    return expression

class constant_def:
    zero = pp.Char("0")
    nonzero_digit = pp.Char("123456789")
    digit = pp.Char(pp.nums)
    octal_digit = pp.Char("01234567")
    hexadecimal_digit = pp.Char(pp.hexnums)

    # (6.4.4.1) decimal-constant:
    decimal_constant = nonzero_digit + digit.copy()[...]

    # (6.4.4.1) octal-constant:
    octal_constant = zero + octal_digit.copy()[...]

    # (6.4.4.1) hexadecimal-constant:
    hexadecimal_prefix = pp.Word("0x") | pp.Word("0X")
    hexadecimal_constant = hexadecimal_prefix + hexadecimal_digit.copy()[1, ...]

    # (6.4.4.1) integer-suffix:
    unsigned_suffix = pp.Char("uU")
    long_suffix = pp.Char("lL")
    longlong_suffix = pp.Word("ll") | pp.Word("LL")
    integer_suffix = ((unsigned_suffix + pp.Optional(long_suffix | longlong_suffix))
                      | ((long_suffix | longlong_suffix) + pp.Optional(unsigned_suffix)))

    # (6.4.4.1) integer-constant:
    integer_constant = (decimal_constant | octal_constant | hexadecimal_constant) + pp.Optional(integer_suffix)

    # (6.4.4.2) floating-constant:
    # omitted

    # (6.4.4.3) enumeration-constant:
    enumeration_constant = identifier

    # (6.4.4.4) character-constant:
    simple_escape_sequence = pp.Char("\\") + pp.Char("\\'\"?abfnrtv")
    octal_escape_sequence = pp.Char("\\") + octal_digit.copy()[1, ...]
    hexadecimal_escape_sequence = pp.Word("\\x") + hexadecimal_digit.copy()[1, ...]
    escape_sequence = (simple_escape_sequence | octal_escape_sequence
                       | hexadecimal_escape_sequence | identifier_def.universal_character_name)
    c_char = pp.CharsNotIn("'\\\r\n") | escape_sequence
    character_constant = pp.Optional(pp.Char("L")) + pp.Char("'") + c_char[1, ...] + pp.Char("'")

def get_param(wiki_markup, parse_actions=False):
    """Get parameter parser element.

    param = "{{{", { any Unicode character without "|={}" }-,
            [ default ], "}}}";
    default = "|", { any Unicode character };

    :returns: parameter parser element
    :rtype: ParserElement
    """
    name = pyparsing.CharsNotIn("|={}").setResultsName("name")
    name.setName("name")
    name.parseWithTabs()
    value = _get_value(wiki_markup, parse_actions=parse_actions)
    param = pyparsing.nestedExpr(
        opener="{{{",
        closer="}}}",
        content=(name + pyparsing.Optional(
            pyparsing.Literal("|") + pyparsing.Optional(value))),
        ignoreExpr=None)
    param.setName("param")
    param.parseWithTabs()
    if parse_actions:
        pass
    return param

class Tokens(object):
    # shared tokens
    delim_chars = '[]{},'
    pause = pp.FollowedBy(pp.Word(delim_chars) | pp.StringEnd())
    number = (pp.pyparsing_common.number + pause)
    quoted_string = pp.QuotedString('"', escChar='\\')
    true = (pp.Regex(r'(True|true|yes|on)') + pause). \
        setParseAction(lambda _: True)
    false = (pp.Regex(r'(False|false|no|off)') + pause). \
        setParseAction(lambda _: False)
    unquoted_string = pp.CharsNotIn(delim_chars). \
        setParseAction(lambda toks: toks[0].strip())
    empty_value = pp.Empty(). \
        setParseAction(lambda _: '')

    # tokens for configs
    identifier = pp.pyparsing_common.identifier.copy()
    comma = pp.Literal(',').suppress()
    assign = pp.Literal('=').suppress()
    config_value = (
        number | true | false | quoted_string | unquoted_string | empty_value)
    key_value_pair = (identifier + assign + config_value). \
        setParseAction(lambda toks: (toks[0], toks[1]))
    key_value_pair_list = pp.Optional(
        key_value_pair + pp.ZeroOrMore(comma + key_value_pair))

    # tokens for tags
    tag = quoted_string | unquoted_string | pp.Empty().suppress()
    tag_list = pp.Optional(tag + pp.ZeroOrMore(comma + tag))

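A minimal usage sketch, assuming `import pyparsing as pp` and the `Tokens` class above; the sample string and expected result are illustrative:

import pyparsing as pp

result = Tokens.key_value_pair_list.parseString('a=1, b=yes, c="hi"')
print(result.asList())  # roughly -> [('a', 1), ('b', True), ('c', 'hi')]
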
def _make_valid_state_name(self, state_name):
    """Transform the input state_name into a valid state in XMLBIF.

    XMLBIF states must start with a letter and only contain letters,
    numbers and underscores.
    """
    s = str(state_name)
    s_fixed = pp.CharsNotIn(pp.alphanums + "_").setParseAction(pp.replaceWith("_")).transformString(s)
    if not s_fixed[0].isalpha():
        s_fixed = "state" + s_fixed
    return s_fixed

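A sketch of the core substitution this method performs, assuming only `import pyparsing as pp`; the sample inputs are illustrative:

import pyparsing as pp

# every run of characters outside [a-zA-Z0-9_] is collapsed to a single "_"
sanitize = pp.CharsNotIn(pp.alphanums + "_").setParseAction(pp.replaceWith("_"))
print(sanitize.transformString("my state (1)"))  # -> 'my_state_1_'
print(sanitize.transformString("2nd state"))     # -> '2nd_state' (the method above would then prefix 'state')
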
def _get_parser(self):
    assign = pp.oneOf(['=', '?=', ':=', '::=', '+='])('assign')
    var_name = pp.Word(pp.alphas + '_', pp.alphanums + '_')('var')
    enclosed = pp.Forward()
    nested_parents = pp.nestedExpr('$(', ')', content=enclosed)
    nested_brackets = pp.nestedExpr('${', '}', content=enclosed)
    enclosed <<= (nested_parents | nested_brackets | pp.CharsNotIn('$(){}\n')).leaveWhitespace()
    return pp.lineStart + var_name + assign + pp.ZeroOrMore(pp.White()) + pp.ZeroOrMore(enclosed)('value')

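A minimal usage sketch, assuming `import pyparsing as pp` and that `parser` is the element returned by `_get_parser` above; the Makefile line and the shown tokens are illustrative:

result = parser.parseString("CC := gcc -O2")
print(result['var'], result['assign'])  # -> CC :=
print(result.asList())                  # roughly -> ['CC', ':=', ' ', 'gcc -O2']
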
def _make_parser():
    word = pp.CharsNotIn(''.join(whitespace))
    word.skipWhitespace = True

    value = pp.MatchFirst([
        pp.dblQuotedString.copy().setParseAction(pp.removeQuotes),
        pp.sglQuotedString.copy().setParseAction(pp.removeQuotes),
        pp.Empty() + pp.CharsNotIn(''.join(whitespace)),
    ])

    expressions = []

    for field in named_fields:
        exp = pp.Suppress(pp.CaselessLiteral(field) + ':') + \
            value.copy().setParseAction(_decorate_match(field))
        expressions.append(exp)

    any_ = value.copy().setParseAction(_decorate_match('any'))
    expressions.append(any_)

    return pp.ZeroOrMore(pp.MatchFirst(expressions))

def _make_parser():
    word = pp.CharsNotIn("".join(whitespace))
    word.skipWhitespace = True

    value = pp.MatchFirst([
        pp.dbl_quoted_string.copy().set_parse_action(pp.remove_quotes),
        pp.sgl_quoted_string.copy().set_parse_action(pp.remove_quotes),
        pp.Empty() + pp.CharsNotIn("".join(whitespace)),
    ])

    expressions = []

    for field in named_fields:
        exp = pp.Suppress(pp.CaselessLiteral(field) + ":") + value.copy().set_parse_action(
            _decorate_match(field))
        expressions.append(exp)

    any_ = value.copy().set_parse_action(_decorate_match("any"))
    expressions.append(any_)

    return pp.ZeroOrMore(pp.MatchFirst(expressions))

def _make():
    # Order is important - multi-char expressions need to come before narrow
    # ones.
    parts = []
    for cls in filter_unary:
        f = pp.Literal(f"~{cls.code}") + pp.WordEnd()
        f.setParseAction(cls.make)
        parts.append(f)

    # This is a bit of a hack to simulate Word(pyparsing_unicode.printables),
    # which has a horrible performance with len(pyparsing.pyparsing_unicode.printables) == 1114060
    unicode_words = pp.CharsNotIn("()~'\"" + pp.ParserElement.DEFAULT_WHITE_CHARS)
    unicode_words.skipWhitespace = True
    regex = (
        unicode_words
        | pp.QuotedString('"', escChar='\\')
        | pp.QuotedString("'", escChar='\\')
    )
    for cls in filter_rex:
        f = pp.Literal(f"~{cls.code}") + pp.WordEnd() + regex.copy()
        f.setParseAction(cls.make)
        parts.append(f)

    for cls in filter_int:
        f = pp.Literal(f"~{cls.code}") + pp.WordEnd() + pp.Word(pp.nums)
        f.setParseAction(cls.make)
        parts.append(f)

    # A naked rex is a URL rex:
    f = regex.copy()
    f.setParseAction(FUrl.make)
    parts.append(f)

    atom = pp.MatchFirst(parts)
    expr = pp.infixNotation(
        atom,
        [(pp.Literal("!").suppress(), 1, pp.opAssoc.RIGHT, lambda x: FNot(*x)),
         (pp.Literal("&").suppress(), 2, pp.opAssoc.LEFT, lambda x: FAnd(*x)),
         (pp.Literal("|").suppress(), 2, pp.opAssoc.LEFT, lambda x: FOr(*x)),
         ])
    expr = pp.OneOrMore(expr)
    return expr.setParseAction(lambda x: FAnd(x) if len(x) != 1 else x)

def parser_factory(styler):
    """Builds the S-expression parser."""
    def cond_optional(expr):
        return pp.Optional(expr) if styler else expr

    LPAR, RPAR, SQUO, DQUO = map(pp.Suppress, '()\'"')
    form_first = pp.Forward()
    form = pp.Forward()

    nil = pp.CaselessKeyword('nil').addParseAction(pp.replaceWith([]))
    t = pp.CaselessKeyword('t').addParseAction(pp.replaceWith(True))
    constant = styler('class:constant', nil | t)

    number = styler('class:number', ppc.number).setName('number')

    control_chars = ''.join(map(chr, range(0, 32))) + '\x7f'
    symbol = pp.CharsNotIn(control_chars + '\'"`;,()[]{} ')
    symbol = styler('class:symbol', symbol).setName('symbol')
    symbol.addParseAction(lambda t: Symbol(t[0]))
    call = styler('class:call', symbol)

    string = DQUO + pp.Combine(pp.Optional(
        pp.CharsNotIn('"'))) + cond_optional(DQUO)
    string = styler('class:string', string).setName('string')

    forms = (form_first + pp.ZeroOrMore(form)).setName('one or more forms')
    sexp = (LPAR + pp.Optional(forms) + cond_optional(RPAR)).setName('s-expression')
    sexp.addParseAction(lambda t: [list(t)])

    quote = (styler('class:quote', SQUO) + form).setName('quoted form')
    quote.addParseAction(lambda t: Quote(t[0]))

    form_first <<= constant | number ^ call | string | sexp | quote
    form <<= constant | number ^ symbol | string | sexp | quote
    return form

def get_pagename(parse_actions=False):
    """Get pagename parser element.

    pagename = { any Unicode character without "|[]#<>{}" }-;

    :returns: pagename parser element
    :rtype: ParserElement
    """
    pagename = pyparsing.CharsNotIn("|[]#<>{}").setResultsName("pagename")
    pagename.setName("pagename")
    pagename.parseWithTabs()
    if parse_actions:
        pass
    return pagename

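A minimal usage sketch, assuming `import pyparsing` and the definition above; the page titles are illustrative. The heading, label, and anchor helpers below follow the same pattern with different excluded characters:

import pyparsing

pagename = get_pagename()
print(pagename.parseString("Main Page"))          # -> ['Main Page']
print(pagename.parseString("Main Page#History"))  # -> ['Main Page'] (stops at the excluded '#')
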
def _get_heading(parse_actions=False):
    """Get heading parser element.

    heading = { any Unicode character without "|[]" }-;

    :returns: heading parser element
    :rtype: ParserElement
    """
    heading = pyparsing.CharsNotIn("|[]")
    heading.setName("heading")
    heading.parseWithTabs()
    if parse_actions:
        pass
    return heading

def _get_label(parse_actions=False):
    """Get label parser element.

    label = { any Unicode character without "|[]" }-;

    :returns: label parser element
    :rtype: ParserElement
    """
    label = pyparsing.CharsNotIn("|[]").setResultsName("label")
    label.setName("label")
    label.parseWithTabs()
    if parse_actions:
        pass
    return label

def _get_anchor(parse_actions=False):
    """Get anchor parser element.

    anchor = { any Unicode character without "[]" }-;

    :returns: anchor parser element
    :rtype: ParserElement
    """
    anchor = pyparsing.CharsNotIn("[]").setResultsName("anchor")
    anchor.setName("anchor")
    anchor.parseWithTabs()
    if parse_actions:
        pass
    return anchor

def _get_parser():
    double_escape = pp.Combine(
        pp.Literal(_DOUBLE_ESCAPE)
        + pp.MatchFirst([pp.FollowedBy(_REF_OPEN),
                         pp.FollowedBy(_REF_CLOSE),
                         pp.FollowedBy(_INV_OPEN),
                         pp.FollowedBy(_INV_CLOSE)])).setParseAction(pp.replaceWith(_ESCAPE))

    ref_open = pp.Literal(_REF_OPEN).suppress()
    ref_close = pp.Literal(_REF_CLOSE).suppress()
    ref_not_open = ~pp.Literal(_REF_OPEN) + ~pp.Literal(_REF_ESCAPE_OPEN) + ~pp.Literal(_REF_DOUBLE_ESCAPE_OPEN)
    ref_not_close = ~pp.Literal(_REF_CLOSE) + ~pp.Literal(_REF_ESCAPE_CLOSE) + ~pp.Literal(_REF_DOUBLE_ESCAPE_CLOSE)
    ref_escape_open = pp.Literal(_REF_ESCAPE_OPEN).setParseAction(pp.replaceWith(_REF_OPEN))
    ref_escape_close = pp.Literal(_REF_ESCAPE_CLOSE).setParseAction(pp.replaceWith(_REF_CLOSE))
    ref_text = pp.CharsNotIn(_REF_EXCLUDES) | pp.CharsNotIn(_REF_CLOSE_FIRST, exact=1)
    ref_content = pp.Combine(pp.OneOrMore(ref_not_open + ref_not_close + ref_text))
    ref_string = pp.MatchFirst([double_escape, ref_escape_open, ref_escape_close,
                                ref_content]).setParseAction(_string)
    ref_item = pp.Forward()
    ref_items = pp.OneOrMore(ref_item)
    reference = (ref_open + pp.Group(ref_items) + ref_close).setParseAction(_reference)
    ref_item << (reference | ref_string)

    inv_open = pp.Literal(_INV_OPEN).suppress()
    inv_close = pp.Literal(_INV_CLOSE).suppress()
    inv_not_open = ~pp.Literal(_INV_OPEN) + ~pp.Literal(_INV_ESCAPE_OPEN) + ~pp.Literal(_INV_DOUBLE_ESCAPE_OPEN)
    inv_not_close = ~pp.Literal(_INV_CLOSE) + ~pp.Literal(_INV_ESCAPE_CLOSE) + ~pp.Literal(_INV_DOUBLE_ESCAPE_CLOSE)
    inv_escape_open = pp.Literal(_INV_ESCAPE_OPEN).setParseAction(pp.replaceWith(_INV_OPEN))
    inv_escape_close = pp.Literal(_INV_ESCAPE_CLOSE).setParseAction(pp.replaceWith(_INV_CLOSE))
    inv_text = pp.CharsNotIn(_INV_CLOSE_FIRST)
    inv_content = pp.Combine(pp.OneOrMore(inv_not_close + inv_text))
    inv_string = pp.MatchFirst([double_escape, inv_escape_open, inv_escape_close,
                                inv_content]).setParseAction(_string)
    inv_items = pp.OneOrMore(inv_string)
    export = (inv_open + pp.Group(inv_items) + inv_close).setParseAction(_invquery)

    text = pp.CharsNotIn(_EXCLUDES) | pp.CharsNotIn('', exact=1)
    content = pp.Combine(pp.OneOrMore(ref_not_open + inv_not_open + text))
    string = pp.MatchFirst([double_escape, ref_escape_open, inv_escape_open,
                            content]).setParseAction(_string)
    item = reference | export | string
    line = pp.OneOrMore(item) + pp.StringEnd()
    return line

def get_simple_ref_parser(settings):
    ESCAPE = settings.escape_character
    REF_OPEN, REF_CLOSE = settings.reference_sentinels
    INV_OPEN, INV_CLOSE = settings.export_sentinels
    EXCLUDES = ESCAPE + REF_OPEN + REF_CLOSE + INV_OPEN + INV_CLOSE

    string = pp.CharsNotIn(EXCLUDES).setParseAction(_tag_with(tags.STR))
    ref_open = pp.Literal(REF_OPEN).suppress()
    ref_close = pp.Literal(REF_CLOSE).suppress()
    reference = (ref_open + pp.Group(string) + ref_close).setParseAction(
        _tag_with(tags.REF))
    line = pp.StringStart() + pp.Optional(string) + reference + pp.Optional(
        string) + s_end
    return line.leaveWhitespace()

def parser_factory(styler):
    """Builds the repr() parser."""
    squo = styler('class:string', "'")
    dquo = styler('class:string', '"')
    esc_single = pp.oneOf(r'\\ \' \" \n \r \t')
    esc_hex = pp.Literal(r'\x') + pp.Word(pp.hexnums, exact=2)
    escs = styler('class:escape', esc_single | esc_hex)

    control_chars = ''.join(map(chr, range(32))) + '\x7f'
    normal_chars_squo = pp.CharsNotIn(control_chars + r"\'")
    chars_squo = styler('class:string', normal_chars_squo) | escs
    normal_chars_dquo = pp.CharsNotIn(control_chars + r'\"')
    chars_dquo = styler('class:string', normal_chars_dquo) | escs

    skip_white = pp.Optional(pp.White())
    bytes_prefix = pp.Optional(styler('class:string_prefix', 'b'))
    string_squo = skip_white + bytes_prefix + squo - pp.ZeroOrMore(
        chars_squo) + squo
    string_dquo = skip_white + bytes_prefix + dquo - pp.ZeroOrMore(
        chars_dquo) + dquo
    string = string_squo | string_dquo
    string.leaveWhitespace()

    address = styler('class:address', '0x' + pp.Word(pp.hexnums))
    number = styler('class:number', ppc.number)
    const = pp.oneOf('True False None NotImplemented Ellipsis ...')
    const = styler('class:constant', const)
    kwarg = styler('class:kwarg', ppc.identifier) + styler(
        'class:operator', '=')
    call = styler('class:call', ppc.identifier) + pp.FollowedBy('(')
    magic = styler('class:magic', pp.Regex(r'__[a-zA-Z0-9_]+__'))

    token = string | address | number | const | kwarg | call | magic
    token.parseWithTabs()
    return pp.originalTextFor(token)

def parser_factory(styler):
    """Builds the JSON parser."""
    LBRK, RBRK, LBRC, RBRC, COLON, DQUO = map(pp.Suppress, '[]{}:"')
    DQUO = styler('class:string', DQUO)

    control_chars = ''.join(map(chr, range(32))) + '\x7f'
    normal_chars = pp.CharsNotIn(control_chars + '\\"')

    s_quo = pp.Literal('\\"').addParseAction(pp.replaceWith('"'))
    s_sol = pp.Literal('\\/').addParseAction(pp.replaceWith('/'))
    s_rsol = pp.Literal('\\\\').addParseAction(pp.replaceWith('\\'))
    s_back = pp.Literal('\\b').addParseAction(pp.replaceWith('\b'))
    s_form = pp.Literal('\\f').addParseAction(pp.replaceWith('\f'))
    s_nl = pp.Literal('\\n').addParseAction(pp.replaceWith('\n'))
    s_ret = pp.Literal('\\r').addParseAction(pp.replaceWith('\r'))
    s_tab = pp.Literal('\\t').addParseAction(pp.replaceWith('\t'))
    s_unicode = pp.Suppress('\\u') + pp.Word(pp.hexnums, exact=4)
    s_unicode.addParseAction(lambda t: chr(int(t[0], 16)))
    escape_seqs = s_quo | s_sol | s_rsol | s_back | s_form | s_nl | s_ret | s_tab | s_unicode

    chars = styler('class:string', normal_chars) | styler(
        'class:escape', escape_seqs)
    skip_white = pp.Optional(pp.Suppress(pp.White()))
    string = skip_white + DQUO - pp.Combine(pp.ZeroOrMore(chars)) + DQUO
    string.leaveWhitespace()
    string.setName('string')

    value = pp.Forward()

    pair = string + COLON + value
    pair.addParseAction(tuple)
    obj = LBRC - pp.Optional(pp.delimitedList(pair)) + pp.NotAny(',') + RBRC
    obj.addParseAction(lambda t: {k: v for k, v in t})
    obj.setName('object')

    array = LBRK - pp.Optional(pp.delimitedList(value)) + pp.NotAny(',') + RBRK
    array.addParseAction(lambda t: [list(t)])
    array.setName('array')

    true = pp.Literal('true').addParseAction(pp.replaceWith(True))
    false = pp.Literal('false').addParseAction(pp.replaceWith(False))
    null = pp.Literal('null').addParseAction(pp.replaceWith(None))
    constant = styler('class:constant', true | false | null)

    value <<= obj | array | string | styler('class:number', ppc.number) | constant
    value.parseWithTabs()
    value.setName('JSON value')
    return value

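A minimal usage sketch, assuming `import pyparsing as pp`, `ppc = pp.pyparsing_common`, and the `parser_factory` above; the pass-through `styler` and the sample document are illustrative:

import pyparsing as pp
ppc = pp.pyparsing_common

styler = lambda style, expr: expr      # pass-through: keep the grammar, drop the styling
json_value = parser_factory(styler)
result = json_value.parseString('{"a": [1, true, null]}')
print(result[0])                       # roughly -> {'a': [1, True, None]}
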
def _get_parser(self):
    """
    Initialize the pyparsing parser for Makefile
    """
    assign = pp.oneOf(['=', '?=', ':=', '::=', '+='])('assign')
    var_name = pp.Word(pp.alphas + '_', pp.alphanums + '_')('var')
    enclosed = pp.Forward()
    nested_parents = pp.nestedExpr('$(', ')', content=enclosed)
    nested_brackets = pp.nestedExpr('${', '}', content=enclosed)
    enclosed <<= (nested_parents | nested_brackets | pp.CharsNotIn('$(){}#\n')).leaveWhitespace()
    return pp.lineStart + var_name + pp.ZeroOrMore(
        pp.White()) + assign + pp.ZeroOrMore(
        pp.White()) + pp.ZeroOrMore(enclosed)('value') + pp.Optional(
        pp.pythonStyleComment)('comment')

def get_simple_ref_parser(escape_character, reference_sentinels, export_sentinels):
    _ESCAPE = escape_character
    _REF_OPEN = reference_sentinels[0]
    _REF_CLOSE = reference_sentinels[1]
    _INV_OPEN = export_sentinels[0]
    _INV_CLOSE = export_sentinels[1]
    _EXCLUDES = _ESCAPE + _REF_OPEN + _REF_CLOSE + _INV_OPEN + _INV_CLOSE

    string = pp.CharsNotIn(_EXCLUDES).setParseAction(_string)
    ref_open = pp.Literal(_REF_OPEN).suppress()
    ref_close = pp.Literal(_REF_CLOSE).suppress()
    reference = (ref_open + pp.Group(string) + ref_close).setParseAction(_reference)
    line = pp.StringStart() + pp.Optional(string) + reference + pp.Optional(
        string) + pp.StringEnd()
    return line

def read(self, state):
    if state is None:
        state = {}
    self.formatString = state.get(
        'formatString', '/tmp/<1.artist>/<1.title>/<#> - <title>')
    self.replaceChars = state.get('replaceChars', '\\:/')
    self.replaceBy = state.get('replaceBy', '_.;')
    self.removeChars = state.get('removeChars', '?*')
    if len(self.replaceChars) != len(self.replaceBy):
        raise ValueError("replaceChars and replaceBy must equal in length")
    self.translation = str.maketrans(self.replaceChars, self.replaceBy,
                                     self.removeChars)

    oldDefaultWhitespaceChars = pyparsing.ParserElement.DEFAULT_WHITE_CHARS
    pyparsing.ParserElement.setDefaultWhitespaceChars("\t\n")
    # pyparsing.ParserElement.enablePackrat() does not work (buggy)
    lbrace = Literal("<").suppress()
    rbrace = Literal(">").suppress()
    number = Word(nums)
    levelDef = number + Literal(".").suppress()
    levelDef.setParseAction(self.levelDefAction)
    tagName = Word(alphas + "_", alphanums + "_:()/\\")
    tagDefinition = Optional(levelDef("levelDef")) + (tagName | "#")("tag")
    tagDefinition.setParseAction(self.tagDefinitionAction)
    expression = Forward()
    ifExpr = Literal("?").suppress() + expression
    elseExpr = Literal("!").suppress() + expression
    condition = lbrace + tagDefinition("tagDef") \
        + ((Optional(ifExpr("if")) + Optional(elseExpr("else")))
           | (Optional(elseExpr("else")) + Optional(ifExpr("else")))) \
        + rbrace  # i am sure this is possible in a nicer way ...
    condition.setParseAction(self.conditionAction)
    staticText = pyparsing.CharsNotIn(
        "<>!?")  # Word("".join(p for p in printables if p not in "<>"))
    expression << OneOrMore(condition | staticText)
    self.expression = expression
    pyparsing.ParserElement.setDefaultWhitespaceChars(
        oldDefaultWhitespaceChars)

def _ParseTags(cls, string: str) -> dict:
    SINGLE_VALUE = pp.CharsNotIn(";$")
    LIST_VALUE = pp.OneOrMore(pp.Word(pp.alphanums + "_.") + pp.Literal(",")) + pp.Word(pp.alphanums + "_.")
    ANY_VALUE = pp.Group(LIST_VALUE("listVal") | SINGLE_VALUE("singleVal"))
    TAGS = "$$" + pp.OneOrMore(
        pp.Group(pp.Word(pp.alphas)("tag") + "=" + ANY_VALUE("value") + pp.Optional(";")))("tags") + "$$"

    tags = {}
    for t, s, e in TAGS.scanString(string):
        for tag in t.get("tags"):
            val = tag.get("value")
            if "listVal" in val.keys():
                val = "".join(val).split(",")
            elif "singleVal" in val.keys():
                val = val.get("singleVal").strip()
            else:
                raise Exception("Illegal Tag Format")
            tags[tag.get("tag").lower()] = val
    return tags

def _get_named_arg(wiki_markup, parse_actions=False):
    """Get named argument parser element.

    :param ParserElement wiki_markup: wiki markup

    named_arg = { any Unicode character without "|=" }, "=", value;

    :returns: named argument parser element
    :rtype: ParserElement
    """
    name = pyparsing.CharsNotIn("|=").setResultsName("name")
    name.setName("name")
    name.parseWithTabs()
    value = _get_value(wiki_markup)
    named_arg = (pyparsing.Optional(name)
                 + pyparsing.Literal("=")
                 + value)
    named_arg.setName("named_arg")
    named_arg.parseWithTabs()
    if parse_actions:
        pass
    return named_arg