def _init_parser(self):
    """Build the pyparsing grammars used to split a command line into parts.

    Reads ``self.redirector``, ``self.terminators``, ``self.multilineCommands``,
    ``self.legalChars``, ``self.case_insensitive``, ``self.blankLinesAllowed``,
    ``self.commentGrammars``, ``self.commentInProgress`` and
    ``self.prefixParser``.

    Sets ``self.multilineCommand``, ``self.blankLineTerminationParser`` (and
    ``self.blankLineTerminator`` when blank lines are not allowed),
    ``self.multilineParser``, ``self.singleLineParser``, ``self.parser`` and
    ``self.inputParser``.  As a side effect ``self.commentGrammars`` is
    modified in place (it gets an ignore expression and a parse action).
    """
    def strip_first(tokens):
        # Shared parse action: keep the first token with surrounding whitespace removed.
        return tokens[0].strip()

    # Output redirection: a doubled redirector, a redirector at the start of
    # a word, or a redirector preceded by any character other than '='.
    # NOTE(review): self.redirector is interpolated into the regex unescaped.
    outputParser = (pyparsing.Literal(self.redirector * 2) |
                    (pyparsing.WordStart() + self.redirector) |
                    pyparsing.Regex('[^=]' + self.redirector))('output')

    inputMark = pyparsing.Literal('<')('input')

    # Terminators may be given either as ready-made parser elements or as
    # plain strings; strings are wrapped in a Literal.
    terminatorParser = pyparsing.Or(
        [t if hasattr(t, 'parseString') else pyparsing.Literal(t)
         for t in self.terminators])('terminator')

    # End of input, or a newline followed by the text "EOF".
    stringEnd = pyparsing.stringEnd ^ '\nEOF'

    self.multilineCommand = pyparsing.Or(
        [pyparsing.Keyword(c, caseless=self.case_insensitive)
         for c in self.multilineCommands])('multilineCommand')
    oneLineCommand = (~self.multilineCommand + pyparsing.Word(self.legalChars))('command')
    pipe = pyparsing.Keyword('|', identChars='|')

    # Comments are replaced by the empty string; quoted strings inside them
    # are ignored.
    self.commentGrammars.ignore(pyparsing.quotedString).setParseAction(lambda x: '')
    doNotParse = self.commentGrammars | self.commentInProgress | pyparsing.quotedString

    # Optional trailing parts: "< inputFrom", "| pipeTo", "> outputTo".
    afterElements = (
        pyparsing.Optional(
            inputMark +
            pyparsing.SkipTo(outputParser ^ pipe ^ stringEnd,
                             ignore=doNotParse).setParseAction(strip_first)('inputFrom')) +
        pyparsing.Optional(
            pipe +
            pyparsing.SkipTo(outputParser ^ stringEnd, ignore=doNotParse)('pipeTo')) +
        pyparsing.Optional(
            outputParser +
            pyparsing.SkipTo(stringEnd,
                             ignore=doNotParse).setParseAction(strip_first)('outputTo'))
    )

    if self.case_insensitive:
        self.multilineCommand.setParseAction(lambda x: x[0].lower())
        oneLineCommand.setParseAction(lambda x: x[0].lower())

    if self.blankLinesAllowed:
        # Must be an *instance*: the parser is later copied with
        # setResultsName() and combined with '|', both of which fail on the
        # bare NoMatch class.
        self.blankLineTerminationParser = pyparsing.NoMatch()
    else:
        self.blankLineTerminator = (pyparsing.lineEnd + pyparsing.lineEnd)('terminator')
        self.blankLineTerminationParser = (
            (self.multilineCommand ^ oneLineCommand) +
            pyparsing.SkipTo(self.blankLineTerminator,
                             ignore=doNotParse).setParseAction(strip_first)('args') +
            self.blankLineTerminator)('statement')

    self.multilineParser = (
        ((self.multilineCommand ^ oneLineCommand) +
         pyparsing.SkipTo(terminatorParser,
                          ignore=doNotParse).setParseAction(strip_first)('args') +
         terminatorParser)('statement') +
        pyparsing.SkipTo(outputParser ^ inputMark ^ pipe ^ stringEnd,
                         ignore=doNotParse).setParseAction(strip_first)('suffix') +
        afterElements)
    self.multilineParser.ignore(self.commentInProgress)

    self.singleLineParser = (
        (oneLineCommand +
         pyparsing.SkipTo(terminatorParser ^ stringEnd ^ pipe ^ outputParser ^ inputMark,
                          ignore=doNotParse).setParseAction(strip_first)('args'))('statement') +
        pyparsing.Optional(terminatorParser) +
        afterElements)

    self.blankLineTerminationParser = self.blankLineTerminationParser.setResultsName('statement')

    # Alternatives are tried in this order (MatchFirst).
    self.parser = self.prefixParser + (
        stringEnd |
        self.multilineParser |
        self.singleLineParser |
        self.blankLineTerminationParser |
        self.multilineCommand + pyparsing.SkipTo(stringEnd, ignore=doNotParse)
    )
    self.parser.ignore(self.commentGrammars)

    fileName = pyparsing.Word(self.legalChars + '/\\')
    inputFrom = fileName('inputFrom')
    # a not-entirely-satisfactory way of distinguishing < as in "import from"
    # from < as in "lesser than"
    self.inputParser = (inputMark + pyparsing.Optional(inputFrom) + pyparsing.Optional('>') +
                        pyparsing.Optional(fileName) + (pyparsing.stringEnd | '|'))
    self.inputParser.ignore(self.commentInProgress)
def test_finds_same(self):
    """Run ``_compare_search`` over several grammars.

    QuickSearchable is expected to find the same stuff for every
    (grammar, text) pair listed below.
    """
    cases = (
        (pyparsing.Literal("the"),
         "The the theory ThE the"),
        (pyparsing.Literal("some") + pyparsing.Literal("thing"),
         "something some thing some one thing SomeThing"),
        (pyparsing.WordStart() + pyparsing.Literal("the"),
         "the the theory ThE the other more"),
        (pyparsing.Optional("the").setResultsName("opt") + "term",
         "some term the term The Term some the Some term"),
        (pyparsing.Literal("hey") | pyparsing.Literal("you"),
         "hey there you hey you hey there here heyyo"),
        (pyparsing.Suppress("you") + "there",
         "hey you there! do you see this? there is here youthere"),
        (pyparsing.Regex(r'\d+'),
         "this thing 123 more l337 h47p"),
    )
    # Same calls, same order as listing them one by one.
    for grammar, sample in cases:
        self._compare_search(grammar, sample)
def parse_filter_str(self, filter_str):
    """Parse a filter string with the pyparsing filter grammar.

    A single term has the form
    ``(prop, value[, type=<quote>T<quote>][, flag=<quote>F<quote>])`` where
    ``T`` is one of ``re eq ne gt ge lt le`` and ``F`` is ``C`` or ``I``.
    Terms may be combined with a prefix ``not`` and infix ``and`` / ``or``
    (listed to pyparsing in that order).

    Each matched term is passed to ``self.parse_filter_obj`` and each
    operator group to ``self.not_operator`` / ``self.and_operator`` /
    ``self.or_operator`` as parse actions.

    Args:
        filter_str: the filter expression to parse.

    Returns:
        The result of ``parseString`` on the combined grammar.

    Raises:
        Whatever ``parseString`` raises when the text does not match
        (a pyparsing parse exception).
    """
    # ``operatorPrecedence`` is only a deprecated alias of ``infixNotation``
    # and is missing from newer pyparsing releases; prefer the current name
    # and fall back to the old one for very old installations.
    build_operator_grammar = getattr(pp, "infixNotation", None) or pp.operatorPrecedence

    # Property name: alphanumerics/underscore, starting at a word boundary.
    prop = pp.WordStart(pp.alphas) + pp.Word(pp.alphanums + "_").setResultsName("prop")
    # Value: quoted with ' or ", or a bare run of printable characters
    # containing no comma.
    # NOTE(review): the bare form also consumes ')', so an unquoted value
    # directly followed by the closing parenthesis looks unparseable - confirm.
    value = (pp.QuotedString("'") | pp.QuotedString('"') |
             pp.Word(pp.printables, excludeChars=",")).setResultsName("value")
    types_ = pp.oneOf("re eq ne gt ge lt le").setResultsName("types")
    flags = pp.oneOf("C I").setResultsName("flags")
    comma = pp.Literal(',')
    quote = (pp.Literal("'") | pp.Literal('"')).setResultsName("quote")

    type_exp = pp.Group(
        pp.Literal("type") + pp.Literal("=") + quote + types_ + quote
    ).setResultsName("type_exp")
    flag_exp = pp.Group(
        pp.Literal("flag") + pp.Literal("=") + quote + flags + quote
    ).setResultsName("flag_exp")

    semi_expression = pp.Forward()
    semi_expression << pp.Group(
        pp.Literal("(") +
        prop + comma + value +
        pp.Optional(comma + type_exp) +
        pp.Optional(comma + flag_exp) +
        pp.Literal(")")
    ).setParseAction(self.parse_filter_obj).setResultsName("semi_expression")

    expr = pp.Forward()
    expr << build_operator_grammar(semi_expression, [
        ("not", 1, pp.opAssoc.RIGHT, self.not_operator),
        ("and", 2, pp.opAssoc.LEFT, self.and_operator),
        ("or", 2, pp.opAssoc.LEFT, self.or_operator),
    ])

    return expr.parseString(filter_str)
def WordBoundaries(grammar):  # noqa - we treat this like a pyparsing class
    """Return *grammar* wrapped between a word start and a word end.

    Both boundaries are defined relative to alphanumeric characters.
    """
    leading_edge = pp.WordStart(pp.alphanums)
    trailing_edge = pp.WordEnd(pp.alphanums)
    return leading_edge + grammar + trailing_edge
def WordBoundaries(grammar):
    """Return *grammar* wrapped between a word start and a word end.

    Both boundaries are defined relative to alphanumeric characters.
    """
    leading_edge = pyparsing.WordStart(pyparsing.alphanums)
    trailing_edge = pyparsing.WordEnd(pyparsing.alphanums)
    return leading_edge + grammar + trailing_edge
def transform_human(text, variables=None):
    """Transform user input with given context.

    Variables (``$name``), double-quoted string literals, opcode names
    (with or without the ``OP_`` prefix) and hex values are recognised and
    rewritten; the same grammars are also used to collect tooltip data.

    Args:
        text (str): User input.
        variables (dict): Variables for purposes of substitution.

    Returns:
        A 2-tuple of: (A human-readable script that Script can parse,
            A list of contextual information for tooltips, etc.)
        Each list entry is ``(start, end, value, match_type)``.
    """
    # Function-scope import: only the string-literal parse action needs it.
    import binascii

    if variables is None:
        variables = {}  # No mutable default value.

    # these are parseActions for pyparsing.
    def str_literal_to_hex(s, loc, toks):
        """Replace each string literal with '0x' + the hex of its bytes."""
        for i, t in enumerate(toks):
            # str.encode('hex') only exists on Python 2; hexlify works on
            # both major versions once the token is a byte string.
            raw = t if isinstance(t, bytes) else t.encode('utf-8')
            hexed = binascii.hexlify(raw)
            if not isinstance(hexed, str):
                hexed = hexed.decode('ascii')
            toks[i] = ''.join(['0x', hexed])
        return toks

    def var_name_to_value(s, loc, toks):
        """Substitute '$name' with its value; unknown/falsy values are left as-is."""
        for i, t in enumerate(toks):
            val = variables.get(t[1:])
            if val:
                toks[i] = val
        return toks

    def implicit_opcode_to_explicit(s, loc, toks):
        """Add "OP_" prefix to an opcode."""
        for i, t in enumerate(toks):
            toks[i] = '_'.join(['OP', t])
        return toks

    def hex_to_formatted_hex(s, loc, toks):
        """Add "0x" prefix and ensure even length."""
        for i, t in enumerate(toks):
            new_tok = t
            # Add '0x' prefix
            if not t.startswith('0x'):
                if t.startswith('x'):
                    new_tok = ''.join(['0', t])
                else:
                    new_tok = ''.join(['0x', t])
            # Even-length string
            if len(new_tok) % 2 != 0:
                new_tok = ''.join([new_tok[0:2], '0', new_tok[2:]])
            toks[i] = new_tok
        return toks
    # ^ parseActions for pyparsing end here.

    str_literal = QuotedString('"')
    str_literal.setParseAction(str_literal_to_hex)
    var_name = Combine(Word('$') + Word(pyparsing.alphas))
    var_name.setParseAction(var_name_to_value)

    # Here we populate the list of contextual tips.

    # Explicit opcode names
    op_names = [str(i) for i in OPCODE_NAMES.keys()]
    op_names_explicit = ' '.join(op_names)

    def is_small_int(op):
        """True if op is one of OP_1, OP_2, ...OP_16"""
        try:
            int(op[3:])
            return True
        except ValueError:
            return False

    op_names_implicit = ' '.join(
        [i[3:] for i in op_names if not is_small_int(i)])

    # Hex, implicit (e.g. 'a') and explicit (e.g. '0x0a')
    explicit_hex = Combine(
        Word('0x') + Word(pyparsing.hexnums) + pyparsing.WordEnd())
    implicit_hex = Combine(pyparsing.WordStart() +
                           OneOrMore(Word(pyparsing.hexnums)) +
                           pyparsing.WordEnd())
    explicit_hex.setParseAction(hex_to_formatted_hex)
    implicit_hex.setParseAction(hex_to_formatted_hex)

    # Opcodes, implicit (e.g. 'ADD') and explicit (e.g. 'OP_ADD')
    explicit_op = pyparsing.oneOf(op_names_explicit)
    implicit_op = Combine(pyparsing.WordStart() +
                          pyparsing.oneOf(op_names_implicit))
    implicit_op.setParseAction(implicit_opcode_to_explicit)

    contexts = pyparsing.Optional(
        var_name('Variable') |
        str_literal('String literal') |
        explicit_op('Opcode') |
        implicit_op('Opcode') |
        explicit_hex('Hex') |
        implicit_hex('Hex'))
    matches = [(i[0].asDict(), i[1], i[2]) for i in contexts.scanString(text)]

    context_tips = []
    for d, start, end in matches:
        if not d:
            continue
        # dict.items() is a view on Python 3 and cannot be indexed.
        match_type, value = next(iter(d.items()))
        context_tips.append((start, end, value, match_type))

    # Now we do the actual transformation.
    s = text
    s = var_name.transformString(s)
    s = str_literal.transformString(s)
    s = implicit_op.transformString(s)
    s = implicit_hex.transformString(s)
    s = explicit_hex.transformString(s)

    return s, context_tips
return or_filter

@staticmethod
def not_operator(str_, loc, toks):
    """
    method to support logical 'not' operator expression

    Builds a NotFilter and adds every element of ``toks[0]`` after the
    first one as a child (the first element is presumably the 'not'
    keyword itself - confirm against the operator table).
    Returns the NotFilter.
    """
    not_filter = NotFilter()
    for op_filter in toks[0][1:]:
        not_filter.child_add(op_filter)
    return not_filter

# Property name: alphanumerics/underscore, starting at a word boundary.
prop = pp.WordStart(
    pp.alphas) + pp.Word(pp.alphanums + "_").setResultsName("prop")
# Value: quoted with ' or ", or a bare run of printable characters without a comma.
value = (pp.QuotedString("'") | pp.QuotedString('"') |
         pp.Word(pp.printables, excludeChars=",")).setResultsName("value")
# Accepted values for type=... and flag=...
types_ = pp.oneOf("re eq ne gt ge lt le").setResultsName("types")
flags = pp.oneOf("C I").setResultsName("flags")
comma = pp.Literal(',')
quote = (pp.Literal("'") | pp.Literal('"')).setResultsName("quote")
# type=<quote>T<quote> and flag=<quote>F<quote>, each grouped.
type_exp = pp.Group(
    pp.Literal("type") + pp.Literal("=") + quote + types_ +
    quote).setResultsName("type_exp")
flag_exp = pp.Group(
    pp.Literal("flag") + pp.Literal("=") + quote + flags +
    quote).setResultsName("flag_exp")
# Placeholder; its definition is not visible in this part of the file.
semi_expression = pp.Forward()
def transform_human(text, variables=None):
    """Transform user input with given context.

    The input is split into words (shell-style, keeping quotes); every word
    that is not a double-quoted string literal has variables (``$name``),
    implicit opcode names, decimal numbers and hex values rewritten.  The
    same grammars are also used to collect tooltip data.

    Args:
        text (str): User input.
        variables (dict): Variables for purposes of substitution.

    Returns:
        A 2-tuple of: (A human-readable script that Script can parse,
            A list of contextual information for tooltips, etc.)
        Each list entry is ``(start, end, value, match_type)``.
    """
    if variables is None:
        variables = {}  # No mutable default value.

    # these are parseActions for pyparsing.
    def var_name_to_value(s, loc, toks):
        """Substitute '$name' with its value; unknown/falsy values are left as-is."""
        for i, t in enumerate(toks):
            val = variables.get(t[1:])
            if val:
                toks[i] = val
        return toks

    def implicit_opcode_to_explicit(s, loc, toks):
        """Add "OP_" prefix to an opcode."""
        for i, t in enumerate(toks):
            toks[i] = '_'.join(['OP', t])
        return toks

    def hex_to_formatted_hex(s, loc, toks):
        """Add "0x" prefix and ensure even length."""
        for i, t in enumerate(toks):
            toks[i] = format_hex_string(t)
        return toks

    def decimal_to_formatted_hex(s, loc, toks=None):
        """Convert decimal to hex."""
        if toks is None:
            return
        for i, t in enumerate(toks):
            # '%x' instead of hex(): on Python 2, hex() of a long appends
            # a trailing 'L' that would corrupt the hex string.
            toks[i] = format_hex_string('0x%x' % int(t))
        return toks
    # ^ parseActions for pyparsing end here.

    str_literal = QuotedString('"')
    var_name = Combine(Word('$') + Word(pyparsing.alphas))
    var_name.setParseAction(var_name_to_value)

    # Here we populate the list of contextual tips.

    # Explicit opcode names
    op_names = [str(i) for i in opcodes.opcodes_by_name.keys()]
    op_names_explicit = ' '.join(op_names)

    def is_small_int(op):
        """True if op is one of OP_1, OP_2, ...OP_16"""
        try:
            int(op[3:])
            return True
        except ValueError:
            return False

    op_names_implicit = ' '.join(
        [i[3:] for i in op_names if not is_small_int(i)])

    # Explicit hex (e.g. '0x0a') and decimal numbers (e.g. '10')
    explicit_hex = Combine(
        Word('0x') + Word(pyparsing.hexnums) + pyparsing.WordEnd())
    decimal_number = Combine(pyparsing.WordStart() +
                             OneOrMore(Word(pyparsing.nums)) +
                             pyparsing.WordEnd())
    explicit_hex.setParseAction(hex_to_formatted_hex)
    decimal_number.setParseAction(decimal_to_formatted_hex)

    # Opcodes, implicit (e.g. 'ADD') and explicit (e.g. 'OP_ADD')
    explicit_op = pyparsing.oneOf(op_names_explicit)
    implicit_op = Combine(pyparsing.WordStart() +
                          pyparsing.oneOf(op_names_implicit))
    implicit_op.setParseAction(implicit_opcode_to_explicit)

    contexts = pyparsing.Optional(
        var_name('Variable') |
        str_literal('String literal') |
        explicit_op('Opcode') |
        implicit_op('Opcode') |
        explicit_hex('Hex') |
        decimal_number('Decimal'))
    matches = [(i[0].asDict(), i[1], i[2]) for i in contexts.scanString(text)]

    context_tips = []
    for d, start, end in matches:
        if not d:
            continue
        # dict.items() is a view on Python 3 and cannot be indexed.
        match_type, value = next(iter(d.items()))
        context_tips.append((start, end, value, match_type))

    # Now we do the actual transformation.
    try:
        words = shlex.split(text, posix=False)
    except ValueError:
        # shlex rejects e.g. unbalanced quotes; fall back to a plain split.
        words = text.split()

    # Built once; used to recognise words that are string literals.
    literal_probe = pyparsing.Optional(str_literal)

    strings = []
    for s in words:
        # Do not transform strings if they are string literals.
        is_literal = bool(literal_probe.parseString(s))
        if not is_literal:
            s = var_name.transformString(s)
            s = implicit_op.transformString(s)
            s = decimal_number.transformString(s)
            s = explicit_hex.transformString(s)
        strings.append(s)

    return ' '.join(strings), context_tips
import pyparsing as pp
from pyparsing import pyparsing_common as ppc

# pyparsing's default set of whitespace characters.
WHITE_CHARS = pp.ParserElement.DEFAULT_WHITE_CHARS

# Unquoted word: skip (and discard) input up to the next word start, then
# take a run of characters that are not whitespace.
uword = pp.Suppress(pp.SkipTo(pp.WordStart())) + pp.CharsNotIn(WHITE_CHARS)

# Parser to use for a value of each Python type:
#   int  - optional '-' and digits, converted to an integer, ending at a word end
#   str  - single- or double-quoted string, or an unquoted word
#   bool - consumes nothing and yields True
common_parsers = {
    int: pp.Combine(pp.Optional('-') + pp.Word(pp.nums)).setParseAction(
        ppc.convertToInteger) + pp.Suppress(pp.WordEnd()),
    str: (pp.QuotedString("'") | pp.QuotedString('"') | uword) +
         pp.Suppress(pp.WordEnd()),
    bool: pp.Empty().setParseAction(lambda x: True)
}


def default_parser(default):
    """Return an empty-match parser with *default* set as its parse action."""
    return pp.Empty().setParseAction(default)


rest_of_line = pp.restOfLine.copy()
rest_of_string = pp.SkipTo(pp.StringEnd())
# Captures everything up to the end of the string as {"_rest": <text>}.
rest_parser = rest_of_string("_rest").setName("_rest").addParseAction(
    lambda x: {"_rest": x[0]})


def update_dict(dict_list):
    ret = dict()
    for i in dict_list:
import pyparsing as pp

from pydbml.definitions.generic import name
from pydbml.definitions.common import _, _c, end, note, note_object
from pydbml.definitions.column import table_column
from pydbml.definitions.index import indexes
from pydbml.classes import Table

pp.ParserElement.setDefaultWhitespaceChars(' \t\r')

# "as <name>" with "as" required to be a whole word.
alias = pp.WordStart() + pp.Literal('as').suppress() - pp.WordEnd() - name

# "#" followed by three or six hex digits, with no whitespace skipping.
hex_char = pp.Word(pp.srange('[0-9a-fA-F]'), exact=1)
hex_color = ("#" - (hex_char * 3 ^ hex_char * 6)).leaveWhitespace()
header_color = (pp.CaselessLiteral('headercolor:').suppress() + _
                - pp.Combine(hex_color)('header_color'))
table_setting = _ + (note('note') | header_color) + _
table_settings = '[' + table_setting + (',' + table_setting)[...] + ']'


def parse_table_settings(s, l, t):
    '''
    Collect the named settings present in the parsed tokens, e.g. for

    [headercolor: #cccccc, note: 'note']

    Returns a dict holding 'note' and/or 'header_color' when they are
    present in ``t``.
    '''
    wanted = ('note', 'header_color')
    return {key: t[key] for key in wanted if key in t}
return or_filter

@staticmethod
def not_operator(str_, loc, toks):
    """
    method to support logical 'not' operator expression

    Builds a NotFilter and adds every element of ``toks[0]`` after the
    first one as a child (the first element is presumably the 'not'
    keyword itself - confirm against the operator table).
    Returns the NotFilter.
    """
    not_filter = NotFilter()
    for op_filter in toks[0][1:]:
        not_filter.child_add(op_filter)
    return not_filter

# Property name: alphanumerics/underscore, starting at a word boundary.
prop = pp.WordStart(pp.alphas) + pp.Word(pp.alphanums +
                                         "_").setResultsName("prop")
# Value: quoted with ' or ", or a bare run of printable characters without a comma.
value = (pp.QuotedString("'") | pp.QuotedString('"') | pp.Word(
    pp.printables, excludeChars=",")).setResultsName("value")
# Accepted values for type=... and flag=...
types_ = pp.oneOf("re eq ne gt ge lt le").setResultsName("types")
flags = pp.oneOf("C I").setResultsName("flags")
comma = pp.Literal(',')
quote = (pp.Literal("'") | pp.Literal('"')).setResultsName("quote")
# type=<quote>T<quote> and flag=<quote>F<quote>, each grouped.
type_exp = pp.Group(pp.Literal("type") + pp.Literal(
    "=") + quote + types_ + quote).setResultsName("type_exp")
flag_exp = pp.Group(pp.Literal("flag") + pp.Literal(
    "=") + quote + flags + quote).setResultsName("flag_exp")
semi_expression = pp.Forward()
semi_expression << pp.Group(pp.Literal("(") +