def test_prefixed_line(self):
    """Check ``prefixed_line`` when surrounded by comments and empty lines.

    Every case parses a complete document (``parseAll=True``) and compares
    the flattened token list against the expected tokens.
    """
    parser = (ZeroOrMore(comment | empty_line)
              + prefixed_line("Foo:")
              + ZeroOrMore(comment | empty_line))

    three_empty = ["<EMPTYLINE>", "<EMPTYLINE>", "<EMPTYLINE>"]
    # (document, expected tokens) for the single "Foo:" grammar.
    single_prefix_cases = [
        ("Foo: bar\n\n\n", ["bar"] + three_empty),
        # trailing blanks after the value are not part of the token
        ("Foo: bar \n\n\n", ["bar"] + three_empty),
        # inner whitespace of the value is kept
        ("Foo: bar baz\n\n\n", ["bar baz"] + three_empty),
        # a whitespace-only line counts as an empty line
        ("Foo: bar \n \n", ["bar", "<EMPTYLINE>", "<EMPTYLINE>"]),
        # comments may be indented
        ("Foo: bar \n#baz\n #spam\n",
         ["bar", "<COMMENT>", "<COMMENT>", "<EMPTYLINE>"]),
    ]
    for document, expected in single_prefix_cases:
        self.assertEqual(parser.parseString(document, True).asList(),
                         expected)

    # Two prefixed lines in sequence, separated by a comment.
    bar = parser + prefixed_line("Bar:") + ZeroOrMore(comment | empty_line)
    self.assertEqual(
        bar.parseString("Foo: bar \n#baz\nBar: baz spam\n #spam\n",
                        True).asList(),
        ["bar", "<COMMENT>", "baz spam", "<COMMENT>", "<EMPTYLINE>"])
def parse_macro_arguments(argument_string, return_kwargs=False):
    """Split a macro argument string into positional and keyword arguments.

    Recognised argument forms, in the order the grammar lists them:

    * ``"quoted text"``   -- a long argument (quotes are stripped)
    * ``key=value`` / ``key="quoted value"`` -- a keyword argument
    * ``((...))``         -- a nested macro, kept as one token
    * any other run of non-whitespace characters -- a basic argument

    :param argument_string: the raw argument text of the macro.
    :param return_kwargs: when true, keyword arguments are separated from
        the positional ones and returned as a dict.
    :return: ``None`` if ``argument_string`` is empty or ``None``.
        Otherwise the list of parsed arguments (keyword arguments appear
        as nested ``[key, value]`` lists), or -- with ``return_kwargs`` --
        a ``(args, kwargs)`` tuple.
    """
    if not argument_string:
        return None

    import re
    from pyparsing import Group, Or, QuotedString, Regex, Suppress, ZeroOrMore

    # General argument string parser
    argstring_def = ZeroOrMore(Or([
        QuotedString('"'),                           # long arguments
        Group(Regex(r'[\w]+', flags=re.UNICODE) +    # keyword arguments
              Suppress('=').leaveWhitespace() +
              Or([Regex(r'[\w]+'), QuotedString('"')])),
        Regex(r'\(\(.*\)\)', flags=re.UNICODE),      # nested macros
        Regex(r'[\S]+', flags=re.UNICODE),           # basic arguments
    ]))
    args = argstring_def.parseString(argument_string).asList()

    if not return_kwargs:
        return args

    # Keyword arguments are the nested lists produced by Group().  Build
    # the two results separately instead of removing items from `args`
    # while iterating over it, which skipped the element following every
    # keyword argument (e.g. the second of two consecutive key=value pairs).
    positional = []
    kwargs = {}
    for arg in args:
        if isinstance(arg, list):
            kwargs[str(arg[0])] = arg[1]
        else:
            positional.append(arg)
    return positional, kwargs
def parse_enum_defs(s, parsers=None):
    """Build a value parser for every enum definition found in a string

    Each enum definition contributes one entry, keyed by the enum name,
    whose parser accepts exactly the values listed in that enum. Lines
    that are not part of an enum definition are matched and discarded;
    hash comments and C style comments are ignored.

    :Parameters:
        - `s`: the string to parse
        - `parsers`: the dictionary of parsers to which the new ones
          will be added (optional); it is updated in place

    @return: the dictionary of type parsers (``parsers`` itself when given)

    >>> test_string = \"""# A sample file
    ... a foo
    ... bar baz
    ...
    ... typedef enum { # a comment
    ...     START, /* another comment */
    ...     END
    ... } RUNMARK;
    ...
    ... typedef struct {
    ...     float x;
    ...     int y
    ... } GOO
    ...
    ... typedef enum {
    ...     OAK,
    ...     MAPLE
    ... } TREES;
    ...
    ... GOO 3.4 6
    ... \"""
    >>> parsed_defs = parse_enum_defs(test_string)
    >>> runmark_parser = parsed_defs['RUNMARK']
    >>> runmark_parser.parseString("START")[0]
    'START'
    >>> runmark_parser.parseString("END")[0]
    'END'
    >>> try:
    ...     runmark_parser.parseString("41")[0]
    ... except:
    ...     print "parser threw an exception"
    parser threw an exception
    >>>
    >>> tree_parser = parsed_defs['TREES']
    >>> tree_parser.parseString("MAPLE")[0]
    'MAPLE'
    """
    if parsers is None:
        type_parser = {}
    else:
        type_parser = parsers

    one_enum_def_parser = make_one_enum_def_parser()

    # Any other kind of line: recognised by how it starts, swallowed up
    # to the end of the line, and dropped from the results.
    line_start = (value_name
                  | struct_declaration_start
                  | possible_type_name
                  | right_brace)
    not_enum = (line_start + restOfLine).suppress()

    enum_def_parser = (ZeroOrMore(Group(one_enum_def_parser) | not_enum)
                       + stringEnd)
    for ignorable in (hash_comment, cStyleComment):
        enum_def_parser.ignore(ignorable)

    for definition in enum_def_parser.parseString(s):
        allowed_values = definition['values'].asList()
        type_parser[definition['enum_name']] = oneOf(allowed_values)
    return type_parser
def parse_weak_order(s: str) -> list:
    """
    Turn the textual form of a weak order into a list of sets.

    Candidates separated by ``~`` belong to the same indifference class;
    classes are separated by ``>``, from most liked to most disliked.

    :param s: a string.
    :return: a list of sets, one per indifference class. The first set holds
        the top (= most liked) candidates and the last set holds the bottom
        (= most disliked) candidates. A string containing nothing but
        blanks yields an empty list.

    >>> s = 'Alice ~ Bob ~ Catherine32 > me > you ~ us > them'
    >>> parse_weak_order(s) == [{'Alice', 'Bob', 'Catherine32'}, {'me'}, {'you', 'us'}, {'them'}]
    True
    """
    # Grammar: candidate names are made of letters, digits and underscores.
    name_chars = alphas.upper() + alphas.lower() + nums + '_'
    candidate = Word(name_chars)
    same_class_tail = ZeroOrMore(Word('~').suppress() + candidate)
    equiv_class = Group(candidate + same_class_tail)
    lower_classes = ZeroOrMore(Word('>').suppress() + equiv_class)
    weak_preference = equiv_class + lower_classes
    empty_preference = ZeroOrMore(' ')

    # The "nothing but blanks" grammar is tried first; only when it rejects
    # the input is the real grammar applied. For
    # 'Alice ~ Bob > me', the tokens are [['Alice', 'Bob'], ['me']].
    try:
        tokens = empty_preference.parseString(s, parseAll=True)
    except ParseException:
        tokens = weak_preference.parseString(s, parseAll=True)

    # One set per parsed group, keeping the order of the groups.
    return [NiceSet(members) for members in tokens.asList()]
def _parse_data(data: str) -> Dict[str, Any]:
    """Parse ``key = value`` table data into a nested Python structure.

    A value is either a double-quoted string or a ``{...}`` table. A table
    holds comma-separated ``key = value`` pairs (giving a dict), or
    comma-separated values (giving a list), or nothing (giving ``None``).
    Keys are lower-case identifiers or ``['quoted']`` names.

    :param data: the complete text; it must match the grammar entirely.
    :return: the top-level mapping of keys to parsed values.
    """
    def to_pair(_text, _loc, toks):
        # one (key, value) tuple per assignment
        return [(str(toks[0]), toks[1])]

    def to_list(_text, _loc, toks):
        return [list(toks)]

    def to_dict(_text, _loc, toks):
        return [{k: v for k, v in toks}]

    def to_none(_text, _loc, _toks):
        return [None]

    def to_root(_text, _loc, toks):
        return {k: v for k, v in toks}

    lcur, rcur, lbrk, rbrk, comma, eq = map(Suppress, '{}[],=')

    bare_key = Regex(r'[a-z][a-z0-9_]*')
    bracketed_key = lbrk + QuotedString(quoteChar="'") + rbrk
    tablekey = bare_key | bracketed_key
    qstring = QuotedString(quoteChar='"')

    # Tables nest, so the value rule is declared before it is defined.
    value = Forward()
    keyval = (tablekey + eq + value).setParseAction(to_pair)

    array_table = (value + ZeroOrMore(comma + value)).setParseAction(to_list)
    dict_table = (keyval + ZeroOrMore(comma + keyval)).setParseAction(to_dict)
    empty_table = Empty().setParseAction(to_none)
    table = lcur + (dict_table | array_table | empty_table) + rcur

    value << (qstring | table)

    root = ZeroOrMore(keyval).setParseAction(to_root)
    parsed = root.parseString(data, parseAll=True)
    return parsed[0]  # type: ignore
def parse_nested(expression: str) -> list:
    """Tokenise a string of numbers, ``+``/``*`` operators and parentheses.

    :param expression: the text to tokenise.
    :return: the parsed tokens as a plain list; parenthesised parts are
        handled by ``nestedExpr('(', ')')``. Parsing is not forced to
        consume the whole input.
    """
    number = Word(nums)
    operators = oneOf("+ *")
    parenthesised = nestedExpr('(', ')')
    grammar = ZeroOrMore(number | operators | parenthesised)
    tokens = grammar.parseString(expression)
    return tokens.asList()
def parse_header(s, enum_def_parser, struct_def_parser, struct_parsers):
    """Collect the keyword assignments of a yanny par file into a dictionary

    Enum definitions, struct definitions and struct data rows are matched
    and discarded; every remaining header assignment contributes one
    ``name -> value`` entry. Anything from a ``#`` onwards is removed from
    the value, which is then stripped of surrounding whitespace.

    :Parameters:
        - `s`: the string to parse
        - `enum_def_parser`: a parser that parses one enum definition
        - `struct_def_parser`: a parser that parses one struct definition
        - `struct_parsers`: a dictionary of structures that parse structure data

    @return: a dictionary with the keyword assignments

    >>> test_string = \"""# A sample file
    ... a foo
    ... bar baz goo # no more
    ...
    ... typedef enum {
    ...     START,
    ...     END
    ... } RUNMARK;
    ...
    ... typedef struct {
    ...     float x;
    ...     int y
    ... } GOO
    ...
    ... typedef enum {
    ...     OAK,
    ...     MAPLE
    ... } TREES;
    ...
    ... GOO 3.4 6
    ... \"""
    >>>
    >>> enum_def_parser = make_one_enum_def_parser()
    >>> type_parsers = parse_enum_defs(test_string, base_type_parsers)
    >>> struct_def_parser = make_one_struct_def_parser(type_parsers)
    >>> structs, struct_parsers = parse_struct_defs(test_string, type_parsers)
    >>> h = parse_header(test_string, enum_def_parser, struct_def_parser, struct_parsers)
    >>> print h['a']
    foo
    >>> print h['bar']
    baz goo
    """
    one_header_assignment_parser = make_one_header_assignment_parser(
        struct_parsers)

    # Everything that is *not* a header assignment.
    non_header_alternatives = [enum_def_parser, struct_def_parser]
    non_header_alternatives.extend(
        struct_parsers[name] for name in struct_parsers.keys())
    not_header = Or(non_header_alternatives).suppress()

    header_parser = (ZeroOrMore(not_header | one_header_assignment_parser)
                     + stringEnd)
    for ignorable in (hash_comment, cStyleComment):
        header_parser.ignore(ignorable)

    # Later assignments to the same name override earlier ones.
    return {
        assignment['name']: assignment['value'].partition('#')[0].strip()
        for assignment in header_parser.parseString(s)
    }
def test_prefixed_line(self):
    """Check ``prefixed_line`` when surrounded by comments and empty lines.

    Every case parses a complete document (``parseAll=True``) and compares
    the flattened token list against the expected tokens.
    """
    parser = (ZeroOrMore(comment | empty_line)
              + prefixed_line("Foo:")
              + ZeroOrMore(comment | empty_line))

    three_empty = ["<EMPTYLINE>", "<EMPTYLINE>", "<EMPTYLINE>"]
    # (document, expected tokens) for the single "Foo:" grammar.
    single_prefix_cases = [
        ("Foo: bar\n\n\n", ["bar"] + three_empty),
        # trailing blanks after the value are not part of the token
        ("Foo: bar \n\n\n", ["bar"] + three_empty),
        # inner whitespace of the value is kept
        ("Foo: bar baz\n\n\n", ["bar baz"] + three_empty),
        # a whitespace-only line counts as an empty line
        ("Foo: bar \n \n", ["bar", "<EMPTYLINE>", "<EMPTYLINE>"]),
        # comments may be indented
        ("Foo: bar \n#baz\n #spam\n",
         ["bar", "<COMMENT>", "<COMMENT>", "<EMPTYLINE>"]),
    ]
    for document, expected in single_prefix_cases:
        self.assertEqual(parser.parseString(document, True).asList(),
                         expected)

    # Two prefixed lines in sequence, separated by a comment.
    bar = parser + prefixed_line("Bar:") + ZeroOrMore(comment | empty_line)
    self.assertEqual(
        bar.parseString("Foo: bar \n#baz\nBar: baz spam\n #spam\n",
                        True).asList(),
        ["bar", "<COMMENT>", "baz spam", "<COMMENT>", "<EMPTYLINE>"])
def _parse_items(self, source):
    """Parse ``key: value`` lines from *source*.

    Keys consist of alphanumerics and ``-``. The value is optional and may
    continue on following lines that begin with a space. ``#`` comments
    are ignored. The whole of *source* must match.

    Note: this changes pyparsing's default whitespace characters to
    space, tab and carriage return (newlines become significant) and does
    not restore the previous setting.

    Returns the pyparsing results: one group per line, with the named
    fields ``key`` and ``value``.
    """
    # Must happen before any grammar element below is created.
    ParserElement.setDefaultWhitespaceChars(' \t\r')

    EOL = LineEnd().suppress()
    rest_of_physical_line = CharsNotIn("\n")

    key = Word(alphanums + '-')('key')
    separator = Literal(':').suppress()
    continuation = ZeroOrMore(EOL + Literal(' ') + rest_of_physical_line)
    value = Optional(Combine(rest_of_physical_line + continuation))("value")
    line = Group(key + separator + value + EOL)

    comment = Literal('#') + Optional(restOfLine) + EOL

    items = ZeroOrMore(line)
    items.ignore(comment)
    return items.parseString(source, True)
def split_quote(s):
    """Split *s* into tokens without breaking links, tags or quoted spans.

    The alternatives are tried in the order listed below; the first one
    that matches at the current position wins, and anything else falls
    back to a run of non-space characters.

    Returns the pyparsing results holding the tokens.
    """
    # Markdown links
    link_reference = Regex(r'\[[^]]*\]:.*')      # [1]:http:....
    link_inline_url = Regex(r'\[[^]]*\]\(.*\)')  # [text](http://)
    link_text = Regex(r'(\[[^]]*\])+')           # [text][1]

    # Code fences and HTML tags
    code_fence = Regex(r'```')
    html_tag = Regex(r'<[^>]*>')

    # Quoted spans are kept in one piece
    single_quoted = Regex(r'\'[^\']*\'')         # '....'
    double_quoted = Regex(r'"[^"]*"')
    backtick_quoted = Regex(r'\`[^`]*\`')

    # Fallback: anything up to the next space
    plain_word = Regex(r'[^ ]+')

    token = (link_reference | link_inline_url | link_text
             | code_fence | html_tag
             | single_quoted | double_quoted | backtick_quoted
             | plain_word)
    return ZeroOrMore(token).parseString(s)
def parse_template(template_text):
    """Parse a template made of raw text and ``${...}`` measurement macros.

    A macro has the shape
    ``${a.b.c(name:value, ...)[local_id]}`` where the parenthesised
    parameter list is optional. Everything that contains no ``$`` is a
    raw text block. Whitespace is significant.

    Returns the parse results converted to a dict (``asDict()``); the
    sequence of blocks is stored under the results name ``template``.
    """
    identifier = Word(alphas, alphanums + '_')

    # (name:value, name:value, ...)
    param_value = CharsNotIn(',)')('value')
    param = Group(identifier('name') + Suppress(':') + param_value)
    param_list = Group(
        Suppress('(') + delimitedList(param, delim=',') + Suppress(')'))

    # three dotted identifiers, kept as the original text
    benchmark_id = originalTextFor(
        identifier + '.' + identifier + '.' + identifier)

    local_id = Suppress('[') + identifier('local_id') + Suppress(']')
    measurement_id = Group(
        benchmark_id('benchmark') + Optional(param_list('params')) + local_id)

    macro = Group(
        Suppress('${') + measurement_id('measurement') + Suppress('}'))
    raw_text_block = originalTextFor(CharsNotIn('$'))

    block = Group(raw_text_block('text') | macro('macro'))
    text = ZeroOrMore(block)('template')
    text.leaveWhitespace()

    parsed = text.parseString(template_text)
    return parsed.asDict()
def guess_language(string=None, filename=None):
    """
    Guess the language from the leading ``#`` comment lines.

    The comments are searched for a ``# language: fr`` style phrase. When
    one is found the matching ``languages.Language`` is returned;
    otherwise, or when the text cannot be parsed, English is returned.

    Raises ``RuntimeError`` when neither `string` nor `filename` is given.
    """
    language_tag = (Suppress(Keyword('language'))
                    + Suppress(':')
                    + Word(unicodePrintables)('language'))
    any_other_comment = Suppress(restOfLine)
    LANG_PARSER = ZeroOrMore(
        Suppress('#') + (language_tag | any_other_comment))

    try:
        if not string and not filename:
            raise RuntimeError("Must pass string or filename")

        if string:
            tokens = LANG_PARSER.parseString(string)
        else:
            # NOTE(review): a third positional 'utf-8' suggests `open` is a
            # codecs-style open rather than the builtin -- confirm.
            with open(filename, 'r', 'utf-8') as fp:
                tokens = LANG_PARSER.parseFile(fp)

        code = tokens.language
        if code != '':
            return languages.Language(code=code)
    except ParseException:
        # unparseable header: fall through to English
        pass

    return languages.English()
# #s = cgen.genPackFun( structList[1]) # #print(s) # cgen.pprint() # s = cgen.genHeader(pp.asDict()) # print(s) # print(pp.asDict()) # cgen = OOcodeGenerator(test1) # cgen.pprint() # s = cgen.genAll() # print(s) # print(annotateDict) # docgen = MarkdownGenerator() # s = docgen.genAll() # print(s) pp = parser.parseString(test1) pygen = OOpythonGenerator() pygen.pprint() s = pygen.genAll() print(s) print("\n------------------------------\n\n") oogen = OOcodeGenerator() #oogen.pprint() s = oogen.genAll("test.hpp", "test.cpp") print(s)
| smoothQuadraticBezierCurveto | quadraticBezierCurveto | smoothCurve | curve | horizontalLine | verticalLine) # ~ number.debug = True moveToDrawToCommands = moveTo + ZeroOrMore(drawToCommand) path = ZeroOrMore(moveToDrawToCommands) path.keepTabs = True def get_points(d): commands = path.parseString(d) points = [] currentset = None for command in commands: if command[0] == 'M' or command[0] == 'm': currentset = [] points.append(currentset) currentset.append(command[1][-1]) elif command[0] == 'L' or command[0] == 'l': currentset.extend(command[1]) elif command[0] == 'C' or command[0] == 'c': currentset.extend(command[1]) return points if __name__ == "__main__": print path.parseString( "M 242.96145,653.59282 L 244.83646,650.1553 L 247.02397,649.8428 L 247.33647,650.62405 L 245.30521,653.59282 L 242.96145,653.59282 z M 252.80525,649.99905 L 258.74278,652.49906 L 260.77404,652.18656 L 262.33654,648.43654 L 261.71154,645.15528 L 257.64902,644.68653 L 253.74275,646.40528 L 252.80525,649.99905 z M 282.49289,659.6866 L 286.08665,664.99912 L 288.43041,664.68662 L 289.52417,664.21787 L 290.93042,665.46787 L 294.52419,665.31162 L 295.4617,663.90537 L 292.64918,662.18661 L 290.77417,658.59284 L 288.74291,655.15533 L 283.11789,657.96784 L 282.49289,659.6866 z M 302.02423,668.28039 L 303.27423,666.40538 L 307.8055,667.34288 L 308.43051,666.87413 L 314.36803,667.49913 L 314.05553,668.74914 L 311.55552,670.15539 L 307.33675,669.84289 L 302.02423,668.28039 z M 307.1805,673.28041 L 309.05551,677.03043 L 312.02427,675.93667 L 312.33677,674.37416 L 310.77427,672.3429 L 307.1805,672.0304 L 307.1805,673.28041 z M 313.89928,672.18665 L 316.08679,669.37414 L 320.61806,671.7179 L 324.83683,672.81166 L 329.0556,675.46792 L 329.0556,677.34293 L 325.61809,679.06169 L 320.93056,679.99919 L 318.5868,678.59293 L 313.89928,672.18665 z M 329.99311,687.18672 L 331.55561,685.93672 L 334.83688,687.49923 L 342.18066,690.93674 L 345.46193,692.968 L 347.02443,695.31176 L 348.89944,699.53053 L 
352.80571,702.03054 L 352.49321,703.28055 L 348.74319,706.40556 L 344.68067,707.81182 L 343.27442,707.18682 L 340.30565,708.90557 L 337.96189,712.03059 L 335.77438,714.8431 L 334.05562,714.68685 L 330.61811,712.18684 L 330.30561,707.81182 L 330.93061,705.46806 L 329.3681,699.99928 L 327.33684,698.28052 L 327.18059,695.78051 L 329.3681,694.84301 L 331.39936,691.87425 L 331.86811,690.93674 L 330.30561,689.21798 L 329.99311,687.18672 z " )
def parse(self, argument): """Constructor. Variables: argument: Data to be parsed. """ # pyparsing syntax parser definition classQualifier = Forward() classQualifierRegex = Regex(r'[a-zA-Z_0-9]+::') classQualifier << classQualifierRegex + ZeroOrMore(classQualifier) className = Optional(classQualifier) + \ ~(Keyword('Handle') | Keyword('shared_ptr') | \ Keyword('vector')) + \ Regex(r'[a-zA-Z_0-9_]+') templClass = Forward() templClass << (className + Optional( '<' + \ templClass + ZeroOrMore(',' + \ templClass) + '>'))\ .setResultsName('class_name') handle = classQualifier + 'Handle<' + templClass + '>' sharedPtr = 'boost::shared_ptr<' + templClass + '>' allTypes = templClass | handle | sharedPtr vectorWord = Forward() vectorWord << ('std::vector<' + vectorWord + '>' | allTypes) variableWord = Regex(r'[a-zA-Z_]+[a-zA-Z0-9]*').\ setResultsName('variable_name') defaultValue = ZeroOrMore(' ') + '=' + ZeroOrMore(' ') + \ Regex(r'.+').setResultsName('default_value') # Regex(r'[()a-zA-Z0-9<>:\., ]+').\ # setResultsName('default_value') parameter = ZeroOrMore(' ') + vectorWord + variableWord + \ Optional( ZeroOrMore(' ') + defaultValue ) #parse argument parse = None try: parse = parameter.parseString(argument, True) except ParseException as err: Logger().debug(err.line) Logger().debug(str(err)) Logger().error('parameter parsing error "%s"' % argument) sys.exit(5) try: Logger().debug('entering conversion ...') #set class_name and variable_name self.class_name = '' for item in parse['class_name']: self.class_name = self.class_name + item.strip() self.variable_name = parse['variable_name'].strip() # analyse for handle and shared_ptr self.is_handle = False if parse.asList().count('Handle<')>=1 or \ parse.asList().count('Handle')>=1: self.is_handle = True self.is_sharedptr = False if parse.asList().count('boost::shared_ptr<')>=1 or \ parse.asList().count('boost::shared_ptr')>=1: self.is_sharedptr = True #sets default value if 'default_value' in parse.keys(): self.default_value = 
parse['default_value'] #determine dimension self.dimension = parse.asList().count('std::vector<') Logger().debug('Class ' + self.class_name) Logger().debug('Variable ' + self.variable_name) Logger().debug('Handle ' + str(self.is_handle)) Logger().debug('SharedPtr ' + str(self.is_sharedptr)) Logger().debug('Dimension ' + str(self.dimension)) Logger().debug('Default ' + str(self.default_value)) Logger().debug('leaving conversion ...') except: Logger().error('parameter conversion error "%s"' % argument) sys.exit(5)
def parse(content, basedir=None, resolve=True):
    """parse a HOCON content

    The grammar is built on every call, the whole content must match it,
    and substitutions are resolved afterwards when requested.

    :param content: HOCON content to parse
    :type content: basestring
    :param basedir: directory against which the paths of included files
        are joined; when None the paths are used as written
    :param resolve: If true, resolve substitutions
    :type resolve: boolean
    :return: a ConfigTree or a list
    """

    def norm_string(value):
        # apply every escape replacement of ConfigParser.REPLACEMENTS
        for k, v in ConfigParser.REPLACEMENTS.items():
            value = value.replace(k, v)
        return value

    def unescape_string(tokens):
        return ConfigUnquotedString(norm_string(tokens[0]))

    def parse_multi_string(tokens):
        # remove the first and last 3 "
        return tokens[0][3: -3]

    def convert_number(tokens):
        # integers stay integers; everything else becomes a float
        n = tokens[0]
        try:
            return int(n)
        except ValueError:
            return float(n)

    # ${path} or ${?path} for optional substitution
    SUBSTITUTION = "\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>\s*)"

    def create_substitution(instring, loc, token):
        # remove the ${ and }
        match = re.match(SUBSTITUTION, token[0])
        variable = match.group('variable')
        ws = match.group('ws')
        optional = match.group('optional') == '?'
        substitution = ConfigSubstitution(variable, optional, ws, instring, loc)
        return substitution

    def include_config(token):
        # Load the included configuration without resolving it.
        url = None
        file = None
        if len(token) == 1:  # include "test"
            if token[0].startswith("http://") or token[0].startswith("https://") or token[0].startswith("file://"):
                url = token[0]
            else:
                file = token[0]
        elif len(token) == 2:  # include url("test") or file("test")
            if token[0] == 'url':
                url = token[1]
            else:
                file = token[1]

        if url is not None:
            logger.debug('Loading config from url %s', url)
            obj = ConfigFactory.parse_URL(url, resolve=False)

        if file is not None:
            path = file if basedir is None else os.path.join(basedir, file)
            logger.debug('Loading config from file %s', path)
            obj = ConfigFactory.parse_file(path, required=False, resolve=False)

        # NOTE(review): `obj` is only bound when token has one or two
        # elements; the include grammar below appears to guarantee that.
        return ConfigInclude(obj if isinstance(obj, list) else obj.items())

    # Newlines are significant in this grammar, so only blanks and tabs are
    # skipped. NOTE(review): this changes a pyparsing-wide default and no
    # restore is visible in this function.
    ParserElement.setDefaultWhitespaceChars(' \t')

    assign_expr = Forward()
    true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True))
    false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False))
    null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(None))
    key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + '._- ')

    eol = Word('\n\r').suppress()
    eol_comma = Word('\n\r,').suppress()
    comment = (Literal('#') | Literal('//')) - SkipTo(eol)
    comment_eol = Suppress(Optional(eol_comma) + comment)
    comment_no_comma_eol = (comment | eol).suppress()
    # the lookahead only accepts a number when it is followed by the end of
    # the input or by a character that ends a value
    number_expr = Regex('[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE]\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
                        re.DOTALL).setParseAction(convert_number)

    # multi line string using """
    # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
    multiline_string = Regex('""".*?"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
    # single quoted line string
    quoted_string = QuotedString(quoteChar='"', escChar='\\', multiline=True)
    # unquoted string that takes the rest of the line until an optional comment
    # we support .properties multiline support which is like this:
    # line1 \
    # line2 \
    # so a backslash precedes the \n
    unquoted_string = Regex(r'(\\[ \t]*[\r\n]|[^\[\{\n\r\]\}#,=\$])+?(?=($|\$|[ \t]*(//|[\}\],#\n\r])))',
                            re.DOTALL).setParseAction(unescape_string)
    substitution_expr = Regex('[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
    string_expr = multiline_string | quoted_string | unquoted_string

    # numbers and keywords are tried before the string forms
    value_expr = number_expr | true_expr | false_expr | null_expr | string_expr

    include_expr = (Keyword("include", caseless=True).suppress() - (
        quoted_string | ((Keyword('url') | Keyword('file')) - Literal('(').suppress() - quoted_string - Literal(')').suppress()))) \
        .setParseAction(include_config)

    dict_expr = Forward()
    list_expr = Forward()
    multi_value_expr = ZeroOrMore((Literal(
        '\\') - eol).suppress() | comment_eol | include_expr | substitution_expr | dict_expr | list_expr | value_expr)
    # for a dictionary : or = is optional
    # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
    inside_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
    dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
    list_entry = ConcatenatedValueParser(multi_value_expr)
    list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

    # special case when we have a value assignment where the string can potentially be the remainder of the line
    assign_expr << Group(
        key - ZeroOrMore(comment_no_comma_eol) - (dict_expr | Suppress(Literal('=') | Literal(':')) - ZeroOrMore(comment_no_comma_eol) - ConcatenatedValueParser(multi_value_expr))
    )

    # the file can be { ... } where {} can be omitted or []
    config_expr = ZeroOrMore(comment_eol | eol) + (list_expr | dict_expr | inside_dict_expr) + ZeroOrMore(comment_eol | eol_comma)
    config = config_expr.parseString(content, parseAll=True)[0]
    if resolve:
        ConfigParser.resolve_substitutions(config)
    return config
verticalLine = Group(Command("V") + Arguments(coordinateSequence))

# Every command that may follow the initial moveTo of a sub-path.
drawToCommand = (
    lineTo | moveTo | closePath | ellipticalArc | smoothQuadraticBezierCurveto |
    quadraticBezierCurveto | smoothCurve | curve | horizontalLine | verticalLine
)

#~ number.debug = True
# A path is any number of sub-paths, each starting with a moveTo.
moveToDrawToCommands = moveTo + ZeroOrMore(drawToCommand)

path = ZeroOrMore(moveToDrawToCommands)
path.keepTabs = True


def get_points(d):
    """Return the points of path data ``d``, grouped per sub-path.

    Every ``M``/``m`` command starts a new list, seeded with the last
    element of its arguments. ``L``/``l`` and ``C``/``c`` commands append
    all of their arguments to the current list. All other commands are
    ignored.

    :param d: the path data string.
    :return: a list with one list of points per ``M``/``m`` command.
    """
    commands = path.parseString(d)
    points = []
    currentset = None
    for command in commands:
        kind = command[0]
        if kind in ('M', 'm'):
            currentset = []
            points.append(currentset)
            currentset.append(command[1][-1])
        elif kind in ('L', 'l', 'C', 'c'):
            currentset.extend(command[1])
    return points


if __name__ == "__main__":
    # print(...) with a single argument behaves the same on Python 2 and 3;
    # the former print statement was a syntax error on Python 3.
    print(path.parseString("M 242.96145,653.59282 L 244.83646,650.1553 L 247.02397,649.8428 L 247.33647,650.62405 L 245.30521,653.59282 L 242.96145,653.59282 z M 252.80525,649.99905 L 258.74278,652.49906 L 260.77404,652.18656 L 262.33654,648.43654 L 261.71154,645.15528 L 257.64902,644.68653 L 253.74275,646.40528 L 252.80525,649.99905 z M 282.49289,659.6866 L 286.08665,664.99912 L 288.43041,664.68662 L 289.52417,664.21787 L 290.93042,665.46787 L 294.52419,665.31162 L 295.4617,663.90537 L 292.64918,662.18661 L 290.77417,658.59284 L 288.74291,655.15533 L 283.11789,657.96784 L 282.49289,659.6866 z M 302.02423,668.28039 L 303.27423,666.40538 L 307.8055,667.34288 L 308.43051,666.87413 L 314.36803,667.49913 L 314.05553,668.74914 L 311.55552,670.15539 L 307.33675,669.84289 L 302.02423,668.28039 z M 307.1805,673.28041 L 309.05551,677.03043 L 312.02427,675.93667 L 312.33677,674.37416 L 310.77427,672.3429 L 307.1805,672.0304 L 307.1805,673.28041 z M 313.89928,672.18665 L 316.08679,669.37414 L 320.61806,671.7179 L 324.83683,672.81166 L 329.0556,675.46792 L 329.0556,677.34293 L 325.61809,679.06169 L 320.93056,679.99919 L 318.5868,678.59293 L 313.89928,672.18665 z M 329.99311,687.18672 L 331.55561,685.93672 L 334.83688,687.49923 L 342.18066,690.93674 L 345.46193,692.968 L 347.02443,695.31176 L 348.89944,699.53053 L 352.80571,702.03054 L 352.49321,703.28055 L 348.74319,706.40556 L 344.68067,707.81182 L 343.27442,707.18682 L 340.30565,708.90557 L 337.96189,712.03059 L 335.77438,714.8431 L 334.05562,714.68685 L 330.61811,712.18684 L 330.30561,707.81182 L 330.93061,705.46806 L 329.3681,699.99928 L 327.33684,698.28052 L 327.18059,695.78051 L 329.3681,694.84301 L 331.39936,691.87425 L 331.86811,690.93674 L 330.30561,689.21798 L 329.99311,687.18672 z "))
#
# A simple example showing the use of the implied listAllMatches=True for
# results names with a trailing '*' character.
#
# This example performs work similar to itertools.groupby, but without
# having to sort the input first.
#
from pyparsing import Word, ZeroOrMore, nums

# Each expression matches its letter followed by digits, e.g. "A1".
aExpr = Word("A", nums)
bExpr = Word("B", nums)
cExpr = Word("C", nums)

# The trailing '*' makes each results name collect every match instead of
# keeping only the last one.
grammar = ZeroOrMore(aExpr("A*") | bExpr("B*") | cExpr("C*"))

results = grammar.parseString("A1 B1 A2 C1 B2 A3")
# print(...) with a single argument works on both Python 2 and Python 3;
# the former print statement was a syntax error on Python 3.
print(results.dump())
def parse(cls, content, basedir=None, resolve=True, unresolved_value=DEFAULT_SUBSTITUTION):
    """parse a HOCON content

    The grammar is built on every call while pyparsing's default
    whitespace is temporarily limited to blanks and tabs (newlines are
    significant in HOCON); the previous default is restored afterwards,
    also when parsing fails.

    :param content: HOCON content to parse
    :type content: basestring
    :param basedir: directory against which the paths of included files
        are joined; when None the paths are used as written
    :param resolve: if true, resolve substitutions
    :type resolve: boolean
    :param unresolved_value: value assigned to unresolved substitutions.
        If overridden with a default value, all unresolved values are
        replaced by that default value. If it is set to
        pyhocon.STR_SUBSTITUTION then the value is replaced by its
        substitution expression (e.g., ${x})
    :return: a ConfigTree or a list
    :raises ConfigSubstitutionException: when ``resolve`` is true,
        ``unresolved_value`` is MANDATORY_SUBSTITUTION and a substitution
        could not be resolved
    :raises ConfigException: when an include statement names neither a
        file nor a URL
    """

    unescape_pattern = re.compile(r'\\.')

    def replace_escape_sequence(match):
        # unknown escape sequences are left as they are
        value = match.group(0)
        return cls.REPLACEMENTS.get(value, value)

    def norm_string(value):
        return unescape_pattern.sub(replace_escape_sequence, value)

    def unescape_string(tokens):
        return ConfigUnquotedString(norm_string(tokens[0]))

    def parse_multi_string(tokens):
        # remove the first and last 3 "
        return tokens[0][3: -3]

    def convert_number(tokens):
        # integers stay integers; everything else becomes a float
        n = tokens[0]
        try:
            return int(n, 10)
        except ValueError:
            return float(n)

    def convert_period(tokens):
        period_value = int(tokens.value)
        period_identifier = tokens.unit

        # first supported unit whose list of spellings contains the one used
        period_unit = next((single_unit for single_unit, values
                            in cls.get_supported_period_type_map().items()
                            if period_identifier in values))

        return period(period_value, period_unit)

    # ${path} or ${?path} for optional substitution
    SUBSTITUTION_PATTERN = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)"

    def create_substitution(instring, loc, token):
        # remove the ${ and }
        match = re.match(SUBSTITUTION_PATTERN, token[0])
        variable = match.group('variable')
        ws = match.group('ws')
        optional = match.group('optional') == '?'
        substitution = ConfigSubstitution(variable, optional, ws, instring, loc)
        return substitution

    # "quoted value" followed by optional trailing blanks
    STRING_PATTERN = '"(?P<value>(?:[^"\\\\]|\\\\.)*)"(?P<ws>[ \t]*)'

    def create_quoted_string(instring, loc, token):
        # drop the surrounding quotes and keep the trailing blanks apart
        match = re.match(STRING_PATTERN, token[0])
        value = norm_string(match.group('value'))
        ws = match.group('ws')
        return ConfigQuotedString(value, ws, instring, loc)

    def include_config(instring, loc, token):
        url = None
        file = None
        required = False

        if token[0] == 'required':
            required = True
            final_tokens = token[1:]
        else:
            final_tokens = token

        if len(final_tokens) == 1:  # include "test"
            value = final_tokens[0].value if isinstance(final_tokens[0], ConfigQuotedString) else final_tokens[0]
            if value.startswith("http://") or value.startswith("https://") or value.startswith("file://"):
                url = value
            else:
                file = value
        elif len(final_tokens) == 2:  # include url("test") or file("test")
            # Inspect final_tokens, not token: with required(...) the raw
            # token list is shifted by one, so token[1] is the 'url'/'file'
            # keyword and the quoted string would never be unwrapped.
            value = final_tokens[1].value if isinstance(final_tokens[1], ConfigQuotedString) else final_tokens[1]
            if final_tokens[0] == 'url':
                url = value
            else:
                file = value

        if url is not None:
            logger.debug('Loading config from url %s', url)
            obj = ConfigFactory.parse_URL(
                url,
                resolve=False,
                required=required,
                unresolved_value=NO_SUBSTITUTION
            )
        elif file is not None:
            path = file if basedir is None else os.path.join(basedir, file)
            logger.debug('Loading config from file %s', path)
            obj = ConfigFactory.parse_file(
                path,
                resolve=False,
                required=required,
                unresolved_value=NO_SUBSTITUTION
            )
        else:
            raise ConfigException('No file or URL specified at: {loc}: {instring}', loc=loc, instring=instring)

        return ConfigInclude(obj if isinstance(obj, list) else obj.items())

    @contextlib.contextmanager
    def set_default_white_spaces():
        default = ParserElement.DEFAULT_WHITE_CHARS
        ParserElement.setDefaultWhitespaceChars(' \t')
        try:
            yield
        finally:
            # restore the process-wide default even when parsing raises
            ParserElement.setDefaultWhitespaceChars(default)

    with set_default_white_spaces():
        assign_expr = Forward()
        true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True))
        false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False))
        null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(NoneValue()))
        key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- /')

        eol = Word('\n\r').suppress()
        eol_comma = Word('\n\r,').suppress()
        comment = (Literal('#') | Literal('//')) - SkipTo(eol | StringEnd())
        comment_eol = Suppress(Optional(eol_comma) + comment)
        comment_no_comma_eol = (comment | eol).suppress()
        # the lookahead only accepts a number when it is followed by the
        # end of the input or by a character that ends a value
        number_expr = Regex(r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
                            re.DOTALL).setParseAction(convert_number)

        period_types = itertools.chain.from_iterable(cls.get_supported_period_type_map().values())
        period_expr = Regex(r'(?P<value>\d+)\s*(?P<unit>' + '|'.join(period_types) + ')$'
                            ).setParseAction(convert_period)

        # multi line string using """
        # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
        multiline_string = Regex('""".*?"*"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
        # single quoted line string
        quoted_string = Regex(r'"(?:[^"\\\n]|\\.)*"[ \t]*', re.UNICODE).setParseAction(create_quoted_string)
        # unquoted string that takes the rest of the line until an optional comment
        # we support .properties multiline support which is like this:
        # line1 \
        # line2 \
        # so a backslash precedes the \n
        unquoted_string = Regex(r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*', re.UNICODE).setParseAction(unescape_string)
        substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
        string_expr = multiline_string | quoted_string | unquoted_string

        # periods, numbers and keywords are tried before the string forms
        value_expr = period_expr | number_expr | true_expr | false_expr | null_expr | string_expr

        include_content = (quoted_string | ((Keyword('url') | Keyword('file')) - Literal('(').suppress() - quoted_string - Literal(')').suppress()))
        include_expr = (
            Keyword("include", caseless=True).suppress() + (
                include_content | (
                    Keyword("required") - Literal('(').suppress() - include_content - Literal(')').suppress()
                )
            )
        ).setParseAction(include_config)

        root_dict_expr = Forward()
        dict_expr = Forward()
        list_expr = Forward()
        multi_value_expr = ZeroOrMore(comment_eol | include_expr | substitution_expr | dict_expr | list_expr | value_expr | (Literal(
            '\\') - eol).suppress())
        # for a dictionary : or = is optional
        # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
        inside_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
        inside_root_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma), root=True)
        dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
        root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress('}')
        list_entry = ConcatenatedValueParser(multi_value_expr)
        list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

        # special case when we have a value assignment where the string can potentially be the remainder of the line
        assign_expr << Group(
            key - ZeroOrMore(comment_no_comma_eol) - (dict_expr | (Literal('=') | Literal(':') | Literal('+=')) - ZeroOrMore(
                comment_no_comma_eol) - ConcatenatedValueParser(multi_value_expr))
        )

        # the file can be { ... } where {} can be omitted or []
        config_expr = ZeroOrMore(comment_eol | eol) + (list_expr | root_dict_expr | inside_root_dict_expr) + ZeroOrMore(
            comment_eol | eol_comma)
        config = config_expr.parseString(content, parseAll=True)[0]

        if resolve:
            allow_unresolved = resolve and unresolved_value is not DEFAULT_SUBSTITUTION and unresolved_value is not MANDATORY_SUBSTITUTION
            has_unresolved = cls.resolve_substitutions(config, allow_unresolved)
            if has_unresolved and unresolved_value is MANDATORY_SUBSTITUTION:
                raise ConfigSubstitutionException('resolve cannot be set to True and unresolved_value to MANDATORY_SUBSTITUTION')

        if unresolved_value is not NO_SUBSTITUTION and unresolved_value is not DEFAULT_SUBSTITUTION:
            cls.unresolve_substitutions_to_value(config, unresolved_value)
    return config
def read_chain(s, struct_name, enum_def_parser, struct_def_parser,
               header_assignment_parser, struct_parsers):
    """Extract every row of one structure type from a Yanny par file.

    The requested structure name is upper-cased before it is looked up in
    `struct_parsers`.  Rows of every other known structure, as well as enum
    definitions, struct definitions and header assignments, are matched and
    suppressed so that only rows of the requested structure are returned.

    :Parameters:
        - `s`: the string to parse
        - `struct_name`: the name of the struct to extract
        - `enum_def_parser`: a parser that parses one enum definition
        - `struct_def_parser`: a parser that parses one struct definition
        - `header_assignment_parser`: a parser that parses header assignments
        - `struct_parsers`: a dictionary of parsers for structure data rows,
          keyed by structure name

    @return: a list of dictionaries, one per matched row, mapping field
             names to values

    >>> test_string = \"""# A sample file
    ... a foo
    ... bar baz
    ...
    ... typedef enum {
    ...     OAK,
    ...     MAPLE
    ... } TREETYPE;
    ...
    ... typedef struct {
    ...     float x;
    ...     int y
    ... } GOO
    ...
    ... typedef struct {
    ...     int i;
    ...     TREETYPE s
    ... } TREE;
    ...
    ... GOO 3.4 6
    ... TREE 42 MAPLE
    ... TREE 44 OAK
    ... TREE 3 MAPLE
    ... GOO 4.22 103
    ... \"""
    >>>
    >>> enum_def_parser = make_one_enum_def_parser()
    >>> type_parsers = parse_enum_defs(test_string, base_type_parsers)
    >>> struct_def_parser = make_one_struct_def_parser(type_parsers)
    >>> structs, struct_parsers = parse_struct_defs(test_string, type_parsers)
    >>> header_assignment_parser = make_one_header_assignment_parser(struct_parsers)
    >>> trees = read_chain(test_string, 'TREE',
    ...                    enum_def_parser, struct_def_parser, header_assignment_parser, struct_parsers)
    >>> print len(trees)
    3
    >>> print trees[1]['i']
    44
    >>> print trees[2]['s']
    MAPLE
    """
    wanted = struct_name.upper()

    # Everything that may appear in the file but is not a row of the wanted
    # structure: rows of the other structures first, then the definitions.
    skipped_alternatives = [
        row_parser
        for name, row_parser in struct_parsers.items()
        if name != wanted
    ]
    skipped_alternatives.extend(
        [enum_def_parser, header_assignment_parser, struct_def_parser])
    skip_parser = Or(skipped_alternatives).suppress()

    chain_parser = ZeroOrMore(skip_parser | struct_parsers[wanted]) + stringEnd
    for comment_style in (hash_comment, cStyleComment):
        chain_parser.ignore(comment_style)

    # Copy each parsed row into a plain dict of field name -> value.
    return [dict(row.items()) for row in chain_parser.parseString(s)]
class pppCompiler:
    """Compiler front end built on a pyparsing grammar.

    `initBNF` builds the grammar and attaches the ``*_action`` methods of
    this class as parse actions.  The actions emit code lines (strings) or
    generator objects into the ``'code'`` entry of the parse results, and
    `compileFile` / `compileString` assemble those into the final text.
    """

    def __init__(self):
        self.initBNF()
        self.symbols = SymbolTable()

    def initBNF(self):
        """Build the grammar and store it in ``self.program``.

        The statement order below is significant: some parse actions are
        attached to shared expression objects after those objects have
        already been combined into larger expressions.
        """
        indentStack = [1]
        encoding = Literal("<").suppress() + identifier("encoding") + Literal(
            ">").suppress()
        constdecl = Group((const + identifier + assign + value).setParseAction(
            self.const_action))
        vardecl = Group(
            (type_("type_") + Optional(encoding) + identifier("name") +
             Optional(assign + value("value") + Optional(identifier)("unit"))
             ).setParseAction(self.var_action))
        insertdecl = Group(
            (insert + dblQuotedString + LineEnd().suppress()).setParseAction(
                self.insert_action))
        procedurecall = Group((identifier + Literal("(").suppress() + Optional(
            delimitedList(
                (identifier + Optional(assign + identifier)).setParseAction(
                    self.named_param_action))) +
            Literal(")").suppress()).setParseAction(
                self.procedurecall_action))
        condition = Group(
            (identifier("leftidentifier") + comparison("comparison") +
             (identifier("identifier") |
              value.setParseAction(self.value_action))).setParseAction(
                  self.condition_action))("condition")
        pointer = Literal("*") + identifier

        rExp = Forward()
        #numexpression = Forward()

        opexpression = (identifier("operand") +
                        (Literal(">>") | Literal("<<") | Literal("+") |
                         Literal("*") | Literal("/") | Literal("-"))("op") +
                        Group(rExp)("argument")).setParseAction(
                            self.opexpression_action)
        rExp << (
            procedurecall | opexpression | identifier("identifier") |
            value.setParseAction(self.value_action) |
            #Group( Suppress("(") + rExp + Suppress(")") ) |
            #Group( "+" + rExp) |
            #Group( "-" + rExp) |
            Group(Literal("not") + rExp))
        rExpCondition = Group(
            (Optional(not_)("not_") + rExp("rExp"))).setParseAction(
                self.rExp_condition_action)("condition")
        rExp.setParseAction(self.rExp_action)

        assignment = ((identifier | pointer)("lval") + assign +
                      rExp("rval")).setParseAction(self.assignment_action)
        addassignment = ((identifier | pointer)("lval") +
                         (Literal("+=") | Literal("-=") | Literal("*=") |
                          Literal("&=") | Literal("|=") | Literal(">>=") |
                          Literal("/=") | Literal("<<="))("op") +
                         Group(rExp)("rval")).setParseAction(
                             self.addassignement_action)

        statement = Forward()
        statementBlock = indentedBlock(statement, indentStack).setParseAction(
            self.statementBlock_action)
        procedure_statement = Group(
            (Keyword("def").suppress() + identifier("funcname") +
             Literal("(").suppress() + Literal(")").suppress() +
             colon.suppress() + statementBlock).setParseAction(
                 self.def_action))
        while_statement = Group(
            (Keyword("while").suppress() +
             (condition | rExpCondition)("condition") + colon.suppress() +
             statementBlock("statementBlock")).setParseAction(
                 self.while_action))
        if_statement = (Keyword("if") + condition + colon +
                        statementBlock("ifblock") + Optional(
                            Keyword("else").suppress() + colon +
                            statementBlock("elseblock"))).setParseAction(
                                self.if_action)
        statement << (procedure_statement | while_statement | if_statement |
                      procedurecall | assignment | addassignment)

        decl = constdecl | vardecl | insertdecl | Group(statement)

        self.program = ZeroOrMore(decl)
        self.program.ignore(pythonStyleComment)

    def assignment_action(self, text, loc, arg):
        """Emit code for ``lval = rval``.

        The right-hand side is loaded (its own code, ``LDWI`` for ``*P`` or
        ``LDWR`` for an identifier) and then stored with ``STWI`` when the
        target is ``*P`` or ``STWR`` for a variable; nothing is stored when
        the target is ``W``.  Any failure is re-raised as CompileException.
        """
        logging.getLogger(__name__).debug("assignment_action {0} {1}".format(
            lineno(loc, text), arg))
        try:
            code = [
                "# line {0} assignment {1}".format(lineno(loc, text),
                                                   line(loc, text))
            ]
            rval_code = find_and_get(arg.rval, 'code')
            if rval_code is not None:
                code += arg.rval.code
            elif arg.rval == "*P":
                code.append(" LDWI")
            elif 'identifier' in arg:
                self.symbols.getVar(arg.identifier)
                code.append(" LDWR {0}".format(arg.identifier))
            if arg.lval == "*P":
                code.append(" STWI")
            elif arg.lval != "W":
                symbol = self.symbols.getVar(arg.lval)
                code.append(" STWR {0}".format(symbol.name))
            if 'code' in arg:
                arg['code'].extend(code)
            else:
                arg['code'] = code
        except Exception as e:
            raise CompileException(text, loc, str(e), self)
        return arg

    def addassignement_action(self, text, loc, arg):
        """Emit code for an augmented assignment such as ``x += y``.

        ``+= 1`` and ``-= 1`` become ``INC`` / ``DEC``.  Otherwise the
        opcode is taken from ``opassignmentLookup``.  ``-=`` with an
        expression on the right-hand side is not supported and is reported
        as a CompileException.
        """
        logging.getLogger(__name__).debug(
            "addassignement_action {0} {1}".format(lineno(loc, text), arg))
        try:
            code = [
                "# line {0}: add_assignment: {1}".format(
                    lineno(loc, text), line(loc, text))
            ]
            if arg.rval[0] == '1' and arg.op in ['+=', '-=']:
                self.symbols.getVar(arg.lval)
                if arg.op == "+=":
                    code.append(" INC {0}".format(arg.lval))
                else:
                    code.append(" DEC {0}".format(arg.lval))
            else:
                if 'code' in arg.rval:
                    code += arg.rval.code
                    self.symbols.getVar(arg.lval)
                    if arg.op == "-=":
                        # Raise a plain exception here: the handler below
                        # wraps it into a CompileException that carries the
                        # source location together with this message.
                        raise NotImplementedError(
                            "-= with expression needs to be fixed in the compiler"
                        )
                    code.append(" {0} {1}".format(opassignmentLookup[arg.op],
                                                  arg.lval))
                elif 'identifier' in arg.rval:
                    self.symbols.getVar(arg.rval.identifier)
                    code.append(" LDWR {0}".format(arg.lval))
                    self.symbols.getVar(arg.lval)
                    code.append(" {0} {1}".format(opassignmentLookup[arg.op],
                                                  arg.rval.identifier))
            code.append(" STWR {0}".format(arg.lval))
            arg['code'] = code
        except Exception as e:
            raise CompileException(text, loc, str(e), self)
        return arg

    def condition_action(self, text, loc, arg):
        """Emit code for ``leftidentifier <comparison> identifier``.

        A comparison against ``NULL`` that has an entry in
        ``jmpNullCommands`` only records the jump command under
        ``'jmpcmd'``; every other comparison appends a compare instruction.
        """
        logging.getLogger(__name__).debug("condition_action {0} {1}".format(
            lineno(loc, text), arg))
        try:
            code = [
                "# line {0} condition {1}".format(lineno(loc, text),
                                                  line(loc, text))
            ]
            if arg.leftidentifier != "W":
                self.symbols.getVar(arg.leftidentifier)
                code.append(' LDWR {0}'.format(arg.leftidentifier))
            if arg.identifier == 'NULL' and arg.comparison in jmpNullCommands:
                arg['jmpcmd'] = jmpNullCommands[arg.comparison]
            else:
                code.append(' {0} {1}'.format(
                    comparisonCommands[arg.comparison], arg.identifier))
            arg["code"] = code
        except Exception as e:
            raise CompileException(text, loc, str(e), self)
        return arg

    def rExp_condition_action(self, text, loc, arg):
        """Turn a bare (optionally negated) expression into a condition.

        When the expression's code is a string, a comparison against NULL
        is emitted.  Otherwise the code is treated as a mapping indexed by
        True/False whose entries are swapped when ``not`` is present.
        """
        logging.getLogger(__name__).debug(
            "rExp_condition_action {0} {1}".format(lineno(loc, text), arg))
        try:
            code = [
                "# line {0} rExp_condition {1}".format(lineno(loc, text),
                                                       line(loc, text))
            ]
            condition_code = arg.condition.rExp['code']
            if isinstance(condition_code, str):
                if 'not_' in arg['condition']:
                    code += [" CMPEQUAL NULL"]
                else:
                    code += [" CMPNOTEQUAL NULL"]
                arg['code'] = code
            else:
                if 'not_' in arg['condition']:
                    arg['code'] = {
                        False: condition_code[True],
                        True: condition_code[False]
                    }
                else:
                    arg['code'] = condition_code
        except Exception as e:
            raise CompileException(text, loc, str(e), self)
        return arg

    def named_param_action(self, text, loc, arg):
        """Store a ``name = value`` call parameter under its name."""
        if len(arg) == 2:
            arg[arg[0]] = arg[1]
        return arg

    def value_action(self, text, loc, arg):
        """Convert a numeric literal (decimal or ``0x`` hex) to an inline parameter."""
        if arg[0][0:2] == '0x':
            value = int(arg[0], 16)
        else:
            value = int(arg[0])
        arg["identifier"] = self.symbols.getInlineParameter("inlinevar", value)
        return arg

    def opexpression_action(self, text, loc, arg):
        """Emit code for ``operand <op> argument`` using ``shiftLookup``."""
        try:
            logging.getLogger(__name__).debug(
                "opexpression_action {0} {1}".format(lineno(loc, text), arg))
            code = [
                "# line {0}: shiftexpression {1}".format(
                    lineno(loc, text), line(loc, text)),
                " LDWR {0}".format(arg.operand),
                " {0} {1}".format(shiftLookup[arg.op], arg.argument.identifier)
            ]
            arg['code'] = code
            logging.getLogger(__name__).debug(
                "shiftexpression generated code {0}".format(code))
        except Exception as e:
            raise CompileException(text, loc, str(e), self)
        return arg

    def procedurecall_action(self, text, loc, arg):
        """Emit code for a procedure call by delegating to the procedure symbol.

        When ``codegen`` returns a list it is appended after the line
        comment; any other return value replaces the code entirely.
        """
        try:
            logging.getLogger(__name__).debug(
                "procedurecall_action {0} {1}".format(lineno(loc, text), arg))
            procedure = self.symbols.getProcedure(arg[0])
            code = [
                "# line {0}: procedurecall {1}".format(lineno(loc, text),
                                                       line(loc, text))
            ]
            opcode = procedure.codegen(self.symbols,
                                       arg=arg.asList(),
                                       kwarg=arg.asDict())
            if isinstance(opcode, list):
                code += opcode
            else:
                code = opcode
            arg['code'] = code
            logging.getLogger(__name__).debug(
                "procedurecall generated code {0}".format(code))
        except Exception as e:
            raise CompileException(text, loc, str(e), self)
        return arg

    def rExp_action(self, text, loc, arg):
        """Log the matched right-hand-side expression; tokens are left unchanged."""
        logging.getLogger(__name__).debug("rExp_action {0} {1}".format(
            lineno(loc, text), arg))

    def if_action(self, text, loc, arg):
        """Wrap an if / optional else statement into an IfGenerator."""
        logging.getLogger(__name__).debug("if_action {0} {1}".format(
            lineno(loc, text), arg))
        try:
            block0 = [
                "# line {0} if statement {1}".format(lineno(loc, text),
                                                     line(loc, text))
            ]
            if isinstance(arg.condition.code, list):
                block0 += arg.condition.code
                JMPCMD = arg.condition.get('jmpcmd',
                                           {False: "JMPNCMP"})[False]
            else:
                JMPCMD = arg.condition.code[True]

            if 'elseblock' in arg:
                block1 = arg.ifblock.ifblock.code
                block2 = arg.elseblock.elseblock[
                    'code'] if 'elseblock' in arg.elseblock else arg.elseblock[
                        'code']
            else:
                block1 = arg.ifblock.ifblock['code']
                block2 = None
            arg['code'] = [
                IfGenerator(self.symbols, JMPCMD, block0, block1, block2)
            ]
        except Exception as e:
            raise CompileException(text, loc, str(e), self)
        return arg

    def while_action(self, text, loc, arg):
        """Wrap a while statement into a WhileGenerator.

        The condition code is read either from the condition itself or
        from its nested ``rExp`` entry.  If neither carries code the jump
        command stays undefined and the resulting error is reported as a
        CompileException.
        """
        logging.getLogger(__name__).debug("while_action {0} {1}".format(
            lineno(loc, text), arg))
        try:
            block0 = [
                "# line {0} while_statement {1}".format(
                    lineno(loc, text), line(loc, text))
            ]
            if 'code' in arg.condition:
                if isinstance(arg.condition.code, list):
                    block1 = arg.condition.code
                    JMPCMD = arg.condition.get('jmpcmd',
                                               {False: "JMPNCMP"})[False]
                else:
                    JMPCMD = arg.condition.code[True]
                    block1 = []
            elif 'rExp' in arg.condition and 'code' in arg.condition.rExp:
                if isinstance(arg.condition.rExp.code, list):
                    block1 = arg.condition.rExp.code
                    JMPCMD = arg.condition.rExp.get('jmpcmd', "JMPNCMP")
                else:
                    JMPCMD = arg.condition.rExp.code[True]
                    block1 = []
            block2 = arg.statementBlock.statementBlock['code']

            arg['code'] = [
                WhileGenerator(self.symbols, JMPCMD, block0, block1, block2)
            ]
            logging.getLogger(__name__).debug("while_action generated code ")
        except Exception as e:
            raise CompileException(text, loc, str(e), self)
        return arg

    def statementBlock_action(self, text, loc, arg):
        """Concatenate the code of all statements of an indented block."""
        logging.getLogger(__name__).debug(
            "statementBlock_action {0} {1} {2}".format(lineno(loc, text),
                                                       arg.funcname, arg))
        try:
            code = list()
            for command in arg[0]:
                if 'code' in command:
                    code += command['code']
                elif 'code' in command[0]:
                    code += command[0]['code']
            arg[0]['code'] = code
            logging.getLogger(__name__).debug(
                "statementBlock generated code {0}".format(code))
        except Exception as e:
            raise CompileException(text, loc, str(e), self)
        return arg

    def def_action(self, text, loc, arg):
        """Register a procedure definition as a FunctionSymbol."""
        logging.getLogger(__name__).debug("def_action {0} {1} {2}".format(
            lineno(loc, text), arg.funcname, arg))
        try:
            name = arg[0]
            self.symbols.checkAvailable(name)
            self.symbols[name] = FunctionSymbol(name, arg[1]['code'])
        except Exception as e:
            raise CompileException(text, loc, str(e), self)

    def const_action(self, text, loc, arg):
        """Register a constant declaration as a ConstSymbol."""
        try:
            name, value = arg
            logging.getLogger(__name__).debug(
                "const_action {0} {1} {2} {3}".format(self.currentFile,
                                                      lineno(loc, text), name,
                                                      value))
            self.symbols[name] = ConstSymbol(name, value)
        except Exception as e:
            raise CompileException(text, loc, str(e), self)

    def var_action(self, text, loc, arg):
        """Register a variable declaration as a VarSymbol.

        The type name ``"var"`` is stored as ``None``.
        """
        logging.getLogger(__name__).debug(
            "var_action {0} {1} {2} {3} {4} {5} {6}".format(
                self.currentFile, lineno(loc, text), arg["type_"],
                arg.get("encoding"), arg["name"], arg.get("value"),
                arg.get("unit")))
        try:
            type_ = arg["type_"] if arg["type_"] != "var" else None
            self.symbols[arg["name"]] = VarSymbol(type_=type_,
                                                  name=arg["name"],
                                                  value=arg.get("value"),
                                                  encoding=arg.get("encoding"),
                                                  unit=arg.get("unit"))
        except Exception as e:
            raise CompileException(text, loc, str(e), self)

    def insert_action(self, text, loc, arg):
        """Parse the quoted file named by an insert declaration.

        A copy of the program grammar is used, and ``self.currentFile``
        points at the inserted file while it is being parsed.
        """
        try:
            oldfile = self.currentFile
            myprogram = self.program.copy()
            self.currentFile = arg[0][1:-1]  # strip the surrounding quotes
            result = myprogram.parseFile(self.currentFile)
            self.currentFile = oldfile
        except Exception as e:
            raise CompileException(text, loc, str(e), self)
        return result

    @staticmethod
    def _collect_code(result):
        """Gather the ``'code'`` lists of all top-level parse results."""
        allcode = list()
        for element in result:
            if not isinstance(element, str) and 'code' in element:
                allcode += element['code']
            elif not isinstance(element[0], str) and 'code' in element[0]:
                allcode += element[0]['code']
        return allcode

    def compileFile(self, filename):
        """Compile the file `filename` and return header plus code as text.

        Unlike `compileString`, the collected code is joined directly and
        is not passed through ``generate``.
        """
        self.currentFile = filename
        result = self.program.parseFile(self.currentFile, parseAll=True)

        allcode = self._collect_code(result)
        header = self.createHeader()

        codetext = "\n".join(header + allcode)
        return codetext

    def compileString(self, programText):
        """Compile `programText` and return the generated code as text.

        Also stores the mapping from generated line numbers to source line
        numbers in ``self.reverseLineLookup``.
        """
        self.programText = programText
        self.currentFile = "Memory"
        result = self.program.parseString(self.programText, parseAll=True)

        allcode = self._collect_code(result)
        header = self.createHeader()
        codetext = """# autogenerated
# DO NOT EDIT DIRECTLY
# The file will be overwritten by the compiler
#
"""
        codetext += "\n".join(header + list(generate(allcode)))
        self.reverseLineLookup = self.generateReverseLineLookup(codetext)
        return codetext

    def generateReverseLineLookup(self, codetext):
        """Map 1-based generated line numbers to source line numbers.

        Lines of the form ``# line <n> ...`` set the current source line
        and get no entry themselves.  Every other line is mapped to the
        most recent source line, or to ``None`` if no marker came before.
        """
        lookup = dict()
        sourceline = None
        for codeline, line in enumerate(codetext.splitlines()):
            # Raw string: avoids invalid escape sequences in the pattern.
            m = re.search(r'^\# line (\d+).*$', line)
            if m:
                sourceline = int(m.group(1))
            else:
                lookup[codeline + 1] = sourceline
        return lookup

    def createHeader(self):
        """Return the header lines declaring all constants and variables.

        A ``masked_shutter`` variable produces two lines: its mask
        variable and the shutter variable that refers to that mask.
        """
        header = ["# const values"]
        for constval in self.symbols.getAllConst():
            header.append("const {0} {1}".format(constval.name,
                                                 constval.value))
        header.append("# variables ")
        for var in self.symbols.getAllVar():
            if var.type_ == "masked_shutter":
                header.append("var {0} {1}, {2}".format(
                    var.name + "_mask", var.value
                    if var.value is not None else 0, "mask"))
                header.append("var {0} {1}, {2}".format(
                    var.name, var.value if var.value is not None else 0,
                    "shutter {0}_mask".format(var.name)))
            else:
                optionals = [
                    s if s is not None else ""
                    for s in list_rtrim([var.type_, var.unit, var.encoding])
                ]
                varline = "var {0} {1}".format(
                    var.name, var.value if var.value is not None else 0)
                if len(optionals) > 0:
                    varline += ", " + ", ".join(optionals)
                header.append(varline)
        header.append("# inline variables")
        # for value, name in self.symbols.inlineParameterValues.items():
        #     header.append("var {0} {1}".format(name, value))
        header.append("# end header")
        header.append("")
        return header
# Ad-hoc checks of the markdown conversion helpers.  `t1`, `convert_markdown`,
# `convert_test` and `is_link_text` are defined outside this fragment.
print(convert_test(t1))

# Fenced block containing list and quote markers.
t1 = '```\n+ list2\n\n> line 3 `code` word\n```\n'
print(convert_markdown(t1))
print(convert_test(t1))

# Quote block containing a list item and inline code.
t1 = '> + list2\n>\n> line \n3 `code` word\n```\n'
print(convert_markdown(t1))
print(convert_test(t1))

# Numbered list with inline code.
t1 = '1. list2\n2. line \n3. `code` word\n```\n'
print(convert_markdown(t1))
print(convert_test(t1))

# Mixed quoting styles, an HTML tag and bracketed link text; tokenized first
# with shlex and then with a pyparsing grammar for comparison.
t1 = 'this is "hello world" \'test ddd\' `com add` <font color=red>nihao [a a a][1] [basd sd] [cd dd ]</font>\n```\nhello\nasdasd```\n'
s = shlex.shlex(t1)
print(list(s))
# NOTE(review): the single-quote alternative excludes '"' rather than "'"
# between the quotes -- confirm whether r"'[^']*'" was intended.
parser = ZeroOrMore(
    Regex(r'(\[[^]]*\])+') | Regex(r'```') | Regex(r'<[^>]*>')
    | Regex(r'\[[^]]*\]') | Regex(r'\'[^"]*\'') | Regex(r'"[^"]*"')
    | Regex(r'\`[^`]*\`') | Regex(r'[^ ]+'))
for i in parser.parseString(t1):
    print(i)

# Extract the second bracketed group from a link-style title.
t1 = '[Bug 412968][1] 1162: device level allow to save'
print(is_link_text(t1))
# NOTE(review): this pattern is not a raw string, so '\[' and '\]' rely on
# Python keeping unknown escape sequences as written.
p = re.compile('\[[^]]*\]')
a = p.findall(t1)[1]
# b = p.match(t1).group(1)
print(a)
def parse_struct_defs(s, type_parsers=None):
    """Collect every struct definition found in a string.

    For each definition two things are produced: a description of its
    fields and a parser that reads one data row of that structure.  Both
    are stored under the upper-cased structure name.  When no type parsers
    are supplied, `base_type_parsers` is used.

    :Parameters:
        - `s`: the string to parse
        - `type_parsers`: a dictionary of type parsers, keyed by type name

    @return: a tuple of two dictionaries; the first maps each struct name
             to a list of its fields, the second maps it to a row parser

    >>> test_string = \"""# A sample file
    ... a foo
    ... bar baz
    ...
    ... typedef struct { # a comment
    ...     float x; /* another comment */
    ...     int y
    ... } GOO
    ...
    ... typedef enum {
    ...     START,
    ...     END
    ... } RUNMARK;
    ...
    ... typedef struct {
    ...     int i;
    ...     RUNMARK m
    ... } MOO;
    ...
    ... GOO 3.4 6
    ... \"""
    >>>
    >>> type_parsers = parse_enum_defs(test_string, base_type_parsers)
    >>> structs, struct_parsers = parse_struct_defs(test_string, type_parsers)
    >>>
    >>> print structs.keys()
    ['GOO', 'MOO']
    >>>
    >>> print struct_parsers.keys()
    ['GOO', 'MOO']
    >>>
    >>> for f in structs['GOO']:
    ...     print "Field name: %-15s type name: %s" % (f['field_name'], f['type_name'])
    Field name: x               type name: float
    Field name: y               type name: int
    >>>
    >>> for f in structs['MOO']:
    ...     print "Field name: %-15s type name: %s" % (f['field_name'], f['type_name'])
    Field name: i               type name: int
    Field name: m               type name: RUNMARK
    >>>
    >>> test_goo = "GOO 3.14 42"
    >>> test_parsed_goo = struct_parsers['GOO'].parseString(test_goo)
    >>> print "x: %f y: %d" % (test_parsed_goo[0]['x'], test_parsed_goo[0]['y'])
    x: 3.140000 y: 42
    >>>
    >>> test_moo = "MOO 44 END"
    >>> test_parsed_moo = struct_parsers['MOO'].parseString(test_moo)
    >>> print "i: %d m: %s" % (test_parsed_moo[0]['i'], test_parsed_moo[0]['m'])
    i: 44 m: END
    """
    parsers_by_type = base_type_parsers if type_parsers is None else type_parsers

    definition = Group(make_one_struct_def_parser(parsers_by_type))
    # Lines that do not belong to a struct definition are matched and dropped.
    skipped_line = (
        (value_name | enum_declaration_start | possible_type_name | right_brace)
        + restOfLine
    ).suppress()
    struct_def_parser = ZeroOrMore(definition | skipped_line) + stringEnd
    for comment_style in (hash_comment, cStyleComment):
        struct_def_parser.ignore(comment_style)

    struct = {}
    struct_parser = {}
    for parsed in struct_def_parser.parseString(s):
        declared_name = parsed['struct_name']
        key = declared_name.upper()
        fields = parsed['fields']

        struct[key] = [
            {'field_name': field['field_name'], 'type_name': field['type_name']}
            for field in fields
        ]

        # A data row is the struct name followed by one value per field,
        # each value named after its field.
        field_parsers = [
            linecont + parsers_by_type[field['type_name']](field['field_name'])
            for field in fields
        ]
        struct_parser[key] = Group(
            CaselessKeyword(declared_name) + And(field_parsers))

    return struct, struct_parser
class Parser(object):
    """Lexical and syntax analysis.

    The constructor builds a pyparsing grammar whose parse actions call
    into a `Syntax_tree` instance (exposed as `AST`).  Two entry grammars
    are kept: `grammar` (function declarations and sequence declarations
    only) and `grammar_test` (any sequence of statements).
    """

    @property
    def semantic_analyser(self):
        """The semantic analyser owned by the syntax tree."""
        return self._AST.semantic_analyser

    def __init__(self):
        # NOTE(review): the statement order in this method looks significant --
        # several parse actions are attached to expressions after those
        # expressions were combined into larger ones, and named copies such as
        # self.ID("ID") are created at specific points.  Confirm against the
        # pyparsing documentation before reordering anything.
        self._AST = Syntax_tree()
        # keywords
        self.int_ = Keyword('Int')
        self.false_ = Keyword('False')
        self.true_ = Keyword('True')
        # 'Bit' with an optional leading '@', combined into one token
        self.bit_ = Combine(Optional(Literal("@")) + Keyword('Bit'))
        self.sbox_ = Keyword('Sbox')
        self.l_shift_ = Keyword('<<')
        self.r_shift_ = Keyword('>>')
        self.circ_l_shift_ = Keyword('<<<')
        self.circ_r_shift_ = Keyword('>>>')
        self.bit_val = self.false_ ^ self.true_
        self.if_ = Keyword('if')
        self.for_ = Keyword('for')
        self.return_ = Keyword('return')
        self.void_ = Keyword('void')
        # Identifier: a word starting with a letter or '_' that is not one of
        # the listed keywords.  ID and ID_ are built identically; only ID gets
        # a parse action further below.
        self.ID = NotAny(self.sbox_ ^ self.int_ ^ self.bit_ ^ self.false_ ^ self.true_ ^ self.if_ ^ self.for_ ^ self.sbox_) + Word(alphas + '_', alphanums + '_')  # NOQA
        self.ID_ = NotAny(self.sbox_ ^ self.int_ ^ self.bit_ ^ self.false_ ^ self.true_ ^ self.if_ ^ self.for_ ^ self.sbox_) + Word(alphas + '_', alphanums + '_')
        # Other Tokens
        self.l_bracket = Literal('(')
        self.r_bracket = Literal(')')
        self.eq_set = Literal('=')("set")
        self.term_st = Literal(';')
        # Binary ("0b...") and hexadecimal ("0x...") literals are converted by
        # the semantic analyser; decimal literals are kept as matched.
        self.b_2_num = Combine(Literal("0b") + Word("01"))
        self.b_2_num.setParseAction(self.semantic_analyser.convert_base_to_str)
        self.b_16_num = Combine(Literal("0x") + Word(srange("[0-9a-fA-F]")))
        self.b_16_num.setParseAction(self.semantic_analyser.convert_base_to_str)
        self.b_10_num = Word(nums)
        self.bit_and = Literal('&')
        self.bit_or = Keyword('|')
        self.bit_xor = Keyword('^')
        self.bit_not = Literal('~')
        self.eq_compare = Literal('==')
        self.neq_compare = Literal('!=')
        self.l_brace = Literal('{')
        self.r_brace = Literal('}')
        self.bin_add = Literal('+')
        self.bin_mult = Literal('*')
        self.bin_sub = Literal('-')
        self.bin_mod = Literal('%')
        self.bin_div = Literal('/')
        self.g_than = Literal('>')
        self.ge_than = Literal('>=')
        self.l_than = Literal('<')
        self.le_than = Literal('<=')
        self.log_and = Keyword('&&')
        self.log_or = Keyword('||')
        self.l_sq_b = Literal('[')
        self.r_sq_b = Literal(']')

        # Operator Productions
        self.log_op = self.log_and ^ self.log_or
        self.comparison_op = self.g_than ^ self.ge_than ^ self.l_than ^ self.le_than ^ self.eq_compare ^ self.neq_compare
        self.arith_op = self.bin_add ^ self.bin_mult ^ self.bin_sub ^ self.bin_mod ^ self.bin_div
        self.bitwise_op = self.bit_and ^ self.bit_or ^ self.bit_xor ^ self.bit_not ^ self.l_shift_ ^ self.r_shift_ ^ self.circ_l_shift_ ^ self.circ_r_shift_

        # Grammar
        # Forward declarations; their definitions are bound with `<<` below.
        self.stmt = Forward()
        self.for_loop = Forward()
        self.cast = Forward()
        self.seq_val = Forward()
        self.int_value = self.b_2_num ^ self.b_16_num ^ self.b_10_num
        self.expr = Forward()
        self.function_call = Forward()
        self.index_select = Forward()
        self.seq_ = Forward()
        self.operand = Forward()
        self.seq_range = Forward()

        # #######Operands

        self.sbox_call = Group((self.ID ^ self.seq_val) + ~White() + Literal(".") + ~White() + self.sbox_ + ~White() +
                               self.l_bracket + (self.ID ^ self.int_value) + self.r_bracket)

        # self.operand and (below) self.expr are rebound to new expressions
        # here; the Forward objects created above for them are not used.
        self.operand = self.index_select | self.seq_val | self.function_call | self.ID | self.int_value | self.cast | self.bit_val
        # Each operand kind is tagged with its name followed by its tokens.
        self.seq_val.setParseAction(lambda t: ['Seq_val'] + [t.asList()])
        self.index_select.setParseAction(lambda t: ['index_select'] + [t.asList()])
        self.function_call.setParseAction(lambda t: ['function_call'] + [t.asList()])
        self.ID.setParseAction(lambda t: ['ID'] + [t.asList()])
        self.int_value.setParseAction(lambda t: ['Int_val'] + [t.asList()])
        self.cast.setParseAction(lambda t: ['cast'] + [t.asList()])
        self.bit_val.setParseAction(lambda t: ['Bit_val'] + [t.asList()])
        self.seq_range.setParseAction(lambda t: ['seq_range'] + [t.asList()])

        # #######Expressions

        # Operator levels are listed in this order: bitwise, comparison,
        # logical, arithmetic.
        self.expr = Group(infixNotation(Group(self.operand), [(self.bitwise_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                                              (self.comparison_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                                              (self.log_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                                              (self.arith_op, 2, opAssoc.LEFT, self.nest_operand_pairs)]))

        # self.expr.setParseAction(self.expr_p)
        self.int_size = Combine(Optional(Literal("@")) + self.int_)("decl") + ~White() + Suppress(self.l_bracket) + self.expr + Suppress(self.r_bracket)

        self.sbox_size = self.sbox_ + ~White() + Suppress(self.l_bracket) + self.expr + Suppress(self.r_bracket)

        self.seq_range << self.expr + Suppress(Literal(":")) + self.expr

        self.seq_val << Suppress(self.l_sq_b) + Optional(Group(delimitedList(self.expr))) + Suppress(self.r_sq_b)

        # Sequence type: an element type followed by one or more [size] parts.
        self.seq_ << (self.int_size | self.bit_ | self.sbox_size)("type") +\
            Group(OneOrMore(~White() + Suppress(self.l_sq_b) + self.expr + Suppress(self.r_sq_b)))("seq_size")

        self.function_call << self.ID("function_name") + ~White() + Suppress(self.l_bracket) +\
            Optional(Group(delimitedList(self.expr)))("param_list") + Suppress(self.r_bracket)

        self.cast << Suppress(self.l_bracket) + Group((self.seq_ | self.int_size | self.bit_)) +\
            Suppress(self.r_bracket) + (self.expr)("target")

        # Index selection on an identifier or a parenthesised cast; each index
        # is an expression or a range.
        self.index_select << (self.ID("ID") ^ (Suppress(self.l_bracket) + self.cast + Suppress(self.r_bracket))("cast")) + ~White() +\
            Group(OneOrMore(Suppress(self.l_sq_b) + Group(delimitedList(self.expr ^ Group(Group(self.seq_range))))("index") + Suppress(self.r_sq_b)))

        # ####### Declarations

        self.id_set = Group((Group(self.index_select) | self.ID_) + self.eq_set + self.expr)
        self.id_set.setParseAction(self.AST.id_set)

        self.int_decl = Group(self.int_size + delimitedList(Group((self.ID_("ID") + self.eq_set + self.expr("set_value")) |
                              self.ID_("ID")))("value"))  # NOQA
        self.int_decl.setParseAction(self.AST.int_decl)
        self.bit_decl = Group(self.bit_("decl") + delimitedList(Group(self.ID_("ID")) ^
                              Group(self.ID_("ID") + self.eq_set + self.expr("set_value")))("value"))
        self.bit_decl.setParseAction(self.AST.bit_decl)
        self.seq_decl = Group(self.seq_("decl") + Group(self.ID)("ID") + Optional(self.eq_set + Group(self.expr))("value"))
        self.seq_decl.setParseAction(self.AST.seq_decl)

        self.decl = self.bit_decl ^ self.int_decl ^ self.seq_decl

        # ###### Statements

        self.return_stmt = Group(self.return_ + self.expr)
        self.return_stmt.setParseAction(self.AST.return_stmt)

        # Separate Literal objects are created for the braces of each construct
        # so that each one can carry its own AST parse action.
        self.function_start = Literal("{")
        self.function_start.setParseAction(self.AST.function_start)
        self.function_end = Literal("}")

        self.function_decl = Group((Group(self.seq_) | Group(self.int_size) | Group(self.bit_) | Group(self.void_))("return_type") + Group(self.ID)("func_ID") +
                                   Suppress(self.l_bracket) + Group(Optional(delimitedList(Group((self.seq_ | self.int_size | self.bit_) + Group(self.ID)))))("func_param") +  # NOQA
                                   Suppress(self.r_bracket) + Suppress(self.function_start) + Group(self.stmt)("body") + Suppress(self.r_brace))
        self.function_decl.setParseAction(self.AST.function_decl)

        self.for_init = Literal('(')
        self.for_init.setParseAction(self.AST.begin_for)

        self.for_terminator = Literal(';')
        self.for_terminator.setParseAction(self.AST.for_terminator)

        self.for_increment = Literal(';')
        self.for_increment.setParseAction(self.AST.for_increment)

        # Same operands as self.expr, but the operator levels are listed with
        # the logical operators first.
        self.terminator_expr = Group(infixNotation(Group(self.operand), [(self.log_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                                                         (self.bitwise_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                                                         (self.comparison_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                                                         (self.arith_op, 2, opAssoc.LEFT, self.nest_operand_pairs)]))

        self.terminator_expr.setParseAction(self.AST.terminator_expr)
        self.for_body = Literal('{')
        self.for_body.setParseAction(self.AST.for_body)

        self.end_for = Literal('}')
        self.end_for.setParseAction(self.AST.end_for)

        # for ( init ; terminator ; increment ) { body }
        self.for_loop << Group(self.for_ + ~White() + Suppress(self.for_init) +
                               Optional(delimitedList(self.decl ^ self.id_set))("init") + Suppress(self.for_terminator) +
                               Optional(self.terminator_expr) + Suppress(self.for_increment) +
                               Optional(delimitedList(self.id_set))("increm") + Suppress(self.r_bracket) +
                               Suppress(self.for_body) + self.stmt("loop_body") + Suppress(self.end_for))

        self.if_condition = Suppress(self.l_bracket) + self.expr + Suppress(self.r_bracket)
        self.if_condition.setParseAction(self.AST.if_cond)

        self.if_.setParseAction(self.AST.begin_if)
        self.if_body_st = Literal('{')
        self.if_body_st.setParseAction(self.AST.if_body_st)
        self.if_body_end = Literal('}')
        self.if_body_end.setParseAction(self.AST.if_body_end)
        self.if_stmt = Group(self.if_ + self.if_condition("if_cond") + Suppress(self.if_body_st) + Group(self.stmt).setResultsName("body") + Suppress(self.if_body_end))
        self.single_expr = self.expr + Suppress(self.term_st)
        self.single_expr.setParseAction(self.AST.stand_alone_expr)

        # A statement list: zero or more of the alternatives below.  `+` binds
        # tighter than `^`, so each ';' belongs to the alternative before it.
        self.stmt << ZeroOrMore(self.decl + Suppress(self.term_st)
                                ^ self.function_decl
                                ^ self.id_set + Suppress(self.term_st)
                                ^ self.single_expr
                                ^ self.for_loop
                                ^ self.if_stmt
                                ^ self.return_stmt + Suppress(self.term_st)
                                ^ self.sbox_call + Suppress(self.term_st))

        self.grammar_test = self.stmt + StringEnd()  # Allows single statements to be parsed

        self.grammar = ZeroOrMore(self.function_decl
                                  ^ self.seq_decl + Suppress(self.term_st)) + StringEnd()

    def nest_operand_pairs(self, tokens):
        """Nest a flat operator chain into left-nested groups.

        The first three tokens form the innermost group.  The remaining
        tokens are consumed two at a time, and each pair is appended to a
        new group that wraps the previous result.
        """
        tokens = tokens[0]
        ret = ParseResults(tokens[:3])
        remaining = iter(tokens[3:])
        done = False
        while not done:
            # Take the next two tokens; None marks exhaustion.
            next_pair = (next(remaining, None), next(remaining, None))
            if next_pair == (None, None):
                done = True
                break
            ret = ParseResults([ret])
            ret += ParseResults(list(next_pair))
        return [ret]

    @property
    def AST(self):
        """The syntax tree object that receives the parse actions."""
        return self._AST

    @AST.setter
    def AST(self, value):
        self._AST = value

    def analyse_tree_test(self, AST):
        """Run the semantic analyser on `AST` and return its result."""
        return self.semantic_analyser.analyse(AST)

    def parse_test_unit(self, data_in):
        """Parses single statements.

        Returns ``[results, True]`` on success; prints the error and
        returns ``False`` when parsing fails.
        """
        try:
            res = self.grammar_test.parseString(data_in)
        except ParseException as details:
            print("The following error occured:")
            print(details)
            return False
        if type(res[0]) is not bool:
            pass
            # print(res[0].dump())
        return [res, True]

    def parse_test_AST_semantic(self, data_in):
        """Parses single statements and returns AST.

        Prints the error and returns ``False`` when parsing fails.
        """
        try:
            self.grammar_test.parseString(data_in)
        except ParseException as details:
            print("The following error occured:")
            print(details)
            return False
        return self.AST

    def parse_test_integration(self, data_in):
        """Only Parses Statements in functions.

        Returns ``[results, True]`` on success; prints the error and
        returns ``False`` when parsing fails.
        """
        try:
            res = self.grammar.parseString(data_in)
        except ParseException as details:
            print("The following error occured:")
            print(details)
            return False
        # if type(res[0]) is not bool:
        #     print(res[0].dump())
        return [res, True]

    def parse(self, data_in):
        """Prod parsing entry point.

        Parse errors propagate to the caller.  The translated IR is
        returned only when semantic analysis returns ``True``; otherwise
        the method returns ``None``.
        """
        self.grammar.parseString(data_in)
        if self.semantic_analyser.analyse(self.AST, True) is True:
            return self.semantic_analyser.IR.translate()
from pyparsing import (
    Group,
    Keyword,
    Optional,
    Suppress,
    Word,
    ZeroOrMore,
    alphanums,
    alphas,
    nums,
    oneOf,
    restOfLine,
)

# Parses Spring log lines (logs are taken from ENBD project)
log_sample = '''
2016-08-01 11:27:21.047 WARN 22458 --- [http-bio-8080-exec-24] c.m.enbd.core.AbstractEnbdController : POST /goal/daily: User haven't defined a goal yet
2016-08-01 17:54:47.908 INFO 22458 --- [http-bio-8080-exec-27] com.monitise.enbd.core.RequestFilter : Handling request, /version, POST
2016-08-01 17:54:47.909 INFO 22458 --- [http-bio-8080-exec-30] com.monitise.enbd.core.RequestFilter : Handling request, /features, POST
2016-08-01 17:54:47.914 DEBUG 22458 --- [http-bio-8080-exec-30] c.m.enbd.core.AbstractEnbdController : getting IOS features
2016-08-01 17:54:47.924 INFO 22458 --- [http-bio-8080-exec-27] c.m.enbd.core.AbstractEnbdController : checking version for device code: 1 and device OS: 10.0 and version: 1.0.3'''

# Timestamp: a date token followed by a time token.
date = Word(nums + '-')
time = Word(nums + ':.')

# Severity; an equivalent spelling would be oneOf(['INFO', 'WARN', 'DEBUG'])
level = (
    Keyword('INFO')
    | Keyword('WARN')
    | Keyword('DEBUG')
)

pid = Word(nums)

# Thread name, with the surrounding square brackets dropped.
thread = Suppress('[') + Word(alphanums + '-') + Suppress(']')

# Dotted logger name, then everything up to the end of the line.
source = Word(alphas + '.')
message = restOfLine

# One grouped result per log line; the '---' and ':' separators are dropped.
line_prefix = date + time + level + pid
log_line = Group(
    line_prefix + Suppress('---') + thread + source + Suppress(':') + message
)
logs = ZeroOrMore(log_line)

print(logs.parseString(log_sample))

# So, if you design log strings that can be parsed, you can actually parse them
# Extract sections of a text report held in `site_contents` (defined outside
# this fragment).  Every section is located with str.find on literal markers.

# Cutout values: the text after the first space following the
# 'Current Cutout Values:' marker, up to the end of that line, split on
# spaces with empty pieces removed.
start = site_contents.find(' ', site_contents.find('Current Cutout Values:'))
end = site_contents.find('\r\n', start)
cutout = site_contents[start:end].strip().split(' ')
cutout = [p for p in cutout if len(p) != 0]

# Primal section: repeated "Primal <word>" labels, each followed by two tokens
# made of digits and '.', from 'Primal Rib' up to the next '---'.
primal_grammar = OneOrMore(Combine(Literal('Primal') + ' ' + Word(alphas)) + Word(nums+'.') * 2)
start = site_contents.find('Primal Rib')
end = site_contents.find('---', start)
primal = primal_grammar.parseString(site_contents[start:end])
# Regroup the flat token list into chunks of three (label, value, value).
primal = [primal[i:i+3] for i in range(0, len(primal), 3)]

# Volume section: a label followed by two numeric tokens, each with a trailing
# alphabetic word that is suppressed.
start = site_contents.find('Choice Cuts', start)
end = site_contents.find('---', start)
volume_grammar = ZeroOrMore(Combine(Word(alphas) + ' ' + ZeroOrMore(Word(alphas))) + (Word(nums + '.' + ',' ) + Suppress(Word(alphas))) * 2)
volume = volume_grammar.parseString(site_contents[start:end])
volume = [volume[i:i + 3] for i in range(0, len(volume), 3)]

# Result containers; they are not filled within this fragment.
choice = []
select = []
choice_select = []
ground_beef = []
blended_gb = []
beef_trimmings = []

# Offsets derived from successive 'Rib,' and section-title markers.
# NOTE(review): str.find returns -1 when a marker is missing and none of these
# lookups checks for that; the meaning of the -10 adjustment is not visible
# here -- confirm against the report layout.
end1 = site_contents.find('Rib,', end)-10
end2 = site_contents.find('Rib,', site_contents.find('-----', end1))-10
end3 = site_contents.find('Rib,', site_contents.find('-----', end2))-10
end4 = site_contents.find('Ground Beef 73%', end3)
end5 = site_contents.find('Blended Ground Beef', end4)
end6 = site_contents.find('Fresh 50%', end5)
end = [end1, end2, end3, end4, end5, end6]
class TOMLParser(object):
    """Parse a TOML document into nested dictionaries using pyparsing.

    The grammar is built once in ``__init__``; ``parse`` can then be called
    repeatedly.  Parse actions convert each value token to a Python object
    and store key/value pairs into the dictionary of the current key group.
    """

    def __init__(self):
        # Keys may contain any printable character except [ ] = and ";
        # key group names any printable character except [ ] and .
        key_name = Word(re.sub(r"[\[\]=\"]", "", printables))
        kgrp_name = Word(re.sub(r"[\[\]\.]", "", printables))
        basic_int = Optional("-") + ("0" | Word(nums))
        types = dict(
            string = QuotedString("\"", escChar="\\"),
            integer = Combine(basic_int),
            float = Combine(basic_int + "." + Word(nums)),
            datetime = Regex(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z"),
            boolean = Keyword("true") | Keyword("false"),
            array = Forward(),
        )
        # An array holds a comma separated list of values of ONE type, so
        # build one delimitedList per type and let Or pick the longest match.
        pure_array = Or(delimitedList(type_) for type_ in types.values())
        types["array"] << Group(Suppress("[") + Optional(pure_array) + Suppress("]"))
        value = Or(type_ for type_ in types.values())
        keyvalue = key_name + Suppress("=") + value + Suppress(LineEnd())
        keygroup_namespace = kgrp_name + ZeroOrMore(Suppress(".") + kgrp_name)
        keygroup = "[" + keygroup_namespace + "]" + LineEnd()
        comments = pythonStyleComment
        self._toplevel = ZeroOrMore(keyvalue | keygroup)
        self._toplevel.ignore(comments)
        # Each value type is converted by the method named "_parse_<type>".
        for k, v in types.items():
            v.setParseAction(getattr(self, "_parse_"+k))
        keyvalue.setParseAction(self._parse_keyvalue)
        keygroup_namespace.setParseAction(self._parse_keygroup_namespace)

    def _parse_string(self, src, loc, toks):
        """Unescape a quoted string, rejecting escapes other than \\0 \\t \\n \\r \\" \\\\."""
        match = re.search(r"(?<!\\)(\\[^0tnr\"\\])", toks[0])
        if match:
            # ParseException takes (pstr, loc, msg): pass the parsed text
            # first so the message and position are reported correctly.
            raise ParseException(
                src, loc, "Reserved escape sequence \"%s\"" % match.group())
        return unescape(toks[0])

    def _parse_integer(self, tok):
        """Convert an integer token to ``int``."""
        return int(tok[0])

    def _parse_float(self, tok):
        """Convert a float token to ``float``."""
        return float(tok[0])

    def _parse_boolean(self, tok):
        """Convert the keyword ``true``/``false`` to ``True``/``False``.

        ``bool(tok[0])`` would be wrong here: any non-empty string, including
        ``"false"``, is truthy.
        """
        return tok[0] == "true"

    ISO8601 = "%Y-%m-%dT%H:%M:%SZ"

    def _parse_datetime(self, src, loc, toks):
        """Convert an ISO 8601 "Zulu" timestamp token to ``datetime``."""
        try:
            return datetime.strptime(toks[0], self.ISO8601)
        except ValueError:
            # this informative error message will never make it out because
            # pyparsing catches ParseBaseException and reraises on its own.
            # oh well.
            raise ParseException(src, loc, "invalid datetime \"%s\"" % toks[0])

    def _parse_array(self, tok):
        """Keep the grouped array contents as a single token."""
        return [tok[0]]

    def _parse_keyvalue(self, s, loc, toks):
        """Store ``key = value`` in the current key group; duplicates are errors."""
        k, v = toks.asList()
        if k in self._cur:
            raise ParseException(s, loc, "key %s already exists" % k)
        self._cur[k] = v

    def _parse_keygroup_namespace(self, s, loc, toks):
        """Make ``[a.b.c]`` the current key group, creating dicts as needed."""
        cur = self._root
        for subname in toks:
            subspace = cur.get(subname, {})
            # A plain value already stored under this name cannot also be
            # used as a key group.
            if not isinstance(subspace, dict):
                raise ParseException(s, loc, "key %s already exists" % subname)
            cur = cur.setdefault(subname, subspace)
        self._cur = cur

    def parse(self, s):
        """Parse the TOML text ``s`` and return the resulting dictionary.

        Raises ``ParseException`` when the text does not match the grammar.
        """
        self._root = {}
        self._cur = self._root
        self._toplevel.parseWithTabs()
        self._toplevel.parseString(s, parseAll=True)
        return self._root
def parse_imp (input): # parse a string into an element of the abstract representation # Grammar: # # <expr> ::= <integer> # true # false # <identifier> # ( if <expr> <expr> <expr> ) # ( function ( <name ... ) <expr> ) # ( <expr> <expr> ... ) # # <decl> ::= var name = expr ; # # <stmt> ::= if <expr> <stmt> else <stmt> # while <expr> <stmt> # name <- <expr> ; # print <expr> ; # <block> # # <block> ::= { <decl> ... <stmt> ... } # # <toplevel> ::= <decl> # <stmt> # idChars = alphas+"_+*-?!=<>+" QUOTE = Literal('"') INTERNAL_QUOTE = QUOTE.copy().leaveWhitespace() pIDENTIFIER = Word(idChars, idChars+"0123456789") #### NOTE THE DIFFERENCE pIDENTIFIER.setParseAction(lambda result: EPrimCall(oper_deref,[EId(result[0])])) # A name is like an identifier but it does not return an EId... pNAME = Word(idChars,idChars+"0123456789") #| Keyword("&\"") | Keyword("&\'") pNAMECON = "," + pNAME pNAMECON.setParseAction(lambda result: result[1]) pNAMES = pNAME + ZeroOrMore(pNAMECON) | ZeroOrMore(pNAME) pNAMES.setParseAction(lambda result: [result]) pINTEGER = Word("0123456789") pINTEGER.setParseAction(lambda result: EValue(VInteger(int(result[0])))) QUOTE = Literal("&\"") | Literal("&\'") pSTRINGSTART = Literal('"') + ZeroOrMore(Word(" ")).leaveWhitespace() pSTRINGSTART.setParseAction(lambda result: result[1:]) pSTRING = pSTRINGSTART + ZeroOrMore(Combine( Word(idChars+"0123456789'"+" ") | QUOTE)) + Literal('"') pSTRING.setParseAction(lambda result: EValue(VString(str(result[:-1])))) pBOOLEAN = Keyword("true") | Keyword("false") pBOOLEAN.setParseAction(lambda result: EValue(VBoolean(result[0]=="true"))) pEXPR = Forward() pEXPR2 = Forward() pSTMT_BLOCK = Forward() pSTMT = Forward() pEXPRS = ZeroOrMore(pEXPR2) pEXPRS.setParseAction(lambda result: [result]) pIF = pEXPR + Keyword("?") + pEXPR + Keyword(':') + pEXPR pIF.setParseAction(lambda result: EIf(result[0], result[2], result[4])) def mkFunBody (params,body): bindings = [ (p,ERefCell(EId(p))) for p in params ] return ELet(bindings,body) 
def mkLetBody (bindings,body): bindings = [ (p[0],ERefCell(p[1])) for p in bindings ] return ELet(bindings,body) def multiCallHelper(result, start, i, length): if i < length: start = ECall(result[1][i][0], [result[1][i][1], start]) multiCallHelper(result, start, i + 1, length) return start def multiCall(result): start = ECall(result[1][0][0], [result[0], result[1][0][1]]) return multiCallHelper(result, start, 1, len(result[1])) def eFunHelper(variables, expression): if len(variables) == 1: return EFunction(variables[0], expression) else: return EFunction(variables[0], eFunHelper(variables[1:], expression)) def eFunName(result): varName = result[1] variables = result[3] expression = result[-1] print variables, expression return EFunction(variables, expression, varName) pFUN = Keyword("fun") + "(" + pNAMES + ")" + pSTMT pFUN.setParseAction(lambda result: EFunction(result[2],mkFunBody(result[2],result[4]))) pFUNR = Keyword("fun") + pNAME + "(" + pNAMES + ")" + pSTMT # pFUNR.setParseAction(eFunName) pFUNR.setParseAction(lambda result: EFunction(result[3],mkFunBody(result[3],result[5]), result[1])) pEXPR2CAR = "," + pEXPR2 pEXPR2CAR.setParseAction(lambda result: result[1]) pEXPR2MULTIALL = pEXPR2 + ZeroOrMore(pEXPR2CAR) | ZeroOrMore(pEXPR2) pEXPR2MULTIALL.setParseAction(lambda result: [result]) pFUNCALL = pEXPR + "(" + pEXPR2MULTIALL + ")" pFUNCALL.setParseAction(lambda result: ECall(result[0], result[2])) pBINDINGCAR = "," + pNAME + "=" + pEXPR2 pBINDINGCAR.setParseAction(lambda result: (result[1], result[3])) pBINDINGCON = pNAME + "=" + pEXPR2 pBINDINGCON.setParseAction(lambda result: (result[0], result[2])) pBINDINGS = pBINDINGCON + ZeroOrMore(pBINDINGCAR) pBINDINGS.setParseAction(lambda result: [result]) pLET = Keyword("let") + "(" + pBINDINGS + ")" + pEXPR2 pLET.setParseAction(lambda result: mkLetBody(result[2], result[4])) pCALLG = pIDENTIFIER + pEXPR2 pCALLG.setParseAction(lambda result: (result[0], result[1])) pCALL1S = OneOrMore(pCALLG) 
pCALL1S.setParseAction(lambda result: [ result ]) pCALL = pEXPR + pCALL1S pCALL.setParseAction(multiCall) pCALL1 = pIDENTIFIER + pEXPR2 pCALL1.setParseAction(lambda result: ECall(result[0], [result[1]])) pNOT = "not" + pEXPR2 pNOT.setParseAction(lambda result: EPrimCall(oper_not, [result[1]])) pARRAYITEM = "," + pEXPR2 pARRAYITEM.setParseAction(lambda result: (result[1])) pARRAYITEMS = ZeroOrMore(pARRAYITEM) pARRAYITEMS.setParseAction(lambda result: [result]) pARRAY = "[" + ZeroOrMore(pEXPR2) + pARRAYITEMS + "]" pARRAY.setParseAction(lambda result: EArray(result[1],result[2])) pDICTPAIR = pNAME + ":" + pEXPR pDICTPAIR.setParseAction(lambda result: (result[0],result[2])) pDICTPAIRWITHCOMMA = "," + pNAME + ":" + pEXPR pDICTPAIRWITHCOMMA.setParseAction(lambda result: (result[1],result[3])) pDICTS = ZeroOrMore(pDICTPAIRWITHCOMMA) pDICTS.setParseAction(lambda result: [ result ]) pDICT = "{" + pDICTPAIR + pDICTS + "}" pDICT.setParseAction(lambda result:EDict(result[1],result[2])) pEXPR2P = "(" + pEXPR2 + ")" pEXPR2P.setParseAction(lambda result: result[1]) pACCESS = pNAME + "[" + pEXPR + "]" pACCESS.setParseAction(lambda result: EPrimCall(oper_access_arr,[EId(result[0]),result[2]])) pLEN = Keyword("len") + "(" + pNAME + ")" pLEN.setParseAction(lambda result: EPrimCall(oper_len,[EId(result[2])])) pEXPR << ( pEXPR2P | pINTEGER | pNOT | pARRAY | pACCESS | pDICT | pSTRING | pBOOLEAN | pIDENTIFIER | pCALL1 | pLEN ) pEXPR2 << ( pLET | pFUN | pFUNR | pFUNCALL | pIF | pCALL | pEXPR ) pDECL_VAR_E = "var" + pNAME + ";" pDECL_VAR_E.setParseAction(lambda result: (result[1], EValue(VNone))) pDECL_VAR = "var" + pNAME + "=" + pEXPR2 + ";" pDECL_VAR.setParseAction(lambda result: (result[1],result[3])) pDECL_PROCEDURE = "def" + pNAME + "(" + pNAMES + ")" + pSTMT pDECL_PROCEDURE.setParseAction(lambda result: (result[1], EProcedure(result[3], mkFunBody(result[3], result[5])))) # hack to get pDECL to match only PDECL_VAR (but still leave room # to add to pDECL later) pDECL = ( pDECL_VAR_E | 
pDECL_VAR | pDECL_PROCEDURE | NoMatch() | ";" ) pDECLS = ZeroOrMore(pDECL) pDECLS.setParseAction(lambda result: [result]) pSTMT_IF_1 = "if (" + pEXPR2 + ")" + pSTMT + "else" + pSTMT pSTMT_IF_1.setParseAction(lambda result: EIf(result[1],result[3],result[5])) pSTMT_IF_2 = "if (" + pEXPR2 + ")" + pSTMT pSTMT_IF_2.setParseAction(lambda result: EIf(result[1],result[3],EValue(VBoolean(True)))) pSTMT_WHILE = "while (" + pEXPR2 + ")" + pSTMT pSTMT_WHILE.setParseAction(lambda result: EWhile(result[1],result[3])) pSTMT_FOR = "for (" + pNAME + "in" + pEXPR2 + ")" + pSTMT pSTMT_FOR.setParseAction(lambda result: EFor(result[1], result[3], result[5])) pSTMT_PRINT_STMS = "," + pEXPR2 pSTMT_PRINT_STMS.setParseAction(lambda result: [ result[1] ]) pSTMT_PRINT_ZERO = ZeroOrMore(pSTMT_PRINT_STMS) pSTMT_PRINT_ZERO.setParseAction(lambda result: [ result ]) def printStmEval(result): newArray = [] newArray.append(result[1]) for i in result[2]: newArray.append(i) return EPrimCall(oper_print,newArray) pSTMT_PRINT = "print" + pEXPR2 + pSTMT_PRINT_ZERO + ";" pSTMT_PRINT.setParseAction(printStmEval) pSTMT_UPDATE_ARR = pNAME + "[" + pEXPR +"]" + "=" + pEXPR + ";" pSTMT_UPDATE_ARR.setParseAction(lambda result: EPrimCall(oper_update_arr,[EId(result[0]),result[2],result[5]])) pSTMT_UPDATE = pNAME + "=" + pEXPR2 + ";" pSTMT_UPDATE.setParseAction(lambda result: EPrimCall(oper_update,[EId(result[0]),result[2]])) pSTMTS = ZeroOrMore(pSTMT) pSTMTS.setParseAction(lambda result: [result]) def mkBlock (decls,stmts): bindings = [ (n,ERefCell(expr)) for (n,expr) in decls ] return ELet(bindings,EDo(stmts)) pSTMT_BLOCK = "{" + pDECLS + pSTMTS + "}" pSTMT_BLOCK.setParseAction(lambda result: mkBlock(result[1],result[2])) pSTMT_pEXPR2 = pEXPR2 + ";" pSTMT_pEXPR2.setParseAction(lambda result: result[0]) pSTMT << ( pSTMT_IF_1 | pSTMT_IF_2 | pSTMT_WHILE | pSTMT_FOR | pSTMT_PRINT | pSTMT_UPDATE_ARR | pSTMT_UPDATE | pSTMT_BLOCK | pSTMT_pEXPR2 | pEXPR2 ) # can't attach a parse action to pSTMT because of recursion, so 
let's duplicate the parser pTOP_STMT = pSTMT.copy() pTOP_STMT.setParseAction(lambda result: {"result":"statement", "stmt":result[0]}) pTOP_DECL = pDECL.copy() pTOP_DECL.setParseAction(lambda result: {"result":"declaration", "decl":result[0]}) pABSTRACT = "#abs" + pSTMT pABSTRACT.setParseAction(lambda result: {"result":"abstract", "stmt":result[1]}) pQUIT = Keyword("#quit") pQUIT.setParseAction(lambda result: {"result":"quit"}) pTOP = ZeroOrMore(pTOP_DECL) + ZeroOrMore(pTOP_STMT) return pTOP.parseString(input)
def from_gml(text):
    """
    Return the list of graphs read from `text`, a string in GML format.

    This function assumes that the input follows the GML specification,
    provides unique integer ids even for isolated nodes,
    and defines one or more graphs.
    This function ignores anything other than node ids and edge endpoints.
    This means directed graphs are read as undirected graphs,
    node labels and edge weights are discarded, etc.
    If an edge endpoint (integer) is an unknown node id, the node is created.
    """
    # Define the grammar with [pyparsing](http://pyparsing.wikispaces.com).
    # Don't use `from pyparsing import *` as it adds many constants
    # to the generated documentation.
    from pyparsing import (
        srange, oneOf, Combine, Forward, Optional, Suppress, Word, ZeroOrMore,
        dblQuotedString, pythonStyleComment
    )
    digit = srange("[0-9]")
    sign = Optional(oneOf("+ -"))
    # `Word(x)` is a sequence of one or more characters from the set x.
    digits = Word(digit)
    # The exponent must be built from `digits` (a parser). Using the plain
    # string `digit` here would match the literal text "0123456789".
    mantissa = Optional("E" + sign + digits)
    # Combine sign and digits into ONE token so that `to_int` below sees
    # e.g. "-5" instead of the separate tokens "-" and "5".
    integer = Combine(sign + digits)
    real = sign + Optional(digits) + "." + Optional(digits) + mantissa
    # For simplicity, use pyparsing's string with double-quotes,
    # hoping that it is a generalisation of GML's definition of a string.
    string = dblQuotedString

    # A GML file is a list of key-value pairs, where a value may be a list.
    # To handle this recursive definition, we delay what a pair is.
    pair = Forward()
    pairs = ZeroOrMore(pair)
    # A file may have comments, which are as in Python. Ignore them.
    pairs.ignore(pythonStyleComment)
    # `Word(x, y)` is 1 character from x followed by 0 or more from y.
    key = Word(srange("[a-zA-Z]"), srange("[a-zA-Z0-9]"))
    # `Suppress(x)` matches x but doesn't put it in the list of parsed tokens.
    listValue = Suppress("[") + pairs + Suppress("]")
    value = real | integer | string | listValue

    # The mandatory key-value pairs for graphs are as follows.
    graph = Suppress("graph") + listValue
    node = Suppress("node") + listValue
    anEdge = "edge" + listValue     # to avoid conflict with edge() function
    node_id = Suppress("id") + integer
    source = Suppress("source") + integer
    target = Suppress("target") + integer
    # First try to parse graph-specific key-value pairs; otherwise ignore pair.
    pair <<= graph | node | anEdge | node_id | source | target | Suppress(key+value)

    # The above suppressions lead to the GML string
    # `'graph [ node [id 1 label "ego"] edge [source 1 target 1 weight 0.5] ]'`
    # being parsed into the list of tokens
    # `["1", "edge", "1", "1"]`,
    # which is converted by the following functions into a graph.
    def to_int(text, position, tokens):
        # Convert parsed integer tokens to integers, e.g. `["1"]` to `1`.
        return int(tokens[0])

    def to_edge(text, position, tokens):
        # Assuming the above conversion was done,
        # convert `["edge", a, b]` to an edge incident to a and b.
        return edge(tokens[1], tokens[2])

    def to_graph(text, position, tokens):
        # `tokens` is now a list of integers and edges, in any order.
        nodes = set()
        edges = set()
        for token in tokens:
            # If the token is an integer, it's a node id.
            if isinstance(token, int):
                nodes.add(token)
            else:
                edges.add(token)
        return network(edges, nodes)

    # Do the conversions as soon as the respective tokens are parsed.
    integer.setParseAction(to_int)
    anEdge.setParseAction(to_edge)
    graph.setParseAction(to_graph)

    # Parse the text with the main grammar rule.
    # Return the result as a list, not as a pyparsing object.
    return pairs.parseString(text).asList()
def handle(self, *args, **options):
    """Import S3 access logs from ``settings.LOG_BUCKET`` into the database.

    Every object in the bucket is read as UTF-8 text and split into lines.
    Each non-empty line is tokenised, converted into an
    ``AWSDataFileAccessLog`` and saved, unless it is an internal S3
    operation, the "datafile-access-logging" flag is inactive for the
    file's user, or the request is filtered out (storage-bucket URL,
    non-GET operation, blacklisted key).  The bucket object is deleted once
    all of its lines have been processed.
    """
    self.stdout.write("Retreiving file access logs")
    s3 = boto3.resource("s3")
    bucket = s3.Bucket(settings.LOG_BUCKET)

    # Both the tokenizer and the column list are the same for every line, so
    # build them once instead of once per log entry.
    # This here solution brought to you by Stack Overflow:
    # https://stackoverflow.com/questions/27303977/split-string-at-double-quotes-and-box-brackets
    parser = ZeroOrMore(
        Regex(r"\[[^]]*\]") | Regex(r'"[^"]*"') | Regex(r"[^ ]+")
    )
    # Model field names in the order the tokens appear in a log line.
    fields = [
        "bucket_owner",
        "bucket",
        "time",
        "remote_ip",
        "requester",
        "request_id",
        "operation",
        "bucket_key",
        "request_uri",
        "status",
        "error_code",
        "bytes_sent",
        "object_size",
        "total_time",
        "turn_around_time",
        "referrer",
        "user_agent",
        "version_id",
        "host_id",
        "signature_version",
        "cipher_suite",
        "auth_type",
        "host_header",
    ]

    for item in bucket.objects.all():
        log_objects = item.get()["Body"].read().decode("utf-8").split("\n")
        for log_entry in log_objects:
            if not log_entry:
                continue

            log = list(parser.parseString(log_entry))
            if log[4] == "AmazonS3":
                # Internal S3 operation, can be skipped
                continue

            aws_log_entry = AWSDataFileAccessLog()
            for index, field_name in enumerate(fields):
                field = aws_log_entry._meta.get_field(field_name)
                if "IntegerField" in field.get_internal_type():
                    log_item = log[index]
                    # A dash (bare or quoted) stands for "no value".
                    if (log_item == "-") or (log_item == '"-"'):
                        log_item = 0
                    log[index] = int(log_item)
                if field.get_internal_type() == "DateTimeField":
                    log[index] = datetime.strptime(
                        log[index], "[%d/%b/%Y:%H:%M:%S %z]"
                    )
                if index == 17:
                    # Sometimes, aws inserts a stray '-' here, klugey workaround
                    if (log[17] == "-") and (len(log[18]) < 32):
                        # The actual Host ID is always quite long
                        log.pop(17)
                setattr(aws_log_entry, field_name, log[index])

            # request_uri is split on spaces and the second part is the URL.
            url = aws_log_entry.request_uri.split(" ")[1]
            # Split out the key from the url
            parsed_url = urlparse(url)
            # parse_qs returns a dict with lists as values
            oh_key = parse_qs(parsed_url.query).get("x-oh-key", [""])[0]
            if oh_key == "None":
                oh_key = None
            if oh_key:
                oh_data_file_access_logs = NewDataFileAccessLog.objects.filter(
                    data_file_key__key=oh_key
                )
            else:
                oh_data_file_access_logs = None

            data_file = DataFile.objects.filter(file=aws_log_entry.bucket_key)
            if data_file:
                if data_file.count() == 1:
                    aws_log_entry.serialized_data_file = serialize_datafile_to_dict(
                        data_file.get()
                    )
            elif oh_data_file_access_logs:
                aws_log_entry.serialized_data_file = serialize_datafile_to_dict(
                    oh_data_file_access_logs.get().data_file
                )
            else:
                aws_log_entry.serialized_data_file = None

            # Get target datafile user, if possible.
            datafile_user = AnonymousUser()
            if data_file:
                # `data_file` is the result of .filter(), i.e. a collection of
                # matches, so the user has to be read from one of its
                # members rather than from the collection itself.
                datafile_user = data_file.first().user
            elif oh_data_file_access_logs:
                user_id = oh_data_file_access_logs.get().serialized_data_file[
                    "user_id"
                ]
                datafile_user = User.objects.get(id=user_id)

            # Abort if the feature is inactive.
            flag = FlagModel.get("datafile-access-logging")
            # NOTE(review): `request` is not defined anywhere in this method;
            # unless it is a module-level name this line raises NameError.
            # Confirm what should be passed here.
            if not flag.is_active(request=request, subject=datafile_user):
                continue

            # Filter out things we don't care to log
            if settings.AWS_STORAGE_BUCKET_NAME in url:
                continue
            if "GET" not in str(aws_log_entry.operation):
                continue
            if any(
                blacklist_item in url for blacklist_item in AWS_LOG_KEY_BLACKLIST
            ):
                continue
            aws_log_entry.save()

            # Associate with any potential access logs from the Open Humans end.
            if oh_data_file_access_logs:
                aws_log_entry.oh_data_file_access_log.set(oh_data_file_access_logs)
        # The S3 log object has been fully processed; remove it.
        item.delete()
def parse(cls, content, basedir=None, resolve=True, unresolved_value=DEFAULT_SUBSTITUTION):
    """parse a HOCON content

    :param content: HOCON content to parse
    :type content: basestring
    :param basedir: directory that relative ``include`` file paths are joined to;
        when None the include path is used as given
    :param resolve: if true, resolve substitutions
    :type resolve: boolean
    :param unresolved_value: assigned value to unresolved substitution.
        If overriden with a default value, it will replace all unresolved value to the default value.
        If it is set to pyhocon.STR_SUBSTITUTION then it will replace the value by its
        substitution expression (e.g., ${x})
    :return: a ConfigTree or a list
    :raises ConfigSubstitutionException: if ``resolve`` is true, ``unresolved_value`` is
        MANDATORY_SUBSTITUTION and some substitution could not be resolved
    """

    unescape_pattern = re.compile(r'\\.')

    def replace_escape_sequence(match):
        # Map a backslash escape to its replacement; unknown escapes are kept.
        value = match.group(0)
        return cls.REPLACEMENTS.get(value, value)

    def norm_string(value):
        return unescape_pattern.sub(replace_escape_sequence, value)

    def unescape_string(tokens):
        return ConfigUnquotedString(norm_string(tokens[0]))

    def parse_multi_string(tokens):
        # remove the first and last 3 "
        return tokens[0][3: -3]

    def convert_number(tokens):
        # Prefer int; fall back to float for anything int() rejects.
        n = tokens[0]
        try:
            return int(n, 10)
        except ValueError:
            return float(n)

    # ${path} or ${?path} for optional substitution
    SUBSTITUTION_PATTERN = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)"

    def create_substitution(instring, loc, token):
        # remove the ${ and }
        match = re.match(SUBSTITUTION_PATTERN, token[0])
        variable = match.group('variable')
        ws = match.group('ws')
        optional = match.group('optional') == '?'
        substitution = ConfigSubstitution(variable, optional, ws, instring, loc)
        return substitution

    # "text" followed by optional trailing blanks/tabs (kept separately)
    STRING_PATTERN = '"(?P<value>(?:[^"\\\\]|\\\\.)*)"(?P<ws>[ \t]*)'

    def create_quoted_string(instring, loc, token):
        # remove the surrounding quotes and unescape the content
        match = re.match(STRING_PATTERN, token[0])
        value = norm_string(match.group('value'))
        ws = match.group('ws')
        return ConfigQuotedString(value, ws, instring, loc)

    def include_config(instring, loc, token):
        url = None
        file = None
        required = False

        # `include required(...)` puts the keyword in front of the usual tokens.
        if token[0] == 'required':
            required = True
            final_tokens = token[1:]
        else:
            final_tokens = token

        if len(final_tokens) == 1:  # include "test"
            value = final_tokens[0].value if isinstance(final_tokens[0], ConfigQuotedString) else final_tokens[0]
            if value.startswith("http://") or value.startswith("https://") or value.startswith("file://"):
                url = value
            else:
                file = value
        elif len(final_tokens) == 2:  # include url("test") or file("test")
            # Inspect final_tokens, not token: with a leading 'required',
            # token[1] is the url/file keyword, not the quoted string.
            value = final_tokens[1].value if isinstance(final_tokens[1], ConfigQuotedString) else final_tokens[1]
            if final_tokens[0] == 'url':
                url = value
            else:
                file = value

        if url is not None:
            logger.debug('Loading config from url %s', url)
            obj = ConfigFactory.parse_URL(
                url,
                resolve=False,
                required=required,
                unresolved_value=NO_SUBSTITUTION
            )
        elif file is not None:
            path = file if basedir is None else os.path.join(basedir, file)
            logger.debug('Loading config from file %s', path)
            obj = ConfigFactory.parse_file(
                path,
                resolve=False,
                required=required,
                unresolved_value=NO_SUBSTITUTION
            )
        else:
            raise ConfigException('No file or URL specified at: {loc}: {instring}', loc=loc, instring=instring)

        return ConfigInclude(obj if isinstance(obj, list) else obj.items())

    # Newlines are significant in HOCON, so only blanks and tabs are skipped.
    ParserElement.setDefaultWhitespaceChars(' \t')

    assign_expr = Forward()
    true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True))
    false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False))
    null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(NoneValue()))
    key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- /')

    eol = Word('\n\r').suppress()
    eol_comma = Word('\n\r,').suppress()
    comment = (Literal('#') | Literal('//')) - SkipTo(eol | StringEnd())
    comment_eol = Suppress(Optional(eol_comma) + comment)
    comment_no_comma_eol = (comment | eol).suppress()
    number_expr = Regex(r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
                        re.DOTALL).setParseAction(convert_number)

    # multi line string using """
    # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
    multiline_string = Regex('""".*?"*"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
    # single quoted line string
    quoted_string = Regex(r'"(?:[^"\\\n]|\\.)*"[ \t]*', re.UNICODE).setParseAction(create_quoted_string)
    # unquoted string that takes the rest of the line until an optional comment
    # we support .properties multiline support which is like this:
    #   line1 \
    #   line2 \
    # so a backslash precedes the \n
    unquoted_string = Regex(r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*', re.UNICODE).setParseAction(unescape_string)
    substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
    string_expr = multiline_string | quoted_string | unquoted_string

    value_expr = number_expr | true_expr | false_expr | null_expr | string_expr

    include_content = (quoted_string | ((Keyword('url') | Keyword('file')) - Literal('(').suppress() - quoted_string - Literal(')').suppress()))
    include_expr = (
        Keyword("include", caseless=True).suppress() + (
            include_content | (
                Keyword("required") - Literal('(').suppress() - include_content - Literal(')').suppress()
            )
        )
    ).setParseAction(include_config)

    root_dict_expr = Forward()
    dict_expr = Forward()
    list_expr = Forward()
    multi_value_expr = ZeroOrMore(comment_eol | include_expr | substitution_expr | dict_expr | list_expr | value_expr | (Literal(
        '\\') - eol).suppress())
    # for a dictionary : or = is optional
    # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
    inside_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
    inside_root_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma), root=True)
    dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
    root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress('}')
    list_entry = ConcatenatedValueParser(multi_value_expr)
    list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

    # special case when we have a value assignment where the string can potentially be the remainder of the line
    assign_expr << Group(
        key - ZeroOrMore(comment_no_comma_eol) - (dict_expr | (Literal('=') | Literal(':') | Literal('+=')) - ZeroOrMore(
            comment_no_comma_eol) - ConcatenatedValueParser(multi_value_expr))
    )

    # the file can be { ... } where {} can be omitted or []
    config_expr = ZeroOrMore(comment_eol | eol) + (list_expr | root_dict_expr | inside_root_dict_expr) + ZeroOrMore(
        comment_eol | eol_comma)
    config = config_expr.parseString(content, parseAll=True)[0]

    if resolve:
        allow_unresolved = resolve and unresolved_value is not DEFAULT_SUBSTITUTION and unresolved_value is not MANDATORY_SUBSTITUTION
        has_unresolved = cls.resolve_substitutions(config, allow_unresolved)
        if has_unresolved and unresolved_value is MANDATORY_SUBSTITUTION:
            raise ConfigSubstitutionException('resolve cannot be set to True and unresolved_value to MANDATORY_SUBSTITUTION')

    if unresolved_value is not NO_SUBSTITUTION and unresolved_value is not DEFAULT_SUBSTITUTION:
        cls.unresolve_substitutions_to_value(config, unresolved_value)
    return config
def policy_lexer(file_name='rule.txt'):
    """Build the policy grammar, parse the file `file_name` and return the result.

    The whole file is read into one string and parsed with the `rule`
    expression defined at the end of this function; the pyparsing result of
    `rule.parseString` is returned.  Several names used below (`real`,
    `integer`, `dict_literal`, `list_literal`, `tuple_literal`, `lparen`,
    `rparen`) are not defined in this function.
    """
    # defines ip_address
    ip_address = Word(nums) + ('.' + Word(nums)) * 3
    # defines identifier
    underscore_id = Word(alphas + '_', alphanums + '_')
    dashed_id = Word(alphas + '-', alphanums + '-')
    identifier = dashed_id ^ underscore_id
    # print(identifier.parseString('src-ip'))
    # defines comparision
    comp_oper = oneOf("< = > <= >= != ∉ ∈ <> ∅")
    # number = Word(nums)
    percent_number = Word(nums, nums + '%')
    term = '∅' | percent_number | identifier
    comparison_expr = term + comp_oper + term
    # print(comparison_expr.parseString('E <> ∅'))
    # define a func_call
    func_call = Forward()
    arg_expr = identifier | real | integer | dict_literal | list_literal | tuple_literal | func_call
    named_arg = identifier + '=' + arg_expr
    func_arg = named_arg | ip_address | arg_expr
    # print(delimitedList(Group(func_arg)).parseString('l, t'))
    func_call << identifier + '(' + Optional(delimitedList(
        Group(func_arg))) + ')'
    # print(func_call.parseString('Link-Flooded() '))
    # ActiveSDN Policy BNF
    # using rate > 50%
    operator = oneOf('OR or ; || && ->')
    # NOTE(review): `1 - 10` is evaluated as the integer -9 before srange is
    # called, so this is probably not the range that was intended; `weight`
    # is not used anywhere below.  Confirm the intent.
    weight = srange(1 - 10)
    action_attribution = delimitedList(Group(comparison_expr)) | identifier
    # print(action_attribution.parseString('rate > 50%'))
    # BY IDS - App
    # BY Switch < 1.1.1.1 >
    # BY FIREWALL < 1.5.6.4, “admin” >
    as_using_param = identifier + '<' + delimitedList(Group(func_arg)) + '>'
    actuator_spec = identifier ^ delimitedList(Group(as_using_param))
    # print(actuator_spec.parseString('IDS-App'))
    # print(actuator_spec.parseString('Switch < 1.1.1.1 >'))
    # print(actuator_spec.parseString('FIREWALL < 1, admin >'))
    # Object = oneOf('files flows links machines')
    object = identifier
    # Only the firewall actions are a fixed vocabulary; every other action
    # kind currently accepts any identifier.
    fw__actions = oneOf('ACCEPT DENY REDIRECT')
    snmp_get_action = identifier
    log_audit_action = identifier
    splunk_action = identifier
    camera_actions = identifier
    ids_actions = identifier
    ipsec_action = identifier
    proxy_action = identifier
    investigation_action = snmp_get_action | log_audit_action | splunk_action | camera_actions
    config_action = fw__actions | ids_actions | ipsec_action | proxy_action
    action = config_action | investigation_action
    # `+` binds tighter than `^`, so this reads
    # (delimitedList(func_arg) + operator + comparison_expr) ^ func_arg.
    outcome_value = delimitedList(
        func_arg) + operator + comparison_expr ^ func_arg
    value = delimitedList(Group(outcome_value)) | identifier
    # print(value.parseString('P,l && P <> 0'))
    # OF(proto=ICMP or UDP in P)
    # OF E
    # OF(src_ip ∈ N)
    # OF(src_ip ∈ WHITE - LIST)
    # OF src_ip ∉ W
    multiple_flow_attributes = func_arg + operator + identifier
    single_flow_attribute = func_arg | identifier
    flow_attributes = multiple_flow_attributes | single_flow_attribute
    # OF(Reachable(L) and dport = 80)
    link_attributes = func_call + operator + func_arg | func_call | func_arg | identifier
    # print(link_attributes.parseString('Reachable(L) && dport = 80'))
    # TODO we will define them later
    file_attributes = identifier
    machine_attributes = identifier
    attributes = flow_attributes ^ file_attributes ^ link_attributes ^ machine_attributes
    attribute_values = ZeroOrMore(lparen) \
        + (comparison_expr ^ (attributes + 'in' + identifier) ^ attributes ^ identifier) \
        + ZeroOrMore(rparen)
    obj_attribute_values = delimitedList(Group(attribute_values))
    # print(obj_attribute_values.parseString('(Reachable(L) && dport = 80)'))
    keyword = oneOf('DO ON OF BY USING FOR OUTCOME UNTIL')
    goal = identifier
    event__exp = Forward()
    # One clause per keyword of an action specification.
    do_action = 'DO' + action
    on_object = 'ON' + object
    of_obj_attribute_values = 'OF' + obj_attribute_values
    by_actuator_spec = 'BY' + actuator_spec
    using_action_attribution = 'USING' + action_attribution
    for_goal = 'FOR' + goal
    # Rebinds `outcome_value`; `value` above was built from the earlier one.
    outcome_value = 'OUTCOME' + value
    action_spec = do_action + on_object + of_obj_attribute_values + by_actuator_spec + using_action_attribution \
        + for_goal + outcome_value
    action_spec = Group(delimitedList(action_spec))
    # Courses of action: action specs combined with binary, left-associative
    # operators, listed here in the order passed to infixNotation.
    coas = infixNotation(action_spec, [
        (oneOf('OR ||'), 2, opAssoc.LEFT),
        (oneOf('&& ;'), 2, opAssoc.LEFT),
    ])
    coas_spec = Forward()
    if_then_else = 'IF' + Group(
        delimitedList(identifier ^ comparison_expr)) + 'THEN' + Group(
        delimitedList(coas_spec)) + 'ELSE' + Group(
        delimitedList(coas_spec))
    # coas_spec << (if_then_else ^ coas)
    coas_spec << ((coas + operator + if_then_else) ^
                  (if_then_else) ^ (coas))
    # All three context expressions are currently plain function calls.
    temp_context_exp = func_call
    config_context_exp = func_call
    dynamic_context_exp = func_call
    context_exp = Group(temp_context_exp | config_context_exp | dynamic_context_exp).setName('exp')
    # A rule: optionally parenthesised context, an operator, then the
    # comma separated courses of action.
    rule = ZeroOrMore(lparen) + context_exp + ZeroOrMore(
        rparen) + operator + Group(delimitedList(coas_spec))
    policy_string = ''
    with open(file_name) as f:
        content = f.read()
        policy_string += content
    # print(policy_string)
    parsed_policy = rule.parseString(policy_string)
    # print(parsed_policy)
    return parsed_policy
param.setParseAction(paramfun)

# Build a literal for a backslash followed by the single character `c`.
def bs(c): return Literal("\\" + c)

# Single-character commands: a backslash followed by one punctuation
# character (or a newline).
singles = bs("[") | bs("]") | bs("{") | bs("}") | bs("\\") | bs("&") | bs("_") | bs(",") | bs("#") | bs("\n") | bs(";") | bs("|") | bs("%") | bs("*") | bs("~") | bs("^")

# A command is either one of the singles or a backslash followed by at least
# one letter/digit (min = 2 counts the backslash), then its arguments and
# parameters.  `+` binds tighter than `<<`, so the whole right-hand side is
# assigned to `texcmd`.
texcmd << (singles | Word("\\", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", min = 2)) + ZeroOrMoreAsList(arg) + ZeroOrMoreAsList(param)

# Wrap every matched command in a TexCmd object.
def texcmdfun(s, loc, toks):
    return TexCmd(s, loc, toks)
texcmd.setParseAction(texcmdfun)

#legal = "".join([chr(x) for x in set(range(32, 127)) - set(backslash)])
#filler = Word(legal)

# A document is any mix of math, commands and filler text up to end of input.
document = ZeroOrMore(dollarmath | texcmd | filler) + StringEnd().suppress()

# Disabled debugging aid: parse a sample string, dump the tokens and exit.
if 0:
    s = "This is \\\\ test"
    print s
    for t in document.parseString(s):
        if isinstance(t, TexCmd):
            print '====> cmd=[%s]' % t.cmd, t
        else:
            print '====>', t
    sys.exit(-1)

selfstr = open( __file__).read() # Own source as a string. Used as part of hash.
hashbase = hashlib.md5(selfstr)

# NOTE(review): only the opening statements of `tokenize` are visible here;
# the comments below describe just those statements.
def tokenize(filename):
    f = open(filename, "rt")
    def uncomment(s):
        # If the line contains a '%' and no escaped '\%', cut it at the
        # first '%' and terminate it with a newline.
        if '%' in s and not '\\%' in s:
            return s[:s.index('%')] + '\n'
# An option is its definition followed by any number of indented response
# blocks; the matched tokens are passed to Option.
option << (options_definition + ZeroOrMore(indentedBlock(response, indentStack, True))).setParseAction(Option)

# "begin <name> [extends <parent>]": the keywords are dropped and the
# name(s) are kept together in one group.
dialog_begin = Literal('begin').suppress() + Group(atom + Optional(Literal('extends').suppress() + atom))
dialog_end = Literal('end').suppress()
# A dialog is the begin line, indented response blocks, and the end keyword;
# the matched tokens are passed to Dialog.
dialog = (dialog_begin + ZeroOrMore(indentedBlock(response, indentStack, True)) + dialog_end).setParseAction(Dialog)
# Top level: any number of dialogs (indentedBlock called with False as its
# third argument, unlike the nested blocks above).
dialogs = ZeroOrMore(indentedBlock(dialog, indentStack, False))

# Manual smoke test: parse one arithmetic expression and one sample dialog.
if __name__ == '__main__':
    print expression.parseString("1 * 2.5 * 8 * (9 + 3.12 - 1 + 4) / |(2 - -(7 - 9))")
    # NOTE(review): the sample below is reproduced exactly as it appears
    # here; the grammar uses indentedBlock, so confirm that the literal's
    # line breaks and indentation are what the grammar expects.
    print dialogs.parseString("""begin testDialog Hello, this is a greating response! if player:health > 10 then ~ This is option A ~ This is option B with a response Option B Response ~ Yes ~ No, send event -> player!test, -> self!test2 Another possible initial response. ~ Alt Option 1 ~ Alt Option 2 end begin testDialog2 extends testDialog end """)
def split(s):
    """Split `s` into ``[word, number-part]`` pairs.

    Returns an empty list when `s` is None or contains "!".  Otherwise every
    "%" is removed and the text is parsed as a sequence of groups, each made
    of a word (ASCII letters plus æøåÆØÅ) followed by a run of digits,
    slashes and spaces that ends in a digit.
    """
    unusable = s is None or "!" in s
    if unusable:
        return []

    label = Word(alphas+"æøåÆØÅ")
    amount = Regex(r"[0-9/ ]*\d")
    grammar = ZeroOrMore(Group(label + amount))

    cleaned = s.replace("%", "")
    return grammar.parseString(cleaned).asList()
class SFZParser(object):
    """Pyparsing-based parser that fills a parser state from SFZ text.

    The grammar recognises three kinds of statement -- ``<section>`` headers,
    ``name=value`` opcodes and ``#include "file"`` directives -- and ignores
    ``//`` comments.  Each statement is handled by a parse action that
    mutates ``self.state``.
    """

    def __init__(self, path, text, state=None):
        """Build the grammar.

        :param path: path of the file being parsed; its directory is the base
            for resolving ``#include`` paths
        :param text: the text to parse
        :param state: parser state shared with an including parser (used by
            ``handle_include``); ``parse()`` replaces it with a fresh one
        """
        self.path = path
        self.base_path = os.path.dirname(path)
        self.text = text
        self.state = state

        opcode_name = Word(alphanums + '_')
        # The value runs lazily up to (not including) the next opcode name,
        # a comment, a section header or the end of the line.
        value = Regex(r'.*?(?=\s*(([a-zA-Z0-9_]+=)|//|<[a-z]|$))',
                      re.MULTILINE)
        opcode = locatedExpr(opcode_name) + Literal('=').suppress() + value
        opcode.setParseAction(self.handle_opcode)

        section_name = (Literal('<').suppress() + Word(alphas) +
                        Literal('>').suppress())
        section = section_name
        section.setParseAction(self.handle_section)

        include = (Literal('#include').suppress() +
                   locatedExpr(QuotedString('"')))
        include.setParseAction(self.handle_include)

        statement = (section ^ opcode ^ include)
        self.sfz_file = ZeroOrMore(statement) + stringEnd

        comment = Literal('//') + restOfLine
        self.sfz_file.ignore(comment)

    def handle_include(self, s, loc, toks):
        """Parse action for ``#include``: parse the named file in place.

        The included file is read relative to ``self.base_path`` and parsed
        by a sub-parser that shares this parser's state.

        :raises IncludeException: if the file cannot be read
        """
        path = os.path.join(self.base_path, normalize_path(toks[0].value))
        try:
            with open(path) as fp:
                f = fp.read()
        except IOError as exc:
            raise IncludeException(
                s, loc=toks[0].locn_start, msg=str(exc))
        subparser = SFZParser(path, f, self.state)
        subparser.sfz_file.parseString(f)

    def handle_section(self, s, loc, toks):
        """Parse action for a section header: open a new current section.

        ``region`` sections are appended to the instrument's regions and
        linked to the current group and control sections; ``group`` and
        ``control`` sections become the current group / control.

        :raises InvalidSectionException: for any other section name
        """
        name = toks[0]
        if name == 'region':
            section = Region(self.state.instr, name,
                             group=self.state.current_group,
                             control=self.state.current_control)
            self.state.instr.regions.append(section)
        elif name == 'group':
            section = Section(self.state.instr, name)
            self.state.current_group = section
        elif name == 'control':
            section = Section(self.state.instr, name)
            self.state.current_control = section
        else:
            raise InvalidSectionException(
                s, loc, "Invalid section name '%s'" % name)
        self.state.current_section = section

    def handle_opcode(self, s, loc, toks):
        """Parse action for ``name=value``: store it on the current section.

        The value is converted by the opcode's definition from ``opmap``; the
        source string and location are recorded alongside it.

        :raises UnknownOpCodeException: if the opcode is not in ``opmap``
        :raises InvalidValueException: if the value cannot be converted
        """
        # Report errors at the opcode name itself, not at the statement start.
        loc = toks[0].locn_start
        name = toks[0].value
        try:
            opdef = opmap[name]
        except KeyError:
            # The messages must use `name`; there is no `key` in this scope.
            raise UnknownOpCodeException(
                s, loc=loc, msg="Unknown opcode '%s'" % name)
        try:
            value = opdef.parse_value(toks[1])
        except ValueError as exc:
            raise InvalidValueException(
                s, loc=loc,
                msg="Invalid value for opcode '%s': %s" % (name, str(exc)))
        self.state.current_section._opcodes[name] = value
        self.state.current_section._opcode_locs[name] = (s, loc)

    def parse(self):
        """Parse ``self.text`` and return the resulting instrument.

        Starts from a fresh parser state, runs the grammar, then checks that
        every region's sample exists as a file.

        :raises SampleMissingException: if a region's sample is not a file
        """
        self.state = ParserState()
        self.state.instr = Instrument(os.path.abspath(self.path))
        self.sfz_file.parseString(self.text)
        for region in self.state.instr.regions:
            if not os.path.isfile(region.sample):
                s, loc = region.get_opcode_loc('sample')
                raise SampleMissingException(
                    s, loc, "Missing sample '%s'" % region.sample)
        return self.state.instr
# Each draw-to command is a Group of its command letter plus its arguments.
ellipticalArc = Group(Command("A") + Arguments(Sequence(ellipticalArcArgument)))
smoothQuadraticBezierCurveto = Group(Command("T") + Arguments(coordinatePairSequence))
quadraticBezierCurveto = Group(Command("Q") + Arguments(coordinatePairPairSequence))
smoothCurve = Group(Command("S") + Arguments(coordinatePairPairSequence))
#curve = Group(Command("C") + Arguments(coordinatePairTripleSequence))
horizontalLine = Group(Command("H") + Arguments(coordinateSequence))
verticalLine = Group(Command("V") + Arguments(coordinateSequence))

# NOTE(review): `curve` (like lineTo, moveTo and closePath) is not defined in
# this excerpt; the definition just above is commented out, so it presumably
# comes from earlier in the file -- confirm.
drawToCommand = (
    lineTo | moveTo | closePath | ellipticalArc | smoothQuadraticBezierCurveto |
    quadraticBezierCurveto | smoothCurve | curve | horizontalLine | verticalLine
    )

#~ number.debug = True
# A sub-path is a moveTo followed by any number of draw-to commands; the full
# parser accepts any number of sub-paths.
moveToDrawToCommands = moveTo + ZeroOrMore(drawToCommand)

parser = ZeroOrMore(moveToDrawToCommands)
parser.keepTabs = True

import sys
if __name__ == "__main__":
    # EX:
    # print parser.parseString(
    #     "M 242.96145,653.59282 L 244.83646,650.1553 L 247.02397,649.8428 "
    #     "L 247.33647,650.62405 L 245.30521,653.59282 L 242.96145,653.59282 z "
    #     "M 252.80525,649.99905 L 258.74278,652.49906 L 260.77404,652.18656 "
    #     "L 262.33654,648.43654 L 261.71154,645.15528 L 257.64902,644.68653 "
    #     "L 253.74275,646.40528 L 252.80525,649.99905 z M 282.49289,659.6866 "
    #     "L 286.08665,664.99912 L 288.43041,664.68662 L 289.52417,664.21787 "
    #     "L 290.93042,665.46787 L 294.52419,665.31162 L 295.4617,663.90537 "
    #     "L 292.64918,662.18661 L 290.77417,658.59284 L 288.74291,655.15533 "
    #     "L 283.11789,657.96784 L 282.49289,659.6866 z M 302.02423,668.28039 "
    #     "L 303.27423,666.40538 L 307.8055,667.34288 L 308.43051,666.87413 "
    #     "L 314.36803,667.49913 L 314.05553,668.74914 L 311.55552,670.15539 "
    #     "L 307.33675,669.84289 L 302.02423,668.28039 z M 307.1805,673.28041 "
    #     "L 309.05551,677.03043 L 312.02427,675.93667 L 312.33677,674.37416 "
    #     "L 310.77427,672.3429 L 307.1805,672.0304 L 307.1805,673.28041 z "
    #     "M 313.89928,672.18665 L 316.08679,669.37414 L 320.61806,671.7179 "
    #     "L 324.83683,672.81166 L 329.0556,675.46792 L 329.0556,677.34293 "
    #     "L 325.61809,679.06169 L 320.93056,679.99919 L 318.5868,678.59293 "
    #     "L 313.89928,672.18665 z M 329.99311,687.18672 L 331.55561,685.93672 "
    #     "L 334.83688,687.49923 L 342.18066,690.93674 L 345.46193,692.968 "
    #     "L 347.02443,695.31176 L 348.89944,699.53053 L 352.80571,702.03054 "
    #     "L 352.49321,703.28055 L 348.74319,706.40556 L 344.68067,707.81182 "
    #     "L 343.27442,707.18682 L 340.30565,708.90557 L 337.96189,712.03059 "
    #     "L 335.77438,714.8431 L 334.05562,714.68685 L 330.61811,712.18684 "
    #     "L 330.30561,707.81182 L 330.93061,705.46806 L 329.3681,699.99928 "
    #     "L 327.33684,698.28052 L 327.18059,695.78051 L 329.3681,694.84301 "
    #     "L 331.39936,691.87425 L 331.86811,690.93674 L 330.30561,689.21798 "
    #     "L 329.99311,687.18672 z ")
    # Parse the path data given as the first command-line argument.
    print parser.parseString(sys.argv[1])
if __name__ == "__main__":
    # Demo: parse a fixed sample of path data (several closed sub-paths) and
    # print the result.  The fragments are joined without a separator; each
    # one already ends in the space that separates it from the next.
    segments = [
        "M 242.96145,653.59282 L 244.83646,650.1553 L 247.02397,649.8428 ",
        "L 247.33647,650.62405 L 245.30521,653.59282 L 242.96145,653.59282 z ",
        "M 252.80525,649.99905 L 258.74278,652.49906 L 260.77404,652.18656 ",
        "L 262.33654,648.43654 L 261.71154,645.15528 L 257.64902,644.68653 ",
        "L 253.74275,646.40528 L 252.80525,649.99905 z M 282.49289,659.6866 ",
        "L 286.08665,664.99912 L 288.43041,664.68662 L 289.52417,664.21787 ",
        "L 290.93042,665.46787 L 294.52419,665.31162 L 295.4617,663.90537 ",
        "L 292.64918,662.18661 L 290.77417,658.59284 L 288.74291,655.15533 ",
        "L 283.11789,657.96784 L 282.49289,659.6866 z M 302.02423,668.28039 ",
        "L 303.27423,666.40538 L 307.8055,667.34288 L 308.43051,666.87413 ",
        "L 314.36803,667.49913 L 314.05553,668.74914 L 311.55552,670.15539 ",
        "L 307.33675,669.84289 L 302.02423,668.28039 z M 307.1805,673.28041 ",
        "L 309.05551,677.03043 L 312.02427,675.93667 L 312.33677,674.37416 ",
        "L 310.77427,672.3429 L 307.1805,672.0304 L 307.1805,673.28041 z ",
        "M 313.89928,672.18665 L 316.08679,669.37414 L 320.61806,671.7179 ",
        "L 324.83683,672.81166 L 329.0556,675.46792 L 329.0556,677.34293 ",
        "L 325.61809,679.06169 L 320.93056,679.99919 L 318.5868,678.59293 ",
        "L 313.89928,672.18665 z M 329.99311,687.18672 L 331.55561,685.93672 ",
        "L 334.83688,687.49923 L 342.18066,690.93674 L 345.46193,692.968 ",
        "L 347.02443,695.31176 L 348.89944,699.53053 L 352.80571,702.03054 ",
        "L 352.49321,703.28055 L 348.74319,706.40556 L 344.68067,707.81182 ",
        "L 343.27442,707.18682 L 340.30565,708.90557 L 337.96189,712.03059 ",
        "L 335.77438,714.8431 L 334.05562,714.68685 L 330.61811,712.18684 ",
        "L 330.30561,707.81182 L 330.93061,705.46806 L 329.3681,699.99928 ",
        "L 327.33684,698.28052 L 327.18059,695.78051 L 329.3681,694.84301 ",
        "L 331.39936,691.87425 L 331.86811,690.93674 L 330.30561,689.21798 ",
        "L 329.99311,687.18672 z ",
    ]
    s = "".join(segments)
    parsed = path.parseString(s)
    print(parsed)
def parse_headers(headers):
    """Parse *headers* as zero or more consecutive ``p4_header()`` matches.

    Returns the pyparsing result of parsing the string.
    """
    grammar = ZeroOrMore(p4_header())
    return grammar.parseString(headers)
test_string = """ key first_key_got_gone { }; view "internal" { zone "first_zone" { }; zone "second_zone" { }; zone "third_zone" { }; }; key second_key { }; view "external" { zone "fourth_zone" { }; }; """ # pp = pprint.PrettyPrinter(width=4, compact=True, indent=4) pp = pprint.PrettyPrinter(compact=True, indent=4) result = clause_statements.parseString(test_string, parseAll=True) print("\nresult:", result) print("\nresult.asDict():", result.asDict()) print("\nPretty(result.asDict():") pp.pprint(result.asDict())
def parse_actions(actions):
    """Parse *actions* as zero or more consecutive ``p4_action()`` matches.

    Returns the pyparsing result of parsing the string.
    """
    grammar = ZeroOrMore(p4_action())
    return grammar.parseString(actions)
def parse(content, basedir=None, resolve=True):
    """parse a HOCON content

    :param content: HOCON content to parse
    :type content: basestring
    :param basedir: directory that included file paths are joined onto;
        when None the paths are used as written
    :param resolve: If true, resolve substitutions
    :type resolve: boolean
    :return: a ConfigTree or a list
    """

    def norm_string(value):
        # Apply every configured escape replacement to the string.
        for k, v in ConfigParser.REPLACEMENTS.items():
            value = value.replace(k, v)
        return value

    def unescape_string(tokens):
        return ConfigUnquotedString(norm_string(tokens[0]))

    def parse_multi_string(tokens):
        # remove the first and last 3 "
        return tokens[0][3:-3]

    def convert_number(tokens):
        # Prefer int; fall back to float for decimals and exponents.
        n = tokens[0]
        try:
            return int(n)
        except ValueError:
            return float(n)

    # ${path} or ${?path} for optional substitution
    # (raw string: the backslashes are regex escapes, not string escapes)
    SUBSTITUTION_PATTERN = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)"

    def create_substitution(instring, loc, token):
        # remove the ${ and }
        match = re.match(SUBSTITUTION_PATTERN, token[0])
        variable = match.group('variable')
        ws = match.group('ws')
        optional = match.group('optional') == '?'
        substitution = ConfigSubstitution(variable, optional, ws, instring, loc)
        return substitution

    # "value" followed by optional trailing spaces/tabs
    STRING_PATTERN = '(")(?P<value>[^"]*)\\1(?P<ws>[ \t]*)'

    def create_quoted_string(instring, loc, token):
        # strip the surrounding quotes and keep the trailing whitespace apart
        match = re.match(STRING_PATTERN, token[0])
        value = norm_string(match.group('value'))
        ws = match.group('ws')
        return ConfigQuotedString(value, ws, instring, loc)

    def include_config(token):
        # include_expr yields either one token (the quoted target) or two
        # (the url/file keyword plus the quoted target), so exactly one of
        # `url` / `file` ends up set below.
        url = None
        file = None
        if len(token) == 1:  # include "test"
            value = token[0].value if isinstance(token[0], ConfigQuotedString) else token[0]
            if value.startswith("http://") or value.startswith("https://") or value.startswith("file://"):
                url = value
            else:
                file = value
        elif len(token) == 2:  # include url("test") or file("test")
            value = token[1].value if isinstance(token[1], ConfigQuotedString) else token[1]
            if token[0] == 'url':
                url = value
            else:
                file = value

        if url is not None:
            logger.debug('Loading config from url %s', url)
            obj = ConfigFactory.parse_URL(url, resolve=False)

        if file is not None:
            path = file if basedir is None else os.path.join(basedir, file)
            logger.debug('Loading config from file %s', path)
            obj = ConfigFactory.parse_file(path, required=False, resolve=False)

        return ConfigInclude(obj if isinstance(obj, list) else obj.items())

    # Newlines are significant in this grammar, so only spaces and tabs are
    # skipped as whitespace.
    ParserElement.setDefaultWhitespaceChars(' \t')

    assign_expr = Forward()
    true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True))
    false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False))
    null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(NoneValue()))
    key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- ')

    eol = Word('\n\r').suppress()
    eol_comma = Word('\n\r,').suppress()
    comment = (Literal('#') | Literal('//')) - SkipTo(eol | StringEnd())
    comment_eol = Suppress(Optional(eol_comma) + comment)
    comment_no_comma_eol = (comment | eol).suppress()
    number_expr = Regex(
        r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE]\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
        re.DOTALL).setParseAction(convert_number)

    # multi line string using """
    # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
    multiline_string = Regex('""".*?"*"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
    # single quoted line string
    quoted_string = Regex('".*?"[ \t]*', re.UNICODE).setParseAction(create_quoted_string)
    # unquoted string that takes the rest of the line until an optional comment
    # we support .properties multiline support which is like this:
    # line1 \
    # line2 \
    # so a backslash precedes the \n
    unquoted_string = Regex(r'(?:\\|[^\[\{\s\]\}#,=\$])+[ \t]*').setParseAction(unescape_string)
    substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
    string_expr = multiline_string | quoted_string | unquoted_string

    value_expr = number_expr | true_expr | false_expr | null_expr | string_expr

    include_expr = (
        Keyword("include", caseless=True).suppress() + (
            quoted_string | (
                (Keyword('url') | Keyword('file'))
                - Literal('(').suppress() - quoted_string - Literal(')').suppress()
            )
        )
    ).setParseAction(include_config)

    root_dict_expr = Forward()
    dict_expr = Forward()
    list_expr = Forward()
    multi_value_expr = ZeroOrMore(
        comment_eol | include_expr | substitution_expr | dict_expr | list_expr | value_expr
        | (Literal('\\') - eol).suppress())
    # for a dictionary : or = is optional
    # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
    inside_dict_expr = ConfigTreeParser(
        ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
    inside_root_dict_expr = ConfigTreeParser(
        ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma), root=True)
    dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
    root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress('}')
    list_entry = ConcatenatedValueParser(multi_value_expr)
    list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

    # special case when we have a value assignment where the string can potentially be the remainder of the line
    assign_expr << Group(
        key - ZeroOrMore(comment_no_comma_eol) - (
            dict_expr | (
                (Literal('=') | Literal(':') | Literal('+='))
                - ZeroOrMore(comment_no_comma_eol)
                - ConcatenatedValueParser(multi_value_expr)
            )
        )
    )

    # the file can be { ... } where {} can be omitted or []
    config_expr = ZeroOrMore(comment_eol | eol) + (
        list_expr | root_dict_expr | inside_root_dict_expr) + ZeroOrMore(comment_eol | eol_comma)
    config = config_expr.parseString(content, parseAll=True)[0]
    if resolve:
        ConfigParser.resolve_substitutions(config)
    return config
def texcmdfun(s, loc, toks):
    """Parse action for `texcmd`: wrap the match in a TexCmd object."""
    return TexCmd(s, loc, toks)


texcmd.setParseAction(texcmdfun)

#legal = "".join([chr(x) for x in set(range(32, 127)) - set(backslash)])
#filler = Word(legal)
# The whole input: any mix of math, commands and filler up to end of string.
document = ZeroOrMore(dollarmath | texcmd | filler) + StringEnd().suppress()

# Disabled debugging aid: the `if 0` guard means this never runs.
if 0:
    s = "This is \\\\ test"
    print s
    for t in document.parseString(s):
        if isinstance(t, TexCmd):
            print '====> cmd=[%s]' % t.cmd, t
        else:
            print '====>', t
    sys.exit(-1)

selfstr = open( __file__).read() # Own source as a string. Used as part of hash.
hashbase = hashlib.md5(selfstr)


# NOTE(review): this excerpt ends right after the header of the nested
# uncomment() helper; the rest of tokenize() is not visible here.
def tokenize(filename):
    f = open(filename, "rt")

    def uncomment(s):
def parse(cls, content, basedir=None, resolve=True, unresolved_value=DEFAULT_SUBSTITUTION):
    """parse a HOCON content

    :param content: HOCON content to parse
    :type content: basestring
    :param basedir: directory that included file paths are joined onto;
        when None the paths are used as written
    :param resolve: if true, resolve substitutions
    :type resolve: boolean
    :param unresolved_value: assigned value to unresolved substitution.
    If overridden with a default value, it will replace all unresolved values by the default value.
    If it is set to pyhocon.STR_SUBSTITUTION then it will replace the value by its substitution expression (e.g., ${x})
    :type unresolved_value: one of the *_SUBSTITUTION sentinels or a replacement value
    :return: a ConfigTree or a list
    """

    unescape_pattern = re.compile(r'\\.')

    def replace_escape_sequence(match):
        # Unknown escape sequences are left untouched.
        value = match.group(0)
        return cls.REPLACEMENTS.get(value, value)

    def norm_string(value):
        return unescape_pattern.sub(replace_escape_sequence, value)

    def unescape_string(tokens):
        return ConfigUnquotedString(norm_string(tokens[0]))

    def parse_multi_string(tokens):
        # remove the first and last 3 "
        return tokens[0][3:-3]

    def convert_number(tokens):
        # Prefer a base-10 int; fall back to float for decimals and exponents.
        n = tokens[0]
        try:
            return int(n, 10)
        except ValueError:
            return float(n)

    def convert_period(tokens):
        period_value = int(tokens.value)
        period_identifier = tokens.unit

        # Find the canonical unit whose list of spellings contains the one
        # that was matched.
        period_unit = next((single_unit for single_unit, values
                            in cls.get_supported_period_type_map().items()
                            if period_identifier in values))

        return period(period_value, period_unit)

    # ${path} or ${?path} for optional substitution
    SUBSTITUTION_PATTERN = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)"

    def create_substitution(instring, loc, token):
        # remove the ${ and }
        match = re.match(SUBSTITUTION_PATTERN, token[0])
        variable = match.group('variable')
        ws = match.group('ws')
        optional = match.group('optional') == '?'
        substitution = ConfigSubstitution(variable, optional, ws, instring, loc)
        return substitution

    # "value" (backslash escapes allowed inside) followed by optional
    # trailing spaces/tabs
    STRING_PATTERN = '"(?P<value>(?:[^"\\\\]|\\\\.)*)"(?P<ws>[ \t]*)'

    def create_quoted_string(instring, loc, token):
        # strip the surrounding quotes and keep the trailing whitespace apart
        match = re.match(STRING_PATTERN, token[0])
        value = norm_string(match.group('value'))
        ws = match.group('ws')
        return ConfigQuotedString(value, ws, instring, loc)

    def include_config(instring, loc, token):
        url = None
        file = None
        required = False

        # include required(...) puts the 'required' keyword first.
        if token[0] == 'required':
            required = True
            final_tokens = token[1:]
        else:
            final_tokens = token

        if len(final_tokens) == 1:  # include "test"
            value = final_tokens[0].value if isinstance(final_tokens[0], ConfigQuotedString) else final_tokens[0]
            if value.startswith("http://") or value.startswith("https://") or value.startswith("file://"):
                url = value
            else:
                file = value
        elif len(final_tokens) == 2:  # include url("test") or file("test")
            value = final_tokens[1].value if isinstance(final_tokens[1], ConfigQuotedString) else final_tokens[1]
            if final_tokens[0] == 'url':
                url = value
            elif final_tokens[0] == 'package':
                file = cls.resolve_package_path(value)
            else:
                file = value

        if url is not None:
            logger.debug('Loading config from url %s', url)
            obj = ConfigFactory.parse_URL(
                url,
                resolve=False,
                required=required,
                unresolved_value=NO_SUBSTITUTION
            )
        elif file is not None:
            path = file if basedir is None else os.path.join(basedir, file)

            def _make_prefix(path):
                return ('<root>' if path is None else '[%s]' % path).ljust(55).replace('\\', '/')

            _prefix = _make_prefix(path)

            def _load(path):
                _prefix = _make_prefix(path)
                logger.debug('%s Loading config from file %r', _prefix, path)
                obj = ConfigFactory.parse_file(
                    path,
                    resolve=False,
                    required=required,
                    unresolved_value=NO_SUBSTITUTION
                )
                logger.debug('%s Result: %s', _prefix, obj)
                return obj

            if '*' in path or '?' in path:
                # Glob include: load every match and merge them in order.
                paths = glob(path, recursive=True)
                obj = None

                def _merge(a, b):
                    if a is None or b is None:
                        return a or b
                    elif isinstance(a, ConfigTree) and isinstance(b, ConfigTree):
                        return ConfigTree.merge_configs(a, b)
                    elif isinstance(a, list) and isinstance(b, list):
                        return a + b
                    else:
                        raise ConfigException(
                            'Unable to make such include (merging unexpected types: {a} and {b}',
                            a=type(a), b=type(b))

                logger.debug('%s Loading following configs: %s', _prefix, paths)
                for p in paths:
                    obj = _merge(obj, _load(p))
                logger.debug('%s Result: %s', _prefix, obj)
                # NOTE(review): when the pattern matches no file, obj is still
                # None here and the return below fails on obj.items() --
                # confirm the intended behaviour for an empty glob.
            else:
                logger.debug('%s Loading single config: %s', _prefix, path)
                obj = _load(path)
        else:
            raise ConfigException('No file or URL specified at: {loc}: {instring}', loc=loc, instring=instring)

        return ConfigInclude(obj if isinstance(obj, list) else obj.items())

    @contextlib.contextmanager
    def set_default_white_spaces():
        # Newlines are significant in this grammar, so only spaces and tabs
        # are skipped while it is in use.  The setting is process-wide in
        # pyparsing, hence the try/finally: it must be restored even when
        # parsing or resolution raises.
        default = ParserElement.DEFAULT_WHITE_CHARS
        ParserElement.setDefaultWhitespaceChars(' \t')
        try:
            yield
        finally:
            ParserElement.setDefaultWhitespaceChars(default)

    with set_default_white_spaces():
        assign_expr = Forward()
        true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True))
        false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False))
        null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(NoneValue()))
        key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- /')

        eol = Word('\n\r').suppress()
        eol_comma = Word('\n\r,').suppress()
        comment = (Literal('#') | Literal('//')) - SkipTo(eol | StringEnd())
        comment_eol = Suppress(Optional(eol_comma) + comment)
        comment_no_comma_eol = (comment | eol).suppress()
        number_expr = Regex(
            r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
            re.DOTALL).setParseAction(convert_number)
        # Flatten the list of lists with unit strings.
        period_types = list(itertools.chain.from_iterable(cls.get_supported_period_type_map().values()))
        # `Or()` tries to match the longest expression if more expressions
        # are matching. We employ this to match e.g.: 'weeks' so that we
        # don't end up with 'w' and 'eeks'. Note that 'weeks' but also 'w'
        # are valid unit identifiers.
        # Allow only spaces as a valid separator between value and unit.
        # E.g. \t as a separator is invalid: '10<TAB>weeks'.
        period_expr = (
            Word(nums)('value') + ZeroOrMore(White(ws=' ')).suppress() + Or(period_types)('unit') +
            WordEnd(alphanums).suppress()
        ).setParseAction(convert_period)

        # multi line string using """
        # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
        multiline_string = Regex('""".*?"*"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
        # single quoted line string
        quoted_string = Regex(r'"(?:[^"\\\n]|\\.)*"[ \t]*', re.UNICODE).setParseAction(create_quoted_string)
        # unquoted string that takes the rest of the line until an optional comment
        # we support .properties multiline support which is like this:
        # line1 \
        # line2 \
        # so a backslash precedes the \n
        unquoted_string = Regex(
            r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*',
            re.UNICODE).setParseAction(unescape_string)
        substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
        string_expr = multiline_string | quoted_string | unquoted_string

        value_expr = period_expr | number_expr | true_expr | false_expr | null_expr | string_expr

        include_content = (
            quoted_string | (
                (Keyword('url') | Keyword('file') | Keyword('package'))
                - Literal('(').suppress() - quoted_string - Literal(')').suppress()
            )
        )
        include_expr = (
            Keyword("include", caseless=True).suppress() + (
                include_content | (
                    Keyword("required") - Literal('(').suppress() - include_content - Literal(')').suppress()
                )
            )
        ).setParseAction(include_config)

        root_dict_expr = Forward()
        dict_expr = Forward()
        list_expr = Forward()
        multi_value_expr = ZeroOrMore(
            comment_eol | include_expr | substitution_expr | dict_expr | list_expr | value_expr
            | (Literal('\\') - eol).suppress())
        # for a dictionary : or = is optional
        # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
        inside_dict_expr = ConfigTreeParser(
            ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
        inside_root_dict_expr = ConfigTreeParser(
            ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma), root=True)
        dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
        root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress('}')
        list_entry = ConcatenatedValueParser(multi_value_expr)
        list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

        # special case when we have a value assignment where the string can potentially be the remainder of the line
        assign_expr << Group(
            key - ZeroOrMore(comment_no_comma_eol) - (
                dict_expr | (
                    (Literal('=') | Literal(':') | Literal('+='))
                    - ZeroOrMore(comment_no_comma_eol)
                    - ConcatenatedValueParser(multi_value_expr)
                )
            )
        )

        # the file can be { ... } where {} can be omitted or []
        config_expr = ZeroOrMore(comment_eol | eol) + (
            list_expr | root_dict_expr | inside_root_dict_expr) + ZeroOrMore(comment_eol | eol_comma)
        config = config_expr.parseString(content, parseAll=True)[0]

        if resolve:
            # Unresolved substitutions are tolerated only when the caller
            # supplied an explicit replacement policy.
            allow_unresolved = resolve and unresolved_value is not DEFAULT_SUBSTITUTION \
                and unresolved_value is not MANDATORY_SUBSTITUTION
            has_unresolved = cls.resolve_substitutions(config, allow_unresolved)
            if has_unresolved and unresolved_value is MANDATORY_SUBSTITUTION:
                raise ConfigSubstitutionException(
                    'resolve cannot be set to True and unresolved_value to MANDATORY_SUBSTITUTION')

        if unresolved_value is not NO_SUBSTITUTION and unresolved_value is not DEFAULT_SUBSTITUTION:
            cls.unresolve_substitutions_to_value(config, unresolved_value)
    return config
class SFZParser(object):
    """Pyparsing-based parser that fills a parser state from SFZ text.

    The grammar recognises three kinds of statement -- ``<section>`` headers,
    ``name=value`` opcodes and ``#include "file"`` directives -- and ignores
    ``//`` comments.  Each statement is handled by a parse action that
    mutates ``self.state``.
    """

    def __init__(self, path, text, state=None):
        """Build the grammar.

        :param path: path of the file being parsed; its directory is the base
            for resolving ``#include`` paths
        :param text: the text to parse
        :param state: parser state shared with an including parser (used by
            ``handle_include``); ``parse()`` replaces it with a fresh one
        """
        self.path = path
        self.base_path = os.path.dirname(path)
        self.text = text
        self.state = state

        opcode_name = Word(alphanums + '_')
        # The value runs lazily up to (not including) the next opcode name,
        # a comment, a section header or the end of the line.
        value = Regex(r'.*?(?=\s*(([a-zA-Z0-9_]+=)|//|<[a-z]|$))',
                      re.MULTILINE)
        opcode = locatedExpr(opcode_name) + Literal('=').suppress() + value
        opcode.setParseAction(self.handle_opcode)

        section_name = (Literal('<').suppress() + Word(alphas) +
                        Literal('>').suppress())
        section = section_name
        section.setParseAction(self.handle_section)

        include = (Literal('#include').suppress() +
                   locatedExpr(QuotedString('"')))
        include.setParseAction(self.handle_include)

        statement = (section ^ opcode ^ include)
        self.sfz_file = ZeroOrMore(statement) + stringEnd

        comment = Literal('//') + restOfLine
        self.sfz_file.ignore(comment)

    def handle_include(self, s, loc, toks):
        """Parse action for ``#include``: parse the named file in place.

        The included file is read relative to ``self.base_path`` and parsed
        by a sub-parser that shares this parser's state.

        :raises IncludeException: if the file cannot be read
        """
        path = os.path.join(self.base_path, normalize_path(toks[0].value))
        try:
            with open(path) as fp:
                f = fp.read()
        except IOError as exc:
            raise IncludeException(
                s, loc=toks[0].locn_start, msg=str(exc))
        subparser = SFZParser(path, f, self.state)
        subparser.sfz_file.parseString(f)

    def handle_section(self, s, loc, toks):
        """Parse action for a section header: open a new current section.

        ``region`` sections are appended to the instrument's regions and
        linked to the current group and control sections; ``group`` and
        ``control`` sections become the current group / control.

        :raises InvalidSectionException: for any other section name
        """
        name = toks[0]
        if name == 'region':
            section = Region(self.state.instr, name,
                             group=self.state.current_group,
                             control=self.state.current_control)
            self.state.instr.regions.append(section)
        elif name == 'group':
            section = Section(self.state.instr, name)
            self.state.current_group = section
        elif name == 'control':
            section = Section(self.state.instr, name)
            self.state.current_control = section
        else:
            raise InvalidSectionException(
                s, loc, "Invalid section name '%s'" % name)
        self.state.current_section = section

    def handle_opcode(self, s, loc, toks):
        """Parse action for ``name=value``: store it on the current section.

        The value is converted by the opcode's definition from ``opmap``; the
        source string and location are recorded alongside it.

        :raises UnknownOpCodeException: if the opcode is not in ``opmap``
        :raises InvalidValueException: if the value cannot be converted
        """
        # Report errors at the opcode name itself, not at the statement start.
        loc = toks[0].locn_start
        name = toks[0].value
        try:
            opdef = opmap[name]
        except KeyError:
            # The messages must use `name`; there is no `key` in this scope.
            raise UnknownOpCodeException(
                s, loc=loc, msg="Unknown opcode '%s'" % name)
        try:
            value = opdef.parse_value(toks[1])
        except ValueError as exc:
            raise InvalidValueException(
                s, loc=loc,
                msg="Invalid value for opcode '%s': %s" % (name, str(exc)))
        self.state.current_section._opcodes[name] = value
        self.state.current_section._opcode_locs[name] = (s, loc)

    def parse(self):
        """Parse ``self.text`` and return the resulting instrument.

        Starts from a fresh parser state, runs the grammar, then checks that
        every region's sample exists as a file.

        :raises SampleMissingException: if a region's sample is not a file
        """
        self.state = ParserState()
        self.state.instr = Instrument(os.path.abspath(self.path))
        self.sfz_file.parseString(self.text)
        for region in self.state.instr.regions:
            if not os.path.isfile(region.sample):
                s, loc = region.get_opcode_loc('sample')
                raise SampleMissingException(
                    s, loc, "Missing sample '%s'" % region.sample)
        return self.state.instr