def make_jinja_element_parser(name_parsers, content):
    """Build a parser for a Jinja block element.

    `name_parsers` must be a list of tag name parsers. For example,
    `name_parsers` can be defined as follows in order to parse `if`
    statements:

        name_parsers = [P.string(n) for n in ['if', 'elif', 'else', 'endif']]
    """
    if len(name_parsers) == 1:
        # A single name is a standalone tag: no body parts, no end tag.
        tag = make_jinja_tag_parser(name_parsers[0])
        part = locate(P.seq(
            tag,
            P.success(None),
        )).combine(_combine_jinja_element_part)
        parts = [part]
        end_tag_parser = None
    else:
        # Every name but the last opens a part; the last one (e.g.
        # 'endif') closes the element.
        part_names = name_parsers[:-1]
        first_part = make_jinja_element_part_parser(
            part_names[0], content=content)
        next_parts = [
            make_jinja_element_part_parser(name, content=content).many()
            for name in part_names[1:]
        ]
        parts = [first_part] + next_parts
        end_tag_parser = make_jinja_tag_parser(name_parsers[-1])

    # FIX: use a dedicated local instead of rebinding (shadowing) the
    # `content` parameter, which made the tail of this function misleading.
    element_parsers = [P.seq(*parts)]
    if end_tag_parser:
        element_parsers.append(end_tag_parser)
    return (
        locate(P.seq(*element_parsers))
        .combine(_combine_jinja_element)
    )
def type_sequence_parser() -> Generator:
    """Parse a type sequence: an optional `*seq_var` followed by typed items.

    Returns a TypeSequenceNode. Its location comes from the sequence
    variable when present, otherwise from the first item, otherwise None.
    """
    name = parsers.token('NAME')
    individual_type_variable = (
        # FIXME: Keep track of individual type variables
        parsers.token('BACKTICK') >> name >> parsy.success(None))
    lpar = parsers.token('LPAR')
    rpar = parsers.token('RPAR')
    nested_stack_effect = lpar >> parsers['stack-effect-type'] << rpar
    # FIX: renamed from `type` to avoid shadowing the builtin.
    item_type = (parsers['type'] | individual_type_variable
                 | nested_stack_effect)
    # TODO: Allow type-only items
    item = parsy.seq(
        name,
        (parsers.token('COLON') >> item_type).optional(),
    ).map(_TypeSequenceIndividualTypeNode)
    items = item.many()
    seq_var = parsers.token('STAR') >> name
    seq_var_parsed, i = yield parsy.seq(seq_var.optional(), items)
    seq_var_value = None
    if seq_var_parsed is None and i:
        location = i[0].location
    elif seq_var_parsed is not None:
        location = seq_var_parsed.start
        seq_var_value = seq_var_parsed.value
    else:
        location = None
    return TypeSequenceNode(location, seq_var_value, i)
def make_jinja_element_parser(name_parsers, content):
    """Create a parser for a Jinja element from its tag name parsers."""
    end_tag_parser = None
    if len(name_parsers) == 1:
        # Standalone tag: no body and no closing tag.
        only_tag = make_jinja_tag_parser(name_parsers[0])
        parts = [
            locate(P.seq(only_tag, P.success(None)))
            .combine(_combine_jinja_element_part)
        ]
    else:
        # All names except the last open a part; the last closes the element.
        opening_names = name_parsers[:-1]
        parts = [make_jinja_element_part_parser(opening_names[0],
                                                content=content)]
        parts.extend(
            make_jinja_element_part_parser(part_name, content=content).many()
            for part_name in opening_names[1:]
        )
        end_tag_parser = make_jinja_tag_parser(name_parsers[-1])
    content = [P.seq(*parts)]
    if end_tag_parser:
        content.append(end_tag_parser)
    return locate(P.seq(*content)).combine(_combine_jinja_element)
def dwarfdump():
    """Parse the textual output of a DWARF dump (.debug_info section).

    Generator-style parser: yields one combined parser that produces, per
    dumped file, a list of DwarfCompilationUnit objects, each holding the
    raw DIEs parsed from its body.
    """
    import parsy as p

    def dbg(parser):
        # Debug helper: prints the parser's marked result. Kept for local
        # troubleshooting; not wired into the grammar below.
        return parser.mark().map(lambda x: print(repr(x)))

    newline = p.regex(r'[\r\n]').desc('newline')
    newlines = newline.times(min=1).desc('newlines')
    blank_line = newline.times(min=2).desc('blank line')
    rest_of_line = p.regex(r'.*$', flags=re.MULTILINE)
    quoted_string = p.string('"') >> p.regex(r'[^"]*') << p.string('"')
    hex_number = p.regex(r'0x[0-9a-fA-F]+').map(lambda x: int(x, 0)).desc(
        'hex number')
    decimal_number = p.regex(r'-?[0-9]+').map(lambda x: int(x, 0)).desc(
        'decimal number')
    boolean = (p.string('true')
               | p.string('false')).map(lambda b: b == 'true').desc('boolean')
    dwarf_code = p.string('DW_') >> p.regex(r'\w+')
    null = p.string('NULL')
    # Order matters in this alternation: a hex number followed by a quoted
    # string (an attribute reference) must be tried before a bare hex number.
    attribute_contents = p.alt(
        quoted_string,
        p.seq(hex_number,
              p.whitespace >> quoted_string).combine(_DwarfRawAttributeRef),
        hex_number,
        decimal_number,
        boolean,
        # DWARF expression operations, e.g. `DW_OP_fbreg -8`.
        p.seq(
            p.string('DW_') >> p.regex(r'OP_\w+'),
            (p.whitespace >>
             (hex_number | decimal_number)).optional()).combine(
                 DwarfAttributeOp),
        dwarf_code.map(DwarfAttributeVal),
    )
    # One `DW_AT_...` line: a code followed by its parenthesized contents.
    attribute = p.seq(
        p.whitespace >> dwarf_code,
        p.whitespace >> p.string('(') >> attribute_contents << p.string(')'),
    )
    # A debugging information entry: address, nesting depth (2 spaces per
    # level), tag (or NULL terminator), then its attribute lines.
    die = p.seq(
        address=hex_number << p.string(': '),
        indent=p.regex('( )*').map(lambda i: len(i) // 2),
        tag=null | dwarf_code,
        attributes=(p.string('\n') >> attribute).many().map(dict),
    ).combine_dict(_DwarfRawDie)
    compilation_unit = p.seq(
        addr_size=(p.regex(r'.*addr_size = ') >> hex_number
                   << rest_of_line).desc('compilation unit header'),
        children=blank_line >> die.sep_by(blank_line, min=1),
    ).combine_dict(DwarfCompilationUnit)
    # Skip the file-format banner and section header, then parse the
    # blank-line-separated compilation units for each dumped file.
    return (yield (p.regex(r'.*:\s*file format.*\n\n') >>
                   p.regex(r'.*\.debug_info contents:.*\n') >>
                   compilation_unit.sep_by(blank_line) <<
                   newline.many()).many())
def test_seq(self):
    """`seq` applies parsers in order and fails when any sub-parser fails."""
    empty = seq()
    self.assertEqual(empty.parse(''), [])
    single = seq(letter)
    self.assertEqual(single.parse('a'), ['a'])
    pair = seq(letter, digit)
    self.assertEqual(pair.parse('a1'), ['a', '1'])
    # Order matters: digit-then-letter must not satisfy letter-then-digit.
    self.assertRaises(ParseError, pair.parse, '1a')
def make_attribute_parser(jinja):
    """Parse an attribute: a name optionally followed by `=` and a value."""
    value_parser = make_attribute_value_parser(jinja)
    # The `=value` tail is optional (bare boolean-style attributes).
    equal_and_value = P.seq(
        P.string('=').skip(whitespace).tag('equal'),
        interpolated(value_parser).tag('value'),
    ).map(dict)
    return (
        locate(P.seq(
            interpolated(tag_name).skip(whitespace),
            equal_and_value.optional(),
        ))
        .combine(_combine_attribute)
        .desc('attribute')
    )
def make_attribute_parser(jinja):
    """Parse an attribute name with an optional whitespace-led `=value`."""
    value_parser = make_attribute_value_parser(jinja)
    assignment = P.seq(
        P.string("=").skip(whitespace).tag("equal"),
        interpolated(value_parser).tag("value"),
    ).map(dict)
    return (
        locate(P.seq(
            interpolated(attr_name),
            # Whitespace is required before `=` here; the whole tail is
            # optional for bare attributes.
            whitespace.then(assignment).optional(),
        ))
        .combine(_combine_attribute)
        .desc("attribute")
    )
def description_parser():
    """Parse a JVM method descriptor into (parameter-list string, return type)."""
    def dotted(*segments):
        # `La/b/C;` class references become dotted names: a.b.C
        return '.'.join(segments)

    identifier = regex(r'[0-9a-zA-Z\$_]+')
    class_name = (string('L')
                  >> identifier.sep_by(string('/')).combine(dotted)
                  << string(';'))
    base_type = regex('[BCDFIJSZ]').map(lambda code: BASE_TYPE_NAMES[code])
    # One or more leading '[' marks array dimensions appended as `[]`.
    array = seq(
        string('[').at_least(1).map(lambda brackets: '[]' * len(brackets)),
        base_type | class_name,
    ).combine(lambda suffix, elem: elem + suffix)
    parameter = base_type | class_name | array
    parameters = parameter.many().combine(
        lambda *types: '(' + ','.join(types) + ')')
    void = string('V').map(lambda _: 'void')
    return_type = void | parameter
    return seq(string('(') >> parameters << string(')'), return_type)
def test_combine_dict_list(self):
    """Tagged results feed combine_dict as keyword arguments."""
    Pair = namedtuple('Pair', ['word', 'number'])
    word = regex(r'[A-Z]+').tag('word')
    number = regex(r'[0-9]+').map(int).tag('number')
    parser = seq(word, number).combine_dict(Pair)
    expected = Pair(word='ABC', number=123)
    self.assertEqual(parser.parse('ABC123'), expected)
def test_combine_dict(self):
    """A tagged seq mapped to dict can feed combine_dict directly."""
    two_digits = regex(r'[0-9]{2}').map(int)
    four_digits = regex(r'[0-9]{4}').map(int)
    ddmmyyyy = seq(
        two_digits.tag('day'),
        two_digits.tag('month'),
        four_digits.tag('year'),
    ).map(dict).combine_dict(date)
    self.assertEqual(ddmmyyyy.parse('05042003'), date(2003, 4, 5))
def test_seq_kwargs(self):
    """Keyword arguments to seq become keys in the resulting dict."""
    token = regex(r"\S+")
    parser = seq(first_name=token << whitespace, last_name=token)
    self.assertEqual(
        parser.parse("Jane Smith"),
        {'first_name': 'Jane', 'last_name': 'Smith'},
    )
def test_tag_map_dict(self):
    """Tagged parsers mapped through dict yield a name-to-result mapping."""
    first = letter.tag("first_letter")
    rest = letter.many().concat().tag("remainder")
    parser = seq(first, rest).map(dict)
    self.assertEqual(
        parser.parse("Hello"),
        {'first_letter': 'H', 'remainder': 'ello'},
    )
def platform_device(device_idx: int) -> Parser:
    """Parse one device section introduced by its `-- <idx> --` header line."""
    header = line_with(string(f"-- {device_idx} --"))
    fields = seq(
        name=var_def("DEVICE_NAME"),
        vendor=var_def("DEVICE_VENDOR"),
        version=var_def("DEVICE_VERSION"),
        # Not every device reports a driver version.
        driver_version=var_def("DRIVER_VERSION").optional(),
    )
    return header >> fields.combine_dict(Device)
def platform(platform_idx: int) -> Parser:
    """Parse one platform section introduced by its `-- <idx> --` header line."""
    header = line_with(string(f"-- {platform_idx} --"))
    fields = seq(
        profile=var_def("PROFILE"),
        version=var_def("VERSION"),
        name=var_def("NAME"),
        vendor=var_def("VENDOR"),
    )
    return header >> fields.combine_dict(PlatformInfo)
def expression():
    """Parse a binary-operator expression into an Application tree.

    Shunting-yard style: operands and operators go onto two stacks and are
    reduced according to precedence and left-associativity.
    """
    fst = yield single_expression
    rest = yield P.seq(binary_operator, single_expression).map(tuple).many()
    val_stack = [fst]
    op_stack = []

    def apply_op():
        # Pop two operands and one operator; push the combined application,
        # spanning from the left operand's start to the right operand's end.
        h2 = val_stack.pop()
        h1 = val_stack.pop()
        o = op_stack.pop()
        o = var_from_op(o)
        val_stack.append(
            ast.Application(start=h1.start,
                            end=h2.end,
                            function=o,
                            args=[h1, h2]))

    for op, val in rest:
        # NOTE(review): this reduces while the stacked operator's precedence
        # is *lower* (`<`) than the incoming one — the opposite of the usual
        # shunting-yard comparison. Presumably this project numbers
        # tighter-binding operators with smaller precedence values; confirm
        # against the operator table.
        while len(op_stack) != 0 and (
                op_stack[-1].precedence < op.precedence or
                (op_stack[-1].precedence == op.precedence
                 and op_stack[-1].associativity == "left")):
            apply_op()
        op_stack.append(op)
        val_stack.append(val)
    # Reduce whatever operators remain.
    while len(op_stack) != 0:
        apply_op()
    assert len(val_stack) == 1 and len(op_stack) == 0
    return val_stack[-1]
def __init__(self):
    """Build the config parser: named `{ ... }` blocks of key=value lines."""
    spaces = regex(r'[ \t]*')  # Excludes newline
    whitespace = regex(r'\s*')  # Includes newline
    newline = string('\n')
    equal = string('=')
    lbrace = whitespace << string('{') << whitespace
    rbrace = whitespace << string('}') << whitespace
    # These parsers don't terminate blocks.
    # FIX: raw string — '\s' in a plain literal is an invalid escape
    # (SyntaxWarning on modern Python) even though re received it intact.
    word = regex(r'[^\s=}]+')
    words = word + (spaces + word).many().concat()
    characters = regex(r'[^}]*')
    key_value_line = seq(spaces >> word << spaces << equal,
                         spaces >> words << spaces)
    key_value_lines = key_value_line.sep_by(newline).map(dict)

    def block(name, content):
        # A block is `<name> { <content> }` with arbitrary surrounding space.
        return seq(whitespace >> name, lbrace >> content << rbrace)

    key_value_block = block(word, key_value_lines)
    key_value_blocks = key_value_block.many().map(dict_of_list)
    # 'Region' blocks have structured sub-blocks; anything else is kept raw.
    region_block = block(string('Region'), key_value_blocks)
    other_block = block(word, characters)
    self.parser = (region_block | other_block).many().map(dict)
def decimal():
    """Parse a `decimal`/`numeric` type with optional (precision, scale)."""
    yield spaceless_string("decimal", "numeric")
    parsed = yield lparen.then(
        p.seq(precision.skip(comma), scale)
        .combine(lambda prec, sc: (prec, sc))
    ).skip(rparen).optional()
    # Fall back to the module-wide defaults when no parameters were given.
    prec_scale = parsed or default_decimal_parameters
    return Decimal(*prec_scale)
def parse_header(name: str, clock: str) -> Tuple[str, Dict[str, str]]:
    """Extract the port list from a Verilator-generated C++ header.

    Reads `obj_dir/V{name}.h`, maps each `VL_IN*`/`VL_OUT*` port name to its
    width class ('IN8', 'OUT32', ...), drops the clock port, and returns
    `(name, ports)`.

    Raises:
        KeyError: if `clock` is not among the discovered ports.
    """
    in8 = string(' VL_IN8(').map(lambda x: 'IN8')
    in16 = string(' VL_IN16(').map(lambda x: 'IN16')
    in32 = string(' VL_IN(').map(lambda x: 'IN32')
    in64 = string(' VL_IN64(').map(lambda x: 'IN64')
    inw = string(' VL_INW(').map(lambda x: 'INW')
    out8 = string(' VL_OUT8(').map(lambda x: 'OUT8')
    out16 = string(' VL_OUT16(').map(lambda x: 'OUT16')
    out32 = string(' VL_OUT(').map(lambda x: 'OUT32')
    out64 = string(' VL_OUT64(').map(lambda x: 'OUT64')
    outw = string(' VL_OUTW(').map(lambda x: 'OUTW')
    ports = (in8 | in16 | in32 | in64 | inw | out8 | out16 | out32 | out64
             | outw).desc('variable width definition')
    # FIX: the original pattern '[a-za-z]+\w*' repeated the lowercase range
    # (so uppercase port names never matched) and used a non-raw string.
    varname = regex(r'[a-zA-Z]+\w*').desc('variable name')
    portlist: Dict[str, str] = {}
    with open(f'obj_dir/V{name}.h', 'r') as f:
        for line in f:
            try:
                (width, port), _ = seq(ports, varname).parse_partial(line)
                portlist[port] = width
            except ParseError:
                # Non-port lines are expected; skip them.
                pass
    # Remove clock from port list.
    del portlist[clock]
    return name, portlist
def make_element_parser(config, content, jinja):
    """Combine raw-text, self-closing, and container element parsers."""
    container_element = make_container_element_parser(
        config,
        content=content,
        jinja=jinja,
    )
    self_closing_opening = make_opening_tag_parser(
        config,
        tag_name_parser=P.string_from(*SELF_CLOSING_ELEMENTS),
        allow_slash=True,
        jinja=jinja,
    )
    # Self-closing elements carry neither content nor a closing tag.
    self_closing_element = locate(
        P.seq(
            self_closing_opening.skip(whitespace),
            P.success(None),  # No content
            P.success(None),  # No closing tag
        )
    ).combine(_combine_element)
    style = make_raw_text_element_parser(config, 'style', jinja=jinja)
    script = make_raw_text_element_parser(config, 'script', jinja=jinja)
    return style | script | self_closing_element | container_element
def expression():
    """Parse a chain of operator-separated expressions into an AST.

    Standard shunting-yard: operands and operators are pushed onto two
    stacks and reduced according to precedence and left-associativity.
    """
    fst = yield single_expression
    rest = yield P.seq(operator, single_expression).map(tuple).many()
    val_stack = [fst]
    op_stack = []

    def apply_op():
        # Reduce: combine the top two operands with the top operator.
        h2 = val_stack.pop()
        h1 = val_stack.pop()
        o = op_stack.pop()
        val_stack.append(ast.Application(h1, h2, o))

    for op, val in rest:
        # Reduce while the stacked operator binds at least as tightly:
        # strictly greater precedence, or equal precedence with left
        # associativity.
        while len(op_stack) != 0 and (
                op_stack[-1].precedence > op.precedence or
                (op_stack[-1].precedence == op.precedence
                 and op_stack[-1].associativity == "left")
        ):
            apply_op()
        op_stack.append(op)
        val_stack.append(val)
    # Reduce whatever operators remain.
    while len(op_stack) != 0:
        apply_op()
    assert len(val_stack) == 1 and len(op_stack) == 0
    return val_stack[-1]
def make_opening_tag_parser(config, jinja, tag_name_parser=None,
                            allow_slash=False):
    """Build a parser for `<name attrs... [/]>` opening tags."""
    attributes = make_attributes_parser(config, jinja)
    if not tag_name_parser:
        tag_name_parser = tag_name | jinja
    if allow_slash:
        # Optional trailing slash (self-closing style), located for output.
        slash = (locate(P.string('/').skip(whitespace))
                 .combine(_combine_slash)
                 .optional())
    else:
        slash = P.success(None)
    opening = P.seq(
        P.string('<'),
        tag_name_parser,
        attributes.skip(whitespace),
        slash,
        P.string('>'),
    )
    return locate(opening).combine(_combine_opening_tag)
def struct():
    """Parse `struct<field: type, ...>` into a Struct."""
    yield spaceless_string("struct")
    yield langle
    # Each entry is `field: type`; collect them as (field, type) tuples.
    pair = p.seq(field.skip(colon), ty).combine(lambda f, t: (f, t))
    pairs = yield pair.sep_by(comma)
    yield rangle
    return Struct.from_tuples(pairs)
def make_jinja_tag_parser(name_parser):
    """Parse a `{% name ... %}` Jinja tag, capturing its raw argument text."""
    open_marker = P.string('{%') + whitespace
    close_marker = whitespace + P.string('%}')
    tag = P.seq(
        open_marker,
        name_parser.skip(whitespace),
        # Everything up to the closing marker is the tag's argument text.
        until(close_marker).concat(),
        close_marker,
    )
    return locate(tag).combine(_combine_jinja_tag)
def def_top_level(keyword: str, ty: type):
    """Parse a top-level `<keyword> Name [: bases] { methods }` definition."""
    base_list = colon >> identifier.sep_by(comma) << padding
    body = lbrace >> method.many() << rbrace
    return seq(
        attrs=attributes << padding,
        _1=string(keyword) << whitespace,
        name=identifier << padding,
        bases=base_list.optional(),
        methods=body,
    ).combine_dict(ty)
def make_element_parser(config, content, jinja):
    """Combine raw-text, void, SVG self-closing, and container parsers."""
    def childless(opening_tag):
        # Elements that carry neither content nor a closing tag.
        return locate(
            P.seq(
                opening_tag.skip(whitespace),
                P.success(None),  # No content
                P.success(None),  # No closing tag
            )
        ).combine(_combine_element)

    container_element = make_container_element_parser(
        config, content=content, jinja=jinja
    )
    void_element = childless(
        make_opening_tag_parser(
            config,
            tag_name_parser=P.string_from(*VOID_ELEMENTS),
            allow_slash=True,
            jinja=jinja,
        )
    )
    svg_self_closing_element = childless(
        make_opening_tag_parser(
            config,
            tag_name_parser=P.string_from(*SVG_SELF_CLOSING_ELEMENTS),
            mandate_slash=True,
            jinja=jinja,
        )
    )
    style = make_raw_text_element_parser(config, "style", jinja=jinja)
    script = make_raw_text_element_parser(config, "script", jinja=jinja)
    return (
        style | script | void_element | svg_self_closing_element
        | container_element
    )
def __init__(self):
    """Build the response parser for `indi`, GPO/GPI, and ERROR messages."""
    # TODO: finish this parser so it does more than just parse indi and
    # GPO responses.
    indi_operator = string_from("indi")
    gp_operator = string_from("GPO", "GPI")
    space = string(" ")
    obj = regex(r"[a-zA-Z0-9.#]*")
    name = regex(r"[a-zA-Z]*")
    simple_string = regex(r"[a-zA-Z ]*")
    equals = string("=")
    value = regex(r"[^,]*") | regex(r'".*"')
    number = regex(r"[0-9]+")
    # Exactly five 'h'/'l' pin levels.
    # NOTE(review): 'h' maps to False and 'l' to True here — looks
    # inverted; confirm against the device's GPIO semantics.
    gp_value = regex(r"[hl]").map(lambda v: {"h": False, "l": True}[v]) * 5
    # `indi <path> name=value, name=value, ...` — the comma-separated
    # pairs are folded into a single dict.
    indi_parser = seq(indi_operator << space).then(
        seq(
            path=obj << space.optional(),
            info=seq(
                name=name << equals,
                value=value << string(", ").optional()).map(lambda x: {
                    x["name"]: x["value"]
                }).many().map(
                    lambda kv: {k: v for d in kv for k, v in d.items()}),
        ))
    # `GPO <number> hhlll` style general-purpose pin responses.
    gp_parser = seq(gp_operator << space).then(
        seq(number=number << space, pins=gp_value))
    # `ERROR <number> <message>` responses.
    error_parser = seq(string("ERROR") << space).then(
        seq(number << space, simple_string))
    self.p = indi_parser | gp_parser | error_parser
def internal_to_parser(idx):
    """Recursively build a parser for rule `idx` from `rule_defs`."""
    definition = rule_defs[idx]
    if isinstance(definition, str):
        # Leaf rule: match the literal string.
        return string(definition)
    # Otherwise: an alternation of sequences of sub-rule ids, each sequence
    # joined back into a single matched string.
    alternatives = []
    for sub_rule_ids in definition:
        sub_parsers = [internal_to_parser(i) for i in sub_rule_ids]
        alternatives.append(seq(*sub_parsers).map(lambda l: ''.join(l)))
    return alt(*alternatives)
def locate(parser):
    """Wrap `parser` so its result carries start/end index and line info."""
    located = P.seq(
        P.index,
        P.line_info,
        parser,
        P.index,
        P.line_info,
    )
    return located.combine(combine_locations)
def is_message_valid_pt2(message, rule31, rule42):
    """Check `message` matches rule42 repeated i+j times then rule31 j times.

    Tries every i, j in 1..9; returns the matched string on success,
    False when no combination parses the whole message.
    """
    for extra_42s in range(1, 10):
        for count_31 in range(1, 10):
            candidate = seq(
                rule42.times(extra_42s + count_31).concat(),
                rule31.times(count_31).concat(),
            ).concat()
            try:
                return candidate.parse(message)
            except ParseError:
                continue
    return False
def struct():
    """Parse `struct(field type, ...)` into a Struct."""
    yield spaceless_string("struct")
    yield LPAREN
    # Each entry is a field followed by its type; keep them as tuples.
    pair = p.seq(field, ty).combine(lambda f, t: (f, t))
    pairs = yield pair.sep_by(COMMA)
    yield RPAREN
    return Struct.from_tuples(pairs)