def hex_value():
    """Parse a run of hexadecimal digits and return their integer value.

    Uses ``many1`` (as ``dec_entity`` does for decimal digits) instead of
    ``many`` so that an empty digit run is rejected by the parser rather
    than reaching ``int("", 16)``, which raises ``ValueError``.

    Returns:
        The parsed digits interpreted as a base-16 integer.
    """
    hex_digits = build_string(many1(hex_decimal_digit))
    return int(hex_digits, 16)
def comment():
    """Parse an XML comment.

    Matches the ``<!--`` opener, calls ``commit()``, then collects tokens
    with ``any_token`` until the ``-->`` terminator parser matches.

    Returns:
        The tuple ``("COMMENT", text)`` where ``text`` is the collected
        tokens joined by ``build_string``.
    """
    string("<!--")
    commit()
    terminator = tri(partial(string, "-->"))
    # many_until yields a pair; only the collected tokens are used here.
    body, _ = many_until(any_token, terminator)
    return "COMMENT", build_string(body)
def text_node():
    """Parse one or more ``xml_char`` items as a text node.

    Returns:
        The tuple ``("TEXT", text)`` where ``text`` is the matched items
        joined by ``build_string``.
    """
    chars = many1(xml_char)
    return "TEXT", build_string(chars)
def version_num():
    """Parse a version number of the form ``1.<digits>``.

    Matches the literal ``1.`` prefix followed by one or more decimal
    digits.

    Returns:
        The string ``"1."`` concatenated with the matched digits.
    """
    string('1.')
    minor_digits = many1(decimal_digit)
    return "1." + build_string(minor_digits)
def dec_entity():
    """Parse one or more decimal digits and return the matching character.

    Returns:
        ``unichr`` of the digits interpreted as a base-10 integer.
    """
    digits = build_string(many1(decimal_digit))
    code_point = int(digits, 10)
    return unichr(code_point)
def named_entity():
    """Parse an entity name and look it up in ``named_entities``.

    The name is one or more items that are neither ``;`` nor ``#``.
    ``fail()`` is called when the name is not a key of
    ``named_entities``.

    Returns:
        The ``named_entities`` value stored under the parsed name.
    """
    entity_name = build_string(many1(partial(not_one_of, ';#')))
    is_known = entity_name in named_entities
    if not is_known:
        fail()
    return named_entities[entity_name]
def xml_name():
    """Parse a name: one ``name_start_char`` then any ``name_char`` items.

    Returns:
        The matched items joined by ``build_string``.
    """
    first = name_start_char()
    rest = many(name_char)
    return build_string([first] + rest)
close_angle() @tri def attribute(): name = xml_name() commit() lexeme(equals) return "ATTR", name, quoted() parse_xml = partial(run_text_parser, xml) tokens, remaining = parse_xml(""" <?xml version="1.0" ?> <!DOCTYPE MyDoctype> <!-- a comment --> <root> <!-- another comment --> <self-closing /> <? this processing is ignored ?> <node foo="bar" baz="bo&p'"> This is some node text & it contains a named entity And some A numeric </node> </root> """) print "nodes:", tokens print print "remaining:", build_string(remaining)