def build_section_parser():
    """
    Construct the parser that recovers the section structure of a
    markdown document.

    The grammar is mutually recursive. ``markdown_depth(n)`` accepts
    either a section whose header is deeper than ``n`` or a single plain
    line, and the whole-document parser is ``lib.many(markdown_depth(0))``.

    Whenever a header is met we must decide whether it nests inside the
    section currently being collected or belongs further up. The header
    marker is therefore read with a lookahead parse (``lib.peek``) and
    the follow-up parser is chosen from the marker's length; that is the
    purpose of ``parse_section_if_depth_sat``.
    """
    def join_chars(chars):
        return "".join(chars)

    concat_results = lib.lift(lambda *pieces: "".join(pieces))

    # A header marker: one '#', then further '#' characters up to a space.
    leading_hash = lib.char('#')
    trailing_hashes = lib.take_until(lib.char(' '), lib.char('#')).map(join_chars)
    header_tag = concat_results(leading_hash, trailing_hashes)

    def parse_section_if_depth_sat(pred):
        """
        Peek at the header marker and parse the section only when its
        depth (the length of the marker) satisfies ``pred``; otherwise
        yield ``lib.fail()``.
        """
        def choose_parser(tag):
            depth = len(tag)
            if pred(depth):
                return section(depth)
            return lib.fail()

        return lib.peek(header_tag).bind(choose_parser)

    def markdown_depth(n):
        """
        Parse one item below level ``n``: either a section whose header
        depth is strictly greater than ``n``, or a plain ``line``.
        """
        def is_deeper(m):
            return m > n

        return lib.alternative(parse_section_if_depth_sat(is_deeper), line)

    def section(n):
        """
        Build the parser for a single section whose header has depth ``n``.

        The caller must ensure that ``n`` really is the depth of the next
        header. The title is the stripped remainder of the header line;
        the children are collected with ``markdown_depth(n)`` until a
        header of depth ``n`` or shallower is seen.
        """
        strip_text = lib.lift(lambda text: text.strip())
        title = strip_text(lib.right(header_tag, line))

        def closes_section(m):
            return m <= n

        children = lib.take_until(
            parse_section_if_depth_sat(closes_section),
            markdown_depth(n))

        @lib.parser(title, children)
        def _section(heading, parsed_children):
            return Section(n, heading, parsed_children)

        return _section

    return lib.many(markdown_depth(0))
def test_bind_initial_failue():
    """A bound parser raises ``ParseError`` when its first parser does not match."""
    a_then_foo = lib.char('a').bind(lambda _unused: lib.tag("foo"))

    # The input starts with 'b', so the initial char('a') cannot match.
    with pytest.raises(lib.ParseError):
        a_then_foo("bfoo")
def test_many1():
    """``many1`` gathers the leading run of 'a's; ``partial`` also yields the rest."""
    run_of_a = lib.many1(lib.char('a'))
    joined = run_of_a.map(lambda chars: "".join(chars))
    parse_prefix = joined.partial()

    outcome = parse_prefix("aaabbb")

    assert outcome == ("aaa", "bbb")
def test_many_on_empty_string():
    """``many`` applied to empty input produces an empty list."""
    zero_or_more_a = lib.many(lib.char('a'))

    outcome = zero_or_more_a("")

    assert outcome == []
def test_many_until_end_of_string():
    """``many`` keeps matching right up to the end of the input."""
    zero_or_more_a = lib.many(lib.char('a'))
    as_text = zero_or_more_a.map(lambda chars: "".join(chars))

    outcome = as_text("aaa")

    assert outcome == "aaa"
def test_take_until_until_end_of_input():
    """``take_until`` returns everything when the terminator never appears."""
    stop_at_b = lib.char("b")
    collected = lib.take_until(stop_at_b, lib.anychar())
    as_text = collected.map(lambda chars: "".join(chars))

    outcome = as_text("aaa")

    assert outcome == "aaa"
def test_take_until():
    """``take_until`` stops before the terminator and leaves it in the remaining input."""
    up_to_b = lib.take_until(lib.char("b"), lib.anychar())

    consumed, remaining = up_to_b._run_parser("foobar")

    assert (consumed, remaining) == (['f', 'o', 'o'], "bar")
""" def __init__(self, depth, title, children): self.depth = depth self.title = title self.children = children def as_string(p): """ Take a parser which parses a list of characters, and modify it to return the string consisting of those characters. """ return p.map(lambda cs: "".join(cs)) newline = lib.char('\n') line = as_string( lib.alternative( lib.left(lib.take_until(newline, lib.anychar()), newline), lib.many1(lib.anychar()))) def build_section_parser(): """ Build a parser for structure of markdown document. This works by recursion - we build a parser which captures all subsections of header depth at least n (markdown_depth(n)). A markdown parser is then just as many markdown_depth(0) as we can