Exemplo n.º 1
0
def toc(ast, autonumber, includetitle):
    """Build a nested table of contents from the headings found in ``ast``.

    Every heading encountered while walking the tree gets an ``id`` derived
    from its position in the outline (its "tumbler"). When ``autonumber`` is
    true and the heading has a first child, a text node holding the dotted
    number is inserted in front of that child.

    Each entry of the result is a tuple ``(level, title, children, attrs)``.
    Levels skipped by the document are filled with placeholder entries whose
    title and attrs are ``None``.

    :param ast: root node exposing ``walker()``.
    :param autonumber: prefix headings with their outline number when true.
    :param includetitle: when false, the first tumbler component is dropped.
    :returns: the list of top-level entries.
    """
    top = []
    # stack[0] is the top-level list; stack[k] is the child list of the most
    # recently added entry at depth k.
    stack = [top]
    level = 0
    for node, entering in ast.walker():
        if not (node.t == 'heading' and entering):
            continue

        title = text(node)
        attrs = {}
        if node.level > level:
            # Going deeper: create one entry per level, using placeholders
            # for any level the document skipped.
            for depth in range(level + 1, node.level + 1):
                is_placeholder = depth < node.level
                entry = (depth,
                         None if is_placeholder else title,
                         [],
                         None if is_placeholder else attrs)
                stack[-1].append(entry)
                stack.append(entry[2])
        else:
            # Same or shallower level: close every open list from the
            # heading's level downwards, then open a sibling entry.
            for _ in range(node.level, level + 1):
                stack.pop()
            entry = (node.level, title, [], attrs)
            stack[-1].append(entry)
            stack.append(entry[2])
        level = node.level

        # Position of this heading at each depth of the outline.
        tumbler = [len(siblings) for siblings in stack[:-1]]
        if not includetitle:
            tumbler = tumbler[1:]
        node.id = 'h' + ''.join('-' + str(part) for part in tumbler)
        attrs['id'] = node.id
        attrs['tumbler'] = tumbler

        if autonumber and node.first_child:
            first = node.first_child
            label = Node('text', first.sourcepos)
            node.first_child = label
            first.prv = label
            label.parent = node
            label.nxt = first
            label.literal = ''.join(str(part) + '.' for part in tumbler) + ' '

    return top
Exemplo n.º 2
0
    def setext_heading(parser, container=None):
        """Try to turn a one-line paragraph into a setext heading.

        The attempt is made only when the current line is not indented,
        ``container`` is a 'Paragraph', and the paragraph's content holds
        exactly one line (its first newline is its last character). If the
        rest of the current line then matches ``reSetextHeadingLine``, the
        paragraph is replaced in the tree by a 'Heading' node (level 1 for
        ``=``, level 2 otherwise) carrying the paragraph's content.

        :param parser: block parser providing the current line and state.
        :param container: the node currently being considered.
        :returns: 2 when a heading was created, 0 otherwise.
        """
        if container is None or parser.indented or container.t != 'Paragraph':
            return 0

        content = container.string_content
        # str.find instead of str.index: content without any newline must
        # simply not match rather than raise ValueError.
        newline_at = content.find('\n')
        if newline_at == -1 or newline_at != len(content) - 1:
            return 0

        m = re.match(reSetextHeadingLine,
                     parser.current_line[parser.next_nonspace:])
        if not m:
            return 0

        parser.close_unmatched_blocks()
        heading = Node('Heading', container.sourcepos)
        heading.level = 1 if m.group()[0] == '=' else 2
        heading.string_content = container.string_content
        container.insert_after(heading)
        container.unlink()
        parser.tip = heading
        # Consume the remainder of the underline.
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2
Exemplo n.º 3
0
 def __init__(self, options=None):
     """Initialise the parser with an empty document and default state.

     :param options: optional dict of options; it is stored on the parser
         and handed to the ``InlineParser``. A new empty dict is used when
         it is omitted.
     """
     # A literal ``{}`` default is created once at definition time and
     # would be shared (and mutated) across every parser instance.
     if options is None:
         options = {}
     self.doc = Node('Document', [[1, 1], [0, 0]])
     self.block_starts = BlockStarts()
     self.tip = self.doc
     self.oldtip = self.doc
     self.current_line = ''
     self.line_number = 0
     self.offset = 0
     self.column = 0
     self.next_nonspace = 0
     self.next_nonspace_column = 0
     self.indent = 0
     self.indented = False
     self.blank = False
     self.all_closed = True
     self.last_matched_container = self.doc
     self.refmap = {}
     self.last_line_length = 0
     self.inline_parser = InlineParser(options)
     self.options = options
Exemplo n.º 4
0
 def parse(self, my_input):
     """Parse ``my_input`` line by line and return the document AST.

     Resets the per-document state, feeds every line to
     ``incorporate_line``, finalizes all blocks that are still open and
     finally runs inline processing over the document.
     """
     root = Node('document', [[1, 1], [0, 0]])
     self.doc = root
     self.tip = root
     self.refmap = {}
     self.line_number = self.last_line_length = 0
     self.offset = self.column = 0
     self.last_matched_container = root
     self.current_line = ''

     lines = re.split(reLineEnding, my_input)
     line_count = len(lines)
     if my_input[-1:] == '\n':
         # A trailing newline yields one extra empty element; skip it.
         line_count -= 1

     for line in lines[:line_count]:
         self.incorporate_line(line)

     # Close whatever is still open, innermost block first.
     while self.tip:
         self.finalize(self.tip, line_count)

     self.process_inlines(self.doc)
     return self.doc
Exemplo n.º 5
0
 def parseBackticks(self, block):
     """Parse a backtick run at the current position.

     Appends a 'Code' node to ``block`` when a closing run of the same
     length is found; otherwise appends the opening run as literal text
     and continues right after it. Returns False only when there is no
     backtick run at the current position.
     """
     opening = self.match(reTicksHere)
     if opening is None:
         return False

     content_start = self.pos
     while True:
         run = self.match(reTicks)
         if run is None:
             break
         if run != opening:
             # Run of a different length: keep scanning.
             continue
         code = Node('Code', None)
         raw = self.subject[content_start:self.pos - len(opening)]
         code.literal = re.sub(reWhitespace, ' ', raw.strip())
         block.append_child(code)
         return True

     # No closing run: rewind and emit the opening backticks literally.
     self.pos = content_start
     block.append_child(text(opening))
     return True
Exemplo n.º 6
0
def section(ast):
    """Split ``ast`` into one document per top-level 'Heading'.

    Nodes that precede the first heading stay in ``ast``. Each heading
    starts a new 'Document' node that owns the heading and every sibling up
    to the next heading. ``ast`` is left out of the result when its first
    child is a heading (it would be empty).

    :param ast: document node whose direct children are split.
    :returns: list of document nodes, in source order.
    """
    sections = [ast]
    # Container that currently owns the nodes being visited: ``ast`` for the
    # leading nodes, then the document created for the latest heading.
    current = ast
    for n in children(ast):
        if n.t != u'Heading':
            # Nodes that moved into a section must point at it.
            if current is not ast:
                n.parent = current
            continue

        doc = Node('Document', [[1, 1], [0, 0]])
        doc.first_child = n
        # The new section takes over the tail of the container it is cut
        # from; that container now ends just before the heading.
        doc.last_child = current.last_child

        if n == ast.first_child:
            sections = []

        current.last_child = n.prv
        if n.prv:
            n.prv.nxt = None
        else:
            current.first_child = None
        n.prv = None
        n.parent = doc

        sections.append(doc)
        current = doc

    return sections
Exemplo n.º 7
0
 def test_node_walker_iter(self):
     """Iterating the walker of a bare document node must not raise."""
     root = Node('document', [[1, 1], [0, 0]])
     for _node, _entering in root.walker():
         pass
Exemplo n.º 8
0
 def test_node_walker(self):
     """A NodeWalker can be constructed from a document node."""
     root = Node('document', [[1, 1], [0, 0]])
     NodeWalker(root)
Exemplo n.º 9
0
 def test_doc_node(self):
     """A document node can be constructed from a type and a sourcepos."""
     sourcepos = [[1, 1], [0, 0]]
     Node('document', sourcepos)
Exemplo n.º 10
0
def text(s):
    """Return a new 'Text' node whose literal is ``s``."""
    leaf = Node('Text', None)
    leaf.literal = s
    return leaf
Exemplo n.º 11
0
    def parseCloseBracket(self, block):
        """
        Handle a ``]`` by pairing it with a ``[`` or ``![`` opener on the
        delimiter stack.

        On a match, a link or image node wrapping the inlines that follow
        the opener is appended to ``block``. Otherwise a literal ``]`` text
        node is appended. An inactive, matched or unmatched opener is taken
        off the delimiter stack. Always returns True.
        """
        title = None
        matched = False
        self.pos += 1
        startpos = self.pos

        # Nearest '[' or '!' entry on the delimiter stack, if any.
        opener = self.delimiters
        while opener is not None and opener.get('cc') not in ('[', '!'):
            opener = opener.get('previous')

        if opener is None:
            # Nothing to pair with: literal bracket.
            block.append_child(text(']'))
            return True

        if not opener.get('active'):
            # Opener was deactivated: literal bracket, and drop the opener.
            block.append_child(text(']'))
            self.removeDelimiter(opener)
            return True

        is_image = opener.get('cc') == '!'

        if self.peek() == '(':
            # Inline link: "(destination "title")".
            self.pos += 1
            self.spnl()
            dest = self.parseLinkDestination()
            if dest is not None and self.spnl():
                # A title is only allowed after whitespace.
                if re.match(reWhitespaceChar, self.subject[self.pos - 1]):
                    title = self.parseLinkTitle()
                if self.spnl() and self.peek() == ')':
                    self.pos += 1
                    matched = True
        else:
            # Reference link: optional "[label]" after the bracket.
            label_start = self.pos
            n = self.parseLinkLabel()
            if n in (0, 2):
                # Empty or missing second label: use the bracketed text.
                reflabel = self.subject[opener['index']:startpos]
            else:
                reflabel = self.subject[label_start:label_start + n]
            if n == 0:
                # Shortcut reference: rewind over anything skipped.
                self.pos = label_start

            link = self.refmap.get(normalizeReference(reflabel))
            if link:
                dest = link['destination']
                title = link['title']
                matched = True

        if not matched:
            # Not a link after all: drop the opener, emit a literal bracket.
            self.removeDelimiter(opener)
            self.pos = startpos
            block.append_child(text(']'))
            return True

        node = Node('Image' if is_image else 'Link', None)
        node.destination = dest
        node.title = title or ''

        # Move everything after the opener's node into the new node.
        child = opener.get('node').nxt
        while child:
            following = child.nxt
            child.unlink()
            node.append_child(child)
            child = following
        block.append_child(node)
        self.processEmphasis(opener.get('previous'))

        opener.get('node').unlink()

        # processEmphasis removed this and later delimiters. For a link,
        # earlier '[' openers are also deactivated (no links in links).
        if not is_image:
            earlier = self.delimiters
            while earlier is not None:
                if earlier.get('cc') == '[':
                    earlier['active'] = False
                earlier = earlier.get('previous')

        return True
Exemplo n.º 12
0
    def processEmphasis(self, stack_bottom):
        """
        Resolve the delimiters stacked above ``stack_bottom``.

        Scans the delimiter list from the first entry above ``stack_bottom``
        towards the top, looking for entries that can close. For ``*`` and
        ``_`` closers with a matching opener, the inlines between the two
        are moved into a new 'Emph' (one delimiter used) or 'Strong' node.
        For ``'`` and ``"`` closers, the delimiter nodes' literals are
        replaced by curly quotes. Finally every delimiter above
        ``stack_bottom`` is removed from the list.

        ``stack_bottom`` is the delimiter entry below which nothing is
        inspected or removed (``None`` processes the whole list).
        """
        # Per delimiter character, the lowest entry worth searching for an
        # opener; raised whenever a search for that character fails.
        openers_bottom = {
            '_': stack_bottom,
            '*': stack_bottom,
            "'": stack_bottom,
            '"': stack_bottom,
        }
        use_delims = 0

        # Find first closer above stack_bottom
        closer = self.delimiters
        while closer is not None and closer.get('previous') != stack_bottom:
            closer = closer.get('previous')

        # Move forward, looking for closers, and handling each
        while closer is not None:
            closercc = closer.get('cc')
            if not (closer.get('can_close') and
                    (closercc == '_' or closercc == '*' or closercc == "'"
                     or closercc == '"')):
                closer = closer.get('next')
            else:
                # found emphasis closer. now look back for first
                # matching opener:
                opener = closer.get('previous')
                opener_found = False
                while (opener is not None and opener != stack_bottom
                       and opener != openers_bottom[closercc]):
                    if opener.get('cc') == closercc and opener.get('can_open'):
                        opener_found = True
                        break
                    opener = opener.get('previous')
                # Remember the closer: the branches below advance `closer`.
                old_closer = closer

                if closercc == '*' or closercc == '_':
                    if not opener_found:
                        closer = closer.get('next')
                    else:
                        # Calculate actual number of delimiters used from
                        # closer
                        if closer['numdelims'] < 3 or opener['numdelims'] < 3:
                            if closer['numdelims'] <= opener['numdelims']:
                                use_delims = closer['numdelims']
                            else:
                                use_delims = opener['numdelims']
                        else:
                            if closer['numdelims'] % 2 == 0:
                                use_delims = 2
                            else:
                                use_delims = 1

                        opener_inl = opener.get('node')
                        closer_inl = closer.get('node')

                        # Remove used delimiters from stack elts and inlines
                        opener['numdelims'] -= use_delims
                        closer['numdelims'] -= use_delims
                        opener_inl.literal = opener_inl.literal[:len(
                            opener_inl.literal) - use_delims]
                        closer_inl.literal = closer_inl.literal[:len(
                            closer_inl.literal) - use_delims]

                        # Build contents for new Emph element
                        if use_delims == 1:
                            emph = Node('Emph', None)
                        else:
                            emph = Node('Strong', None)

                        # Move every inline between opener and closer into
                        # the new node.
                        tmp = opener_inl.nxt
                        while tmp and tmp != closer_inl:
                            nxt = tmp.nxt
                            tmp.unlink()
                            emph.append_child(tmp)
                            tmp = nxt

                        opener_inl.insert_after(emph)

                        # Remove elts between opener and closer in delimiters
                        # stack
                        self.removeDelimitersBetween(opener, closer)

                        # If opener has 0 delims, remove it and the inline
                        if opener['numdelims'] == 0:
                            opener_inl.unlink()
                            self.removeDelimiter(opener)

                        # Same for the closer; when it still has delimiters
                        # left it is examined again on the next iteration.
                        if closer['numdelims'] == 0:
                            closer_inl.unlink()
                            tempstack = closer['next']
                            self.removeDelimiter(closer)
                            closer = tempstack

                elif closercc == "'":
                    # Closer becomes a right single quote; a matching opener
                    # becomes a left single quote.
                    closer['node'].literal = '\u2019'
                    if opener_found:
                        opener['node'].literal = '\u2018'
                    closer = closer['next']

                elif closercc == '"':
                    # Closer becomes a right double quote; a matching opener
                    # becomes a left double quote.
                    closer['node'].literal = '\u201D'
                    if opener_found:
                        opener['node'].literal = '\u201C'
                    closer = closer['next']

                if not opener_found:
                    # Set lower bound for future searches for openers:
                    openers_bottom[closercc] = old_closer['previous']
                    if not old_closer['can_open']:
                        # We can remove a closer that can't be an opener,
                        # once we've seen there's no matching opener:
                        self.removeDelimiter(old_closer)

        # Remove all delimiters
        while self.delimiters is not None and self.delimiters != stack_bottom:
            self.removeDelimiter(self.delimiters)
Exemplo n.º 13
0
    def parseCloseBracket(self, block):
        """
        Try to match close bracket against an opening in the bracket
        stack. Add either a link or image, or a plain ] character,
        to block's children. The opening bracket is removed from the
        bracket stack whether or not it matched. Always returns True.
        """
        title = None
        dest = None
        # Must be pre-set: neither branch below assigns it when the label is
        # empty/missing and the opener is known to be followed by a bracket.
        reflabel = None
        matched = False
        self.pos += 1
        startpos = self.pos

        # get last [ or ![
        opener = self.brackets

        if opener is None:
            # no matched opener, just return a literal
            block.append_child(text(']'))
            return True

        if not opener.get('active'):
            # no matched opener, just return a literal
            block.append_child(text(']'))
            # take opener off brackets stack
            self.removeBracket()
            return True

        # If we got here, opener is a potential opener
        is_image = opener.get('image')

        # Check to see if we have a link/image

        savepos = self.pos

        # Inline link?
        if self.peek() == '(':
            self.pos += 1
            self.spnl()
            dest = self.parseLinkDestination()
            if dest is not None and self.spnl():
                # make sure there's a space before the title
                if re.match(reWhitespaceChar, self.subject[self.pos-1]):
                    title = self.parseLinkTitle()
                if self.spnl() and self.peek() == ')':
                    self.pos += 1
                    matched = True
            if not matched:
                # Rewind after ANY failed inline attempt (not only a missing
                # destination) so the reference-label scan below starts
                # right after the ']' instead of inside the parentheses.
                self.pos = savepos

        if not matched:
            # Next, see if there's a link label
            beforelabel = self.pos
            n = self.parseLinkLabel()
            if n > 2:
                reflabel = self.subject[beforelabel:beforelabel + n]
            elif not opener.get('bracket_after'):
                # Empty or missing second label means to use the first
                # label as the reference.  The reference must not
                # contain a bracket. If we know there's a bracket, we
                # don't even bother checking it.
                reflabel = self.subject[opener.get('index'):startpos]
            if n == 0:
                # If shortcut reference link, rewind before spaces we skipped.
                self.pos = savepos

            if reflabel:
                # lookup rawlabel in refmap
                link = self.refmap.get(normalizeReference(reflabel))
                if link:
                    dest = link['destination']
                    title = link['title']
                    matched = True

        if matched:
            node = Node('image' if is_image else 'link', None)

            node.destination = dest
            node.title = title or ''
            # Move every inline after the opener's node into the new node.
            tmp = opener.get('node').nxt
            while tmp:
                nxt = tmp.nxt
                tmp.unlink()
                node.append_child(tmp)
                tmp = nxt
            block.append_child(node)
            self.processEmphasis(opener.get('previousDelimiter'))
            self.removeBracket()
            opener.get('node').unlink()

            # We remove this bracket and processEmphasis will remove
            # later delimiters.
            # Now, for a link, we also deactivate earlier link openers.
            # (no links in links)
            if not is_image:
                opener = self.brackets
                while opener is not None:
                    if not opener.get('image'):
                        # deactivate this opener
                        opener['active'] = False
                    opener = opener.get('previous')

            return True
        else:
            # no match
            # remove this opener from stack
            self.removeBracket()
            self.pos = startpos
            block.append_child(text(']'))
            return True
Exemplo n.º 14
0
def nestSections(block, level=1):
    """
    Sections aren't handled by CommonMark at the moment.
    This function groups the children of 'block' into 'MDsection' nodes.

    Every 'heading' child whose level equals 'level' starts a new section
    holding that heading and the siblings that follow it, up to the next
    such heading. Children that precede the first such heading stay directly
    under 'block'. Each section is then processed again with 'level + 1'.
    If 'block' has no heading child of level 'level', nothing is done.
    """
    cur = block.first_child
    if cur is None:
        return

    children = []
    # Do we need to do anything?
    nest = False
    while cur is not None:
        if cur.t == 'heading' and cur.level == level:
            nest = True
            break
        cur = cur.nxt
    if not nest:
        return

    section = Node('MDsection', 0)
    section.parent = block
    cur = block.first_child
    while cur is not None:
        if cur.t == 'heading' and cur.level == level:
            # Found a split point, flush the last section if needed
            if section.first_child is not None:
                finalizeSection(section)
                children.append(section)
                section = Node('MDsection', 0)
        # Remember the successor now: the current node is re-linked below.
        nxt = cur.nxt
        # Avoid adding sections without titles at the start
        if section.first_child is None:
            if cur.t == 'heading' and cur.level == level:
                section.append_child(cur)
            else:
                children.append(cur)
        else:
            section.append_child(cur)
        cur = nxt

    # Flush the section that was still being filled
    if section.first_child is not None:
        finalizeSection(section)
        children.append(section)

    # Rebuild the child list of 'block' from the collected nodes
    block.first_child = None
    block.last_child = None
    nextLevel = level + 1
    for child in children:
        # Handle nesting
        if child.t == 'MDsection':
            nestSections(child, level=nextLevel)

        # Append
        if block.first_child is None:
            block.first_child = child
        else:
            block.last_child.nxt = child
        child.parent = block
        child.nxt = None
        # The back-link attribute is 'prv' (as in 'nxt'/'prv'); writing to
        # 'prev' would leave the real back-link stale.
        child.prv = block.last_child
        block.last_child = child