def parseAutolink(self, block):
    """Try to consume an autolink (email address or URL in pointy brackets).

    On success a 'Link' node whose single child is the literal address text
    is appended to ``block`` and True is returned.  Returns False when
    neither autolink pattern matches at the current position.
    """
    # The email pattern is tried first; its destination gets a ``mailto:``
    # prefix, while a plain URL autolink is used as-is.
    for pattern, prefix in ((reEmailAutolink, 'mailto:'), (reAutolink, '')):
        matched = self.match(pattern)
        if not matched:
            continue
        # Strip the first and last character (the enclosing < and >).
        address = matched[1:-1]
        link = Node('Link', None)
        link.destination = normalize_uri(prefix + address)
        link.title = ''
        link.append_child(text(address))
        block.append_child(link)
        return True
    return False
def parseAutolink(self, block):
    """Attempt to parse an autolink: ``<address>`` or ``<url>``.

    Appends a 'Link' node (with the bare address as its text child) to
    ``block`` and returns True when an autolink is matched; returns False
    otherwise.
    """
    # Email autolinks take priority and are turned into mailto: destinations.
    scheme = 'mailto:'
    hit = self.match(reEmailAutolink)
    if not hit:
        scheme = ''
        hit = self.match(reAutolink)
    if not hit:
        return False

    # Drop the leading and trailing character of the match (the brackets).
    target = hit[1:-1]
    anchor = Node('Link', None)
    anchor.destination = normalize_uri(scheme + target)
    anchor.title = ''
    anchor.append_child(text(target))
    block.append_child(anchor)
    return True
def parseCloseBracket(self, block):
    """
    Try to match a close bracket against an opener in the delimiter stack.

    Appends either a link or image node, or a literal ``]`` text node, to
    ``block``'s children.  If there is a matching opening delimiter, it is
    removed from the delimiter stack.

    An inline link (``[text](destination "title")``) is tried first.  If
    that attempt fails, the position is rewound to just after the ``]`` and
    a reference lookup is tried, so that ``[foo](not a link)`` can still
    resolve through a ``[foo]`` reference definition.

    Always returns True: the ``]`` has been consumed either way.
    """
    title = None
    matched = False
    self.pos += 1
    startpos = self.pos

    # Look through the stack of delimiters for a [ or ![
    opener = self.delimiters
    while opener is not None:
        if opener.get('cc') == '[' or opener.get('cc') == '!':
            break
        opener = opener.get('previous')

    if opener is None:
        # No matched opener, just return a literal.
        block.append_child(text(']'))
        return True

    if not opener.get('active'):
        # Opener was deactivated: emit a literal and take the opener off
        # the emphasis stack.
        block.append_child(text(']'))
        self.removeDelimiter(opener)
        return True

    # If we got here, opener is a potential opener.
    is_image = opener.get('cc') == '!'

    # Inline link?
    if self.peek() == '(':
        self.pos += 1
        self.spnl()
        dest = self.parseLinkDestination()
        if dest is not None and self.spnl():
            # Make sure there's a space before the title.
            if re.match(reWhitespaceChar, self.subject[self.pos - 1]):
                title = self.parseLinkTitle()
            if self.spnl() and self.peek() == ')':
                self.pos += 1
                matched = True
        if not matched:
            # The parenthesised part is not a valid inline link: rewind so
            # the reference lookup below starts right after the ']'.
            self.pos = startpos

    if not matched:
        # Next, see if there's a link label.
        savepos = self.pos
        beforelabel = self.pos
        n = self.parseLinkLabel()
        if n == 0 or n == 2:
            # Empty or missing second label: use the first label.
            reflabel = self.subject[opener['index']:startpos]
        else:
            reflabel = self.subject[beforelabel:beforelabel + n]
        if n == 0:
            # If shortcut reference link, rewind before spaces we skipped.
            self.pos = savepos

        # Look up the raw label in the reference map.
        link = self.refmap.get(normalizeReference(reflabel))
        if link:
            dest = link['destination']
            title = link['title']
            matched = True

    if matched:
        node = Node('Image' if is_image else 'Link', None)
        node.destination = dest
        node.title = title or ''

        # Move every inline after the opener's node into the new node.
        tmp = opener.get('node').nxt
        while tmp:
            nxt = tmp.nxt
            tmp.unlink()
            node.append_child(tmp)
            tmp = nxt
        block.append_child(node)
        # processEmphasis will remove this and later delimiters.
        self.processEmphasis(opener.get('previous'))
        opener.get('node').unlink()

        # Now, for a link, we also deactivate earlier link openers
        # (no links in links).
        if not is_image:
            opener = self.delimiters
            while opener is not None:
                if opener.get('cc') == '[':
                    opener['active'] = False
                opener = opener.get('previous')

        return True

    # No match: remove this opener from the stack and emit a literal ']'.
    self.removeDelimiter(opener)
    self.pos = startpos
    block.append_child(text(']'))
    return True
def processEmphasis(self, stack_bottom):
    """
    Resolve emphasis delimiters and quote characters on the delimiter stack.

    Works on the delimiters above ``stack_bottom``: matched ``*``/``_`` runs
    are turned into 'Emph' or 'Strong' nodes, and ``'``/``"`` delimiters get
    their node literal replaced with curly quotes.  When done, every
    delimiter above ``stack_bottom`` has been removed from the stack.
    """
    # (opening, closing) literal for each quote character.
    curly = {"'": ('\u2018', '\u2019'), '"': ('\u201C', '\u201D')}
    # Per-character lower bound for opener searches; its keys double as the
    # set of characters this method handles.
    openers_bottom = dict.fromkeys(('_', '*', "'", '"'), stack_bottom)

    # Find first closer above stack_bottom.
    closer = self.delimiters
    while closer is not None and closer.get('previous') != stack_bottom:
        closer = closer.get('previous')

    # Move forward, looking for closers, and handling each.
    while closer is not None:
        closercc = closer.get('cc')
        if not closer.get('can_close') or closercc not in openers_bottom:
            closer = closer.get('next')
            continue

        # Found a closer; now look back for the first matching opener.
        floor = openers_bottom[closercc]
        opener_found = False
        opener = closer.get('previous')
        while (opener is not None and opener != stack_bottom
                and opener != floor):
            if opener.get('cc') == closercc and opener.get('can_open'):
                opener_found = True
                break
            opener = opener.get('previous')
        old_closer = closer

        if closercc in ('*', '_'):
            if not opener_found:
                closer = closer.get('next')
            else:
                # Work out how many delimiters this pairing consumes.
                closer_count = closer['numdelims']
                opener_count = opener['numdelims']
                if closer_count < 3 or opener_count < 3:
                    use_delims = min(closer_count, opener_count)
                elif closer_count % 2 == 0:
                    use_delims = 2
                else:
                    use_delims = 1

                opener_inl = opener.get('node')
                closer_inl = closer.get('node')

                # Remove used delimiters from stack entries and inlines.
                opener['numdelims'] -= use_delims
                closer['numdelims'] -= use_delims
                keep = len(opener_inl.literal) - use_delims
                opener_inl.literal = opener_inl.literal[:keep]
                keep = len(closer_inl.literal) - use_delims
                closer_inl.literal = closer_inl.literal[:keep]

                # Build the new node and move everything between the two
                # delimiter inlines into it.
                emph = Node('Emph' if use_delims == 1 else 'Strong', None)
                inner = opener_inl.nxt
                while inner and inner != closer_inl:
                    following = inner.nxt
                    inner.unlink()
                    emph.append_child(inner)
                    inner = following
                opener_inl.insert_after(emph)

                # Remove entries between opener and closer in the stack.
                self.removeDelimitersBetween(opener, closer)

                # A fully consumed opener goes away with its inline.
                if opener['numdelims'] == 0:
                    opener_inl.unlink()
                    self.removeDelimiter(opener)

                # Likewise for the closer; then continue with its successor.
                if closer['numdelims'] == 0:
                    closer_inl.unlink()
                    successor = closer['next']
                    self.removeDelimiter(closer)
                    closer = successor
        else:
            # Quote character: the closer always becomes a closing quote,
            # the opener (if any) an opening quote.
            open_quote, close_quote = curly[closercc]
            closer['node'].literal = close_quote
            if opener_found:
                opener['node'].literal = open_quote
            closer = closer['next']

        if not opener_found:
            # Set lower bound for future searches for openers.
            openers_bottom[closercc] = old_closer['previous']
            if not old_closer['can_open']:
                # A closer that can't also open is of no further use once
                # we've seen there's no matching opener.
                self.removeDelimiter(old_closer)

    # Remove all delimiters above stack_bottom.
    while self.delimiters is not None and self.delimiters != stack_bottom:
        self.removeDelimiter(self.delimiters)
def parseCloseBracket(self, block):
    """
    Try to match a close bracket against an opener in the delimiter stack.

    Adds either a link or image, or a plain ``]`` character, to ``block``'s
    children.  If there is a matching delimiter, it is removed from the
    delimiter stack.

    The inline form (``[text](destination "title")``) is attempted first.
    Should it fail, parsing rewinds to just after the ``]`` and falls back
    to a reference lookup, so ``[foo](not a link)`` still becomes a link
    when a ``[foo]`` reference is defined.

    Returns True in every case, since the ``]`` is always consumed.
    """
    title = None
    matched = False
    self.pos += 1
    startpos = self.pos

    # Walk down the delimiter stack to the nearest [ or ![ entry.
    opener = self.delimiters
    while opener is not None:
        if opener.get('cc') == '[' or opener.get('cc') == '!':
            break
        opener = opener.get('previous')

    if opener is None:
        # Nothing to pair with: literal bracket.
        block.append_child(text(']'))
        return True

    if not opener.get('active'):
        # Inactive opener: literal bracket, and drop the opener from the
        # emphasis stack.
        block.append_child(text(']'))
        self.removeDelimiter(opener)
        return True

    # From here on the opener is a candidate.
    is_image = opener.get('cc') == '!'

    # Inline link?
    if self.peek() == '(':
        self.pos += 1
        self.spnl()
        dest = self.parseLinkDestination()
        if dest is not None and self.spnl():
            # A title is only looked for when preceded by whitespace.
            if re.match(reWhitespaceChar, self.subject[self.pos - 1]):
                title = self.parseLinkTitle()
            if self.spnl() and self.peek() == ')':
                self.pos += 1
                matched = True
        if not matched:
            # Not an inline link after all; rewind so that the reference
            # lookup below starts immediately after the ']'.
            self.pos = startpos

    if not matched:
        # Reference link: look for a following link label.
        savepos = self.pos
        beforelabel = self.pos
        n = self.parseLinkLabel()
        if n == 0 or n == 2:
            # Empty or missing second label: the bracketed text itself is
            # the reference label.
            reflabel = self.subject[opener['index']:startpos]
        else:
            reflabel = self.subject[beforelabel:beforelabel + n]
        if n == 0:
            # If shortcut reference link, rewind before spaces we skipped.
            self.pos = savepos

        link = self.refmap.get(normalizeReference(reflabel))
        if link:
            dest = link['destination']
            title = link['title']
            matched = True

    if not matched:
        # No match: remove this opener from the stack, emit a literal.
        self.removeDelimiter(opener)
        self.pos = startpos
        block.append_child(text(']'))
        return True

    node = Node('Image' if is_image else 'Link', None)
    node.destination = dest
    node.title = title or ''

    # Everything after the opener's inline becomes the node's content.
    tmp = opener.get('node').nxt
    while tmp:
        nxt = tmp.nxt
        tmp.unlink()
        node.append_child(tmp)
        tmp = nxt
    block.append_child(node)
    # processEmphasis will remove this and later delimiters.
    self.processEmphasis(opener.get('previous'))
    opener.get('node').unlink()

    # For a link, also deactivate earlier link openers (no links in links).
    if not is_image:
        opener = self.delimiters
        while opener is not None:
            if opener.get('cc') == '[':
                opener['active'] = False
            opener = opener.get('previous')

    return True
def processEmphasis(self, stack_bottom):
    """
    Process the delimiter stack down to (but not including) ``stack_bottom``.

    ``*`` and ``_`` closers are paired with the nearest eligible opener and
    replaced by 'Emph' / 'Strong' nodes; ``'`` and ``"`` delimiters have
    their node literal swapped for curly quotes.  Afterwards all delimiters
    above ``stack_bottom`` are removed.
    """
    handled = ('_', '*', "'", '"')
    # How far down an opener search may go, tracked per character.
    openers_bottom = {cc: stack_bottom for cc in handled}

    # Start from the lowest delimiter that sits above stack_bottom.
    closer = self.delimiters
    while closer is not None and closer.get('previous') != stack_bottom:
        closer = closer.get('previous')

    # Walk upwards, treating each entry as a potential closer.
    while closer is not None:
        closercc = closer.get('cc')
        if not (closer.get('can_close') and closercc in handled):
            closer = closer.get('next')
            continue

        # Look back for the first matching opener.
        opener_found = False
        opener = closer.get('previous')
        while (opener is not None and opener != stack_bottom
                and opener != openers_bottom[closercc]):
            if opener.get('cc') == closercc and opener.get('can_open'):
                opener_found = True
                break
            opener = opener.get('previous')
        old_closer = closer

        if closercc == "'":
            closer['node'].literal = '\u2019'
            if opener_found:
                opener['node'].literal = '\u2018'
            closer = closer['next']
        elif closercc == '"':
            closer['node'].literal = '\u201D'
            if opener_found:
                opener['node'].literal = '\u201C'
            closer = closer['next']
        elif not opener_found:
            # '*' or '_' without an opener: just move on.
            closer = closer.get('next')
        else:
            # '*' or '_' with an opener: decide how many delimiters are
            # used up by this pairing.
            n_close = closer['numdelims']
            n_open = opener['numdelims']
            if n_close < 3 or n_open < 3:
                use_delims = n_close if n_close <= n_open else n_open
            else:
                use_delims = 2 if n_close % 2 == 0 else 1

            opener_inl = opener.get('node')
            closer_inl = closer.get('node')

            # Shrink both the stack counts and the literal delimiter text.
            opener['numdelims'] -= use_delims
            closer['numdelims'] -= use_delims
            opener_inl.literal = opener_inl.literal[
                :len(opener_inl.literal) - use_delims]
            closer_inl.literal = closer_inl.literal[
                :len(closer_inl.literal) - use_delims]

            # One delimiter means emphasis, otherwise strong.
            if use_delims == 1:
                wrapper = Node('Emph', None)
            else:
                wrapper = Node('Strong', None)

            # Re-parent the inlines between opener and closer.
            item = opener_inl.nxt
            while item and item != closer_inl:
                upcoming = item.nxt
                item.unlink()
                wrapper.append_child(item)
                item = upcoming
            opener_inl.insert_after(wrapper)

            # Drop stack entries lying between opener and closer.
            self.removeDelimitersBetween(opener, closer)

            # If opener has 0 delims, remove it and the inline.
            if opener['numdelims'] == 0:
                opener_inl.unlink()
                self.removeDelimiter(opener)

            # Same for the closer, then resume from the entry after it.
            if closer['numdelims'] == 0:
                closer_inl.unlink()
                after_closer = closer['next']
                self.removeDelimiter(closer)
                closer = after_closer

        if not opener_found:
            # Set lower bound for future searches for openers.
            openers_bottom[closercc] = old_closer['previous']
            if not old_closer['can_open']:
                # We can remove a closer that can't be an opener, once
                # we've seen there's no matching opener.
                self.removeDelimiter(old_closer)

    # Remove all remaining delimiters above stack_bottom.
    while self.delimiters is not None and self.delimiters != stack_bottom:
        self.removeDelimiter(self.delimiters)
def parseCloseBracket(self, block):
    """
    Try to match a close bracket against the most recent bracket opener.

    Adds either a link or image, or a plain ``]`` character, to ``block``'s
    children.  If there is a matching opener, it is removed from the
    bracket stack.

    An inline link (``[text](destination "title")``) is tried first; if it
    does not parse completely, the position is rewound and a reference
    lookup is tried instead.

    Returns True in every case, since the ``]`` is always consumed.
    """
    title = None
    matched = False
    # Must be bound up front: neither branch of the label selection below
    # assigns it when the label is empty/missing and the opener is known to
    # be followed by another bracket.
    reflabel = None
    self.pos += 1
    startpos = self.pos

    # Get last [ or ![
    opener = self.brackets

    if opener is None:
        # No matched opener, just return a literal.
        block.append_child(text(']'))
        return True

    if not opener.get('active'):
        # Inactive opener: return a literal and take the opener off the
        # brackets stack.
        block.append_child(text(']'))
        self.removeBracket()
        return True

    # If we got here, opener is a potential opener.
    is_image = opener.get('image')

    # Check to see if we have a link/image.
    savepos = self.pos

    # Inline link?
    if self.peek() == '(':
        self.pos += 1
        self.spnl()
        dest = self.parseLinkDestination()
        if dest is not None and self.spnl():
            # Make sure there's a space before the title.
            if re.match(reWhitespaceChar, self.subject[self.pos - 1]):
                title = self.parseLinkTitle()
            if self.spnl() and self.peek() == ')':
                self.pos += 1
                matched = True
        if not matched:
            # Rewind on ANY inline failure (bad destination, bad title or
            # missing ')'), so the label search below starts right after
            # the ']' rather than somewhere inside the parentheses.
            self.pos = savepos

    if not matched:
        # Next, see if there's a link label.
        beforelabel = self.pos
        n = self.parseLinkLabel()
        if n > 2:
            reflabel = self.subject[beforelabel:beforelabel + n]
        elif not opener.get('bracket_after'):
            # Empty or missing second label means to use the first
            # label as the reference.  The reference must not
            # contain a bracket.  If we know there's a bracket, we
            # don't even bother checking it.
            reflabel = self.subject[opener.get('index'):startpos]
        if n == 0:
            # If shortcut reference link, rewind before spaces we skipped.
            self.pos = savepos

        if reflabel:
            # Look up the raw label in the reference map.
            link = self.refmap.get(normalizeReference(reflabel))
            if link:
                dest = link['destination']
                title = link['title']
                matched = True

    if matched:
        node = Node('image' if is_image else 'link', None)
        node.destination = dest
        node.title = title or ''

        # Move every inline after the opener's node into the new node.
        tmp = opener.get('node').nxt
        while tmp:
            nxt = tmp.nxt
            tmp.unlink()
            node.append_child(tmp)
            tmp = nxt
        block.append_child(node)
        # We remove this bracket and processEmphasis will remove later
        # delimiters.
        self.processEmphasis(opener.get('previousDelimiter'))
        self.removeBracket()
        opener.get('node').unlink()

        # Now, for a link, we also deactivate earlier link openers
        # (no links in links).
        if not is_image:
            opener = self.brackets
            while opener is not None:
                if not opener.get('image'):
                    opener['active'] = False
                opener = opener.get('previous')

        return True

    # No match: remove this opener from the stack and emit a literal ']'.
    self.removeBracket()
    self.pos = startpos
    block.append_child(text(']'))
    return True
def nestSections(block, level=1):
    """
    Group the children of ``block`` into 'MDsection' nodes.

    CommonMark itself has no sections, so they are added here.  Every child
    heading whose level equals ``level`` starts a new section that also
    takes the nodes following it, up to the next such heading.  Children
    that come before the first such heading stay direct children of
    ``block``.  Each section created is then processed again with
    ``level + 1``.

    If ``block`` has no child heading of exactly ``level``, nothing is done.
    """
    def starts_section(node):
        return node.t == 'heading' and node.level == level

    # Do we need to do anything?  Only if some child heading has this level.
    probe = block.first_child
    while probe is not None and not starts_section(probe):
        probe = probe.nxt
    if probe is None:
        return

    regrouped = []
    section = Node('MDsection', 0)
    section.parent = block

    node = block.first_child
    while node is not None:
        # A split point closes the section collected so far (if any).
        if starts_section(node) and section.first_child is not None:
            finalizeSection(section)
            regrouped.append(section)
            section = Node('MDsection', 0)

        # Remember the successor before the node is moved.
        following = node.nxt

        # An empty section only accepts a heading of this level, which
        # avoids title-less sections at the start.
        if section.first_child is not None or starts_section(node):
            section.append_child(node)
        else:
            regrouped.append(node)
        node = following

    # Flush the trailing section if it collected anything.
    if section.first_child is not None:
        finalizeSection(section)
        regrouped.append(section)

    # Rebuild block's child list from the regrouped nodes.
    block.first_child = None
    block.last_child = None
    for child in regrouped:
        # Handle nesting.
        if child.t == 'MDsection':
            nestSections(child, level=level + 1)

        # Append to the end of block's children.
        if block.first_child is None:
            block.first_child = child
        else:
            block.last_child.nxt = child
        child.parent = block
        child.nxt = None
        child.prev = block.last_child
        block.last_child = child