def _image_repl(self, groups):
    """Create an image node for an image or attachment reference.

    The image node is built under the current node from the stripped
    ``image_target`` group.  A child text node receives the stripped
    ``image_text`` group, or the image node's ``content`` when that text
    is empty or missing.  The running inline text node is reset afterwards.
    """
    image_target = groups.get('image_target', "").strip()
    caption = groups.get('image_text', "") or ""
    caption = caption.strip()

    image_node = DocNode("image", self.cur, image_target)
    if caption:
        DocNode('text', image_node, caption)
    else:
        DocNode('text', image_node, image_node.content)
    self.text = None
def _item_repl(self, groups):
    """Handle a single list item.

    The last character of the ``item_head`` group selects the list kind
    (``#`` means numbered, anything else means bulleted) and its length
    determines the nesting level.  The item is attached to an existing
    list of the same kind and level if one is found up the tree; otherwise
    a new list node is opened first.  The ``item_text`` group is then
    parsed as inline markup inside the new ``list_item`` node.
    """
    head = groups.get('item_head', "")
    item_text = groups.get('item_text', "")

    kind = 'number_list' if head[-1] == '#' else 'bullet_list'
    level = len(head) - 1

    list_kinds = ('number_list', 'bullet_list')
    boundary_kinds = ('document', 'section', 'blockquote')

    # Climb towards the root until we hit a list on the wanted level or a
    # node kind that terminates the search.
    candidate = self.cur
    while candidate:
        if candidate.kind in list_kinds and candidate.level == level:
            break
        if candidate.kind in boundary_kinds:
            break
        candidate = candidate.parent

    if candidate and candidate.kind == kind:
        self.cur = candidate
    else:
        # No matching list found: open a new list level.
        self.cur = self._upto(
            self.cur, ('list_item', 'document', 'section', 'blockquote'))
        new_list = DocNode(kind, self.cur)
        new_list.level = level
        self.cur = new_list

    item_node = DocNode('list_item', self.cur)
    item_node.level = level + 1
    self.cur = item_node

    self.parse_inline(item_text)
    self.text = None
def handle_starttag(self, tag, attrs):
    """Open a tree node for a start tag.

    Tags listed in ``IGNORE_TAGS`` are skipped.  Tags matching
    ``headline_tag_re`` become ``headline`` nodes whose level is taken
    from the first regex group.  ``ul``/``ol`` bump the list nesting
    counter; ``li``/``ul``/``ol`` nodes carry the current counter as their
    level.  ``img`` and ``br`` get a node but never become the current
    node.  Every other tag becomes the new current node.
    """
    self.debug_msg("starttag", f"{tag!r} atts: {attrs}")

    if tag in IGNORE_TAGS:
        return

    heading = headline_tag_re.match(tag)
    if heading:
        heading_level = int(heading.group(1))
        self.cur = DocNode("headline", self.cur, level=heading_level)
        return

    if tag in ("img", "br"):
        # Workaround for img/br written without the self-closing slash,
        # e.g. <img src="/image.jpg"> instead of <img src="/image.jpg" />:
        # there may be no matching end tag, so do not descend into the node.
        DocNode(tag, self.cur, None, attrs)
        return

    if tag in ("ul", "ol"):
        self.__list_level += 1

    if tag in ("li", "ul", "ol"):
        self.cur = DocNode(tag, self.cur, None, attrs, level=self.__list_level)
    else:
        self.cur = DocNode(tag, self.cur, None, attrs)
def _add_macro(self, groups, macro_type, name_key, args_key, text_key=None):
    """Create a macro node; shared by the inline, inline-tag and block variants.

    ``macro_type`` must be ``"macro_inline"`` or ``"macro_block"``.
    ``name_key``, ``args_key`` and the optional ``text_key`` name the
    entries of ``groups`` holding the macro name, its arguments and its
    body text.  The macro name is also recorded in
    ``self.root.used_macros``.
    """
    assert macro_type in ("macro_inline", "macro_block")

    # Without a text key the macro has no body text at all.
    body_text = groups.get(text_key, "").strip() if text_key else None

    macro_node = DocNode(macro_type, self.cur, body_text)

    name = groups[name_key]
    macro_node.macro_name = name
    self.root.used_macros.add(name)

    macro_node.macro_args = groups.get(args_key, "").strip()
    self.text = None
def _pre_block_repl(self, groups):
    """Create a ``pre_block`` node from a preformatted block.

    Every match of ``self.pre_escape_re`` in the ``pre_block_text`` group
    is replaced by its ``indent`` and ``rest`` groups joined together
    (presumably dropping an escaping tilde -- confirm against the
    pattern).  The ``pre_block_kind`` group, or an empty string, is stored
    on the node as ``sect``.
    """
    self._upto_block()
    block_kind = groups.get('pre_block_kind', None)
    raw_text = groups.get('pre_block_text', "")

    def keep_indent_and_rest(match):
        return match.group('indent') + match.group('rest')

    cleaned = self.pre_escape_re.sub(keep_indent_and_rest, raw_text)

    pre_node = DocNode('pre_block', self.cur, cleaned)
    pre_node.sect = block_kind or ''
    self.text = None
def _pre_block_repl(self, groups):
    """Turn a preformatted block into a ``pre_block`` node.

    The ``pre_block_text`` group is run through ``self.pre_escape_re``;
    each match is rewritten to the concatenation of its ``indent`` and
    ``rest`` groups.  The node's ``sect`` attribute is the
    ``pre_block_kind`` group, or ``""`` when that is empty or missing.
    """
    self._upto_block()

    section = groups.get("pre_block_kind", None)
    body = groups.get("pre_block_text", "")
    body = self.pre_escape_re.sub(
        lambda found: found.group("indent") + found.group("rest"),
        body,
    )

    block_node = DocNode("pre_block", self.cur, body)
    block_node.sect = section if section else ""
    self.text = None
def handle_startendtag(self, tag, attrs):
    """Create a node for a start-end tag without descending into it.

    When ``tag`` is one of the block/inline placeholder tags, the node
    kind is ``"<tag>_<type attribute>"`` and its content is the entry of
    ``self.blockdata`` indexed by the integer ``id`` attribute.  Any other
    tag becomes a plain node carrying the original ``attrs``.
    """
    self.debug_msg("startendtag", "%r atts: %s" % (tag, attrs))
    attributes = dict(attrs)

    placeholder_tags = (self._block_placeholder, self._inline_placeholder)
    if tag not in placeholder_tags:
        DocNode(tag, self.cur, None, attrs)
        return

    data_index = int(attributes["id"])
    node_kind = "%s_%s" % (tag, attributes["type"])
    DocNode(node_kind, self.cur, content=self.blockdata[data_index])
def handle_startendtag(self, tag, attrs):
    """Add a node for a start-end tag; the current node stays unchanged.

    Placeholder tags (``self._block_placeholder`` and
    ``self._inline_placeholder``) are resolved: the node kind combines the
    tag with the ``type`` attribute, and the content is looked up in
    ``self.blockdata`` by the integer ``id`` attribute.  All other tags
    get a plain node with the original ``attrs``.
    """
    self.debug_msg("startendtag", f"{tag!r} atts: {attrs}")
    attributes = dict(attrs)

    is_placeholder = tag in (self._block_placeholder, self._inline_placeholder)
    if not is_placeholder:
        DocNode(tag, self.cur, None, attrs)
        return

    data_index = int(attributes["id"])
    node_kind = f"{tag}_{attributes['type']}"
    stored = self.blockdata[data_index]
    DocNode(node_kind, self.cur, content=stored)
def _url_repl(self, groups):
    """Handle a raw URL found in the text.

    When the ``escaped_url`` group is set, the ``url_target`` group is
    appended to the running text node (created on demand) so it stays
    plain text.  Otherwise a ``link`` node is created whose content and
    child text node are both the URL, and the running text node is reset.
    """
    if groups.get('escaped_url'):
        # Escaped URL: keep it as ordinary text.
        if self.text is None:
            self.text = DocNode('text', self.cur, "")
        self.text.content += groups.get('url_target')
        return

    # Unescaped URL: build a link labelled with the URL itself.
    url = groups.get('url_target', "")
    link_node = DocNode('link', self.cur)
    link_node.content = url
    DocNode('text', link_node, link_node.content)
    self.text = None
def __init__(self, raw, block_rules=None, blog_line_breaks=True, debug=False):
    """Set up the parser state for the markup string ``raw``.

    ``block_rules`` supplies the block-level regex rules and flags; when
    omitted, a ``BlockRules`` instance is built with the given
    ``blog_line_breaks`` setting.  A fresh ``document`` root node is
    created and made the current node.
    """
    assert isinstance(raw, str)
    self.raw = raw

    rules = block_rules
    if rules is None:
        rules = BlockRules(blog_line_breaks=blog_line_breaks)

    self.blog_line_breaks = blog_line_breaks
    self.debug = debug  # TODO: use logging

    # All block rules are combined into one alternation pattern.
    combined_pattern = '|'.join(rules.rules)
    self.block_re = re.compile(combined_pattern, rules.re_flags)

    document = DocNode('document', None)
    self.root = document
    self.cur = document  # the most recent document node
    self.text = None  # the node inline characters are appended to
    self.last_text_break = None  # last break node inserted by _text_repl()

    # Collects the names of all macros used in the text.
    document.used_macros = set()
def _inline_mark(self, groups, key):
    """Wrap inline markup of kind ``key`` in its own node.

    A node of kind ``key`` becomes the current node, the ``<key>_text``
    group is parsed as inline markup inside it, and the current node is
    then moved to the parent of the node found by
    ``self._upto(self.cur, (key,))``.
    """
    self.cur = DocNode(key, self.cur)
    self.text = None

    inner_markup = groups[f"{key}_text"]
    self.parse_inline(inner_markup)

    enclosing = self._upto(self.cur, (key,))
    self.cur = enclosing.parent
    self.text = None
def _add_macro(self, groups, macro_type, name_key, args_key, text_key=None):
    """Generic macro handler used by the inline, inline-tag and block variants.

    ``macro_type`` must be ``"macro_inline"`` or ``"macro_block"``.  The
    node's content is the stripped ``text_key`` group, or ``None`` when no
    ``text_key`` is given.  The macro name and stripped arguments are
    stored on the node as ``macro_name`` and ``macro_args``.
    """
    assert macro_type in ("macro_inline", "macro_block")

    body_text = groups.get(text_key, "").strip() if text_key else None

    macro_node = DocNode(macro_type, self.cur, body_text)
    macro_node.macro_name = groups[name_key]
    macro_node.macro_args = groups.get(args_key, "").strip()
    self.text = None
def _link_repl(self, groups):
    """Handle all kinds of links.

    A ``link`` node with the ``link_target`` group as content temporarily
    becomes the current node while the stripped ``link_text`` group is
    scanned with ``self.link_re``; each match is passed to
    ``self._replace``.  The previous current node is restored afterwards.
    """
    link_target = groups.get('link_target', "")
    label = groups.get('link_text', "") or ""
    label = label.strip()

    outer = self.cur
    link_node = DocNode('link', outer)
    link_node.content = link_target
    self.cur = link_node
    self.text = None

    # Only the callback's side effects matter; the substituted string is
    # discarded.
    re.sub(self.link_re, self._replace, label)

    self.cur = outer
    self.text = None
def _table_repl(self, groups):
    """Add one table row, opening a new table node if necessary.

    The stripped ``table`` group is split into cells with
    ``self.cell_re``.  A match with a non-empty ``cell`` group becomes a
    ``table_cell``; otherwise the ``head`` group, stripped of ``=`` and
    spaces, becomes a ``table_head``.  Each cell's text is parsed as
    inline markup.
    """
    row_markup = groups.get('table', '|').strip()

    self.cur = self._upto(self.cur, ('table', 'document', 'section', 'blockquote'))
    if self.cur.kind != 'table':
        self.cur = DocNode('table', self.cur)
    table_node = self.cur
    row_node = DocNode('table_row', table_node)

    for match in self.cell_re.finditer(row_markup):
        cell_markup = match.group('cell')
        if not cell_markup:
            content = match.group('head').strip('= ')
            self.cur = DocNode('table_head', row_node)
            # Head cells start out with an empty text node.
            self.text = DocNode('text', self.cur, "")
        else:
            content = cell_markup.strip()
            self.cur = DocNode('table_cell', row_node)
            self.text = None
        self.parse_inline(content)

    self.cur = table_node
    self.text = None
def _text_repl(self, groups):
    """Handle a line of ordinary text.

    Table and list containers are left first; at document, section or
    blockquote level a new paragraph is opened.  The ``text`` group is
    parsed as inline markup, prefixed with one space when the ``space``
    group is set.  When the ``break`` group is set and the current node is
    a paragraph, emphasis, strong or inline-pre node, a ``break`` node is
    appended and remembered in ``self.last_text_break``.
    """
    container_kinds = ('table', 'table_row', 'bullet_list', 'number_list')
    if self.cur.kind in container_kinds:
        self._upto_block()

    block_kinds = ('document', 'section', 'blockquote')
    if self.cur.kind in block_kinds:
        self.cur = DocNode('paragraph', self.cur)

    line = groups.get('text', "")
    if groups.get('space'):
        # Wikipedia-style line breaks: separate a continued line from the
        # previous one with a single space.
        line = " " + line
    self.parse_inline(line)

    wants_break = groups.get('break')
    if wants_break and self.cur.kind in ('paragraph', 'emphasis', 'strong', 'pre_inline'):
        self.last_text_break = DocNode('break', self.cur, "")

    self.text = None
def __init__(self, debug=False):
    """Initialise the parser with an empty document tree.

    With ``debug`` enabled a warning is issued and ``self.result`` is a
    ``DebugList``; otherwise it is a plain list.  The list nesting counter
    starts at zero.
    """
    super().__init__(convert_charrefs=False)

    self.debugging = debug
    if not self.debugging:
        self.result = []
    else:
        warnings.warn(
            message="Html2Creole debug is on! warn every data append.")
        self.result = DebugList(self)

    self.blockdata = []

    document = DocNode("document", None)
    self.root = document
    self.cur = document

    self.__list_level = 0
def _head_repl(self, groups):
    """Create a ``header`` node from a headline.

    The node's text is the stripped ``head_text`` group and its ``level``
    is the length of the ``head_head`` group.
    """
    self._upto_block()

    title = groups["head_text"].strip()
    header_node = DocNode("header", self.cur, title)
    header_node.level = len(groups["head_head"])

    self.text = None
def handle_entityref(self, name):
    """Record an entity reference as an ``entityref`` node under the current node."""
    shown = repr(name)
    self.debug_msg("entityref", shown)
    DocNode("entityref", self.cur, content=name)
def handle_data(self, data):
    """Record character data as a ``data`` node under the current node.

    ``data`` is asserted to be a ``str``.
    """
    shown = repr(data)
    self.debug_msg("data", shown)
    assert isinstance(data, str)
    DocNode("data", self.cur, content=data)
def _char_repl(self, groups):
    """Append the ``char`` group to the running text node.

    A new empty ``text`` node is created under the current node when no
    text node is active yet.
    """
    text_node = self.text
    if text_node is None:
        text_node = DocNode('text', self.cur, "")
        self.text = text_node
    text_node.content += groups.get('char', "")
def _linebreak_repl(self, groups):
    """Insert a ``break`` node under the current node and reset the running text node."""
    parent = self.cur
    DocNode('break', parent, None)
    self.text = None
def _pre_inline_repl(self, groups):
    """Create a ``pre_inline`` node from the ``pre_inline_text`` group.

    The running text node is reset afterwards.
    """
    literal = groups.get('pre_inline_text', "")
    DocNode('pre_inline', self.cur, literal)
    self.text = None
def _line_repl(self, groups):
    """Carry a newline of the original markup over as a ``line`` node.

    The node is added at block level with empty content.
    """
    self._upto_block()
    parent = self.cur
    DocNode('line', parent, "")
def handle_entityref(self, name):
    """Add an ``entityref`` node holding ``name`` under the current node."""
    shown = "%r" % name
    self.debug_msg("entityref", shown)
    DocNode("entityref", self.cur, content=name)
def _head_repl(self, groups):
    """Add a ``header`` node at block level.

    The stripped ``head_text`` group is the node's text; the number of
    characters in the ``head_head`` group is its ``level``.
    """
    self._upto_block()

    heading_text = groups['head_text'].strip()
    heading_level = len(groups['head_head'])

    header_node = DocNode('header', self.cur, heading_text)
    header_node.level = heading_level
    self.text = None
def _separator_repl(self, groups):
    """Add a ``separator`` node at block level."""
    self._upto_block()
    parent = self.cur
    DocNode('separator', parent)
def handle_data(self, data):
    """Add a ``data`` node holding ``data`` under the current node.

    Values that are instances of ``BINARY_TYPE`` are passed through
    ``unicode()`` before being stored.
    """
    shown = "%r" % data
    self.debug_msg("data", shown)

    text = unicode(data) if isinstance(data, BINARY_TYPE) else data
    DocNode("data", self.cur, content=text)