示例#1
0
    def handle_starttag(self, tag, attrs):
        """
        Create a document node for an opening HTML tag.

        Tags found in IGNORE_TAGS are skipped. Tags matching
        headline_tag_re become "headline" nodes whose level is taken from
        the first regex group. List tags carry the current list nesting
        level; "img" and "br" produce a node without descending into it.
        Every other tag becomes the new current node.
        """
        self.debug_msg("starttag", "%r atts: %s" % (tag, attrs))

        if tag in IGNORE_TAGS:
            return

        headline_match = headline_tag_re.match(tag)
        if headline_match:
            headline_level = int(headline_match.group(1))
            self.cur = DocNode("headline", self.cur, level=headline_level)
            return

        if tag in ("img", "br"):
            # Work-around for img/br tags that are not marked as
            # start-end tags: <img src="/image.jpg"> has no closing </img>,
            # so the node is created but self.cur is left untouched.
            DocNode(tag, self.cur, None, attrs)
            return

        if tag not in ("li", "ul", "ol"):
            self.cur = DocNode(tag, self.cur, None, attrs)
            return

        # Only the list containers open a new nesting level, "li" does not.
        if tag != "li":
            self.__list_level += 1
        self.cur = DocNode(tag,
                           self.cur,
                           None,
                           attrs,
                           level=self.__list_level)
示例#2
0
 def _image_repl(self, groups):
     """Handle images and attachments included in the page."""
     image_target = groups.get("image_target", "").strip()
     raw_text = groups.get("image_text", "")
     image_text = (raw_text or "").strip()

     image_node = DocNode("image", self.cur, image_target)
     # Without an explicit text the image node's own content is used.
     label = image_text if image_text else image_node.content
     DocNode("text", image_node, label)

     # Following characters must start a fresh text node.
     self.text = None
示例#3
0
 def _item_repl(self, groups):
     """
     Handle a single list item.

     The last character of the bullet decides the list kind ("#" means
     numbered), its length decides the nesting level. An enclosing list
     of the same kind and level is reused, otherwise a new list node is
     opened. The item text is then parsed as inline markup.
     """
     bullet = groups.get("item_head", "")
     text = groups.get("item_text", "")
     kind = "number_list" if bullet[-1] == "#" else "bullet_list"
     level = len(bullet) - 1

     # Walk up the tree until a list of the wanted level is found or a
     # block container stops the search.
     lst = self.cur
     while lst:
         if lst.kind in ("number_list", "bullet_list") and lst.level == level:
             break
         if lst.kind in ("document", "section", "blockquote"):
             break
         lst = lst.parent

     if lst and lst.kind == kind:
         self.cur = lst
     else:
         # No matching list found: create a new level of list
         self.cur = self._upto(
             self.cur, ("list_item", "document", "section", "blockquote"))
         self.cur = DocNode(kind, self.cur)
         self.cur.level = level

     self.cur = DocNode("list_item", self.cur)
     self.cur.level = level + 1
     self.parse_inline(text)
     self.text = None
示例#4
0
    def _add_macro(self,
                   groups,
                   macro_type,
                   name_key,
                   args_key,
                   text_key=None):
        """
        Generic macro handler shared by all variants:
        inline, inline-tag, block

        groups: mapping of the matched regex group names to their values
        macro_type: node kind, either "macro_inline" or "macro_block"
        name_key: key in `groups` holding the macro name
        args_key: key in `groups` holding the raw macro arguments
        text_key: optional key in `groups` holding the macro body text
        """
        assert macro_type in ("macro_inline", "macro_block")

        # Macros without a body keep None as node content.
        macro_text = groups.get(text_key, "").strip() if text_key else None

        macro_node = DocNode(macro_type, self.cur, macro_text)
        name = groups[name_key]
        macro_node.macro_name = name
        # Record the macro name on the document root.
        self.root.used_macros.add(name)
        macro_node.macro_args = groups.get(args_key, "").strip()

        self.text = None
示例#5
0
    def _text_repl(self, groups):
        """
        Handle a run of text.

        Leaves table/list context first, opens a paragraph when the
        current node is a block container, parses the text as inline
        markup and optionally appends a "break" node.
        """
        table_or_list_kinds = ("table", "table_row", "bullet_list",
                               "number_list")
        if self.cur.kind in table_or_list_kinds:
            self._upto_block()

        container_kinds = ("document", "section", "blockquote")
        if self.cur.kind in container_kinds:
            self.cur = DocNode("paragraph", self.cur)

        text = groups.get("text", "")
        if groups.get("space"):
            # use wikipedia style line breaks and separate a new line with
            # one space
            text = " " + text
        self.parse_inline(text)

        breakable_kinds = ("paragraph", "emphasis", "strong", "pre_inline")
        if groups.get("break") and self.cur.kind in breakable_kinds:
            # Remember the break node on the parser.
            self.last_text_break = DocNode("break", self.cur, "")

        self.text = None
示例#6
0
    def _pre_block_repl(self, groups):
        """
        Handle a preformatted block.

        Every match of self.pre_escape_re in the block text is replaced by
        its "indent" and "rest" groups joined together (this presumably
        drops an escaping tilde between them). The optional block kind is
        stored on the node as `sect`.
        """
        self._upto_block()
        block_kind = groups.get("pre_block_kind", None)
        raw_text = groups.get("pre_block_text", "")

        unescaped = self.pre_escape_re.sub(
            lambda match: match.group("indent") + match.group("rest"),
            raw_text)

        pre_node = DocNode("pre_block", self.cur, unescaped)
        pre_node.sect = block_kind if block_kind else ""
        self.text = None
示例#7
0
 def _url_repl(self, groups):
     """
     Handle raw urls in text.

     An unescaped url becomes a "link" node whose content is the url and
     which holds one "text" child with the same content. An escaped url
     is appended verbatim to the current text node, which is created
     first if none is open.

     groups: mapping of the matched regex group names to their values;
         reads "escaped_url" and "url_target".
     """
     if not groups.get("escaped_url"):
         # this url is NOT escaped
         target = groups.get("url_target", "")
         node = DocNode("link", self.cur)
         node.content = target
         DocNode("text", node, node.content)
         self.text = None
     else:
         # this url is escaped, we render it as text
         if self.text is None:
             self.text = DocNode("text", self.cur, "")
         # Fall back to "" so a missing or None group cannot raise
         # TypeError on the string concatenation, matching the default
         # used by the unescaped branch.
         self.text.content += groups.get("url_target") or ""
示例#8
0
 def handle_startendtag(self, tag, attrs):
     """
     Create a document node for a self-closing tag.

     Placeholder tags (block or inline) become a "<tag>_<type>" node
     whose content is looked up in self.blockdata by the tag's "id"
     attribute. Any other tag becomes a plain node carrying its
     attributes.
     """
     self.debug_msg("startendtag", "%r atts: %s" % (tag, attrs))
     attr_dict = dict(attrs)

     placeholder_tags = (self._block_placeholder, self._inline_placeholder)
     if tag not in placeholder_tags:
         DocNode(tag, self.cur, None, attrs)
         return

     data_index = int(attr_dict["id"])
     node_kind = "%s_%s" % (tag, attr_dict["type"])
     DocNode(node_kind, self.cur, content=self.blockdata[data_index])
示例#9
0
    def _inline_mark(self, groups, key):
        """
        Handle an inline markup element of kind `key`.

        Opens a node of that kind, parses the "<key>_text" group inside
        it and afterwards continues at the parent of the nearest `key`
        node.
        """
        self.cur = DocNode(key, self.cur)
        self.text = None

        self.parse_inline(groups["%s_text" % key])

        # Parsing may have moved self.cur; go back up to the node of this
        # kind and leave it.
        mark_node = self._upto(self.cur, (key, ))
        self.cur = mark_node.parent
        self.text = None
示例#10
0
 def _link_repl(self, groups):
     """
     Handle all kinds of links.

     A "link" node with the target as content becomes the current node
     while the link text is processed; afterwards the previous current
     node is restored.
     """
     link_target = groups.get("link_target", "")
     link_text = (groups.get("link_text", "") or "").strip()

     outer = self.cur
     link_node = DocNode("link", outer)
     link_node.content = link_target
     self.cur = link_node
     self.text = None

     # re.sub() is used only to call self._replace for every match of
     # self.link_re in the link text; the substituted string is discarded.
     re.sub(self.link_re, self._replace, link_text)

     self.cur = outer
     self.text = None
示例#11
0
    def _table_repl(self, groups):
        """
        Handle one table row.

        Reuses the enclosing table or opens a new one, adds a "table_row"
        node and creates one "table_cell" or "table_head" node per match
        of self.cell_re, parsing each cell's text as inline markup.
        """
        row_markup = groups.get("table", "|").strip()

        self.cur = self._upto(self.cur,
                              ("table", "document", "section", "blockquote"))
        if self.cur.kind != "table":
            self.cur = DocNode("table", self.cur)
        table_node = self.cur
        row_node = DocNode("table_row", table_node)

        for match in self.cell_re.finditer(row_markup):
            cell_markup = match.group("cell")
            if cell_markup:
                content = cell_markup.strip()
                self.cur = DocNode("table_cell", row_node)
                self.text = None
            else:
                # Header cell: strip the "=" markers and surrounding spaces.
                content = match.group("head").strip("= ")
                self.cur = DocNode("table_head", row_node)
                self.text = DocNode("text", self.cur, "")
            self.parse_inline(content)

        # Continue at table level so a following row joins this table.
        self.cur = table_node
        self.text = None
示例#12
0
    def __init__(self, debug=False):
        """
        Set up the parser state.

        debug: when true, a warning is issued and self.result is a
            DebugList instead of a plain list.
        """
        HTMLParser.__init__(self)

        self.debugging = debug
        if not self.debugging:
            self.result = []
        else:
            warnings.warn(
                message="Html2Creole debug is on! warn every data append.")
            self.result = DebugList(self)

        self.blockdata = []

        # The document tree starts with an empty root as current node.
        self.root = DocNode("document", None)
        self.cur = self.root

        # Nesting depth of ul/ol lists.
        self.__list_level = 0
示例#13
0
    def __init__(self, raw, block_rules=None, blog_line_breaks=True):
        """
        Set up the parser for the given markup.

        raw: the markup text, must be an instance of TEXT_TYPE
        block_rules: object providing `rules` and `re_flags`; defaults to
            BlockRules(blog_line_breaks=blog_line_breaks)
        blog_line_breaks: stored on the parser and passed to the default
            BlockRules
        """
        assert isinstance(raw, TEXT_TYPE)
        self.raw = raw

        if block_rules is None:
            block_rules = BlockRules(blog_line_breaks=blog_line_breaks)

        # All block element rules are combined into one alternation.
        block_pattern = "|".join(block_rules.rules)
        self.block_re = re.compile(block_pattern, block_rules.re_flags)

        self.blog_line_breaks = blog_line_breaks

        self.root = DocNode("document", None)
        # Filled with all macros that are used in the text
        self.root.used_macros = set()

        self.cur = self.root  # The most recent document node
        self.text = None  # The node to add inline characters to
        self.last_text_break = None  # Last break node, inserted by _text_repl()
示例#14
0
 def _line_repl(self, groups):
     """Add an empty "line" node at block level for a newline in the markup."""
     self._upto_block()
     block_node = self.cur
     DocNode("line", block_node, "")
示例#15
0
 def _separator_repl(self, groups):
     """Add a "separator" node at block level."""
     self._upto_block()
     block_node = self.cur
     DocNode("separator", block_node)
示例#16
0
 def handle_entityref(self, name):
     """Add an "entityref" node holding the entity name to the current node."""
     self.debug_msg("entityref", "%r" % name)
     parent = self.cur
     DocNode("entityref", parent, content=name)
示例#17
0
 def handle_data(self, data):
     """
     Add a "data" node holding the given character data.

     data: the text between tags. A BINARY_TYPE value is decoded to
         text first.

     Raises UnicodeDecodeError if binary data is not valid UTF-8.
     """
     self.debug_msg("data", "%r" % data)
     if isinstance(data, BINARY_TYPE):
         # The `unicode` builtin does not exist on Python 3, so decode
         # explicitly. UTF-8 is a superset of the ASCII decoding that
         # `unicode(data)` performed on Python 2.
         data = data.decode("utf-8")
     DocNode("data", self.cur, content=data)
示例#18
0
 def _head_repl(self, groups):
     """
     Add a "header" node at block level.

     The stripped "head_text" group is the node content, the length of
     the "head_head" group is its level.
     """
     self._upto_block()
     title = groups["head_text"].strip()
     header_node = DocNode("header", self.cur, title)
     header_node.level = len(groups["head_head"])
     self.text = None
示例#19
0
 def _char_repl(self, groups):
     """Append the "char" group to the open text node, creating it if needed."""
     text_node = self.text
     if text_node is None:
         text_node = DocNode("text", self.cur, "")
         self.text = text_node
     text_node.content += groups.get("char", "")
示例#20
0
 def _linebreak_repl(self, groups):
     """Add a "break" node without content and close the open text node."""
     parent = self.cur
     DocNode("break", parent, None)
     # Text after the break must start a fresh text node.
     self.text = None
示例#21
0
 def _pre_inline_repl(self, groups):
     """Add a "pre_inline" node holding the "pre_inline_text" group."""
     inline_text = groups.get("pre_inline_text", "")
     parent = self.cur
     DocNode("pre_inline", parent, inline_text)
     self.text = None