def parse_html(html_string, debug=False):
    """
    Create the document tree from html code.

    :param html_string: the HTML source; must be an instance of TEXT_TYPE.
    :param debug: passed on to HtmlParser; when true, the parser's
        debug() method is also called after feeding.
    :return: whatever HtmlParser.feed() returns for the given markup
        (used by callers as the document tree).
    :raises AssertionError: if html_string is not a TEXT_TYPE instance.
    """
    # Explicit check instead of a bare ``assert`` statement: asserts are
    # removed when Python runs with -O, which would let bytes slip through.
    # AssertionError is kept so existing callers see the same exception.
    if not isinstance(html_string, TEXT_TYPE):
        raise AssertionError("given html_string must be unicode!")

    parser = HtmlParser(debug=debug)
    document_tree = parser.feed(html_string)
    if debug:
        parser.debug()
    return document_tree
def parse_html(html_string, debug=False, **parser_kwargs):
    """
    Create the document tree from html code.

    :param html_string: the HTML source as text (``unicode`` on Python 2,
        ``str`` on Python 3); byte strings are rejected.
    :param debug: passed on to HtmlParser as its first positional argument;
        when true, the parser's debug() method is also called after feeding.
    :param parser_kwargs: extra keyword arguments forwarded unchanged to
        HtmlParser.
    :return: whatever HtmlParser.feed() returns for the given markup
        (used by callers as the document tree).
    :raises AssertionError: if html_string is not a text string.
    """
    # ``unicode`` only exists on Python 2; referencing it unconditionally
    # raises NameError on Python 3, where the text type is ``str``.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    # Explicit check instead of a bare ``assert`` statement: asserts are
    # removed when Python runs with -O. AssertionError is kept so existing
    # callers see the same exception type as before.
    if not isinstance(html_string, text_type):
        raise AssertionError("given html_string must be unicode!")

    parser = HtmlParser(debug, **parser_kwargs)
    document_tree = parser.feed(html_string)
    if debug:
        parser.debug()
    return document_tree
</li> </ul> </li> <li><p>item 2</p> <ul> <li>subitem 2.1</li> </ul> </li> </ul> <p>Text under list.</p> <p>4 <img alt="PNG pictures" src="/image.png" /> four</p> <p>5 <img alt="Image without files ext?" src="/path1/path2/image" /> five</p> """ print(data) h2c = HtmlParser( # debug=True ) document_tree = h2c.feed(data) h2c.debug() e = ReStructuredTextEmitter(document_tree, debug=True ) content = e.emit() print("*" * 79) print(content) print("*" * 79) print(content.replace(" ", ".").replace("\n", "\\n\n"))