def _html_to_text(html, full_document=True):
    """Return the plain-text rendering of an HTML string.

    When ``full_document`` is True, only data found inside the body
    element is converted; everything else is ignored.
    """
    converter = HTMLtoText(full_document)
    converter.feed(html)
    plain_text = str(converter)
    return plain_text
def test_two_paragraphs(self):
    """Two consecutive <p> elements are each wrapped in newlines."""
    body = "<p>A paragraph.</p><p>Another paragraph.</p>"
    # NOTE(review): htmloutline is presumably a document template with one
    # placeholder for the body markup -- confirm against the fixture.
    html = self.htmloutline.format(body)
    htt = HTMLtoText()
    htt.feed(html)
    text = str(htt)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(text, "\nA paragraph.\n\nAnother paragraph.\n")
def test_basic(self):
    """Plain body text with no markup is returned unchanged."""
    # NOTE(review): htmloutline is presumably a document template with one
    # placeholder for the body markup -- confirm against the fixture.
    html = self.htmloutline.format("This is some text.")
    htt = HTMLtoText()
    htt.feed(html)
    text = str(htt)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(text, "This is some text.")
def test_linebreak(self):
    """A <br/> element is rendered as a single newline."""
    # NOTE(review): htmloutline is presumably a document template with one
    # placeholder for the body markup -- confirm against the fixture.
    html = self.htmloutline.format("One line.<br/>Two lines.")
    htt = HTMLtoText()
    htt.feed(html)
    text = str(htt)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(text, "One line.\nTwo lines.")
def test_simple_paragraph(self):
    """A single <p> element is rendered with a newline on each side."""
    # NOTE(review): htmloutline is presumably a document template with one
    # placeholder for the body markup -- confirm against the fixture.
    html = self.htmloutline.format("<p>A paragraph.</p>")
    htt = HTMLtoText()
    htt.feed(html)
    text = str(htt)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(text, "\nA paragraph.\n")