def test_flatten_respects_hidden_n2(self):
    # A hidden child nested in a visible parent contributes its text only:
    # the child's '*' markers are left out, the parent's '`' markers stay.
    hidden_child = MarkdownElement('*', ['text2'], hidden=True)
    element = MarkdownElement('`', ['text ', hidden_child])
    self.assertEqual('`text text2`', element.flatten())
def test_add_data(self):
    # add_data() accepts both plain strings and nested elements; flatten()
    # renders them in the order they were added.
    outer = MarkdownElement('`', [])
    nested = MarkdownElement('*', ['text', MarkdownElement('"', ['text2'])])
    outer.add_data('text3')
    outer.add_data(nested)
    self.assertEqual('`text3*text"text2"*`', outer.flatten())
class HtmlToMarkdown(HTMLParser):
    """Build a tree of MarkdownElement nodes from HTML input.

    Each start tag opens a new MarkdownElement nested inside the current
    one, each end tag steps back to the parent, and text is appended to
    whichever element is current at that moment.
    """

    def __init__(self):
        super().__init__()
        # Element that receives parsed tags and text; assigned by convert().
        self.current_element = None

    def convert(self, html, flatten=True):
        """Parse *html* and return the resulting Markdown.

        Args:
            html: A single string, or an iterable of strings that are fed
                to the parser in order.
            flatten: If true, return the result of ``flatten()`` on the
                root element; otherwise return the root element itself.

        Returns:
            The flattened root element, or the root MarkdownElement when
            *flatten* is false.
        """
        if isinstance(html, str):
            html = [html]
        # Discard anything a previous call left in the parser's buffer so
        # that it cannot leak into this conversion.
        self.reset()
        root = MarkdownElement()
        self.current_element = root
        for line in html:
            self.feed(line)
        # feed() may hold back trailing text (e.g. text ending in an
        # unterminated '&') while waiting for more input; close() flushes it.
        self.close()
        # Return the root rather than self.current_element: after an
        # unclosed tag the current element is still a nested node, which
        # would silently drop everything parsed before it.
        if flatten:
            return root.flatten()
        return root

    def handle_starttag(self, tag, attrs):  # feed() callback
        """Open a new element for *tag* and make it the current element.

        The element's tag is ``[start, end]`` when MAP_TAG_END has an entry
        for *tag*, otherwise just the start marker from MAP_TAG_START.
        """
        new_element = MarkdownElement()
        start_marker = MAP_TAG_START[tag]
        try:
            new_element.tag = [start_marker, MAP_TAG_END[tag]]
        except KeyError:
            # No dedicated end marker for this tag.
            new_element.tag = start_marker
        # Visibility depends on the parent, so decide it before descending.
        self.set_visibility(new_element)
        self.current_element.add_data(new_element)
        self.current_element = new_element

    def set_visibility(self, element):
        """Hide *element* when the current element is preformatted or hidden."""
        parent = self.current_element
        element.hidden = parent.tag in PREFMT_TAGS or parent.hidden

    def handle_endtag(self, tag):  # feed() callback
        """Step back to the parent of the current element.

        A closing tag with no parent to return to is ignored, so a stray
        end tag cannot leave the parser without a current element.
        """
        # Assumes the root element's parent is None - confirm against
        # MarkdownElement.
        parent = self.current_element.parent
        if parent is not None:
            self.current_element = parent

    def handle_data(self, data):  # feed() callback
        """Append sanitized text to the current element's data."""
        self.current_element.data.append(self.sanitize(data))

    def sanitize(self, data):
        """Return *data* with non-breaking spaces (U+00A0) replaced by ' '."""
        return data.replace('\xa0', ' ')
def test_tag_supports_list(self):
    # A two-item list tag is rendered as a prefix and a suffix around the data.
    list_item = MarkdownElement(['- ', '\n'], ['text'])
    self.assertEqual('- text\n', list_item.flatten())
def test_flatten_respects_hidden_n1(self):
    # A hidden element flattens to its bare text, without its '`' markers.
    hidden_element = MarkdownElement('`', ['text'], hidden=True)
    self.assertEqual('text', hidden_element.flatten())
def test_flatten_n2(self):
    # Nested elements are flattened recursively, each wrapped in its own tag.
    quoted = MarkdownElement('"', ['text2'])
    element = MarkdownElement('`', ['text ', quoted])
    self.assertEqual('`text "text2"`', element.flatten())
def test_flatten_n1(self):
    # A single-string tag is emitted on both sides of the data.
    element = MarkdownElement('`', ['text'])
    self.assertEqual('`text`', element.flatten())