def get_content(email, content_types=_CONTENT_PREFERENCES, remove_tags=True):
    """
    Finds the most preferred content in an email Message object.

    The entries of content_types are tried in order. For each one, the
    parts yielded by email.walk() are scanned, and the decoded payload
    of the first part reporting that content type is returned. When
    remove_tags is true and the matching type is 'text/html', the
    payload is passed through html_to_text before being returned.
    If no part matches any of the content types, returns None.
    """
    for wanted_type in content_types:
        for section in email.walk():
            # Skip parts of any other type; only the first hit matters.
            if section.get_content_type() != wanted_type:
                continue

            body = section.get_payload(decode=True)

            # Anything other than HTML with tag removal requested is
            # handed back untouched.
            if not remove_tags or wanted_type != 'text/html':
                return body
            return html_to_text(body)

    # Nothing in the message matched any of the requested types.
    return None
def test_html_to_text(self):
    """
    Tests the html_to_text function.

    Passes an HTML snippet containing <br> tags and a link to
    parserutils.html_to_text and asserts that the result equals the
    expected plain text.
    """
    # Sample markup, built from three concatenated string pieces.
    # NOTE(review): the markup has no closing </body> tag -- unclear
    # from here whether that is intentional.
    html = '<html><head></head><body><p>Hi!<br>How are you?<br>Here ' + \
           'is the <a href="https://www.python.org">link</a> you ' + \
           'wanted.</p></html>'
    actual = parserutils.html_to_text(html)
    # The backslash right after the opening quotes keeps the literal
    # from starting with a newline.
    # NOTE(review): the line breaks inside this literal are assumed to
    # fall where the <br> tags are -- confirm against the real output
    # of html_to_text.
    expected = """\
Hi!
How are you?
Here is the link you wanted."""
    self.assertEqual(actual, expected)