def test_html_to_text(self):
    """Convert a small HTML page and check the edges of the resulting text.

    The fixture has a <title> and <style> in its head and a <script> at the
    start of its body. After stripping surrounding whitespace the output
    must start with 'Body' and end with 'text', so nothing non-blank from
    the markup ahead of 'Body' or after 'More text' may survive.
    """
    # Adjacent literals are joined at compile time into one fixture string.
    markup = (
        ' <html> <head> <title>Title</title>'
        ' <style> p { font-family:"Times New Roman"; font-size:20px; } </style>'
        ' </head> <body>'
        ' <script type="text/javascript"> $.document() </script>'
        ' Body <p>Paragraph <strong>here</strong></p> More text'
        ' </body> </html> '
    )
    text = html_to_text(markup, encoding='ascii').strip()
    self.assertTrue(text.startswith('Body'))
    self.assertTrue(text.endswith('text'))
def test_html_to_text(self):
    """Check that html_to_text output begins at 'Body' and ends at 'text'.

    The input page carries a title, a style sheet and an inline script in
    addition to the visible body copy; only the first and last words of the
    whitespace-stripped result are asserted.
    """
    head = (
        ' <html> <head> <title>Title</title> <style>'
        ' p { font-family:"Times New Roman"; font-size:20px; }'
        ' </style> </head>'
    )
    body = (
        ' <body> <script type="text/javascript"> $.document() </script>'
        ' Body <p>Paragraph <strong>here</strong></p>'
        ' More text </body> </html> '
    )
    converted = html_to_text(head + body, encoding='ascii')
    stripped = converted.strip()
    # Leading/trailing whitespace is irrelevant; only the visible edges matter.
    self.assertTrue(stripped.startswith('Body'))
    self.assertTrue(stripped.endswith('text'))
def test_html_to_text_empty(self):
    """Check that converting an empty string yields only whitespace.

    The result of html_to_text('', encoding='ascii') must consist of zero
    or more whitespace characters and nothing else.
    """
    s = ''
    t = html_to_text(s, encoding='ascii')
    # The pattern must be anchored at the end: an unanchored r'\s*' matches a
    # zero-length prefix of *any* string, which made this assertion always
    # pass. A raw string also avoids the invalid '\s' escape warning.
    self.assertIsNotNone(re.match(r'\s*\Z', t))
def test_html_to_text_empty(self):
    """Check that empty input produces no visible text.

    html_to_text is called with an empty string and encoding='ascii'; the
    returned value may contain whitespace but no other characters.
    """
    s = ''
    t = html_to_text(s, encoding='ascii')
    # Anchor with \Z so that any non-whitespace output fails the test; the
    # previous unanchored, non-raw '\s*' pattern matched every string (it can
    # match zero characters) and triggered an invalid-escape warning.
    self.assertIsNotNone(re.match(r'\s*\Z', t))