def html():
    """Parse ``files/Test2.html`` and print its stems and links with counts.

    Manual smoke check: results are written to stdout and nothing is
    asserted.
    """
    parser = HTMLParser()
    parser.parse(r'files/Test2.html')

    # Each getter is queried once for the value and once more for its
    # length, in that order.
    stems = parser.get_processed_stems()
    stem_total = len(parser.get_processed_stems())
    print('html parser', stems, stem_total)

    links = parser.get_links()
    link_total = len(parser.get_links())
    print('html parser link result', links, link_total)
def html_test():
    """Parse a hard-coded HTML file on the D: drive and print the results.

    Prints the processed stems first, then the links. Nothing is asserted.
    """
    parser = HTMLParser()
    parser.parse(r'D:\Test2.html')

    stems = parser.get_processed_stems()
    print(stems)

    links = parser.get_links()
    print(links)
def test_get_linksHTMLParser(self):
    """Check that parsing ``files/Test.html`` yields the expected link list."""
    # The expected result is the same pair listed twice.
    google_link = ('google\nlink', 'http://google.com/')
    expected = [google_link, google_link]

    parser = HTMLParser()
    parser.parse('files/Test.html')

    assert parser.get_links() == expected