def test_find_next_page_url(self):
    """Check that a "next" anchor in the HTML body yields its href.

    The document holds a single link whose text is ``next`` and whose
    href is ``/?page=2``; the lookup is made with an empty set of
    already-parsed URLs and no page URL.
    """
    from lxml.html import document_fromstring

    markup = """ <html><body><a href="/?page=2">next</a></body></html> """
    tree = document_fromstring(markup)

    # Nothing has been parsed yet and there is no current URL to compare to.
    already_parsed = set()
    found = r.find_next_page_url(already_parsed, None, tree)

    self.assertEqual(
        '/?page=2',
        found,
        msg='Should find out page 2 url in the body.',
    )
def _test_page(self, url, html_path, expected):
    """Assert the next-page URL found in a regression fixture.

    Args:
        url: URL passed to the parser and to the next-page lookup.
        html_path: Location handed to ``load_regression_data`` to obtain
            the fixture HTML.
        expected: Value the lookup is required to return.
    """
    fixture_html = load_regression_data(html_path)
    tree = r.parse(fixture_html, url)

    # The page under test is the only entry in the parsed-URL set
    # (presumably so it is not offered as its own next page; confirm
    # against find_next_page_url).
    seen_urls = {url}
    found = r.find_next_page_url(seen_urls, url, tree)

    self.assertEqual(expected, found)