def test_Source(testdb):
    """Round-trip a Source through the test database.

    Loads the source stored under a fixed URL, checks that both of its
    timestamp attributes are exactly ``datetime`` objects, writes a new
    name with ``update_db`` and confirms that a second, freshly loaded
    Source for the same URL reports that name.
    """
    papers_url = 'http://umsu.de/papers/'

    stored = Source(url=papers_url)
    stored.load_from_db()

    # Exact type checks (not isinstance): subclasses are deliberately
    # not accepted here.
    last_checked = stored.last_checked
    assert type(last_checked) is datetime
    found_date = stored.found_date
    assert type(found_date) is datetime

    new_name = "wo's weblog"
    stored.update_db(name=new_name)

    # A separate instance must see the updated name after loading.
    reloaded = Source(url=papers_url)
    reloaded.load_from_db()
    assert reloaded.name == new_name
def test_process_link(testdb, caplog):
    """Process a single link from the test source and inspect the log.

    Opens the source page in a ``scraper.Browser``, locates the anchor
    whose ``href`` is ``options.pdf``, wraps it in a ``Link`` and runs
    ``scraper.process_link`` on it with ``force_reprocess`` and
    ``keep_tempfiles`` enabled. The test passes when two expected text
    snippets appear in the captured log output.

    The debug level is set to 2 for the duration of the
    ``process_link`` call and is always put back to 5 afterwards, even
    when the call raises, so a failure here does not leave the changed
    level in effect for later tests.
    """
    source = Source(url='http://umsu.de/papers/')
    source.load_from_db()

    # NOTE(review): the browser is never shut down in this test; if
    # scraper.Browser offers a quit/close method, consider calling it
    # in a finally block -- confirm against the Browser class.
    browser = scraper.Browser(use_virtual_display=VDISPLAY)
    browser.goto(source.url)
    source.set_html(browser.page_source)

    link = 'options.pdf'
    el = browser.find_element_by_xpath("//a[@href='{}']".format(link))
    url = source.make_absolute(link)
    li = Link(url=url, source=source, element=el)
    li.load_from_db()

    debuglevel(2)
    try:
        scraper.process_link(li, force_reprocess=True, keep_tempfiles=True)
    finally:
        # Restore the debug level regardless of how process_link exits.
        debuglevel(5)

    # NOTE(review): caplog.text is invoked as a method here; newer
    # pytest versions expose it as a property -- verify against the
    # pytest/catchlog version this suite runs on.
    assert 'Options and Actions' in caplog.text()
    assert 'But even if we know' in caplog.text()
def test_scrape(testdb):
    """Smoke test: run ``scraper.scrape`` on the stored test source.

    There are no assertions; the test only fails if loading the source
    or scraping it raises.
    """
    source = Source(url='http://umsu.de/papers/')
    source.load_from_db()
    scraper.scrape(source)