Пример #1
0
def test_Source(testdb):
    """Check that a Source loads from the database and persists a rename.

    Loads the Source for a fixed URL, checks that its ``last_checked`` and
    ``found_date`` attributes are datetimes, writes a new name through
    ``update_db`` and verifies that a second Source object for the same
    URL reads that name back.

    Args:
        testdb: pytest fixture; not referenced in the body (presumably it
            prepares the test database -- confirm against the fixture).
    """
    url = 'http://umsu.de/papers/'
    src = Source(url=url)
    src.load_from_db()
    # isinstance instead of an exact type() comparison, so datetime
    # subclasses are accepted as well.
    assert isinstance(src.last_checked, datetime)
    assert isinstance(src.found_date, datetime)
    # NOTE(review): the apostrophe in the name presumably also exercises
    # quoting in the database layer -- keep it.
    src.update_db(name="wo's weblog")
    # Use a fresh object so the name is read back via load_from_db rather
    # than taken from the instance that was just updated.
    src2 = Source(url=url)
    src2.load_from_db()
    assert src2.name == "wo's weblog"
Пример #2
0
def test_process_link(testdb, caplog):
    """Process one link found on a source page and check the logged output.

    Opens the source page in a scraper Browser, locates the anchor whose
    ``href`` is ``options.pdf``, wraps it in a Link and runs
    ``scraper.process_link`` on it with ``force_reprocess=True`` and
    ``keep_tempfiles=True``. The test passes when two expected text
    fragments appear in the captured log.

    Args:
        testdb: pytest fixture; not referenced in the body (presumably it
            prepares the test database -- confirm against the fixture).
        caplog: log-capturing fixture whose ``text()`` output is inspected.
    """
    source = Source(url='http://umsu.de/papers/')
    source.load_from_db()
    browser = scraper.Browser(use_virtual_display=VDISPLAY)
    browser.goto(source.url)
    source.set_html(browser.page_source)
    link = 'options.pdf'
    el = browser.find_element_by_xpath("//a[@href='{}']".format(link))
    url = source.make_absolute(link)
    li = Link(url=url, source=source, element=el)
    li.load_from_db()
    debuglevel(2)
    try:
        scraper.process_link(li, force_reprocess=True, keep_tempfiles=True)
    finally:
        # Always switch the debug level back to 5, even when process_link
        # raises, so the level set above does not leak into later tests.
        debuglevel(5)
    # NOTE(review): ``caplog.text()`` is called as a method here; pytest's
    # built-in caplog fixture exposes ``text`` as a property instead --
    # confirm which caplog implementation this project runs with.
    assert 'Options and Actions' in caplog.text()
    assert 'But even if we know' in caplog.text()
Пример #3
0
def test_scrape(testdb):
    """Smoke test: run ``scraper.scrape`` on a Source loaded from the DB.

    Makes no assertions; the test passes when loading and scraping finish
    without raising.

    Args:
        testdb: pytest fixture; not referenced in the body (presumably it
            prepares the test database -- confirm against the fixture).
    """
    papers_source = Source(url='http://umsu.de/papers/')
    papers_source.load_from_db()
    scraper.scrape(papers_source)