Пример #1
0
def test_discover_urls():
    """Verify which anchors ``Fetch.discover_urls`` returns for a sample page.

    The fixture mixes root-relative, dot-relative, parent-relative and
    absolute links with a ``mailto:`` link and a malformed anchor. Only the
    first four kinds are expected in the result, resolved against the page
    URL and listed in document order.
    """
    import queue
    import threading

    fetcher = Fetch(
        url_queue=queue.Queue(),
        url_store={},
        url_lock=threading.Lock(),
    )
    page_url = 'http://nonexistant/x/'
    markup = """
    <html><body>
    <a href='/a' />
    <a href='/b' />
    <a href='./c' />
    <a href='../d' />
    <a href='http://othersite/' />
    <a href='http://othersite/e' />
    <a href='http://othersite/e#foo' />
    <a href='mailto:[email protected]' />
    <a not always as it seems! />
    </body></html>
    """
    # The mailto link and the attribute-less anchor must not appear here.
    expected = [
        'http://nonexistant/a',
        'http://nonexistant/b',
        'http://nonexistant/x/c',
        'http://nonexistant/d',
        'http://othersite/',
        'http://othersite/e',
        'http://othersite/e#foo',
    ]

    discovered = fetcher.discover_urls(page_url, markup)
    eq_(discovered, expected)