def test_discover_urls():
    """
    Check which hyperlinks ``Fetch.discover_urls`` reports for a page.

    The fixture page is fetched from ``http://nonexistant/x/`` and mixes
    root-relative, dot-relative, parent-relative and absolute links with a
    ``mailto:`` link and an anchor that carries no ``href``.  The expected
    result lists the first seven links as absolute URLs, in document order,
    and contains neither of the last two.
    """
    import queue
    import threading

    fetcher = Fetch(
        url_queue=queue.Queue(),
        url_store={},
        url_lock=threading.Lock(),
    )

    # One anchor per literal; adjacent literals concatenate into one string.
    # NOTE(review): the mailto address looks like an obfuscation placeholder
    # rather than a real address -- confirm against the upstream fixture.
    page = (
        " <html><body> "
        "<a href='/a' /> "
        "<a href='/b' /> "
        "<a href='./c' /> "
        "<a href='../d' /> "
        "<a href='http://othersite/' /> "
        "<a href='http://othersite/e' /> "
        "<a href='http://othersite/e#foo' /> "
        "<a href='mailto:[email protected]' /> "
        "<a not always as it seems! /> "
        "</body></html> "
    )

    expected = [
        'http://nonexistant/a',
        'http://nonexistant/b',
        'http://nonexistant/x/c',
        'http://nonexistant/d',
        'http://othersite/',
        'http://othersite/e',
        'http://othersite/e#foo',
    ]

    discovered = fetcher.discover_urls('http://nonexistant/x/', page)
    eq_(discovered, expected)