def test_should_treat_relative_links_properly(self):
    """Check that a relative and an absolute link are both picked up.

    Feeds ``discover_links`` a document with one relative and one
    absolute anchor and expects two entries in both the queue and the
    discovered collection.
    """
    # NOTE(review): the counts below assume solution_one.queue and
    # solution_one.discovered start empty for each test -- presumably
    # reset in setUp; confirm.
    html = (
        ' <a href="/relative">Relative</a>'
        ' <a href="http://test.com/absolule">Absolute</a> '
    )
    soup = Soup(markup=html)

    solution_one.discover_links(url='', soup=soup)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(2, solution_one.queue.qsize())
    self.assertEqual(2, len(solution_one.discovered))
def test_should_add_allowed_urls_to_queue_and_discovered(self):
    """Check that only allowed links reach the queue and discovered set.

    The fixture holds four anchors: two path links labelled valid, plus
    a ``#`` fragment link and a ``mailto`` link labelled invalid. Exactly
    two entries are expected in both the queue and the discovered
    collection.
    """
    # NOTE(review): the counts below assume solution_one.queue and
    # solution_one.discovered start empty for each test -- presumably
    # reset in setUp; confirm.
    # NOTE(review): the mailto address text looks like an e-mail
    # obfuscation artifact; it is left untouched because the test only
    # relies on the "mailto:" scheme -- confirm against the original.
    html = (
        ' <a href="/valid">Valid</a>'
        ' <a href="#invalid">Invalid</a>'
        ' <a href="mailto://[email protected]">Invalid Too</a>'
        ' <a href="/also-valid">Also Valid</a> '
    )
    soup = Soup(markup=html)

    solution_one.discover_links(url='', soup=soup)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(2, solution_one.queue.qsize())
    self.assertEqual(2, len(solution_one.discovered))
def test_should_ignore_links_starting_with_hashtag(self):
    """Check that an anchor whose href starts with ``#`` is not recorded.

    After ``discover_links`` runs on a document containing only a
    fragment link, both the queue and the discovered collection are
    expected to be empty.
    """
    # NOTE(review): the zero counts assume solution_one.queue and
    # solution_one.discovered start empty for each test -- presumably
    # reset in setUp; confirm.
    html = '<a href="#top">Top</a>'
    soup = Soup(markup=html)

    solution_one.discover_links(url='', soup=soup)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(0, solution_one.queue.qsize())
    self.assertEqual(0, len(solution_one.discovered))
def test_should_ignore_links_without_href(self):
    """Check that an anchor with no ``href`` attribute is not recorded.

    The fixture anchor carries ``class='href'`` but no ``href``
    attribute. Both the queue and the discovered collection are
    expected to be empty after ``discover_links`` runs.
    """
    # NOTE(review): the zero counts assume solution_one.queue and
    # solution_one.discovered start empty for each test -- presumably
    # reset in setUp; confirm.
    html = "<a class='href'>no href</a>"
    soup = Soup(markup=html)

    solution_one.discover_links(url='', soup=soup)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(0, solution_one.queue.qsize())
    self.assertEqual(0, len(solution_one.discovered))