def test_should_respect_the_robots_txt_rules(self):
    rules = '''
    User-Agent: *
    Disallow: /login
    '''
    solution_one.RERP.parse(rules)
    url = 'http://test.com/login'
    self.assertFalse(solution_one.can_visit_link(url))
    url = 'http://test.com/logout'
    self.assertTrue(solution_one.can_visit_link(url))

def test_should_not_allow_external_links(self):
    url = 'http://test.com/internal'
    self.assertTrue(solution_one.can_visit_link(url))
    url = 'http://external.com/home'
    self.assertFalse(solution_one.can_visit_link(url))

def test_should_not_allow_the_same_url_twice(self):
    url = 'http://test.com/twice'
    self.assertTrue(solution_one.can_visit_link(url))
    # Mark the URL as already discovered; a second visit must be refused.
    solution_one.discovered = [url]
    self.assertFalse(solution_one.can_visit_link(url))

def test_should_check_the_black_list(self):
    # Override the module-level blacklist so matching URLs are skipped.
    solution_one.BLACKLIST_REGEX = re.compile(r'/private')
    url = 'http://test.com/private'
    self.assertFalse(solution_one.can_visit_link(url))
    url = 'http://test.com/public'
    self.assertTrue(solution_one.can_visit_link(url))
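

# --- Illustrative sketch only ----------------------------------------------
# A minimal sketch of what solution_one.can_visit_link() might look like,
# reconstructed from the behaviour the tests above exercise. Everything here
# is an assumption: the names mirror the module-level attributes the tests
# touch (RERP, discovered, BLACKLIST_REGEX), but ROOT_DOMAIN and the concrete
# robots parser are hypothetical. The stdlib RobotFileParser stands in for
# whatever RERP really is; unlike the parser used in the tests, its parse()
# expects a list of lines, so you would call RERP.parse(rules.splitlines()).
import re
from urllib.parse import urlparse
from urllib.robotparser import RobotFileParser

ROOT_DOMAIN = 'test.com'             # hypothetical: domain the crawl started on
RERP = RobotFileParser()             # stand-in robots.txt rules parser
RERP.parse([])                       # start with no rules, so everything is allowed
discovered = []                      # URLs already queued or visited
BLACKLIST_REGEX = re.compile(r'$^')  # matches nothing until overridden


def can_visit_link(url):
    """Return True only if every crawling rule allows fetching this URL."""
    # 1. robots.txt: never fetch a path the site disallows for our agent.
    if not RERP.can_fetch('*', url):
        return False
    # 2. stay on the starting domain; external links are not followed.
    if urlparse(url).netloc != ROOT_DOMAIN:
        return False
    # 3. never visit the same URL twice.
    if url in discovered:
        return False
    # 4. honour the configurable blacklist pattern.
    if BLACKLIST_REGEX.search(url):
        return False
    return True
# ----------------------------------------------------------------------------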