Exemplo n.º 1
0
 def test_find_all_follow_on_links(self):
     """Check get_follow_on_link_numbers against a regex scan done in the test.

     Each item returned by get_links_in_soup is rendered with str() and matched
     against a pattern for ``<a ...>`` markup containing ``?page=<digit>``.
     The set of captured digits (as strings) is the expected value.
     """
     candidate_links = hundredgreatest.get_links_in_soup(self.soup)
     # Group 3 is the single digit right after "?page=".
     # NOTE(review): (\d) captures one digit only, so "?page=10" would yield
     # "1" -- presumably this mirrors the implementation; confirm.
     page_link = re.compile(r"^<a.*(\S+)(\?page=)(\d)\S+</a>")
     hits = (page_link.match(str(link)) for link in candidate_links)
     expected_numbers = {hit.group(3) for hit in hits if hit}
     self.assertEqual(hundredgreatest.get_follow_on_link_numbers(self.soup), expected_numbers)
Exemplo n.º 2
0
 def test_compiled_soups(self):
     """Check raw_compiled_soups against soups fetched directly by the test.

     The expected URLs are BEST_NOVELS itself plus BEST_NOVELS + "?page=<n>"
     for every value returned by get_follow_on_link_numbers (for example
     BEST_NOVELS + "?page=2"). Each URL is opened with urllib.request.urlopen
     and its body parsed with BeautifulSoup; the resulting set must equal what
     raw_compiled_soups returns for the same soup.
     """
     valid_urls = {"{0}{1}{2}".format(BEST_NOVELS, "?page=", number) for number in
                   hundredgreatest.get_follow_on_link_numbers(self.soup)}
     valid_urls.add(BEST_NOVELS)
     compiled_soups = set()
     for url in valid_urls:
         # Close each response deterministically instead of leaving the
         # connection open until garbage collection.
         with urllib.request.urlopen(url) as response:
             html = response.read()
         # NOTE(review): no parser is named, so BeautifulSoup picks whichever
         # is installed; presumably this matches raw_compiled_soups -- confirm
         # before pinning one here.
         compiled_soups.add(BeautifulSoup(html))
     self.assertEqual(hundredgreatest.raw_compiled_soups(self.soup), compiled_soups)
Exemplo n.º 3
0
 def test_list_of_all_valid_urls(self):
     """Check valid_urls_to_soupify returns the base URL plus every paginated URL.

     The expected set holds BEST_NOVELS and, for each value returned by
     get_follow_on_link_numbers, BEST_NOVELS followed by "?page=" and that value.
     """
     page_numbers = hundredgreatest.get_follow_on_link_numbers(self.soup)
     expected_urls = {BEST_NOVELS}
     for number in page_numbers:
         expected_urls.add("{0}{1}{2}".format(BEST_NOVELS, "?page=", number))
     self.assertEqual(hundredgreatest.valid_urls_to_soupify(self.soup), expected_urls)