Exemplo n.º 1
0
 def test_find_sort_by_href(self):
     """Check find() with an href sort key against the recorded fixtures.

     For each page in ``td.pages`` the result of ``find(limit=5, sort=...)``
     must have the same length as ``page['limit_sort_href']`` and match it
     entry by entry.
     """
     def href_key(entry):
         # A falsy href is replaced by "" so the key is always a string.
         return entry['href'] or ""

     for fixture in td.pages:
         grabber = Links(text=fixture['text'])
         found = grabber.find(limit=5, sort=href_key)
         expected = fixture['limit_sort_href']
         self.assertEqual(len(found), len(expected))
         # Lengths are equal at this point, so zip pairs every entry.
         for link, wanted in zip(found, expected):
             self.assertDictSame(link, wanted)
Exemplo n.º 2
0
 def test_find_exclude(self):
     """Check the number of links left after excluding by class.

     The same exclusion is expressed twice -- first as a compiled regular
     expression, then as a plain string -- and each call must return
     ``page['exclude_links']`` links.
     """
     for fixture in td.pages:
         grabber = Links(text=fixture['text'])
         for class_filter in (re.compile("gb1"), "gb1"):
             remaining = grabber.find(exclude=[{"class": class_filter}])
             self.assertEqual(len(remaining), fixture['exclude_links'])
Exemplo n.º 3
0
 def test_find_limit(self):
     """Check that a limited find() matches the recorded fixture links.

     For each page in ``td.pages`` the result of ``find(limit=5)`` must
     have the same length as ``page['limit_find']`` and match it entry by
     entry.
     """
     for fixture in td.pages:
         grabber = Links(text=fixture['text'])
         found = grabber.find(limit=5)
         expected = fixture['limit_find']
         self.assertEqual(len(found), len(expected))
         # Lengths are equal at this point, so zip pairs every entry.
         for link, wanted in zip(found, expected):
             self.assertDictSame(link, wanted)
Exemplo n.º 4
0
 def test_find_reverse_sort(self):
     """Check a reversed, limited find() against the recorded fixtures.

     For each page in ``td.pages`` the result of
     ``find(limit=5, reverse=True)`` must equal
     ``page['limit_reverse_find']`` in length and content.
     NOTE(review): the fixture presumably encodes "reverse first, then
     limit" -- confirm against the test data.
     """
     for fixture in td.pages:
         grabber = Links(text=fixture['text'])
         found = grabber.find(limit=5, reverse=True)
         expected = fixture['limit_reverse_find']
         self.assertEqual(len(found), len(expected))
         # Lengths are equal at this point, so zip pairs every entry.
         for link, wanted in zip(found, expected):
             self.assertDictSame(link, wanted)
Exemplo n.º 5
0
 def test_find_number_of_links(self):
     """Check that an unfiltered find() returns ``page['num_links']`` links."""
     for fixture in td.pages:
         grabber = Links(text=fixture['text'])
         all_links = grabber.find()
         self.assertEqual(len(all_links), fixture['num_links'])
Exemplo n.º 6
0
 def test_find_limit_param(self):
     """Check that find(limit=n) returns exactly n links for n = 5 and 1.

     The ``Links`` object is built from ``self.url`` rather than from
     fixture text.
     """
     grabber = Links(self.url)
     for cap in (5, 1):
         limited = grabber.find(limit=cap)
         self.assertEqual(len(limited), cap)
Exemplo n.º 7
0
 def test_soup_property(self):
     """Check that ``_soup`` on a URL-built Links is a BeautifulSoup object."""
     page_links = Links(self.url)
     parsed = page_links._soup
     self.assertIsInstance(parsed, bs4.BeautifulSoup)
Exemplo n.º 8
0
 def test_find_duplicates(self):
     """Check the link count returned by find(duplicates=False).

     For each page in ``td.pages`` the number of links returned must
     equal ``page['duplicate_links']``.
     """
     for fixture in td.pages:
         grabber = Links(text=fixture['text'])
         unique_links = grabber.find(duplicates=False)
         expected_count = fixture['duplicate_links']
         self.assertEqual(len(unique_links), expected_count)
Exemplo n.º 9
0
import re
from linkGrabber import Links

# Build a Links object for the Google home page.
# NOTE(review): presumably this fetches the page over the network -- confirm.
links = Links('http://google.com')

# Ask for at most four links with duplicates disabled and pretty output on.
gb = links.find(
    limit=4,
    duplicates=False,
    pretty=True,
)

print(gb)