def test_paginate(self):
    """Walk a paginated Google News search and gather every result link.

    Builds a "coca cola" news query (``tbm=nws``) that reads its key from
    the ``API_KEY`` environment variable, iterates whatever
    ``GoogleSearch.pagination()`` yields, prints each page number together
    with every result's title and link, and finally asserts that more than
    200 links were collected.

    NOTE(review): a ``start``/``end`` of 0/20 reads like a two-page window,
    yet the assertion expects over 200 links -- confirm which is intended.
    NOTE(review): a second ``test_paginate`` appears in this source; if both
    live in the same class, the later definition replaces this one.
    """
    # Query itself: search terms, news vertical, credentials.
    params = {
        "q": "coca cola",
        "tbm": "nws",
        "api_key": os.getenv("API_KEY"),
    }
    # Result window forwarded alongside the query.
    params["start"] = 0
    params["end"] = 20

    # Proof of concept: accumulate the link of every news result seen.
    links = []

    # Each iteration of the outer loop handles one page of results.
    for page in GoogleSearch(params).pagination():
        print(f"Current page: {page['serpapi_pagination']['current']}")
        for news_result in page["news_results"]:
            print(
                f"Title: {news_result['title']}\nLink: {news_result['link']}\n"
            )
            links.append(news_result['link'])

    # The exact total varies with the search engine backend, so only a
    # lower bound is checked.
    self.assertGreater(len(links), 200)
def test_paginate(self):
    """Check the ``next`` links produced while paginating a Google search.

    Iterates ``search.pagination(0, 20, 10)``, collects each page's
    ``serpapi_pagination['next']`` URL, then verifies that exactly two
    pages were produced and that each link carries the expected ``start``
    offset.
    """
    search = GoogleSearch({"q": "Coffee", "location": "Austin,Texas"})
    # Positional arguments are presumably (start, end, page_size) --
    # TODO confirm against GoogleSearch.pagination's signature.
    pages = search.pagination(0, 20, 10)
    urls = [page['serpapi_pagination']['next'] for page in pages]

    self.assertEqual(len(urls), 2)
    # assertIn reports both the expected fragment and the actual URL on
    # failure, unlike assertTrue(... in ...), which only says "False".
    self.assertIn("start=10", urls[0])
    print(urls[1])
    # NOTE(review): the second offset is 21 rather than 20; kept exactly as
    # the original expectation -- confirm it matches the backend.
    self.assertIn("start=21", urls[1])
def test_paginate_page_size(self):
    """Paginate a news search with an explicit page size and check totals.

    Requests results 0..80 of a "coca cola" news query at 20 results per
    page, walks every page, reports duplicated titles on stdout, and
    asserts that:

    * every page holds exactly ``page_size`` results,
    * the number of pages equals ``(end - start) // page_size`` (4 here),
    * the total number of collected titles equals ``end - start`` (80 here).
    """
    # 80 results at 20 per page -> 4 pages expected.
    start = 0
    end = 80
    page_size = 20
    expected_results = end - start
    expected_pages = expected_results // page_size

    params = {
        "q": "coca cola",
        "tbm": "nws",
        "api_key": os.getenv("API_KEY"),
        "start": start,
        "end": end,
        "num": page_size,
    }
    title = []
    search = GoogleSearch(params)
    # Called without arguments: presumably start/end/num are taken from
    # params instead -- TODO confirm against GoogleSearch.pagination.
    pages = search.pagination()

    page_count = 0
    count = 0
    for page in pages:
        page_count += 1
        news_results = page["news_results"]
        for news_result in news_results:
            count += 1
            # Report every earlier occurrence of this title (1-based index).
            for i, t in enumerate(title, start=1):
                if t == news_result['title']:
                    print(f"{count} duplicated title: {t} at index: {i}")
            title.append(news_result['title'])

        # Compare the page's own size to page_size so the check matches
        # its failure message (the original tested count % 2).
        self.assertEqual(
            len(news_results), page_size,
            f"page {page_count} does not contain {page_size} elements")

    # Check that the number of pages and the overall result count match.
    self.assertEqual(page_count, expected_pages)
    self.assertEqual(len(title), expected_results, "number of search results")