def test_get_json(self):
    """Run a Google Scholar search, then fetch its raw HTML from the archive.

    Exercises the JSON round-trip (``get_json``) and the archive endpoint
    (``get_search_archive`` with the ``'html'`` format).
    """
    scholar_search = GoogleSearch({"q": "Coffee", "engine": "google_scholar"})
    result = scholar_search.get_json()
    print(result['search_metadata'])

    # The metadata id is the key for retrieving the same search later.
    archive_id = result['search_metadata']['id']

    # retrieve search from the archive - blocker
    print(archive_id + ": get search from archive")
    html_payload = scholar_search.get_search_archive(archive_id, 'html')
    print(html_payload)
def test_async():
    """Fire off one async search per dataset question, then poll the archive.

    Each search is submitted with ``async: True`` and queued; the polling
    loop drains the queue, re-queueing any search whose status is not yet
    ``Cached`` or ``Success``. Blocks until every search has completed.
    """
    # store searches awaiting completion
    search_queue = Queue()
    # Serp API search
    search = GoogleSearch({"location": "Austin,Texas", "async": True})
    json_q = load_json("./dataset/Questions_with_Ans.json")
    questions = [x["Question"] for x in json_q]
    # loop through questions, submitting one async search each
    for company in questions:
        print("execute async search: q = " + company)
        search.params_dict["q"] = company
        data = search.get_dict()
        print("add search to the queue where id: " + data['search_metadata']['id'])
        # add search to the search_queue
        search_queue.put(data)

    print("wait until all search statuses are cached or success")
    # Create regular search
    search = GoogleSearch({"async": True})
    while not search_queue.empty():
        data = search_queue.get()
        search_id = data['search_metadata']['id']

        # retrieve search from the archive - blocker
        print(search_id + ": get search from archive")
        search_archived = search.get_search_archive(search_id)
        print(search_id + ": status = " +
              search_archived['search_metadata']['status'])

        # check status
        if re.search('Cached|Success', search_archived['search_metadata']['status']):
            print(search_id + ": search done with q = " +
                  search_archived['search_parameters']['q'])
            print(search_archived["organic_results"])
        else:
            # BUG FIX: requeue the pending search's data dict (was
            # `search_queue.put(search)`, which enqueued the GoogleSearch
            # object and would raise TypeError on the next dequeue's
            # ['search_metadata'] lookup).
            print(search_id + ": requeue search")
            search_queue.put(data)
            # wait 1s before polling again
            time.sleep(1)

    # search is over.
    print('all searches completed')
def search_async(q_list):
    """Submit an async search per query in *q_list* and collect organic results.

    Results are stored in the module-level ``QUERY_RESULT`` dict, keyed by
    the last 5 characters of each query string. Blocks until every search
    reaches ``Cached`` or ``Success`` status, re-queueing pending ones.
    """
    search_queue = Queue()
    search = build_search(is_async=True)
    # flip to True for verbose progress output
    show_msg = False

    # submit one async search per query
    for q in q_list:
        search.params_dict["q"] = q
        data = search.get_dict()
        # add search to the search_queue
        search_queue.put(data)
        if show_msg:
            print("execute async search: q = " + q)
            print("add search to the queue where id: " + data['search_metadata']['id'])
            print("wait until all search statuses are cached or success")

    # Create regular search
    search = GoogleSearch({"async": True})
    while not search_queue.empty():
        data = search_queue.get()
        search_id = data['search_metadata']['id']

        # retrieve search from the archive - blocker
        search_archived = search.get_search_archive(search_id)
        if show_msg:
            print(search_id + ": get search from archive")
            print(search_id + ": status = " +
                  search_archived['search_metadata']['status'])

        # check status
        if re.search('Cached|Success', search_archived['search_metadata']['status']):
            if show_msg:
                print(search_id + ": search done with q = " +
                      search_archived['search_parameters']['q'])
            QUERY_RESULT[search_archived['search_parameters']['q']
                         [-5:]] = search_archived["organic_results"]
        else:
            # BUG FIX: requeue the pending search's data dict (was
            # `search_queue.put(search)`, which enqueued the search object
            # and would raise TypeError on the next ['search_metadata']
            # lookup).
            print(search_id + ": requeue search")
            search_queue.put(data)
            # wait 1s before polling again
            time.sleep(1)

    # search is over.
    print('all searches completed')
def test_async(self):
    """Submit async searches for a few companies, then poll until all finish.

    Queues each submitted search's metadata and drains the queue, re-queueing
    any search whose status is not yet ``Cached`` or ``Success``.
    """
    # store searches awaiting completion
    search_queue = Queue()
    # Serp API search
    search = GoogleSearch({"location": "Austin,Texas", "async": True})

    # loop through companies
    for company in ['amd', 'nvidia', 'intel']:
        print("execute async search: q = " + company)
        search.params_dict["q"] = company
        data = search.get_dict()
        # BUG FIX: the guard was inverted (`if data is not None`), which
        # skipped every *successful* search and let `None` data through.
        if data is None:
            print("oops data is empty for: " + company)
            continue
        print("add search to the queue where id: " + data['search_metadata']['id'])
        # add search to the search_queue
        search_queue.put(data)

    print("wait until all search statuses are cached or success")
    # Create regular search
    search = GoogleSearch({"async": True})
    while not search_queue.empty():
        data = search_queue.get()
        search_id = data['search_metadata']['id']

        # retrieve search from the archive - blocker
        print(search_id + ": get search from archive")
        search_archived = search.get_search_archive(search_id)
        print(search_id + ": status = " +
              search_archived['search_metadata']['status'])

        # check status
        if re.search('Cached|Success', search_archived['search_metadata']['status']):
            print(search_id + ": search done with q = " +
                  search_archived['search_parameters']['q'])
        else:
            # BUG FIX: requeue the pending search's data dict (was
            # `search_queue.put(search)`, which enqueued the GoogleSearch
            # object and would raise TypeError on the next dequeue).
            print(search_id + ": requeue search")
            search_queue.put(data)
            # wait 1s before polling again
            time.sleep(1)

    # search is over.
    print('all searches completed')