def search(self, search_term, right_click=True):
    """
    Run a paged Google search for search_term and collect the links found.

    A browser is started from self.crawl_config, then result pages are
    requested ten results at a time until the page offset reaches
    self.crawl_config.count. The browser is quit before returning.

    @parameter
    search_term: the words to search; spaces are replaced by '+' in the URL
    right_click: when true, self.search_results() is also called on every
                 page and its result merged into search_set; when false
                 only the ad links are collected
    @return
    (ad_set, search_set): union of self.ad_links() over all pages, and
                 union of self.search_results() over all pages (empty
                 when right_click is false)
    """
    # start browser
    self.browser = start_browser(self.crawl_config.browser_type,
                                 incognito=False,
                                 user_agent=self.crawl_config.user_agent)
    self.browser.set_page_load_timeout(15)

    # search
    start = 0
    ad_set = set()
    search_set = set()
    while start < self.crawl_config.count:
        try:
            # google search advertisements or results
            url = 'https://www.google.com/?gws_rd=ssl#q='
            url += '+'.join(search_term.split(' '))
            # append start when the start is greater than zero
            if start > 0:
                url += '&start={0}'.format(start)
            self.browser.get(url)

            # wait until page load complete
            elem = wait_find_element(self.browser, 'id', 'ires')
            if elem is None:
                raise Exception("Page load failed.")
            # pause 1-3 seconds (inclusive) before reading the page
            time.sleep(random.randint(1, 3))

            ad_set |= self.ad_links()
            if right_click:
                search_set |= self.search_results()
            # Only advance on success: a failed page is retried after the
            # browser restart below.
            start += 10
        except Exception:
            # For robustness, don't throw errors here. Catch Exception
            # rather than using a bare except so that KeyboardInterrupt /
            # SystemExit can still break out of this retry loop.
            # NOTE(review): a page that keeps failing is retried without
            # any upper bound -- consider adding a retry limit.
            safe_quit(self.browser)
            logger = logging.getLogger("global")
            logger.error("error in search")
            logger.error(sys.exc_info()[0])
            # Toggle the VPN and remember the new state if the switch
            # reports success.
            if switch_vpn_state(self.connected):
                self.connected = not self.connected
            self.browser = restart_browser(
                self.crawl_config.browser_type,
                incognito=False,
                user_agent=self.crawl_config.user_agent,
                browser=self.browser)

    safe_quit(self.browser)
    return ad_set, search_set
def quit(self):
    """
    Drain self.browser_queue, passing every queued browser to safe_quit.

    Returns once the queue reports that it is empty.
    """
    pending = self.browser_queue
    while True:
        if pending.empty():
            break
        safe_quit(pending.get())