Пример #1
0
 def count(self, query=None, domain=None):
     """Return the total number of results reported for a search query.

     Args:
         query: Search terms. Falls back to ``self.query`` when falsy.
         domain: Value substituted for ``domain`` in the search URL. Falls
             back to ``self.domain`` when falsy.

     Returns:
         The total extracted from the ``resultStats`` paragraph of the
         response body (commas removed, converted with ``long``), or 0 when
         neither known page layout matches.

     Raises:
         GoogleException: ``('Fail to open page', 502)`` when the request
             yields a falsy response.
     """
     if not domain:
         domain = self.domain
     if not query:
         query = self.query
     # Only spaces are escaped; any other URL-special characters in the query
     # are passed through to the URL unchanged.
     query = re.sub(' ', '%20', query)
     url = GoogleSearch.SEARCH_URL % {'domain': domain, 'query': query}

     mario = Mario()
     mario.set_proxies_list(self.proxies)
     response = mario.get(url)
     if not response:
         raise GoogleException('Fail to open page', 502)

     # Raw strings keep ``\d`` a regex escape instead of relying on Python
     # leaving an unknown string escape untouched.
     # ``[^^]`` matches any character except '^', newlines included, so the
     # lazy ``[^^]*?`` runs can span several lines of markup.
     patterns = (
         # Layout 1: "<b>first</b> - <b>last</b> ... <b>TOTAL</b>"
         re.compile(r'<p id=resultStats>&nbsp;[^^]*?<b>\d+</b> - <b>\d+</b>[^^]*?<b>([^^]*?)</b>'),
         # Layout 2: "<b>...</b> ... <b>TOTAL</b> ... <b>first</b>-<b>last</b>"
         re.compile(r'<p id=resultStats>&nbsp;[^^]*?<b>[^^]*?</b>[^^]*?<b>([^^]*?)</b>[^^]*?<b>\d+</b>-<b>\d+</b>'),
     )
     for pattern in patterns:
         totals = pattern.findall(response.body)
         if totals:
             # The captured total uses commas as thousands separators.
             # ``long`` is the Python 2 builtin; kept so the return type
             # seen by callers does not change.
             return long(totals[0].replace(',', ''))
     return 0
Пример #2
0
 def _get_page(self, query, page, domain):
     """Fetch one page of search results and wrap each hit in a GoogleResult.

     Args:
         query: Value substituted for ``query`` in the URL template.
         page: Zero-based page index; 0 selects the first-page templates,
             anything else the next-page templates with a ``start`` offset.
         domain: Value substituted for ``domain`` in the URL template.

     Returns:
         A list of GoogleResult objects built from the parsed response, each
         given ``page * number_of_results + index + 1`` as its last argument
         (index counted from 0 within the page), or ``[]`` when parsing
         yields nothing.

     Raises:
         GoogleException: ``('Fail to open page', 502)`` when the request
             yields a falsy response.
     """
     per_page = self.number_of_results
     default_size = per_page == 10

     # The "*_WITH_NUMBER" templates are only used for a non-default page
     # size, and only they receive the ``num`` argument.
     url_args = {'domain': domain, 'query': query}
     if not default_size:
         url_args['num'] = per_page

     if page == 0:
         if default_size:
             template = GoogleSearch.SEARCH_URL
         else:
             template = GoogleSearch.SEARCH_URL_WITH_NUMBER
     else:
         url_args['start'] = page * per_page
         if default_size:
             template = GoogleSearch.NEXT_PAGE
         else:
             template = GoogleSearch.NEXT_PAGE_WITH_NUMBER
     url = template % url_args

     fetcher = Mario()
     fetcher.set_proxies_list(self.proxies)
     response = fetcher.get(url)
     if not response:
         raise GoogleException('Fail to open page', 502)

     parsed = self._parse_response(response.body)
     if not parsed:
         return []

     # Number hits continuously across pages, starting from 1.
     first_position = page * per_page + 1
     hits = []
     for index, item in enumerate(parsed):
         hits.append(GoogleResult(
             item['unescape_url'],
             item['title'],
             item['description'],
             first_position + index,
         ))
     return hits