Пример #1
0
 def _parseRestaurantPage(self, pool, region_name, city_name, restaurant_name, href):
     """Fetch one restaurant page and emit an Entity describing it.

     The page at ``href`` is loaded via ``utils.getSoup``. The address
     (street + geo spans inside the ``address`` div) is required; cuisine,
     website and preview image are optional and are only set when the
     corresponding markup is present. The resulting entity is handed to
     ``self._output.put``.

     Parameters:
         pool: not used by this method; kept for interface compatibility.
         region_name, city_name: only used in the log message.
         restaurant_name: becomes the entity title.
         href: page location passed to ``utils.getSoup``; also appended to
             ``self.base`` to form the stored ``zurl``.

     Returns:
         None. On a download failure or a page without a parsable address
         the problem is logged and nothing is put on the output.
     """
     utils.log("[%s] parsing restaurant '%s.%s.%s' (%s)" % (self, region_name, city_name, restaurant_name, href))
     
     # NOTE(review): getSoup receives href as-is while 'zurl' below prefixes
     # self.base -- confirm which form (absolute / relative) href takes.
     try:
         soup = utils.getSoup(href)
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit still propagate and the crawler can be stopped.
         utils.printException()
         utils.log("[%s] error downloading page %s" % (self, href))
         return
     
     # parse the address for the current restaurant; find() returns None
     # when an element is missing, so every step is checked before use
     addr   = soup.find('div', {'class' : 'address'})
     street = None
     geo    = None
     if addr is not None:
         street = addr.find('span', {'class' : 'street'})
         geo    = addr.find('span', {'class' : 'geo'})
     
     if street is None or geo is None:
         # without an address the entity cannot be built; skip this page
         # instead of failing with an AttributeError
         utils.log("[%s] no address found on page %s" % (self, href))
         return
     
     address = "%s, %s" % (street.getText().strip(), geo.getText().strip())
     
     # add the current restaurant to the output for this crawler
     entity = Entity()
     entity.subcategory = "restaurant"
     entity.title   = restaurant_name
     entity.address = address
     entity.sources.zagat = {
         'zurl' : self.base + href, 
     }
     
     # parse cuisine (optional): first <li> of the list in the cuisine block
     header = soup.find('div', {'id' : "block-zagat_restaurants-14"})
     if header is not None:
         header = header.find('ul')
         
         if header is not None:
             header = header.find('li', {'class' : 'first'})
             
             if header is not None:
                 entity.cuisine = header.getText()
     
     # parse website (optional)
     site = soup.find('span', {'class' : 'website'})
     if site is not None:
         site = site.find('a')
         
         if site is not None:
             entity.site = site.get('href')
     
     # parse preview image (optional): <img> inside the photo div of #content
     img = soup.find('div', {'id' : 'content'})
     if img is not None:
         img = img.find('div', {'class' : 'photo'})
         
         if img is not None:
             img = img.find('img')
             
             if img is not None:
                 entity.image = img.get('src')
     
     self._output.put(entity)