Example #1
def _parseListPage(self, pool, queue, url, name, base=False):
    utils.log('[%s] parsing list page %s (%s)' % (self, name, url))
    
    # Download and parse the page; skip it entirely if the request fails.
    try:
        soup = utils.getSoup(url)
    except Exception:
        #utils.printException()
        utils.log("[%s] error downloading page %s (%s)" % (self, name, url))
        return
    
    # Each list entry lives in a <td class="summary"> cell.
    results = soup.findAll('td', {'class' : 'summary'})
    
    for result in results:
        entity = Entity()
        entity.subcategory = "book"
        entity.nytimes = {}
        
        # Title-case the book name and drop a trailing comma, if any.
        title = result.find('span', {'class' : 'bookName'}).getText().strip().title()
        if title.endswith(','):
            title = title[:-1]
        
        entity.title = title
        
        # Flatten the cell text with a '___' separator so a single regex
        # can pull out the author, publisher, and description.
        details = result.getText(separator='___')
        details_match = self.details_re.match(details)
        
        if details_match:
            details_match    = details_match.groups()
            entity.author    = details_match[0]
            entity.publisher = details_match[1]
            entity.desc      = details_match[2]
        
        # Deduplicate on (title, author) before emitting the entity.
        key = (entity.title, entity.author)
        if key in self.seen:
            continue
        
        self.seen.add(key)
        self._output.put(entity)
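
Below is a rough, self-contained illustration of how the '___'-separated details string might be matched. The pattern, the sample string, and the group names are assumptions made for illustration; they are not the actual self.details_re compiled by the original class.

import re

# Assumed cell layout after getText(separator='___'):
# "<book name>___by <author>___<publisher>___<description>"
details_re = re.compile(r'^.*?___by (?P<author>.+?)___(?P<publisher>.+?)___(?P<desc>.+)$')

sample = "The Example Book,___by Jane Doe___Acme Press___A short description."
match = details_re.match(sample)
if match:
    author, publisher, desc = match.groups()
    print(author)     # Jane Doe
    print(publisher)  # Acme Press
    print(desc)       # A short description.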