def _parseListPage(self, pool, queue, url, name, base=False):
    """Scrape one NYTimes best-seller list page and emit book entities.

    Downloads the page at *url*, extracts each book summary cell, builds
    an Entity per book (title, author, publisher, description), dedupes
    on the (title, author) pair via ``self.seen``, and puts each new
    entity on ``self._output``.

    Args:
        pool:  worker pool (unused here; kept for the crawler's common
               page-handler signature).
        queue: work queue (unused here; same reason as *pool*).
        url:   absolute URL of the list page to parse.
        name:  human-readable list name, used only for logging.
        base:  unused flag kept for signature compatibility.

    Returns:
        None. Results are delivered as a side effect via ``self._output``.
    """
    utils.log('[%s] parsing list page %s (%s)' % (self, name, url))

    try:
        soup = utils.getSoup(url)
    except Exception:
        # Download/parse failures are best-effort: log and skip this page
        # rather than aborting the whole crawl. (Was a bare ``except:``,
        # which also swallowed SystemExit/KeyboardInterrupt.)
        utils.log("[%s] error downloading page %s (%s)" % (self, name, url))
        return

    # Each book on the list lives in a <td class="summary"> cell.
    results = soup.findAll('td', {'class' : 'summary'})

    for result in results:
        entity = Entity()
        entity.subcategory = "book"
        entity.nytimes = {}

        # Normalize the title to Title Case and drop a trailing comma
        # left over from the page's "Title, by Author" formatting.
        title = result.find('span', {'class' : 'bookName'}).getText().strip().title()
        if title.endswith(','):
            title = title[:-1]
        entity.title = title

        # Flatten the cell's text with a sentinel separator so the
        # precompiled regex can split author / publisher / description.
        details = result.getText(separator='___')
        details_match = self.details_re.match(details)

        if details_match:
            details_match = details_match.groups()
            entity.author = details_match[0]
            entity.publisher = details_match[1]
            entity.desc = details_match[2]

        # Dedupe across lists: the same book frequently appears on more
        # than one best-seller list. NOTE(review): if the details regex
        # did not match, entity.author is whatever Entity defaults to —
        # presumably None; confirm against the Entity schema.
        key = (entity.title, entity.author)
        if key in self.seen:
            continue

        self.seen.add(key)
        self._output.put(entity)