Пример #1
0
 def _parseDetailPage(self, name, href, subcategory):
     """Fetch one detail page, extract its address and enqueue an Entity.

     The address is looked up inside the ``div.summary-address`` element,
     trying two strategies in order:

     1. Structured markup: a ``p.adr`` paragraph containing
        ``street-address``, ``locality`` and ``region`` spans (all three
        required) plus an optional ``postal-code`` span.
     2. Free text: the text of the first ``<p>`` in the summary, cut off
        just before the last ``"nr. "`` marker or, failing that, the last
        ``"at."`` / ``"at "`` marker.

     Args:
         name: Title stored on the entity; also used in log messages.
         href: URL handed to ``utils.getSoup``.
         subcategory: Value stored on the entity's ``subcategory`` field.

     Returns:
         None. On success the entity is put on ``self._output``. If the
         page fetch raises ``urllib2.HTTPError`` or no address can be
         extracted, an error is logged and nothing is enqueued.
     """
     # NOTE(review): the parsed document is assumed to follow the
     # BeautifulSoup API, where find() returns None when nothing matches.
     try:
         soup = utils.getSoup(href)
     except urllib2.HTTPError:
         utils.log("[%s] error parsing %s (%s)" % (self, name, href))
         return

     addr = None
     summ = soup.find('div', {'class' : 'summary-address'})

     if summ is not None:
         # Strategy 1: structured address markup.
         addrp = summ.find('p', {'class' : 'adr'})

         if addrp is not None:
             required = [
                 addrp.find('span', {'class' : cls})
                 for cls in ('street-address', 'locality', 'region')
             ]

             if all(span is not None for span in required):
                 street_addr, locality, region = [
                     span.getText().strip() for span in required
                 ]

                 # The postal code is optional on these pages.
                 postal = addrp.find('span', {'class' : 'postal-code'})
                 if postal is not None:
                     postal_code = postal.getText().strip()
                 else:
                     postal_code = ""

                 # strip() drops the trailing space that the format would
                 # otherwise leave behind when there is no postal code.
                 addr = "%s, %s, %s %s" % (
                     street_addr, locality, region, postal_code)
                 addr = addr.strip()

         if addr is None:
             # Strategy 2: free-text paragraph; keep only the part before
             # the "nr. " / "at" marker.
             para = summ.find('p')

             if para is not None:
                 text = para.getText()
                 m = re.match(r'(.*)nr\. ', text, re.DOTALL)

                 if m is None:
                     m = re.match(r'(.*)at[. ]', text, re.DOTALL)

                 if m is not None:
                     addr = m.group(1).replace('\n', ' ').strip()

     if addr is None:
         utils.log("[%s] error parsing %s (%s)" % (self, name, href))
         return

     entity = Entity()
     entity.subcategory = subcategory
     entity.title   = name
     entity.address = addr
     entity.nymag = { }

     self._output.put(entity)