def _parseDetailPage(self, name, href, subcategory):
    """Parse one detail page and queue an Entity carrying its address.

    The page at ``href`` is fetched with ``utils.getSoup``.  The address is
    read from the ``div.summary-address`` element, preferring the structured
    ``p.adr`` markup (street-address / locality / region / optional
    postal-code spans).  If any required piece of that markup is missing,
    the text of the first ``<p>`` inside the summary div is used instead,
    keeping everything before a ``"nr. "`` marker or, failing that, before
    an ``"at."`` / ``"at "`` marker.

    On success an ``Entity`` with ``subcategory``, ``title``, ``address``
    and an empty ``nymag`` dict is put on ``self._output``.  If the page
    cannot be fetched (``urllib2.HTTPError``) or no address can be
    extracted, the failure is logged and nothing is queued.

    Args:
        name: Title to assign to the entity; also used in log messages.
        href: URL of the detail page.
        subcategory: Value stored on the entity's ``subcategory`` field.

    Returns:
        None in all cases.
    """
    def span_text(parent, css_class):
        # Stripped text of the matching <span>, or None when it is absent.
        node = parent.find('span', {'class' : css_class})
        if node is None:
            return None
        return node.getText().strip()

    try:
        soup = utils.getSoup(href)
    except urllib2.HTTPError:
        utils.log("[%s] error parsing %s (%s)" % (self, name, href))
        return

    summ = soup.find('div', {'class' : 'summary-address'})
    addr = None

    # Preferred source: the structured address markup.
    addrp = None
    if summ is not None:
        addrp = summ.find('p', {'class' : 'adr'})

    if addrp is not None:
        street_addr = span_text(addrp, 'street-address')
        locality = span_text(addrp, 'locality')
        region = span_text(addrp, 'region')

        required = (street_addr, locality, region)
        if all(part is not None for part in required):
            # The postal code is optional; treat a missing span as empty.
            postal_code = span_text(addrp, 'postal-code') or ""
            addr = "%s, %s, %s %s" % (street_addr, locality, region, postal_code)
            # Avoid a dangling space when there is no postal code.
            addr = addr.rstrip()

    # Fallback: free-text paragraph, cut at the "nr. " / "at" marker.
    if addr is None:
        m = None
        paragraph = None
        if summ is not None:
            paragraph = summ.find('p')

        if paragraph is not None:
            text = paragraph.getText()
            m = re.match(r'(.*)nr\. ', text, re.DOTALL)
            if m is None:
                m = re.match(r'(.*)at[. ]', text, re.DOTALL)

        if m is None:
            utils.log("[%s] error parsing %s (%s)" % (self, name, href))
            return

        addr = m.group(1).replace('\n', ' ').strip()

    entity = Entity()
    entity.subcategory = subcategory
    entity.title = name
    entity.address = addr
    entity.nymag = { }

    self._output.put(entity)