def find_person(self):
    """Fetch the person listing and store one Person per entry.

    Requests the ``kp041.asp`` XML listing below ``self.config.BASE_URL``,
    builds a ``Person`` from every child of the second top-level element
    and hands it to ``self.db.save_person``. When an entry carries a
    ``link_kp`` value and the instance has a ``person_queue`` attribute,
    the person's numeric id is added to that queue.

    Returns:
        None. The method also returns early (without saving anything)
        when the request yields a falsy response or the XML does not
        contain the expected person list.

    Raises:
        KeyError, TypeError, ValueError: if an entry lacks a usable
            ``kplfdnr`` id; the id is mandatory, all other tags are
            optional.
    """
    find_person_url = self.config.BASE_URL + (
        'kp041.asp?template=xyz&selfaction=ws&showAll=true&PALFDNRM=1'
        '&kpdatfil=&filtdatum=filter&kpname=&kpsonst='
        '&kpampa=99999999&kpfr=99999999&kpamfr=99999999'
        '&kpau=99999999&kpamau=99999999&searchForm=true&search=Suchen'
    )

    # (XML tag, Person attribute) pairs that are copied verbatim whenever
    # the tag has non-empty text.
    plain_fields = (
        ('link_kp', 'original_url'),
        # personal information
        ('adtit', 'title'),
        ('advname', 'firstname'),
        ('adname', 'lastname'),
        # address
        ('adstr', 'address'),
        ('adhnr', 'house_number'),
        ('adplz', 'postalcode'),
        # contact
        ('adtel', 'phone'),
        ('adtel2', 'mobile'),
        ('adfax', 'fax'),
        ('ademail', 'email'),
        ('adwww1', 'website'),
    )
    # Salutation text as delivered by the server -> numeric sex code.
    sex_by_salutation = {'Frau': 1, 'Herr': 2}

    # recover=True lets lxml continue past malformed markup.
    parser = etree.XMLParser(recover=True)
    r = self.get_url(find_person_url)
    if not r:
        return

    # Non-ASCII characters become numeric character references, so the
    # parser receives pure ASCII bytes.
    xml = r.text.encode('ascii', 'xmlcharrefreplace')
    tree = etree.fromstring(xml, parser=parser)

    # element 0 is the special block
    # element 1 is the list of persons
    if tree is None or len(tree) < 2:
        # A recovering parser can return nothing (or a truncated tree) for
        # badly broken input; bail out instead of failing on tree[1].
        logging.warning("Person list at %s could not be parsed",
                        find_person_url)
        return

    for node in tree[1].iterchildren():
        # Flatten the entry into {tag: text}; empty tags map to None.
        elem = {}
        for e in node.iterchildren():
            elem[e.tag] = e.text

        # The id is mandatory, so it is still accessed strictly.
        person = Person(numeric_id=int(elem['kplfdnr']),
                        identifier=elem['kplfdnr'])

        # Optional tags are read with .get so that one missing tag does
        # not abort the whole import.
        for tag, attribute in plain_fields:
            value = elem.get(tag)
            if value:
                setattr(person, attribute, value)

        sex = sex_by_salutation.get(elem.get('antext1'))
        if sex is not None:
            person.sex = sex

        person_party = elem.get('kppartei')
        if person_party:
            # Map the party name through the configured aliases, if any.
            if person_party in self.config.PARTY_ALIAS:
                person_party = self.config.PARTY_ALIAS[person_party]
            person.committee = [{
                'committee': Committee(identifier=person_party,
                                       title=person_party,
                                       type='party'),
            }]

        # Person details such as committee memberships (and the age, but
        # no date of birth) are not part of this listing.
        # NOTE(review): presumably the queued id triggers a later detail
        # scrape -- confirm against the consumer of person_queue.
        if elem.get('link_kp') is not None:
            if hasattr(self, 'person_queue'):
                self.person_queue.add(person.numeric_id)
        else:
            logging.info("Person %s %s has no link",
                         person.firstname, person.lastname)

        self.db.save_person(person)