def parse_parl(self, data, timeout=30):
    """Merge one parliament-website record into this person, then fetch their page.

    ``data`` is a mapping that must provide ``given_names``, ``family_name``,
    ``id``, ``party_id`` and ``party_name``; ``email`` is optional.

    Steps:
      * If ``self.data`` is still empty it is seeded from ``data`` (minus the
        party and email fields) and given a freshly allocated person id.
      * The parliament id is inserted as the first entry of ``identifiers``.
      * The email and the party are cross-checked against what is already
        recorded, or recorded if nothing is there yet.
      * The member page is downloaded into ``self.text`` and the
        ``parse_honorific`` / ``parse_table`` / ``parse_photo`` /
        ``parse_committees`` steps are run.

    ``timeout`` (seconds) is passed to ``requests.get`` so that a stalled
    server cannot block the import forever.

    Raises ``AssertionError`` when the incoming record disagrees with data
    that is already held (name, email, party identifier or party membership).
    """
    name = '%(given_names)s %(family_name)s' % data

    if not self.data:
        # First time we see this person: start from the raw record.
        self.data.update(data)
        del self.data['party_id']
        del self.data['party_name']
        # Email is optional further down (data.get), so tolerate its absence here too.
        self.data.pop('email', None)
        self.data['id'] = idFactory.new('person')
        self.data['name'] = name
    assert self.data['name'] == name

    self.data.setdefault('identifiers', []).insert(0, {
        'scheme': 'za.gov.parliament/person',
        'identifier': '%(id)s' % data,
    })

    if data.get('email'):
        if 'contact_details' in self.data:
            # Existing contact details must already carry the same address.
            known_emails = [
                x for x in self.data['contact_details'] if x['type'] == 'email'
            ]
            assert data['email'] == known_emails[0]['value']
        else:
            self.data['contact_details'] = [
                {'type': 'email', 'value': data['email']},
            ]

    party = self.organizations[data['party_name']]
    if 'identifiers' in party:
        assert party['identifiers'][0]['identifier'] == data['party_id']
    else:
        party['identifiers'] = [
            {'scheme': 'za.gov.parliament/party', 'identifier': data['party_id']},
        ]

    existing_party = [
        x for x in self.data.get('memberships', [])
        if 'party' in x['organization_id']
    ]
    if existing_party:
        assert party['id'] == existing_party[0]['organization_id'], party['id']
    else:
        add_membership(
            self.data,
            {'person_id': self.data['id'], 'organization_id': party['id']},
        )

    url = 'http://www.parliament.gov.za/live/content.php?Item_ID=184&MemberID=%(id)s' % data
    self.text = requests.get(url, timeout=timeout).text
    # NOTE(review): these steps presumably read self.text - confirm in the class.
    self.parse_honorific()
    self.parse_table()
    self.parse_photo()
    self.parse_committees()
def parse(data):
    """Add committees and committee memberships from the CSV exports to ``data``.

    ``data`` is the dict with ``'persons'`` and ``'organizations'`` mappings;
    it is updated in place and also returned.

    * ``committees.csv`` (columns ``Name``, ``Type``): every committee not yet
      in ``data['organizations']`` is added with a new ``committee_pmg`` id.
    * ``committee-members.csv`` (columns ``Name``, ``Party``, ``Committee``,
      ``IsAlternative?``, ``IsChairperson?``): each row is matched to exactly
      one person by family name (and initials when needed) and a membership
      of the committee is added to that person.

    Raises ``AssertionError`` if a row does not resolve to exactly one person
    or if the row's party differs from the person's recorded party.
    """
    # Built before committees are added; it is only used to look up parties.
    orgs_by_id = {org['id']: org for org in data['organizations'].values()}

    # TODO: Perhaps check old/new committees, then stop using parl.py
    # committees. Or just assume these new ones are accurate.
    with open(data_path + 'committees.csv') as committees_file:
        for row in csv.DictReader(committees_file):
            if row['Name'] not in data['organizations']:
                data['organizations'][row['Name']] = {
                    'id': idFactory.new('committee_pmg'),
                    'name': row['Name'],
                    'slug': row['Name'].lower().replace(' ', '-'),
                    'classification': row['Type'],
                }

    # "Surname Mr, Initials" -> "Surname, Mr Initials"
    misplaced_title = re.compile(r'^([^,]*) Mr, (.*)$')
    leading_title = re.compile(r'^\s*(Mr|Ms|Dr|Nkosi|Prof|Adv|Prince)\s+')
    # TODO: Use the person's other_names field, and get these misspellings in there.
    family_name_fixes = {
        'Khorai': 'Khoarai',
        'Hoosan': 'Hoosen',
        'Jeffrey': 'Jeffery',
        'Hill-Lews': 'Hill-Lewis',
    }
    # The set of persons does not change below, only their memberships.
    people = list(data['persons'].values())

    with open(data_path + 'committee-members.csv') as members_file:
        for row in csv.DictReader(members_file):
            row['Name'] = misplaced_title.sub(r'\1, Mr \2', row['Name'])
            family_name, initials = row['Name'].split(',')
            initials = leading_title.sub('', initials)
            family_name = family_name_fixes.get(family_name, family_name)
            if family_name == 'Koornhof' and initials == 'NC':
                initials = 'NJJVR'

            matches = [
                x for x in people if asciify(x['family_name']) == family_name
            ]
            if len(matches) > 1:
                # Several people share the family name: narrow by exact initials.
                matches = [
                    x for x in people
                    if x['family_name'] == family_name
                    and initialise(x['given_names']) == initials
                ]
                # NOTE(review): this fallback is assumed to belong to the
                # "several matches" branch - confirm against the original layout.
                if not matches:
                    # Fall back to the row's initials being a prefix.
                    matches = [
                        x for x in people
                        if x['family_name'] == family_name
                        and initialise(x['given_names'])[0:len(initials)] == initials
                    ]

            # With the current data, we now always have one result
            assert len(matches) == 1
            person = matches[0]

            party = [
                x for x in person['memberships'] if 'party' in x['organization_id']
            ][0]['organization_id']
            assert row['Party'] == orgs_by_id[party]['name'], \
                row['Party'] + orgs_by_id[party]['name']

            mship = {
                'organization_id': data['organizations'][row['Committee']]['id'],
            }
            if row['IsAlternative?'] == 'True':
                mship['role'] = 'Alternate Member'
            # Chairperson wins if both flags are set.
            if row['IsChairperson?'] == 'True':
                mship['role'] = 'Chairperson'
            add_membership(person, mship)

    return data
def parse(data):
    """Apply hand-maintained dates to house memberships and add missing delegates.

    ``data`` is the dict with ``'persons'`` and ``'organizations'`` mappings;
    it is modified in place and returned.

    * Every person gets a ``slug`` derived from their name.
    * NCOP ``Delegate`` and National Assembly ``Member`` memberships receive
      start dates (and, where listed, end dates/reasons) from the two manual
      tables below, falling back to the default start dates.
    * Entries of the NCOP table that match no existing person are created as
      new persons with a party membership and a closed NCOP membership.

    Raises ``AssertionError`` when the sanity checks fail and ``Exception``
    when a person in the National Assembly table has no matching membership.
    """
    persons = data['persons']

    for entry in persons.values():
        entry['slug'] = entry['name'].lower().replace(' ', '-')

    # There are three non-Assembly/NCOP people in the executive
    houseless = sum(
        1 for entry in persons.values()
        if not [m for m in entry['memberships'] if 'house' in m['organization_id']]
    )
    assert houseless == 3

    na_manual = {
        'Cassel Charlie Mathale': {'start_date': '2013-07-15'},
        'Wayne Maxim Thring': {'start_date': '2013-06-21'},
        'Masenyani Richard Baloyi': {'end_date': '2013-07-10'},
        'Letlapa Moroatshoge Mphahlele': {
            'end_date': '2013-07-11',
            'end_reason': 'Ceased to be a member under section 47(3)(c) of the Constitution (changed party)',
        },
        # 'Mpethi': { 'start_date': ? },
        'Ntopile Marcel Kganyago': {'end_date': '2013-07-17', 'end_reason': 'Died'},
        'Nqabayomzi Lawrence Kwankwa': {'start_date': '2013-08-06'},
        'Loretta Jacobus': {'end_date': '2013-08-01'},
    }

    ncop_manual = {
        'Rory Dean MacPherson': {'party': 'DA', 'end_date': '2009-05-29', 'province': 'KwaZulu-Natal'},
        'Robert Alfred Lees': {'start_date': '2009-06-11'},
        'Sheery Su-Huei Cheng': {'party': 'DA', 'end_date': '2010-09-30', 'province': 'Gauteng'},
        'Beverley Lynette Abrahams': {'start_date': '2010-10-01'},
        'Timothy Duncan Harris': {'party': 'DA', 'end_date': '2010-09-09', 'province': 'Western Cape'},
        'Theodorus Barnardus Beyleveldt': {
            'party': 'DA',
            'start_date': '2010-10-12',
            'end_date': '2011-07-10',
            'end_reason': 'Died',
            'province': 'Western Cape',
        },
        'Denis Joseph': {'start_date': '2011-10-20'},
        'Armiston Watson': {'party': 'DA', 'end_date': '2011-11-07', 'province': 'Mpumalanga'},
        'Velly Makasana Manzini': {'start_date': '2011-11-08'},
        'Tlhalefi Andries Mashamaite': {'party': 'ANC', 'end_date': '2012-05-08', 'province': 'Limpopo'},
        'Thabo Lucas Makunyane': {'start_date': '2012-05-22'},
        'Zukisa Cheryl Faku': {'start_date': '2013-04-25'},
        'Mokoane Collen Maine': {'end_date': '2013-08-01'},  # XXX
    }

    def former_delegate(info):
        # Closed NCOP membership built from one ncop_manual entry.
        province = info['province']
        return {
            'organization_id': 'org.mysociety.za/house/ncop',
            'label': 'Delegate for %s' % province,
            'role': 'Delegate',
            'area': {
                'id': 'org.mysociety.za/mapit/code/p/' + PROVINCES[province],
                'name': province,
            },
            'start_date': info.get('start_date', '2009-05-07'),
            'end_date': info['end_date'],
            'end_reason': info.get('end_reason', 'Resigned'),
        }

    for entry in persons.values():
        full_name = entry['name']
        memberships = entry['memberships']

        delegate_rows = [
            m for m in memberships
            if 'ncop' in m['organization_id'] and m['role'] == 'Delegate'
        ]
        if delegate_rows:
            # Present, and has NCOP membership entry. Set a start and possibly end date.
            seat = delegate_rows[0]
            assert 'start_date' not in seat
            manual = ncop_manual.pop(full_name, {})
            seat['start_date'] = manual.get('start_date', '2009-05-07')
            if 'end_date' in manual and 'end_date' not in seat:
                seat['end_date'] = manual['end_date']
        elif full_name in ncop_manual:
            # Present, but has no NCOP membership entry
            manual = ncop_manual.pop(full_name)
            add_membership(entry, former_delegate(manual))

        assembly_rows = [
            m for m in memberships
            if 'house/na' in m['organization_id'] and m['role'] == 'Member'
        ]
        if assembly_rows:
            seat = assembly_rows[0]
            manual = na_manual.pop(full_name, {})
            if 'start_date' not in seat:
                seat['start_date'] = manual.pop('start_date', '2009-05-06')
            if manual:
                # Whatever is left after the start date describes the departure.
                assert 'end_date' not in seat
                seat['end_date'] = manual['end_date']
                seat['end_reason'] = manual.get('end_reason', 'Resigned')
        elif full_name in na_manual:
            raise Exception

    # The ones left have no person entry at all.
    for full_name, manual in ncop_manual.items():
        new_id = idFactory.new('person')
        given_names, family_name = full_name.rsplit(None, 1)
        newcomer = {
            'id': new_id,
            'name': full_name,
            'given_names': given_names,
            'family_name': family_name,
            'slug': full_name.lower().replace(' ', '-'),
        }
        party_org = data['organizations'][manual['party']]
        add_membership(newcomer, {'organization_id': party_org['id']})
        add_membership(newcomer, former_delegate(manual))
        persons[full_name] = newcomer

    return data
def parse(data):
    """Fill in house-membership dates from manual tables; add missing NCOP delegates.

    ``data`` holds the ``'persons'`` and ``'organizations'`` mappings. It is
    changed in place and handed back as the return value.

    The function
      1. gives every person a ``slug`` built from the lower-cased name,
      2. checks that exactly three persons have no ``house`` membership,
      3. sets start/end dates on NCOP ``Delegate`` and National Assembly
         ``Member`` memberships using the manual tables (with default start
         dates when a person is not listed), and
      4. creates person records for NCOP table entries nobody matched.

    ``AssertionError`` signals a failed sanity check; a bare ``Exception`` is
    raised for a National Assembly table entry whose person has no matching
    ``Member`` membership.
    """
    people = data['persons']

    for record in people.values():
        record['slug'] = record['name'].lower().replace(' ', '-')

    # There are three non-Assembly/NCOP people in the executive
    without_house = sum(
        1 for record in people.values()
        if not [y for y in record['memberships'] if 'house' in y['organization_id']]
    )
    assert without_house == 3

    na_manual = {
        'Cassel Charlie Mathale': {'start_date': '2013-07-15'},
        'Wayne Maxim Thring': {'start_date': '2013-06-21'},
        'Masenyani Richard Baloyi': {'end_date': '2013-07-10'},
        'Letlapa Moroatshoge Mphahlele': {
            'end_date': '2013-07-11',
            'end_reason': 'Ceased to be a member under section 47(3)(c) of the Constitution (changed party)',
        },
        # 'Mpethi': { 'start_date': ? },
        'Ntopile Marcel Kganyago': {'end_date': '2013-07-17', 'end_reason': 'Died'},
        'Nqabayomzi Lawrence Kwankwa': {'start_date': '2013-08-06'},
        'Loretta Jacobus': {'end_date': '2013-08-01'},
    }

    ncop_manual = {
        'Rory Dean MacPherson': {'party': 'DA', 'end_date': '2009-05-29', 'province': 'KwaZulu-Natal'},
        'Robert Alfred Lees': {'start_date': '2009-06-11'},
        'Sheery Su-Huei Cheng': {'party': 'DA', 'end_date': '2010-09-30', 'province': 'Gauteng'},
        'Beverley Lynette Abrahams': {'start_date': '2010-10-01'},
        'Timothy Duncan Harris': {'party': 'DA', 'end_date': '2010-09-09', 'province': 'Western Cape'},
        'Theodorus Barnardus Beyleveldt': {
            'party': 'DA',
            'start_date': '2010-10-12',
            'end_date': '2011-07-10',
            'end_reason': 'Died',
            'province': 'Western Cape',
        },
        'Denis Joseph': {'start_date': '2011-10-20'},
        'Armiston Watson': {'party': 'DA', 'end_date': '2011-11-07', 'province': 'Mpumalanga'},
        'Velly Makasana Manzini': {'start_date': '2011-11-08'},
        'Tlhalefi Andries Mashamaite': {'party': 'ANC', 'end_date': '2012-05-08', 'province': 'Limpopo'},
        'Thabo Lucas Makunyane': {'start_date': '2012-05-22'},
        'Zukisa Cheryl Faku': {'start_date': '2013-04-25'},
        'Mokoane Collen Maine': {'end_date': '2013-08-01'},  # XXX
    }

    def closed_ncop_membership(details):
        # Shared by both places that add an NCOP membership from the table.
        province_name = details['province']
        area = {
            'id': 'org.mysociety.za/mapit/code/p/' + PROVINCES[province_name],
            'name': province_name,
        }
        return {
            'organization_id': 'org.mysociety.za/house/ncop',
            'label': 'Delegate for %s' % province_name,
            'role': 'Delegate',
            'area': area,
            'start_date': details.get('start_date', '2009-05-07'),
            'end_date': details['end_date'],
            'end_reason': details.get('end_reason', 'Resigned'),
        }

    for record in people.values():
        who = record['name']
        held = record['memberships']

        ncop_seats = [
            x for x in held
            if 'ncop' in x['organization_id'] and x['role'] == 'Delegate'
        ]
        if ncop_seats:
            # Present, and has NCOP membership entry. Set a start and possibly end date.
            ncop_seat = ncop_seats[0]
            assert 'start_date' not in ncop_seat
            overrides = ncop_manual.pop(who, {})
            ncop_seat['start_date'] = overrides.get('start_date', '2009-05-07')
            if 'end_date' in overrides and 'end_date' not in ncop_seat:
                ncop_seat['end_date'] = overrides['end_date']
        elif who in ncop_manual:
            # Present, but has no NCOP membership entry
            overrides = ncop_manual.pop(who)
            add_membership(record, closed_ncop_membership(overrides))

        na_seats = [
            x for x in held
            if 'house/na' in x['organization_id'] and x['role'] == 'Member'
        ]
        if na_seats:
            na_seat = na_seats[0]
            overrides = na_manual.pop(who, {})
            if 'start_date' not in na_seat:
                na_seat['start_date'] = overrides.pop('start_date', '2009-05-06')
            if overrides:
                # Anything still listed must be an end date (plus optional reason).
                assert 'end_date' not in na_seat
                na_seat['end_date'] = overrides['end_date']
                na_seat['end_reason'] = overrides.get('end_reason', 'Resigned')
        elif who in na_manual:
            raise Exception

    # The ones left have no person entry at all.
    for who, details in ncop_manual.items():
        fresh_id = idFactory.new('person')
        given_names, family_name = who.rsplit(None, 1)
        created = {
            'id': fresh_id,
            'name': who,
            'given_names': given_names,
            'family_name': family_name,
            'slug': who.lower().replace(' ', '-'),
        }
        add_membership(
            created,
            {'organization_id': data['organizations'][details['party']]['id']},
        )
        add_membership(created, closed_ncop_membership(details))
        people[who] = created

    return data
def parse():
    """Build the person and organization tables from the MyReps exports.

    Sources, read in this order from ``data_path``:

    * ``myreps_na_executive_export.csv`` - one row per position; creates or
      re-uses an entry in the module-level ``PEOPLE`` table and adds the
      position as a membership. Unknown organisations are registered in
      ``ORGANIZATIONS`` as parties.
    * ``myreps-na.xml`` - attaches ``myreps_person_id`` / ``myreps_id``
      identifiers to National Assembly people already in ``PEOPLE``.
    * ``myreps-national-assembly.html`` - same for the people listed in the
      ``id="past"`` section of the page.
    * ``myreps-ncop.xml`` - creates (or overwrites) an entry per NCOP
      delegate, with an NCOP membership and, when given, a party membership.

    Finally every ``PEOPLE`` entry is flattened to its person dict with the
    collected ``memberships`` merged in.

    Returns ``{'persons': PEOPLE, 'organizations': ORGANIZATIONS}``.

    Raises ``AssertionError`` if two CSV rows for the same name disagree on
    the person details, and ``KeyError`` when a name, province, reason or
    party cannot be found in the corresponding lookup table.
    """
    person_cols = ('first_name', 'last_name', 'initials_alt', 'other_names',
                   'title', 'email')
    position_cols = ('start_date', 'end_date', 'end_reason', 'organisation',
                     'position', 'region')

    with open(data_path + 'myreps_na_executive_export.csv') as export_file:
        for row in FixingDictReader(export_file):
            # Empty cells are dropped; so are the placeholder values
            # 'Member' and 'National' on the position side.
            person_bits = dict(
                (col_map(k), v) for k, v in row.items()
                if k in person_cols and v)
            position_bits = dict(
                (col_map(k), v) for k, v in row.items()
                if k in position_cols and v and v != 'Member' and v != 'National')

            if 'end_date' not in position_bits:
                del position_bits['end_reason']
            if 'end_date' in position_bits and position_bits['end_reason'] == '0':
                del position_bits['end_reason']
            if person_bits['given_names'] == 'Tlhalefi Andries':
                continue  # Comes in elsewhere

            # Manual fixes of file
            fix_person_bits(person_bits)
            fix_end_reason(position_bits, person_bits)

            name = '%(given_names)s %(family_name)s' % person_bits
            person_bits['name'] = name
            if person_bits.get('email'):
                person_bits['contact_details'] = [
                    {'type': 'email', 'value': person_bits.pop('email')},
                ]
            if 'other_names' in person_bits:
                person_bits['other_names'] = [{'name': person_bits['other_names']}]

            org_name = position_bits['organisation']
            if org_name not in ORGANIZATIONS:
                # Anything not known up front is registered as a party.
                ORGANIZATIONS[org_name] = {
                    'id': 'org.mysociety.za/party/' + org_name.lower(),
                    'name': org_name,
                    'slug': org_name.lower(),
                    'classification': 'party',
                }
            position_bits['organization_id'] = ORGANIZATIONS[org_name]['id']
            del position_bits['organisation']

            if (position_bits['organization_id'] == 'org.mysociety.za/house/national-assembly'
                    and 'role' not in position_bits):
                position_bits['label'] = position_bits['role'] = 'Member'
            elif position_bits['organization_id'] == 'org.mysociety.za/house/ncop':
                position_bits['label'] = position_bits['role'] = 'Delegate'

            if 'end_reason' in position_bits:
                position_bits['end_reason'] = REASONS[position_bits['end_reason']]

            if position_bits.get('region'):
                r = position_bits['region']
                position_bits['area'] = {
                    'id': 'org.mysociety.za/mapit/code/p/' + PROVINCES[r],
                    'name': r,
                }
                position_bits['label'] += ' for ' + r
                del position_bits['region']

            if name in PEOPLE:
                # Same person on another row: details must agree exactly.
                person_bits['id'] = PEOPLE[name]['person']['id']
                assert PEOPLE[name]['person'] == person_bits
            else:
                person_bits['id'] = idFactory.new('person')
                PEOPLE[name] = {'id': person_bits['id'], 'person': person_bits}
            add_membership(PEOPLE[name], position_bits)

    # National Assembly MyReps site data
    # To fetch myreps ID and PERSON_ID
    with open(data_path + 'myreps-na.xml') as na_file:
        na = na_file.read()
    cols_xml = [
        'id', 'person_id', 'person_first_name', 'person_last_name',
        'person_paries',
    ]
    for person in ET.fromstring(na).iter('Members'):
        row = dict(zip(cols_xml, [person.find(x).text for x in cols_xml]))
        if row['person_first_name'] == 'Nomaindiya Cathleen':
            row['person_first_name'] = 'NomaIndiya Cathleen'
        if (row['person_first_name'] == 'Alpheus'
                and row['person_last_name'] == 'Mokabhe'):
            row.update(person_first_name='Alpheus Mokabhe',
                       person_last_name='Maziya')
        if row['person_first_name'] == 'Ximbi':
            row.update(person_first_name='Dumsani Livingstone',
                       person_last_name='Ximbi')
        name = '%(person_first_name)s %(person_last_name)s' % row
        name = fix_bad_encoding(name.encode('utf-8'))
        PEOPLE[name]['person']['identifiers'] = [
            {'identifier': row['person_id'], 'scheme': 'myreps_person_id'},
        ]
        if row['id']:
            PEOPLE[name]['person']['identifiers'].append(
                {'identifier': row['id'], 'scheme': 'myreps_id'})

    with open(data_path + 'myreps-national-assembly.html') as na_prev_file:
        na_prev = na_prev_file.read()
    # DOTALL is passed as a flag: a trailing inline "(?s)" is rejected by
    # newer versions of the re module.
    na_prev = re.search(
        r'<div[^>]*id="past"[^>]*>.*?</div>', na_prev, re.DOTALL).group(0)
    for person in re.findall(
            '<li><a href="/people/view/(.*?)">([^<]*) ([^<]*?)</a> until .*?</li>',
            na_prev):
        row = dict(zip(cols_xml, ['', person[0], person[1], person[2], '']))
        # The pattern splits on the last space, which breaks these surnames.
        if row['person_first_name'] == 'Patricia de':
            row.update(person_first_name='Patricia', person_last_name='de Lille')
        if row['person_first_name'] == 'D van der':
            row.update(person_first_name='D', person_last_name='van der Walt')
        name = '%(person_first_name)s %(person_last_name)s' % row
        PEOPLE[name]['person']['identifiers'] = [
            {'identifier': row['person_id'], 'scheme': 'myreps_person_id'},
        ]

    # NCOP MyReps site data
    with open(data_path + 'myreps-ncop.xml') as ncop_file:
        ncop = ncop_file.read()
    for person in ET.fromstring(ncop).iter('Members'):
        row = dict(zip(cols_xml, [person.find(x).text for x in cols_xml]))
        # Change couple of names to match parliament data
        if row['person_first_name'] == 'Arthur':
            row['person_first_name'] = 'Robert Alfred'
        elif row['person_first_name'] == 'Buoang Lemias':
            row['person_first_name'] = 'Budang Lemias'
        name = '%(person_first_name)s %(person_last_name)s' % row
        person_id = idFactory.new('person')
        PEOPLE[name] = {
            'id': person_id,
            'person': {
                'id': person_id,
                'given_names': row['person_first_name'],
                'family_name': row['person_last_name'],
                'name': name,
                'identifiers': [
                    {'identifier': row['id'], 'scheme': 'myreps_id'},
                    {'identifier': row['person_id'], 'scheme': 'myreps_person_id'},
                ],
            },
        }
        add_membership(PEOPLE[name], {
            'organization_id': 'org.mysociety.za/house/ncop',
            'label': 'Delegate',
            'role': 'Delegate',
        })
        if row['id'] == '7852':
            # Special case of one person resigned since data
            PEOPLE[name]['memberships'][0].update(
                end_date='2013-03-27',
                end_reason='Resigned',
                label='Delegate for Eastern Cape',
                area={
                    'id': 'org.mysociety.za/mapit/code/p/' + PROVINCES['Eastern Cape'],
                    'name': 'Eastern Cape',
                })
        if row['person_paries']:
            add_membership(
                PEOPLE[name],
                {'organization_id': ORGANIZATIONS[row['person_paries']]['id']})

    # Flatten {'id', 'person', 'memberships'} wrappers into plain person dicts.
    # Iterate over a snapshot because the table is rewritten while looping.
    for name, entry in list(PEOPLE.items()):
        entry['person'].update(memberships=entry['memberships'])
        PEOPLE[name] = entry['person']

    return {
        'persons': PEOPLE,
        'organizations': ORGANIZATIONS,
    }
def parse(data):
    """Add committees and committee memberships from the CSV exports to ``data``.

    ``data`` is the dict with ``'persons'`` and ``'organizations'`` mappings;
    it is updated in place and also returned.

    * ``committees.csv`` (columns ``Name``, ``Type``): every committee not yet
      in ``data['organizations']`` is added with a new ``committee_pmg`` id.
    * ``committee-members.csv`` (columns ``Name``, ``Party``, ``Committee``,
      ``IsAlternative?``, ``IsChairperson?``): each row is matched to exactly
      one person by family name (and initials when needed) and a membership
      of the committee is added to that person.

    Raises ``AssertionError`` if a row does not resolve to exactly one person
    or if the row's party differs from the person's recorded party.
    """
    # Built before committees are added; it is only used to look up parties.
    orgs_by_id = {org['id']: org for org in data['organizations'].values()}

    # TODO: Perhaps check old/new committees, then stop using parl.py
    # committees. Or just assume these new ones are accurate.
    with open(data_path + 'committees.csv') as committees_file:
        for row in csv.DictReader(committees_file):
            if row['Name'] not in data['organizations']:
                data['organizations'][row['Name']] = {
                    'id': idFactory.new('committee_pmg'),
                    'name': row['Name'],
                    'slug': row['Name'].lower().replace(' ', '-'),
                    'classification': row['Type'],
                }

    # "Surname Mr, Initials" -> "Surname, Mr Initials"
    misplaced_title = re.compile(r'^([^,]*) Mr, (.*)$')
    leading_title = re.compile(r'^\s*(Mr|Ms|Dr|Nkosi|Prof|Adv|Prince)\s+')
    # TODO: Use the person's other_names field, and get these misspellings in there.
    family_name_fixes = {
        'Khorai': 'Khoarai',
        'Hoosan': 'Hoosen',
        'Jeffrey': 'Jeffery',
        'Hill-Lews': 'Hill-Lewis',
    }
    # The set of persons does not change below, only their memberships.
    people = list(data['persons'].values())

    with open(data_path + 'committee-members.csv') as members_file:
        for row in csv.DictReader(members_file):
            row['Name'] = misplaced_title.sub(r'\1, Mr \2', row['Name'])
            family_name, initials = row['Name'].split(',')
            initials = leading_title.sub('', initials)
            family_name = family_name_fixes.get(family_name, family_name)
            if family_name == 'Koornhof' and initials == 'NC':
                initials = 'NJJVR'

            matches = [
                x for x in people if asciify(x['family_name']) == family_name
            ]
            if len(matches) > 1:
                # Several people share the family name: narrow by exact initials.
                matches = [
                    x for x in people
                    if x['family_name'] == family_name
                    and initialise(x['given_names']) == initials
                ]
                # NOTE(review): this fallback is assumed to belong to the
                # "several matches" branch - confirm against the original layout.
                if not matches:
                    # Fall back to the row's initials being a prefix.
                    matches = [
                        x for x in people
                        if x['family_name'] == family_name
                        and initialise(x['given_names'])[0:len(initials)] == initials
                    ]

            # With the current data, we now always have one result
            assert len(matches) == 1
            person = matches[0]

            party = [
                x for x in person['memberships'] if 'party' in x['organization_id']
            ][0]['organization_id']
            assert row['Party'] == orgs_by_id[party]['name'], \
                row['Party'] + orgs_by_id[party]['name']

            mship = {
                'organization_id': data['organizations'][row['Committee']]['id'],
            }
            if row['IsAlternative?'] == 'True':
                mship['role'] = 'Alternate Member'
            # Chairperson wins if both flags are set.
            if row['IsChairperson?'] == 'True':
                mship['role'] = 'Chairperson'
            add_membership(person, mship)

    return data
def parse():
    """Build the person and organization tables from the MyReps exports.

    Sources, read in this order from ``data_path``:

    * ``myreps_na_executive_export.csv`` - one row per position; creates or
      re-uses an entry in the module-level ``PEOPLE`` table and adds the
      position as a membership. Unknown organisations are registered in
      ``ORGANIZATIONS`` as parties.
    * ``myreps-na.xml`` - attaches ``myreps_person_id`` / ``myreps_id``
      identifiers to National Assembly people already in ``PEOPLE``.
    * ``myreps-national-assembly.html`` - same for the people listed in the
      ``id="past"`` section of the page.
    * ``myreps-ncop.xml`` - creates (or overwrites) an entry per NCOP
      delegate, with an NCOP membership and, when given, a party membership.

    Finally every ``PEOPLE`` entry is flattened to its person dict with the
    collected ``memberships`` merged in.

    Returns ``{"persons": PEOPLE, "organizations": ORGANIZATIONS}``.

    Raises ``AssertionError`` if two CSV rows for the same name disagree on
    the person details, and ``KeyError`` when a name, province, reason or
    party cannot be found in the corresponding lookup table.
    """
    person_cols = ("first_name", "last_name", "initials_alt", "other_names", "title", "email")
    position_cols = ("start_date", "end_date", "end_reason", "organisation", "position", "region")

    with open(data_path + "myreps_na_executive_export.csv") as export_file:
        for row in FixingDictReader(export_file):
            # Empty cells are dropped; so are the placeholder values
            # "Member" and "National" on the position side.
            person_bits = dict((col_map(k), v) for k, v in row.items() if k in person_cols and v)
            position_bits = dict(
                (col_map(k), v)
                for k, v in row.items()
                if k in position_cols and v and v != "Member" and v != "National"
            )

            if "end_date" not in position_bits:
                del position_bits["end_reason"]
            if "end_date" in position_bits and position_bits["end_reason"] == "0":
                del position_bits["end_reason"]
            if person_bits["given_names"] == "Tlhalefi Andries":
                continue  # Comes in elsewhere

            # Manual fixes of file
            fix_person_bits(person_bits)
            fix_end_reason(position_bits, person_bits)

            name = "%(given_names)s %(family_name)s" % person_bits
            person_bits["name"] = name
            if person_bits.get("email"):
                person_bits["contact_details"] = [{"type": "email", "value": person_bits.pop("email")}]
            if "other_names" in person_bits:
                person_bits["other_names"] = [{"name": person_bits["other_names"]}]

            org_name = position_bits["organisation"]
            if org_name not in ORGANIZATIONS:
                # Anything not known up front is registered as a party.
                ORGANIZATIONS[org_name] = {
                    "id": "org.mysociety.za/party/" + org_name.lower(),
                    "name": org_name,
                    "slug": org_name.lower(),
                    "classification": "party",
                }
            position_bits["organization_id"] = ORGANIZATIONS[org_name]["id"]
            del position_bits["organisation"]

            if (
                position_bits["organization_id"] == "org.mysociety.za/house/national-assembly"
                and "role" not in position_bits
            ):
                position_bits["label"] = position_bits["role"] = "Member"
            elif position_bits["organization_id"] == "org.mysociety.za/house/ncop":
                position_bits["label"] = position_bits["role"] = "Delegate"

            if "end_reason" in position_bits:
                position_bits["end_reason"] = REASONS[position_bits["end_reason"]]

            if position_bits.get("region"):
                r = position_bits["region"]
                position_bits["area"] = {"id": "org.mysociety.za/mapit/code/p/" + PROVINCES[r], "name": r}
                position_bits["label"] += " for " + r
                del position_bits["region"]

            if name in PEOPLE:
                # Same person on another row: details must agree exactly.
                person_bits["id"] = PEOPLE[name]["person"]["id"]
                assert PEOPLE[name]["person"] == person_bits
            else:
                person_bits["id"] = idFactory.new("person")
                PEOPLE[name] = {"id": person_bits["id"], "person": person_bits}
            add_membership(PEOPLE[name], position_bits)

    # National Assembly MyReps site data
    # To fetch myreps ID and PERSON_ID
    with open(data_path + "myreps-na.xml") as na_file:
        na = na_file.read()
    cols_xml = ["id", "person_id", "person_first_name", "person_last_name", "person_paries"]
    for person in ET.fromstring(na).iter("Members"):
        row = dict(zip(cols_xml, [person.find(x).text for x in cols_xml]))
        if row["person_first_name"] == "Nomaindiya Cathleen":
            row["person_first_name"] = "NomaIndiya Cathleen"
        if row["person_first_name"] == "Alpheus" and row["person_last_name"] == "Mokabhe":
            row.update(person_first_name="Alpheus Mokabhe", person_last_name="Maziya")
        if row["person_first_name"] == "Ximbi":
            row.update(person_first_name="Dumsani Livingstone", person_last_name="Ximbi")
        name = "%(person_first_name)s %(person_last_name)s" % row
        name = fix_bad_encoding(name.encode("utf-8"))
        PEOPLE[name]["person"]["identifiers"] = [{"identifier": row["person_id"], "scheme": "myreps_person_id"}]
        if row["id"]:
            PEOPLE[name]["person"]["identifiers"].append({"identifier": row["id"], "scheme": "myreps_id"})

    with open(data_path + "myreps-national-assembly.html") as na_prev_file:
        na_prev = na_prev_file.read()
    # DOTALL is passed as a flag: a trailing inline "(?s)" is rejected by
    # newer versions of the re module.
    na_prev = re.search(r'<div[^>]*id="past"[^>]*>.*?</div>', na_prev, re.DOTALL).group(0)
    for person in re.findall('<li><a href="/people/view/(.*?)">([^<]*) ([^<]*?)</a> until .*?</li>', na_prev):
        row = dict(zip(cols_xml, ["", person[0], person[1], person[2], ""]))
        # The pattern splits on the last space, which breaks these surnames.
        if row["person_first_name"] == "Patricia de":
            row.update(person_first_name="Patricia", person_last_name="de Lille")
        if row["person_first_name"] == "D van der":
            row.update(person_first_name="D", person_last_name="van der Walt")
        name = "%(person_first_name)s %(person_last_name)s" % row
        PEOPLE[name]["person"]["identifiers"] = [{"identifier": row["person_id"], "scheme": "myreps_person_id"}]

    # NCOP MyReps site data
    with open(data_path + "myreps-ncop.xml") as ncop_file:
        ncop = ncop_file.read()
    for person in ET.fromstring(ncop).iter("Members"):
        row = dict(zip(cols_xml, [person.find(x).text for x in cols_xml]))
        # Change couple of names to match parliament data
        if row["person_first_name"] == "Arthur":
            row["person_first_name"] = "Robert Alfred"
        elif row["person_first_name"] == "Buoang Lemias":
            row["person_first_name"] = "Budang Lemias"
        name = "%(person_first_name)s %(person_last_name)s" % row
        person_id = idFactory.new("person")
        PEOPLE[name] = {
            "id": person_id,
            "person": {
                "id": person_id,
                "given_names": row["person_first_name"],
                "family_name": row["person_last_name"],
                "name": name,
                "identifiers": [
                    {"identifier": row["id"], "scheme": "myreps_id"},
                    {"identifier": row["person_id"], "scheme": "myreps_person_id"},
                ],
            },
        }
        add_membership(
            PEOPLE[name], {"organization_id": "org.mysociety.za/house/ncop", "label": "Delegate", "role": "Delegate"}
        )
        if row["id"] == "7852":
            # Special case of one person resigned since data
            PEOPLE[name]["memberships"][0].update(
                end_date="2013-03-27",
                end_reason="Resigned",
                label="Delegate for Eastern Cape",
                area={"id": "org.mysociety.za/mapit/code/p/" + PROVINCES["Eastern Cape"], "name": "Eastern Cape"},
            )
        if row["person_paries"]:
            add_membership(PEOPLE[name], {"organization_id": ORGANIZATIONS[row["person_paries"]]["id"]})

    # Flatten {"id", "person", "memberships"} wrappers into plain person dicts.
    # Iterate over a snapshot because the table is rewritten while looping.
    for name, entry in list(PEOPLE.items()):
        entry["person"].update(memberships=entry["memberships"])
        PEOPLE[name] = entry["person"]

    return {"persons": PEOPLE, "organizations": ORGANIZATIONS}