class ModGovImporter(BaseImporter):
    """
    Importer for election results published as XML by a ModGov web service.

    The document at ``url`` is downloaded and parsed as soon as the importer
    is constructed; ``divisions()`` and ``candidates()`` then walk the parsed
    tree.
    """

    def __init__(self, *args, **kwargs):
        """
        :param url: required keyword argument, the address of the results
            XML. All other arguments are forwarded to ``BaseImporter``.
        :raises KeyError: if ``url`` is not supplied.
        """
        # ``url`` is consumed here so it is not passed on to BaseImporter.
        self.url = kwargs.pop("url")
        super().__init__(*args, **kwargs)
        self.get_data()
        # ballot_paper_id -> False once a seat-count mismatch has been put to
        # the operator, so the same question is not asked again.
        self.saved_numseats = SavedMapping("num_seats.json")

    def get_data(self):
        """Download ``self.url`` and parse the body as XML into ``self.soup``."""
        self.data = requests.get(self.url).content
        self.soup = BeautifulSoup(self.data, "xml")

    def divisions(self):
        """
        Yield a ``ModGovDivision`` for every ``electionarea`` in the document.

        Areas whose ``match_name()`` returns ``"--deleted--"`` are skipped.
        Before a division is yielded its seat count is compared with the
        locally stored ``winner_count`` (see ``_reconcile_winner_count``).

        :raises RuntimeError: if a division has no ``local_area``.
        """
        for area in self.soup.election.find_all("electionarea"):
            division = ModGovDivision(self.election, area)
            if division.match_name() == "--deleted--":
                continue
            if not division.local_area:
                # The original bare ``raise`` had no active exception and so
                # surfaced as a message-less RuntimeError; keep the type but
                # say what actually went wrong.
                raise RuntimeError(
                    "No local area matched for division {!r} ({})".format(
                        division.title, self.url
                    )
                )
            self._reconcile_winner_count(division)
            yield division

    def _reconcile_winner_count(self, division):
        """Ask the operator what to do when remote and local seat counts differ."""
        remote_seats = int(division.numseats)
        local_area = division.local_area
        if remote_seats == local_area.winner_count:
            return
        if remote_seats == 0:
            # chances are this is a mistake
            return

        key = local_area.ballot_paper_id
        if not self.saved_numseats.get(key, True):
            # Already asked about this ballot paper on an earlier run.
            return

        print(division.title)
        print(self.url)
        print(division.numseats, local_area.winner_count)
        print("winner_count mismatch, update local?")
        # ``raw_input`` does not exist on Python 3, which this class already
        # requires (zero-argument ``super()``).
        answer = input("y/n: ")
        if answer.lower() == "y":
            local_area.winner_count = remote_seats
            local_area.save()
        # NOTE(review): the decision is remembered whatever the answer was, so
        # "n" also silences the question on later runs -- confirm this is the
        # intended nesting.
        self.saved_numseats[key] = False
        self.saved_numseats.save()

    def candidates(self, division):
        """Yield a ``ModGovCandidate`` for each candidate in ``division``."""
        for candidate in self.soup.find_all("candidate"):
            if candidate.electionareaid.get_text() == division.electionareaid:
                yield ModGovCandidate(candidate, division)

    def api_url_to_web_url(self, url):
        """
        Rewrite a ``GetElectionResults`` web-service URL into the URL of the
        matching ``mgElectionElectionAreaResults.aspx`` page.
        """
        url = url.replace(
            "mgWebService.asmx/GetElectionResults",
            "mgElectionElectionAreaResults.aspx",
        )
        return url.replace("lElectionId=", "Page=all&EID=")
class CandidateMatcher(object):
    """
    Find the membership on a ballot paper that corresponds to an imported
    candidate.

    Strategies are tried in order: a previously saved manual choice, automatic
    name matching within the candidate's party, a manual pick from the party's
    memberships, and finally name matching / a manual pick across every
    membership on the ballot paper.
    """

    def __init__(self, candidate, ballot_paper):
        self.candidate = candidate
        self.ballot_paper = ballot_paper
        # "<ballot_paper_id>--<candidate name>" -> membership pk, as chosen
        # by the operator in ``_manual_matcher``.
        self.membership_map = SavedMapping('membership_map.json')

    def match(self):
        """
        Return the first truthy result of the matching strategies.

        If no strategy produces a match the whole process is terminated via
        ``sys.exit()``.
        """
        matchers = [
            self.pick_from_map,
            self.match_party_and_name,
            self.match_manually,
            self.match_from_all_manually,
        ]
        for matcher in matchers:
            match = matcher()
            if match:
                return match
        # Nothing matched (or the operator skipped every prompt): abort.
        import sys
        sys.exit()

    def get_parties(self):
        """
        Return the parties a matching membership may stand for.

        This is the candidate's own party, plus the party identified by
        ``joint-party:53-119`` when the candidate's party carries the
        identifier ``PP53``.
        """
        parties = [self.candidate.party]
        if self.candidate.party.identifiers.filter(identifier="PP53").exists():
            parties.append(
                Identifier.objects.get(
                    identifier="joint-party:53-119").content_object)
        return parties

    def get_memberships(self):
        """
        Return (and cache on the instance) the ballot's memberships standing
        for one of ``get_parties()``, ordered by primary key.
        """
        if not hasattr(self, '_memberships'):
            self._memberships = (
                self.ballot_paper.local_area.membership_set
                .filter(on_behalf_of__in=self.get_parties())
                .select_related('person')
                .order_by('pk')
            )
        return self._memberships

    def _map_key(self):
        """Build the ``membership_map`` key for this candidate and ballot."""
        # NOTE(review): ``.encode('utf8')`` is kept as-is because this key is
        # persisted in membership_map.json; on Python 3 it formats as
        # "b'...'" -- changing it would orphan already-saved choices.
        return "{}--{}".format(
            self.ballot_paper.local_area.ballot_paper_id,
            self.candidate.name.encode('utf8'),
        )

    def pick_from_map(self):
        """Return the membership saved for this candidate earlier, if any."""
        value = self.membership_map.get(self._map_key(), None)
        if value:
            return self.ballot_paper.local_area.membership_set.get(pk=value)
        return None

    @staticmethod
    def _clean_name(name):
        """Lower-case ``name``, drop commas and 'councillor', tidy whitespace."""
        name = name.lower().replace(',', '').replace('councillor', '')
        # Collapse runs of whitespace and trim the ends, so that removing a
        # title such as "councillor" does not leave a leading blank part.
        return ' '.join(name.split())

    @staticmethod
    def _name_to_parts(name):
        """Split a cleaned name into its non-empty, whitespace-separated parts."""
        return name.split()

    def match_party_and_name(self, qs=None):
        """
        Try to pick a membership automatically.

        :param qs: memberships to search; when falsy, ``get_memberships()``
            is used instead.
        :returns: the only membership if there is exactly one; otherwise the
            first whose cleaned name equals the candidate's, or whose first
            and last name parts match in either order; ``None`` if no
            membership qualifies.
        """
        if not qs:
            candidates_for_party = self.get_memberships()
        else:
            candidates_for_party = qs

        if candidates_for_party.count() == 1:
            # Only one person it can be
            return candidates_for_party.first()

        # The candidate's side is the same for every membership: do it once.
        candidate_name = self._clean_name(self.candidate.name)
        candidate_parts = self._name_to_parts(candidate_name)

        for membership in candidates_for_party:
            person_name = self._clean_name(membership.base.person.name)
            if person_name == candidate_name:
                return membership

            person_parts = self._name_to_parts(person_name)
            # A name that cleans down to nothing has no parts to compare.
            if person_parts and candidate_parts:
                person_ends = (person_parts[0], person_parts[-1])
                candidate_ends = (candidate_parts[0], candidate_parts[-1])
                # Ignore middle names
                if person_ends == candidate_ends:
                    return membership
                # LAST, First
                if person_ends == candidate_ends[::-1]:
                    return membership

            print("person name {} didn't match to candidate {}".format(
                person_parts, candidate_parts))
        return None

    @staticmethod
    def _prompt(text):
        """Read one line from the operator on either Python 2 or Python 3."""
        try:
            reader = raw_input  # Python 2
        except NameError:
            reader = input  # Python 3
        return reader(text)

    def _manual_matcher(self, qs):
        """
        Ask the operator to choose one of ``qs`` and remember the choice.

        Entering ``s`` skips the candidate and returns ``None``; anything
        that is not a listed number is rejected and the question repeated.
        The chosen membership's pk is saved in ``membership_map``.
        """
        # Materialise once so that the number shown to the operator and the
        # row that is picked come from the same ordering, even when ``qs``
        # has no explicit ordering.
        options = list(qs)
        if not options:
            print("No match for '{}' in {} and nothing to pick from".format(
                self.candidate.name, self.ballot_paper.title))
            return None

        print("No match for '{}' in {}. Please pick from the following".format(
            self.candidate.name, self.ballot_paper.title))
        for i, membership in enumerate(options, start=1):
            print("\t{}\t{}".format(
                i, membership.base.person.name.encode('utf8')))

        while True:
            answer = self._prompt("Enter selection: ").strip()
            if answer == "s":
                return None
            try:
                index = int(answer)
            except ValueError:
                index = 0
            if 1 <= index <= len(options):
                break
            print("Please enter a number between 1 and {}, or 's' to "
                  "skip".format(len(options)))

        picked_membership = options[index - 1]
        self.membership_map[self._map_key()] = picked_membership.pk
        self.membership_map.save()
        return picked_membership

    def match_manually(self):
        """Manual pick restricted to the candidate's party memberships."""
        candidates_for_party = self.get_memberships()
        if not candidates_for_party.exists():
            return None
        return self._manual_matcher(candidates_for_party)

    def match_from_all_manually(self):
        """Name match, then manual pick, across every membership on the ballot."""
        qs = self.ballot_paper.local_area.membership_set.all()
        match = self.match_party_and_name(qs=qs)
        if match:
            return match
        return self._manual_matcher(qs)
class PartyMatacher(object):
    """
    Takes a string and tries to return an Organisation that matches the party

    The name is lower-cased, a handful of spelling variants are derived from
    it, and each variant is looked up by identifier, by organisation name and
    by other (alternative) name until something is found.
    """

    # NOTE(review): a second class with this same name is defined further
    # down this file; if both live in one module the later one wins.

    def __init__(self, party_name):
        self.party_name = party_name
        # Operator-maintained mapping of party names to known identifiers.
        self.known_names_to_ids = SavedMapping('party_names.json')

    def clean_party_names(self):
        """
        Return the candidate spellings to look up, in priority order.

        Side effect: ``self.party_name`` is replaced by its lower-cased form.
        Each variant is followed (after all raw variants) by whatever
        ``known_names_to_ids`` maps it to; duplicates are dropped so the
        same string is not queried repeatedly.
        """
        self.party_name = self.party_name.lower()
        variants = [
            self.party_name.replace(' party', ''),
            self.party_name.replace('the ', ''),
            self.party_name.replace(' tory ', 'conservative'),
            self.party_name.replace(' libdem ', 'liberal democrats'),
        ]
        variants.extend([self.clean_party_name(name) for name in variants])

        # List membership (not a set) so unhashable mapping values are safe.
        name_options = []
        for name in variants:
            if name not in name_options:
                name_options.append(name)
        return name_options

    def clean_party_name(self, name):
        """Return the saved mapping for ``name``, or ``name`` itself."""
        # TODO different registers / countries
        return self.known_names_to_ids.get(name, name)

    def match_party_id(self, cleaned_name):
        """Look ``cleaned_name`` up as an identifier; ``None`` on any failure."""
        try:
            return Identifier.objects.get(
                identifier=cleaned_name).content_object
        except Exception:
            return None

    def match_party_name(self, cleaned_name):
        """Look ``cleaned_name`` up as an organisation name; ``None`` on failure."""
        try:
            # The original fetched the row but never returned it, so this
            # matcher could not succeed.
            # NOTE(review): returns the related ``base`` object (the one the
            # ``base__name`` lookup filters on) to line up with the other
            # matchers -- confirm callers do not expect the Extra row.
            return OrganizationExtra.objects.get(
                base__name__iexact=cleaned_name).base
        except Exception:
            return None

    def match_party_description(self, cleaned_name):
        """Look ``cleaned_name`` up as an alternative name; ``None`` on failure."""
        try:
            # Missing ``return`` in the original made this always None.
            return OtherName.objects.get(
                name__iexact=cleaned_name).content_object
        except Exception:
            return None

    def match(self, picker=True):
        """
        Return the first object any matcher finds for any cleaned name.

        :param picker: when true and nothing matched, hand the name to
            ``known_names_to_ids.picker`` and try once more.
        :raises ValueError: if nothing matches after the optional retry.
        """
        matchers = [
            self.match_party_id,
            self.match_party_name,
            self.match_party_description,
        ]
        cleaned_names = self.clean_party_names()
        for cleaned_name in cleaned_names:
            for matcher in matchers:
                match = matcher(cleaned_name)
                if match:
                    return match
        if picker:
            self.known_names_to_ids.picker(self.party_name)
            return self.match(picker=False)
        # Report every spelling that was tried, not just the last one.
        raise ValueError("No match for {} (cleaned to {})".format(
            self.party_name, repr(cleaned_names)))
class PartyMatacher(object):
    """
    Takes a string and tries to return an Organisation that matches the party

    The name is lower-cased, a handful of spelling variants are derived from
    it, and each variant is looked up by ``ec_id``, by party name and by
    party description until something is found.
    """

    # NOTE(review): an earlier class with this same name appears higher up
    # this file; if both live in one module this definition replaces it.

    def __init__(self, party_name):
        self.party_name = party_name
        # Operator-maintained mapping of party names to known identifiers.
        self.known_names_to_ids = SavedMapping("party_names.json")

    def clean_party_names(self):
        """
        Return the candidate spellings to look up, in priority order.

        Side effect: ``self.party_name`` is replaced by its lower-cased form.
        Each variant is followed (after all raw variants) by whatever
        ``known_names_to_ids`` maps it to; duplicates are dropped so the
        same string is not queried repeatedly.
        """
        self.party_name = self.party_name.lower()
        variants = [
            self.party_name.replace(" party", ""),
            self.party_name.replace("the ", ""),
            self.party_name.replace(" tory ", "conservative"),
            self.party_name.replace(" libdem ", "liberal democrats"),
        ]
        variants.extend([self.clean_party_name(name) for name in variants])

        # List membership (not a set) so unhashable mapping values are safe.
        name_options = []
        for name in variants:
            if name not in name_options:
                name_options.append(name)
        return name_options

    def clean_party_name(self, name):
        """Return the saved mapping for ``name``, or ``name`` itself."""
        # TODO different registers / countries
        return self.known_names_to_ids.get(name, name)

    def match_party_id(self, cleaned_name):
        """Look ``cleaned_name`` up as a party ``ec_id``; ``None`` on failure."""
        try:
            return Party.objects.get(ec_id=cleaned_name)
        except Exception:
            return None

    def match_party_name(self, cleaned_name):
        """Case-insensitive lookup by party name; ``None`` on failure."""
        try:
            # The original fetched the party but never returned it, so this
            # matcher could not succeed.
            return Party.objects.get(name__iexact=cleaned_name)
        except Exception:
            return None

    def match_party_description(self, cleaned_name):
        """Case-insensitive lookup by party description; ``None`` on failure."""
        try:
            # Missing ``return`` in the original made this always None.
            # NOTE(review): assumes the description row exposes its owning
            # party as ``.party`` -- confirm against the model. If the
            # attribute is absent the except below keeps the old None result.
            return PartyDescription.objects.get(
                name__iexact=cleaned_name
            ).party
        except Exception:
            return None

    def match(self, picker=True):
        """
        Return the first object any matcher finds for any cleaned name.

        :param picker: when true and nothing matched, hand the name to
            ``known_names_to_ids.picker`` and try once more.
        :raises ValueError: if nothing matches after the optional retry.
        """
        matchers = [
            self.match_party_id,
            self.match_party_name,
            self.match_party_description,
        ]
        cleaned_names = self.clean_party_names()
        for cleaned_name in cleaned_names:
            for matcher in matchers:
                match = matcher(cleaned_name)
                if match:
                    return match
        if picker:
            self.known_names_to_ids.picker(self.party_name)
            return self.match(picker=False)
        # Report every spelling that was tried, not just the last one.
        raise ValueError(
            "No match for {} (cleaned to {})".format(
                self.party_name, repr(cleaned_names)
            )
        )