示例#1
0
class ModGovImporter(BaseImporter):
    """
    Importer that reads election areas and candidates from the XML document
    served at ``url``.

    The document is downloaded and parsed once, when the importer is created.
    """

    def __init__(self, *args, **kwargs):
        """
        :param url: (keyword, required) address of the XML results document.
            It is popped from ``kwargs`` so that it is not forwarded to
            ``BaseImporter``; every other argument is passed through as-is.
        :raises KeyError: if ``url`` is not supplied.
        """
        self.url = kwargs.pop("url")
        super().__init__(*args, **kwargs)
        self.get_data()
        # Remembers, per ballot paper id, whether the operator still needs to
        # be asked about a seat-count mismatch (see ``divisions``).
        self.saved_numseats = SavedMapping("num_seats.json")

    def get_data(self):
        """
        Download ``self.url`` and parse the body as XML.

        Stores the raw bytes on ``self.data`` and the parsed tree on
        ``self.soup``.

        :raises requests.HTTPError: if the server answers with an error
            status, so that an error page is never parsed as results.
        """
        response = requests.get(self.url)
        response.raise_for_status()
        self.data = response.content
        self.soup = BeautifulSoup(self.data, "xml")

    def divisions(self):
        """
        Yield a ``ModGovDivision`` for every ``electionarea`` element.

        Areas whose ``match_name()`` is ``"--deleted--"`` are skipped. When
        the remote seat count is non-zero and differs from the local
        ``winner_count``, the operator is asked (once per ballot paper)
        whether the local value should be overwritten.

        :raises RuntimeError: if a division has no ``local_area``.
        """
        for area in self.soup.election.find_all("electionarea"):
            division = ModGovDivision(self.election, area)
            if division.match_name() == "--deleted--":
                continue
            if not division.local_area:
                # This used to be a bare ``raise`` with no active exception,
                # which surfaces as a RuntimeError without any context. Keep
                # the exception type, but say which division failed.
                raise RuntimeError(
                    "No local area found for division {!r} ({})".format(
                        division.title, self.url
                    )
                )

            remote_seats = int(division.numseats)
            # A remote count of 0 is most likely a mistake in the feed, so it
            # is never treated as a mismatch.
            if (
                remote_seats != 0
                and remote_seats != division.local_area.winner_count
            ):
                self._resolve_numseats_mismatch(division, remote_seats)
            yield division

    def _resolve_numseats_mismatch(self, division, remote_seats):
        """Ask the operator whether to copy the remote seat count locally."""
        key = division.local_area.ballot_paper_id
        if not self.saved_numseats.get(key, True):
            # Already answered for this ballot paper on an earlier run.
            return

        print(division.title)
        print(self.url)
        print(division.numseats, division.local_area.winner_count)
        print("winner_count mismatch, update local?")
        # ``raw_input`` does not exist on Python 3, which this class already
        # requires (zero-argument ``super()``).
        answer = input("y/n: ")
        if answer.lower() == "y":
            division.local_area.winner_count = remote_seats
            division.local_area.save()
        # Whatever the answer was, do not ask again for this ballot paper.
        self.saved_numseats[key] = False
        self.saved_numseats.save()

    def candidates(self, division):
        """
        Yield a ``ModGovCandidate`` for each ``candidate`` element whose
        ``electionareaid`` text equals ``division.electionareaid``.
        """
        for candidate in self.soup.find_all("candidate"):
            if candidate.electionareaid.get_text() == division.electionareaid:
                yield ModGovCandidate(candidate, division)

    def api_url_to_web_url(self, url):
        """
        Rewrite a ``GetElectionResults`` web-service URL into the matching
        ``mgElectionElectionAreaResults.aspx`` page URL.

        The ``lElectionId=`` query parameter becomes ``Page=all&EID=``.
        URLs that contain neither substring are returned unchanged.
        """
        url = url.replace(
            "mgWebService.asmx/GetElectionResults",
            "mgElectionElectionAreaResults.aspx",
        )
        url = url.replace("lElectionId=", "Page=all&EID=")
        return url
class CandidateMatcher(object):
    """
    Finds the membership on a ballot paper that corresponds to an imported
    candidate.

    ``match`` tries, in order: a previously saved manual choice, an automatic
    party + name comparison, a manual pick among the party's memberships and
    finally a manual pick among every membership of the area.
    """

    # Interactive prompt. ``raw_input`` is used when that name exists
    # (Python 2, or a shim defined elsewhere in the module); otherwise the
    # Python 3 ``input`` builtin is used.
    try:
        _prompt = staticmethod(raw_input)
    except NameError:
        _prompt = staticmethod(input)

    def __init__(self, candidate, ballot_paper):
        """
        :param candidate: imported candidate; ``name`` and ``party`` are read.
        :param ballot_paper: division being imported; ``local_area`` and
            ``title`` are read.
        """
        self.candidate = candidate
        self.ballot_paper = ballot_paper
        # Persists manual choices, keyed by ballot paper id + candidate name.
        self.membership_map = SavedMapping('membership_map.json')

    def match(self):
        """
        Return the first truthy result produced by the matching strategies.

        If every strategy comes back empty the process is terminated with
        ``sys.exit()``.
        """
        matchers = [
            self.pick_from_map,
            self.match_party_and_name,
            self.match_manually,
            self.match_from_all_manually,
        ]
        for matcher in matchers:
            match = matcher()
            if match:
                return match
        import sys
        sys.exit()

    def get_parties(self):
        """
        Return the parties a membership may be recorded under for this
        candidate: the candidate's own party, plus the object identified by
        ``joint-party:53-119`` when the party carries identifier ``PP53``.
        """
        parties = [self.candidate.party]
        if self.candidate.party.identifiers.filter(identifier="PP53").exists():
            parties.append(
                Identifier.objects.get(
                    identifier="joint-party:53-119").content_object)
        return parties

    def get_memberships(self):
        """
        Return the area's memberships standing on behalf of any party from
        ``get_parties``, ordered by primary key. The queryset is built once
        and cached on the instance.
        """
        if hasattr(self, '_memberships'):
            return self._memberships
        parties = self.get_parties()

        candidates_for_party = \
            self.ballot_paper.local_area.membership_set.filter(
                on_behalf_of__in=parties
            ).select_related('person').order_by('pk')
        self._memberships = candidates_for_party
        return self._memberships

    def _map_key(self):
        """Build the ``membership_map`` key for this candidate and ballot."""
        # NOTE(review): the name is UTF-8 encoded before formatting, exactly
        # as existing keys were built. Left untouched so that entries already
        # saved in membership_map.json keep resolving.
        return "{}--{}".format(
            self.ballot_paper.local_area.ballot_paper_id,
            self.candidate.name.encode('utf8'),
        )

    def pick_from_map(self):
        """
        Return the membership saved for this candidate by an earlier manual
        pick, or ``None`` when nothing has been saved.
        """
        value = self.membership_map.get(self._map_key(), None)
        if value:
            return self.ballot_paper.local_area.membership_set.get(pk=value)
        return None

    @staticmethod
    def _clean_name(name):
        """Lower-case, drop commas and 'councillor', collapse whitespace."""
        name = name.lower().replace(',', ' ').replace('councillor', '')
        # split()/join removes leading, trailing and repeated whitespace,
        # including the gap left behind by the removed title.
        return ' '.join(name.split())

    def match_party_and_name(self, qs=None):
        """
        Try to match the candidate automatically by name.

        :param qs: memberships to search; defaults to ``get_memberships()``.
        :returns: the only membership when there is exactly one; otherwise
            the first membership whose cleaned name equals the candidate's,
            or shares its first and last name parts in either order (so
            middle names and "LAST, First" are tolerated); ``None`` when
            nothing matches.
        """
        # ``is None`` rather than truthiness: an explicitly passed queryset
        # must be used even when empty, and must not be evaluated here.
        candidates_for_party = self.get_memberships() if qs is None else qs
        if candidates_for_party.count() == 1:
            # Only one person it can be
            return candidates_for_party.first()

        candidate_name = self._clean_name(self.candidate.name)
        candidate_parts = candidate_name.split()

        for membership in candidates_for_party:
            person_name = self._clean_name(membership.base.person.name)
            if person_name == candidate_name:
                return membership

            person_parts = person_name.split()
            if person_parts and candidate_parts:
                person_ends = (person_parts[0], person_parts[-1])
                # Ignore middle names
                if person_ends == (candidate_parts[0], candidate_parts[-1]):
                    return membership
                # LAST, First
                if person_ends == (candidate_parts[-1], candidate_parts[0]):
                    return membership

            print("person name {} didn't match to candidate {}".format(
                person_parts, candidate_parts))
        return None

    def _manual_matcher(self, qs):
        """
        List ``qs`` and let the operator pick a membership by number.

        Entering ``s`` skips and returns ``None``. Anything that is not a
        listed number is rejected and asked for again. A valid pick is saved
        in ``membership_map`` so the question is not repeated.
        """
        print("No match for '{}' in {}. Please pick from the following".format(
            self.candidate.name, self.ballot_paper.title))
        # Materialise once so that the numbers shown are exactly the objects
        # that can be picked.
        options = list(qs)
        if not options:
            print("\t(nothing to pick from)")
            return None
        for i, membership in enumerate(options, start=1):
            print("\t{}\t{}".format(
                i, membership.base.person.name.encode('utf8')))

        while True:
            answer = self._prompt("Enter selection: ")
            if answer == "s":
                return None
            try:
                index = int(answer)
            except ValueError:
                index = 0
            if 1 <= index <= len(options):
                break
            print("Enter a number between 1 and {}, or 's' to skip".format(
                len(options)))

        picked_membership = options[index - 1]
        self.membership_map[self._map_key()] = picked_membership.pk
        self.membership_map.save()
        return picked_membership

    def match_manually(self):
        """Manual pick among the party's memberships, if there are any."""
        candidates_for_party = self.get_memberships()
        if not candidates_for_party.exists():
            return None
        return self._manual_matcher(candidates_for_party)

    def match_from_all_manually(self):
        """
        Last resort: repeat the automatic name match against every
        membership of the area, then fall back to a manual pick among them.
        """
        qs = self.ballot_paper.local_area.membership_set.all()
        match = self.match_party_and_name(qs=qs)
        if match:
            return match
        return self._manual_matcher(qs)
示例#3
0
class PartyMatacher(object):
    """
    Takes a string and tries to return an Organisation that matches the party

    Several spellings of the name are generated (see ``clean_party_names``)
    and each one is looked up as an identifier, an organisation name and an
    alternative name, in that order.
    """
    def __init__(self, party_name):
        """
        :param party_name: party name as it appears in the imported data.
        """
        self.party_name = party_name
        # Saved mapping consulted by ``clean_party_name`` to translate a name
        # into a known value, and extended through its ``picker`` in ``match``.
        self.known_names_to_ids = SavedMapping('party_names.json')

    def clean_party_names(self):
        """
        Return the candidate spellings to try, most direct first.

        ``self.party_name`` is lower-cased in place. The spellings are the
        name without ' party', without 'the ', with ' tory ' and ' libdem '
        expanded, followed by whatever the saved mapping translates each of
        those into. Duplicates are dropped, keeping the first occurrence, so
        that no spelling is looked up twice.
        """
        self.party_name = self.party_name.lower()
        # The expansions keep their surrounding spaces; without them the
        # neighbouring words were glued together ("theconservativeparty").
        replacements = (
            (' party', ''),
            ('the ', ''),
            (' tory ', ' conservative '),
            (' libdem ', ' liberal democrats '),
        )
        variants = [
            self.party_name.replace(old, new) for old, new in replacements
        ]
        variants.extend([self.clean_party_name(name) for name in variants])

        name_options = []
        for name in variants:
            if name not in name_options:
                name_options.append(name)
        return name_options

    def clean_party_name(self, name):
        """Return the saved mapping's value for ``name``, or ``name`` itself."""
        # TODO different registers / countries
        return self.known_names_to_ids.get(name, name)

    # The three lookups below are deliberately best-effort: any failure
    # (no row, several rows, ...) simply means "no match by this route".
    # ``Exception`` is caught instead of using a bare ``except`` so that
    # KeyboardInterrupt and SystemExit still propagate.

    def match_party_id(self, cleaned_name):
        """Look ``cleaned_name`` up as an ``Identifier``; ``None`` on failure."""
        try:
            return Identifier.objects.get(
                identifier=cleaned_name).content_object
        except Exception:
            return None

    def match_party_name(self, cleaned_name):
        """
        Look ``cleaned_name`` up as a case-insensitive organisation name;
        ``None`` on failure.
        """
        try:
            # The result used to be discarded, so this route never matched.
            # NOTE(review): ``.base`` is returned so that the result is the
            # organisation itself, like the other routes - confirm.
            return OrganizationExtra.objects.get(
                base__name__iexact=cleaned_name).base
        except Exception:
            return None

    def match_party_description(self, cleaned_name):
        """
        Look ``cleaned_name`` up as a case-insensitive ``OtherName``;
        ``None`` on failure.
        """
        try:
            # The result used to be discarded, so this route never matched.
            return OtherName.objects.get(
                name__iexact=cleaned_name).content_object
        except Exception:
            return None

    def match(self, picker=True):
        """
        Return the first match found for any spelling of the party name.

        :param picker: when nothing matches, call the saved mapping's
            ``picker`` with the (lower-cased) party name and try once more.
        :raises ValueError: if nothing matches and ``picker`` is false.
        """
        matchers = [
            self.match_party_id,
            self.match_party_name,
            self.match_party_description,
        ]

        cleaned_names = self.clean_party_names()
        for cleaned_name in cleaned_names:
            for matcher in matchers:
                match = matcher(cleaned_name)
                if match:
                    return match
        if picker:
            self.known_names_to_ids.picker(self.party_name)
            return self.match(picker=False)
        # Report every spelling that was tried, not just the last one.
        raise ValueError("No match for {} (cleaned to {})".format(
            self.party_name, repr(cleaned_names)))
示例#4
0
class PartyMatacher(object):
    """
    Takes a string and tries to return an Organisation that matches the party

    Several spellings of the name are generated (see ``clean_party_names``)
    and each one is looked up as a party id, a party name and a party
    description, in that order.
    """

    def __init__(self, party_name):
        """
        :param party_name: party name as it appears in the imported data.
        """
        self.party_name = party_name
        # Saved mapping consulted by ``clean_party_name`` to translate a name
        # into a known value, and extended through its ``picker`` in ``match``.
        self.known_names_to_ids = SavedMapping("party_names.json")

    def clean_party_names(self):
        """
        Return the candidate spellings to try, most direct first.

        ``self.party_name`` is lower-cased in place. The spellings are the
        name without " party", without "the ", with " tory " and " libdem "
        expanded, followed by whatever the saved mapping translates each of
        those into. Duplicates are dropped, keeping the first occurrence, so
        that no spelling is looked up twice.
        """
        self.party_name = self.party_name.lower()
        # The expansions keep their surrounding spaces; without them the
        # neighbouring words were glued together ("theconservativeparty").
        replacements = (
            (" party", ""),
            ("the ", ""),
            (" tory ", " conservative "),
            (" libdem ", " liberal democrats "),
        )
        variants = [
            self.party_name.replace(old, new) for old, new in replacements
        ]
        variants.extend([self.clean_party_name(name) for name in variants])

        name_options = []
        for name in variants:
            if name not in name_options:
                name_options.append(name)
        return name_options

    def clean_party_name(self, name):
        """Return the saved mapping's value for ``name``, or ``name`` itself."""
        # TODO different registers / countries
        return self.known_names_to_ids.get(name, name)

    # The three lookups below are deliberately best-effort: any failure
    # (no row, several rows, ...) simply means "no match by this route".
    # ``Exception`` is caught instead of using a bare ``except`` so that
    # KeyboardInterrupt and SystemExit still propagate.

    def match_party_id(self, cleaned_name):
        """Look ``cleaned_name`` up as a ``Party.ec_id``; ``None`` on failure."""
        try:
            return Party.objects.get(ec_id=cleaned_name)
        except Exception:
            return None

    def match_party_name(self, cleaned_name):
        """
        Look ``cleaned_name`` up as a case-insensitive ``Party`` name;
        ``None`` on failure.
        """
        try:
            # The result used to be discarded, so this route never matched.
            return Party.objects.get(name__iexact=cleaned_name)
        except Exception:
            return None

    def match_party_description(self, cleaned_name):
        """
        Look ``cleaned_name`` up as a case-insensitive ``PartyDescription``
        and return the party it belongs to; ``None`` on failure.
        """
        try:
            # The result used to be discarded, so this route never matched.
            # NOTE(review): assumes PartyDescription exposes its party as
            # ``.party`` and is searchable by ``name`` - confirm against the
            # model. If either assumption is wrong the lookup raises and
            # this route keeps returning None, as it did before.
            return PartyDescription.objects.get(
                name__iexact=cleaned_name
            ).party
        except Exception:
            return None

    def match(self, picker=True):
        """
        Return the first match found for any spelling of the party name.

        :param picker: when nothing matches, call the saved mapping's
            ``picker`` with the (lower-cased) party name and try once more.
        :raises ValueError: if nothing matches and ``picker`` is false.
        """
        matchers = [
            self.match_party_id,
            self.match_party_name,
            self.match_party_description,
        ]

        cleaned_names = self.clean_party_names()
        for cleaned_name in cleaned_names:
            for matcher in matchers:
                match = matcher(cleaned_name)
                if match:
                    return match
        if picker:
            self.known_names_to_ids.picker(self.party_name)
            return self.match(picker=False)
        # Report every spelling that was tried, not just the last one.
        raise ValueError(
            "No match for {} (cleaned to {})".format(
                self.party_name, repr(cleaned_names)
            )
        )