示例#1
0
class ModGovImporter(BaseImporter):
    """
    Importer that reads election results from a ModGov-style XML endpoint.

    The XML at ``url`` is downloaded and parsed as soon as the importer is
    constructed. ``divisions()`` and ``candidates()`` then walk the parsed
    document.
    """

    def __init__(self, *args, **kwargs):
        """
        :param url: required keyword argument; address of the results XML.
            It is removed from ``kwargs`` before the remaining arguments are
            passed on to the base class.
        :raises KeyError: if ``url`` is not supplied.
        """
        self.url = kwargs.pop("url")
        super().__init__(*args, **kwargs)
        self.get_data()
        # Remembers, per ballot paper id, whether the operator still needs
        # to be asked about a seat-count mismatch.
        self.saved_numseats = SavedMapping("num_seats.json")

    def get_data(self):
        """Download ``self.url`` and parse the response body as XML."""
        self.data = requests.get(self.url).content
        self.soup = BeautifulSoup(self.data, "xml")

    def divisions(self):
        """
        Yield a ``ModGovDivision`` for every ``electionarea`` element.

        Divisions whose name resolves to ``"--deleted--"`` are skipped.
        When the remote seat count is non-zero and differs from the local
        ``winner_count``, the operator is asked (once per ballot) whether
        the local value should be updated.

        :raises RuntimeError: if a division could not be matched to a local
            ballot.
        """
        for area in self.soup.election.find_all("electionarea"):
            division = ModGovDivision(self.election, area)
            if division.match_name() == "--deleted--":
                continue
            if not division.local_area:
                # The original used a bare ``raise`` here, which Python
                # reports as "No active exception to re-raise"; keep the
                # exception type but say what actually went wrong.
                raise RuntimeError(
                    "No local ballot matched for division {!r} ({})".format(
                        division.title, self.url
                    )
                )

            remote_seats = int(division.numseats)
            # A remote count of zero is most likely a data-entry mistake,
            # so it is never treated as a mismatch.
            if (
                remote_seats
                and remote_seats != division.local_area.winner_count
            ):
                self._resolve_seat_mismatch(division, remote_seats)
            yield division

    def _resolve_seat_mismatch(self, division, remote_seats):
        """Ask once whether to overwrite the local seat count for a ballot."""
        local_area = division.local_area
        key = local_area.ballot_paper_id
        if not self.saved_numseats.get(key, True):
            # Already asked about this ballot on an earlier run.
            return

        print(division.title)
        print(self.url)
        print(division.numseats, local_area.winner_count)
        print("winner_count mismatch, update local?")
        answer = input("y/n: ")
        if answer.strip().lower() == "y":
            local_area.winner_count = remote_seats
            local_area.save()
        # Record that the question was asked, whatever the answer was.
        self.saved_numseats[key] = False
        self.saved_numseats.save()

    def candidates(self, division):
        """
        Yield a ``ModGovCandidate`` for each ``candidate`` element whose
        ``electionareaid`` text equals ``division.electionareaid``.
        """
        for candidate in self.soup.find_all("candidate"):
            if candidate.electionareaid.get_text() == division.electionareaid:
                yield ModGovCandidate(candidate, division)

    def api_url_to_web_url(self, url):
        """
        Rewrite a web-service results URL into the matching results page
        URL by swapping the endpoint path and the election id parameter.
        """
        url = url.replace(
            "mgWebService.asmx/GetElectionResults",
            "mgElectionElectionAreaResults.aspx",
        )
        url = url.replace("lElectionId=", "Page=all&EID=")
        return url
示例#2
0
 def __init__(self, *args, **kwargs):
     """
     Take the required ``url`` keyword out of ``kwargs``, initialise the
     base class with what is left, fetch the data and open the saved
     seat-count mapping.
     """
     url = kwargs.pop("url")
     self.url = url
     super().__init__(*args, **kwargs)
     self.get_data()
     seat_mapping = SavedMapping("num_seats.json")
     self.saved_numseats = seat_mapping
示例#3
0
 def __init__(self, party_name):
     """Store the raw party name and open the saved party-name mapping."""
     self.party_name = party_name
     name_mapping = SavedMapping('party_names.json')
     self.known_names_to_ids = name_mapping
示例#4
0
class PartyMatacher(object):
    """
    Takes a string and tries to return an Organisation that matches the party
    """
    def __init__(self, party_name):
        """
        :param party_name: party name as it appears in the remote source.
        """
        self.party_name = party_name
        # Mapping from previously seen party names to known identifiers.
        self.known_names_to_ids = SavedMapping('party_names.json')

    def clean_party_names(self):
        """
        Build the list of candidate strings to try when matching.

        Lowercases ``self.party_name`` in place, then returns four textual
        variants of it followed by the same four variants passed through
        ``clean_party_name``.
        """
        self.party_name = self.party_name.lower()
        name_options = [
            self.party_name.replace(' party', ''),
            self.party_name.replace('the ', ''),
            self.party_name.replace(' tory ', 'conservative'),
            self.party_name.replace(' libdem ', 'liberal democrats'),
        ]
        name_options.extend(
            [self.clean_party_name(name) for name in name_options])
        return name_options

    def clean_party_name(self, name):
        """Return the saved mapping for ``name``, or ``name`` if unmapped."""
        # TODO different registers / countries
        return self.known_names_to_ids.get(name, name)

    def match_party_id(self, cleaned_name):
        """Look the name up as an ``Identifier``; ``None`` if that fails."""
        try:
            return Identifier.objects.get(
                identifier=cleaned_name).content_object
        except Exception:
            # Best-effort lookup: any failure simply means "no match".
            return None

    def match_party_name(self, cleaned_name):
        """Case-insensitive match on organisation name; ``None`` on failure."""
        try:
            # The original performed this lookup but discarded the result,
            # so this matcher could never succeed.
            return OrganizationExtra.objects.get(
                base__name__iexact=cleaned_name)
        except Exception:
            return None

    def match_party_description(self, cleaned_name):
        """Case-insensitive match on ``OtherName``; ``None`` on failure."""
        try:
            # As above: the looked-up object is now actually returned.
            return OtherName.objects.get(
                name__iexact=cleaned_name).content_object
        except Exception:
            return None

    def match(self, picker=True):
        """
        Try every matcher against every cleaned name; first hit wins.

        :param picker: when true and nothing matched, invoke the saved
            mapping's ``picker`` for the party name and retry once.
        :raises ValueError: if nothing matches after the optional retry.
        """
        matchers = [
            self.match_party_id,
            self.match_party_name,
            self.match_party_description,
        ]

        cleaned_names = self.clean_party_names()
        for cleaned_name in cleaned_names:
            for matcher in matchers:
                match = matcher(cleaned_name)
                if match:
                    return match
        if picker:
            self.known_names_to_ids.picker(self.party_name)
            return self.match(picker=False)
        # Report the last variant tried, as the original message did.
        raise ValueError("No match for {} (cleaned to {})".format(
            self.party_name, repr(cleaned_names[-1])))
示例#5
0
class BaseDivision(object):
    """
    A representation of a division and the relationship between a remote
    source and a local Post object.
    """

    def __init__(self, election, remote_name):
        """
        :param election: local election; its ``slug`` and ``ballot_set``
            are used for matching.
        :param remote_name: division name as given by the remote source.
        """
        self.election = election
        self.remote_name = remote_name
        self.local_area = None
        # Manual decisions from earlier runs, keyed by "<slug>--<name>".
        self.saved_matches = SavedMapping("division_matches.json")

    def election_specific_guess(self):
        """
        Return a hard-coded local name for this division, or ``None``.

        The comparison ignores case because ``match_name`` lowercases
        ``remote_name`` before asking for the guess; an exact-case lookup
        would never find the mixed-case keys below.
        """
        guesses_by_election = {
            "local.swindon.2018-05-03": {
                "Gorsehill & Pinehurst": "Gorse Hill and Pinehurst"
            }
        }
        wanted = self.remote_name.lower()
        overrides = guesses_by_election.get(self.election.slug, {})
        for remote, local in overrides.items():
            if remote.lower() == wanted:
                return local
        return None

    def match_name(self):
        """
        Resolve ``remote_name`` to a ballot of ``self.election``.

        Tries, in order: a saved manual match, case-insensitive exact
        matches on a list of name variants, regex matches on the same
        variants, and finally an interactive prompt.

        :returns: the matched ballot (also stored on ``self.local_area``),
            or the string ``"--deleted--"`` if the division was marked as
            deleted.
        """
        # TODO use OtherName here
        self.remote_name = self.remote_name.lower()

        key = "{}--{}".format(self.election.slug, self.remote_name)
        if key in self.saved_matches:
            match = self.saved_matches[key]
            if match == "--deleted--":
                return "--deleted--"
            self.local_area = self.election.ballot_set.get(
                ballot_paper_id=match
            )
            return self.local_area

        area = self._match_guesses(self._name_guesses())
        if area is not None:
            self.local_area = area
            # Return the ballot so this path agrees with the saved-match
            # and manual paths (it used to return None).
            return area

        # If all else fails, just ask the user
        return self._ask_user(key)

    def _name_guesses(self):
        """Build the ordered list of name variants to try."""
        name = self.remote_name
        guesses = [
            name,
            name.replace(" & ", " and "),
            name.replace(" and ", " & "),
            name.replace(" ward", "").strip(),
            name.replace(" & ", " and ").replace(" ward", "").strip(),
        ]
        if name.endswith("s"):
            # e.g. "st johns" -> "st john's"
            guesses.append("{}'s".format(name[:-1]))
        specific = self.election_specific_guess()
        if specific:
            guesses.append(specific)
        return guesses

    def _match_guesses(self, guesses):
        """Return the first ballot matching any guess, or ``None``."""
        for name in guesses:
            try:
                return self.election.ballot_set.get(post__label__iexact=name)
            except Exception:
                # Best effort: no match / several matches -> try the next.
                continue
        # Try a regex…I know
        for name in guesses:
            # Let spaces and hyphens match any single character.
            pattern = name.replace(" ", ".").replace("-", ".") + "$"
            try:
                return self.election.ballot_set.get(
                    post__label__iregex=pattern
                )
            except Exception:
                continue
        return None

    def _ask_user(self, key):
        """Prompt for a manual match, persist the answer and return it."""
        print(
            "No match for {} found. Can you manually match it?".format(
                self.remote_name
            )
        )
        possible = list(
            self.election.ballot_set.all()
            .order_by("post__label")
            .select_related("post")
        )
        for i, ballot in enumerate(possible, start=1):
            print("\t{}\t{}".format(i, ballot.post.label))

        while True:
            answer = input("Pick a number or 'd' if it's deleted: ").strip()
            if answer.lower() == "d":
                self.saved_matches[key] = "--deleted--"
                self.saved_matches.save()
                return "--deleted--"
            try:
                choice = int(answer)
            except ValueError:
                choice = 0
            # Reject 0 and negatives explicitly: they would otherwise wrap
            # around and silently select from the end of the list.
            if 1 <= choice <= len(possible):
                break
            print(
                "Please enter a number between 1 and {}, or 'd'.".format(
                    len(possible)
                )
            )

        area = possible[choice - 1]
        self.saved_matches[key] = area.ballot_paper_id
        self.saved_matches.save()
        print(area)
        self.local_area = area
        return area
示例#6
0
 def __init__(self, election, remote_name):
     """
     Keep the election and remote division name, start with no matched
     local area, and open the saved division-match mapping.
     """
     self.election, self.remote_name = election, remote_name
     self.local_area = None
     match_store = SavedMapping("division_matches.json")
     self.saved_matches = match_store
 def __init__(self, candidate, ballot_paper):
     """Keep the candidate and ballot paper and open the membership map."""
     self.candidate, self.ballot_paper = candidate, ballot_paper
     membership_store = SavedMapping('membership_map.json')
     self.membership_map = membership_store
class CandidateMatcher(object):
    """
    Matches a remote candidate to a membership on the local ballot.

    Strategies are tried in order: a previously saved manual pick, an
    automatic party-and-name comparison, a manual pick among the party's
    memberships, and finally a manual pick among every membership on the
    ballot.
    """

    def __init__(self, candidate, ballot_paper):
        """
        :param candidate: remote candidate; ``name`` and ``party`` are read.
        :param ballot_paper: remote division; ``local_area`` and ``title``
            are read.
        """
        self.candidate = candidate
        self.ballot_paper = ballot_paper
        # Manual picks from earlier runs: "<ballot id>--<name>" -> pk.
        self.membership_map = SavedMapping('membership_map.json')

    def match(self):
        """
        Return the first membership produced by any strategy.

        If every strategy comes back empty (for example the operator
        answered "s" at the final prompt) the process exits.
        """
        matchers = [
            self.pick_from_map,
            self.match_party_and_name,
            self.match_manually,
            self.match_from_all_manually,
        ]
        for matcher in matchers:
            match = matcher()
            if match:
                return match
        import sys
        sys.exit()

    def get_parties(self):
        """
        Return the parties a membership may stand on behalf of: the
        candidate's party, plus the ``joint-party:53-119`` organisation
        when the party carries the ``PP53`` identifier.
        """
        parties = [self.candidate.party]
        if self.candidate.party.identifiers.filter(
                identifier="PP53").exists():
            parties.append(
                Identifier.objects.get(
                    identifier="joint-party:53-119").content_object)
        return parties

    def get_memberships(self):
        """Return (and cache) the ballot's memberships for the parties."""
        if hasattr(self, '_memberships'):
            return self._memberships
        parties = self.get_parties()

        candidates_for_party = \
            self.ballot_paper.local_area.membership_set.filter(
                on_behalf_of__in=parties
            ).select_related('person').order_by('pk')
        self._memberships = candidates_for_party
        return self._memberships

    def _map_key(self):
        """Key under which a manual pick for this candidate is stored."""
        # Plain text, not bytes: formatting bytes on Python 3 would embed
        # a literal "b'...'" in the key.
        return "{}--{}".format(
            self.ballot_paper.local_area.ballot_paper_id,
            self.candidate.name,
        )

    def pick_from_map(self):
        """Return the membership saved for this candidate, if there is one."""
        # NOTE(review): the result is unused here; the call is kept so the
        # party memberships are still resolved before any other strategy,
        # as in the original flow. Confirm whether that ordering matters.
        self.get_memberships()
        value = self.membership_map.get(self._map_key(), None)
        if value:
            return self.ballot_paper.local_area.membership_set.get(pk=value)
        return None

    @staticmethod
    def _clean_name(name):
        """Lowercase, drop commas and 'councillor', collapse whitespace."""
        name = name.lower().replace(',', '').replace('councillor', '')
        return ' '.join(name.split())

    @staticmethod
    def _name_to_parts(name):
        """Split a cleaned name into its non-empty tokens."""
        return [part.strip() for part in name.split(' ') if part.strip()]

    def match_party_and_name(self, qs=None):
        """
        Automatically match the candidate by name.

        :param qs: memberships to search; defaults to the memberships for
            the candidate's parties.
        :returns: the only membership if there is exactly one; otherwise
            the first whose cleaned name is equal, or whose first and last
            name tokens agree in either order; ``None`` if none does.
        """
        candidates_for_party = self.get_memberships() if qs is None else qs
        if candidates_for_party.count() == 1:
            # Only one person it can be, init?
            return candidates_for_party.first()

        candidate_name = self._clean_name(self.candidate.name)
        candidate_parts = self._name_to_parts(candidate_name)

        for membership in candidates_for_party:
            person_name = self._clean_name(membership.base.person.name)
            if person_name == candidate_name:
                return membership

            person_parts = self._name_to_parts(person_name)
            if person_parts and candidate_parts:
                person_ends = (person_parts[0], person_parts[-1])
                # Ignore middle names
                if person_ends == (candidate_parts[0], candidate_parts[-1]):
                    return membership
                # LAST, First
                if person_ends == (candidate_parts[-1], candidate_parts[0]):
                    return membership

            print("person name {} didn't match to candidate {}".format(
                person_parts, candidate_parts))
        return None

    def _manual_matcher(self, qs):
        """
        List ``qs`` and let the operator pick one membership.

        Entering "s" skips (returns ``None``). A valid pick is saved to
        the membership map before being returned.
        """
        print("No match for '{}' in {}. Please pick from the following".format(
            self.candidate.name, self.ballot_paper.title))
        # Materialise once so the number typed always refers to the row
        # that was displayed.
        options = list(qs)
        for i, membership in enumerate(options, start=1):
            print("\t{}\t{}".format(i, membership.base.person.name))

        while True:
            answer = input("Enter selection: ").strip()
            if answer == "s":
                return None
            try:
                choice = int(answer)
            except ValueError:
                choice = 0
            if 1 <= choice <= len(options):
                break
            print("Please enter a number between 1 and {}, or 's'.".format(
                len(options)))

        picked_membership = options[choice - 1]
        self.membership_map[self._map_key()] = picked_membership.pk
        self.membership_map.save()
        return picked_membership

    def match_manually(self):
        """Manual pick among the party's memberships, if there are any."""
        candidates_for_party = self.get_memberships()
        if not candidates_for_party.exists():
            return None
        return self._manual_matcher(candidates_for_party)

    def match_from_all_manually(self):
        """Retry the automatic match on every membership, then ask."""
        qs = self.ballot_paper.local_area.membership_set.all()
        match = self.match_party_and_name(qs=qs)
        if match:
            return match
        return self._manual_matcher(qs)
示例#9
0
class PartyMatacher(object):
    """
    Takes a string and tries to return an Organisation that matches the party
    """

    def __init__(self, party_name):
        """
        :param party_name: party name as it appears in the remote source.
        """
        self.party_name = party_name
        # Mapping from previously seen party names to known identifiers.
        self.known_names_to_ids = SavedMapping("party_names.json")

    def clean_party_names(self):
        """
        Build the list of candidate strings to try when matching.

        Lowercases ``self.party_name`` in place, then returns four textual
        variants of it followed by the same four variants passed through
        ``clean_party_name``.
        """
        self.party_name = self.party_name.lower()
        name_options = [
            self.party_name.replace(" party", ""),
            self.party_name.replace("the ", ""),
            self.party_name.replace(" tory ", "conservative"),
            self.party_name.replace(" libdem ", "liberal democrats"),
        ]
        name_options.extend(
            [self.clean_party_name(name) for name in name_options]
        )
        return name_options

    def clean_party_name(self, name):
        """Return the saved mapping for ``name``, or ``name`` if unmapped."""
        # TODO different registers / countries
        return self.known_names_to_ids.get(name, name)

    def match_party_id(self, cleaned_name):
        """Look the name up as a ``Party.ec_id``; ``None`` if that fails."""
        try:
            return Party.objects.get(ec_id=cleaned_name)
        except Exception:
            # Best-effort lookup: any failure simply means "no match".
            return None

    def match_party_name(self, cleaned_name):
        """Case-insensitive match on ``Party.name``; ``None`` on failure."""
        try:
            # The original performed this lookup but discarded the result,
            # so this matcher could never succeed.
            return Party.objects.get(name__iexact=cleaned_name)
        except Exception:
            return None

    def match_party_description(self, cleaned_name):
        """
        Case-insensitive match on ``PartyDescription.name``; ``None`` on
        failure.
        """
        try:
            # As above: the looked-up object is now actually returned.
            # NOTE(review): this yields the PartyDescription row itself;
            # confirm whether callers expect its related party instead.
            return PartyDescription.objects.get(name__iexact=cleaned_name)
        except Exception:
            return None

    def match(self, picker=True):
        """
        Try every matcher against every cleaned name; first hit wins.

        :param picker: when true and nothing matched, invoke the saved
            mapping's ``picker`` for the party name and retry once.
        :raises ValueError: if nothing matches after the optional retry.
        """
        matchers = [
            self.match_party_id,
            self.match_party_name,
            self.match_party_description,
        ]

        cleaned_names = self.clean_party_names()
        for cleaned_name in cleaned_names:
            for matcher in matchers:
                match = matcher(cleaned_name)
                if match:
                    return match
        if picker:
            self.known_names_to_ids.picker(self.party_name)
            return self.match(picker=False)
        # Report the last variant tried, as the original message did.
        raise ValueError(
            "No match for {} (cleaned to {})".format(
                self.party_name, repr(cleaned_names[-1])
            )
        )