示例#1
0
def update_ridings_from_represent(boundary_set='federal-electoral-districts'):
    """Refresh the Riding table from a Represent API boundary set.

    Downloads the boundary listing for ``boundary_set`` from
    represent.opennorth.ca, clears the ``current`` flag on every riding that
    has it, and then fetches each listed boundary and creates or updates the
    corresponding ``Riding`` (matched through ``Riding.objects.get_by_name``),
    marking it as current.

    Args:
        boundary_set: Slug of the Represent boundary set to import. It is
            interpolated into the listing URL; the default reproduces the
            previously hard-coded federal electoral districts.

    Returns:
        None. The effect is the saved ``Riding`` rows.
    """
    base_url = 'http://represent.opennorth.ca/'
    listing_path = '/boundaries/%s/?limit=500' % boundary_set

    # Fetch the listing *before* touching the database, so a failed request
    # does not leave every riding flagged as not current.
    riding_list = requests.get(urljoin(base_url, listing_path)).json()
    riding_urls = [r['url'] for r in riding_list['objects']]

    Riding.objects.filter(current=True).update(current=False)

    for riding_url in riding_urls:
        riding_data = requests.get(urljoin(base_url, riding_url)).json()
        metadata = riding_data['metadata']
        name = metadata['ENNAME']
        try:
            riding = Riding.objects.get_by_name(name)
            riding.name = name  # just in case of slight punctuation differences
        except Riding.DoesNotExist:
            riding = Riding(name=name)
        riding.edid = int(riding_data['external_id'])
        riding.name_fr = metadata['FRNAME']
        riding.province = metadata['PROVCODE']
        riding.current = True
        riding.save()
        # Short pause between detail requests (presumably to throttle load
        # on the remote API).
        sleep(.1)
示例#2
0
def update_ridings_from_represent(boundary_set='federal-electoral-districts'):
    """Refresh the Riding table from a Represent API boundary set.

    Downloads the boundary listing for ``boundary_set`` from
    represent.opennorth.ca with ``urllib2``, clears the ``current`` flag on
    every riding that has it, and then fetches each listed boundary and
    creates or updates the corresponding ``Riding`` (matched through
    ``Riding.objects.get_by_name``), marking it as current.

    Args:
        boundary_set: Slug of the Represent boundary set to import. It is
            interpolated into the listing URL; the default reproduces the
            previously hard-coded federal electoral districts.

    Returns:
        None. The effect is the saved ``Riding`` rows.
    """
    base_url = 'http://represent.opennorth.ca/'

    def _fetch_json(path):
        # Download ``path`` (resolved against base_url) and decode its JSON
        # body, closing the HTTP response even if decoding fails.
        response = urllib2.urlopen(urljoin(base_url, path))
        try:
            return json.load(response)
        finally:
            response.close()

    # Fetch the listing *before* touching the database, so a failed request
    # does not leave every riding flagged as not current.
    riding_list = _fetch_json('/boundaries/%s/?limit=500' % boundary_set)
    riding_urls = [r['url'] for r in riding_list['objects']]

    Riding.objects.filter(current=True).update(current=False)

    for riding_url in riding_urls:
        riding_data = _fetch_json(riding_url)
        edid = int(riding_data['external_id'])
        name = riding_data['metadata']['ENNAME']
        name_fr = riding_data['metadata']['FRNAME']
        prov = riding_data['metadata']['PROVCODE']
        try:
            riding = Riding.objects.get_by_name(name)
            riding.name = name # just in case of slight punctuation differences
        except Riding.DoesNotExist:
            riding = Riding(name=name)
        riding.edid = edid
        riding.name_fr = name_fr
        riding.province = prov
        riding.current = True
        riding.save()
        # Short pause between detail requests (presumably to throttle load
        # on the remote API).
        sleep(.1)
def update_ridings_from_represent(boundary_set="federal-electoral-districts"):
    """Refresh the Riding table from a Represent API boundary set.

    Downloads the boundary listing for ``boundary_set`` from
    represent.opennorth.ca, clears the ``current`` flag on every riding that
    has it, and then fetches each listed boundary and creates or updates the
    corresponding ``Riding`` (matched through ``Riding.objects.get_by_name``),
    marking it as current.

    Args:
        boundary_set: Slug of the Represent boundary set to import. It is
            interpolated into the listing URL; the default reproduces the
            previously hard-coded federal electoral districts.

    Returns:
        None. The effect is the saved ``Riding`` rows.
    """
    base_url = "http://represent.opennorth.ca/"
    listing_url = urljoin(base_url, "/boundaries/%s/?limit=500" % boundary_set)

    # Fetch the listing *before* touching the database, so a failed request
    # does not leave every riding flagged as not current.
    riding_list = requests.get(listing_url).json()
    riding_urls = [r["url"] for r in riding_list["objects"]]

    Riding.objects.filter(current=True).update(current=False)

    for riding_url in riding_urls:
        riding_data = requests.get(urljoin(base_url, riding_url)).json()
        edid = int(riding_data["external_id"])
        name = riding_data["metadata"]["ENNAME"]
        name_fr = riding_data["metadata"]["FRNAME"]
        prov = riding_data["metadata"]["PROVCODE"]
        try:
            riding = Riding.objects.get_by_name(name)
            riding.name = name  # just in case of slight punctuation differences
        except Riding.DoesNotExist:
            riding = Riding(name=name)
        riding.edid = edid
        riding.name_fr = name_fr
        riding.province = prov
        riding.current = True
        riding.save()
        # Short pause between detail requests (presumably to throttle load
        # on the remote API).
        sleep(0.1)
def import_parl_election(url, election, session=None, soup=None): # FIXME session none only for now
    """Import an election from parl.gc.ca results.

    Sample URL: http://www2.parl.gc.ca/Sites/LOP/HFER/hfer.asp?Language=E&Search=Bres&ridProvince=0&genElection=0&byElection=2009%2F11%2F09&submit1=Search

    Walks the rows of the results table that follows the text
    'click on party abbreviation' and, depending on the kind of row, updates
    the current province, the current riding, or creates/updates a Candidacy
    for ``election``.  Missing ridings, parties and politicians are created
    on the fly.  Progress and warnings are written with ``print`` statements
    (this is Python 2 code).

    Parameters:
        url      -- results page to download; it is fetched with
                    urllib2.urlopen even when ``soup`` is supplied.
        election -- object stored on every Candidacy; its
                    calculate_vote_percentages() is called at the end.
        session  -- not referenced anywhere in the body.
        soup     -- optional pre-parsed document; when None one is built from
                    the downloaded page.

    Returns None.

    Raises Exception when a results row does not have six cells, when a
    party page cannot be parsed, or when more than one existing Candidacy
    matches.  Other exceptions (KeyError for an unknown province heading in
    an <h5> row, ValueError when a candidate name is not 'Last, First',
    network errors) propagate unchanged.
    """
    
    def _addParty(link):
        """Create and save a Party from the party page referenced by link.

        The query string of ``link`` is appended to the hfer-party.asp URL,
        that page is downloaded, and the party name is read from the
        <td width="85%"> cell.  Raises Exception if the link has no query
        string or the name is empty.
        """
        match = re.search(r'\?([^"]+)', link)
        if not match: raise Exception("Couldn't parse link in addParty")
        partyurl = 'http://www2.parl.gc.ca/Sites/LOP/HFER/hfer-party.asp?' + match.group(1)
        partypage = urllib2.urlopen(partyurl)
        partypage = re.sub(r'</?font[^>]*>', '', partypage.read()) # strip out font tags
        partysoup = BeautifulSoup(partypage, convertEntities='html')
        partyname = partysoup.find('td', width='85%').string.strip()
        if partyname:
            party = Party(name=partyname)
            party.save()
            return party
        else:
            raise Exception("Couldn't parse party name")
    
    # NOTE(review): the download happens unconditionally; when a soup is
    # passed in, the fetched page is discarded.
    page = urllib2.urlopen(url)
    page = re.sub(re.compile(r'</?font[^>]*>', re.I), '', page.read()) # strip out font tags
    if soup is None: soup = BeautifulSoup(page, convertEntities='html')
    
    # this works for elections but not byelections -- slightly diff format    
    #for row in soup.find('table', width="95%").findAll('tr'):
    
    # `province`, `riding` and `party` persist across iterations: heading
    # rows set them and the result rows that follow rely on them.
    for row in soup.find(text=re.compile('click on party abbreviation')).findNext('table').findAll('tr'):
      
        if row.find('h5'):
            # It's a province name
            province = row.find('h5').string
            # An unrecognised heading raises KeyError here (not caught).
            province = PROVINCES_NORMALIZED[province.lower()]
            print "PROVINCE: %s" % province
            
        elif row.find('td', 'pro'):
            # It's a province name -- formatted differently on byelection pages
            provincetmp = row.find('b').string
            try:
                province = PROVINCES_NORMALIZED[provincetmp.lower()]
                print "PROVINCE: %s" % province
            except KeyError:
                # the 'province' class is sometimes used for non-province headings. thanks, parliament!
                # In that case the previous value of `province` is kept.
                print "NOT A PROVINCE: %s" % provincetmp

            
        elif row.find('td', 'rid'):
            # It's a riding name
            a = row.find('a')
            href = a['href']  # NOTE(review): assigned but never read
            ridingname = a.string
            try:
                riding = Riding.objects.get_by_name(ridingname)
            except Riding.DoesNotExist:
                print "WARNING: Could not find riding %s" % ridingname
                # NOTE(review): relies on a province row having been seen
                # earlier; otherwise `province` is unbound here.
                riding = Riding(name=ridingname.strip().title(), province=province)
                riding.save()
            else:
                print "RIDING: %s" % riding
        
        elif row.find('td', bgcolor='#00224a'):
            # It's a heading
            pass
        elif row.find('td', align='right'):
            # It's a results row
            cells = row.findAll('td')
            if len(cells) != 6:
                raise Exception("Couldn't parse row: %s" % row)
                
            # Cell 2: party name
            link = cells[1].find('a')
            partylink = link['href']
            partyabbr = link.string
            try:
                party = Party.objects.get_by_name(partyabbr)
            except Party.DoesNotExist:
                # Unknown abbreviation: scrape the party page, create the
                # Party, and remember the abbreviation as an alternate name.
                party = _addParty(partylink)
                party.add_alternate_name(partyabbr)
                print "WARNING: Could not find party %s" % partyabbr
                
            # Cell 6: elected
            # (an <img> in the last cell marks the winner)
            if cells[5].find('img'):
                elected = True
            else:
                elected = False
                
            # Cell 1: candidate name
            link = cells[0].find('a')
            if link:
                parllink = link['href']  # NOTE(review): assigned but never read
                candidatename = link.string
            else:
                candidatename = cells[0].string.strip()
            # Expects exactly one ', ' separator ("Last, First"); anything
            # else makes the unpacking raise ValueError.
            (last, first) = candidatename.split(', ')
            last = last.strip().title()
            first = first.strip()
            
            # First, assemble a list of possible candidates
            candidate = None
            saveCandidate = False
            candidates = Politician.objects.filter_by_name("%s %s" % (first, last))
            # If there's nothing in the list, try a little harder
            if len(candidates) == 0:
                # Does the candidate have many given names?
                if first.strip().count(' ') >= 1:
                    # Retry with only the first given name.
                    minifirst = first.strip().split(' ')[0]
                    candidates = Politician.objects.filter_by_name("%s %s" % (minifirst, last))
            # Then, evaluate the possibilities in the list
            for posscand in candidates:
                # You're only a match if you've run for office for the same party in the same province
                match = ElectedMember.objects.filter(riding__province=riding.province, party=party, politician=posscand).count() >= 1 or Candidacy.objects.filter(riding__province=riding.province, party=party, candidate=posscand).count() >= 1
                if match:
                    if candidate is not None:
                        # A second match makes the name ambiguous: give up
                        # on reuse and fall through to creating a new
                        # Politician below.
                        print "WARNING: Could not disambiguate existing candidates %s" % candidatename
                        candidate = None
                        break
                    else:
                        candidate = posscand
            if candidate is None:
                # Built here but only saved later, and only if a brand-new
                # Candidacy is created for it.
                saveCandidate = True
                candidate = Politician(name="%s %s" % (first, last), name_given=first, name_family=last)
            
            # Cell 3: occupation
            occupation = cells[2].string
            
            # Cell 4: votes
            votetotal = parsetools.munge_int(cells[3].string)
            
            # Okay -- now see if this candidacy already exists
            candidacy = None
            # Lookup 1: by election + riding + party (skipped for the party
            # named 'Independent').
            if party.name != 'Independent':
                candidacies = Candidacy.objects.filter(election=election, riding=riding, party=party)
                if len(candidacies) > 1:
                    raise Exception("Too many candidacies!")
                elif len(candidacies) == 1:
                    candidacy = candidacies[0]
                    if candidate != candidacy.candidate:
                        # The stored candidacy wins over the name match.
                        print "WARNING: Forced riding/party match for candidate %s: %s" % (candidatename, candidacy.candidate)
                        candidate = candidacy.candidate
            # Lookup 2: by candidate + election.
            if candidacy is None:
                candidacies = Candidacy.objects.filter(candidate=candidate, election=election)
                if len(candidacies) > 1:
                    raise Exception("Two candidacies for one candidate!")
                elif len(candidacies) == 1:
                    candidacy = candidacies[0]
                    if candidacy.riding != riding or candidacy.party != party:
                        print "WARNING: Invalid riding/party match for %s - %s (%s), %s (%s)" % (candidacy, riding, candidacy.riding == riding, party, candidacy.party == party)
                        # Conflicting record: skip this row without saving.
                        continue
                else:
                    # Nothing on file: persist a new politician if needed
                    # and start a fresh Candidacy.
                    if saveCandidate: candidate.save()
                    candidacy = Candidacy(candidate=candidate, election=election, riding=riding, party=party)
            # Existing or new, overwrite the scraped fields and save.
            candidacy.occupation = unicode(occupation)
            candidacy.votetotal = votetotal
            candidacy.elected = elected
            candidacy.save()
            #print "%s (%s), a %s, got %d votes (elected: %s)" % (candidatename, partyabbr, occupation, votecount, elected)
    election.calculate_vote_percentages()
示例#5
0
def import_parl_election(url,
                         election,
                         session=None,
                         soup=None):  # FIXME session none only for now
    """Import an election from parl.gc.ca results.

    Sample URL: http://www2.parl.gc.ca/Sites/LOP/HFER/hfer.asp?Language=E&Search=Bres&ridProvince=0&genElection=0&byElection=2009%2F11%2F09&submit1=Search

    Walks the rows of the results table that follows the text
    'click on party abbreviation' and, depending on the kind of row, updates
    the current province, the current riding, or creates/updates a Candidacy
    for ``election``.  Missing ridings, parties and politicians are created
    on the fly.  Progress and warnings are written with ``print`` statements
    (this is Python 2 code).

    Parameters:
        url      -- results page to download; it is fetched with
                    urllib2.urlopen even when ``soup`` is supplied.
        election -- object stored on every Candidacy; its
                    calculate_vote_percentages() is called at the end.
        session  -- not referenced anywhere in the body.
        soup     -- optional pre-parsed document; when None one is built from
                    the downloaded page.

    Returns None.

    Raises Exception when a results row does not have six cells, when a
    party page cannot be parsed, or when more than one existing Candidacy
    matches.  Other exceptions (KeyError for an unknown province heading in
    an <h5> row, ValueError when a candidate name is not 'Last, First',
    network errors) propagate unchanged.
    """

    # Steps: 1. run this function
    # 2. el.label_winners()
    # 3. el.create_members(Session.objects.current())

    def _addParty(link):
        """Create and save a Party from the party page referenced by link.

        The query string of ``link`` is appended to the hfer-party.asp URL,
        that page is downloaded, and the party name is read from the
        <td width="85%"> cell.  Raises Exception if the link has no query
        string or the name is empty.
        """
        match = re.search(r'\?([^"]+)', link)
        if not match: raise Exception("Couldn't parse link in addParty")
        partyurl = 'http://www2.parl.gc.ca/Sites/LOP/HFER/hfer-party.asp?' + match.group(
            1)
        partypage = urllib2.urlopen(partyurl)
        partypage = re.sub(r'</?font[^>]*>', '',
                           partypage.read())  # strip out font tags
        partysoup = BeautifulSoup(partypage, convertEntities='html')
        partyname = partysoup.find('td', width='85%').string.strip()
        if partyname:
            # NOTE(review): the party is created with `name_en`, while the
            # candidacy lookup further down reads `party.name` -- confirm
            # both attributes exist on Party.
            party = Party(name_en=partyname)
            party.save()
            return party
        else:
            raise Exception("Couldn't parse party name")

    # NOTE(review): the download happens unconditionally; when a soup is
    # passed in, the fetched page is discarded.
    page = urllib2.urlopen(url)
    page = re.sub(re.compile(r'</?font[^>]*>', re.I), '',
                  page.read())  # strip out font tags
    if soup is None: soup = BeautifulSoup(page, convertEntities='html')

    # this works for elections but not byelections -- slightly diff format
    #for row in soup.find('table', width="95%").findAll('tr'):

    # `province`, `riding` and `party` persist across iterations: heading
    # rows set them and the result rows that follow rely on them.
    for row in soup.find(text=re.compile(
            'click on party abbreviation')).findNext('table').findAll('tr'):

        if row.find('h5'):
            # It's a province name
            province = row.find('h5').string
            # An unrecognised heading raises KeyError here (not caught).
            province = PROVINCES_NORMALIZED[province.lower()]
            print "PROVINCE: %s" % province

        elif row.find('td', 'pro'):
            # It's a province name -- formatted differently on byelection pages
            provincetmp = row.find('b').string
            try:
                province = PROVINCES_NORMALIZED[provincetmp.lower()]
                print "PROVINCE: %s" % province
            except KeyError:
                # the 'province' class is sometimes used for non-province headings. thanks, parliament!
                # In that case the previous value of `province` is kept.
                print "NOT A PROVINCE: %s" % provincetmp

        elif row.find('td', 'rid'):
            # It's a riding name
            a = row.find('a')
            href = a['href']  # NOTE(review): assigned but never read
            ridingname = a.string
            try:
                riding = Riding.objects.get_by_name(ridingname)
            except Riding.DoesNotExist:
                print "WARNING: Could not find riding %s" % ridingname
                # NOTE(review): relies on a province row having been seen
                # earlier; otherwise `province` is unbound here.
                riding = Riding(name=ridingname.strip().title(),
                                province=province)
                riding.save()
            else:
                print "RIDING: %s" % riding

        elif row.find('td', bgcolor='#00224a'):
            # It's a heading
            pass
        elif row.find('td', align='right'):
            # It's a results row
            cells = row.findAll('td')
            if len(cells) != 6:
                raise Exception("Couldn't parse row: %s" % row)

            # Cell 2: party name
            link = cells[1].find('a')
            partylink = link['href']
            partyabbr = link.string
            try:
                party = Party.objects.get_by_name(partyabbr)
            except Party.DoesNotExist:
                # Unknown abbreviation: scrape the party page, create the
                # Party, and remember the abbreviation as an alternate name.
                party = _addParty(partylink)
                party.add_alternate_name(partyabbr)
                print "WARNING: Could not find party %s" % partyabbr

            # Cell 6: elected
            # (an <img> in the last cell marks the winner)
            if cells[5].find('img'):
                elected = True
            else:
                elected = False

            # Cell 1: candidate name
            link = cells[0].find('a')
            if link:
                parllink = link['href']  # NOTE(review): assigned but never read
                candidatename = link.string
            else:
                candidatename = cells[0].string.strip()
            # Expects exactly one ', ' separator ("Last, First"); anything
            # else makes the unpacking raise ValueError.
            (last, first) = candidatename.split(', ')
            last = last.strip().title()
            first = first.strip()

            # First, assemble a list of possible candidates
            candidate = None
            saveCandidate = False
            candidates = Politician.objects.filter_by_name("%s %s" %
                                                           (first, last))
            # If there's nothing in the list, try a little harder
            if len(candidates) == 0:
                # Does the candidate have many given names?
                if first.strip().count(' ') >= 1:
                    # Retry with only the first given name.
                    minifirst = first.strip().split(' ')[0]
                    candidates = Politician.objects.filter_by_name(
                        "%s %s" % (minifirst, last))
            # Then, evaluate the possibilities in the list
            for posscand in candidates:
                # You're only a match if you've run for office for the same party in the same province
                match = ElectedMember.objects.filter(
                    riding__province=riding.province,
                    party=party,
                    politician=posscand).count(
                    ) >= 1 or Candidacy.objects.filter(
                        riding__province=riding.province,
                        party=party,
                        candidate=posscand).count() >= 1
                if match:
                    if candidate is not None:
                        # A second match makes the name ambiguous: give up
                        # on reuse and fall through to creating a new
                        # Politician below.
                        print "WARNING: Could not disambiguate existing candidates %s" % candidatename
                        candidate = None
                        break
                    else:
                        candidate = posscand
            if candidate is None:
                # Built here but only saved later, and only if a brand-new
                # Candidacy is created for it.
                saveCandidate = True
                candidate = Politician(name="%s %s" % (first, last),
                                       name_given=first,
                                       name_family=last)

            # Cell 3: occupation
            occupation = cells[2].string

            # Cell 4: votes
            votetotal = parsetools.munge_int(cells[3].string)

            # Okay -- now see if this candidacy already exists
            candidacy = None
            # Lookup 1: by election + riding + party (skipped for the party
            # named 'Independent').
            if party.name != 'Independent':
                candidacies = Candidacy.objects.filter(election=election,
                                                       riding=riding,
                                                       party=party)
                if len(candidacies) > 1:
                    raise Exception("Too many candidacies!")
                elif len(candidacies) == 1:
                    candidacy = candidacies[0]
                    if candidate != candidacy.candidate:
                        # The stored candidacy wins over the name match.
                        print "WARNING: Forced riding/party match for candidate %s: %s" % (
                            candidatename, candidacy.candidate)
                        candidate = candidacy.candidate
            # Lookup 2: by candidate + election.
            if candidacy is None:
                candidacies = Candidacy.objects.filter(candidate=candidate,
                                                       election=election)
                if len(candidacies) > 1:
                    raise Exception("Two candidacies for one candidate!")
                elif len(candidacies) == 1:
                    candidacy = candidacies[0]
                    if candidacy.riding != riding or candidacy.party != party:
                        print "WARNING: Invalid riding/party match for %s - %s (%s), %s (%s)" % (
                            candidacy, riding, candidacy.riding
                            == riding, party, candidacy.party == party)
                        # Conflicting record: skip this row without saving.
                        continue
                else:
                    # Nothing on file: persist a new politician if needed
                    # and start a fresh Candidacy.
                    if saveCandidate: candidate.save()
                    candidacy = Candidacy(candidate=candidate,
                                          election=election,
                                          riding=riding,
                                          party=party)
            # Existing or new, overwrite the scraped fields and save.
            candidacy.occupation = unicode(occupation)
            candidacy.votetotal = votetotal
            candidacy.elected = elected
            candidacy.save()
            #print "%s (%s), a %s, got %d votes (elected: %s)" % (candidatename, partyabbr, occupation, votecount, elected)
    election.calculate_vote_percentages()