def scrape(self, chamber, year):
    """Scrape the NCGA member list for one chamber and save each legislator.

    The session label is built as "<year>-<year + 1>". The member list page
    for the House is fetched when ``chamber`` is ``'lower'``; any other value
    selects the Senate page. Every table row after the header row becomes one
    ``Legislator`` that is handed to ``self.save_legislator``.

    Args:
        chamber: ``'lower'`` for the House; anything else means the Senate.
        year: First year of the session (anything ``int()`` accepts).
    """
    start_year = int(year)
    session = "%d-%d" % (start_year, start_year + 1)

    chamber_param = 'House' if chamber == 'lower' else 'Senate'
    url = ("http://www.ncga.state.nc.us/gascripts/members/"
           "memberList.pl?sChamber=" + chamber_param)

    # The site abbreviates the two major parties; anything else is kept
    # exactly as it appears on the page.
    party_names = {'Dem': 'Democrat', 'Rep': 'Republican'}

    with self.urlopen(url) as (resp, data):
        page = self.soup_parser(data)
        member_table = page.find('div', id='mainBody').find('table')

        # The first <tr> is skipped (the original treats it as a header row).
        for member_row in member_table.findAll('tr')[1:]:
            abbreviation = member_row.td.contents[0].strip()
            party = party_names.get(abbreviation, abbreviation)

            columns = member_row.findAll('td')
            district = columns[1].contents[0].strip()

            # Names on the page may contain non-breaking spaces; normalise
            # them to plain spaces before splitting the name apart.
            full_name = columns[2].a.contents[0].strip()
            full_name = full_name.replace(u'\u00a0', ' ')
            first_name, last_name, middle_name, suffix = split_name(
                full_name)

            member = Legislator(session, chamber, district, full_name,
                                first_name, last_name, middle_name,
                                party, suffix=suffix)
            member.add_source(url)
            self.save_legislator(member)
def get_vote(self, bill, url):
    """Scrape one roll-call vote page and attach the vote to ``bill``.

    The printable version of the page at ``url`` (a path relative to
    http://www.ncga.state.nc.us) is fetched and parsed for the motion text,
    the vote time, the vote totals and the individual voters. The resulting
    ``Vote`` is passed to ``bill.add_vote``; nothing is returned.

    Args:
        bill: Bill object; ``bill["session"]`` is read and ``bill.add_vote``
            is called.
        url: Site-relative URL containing an ``sChamber=H`` or ``sChamber=S``
            query parameter.

    Raises:
        KeyError: If the ``sChamber`` letter is neither ``H`` nor ``S``.
        IndexError: If an expected page element is missing.
        ValueError: If the vote time does not match the expected format or a
            voter cell contains no ``:``.

    NOTE(review): another ``get_vote`` definition appears later in this
    file; if both live in the same scope the later one takes precedence.
    NOTE(review): assumes ``self.lt_gov`` is initialised (e.g. to None)
    elsewhere before the first call -- confirm.
    """
    url = "http://www.ncga.state.nc.us" + url + "&bPrintable=true"
    chamber = {"H": "lower", "S": "upper"}[re.findall(r"sChamber=(\w)", url)[0]]

    data = self.urlopen(url)
    soup = self.soup_parser(data)

    # The motion text is the last node of the second cell in the table row
    # that holds the bill look-up link.
    bill_link = soup.findAll("a", href=re.compile(r"BillLookUp\.pl"))[0]
    motion = bill_link.findParents("tr", limit=1)[0].findAll("td")[1].font.contents[-1]

    vote_time = soup.findAll("b", text="Time:")[0].next.strip()
    vote_time = dt.datetime.strptime(vote_time, "%b %d %Y %I:%M%p")

    vote_mess = soup.findAll("td", text=re.compile("Total Votes:"))[0]
    totals = re.findall(
        r"Ayes: (\d+)\s+Noes: (\d+)\s+Not: (\d+)\s+Exc. "
        r"Absent: (\d+)\s+Exc. Vote: (\d+)",
        vote_mess,
        re.U,
    )[0]
    yeas, noes, nots, absent, excused = [int(count) for count in totals]

    # chamber, date, motion, passed, yes_count, no_count, other_count
    v = Vote(chamber, vote_time, motion, (yeas > noes), yeas, noes, nots + absent + excused)

    # eh, it's easier to just get table[2] for this..
    vote_table = soup.findAll("table")[2]

    for row in vote_table.findAll("tr"):
        # Skip the header row that names the party columns.
        if "Democrat" in self.flatten(row):
            continue

        cells = row.findAll("td")

        if len(cells) == 1:
            # A single-cell row is a tie-breaking vote.
            # I can't find any examples of ties in the House,
            # nor information on who would break them, so such a row
            # cannot be attributed to anyone outside the Senate.
            if chamber != "upper":
                continue

            # Always read the name from the page: previously it was only
            # bound while creating self.lt_gov, so every later tie-breaker
            # row raised UnboundLocalError.
            full_name = (
                soup.findAll("td", text=re.compile("Lieutenant Governor"))[0]
                .parent.findAll("span")[0]
                .contents[0]
            )

            if not self.lt_gov:
                (first_name, last_name, middle_name, suffix) = split_name(full_name)
                self.lt_gov = Person(
                    full_name,
                    first_name=first_name,
                    last_name=last_name,
                    middle_name=middle_name,
                    suffix=suffix,
                )
                self.lt_gov.add_role("Lieutenant Governor", bill["session"])
                self.save_person(self.lt_gov)

            # The tie-breaker decides the outcome.
            if "VOTES YES" in self.flatten(cells[0]):
                v["passed"] = True
                v.yes(full_name)
            else:
                v["passed"] = False
                v.no(full_name)
            continue
        elif len(cells) == 2:
            vote_type, a = cells
            bunch = [self.flatten(a)]
        elif len(cells) == 3:
            vote_type, d, r = cells
            bunch = [self.flatten(d), self.flatten(r)]
        else:
            continue

        # why doesn't .string work? ... bleh.
        vote_type = vote_type.font.b.contents[0]

        if "Ayes" in vote_type:
            adder = v.yes
        elif "Noes" in vote_type:
            adder = v.no
        else:
            adder = v.other

        for listing in bunch:
            # Each cell reads "<label>: name; name; ..."; keep what follows
            # the first colon. A real list (not a lazy map object) is needed
            # because the first entry is inspected by index.
            names = [
                name.replace(" (SPEAKER)", "")
                for name in listing[(listing.index(":") + 1):].split(";")
            ]

            # "None" marks an empty group of voters.
            if names[0] == "None":
                continue

            for name in names:
                adder(name)

    v.add_source(url)
    bill.add_vote(v)
def get_vote(self, bill, url):
    """Scrape one roll-call vote page and attach the vote to ``bill``.

    The printable version of the page at ``url`` (a path relative to
    http://www.ncga.state.nc.us) is fetched and parsed for the motion text,
    the vote time, the vote totals and the individual voters. The resulting
    ``Vote`` is passed to ``bill.add_vote``; nothing is returned.

    Args:
        bill: Bill object; ``bill['session']`` is read and ``bill.add_vote``
            is called.
        url: Site-relative URL containing an ``sChamber=H`` or ``sChamber=S``
            query parameter.

    Raises:
        KeyError: If the ``sChamber`` letter is neither ``H`` nor ``S``.
        IndexError: If an expected page element is missing.
        ValueError: If the vote time does not match the expected format or a
            voter cell contains no ``:``.

    NOTE(review): an earlier ``get_vote`` definition precedes this one in
    the file; if both live in the same scope this later one takes precedence.
    NOTE(review): assumes ``self.lt_gov`` is initialised (e.g. to None)
    elsewhere before the first call -- confirm.
    """
    url = 'http://www.ncga.state.nc.us' + url + '&bPrintable=true'
    chamber = {'H': 'lower', 'S': 'upper'}[
        re.findall(r'sChamber=(\w)', url)[0]]

    data = self.urlopen(url)
    soup = self.soup_parser(data)

    # The motion text is the last node of the second cell in the table row
    # that holds the bill look-up link.
    bill_link = soup.findAll('a', href=re.compile(r'BillLookUp\.pl'))[0]
    motion_cell = bill_link.findParents('tr', limit=1)[0].findAll('td')[1]
    motion = motion_cell.font.contents[-1]

    vote_time = soup.findAll('b', text='Time:')[0].next.strip()
    vote_time = dt.datetime.strptime(vote_time, '%b %d %Y %I:%M%p')

    vote_mess = soup.findAll('td', text=re.compile('Total Votes:'))[0]
    totals = re.findall(
        r'Ayes: (\d+)\s+Noes: (\d+)\s+Not: (\d+)\s+Exc. '
        r'Absent: (\d+)\s+Exc. Vote: (\d+)', vote_mess, re.U)[0]
    yeas, noes, nots, absent, excused = [int(count) for count in totals]

    # chamber, date, motion, passed, yes_count, no_count, other_count
    v = Vote(chamber, vote_time, motion, (yeas > noes),
             yeas, noes, nots + absent + excused)

    # eh, it's easier to just get table[2] for this..
    vote_table = soup.findAll('table')[2]

    for row in vote_table.findAll('tr'):
        # Skip the header row that names the party columns.
        if 'Democrat' in self.flatten(row):
            continue

        cells = row.findAll('td')

        if len(cells) == 1:
            # A single-cell row is a tie-breaking vote.
            # I can't find any examples of ties in the House,
            # nor information on who would break them, so such a row
            # cannot be attributed to anyone outside the Senate.
            if chamber != 'upper':
                continue

            # Always read the name from the page: previously it was only
            # bound while creating self.lt_gov, so every later tie-breaker
            # row raised UnboundLocalError.
            full_name = soup.findAll(
                'td', text=re.compile('Lieutenant Governor'))[0] \
                .parent.findAll('span')[0].contents[0]

            if not self.lt_gov:
                (first_name, last_name, middle_name, suffix) = split_name(
                    full_name)

                self.lt_gov = Person(full_name, first_name=first_name,
                                     last_name=last_name,
                                     middle_name=middle_name,
                                     suffix=suffix)

                self.lt_gov.add_role('Lieutenant Governor',
                                     bill['session'])

                self.save_person(self.lt_gov)

            # The tie-breaker decides the outcome.
            if 'VOTES YES' in self.flatten(cells[0]):
                v['passed'] = True
                v.yes(full_name)
            else:
                v['passed'] = False
                v.no(full_name)
            continue
        elif len(cells) == 2:
            vote_type, a = cells
            bunch = [self.flatten(a)]
        elif len(cells) == 3:
            vote_type, d, r = cells
            bunch = [self.flatten(d), self.flatten(r)]
        else:
            continue

        # why doesn't .string work? ... bleh.
        vote_type = vote_type.font.b.contents[0]

        if 'Ayes' in vote_type:
            adder = v.yes
        elif 'Noes' in vote_type:
            adder = v.no
        else:
            adder = v.other

        for listing in bunch:
            # Each cell reads "<label>: name; name; ..."; keep what follows
            # the first colon. A real list (not a lazy map object) is needed
            # because the first entry is inspected by index.
            names = [name.replace(' (SPEAKER)', '')
                     for name in listing[
                         (listing.index(':') + 1):].split(';')]

            # 'None' marks an empty group of voters.
            if names[0] == 'None':
                continue

            for name in names:
                adder(name)

    v.add_source(url)
    bill.add_vote(v)