for suffix in name_suffixes: if suffix in last: # if so, store this separately, and remove it from the last name member['suffix'] = suffix last = last.replace(suffix, '').strip() member['name_dict']['last_name'] = last first_middle_dict = re.match("^(?P<first_name>[\w'\.]+)(?: (?P<middle_name>[\w\.]+))?$", first_middle).groupdict() # merge the parsed values into the member dict member['name_dict'].update(first_middle_dict) member['name_dict'] = normalize_name_fields(member['name_dict']) member['party'] = get_party(tds[3].text.strip()) member['url'] = 'http://house.mo.gov/member.aspx?year={0}&district={1}'.format(session.year, member['district']) members.append(member) if session.year <= 2010: # in 1998, the house decreased the width of the table listing members, but then switched it back if session.year == 1998: trs = soup.find('table', attrs = {'border': 2, 'width': '80%'}).find_all('tr') else: trs = soup.find('table', attrs = {'border': 2, 'width': '100%'}).find_all('tr') for tr in trs: tds = tr.find_all('td')
# Classify the current results line and update ``race`` accordingly. A line is
# treated as (in priority order) a district header, a candidate row, or the
# "Total Votes" footer; any other line is left untouched by this chain.
if 'District' in line:
    # The district number is taken to be the first run of digits on the line.
    district_match = re.search(r'\d+', line)
    if district_match is not None:
        race.district = district_match.group()
    else:
        # No digits on a 'District' line: report the raw line and carry on
        # without setting race.district. The single-argument parenthesised
        # form prints the same text under Python 2 and Python 3.
        print('No district number')
        print(repr(line))
elif cand_match is not None:
    # Candidate row: groups 1-4 of the candidate pattern supply, in order,
    # the name, party, vote count and vote percentage.
    race.candidates.append(
        Race_Candidate(
            # U+00AD (soft hyphen) is replaced with a plain hyphen.
            raw_name=cand_match.group(1).replace(u'\xad', '-').strip(),
            party=get_party(cand_match.group(2).strip()),
            # Thousands separators are dropped from the vote count.
            votes=cand_match.group(3).strip().replace(',', ''),
            pct_votes=cand_match.group(4).replace('%', '').strip(),
        )
    )
elif 'Total Votes' in line:
    # The total must start with a digit and may contain thousands separators.
    # (The previous pattern, '[\d|,]+', also accepted a literal '|' or a lone
    # ',' as the "number", because '|' is not alternation inside [...].)
    total_match = re.search(r'\d[\d,]*', line)
    if total_match is None:
        # Not found on this line, so try the next one.
        # NOTE(review): assumes ``reader`` is an indexable sequence of lines and
        # ``idx`` is the index of ``line`` in it -- confirm against the caller.
        total_match = re.search(r'\d[\d,]*', reader[idx + 1])
    # As before, this raises AttributeError if neither line contains a number
    # (and IndexError above if there is no next line).
    race.total_votes = total_match.group().replace(',', '')
print type(e) print e # if "unexpired" appears in the text, set this attribute if 'unexpired' in tds[0].text: race.unexpired = True else: race.unexpired = False # if the second, third and fourth columns have text... elif len(tds[1].text.strip())>0 and len(tds[2].text.strip())>0 and len(tds[3].text.strip())>0: # append a new candidate to the race's candidate list race.candidates.append(Race_Candidate( raw_name = tds[0].text.replace(' ', ' ').strip() , party = get_party(tds[1].text.strip()) , votes = tds[2].text.strip().replace(',', '') , pct_votes = tds[3].text.replace('%', '').strip() )) # if the second column contains the phrase 'Total Votes'... elif tds[1].text.strip() == 'Total Votes:': # set the race's total votes race.total_votes = tds[2].text.replace(',', '').strip() # then append the race to the election election.races.append(race) else: pass