def scrape_votes(self, session, zip_url):
    """Yield a Vote for each roll call of ``session`` found in the zip data.

    ``tblrollcallsummary.txt`` (one pipe-delimited row per roll call) is read
    first to build the votes; ``tblrollcallhistory.txt`` (one row per
    legislator per roll call) is then read to attach the individual votes.

    Args:
        session: Session year string; rows whose first field differs are
            ignored.
        zip_url: URL recorded as the source of every vote.

    Yields:
        The Vote objects built from summary rows whose bill id is present in
        ``self.bills_by_id``.

    Raises:
        ValueError: If a yea/nay count or timestamp cannot be parsed, or a
            non-blank history row does not have exactly seven fields.
    """
    votes = {}
    # Running tally, per roll call, of votes that were neither "Yea" nor
    # "Nay".  It must outlive a single history row, otherwise the stored
    # "other" count can never exceed 1.
    other_counts = {}
    last_line = []

    # NOTE(review): rows are handled as str; if self.zf yields bytes (as
    # zipfile.ZipFile.open does on Python 3) they need decoding first - confirm.
    for line in self.zf.open("tblrollcallsummary.txt"):
        if line.strip() == "":
            continue

        line = line.split("|")
        if len(line) < 14:
            if len(last_line + line[1:]) == 14:
                # This row completes the fragment remembered earlier.  The
                # continuation's first field is the tail of the field that
                # was broken across lines and is dropped, exactly as the
                # length check above assumes.
                line = last_line + line[1:]
                last_line = []
                self.warning("used bad vote line")
            else:
                last_line = line
                self.warning("bad vote line %s" % "|".join(line))
                if len(line) < 12:
                    # Too short to reach the motion column (index 11);
                    # wait for a continuation row instead of raising
                    # IndexError below.
                    continue

        session_yr = line[0]
        body = line[1]
        vote_num = line[2]
        timestamp = line[3]
        bill_id = line[4].strip()
        yeas = int(line[5])
        nays = int(line[6])
        # Columns 7 and 8 (present / absent) are not used.
        motion = line[11].strip() or "[not available]"

        if session_yr == session and bill_id in self.bills_by_id:
            actor = "lower" if body == "H" else "upper"
            when = dt.datetime.strptime(timestamp, "%m/%d/%Y %I:%M:%S %p")
            # TODO: stop faking passed somehow
            passed = yeas > nays
            vote = Vote(
                chamber=actor,
                start_date=when.strftime("%Y-%m-%d"),
                motion_text=motion,
                result="pass" if passed else "fail",
                classification="passage",
                bill=self.bills_by_id[bill_id],
            )
            vote.set_count("yes", yeas)
            vote.set_count("no", nays)
            vote.add_source(zip_url)
            votes[body + vote_num] = vote

    for line in self.zf.open("tblrollcallhistory.txt"):
        # A blank row cannot be unpacked into the seven expected fields.
        if not line.strip():
            continue

        # 2012 | H | 2 | 330795 | HB309 | Yea |1/4/2012 8:27:03 PM
        session_yr, body, v_num, employee, bill_id, choice, _date = line.split("|")

        if not bill_id:
            continue
        if session_yr != session or bill_id.strip() not in self.bills_by_id:
            continue

        try:
            leg = self.legislators[employee]["name"]
        except KeyError:
            self.warning("Error, can't find person %s" % employee)
            continue

        choice = choice.strip()
        key = body + v_num
        if key not in votes:
            self.warning("Skipping processing this vote:")
            self.warning("Bad ID: %s" % key)
            continue

        roll_call = votes[key]
        if choice == "Yea":
            roll_call.yes(leg)
        elif choice == "Nay":
            roll_call.no(leg)
        else:
            roll_call.vote("other", leg)
            other_counts[key] = other_counts.get(key, 0) + 1
        # Re-set after every row so the stored count always mirrors the
        # individual votes attached so far.
        roll_call.set_count("other", other_counts.get(key, 0))

    for vote in votes.values():
        yield vote
def scrape_votes(self, session):
    """Yield a Vote for each roll call of ``session`` in the published feeds.

    ``RollCallSummary.txt`` (one pipe-delimited row per roll call) is fetched
    first to build the votes; ``RollCallHistory.txt`` (one row per legislator
    per roll call) is then fetched to attach the individual votes.

    Args:
        session: Session year string; rows whose first field differs are
            ignored.

    Yields:
        The Vote objects built from summary rows whose bill id is present in
        ``self.bills_by_id``.

    Raises:
        ValueError: If a yea/nay count or timestamp cannot be parsed, or a
            history row does not have exactly eight fields.
    """
    votes = {}
    other_counts = defaultdict(int)
    last_line = []
    vote_url = "http://gencourt.state.nh.us/dynamicdatafiles/RollCallSummary.txt"
    history_url = "http://gencourt.state.nh.us/dynamicdatafiles/RollCallHistory.txt"

    lines = self.get(vote_url).content.decode("utf-8").splitlines()
    for line in lines:
        if len(line) < 2:
            continue
        if line.strip() == "":
            continue

        line = line.split("|")
        if len(line) < 14:
            if len(last_line + line[1:]) == 14:
                # This row completes the fragment remembered earlier.  The
                # continuation's first field is the tail of the field that
                # was broken across lines and is dropped, exactly as the
                # length check above assumes.
                line = last_line + line[1:]
                last_line = []
                self.warning("used bad vote line")
            else:
                last_line = line
                self.warning("bad vote line %s" % "|".join(line))
                if len(line) < 12:
                    # Too short to reach the motion column (index 11);
                    # wait for a continuation row instead of raising
                    # IndexError below.
                    continue

        # After decoding as UTF-8 a byte-order mark is the single character
        # U+FEFF; the three-character form is stripped as well for payloads
        # that were decoded byte-for-byte.
        session_yr = line[0].replace("\ufeff", "").replace("\xef\xbb\xbf", "")
        body = line[1]
        vote_num = line[2]
        timestamp = line[3]
        bill_id = line[4].strip()
        yeas = int(line[5])
        nays = int(line[6])
        # Columns 7 and 8 (present / absent) are not used.
        motion = line[11].strip() or "[not available]"

        if session_yr == session and bill_id in self.bills_by_id:
            actor = "lower" if body == "H" else "upper"
            when = dt.datetime.strptime(timestamp, "%m/%d/%Y %I:%M:%S %p")
            when = pytz.timezone("America/New_York").localize(when).isoformat()
            # TODO: stop faking passed somehow
            passed = yeas > nays
            vote = Vote(
                chamber=actor,
                start_date=when,
                motion_text=motion,
                result="pass" if passed else "fail",
                classification="passage",
                bill=self.bills_by_id[bill_id],
            )
            vote.set_count("yes", yeas)
            vote.set_count("no", nays)
            vote.add_source(vote_url)
            vote.pupa_id = session_yr + body + vote_num  # unique ID for vote
            votes[body + vote_num] = vote

    history = self.get(history_url).content.decode("utf-8").splitlines()
    for line in history:
        if len(line) < 2:
            continue

        # 2012 | H | 2 | 330795 | 964 | HB309 | Yea | 1/4/2012 8:27:03 PM
        # The unpack is deliberately strict: a row with any other number of
        # fields raises ValueError rather than being silently dropped.
        session_yr, body, v_num, _, employee, bill_id, choice, _date = line.split("|")
        # The first row of the payload may carry a byte-order mark.
        session_yr = session_yr.replace("\ufeff", "").replace("\xef\xbb\xbf", "")

        if not bill_id:
            continue
        if session_yr != session or bill_id.strip() not in self.bills_by_id:
            continue

        try:
            # Collapse runs of whitespace inside the stored name.
            leg = " ".join(self.legislators[employee]["name"].split())
        except KeyError:
            self.warning("Error, can't find person %s" % employee)
            continue

        choice = choice.strip()
        key = body + v_num
        if key not in votes:
            self.warning("Skipping processing this vote:")
            self.warning("Bad ID: %s" % key)
            continue

        roll_call = votes[key]
        if choice == "Yea":
            roll_call.yes(leg)
        elif choice == "Nay":
            roll_call.no(leg)
        else:
            roll_call.vote("other", leg)
            # hack-ish, but will keep the vote count sync'd
            other_counts[key] += 1
            roll_call.set_count("other", other_counts[key])

    for vote in votes.values():
        yield vote