def parse_house_actions(self, bill, url):
    """Scrape the House action history at ``url`` and attach it to ``bill``.

    The URL is rewritten from ``BillActions`` to ``BillActionsPrn``
    (presumably the printer-friendly view -- confirm) and recorded as a
    source on the bill.  Each logical action is passed to
    ``bill.add_action`` exactly once, after all of its continuation rows
    have been merged into the description.
    """
    url = re.sub("BillActions", "BillActionsPrn", url)
    bill.add_source(url)
    with self.urlopen(url) as actions_page:
        doc = lxml.html.fromstring(actions_page)
    rows = doc.xpath('//table/tr')

    def _emit(date, action):
        actor = house_get_actor_from_action(action)
        # TODO: probably need to add the type here as well
        bill.add_action(actor, action, date)

    # (date, description) of the action currently being assembled, or None
    # while no dated row has been seen yet.
    pending = None

    # The first row is skipped (presumably a header row).
    for row in rows[1:]:
        if len(row) == 0 or row[0].tag != 'td':
            continue

        # new actions are represented by having dates in the first td
        # otherwise, it's a continuation of the description from the
        # previous action
        date_text = row[0].text_content().strip()
        if date_text:
            # A new action starts: the previous one is now complete.
            if pending is not None:
                _emit(*pending)
            pending = (dt.datetime.strptime(date_text, '%m/%d/%Y'),
                       row[2].text_content().strip())
        elif pending is not None:
            date, action = pending
            action = (action + '\n' + row[2].text_content()).rstrip()
            pending = (date, action)
        # A continuation row with no preceding dated row has nothing to
        # attach to and is skipped.

    # add that last action
    if pending is not None:
        _emit(*pending)
def parse_house_actions(self, bill, url):
    """Scrape the House action history at ``url`` and attach it to ``bill``.

    ``url`` is recorded as a source on the bill.  Every ``<tr>`` on the
    page is treated as an action row: a row whose first cell holds a date
    (``%m/%d/%Y``) starts a new action, any other row continues the
    description of the previous one.  Nothing is added when the page
    contains no rows.
    """
    bill.add_source(url)
    with self.urlopen(url) as actions_page:
        soup = BeautifulSoup(actions_page)
    rows = soup.findAll('tr')
    if not rows:
        # No action rows at all; rows[0] below would raise IndexError.
        return

    def _description_cell(row):
        # The description is read from the node two siblings after the
        # row's first <td>.
        return row.td.nextSibling.nextSibling

    # start with index 0 because the table doesn't have an opening <tr>
    first_row = rows[0]
    date = dt.datetime.strptime(first_row.td.contents[0].strip(), '%m/%d/%Y')
    action = _description_cell(first_row).contents[0].strip()

    for row in rows[1:]:
        first_cell = row.td
        if first_cell is None:
            continue

        # new actions are represented by having dates in the first td
        # otherwise, it's a continuation of the description from the
        # previous action
        if len(first_cell.contents) > 0 and first_cell.contents[0] != ' ':
            # The previous action is complete; record it before starting
            # the next one.
            actor = house_get_actor_from_action(action)
            bill.add_action(actor, action, date)
            date = dt.datetime.strptime(first_cell.contents[0].strip(),
                                        '%m/%d/%Y')
            action = _description_cell(row).contents[0].strip()
        else:
            action += '\n' + _description_cell(row).contents[0]
            action = action.rstrip()

    # add that last action
    actor = house_get_actor_from_action(action)
    bill.add_action(actor, action, date)
def parse_house_actions(self, bill, url):
    """Scrape the House action history at ``url`` and attach it to ``bill``.

    ``url`` is recorded as a source on the bill.  Every ``<tr>`` on the
    page is treated as an action row: a row whose first cell holds a date
    (``%m/%d/%Y``) starts a new action, any other row continues the
    description of the previous one.  Nothing is added when the page
    contains no rows.
    """
    bill.add_source(url)
    with self.urlopen(url) as actions_page:
        soup = BeautifulSoup(actions_page)
    rows = soup.findAll('tr')
    if not rows:
        # No action rows at all; rows[0] below would raise IndexError.
        return

    def _description_cell(row):
        # The description is read from the node two siblings after the
        # row's first <td>.
        return row.td.nextSibling.nextSibling

    # start with index 0 because the table doesn't have an opening <tr>
    first_row = rows[0]
    date = dt.datetime.strptime(first_row.td.contents[0].strip(), '%m/%d/%Y')
    action = _description_cell(first_row).contents[0].strip()

    for row in rows[1:]:
        first_cell = row.td
        if first_cell is None:
            continue

        # new actions are represented by having dates in the first td
        # otherwise, it's a continuation of the description from the
        # previous action
        if len(first_cell.contents) > 0 and first_cell.contents[0] != ' ':
            # The previous action is complete; record it before starting
            # the next one.
            actor = house_get_actor_from_action(action)
            bill.add_action(actor, action, date)
            date = dt.datetime.strptime(first_cell.contents[0].strip(),
                                        '%m/%d/%Y')
            action = _description_cell(row).contents[0].strip()
        else:
            action += '\n' + _description_cell(row).contents[0]
            action = action.rstrip()

    # add that last action
    actor = house_get_actor_from_action(action)
    bill.add_action(actor, action, date)
def _parse_house_actions(self, bill, url):
    """Scrape House actions (and their votes) at ``url`` onto ``bill``.

    The URL is rewritten from ``BillActions`` to ``BillActionsPrn``
    (presumably the printer-friendly view -- confirm) and recorded as a
    source on the bill.  Each logical action is classified with
    ``self._get_action``, its votes from ``self._get_votes`` are added to
    the bill, and the action itself is added exactly once, after all of
    its continuation rows have been merged into the description.
    """
    url = re.sub("BillActions", "BillActionsPrn", url)
    bill.add_source(url)
    actions_page = self.get(url).text
    doc = lxml.html.fromstring(actions_page)
    rows = doc.xpath('//table/tr')

    def _emit(date, action):
        actor = house_get_actor_from_action(action)
        type_class = self._get_action(actor, action)
        votes = self._get_votes(date, actor, action)
        for vote in votes:
            bill.add_vote(vote)
        bill.add_action(actor, action, date, type=type_class)

    # (date, description) of the action currently being assembled, or None
    # while no dated row has been seen yet.
    pending = None

    # The first row is skipped (presumably a header row).
    for row in rows[1:]:
        if len(row) == 0 or row[0].tag != 'td':
            continue

        # new actions are represented by having dates in the first td
        # otherwise, it's a continuation of the description from the
        # previous action
        date_text = row[0].text_content().strip()
        if date_text:
            # A new action starts: the previous one is now complete.
            if pending is not None:
                _emit(*pending)
            pending = (dt.datetime.strptime(date_text, '%m/%d/%Y'),
                       row[2].text_content().strip())
        elif pending is not None:
            date, action = pending
            action = (action + '\n' + row[2].text_content()).rstrip()
            pending = (date, action)
        # A continuation row with no preceding dated row has nothing to
        # attach to and is skipped.

    # Emit the final action, which no later dated row will flush.
    if pending is not None:
        _emit(*pending)
def parse_house_actions(self, bill, url):
    """Scrape the House action history at ``url`` and attach it to ``bill``.

    The URL is rewritten from ``BillActions`` to ``BillActionsPrn``
    (presumably the printer-friendly view -- confirm) and recorded as a
    source on the bill.  Each logical action is classified with
    ``self.get_action`` and added exactly once, after all of its
    continuation rows have been merged into the description.
    """
    url = re.sub("BillActions", "BillActionsPrn", url)
    bill.add_source(url)
    with self.urlopen(url) as actions_page:
        doc = lxml.html.fromstring(actions_page)
    rows = doc.xpath("//table/tr")

    def _emit(date, action):
        actor = house_get_actor_from_action(action)
        type_class = self.get_action(actor, action)
        bill.add_action(actor, action, date, type=type_class)

    # (date, description) of the action currently being assembled, or None
    # while no dated row has been seen yet.
    pending = None

    # The first row is skipped (presumably a header row).
    for row in rows[1:]:
        if len(row) == 0 or row[0].tag != "td":
            continue

        # new actions are represented by having dates in the first td
        # otherwise, it's a continuation of the description from the
        # previous action
        date_text = row[0].text_content().strip()
        if date_text:
            # A new action starts: the previous one is now complete.
            if pending is not None:
                _emit(*pending)
            pending = (
                dt.datetime.strptime(date_text, "%m/%d/%Y"),
                row[2].text_content().strip(),
            )
        elif pending is not None:
            date, action = pending
            action = (action + "\n" + row[2].text_content()).rstrip()
            pending = (date, action)
        # A continuation row with no preceding dated row has nothing to
        # attach to and is skipped.

    # Emit the final action, which no later dated row will flush.
    if pending is not None:
        _emit(*pending)