def scrape_session(self, chamber, session):
    """Walk the bill-history FTP tree for one chamber and session.

    Builds the listing URL ``/bills/<session>/billhistory/<chamber>_bills/``
    relative to ``self._ftp_root``, iterates over every entry that
    ``parse_ftp_listing`` yields for it, opens each entry as a
    sub-directory, and calls ``self.scrape_bill`` once per entry found
    inside that sub-directory.

    :param chamber: chamber identifier; converted to its path component
        by ``chamber_name``.
    :param session: session identifier, interpolated into the path as-is.
    """
    chamber_part = chamber_name(chamber)
    root_path = "/bills/%s/billhistory/%s_bills/" % (session, chamber_part)
    root_url = urlparse.urljoin(self._ftp_root, root_path)

    with self.urlopen(root_url) as root_listing:
        for subdir in parse_ftp_listing(root_listing):
            # Trailing slash so later urljoin calls resolve entries
            # inside this sub-directory rather than beside it.
            subdir_url = urlparse.urljoin(root_url, subdir) + "/"

            with self.urlopen(subdir_url) as subdir_listing:
                # Presumably each entry is a single bill's history
                # file -- TODO confirm against parse_ftp_listing.
                for entry in parse_ftp_listing(subdir_listing):
                    history_url = urlparse.urljoin(subdir_url, entry)
                    self.scrape_bill(chamber, session, history_url)
def scrape(self, chamber, session):
    """Scrape every bill history for a chamber and session.

    The session is first checked with ``self.validate_session``. A
    two-character session then has ``"R"`` appended (presumably marking
    a regular session -- TODO confirm). For each of the four legislation
    kinds (bills, concurrent resolutions, joint resolutions,
    resolutions) the matching FTP directory
    ``/bills/<session>/billhistory/<chamber>_<kind>/`` under
    ``self._ftp_root`` is listed, each listed entry is opened as a
    sub-directory, and ``self.scrape_bill`` is called for every entry
    inside it.

    :param chamber: chamber identifier; converted to its path component
        by ``chamber_name``.
    :param session: session identifier as supplied by the caller.
    """
    self.validate_session(session)

    if len(session) == 2:
        session = "%sR" % session

    legislation_kinds = (
        "bills",
        "concurrent_resolutions",
        "joint_resolutions",
        "resolutions",
    )

    for kind in legislation_kinds:
        kind_path = "/bills/%s/billhistory/%s_%s/" % (
            session,
            chamber_name(chamber),
            kind,
        )
        kind_url = urlparse.urljoin(self._ftp_root, kind_path)

        with self.urlopen(kind_url) as kind_listing:
            for subdir in parse_ftp_listing(kind_listing):
                # Trailing slash so later urljoin calls resolve entries
                # inside this sub-directory rather than beside it.
                subdir_url = urlparse.urljoin(kind_url, subdir) + "/"

                with self.urlopen(subdir_url) as subdir_listing:
                    for entry in parse_ftp_listing(subdir_listing):
                        history_url = urlparse.urljoin(subdir_url, entry)
                        self.scrape_bill(chamber, session, history_url)