from utils import http, scrape from utils.arrays import flatten from utils.session import Session

# Base site and the bill-search form endpoint scraped below.
DOMAIN = "https://leginfo.legislature.ca.gov"
URL = DOMAIN + "/faces/billSearchClient.xhtml"
# All sessions to crawl; presumably legislative session identifiers — TODO confirm
# against utils.session.Session.
AVAILABLE_SESSIONS = Session.available_sessions()


def run():
    """Crawl every available session and return the flattened results."""
    return flatten(crawl(session) for session in AVAILABLE_SESSIONS)


def crawl(session):
    """Submit the bill-search form for one *session*.

    NOTE(review): this function is truncated in this chunk — the POST
    payload dict and the remainder of the body continue past what is
    visible here.
    """
    # XPath for the JSF hidden ViewState token required to post the form.
    VIEWSTATE_VALUE = "//input[@name='javax.faces.ViewState' and contains(@id, 'ViewState:0')]/@value"
    # XPath for the result rows of the search-results table.
    RESULTS = "//table[@id='bill_results']/tbody/tr"
    # `output` is defined elsewhere in this file — presumably progress logging; verify.
    output(session)
    sesh = http.Session()
    # load the form page so we can scrape required `viewstate` value
    sesh.get(URL)
    # do the actual form submission
    sesh.post(
        URL,
        {
            "billSearchForm": "billSearchForm",
            "billSearchForm:bill_number": "",
            "billSearchForm:house": "Both",
            "billSearchForm:hiddenHouse": "",
            "billSearchForm:statuteYear": "",
            "billSearchForm:chapter_number": "",
            # NOTE(review): payload continues beyond this chunk.
def fanout_by_session(taskname, sessions=None, **kwargs):
    """Fan *taskname* out across legislative sessions.

    When *sessions* is falsy (``None`` or empty), every available
    session is targeted instead.
    """
    targets = sessions or Session.available_sessions()
    return fanout(taskname, targets, **kwargs)