示例#1
0
from utils import http, scrape
from utils.arrays import flatten
from utils.session import Session

# Scheme and host that the search URL below is built on.
DOMAIN = "https://leginfo.legislature.ca.gov"
# Bill search form page; `crawl` issues both its GET and its POST against it.
URL = DOMAIN + "/faces/billSearchClient.xhtml"

# Evaluated once, when this module is imported; `run` iterates over it.
AVAILABLE_SESSIONS = Session.available_sessions()


def run():
    """Crawl each entry of AVAILABLE_SESSIONS and return the `flatten`-ed result.

    The per-session crawls are handed to `flatten` as a lazy generator, so
    `crawl` is only invoked as `flatten` consumes it.
    """
    per_session_results = (crawl(item) for item in AVAILABLE_SESSIONS)
    return flatten(per_session_results)


def crawl(session):
    VIEWSTATE_VALUE = "//input[@name='javax.faces.ViewState' and contains(@id, 'ViewState:0')]/@value"
    RESULTS = "//table[@id='bill_results']/tbody/tr"
    output(session)

    sesh = http.Session()
    # load the form page so we can scrape required `viewstate` value
    sesh.get(URL)
    # do the actual form submission
    sesh.post(
        URL, {
            "billSearchForm": "billSearchForm",
            "billSearchForm:bill_number": "",
            "billSearchForm:house": "Both",
            "billSearchForm:hiddenHouse": "",
            "billSearchForm:statuteYear": "",
            "billSearchForm:chapter_number": "",
示例#2
0
def fanout_by_session(taskname, sessions=None, **kwargs):
    """Fan a task out over the given sessions.

    Args:
        taskname: Forwarded unchanged as the first argument to `fanout`.
        sessions: Sessions to fan out over. Any falsy value (``None``, an
            empty collection, ...) is replaced by
            ``Session.available_sessions()``.
        **kwargs: Forwarded unchanged to `fanout`.

    Returns:
        Whatever `fanout` returns.
    """
    # `or` keeps the original truthiness test: only a falsy `sessions`
    # triggers the lookup of all available sessions.
    targets = sessions or Session.available_sessions()
    return fanout(taskname, targets, **kwargs)