def scrape(self):
    """Yield a Legislator per listed member and a Committee per table row.

    The member list page (``MEMBER_LIST``) is loaded through
    ``self.lxmlize`` and every row of the ``frame='void'`` table except
    the first is processed.  Each row must contain exactly three cells:
    the committee (with a link), the members, and the expirations.

    Member entries are expected to look like ``"<name>, <number> <rest>"``
    or ``"<name> <number> <rest>"``; the part starting at the number is
    treated as the address.  Entries containing ``"TBA"`` and addresses
    containing ``"Resigned"`` are skipped.  A trailing role marker
    (``Vice Chair``, ``Chair``, ``CHAIR``, ``Appt``) is stripped from the
    address and used as the ``district`` of the yielded Legislator;
    otherwise the district is ``"member"``.

    Raises:
        AttributeError: if a member entry matches neither pattern.
        ValueError: if a row does not have exactly three child elements.
    """
    # Raw strings: "\d" and "\w" inside a plain literal are invalid escape
    # sequences and trigger a warning on modern Python.
    with_comma = re.compile(r"(?P<name>.*), (?P<addr>\d+\w* .*)")
    without_comma = re.compile(r"(?P<name>.*) (?P<addr>\d+\w* .*)")

    # Ordered pairs of (marker found in the address, resulting position).
    # "Vice Chair" must be tried before "Chair": once it is stripped from
    # the address the shorter marker can no longer match by accident.
    role_markers = (
        ("Vice Chair", "Vice Chair"),
        ("Chair", "Chair"),
        ("CHAIR", "Chair"),
        ("Appt", "member"),
    )

    page = self.lxmlize(MEMBER_LIST)
    for row in page.xpath("//table[@frame='void']/tbody/tr")[1:]:
        role_cell, who_cell, expire_cell = row.xpath("./*")
        names = [x.text_content() for x in who_cell.xpath(".//font")]
        expirations = [x.text_content() for x in expire_cell.xpath(".//font")]

        comm = Committee(name=role_cell.text_content())
        url = role_cell.xpath(".//a")[0].attrib['href']
        comm.add_link(url=url, note='homepage')

        # The expiration text is paired with each person but not used.
        for person, _expiration in zip(names, expirations):
            if "TBA" in person:
                continue

            # Prefer the "name, address" form; fall back to "name address".
            # If neither matches, ``match`` is None and the attribute access
            # raises AttributeError, exactly as before.
            match = with_comma.match(person) or without_comma.match(person)
            info = match.groupdict()

            addr = info['addr']
            if "Resigned" in addr:
                continue

            position = "member"
            for marker, title in role_markers:
                if marker in addr:
                    # Keep only the text before the last occurrence.
                    addr = addr.rsplit(marker, 1)[0].strip()
                    position = title

            addr = clean_address(addr)
            leg = Legislator(name=info['name'], district=position)
            leg.add_contact_detail(type="address", value=addr, note="Address")
            leg.add_source(MEMBER_LIST)
            yield leg

            # NOTE(review): the membership is attached only after the
            # generator is resumed past the yield above; confirm the
            # consumer does not persist the Legislator before that.
            leg.add_membership(comm)

        comm.add_source(MEMBER_LIST)
        yield comm
def get_people(self):
    """Yield a Legislator for each hard-coded (name, district) pair.

    Every Legislator gets ``http://example.com`` as its source and an
    ``office email`` contact detail whose local part is a slug of the
    district: lower-cased, spaces replaced by ``-`` and commas removed.
    """
    roster = [
        ("Mckenzie A. Cannon", "10a"),
        ("Yandel V. Watkins", "Second Fnord and Norfolk"),
        ("Adrien A. Coffey", "A"),
        ("Natasha Moon", "10c"),
        ("Ramon Harmon", "Berkshire, Hampshire, Franklin and Hampden"),
        ("Sam Sellers", "5"),
        ("Estrella Hahn", "6"),
        ("Teagan Rojas", "B"),
        ("Barrett Adams", "C"),
        ("Kayla Shelton", "D"),
        ("Kohen Dudley", "E"),
        ("Cayden Norman", "F"),
        ("Shayla Fritz", "ZZ"),
        ("Gunnar Luna", "Ward 2"),
        ("Regina Cruz", "Green"),
        ("Makenzie Keller", "Blue"),
        ("Eliana Meyer", "Red"),
        ("Taylor Parrish", "Yellow"),
        ("Callie Craig", "Silver"),
    ]

    for name, district in roster:
        legislator = Legislator(name=name, district=district)
        legislator.add_source("http://example.com")

        dslug = district.lower().replace(" ", "-").replace(",", "")
        # The previous literal ("*****@*****.**") had no conversion
        # specifier, so applying ``%`` to it raised TypeError ("not all
        # arguments converted") for the very first person.
        # NOTE(review): the domain is assumed to be example.com, matching
        # the source URL above - confirm the intended address.
        legislator.add_contact_detail(
            type='email',
            value="%s@example.com" % dslug,
            note='office email',
        )
        yield legislator
def get_people(self):
    """Yield one Legislator per entry of a fixed list of people.

    Each entry supplies the ``name`` and ``district`` keyword arguments
    for ``Legislator``.  The yielded object carries
    ``http://example.com`` as its source and an ``office email`` contact
    detail built from the district slug (lower-case, spaces turned into
    ``-``, commas dropped).
    """
    people = [
        {"name": "Mckenzie A. Cannon", "district": "10a"},
        {"name": "Yandel V. Watkins", "district": "Second Fnord and Norfolk"},
        {"name": "Adrien A. Coffey", "district": "A"},
        {"name": "Natasha Moon", "district": "10c"},
        {
            "name": "Ramon Harmon",
            "district": "Berkshire, Hampshire, Franklin and Hampden",
        },
        {"name": "Sam Sellers", "district": "5"},
        {"name": "Estrella Hahn", "district": "6"},
        {"name": "Teagan Rojas", "district": "B"},
        {"name": "Barrett Adams", "district": "C"},
        {"name": "Kayla Shelton", "district": "D"},
        {"name": "Kohen Dudley", "district": "E"},
        {"name": "Cayden Norman", "district": "F"},
        {"name": "Shayla Fritz", "district": "ZZ"},
        {"name": "Gunnar Luna", "district": "Ward 2"},
        {"name": "Regina Cruz", "district": "Green"},
        {"name": "Makenzie Keller", "district": "Blue"},
        {"name": "Eliana Meyer", "district": "Red"},
        {"name": "Taylor Parrish", "district": "Yellow"},
        {"name": "Callie Craig", "district": "Silver"},
    ]

    for person in people:
        member = Legislator(**person)
        member.add_source("http://example.com")

        lowered = person['district'].lower()
        dslug = lowered.replace(" ", "-").replace(",", "")

        # Bug fix: the old format string "*****@*****.**" contained no
        # "%s", so ``"..." % (dslug)`` raised TypeError instead of
        # producing an address.
        # NOTE(review): example.com is an assumed domain, chosen to match
        # the source URL used above - confirm the intended address.
        email = "%s@example.com" % dslug
        member.add_contact_detail(type='email', value=email, note='office email')
        yield member