Exemplo n.º 1
0
    def scrape(self):
        """Scrape the committee table found at ``MEMBER_LIST``.

        For every table row after the first, a ``Committee`` is built from
        the first cell and a ``Legislator`` is built for each member listed
        in the second cell. Members whose text contains "TBA", or whose
        address text contains "Resigned", are skipped.

        Yields:
            Each ``Legislator`` of a row, followed by that row's
            ``Committee``.

        Raises:
            AttributeError: if a member entry matches neither
                "<name>, <address>" nor "<name> <address>".
        """
        # Compiled once rather than per member. Raw strings keep ``\d`` and
        # ``\w`` from being read as (invalid) string escape sequences.
        name_comma_addr = re.compile(r"(?P<name>.*), (?P<addr>\d+\w* .*)")
        name_space_addr = re.compile(r"(?P<name>.*) (?P<addr>\d+\w* .*)")

        # (marker looked for in the address text, position recorded for it).
        # A tuple makes the order explicit: "Vice Chair" must be stripped
        # before "Chair" is tested, otherwise "Chair" would match inside it.
        role_markers = (
            ("Vice Chair", "Vice Chair"),
            ("Chair", "Chair"),
            ("CHAIR", "Chair"),
            ("Appt", "member"),
        )

        page = self.lxmlize(MEMBER_LIST)
        # The first row is skipped (presumably a header row).
        for row in page.xpath("//table[@frame='void']/tbody/tr")[1:]:
            role_cell, who_cell, expire_cell = row.xpath("./*")
            names = [x.text_content() for x in who_cell.xpath(".//font")]
            expirations = [x.text_content()
                           for x in expire_cell.xpath(".//font")]

            comm = Committee(name=role_cell.text_content())
            url = role_cell.xpath(".//a")[0].attrib['href']
            comm.add_link(url=url, note='homepage')

            # zip() stops at the shorter list, so members are only visited
            # while there is a matching expiration entry. The expiration
            # text itself is not used.
            for person, _expiration in zip(names, expirations):
                if "TBA" in person:
                    continue

                # Prefer the comma-separated form, then fall back to a plain
                # space between name and address.
                parsed = (name_comma_addr.match(person)
                          or name_space_addr.match(person))
                # ``parsed`` is None when neither pattern matched; the
                # attribute access below then raises AttributeError.
                info = parsed.groupdict()
                addr = info['addr']

                if "Resigned" in addr:
                    continue

                position = "member"
                for marker, title in role_markers:
                    if marker in addr:
                        # Keep only the text before the last occurrence of
                        # the marker; whatever follows it is discarded.
                        addr = addr.rsplit(marker, 1)[0].strip()
                        position = title

                addr = clean_address(addr)
                leg = Legislator(name=info['name'], district=position)
                leg.add_contact_detail(type="address",
                                       value=addr,
                                       note="Address")
                leg.add_source(MEMBER_LIST)
                yield leg

                # NOTE(review): this runs only when the consumer resumes the
                # generator after receiving ``leg`` - confirm the membership
                # is still picked up at that point.
                leg.add_membership(comm)
            comm.add_source(MEMBER_LIST)
            yield comm
Exemplo n.º 2
0
    def get_people(self):
        """Yield a ``Legislator`` for each entry of a fixed sample roster.

        Every legislator gets ``http://example.com`` as its source and an
        "office email" contact detail whose local part is a slug of the
        district: lowercased, spaces replaced by hyphens, commas removed.
        """
        people = [
            {"name": "Mckenzie A. Cannon", "district": "10a",},
            {"name": "Yandel V. Watkins",
             "district": "Second Fnord and Norfolk",},
            {"name": "Adrien A. Coffey", "district": "A",},
            {"district": "10c", "name": "Natasha Moon",},
            {"district": "Berkshire, Hampshire, Franklin and Hampden",
             "name": "Ramon Harmon",},
            {"district": "5", "name": "Sam Sellers",},
            {"district": "6", "name": "Estrella Hahn",},
            {"district": "B",  "name": "Teagan Rojas",},
            {"district": "C", "name": "Barrett Adams",},
            {"district": "D", "name": "Kayla Shelton",},
            {"district": "E", "name": "Kohen Dudley",},
            {"district": "F", "name": "Cayden Norman",},
            {"district": "ZZ", "name": "Shayla Fritz",},
            {"district": "Ward 2", "name": "Gunnar Luna",},
            {"district": "Green", "name": "Regina Cruz",},
            {"district": "Blue", "name": "Makenzie Keller",},
            {"district": "Red", "name": "Eliana Meyer",},
            {"district": "Yellow", "name": "Taylor Parrish",},
            {"district": "Silver", "name": "Callie Craig",},
        ]

        for person in people:
            legislator = Legislator(**person)
            legislator.add_source("http://example.com")
            dslug = (
                person['district'].lower().replace(" ", "-").replace(",", ""))
            # The template needs a ``%s`` placeholder: applying ``%`` to a
            # string without one raises TypeError ("not all arguments
            # converted"). NOTE(review): the example.com domain is assumed
            # from the source URL above - confirm the intended address.
            legislator.add_contact_detail(
                type='email',
                value="%s@example.com" % (dslug,),
                note='office email'
            )
            yield legislator
Exemplo n.º 3
0
    def get_people(self):
        """Yield a ``Legislator`` for each entry of a fixed sample roster.

        Every legislator gets ``http://example.com`` as its source and an
        "office email" contact detail whose local part is a slug of the
        district: lowercased, spaces replaced by hyphens, commas removed.
        """
        people = [
            {
                "name": "Mckenzie A. Cannon",
                "district": "10a",
            },
            {
                "name": "Yandel V. Watkins",
                "district": "Second Fnord and Norfolk",
            },
            {
                "name": "Adrien A. Coffey",
                "district": "A",
            },
            {
                "district": "10c",
                "name": "Natasha Moon",
            },
            {
                "district": "Berkshire, Hampshire, Franklin and Hampden",
                "name": "Ramon Harmon",
            },
            {
                "district": "5",
                "name": "Sam Sellers",
            },
            {
                "district": "6",
                "name": "Estrella Hahn",
            },
            {
                "district": "B",
                "name": "Teagan Rojas",
            },
            {
                "district": "C",
                "name": "Barrett Adams",
            },
            {
                "district": "D",
                "name": "Kayla Shelton",
            },
            {
                "district": "E",
                "name": "Kohen Dudley",
            },
            {
                "district": "F",
                "name": "Cayden Norman",
            },
            {
                "district": "ZZ",
                "name": "Shayla Fritz",
            },
            {
                "district": "Ward 2",
                "name": "Gunnar Luna",
            },
            {
                "district": "Green",
                "name": "Regina Cruz",
            },
            {
                "district": "Blue",
                "name": "Makenzie Keller",
            },
            {
                "district": "Red",
                "name": "Eliana Meyer",
            },
            {
                "district": "Yellow",
                "name": "Taylor Parrish",
            },
            {
                "district": "Silver",
                "name": "Callie Craig",
            },
        ]

        for person in people:
            legislator = Legislator(**person)
            legislator.add_source("http://example.com")
            district = person['district']
            dslug = district.lower().replace(" ", "-").replace(",", "")
            # The template needs a ``%s`` placeholder: applying ``%`` to a
            # string without one raises TypeError ("not all arguments
            # converted"). NOTE(review): the example.com domain is assumed
            # from the source URL above - confirm the intended address.
            legislator.add_contact_detail(type='email',
                                          value="%s@example.com" % (dslug,),
                                          note='office email')
            yield legislator