Пример #1
0
    def get_people(self):
        """Yield the Temecula City Council, then one Person per council row.

        The council organization is yielded first.  Every row of the second
        table on the listing page, except the first row, is then turned into
        a ``Person`` with a council membership, a work email contact detail
        and two sources (the listing page and the row's detail link).
        """
        urls = Urls(dict(list=legislators_url), self)

        council = Organization('Temecula City Council',
                               classification='legislature')
        council.add_source(urls.list.url)
        yield council

        rows = urls.list.xpath('//table[2]//tr')
        for row in rows[1:]:
            # The unpacking requires exactly two text nodes: name, then role.
            name, role = row.xpath('td/p[1]//font/text()')
            image = row.xpath('td/img/@src').pop()

            person = Person(name, image=image)
            membership = person.add_membership(council, role=role)

            # The unpacking requires exactly two hrefs: email link, then
            # detail page.
            email_href, detail_url = row.xpath('td//a/@href')
            # Drop the first 7 characters -- presumably a "mailto:" prefix;
            # verify against the page markup.
            email = email_href[len('mailto:'):]
            membership.contact_details.append(
                {'type': 'email', 'value': email, 'note': 'work'})

            person.add_source(urls.list.url)
            person.add_source(detail_url)

            yield person
Пример #2
0
def test_magic_methods():
    """Check that add_link and add_name record entries on the person."""
    person = Person(
        "John Q. Public, Esq.",
        gender="male",
        image="http://example.com/john.jpg",
        summary="Some person",
    )
    person.add_source(url='foo')
    person.validate()

    # A link is stored as a note/url mapping.
    person.add_link("http://twitter.com/ev", "Twitter Account")
    expected_links = [
        dict(note="Twitter Account", url="http://twitter.com/ev"),
    ]
    assert person.links == expected_links

    # A name added without dates carries only name and note.
    nickname = dict(name="Thiston", note="What my friends call me")
    person.add_name("Thiston", note="What my friends call me")
    assert person.other_names == [nickname]

    # A name added with dates is appended after the earlier one.
    birth_name = dict(
        name="Johnseph Q. Publico",
        note="Birth name",
        start_date="1920-01",
        end_date="1949-12-31",
    )
    person.add_name(
        "Johnseph Q. Publico",
        note="Birth name",
        start_date="1920-01",
        end_date="1949-12-31",
    )
    assert person.other_names == [nickname, birth_name]
Пример #3
0
    def get_people(self):
        """Yield the Temecula City Council, then one Person per council row.

        The council organization is yielded first.  Every row of the second
        table on the listing page, except the first row, is then turned into
        a ``Person`` with a council membership, a work email contact detail
        and two sources (the listing page and the row's detail link).
        """
        urls = Urls(dict(list=legislators_url), self)

        council = Organization(
            'Temecula City Council',
            classification='legislature',
        )
        council.add_source(urls.list.url)
        yield council

        member_rows = urls.list.xpath('//table[2]//tr')[1:]
        for member_row in member_rows:
            # The unpacking requires exactly two text nodes: name, then role.
            name, role = member_row.xpath('td/p[1]//font/text()')
            image = member_row.xpath('td/img/@src').pop()

            person = Person(name, image=image)
            membership = person.add_membership(council, role=role)

            # The unpacking requires exactly two hrefs: email link, then
            # detail page.
            email_href, detail_url = member_row.xpath('td//a/@href')
            # Drop the first 7 characters -- presumably a "mailto:" prefix;
            # verify against the page markup.
            email = email_href[len('mailto:'):]
            membership.contact_details.append(
                {'type': 'email', 'value': email, 'note': 'work'})

            person.add_source(urls.list.url)
            person.add_source(detail_url)

            yield person
Пример #4
0
def test_basic_invalid_person():
    """Check that validation fails once a valid person loses its name."""
    person = Person("Bob B. Johnson")
    person.add_source(url='foo')
    # The person validates before the name is cleared.
    person.validate()

    person.name = None

    @raises(ValidationError)
    def validate_without_name():
        person.validate()

    validate_without_name()
Пример #5
0
def test_add_contact_information():
    """Check that a person still validates after a contact detail is added."""
    person = Person(
        "John Q. Public, Esq.",
        gender="male",
        image="http://example.com/john.jpg",
        summary="Some person",
    )
    person.add_source(url='foo')
    # Validate once before and once after adding the contact detail.
    person.validate()

    person.add_contact_detail(
        type='voice',
        value='876-5309',
        note='Jenny Cell',
    )
    person.validate()
Пример #6
0
    def get_people(self):
        """Yield a committee, its subcommittee, and the committee's chairman.

        Objects are yielded in that order; each one carries the same single
        source URL.
        """
        source_url = 'https://example.com'

        # Parent committee, with a chairman post.
        committee = Organization('Technology')
        committee.add_post('Chairman', 'chairman')
        committee.add_source(source_url)
        yield committee

        # Subcommittee attached to the committee above.
        subcommittee = Organization(
            'Subcommittee on E-Commerce',
            parent=committee,
        )
        subcommittee.add_source(source_url)
        yield subcommittee

        # Person holding the chairman role on the parent committee.
        chairman = Person('Paul Tagliamonte', district='6', chamber='upper')
        chairman.add_membership(committee, role='chairman')
        chairman.add_source(source_url)
        yield chairman
Пример #7
0
    def get_people(self):
        """Yield the Boise City Council, then one Person per detail page.

        Detail-page links are read from the listing page and the first link
        is skipped.  For each remaining page the image, heading and mailto
        link are extracted to build a ``Person`` with a council membership
        and a work email contact detail.
        """
        urls = Urls(dict(list=legislators_url), self)

        council = Organization('Boise City Council')
        council.add_source(legislators_url)
        yield council

        detail_links = urls.list.xpath('//div[@id="content"]/div/a/@href')

        # Skip the mayor (the first link) because his page has no name or
        # email.
        for detail_link in detail_links[1:]:
            urls.add(detail=detail_link)

            image = urls.detail.xpath('//div[@id="content"]/p/img/@src').pop()
            heading = urls.detail.xpath('//h1/text()').pop()

            # Remove every "Council " occurrence, then split at the first
            # space: the leading word is the role, the remainder is the name.
            role, _, name = heading.replace('Council ', '').partition(' ')

            person = Person(name, image=image)

            membership = person.add_membership(council, role=role)
            membership.add_source(urls.detail.url)

            # Take the last mailto href and drop its 7-character "mailto:"
            # prefix (assumes the href starts with it).
            mailto_href = urls.detail.xpath(
                '//a[contains(@href, "mailto")]/@href').pop()
            email = mailto_href[len('mailto:'):]
            membership.contact_details.append(
                {'type': 'email', 'value': email, 'note': 'work'})

            person.add_source(urls.list.url)
            person.add_source(urls.detail.url)

            yield person
Пример #8
0
    def get_people(self):
        """Yield the Denver City Council, then one Person per council member.

        The listing page is scanned for anchors whose href contains
        "councildistrict" to collect image URLs, names, detail-page URLs and
        post labels.  Every post label is registered on the council before
        the council is yielded.  Each member's detail page is then searched
        for a phone number and an email address, which are appended to the
        membership's contact details.

        Note that ``re.search(...).group()`` is called without checking for a
        match, so a detail page whose contact text lacks a phone number or
        email raises ``AttributeError``.
        """
        urls = Urls(dict(list=legislators_url), self)

        council = Organization('Denver City Council')
        council.add_source(legislators_url)

        # Get image urls, names, detail urls, and districts.
        image_xpath = '//a[contains(@href, "councildistrict")]/img/@src'
        image_urls = urls.list.xpath(image_xpath)

        # The last matching anchor is dropped and anchors with empty text
        # are discarded.
        name_xpath = '//a[contains(@href, "councildistrict")]'
        names = [a.text_content() for a in urls.list.xpath(name_xpath)][:-1]
        names = [name for name in names if name]

        person_urls_xpath = '//a[contains(@href, "councildistrict")]/@href'
        person_urls = urls.list.xpath(person_urls_xpath)

        # Derive a post label from the table cell around each member image:
        # a numbered district when present, otherwise the at-large seat.
        # Cells matching neither pattern contribute no post.
        post_ids = []
        xpath = '//a[contains(@href, "councildistrict")]/img/ancestor::td'
        for td in urls.list.xpath(xpath):
            text = td.text_content()
            # Raw string: "\d" in a plain string literal is an invalid
            # escape sequence and triggers a warning on modern Python.
            m = re.search(r'Council District \d+', text)
            if m:
                post_ids.append(m.group())
                continue
            m = re.search(r'Council At-Large', text)
            if m:
                post_ids.append('Council At-Large')

        for post_id in post_ids:
            council.add_post(post_id, post_id)
        yield council

        # zip() stops at the shortest sequence, so the four lists are paired
        # positionally.
        data = zip(image_urls, names, person_urls, post_ids)
        for image_url, name, person_url, post_id in data:

            # Create legislator.
            person = Person(name, image=image_url)

            # Add sources.
            urls.add(detail=person_url)
            person.add_source(urls.list.url, note='list')
            person.add_source(urls.detail.url, note='detail')

            # Add membership on council.
            memb = person.add_membership(council, post_id=post_id.strip())
            memb.add_source(urls.detail.url)

            xpath = '//div[@id="dnn_column3"]'
            contact_text = urls.detail.xpath(xpath)[0].text_content()

            # Fall back to the wide right pane when the first container has
            # no text.
            if not contact_text.strip():
                xpath = '//div[contains(@id, "dnn_RightPaneWide")]'
                contact_text = urls.detail.xpath(xpath)[0].text_content()

            phone_regex = r'\(\d{3}\)[ -]*\d{3}-\d{4}'
            phone = re.search(phone_regex, contact_text).group()
            memb.contact_details.append(
                dict(type='phone', value=phone, note='work'))

            # Add email address.
            # NOTE(review): this pattern matches only the literal text
            # "[email protected]", which looks like an email-obfuscation
            # placeholder rather than a real address pattern -- confirm the
            # intended regex.
            email_regex = r'\[email protected]'
            email = re.search(email_regex, contact_text).group()
            memb.contact_details.append(
                dict(type='email', value=email, note='work'))

            yield person
Пример #9
0
    def get_people(self):
        """Yield the Denver City Council, then one Person per council member.

        The listing page is scanned for anchors whose href contains
        "councildistrict" to collect image URLs, names, detail-page URLs and
        post labels.  Every post label is registered on the council before
        the council is yielded.  Each member's detail page is then searched
        for a phone number and an email address, which are appended to the
        membership's contact details.

        Note that ``re.search(...).group()`` is called without checking for a
        match, so a detail page whose contact text lacks a phone number or
        email raises ``AttributeError``.
        """
        urls = Urls(dict(list=legislators_url), self)

        council = Organization('Denver City Council')
        council.add_source(legislators_url)

        # Get image urls, names, detail urls, and districts.
        image_xpath = '//a[contains(@href, "councildistrict")]/img/@src'
        image_urls = urls.list.xpath(image_xpath)

        # The last matching anchor is dropped and anchors with empty text
        # are discarded.
        name_xpath = '//a[contains(@href, "councildistrict")]'
        names = [a.text_content() for a in urls.list.xpath(name_xpath)][:-1]
        names = [name for name in names if name]

        person_urls_xpath = '//a[contains(@href, "councildistrict")]/@href'
        person_urls = urls.list.xpath(person_urls_xpath)

        # Derive a post label from the table cell around each member image:
        # a numbered district when present, otherwise the at-large seat.
        # Cells matching neither pattern contribute no post.
        post_ids = []
        xpath = '//a[contains(@href, "councildistrict")]/img/ancestor::td'
        for td in urls.list.xpath(xpath):
            text = td.text_content()
            # Raw string: "\d" in a plain string literal is an invalid
            # escape sequence and triggers a warning on modern Python.
            m = re.search(r'Council District \d+', text)
            if m:
                post_ids.append(m.group())
                continue
            m = re.search(r'Council At-Large', text)
            if m:
                post_ids.append('Council At-Large')

        for post_id in post_ids:
            council.add_post(post_id, post_id)
        yield council

        # zip() stops at the shortest sequence, so the four lists are paired
        # positionally.
        data = zip(image_urls, names, person_urls, post_ids)
        for image_url, name, person_url, post_id in data:

            # Create legislator.
            person = Person(name, image=image_url)

            # Add sources.
            urls.add(detail=person_url)
            person.add_source(urls.list.url, note='list')
            person.add_source(urls.detail.url, note='detail')

            # Add membership on council.
            memb = person.add_membership(council, post_id=post_id.strip())
            memb.add_source(urls.detail.url)

            xpath = '//div[@id="dnn_column3"]'
            contact_text = urls.detail.xpath(xpath)[0].text_content()

            # Fall back to the wide right pane when the first container has
            # no text.
            if not contact_text.strip():
                xpath = '//div[contains(@id, "dnn_RightPaneWide")]'
                contact_text = urls.detail.xpath(xpath)[0].text_content()

            phone_regex = r'\(\d{3}\)[ -]*\d{3}-\d{4}'
            phone = re.search(phone_regex, contact_text).group()
            memb.contact_details.append(
                dict(type='phone', value=phone, note='work'))

            # Add email address.
            # NOTE(review): this pattern matches only the literal text
            # "[email protected]", which looks like an email-obfuscation
            # placeholder rather than a real address pattern -- confirm the
            # intended regex.
            email_regex = r'\[email protected]'
            email = re.search(email_regex, contact_text).group()
            memb.contact_details.append(
                dict(type='email', value=email, note='work'))

            yield person