Python CanadianPerson.gender примеры использования

Язык программирования: Python

Пространство имен/Пакет: utils

Класс/Тип: CanadianPerson

Метод/Функция: gender

Примеров на hotexamples.com: 2

Python CanadianPerson.gender - 2 примера найдено. Это лучшие примеры Python кода для utils.CanadianPerson.gender, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

add_contact(30)

add_source(30)

image(30)

add_link(22)

add_membership(8)

birth_date(5)

gender(2)

add_name(1)

validate(1)

Пример #1

Показать файл

Файл: people.py Проект: spatialbits/scrapers-ca

    def scrape(self):
        """Yield the organizations, posts and people described by the CSV.

        Rows are read from ``self.csv_url`` through ``self.csv_reader`` and
        the header names are normalised with ``self.header_converter``. For
        every row accepted by ``self.is_valid_row`` the generator yields, in
        this order:

        * an ``Organization``, only the first time its name (compared
          case-insensitively) is seen;
        * a ``Post`` for the row's role;
        * a ``Person`` that has passed ``validate()``.

        The ``organization``, ``primary role``, ``name`` and ``district name``
        columns are read unconditionally; every other column, including
        ``email`` and ``address``, is optional.

        Any exception raised while handling a row is printed and the row is
        abandoned. Objects already yielded for that row are not retracted.
        """
        organizations = {}
        # seat_numbers[role][district] counts the rows seen so far for that
        # role in that district; it is used to number seats in shared areas.
        seat_numbers = defaultdict(lambda: defaultdict(int))

        reader = self.csv_reader(self.csv_url,
                                 delimiter=self.delimiter,
                                 header=True,
                                 encoding=self.encoding,
                                 skip_rows=self.skip_rows)
        reader.fieldnames = [
            self.header_converter(field) for field in reader.fieldnames
        ]
        for row in reader:

            try:
                if not self.is_valid_row(row):
                    continue

                # A correction is either a mapping of bad value -> good value
                # or a callable applied to the raw value.
                for key, corrections in self.corrections.items():
                    if not isinstance(corrections, dict):
                        row[key] = corrections(row[key])
                    elif row[key] in corrections:
                        row[key] = corrections[row[key]]

                organization_classification = 'legislature'

                organization_name = row['organization']
                organization_key = organization_name.lower()
                if organization_key in organizations:
                    organization = organizations[organization_key]
                else:
                    organization = Organization(
                        organization_name,
                        classification=organization_classification)
                    organization.add_source(self.csv_url)
                    yield organization
                    organizations[organization_key] = organization

                # Rows without an explicit role default to councillor.
                if not row['primary role']:
                    row['primary role'] = 'Councillor'

                role = row['primary role']

                post = Post(role=role,
                            label=organization_name,
                            organization_id=organization._id)
                yield post

                name = row['name'].strip(' .,')

                district = row['district name']

                # When several people share one area, disambiguate them with
                # a running seat number, except for roles flagged as unique.
                if self.many_posts_per_area and role not in self.unique_roles:
                    seat_numbers[role][district] += 1
                    district = '{} (seat {})'.format(
                        district, seat_numbers[role][district])

                p = Person(primary_org=organization_classification,
                           name=name,
                           district=district,
                           role=role,
                           party=row.get('party name'))
                p.add_source(self.csv_url)

                if row.get('gender'):
                    p.gender = row['gender']
                if row.get('photo url'):
                    p.image = row['photo url']

                if row.get('source url'):
                    p.add_source(row['source url'].strip(' .,'))

                if row.get('website'):
                    p.add_link(row['website'], note='web site')
                if row.get('facebook'):
                    # Drop any query string or fragment from the profile URL.
                    p.add_link(re.sub(r'[#?].+', '', row['facebook']))
                if row.get('twitter'):
                    p.add_link(row['twitter'])

                # Use .get() like the other optional columns: indexing
                # directly raised KeyError for CSVs without these columns,
                # which discarded the whole person.
                if row.get('email'):
                    p.add_contact('email', row['email'].strip(' .,'))
                if row.get('address'):
                    p.add_contact('address', row['address'], 'legislature')
                if row.get('phone'):
                    p.add_contact('voice', row['phone'], 'legislature')
                if row.get('fax'):
                    p.add_contact('fax', row['fax'], 'legislature')
                if row.get('cell'):
                    p.add_contact('cell', row['cell'], 'legislature')
                if row.get('birth date'):
                    p.birth_date = row['birth date']

                if row.get('incumbent'):
                    p.extras['incumbent'] = row['incumbent']

                if name in self.other_names:
                    for other_name in self.other_names[name]:
                        p.add_name(other_name)

                # Validate person entity so that we can catch the exception if needed.
                p.validate()

                yield p
            except Exception as e:
                # Deliberate best-effort: report the problem and move on to
                # the next row instead of aborting the whole scrape.
                print(repr(e))

Пример #2

Показать файл

Файл: people.py Проект: jboakye/scrapers-ca

    def scrape_people(self, rows, gender):
        """Yield a ``Person`` for every member tile in *rows*.

        Args:
            rows: non-empty sequence of elements supporting ``.xpath()``
                (presumably lxml elements of the members listing page), one
                per member tile.
            gender: value assigned to the ``gender`` attribute of every
                person produced from *rows*.

        Yields:
            One ``Person`` per row, populated with sources, optional email,
            photo, personal link, preferred languages, the House of Commons
            address, and Hill Office / constituency office contacts scraped
            from the member's own page.

        Raises:
            AssertionError: if *rows* is empty (raised when iteration starts,
                because this is a generator).
        """

        def parse_phone_and_fax(element):
            """Return ``(voice, fax)`` from a "Telephone / Fax" paragraph.

            Either value is ``''`` when the corresponding line is missing or
            has no number after its label.
            """
            lines = element.text_content().strip().splitlines()
            # Note that https://www.ourcommons.ca/Members/en/michael-barrett(102275)#contact
            # has an empty value - "Telephone:". So the search / replace cannot include space.
            voice = ''
            if lines:
                voice = lines[0].replace('Telephone:', '').replace(
                    'Téléphone :', '').strip()
            # Always start from an empty fax so that a paragraph without a
            # fax line never inherits the number parsed for another office.
            fax = ''
            if len(lines) > 1:
                fax = lines[1].replace('Fax:', '').replace(
                    'Télécopieur :', '').strip()
            return voice, fax

        assert len(rows), 'No members found'
        for row in rows:
            name = row.xpath(
                './/div[@class="ce-mip-mp-name"][1]')[0].text_content()
            constituency = row.xpath(
                './/div[@class="ce-mip-mp-constituency"][1]')[0].text_content(
                )
            constituency = constituency.replace('–', '—')  # n-dash, m-dash
            if constituency == 'Mont-Royal':
                constituency = 'Mount Royal'

            province = row.xpath(
                './/div[@class="ce-mip-mp-province"][1]')[0].text_content()

            party = row.xpath(
                './/div[@class="ce-mip-mp-party"][1]')[0].text_content()

            url = row.xpath('.//a[@class="ce-mip-mp-tile"]/@href')[0]

            # Members from Québec are scraped from the French version of
            # their page.
            if province == 'Québec':
                url = url.replace('/en/', '/fr/')

            mp_page = self.lxmlize(url)
            email = self.get_email(mp_page,
                                   '//*[@id="contact"]/div/p/a',
                                   error=False)

            photo = mp_page.xpath(
                './/div[@class="ce-mip-mp-profile-container"]//img/@src')[0]

            m = Person(primary_org='lower',
                       name=name,
                       district=constituency,
                       role='MP',
                       party=party)
            m.add_source(COUNCIL_PAGE)
            m.add_source(url)
            m.gender = gender
            # @see https://www.ourcommons.ca/Members/en/ziad-aboultaif(89156)
            if email:
                m.add_contact('email', email)

            if photo:
                # Determine whether the photo is actually a generic silhouette
                photo_response = self.get(photo)
                if (photo_response.status_code == 200
                        and hashlib.sha1(photo_response.content).hexdigest()
                        not in IMAGE_PLACEHOLDER_SHA1):
                    m.image = photo

            # I don't think the new parliament website has personal websites anymore
            personal_url = mp_page.xpath(
                './/a[contains(@title, "Personal Web Site")]/@href')
            if personal_url:
                m.add_link(personal_url[0])

            preferred_languages = mp_page.xpath(
                './/dt[contains(., "Preferred Language")]/following-sibling::dd/text()'
            )
            if preferred_languages:
                m.extras['preferred_languages'] = [
                    language.replace('/', '').strip()
                    for language in preferred_languages
                ]

            if province == 'Québec':
                m.add_contact('address',
                              'Chambre des communes\nOttawa ON  K1A 0A6',
                              'legislature')
            else:
                m.add_contact('address',
                              'House of Commons\nOttawa ON  K1A 0A6',
                              'legislature')

            # Hill Office contacts
            # Now phone and fax are in the same element
            # <p>
            #   Telephone: xxx-xxx-xxxx<br/>
            #   Fax: xxx-xxx-xxx
            # </p>
            phone_and_fax_el = mp_page.xpath(
                './/h4[contains(., "Hill Office")]/../p[contains(., "Telephone")]|.//h4[contains(., "Hill Office")]/../p[contains(., "Téléphone :")]'
            )
            if len(phone_and_fax_el):
                voice, fax = parse_phone_and_fax(phone_and_fax_el[0])
                if voice:
                    m.add_contact('voice', voice, 'legislature')

                if fax:
                    m.add_contact('fax', fax, 'legislature')

            # Constituency Office contacts
            # Some people have more than one, e.g. https://www.ourcommons.ca/Members/en/ben-lobb(35600)#contact
            for i, constituency_office_el in enumerate(
                    mp_page.xpath(
                        './/div[@class="ce-mip-contact-constituency-office-container"]/div'
                    )):
                # The first office is noted "constituency", later ones
                # "constituency (2)", "constituency (3)", ...
                note = 'constituency'
                if i:
                    note += ' ({})'.format(i + 1)

                address = constituency_office_el.xpath('./p[1]')[0]
                address = address.text_content().strip().splitlines()
                address = list(map(str.strip, address))
                m.add_contact('address', '\n'.join(address), note)

                phone_and_fax_el = constituency_office_el.xpath(
                    './p[contains(., "Telephone")]|./p[contains(., "Téléphone")]'
                )
                if len(phone_and_fax_el):
                    voice, fax = parse_phone_and_fax(phone_and_fax_el[0])

                    if voice:
                        m.add_contact('voice', voice, note)

                    if fax:
                        m.add_contact('fax', fax, note)

            yield m