Примеры использования Person.image в Python

Язык программирования: Python

Пространство имен/Пакет: openstates.scrape

Класс/Тип: Person

Метод/Функция: image

Примеров на hotexamples.com: 8

Python Person.image — найдено 8 примеров. Это лучшие примеры кода на Python для openstates.scrape.Person.image, взятые из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать / Скрыть

Person(30)

add_link(30)

add_source(30)

add_contact_detail(30)

as_dict(20)

image(8)

add_membership(5)

add_name(4)

extras(4)

add_identifier(3)

add_party(3)

birth_date(3)

pre_save(3)

validate(2)

name(1)

contact_details(1)

add_term(1)

Пример #1

Показать файл

Файл: test_people_importer.py Проект: sgodfrey66/openstates-core

def test_same_name_people():
    """Check how PersonImporter treats several people who share one name.

    The steps run in order against the same database: a duplicate-name
    import into a pristine database, a single import, a repeat of the
    duplicate import, disambiguation through birth dates, an undated third
    namesake, and finally an update combined with a fresh insert.
    """

    def give_everyone_a_membership(organization):
        # create fake memberships so that future lookups work on the
        # imported people
        for person in Person.objects.all():
            Membership.objects.create(person=person, organization=organization)

    create_jurisdiction()
    wwe = Organization.objects.create(name="WWE", jurisdiction_id="jid")

    rock_a = ScrapePerson("Dwayne Johnson", image="http://example.com/1")
    rock_b = ScrapePerson("Dwayne Johnson", image="http://example.com/2")

    # two same-named people arriving in a pristine database must error
    with pytest.raises(SameNameError):
        PersonImporter("jid").import_data([rock_a.as_dict(), rock_b.as_dict()])

    # one person on their own imports cleanly
    PersonImporter("jid").import_data([rock_a.as_dict()])
    give_everyone_a_membership(wwe)

    # bringing in another person with the same name still fails
    with pytest.raises(SameNameError):
        PersonImporter("jid").import_data([rock_a.as_dict(), rock_b.as_dict()])

    # once both carry birth dates the import goes through
    rock_a.birth_date = "1970"
    rock_b.birth_date = "1930"
    outcome = PersonImporter("jid").import_data(
        [rock_a.as_dict(), rock_b.as_dict()]
    )
    person_stats = outcome["person"]
    assert person_stats["insert"] == 1
    assert person_stats["noop"] == 0
    assert person_stats["update"] == 1
    assert Person.objects.count() == 2
    give_everyone_a_membership(wwe)

    # a third namesake that has no birthday cannot be told apart: error
    rock_c = ScrapePerson("Dwayne Johnson", image="http://example.com/3")

    with pytest.raises(SameNameError):
        PersonImporter("jid").import_data([rock_c.as_dict()])

    # finally: an update works, and a new person with the same name can be
    # inserted alongside it
    rock_a.image = "http://example.com/1.jpg"
    rock_b.birth_date = "1931"  # change birth_date, means a new insert
    outcome = PersonImporter("jid").import_data(
        [rock_a.as_dict(), rock_b.as_dict()]
    )
    assert Person.objects.count() == 3
    person_stats = outcome["person"]
    assert person_stats["insert"] == 1
    assert person_stats["noop"] == 0
    assert person_stats["update"] == 1

Пример #2

Показать файл

Файл: people.py Проект: vikrantmygamma/openstates-scrapers

    def _parse_person(self, row, chamber, seat_map):
        """Build a ``Person`` from one roster row.

        Args:
            row: Mapping with the legislator's fields. The keys read here are
                ``FirstName``, ``MiddleName``, ``LastName``, ``County``,
                ``District``, ``party``, ``WorkEmail``, ``Address``,
                ``address2``, ``city``, ``State``, ``Zipcode``, ``Phone`` and,
                for the lower chamber only, ``seatno``.
            chamber: ``"upper"`` or ``"lower"``.
            seat_map: Mapping from a row's ``seatno`` to the value that is
                substituted into ``self.house_profile_url``.

        Returns:
            The populated ``Person``, or ``None`` when the row's district
            is ``0``.
        """
        # Capture legislator vitals.
        first_name = row["FirstName"]
        middle_name = row["MiddleName"]
        last_name = row["LastName"]
        full_name = "{} {} {}".format(first_name, middle_name, last_name)
        # Collapse the gap left by a blank middle name and trim the edges so
        # a blank first/last name does not leave stray whitespace.
        full_name = re.sub(r"[\s]{2,}", " ", full_name).strip()

        if chamber == "lower":
            district = "{} {}".format(row["County"],
                                      int(row["District"])).strip()
        else:
            district = str(int(row["District"])).strip()

        party = self.party_map[row["party"].upper()]
        email = row["WorkEmail"]

        if district == "0":
            self.warning("Skipping {}, district is set to 0".format(full_name))
            return

        person = Person(primary_org=chamber,
                        district=district,
                        name=full_name,
                        party=party)

        person.extras = {
            "first_name": first_name,
            "middle_name": middle_name,
            "last_name": last_name,
        }

        if email:
            office = "Capitol" if email.endswith(
                "@leg.state.nh.us") else "District"
            person.add_contact_detail(type="email",
                                      value=email,
                                      note=office + " Office")

        # Capture legislator office contact information.
        # The address is assembled line by line: formatting all five fields
        # in one template always leaves at least "," behind, which made the
        # emptiness check below a no-op and produced blank lines whenever
        # ``address2`` was empty.
        locality = "{}, {} {}".format(row["city"], row["State"],
                                      row["Zipcode"])
        candidate_lines = (row["Address"], row["address2"], locality)
        district_address = "\n".join(
            line for line in (str(raw).strip() for raw in candidate_lines)
            # a locality of just "," carries no information either
            if line.strip(" ,"))

        phone = row["Phone"].strip()

        if district_address:
            office = "Capitol" if chamber == "upper" else "District"
            person.add_contact_detail(type="address",
                                      value=district_address,
                                      note=office + " Office")
        if phone:
            office = "Capitol" if "271-" in phone else "District"
            person.add_contact_detail(type="voice",
                                      value=phone,
                                      note=office + " Office")

        # Retrieve legislator portrait.
        profile_url = None
        if chamber == "upper":
            profile_url = self.senate_profile_url.format(row["District"])
        elif chamber == "lower":
            try:
                seat_number = seat_map[row["seatno"]]
                profile_url = self.house_profile_url.format(seat_number)
            except KeyError:
                # Best effort: a seat that cannot be resolved simply means
                # the person is emitted without a portrait or profile source.
                pass

        if profile_url:
            person.image = self._get_photo(profile_url, chamber)
            person.add_source(profile_url)

        return person

Пример #3

Показать файл

    def _scrape_representative(self, url, parties):
        # logger.info(f'Generating representative person object from {url}')
        """
        Yield a Person object representing a member of the lower
        legislative chamber.

        This is a generator: it yields exactly one Person, or nothing at
        all when the page's name heading has the form "District <number>"
        (treated as a vacant seat).

        Args:
            url: Address of the member page. It is loaded through
                ``self.lxmlize`` and recorded on the Person as both link
                and source.
            parties: Mapping indexed by the district string; the value is
                passed to ``Person`` as ``party``.

        Yields:
            The Person, with image (when available) and any address, voice
            and fax contact details found in the office listings.
        """
        # url = self.get(url).text.replace('<br>', '')
        member_page = self.lxmlize(url)

        # NOTE(review): the [0] raises IndexError when the page has no
        # member-photo <img>.
        photo_url = member_page.xpath('//img[@class="member-photo"]/@src')[0]
        # A source ending in "/.jpg" is treated as "no photo".
        if photo_url.endswith("/.jpg"):
            photo_url = None

        # The unpacking requires exactly two <h2> elements in the
        # member-info block: the name and the district text.
        scraped_name, district_text = member_page.xpath(
            '//div[@class="member-info"]/h2')
        scraped_name = scraped_name.text_content().strip().replace("Rep. ", "")
        # Collapse internal runs of whitespace.
        scraped_name = " ".join(scraped_name.split())

        # Reverse the ", "-separated parts, e.g. "Last, First" -> "First Last".
        name = " ".join(scraped_name.split(", ")[::-1])

        district_text = district_text.text_content().strip()
        district = str(self.district_re.search(district_text).group(1))

        # Vacant house "members" are named after their district numbers:
        if re.match(r"^District \d+$", scraped_name):
            return None

        party = parties[district]

        person = Person(name=name,
                        district=district,
                        party=party,
                        primary_org="lower")

        if photo_url is not None:
            person.image = photo_url

        person.add_link(url)
        person.add_source(url)

        def office_name(element):
            """Returns the office address type."""
            # Text of the nearest preceding <h4> sibling, minus trailing ":".
            return element.xpath("preceding-sibling::h4[1]/text()")[0].rstrip(
                ":")

        # One entry per office paragraph: the heading as "name", the heading
        # without " Address" and lower-cased as "type", and the paragraph
        # text as "details".
        offices_text = [{
            "name":
            office_name(p_tag),
            "type":
            office_name(p_tag).replace(" Address", "").lower(),
            "details":
            p_tag.text_content(),
        } for p_tag in member_page.xpath(
            '//h4/following-sibling::p[@class="double-space"]')]

        # NOTE: this loop iterates a list that its own body may extend
        # (see below); entries appended during the loop are visited too.
        for office_text in offices_text:
            details = office_text["details"].strip()

            # A few member pages have blank office listings:
            if details == "":
                continue

            # At the time of writing, this case of multiple district
            # offices occurs exactly once, for the representative at
            # District 43:
            if details.count("Office") > 1:
                # Split the combined text into one chunk per "<word> Office".
                district_offices = [
                    district_office.strip() for district_office in re.findall(
                        r"(\w+ Office.+?(?=\w+ Office|$))",
                        details,
                        flags=re.DOTALL)
                ]
                # Queue each chunk as its own entry. The combined entry is
                # not skipped: it still falls through to the address
                # matching below.
                offices_text += [{
                    "name":
                    re.match(r"\w+ Office", office).group(),
                    "type":
                    "district",
                    "details":
                    re.search(r"(?<=Office).+(?=\w+ Office|$)?", office,
                              re.DOTALL).group(),
                } for office in district_offices]

            match = self.address_re.search(details)
            if match is not None:
                # Drop carriage returns, squeeze doubled newlines and trim
                # trailing spaces on every line of the match.
                address = re.sub(
                    " +$",
                    "",
                    match.group().replace("\r", "").replace("\n\n", "\n"),
                    flags=re.MULTILINE,
                )
            else:
                # No valid address found in the details.
                continue

            # extract_phone / extract_fax are defined outside this block;
            # presumably they return a falsy value when nothing is found
            # (the checks below rely on that) - TODO confirm.
            phone_number = extract_phone(details)
            fax_number = extract_fax(details)

            if address:
                person.add_contact_detail(type="address",
                                          value=address,
                                          note=office_text["name"])
            if phone_number:
                person.add_contact_detail(type="voice",
                                          value=phone_number,
                                          note=office_text["name"])
            if fax_number:
                person.add_contact_detail(type="fax",
                                          value=fax_number,
                                          note=office_text["name"])

        yield person

Пример #4

Показать файл

    def _scrape_senator(self, url, parties):
        """Yield a ``Person`` for the upper-chamber member described at ``url``.

        This is a generator: it yields exactly one ``Person``, or nothing
        when the page title names a bare "District <number>" (a vacant seat).

        Args:
            url: Address of the member page, loaded through ``self.lxmlize``
                and recorded on the person as both link and source.
            parties: Mapping indexed by the district string; the value is
                passed to ``Person`` as ``party``.

        Yields:
            The ``Person`` with biography, image (when the page has one) and
            the address, voice and fax details found for each office.
        """
        # Scrape legislator information from roster URL
        # Example: view-source:https://senate.texas.gov/member.php?d=1
        member_page = self.lxmlize(url)

        # A page without the header image yields a person without a photo
        # instead of an IndexError.
        photo_urls = member_page.xpath('//img[@id="memhead"]/@src')
        photo_url = photo_urls[0] if photo_urls else None

        # The page title is split on ":" into the name and the district text.
        scraped_name_district_text = member_page.xpath(
            '//div[@class="pgtitle"]/text()')[0]
        scraped_name, district_text = scraped_name_district_text.split(":")
        name = " ".join(scraped_name.replace("Senator ", "").split())
        district = str(district_text.split()[1])

        # Vacant seats are named after their district numbers:
        if re.match(r"^District \d+$", name):
            return

        bio = " ".join(member_page.xpath('//div[@class="bio"]/text()'))
        party = parties[district]

        person = Person(
            name=name,
            district=district,
            party=party,
            primary_org="upper",
            biography=bio,
        )

        if photo_url is not None:
            person.image = photo_url
        person.add_link(url)
        person.add_source(url)

        # Get offices based on table headers: every <th> that has both an id
        # and a text label describes one office.
        office_ids = []
        for th_tag in member_page.xpath('//table[@class="memdir"]/tr/th'):
            ids = th_tag.xpath("@id")
            texts = th_tag.xpath("text()")
            header_id = ids[0] if ids else ""
            label = texts[0].strip() if texts else ""
            if header_id and label:
                office_ids.append({"id": header_id, "label": label})

        for office in office_ids:
            cells = member_page.xpath(
                f'//table[@class="memdir"]/tr/td[@headers="{office["id"]}"]')
            # A few member pages have broken ids for office listings:
            if not cells:
                cells = member_page.xpath(
                    '//table[@class="memdir"]/tr/td[@headers="dDA1"]')
            if not cells:
                # Nothing to read for this office. Skipping is essential:
                # falling through would reuse the previous office's details
                # under this office's label (or hit an unbound name on the
                # first office).
                continue

            details = " ".join(cells[0].xpath("text()")).strip()
            details = details.replace("\r", "").replace("\n", "")
            # A few member pages have blank office listings:
            if details == "":
                continue

            match = self.address_re.search(details)
            if match is None:
                # No valid address found in the details.
                continue
            address = re.sub(
                " +$",
                "",
                match.group().replace("\r", "").replace("\n", ""),
                flags=re.MULTILINE,
            )

            # extract_phone / extract_fax live outside this block; the
            # truthiness checks below assume they return something falsy
            # when no number is present - TODO confirm.
            phone_number = extract_phone(details)
            fax_number = extract_fax(details)

            if address:
                person.add_contact_detail(type="address",
                                          value=address,
                                          note=office["label"])
            if phone_number:
                person.add_contact_detail(type="voice",
                                          value=phone_number,
                                          note=office["label"])
            if fax_number:
                person.add_contact_detail(type="fax",
                                          value=fax_number,
                                          note=office["label"])

        yield person

Пример #5

Показать файл

Файл: people.py Проект: jessemortenson/openstates

    def legislators(self, latest_only):
        """Collect every active legislator together with their terms.

        Args:
            latest_only: Forwarded unchanged to ``self._memberships``.

        Returns:
            A dict keyed by legislator name. Each value is a
            ``(Person, terms)`` tuple, where ``terms`` is a list of
            ``(chamber, district, term, party)`` tuples, one per membership
            row seen for that name.
        """
        party_names = {"D": "Democratic", "R": "Republican", "I": "Independent"}
        bio_missing_marker = (
            "Senate Biography Information for the 98th General "
            "Assembly is not currently available."
        )
        office_tables = {
            "Capitol Office": '//table[contains(string(), "Springfield Office")]',
            "District Office": '//table[contains(string(), "District Office")]',
        }

        by_name = {}

        for member, chamber, term, url in self._memberships(latest_only):
            name_cell, _, _, district_cell, party_cell = member.xpath("td")
            district = district_cell.text
            detail_url = name_cell.xpath("a/@href")[0]

            # A blank party cell counts as Independent.
            party = (
                "Independent"
                if party_cell.text_content().strip() == ""
                else party_names[party_cell.text]
            )
            name = name_cell.text_content().strip()

            # inactive legislator (marked with a trailing "*"), skip them
            # for now
            if name.endswith("*"):
                continue

            name = AKA.get(name, name)

            membership = (chamber, district, term, party)
            if name not in by_name:
                person = Person(name, party=party)
                by_name[name] = person, [membership]
            else:
                person, known_terms = by_name[name]
                known_terms.append(membership)

            person.add_source(url)
            person.add_source(detail_url)
            person.add_link(detail_url)

            born = BIRTH_DATES.get(name)
            if born:
                person.birth_date = born

            detail_html = self.get(detail_url).text
            detail_doc = lxml.html.fromstring(detail_html)
            detail_doc.make_links_absolute(detail_url)

            if bio_missing_marker in detail_html:
                # The legislator's bio isn't available yet.
                self.logger.warning("No legislator bio available for " + name)
                continue

            person.image = detail_doc.xpath(
                '//img[contains(@src, "/members/")]/@src'
            )[0]

            # Start from a clean slate on every membership row; presumably
            # this keeps a person seen in several rows from piling up
            # duplicate details - TODO confirm.
            person.contact_details = []

            # email
            email_labels = detail_doc.xpath('//b[text()="Email: "]')
            if email_labels:
                person.add_contact_detail(
                    type="email",
                    value=email_labels[0].tail.strip(),
                    note="Capitol Office",
                )

            for location, table_xpath in office_tables.items():
                matches = detail_doc.xpath(table_xpath)
                if not matches:
                    continue
                # The fourth matching table is the one handed to the parser.
                for kind, value in self._table_to_office(matches[3]):
                    # Phone-like entries are kept only when they validate.
                    if kind not in ("fax", "voice") or validate_phone_number(value):
                        person.add_contact_detail(
                            type=kind, value=value, note=location
                        )

        return by_name

Пример #6

Показать файл

Файл: people.py Проект: vikrantmygamma/openstates-scrapers

    def scrape_chamber(self, chamber, session):
        """Yield a ``Person`` for every seated member of one chamber.

        Args:
            chamber: ``"upper"`` (senate) or ``"lower"`` (assembly); any
                other value raises ``KeyError``.
            session: Session identifier substituted into the roster URL.

        Yields:
            One ``Person`` per roster entry that has a link and is not
            marked "Vacant", populated from the member's detail page.

        Raises:
            AssertionError: When a detail page carries no party text.
        """
        url = "https://docs.legis.wisconsin.gov/{}/legislators/{}".format(
            session, {
                "upper": "senate",
                "lower": "assembly"
            }[chamber])

        body = self.get(url).text
        page = lxml.html.fromstring(body)
        page.make_links_absolute(url)

        for row in page.xpath(
                ".//div[@class='box-content']/div[starts-with(@id,'district')]"
        ):
            # Skip entries without any link and entries marked vacant.
            if not row.xpath(".//a/@href") or row.xpath(
                    ".//a[text()='Vacant']"):
                continue

            # Force the detail link onto https. The scheme is removed as a
            # prefix: str.strip("https://") treats its argument as a set of
            # characters and would also eat matching characters from the
            # END of the URL (e.g. a trailing "s", "t", "p", "h" or "/").
            rep_url = row.xpath(".//a[text()='Details']/@href")[0].strip()
            rep_url = "https://" + re.sub(r"^https?://", "", rep_url)
            rep_doc = lxml.html.fromstring(self.get(rep_url).text)
            rep_doc.make_links_absolute(rep_url)

            full_name = (rep_doc.xpath('.//div[@id="district"]/h1/text()')
                         [0].replace("Senator ",
                                     "").replace("Representative ", ""))

            # The party code is the text before the first "-" with any
            # leading "(" removed; it is translated through PARTY_DICT.
            party_text = rep_doc.xpath('.//div[@id="district"]//small/text()')
            if party_text:
                party = PARTY_DICT[party_text[0].split("-")[0].strip(
                    "(").strip()]
            else:
                party = None

            # The district number is the last path segment of the second
            # link in the heading.
            district = rep_doc.xpath(
                './/div[@id="district"]/h3/a/@href')[1]
            district = str(int(district.split("/")[-1]))

            # email
            email_nodes = rep_doc.xpath(
                "//span[@class='info email']/a/text()")
            email = email_nodes[0] if email_nodes else ""

            # Raised explicitly (same exception type and message as the
            # former assert) so the check survives ``python -O``.
            if party is None:
                raise AssertionError(
                    "{} is missing party".format(full_name))

            person = Person(name=full_name,
                            district=district,
                            primary_org=chamber,
                            party=party)

            img = rep_doc.xpath('.//div[@id="district"]/img/@src')
            if img:
                person.image = img[0]

            # office ####
            address_lines = rep_doc.xpath(
                './/span[@class="info office"]/text()')
            address = "\n".join(
                line.strip() for line in address_lines
                if line.strip() != "")
            person.add_contact_detail(type="address",
                                      value=address,
                                      note="Capitol Office")

            # The number is read from the SECOND text node of the span, so
            # require two nodes instead of crashing on a shorter listing.
            phone = rep_doc.xpath(
                './/span[@class="info telephone"]/text()')
            if len(phone) > 1:
                person.add_contact_detail(
                    type="voice",
                    value=re.sub(r"\s+", " ", phone[1]).strip(),
                    note="Capitol Office")

            fax = rep_doc.xpath('.//span[@class="info fax"]/text()')
            if len(fax) > 1:
                person.add_contact_detail(
                    type="fax",
                    value=re.sub(r"\s+", " ", fax[1]).strip(),
                    note="Capitol Office")

            if email:
                person.add_contact_detail(type="email",
                                          value=email,
                                          note="Capitol Office")

            person.add_link(rep_url)
            person.add_source(rep_url)

            yield person

Пример #7

Показать файл

    def _scrape_upper_chamber(self):
        """Yield a ``Person`` for every seated senator on the roster page.

        The roster at ``self._senators_url`` supplies name, party, district
        and a phone number; each senator's detail page (built from
        ``self._senator_details_url``) supplies the photo, address, email,
        phone and fax.

        Yields:
            One ``Person`` per roster row that is neither named as a
            vacancy nor has a detail page saying "currently vacant".
        """
        self.info("Scraping upper chamber for legislators.")

        chamber = "upper"
        party_names = {"D": "Democratic", "R": "Republican"}
        # Compiled once: the pattern is the same for every senator.
        phone_pattern = re.compile(r"\(\d{3}\) \d{3}-\d{4}")

        source_url = self._senators_url
        roster = lxml.html.fromstring(self.get(source_url).text)

        # the first two rows are headers, skip:
        for tr in roster.xpath('//*[@id="content-2"]//table//tr')[2:]:
            tds = tr.xpath("td")
            full_name = tds[0].xpath("div/a")[0].text_content().strip()

            if full_name.startswith(
                ("Vacant", "Vacancy")) or full_name.endswith("Vacant"):
                self.warning("Skipping vacancy, named '{}'".format(full_name))
                continue

            # The cell reads "<party code>-<district>".
            party_and_district = tds[1].text_content().strip().split("-")
            party_code = party_and_district[0]
            party = party_names.get(party_code)
            if party is None:
                # Resolve the party afresh for every row. Leaving it
                # untouched here would silently reuse the previous row's
                # party (or fail on the first row).
                self.warning("Unrecognized party code '{}' for {}".format(
                    party_code, full_name))
                party = party_code or "Other"

            district = party_and_district[1].lstrip("0")
            roster_phone = tds[3].xpath("div")[0].text_content().strip()
            url = self._senator_details_url.format(int(district))

            details_page = self.get(url).text
            if "currently vacant" in details_page:
                continue

            person = Person(name=full_name,
                            primary_org=chamber,
                            district=district,
                            party=party)

            person.add_source(source_url)
            person.add_source(url)
            person.add_link(url)

            details_doc = lxml.html.fromstring(details_page)
            photo_urls = details_doc.xpath(
                '//*[@id="content-2"]//img[contains(@src, "uploads")]/@src')

            # Lines of the first paragraph of the text widget, minus the
            # "Capitol Office:" heading line.
            contact_info = [
                line.strip()
                for line in details_doc.xpath(
                    '//div[@class="textwidget"]/p[1]')
                [0].text_content().split("\n") if "Capitol Office:" not in line
            ]
            address = "\n".join(contact_info[:2])
            email = next((line for line in contact_info if "@" in line), None)
            phone_numbers = [
                line for line in contact_info
                if phone_pattern.search(line) is not None
            ]

            # Prefer the first number on the detail page; fall back to the
            # roster's number when the detail page lists none.
            if phone_numbers:
                phone = phone_pattern.search(phone_numbers[0]).group()
            else:
                phone = roster_phone
            fax = next(
                (phone_pattern.search(phone_number).group()
                 for phone_number in phone_numbers
                 if "fax" in phone_number.lower()),
                None,
            )

            person.add_contact_detail(type="address",
                                      value=address,
                                      note="Capitol Office")
            if phone:
                person.add_contact_detail(type="voice",
                                          value=phone,
                                          note="Capitol Office")
            if fax:
                person.add_contact_detail(type="fax",
                                          value=fax,
                                          note="Capitol Office")
            if email:
                person.add_contact_detail(type="email",
                                          value=email,
                                          note="Capitol Office")

            # A detail page without an uploaded image yields a person
            # without a photo rather than an IndexError.
            if photo_urls:
                person.image = photo_urls[0]

            yield person

Пример #8

Показать файл

    def _scrape_lower_chamber(self):
        """Yield a ``Person`` for every seated member on the House roster.

        Rows whose last name is "Vacant" are not yielded; they are handed
        to ``self._save_vacant_legislator`` instead. Seated members are
        enriched from their detail page (email and photo).

        Yields:
            One ``Person`` per non-vacant roster row. Nothing further is
            yielded once an empty-data row is encountered.
        """
        self.info("Scraping lower chamber for legislators.")

        chamber = "lower"
        party_names = {"D": "Democratic", "R": "Republican"}
        # Members whose roster row carries no party; keyed by the full name
        # exactly as it is assembled below.
        party_override = {
            " Green": "Democratic",
            " Sisco": "Republican"
        }

        roster_url = self._reps_url
        page = lxml.html.fromstring(self.get(roster_url).text)
        # This is the ASP.net table container
        table = page.xpath("//table[@id='theTable']")[0]
        for tr in table.xpath("tr")[3:]:
            # If a given term hasn't occurred yet, then ignore it
            # Eg, in 2017, the 2018 term page will have a blank table
            if tr.attrib.get("class") == "dxgvEmptyDataRow":
                self.warning("No House members found")
                return

            tds = tr.xpath("td")
            last_name = tds[1].text_content().strip()
            first_name = tds[2].text_content().strip()
            full_name = "{} {}".format(first_name, last_name)
            district = str(int(tds[3].text_content().strip()))

            party = tds[4].text_content().strip()
            party = party_names.get(party, party)
            if party == "":
                # Consult the per-name override BEFORE falling back to
                # "Other"; applying the fallback first made the override
                # unreachable.
                party = party_override.get(full_name, "Other")

            phone = tds[6].text_content().strip()
            room = tds[7].text_content().strip()
            address = self._assumed_address_fmt.format(room)

            # Everything that comes from the roster row alone is shared by
            # vacant and seated entries.
            person = Person(name=full_name,
                            primary_org=chamber,
                            district=district,
                            party=party)
            person.extras = {
                "first_name": first_name,
                "last_name": last_name
            }
            person.add_contact_detail(type="address",
                                      value=address,
                                      note="Capitol Office")
            if phone:
                person.add_contact_detail(type="voice",
                                          value=phone,
                                          note="Capitol Office")
            person.add_source(roster_url)

            if last_name == "Vacant":
                self._save_vacant_legislator(person)
                continue

            details_url = self._rep_details_url.format(district)
            details_page = lxml.html.fromstring(self.get(details_url).text)
            person.add_source(details_url)
            person.add_link(details_url)

            # The first mailto: link in the addresses block, ignoring a
            # bare "mailto:" that carries no address.
            mailto_links = details_page.xpath(
                '//*[@id="ContentPlaceHolder1_lblAddresses"] '
                '//a[starts-with(@href,"mailto:")]/@href')
            if mailto_links and mailto_links[0].lower() != "mailto:":
                person.add_contact_detail(
                    type="email",
                    value=mailto_links[0].split(":")[1],
                    note="Capitol Office")

            picture = details_page.xpath(
                '//*[@id="ContentPlaceHolder1_imgPhoto"]/@src')
            if picture:
                person.image = picture[0]

            yield person