def fetch_votes_session(self, session, list_url, remote_session_id):
    """Fetch and store every House of Commons vote for one session."""
    # Record the EN votes-listing URL, then derive the FR counterpart from
    # the language toggle on the EN page.
    en_url = url_tweak(list_url, update={"sessionId": remote_session_id})
    session.links[EN][sources.NAME_HOC_VOTES[EN]] = en_url
    session.links[FR][sources.NAME_HOC_VOTES[FR]] = get_french_parl_url(
        en_url,
        BeautifulSoup(fetch_url(en_url), "lxml"),
    )
    session.save()

    export_url = url_tweak(
        "http://www.ourcommons.ca/Parliamentarians/en/HouseVotes/ExportVotes?output=XML",
        update={"sessionId": remote_session_id},
    )
    # Parliaments before the 42nd are finished, so their XML may be cached.
    parl_soup = BeautifulSoup(
        fetch_url(export_url, use_cache=session.parliament.number < 42),
        "lxml",
    )
    # Oddly named considering the previous format we found this in
    for overview in tqdm(
        parl_soup.find_all("voteparticipant"),
        desc=str(session),
        unit="vote",
    ):
        self.fetch_vote(overview, session)
def fetch_general_election(self, parliament):
    """Create and save the GeneralElection record for the given parliament."""
    logger.debug("Fetching general election, {}".format(parliament))
    url = "https://lop.parl.ca/About/Parliament/FederalRidingsHistory/hfer.asp?Search=Gres&genElection={}".format(
        parliament.number)
    election_data = self.general_election_data[parliament.number]
    election_year = election_data["date"].year
    # Per-language LoP result pages plus the Wikipedia article for the year.
    links = {
        EN: {
            sources.NAME_LOP_GENERAL_ELECTION[EN]:
                url_tweak(url, update={"Language": sources.LANG_LOP[EN]}),
            sources.NAME_WIKI[EN]:
                "https://en.wikipedia.org/wiki/Canadian_federal_election,_{}"
                .format(election_year),
        },
        FR: {
            sources.NAME_LOP_GENERAL_ELECTION[FR]:
                url_tweak(url, update={"Language": sources.LANG_LOP[FR]}),
            sources.NAME_WIKI[FR]:
                "https://fr.wikipedia.org/wiki/Élections_fédérales_canadiennes_de_{}"
                .format(election_year),
        },
    }
    election = models.GeneralElection(
        number=parliament.number,
        parliament=parliament,
        links=links,
        **election_data,
    )
    election.save()
def fetch_hoc_committees_session(self, session, session_url):
    """Fetch each House of Commons committee listed for one session."""
    committee_links = BeautifulSoup(
        fetch_url(session_url),
        "html.parser",
    ).select(".committees-list .accordion-content a")
    for link in tqdm(committee_links, desc=str(session), unit="committee"):
        committee_url = {
            EN: url_tweak(urljoin(session_url, link.attrs["href"]))
        }
        committee = models.Committee(
            session=session,
            chamber=models.Committee.CHAMBER_HOC,
        )
        for lang in (EN, FR):
            soup = BeautifulSoup(fetch_url(committee_url[lang]), "html.parser")
            committee.names[lang][sources.NAME_PARL_COMMITTEE[lang]] = (
                soup.select(".institution-brand")[0].text)
            committee.names[lang][sources.NAME_PARL_COMMITTEE_CODE[lang]] = (
                soup.select(".header-title.current-committee-profile")[0].text)
            committee.links[lang][sources.NAME_PARL_COMMITTEE[lang]] = (
                committee_url[lang])
            if not committee.slug:
                # First (EN) pass only: joint committees are recognized by
                # their English name before a slug exists.
                if "Joint" in committee.names[lang][
                        sources.NAME_PARL_COMMITTEE[lang]]:
                    committee.chamber = models.Committee.CHAMBER_JOINT
                committee.slug = self.get_slug(committee)
                # The FR URL comes from the language toggle on the EN page.
                committee_url[FR] = get_french_parl_url(
                    committee_url[lang], soup)
        committee.save()
def augment_election_wiki(self, election):
    """Parse {{Infobox election}} from the Wikipedia edit page into the election.

    Stores the parsed structure on ``election.wiki_info_box``: per-party
    fields (keys ending in a number, e.g. ``leader3``) are grouped into
    ``infobox["parties"]``; everything else becomes a scalar entry.
    """
    soup = BeautifulSoup(fetch_url(url_tweak(
        election.links[EN][sources.NAME_WIKI[EN]],
        update={"action": "edit"},
    )), "html.parser")

    # Get the info box from the raw wikitext inside the edit textarea.
    page_source = soup.select("#wpTextbox1")[0].text
    infobox_lines = re.search("{{Infobox election\n(.*?)\n}}", page_source,
                              re.S | re.I).groups()[0].splitlines()
    infobox = {}
    infobox["parties"] = []
    for key, value in [
            line[2:].split("=", 1) for line in infobox_lines
            if line.startswith("| ")
    ]:
        key = key.strip()
        value = value.strip()
        # Match the full trailing number: the previous code used
        # int(key[-1]), so keys numbered 10+ (e.g. "party10") read only
        # the final digit and mis-filed into the wrong (or last) bucket.
        numbered = re.search(r"^(.*?)(\d+)$", key)
        if numbered:
            party_place = int(numbered.group(2)) - 1
            while len(infobox["parties"]) <= party_place:
                infobox["parties"].append({})
            infobox["parties"][party_place][numbered.group(1)] = value
        else:
            infobox[key] = value
    election.wiki_info_box = infobox
    election.save()
def augment_ridings_ec(self):
    """Map ridings to Elections Canada district numbers, then augment each."""
    listing_soup = BeautifulSoup(
        fetch_url(url_tweak(
            "http://www.elections.ca/Scripts/vis/SearchProvinces?PROV=CA&PROVID=99999&QID=-1&PAGEID=20",
            update={"L": sources.LANG_EC[EN]},
        )),
        "html.parser",
    )
    for row in tqdm(listing_soup.select("table tr")):
        cells = row.find_all("td", recursive=False)
        if not cells:
            continue  # rows without direct <td> children carry no data
        # NOTE(review): slug is built from cells[1] then cells[0] —
        # presumably province then riding name; confirm against EC markup.
        riding = models.Riding.objects.get(slug=slugify("{} {}".format(
            cells[1].text,
            cells[0].text,
        )))
        riding.electoral_district_number = parse_qs(
            urlparse(cells[0].a.attrs["href"]).query)["ED"][0]
        self.cached_ridings[riding.electoral_district_number] = riding
        riding.save()
    for riding in tqdm(
            models.Riding.objects.filter(
                electoral_district_number__isnull=False),
            desc="Augment Ridings, Elections Canada",
            unit="riding",
    ):
        self.augment_riding_ec(riding)
def fetch_provinces(self):
    """Discover provinces from the LoP list pages (EN pass, then FR pass)."""
    url = url_tweak(self.ROOT_URL, update={"Language": sources.LANG_LOP[EN]})
    for link in tqdm(
            BeautifulSoup(
                fetch_url(url),
                "html.parser",
            ).select("#ctl00_pnlContent a"),
            desc="Fetch Provinces, LoP (EN)",
            unit="province",
    ):
        if not link.attrs.get("id", "").startswith(
                "ctl00_cphContent_repProvinces_"):
            continue
        province, created = models.Province.objects.get_or_create(
            slug=slugify(link.text.strip()))
        url_en = url_tweak(
            urljoin(url, link.attrs["href"]),
            remove=("MenuID", "MenuQuery"),
            update={"Section": "All"},
        )
        self.augment_province(province, EN, url_en)

    # NOTE(review): the EN pass sets Language=sources.LANG_LOP[EN] but this
    # pass sets Language=FR directly — confirm FR equals sources.LANG_LOP[FR].
    url = url_tweak(self.ROOT_URL, update={"Language": FR})
    for link in tqdm(
            BeautifulSoup(
                fetch_url(url),
                "html.parser",
            ).select("#ctl00_pnlContent a"),
            desc="Fetch Provinces, LoP (FR)",
            unit="province",
    ):
        if not link.attrs.get("id", "").startswith(
                "ctl00_cphContent_repProvinces_"):
            continue
        url_fr = url_tweak(
            urljoin(url, link.attrs["href"]),
            remove=("MenuID", "MenuQuery"),
            update={"Section": "All"},
        )
        # Find the province saved during the EN pass by matching the EN
        # variant of this FR URL against the stored links.
        province = models.Province.objects.get(
            links__contains=url_tweak(
                url_fr,
                update={"Language": sources.LANG_LOP[EN]},
            ))
        self.augment_province(province, FR, url_fr)
def handle(self, *args, **options):
    """Augment parties with names, links, and item codes from the LoP list.

    Management-command entry point; walks the EN and FR list pages, matching
    rows to existing Party records by LoP item code, then by mapped name.
    """
    if options["verbosity"] > 1:
        logger.setLevel(logging.DEBUG)
    cached_parties = get_cached_dict(models.Party.objects.all())
    list_url = "https://lop.parl.ca/parlinfo/Lists/Party.aspx"
    for lang in (EN, FR):
        for a in tqdm(
                BeautifulSoup(
                    fetch_url(
                        url_tweak(
                            list_url,
                            update={"Language": sources.LANG_LOP[lang]})),
                    "html.parser").select("td > a"),
                desc="Augment Parties, LoP",
                unit="party",
        ):
            if "_lnkParty_" not in a.attrs.get("id", ""):
                continue
            url = url_tweak(
                urljoin(list_url, a.attrs["href"]),
                update={"Section": "ALL"},
                remove=("MenuID", "MenuQuery"),
            )
            lop_item_code = sources.LOP_CODE.search(url).group().lower()
            party = models.Party.objects.filter(
                lop_item_code=lop_item_code).first()
            if not party:
                name = sources.WHITESPACE.sub(" ", a.text.strip())
                name = LOP_LIST_MAPPING.get(name, name)
                if name is None:
                    continue  # explicitly mapped to None: skip this entry
                party = get_cached_obj(cached_parties, name)
            party.links[lang][sources.NAME_LOP_PARTY[lang]] = url
            party.names[lang][
                sources.NAME_LOP_PARTY[lang]] = a.text.strip()
            # Reuse the code computed above (previously the URL was
            # regex-searched a second time for the same value).
            party.lop_item_code = lop_item_code
            soup = BeautifulSoup(fetch_url(url), "html.parser")
            for link in soup.select("#ctl00_cphContent_dataLinks a"):
                party.links[lang][sources.AVAILABILITY_WARNINGS.sub(
                    "", link.text.strip())] = link.attrs["href"]
            party.save()
def fetch_riding(self, riding, url):
    """Store EN/FR LoP riding-history links and pre-warm the URL cache."""
    for lang in (EN, FR):
        lang_url = url_tweak(url, update={"Language": sources.LANG_LOP[lang]})
        riding.links[lang][sources.NAME_LOP_RIDING_HISTORY[lang]] = lang_url
        try:
            # Best effort: fetch now so later augmentation hits the cache.
            fetch_url(lang_url)
        except Exception as e:
            logger.exception(e)
    riding.save()
    self.cached_ridings[riding.slug] = riding
def augment_riding(self, riding):
    """Name the riding from its LoP history pages and link related ridings."""
    try:
        # FR first, so after the loop `url` and `soup` hold the EN page,
        # which the related-riding scan below reads.
        for lang in (FR, EN):
            url = riding.links[lang][sources.NAME_LOP_RIDING_HISTORY[lang]]
            soup = BeautifulSoup(fetch_url(url), "html.parser")
            riding.names[lang][sources.NAME_LOP_RIDING_HISTORY[lang]] = (
                soup.select("h4")[0].text.split(", ")[0])
    except (KeyError, FetchFailure, FetchSuppressed) as e:
        logger.exception(e)
        return
    riding.save()
    # "#previous" / "#became" anchor lists of predecessor/successor ridings.
    for tag_id in ("#previous", "#became"):
        anchors = soup.select(tag_id)
        if not anchors:
            continue
        for link in anchors[0].parent.select("a"):
            match = re.search(
                r"^(?P<name>.*) \((?P<province>.*)\)\((?P<daterange>.*)\)",
                link.text).groupdict()
            riding_slug = slugify("{province}-{name}".format(**match))
            try:
                related_riding = get_cached_obj(
                    self.cached_ridings, riding_slug)
            except AssertionError:
                # Unseen riding: create it and pull its own LoP pages.
                province = get_cached_obj(self.cached_provinces,
                                          match["province"])
                related_riding, created = models.Riding.objects.get_or_create(
                    slug=riding_slug, province=province)
                logger.debug("Auxilliary riding detected: {}".format(
                    riding_slug))
                for lang in (EN, FR):
                    if sources.NAME_LOP_RIDING_HISTORY[
                            lang] not in related_riding.links[lang]:
                        related_riding.links[lang][
                            sources.NAME_LOP_RIDING_HISTORY[lang]] = url_tweak(
                                urljoin(url, link.attrs["href"]),
                                update={
                                    "Language": sources.LANG_LOP[lang]
                                },
                            )
                        related_riding.names[lang][
                            sources.NAME_LOP_RIDING_HISTORY[
                                lang]] = BeautifulSoup(
                                    fetch_url(related_riding.links[lang][
                                        sources.NAME_LOP_RIDING_HISTORY[lang]]
                                    ),
                                    "html.parser",
                                ).select("h4")[0].text.split(", ")[0]
                related_riding.save()
            riding.related_historically.add(related_riding)
def fetch_by_elections(self, parliament):
    """Create a ByElection record per distinct by-election date in the parliament."""
    logger.debug("Fetching by-elections, {}".format(parliament))
    url = "https://lop.parl.ca/About/Parliament/FederalRidingsHistory/hfer.asp?Language=E&Search=Bres&genElection={}".format(
        parliament.number)
    # Caching for later use
    soup = BeautifulSoup(fetch_url(url), "html.parser")
    dates = {
        dateparse(
            LOP_ROW_RIDING.search(row.text.strip()).groupdict()["date"])
        for row in soup.select(".rid")
    }
    for date in dates:
        date_param = date.strftime("%Y/%m/%d")
        models.ByElection.objects.get_or_create(
            slug=f"{parliament.number}-{date}",
            parliament=parliament,
            date=date,
            links={
                EN: {
                    sources.NAME_LOP_BY_ELECTION[EN]:
                        url_tweak(
                            url,
                            remove=("genElection", ),
                            update={"byElection": date_param}),
                },
                FR: {
                    sources.NAME_LOP_BY_ELECTION[FR]:
                        url_tweak(
                            url,
                            remove=("genElection", ),
                            update={
                                "byElection": date_param,
                                "Language": sources.LANG_LOP[FR],
                            }),
                },
            },
        )
def fetch_parliaments(self):
    """Fetch parliament records from the LoP list with wiki/LoP/Canadiana links.

    Details are only (re)fetched for newly created records or for the 42nd
    parliament onward; earlier parliaments are treated as immutable.
    """
    list_url = "https://lop.parl.ca/parlinfo/Lists/Parliament.aspx"
    for link in tqdm(
            BeautifulSoup(
                fetch_url(list_url),
                "html.parser",
            ).select("#ctl00_cphContent_ctl00_grdParliamentList td > a"),
            desc="Fetch Parliaments, LoP",
            unit="parliament",
    ):
        parliament, created = models.Parliament.objects.get_or_create(
            number=int(REVERSE_ORDINAL.sub(r"\1", link.text)),
        )
        if created or parliament.number >= 42:
            # Bug fix: this previously reassigned `url` (the list URL), so
            # every later iteration resolved its relative href against the
            # previous parliament's tweaked URL instead of the list page.
            parl_url = url_tweak(
                urljoin(list_url, link.attrs["href"]),
                remove=("MenuID", "MenuQuery"),
                update={"Section": "All"},
            )
            parliament.links = {
                EN: {sources.NAME_WIKI[EN]: "https://en.wikipedia.org/wiki/{}_Canadian_Parliament".format(inflector.ordinal(parliament.number))},
                FR: {sources.NAME_WIKI[FR]: "https://fr.wikipedia.org/wiki/{}{}_législature_du_Canada".format(parliament.number, "re" if parliament.number == 1 else "e")},
            }
            for lang in (EN, FR):
                parliament.links[lang][
                    sources.NAME_LOP_PARLIAMENT[lang]] = url_tweak(
                        parl_url, update={"Language": sources.LANG_LOP[lang]})
                if parliament.number <= 35:
                    # Canadiana's digitized record covers parliaments 1-35.
                    parliament.links[lang][sources.NAME_CANADIANA[lang]] = "http://parl.canadiana.ca/search?usrlang={}&lang={}&identifier=P{}".format(
                        sources.LANG_CANADIANA_UI[lang],
                        sources.LANG_CANADIANA_CONTENT[lang],
                        parliament.number,
                    )
            # Seat count comes from the last column of the first row of the
            # party-standings grid on the EN parliament page.
            parliament.seats = int(BeautifulSoup(
                fetch_url(parliament.links[EN][sources.NAME_LOP_PARLIAMENT[EN]]),
                "html.parser",
            ).select("#ctl00_cphContent_ctl06_pnlSectionPartyStandingsContent .GridRows")[0].contents[-1].text)
            parliament.save()
def fetch_hoc_committees(self):
    """Walk the HoC committee list's session selectors and fetch each session."""
    list_url = "http://www.ourcommons.ca/Committees/en/List"
    for link in tqdm(
            BeautifulSoup(
                fetch_url(list_url),
                "html.parser",
            ).select(".session-selector"),
            desc="Fetch Committees, HoC",
            unit="session",
    ):
        # Parliament and session numbers live in the link's query string.
        query = parse_qs(urlparse(link.attrs["href"]).query)
        session = Session.objects.get(
            parliament__number=query["parl"][0],
            number=query["session"][0],
        )
        self.fetch_hoc_committees_session(
            session,
            url_tweak(urljoin(list_url, link.attrs["href"])),
        )
def fetch_senate_committees(self):
    """Walk the Senate committee list's session links and fetch each session."""
    list_url = "https://sencanada.ca/en/committees/"
    for link in tqdm(
            BeautifulSoup(
                fetch_url(list_url),
                "html.parser",
            ).select(".session-dropdown-session a"),
            desc="Fetch Committees, Senate",
            unit="session",
    ):
        # hrefs end in ".../{parliament}-{session}/".
        parliament_number, session_number = link.attrs["href"].strip(
            "/").rsplit("/", 1)[1].split("-")
        session = Session.objects.get(
            parliament__number=parliament_number,
            number=session_number,
        )
        self.fetch_senate_committees_session(
            session,
            url_tweak(urljoin(list_url, link.attrs["href"])),
        )
def fetch_parliamentarian(self, slug, name, lang_naive_url):
    """Create a parliamentarian from their LoP profile (EN and FR) with photo.

    No-op when a record with this slug already exists.
    """
    parliamentarian, created = models.Parliamentarian.objects.get_or_create(
        slug=slug)
    if not created:
        return
    for lang in (EN, FR):
        parliamentarian.names[lang][
            sources.NAME_LOP_PARLIAMENT[lang]] = name
        url = url_tweak(lang_naive_url,
                        update={"Language": sources.LANG_LOP[lang]})
        parliamentarian.links[lang][
            sources.NAME_LOP_PARLIAMENTARIAN[lang]] = url
        soup = BeautifulSoup(fetch_url(url), "html.parser")
        parliamentarian.names[lang][sources.NAME_LOP_PARLIAMENTARIAN[
            lang]] = sources.WHITESPACE.sub(
                " ", soup.select("#ctl00_cphContent_lblTitle")[0].text)
        for link in soup.select("#ctl00_cphContent_dataLinks a"):
            parliamentarian.links[lang][sources.AVAILABILITY_WARNINGS.sub(
                "", link.text.strip())] = link.attrs["href"]
        try:
            parliamentarian.lop_item_code = sources.LOP_CODE.search(
                url).group().lower()
            parliamentarian.birthdate = soup.select(
                "#ctl00_cphContent_DateOfBirthData")[0].text.strip().replace(
                    ".", "-")
        except Exception as e:
            # Best effort — some profiles have no code or birthdate. The
            # previous bare `except:` also swallowed SystemExit and
            # KeyboardInterrupt; narrow to Exception and log the miss.
            logger.debug(e)
    # Download the parliamentarian's photo if they have one
    photo_url = urljoin(
        url,
        soup.select("#ctl00_cphContent_imgParliamentarianPicture")
        [0].attrs["src"])
    code = sources.LOP_CODE.search(photo_url).group().lower()
    # The all-zero GUID is the LoP placeholder for "no photo".
    if code != "00000000-0000-0000-0000-000000000000":
        filename = "{}.jpg".format(code)
        filepath = parliamentarian.photo.field.upload_to(None, filename)
        if os.path.exists(os.path.join(settings.MEDIA_ROOT, filepath)):
            # Already downloaded on a previous run; just point at it.
            parliamentarian.photo = filepath
        else:
            parliamentarian.photo.save(
                filename, ContentFile(requests.get(photo_url).content))
    parliamentarian.save()
def fetch_bills_session(self, session):
    """Fetch all bills for a session from the LEGISinfo XML export."""
    cached_committees = get_cached_dict(
        models.Committee.objects.filter(session=session))
    url = "http://www.parl.ca/LegisInfo/Home.aspx?download=xml&ParliamentSession={}-{}".format(
        session.parliament.number, session.number)
    # Completed parliaments (< 42) are static, so their XML may be cached.
    soup = BeautifulSoup(
        fetch_url(url, use_cache=session.parliament.number < 42), "lxml")
    for bill_soup in tqdm(
            soup.find_all("bill"),
            desc=str(session),
            unit="bill",
    ):
        number_tag = bill_soup.select("billnumber")[0]
        # e.g. prefix "C", number "38", optional suffix -> "C-38".
        bill_number = "-".join(filter(None, (
            number_tag.attrs["prefix"],
            number_tag.attrs["number"],
            number_tag.get("suffix", None),
        )))
        bill = models.Bill(
            session=session,
            slug=slugify("{}-{}".format(session.slug, bill_number)),
        )
        for lang in (EN, FR):
            bill.links[lang][sources.NAME_LEGISINFO[lang]] = url_tweak(
                "http://www.parl.gc.ca/LegisInfo/BillDetails.aspx",
                update={
                    "billId": bill_soup.attrs["id"],
                    "Language": sources.LANG_LEGISINFO_UI[lang],
                },
            )
            bill.names[lang][sources.NAME_LEGISINFO_NUMBER[lang]] = bill_number
            bill.names[lang][sources.NAME_LEGISINFO_TITLE[lang]] = bill_soup.select(
                "billtitle > title[language={}]".format(
                    sources.LANG_LEGISINFO_XML[lang]))[0].text
            title_short = bill_soup.select(
                "shorttitle > title[language={}]".format(
                    sources.LANG_LEGISINFO_XML[lang]))[0].text
            if title_short:
                bill.names[lang][
                    sources.NAME_LEGISINFO_TITLE_SHORT[lang]] = title_short
        bill.save()
        # The previous code looped over every <event> but never used the
        # event: each pass re-selected the bill's first committee and
        # re-added it (M2M add is idempotent), so one guarded pass is
        # equivalent. NOTE(review): per-event committees may have been the
        # original intent — confirm against the LEGISinfo XML schema.
        if bill_soup.select("event"):
            try:
                committee_soup = bill_soup.select("committee[accronym]")[0]  # They misspelled "acronym" in their XML
                code = committee_soup.attrs["accronym"]
                if code != "WHOL":
                    bill.committees.add(
                        get_cached_obj(cached_committees, code))
            except IndexError:
                pass  # no committee recorded for this bill
def parse_session(self, session):
    """Parse every sitting linked from the session's publication calendar."""
    session_url = url_tweak(
        "http://www.ourcommons.ca/DocumentViewer/en/SessionPublicationCalendarsWidget?organization=HOC&publicationTypeId=37",
        update={
            "parliament": session.parliament.number,
            "session": session.number,
        },
    )
    # Completed parliaments (< 42) no longer change; cache their calendars.
    calendar_soup = BeautifulSoup(
        fetch_url(
            session_url,
            use_cache=session.parliament.number < 42,
        ),
        "html.parser")
    for sitting_link in tqdm(
            calendar_soup.select("td a"),
            desc=str(session),
            unit="sitting",
    ):
        self.parse_sitting_url(
            urljoin(session_url, sitting_link.attrs["href"]), session)
def fetch_parliamentarians(self, parliament):
    """Index parliamentarian detail URLs by slugified name for later fetching."""
    logger.debug("Fetch parliamentarians, {}".format(parliament))
    url = parliament.links[EN][sources.NAME_LOP_PARLIAMENT[EN]]
    for link in tqdm(
            BeautifulSoup(
                fetch_url(url),
                "html.parser").select("a[href^=Parliamentarian]"),
            desc=str(parliament),
            unit="parliamentarian",
    ):
        detail_url = url_tweak(
            urljoin(url, link.attrs["href"]),
            update={
                "MoreInfo": "True",
                "Section": "All",
            },
        )
        # We slugify the parliamentarian's name to disambiguate
        # names like "Marcel Masse" and "Marcel Massé"
        self.cache_parliamentarians[slugify(link.text)][detail_url] = link.text
def handle(self, *args, **options):
    """Augment parties with Elections Canada names and links (EN and FR).

    Management-command entry point; parses the EC registered-parties page
    in each language and records the EC long/short names plus anchor links.
    """
    if options["verbosity"] > 1:
        logger.setLevel(logging.DEBUG)
    cached_parties = get_cached_dict(models.Party.objects.all())
    # Bug fix: the query string had been mangled — "&section" was swallowed
    # into the "§" (&sect;) entity, yielding "…index§ion=pol".
    url = "http://www.elections.ca/content.aspx?dir=par&document=index&section=pol"
    for lang in (EN, FR):
        url_lang = url_tweak(url, update={"lang": sources.LANG_EC[lang]})
        ec_soup = BeautifulSoup(fetch_url(url_lang), "html.parser")
        for h3 in ec_soup.select("h3.partytitle"):
            name = h3.text.strip()
            name_short = h3.attrs["id"]
            name = EC_MAPPING.get(name, name)
            try:
                party = get_cached_obj(cached_parties, name)
            except AssertionError:
                # Full name unknown: fall back to the short code.
                party = get_cached_obj(cached_parties, name_short)
            party.names[lang][sources.NAME_EC[lang]] = name
            party.names[lang][sources.NAME_EC_SHORT[lang]] = name_short
            party.links[lang][sources.NAME_EC[lang]] = "{}#{}".format(
                url_lang, name_short)
            party.save()
            # Make both spellings resolve to this party next time around.
            cached_parties[name].add(party)
            cached_parties[name_short].add(party)
def fetch_senate_committees_session(self, session, session_url):
    """Fetch each Senate committee listed for one session."""
    committee_links = BeautifulSoup(
        fetch_url(session_url),
        "html.parser").select(".committee-list-boxes-wrapper a")
    for link in tqdm(committee_links, desc=str(session), unit="committee"):
        committee_url = {
            EN: url_tweak(urljoin(session_url, link.attrs["href"]))
        }
        if link.select(".joint-committee-list-boxes"):
            logger.debug(
                "Skipping {} (broken, reported, joint committees are covered in HoC anyway)"
                .format(committee_url[EN]))
            continue
        committee = models.Committee(
            session=session,
            chamber=models.Committee.CHAMBER_SEN,
        )
        for lang in (EN, FR):
            soup = BeautifulSoup(fetch_url(committee_url[lang]),
                                 "html.parser")
            committee.names[lang][sources.NAME_PARL_COMMITTEE[lang]] = (
                soup.select("meta[name=dc.description]")[0].attrs["content"])
            # The committee code is the second-to-last path segment.
            committee.names[lang][sources.NAME_PARL_COMMITTEE_CODE[lang]] = (
                committee_url[lang].strip("/").split("/")[-2].upper())
            committee.links[lang][sources.NAME_PARL_COMMITTEE[lang]] = (
                committee_url[lang])
            if not committee.slug:
                # First (EN) pass only: assign the slug and discover the FR
                # URL via the language toggle on the EN page.
                committee.slug = self.get_slug(committee)
                committee_url[FR] = get_french_parl_url(
                    committee_url[lang], soup)
        committee.save()
def augment_parties_by_parliament_file(self, parliament, cached_parties):
    """Reconcile party affiliations between HFER and the LoP parliament file.

    https://lop.parl.ca/About/Parliament/FederalRidingsHistory/hfer.asp?Language=E&Search=C says
    "Some discrepancies in data may appear. Data appearing in the Federal
    Member Profile (biography) should be considered the authoritative
    source." So we might need to change the party noted from HFER to that
    detected through the Parliament file.
    """
    for lang in (EN, FR):
        url = parliament.links[lang][sources.NAME_LOP_PARLIAMENT[lang]]
        soup = BeautifulSoup(fetch_url(url), "html.parser")
        for row in tqdm(
                soup.select("#ctl00_cphContent_ctl04_repGeneralElection_ctl00_grdMembers tr"),
                desc=str(parliament),
                unit="party",
        ):
            cells = row.find_all("td", recursive=False)
            if not cells:
                continue
            if (parliament.number, cells[0].a.text) in BYPASSED:
                continue
            parliamentarian_name = sources.WHITESPACE.sub(
                " ", cells[0].a.text.strip())
            party_name = sources.WHITESPACE.sub(" ", cells[2].text.strip())
            # Normalize the 13th parliament's Unionist labels to their
            # fuller historical names.
            if parliament.number == 13:
                if party_name == "Unionist (Liberal)":
                    party_name = "Unionist (Conservative and Liberal)"
                elif party_name == "Union (libéral)":
                    party_name = "Union (conservateurs et libéraux)"
            if party_name.startswith(("Independent", "Indépendant")):
                continue
            lop_item_code = sources.LOP_CODE.search(
                cells[0].a.attrs["href"]).group().lower()
            election_candidates = ElectionCandidate.objects.filter(
                election_riding__general_election=parliament.general_election,
                parliamentarian__lop_item_code=lop_item_code,
            )
            party = election_candidates.first().party
            if not party:
                # HFER recorded no party; the parliament file may name one
                # we already know.
                if party_name in cached_parties:
                    logger.debug("{}, {}, shows HFER as an independent, but now shows up as {}".format(
                        parliament,
                        parliamentarian_name,
                        party_name,
                    ))
                    election_candidates.update(
                        party=cached_parties[party_name])
            elif (party.slug, party_name) in NEGATIVE:
                # Known discrepancy; log on the EN pass only and move on.
                if lang == EN:
                    logger.debug("{}, {}, shows in HFER as {}, but PFile as {}".format(
                        parliament,
                        parliamentarian_name,
                        party.names[lang][sources.NAME_LOP_RIDING_HISTORY[lang]],
                        party_name,
                    ))
            elif sources.NAME_LOP_PARLIAMENT[lang] not in party.names[lang]:
                # First sighting of this party in a parliament file: record
                # its name and link, and remember the name mapping.
                party.names[lang][sources.NAME_LOP_PARLIAMENT[lang]] = party_name
                if cells[2].a.attrs.get("href", None):
                    party.links[lang][sources.NAME_LOP_PARTY[lang]] = url_tweak(
                        urljoin(url, cells[2].a.attrs["href"]),
                        update={"Section": "All"},
                    )
                party.save()
                logger.debug("{}, mapping {} to {} via {}".format(
                    parliament, party.slug, party_name, parliamentarian_name))
                cached_parties[party_name] = party
            elif party.names[lang][sources.NAME_LOP_PARLIAMENT[lang]] != party_name:
                # Parliament file disagrees with what we knew; trust it.
                logger.debug("{}, {}, shows HFER as {}, known previous in PFile as {}, but now shows up as {}".format(
                    parliament,
                    parliamentarian_name,
                    party.names[lang][sources.NAME_LOP_RIDING_HISTORY[lang]],
                    party.names[lang][sources.NAME_LOP_PARLIAMENT[lang]],
                    party_name,
                ))
                election_candidates.update(party=party)
        # NOTE(review): placed inside the language loop (it reads `soup`);
        # confirm the original indentation agreed.
        government_party_name = sources.WHITESPACE.sub(
            " ",
            soup.select("#ctl00_cphContent_GoverningPartyData")[0].text.strip())
        parliament.government_party = cached_parties[government_party_name]
        parliament.save()