def fetch_hoc_committees_session(self, session, session_url):
    """Scrape the House of Commons committee listing for one session.

    For every committee linked from the listing page, creates a
    Committee record with bilingual names, committee codes, and links.
    Committees whose EN name contains "Joint" are re-flagged as
    CHAMBER_JOINT.
    """
    listing_soup = BeautifulSoup(fetch_url(session_url), "html.parser")
    anchors = listing_soup.select(".committees-list .accordion-content a")
    for anchor in tqdm(anchors, desc=str(session), unit="committee"):
        # FR URL is filled in during the EN pass below.
        urls = {EN: url_tweak(urljoin(session_url, anchor.attrs["href"]))}
        committee = models.Committee(
            session=session,
            chamber=models.Committee.CHAMBER_HOC,
        )
        for lang in (EN, FR):
            page = BeautifulSoup(fetch_url(urls[lang]), "html.parser")
            name_key = sources.NAME_PARL_COMMITTEE[lang]
            committee.names[lang][name_key] = page.select(
                ".institution-brand")[0].text
            committee.names[lang][sources.NAME_PARL_COMMITTEE_CODE[
                lang]] = page.select(
                    ".header-title.current-committee-profile")[0].text
            committee.links[lang][name_key] = urls[lang]
            if not committee.slug:
                # Runs on the first (EN) pass only, since it sets the slug.
                if "Joint" in committee.names[lang][name_key]:
                    committee.chamber = models.Committee.CHAMBER_JOINT
                committee.slug = self.get_slug(committee)
                # Derive the FR page URL for the next loop iteration.
                urls[FR] = get_french_parl_url(urls[lang], page)
        committee.save()
    def fetch_votes_session(self, session, list_url, remote_session_id):
        """Record the HoC votes listing URLs on the session, then fetch
        every vote found in the session's XML export.
        """
        en_url = url_tweak(list_url, update={"sessionId": remote_session_id})
        session.links[EN][sources.NAME_HOC_VOTES[EN]] = en_url
        # The FR listing URL is derived from the EN page itself.
        en_soup = BeautifulSoup(fetch_url(en_url), "lxml")
        session.links[FR][sources.NAME_HOC_VOTES[FR]] = get_french_parl_url(
            en_url, en_soup)
        session.save()

        export_url = url_tweak(
            "http://www.ourcommons.ca/Parliamentarians/en/HouseVotes/ExportVotes?output=XML",
            update={"sessionId": remote_session_id},
        )
        # Pre-42nd-parliament exports may come from cache — presumably they
        # no longer change; TODO confirm.
        parl_soup = BeautifulSoup(
            fetch_url(export_url,
                      use_cache=session.parliament.number < 42), "lxml")

        # "voteparticipant" is oddly named considering the previous format
        # we found this in.
        overviews = parl_soup.find_all("voteparticipant")
        for overview in tqdm(overviews, desc=str(session), unit="vote"):
            self.fetch_vote(overview, session)
 def parse_sitting_url(self, sitting_url, session):
     """Scrape one HoC sitting page (EN pass, then FR) into a Sitting.

     Records the sitting date, a link per enabled publication tab in each
     language, an openparliament.ca debates link for the EN Hansard tab,
     and Hansard XML export links.  Failures are logged and swallowed so
     one bad sitting doesn't abort the whole run.
     """
     try:
         # Sitting number parsed out of the URL, normalized to upper case.
         sitting_number = SITTING.search(sitting_url).groups()[0].upper()
         sitting = models.Sitting(
             session=session,
             number=sitting_number,
             slug="-".join((session.slug, sitting_number.lower())),
         )
         for lang in (EN, FR):
             soup = BeautifulSoup(
                 fetch_url(
                     sitting_url,
                     # Cache only sittings before (parliament 42, sitting
                     # 190) — presumably those pages no longer change;
                     # TODO confirm. Tuple comparison is lexicographic.
                     use_cache=(session.parliament.number,
                                int(
                                    NUMBERS.search(
                                        sitting.number).groups()[0])) <
                     (42, 190),
                 ), "html.parser")
             if lang == EN:
                 # Date is taken from the EN page only.
                 sitting.date = dateparse(
                     soup.select("#load-publication-selector")[0].text)
             # Each enabled publication tab becomes a per-language link.
             for tab in soup.select(".publication-tabs > li"):
                 if "disabled" not in tab["class"]:
                     sitting.links[lang][", ".join(
                         (sources.NAME_HOC[lang], tab.a.text))] = urljoin(
                             sitting_url,
                             tab.a.attrs.get("href", sitting_url))
                     if lang == EN and "Hansard" in tab.a.text:
                         sitting.links[EN][sources.NAME_OP[
                             EN]] = f"https://openparliament.ca/debates/{sitting.date.year}/{sitting.date.month}/{sitting.date.day}/"
             xml_button = one_or_none(soup.select(".btn-export-xml"))
             if xml_button:
                 sitting.links[lang][
                     sources.NAME_HOC_HANSARD_XML[lang]] = urljoin(
                         sitting_url, xml_button.attrs["href"])
             if lang == EN:
                 # Rebind to the French page so the second pass fetches it.
                 sitting_url = get_french_parl_url(sitting_url, soup)
         sitting.save()
     except Exception as e:
         # Broad catch is deliberate: log and continue with other sittings.
         logger.exception(e)
    def fetch_senate_committees_session(self, session, session_url):
        """Scrape the Senate committee listing for one session.

        Creates a Committee per listed link with bilingual names, codes,
        and links.  Joint committees are skipped here; they are covered by
        the HoC scrape.
        """
        listing = BeautifulSoup(fetch_url(session_url), "html.parser")
        anchors = listing.select(".committee-list-boxes-wrapper a")
        for anchor in tqdm(anchors, desc=str(session), unit="committee"):
            # FR URL is filled in during the EN pass below.
            urls = {EN: url_tweak(urljoin(session_url, anchor.attrs["href"]))}
            if anchor.select(".joint-committee-list-boxes"):
                logger.debug(
                    "Skipping {} (broken, reported, joint committees are covered in HoC anyway)"
                    .format(urls[EN]))
                continue

            committee = models.Committee(
                session=session,
                chamber=models.Committee.CHAMBER_SEN,
            )
            for lang in (EN, FR):
                page = BeautifulSoup(fetch_url(urls[lang]), "html.parser")
                name_key = sources.NAME_PARL_COMMITTEE[lang]
                # Full committee name comes from the description meta tag.
                committee.names[lang][name_key] = page.select(
                    "meta[name=dc.description]")[0].attrs["content"]
                # Committee code is the second-to-last URL path segment.
                committee.names[lang][sources.NAME_PARL_COMMITTEE_CODE[
                    lang]] = urls[lang].strip("/").split("/")[-2].upper()
                committee.links[lang][name_key] = urls[lang]
                if not committee.slug:
                    # First (EN) pass only, since this sets the slug.
                    committee.slug = self.get_slug(committee)
                    # Derive the FR page URL for the next loop iteration.
                    urls[FR] = get_french_parl_url(urls[lang], page)
            committee.save()
    def fetch_vote(self, overview, session):
        """Create and save a HouseVote from one XML vote overview element.

        Fills bilingual detail links and context text, resolves the vote's
        Sitting (with an HTML-page fallback when the XML date is wrong),
        links the related Bill when one is named, then scrapes each
        parliamentarian's vote off the EN HTML details page.
        """
        number = overview.decisiondivisionnumber.text
        vote = models.HouseVote(
            slug="-".join((session.slug, number)),
            number=number,
            result=RESULT_MAPPING[overview.decisionresultname.text],
        )
        vote.links[EN][sources.NAME_HOC_VOTE_DETAILS[
            EN]] = "http://www.ourcommons.ca/Parliamentarians/en/votes/{}/{}/{}/".format(
                session.parliament.number,
                session.number,
                number,
            )
        soup = {}
        for lang in (EN, FR):
            soup[lang] = BeautifulSoup(
                fetch_url(
                    vote.links[lang][sources.NAME_HOC_VOTE_DETAILS[lang]],
                    sometimes_refetch=False),
                "html.parser",
            )
            details = one_or_none(soup[lang].select(".voteDetailsText"))
            if details:
                vote.context[lang] = soup_to_text(details)
            if lang == EN:
                # Derive the FR details link from the EN page so the next
                # loop iteration can fetch it.
                vote.links[FR][
                    sources.NAME_HOC_VOTE_DETAILS[FR]] = get_french_parl_url(
                        vote.links[lang][sources.NAME_HOC_VOTE_DETAILS[lang]],
                        soup[lang],
                    )
        try:
            vote.sitting = models.Sitting.objects.get(
                session=session,
                date=dateparse(overview.decisioneventdatetime.text),
            )
        except Exception as e:
            # Sometimes the XML listings show the wrong dates.
            # I've contacted [email protected] about this.
            # Fallback: parse the date shown on the vote's HTML details page.
            element = BeautifulSoup(
                fetch_url(vote.links[EN][sources.NAME_HOC_VOTE_DETAILS[EN]]),
                "html.parser",
            ).select("#VoteDetailsHeader .voteDetailsTopHeaderContent")[1]
            vote.sitting = models.Sitting.objects.get(
                session=session,
                date=dateparse(element.text.split(" - ")[1]),
            )

        if overview.billnumbercode.text:
            vote.bill = models.Bill.objects.get(
                slug="-".join((session.slug,
                               *overview.billnumbercode.text.split("-"))))

        vote.save()

        # Fetch the parliamentarian votes
        # TODO: This has been temporarily written to scrape off of HTML
        #       as the new XML format omits party affiliation.
        for row in soup[EN].select(
                "#parlimant > tbody > tr"
        ):  # Note the source code misspells "parliament"
            self.fetch_vote_participant(row, vote, soup)