Пример #1
0
 def read(self, last_update: str) -> pd.DataFrame:
     """Fetch the yearly report page, follow the newest monthly report link, and parse it."""
     yearly_page = get_soup(self.source_url)
     # The first anchor inside the content-detail container points at the newest monthly report.
     newest_link = yearly_page.find("div", class_="col-lg-12", id="content-detail").find("a")["href"]
     monthly_page = get_soup(newest_link)
     return self._parse_data(monthly_page, last_update)
Пример #2
0
def read(source: str) -> pd.Series:
    """Resolve the infogram embedded at *source* and extract vaccination metrics."""
    landing = get_soup(source)
    infogram_url = parse_infogram_link(landing)
    infogram_soup = get_soup(infogram_url)
    infogram_data = parse_infogram_data(infogram_soup)
    record = {
        "date": parse_infogram_date(infogram_data),
        "source_url": source,
    }
    record.update(parse_infogram_vaccinations(infogram_data))
    return pd.Series(record)
Пример #3
0
def read(source: str):
    """Locate the metrics PDF linked from *source* and parse it into a Series."""
    page = get_soup(source)
    url = parse_pdf_link(page, source)
    # Refuse anything that is not a PDF; the parser only handles PDFs.
    if not url.endswith(".pdf"):
        raise ValueError(f"File reporting metrics is not a PDF: {url}!")
    return pd.Series(parse_data(url))
Пример #4
0
def connect_parse_data(source: str) -> pd.Series:
    """Scrape the four dashboard counters and derive vaccination metrics for Jamaica."""
    soup = get_soup(source)

    counters = soup.find_all(class_="elementor-counter-number")
    assert len(counters) == 4, "New counter in dashboard?"

    # Counter order on the page: total doses, first dose, second dose, single-shot doses.
    total, first, second, unique = (clean_count(c["data-to-value"]) for c in counters)

    return pd.Series(
        data={
            "total_vaccinations": total,
            # Single-shot recipients count as both started and fully vaccinated.
            "people_vaccinated": first + unique,
            "people_fully_vaccinated": second + unique,
            "date": localdate("America/Jamaica"),
        }
    )
Пример #5
0
def parse_vaccinations(elem) -> dict:
    """Extract vaccination metrics from the news article linked by *elem*.

    Follows the link inside *elem*'s parent card, joins the article's
    paragraphs into one text blob and searches it with (Chinese-language)
    patterns. Only metrics actually found in the text are returned, so the
    result may contain any subset of the three keys.
    """
    # Get news text
    url = elem.find_parent(class_="card").find("a").get("href")
    soup = get_soup(url, verify=False)
    text = "\n".join([p.text for p in soup.find("article").find_all("p")])

    # Find metrics; each pattern may be absent from a given article.
    # (Earlier pattern variants were removed as dead, commented-out code.)
    metrics = dict()
    total_vaccinations = re.search(r"疫苗劑數為(?P<count>[\d,]*)劑", text)
    people_vaccinated = re.search(r"已接種人數共有(?P<count>[\d,]*)人", text)
    people_fully_vaccinated = re.search(r"已接種第2劑的?有([\d,]{6,})", text)

    if total_vaccinations:
        metrics["total_vaccinations"] = clean_count(total_vaccinations.group(1))
    if people_vaccinated:
        metrics["people_vaccinated"] = clean_count(people_vaccinated.group(1))
    if people_fully_vaccinated:
        metrics["people_fully_vaccinated"] = clean_count(people_fully_vaccinated.group(1))
    return metrics
Пример #6
0
 def read(self) -> pd.DataFrame:
     """Load data.

     Finds the spreadsheet link on the source page and reads its per-date
     sheet. Debug printing and dead commented-out code were removed.
     """
     soup = utils.get_soup(self.source_url)
     link = self._parse_file_link(soup)
     # Only the "Date" sheet is currently consumed.
     return utils.read_xlsx_from_url(link, sheet_name="Date")
Пример #7
0
 def _parse_pdf_link(self, soup) -> str:
     """Resolve the final PDF URL behind the landing page's download link.

     The download page sometimes renders without the viewer button, so the
     fetch is retried up to 10 times. Raises ValueError if the button never
     appears (previously this crashed with an opaque TypeError on ``None``).
     """
     a = soup.find(class_="download").find("a")
     url_pdf = f"{self.source_url}{a['href']}"
     for _ in range(10):
         soup = get_soup(url_pdf)
         a = soup.find(class_="viewer-button")
         if a is not None:
             break
     else:
         raise ValueError(f"Could not locate viewer button at {url_pdf}")
     return f"{self.source_url}{a['href']}"
Пример #8
0
 def read(self, last_update: str) -> pd.DataFrame:
     """Walk the paginated listing (offset step 5) collecting records newer than *last_update*."""
     collected = []
     for offset in range(0, 5 * self._num_max_pages, 5):
         page = get_soup(f"{self.source_url}/(offset)/{offset}/")
         new_records, keep_going = self.parse_data(page, last_update)
         collected.extend(new_records)
         # parse_data signals when older records were reached.
         if not keep_going:
             break
     return pd.DataFrame(collected)
Пример #9
0
 def read(self) -> pd.Series:
     """Scrape the dashboard and return vaccination counts plus their date.

     ``total_vaccinations`` is derived as the sum of first and second doses.
     """
     soup = get_soup(self.source_url)
     people_vaccinated, people_fully_vaccinated = self.parse_vaccinated(soup)
     date_str = self.parse_date(soup)
     # Previously the Series was wrapped in pd.Series twice; once is enough.
     return pd.Series({
         "people_vaccinated": people_vaccinated,
         "people_fully_vaccinated": people_fully_vaccinated,
         "total_vaccinations": people_vaccinated + people_fully_vaccinated,
         "date": date_str,
     })
Пример #10
0
def read(source: str) -> pd.Series:
    """Scrape dose counters and the update date from the dashboard at *source*."""
    soup = get_soup(source)

    counters = soup.find_all(class_="text-brand-blue")
    first_dose = clean_count(counters[1].text)
    second_dose = clean_count(counters[2].text)
    assert first_dose >= second_dose

    # The page shows e.g. "Updated 12. March"; append the current year before parsing.
    raw = soup.find(class_="text-gray-500").text.replace("Updated ", "")
    date = clean_date(raw + str(datetime.date.today().year), fmt="%d. %B%Y", lang="en")

    return pd.Series({"people_vaccinated": first_dose, "people_fully_vaccinated": second_dose, "date": date})
Пример #11
0
    def vaccines_approved(self,
                          location: str = None,
                          original_names: bool = False) -> list:
        """Get list of approved vaccines in a country (or all if None specified).

        Args:
            location (str, optional): Country name. If None, retrieves all approved vaccines. Defaults to None.
            original_names (bool, optional): Set to True to keep vaccine from web. Defaults to False.

        Returns:
            list: Approved vaccines (``None`` when *location* lookup fails).
        """
        if not location:
            # No country given: fall back to the global list.
            all_soup = get_soup(self.all_vaccines_url)
            return self._parse_vaccines_all(all_soup, original_names)
        try:
            country_url = self.get_country_url(location)
            country_soup = get_soup(country_url)
            return self._parse_vaccines_location(country_soup, original_names)
        except ValueError:
            # Unknown location (or unparsable page) yields no result.
            return None
Пример #12
0
def connect_parse_data(source: str) -> pd.Series:
    """Read the single "count-up" counter; it serves as both total doses and people vaccinated."""
    soup = get_soup(source)
    vaccinated = clean_count(soup.find(class_="count-up").text)
    return pd.Series(
        data={
            "total_vaccinations": vaccinated,
            "people_vaccinated": vaccinated,
        })
Пример #13
0
def read(source: str) -> pd.Series:
    """Locate the "Total vaccins administrés" block and parse its metrics.

    Scans all number labels; the last matching label's grandparent is used
    as the metrics container (last-match semantics preserved from the
    original loop).

    Raises:
        ValueError: if the expected label is not present on the page
            (previously this fell through to a NameError).
    """
    soup = get_soup(source)

    container = None
    for label in soup.find_all(class_="number-label"):
        if label.text == "Total vaccins administrés":
            container = label.parent.parent

    if container is None:
        raise ValueError("Could not find 'Total vaccins administrés' label")

    return pd.Series(
        data={
            "total_vaccinations": parse_total_vaccinations(container),
            "people_vaccinated": parse_people_vaccinated(container),
            "people_fully_vaccinated": parse_people_fully_vaccinated(container),
            "source_url": source,
        })
Пример #14
0
 def read(self) -> pd.Series:
     """Scrape the source page and assemble all vaccination metrics with their date."""
     soup = get_soup(self.source_url)
     metric_names = (
         "total_vaccinations",
         "people_vaccinated",
         "people_fully_vaccinated",
         "total_boosters",
     )
     # _parse_metrics returns the four counts in the order above.
     record = dict(zip(metric_names, self._parse_metrics(soup)))
     record["source_url"] = self.source_url
     record["date"] = self._parse_date(soup)
     return pd.Series(record)
Пример #15
0
 def parse_data(self, soup: BeautifulSoup, last_update: str) -> tuple:
     """Collect records newer than *last_update* from the listing in *soup*.

     Returns ``(records, proceed)``: *proceed* is False once an element at
     or before *last_update* is reached, signalling the caller to stop
     paging.
     """
     records = []
     for elem in self.get_elements(soup):
         if elem["date"] <= last_update:
             # Reached already-ingested dates: stop here.
             return records, False
         news_soup = get_soup(elem["link"])
         records.append({
             "source_url": elem["link"],
             "date": elem["date"],
             **self.parse_data_news_page(news_soup),
         })
     return records, True
Пример #16
0
def connect_parse_data(source: str) -> pd.Series:
    """Scrape the two dose counters for Saint Lucia and derive totals.

    ``total_vaccinations`` is the sum of first and second doses; the date is
    the current local date in the America/St_Lucia timezone.
    """
    soup = get_soup(source)

    # Query the counters once instead of re-searching the tree per metric.
    counters = soup.find_all(class_="repart-stlucia")
    people_vaccinated = clean_count(counters[0].text)
    people_fully_vaccinated = clean_count(counters[1].text)

    data = {
        "total_vaccinations": people_vaccinated + people_fully_vaccinated,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "date": localdate("America/St_Lucia"),
    }
    return pd.Series(data=data)
Пример #17
0
def read(source: str, last_update: str, num_pages_limit: int = 10):
    """Scrape paginated news listings until dates pass *last_update*.

    Args:
        source: Base URL; the page number is appended as ``{source}/{n}/``.
        last_update: Date string compared lexicographically against record
            dates; records at or before it stop the crawl.
        num_pages_limit: Upper bound on pages visited (pages 1..limit-1).

    Returns:
        A post-processed DataFrame of records dated on/after *last_update*,
        or ``None`` when no record carries a ``total_vaccinations`` value.
    """
    records = []
    for page_nr in range(1, num_pages_limit):
        # Get soup (TLS verification disabled for this source)
        url = f"{source}/{page_nr}/"
        soup = get_soup(url, verify=False)
        # Get data (if any); an empty page neither extends nor stops the crawl
        records_sub = parse_data(soup)
        if records_sub:
            records.extend(records_sub)
            # Stop paging once any record on this page is at/before last_update.
            if any([record["date"] <= last_update for record in records_sub]):
                break
    # Proceed only if at least one record has a non-null total_vaccinations.
    if pd.Series([r.get("total_vaccinations")
                  for r in records]).notnull().any():
        # Keep records dated on/after last_update; older ones only served
        # to detect the stop condition above.
        records = [
            record for record in records if record["date"] >= last_update
        ]
        if len(records) > 0:
            return postprocess(pd.DataFrame(records))
    return None
Пример #18
0
def read(source: str) -> pd.Series:
    """Scrape the dashboard info boxes at *source* for dose counts.

    Boxes 2 and 3 hold first- and second-dose counts (leading digits of the
    box text); ``total_vaccinations`` is their sum. The date is the current
    local date in Asia/Dhaka.
    """
    soup = get_soup(source)

    # Parse the info boxes once instead of re-querying the tree per metric.
    boxes = soup.find_all(class_="info-box-number")
    people_vaccinated = clean_count(re.search(r"^[\d,]+", boxes[2].text).group(0))
    people_fully_vaccinated = clean_count(re.search(r"^[\d,]+", boxes[3].text).group(0))

    return pd.Series(
        data={
            "total_vaccinations": people_vaccinated + people_fully_vaccinated,
            "people_vaccinated": people_vaccinated,
            "people_fully_vaccinated": people_fully_vaccinated,
            "date": localdate("Asia/Dhaka"),
        }
    )
Пример #19
0
def connect_parse_data(source: str) -> pd.Series:
    """Read the vaccination-count table from the dashboard at *source*.

    The table's third column holds total / first-dose / second-dose counts,
    asserted to be internally consistent. The date is the current local date
    in Asia/Tokyo.
    """
    soup = get_soup(source)

    df = pd.read_html(str(soup.find(class_="vaccination-count")))[0]
    assert df.shape == (3, 7)

    values = df.iloc[:, 2].values
    total_vaccinations = values[0]
    people_vaccinated = values[1]
    people_fully_vaccinated = values[2]
    assert total_vaccinations == people_vaccinated + people_fully_vaccinated

    # The on-page date string was previously read and immediately discarded
    # (dead assignment removed); the local Tokyo date is used instead.
    date = localdate("Asia/Tokyo")

    data = {
        "total_vaccinations": total_vaccinations,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "date": date,
    }
    return pd.Series(data=data)
Пример #20
0
def read(source: str) -> pd.Series:
    """Parse vaccination totals, per-dose counts and the report date from *source*.

    Dashboard blocks are matched by their (Greek) heading text. If an
    expected heading is missing, building the final dict raises NameError
    on the unbound name — behavior unchanged from the original.
    """
    soup = get_soup(source)

    for block in soup.find(class_="main").find_all(class_="w3-center"):
        # Each block's first <p> is its heading; later <p>s hold the values.
        # Hoisted so the heading is extracted once per block, not three times.
        heading = block.find("p").text

        if heading == "ΣΥΝΟΛΟ ΕΜΒΟΛΙΑΣΜΩΝ":
            paragraphs = block.find_all("p")
            total_vaccinations = clean_count(paragraphs[1].text)
            date = re.search(r"[\d/]{8,10}", paragraphs[2].text)
            date = clean_date(date.group(0), "%d/%m/%Y")

        if heading == "ΣΥΝΟΛΟ 1ης ΔΟΣΗΣ":
            people_vaccinated = clean_count(block.find_all("p")[1].text)

        if heading == "ΣΥΝΟΛΟ 2ης ΔΟΣΗΣ":
            people_fully_vaccinated = clean_count(block.find_all("p")[1].text)

    data = {
        "total_vaccinations": total_vaccinations,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "date": date,
        "source_url": source,
    }
    return pd.Series(data=data)
Пример #21
0
 def read(self) -> pd.Series:
     """Fetch the source page (TLS verification disabled) and parse it."""
     page = get_soup(self.source_url, verify=False)
     return self.parse_data(page)
Пример #22
0
 def read(self) -> pd.DataFrame:
     """Fetch the source page and delegate parsing."""
     return self.parse_data(get_soup(self.source_url))
Пример #23
0
 def read(self) -> pd.Series:
     """Fetch the data page, extract its PDF tables and parse the metrics."""
     page = get_soup(self.source_data_url)
     pdf_url = self._parse_pdf_link(page)
     tables = self._parse_tables(pdf_url)
     return self.parse_data(tables, page)
Пример #24
0
 def _parse_link_zip(self):
     """Return the href of the "Download her" anchor on the reference page."""
     page = get_soup(self.source_url_ref)
     return page.find("a", string="Download her").get("href")
Пример #25
0
 def pipe_total_vax_bfill(self, df: pd.DataFrame, n_days: int) -> pd.DataFrame:
     """Backfill total_vaccinations in *df* using the first *n_days* zip links."""
     page = get_soup(self.source_url_ref)
     selected_links = self._get_zip_links(page)[:n_days]
     return self._backfill_total_vaccinations(df, selected_links)
Пример #26
0
 def _get_file_link(self):
     """Return the URL of the last resource anchor on the source page."""
     page = get_soup(self.source_url)
     anchors = page.find_all("a", class_="resource-url-analytics")
     # The last matching anchor is used.
     return anchors[-1]["href"]
Пример #27
0
 def read(self) -> pd.Series:
     """Fetch the source page and return the parsed record as a Series."""
     parsed = self._parse_data(get_soup(self.source_url))
     return pd.Series(parsed)
Пример #28
0
 def read(self):
     """Parse the first PDF linked from the source page."""
     page = get_soup(self.source_url)
     pdf_links = self._parse_links_pdfs(page)
     # For now, only get most recent link
     return self._parse_data(pdf_links[0])
Пример #29
0
 def read(self):
     """Fetch the source page, parse it and return the record as a Series.

     A leftover debug ``print(data)`` was removed.
     """
     soup = get_soup(self.source_url)
     data = self.parse_data(soup)
     return pd.Series(data=data)
Пример #30
0
def read(source: str) -> pd.Series:
    """Fetch *source* and delegate parsing."""
    return parse_data(get_soup(source))