Exemplo n.º 1
0
 def parse_vaccinated(self, soup):
     """Extract the vaccinated and fully vaccinated counts from the page text.

     Searches ``soup.text`` for the sentence "De los N vacunados un total de
     M (P%) ya han recibido la 2ª dosis" and passes N and M through
     ``clean_count``.

     Returns:
         Tuple ``(people_vaccinated, people_fully_vaccinated)``.

     Raises:
         AttributeError: if the sentence is not present (``re.search``
             returns ``None``).
     """
     pattern = r"De los ([\d\.]+) vacunados un total de ([\d\.]+) \(([\d\.]+)%\) ya han recibido la 2ª dosis"
     found = re.search(pattern, soup.text)
     # Group 3 (the percentage) is matched but not used.
     first_dose_raw, second_dose_raw = found.group(1, 2)
     return clean_count(first_dose_raw), clean_count(second_dose_raw)
Exemplo n.º 2
0
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Parse total doses, first-dose people and fully vaccinated people.

    Two sentences are searched in ``soup.text``: one giving the total number
    of vaccinations and one giving the number of people with at least one
    dose together with two percentages.

    Returns:
        Series with ``total_vaccinations``, ``people_vaccinated`` and
        ``people_fully_vaccinated``.

    Raises:
        AttributeError: if either sentence is not found.
        AssertionError: if the first-dose percentage is smaller than the
            two-dose percentage.
    """
    # Headline sentence: total number of doses given.
    total_pattern = (
        r"There have been ([\d,]+) Covid-19 vaccinations given in total in the Cayman Islands."
    )
    total_vaccinations = clean_count(
        re.search(total_pattern, soup.text).group(1))

    # Breakdown sentence: people with >= 1 dose plus the two percentages.
    breakdown_pattern = (
        r"Of these, ([\d,]+) \(([\d,]+)% of (?:[a-zA-Z0-9,]+)\) have had at least one dose of a COVID-19 "
        r"vaccine and ([\d,]+)% have completed the two dose course.")
    breakdown = re.search(breakdown_pattern, soup.text)
    people_vaccinated, proportion_dose1, proportion_dose2 = (
        clean_count(raw) for raw in breakdown.groups()
    )
    assert proportion_dose1 >= proportion_dose2

    # NOTE(review): the estimate multiplies the *total doses* by the two-dose
    # percentage. An earlier (removed) variant used
    # people_vaccinated * proportion_dose2 / proportion_dose1 instead —
    # confirm which population the percentage refers to.
    people_fully_vaccinated = round(
        total_vaccinations * proportion_dose2 / 100)

    metrics = {
        "total_vaccinations": total_vaccinations,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
    }
    return pd.Series(metrics)
Exemplo n.º 3
0
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Read first- and second-round counts from the status info area.

    Each count is the text of the ``big`` element inside the ``round1`` /
    ``round2`` element of ``status_infoArea``. The total is their sum and the
    date is yesterday in the Asia/Seoul timezone.

    Returns:
        Series with ``date``, ``people_vaccinated``,
        ``people_fully_vaccinated`` and ``total_vaccinations``.
    """

    def _round_count(round_class):
        # Walk status_infoArea -> roundN -> big and normalise the number.
        info_area = soup.find(class_="status_infoArea")
        figure = info_area.find(class_=round_class).find(class_="big")
        return clean_count(figure.text)

    people_vaccinated = _round_count("round1")
    people_fully_vaccinated = _round_count("round2")
    total_vaccinations = people_vaccinated + people_fully_vaccinated

    # The code reports the figures under yesterday's date (Seoul time).
    now_seoul = datetime.datetime.now(pytz.timezone("Asia/Seoul"))
    date = str((now_seoul - datetime.timedelta(days=1)).date())

    return pd.Series(data={
        "date": date,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "total_vaccinations": total_vaccinations,
    })
Exemplo n.º 4
0
def connect_parse_data(source: str, source_old: str) -> pd.Series:
    """Scrape vaccination counters from a dashboard and its older counterpart.

    The three counters (``counter1``..``counter3``) are read from ``source``.
    ``source_old`` is then loaded to (a) verify that its ``counter1`` shows
    the same total and (b) read the update date from ``pupdateddate``.

    Args:
        source: URL of the dashboard providing the counters.
        source_old: URL of the dashboard providing the update date.

    Returns:
        Series with ``total_vaccinations``, ``people_vaccinated``,
        ``people_fully_vaccinated`` and ``date`` (ISO string).

    Raises:
        ValueError: if the two dashboards report different totals.
    """
    op = Options()
    op.add_argument("--headless")

    with webdriver.Chrome(options=op) as driver:
        driver.get(source)
        time.sleep(5)  # fixed pause after loading, before reading the counters

        total_vaccinations = driver.find_element_by_id("counter1").text
        people_vaccinated = driver.find_element_by_id("counter2").text
        people_fully_vaccinated = driver.find_element_by_id("counter3").text

        driver.get(source_old)
        time.sleep(5)

        # Sanity check: the date is taken from the old dashboard, so both
        # dashboards must be showing the same total.
        total_vaccinations_old = driver.find_element_by_id("counter1").text
        if total_vaccinations != total_vaccinations_old:
            # Fixed: the two literals were concatenated without a space
            # ("ConsiderIntroducing").
            raise ValueError(
                "Both dashboards may not be synced and hence may refer to different timestamps. Consider "
                "introducing the timestamp manually.")
        date = driver.find_element_by_id("pupdateddate").text.replace(
            "Updated ", "")
        date = str(pd.to_datetime(date, dayfirst=True).date())

    data = {
        "total_vaccinations": clean_count(total_vaccinations),
        "people_vaccinated": clean_count(people_vaccinated),
        "people_fully_vaccinated": clean_count(people_fully_vaccinated),
        "date": date,
    }
    return pd.Series(data=data)
Exemplo n.º 5
0
def parse_data(url: str) -> pd.Series:
    """Extract the "India" row of the vaccination table from a PDF report.

    All tables of the PDF are read with tabula as string-typed, header-less
    frames. The first table containing the cell "Beneficiaries vaccinated"
    is used; if none contains it, the last table is used (the behaviour the
    original loop fell back to). The last three columns of the "India" row
    are taken as first-dose, second-dose and total counts.

    Args:
        url: Location of the PDF report.

    Returns:
        Series with ``date`` (yesterday in Asia/Kolkata), the three metrics
        and ``source_url``.

    Raises:
        ValueError: if no table can be extracted, or if the selected table
            does not contain exactly one "India" row (raised by ``.item()``).
    """
    kwargs = {"pandas_options": {"dtype": str, "header": None}}
    dfs_from_pdf = tabula.read_pdf(url, pages="all", **kwargs)
    if not dfs_from_pdf:
        raise ValueError(f"No tables could be extracted from {url}")

    for df in dfs_from_pdf:
        # Fixed: inspect the table being iterated, not always the first one.
        if "Beneficiaries vaccinated" in df.values.flatten():
            break

    df = df[df[0] == "India"]
    ncols = df.shape[1]

    # Metrics live in the three right-most columns of the row.
    people_vaccinated = clean_count(df[ncols - 3].item())
    people_fully_vaccinated = clean_count(df[ncols - 2].item())
    total_vaccinations = clean_count(df[ncols - 1].item())

    yesterday = (datetime.datetime.now(pytz.timezone("Asia/Kolkata")) -
                 datetime.timedelta(days=1)).date()
    return pd.Series({
        "date": str(yesterday),
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "total_vaccinations": total_vaccinations,
        "source_url": url,
    })
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Parse total doses, first-dose people and fully vaccinated people.

    Two sentences are searched in ``soup.text``: "N Covid-19 vaccinations
    had/have been given in total in the Cayman Islands" and "Of these, N (..%
    of ..) [have] had at least one dose ... and [approximately] ..% (M) have
    completed the two-dose course."

    Returns:
        Series with ``total_vaccinations``, ``people_vaccinated`` and
        ``people_fully_vaccinated``.

    Raises:
        AttributeError: if either sentence is not found.
    """
    page_text = soup.text

    regex_1 = r"([\d,]+) C(ovid|OVID)-19 vaccinations (?:had|have) been given in total in the Cayman Islands"
    total_vaccinations = clean_count(re.search(regex_1, page_text).group(1))

    # The optional words "have" / "approximately" now carry their own
    # trailing whitespace. Previously each sat between two mandatory spaces,
    # so the sentence only matched without the word if a double space was
    # present. ``\s+`` keeps every string the old pattern accepted.
    regex_2 = (
        r"Of these,? ([\d,]+) \((?:[\d,]+)% of (?:[\d,]+)\)\s+(?:have\s+)?had at least one dose of a C(?:ovid|OVID)-19 "
        r"vaccine and\s+(?:approximately\s+)?(?:[\d,]+)% \(([\d,]+)\) have completed the two-dose course\."
    )
    matches = re.search(regex_2, page_text)
    people_vaccinated = clean_count(matches.group(1))
    people_fully_vaccinated = clean_count(matches.group(2))

    return pd.Series({
        "total_vaccinations": total_vaccinations,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
    })
Exemplo n.º 7
0
def read(source: str) -> pd.Series:
    """Scrape vaccination metrics from the ``h5`` headings of a dashboard.

    Each metric heading is matched by its Spanish label; the number is the
    text of the ``div`` found via ``./preceding-sibling::div``. The date is
    parsed from the heading starting with "Acumulados al".

    Args:
        source: URL of the page to load in headless Chrome.

    Returns:
        Series with ``date``, ``people_vaccinated``,
        ``people_fully_vaccinated`` and ``total_vaccinations``.

    Raises:
        UnboundLocalError: if any of the expected headings is missing.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")

    def _sibling_count(heading):
        # The figure sits in a div preceding the heading element.
        return clean_count(
            heading.find_element_by_xpath("./preceding-sibling::div").text)

    with webdriver.Chrome(options=chrome_options) as driver:
        driver.get(source)
        time.sleep(1)

        for h5 in driver.find_elements_by_tag_name("h5"):
            label = h5.text

            if "Primera dosis" in label:
                people_vaccinated = _sibling_count(h5)
            elif "Total dosis aplicadas" in label:
                total_vaccinations = _sibling_count(h5)
            elif "Población completamente vacunada" in label:
                people_fully_vaccinated = _sibling_count(h5)
            elif "Acumulados al" in label:
                date_text = label.replace("Acumulados al ", "")
                date = str(dateparser.parse(date_text, languages=["es"]).date())

    return pd.Series(data={
        "date": date,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "total_vaccinations": total_vaccinations,
    })
Exemplo n.º 8
0
    def _parse_data(self) -> pd.Series:
        """Scrape first- and second-dose counts from an embedded report.

        Loads ``self.source_url``, follows the ``src`` of the iframe at
        ``/html/body/section[2]/iframe``, waits up to 20 seconds for the
        ``card`` elements to become visible and reads the numbers from their
        ``aria-label`` attributes.

        Returns:
            Series with ``people_vaccinated`` and ``people_fully_vaccinated``.

        Raises:
            UnboundLocalError: if no card with "first dose" / "sec dose" in
                its label is found.
        """
        chrome_options = Options()
        chrome_options.add_argument("--headless")

        with webdriver.Chrome(options=chrome_options) as driver:
            # Main page
            driver.get(self.source_url)
            # The report itself lives inside an iframe; load it directly.
            iframe = driver.find_element_by_xpath(
                "/html/body/section[2]/iframe")
            report_url = iframe.get_attribute("src")
            driver.get(report_url)

            cards_visible = EC.visibility_of_all_elements_located(
                (By.CLASS_NAME, "card"))
            cards = WebDriverWait(driver, 20).until(cards_visible)

            for card in cards:
                label = card.get_attribute("aria-label")
                if "first dose" in label:
                    first_match = re.search(r"first dose +(\d+)\.", label)
                    people_vaccinated = first_match.group(1)
                elif "sec dose" in label:
                    second_match = re.search(r"sec dose +(\d+)\.", label)
                    people_fully_vaccinated = second_match.group(1)

            people_vaccinated = clean_count(people_vaccinated)
            people_fully_vaccinated = clean_count(people_fully_vaccinated)

        metrics = {
            "people_vaccinated": people_vaccinated,
            "people_fully_vaccinated": people_fully_vaccinated,
        }
        return pd.Series(metrics)
Exemplo n.º 9
0
def connect_parse_data(source: str) -> pd.Series:
    """Scrape the as-of date and the one-/two-dose counters from a page.

    The date is the ``span`` text inside the ``as_of`` element, parsed with
    format ``%d.%m.%Y``. Counters are the ``h2`` texts of the
    ``counter_block`` elements whose text contains "1 ДОЗУ" / "2 ДОЗИ".

    Args:
        source: URL to load in headless Chrome.

    Returns:
        Series with ``people_vaccinated``, ``people_fully_vaccinated`` and
        ``date``.

    Raises:
        UnboundLocalError: if one of the counter blocks is not found.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")

    with webdriver.Chrome(options=chrome_options) as driver:
        driver.get(source)
        time.sleep(10)

        as_of = driver.find_element_by_class_name("as_of")
        date = clean_date(as_of.find_element_by_tag_name("span").text, "%d.%m.%Y")

        for counter in driver.find_elements_by_class_name("counter_block"):
            counter_text = counter.text
            # Two independent checks: both labels are tested on every block.
            if "1 ДОЗУ" in counter_text:
                people_vaccinated = counter.find_element_by_tag_name("h2").text
            if "2 ДОЗИ" in counter_text:
                people_fully_vaccinated = counter.find_element_by_tag_name("h2").text

    return pd.Series(data={
        "people_vaccinated": clean_count(people_vaccinated),
        "people_fully_vaccinated": clean_count(people_fully_vaccinated),
        "date": date,
    })
Exemplo n.º 10
0
def parse_vaccinations(elem) -> dict:
    """Collect vaccination metrics from the news article linked by ``elem``.

    The article URL is the ``href`` of the first anchor in the enclosing
    ``card`` element. The text of all ``<p>`` tags inside ``<article>`` is
    joined with newlines and searched with one pattern per metric.

    Returns:
        Dict holding only the metrics whose pattern matched; possible keys
        are ``total_vaccinations``, ``people_vaccinated`` and
        ``people_fully_vaccinated``.
    """
    # Get news text
    url = elem.find_parent(class_="card").find("a").get("href")
    soup = get_soup(url)
    paragraphs = soup.find("article").find_all("p")
    text = "\n".join(p.text for p in paragraphs)

    # One pattern per metric; the count is always the first group.
    patterns = {
        "total_vaccinations": r"疫苗劑數為(?P<count>[\d,]*)劑",
        "people_vaccinated": r"已接種人數共有(?P<count>[\d,]*)人",
        "people_fully_vaccinated": r"已完成接種2劑有(?P<count>[\d,]*)人",
    }

    metrics = {}
    for metric_name, pattern in patterns.items():
        found = re.search(pattern, text)
        if found:
            metrics[metric_name] = clean_count(found.group(1))
    return metrics
Exemplo n.º 11
0
def parse_data(soup: BeautifulSoup):
    """Extract total vaccinations and revaccinations from the page text.

    Returns:
        Tuple ``(total_vaccinations, people_fully_vaccinated)``.

    Raises:
        AttributeError: if the sentence is not found in ``soup.text``.
    """
    pattern = r"Укупно вакцинација: ([\d.]+), од тога ревакцинација: ([\d.]+)"
    found = re.search(pattern, soup.text)
    total_raw, revaccinated_raw = found.group(1, 2)

    total_vaccinations = clean_count(total_raw)
    people_fully_vaccinated = clean_count(revaccinated_raw)
    return total_vaccinations, people_fully_vaccinated
Exemplo n.º 12
0
def connect_parse_data(source: str) -> pd.Series:
    """Download ``source`` and extract vaccinated / fully vaccinated counts.

    The page is requested with browser-like headers, parsed with
    ``html.parser`` and searched for "De los N vacunados un total de M".

    Args:
        source: URL of the page to fetch.

    Returns:
        Series with ``people_vaccinated`` and ``people_fully_vaccinated``.

    Raises:
        AttributeError: if the sentence is not found.
    """
    # Browser-like request headers sent with the download.
    request_headers = {
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.16; rv:86.0) Gecko/20100101 Firefox/86.0",
        "Accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    response = requests.get(source, headers=request_headers)
    soup = BeautifulSoup(response.content, "html.parser")

    found = re.search(r"De los ([\d\.]+) vacunados un total de ([\d\.]+)",
                      soup.text)
    first_dose_raw, second_dose_raw = found.group(1, 2)

    return pd.Series(data={
        "people_vaccinated": clean_count(first_dose_raw),
        "people_fully_vaccinated": clean_count(second_dose_raw),
    })
Exemplo n.º 13
0
def main():
    """Scrape the Guatemala dashboard and record the figures via ``increment``.

    Opens the dashboard in headless Chrome, clicks the ``fa-syringe``
    element, reads the ``logo`` text (which contains the date) and the
    ``h3`` values of ``dosisaplicadas1`` / ``dosisaplicadas2``. The total is
    the sum of the two dose counts.
    """
    location = "Guatemala"
    source_url = "https://gtmvigilanciacovid.shinyapps.io/3869aac0fb95d6baf2c80f19f2da5f98"
    vaccine = "Moderna, Oxford/AstraZeneca"

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    with webdriver.Chrome(options=chrome_options) as driver:
        driver.maximize_window()  # For maximizing window
        driver.implicitly_wait(20)  # gives an implicit wait for 20 seconds
        driver.get(source_url)
        driver.find_element_by_class_name("fa-syringe").click()
        logo_text = driver.find_element_by_class_name("logo").text
        dose1_box = driver.find_element_by_id("dosisaplicadas1")
        dose1_text = dose1_box.find_element_by_tag_name("h3").text
        dose2_box = driver.find_element_by_id("dosisaplicadas2")
        dose2_text = dose2_box.find_element_by_tag_name("h3").text

    people_vaccinated = clean_count(dose1_text)
    people_fully_vaccinated = clean_count(dose2_text)
    total_vaccinations = people_vaccinated + people_fully_vaccinated

    # The logo text embeds a d/m/202y date.
    raw_date = re.search(r"\d+/\d+/202\d", logo_text).group(0)
    date = clean_date(raw_date, "%d/%m/%Y")

    increment(
        location=location,
        total_vaccinations=total_vaccinations,
        people_vaccinated=people_vaccinated,
        people_fully_vaccinated=people_fully_vaccinated,
        date=date,
        source_url=source_url,
        vaccine=vaccine,
    )
Exemplo n.º 14
0
def connect_parse_data(source: str) -> pd.Series:
    """Scrape total doses and second doses from dashboard cards.

    The cut-off date is taken from "Fecha de corte : dd/mm/yyyy" in the page
    source. Cards are ``unselectable`` elements identified by their
    ``aria-label``; the number is the text of the nested ``value`` element.
    ``people_vaccinated`` is computed as total doses minus second doses.

    Args:
        source: URL to load in headless Chrome.

    Returns:
        Series with ``total_vaccinations``, ``people_vaccinated``,
        ``people_fully_vaccinated`` and ``date``.

    Raises:
        AttributeError: if the cut-off date is not found.
        UnboundLocalError: if one of the two cards is not found.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")

    def _card_value(card):
        return clean_count(card.find_element_by_class_name("value").text)

    with webdriver.Chrome(options=chrome_options) as driver:
        driver.get(source)
        time.sleep(10)

        date = re.search(r"Fecha de corte : ([\d/]{10})",
                         driver.page_source).group(1)

        for card in driver.find_elements_by_class_name("unselectable"):
            label = card.get_attribute("aria-label")
            if label == "Dosis aplicadas Card":
                total_vaccinations = _card_value(card)
            elif label == "Segundas dosis aplicadas Card":
                people_fully_vaccinated = _card_value(card)

    people_vaccinated = total_vaccinations - people_fully_vaccinated

    metrics = {
        "total_vaccinations": total_vaccinations,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "date": clean_date(date, "%d/%m/%Y")
    }
    return pd.Series(metrics)
Exemplo n.º 15
0
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Parse vaccination metrics from the ``<p>`` elements of a page.

    A paragraph is assigned to a metric by its Spanish label; the value is
    the first run of at least six digits/commas in that paragraph. The date
    is parsed from the first ``<h6>`` after removing "Acumulados al ".

    Returns:
        Series with ``date``, ``people_vaccinated``,
        ``people_fully_vaccinated`` and ``total_vaccinations``.

    Raises:
        UnboundLocalError: if one of the labelled paragraphs is missing.
    """

    def _first_figure(paragraph_text):
        # First run of >= 6 digits/commas in the paragraph.
        return clean_count(re.search(r"[\d,]{6,}", paragraph_text).group(0))

    for paragraph in soup.find_all("p"):
        content = paragraph.text

        if "Primera dosis" in content:
            people_vaccinated = _first_figure(content)
        elif "Total dosis aplicadas" in content:
            total_vaccinations = _first_figure(content)
        elif "Población completamente vacunada" in content:
            people_fully_vaccinated = _first_figure(content)

    date_text = soup.find("h6").text.replace("Acumulados al ", "")
    date = str(dateparser.parse(date_text, languages=["es"]).date())

    return pd.Series(data={
        "date": date,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "total_vaccinations": total_vaccinations,
    })
Exemplo n.º 16
0
def parse_data(data: dict) -> pd.Series:
    """Build the metrics series from an API payload.

    Reads ``vakdose1`` / ``vakdose2`` from the first entry of
    ``data["data"]`` and converts ``data["updated"]`` (integer-divided by
    1000 before conversion) to a ``YYYY-MM-DD`` date in local time.

    Returns:
        Series with ``date``, ``people_vaccinated``,
        ``people_fully_vaccinated`` and ``total_vaccinations`` (sum of both
        doses).
    """
    latest = data["data"][0]
    dose1 = clean_count(latest["vakdose1"])
    dose2 = clean_count(latest["vakdose2"])

    updated_at = datetime.fromtimestamp(data["updated"] // 1000)
    return pd.Series({
        "date": updated_at.strftime("%Y-%m-%d"),
        "people_vaccinated": dose1,
        "people_fully_vaccinated": dose2,
        "total_vaccinations": dose1 + dose2
    })
Exemplo n.º 17
0
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Read total and fully-vaccinated figures from the ``cifra`` elements.

    The second and third ``cifra`` elements inside ``cifras-coronavirus``
    are used; the date comes from ``set_date()``.

    Returns:
        Series with ``total_vaccinations``, ``people_fully_vaccinated`` and
        ``date``.
    """
    figures = soup.find(class_="cifras-coronavirus").find_all(class_="cifra")

    total_vaccinations = clean_count(figures[1].text)
    people_fully_vaccinated = clean_count(figures[2].text)
    metrics = {
        "total_vaccinations": total_vaccinations,
        "people_fully_vaccinated": people_fully_vaccinated,
        "date": set_date()
    }
    return pd.Series(data=metrics)
Exemplo n.º 18
0
 def parse_data(self, soup):
     """Read the metrics from the first two ``textwidget`` elements.

     The first widget is taken as total vaccinations and the second as
     fully vaccinated people; ``people_vaccinated`` is their difference.

     Returns:
         Series with the three metrics and ``date``
         (``localdate("Asia/Tbilisi")``).
     """
     text_widgets = soup.find_all(class_="textwidget")
     doses = clean_count(text_widgets[0].text)
     fully_vaccinated = clean_count(text_widgets[1].text)
     metrics = {
         "total_vaccinations": doses,
         "people_vaccinated": doses - fully_vaccinated,
         "people_fully_vaccinated": fully_vaccinated,
         "date": localdate("Asia/Tbilisi")
     }
     return pd.Series(metrics)
Exemplo n.º 19
0
 def _parse_metrics(self, soup: BeautifulSoup):
     """Extract the three metrics using the patterns in ``self.regex``.

     Each pattern is searched in ``soup.text`` and its first group is passed
     through ``clean_count``.

     Returns:
         Tuple ``(total_vaccinations, people_vaccinated,
         people_fully_vaccinated)``.

     Raises:
         AttributeError: if any pattern does not match.
     """

     def _extract(metric_name):
         found = re.search(self.regex[metric_name], soup.text)
         return clean_count(found.group(1))

     return (
         _extract("total_vaccinations"),
         _extract("people_vaccinated"),
         _extract("people_fully_vaccinated"),
     )
Exemplo n.º 20
0
 def parse_data(self, soup):
     """Parse administered doses and people with at least one dose.

     ``people_fully_vaccinated`` is computed as total doses minus people
     with at least one dose; the date comes from ``self.parse_date(soup)``.

     Returns:
         Series with the three metrics and ``date``.

     Raises:
         AttributeError: if the sentence is not found in ``soup.text``.
     """
     pattern = r"ja s’han administrat ([\d\.]+) dosis i ([\d\.]+) persones han rebut, com a mínim, una dosi del vaccí"
     doses_raw, people_raw = re.search(pattern, soup.text).group(1, 2)
     # Metrics
     doses = clean_count(doses_raw)
     at_least_one_dose = clean_count(people_raw)
     metrics = {
         "total_vaccinations": doses,
         "people_vaccinated": at_least_one_dose,
         "people_fully_vaccinated": doses - at_least_one_dose,
         "date": self.parse_date(soup)
     }
     return pd.Series(metrics)
Exemplo n.º 21
0
 def parse_data_news_page(self, soup: BeautifulSoup):
     """Extract whichever people-based metrics appear in a news page.

     Both patterns from ``self.regex`` are searched in ``soup.text``; a
     metric is included only when its pattern matched.

     Returns:
         Dict with zero, one or both of ``people_vaccinated`` and
         ``people_fully_vaccinated``.
     """
     found = {
         "people_vaccinated": re.search(
             self.regex["people_vaccinated"], soup.text),
         "people_fully_vaccinated": re.search(
             self.regex["people_fully_vaccinated"], soup.text),
     }
     return {
         metric_name: clean_count(hit.group(1))
         for metric_name, hit in found.items()
         if hit
     }
Exemplo n.º 22
0
 def _parse_metrics(self, text: str):
     """Extract the three metrics from ``text`` and check their consistency.

     The first three groups of ``self.regex_vax`` are read, in order, as
     people vaccinated, people fully vaccinated and total vaccinations.

     Returns:
         Dict with ``total_vaccinations``, ``people_vaccinated`` and
         ``people_fully_vaccinated``.

     Raises:
         AttributeError: if the pattern does not match.
         ValueError: if the total is not the sum of the two people counts.
     """
     groups = re.search(self.regex_vax, text).groups()
     people_vaccinated, people_fully_vaccinated, total_vaccinations = (
         clean_count(groups[index]) for index in range(3)
     )
     expected_total = people_vaccinated + people_fully_vaccinated
     if total_vaccinations != expected_total:
         raise ValueError(
             "total_vaccinations != people_vaccinated + people_fully_vaccinated"
         )
     return {
         "total_vaccinations": total_vaccinations,
         "people_vaccinated": people_vaccinated,
         "people_fully_vaccinated": people_fully_vaccinated,
     }
Exemplo n.º 23
0
def parse_infogram_vaccinations(infogram_data: dict) -> dict:
    """Read the three vaccination metrics from Infogram data.

    Each metric is looked up with ``_get_infogram_value`` under a fixed
    identifier and passed through ``clean_count``.

    Args:
        infogram_data: Data structure handed to ``_get_infogram_value``.

    Returns:
        Dict with ``total_vaccinations``, ``people_vaccinated`` and
        ``people_fully_vaccinated``. (The annotation previously said ``int``
        although a dict has always been returned.)
    """
    # Identifier of the Infogram entity holding each metric.
    entity_ids = {
        "total_vaccinations": "4f66ed81-151f-4b97-aa3c-4927bde058b2",
        "people_vaccinated": "4048eac1-24ba-4e24-b081-61dfa0281a0e",
        "people_fully_vaccinated": "50a2486f-7dca-4afd-a551-bd24665d7314",
    }
    return {
        metric: clean_count(_get_infogram_value(infogram_data, entity_id))
        for metric, entity_id in entity_ids.items()
    }
Exemplo n.º 24
0
 def parse_data(self, soup):
     """Parse total doses, one-dose people and two-dose people.

     All three figures are read directly from one sentence in
     ``soup.text``; the date comes from ``self.parse_date(soup)``.

     Returns:
         Series with the three metrics and ``date``.

     Raises:
         AttributeError: if the sentence is not found.
     """
     pattern = (
         r"s’han administrat un total de ([\d\.]+) vacunes, ([\d\.]+) persones (?:han rebut|tenen) una dosi del "
         r"vaccí,? i ([\d\.]+) (persones )?(en )?tenen les dues")
     found = re.search(pattern, soup.text)
     # Metrics: groups 4 and 5 are optional filler words and are ignored.
     doses_raw, one_dose_raw, two_doses_raw = found.group(1, 2, 3)
     metrics = {
         "total_vaccinations": clean_count(doses_raw),
         "people_vaccinated": clean_count(one_dose_raw),
         "people_fully_vaccinated": clean_count(two_doses_raw),
         "date": self.parse_date(soup)
     }
     return pd.Series(metrics)
Exemplo n.º 25
0
def connect_parse_data(source: str) -> pd.Series:
    """Download ``source`` and extract the total doses and the report date.

    The total is the text of the ``repart-stlucia`` element. The date is the
    first "Month D, 202Y" occurrence in the ``h2-blue`` element, parsed with
    format ``%B %d, %Y``.

    Args:
        source: URL of the page to fetch.

    Returns:
        Series with ``total_vaccinations`` and ``date``.

    Raises:
        AttributeError: if an element or the date pattern is not found.
    """
    # Browser-like request headers sent with the download.
    request_headers = {
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.16; rv:86.0) Gecko/20100101 Firefox/86.0",
        "Accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    response = requests.get(source, headers=request_headers)
    soup = BeautifulSoup(response.content, "html.parser")

    total_vaccinations = clean_count(soup.find(class_="repart-stlucia").text)

    heading = soup.find(class_="h2-blue").text
    raw_date = re.search(r"\w+ +\d+, +202\d", heading).group(0)
    date = clean_date(raw_date, "%B %d, %Y")

    return pd.Series(data={
        "total_vaccinations": total_vaccinations,
        "date": date,
    })
Exemplo n.º 26
0
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Locate the newest situation-report PDF and parse its first page.

    The first anchor inside ``rt-article`` whose ``href`` contains
    "sitrep-sl-en" is downloaded. From the text of page 0, the number after
    "COVID-19 Total Vaccinated" is used for both ``total_vaccinations`` and
    ``people_vaccinated``, and the date after "Situation Report" is parsed
    with format ``%d.%m.%Y``.

    Returns:
        Series with ``total_vaccinations``, ``people_vaccinated``, ``date``
        and ``source_url`` (the PDF URL).

    Raises:
        ValueError: if no matching PDF link exists on the page.
        AttributeError: if the expected text is not found in the PDF.
    """
    # Get path to newest pdf
    links = soup.find(class_="rt-article").find_all("a")
    for link in links:
        # Anchors without an href are skipped instead of raising KeyError.
        href = link.get("href", "")
        if "sitrep-sl-en" in href:
            pdf_path = "https://www.epid.gov.lk" + href
            break
    else:
        # Previously this surfaced later as an unbound-variable error.
        raise ValueError(
            "No link containing 'sitrep-sl-en' found in the article")

    # Keep the temporary file inside a context manager so its handle is
    # closed (and the file removed) deterministically. It is read back
    # through the same handle rather than reopened by name.
    with tempfile.NamedTemporaryFile() as tf:
        tf.write(requests.get(pdf_path).content)
        tf.flush()
        tf.seek(0)
        reader = PyPDF2.PdfFileReader(tf)
        page = reader.getPage(0)
        text = page.extractText().replace("\n", "")

    regex = r"COVID-19\s+Total\s+Vaccinated\s+(\d+)"
    total_vaccinations = re.search(regex, text).group(1)
    total_vaccinations = clean_count(total_vaccinations)

    # The code reports the same figure for both metrics.
    people_vaccinated = total_vaccinations

    regex = r"Situation Report\s+([\d\.]{10})"
    date = re.search(regex, text).group(1)
    date = clean_date(date, "%d.%m.%Y")

    return pd.Series(data={
        "total_vaccinations": total_vaccinations,
        "people_vaccinated": people_vaccinated,
        "date": date,
        "source_url": pdf_path,
    })
Exemplo n.º 27
0
    def _parse_stats(self, df: pd.DataFrame) -> dict:
        """Validate the layout of table 1 and extract first/second dose totals.

        The table must have four columns, "廠牌" and "劑次" in the first two
        header cells, and "總計" in the first column of the last or
        second-to-last row. The dose totals are the last cell of the last
        row labelled "第 1劑" / "第 2劑" in column 1.

        Args:
            df: Header-less table with integer column labels.

        Returns:
            Dict with ``total_vaccinations`` (sum of both doses) and
            ``people_vaccinated`` (first doses). (The annotation previously
            said ``int`` although a dict has always been returned.)

        Raises:
            ValueError: if the table layout is not the expected one.
            IndexError: if no row is labelled with one of the dose names.
        """
        has_expected_header = (df.shape[1] == 4 and df.iloc[0, 0] == "廠牌"
                               and df.iloc[0, 1] == "劑次")
        if not has_expected_header or not (df.iloc[-1, 0] == "總計"
                                           or df.iloc[-2, 0] == "總計"):
            # Plain string: the former f-string had no placeholders.
            raise ValueError("Table 1: format has changed!")

        # The last row for each dose label holds that dose's total.
        num_dose1 = df[df[1] == "第 1劑"].tail(1).values[0][-1]
        num_dose1 = clean_count(num_dose1)

        num_dose2 = df[df[1] == "第 2劑"].tail(1).values[0][-1]
        num_dose2 = clean_count(num_dose2)

        return {
            "total_vaccinations": (num_dose1 + num_dose2),
            "people_vaccinated": num_dose1,
        }
Exemplo n.º 28
0
def parse_total_vaccinations(soup: BeautifulSoup) -> str:
    """Return the cleaned "Vaccines Administered" counter value.

    Finds the ``counter-box-content`` element whose string matches
    "Vaccines Administered", then reads the ``data-value`` attribute of the
    ``display-counter`` element under its parent.

    NOTE(review): the annotation says ``str`` but the value is whatever
    ``clean_count`` returns — confirm the intended type.
    """
    label = soup.find(
        class_="counter-box-content",
        string=re.compile("Vaccines Administered"),
    )
    counter = label.parent.find(class_="display-counter")
    return clean_count(counter["data-value"])
Exemplo n.º 29
0
 def parse_data(self, soup: BeautifulSoup) -> pd.Series:
     """Collect the date and metrics found by the title and data patterns.

     ``self.regex["title"]`` supplies the date (group 1, completed with the
     current year and parsed as Spanish "%d de %B %Y") and the total doses
     (group 2). ``self.regex["data"]`` supplies people vaccinated (group 1)
     and people fully vaccinated (group 3). Either part is skipped when its
     pattern does not match.

     Returns:
         Series containing only the fields that could be extracted.
     """
     data = {}

     title_match = re.search(self.regex["title"], soup.text)
     if title_match:
         # date — NOTE(review): the year is taken from the current clock, so
         # a report read across a year boundary would get the wrong year.
         day_and_month = title_match.group(1)
         data["date"] = clean_date(
             f"{day_and_month} {datetime.now().year}",
             "%d de %B %Y",
             lang="es",
         )
         # vaccinations
         data["total_vaccinations"] = clean_count(title_match.group(2))

     data_match = re.search(self.regex["data"], soup.text)
     if data_match:
         data["people_vaccinated"] = clean_count(data_match.group(1))
         data["people_fully_vaccinated"] = clean_count(data_match.group(3))

     return pd.Series(data)
Exemplo n.º 30
0
def parse_metric(soup: BeautifulSoup, description: str) -> int:
    """Return the cleaned value from the last row of the table for a label.

    Finds the ``<strong>`` element whose string equals ``description``,
    climbs four parents and takes the text of the last ``<tr>`` found there.

    Args:
        soup: Parsed page.
        description: Exact text of the ``<strong>`` label.

    Raises:
        AttributeError: if the label is not found.
    """
    label = soup.find("strong", string=description)
    container = label.parent.parent.parent.parent
    last_row = container.find_all("tr")[-1]
    return clean_count(last_row.text)