Пример #1
0
def main():
    """Scrape San Marino's vaccination page and record the latest figures.

    The numbers are read from the inline ``<script>`` that builds the page's
    chart (the one containing "new Chart"). Second doses are recorded as 0,
    so the total equals the number of people vaccinated.

    Raises:
        ValueError: if no ``<script>`` containing "new Chart" is on the page.
    """
    url = "https://vaccinocovid.iss.sm/"
    # NOTE(review): TLS verification is disabled for this request; presumably
    # the site's certificate does not validate — confirm it is still needed.
    soup = BeautifulSoup(
        requests.get(url, verify=False).content, "html.parser")

    for script in soup.find_all("script"):
        if "new Chart" in str(script):
            chart_data = str(script)
            break
    else:
        # Previously a missing chart surfaced later as an opaque NameError.
        raise ValueError(f"No chart script found at {url}")

    people_vaccinated = re.search(r"([\d,. ]+) [Vv]accinati",
                                  chart_data).group(1)
    people_vaccinated = vaxutils.clean_count(people_vaccinated)

    people_fully_vaccinated = 0

    total_vaccinations = people_vaccinated + people_fully_vaccinated

    date = re.search(r"Dati aggiornati al (\d{2}/\d{2}/\d{4})",
                     chart_data).group(1)
    date = vaxutils.clean_date(date, "%d/%m/%Y")

    vaxutils.increment(location="San Marino",
                       total_vaccinations=total_vaccinations,
                       people_vaccinated=people_vaccinated,
                       people_fully_vaccinated=people_fully_vaccinated,
                       date=date,
                       source_url=url,
                       vaccine="Sputnik V")
Пример #2
0
def main():
    """Record Romania's total vaccinations from the latest daily update post.

    Looks up the first "Actualizare zilnică" link on the press-release
    listing, opens it, and extracts the post date and the total count from
    the post text. If no such link exists the listing page itself is parsed.
    """
    url = "https://vaccinare-covid.gov.ro/comunicate-oficiale/"
    listing = BeautifulSoup(requests.get(url).content, "html.parser")

    anchors = listing.find(class_="display-posts-listing").find_all(
        "a", class_="title")
    # First matching post wins; fall back to the listing URL when none match.
    url = next(
        (a["href"] for a in anchors if "Actualizare zilnică" in a.text), url)

    post = BeautifulSoup(requests.get(url).content, "html.parser")

    raw_date = post.find(class_="post-date").find(
        class_="meta-text").text.strip()
    date = vaxutils.clean_date(raw_date, "%b %d, %Y")

    body = post.find(class_="entry-content-text").text

    pattern = r"Număr total de persoane vaccinate împotriva COVID-19 cu vaccinul Pfizer BioNTech \(începând cu data de 27 decembrie 2020\) – ([\d\.]+)"
    match = re.search(pattern, body)
    count = vaxutils.clean_count(match.group(1))

    vaxutils.increment(location="Romania",
                       total_vaccinations=count,
                       date=date,
                       source_url=url,
                       vaccine="Pfizer/BioNTech")
Пример #3
0
def main():
    """Record Romania's total vaccinations from the latest daily update post.

    The count is assembled from the ``<strong>`` tags of the paragraph that
    mentions "Număr total de persoane vaccinate". When several paragraphs
    match, the last one is used.

    Raises:
        ValueError: if no paragraph of the post mentions the total count.
    """
    url = "https://vaccinare-covid.gov.ro/comunicate-oficiale/"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")

    links = soup.find(class_="display-posts-listing").find_all("a", class_="title")

    for link in links:
        if "Actualizare zilnică" in link.text:
            url = link["href"]
            break

    soup = BeautifulSoup(requests.get(url).content, "html.parser")

    date = soup.find(class_="post-date").find(class_="meta-text").text.strip()
    date = vaxutils.clean_date(date, "%b %d, %Y")

    paragraphs = soup.find(class_="entry-content").find_all("p")

    strong_tags = None
    for paragraph in paragraphs:
        if "Număr total de persoane vaccinate" in paragraph.text:
            strong_tags = paragraph.find_all("strong")

    if strong_tags is None:
        # Previously this case crashed with a NameError on `count`.
        raise ValueError(f"No vaccination total paragraph found at {url}")

    count = "".join(tag.text for tag in strong_tags)
    count = vaxutils.clean_count(count)

    vaxutils.increment(
        location="Romania",
        total_vaccinations=count,
        date=date,
        source_url=url,
        vaccine="Pfizer/BioNTech"
    )
Пример #4
0
def main():
    """Download Bermuda's vaccination PDF and record the total doses.

    The PDF is fetched with ``curl`` into ``bermuda.pdf``, the text of its
    first page is extracted, and the temporary file is always removed
    afterwards, even when downloading or parsing fails.

    Raises:
        subprocess.CalledProcessError: if ``curl`` exits with a non-zero code.
    """
    import subprocess

    url = "https://www.gov.bm/sites/default/files/COVID-19%20Vaccination%20Updates.pdf"
    pdf_path = "bermuda.pdf"

    try:
        # Argument list instead of a shell string: no shell interpolation,
        # and a failed download is reported instead of silently ignored.
        subprocess.run(["curl", url, "-o", pdf_path, "-s"], check=True)

        with open(pdf_path, "rb") as pdf_file:
            pdf_reader = PyPDF2.PdfFileReader(pdf_file)
            text = pdf_reader.getPage(0).extractText()
    finally:
        # The download may have failed before creating the file.
        try:
            os.remove(pdf_path)
        except FileNotFoundError:
            pass

    regex = r"VACCINATION CENTRE(.*?)Total Vaccines Administered"
    total_vaccinations = re.search(regex, text)
    total_vaccinations = vaxutils.clean_count(total_vaccinations.group(1))

    regex = r"As of (\w+ \d+, 20\d+)"
    date = re.search(regex, text)
    date = vaxutils.clean_date(date.group(1), "%B %d, %Y")

    vaxutils.increment(
        location="Bermuda",
        total_vaccinations=total_vaccinations,
        date=date,
        source_url=url,
        vaccine="Pfizer/BioNTech"
    )
Пример #5
0
def main():
    """Record Estonia's vaccination count from the latest "COVID-19 blogi" post.

    The news listing is scanned for the first heading mentioning
    "COVID-19 blogi"; that post supplies both the date and the count. If no
    heading matches, the listing page itself is parsed.
    """
    url = "https://www.terviseamet.ee/et/uudised"
    news = BeautifulSoup(requests.get(url).content, "html.parser")

    for heading in news.find_all("h2", class_="views-field-title"):
        if "COVID-19 blogi" not in heading.text:
            continue
        url = "https://www.terviseamet.ee" + heading.find("a")["href"]
        break

    post = BeautifulSoup(requests.get(url).content, "html.parser")

    raw_date = post.find(class_="field-name-post-date").text
    date = vaxutils.clean_date(raw_date, "%d.%m.%Y")

    # Text node holding the sentence with the vaccination figure.
    sentence = post.find(
        string=re.compile(r"Eestis on COVID-19 vastu vaktsineerimisi"))
    raw_count = re.search(r"tehtud ([\d\s]+) inimesele", sentence).group(1)
    count = vaxutils.clean_count(raw_count)

    vaxutils.increment(location="Estonia",
                       total_vaccinations=count,
                       date=date,
                       source_url=url,
                       vaccine="Pfizer/BioNTech")
Пример #6
0
def main():
    """Record Luxembourg's administered doses from the newest daily report PDF.

    The PDF link is taken from the first ``btn-primary`` anchor on the
    dataset page; the count and the date are read from the third table that
    tabula extracts from the PDF.
    """
    url = "https://data.public.lu/fr/datasets/covid-19-rapports-journaliers/#_"

    # Find the link to the newest PDF on the dataset page.
    listing = BeautifulSoup(urllib.request.urlopen(url), "html.parser")
    newest_link = listing.find("a", class_="btn-primary")
    pdf_path = newest_link["href"]

    # Extract every table; the one of interest is at a hard-coded position.
    tables = tabula.read_pdf(pdf_path, pages="all")
    df = pd.DataFrame(tables[2])

    is_doses_row = df["Unnamed: 0"] == "Nombre de doses administrées"
    raw_total = df.loc[is_doses_row, "Unnamed: 1"].values[0]
    total_vaccinations = vaxutils.clean_count(raw_total)

    # The date is embedded in the header of the second column.
    raw_date = re.search(r"\d\d\.\d\d\.202\d", df.columns[1]).group(0)
    date = vaxutils.clean_date(raw_date, "%d.%m.%Y")

    vaxutils.increment(location="Luxembourg",
                       total_vaccinations=total_vaccinations,
                       date=date,
                       source_url=pdf_path,
                       vaccine="Pfizer/BioNTech")
Пример #7
0
def main():
    """Record Luxembourg's vaccination figures from the newest daily report PDF.

    Three numeric values are expected in column "Unnamed: 2" of the third
    extracted table. After sorting ascending they are taken as fully
    vaccinated, vaccinated and total doses, in that order.
    """
    url = "https://data.public.lu/fr/datasets/covid-19-rapports-journaliers/#_"

    # Find the link to the newest PDF on the dataset page.
    listing = BeautifulSoup(requests.get(url).content, "html.parser")
    pdf_path = listing.find("a", class_="btn-primary")["href"]

    # Extract every table; the one of interest is at a hard-coded position.
    tables = tabula.read_pdf(pdf_path, pages="all")
    df = pd.DataFrame(tables[2])

    # Strip everything but digits, then drop cells that held no number.
    digits_only = df["Unnamed: 2"].str.replace(r"[^\d]", "", regex=True)
    values = sorted(pd.to_numeric(digits_only).dropna().astype(int))
    assert len(values) == 3

    people_fully_vaccinated = values[0]
    people_vaccinated = values[1]
    total_vaccinations = values[2]

    day_labels = df["Unnamed: 1"].str.replace("Journée du ", "")
    date = vaxutils.clean_date(day_labels.values[0], "%d.%m.%Y")

    vaxutils.increment(location="Luxembourg",
                       total_vaccinations=total_vaccinations,
                       people_vaccinated=people_vaccinated,
                       people_fully_vaccinated=people_fully_vaccinated,
                       date=date,
                       source_url=pdf_path,
                       vaccine="Moderna, Oxford/AstraZeneca, Pfizer/BioNTech")
Пример #8
0
def main():
    """Record Portugal's total vaccinations from the ArcGIS dashboard.

    The dashboard is opened in Chrome, given a fixed 4 seconds to render,
    and the indicator boxes are scanned for the total-doses and report-date
    labels. The value of each is the last line of the parent element's text.

    Raises:
        ValueError: if either indicator is missing from the rendered page.
    """
    url = "https://esriportugal.maps.arcgis.com/apps/opsdashboard/index.html#/acf023da9a0b4f9dbb2332c13f635829"

    # Options for Chrome WebDriver
    op = Options()
    # op.add_argument("--headless")

    count_text = None
    date_text = None

    with webdriver.Chrome(options=op) as driver:

        driver.get(url)
        time.sleep(4)

        for box in driver.find_elements_by_class_name("indicator-top-text"):

            if "Total de Vacinas Administradas" in box.text:
                count_text = box.find_element_by_xpath("..").text

            elif "Dados relativos ao boletim da DGS de" in box.text:
                date_text = box.find_element_by_xpath("..").text

    # Previously a missing indicator crashed below with a NameError.
    if count_text is None:
        raise ValueError(f"Vaccination total indicator not found at {url}")
    if date_text is None:
        raise ValueError(f"Report date indicator not found at {url}")

    count = re.search(r"\n([\d\s]+$)", count_text).group(1)
    count = vaxutils.clean_count(count)

    date = re.search(r"\n([\d/]+$)", date_text).group(1)
    date = vaxutils.clean_date(date, "%d/%m/%Y")

    vaxutils.increment(location="Portugal",
                       total_vaccinations=count,
                       date=date,
                       source_url=url,
                       vaccine="Pfizer/BioNTech")
Пример #9
0
def main():
    """Record Luxembourg's first- and second-dose counts from the newest PDF.

    Both counts come from column "Unnamed: 2" of the third extracted table,
    selected by their row label; the total is their sum.
    """
    url = "https://data.public.lu/fr/datasets/covid-19-rapports-journaliers/#_"

    # Find the link to the newest PDF on the dataset page.
    listing = BeautifulSoup(requests.get(url).content, "html.parser")
    pdf_path = listing.find("a", class_="btn-primary")["href"]

    # Extract every table; the one of interest is at a hard-coded position.
    tables = tabula.read_pdf(pdf_path, pages="all")
    df = pd.DataFrame(tables[2])

    def row_value(label):
        """Return the "Unnamed: 2" cell of the first row labelled *label*."""
        return df.loc[df["Unnamed: 0"] == label, "Unnamed: 2"].values[0]

    people_vaccinated = vaxutils.clean_count(
        row_value("Personnes vaccinées - Dose 1"))
    people_fully_vaccinated = vaxutils.clean_count(
        row_value("Personnes vaccinées - Dose 2"))
    total_vaccinations = people_vaccinated + people_fully_vaccinated

    day_labels = df["Unnamed: 1"].str.replace("Journée du ", "")
    date = vaxutils.clean_date(day_labels.values[0], "%d.%m.%Y")

    vaxutils.increment(location="Luxembourg",
                       total_vaccinations=total_vaccinations,
                       people_vaccinated=people_vaccinated,
                       people_fully_vaccinated=people_fully_vaccinated,
                       date=date,
                       source_url=pdf_path,
                       vaccine="Pfizer/BioNTech")
Пример #10
0
def connect_parse_data(source: str) -> pd.Series:
    """Load *source* in headless Chrome and read the dose counters and date.

    Args:
        source: URL of the dashboard page to open.

    Returns:
        A Series with ``people_vaccinated``, ``people_fully_vaccinated`` and
        ``date``.

    Raises:
        ValueError: if the first-dose or second-dose counter block is missing.
    """
    op = Options()
    op.add_argument("--headless")

    people_vaccinated = None
    people_fully_vaccinated = None

    with webdriver.Chrome(options=op) as driver:
        driver.get(source)
        # Fixed wait to let the page render its counters.
        time.sleep(10)

        date = driver.find_element_by_class_name(
            "as_of").find_element_by_tag_name("span").text
        date = vaxutils.clean_date(date, "%d.%m.%Y")

        for elem in driver.find_elements_by_class_name("counter_block"):
            if "1 ДОЗУ" in elem.text:
                people_vaccinated = elem.find_element_by_tag_name("h2").text
            if "2 ДОЗИ" in elem.text:
                people_fully_vaccinated = elem.find_element_by_tag_name(
                    "h2").text

    # Previously a missing counter crashed below with a NameError.
    if people_vaccinated is None or people_fully_vaccinated is None:
        raise ValueError(f"Dose counters not found at {source}")

    data = {
        "people_vaccinated": vaxutils.clean_count(people_vaccinated),
        "people_fully_vaccinated":
        vaxutils.clean_count(people_fully_vaccinated),
        "date": date,
    }
    return pd.Series(data=data)
Пример #11
0
def main():
    """Record the Isle of Man's first- and second-dose vaccination counts.

    The box containing "Total first vaccinations" is located on the updates
    page; its ``<strong>`` text supplies the date and one of its paragraphs
    supplies both counts. The total is the sum of the two counts.

    Raises:
        ValueError: if no paragraph of the box holds the vaccination counts.
    """
    url = "https://covid19.gov.im/general-information/latest-updates/"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")

    vax_box = soup.find(string=re.compile("Total first vaccinations")).parent.parent

    date = vax_box.find("strong").text
    date = vaxutils.clean_date(date, "%d %B %Y")

    data = None
    for p in vax_box.find_all("p"):
        if "Total first vaccinations" in p.text:
            data = p.text

    if data is None:
        # Previously this case crashed with a NameError on `data`.
        raise ValueError(f"No vaccination counts paragraph found at {url}")

    people_vaccinated, people_fully_vaccinated = re.search(
        r"Total first vaccinations:\xa0 ([\d,]+)Total second vaccinations:\xa0 ([\d,]+)",
        data).groups()

    people_vaccinated = vaxutils.clean_count(people_vaccinated)
    people_fully_vaccinated = vaxutils.clean_count(people_fully_vaccinated)
    total_vaccinations = people_vaccinated + people_fully_vaccinated

    vaxutils.increment(
        location="Isle of Man",
        total_vaccinations=total_vaccinations,
        people_vaccinated=people_vaccinated,
        people_fully_vaccinated=people_fully_vaccinated,
        date=date,
        source_url=url,
        vaccine="Pfizer/BioNTech"
    )
Пример #12
0
def main():
    """Record Italy's total vaccinations from the Power BI report.

    Waits up to 25 seconds each for the "value" and "title" elements. If
    either wait times out, the function returns without recording anything.
    """
    url = "https://app.powerbi.com/view?r=eyJrIjoiMzg4YmI5NDQtZDM5ZC00ZTIyLTgxN2MtOTBkMWM4MTUyYTg0IiwidCI6ImFmZDBhNzVjLTg2NzEtNGNjZS05MDYxLTJjYTBkOTJlNDIyZiIsImMiOjh9"
    timeout = 25  # seconds to wait for each element

    chrome_options = Options()
    chrome_options.add_argument("--headless")

    with webdriver.Chrome(options=chrome_options) as driver:

        def wait_for_text(class_name):
            """Return the text of the first element with *class_name*."""
            condition = EC.presence_of_element_located(
                (By.CLASS_NAME, class_name))
            return WebDriverWait(driver, timeout).until(condition).text

        driver.get(url)

        try:
            raw_count = wait_for_text("value")
        except TimeoutException:
            return
        count = vaxutils.clean_count(raw_count)

        try:
            raw_date = wait_for_text("title")
        except TimeoutException:
            return
        # NOTE(review): the format pairs %H with %p; strptime ignores %p
        # unless %I is used — confirm only the date part matters here.
        date = vaxutils.clean_date(raw_date, "%m/%d/%Y %H:%M:%S %p")

        vaxutils.increment(location="Italy",
                           total_vaccinations=count,
                           date=date,
                           source_url=url,
                           vaccine="Pfizer/BioNTech")
Пример #13
0
def connect_parse_data(source: str) -> pd.Series:
    """Fetch *source* with browser-like headers and parse total and date.

    Args:
        source: URL of the page to scrape.

    Returns:
        A Series with ``total_vaccinations`` and ``date``.
    """
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.16; rv:86.0) Gecko/20100101 Firefox/86.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    response = requests.get(source, headers=browser_headers)
    page = BeautifulSoup(response.content, "html.parser")

    raw_total = page.find(class_="repart-stlucia").text
    total_vaccinations = vaxutils.clean_count(raw_total)

    heading = page.find(class_="h2-blue").text
    raw_date = re.search(r"\w+ \d+, 202\d", heading).group(0)
    date = vaxutils.clean_date(raw_date, "%B %d, %Y")

    return pd.Series(data={
        "total_vaccinations": total_vaccinations,
        "date": date,
    })
Пример #14
0
def main():
    """Record Estonia's first- and second-dose counts from the latest blog post.

    The news listing is scanned for the first heading mentioning
    "COVID-19 blogi"; both counts are extracted from that post's text by a
    single regex, and the total is their sum.
    """
    url = "https://www.terviseamet.ee/et/uudised"
    news = BeautifulSoup(requests.get(url).content, "html.parser")

    for heading in news.find_all("h2", class_="views-field-title"):
        if "COVID-19 blogi" not in heading.text:
            continue
        url = "https://www.terviseamet.ee" + heading.find("a")["href"]
        break

    post = BeautifulSoup(requests.get(url).content, "html.parser")
    body = post.find(class_="node-published").text

    pattern = r"Eestis on COVID-19 vastu vaktsineerimisi tehtud ([\d\s]+) inimesele, kaks doosi on saanud ([\d\s]+) inimest"
    first_dose, second_dose = re.search(pattern, body).groups()

    people_vaccinated = vaxutils.clean_count(first_dose)
    people_fully_vaccinated = vaxutils.clean_count(second_dose)
    total_vaccinations = people_vaccinated + people_fully_vaccinated

    raw_date = post.find(class_="field-name-post-date").text
    date = vaxutils.clean_date(raw_date, "%d.%m.%Y")

    vaxutils.increment(location="Estonia",
                       total_vaccinations=total_vaccinations,
                       people_vaccinated=people_vaccinated,
                       people_fully_vaccinated=people_fully_vaccinated,
                       date=date,
                       source_url=url,
                       vaccine="Pfizer/BioNTech")
def main():
    """Record Guatemala's vaccination figures from the Shiny dashboard.

    The dashboard is opened in Chrome, the syringe tab is clicked, and the
    applied-doses figure plus the date shown in the logo area are read.
    Second doses are recorded as 0, so the total equals the first-dose count.
    """
    location = "Guatemala"
    source_url = "https://gtmvigilanciacovid.shinyapps.io/3869aac0fb95d6baf2c80f19f2da5f98"
    vaccine = "Moderna"

    with webdriver.Chrome() as driver:
        driver.get(source_url)
        time.sleep(2)  # fixed wait before the tab can be clicked
        driver.find_element_by_class_name("fa-syringe").click()
        time.sleep(4)  # fixed wait for the tab content
        logo_text = driver.find_element_by_class_name("logo").text
        doses_box = driver.find_element_by_id("dosisaplicadas")
        dose1 = doses_box.find_element_by_tag_name("h3").text

    people_vaccinated = vaxutils.clean_count(dose1)
    people_fully_vaccinated = 0
    total_vaccinations = people_vaccinated + people_fully_vaccinated

    raw_date = re.search(r"\d+/\d+/202\d", logo_text).group(0)
    date = vaxutils.clean_date(raw_date, "%d/%m/%Y")

    vaxutils.increment(
        location=location,
        total_vaccinations=total_vaccinations,
        people_vaccinated=people_vaccinated,
        people_fully_vaccinated=people_fully_vaccinated,
        date=date,
        source_url=source_url,
        vaccine=vaccine,
    )
Пример #16
0
def parse_date(soup: BeautifulSoup) -> str:
    """Extract the report date from the "Vaccination Data" heading.

    Args:
        soup: Parsed page whose ``<h3>`` headings are searched.

    Returns:
        The date as returned by ``vaxutils.clean_date``.

    Raises:
        ValueError: if no ``<h3>`` contains "Vaccination Data".
    """
    for h3 in soup.find_all("h3"):
        if "Vaccination Data" in h3.text:
            break
    else:
        # Previously the loop fell through to the last <h3> on the page, or
        # raised NameError when the page had none.
        raise ValueError('No <h3> containing "Vaccination Data" found')
    date = re.search(r"as of (\d+ \w+ \d+)", h3.text).group(1)
    date = vaxutils.clean_date(date, "%d %b %Y")
    return date
Пример #17
0
def main():
    """Record Jersey's vaccination statistics from the government page.

    The three counts are read from the ``govstyleTable-default`` table by
    row label; the date comes from the "Week ending" sentence in the page
    text.
    """
    url = "https://www.gov.je/Health/Coronavirus/Vaccine/Pages/VaccinationStatistics.aspx"
    page = BeautifulSoup(requests.get(url).content, "html.parser")

    stats_table = page.find(class_="govstyleTable-default")
    df = pd.read_html(str(stats_table))[0]

    def count_for(label):
        """Return the integer in column 1 of the first row labelled *label*."""
        return int(df.loc[df[0] == label, 1].values[0])

    total_vaccinations = count_for("Total number of doses")
    people_vaccinated = count_for("Total number of first dose vaccinations")
    people_fully_vaccinated = count_for(
        "Total number of second dose vaccinations")

    week_ending = re.search(
        r"Data applies to: Week ending (\d[\w\s]+\d{4})", page.text).group(1)
    date = vaxutils.clean_date(week_ending, "%d %B %Y")

    vaxutils.increment(location="Jersey",
                       total_vaccinations=total_vaccinations,
                       people_vaccinated=people_vaccinated,
                       people_fully_vaccinated=people_fully_vaccinated,
                       date=date,
                       source_url=url,
                       vaccine="Oxford/AstraZeneca, Pfizer/BioNTech")
Пример #18
0
def parse_data(data: dict) -> pd.Series:
    """Build a Series with the date and total vaccinations from *data*.

    The total is the first cell of the sheet named "Vakcinisani ukupno";
    the date is the first cell of the sheet at index 1.
    """
    total_idx = data["sheetNames"].index("Vakcinisani ukupno")
    sheets = data["data"]

    total_vaccinations = vaxutils.clean_count(sheets[total_idx][0][0])
    date = vaxutils.clean_date(sheets[1][0][0], "%d.%m.%Y.")

    return pd.Series(
        data={"date": date, "total_vaccinations": total_vaccinations})
Пример #19
0
def parse_data(data: dict) -> pd.Series:
    """Build a Series from the ``updated`` and ``progress`` fields of *data*.

    ``updated`` is parsed as a ``%Y/%m/%d`` date; ``progress`` is passed
    through unchanged as the total vaccinations.
    """
    record = {
        "date": vaxutils.clean_date(data["updated"], "%Y/%m/%d"),
        "total_vaccinations": data["progress"],
    }
    return pd.Series(data=record)
Пример #20
0
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Read the three odometer counters and the date from *soup*.

    The first three ``odometer`` elements are taken, in order, as total
    vaccinations, people vaccinated and people fully vaccinated; each value
    comes from the element's ``data-count`` attribute.
    """
    odometers = soup.find_all(class_="odometer")

    counter_text = soup.find(class_="counter").text
    raw_date = re.search(r"[\d\.]{10}", counter_text).group(0)
    date = vaxutils.clean_date(raw_date, "%d.%m.%Y")

    total, first_dose, second_dose = (
        int(odometers[i]["data-count"]) for i in range(3))

    record = {
        "total_vaccinations": total,
        "people_vaccinated": first_dose,
        "people_fully_vaccinated": second_dose,
        "date": date,
    }
    return pd.Series(data=record)
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Read dose counts and the date from the spans of the first table.

    The third-last and second-last ``<span>`` hold the first- and
    second-dose counts; the first ``<span>`` holds the date. The total is
    the sum of the two counts.
    """

    def digits_of(span) -> int:
        """Return the integer formed by the digits in *span*'s text."""
        return int(re.sub(r"[^\d]", "", span.text))

    # Filled key by key so the stored values keep the same types as before.
    data = pd.Series(dtype="int")

    cells = soup.find("table").find_all("span")

    data["people_vaccinated"] = digits_of(cells[-3])
    data["people_fully_vaccinated"] = digits_of(cells[-2])
    data["total_vaccinations"] = (
        data["people_vaccinated"] + data["people_fully_vaccinated"])

    raw_date = re.search(r"[\d-]{10}", cells[0].text).group(0)
    data["date"] = vaxutils.clean_date(raw_date, "%d-%m-%Y")

    return data
Пример #22
0
def read(source: str) -> pd.Series:
    """Scrape *source* for the vaccination total and its date.

    The total is also reported as people vaccinated; people fully
    vaccinated is set to 0.
    """
    page = vaxutils.get_soup(source)

    title_text = page.find(class_="stats-decoration-title").text
    total_vaccinations = vaxutils.clean_count(title_text)

    caption = page.find(class_="stats-decoration-text").text
    raw_date = re.search(r"\d+ \w+ 202\d", caption).group(0)
    date = vaxutils.clean_date(raw_date, "%d %B %Y")

    record = {
        "total_vaccinations": total_vaccinations,
        "people_vaccinated": total_vaccinations,
        "people_fully_vaccinated": 0,
        "date": date,
    }
    return pd.Series(data=record)
Пример #23
0
def main():
    """Record India's total vaccinations from the health ministry home page.

    The count is the text of the ``coviddata`` element; the date is parsed
    from the ``<h5>`` inside the ``site-dashboard`` element.
    """
    url = "https://www.mohfw.gov.in/"
    page = BeautifulSoup(requests.get(url).content, "html.parser")

    raw_total = page.find(class_="coviddata").text
    total_vaccinations = vaxutils.clean_count(raw_total)

    dashboard_heading = page.find(id="site-dashboard").find("h5").text
    raw_date = re.search(r"\d+\s\w+\s+202\d", dashboard_heading).group(0)
    date = vaxutils.clean_date(raw_date, "%d %B %Y")

    vaxutils.increment(location="India",
                       total_vaccinations=total_vaccinations,
                       date=date,
                       source_url=url,
                       vaccine="Covaxin, Covishield")
Пример #24
0
def connect_parse_data(source: str) -> pd.Series:
    """Load *source* in headless Chrome and read the total and update date.

    Args:
        source: URL of the page to open.

    Returns:
        A Series with ``total_vaccinations`` and ``date``.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")

    with webdriver.Chrome(options=chrome_options) as driver:
        driver.get(source)
        time.sleep(5)  # fixed wait for the page to render

        raw_total = driver.find_element_by_id("counter1").text

        updated_label = driver.find_element_by_id("pupdateddate").text
        date = vaxutils.clean_date(
            updated_label.replace("Updated ", ""), "%d %b, %Y")

    return pd.Series(data={
        "total_vaccinations": vaxutils.clean_count(raw_total),
        "date": date,
    })
Пример #25
0
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Read first- and second-dose counts and the report date from *soup*.

    The first two ``count`` elements supply the counts via their
    ``data-count`` attribute; the total is their sum.
    """
    counters = soup.find_all(class_="count")
    people_vaccinated = int(counters[0]["data-count"])
    people_fully_vaccinated = int(counters[1]["data-count"])
    assert people_vaccinated >= people_fully_vaccinated
    total_vaccinations = people_vaccinated + people_fully_vaccinated

    report_text = soup.find(class_="reportdate").text
    raw_date = re.search(r"\d+ \w+ 202\d", report_text).group(0)
    date = vaxutils.clean_date(raw_date, "%d %b %Y")

    return pd.Series(data={
        "total_vaccinations": total_vaccinations,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "date": date,
    })
Пример #26
0
def main():
    """Record the Isle of Man's total vaccinations from the updates page.

    Both the date and the count are read from the parent of the element
    with class ``fa-syringe``.
    """
    url = "https://covid19.gov.im/general-information/latest-updates/"
    page = BeautifulSoup(requests.get(url).content, "html.parser")

    raw_date = page.find(class_="fa-syringe").parent.find("strong").text
    date = vaxutils.clean_date(raw_date, "%d %B %Y")

    box_text = page.find(class_="fa-syringe").parent.text
    match = re.search(r"Total vaccinations:\s+(\d+)", box_text)
    total_vaccinations = vaxutils.clean_count(match.group(1))

    vaxutils.increment(location="Isle of Man",
                       total_vaccinations=total_vaccinations,
                       date=date,
                       source_url=url,
                       vaccine="Pfizer/BioNTech")
Пример #27
0
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Read the three counters and the source date from *soup*.

    The first three ``counter`` elements are taken, in order, as total
    vaccinations, people vaccinated and people fully vaccinated.
    """
    counters = soup.find_all(class_="counter")
    total_vaccinations = int(counters[0].text)
    people_vaccinated = int(counters[1].text)
    people_fully_vaccinated = int(counters[2].text)
    assert total_vaccinations >= people_vaccinated
    assert people_vaccinated >= people_fully_vaccinated

    source_note = soup.find(class_="fuente").text
    raw_date = re.search(r"\d{2}-\d{2}-\d{4}", source_note).group(0)
    date = vaxutils.clean_date(raw_date, "%d-%m-%Y")

    return pd.Series(data={
        "total_vaccinations": total_vaccinations,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "date": date,
    })
Пример #28
0
def main():
    """Record Romania's one-dose and two-dose figures from the daily update.

    The second occurrence of each "persoane vaccinate cu N doz" phrase in
    the post text is used. People vaccinated is the one-dose figure plus
    the two-dose figure; the total adds the two-dose figure once more.
    """
    url = "https://vaccinare-covid.gov.ro/comunicate-oficiale/"
    listing = BeautifulSoup(requests.get(url).content, "html.parser")

    anchors = listing.find(class_="display-posts-listing").find_all(
        "a", class_="title")
    # First matching post wins; fall back to the listing URL when none match.
    url = next(
        (a["href"] for a in anchors if "Actualizare zilnică" in a.text), url)

    post = BeautifulSoup(requests.get(url).content, "html.parser")

    raw_date = post.find(class_="post-date").find(
        class_="meta-text").text.strip()
    date = vaxutils.clean_date(raw_date, "%b %d, %Y")

    main_text = post.find(class_="entry-content-text").text

    two_dose_hits = re.findall(r"[\d.]+ persoane vaccinate cu 2 doz",
                               main_text)
    two_dose_raw = two_dose_hits[1].replace(
        " persoane vaccinate cu 2 doz", "")
    people_fully_vaccinated = vaxutils.clean_count(two_dose_raw)

    one_dose_hits = re.findall(r"[\d.]+ persoane vaccinate cu 1 doz",
                               main_text)
    one_dose_raw = one_dose_hits[1].replace(
        " persoane vaccinate cu 1 doz", "")
    one_dose_only = vaxutils.clean_count(one_dose_raw)
    people_vaccinated = one_dose_only + people_fully_vaccinated

    total_vaccinations = people_vaccinated + people_fully_vaccinated

    vaxutils.increment(location="Romania",
                       total_vaccinations=total_vaccinations,
                       people_vaccinated=people_vaccinated,
                       people_fully_vaccinated=people_fully_vaccinated,
                       date=date,
                       source_url=url,
                       vaccine="Pfizer/BioNTech")
Пример #29
0
def main():
    """Record Romania's figures from the PDF linked in the daily update post.

    The last link of the post body is read as a PDF. The "Total" row of its
    first table must yield exactly two space-separated numbers in column 2:
    one-dose-only and two-dose counts.
    """
    url = "https://vaccinare-covid.gov.ro/comunicate-oficiale/"
    listing = BeautifulSoup(requests.get(url).content, "html.parser")

    anchors = listing.find(class_="display-posts-listing").find_all(
        "a", class_="title")
    # First matching post wins; fall back to the listing URL when none match.
    url = next(
        (a["href"] for a in anchors if "Actualizare zilnică" in a.text), url)

    post = BeautifulSoup(requests.get(url).content, "html.parser")

    raw_date = post.find(class_="post-date").find(
        class_="meta-text").text.strip()
    date = vaxutils.clean_date(raw_date, "%b %d, %Y")

    # From here on `url` points at the PDF; it is also the recorded source.
    url = post.find(class_="entry-content-text").find_all("a")[-1]["href"]

    tables = tabula.read_pdf(
        url, pages="all", pandas_options={"dtype": str, "header": None})
    df = tables[0]

    total_rows = df[df[0] == "Total"].dropna()
    split_cells = total_rows[2].str.split(" ")
    values = [
        vaxutils.clean_count(cell)
        for cell in pd.core.common.flatten(split_cells)
    ]
    assert len(values) == 2

    one_dose_only = values[0]
    people_fully_vaccinated = values[1]
    people_vaccinated = one_dose_only + people_fully_vaccinated
    total_vaccinations = people_fully_vaccinated + people_vaccinated

    vaxutils.increment(location="Romania",
                       total_vaccinations=total_vaccinations,
                       people_vaccinated=people_vaccinated,
                       people_fully_vaccinated=people_fully_vaccinated,
                       date=date,
                       source_url=url,
                       vaccine="Moderna, Oxford/AstraZeneca, Pfizer/BioNTech")
Пример #30
0
def read(source: str) -> pd.Series:
    """Fetch *source* with browser-like headers and parse vaccination data.

    All values are extracted by regex from the first ``<p>`` inside the
    ``div`` with id ``data``.

    Returns:
        A Series with ``total_vaccinations``, ``people_vaccinated``,
        ``people_fully_vaccinated`` and ``date``.
    """
    browser_headers = {
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.16; rv:86.0) Gecko/20100101 Firefox/86.0",
        "Accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    response = requests.get(source, headers=browser_headers)
    page = BeautifulSoup(response.content, "html.parser")

    text = page.find("div", id="data").find("p").text

    def first_group(pattern):
        """Return group 1 of the first match of *pattern* in the text."""
        return re.search(pattern, text).group(1)

    date = vaxutils.clean_date(
        first_group(r"На сегодня \(([\d\.]{8})\)"), "%d.%m.%y")

    people_vaccinated = vaxutils.clean_count(first_group(
        r"([\d\s]+) чел\. \(.*% от населения\) - привито хотя бы одним компонентом вакцины"))

    people_fully_vaccinated = vaxutils.clean_count(first_group(
        r"([\d\s]+) чел\. \(.*% от населения\) - полностью привито"))

    total_vaccinations = vaxutils.clean_count(
        first_group(r"([\d\s]+) шт\. - всего прививок сделано"))

    return pd.Series({
        "total_vaccinations": total_vaccinations,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "date": date,
    })