Пример #1
0
def read(source: str) -> pd.Series:
    """Build a one-record series from the Infogram linked on ``source``.

    The page at ``source`` is fetched, ``parse_infogram_link`` extracts a
    second URL from it, and that second page is fetched and handed to
    ``parse_infogram_data``. The doses and the date are then derived from the
    parsed data.

    Args:
        source: URL of the page that links to the Infogram.

    Returns:
        Series with ``total_vaccinations``, ``date`` and ``source_url``
        (the latter being ``source`` itself, not the Infogram URL).
    """
    landing_page = vaxutils.get_soup(source)
    infogram_url = parse_infogram_link(landing_page)
    infogram_page = vaxutils.get_soup(infogram_url)
    data = parse_infogram_data(infogram_page)

    record = {
        "total_vaccinations": parse_infogram_doses(data),
        "date": parse_infogram_date(data),
        "source_url": source,
    }
    return pd.Series(record)
Пример #2
0
def read(source: str, num_pages_limit: int = 10):
    """Return the first non-``None`` result of ``parse_data`` over paged URLs.

    Pages ``{source}/1/`` up to ``{source}/{num_pages_limit - 1}/`` are
    fetched in order (the upper bound is exclusive), and the search stops at
    the first page for which ``parse_data`` returns something.

    Args:
        source: Base URL; the page number is appended as a path segment.
        num_pages_limit: Exclusive upper bound on the page number to try.

    Returns:
        Whatever ``parse_data`` returned for the first page with data.

    Raises:
        Exception: If no visited page yields data.
    """
    page_urls = (f"{source}/{page_nr}/" for page_nr in range(1, num_pages_limit))
    for url in page_urls:
        ds = parse_data(vaxutils.get_soup(url))
        if ds is None:
            # Nothing usable on this page; try the next one.
            continue
        return ds
    raise Exception(
        "No news page with vaccination data was found. Check URLs.")
Пример #3
0
def connect_parse_data(source: str) -> pd.Series:
    """Read the ``count-up`` counter from ``source`` and report it as a series.

    Args:
        source: URL of the page holding the counter element.

    Returns:
        Series with ``total_vaccinations`` and ``people_vaccinated``, both set
        to the same cleaned counter value.
    """
    page = vaxutils.get_soup(source)
    counter_text = page.find(class_="count-up").text
    count = vaxutils.clean_count(counter_text)

    # Only one figure is read from the page; it is used for both metrics.
    return pd.Series(
        data={
            "total_vaccinations": count,
            "people_vaccinated": count,
        })
Пример #4
0
def read(source: str, last_update: str, num_pages_limit: int = 10):
    """Collect records dated on or after ``last_update`` from paged URLs.

    Pages ``{source}/1/`` up to ``{source}/{num_pages_limit - 1}/`` are
    visited in order. Paging stops early once a page contains at least one
    record whose ``date`` is not later than ``last_update``.

    Args:
        source: Base URL; the page number is appended as a path segment.
        last_update: Date threshold, compared directly against each record's
            ``"date"`` value.
        num_pages_limit: Exclusive upper bound on the page number to try.

    Returns:
        A ``DataFrame`` of the records with ``date >= last_update``, or
        ``None`` when there are none.
    """
    collected = []
    for page_nr in range(1, num_pages_limit):
        page_soup = vaxutils.get_soup(f"{source}/{page_nr}/")
        page_records = parse_data(page_soup)
        if not page_records:
            # Page had no data; keep paging.
            continue
        collected.extend(page_records)
        # Once a page reaches back to the last update, older pages add nothing.
        if any(rec["date"] <= last_update for rec in page_records):
            break

    fresh = [rec for rec in collected if rec["date"] >= last_update]
    if not fresh:
        return None
    return pd.DataFrame(fresh)
Пример #5
0
def read(source: str) -> pd.Series:
    """Scrape the dose count and its date from the ``stats-decoration`` elements.

    Args:
        source: URL of the page to scrape.

    Returns:
        Series with ``total_vaccinations``, ``people_vaccinated`` (same value
        as the total), ``people_fully_vaccinated`` (always ``0``) and ``date``.
    """
    page = vaxutils.get_soup(source)

    title = page.find(class_="stats-decoration-title")
    doses = vaxutils.clean_count(title.text)

    # The caption embeds a date such as "<day> <month name> 202x".
    caption = page.find(class_="stats-decoration-text").text
    raw_date = re.search(r"\d+ \w+ 202\d", caption).group(0)

    record = {
        "total_vaccinations": doses,
        "people_vaccinated": doses,
        "people_fully_vaccinated": 0,
        "date": vaxutils.clean_date(raw_date, "%d %B %Y"),
    }
    return pd.Series(data=record)
Пример #6
0
def read(source: str) -> pd.Series:
    """Sum the healthcare-worker and elderly counts found on ``source``.

    Every element with class ``aly_tx_center`` is inspected. The count is
    taken from the element's first ``<font>`` tag when its text contains the
    healthcare-worker marker ("医療従事者等:") or the elderly marker ("高齢者:").
    If several elements carry the same marker, the last one wins.

    Args:
        source: URL of the page to scrape.

    Returns:
        Series with a single ``total_vaccinations`` entry, the sum of both
        counts.

    Raises:
        ValueError: If either figure is not present on the page.
    """
    soup = vaxutils.get_soup(source)
    blocks = soup.find_all(class_="aly_tx_center")

    # Start from None so a missing marker is detected explicitly instead of
    # surfacing later as an UnboundLocalError.
    healthcare_workers = None
    elderly = None

    for block in blocks:
        block_text = block.text
        if "医療従事者等:" in block_text:
            healthcare_workers = vaxutils.clean_count(block.find("font").text)
        elif "高齢者:" in block_text:
            elderly = vaxutils.clean_count(block.find("font").text)

    found = {"healthcare_workers": healthcare_workers, "elderly": elderly}
    missing = [name for name, value in found.items() if value is None]
    if missing:
        raise ValueError(
            f"Could not find figure(s) {missing} in {source}; "
            "the page layout may have changed."
        )

    return pd.Series(data={
        "total_vaccinations": healthcare_workers + elderly,
    })
Пример #7
0
def parse_vaccinations(elem) -> dict:
    """Extract vaccination counts from the news article linked from ``elem``.

    The article URL is the ``href`` of the first ``<a>`` inside the closest
    ancestor of ``elem`` with class ``card``. The text of all ``<p>`` tags in
    the article's ``<article>`` element is searched with one regular
    expression per metric.

    Args:
        elem: A parsed HTML element located inside a ``card`` element.

    Returns:
        Dict holding the cleaned count for each metric whose pattern matched;
        metrics without a match are omitted.
    """
    # Get news text
    card = elem.find_parent(class_="card")
    url = card.find("a").get("href")
    article = vaxutils.get_soup(url).find("article")
    text = "\n".join(p.text for p in article.find_all("p"))

    # NOTE(review): the first pattern is a substring of the other two, so it
    # matches whichever "疫苗共有…人次" phrase comes first in the text - confirm
    # that this is the intended total.
    patterns = {
        "total_vaccinations": r"疫苗共有(?P<count>[\d,]*)人次",
        "people_vaccinated": r"1劑疫苗共有(?P<count>[\d,]*)人次",
        "people_fully_vaccinated": r"2劑疫苗共有(?P<count>[\d,]*)人次",
    }

    # Find metrics
    metrics = {}
    for name, pattern in patterns.items():
        match = re.search(pattern, text)
        if match:
            metrics[name] = vaxutils.clean_count(match.group("count"))
    return metrics
Пример #8
0
def connect_parse_data(source: str) -> pd.Series:
    """Read the three vaccination metrics from the HTML tables on ``source``.

    Each table parsed from the page is identified by the text of its top-left
    cell, and the value is taken from the cell directly below it (row 1,
    column 0). If several tables share a header, the last one wins.

    Args:
        source: URL of the page to scrape.

    Returns:
        Series with ``total_vaccinations``, ``people_vaccinated`` and
        ``people_fully_vaccinated``.

    Raises:
        ValueError: If any of the three metrics has no matching table.
    """
    soup = vaxutils.get_soup(source)
    tables = pd.read_html(str(soup))

    # Start from None so a missing table is detected explicitly instead of
    # surfacing later as an UnboundLocalError.
    total_vaccinations = None
    people_vaccinated = None
    people_fully_vaccinated = None

    for table in tables:
        header = table.iloc[0, 0]

        if header == "عدد متلقي اللقاح":
            people_vaccinated = vaxutils.clean_count(table.iloc[1, 0])

        elif header == "عدد المطعمين بشكل كامل":
            people_fully_vaccinated = vaxutils.clean_count(table.iloc[1, 0])

        elif header == "عدد الجرعات":
            total_vaccinations = vaxutils.clean_count(table.iloc[1, 0])

    data = {
        "total_vaccinations": total_vaccinations,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
    }
    missing = [name for name, value in data.items() if value is None]
    if missing:
        raise ValueError(
            f"Could not find metric(s) {missing} in the tables of {source}; "
            "the page layout may have changed."
        )

    return pd.Series(data=data)
Пример #9
0
def read(source: str):
    """Return a series built from the document linked on ``source``.

    The page at ``source`` is fetched and ``parse_pdf_link`` resolves a URL
    from it (presumably that of a PDF report - confirm against the helper).
    The output of ``parse_data`` for that URL is wrapped in a ``pd.Series``.

    Args:
        source: URL of the page that links to the document.

    Returns:
        ``pd.Series`` constructed from the output of ``parse_data``.
    """
    landing_page = vaxutils.get_soup(source)
    document_url = parse_pdf_link(landing_page, source)
    return pd.Series(parse_data(document_url))