Пример #1
0
    def export(self):
        """Scrape the latest test count and add it to the stored series.

        Reads the existing series from ``self.output_path``, fetches the
        page at the Government of Benin URL, and sums the first two ``<h2>``
        headline figures found there. When both the summed count and the
        current local date are strictly greater than the maxima already
        stored, a new row is placed on top of the existing data and the
        result is passed to ``self.export_datafile``. Otherwise nothing is
        written.

        Raises:
            IndexError: If fewer than two headline figures are found.
            ValueError: If a headline figure cannot be parsed as an integer.
        """
        data = pd.read_csv(self.output_path)

        url = "https://www.gouv.bj/coronavirus/"
        soup = get_soup(url)

        # `attrs` maps attribute name -> required value, so it has to be a
        # dict; the previous set literal did not express a class filter.
        stats = soup.find_all("h2", attrs={"class": "h1 adapt white regular"})
        if len(stats) < 2:
            raise IndexError(
                f"Expected at least two headline figures at {url}, found {len(stats)}."
            )

        count = int(stats[0].text) + int(stats[1].text)
        date_str = localdatenow("Africa/Porto-Novo")

        # Only record a data point that advances both the count and the date.
        is_new_count = count > data["Cumulative total"].max()
        if is_new_count and date_str > data["Date"].max():
            new = pd.DataFrame({
                "Country": self.location,
                "Date": [date_str],
                "Cumulative total": count,
                "Source URL": url,
                "Source label": "Government of Benin",
                "Units": "tests performed",
            })

            df = pd.concat([new, data], sort=False)
            self.export_datafile(df)
Пример #2
0
    def export(self):
        """Scrape the latest test count and write the series back out.

        Reads the existing series from ``self.output_path``, fetches the
        CDCMOH page, and extracts the count from the first element matching
        the CSS selector below. When both the count and the current local
        date are strictly greater than the maxima already stored, a new row
        is placed on top of the existing data.

        ``self.export_datafile`` is called on every run, whether or not a
        new row was added.

        Raises:
            IndexError: If the selector matches nothing on the page.
        """
        data = pd.read_csv(self.output_path)

        url = "http://cdcmoh.gov.kh/"
        soup = get_soup(url)

        count = clean_count(soup.select("p+ div strong:nth-child(1)")[0].text)
        date_str = localdatenow("Asia/Phnom_Penh")

        # Only record a data point that advances both the count and the date.
        is_new_count = count > data["Cumulative total"].max()
        if is_new_count and date_str > data["Date"].max():
            new = pd.DataFrame({
                "Country": self.location,
                "Date": [date_str],
                "Cumulative total": count,
                "Source URL": url,
                "Source label": "CDCMOH",
                "Units": "tests performed",
            })

            data = pd.concat([new, data], sort=False)

        self.export_datafile(data)
Пример #3
0
 def _api_request(self):
     """Request aggregated dose statistics from ``self.source_url``.

     The query asks for three aggregates (``total_vaccinations``,
     ``dose_1``, ``dose_2``) over the records whose ``fecha`` falls between
     05:00:00 on the date returned by ``localdatenow(self.timezone)`` and
     04:59:59 on the following date.

     Returns:
         The value returned by ``request_json`` for that query.
     """
     date_low = localdatenow(self.timezone)
     date_up = localdatenow(self.timezone, sum_days=1)

     # Aggregations the service should compute, in its own query syntax.
     statistics = (
         "[{'onStatisticField':'total_dosis_adminsitradas','outStatisticFieldName':'total_vaccinations','statisticType':'sum'},"
         "{'onStatisticField':'total_primera_dosis','outStatisticFieldName':'dose_1','statisticType':'sum'},"
         "{'onStatisticField':'total_segunda_dosis','outStatisticFieldName':'dose_2','statisticType':'max'}]"
     )
     # Restrict the records to the time window described above.
     time_filter = f"fecha BETWEEN timestamp '{date_low} 05:00:00' AND timestamp '{date_up} 04:59:59'"

     return request_json(
         self.source_url,
         params={
             "f": "json",
             "outFields": "*",
             "outStatistics": statistics,
             "returnGeometry": "false",
             "where": time_filter,
         },
     )
Пример #4
0
 def pipe_checks(self, df: pd.DataFrame):
     """Sanity-check the ``Date`` column and return ``df`` unchanged.

     Raises:
         ValueError: If any ``Date`` is missing, or if the most recent
             ``Date`` is not greater than the date ``localdatenow`` returns
             for 20 days back.
     """
     n = 20
     dates = df.Date

     if dates.isna().any():
         raise ValueError("Some `Date` have NaN values!")

     # The data counts as fresh only when its latest date is past the cutoff.
     latest = dates.max()
     cutoff = localdatenow(minus_days=n)
     if latest > cutoff:
         return df

     raise ValueError(
         f"Data has not been updated for more than {n} days! Check source."
     )
Пример #5
0
 def pipe_age_checks(self, df: pd.DataFrame) -> pd.DataFrame:
     """Validate the age-group frame and return it unchanged.

     Checks run in order and the first failure raises:

     1. ``people_vaccinated_per_hundred`` has no value above 100.
     2. ``people_fully_vaccinated_per_hundred`` has no value above 100.
     3. ``monday`` is not earlier than "2021-02-08" and not later than the
        current date in the "America/Lima" timezone.
     4. ``location`` contains only "Peru".

     Raises:
         ValueError: If any of the checks above fails.
     """
     partial_over_limit = df.people_vaccinated_per_hundred > 100
     if partial_over_limit.any():
         raise ValueError(
             "Check `people_vaccinated_per_hundred` field! Found values above 100%."
         )

     full_over_limit = df.people_fully_vaccinated_per_hundred > 100
     if full_over_limit.any():
         raise ValueError(
             "Check `people_fully_vaccinated_per_hundred` field! Found values above 100%."
         )

     # The upper bound is looked up only when the lower bound check passes.
     starts_too_early = df.monday.min() < "2021-02-08"
     if starts_too_early or df.monday.max() > localdatenow("America/Lima"):
         raise ValueError(
             "Check `monday` field! Some dates may be out of normal")

     only_peru = (df.location.unique() == "Peru").all()
     if not only_peru:
         raise ValueError("Invalid values in `location` field!")

     return df
Пример #6
0
 def _get_num_gap_days(self, df_current):
     """Return the day gap between now and the latest ``date`` in ``df_current``.

     The latest date is parsed with the ``%Y-%m-%d`` format, and the result
     is the ``.days`` attribute of the difference between the two values.
     """
     now = localdatenow(tz=None, as_datetime=True)
     latest_date = clean_date(
         df_current.date.max(), "%Y-%m-%d", as_datetime=True)
     elapsed = now - latest_date
     return elapsed.days
Пример #7
0
 def pipe_metadata(self, ds: pd.Series) -> pd.Series:
     """Attach location, source URL, vaccine names and date to ``ds``.

     Each field is added through ``enrich_data``, in the order ``location``,
     ``source_url``, ``vaccine``, ``date``. The date comes from
     ``localdatenow(self.timezone)``.

     Returns:
         The result of the last ``enrich_data`` call.
     """
     with_location = enrich_data(ds, "location", self.location)
     with_source = enrich_data(
         with_location, "source_url", self.source_url_ref)
     with_vaccine = enrich_data(
         with_source, "vaccine", "Oxford/AstraZeneca, Pfizer/BioNTech")
     return enrich_data(with_vaccine, "date", localdatenow(self.timezone))