def update():
    now = get_timestr()
    cols = {
        "År/Year": "year",
        "Uke/week": "week",
        "Tilfeller/cases": "new_confirmed",
    }

    data = get_data()
    if data:
        # usecols takes the original (Norwegian) headers; rename to English after
        df_new = pd.read_excel(data.content, usecols=list(cols))
        df_new = df_new.rename(columns=cols)
        df_new = df_new[df_new["year"].isin([2021, 2022])]
        df_new = df_new[["year", "week", "new_confirmed"]].fillna(0).astype(int)
        df_new = df_new.sort_values(by="week").reset_index(drop=True)
        df_new["total_confirmed"] = df_new["new_confirmed"].cumsum()
        df_new["source"] = "fhi:web"

        # compare with current data and flag an update if anything changed
        df = load_datafile("omicron")

        if not df_new.equals(df):
            print(now, "omicron: New update")

            sourcefile = load_sources()
            sourcefile["omicron.csv"]["last_updated"] = now
            sourcefile["omicron.csv"]["pending_update"] = 1
            write_sources(sourcefile)
            write_datafile("omicron", df_new)
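# The load_datafile()/write_datafile() helpers used above are defined
# elsewhere in the repo. A minimal sketch of what they might look like,
# assuming datafiles are CSVs stored under a data/ directory (the directory
# name and the signatures are assumptions, not confirmed by this module):
import pandas as pd

def load_datafile(name, parse_dates=None):
    # hypothetical: read data/<name>.csv into a DataFrame
    return pd.read_csv(f"data/{name}.csv", parse_dates=parse_dates)

def write_datafile(name, df):
    # hypothetical: write the frame back without the index column
    df.to_csv(f"data/{name}.csv", index=False)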
def update():
    now = get_timestr()

    # get fhi datafile
    datafile = get_fhi_datafile("data_covid19_lab_by_time")
    df_new = pd.read_csv(
        datafile,
        usecols=["date", "n_neg", "n_pos", "pr100_pos"],
        parse_dates=["date"],
    )

    mapping = {"n_neg": "new_neg", "n_pos": "new_pos"}
    df_new = df_new.rename(columns=mapping)

    # sort by date before accumulating so the running totals are correct
    df_new = df_new.sort_values(by=["date"], ascending=True)
    df_new["new_total"] = df_new["new_neg"] + df_new["new_pos"]
    df_new["total_neg"] = df_new["new_neg"].cumsum()
    df_new["total_pos"] = df_new["new_pos"].cumsum()
    df_new["total"] = df_new["new_total"].cumsum()
    df_new["source"] = "fhi:git"

    df = load_datafile("tested_lab", parse_dates=["date"])

    if not df_new.equals(df):
        print(now, "tested_lab: New update")

        sourcefile = load_sources()
        sourcefile["tested_lab.csv"]["last_updated"] = now
        sourcefile["tested_lab.csv"]["pending_update"] = 1
        write_sources(sourcefile)
        write_datafile("tested_lab", df_new)

        # Generate graph
        graphs.tested_lab()
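# get_fhi_datafile() is also defined elsewhere. A sketch under the assumption
# that FHI publishes its daily surveillance CSVs on GitHub and that the newest
# file carries a "_latest" suffix (the repository layout and naming are
# assumptions here, not confirmed by this module):
def get_fhi_datafile(filename):
    # hypothetical: pd.read_csv() accepts a URL, so returning it is enough
    base = ("https://raw.githubusercontent.com/folkehelseinstituttet/"
            "surveillance_data/master/covid19")
    return f"{base}/{filename}_latest.csv"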
def update():
    now = get_timestr()

    datafile = get_fhi_datafile("data_covid19_msis_by_time_location")
    df_new = pd.read_csv(
        datafile,
        usecols=["date", "n", "location_name"],
        parse_dates=["date"],
    )
    df_new = df_new.loc[df_new["location_name"] == "Norge"]
    df_new = df_new.filter(items=["date", "n"])
    df_new = df_new[df_new.date >= "2020-02-21"]
    df_new = df_new.rename(columns={"n": "new"})
    df_new["total"] = df_new["new"].cumsum()
    df_new["source"] = "fhi:git"
    df_new = df_new.reset_index(drop=True)

    df = load_datafile("confirmed", parse_dates=["date"])
    df = df[df.date >= "2020-02-21"]
    df_filter = df.loc[df["source"] == "fhi:git"]
    df_filter = df_filter.reset_index(drop=True)

    if not df_new.equals(df_filter):
        print(now, "fhi_git.confirmed: New update")

        df_new = df_new.merge(df, how="outer")
        df_new = df_new.drop_duplicates(subset=["date"], keep="first")

        # If the second-to-last day's total exceeds the last day's total,
        # the newest row is lagging; roll the difference into it.
        second_last = df_new.iloc[-2:]
        second_last_total = second_last.total.values[0]
        last_total = second_last.total.values[1]
        last_new = second_last.new.values[1]

        if second_last_total > last_total:
            new_today = last_new + (second_last_total - last_total)
            # assign via iloc with an explicit column position; the original
            # chained assignment (df.iloc[-1:]["total"] = ...) writes to a
            # copy and silently does nothing
            df_new.iloc[-1, df_new.columns.get_loc("total")] = second_last_total
            df_new.iloc[-1, df_new.columns.get_loc("new")] = new_today

        sourcefile = load_sources()
        sourcefile["confirmed.csv"]["last_updated"] = now
        sourcefile["confirmed.csv"]["pending_update"] = 1
        write_sources(sourcefile)
        write_datafile("confirmed", df_new)

        # Generate graph
        graphs.confirmed()
def update():
    now = get_timestr()
    today = date.today()
    yesterday = today - timedelta(days=1)

    url = "https://statistikk.fhi.no/api/msis/antallKoronaTotalt"

    df = load_datafile("confirmed")
    last_total = df["total"].max()

    try:
        confirmed_total = requests.get(url).json()
    except Exception as e:
        confirmed_total = 0
        print(now, "- ERROR:", str(e))

    if confirmed_total > last_total:
        print(now, "msis_api.confirmed: New update")
        confirmed_diff = confirmed_total - last_total

        # Shortly after midnight (00:00-01:59), attribute the new cases to
        # yesterday's row; otherwise update today's row.
        if datetime.now().hour in range(0, 2):
            n_yesterday = df.new.loc[df["date"] == str(yesterday)].values[0]
            diff_yesterday = n_yesterday + confirmed_diff
            df.loc[df["date"] == str(yesterday), "new"] = diff_yesterday
            df.loc[df["date"] == str(yesterday), "total"] = confirmed_total
            df.loc[df["date"] == str(today), "total"] = confirmed_total
            df.loc[df["date"] == str(yesterday), "source"] = "msis:api"
            df.loc[df["date"] == str(today), "source"] = "msis:api"
        else:
            n_today = df.new.loc[df["date"] == str(today)].values[0]
            diff_today = n_today + confirmed_diff
            df.loc[df["date"] == str(today), "new"] = diff_today
            df.loc[df["date"] == str(today), "total"] = confirmed_total
            df.loc[df["date"] == str(today), "source"] = "msis:api"

        sourcefile = load_sources()
        sourcefile["confirmed.csv"]["last_updated"] = now
        sourcefile["confirmed.csv"]["pending_update"] = 1
        write_sources(sourcefile)
        write_datafile("confirmed", df)
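# The load_sources()/write_sources() bookkeeping used by every updater could
# be as simple as a JSON file keyed by CSV name. A sketch under that
# assumption (the sources.json filename is hypothetical):
import json

def load_sources():
    # hypothetical layout:
    # {"confirmed.csv": {"last_updated": "2021-01-01 10:00:00", "pending_update": 0}, ...}
    with open("sources.json") as f:
        return json.load(f)

def write_sources(sources):
    with open("sources.json", "w") as f:
        json.dump(sources, f, indent=2)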
def update():
    now = get_timestr()

    url = "https://api.helsedirektoratet.no/ProduktCovid19/Covid19Statistikk/"
    headers = {
        "Ocp-Apim-Subscription-Key": os.getenv("HELSEDIR_API_KEY"),
    }
    res = requests.get(url + "helseregion", headers=headers).json()

    # collect the per-region registrations into one frame
    # (pd.concat replaces the DataFrame.append calls removed in pandas 2.0)
    df_new = pd.concat(
        [pd.DataFrame(r["registreringer"]) for r in res],
        ignore_index=True,
    )
    df_new = df_new.rename(
        columns={
            "dato": "date",
            "antInnlagte": "admissions",
            "antCovidIntensiv": "icu",
            "antRespirator": "respiratory",
        })
    df_new = df_new[["date", "admissions", "icu", "respiratory"]]
    df_new["date"] = pd.to_datetime(df_new["date"], format="%Y-%m-%d")
    df_new = df_new.groupby("date").sum()
    df_new = df_new.reset_index().sort_values(["date"], ascending=True)
    df_new["source"] = "helsedir:api"

    df = load_datafile("hospitalized", parse_dates=["date"])

    if not df_new.equals(df):
        print(now, "hospitalized: New update")

        sourcefile = load_sources()
        sourcefile["hospitalized.csv"]["last_updated"] = now
        sourcefile["hospitalized.csv"]["pending_update"] = 1
        write_sources(sourcefile)
        write_datafile("hospitalized", df_new)

        # Generate graph
        graphs.hospitalized()
def update(): now = get_timestr() # load current data df = load_datafile("dead") # get fhi datafile datafile = get_fhi_datafile("data_covid19_demographics") df_new = pd.read_csv(datafile) date_of_publishing = df_new.date_of_publishing.max() if date_of_publishing not in df.date.values: print(now, "dead: New update") last_data = df["total"].max() fhi_dead = df_new["n"].sum() dead_diff = fhi_dead - last_data df = df.append( { "date": date_of_publishing, "new": dead_diff, "total": fhi_dead, "source": "fhi:git", }, ignore_index=True, ) sourcefile = load_sources() sourcefile["dead.csv"]["last_updated"] = now sourcefile["dead.csv"]["pending_update"] = 1 write_sources(sourcefile) write_datafile("dead", df) # Generate graph graphs.dead()
def update():
    now = get_timestr()

    # get from fhi api
    url = "https://www.fhi.no/api/chartdata/api/91672"
    res = requests.get(url).json()
    tests = res["figures"][4]
    fhi_tests = tests["number"]
    fhi_date = str(datetime.strptime(tests["updated"], "%d/%m/%Y").date())

    # get current data
    df = load_datafile("tested")

    # update new data
    if fhi_date not in df.date.values:
        print(now, "tested: New update")

        last_data = df["total"].max()
        tested_diff = fhi_tests - last_data

        # append one row with the day's difference
        # (pd.concat replaces DataFrame.append, removed in pandas 2.0)
        row = {
            "date": fhi_date,
            "new": tested_diff,
            "total": fhi_tests,
            "source": "fhi:web",
        }
        df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)

        sourcefile = load_sources()
        sourcefile["tested.csv"]["last_updated"] = now
        sourcefile["tested.csv"]["pending_update"] = 1
        write_sources(sourcefile)
        write_datafile("tested", df)
def update(): now = get_timestr() url = "https://www.fhi.no/om/smittestopp/nokkeltall-fra-smittestopp/" browser = get_browser() try: browser.get(url) time.sleep(1) new_downloads = browser.execute_script( "return Highcharts.charts[0].series[0].options.data") total_downloads = browser.execute_script( "return Highcharts.charts[0].series[1].options.data") new_reported = browser.execute_script( "return Highcharts.charts[1].series[0].options.data") total_reported = browser.execute_script( "return Highcharts.charts[1].series[1].options.data") browser.close() browser.quit() except Exception: error = sys.exc_info()[1] print("- ERROR:", str(error)) new_lst = [] for n in new_downloads: date_parsed = datetime.fromtimestamp(n[0] / 1000) datestr = date_parsed.strftime("%Y-%m-%d") d = {"date": datestr, "new_downloads": n[1]} new_lst.append(d) df_new = pd.DataFrame(new_lst) for n in total_downloads: date_parsed = datetime.fromtimestamp(n[0] / 1000) datestr = date_parsed.strftime("%Y-%m-%d") df_new.loc[df_new["date"] == datestr, "total_downloads"] = n[1] for n in new_reported: date_parsed = datetime.fromtimestamp(n[0] / 1000) datestr = date_parsed.strftime("%Y-%m-%d") df_new.loc[df_new["date"] == datestr, "new_reported"] = n[1] for n in total_reported: date_parsed = datetime.fromtimestamp(n[0] / 1000) datestr = date_parsed.strftime("%Y-%m-%d") df_new.loc[df_new["date"] == datestr, "total_reported"] = n[1] df_new = df_new.fillna(0) intcolumns = [ "new_downloads", "total_downloads", "new_reported", "total_reported", ] df_new[intcolumns] = df_new[intcolumns].astype(int) df_new["source"] = "fhi:web" df_new["date"] = pd.to_datetime(df_new["date"]) columns = [ "date", "new_downloads", "total_downloads", "new_reported", "total_reported", ] # Compare dfs df = load_datafile("smittestopp", parse_dates=["date"]) df_new = df_new.sort_values(by=["date"]) merged = pd.merge(df_new, df, how="outer", indicator=True) new_data = merged.loc[merged._merge == "left_only", columns] if not new_data.empty: print(now, "smittestopp: New update") sourcefile = load_sources() sourcefile["smittestopp.csv"]["last_updated"] = now sourcefile["smittestopp.csv"]["pending_update"] = 1 write_sources(sourcefile) write_datafile("smittestopp", df_new) # Generate graph graphs.smittestopp()
def update(): now = get_timestr() # load current data df = load_datafile("vaccine_doses") # get fhi datafile datafile = get_fhi_datafile("data_covid19_sysvak_by_time_location") df_new = pd.read_csv( datafile, usecols=[ "date", "granularity_geo", "location_name", "n_dose_1", "n_dose_2", "cum_n_dose_1", "cum_n_dose_2", "cum_pr100_dose_1", "cum_pr100_dose_2", ], ) mapping = { "n_dose_1": "new_dose_1", "n_dose_2": "new_dose_2", "cum_n_dose_1": "total_dose_1", "cum_n_dose_2": "total_dose_2", "cum_pr100_dose_1": "total_pr100_dose_1", "cum_pr100_dose_2": "total_pr100_dose_2", } columns = [ "granularity_geo", "location_name", "date", "new_dose_1", "new_dose_2", "total_dose_1", "total_dose_2", "total_pr100_dose_1", "total_pr100_dose_2", "new_doses", "total_doses", "source", ] df_new = df_new.rename(columns=mapping) df_new["new_doses"] = df_new["new_dose_1"] + df_new["new_dose_2"] df_new["total_doses"] = df_new["total_dose_1"] + df_new["total_dose_2"] df_new["source"] = "fhi:git" df_new = df_new[columns] for column in columns: df_new[column] = df_new[column].astype(str).replace("nan", "") df[column] = df[column].astype(str) df_new = df_new.sort_values( by=["granularity_geo", "location_name", "date"]).reset_index(drop=True) df = df.sort_values( by=["granularity_geo", "location_name", "date"]).reset_index(drop=True) if not df_new.index.equals(df.index): print(now, "vaccine_doses: New update") sourcefile = load_sources() sourcefile["vaccine_doses.csv"]["last_updated"] = now sourcefile["vaccine_doses.csv"]["pending_update"] = 1 write_sources(sourcefile) write_datafile("vaccine_doses", df_new) # Generate graph graphs.vaccine_doses()
print("Checking for update: tested_lab.csv")
tested_lab.update()

print("Checking for update: confirmed.csv")
confirmed_new_day()
confirmed_msis.update()
confirmed_fhi.update()

print("Checking for update: hospitalized.csv")
hospitalized.update()

print("Checking for update: dead.csv")
dead.update()

print("Checking for update: vaccine_doses.csv")
vaccine.update()

print("Checking for update: transport.csv")
transport.update()

print("Checking for update: smittestopp.csv")
smittestopp.update()

# Rebuild README.md if any datafile was flagged as updated
sources = load_sources()
pending_update = [sources[category]["pending_update"] for category in sources]

if 1 in pending_update:
    print("Updating README.md")
    reset_pending()
    update_readme()
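# get_timestr() and reset_pending() are imported from elsewhere in the repo.
# A sketch of plausible implementations (the format string and the exact
# behavior are assumptions): get_timestr() produces the timestamp used in the
# log lines, and reset_pending() clears the flags once README.md is rebuilt.
from datetime import datetime

def get_timestr():
    # hypothetical timestamp format
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

def reset_pending():
    sources = load_sources()
    for category in sources:
        sources[category]["pending_update"] = 0
    write_sources(sources)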
def update():
    now = get_timestr()

    headers = {
        "Accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 11.2; rv:86.0) Gecko/20100101 Firefox/86.0",
        "X-Requested-With": "XMLHttpRequest",
    }
    columns = [
        "tr_type",
        "route",
        "company",
        "tr_from",
        "tr_to",
        "departure",
        "arrival",
        "source",
    ]
    tr_types = {
        "båt": "Skip",
        "bat": "Skip",
        "buss": "Buss",
        "fly": "Fly",
        "tog": "Tog",
    }

    base_url = "https://www.fhi.no"
    url = f"{base_url}/sv/smittsomme-sykdommer/corona/koronavirus-og-covid-19-pa-offentlig-kommunikasjon/"

    html = requests.get(url).text
    soup = BeautifulSoup(html, "lxml")

    # each downloadable table on the page is one Excel file per transport type
    rows = []
    for div in soup.find_all("div", class_="fhi-dynamic-table__download"):
        href = div.find("a").get("href")
        filename = os.path.basename(href)
        # the transport type is encoded in the filename, e.g. "...-fly.xlsx"
        tr_type = filename.split("-")[1].split(".")[0]

        res = requests.get(f"{base_url}{href}",
                           allow_redirects=True,
                           headers=headers)
        df_tmp = pd.read_excel(res.content)
        df_tmp = df_tmp.dropna()

        for _, row in df_tmp.iterrows():
            route = row[0]
            company = row[1]
            from_to = row[2].replace("\xa0", "")
            departure = fmt_date(row[3])
            arrival = fmt_date(row[4])

            from_to_split = from_to.split("-")
            _from = from_to_split[0].strip()
            try:
                _to = from_to_split[1].strip()
            except IndexError:
                _to = ""

            rows.append({
                "tr_type": tr_types[tr_type],
                "route": route,
                "company": company,
                "tr_from": _from,
                "tr_to": _to,
                "departure": departure,
                "arrival": arrival,
                "source": "fhi:web",
            })

    # build the frame once instead of appending row by row
    df_new = pd.DataFrame(rows, columns=columns)

    # fix a known typo in the source data (year 2021 instead of 2020)
    df_new.loc[df_new.departure == "2021-12-27 16:05:00",
               ["departure"]] = "2020-12-27 16:05:00"

    df_new["departure"] = pd.to_datetime(df_new["departure"])
    df_new["arrival"] = pd.to_datetime(df_new["arrival"])
    df_new = df_new.sort_values(by=["departure", "arrival", "route"],
                                ascending=False)

    df = load_datafile("transport")

    # compare as strings so NaN/dtype differences don't mask real changes
    for column in columns:
        df_new[column] = df_new[column].astype(str).replace("nan", "")
        df[column] = df[column].astype(str).replace("nan", "")

    merged = pd.merge(df_new, df, how="outer", indicator=True)
    new_data = merged.loc[merged._merge == "left_only", columns]

    if not new_data.empty:
        print(now, "transport: New update")

        df = pd.concat([df, new_data])
        df = df.sort_values(by=["departure", "arrival", "route"],
                            ascending=False)

        sourcefile = load_sources()
        sourcefile["transport.csv"]["last_updated"] = now
        sourcefile["transport.csv"]["pending_update"] = 1
        write_sources(sourcefile)
        write_datafile("transport", df)
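# fmt_date() is defined elsewhere. A sketch, assuming it normalizes the Excel
# departure/arrival cells (which may arrive as strings or datetimes, with
# day-first Norwegian formatting) to "YYYY-MM-DD HH:MM:SS" strings:
import pandas as pd

def fmt_date(value):
    # hypothetical: pd.to_datetime handles both str and datetime inputs
    return str(pd.to_datetime(value, dayfirst=True))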