from pathlib import Path

import pandas as pd

from adaptive.etl.commons import download_data
from adaptive.etl.covid19india import data_path, load_all_data


def hydrate_datastore(_):
    # set up scratch directories for downloaded data and figures
    root = Path("/tmp")
    data = root / "data"
    figs = root / "figs"
    data.mkdir(exist_ok=True)
    figs.mkdir(exist_ok=True)

    # define data versions for api files
    paths = {
        "v3": [data_path(i) for i in (1, 2)],
        "v4": [data_path(i) for i in (3, 4, 5, 6, 7, 8, 9, 10)]
    }

    for target in paths['v3'] + paths['v4']:
        download_data(data, target)

    df = load_all_data(
        v3_paths=[data / filepath for filepath in paths['v3']],
        v4_paths=[data / filepath for filepath in paths['v4']]
    )

    # report how current the downloaded data are relative to this run
    data_recency = str(df["date_announced"].max()).split()[0]
    run_date     = str(pd.Timestamp.now()).split()[0]
    print(f"data_recency: {data_recency}")
    print(f"run_date    : {run_date}")

    # fingerprint each record, stamp the run date, and save the snapshot
    df["hash"] = df.apply(lambda x: hash(tuple(x)), axis=1)
    df["report_date"] = run_date
    df.to_csv(data / f"hashed_records_{run_date}.csv")
    print(df.tail())
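# Minimal sketch of invoking the hydration step locally; the single ignored
# argument mirrors the handler-style signature of hydrate_datastore above.
# (This __main__ guard is an illustrative assumption, not part of the
# original module.)
if __name__ == "__main__":
    hydrate_datastore(None)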
import pandas as pd

from adaptive.etl.commons import download_data
from adaptive.etl.covid19india import data_path, get_time_series, load_all_data
from adaptive.utils import setup

data, _ = setup()

# define data versions for api files
paths = {
    "v3": [data_path(i) for i in (1, 2)],
    "v4": [data_path(i) for i in range(3, 18)]
}

for target in paths['v3'] + paths['v4']:
    download_data(data, target)

df = load_all_data(
    v3_paths=[data / filepath for filepath in paths['v3']],
    v4_paths=[data / filepath for filepath in paths['v4']]
)

# map raw status columns to daily-delta column names
schema = {"Deceased": "dD", "Recovered": "dR", "Hospitalized": "dT"}

def assemble_time_series(df):
    ts = get_time_series(df)
    # daily deltas, with missing calendar days filled in as zero
    deltas = ts[schema.keys()].rename(columns=schema)
    deltas = deltas.reindex(
        pd.date_range(deltas.index.min(), deltas.index.max()),
        fill_value=0
    )
    # append cumulative totals (dD -> D, dR -> R, dT -> T) alongside the deltas
    merged = deltas.merge(
        deltas.cumsum(axis=0).rename(columns=lambda _: _[1]),
        left_index=True,
        right_index=True
    ).astype(int)
    merged.index.name = "date"
    return merged
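# Hypothetical usage of assemble_time_series: restrict the case data to one
# state and write out the combined delta/cumulative table. The "Karnataka"
# filter and the output filename are illustrative assumptions.
ka = assemble_time_series(df.query("detected_state == 'Karnataka'"))
print(ka.tail())
ka.to_csv(data / "karnataka_time_series.csv")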
import pandas as pd

from adaptive.etl.commons import download_data
from adaptive.etl.covid19india import data_path, get_time_series, load_all_data
from adaptive.utils import setup

data, _ = setup()

# load Karnataka serosurvey results (lab-date version)
sero = pd.read_stata("data/kadata.labdate.dta")\
    .drop(columns=["_merge"])
sero["S"]  = sero["elisa_pos15"]
sero["t0"] = sero["date_med"]
sero["td"] = sero["t0"] + pd.Timedelta(days=30)
# hom_regions_numeric / hom_regions_rev are home-region lookup tables assumed
# to be defined earlier in the original analysis script
sero["hr"] = sero.hom_region.map(hom_regions_numeric)

# pull down COVID 19 India data
paths = {
    "v3": [data_path(i) for i in (1, 2)],
    "v4": [data_path(i) for i in range(3, 19)]
}

# for target in paths['v3'] + paths['v4']:
#     download_data(data, target)

df = load_all_data(
    v3_paths=[data / filepath for filepath in paths['v3']],
    v4_paths=[data / filepath for filepath in paths['v4']]
).query("detected_state == 'Karnataka'")

# get all deaths in KA on Aug 29 by district
get_time_series(df, "detected_district")\
    .query("status_change_date <= 'Aug 29, 2020'", engine="python")\
    .Deceased.sum(level=0)\
    .drop("Other State")\
    .astype(int)\
    .to_csv(data / "ka_cumulative_deaths_aug29.csv")

# aggregate time series by hom_region
df["detected_region"] = df.detected_district.map(hom_regions_rev)
ka_ts = get_time_series(df.dropna(subset=["detected_region"]), "detected_region")\
    .rename(columns={
        "Deceased":     "dD",
        "Hospitalized": "dT",
        "Recovered":    "dR",
    })
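# Illustrative follow-on (an assumption, not part of the original script):
# inspect the per-home-region series and save it under a hypothetical filename.
print(ka_ts.tail())
ka_ts.to_csv(data / "ka_home_region_time_series.csv")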