示例#1
0
def get_state_timeseries(states=None,
                         download: bool = False) -> pd.DataFrame:
    """Load the time series for the requested states, optionally downloading first.

    Args:
        states: collection of state names to keep. A single name may be given
            as a bare string. The string ``"*"`` disables the name filter and
            keeps every row whose ``detected_state`` is not the literal
            ``'NULL'``. When omitted, ``["Tamil Nadu"]`` is used.
        download: when true, ``download_data`` is called for every source
            file (parts 1-24) with the module-level ``data`` location before
            anything is loaded.

    Returns:
        The frame produced by ``get_time_series`` for the
        ``detected_state``/``detected_district`` columns, with the ``date``,
        ``time``, ``delta`` and ``logdelta`` columns dropped and the status
        columns renamed to ``dD`` (Deceased), ``dT`` (Hospitalized) and
        ``dR`` (Recovered).
    """
    # A list default would be shared between calls; build it per call instead.
    if states is None:
        states = ["Tamil Nadu"]
    elif isinstance(states, str) and states != "*":
        # `in @states` needs a list-like on the right-hand side.
        states = [states]

    paths = {
        "v3": [data_path(i) for i in (1, 2)],
        "v4": [data_path(i) for i in range(3, 25)]
    }
    if download:
        for target in paths['v3'] + paths['v4']:
            download_data(data, target)

    keep_all = isinstance(states, str) and states == "*"
    state_filter = "detected_state != 'NULL'" if keep_all else "detected_state in @states"

    # NOTE: `.query` must be called from this function's own frame so that
    # `@states` resolves to the local variable above.
    return (
        load_all_data(
            v3_paths=[data/filepath for filepath in paths['v3']],
            v4_paths=[data/filepath for filepath in paths['v4']],
        )
        .query(state_filter, engine="python")
        .pipe(get_time_series, ["detected_state", "detected_district"])
        .drop(columns=["date", "time", "delta", "logdelta"])
        .rename(columns={
            "Deceased":     "dD",
            "Hospitalized": "dT",
            "Recovered":    "dR"
        })
    )
示例#2
0
def get_state_timeseries(
    states = "*", 
    download: bool = False, 
    aggregation_cols = None, 
    last_API_file: int = 27) -> pd.DataFrame:
    """Load state- and district-level data, downloading source files if specified.

    Args:
        states: collection of state names to keep. A single name may be given
            as a bare string. The default ``"*"`` disables the name filter and
            keeps every row whose ``detected_state`` is not the literal
            ``'NULL'``.
        download: when true, ``download_data`` is called for every source
            file with the module-level ``data`` location before loading.
        aggregation_cols: second argument forwarded to ``get_time_series``.
            When omitted, ``["detected_state", "detected_district"]`` is used.
        last_API_file: exclusive upper bound on the numbered "v4" files;
            parts ``3 .. last_API_file - 1`` are loaded (parts 1-2 are always
            loaded as "v3").

    Returns:
        The frame produced by ``get_time_series``, with the ``date``,
        ``time``, ``delta`` and ``logdelta`` columns dropped and the status
        columns renamed to ``dD`` (Deceased), ``dT`` (Hospitalized) and
        ``dR`` (Recovered).
    """
    # A list default would be shared between calls (and with the helper it is
    # passed to); build a fresh one per call instead.
    if aggregation_cols is None:
        aggregation_cols = ["detected_state", "detected_district"]
    if isinstance(states, str) and states != "*":
        # `in @states` needs a list-like on the right-hand side.
        states = [states]

    paths = {
        "v3": [data_path(i) for i in (1, 2)],
        "v4": [data_path(i) for i in range(3, last_API_file)],
    }
    if download:
        for target in paths['v3'] + paths['v4']:
            download_data(data, target)

    keep_all = isinstance(states, str) and states == "*"
    state_filter = "detected_state != 'NULL'" if keep_all else "detected_state in @states"

    # NOTE: `.query` must be called from this function's own frame so that
    # `@states` resolves to the local variable above.
    return (
        load_all_data(
            v3_paths=[data/filepath for filepath in paths['v3']],
            v4_paths=[data/filepath for filepath in paths['v4']],
        )
        .query(state_filter)
        .pipe(get_time_series, aggregation_cols)
        .drop(columns=["date", "time", "delta", "logdelta"])
        .rename(columns={
            "Deceased":     "dD",
            "Hospitalized": "dT",
            "Recovered":    "dR"
        })
    )
示例#3
0
import pandas as pd
from epimargin.etl.commons import download_data
from epimargin.etl.covid19india import data_path, get_time_series, load_all_data
from epimargin.utils import setup

# setup() yields two values; only the first is used below, as the location
# files are downloaded to and loaded from.
data, _ = setup()

# Source file names grouped by API version: parts 1-2 are "v3", parts 3-17 are "v4".
paths = dict(
    v3=[data_path(part) for part in (1, 2)],
    v4=[data_path(part) for part in range(3, 18)],
)

# Fetch every source file, v3 parts first.
for target in [*paths["v3"], *paths["v4"]]:
    download_data(data, target)

# Load all downloaded parts into one frame.
df = load_all_data(
    v3_paths=[data / name for name in paths["v3"]],
    v4_paths=[data / name for name in paths["v4"]],
)

# Long status column name -> short column name used downstream.
schema = dict(Deceased="dD", Recovered="dR", Hospitalized="dT")


def assemble_time_series(df):
    """Combine per-day values with their running totals on a contiguous date index.

    The frame returned by ``get_time_series(df)`` is cut down to the columns
    named in the module-level ``schema``, renamed to the short names, padded
    so that every date between the first and last index value is present
    (missing dates get 0), and joined on the index with its own cumulative
    sum.

    NOTE(review): the visible portion of this function ends without a
    ``return`` statement; the excerpt may be truncated - confirm against the
    full file.
    """
    ts = get_time_series(df)
    # Keep only the columns listed as keys of `schema`, then rename them to the
    # short names stored as its values.
    deltas = ts[schema.keys()]\
        .rename(columns = schema)
    # Reindex onto pd.date_range(min, max) (default frequency: calendar day) so
    # dates with no rows appear explicitly with a value of 0.
    deltas = deltas.reindex(pd.date_range(deltas.index.min(),
                                          deltas.index.max()),
                            fill_value=0)
    # The cumulative columns are labelled with element [1] of each original
    # label - presumably the second character of a string label, i.e.
    # "dD" -> "D"; verify the column labels are plain strings.
    merged = deltas.merge(deltas.cumsum(axis=0).rename(columns=lambda _: _[1]),
                          left_index=True,
                          right_index=True).astype(int)
    merged.index.name = "date"
示例#4
0
# Finish the figure that is currently being drawn (its setup is outside this excerpt).
plt.legend(loc = 'upper right')
plt.PlotDevice().ylabel("new cases", rotation = -90, labelpad = 50)
# Pin the lower bound of the y-axis at zero.
plt.ylim(bottom = 0)
# `lax` is defined earlier, outside this excerpt; presumably this makes it the
# current axes again before the figure is shown - confirm against the full script.
plt.sca(lax)
plt.show()

# cases vs deaths
from pathlib import Path
data = Path("./data")
# Source file names grouped by API version: parts 1-2 are "v3", parts 3-26 are "v4".
paths = {"v3": [data_path(i) for i in (1, 2)], "v4": [data_path(i) for i in range(3, 27)]}
for target in paths['v3'] + paths['v4']:
    download_data(data, target)

# Build the per-state series, then collapse it over the last index level.
df = (
    load_all_data(
        v3_paths=[data/filepath for filepath in paths['v3']],
        v4_paths=[data/filepath for filepath in paths['v4']],
    )
    .pipe(get_time_series, ["detected_state"])
    .drop(columns=["date", "time", "delta", "logdelta"])
    .rename(columns={
        "Deceased":     "dD",
        "Hospitalized": "dT",
        "Recovered":    "dR"
    })
    # DataFrame.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0;
    # groupby(level=...).sum() is the documented replacement and yields the
    # same frame once the index is sorted below.
    .groupby(level=-1).sum()
    .sort_index()
)

# Deaths go on the current (left) axes.
plt.plot(df.index, smoothed(df.dD.values), label = "Daily Deaths", color = plt.RED)
plt.text(s = "national lockdown", x = pd.to_datetime("April 27, 2020"), y = 200, fontdict = plt.theme.note, ha = "center", va = "top")
plt.legend(loc = 'upper left')
plt.ylim(bottom = 0)
# Remember the left axes, then draw cases on a twin axes sharing the same x-axis.
lax = plt.gca()
plt.sca(lax.twinx())
plt.plot(df.index, smoothed(df.dT.values), label = "Daily Cases", color = plt.PRED_PURPLE)
plt.legend(loc = 'upper right')
plt.PlotDevice().ylabel("new cases", rotation = -90, labelpad = 50)
plt.ylim(bottom = 0)
# Hand control back to the left axes.
plt.sca(lax)
示例#5
0
# Load the serology records and drop the leftover `_merge` column.
# (Parenthesised chain: the original ended on a dangling backslash continuation.)
sero = (
    pd.read_stata("data/kadata.labdate.dta")
    .drop(columns=["_merge"])
)

# Derived columns: S copies elisa_pos15, t0 copies date_med, td is t0 + 30 days,
# and hr is hom_region mapped through hom_regions_numeric.
sero["S"] = sero["elisa_pos15"]
sero["t0"] = sero["date_med"]
sero["td"] = sero["t0"] + pd.Timedelta(days=30)
sero["hr"] = sero.hom_region.map(hom_regions_numeric)

# pull down COVID 19 India data
paths = {
    "v3": [data_path(i) for i in (1, 2)],
    "v4": [data_path(i) for i in range(3, 19)]
}
# Download step is disabled; the files are expected to be present under `data` already.
# for target in paths['v3'] + paths['v4']:
#     download_data(data, target)
df = (
    load_all_data(
        v3_paths=[data/filepath for filepath in paths['v3']],
        v4_paths=[data/filepath for filepath in paths['v4']],
    )
    .query("detected_state == 'Karnataka'")
    # Take an independent copy: a column is assigned on `df` below, and
    # assigning on a filtered selection triggers SettingWithCopyWarning.
    .copy()
)

# cumulative deaths in KA through Aug 29, 2020, by district
(
    get_time_series(df, "detected_district")
    .query("status_change_date <= 'Aug 29, 2020'", engine="python")
    .Deceased
    # Series.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0;
    # groupby(level=...).sum() is the documented replacement. sort=False is
    # intended to keep the old (unsorted) row order in the CSV.
    .groupby(level=0, sort=False).sum()
    .drop("Other State")
    .astype(int)
    .to_csv(data/"ka_cumulative_deaths_aug29.csv")
)

# aggregate time series by hom_region
df["detected_region"] = df.detected_district.map(hom_regions_rev)
ka_ts = get_time_series(df.dropna(subset=["detected_region"]),
                        "detected_region").rename(columns={
                            "Deceased": "dD",
                            "Hospitalized": "dT",