Exemplo n.º 1
0
def get_state_timeseries(states=("Tamil Nadu",),
                         download: bool = False) -> pd.DataFrame:
    """Load the state/district time series for the requested states.

    Args:
        states: state names to keep, matched against the ``detected_state``
            column. A single name may be given as a plain string. The
            string ``"*"`` keeps every row whose ``detected_state`` is not
            ``'NULL'``.
        download: when true, call ``download_data`` for every source file
            (using the ``data`` directory defined outside this function)
            before loading.

    Returns:
        The result of ``get_time_series`` called with
        ``["detected_state", "detected_district"]``, with the ``date``,
        ``time``, ``delta`` and ``logdelta`` columns dropped and
        ``Deceased``/``Hospitalized``/``Recovered`` renamed to
        ``dD``/``dT``/``dR``.
    """
    paths = {
        "v3": [data_path(i) for i in (1, 2)],
        "v4": [data_path(i) for i in range(3, 25)]
    }
    if download:
        for target in paths['v3'] + paths['v4']:
            download_data(data, target)

    # Only the exact string "*" is the wildcard; testing the type first keeps
    # the comparison unambiguous for array-like inputs.
    if isinstance(states, str) and states == "*":
        state_filter = "detected_state != 'NULL'"
    else:
        # pandas evaluates `in` through `isin`, which rejects a bare string,
        # so a single state name is wrapped into a one-element list.
        selected_states = [states] if isinstance(states, str) else list(states)
        state_filter = "detected_state in @selected_states"

    return load_all_data(v3_paths = [data/filepath for filepath in paths['v3']],  v4_paths = [data/filepath for filepath in paths['v4']])\
        .query(state_filter, engine = "python")\
        .pipe(lambda _: get_time_series(_, ["detected_state", "detected_district"]))\
        .drop(columns = ["date", "time", "delta", "logdelta"])\
        .rename(columns = {
            "Deceased":     "dD",
            "Hospitalized": "dT",
            "Recovered":    "dR"
        })
Exemplo n.º 2
0
def get_state_timeseries(
    states = "*",
    download: bool = False,
    aggregation_cols = None,
    last_API_file: int = 27) -> pd.DataFrame:
    """Load state- and district-level data, downloading source files if specified.

    Args:
        states: state names to keep, matched against the ``detected_state``
            column. A single name may be given as a plain string. The
            default ``"*"`` keeps every row whose ``detected_state`` is not
            ``'NULL'``.
        download: when true, call ``download_data`` for every source file
            (using the ``data`` directory defined outside this function)
            before loading.
        aggregation_cols: second argument passed to ``get_time_series``;
            defaults to ``["detected_state", "detected_district"]``.
        last_API_file: exclusive upper bound of the v4 file numbers; files
            ``3 .. last_API_file - 1`` are loaded.

    Returns:
        The ``get_time_series`` result with the ``date``, ``time``,
        ``delta`` and ``logdelta`` columns dropped and
        ``Deceased``/``Hospitalized``/``Recovered`` renamed to
        ``dD``/``dT``/``dR``.
    """
    # Build the default per call so one list object is never shared between calls.
    if aggregation_cols is None:
        aggregation_cols = ["detected_state", "detected_district"]

    paths = {"v3": [data_path(i) for i in (1, 2)], "v4": [data_path(i) for i in range(3, last_API_file)]}
    if download:
        for target in paths['v3'] + paths['v4']:
            download_data(data, target)

    # Only the exact string "*" is the wildcard; testing the type first keeps
    # the comparison unambiguous for array-like inputs.
    if isinstance(states, str) and states == "*":
        state_filter = "detected_state != 'NULL'"
    else:
        # pandas evaluates `in` through `isin`, which rejects a bare string,
        # so a single state name is wrapped into a one-element list.
        selected_states = [states] if isinstance(states, str) else list(states)
        state_filter = "detected_state in @selected_states"

    return load_all_data(v3_paths = [data/filepath for filepath in paths['v3']],  v4_paths = [data/filepath for filepath in paths['v4']])\
        .query(state_filter)\
        .pipe(lambda _: get_time_series(_, aggregation_cols))\
        .drop(columns = ["date", "time", "delta", "logdelta"])\
        .rename(columns = {
            "Deceased":     "dD",
            "Hospitalized": "dT",
            "Recovered":    "dR"
        })
Exemplo n.º 3
0
import pandas as pd
from epimargin.etl.commons import download_data
from epimargin.etl.covid19india import data_path, get_time_series, load_all_data
from epimargin.utils import setup

# `data` is the directory used both as the download target and as the base of
# the load paths; the second value returned by setup() is not used here.
data, _ = setup()

# Source files grouped under the "v3" and "v4" keys: files 1-2 and files 3-17.
paths = dict(
    v3=list(map(data_path, (1, 2))),
    v4=list(map(data_path, range(3, 18))),
)

# Fetch every listed file before loading.
for target in (*paths["v3"], *paths["v4"]):
    download_data(data, target)

df = load_all_data(
    v3_paths=[data / name for name in paths["v3"]],
    v4_paths=[data / name for name in paths["v4"]],
)

# Long column name -> short delta column name.
schema = dict(Deceased="dD", Recovered="dR", Hospitalized="dT")


def assemble_time_series(df):
    """Assemble daily delta columns and their running totals from `df`."""
    ts = get_time_series(df)
    # Keep only the columns named in the module-level `schema` and give them
    # their short names (dD, dR, dT).
    deltas = ts[schema.keys()]\
        .rename(columns = schema)
    # Reindex onto every calendar day between the first and last index value;
    # days missing from the data are filled with 0.
    deltas = deltas.reindex(pd.date_range(deltas.index.min(),
                                          deltas.index.max()),
                            fill_value=0)
    # Join each delta column with its cumulative sum. The cumulative columns
    # are named by the second character of the delta name ("dD" -> "D").
    merged = deltas.merge(deltas.cumsum(axis=0).rename(columns=lambda _: _[1]),
                          left_index=True,
                          right_index=True).astype(int)
Exemplo n.º 4
0
# Clip the x-axis to the span of the first index level, minus the final 7 days.
_first_level_values = df.index.get_level_values(0)
plt.xlim(_first_level_values.min(), _first_level_values.max() - pd.Timedelta(days = 7))
plt.ylim(0, 900)
plt.legend(loc = "upper left")

# Remember the current axes, draw the smoothed confirmed-case deltas on a
# twin y-axis, then make the remembered axes current again.
lax = plt.gca()
plt.sca(lax.twinx())
_confirmed_deltas = df["TT"][:, "delta", "confirmed"]
plt.plot(
    _confirmed_deltas.index,
    smoothed(_confirmed_deltas.values),
    label = "Daily Cases",
    color = plt.PRED_PURPLE,
)
plt.legend(loc = 'upper right')
plt.PlotDevice().ylabel("new cases", rotation = -90, labelpad = 50)
plt.ylim(bottom = 0)
plt.sca(lax)
plt.show()

# cases vs deaths
from pathlib import Path
data = Path("./data")
paths = {"v3": [data_path(i) for i in (1, 2)], "v4": [data_path(i) for i in range(3, 27)]}
for target in paths['v3'] + paths['v4']:
    download_data(data, target)
# Build the per-state time series, then sum over the last index level
# (presumably the date level - TODO confirm against get_time_series).
# `DataFrame.sum(level=...)` was deprecated in pandas 1.3 and removed in 2.0;
# `groupby(level=...).sum()` is the documented replacement.
df = load_all_data(v3_paths = [data/filepath for filepath in paths['v3']],  v4_paths = [data/filepath for filepath in paths['v4']])\
    .pipe(lambda _: get_time_series(_, ["detected_state"]))\
    .drop(columns = ["date", "time", "delta", "logdelta"])\
    .rename(columns = {
        "Deceased":     "dD",
        "Hospitalized": "dT",
        "Recovered":    "dR"
    })\
    .groupby(level = -1).sum()\
    .sort_index()

# Plot the smoothed dD column against the frame's index, labelled "Daily Deaths".
plt.plot(df.index, smoothed(df.dD.values), label = "Daily Deaths", color = plt.RED)
# Place a "national lockdown" text label at x = April 27, 2020, y = 200.
plt.text(s = "national lockdown", x = pd.to_datetime("April 27, 2020"), y = 200, fontdict = plt.theme.note, ha = "center", va = "top")
plt.legend(loc = 'upper left')
# Anchor the y-axis at zero.
plt.ylim(bottom = 0)
Exemplo n.º 5
0
    adr = pd.date_range(end=onset.index[-1], periods=len(adj))
    adjusted = pd.Series(adj, index=adr)

    return adjusted


# Data and figure directories under the path returned by cwd().
root = cwd()
data, figs = root / "data", root / "figs"

# Analysis constants.
gamma, smoothing, CI = 0.2, 10, 0.95

# Source files grouped under the "v3" and "v4" keys: files 1-2 and files 3-17.
paths = dict(
    v3=list(map(data_path, (1, 2))),
    v4=list(map(data_path, range(3, 18))),
)

# Fetch every listed file before loading.
for target in (*paths["v3"], *paths["v4"]):
    download_data(data, target)

dfn = load_all_data(
    v3_paths=[data / name for name in paths["v3"]],
    v4_paths=[data / name for name in paths["v4"]],
)

# Delay table read from bihar_delay.csv, indexed by its "delay" column.
delay = pd.read_csv(data / "bihar_delay.csv").set_index("delay")

# Hospitalized series for Bihar, restricted to index values from 2020-03-26 on.
state_ts = get_time_series(dfn, "detected_state").loc["Bihar"].Hospitalized
# Delay adjustment is currently disabled:
# state_ts = delay_adjust(state_ts, np.squeeze(delay.values))
state_ts = state_ts.loc[state_ts.index >= "2020-03-26"]
district_names, population_counts, _ = etl.district_migration_matrix(
Exemplo n.º 6
0
# model details
CI = 0.99
smoothing = 10

if __name__ == "__main__":
    # Input and output directories live under the path returned by cwd().
    root = cwd()
    data = root / "data"
    output = root / "output"
    # exist_ok=True creates each directory only when missing, without the
    # check-then-create race of a separate exists() test.
    data.mkdir(exist_ok=True)
    output.mkdir(exist_ok=True)

    # define data versions for api files
    paths = {
        "v3": [data_path(i) for i in (1, 2)],
        "v4": [data_path(i) for i in range(3, 9)]
    }

    # download data from india covid 19 api
    for target in paths['v3'] + paths['v4']:
        download_data(data, target)

    df = load_all_data(v3_paths=[data / filepath for filepath in paths['v3']],
                       v4_paths=[data / filepath for filepath in paths['v4']])
    # Keep only the part before the first whitespace of each stringified
    # timestamp (the date part for a "YYYY-MM-DD HH:MM:SS" rendering).
    data_recency = str(df["date_announced"].max()).split()[0]
    run_date = str(pd.Timestamp.now()).split()[0]

    # Time series restricted to rows whose detected_state is "Delhi".
    ts = get_time_series(df[df.detected_state == "Delhi"])

    (dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower,