Exemplo n.º 1
0
def process_data(data):
    """Fit the generative model for one department and summarize it.

    Args:
        data: A two-item ``(dep, model_data)`` pair. ``dep`` is concatenated
            onto a progress message and passed, together with ``model_data``,
            to ``GenerativeModel``.

    Returns:
        A ``(dep, summary)`` tuple, where ``summary`` is whatever
        ``summarize_inference_data`` produces for the sampled model.
    """
    dep, model_data = data
    print("Computing model for " + dep)

    model = GenerativeModel(dep, model_data)
    model.sample()

    return dep, summarize_inference_data(model.inference_data)
Exemplo n.º 2
0
def main():
    """Run the model for every metro region and push the summaries to Firestore.

    Downloads the county-level case/test tables and the state-wide series,
    picks the last day to model (falling back to the earlier date when the
    two sources disagree), then for each entry of ``METROS`` builds a
    ``positive``/``total`` frame, samples a ``GenerativeModel`` on it and
    stores the summarized inference data under a document named after
    today's date. An ``"updated"`` timestamp is written once all regions
    are done.
    """
    log.info("Downloading new data...")
    new_cases, new_tests, tx_data = get_tx_covid_data()

    # Most recent day found in either county-level table.
    last_day = max(new_cases.columns[-1], new_tests.columns[-1])
    state_last_day = tx_data.index[-1]

    log.info(f"County data exists through {last_day.date()}")
    log.info(
        f"Covid tracking project data exists through {state_last_day.date()}"
    )

    # When the sources end on different dates, model only up to the earlier one.
    if last_day.date() != state_last_day.date():
        last_day = min(last_day, state_last_day)
        log.warning(f"Date mismatch, using {last_day.date()}")

    # The Firestore document is named after the date of this model run.
    fs_doc = datetime.now().strftime("%Y-%m-%d")

    # Window used to estimate a missing test total: from seven days before
    # the last day through the day before it. It is the same for every region.
    start_smooth = last_day - pd.Timedelta(days=7)
    end_smooth = last_day - pd.Timedelta(days=1)

    log.info("Running regional models")
    for region, counties in METROS.items():
        log.info(region)

        regional_cases = new_cases.loc[counties].sum()
        regional_tests = new_tests.loc[counties].sum()
        df = pd.concat([regional_cases, regional_tests], axis=1)
        df.columns = ["positive", "total"]

        # A missing test total on the last day is estimated by scaling the
        # state-wide total by the region's share of tests over the window.
        if np.isnan(df.loc[last_day, "total"]):
            regional_window = df.loc[start_smooth:end_smooth, "total"].sum()
            state_window = tx_data.loc[start_smooth:end_smooth, "total"].sum()
            df.loc[last_day, "total"] = (
                tx_data.loc[last_day, "total"] * regional_window / state_window
            )

        # Anything that is not strictly positive is zeroed so the model
        # ignores mistaken (negative) counts.
        positives = df["positive"].values
        df["positive"] = np.where(
            positives > 0, positives, np.zeros_like(positives)
        )

        model = GenerativeModel(region, df.loc[:last_day])
        model.sample()
        summary = summarize_inference_data(model.inference_data)
        to_firestore(fs_doc, {region: summary})

    to_firestore(fs_doc, {"updated": time.time()})
Exemplo n.º 3
0
def task_render_region_result(country: str, region: str,
                              run_date: pd.Timestamp):
    """Summarize a region's stored inference data and write it out as CSV.

    The inference data stored for ``(run_date, region, country)`` is
    downloaded from the ``S3_BUCKET`` bucket into a temporary file, loaded
    with ArviZ and summarized; the summary is then written as CSV to the
    state output key under the same bucket.

    Args:
        country: Country identifier forwarded to the key helpers.
        region: Region whose inference data is rendered.
        run_date: Run date used to build both storage keys.
    """
    # NOTE(review): eager loading is presumably required because the
    # temporary file is gone once the ``with`` block exits — confirm.
    az.rcParams["data.load"] = "eager"

    with tempfile.NamedTemporaryFile() as tmp:
        bucket = s3.Bucket(S3_BUCKET)
        source_key = get_inference_data_key(run_date, region, country=country)
        bucket.download_file(source_key, tmp.name)
        tmp.seek(0)
        inference_data = az.from_netcdf(tmp.name)

    summary = summarize_inference_data(inference_data)
    output_key = get_state_output_key(run_date, region, country=country)
    with fs.open(f"{S3_BUCKET}/{output_key}", "w") as out:
        summary.to_csv(out)
Exemplo n.º 4
0
def train(region):
    """Return the model, its result and a summary for ``region``.

    When ``FORCE_USE_CACHE`` is set or the region's cache is fresh, the
    cached model and result are downloaded and reused, and the summary is
    stamped with the cache's modification time. Otherwise the data is
    processed from ``START_DATE``, a new ``GenerativeModel`` is sampled and
    summarized, the outcome is cached when ``SET_CACHE`` is set, and the
    summary is stamped with the current time. Both timestamps are expressed
    in the region's time zone from ``TIMEZONES``.

    Args:
        region: Region key used for the data processor, the cache location,
            the time-zone lookup and the row selection of the processed data.

    Returns:
        A ``(model, result, summary)`` tuple.
    """
    processor = data_processor.factory(region)
    cache = Cache(BUCKET, f"cache/{region}", f"/tmp/rt-{region}")
    timezone = pytz.timezone(TIMEZONES[region])

    if FORCE_USE_CACHE or cache.is_fresh():
        # Reuse the previously stored run.
        cache.download()
        cached = cache.get()
        model = cached["model"]
        result = cached["result"]
        updated_at = cache.modified_at().astimezone(timezone)
    else:
        # Nothing usable in the cache: fit the model from scratch.
        df = processor.process_data(START_DATE)
        model = GenerativeModel(region, df.loc[region])
        model.sample()
        result = summarize_inference_data(model.inference_data)
        if SET_CACHE:
            cache.set({"model": model, "data": df, "result": result})
        updated_at = datetime.now().astimezone(timezone)

    return model, result, _build_summary(result, updated_at)
Exemplo n.º 5
0
""" Modelo para las 15 provincias con más pruebas procesadas"""
""" Distrito Nacional"""
# Script: fit the generative model for each listed province and plot the
# results. Load the processed dataset, using today's timestamp as run date.
df = get_and_process_covidtracking_data(
    run_date=pd.Timestamp.today())

# The 15 region labels to model; each one is used as a row key into df.
regiones= ["Distrito Nacional","Santo Domingo","Santiago","La Vega", "La Altagracia","Duarte",
          "San Cristóbal","La Romana","Puerto Plata","San Pedro de Macorís","San Juan",
           "Azua","Peravia","Espaillat","Sánchez Ramírez"]
           
# One model fit and one set of figures per region.
for i in regiones: 
    region=i
    
    # Select this region's rows, sample the model and summarize the outcome.
    model_data = df.loc[region] 
    gm = GenerativeModel(region, model_data)
    gm.sample()
    result = summarize_inference_data(gm.inference_data)

    # First figure: summary series plotted together with the reported
    # positives taken from the model's observed data.
    fig, ax = plt.subplots(figsize=(12, 8))
    result.infections.plot(c="C2", label="Expected primary infections")
    result.test_adjusted_positive.plot(c="C0", label="Expected positive tests if tests were constant")
    result.test_adjusted_positive_raw.plot(c="C1", alpha=.5, label="Expected positive tests", style="--")
    gm.observed.positive.plot(c="C7", alpha=.7, label="Reported positive tests")
    fig.set_facecolor("w")
    ax.legend();
    ax.set(title=f"rt.live model inference for {region}", ylabel="number of cases")
    sns.despine();
    
    # Second figure: effective reproduction rate for the region (the title
    # is in Spanish). Only its title and y-label are set in this excerpt.
    fig, ax = plt.subplots(figsize=(12, 8))
     
    ax.set(title=f"Tasa de Reproducción Efectiva Para {region}", ylabel="$R_e(t)$")
    # Read the "r_t" entry from the model trace.
    # NOTE(review): how `samples` is used is not visible in this excerpt.
    samples = gm.trace["r_t"]