def get_all_preds():
    """Fit three death-forecast ensembles on county data and return them.

    Loads the county-level frame from ``../data/``, keeps only rows whose
    ``cases`` series has a positive maximum, and runs
    ``fit_and_predict.fit_and_predict_ensemble`` three times (mode
    ``'predict_future'``, outcome ``'deaths'``) with different method mixes.
    ``NUM_DAYS_LIST`` is read from module scope and passed as ``target_day``.

    Each prediction column is then expanded into per-day columns (days 1-7)
    on the working frame.  Note that the returned frame selects only the geo
    columns plus the prediction columns found *before* that expansion, so the
    per-day columns are not part of the return value.

    Returns:
        DataFrame with ``countyFIPS``, ``CountyNamew/StateAbbrev`` and every
        column whose name contains ``'predicted'``.
    """
    very_important_vars = [
        'PopulationDensityperSqMile2010',
        # 'MedicareEnrollment,AgedTot2017',
        'PopulationEstimate2018',
        '#ICU_beds',
        'MedianAge2010',
        'Smokers_Percentage',
        'DiabetesPercentage',
        'HeartDiseaseMortality',
        'Respiratory Mortality',
        '#Hospitals',
    ]

    # Model specifications handed to the ensemble fitter.
    exponential = {'model_type': 'exponential'}
    shared_exponential = {'model_type': 'shared_exponential'}
    demographics = {
        'model_type': 'shared_exponential',
        'demographic_vars': very_important_vars,
    }
    linear = {'model_type': 'linear'}

    df = load_data.load_county_level(data_dir='../data/')
    df['max_cases'] = [max(series) for series in df['cases']]
    df = df[df['max_cases'] > 0]

    # (output column, methods) pairs, fitted in this order.
    ensembles = (
        ('predicted_deaths_ensemble_all',
         [exponential, shared_exponential, demographics, linear]),
        ('predicted_deaths_ensemble_no_exponential_all',
         [shared_exponential, demographics, linear]),
        ('predicted_deaths_ensemble_shared_linear_all',
         [shared_exponential, linear]),
    )
    for output_key, methods in ensembles:
        df = fit_and_predict.fit_and_predict_ensemble(
            df,
            target_day=NUM_DAYS_LIST,
            mode='predict_future',
            outcome='deaths',
            methods=methods,
            output_key=output_key)

    method_keys = [col for col in df if 'predicted' in col]
    for key in method_keys:
        # Drop the trailing three characters ("all") and append the day.
        prefix = key[:-3]
        per_row_preds = df[key].values
        for day in range(1, 8):
            df[prefix + str(day)] = np.array(
                [preds[day - 1] for preds in per_row_preds])

    geo = ['countyFIPS', 'CountyNamew/StateAbbrev']
    return df[geo + method_keys]
def data_loader():
    """Return the county-level data, cached as a pickle in the working dir.

    If ``county_data.pkl`` is listed in the current working directory it is
    unpickled and returned.  Otherwise the data is loaded through
    ``load_data.load_county_level`` from ``covid19-severity-prediction/data``,
    written to ``county_data.pkl`` and then returned.

    NOTE: the cache is read with ``pickle.load``; only use this with a cache
    file you created yourself.
    """
    cache_name = "county_data.pkl"

    # Cache hit: reuse the previously pickled frame.
    if cache_name in os.listdir():
        with open(cache_name, 'rb') as cache_file:
            return pickle.load(cache_file)

    # Cache miss: load from the raw data and persist for the next call.
    county_df = load_data.load_county_level("covid19-severity-prediction/data")
    with open(cache_name, 'wb') as cache_file:
        pickle.dump(county_df, cache_file)
    return county_df
cached_fname = oj(cached_dir, f'preds_{d.month}_{d.day}_cached.pkl') if os.path.exists(cached_fname): date2.append(d + timedelta(days=6)) add_predictions_7day(pd.read_pickle(cached_fname), df_tab) else: k += 1 if k > 1: break return df_tab, date2 if __name__ == '__main__': print('loading data...') NUM_DAYS_LIST = [1, 2, 3, 4, 5, 6, 7] df_county = load_data.load_county_level( data_dir=oj(parentdir, 'data')).fillna(0) df_county = add_preds( df_county, NUM_DAYS_LIST=NUM_DAYS_LIST, cached_dir=oj(parentdir, 'data')) # adds keys like "Predicted Deaths 1-day" ## orgnize predicts as array add_pre(df_county, 'Predicted Cases ', 'pred_cases', 'pred_new_cases') add_pre(df_county, 'Predicted Deaths ', 'pred_deaths', 'pred_new_deaths') ## add new cases/death to dataframe add_new(df_county) ## Add new cases/deaths predictions and their intervals df_county = add_new_pre(df_county, 'Predicted Cases ', 'tot_cases',
# Script that runs the data-loading and prediction pipeline once: it loads
# the county-level and hospital-level data, adds predictions to the county
# frame, and prints a message when every step finished without raising.
import numpy as np
import pandas as pd
from os.path import join as oj
import pygsheets
import pandas as pd
import sys

# The project modules imported below are resolved through these relative
# paths, so they depend on the working directory the script is started from.
sys.path.append('../modeling')
sys.path.append('..')
import load_data
from fit_and_predict import add_preds
from functions import merge_data

if __name__ == '__main__':
    # A single-entry list is passed as NUM_DAYS_LIST to add_preds.
    NUM_DAYS_LIST = [1]
    df_county = load_data.load_county_level(data_dir='../data')
    # Loaded but not used further in this script.
    df_hospital = load_data.load_hospital_level(
        data_dir='../data_hospital_level')
    df_county = add_preds(
        df_county,
        NUM_DAYS_LIST=NUM_DAYS_LIST)  # adds keys like "Predicted Deaths 1-day"
    # Only reached when none of the steps above raised.
    print('succesfully ran pipeline!')
# Script-level setup followed by an optional correction of the NY death
# series around the 4/14 uptick.
# NOTE(review): this fragment may sit inside an enclosing block that is not
# visible here -- confirm the indentation level before applying.

# Model spec dict selecting the 'linear' model type.
linear = {'model_type': 'linear'}
# Switch for the correction branch below; it is False here, so the
# `if corrected:` body does not run as written.
corrected = False
#df_county = pd.read_pickle("df_county_6_21.pkl")
df_county = pd.read_pickle("all_deaths_preds_6_21.pkl")
linear_weights_by_day = {}
today = date(2020, 6, 21)
earliest_day = date(2020, 3, 7)
# Number of days between earliest_day and today.
ndays = (today - earliest_day).days
outcome = 'deaths'
horizon = 21
if corrected:
    """ correcting the uptick on 4/14 """
    df_county_orig = load_data.load_county_level(data_dir='../data/')
    df_county_predictions = pd.read_pickle("all_deaths_preds_6_21.pkl")
    # Work on a deep copy so df_county_orig itself is not modified.
    df_county = copy.deepcopy(df_county_orig)
    # today = date(2020, 6, 8)
    uptick_date = date(2020, 4, 14)
    # Number of days between the uptick date and `today`.
    days_to_correct = (today - uptick_date).days
    # Rows are addressed by position in both frames; this assumes
    # df_county and df_county_predictions share row order -- TODO confirm.
    for i in range(len(df_county)):
        r = df_county.iloc[i]
        # Only rows with StateName 'NY' are adjusted.
        if r['StateName'] == 'NY':
            # First element of row i's stored prediction in the 4/14
            # ensemble column.
            pred = df_county_predictions[
                f'all_deaths_pred_4_14_ensemble_21'].values[i][0]
            # Recorded deaths `days_to_correct` entries from the end of the
            # series -- presumably the value on uptick_date; verify that the
            # series ends on `today`.
            actual = r['deaths'][-days_to_correct]
            correction = actual - pred
            # Entries below `actual` are kept; entries at or above it are
            # lowered by `correction` and truncated to int.
            df_county['deaths'].values[i] = np.array([
                x if x < actual else int(x - correction)
                for x in r['deaths']
            ])
import load_data
from Project_Models import RNN_Model
import torch
import torch.nn as nn


def normalize(x):
    """Scale ``x`` by its maximum along dimension 0.

    The maximum is taken over dim 0 with ``keepdim=True``; any maximum that
    is exactly zero is replaced by ``1e-4`` so the division below never
    divides by zero.

    Returns:
        A ``(x_normed, maxtensor)`` tuple: the scaled tensor and the divisor
        that was used (multiply by it to undo the scaling).
    """
    col_max, _ = x.max(0, keepdim=True)
    col_max[col_max == 0] = 1e-4
    return x / col_max, col_max


if __name__ == '__main__':
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    df = load_data.load_county_level()

    # Input features built from the listed county-level columns.
    x = form_input_tensor(df, [
        '#Hospitals', '#ICU_beds', 'MedicareEnrollment,AgedTot2017',
        'DiabetesPercentage'
    ]).to(device)
    x, xmaxtensor = normalize(x)
    print(f"#x nan: {(torch.sum(torch.isnan(x)))}")
    print("x:", x)

    y = form_labels_tensor(df).to(device)
    y, ymaxtensor = normalize(y)
    print(f"#y nan: {torch.sum(torch.isnan(y))}")
    # Labels are printed scaled back by their max tensor.
    print("y:", y * ymaxtensor)

    # The first constructor argument is the size of x's third dimension.
    model = RNN_Model.RNN(x.shape[2], 128, 2).to(device)
os.path.abspath(inspect.getfile(inspect.currentframe()))) parentdir = os.path.dirname(currentdir) sys.path.append(parentdir) sys.path.append(parentdir + '/modeling') sys.path.append(parentdir + '/viz') from fit_and_predict import add_preds from viz_interactive import plot_counties_slider import load_data import numpy as np import pandas as pd if __name__ == "__main__": data_dir = oj(parentdir, 'data') # load in county data df = load_data.load_county_level(data_dir=oj(parentdir, 'data')) # add lat and lon to the dataframe county_lat_lon = pd.read_csv(oj(data_dir, 'county_pop_centers.csv'), dtype={ 'STATEFP': str, 'COUNTYFP': str }) county_lat_lon['fips'] = (county_lat_lon['STATEFP'] + county_lat_lon['COUNTYFP']).astype(np.int64) # add predictions df = add_preds(df, NUM_DAYS_LIST=[1, 2, 3, 4, 5], cached_dir=data_dir) # join lat / lon to df df = df.join(county_lat_lon.set_index('fips'), on='countyFIPS', how='left').rename(columns={ 'LATITUDE': 'lat', 'LONGITUDE': 'lon'
'Sonoma' ] dd = df[df.StateName == 'CA'] dd = dd[dd.CountyName.isin(bay_area)] plt.subplot(R, C, 3) viz_static.plot_forecasts(dd.head(5), death_thresh=20) plt.title('Bay area counties') plt.tight_layout() plt.savefig(oj(parentdir, 'results', 'forecasts.svg')) if __name__ == '__main__': print('loading data...') NUM_DAYS_LIST = [1, 2, 3, 4, 5, 6, 7] df_county = load_data.load_county_level( data_dir=oj(parentdir, 'data')).fillna(0) df_county_dis = load_data.load_county_level( data_dir=oj(parentdir, 'data'), discard=True).fillna(0) # discard one day in time series num_days_in_past = 3 output_key = f'Predicted Deaths {num_days_in_past}-day Lagged' df_county = add_preds( df_county, NUM_DAYS_LIST=NUM_DAYS_LIST, cached_dir=oj(parentdir, 'data'), discard=False) # adds keys like "Predicted Deaths 1-day" df_county_old = add_preds( df_county_dis, NUM_DAYS_LIST=NUM_DAYS_LIST, cached_dir=oj(parentdir, 'data'),
# Module-level setup for the Flask app: extends the import path, creates the
# server, and loads the county and hospital data used further down the file.

# Make the project's modeling/ and functions/ directories importable before
# the project imports that follow.
sys.path.append(rootdir + '/modeling')
sys.path.append(rootdir + '/functions')
from fit_and_predict import add_preds
from viz import viz_map
import update_severity_index as severity_index
import load_data
import merge_data

server = flask.Flask('app')
# NOTE(review): when the 'secret_key' environment variable is unset this
# falls back to the hard-coded value 'secret' -- confirm that is acceptable
# for every deployment.
server.secret_key = os.environ.get('secret_key', 'secret')

data_dir = oj(rootdir, 'data')
# load in county data
df_county = load_data.load_county_level(data_dir=oj(rootdir, 'data'))
# add lat and lon to the dataframe
# STATEFP / COUNTYFP are read as strings so they can be concatenated below.
county_lat_lon = pd.read_csv(
    oj(rootdir, 'data/county_level/raw/county_ids/county_popcenters.csv'),
    dtype={'STATEFP': str, 'COUNTYFP': str}
)
# 'fips' is the string concatenation of the state and county codes.
county_lat_lon['fips'] = (county_lat_lon['STATEFP'] +
                          county_lat_lon['COUNTYFP'])
# add predictions
NUM_DAYS_LIST = [1, 2, 3, 4, 5]
df_county = add_preds(df_county,
                      NUM_DAYS_LIST=NUM_DAYS_LIST,
                      cached_dir=data_dir)
# load in hospital data and merge
# The hospital data directory is a sibling of rootdir (its parent directory
# joined with 'covid-19-private-data').
df_hospital = load_data.load_hospital_level(
    data_dir=oj(os.path.dirname(rootdir), 'covid-19-private-data')
)