def __init__(self, fips, N_samples, t_list, I_initial=1, suppression_policy=None):
    """Load population and bed data for a county or state model run.

    Args:
        fips: 5-digit county FIPS code or 2-digit state FIPS code; the
            length of the code selects the aggregation level.
        N_samples: number of Monte Carlo samples to run.
        t_list: time steps (days) to simulate over.
        I_initial: initial number of infected individuals.
        suppression_policy: optional suppression policy to apply.
    """
    # Caching globally to avoid relatively significant performance overhead
    # of loading for each county.
    global beds_data, population_data
    # Bug fix: compare against None explicitly. The original `not beds_data`
    # would re-load (or raise, for pandas-backed objects whose truthiness is
    # ambiguous) whenever a loaded dataset happened to be falsy.
    if beds_data is None or population_data is None:
        beds_data = DHBeds.local().beds()
        population_data = FIPSPopulation.local().population()

    self.fips = fips
    self.agg_level = AggregationLevel.COUNTY if len(self.fips) == 5 else AggregationLevel.STATE
    self.N_samples = N_samples
    self.I_initial = I_initial
    self.suppression_policy = suppression_policy
    self.t_list = t_list

    if self.agg_level is AggregationLevel.COUNTY:
        self.county_metadata = load_data.load_county_metadata().set_index('fips').loc[fips].to_dict()
        self.state_abbr = us.states.lookup(self.county_metadata['state']).abbr
        self.population = population_data.get_county_level('USA', state=self.state_abbr, fips=self.fips)
        # TODO: Some counties do not have hospitals. Likely need to go to HRR level..
        # `or 0` defaults missing bed records to zero beds.
        self.beds = beds_data.get_county_level(self.state_abbr, fips=self.fips) or 0
        self.icu_beds = beds_data.get_county_level(self.state_abbr, fips=self.fips, column='icu_beds') or 0
    else:
        self.state_abbr = us.states.lookup(fips).abbr
        self.population = population_data.get_state_level('USA', state=self.state_abbr)
        self.beds = beds_data.get_state_level(self.state_abbr) or 0
        self.icu_beds = beds_data.get_state_level(self.state_abbr, column='icu_beds') or 0
def get_usa_by_county_with_projection_df(input_dir, intervention_type):
    """Join US county rows with model projections and state interventions.

    Raises an Exception if any "Combined Key" appears more than once
    after the joins (i.e. a county was duplicated).
    """
    us_only = _get_usa_by_county_df()
    fips_df = FIPSPopulation.local().data  # used to get interventions
    interventions_df = _get_interventions_df()
    projections_df = get_county_projections_df(input_dir, intervention_type, interventions_df)

    # Chain of inner joins: projections by FIPS, then state lookup, then
    # the per-state intervention.
    counties_decorated = us_only.merge(
        projections_df,
        left_on="State/County FIPS Code",
        right_on="FIPS",
        how="inner",
    )
    counties_decorated = counties_decorated.merge(
        fips_df[["state", "fips"]], left_on="FIPS", right_on="fips", how="inner"
    )
    counties_decorated = counties_decorated.merge(
        interventions_df, left_on="state", right_on="state", how="inner"
    )

    counties_remapped = counties_decorated.rename(columns=OUTPUT_COLUMN_REMAP_TO_RESULT_DATA)
    counties = pd.DataFrame(counties_remapped, columns=RESULT_DATA_COLUMNS_COUNTIES)
    counties = counties.fillna(NULL_VALUE)
    counties.index.name = "OBJECTID"

    # assert unique key test
    max_key_count = counties["Combined Key"].value_counts().max()
    if max_key_count != 1:
        raise Exception(
            f"counties['Combined Key'].value_counts().max() = {max_key_count}, at input_dir {input_dir}."
        )
    return counties
def build_county_summary(min_date, country="USA", state=None, output_dir=OUTPUT_DIR):
    """Builds county summary json files.

    For each state, writes `<state>.summary.json` under
    `output_dir/county_summaries` listing the county FIPS codes that have
    population, bed, and nonzero case data.
    """
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(
        AggregationLevel.COUNTY, after=min_date, country=country, state=state
    )

    output_dir = pathlib.Path(output_dir) / "county_summaries"
    _logger.info(f"Outputting to {output_dir}")
    if not output_dir.exists():
        _logger.info(f"{output_dir} does not exist, creating")
        output_dir.mkdir(parents=True)

    # Bug fix: use distinct loop names so the `country`/`state` parameters
    # are not clobbered by the grouping loop (the original left `country`
    # bound to the last key seen when later calling get_data).
    counties_by_state = defaultdict(list)
    for _key_country, key_state, county, fips in timeseries.county_keys():
        counties_by_state[key_state].append((county, fips))

    for key_state, counties in counties_by_state.items():
        data = {"counties_with_data": []}
        for county, fips in counties:
            cases = timeseries.get_data(state=key_state, country=country, fips=fips)
            beds = beds_data.get_county_level(key_state, fips=fips)
            population = population_data.get_county_level(country, key_state, fips=fips)
            # Only include counties with population, beds, and any cases.
            if population and beds and sum(cases.cases):
                data["counties_with_data"].append(fips)

        output_path = output_dir / f"{key_state}.summary.json"
        output_path.write_text(json.dumps(data, indent=2))
def run_state_level_forecast(
    min_date, max_date, country="USA", state=None, output_dir=OUTPUT_DIR
):
    """Run the state-level forecast for every state in the timeseries.

    Backs up any existing output directory (when running all states) and
    dispatches `forecast_each_state` tasks on a worker pool.
    """
    # DH Beds dataset does not have all counties, so using the legacy state
    # level bed data.
    legacy_dataset = LegacyJHUDataset(min_date)
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(
        AggregationLevel.STATE, after=min_date, country=country, state=state
    )

    # Bug fix: honor the `output_dir` argument. The original always used the
    # module-level OUTPUT_DIR constant, silently ignoring the parameter.
    output_dir = pathlib.Path(output_dir)
    if output_dir.exists() and not state:
        # Back up previous results instead of overwriting them.
        backup = output_dir.name + "." + str(int(time.time()))
        output_dir.rename(output_dir.parent / backup)
    output_dir.mkdir(parents=True, exist_ok=True)

    pool = get_pool()
    for state in timeseries.states:
        args = (
            country,
            state,
            timeseries,
            legacy_dataset,
            population_data,
            min_date,
            max_date,
            output_dir,
        )
        pool.apply_async(forecast_each_state, args=args)

    pool.close()
    pool.join()
def __init__(
    self,
    state,
    output_interval_days=4,
    run_mode="can-before",
    output_dir=None,
    jhu_dataset=None,
    cds_dataset=None,
    include_imputed=False,
):
    """Set up per-state pipeline configuration and cached timeseries data."""
    self.output_interval_days = output_interval_days
    self.state = state
    self.run_mode = RunMode(run_mode)
    self.include_imputed = include_imputed
    self.state_abbreviation = us.states.lookup(state).abbr
    self.population_data = FIPSPopulation.local().population()
    self.output_dir = output_dir

    # Fall back to the locally cached datasets when none are injected.
    self.jhu_local = jhu_dataset or JHUDataset.local()
    self.cds_dataset = cds_dataset or CDSDataset.local()

    county_ts = build_aggregate_county_data_frame(self.jhu_local, self.cds_dataset)
    county_ts["date"] = county_ts["date"].dt.normalize()
    self.county_timeseries = county_ts

    state_ts = self.jhu_local.timeseries().state_data
    state_ts["date"] = state_ts["date"].dt.normalize()
    self.state_timeseries = state_ts

    # Restrict the whitelist to rows approved for inference.
    whitelist = load_data.load_whitelist()
    self.df_whitelist = whitelist[whitelist["inference_ok"] == True]
def get_county_projections_df(input_dir, initial_intervention_type, state_interventions_df):
    """
    For each county in the FIPS data, load the results generated via run.py
    and assemble them into a projections dataframe.
    """
    fips_pd = FIPSPopulation.local().data  # to get the state, county & fips

    results = []  # rows as lists, converted to a dataframe at the end
    missing = 0
    for _, fips_row in fips_pd.iterrows():
        state = fips_row["state"]
        fips = fips_row["fips"]
        intervention_type = _get_intervention_type(
            initial_intervention_type, state, state_interventions_df)
        path = os.path.join(input_dir, f"{state}.{fips}.{intervention_type}.json")
        projection_data = _calculate_projection_data(path)
        if projection_data:
            results.append([state, fips] + projection_data)
        else:
            missing += 1
            # Abort early once it is clear most counties have no model output.
            if missing > 2000:
                raise Exception(
                    f"Missing a majority of counties from input_dir: {input_dir}")

    print(f"Models missing for {missing} counties")
    return pd.DataFrame(results, columns=CALCULATED_PROJECTION_HEADERS_COUNTIES)
def get_county_projections_df(input_dir, intervention_type):
    """
    For each county in the FIPS data, read the run.py output file from the
    `county` subdirectory and build a projections dataframe.
    """
    fips_pd = FIPSPopulation.local().data  # to get the state, county & fips

    results = []  # rows as lists, converted to a dataframe at the end
    missing = 0
    for _, fips_row in fips_pd.iterrows():
        state, fips = fips_row["state"], fips_row["fips"]
        path = os.path.join(
            input_dir, "county", f"{state}.{fips}.{intervention_type}.json")
        # if the file exists in that directory then process
        projection_data = _calculate_projection_data(path)
        if projection_data:
            results.append([state, fips] + projection_data)
        else:
            missing += 1

    print(f"Models missing for {missing} counties")
    return pd.DataFrame(results, columns=CALCULATED_PROJECTION_HEADERS_COUNTIES)
def get_county_projections_df(input_dir, initial_intervention_type, state_interventions_df):
    """
    For each county in the FIPS data, look at the results we generated via
    run.py and build the projections dataframe (in parallel).
    """
    fips_pd = FIPSPopulation.local().data  # to get the state, county & fips
    # Bug fix: copy the column slice so the assignments below write into an
    # independent frame instead of a view of the source data (avoids pandas
    # SettingWithCopy warnings / undefined chained-assignment behavior).
    county_df = fips_pd[["state", CommonFields.FIPS]].copy()
    county_df.loc[:, "intervention_type"] = county_df.state.apply(
        lambda x: _get_intervention_type(initial_intervention_type, x, state_interventions_df))
    county_df.loc[:, "path"] = county_df.apply(lambda x: get_file_path(
        input_dir, x.state, x.intervention_type, x.fips), axis=1)
    new_df = county_df.parallel_apply(
        lambda x: _calculate_projection_data(x.state, x.path, fips=x.fips), axis=1)
    # Rows whose model output file was missing come back as all-null.
    missing = new_df.isnull().sum()[CommonFields.STATE]
    print(f"Models missing for {missing} counties")
    return new_df
def __init__(
    self,
    state,
    output_interval_days=4,
    run_mode="can-before",
    output_dir=None,
    jhu_dataset=None,
    cds_dataset=None,
    include_imputed=False,
):
    """Set up per-state pipeline configuration and cached timeseries data."""
    self.output_interval_days = output_interval_days
    self.state = state
    self.run_mode = RunMode(run_mode)
    self.include_imputed = include_imputed
    self.state_abbreviation = us.states.lookup(state).abbr
    self.population_data = FIPSPopulation.local().population()
    self.output_dir = output_dir

    # Fall back to the locally cached datasets when none are injected.
    self.jhu_local = jhu_dataset or JHUDataset.local()
    self.cds_dataset = cds_dataset or CDSDataset.local()

    self.county_timeseries = build_aggregate_county_data_frame(
        self.jhu_local, self.cds_dataset)
    self.county_timeseries["date"] = self.county_timeseries[
        "date"].dt.normalize()

    state_timeseries = self.jhu_local.timeseries().get_subset(
        AggregationLevel.STATE)
    # Bug fix: keep the full state dataframe with its "date" column
    # normalized. The original stored only the normalized date Series,
    # discarding the rest of the timeseries data.
    self.state_timeseries = state_timeseries.data
    self.state_timeseries["date"] = self.state_timeseries["date"].dt.normalize()
def get_county_projections_df(input_dir, initial_intervention_type, state_interventions_df):
    """
    For each county in the FIPS data, look at the results we generated via
    run.py and build the projections dataframe (in parallel).

    Raises:
        Exception: if more than 2000 counties are missing model output.
    """
    fips_pd = FIPSPopulation.local().data  # to get the state, county & fips
    # Bug fix: copy the column slice so the assignments below write into an
    # independent frame instead of a view of the source data (avoids pandas
    # SettingWithCopy warnings / undefined chained-assignment behavior).
    county_df = fips_pd[["state", "fips"]].copy()
    county_df.loc[:, "intervention_type"] = county_df.state.apply(
        lambda x: _get_intervention_type(initial_intervention_type, x, state_interventions_df))
    county_df.loc[:, "path"] = county_df.apply(
        lambda x: get_file_path(
            input_dir, x.state, x.intervention_type, fips=x.fips),
        axis=1,
    ).values
    new_df = county_df.parallel_apply(
        lambda x: _calculate_projection_data(x.state, x.path, fips=x.fips), axis=1)
    # Rows whose model output file was missing come back as all-null.
    missing = new_df.isnull().sum()["State"]
    if missing > 2000:
        raise Exception(
            f"Missing a majority of counties from input_dir: {input_dir}")
    print(f"Models missing for {missing} counties")
    return new_df
def get_county_projections():
    """Build a dataframe of no-intervention county projections.

    For each county in the FIPS data, loads the run.py model output and
    computes hospitalization/bed-shortfall predictions at 16 and 32 days
    out, plus mean and peak statistics.
    """
    # for each state in our data look at the results we generated via run.py
    # to create the projections
    fips_pd = FIPSPopulation.local().data  # to get the state, county & fips
    intervention_type = NO_INTERVENTION  # None, as requested

    # get 16 and 32 days out from now
    today = datetime.datetime.now()
    sixteen_days = today + datetime.timedelta(days=16)
    thirty_two_days = today + datetime.timedelta(days=32)

    # save results in a list of lists, converted to df later
    results = []
    missing = 0
    # get the state and fips so we can get the files
    for index, fips_row in fips_pd.iterrows():
        state = fips_row['state']
        fips = fips_row['fips']
        file_name = f"{state}.{fips}.{intervention_type}.json"
        path = os.path.join(OUTPUT_DIR_COUNTIES, file_name)
        # if the file exists in that directory then process
        if os.path.exists(path):
            df = read_json_as_df(path)
            df['short_fall'] = df.apply(calc_short_fall, axis=1)

            hosp_16_days, short_fall_16_days = get_hospitals_and_shortfalls(df, sixteen_days)
            hosp_32_days, short_fall_32_days = get_hospitals_and_shortfalls(df, thirty_two_days)

            # Daily new deaths from the cumulative dead column.
            df['new_deaths'] = df.dead - df.dead.shift(1)
            mean_hospitalizations = df.all_hospitalized.mean().round(0)
            mean_deaths = df.new_deaths.mean()

            peak_hospitalizations_date = df.iloc[df.all_hospitalized.idxmax()].date
            peak_deaths_date = df.iloc[df.new_deaths.idxmax()].date
            results.append([
                state,
                fips,
                hosp_16_days,
                hosp_32_days,
                short_fall_16_days,
                short_fall_32_days,
                mean_hospitalizations,
                mean_deaths,
                peak_hospitalizations_date,
                peak_deaths_date,
            ])
        else:
            missing = missing + 1

    print(f'Models missing for {missing} county')
    headers = [
        'State',
        'FIPS',
        '16-day_Hospitalization_Prediction',
        '32-day_Hospitalization_Prediction',
        '16-day_Beds_Shortfall',
        '32-day_Beds_Shortfall',
        "Mean Hospitalizations",
        "Mean Deaths",
        # Reconstructed: this literal was broken across lines in the source.
        # Downstream column remapping expects "Peak Hospitalizations On".
        "Peak Hospitalizations On",
        # NOTE(review): named "Mean Deaths On" but holds the peak-deaths
        # date; kept as-is because the consumer's remap uses this key.
        "Mean Deaths On",
    ]
    ndf = pd.DataFrame(results, columns=headers)
    return ndf
def run_county_level_forecast(
    min_date: datetime.datetime,
    max_date: datetime.datetime,
    output_dir: pathlib.Path,
    country: str = "USA",
    state: str = None,
):
    """Fan out one forecast task per county across the worker pool."""
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries().get_subset(
        AggregationLevel.COUNTY, after=min_date, country=country, state=state)

    _logger.info(f"Outputting to {output_dir}")
    output_dir.mkdir(parents=True, exist_ok=True)

    # Group (county, fips) pairs by state for per-state progress logging.
    counties_by_state = defaultdict(list)
    for country, state, county, fips in timeseries.county_keys():
        counties_by_state[state].append((county, fips))

    pool = get_pool()
    for state, counties in counties_by_state.items():
        _logger.info(f"Running county models for {state}")
        for county, fips in counties:
            task_args = (
                min_date,
                max_date,
                country,
                state,
                county,
                fips,
                timeseries,
                beds_data,
                population_data,
                output_dir,
            )
            pool.apply_async(
                forecast_each_county,
                task_args,
                callback=_result_callback_wrapper(
                    f"{county}, {state}: {fips}"),
            )
    pool.close()
    pool.join()
def run_county_level_forecast(min_date, max_date, country="USA", state=None,
                              output_dir=OUTPUT_DIR):
    """Run a forecast task for every county, writing under output_dir/county."""
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries().get_subset(
        AggregationLevel.COUNTY, after=min_date, country=country, state=state)

    output_dir = pathlib.Path(output_dir) / "county"
    _logger.info(f"Outputting to {output_dir}")
    # Dont want to replace when just running the states
    if output_dir.exists() and not state:
        backup = f"{output_dir.name}.{int(time.time())}"
        output_dir.rename(output_dir.parent / backup)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Group (county, fips) pairs by state for per-state progress logging.
    counties_by_state = defaultdict(list)
    for country, state, county, fips in timeseries.county_keys():
        counties_by_state[state].append((county, fips))

    pool = get_pool()
    for state, counties in counties_by_state.items():
        _logger.info(f"Running county models for {state}")
        for county, fips in counties:
            task_args = (
                min_date,
                max_date,
                country,
                state,
                county,
                fips,
                timeseries,
                beds_data,
                population_data,
                output_dir,
            )
            pool.apply_async(forecast_each_county, args=task_args)
    pool.close()
    pool.join()
def __init__(
    self,
    state,
    output_interval_days=4,
    run_mode="can-before",
    output_dir=None,
    include_imputed=False,
):
    """Record run configuration and load population data for the state."""
    self.state = state
    self.state_abbreviation = us.states.lookup(state).abbr
    self.run_mode = RunMode(run_mode)
    self.output_interval_days = output_interval_days
    self.output_dir = output_dir
    self.include_imputed = include_imputed
    self.population_data = FIPSPopulation.local().population()
def get_usa_by_county_with_projection_df(input_dir):
    """Join US county data with projections and state interventions.

    Returns a dataframe with one row per county, columns remapped to the
    short output names, NaNs replaced by NULL_VALUE.

    Raises:
        Exception: if any "Combined Key" appears more than once after joining.
    """
    us_only = get_usa_by_county_df()
    fips_df = FIPSPopulation.local().data  # used to get interventions
    interventions_df = get_interventions_df()  # used to say what state has what interventions
    projections_df = get_county_projections(input_dir)

    counties_decorated = us_only.merge(
        projections_df, left_on='State/County FIPS Code', right_on='FIPS', how='inner'
    ).merge(
        fips_df[['state', 'fips']], left_on='FIPS', right_on='fips', how='inner'
    ).merge(
        interventions_df, left_on='state', right_on='state', how='inner'
    )

    state_col_remap = {
        'state_x': 'Province/State',
        'intervention': 'State Intervention',
        '16-day_Hospitalization_Prediction': '16d-HSPTLZD',
        '32-day_Hospitalization_Prediction': '32d-HSPTLZD',
        '16-day_Beds_Shortfall': '16d-LACKBEDS',
        '32-day_Beds_Shortfall': '32d-LACKBEDS',
        "Mean Hospitalizations": 'MEAN-HOSP',
        "Mean Deaths": 'MEAN-DEATHS',
        "Peak Hospitalizations On": 'PEAK-HOSP',
        "Mean Deaths On": 'PEAK-DEATHS',
        "Deaths": "Current Deaths",
        "Confirmed": "Current Confirmed",
        "Recovered": "Current Recovered",
        "Active": "Current Active",
    }

    counties_remapped = counties_decorated.rename(columns=state_col_remap)
    new_cols = list(set(county_output_cols + list(state_col_remap.values())))
    counties = pd.DataFrame(counties_remapped, columns=new_cols)
    counties = counties.fillna(NULL_VALUE)
    counties.index.name = 'OBJECTID'

    # Bug fix: `assert` is stripped under `python -O`, silently skipping the
    # uniqueness check; raise explicitly instead.
    max_key_count = counties['Combined Key'].value_counts().max()
    if max_key_count != 1:
        raise Exception(
            f"Found duplicate 'Combined Key' rows (max count = {max_key_count}) for input_dir: {input_dir}.")
    return counties
def get_usa_by_county_with_projection_df(input_dir, intervention_type):
    """Join US county rows with projections and interventions, keyed by FIPS.

    Raises an Exception if any "Combined Key" appears more than once
    after the joins.
    """
    us_only = _get_usa_by_county_df()
    fips_df = FIPSPopulation.local().data  # used to get interventions
    interventions_df = _get_interventions_df()
    projections_df = get_county_projections_df(input_dir, intervention_type, interventions_df)

    merged = us_only.merge(projections_df, on=CommonFields.FIPS, how="inner")
    merged = merged.merge(
        fips_df[[CommonFields.STATE, CommonFields.FIPS]], on=CommonFields.FIPS, how="inner",
    )
    merged = merged.merge(interventions_df, on=CommonFields.STATE, how="inner")

    counties_remapped = merged.rename(columns=OUTPUT_COLUMN_REMAP_TO_RESULT_DATA)
    counties = pd.DataFrame(counties_remapped)[RESULT_DATA_COLUMNS_COUNTIES]
    counties = counties.fillna(NULL_VALUE)
    counties.index.name = "OBJECTID"

    # Each county must appear exactly once after the joins.
    combined_key_max = counties["Combined Key"].value_counts().max()
    if combined_key_max != 1:
        raise Exception(
            "counties['Combined Key'].value_counts().max() = "
            f"{combined_key_max}, at input_dir {input_dir}."
        )
    return counties
def get_usa_by_county_with_projection_df(input_dir, intervention_type):
    """Join US county data with projections and state interventions.

    Returns a dataframe with one row per county, columns remapped via
    OUTPUT_COLUMN_REMAP_TO_RESULT_DATA, NaNs replaced by NULL_VALUE.

    Raises:
        Exception: if any "Combined Key" appears more than once after joining.
    """
    us_only = get_usa_by_county_df()
    fips_df = FIPSPopulation.local().data  # used to get interventions
    interventions_df = get_interventions_df()  # used to say what state has what interventions
    projections_df = get_county_projections_df(input_dir, intervention_type)

    counties_decorated = us_only.merge(
        projections_df, left_on='State/County FIPS Code', right_on='FIPS', how='inner'
    ).merge(
        fips_df[['state', 'fips']], left_on='FIPS', right_on='fips', how='inner'
    ).merge(
        interventions_df, left_on='state', right_on='state', how='inner'
    )

    counties_remapped = counties_decorated.rename(columns=OUTPUT_COLUMN_REMAP_TO_RESULT_DATA)
    counties = pd.DataFrame(counties_remapped, columns=RESULT_DATA_COLUMNS_COUNTIES)
    counties = counties.fillna(NULL_VALUE)
    counties.index.name = 'OBJECTID'

    # Bug fix: `assert` is stripped under `python -O`, silently skipping the
    # uniqueness check; raise explicitly instead.
    max_key_count = counties['Combined Key'].value_counts().max()
    if max_key_count != 1:
        raise Exception(
            f"Found duplicate 'Combined Key' rows (max count = {max_key_count}) for input_dir: {input_dir}.")
    return counties
def run_state_level_forecast(
    min_date, max_date, output_dir, country="USA", state=None,
):
    """Dispatch one forecast task per state on the worker pool."""
    # DH Beds dataset does not have all counties, so using the legacy state
    # level bed data.
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries().get_subset(
        AggregationLevel.STATE, after=min_date, country=country, state=state)

    output_dir = pathlib.Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    pool = get_pool()
    for state in timeseries.states:
        task_args = (
            country,
            state,
            timeseries,
            beds_data,
            population_data,
            min_date,
            max_date,
            output_dir,
        )
        pool.apply_async(
            forecast_each_state,
            task_args,
            callback=_result_callback_wrapper(f"{state}, {country}"),
        )
    pool.close()
    pool.join()
def __init__(self, fips, N_samples, t_list, I_initial=1, suppression_policy=None):
    """Load population and bed data for a county or state model run.

    Args:
        fips: 5-digit county FIPS code or 2-digit state FIPS code; the
            length of the code selects the aggregation level.
        N_samples: number of Monte Carlo samples to run.
        t_list: time steps (days) to simulate over.
        I_initial: initial number of infected individuals.
        suppression_policy: optional suppression policy to apply.
    """
    # Caching globally to avoid relatively significant performance overhead
    # of loading for each county.
    global beds_data, population_data
    # Bug fix: compare against None explicitly. The original `not beds_data`
    # would re-load (or raise, for pandas-backed objects whose truthiness is
    # ambiguous) whenever a loaded dataset happened to be falsy.
    if beds_data is None or population_data is None:
        beds_data = CovidCareMapBeds.local().beds()
        population_data = FIPSPopulation.local().population()

    self.fips = fips
    self.agg_level = (
        AggregationLevel.COUNTY if len(self.fips) == 5 else AggregationLevel.STATE
    )
    self.N_samples = N_samples
    self.I_initial = I_initial
    self.suppression_policy = suppression_policy
    self.t_list = t_list

    if self.agg_level is AggregationLevel.COUNTY:
        self.county_metadata = (
            load_data.load_county_metadata().set_index('fips').loc[fips].to_dict()
        )
        self.state_abbr = us.states.lookup(self.county_metadata['state']).abbr
        self.population = population_data.get_record_for_fips(
            fips=self.fips)[CommonFields.POPULATION]
        # TODO: Some counties do not have hospitals. Likely need to go to HRR level..
        self._beds_data = beds_data.get_record_for_fips(fips)
    else:
        self.state_abbr = us.states.lookup(fips).abbr
        self.population = population_data.get_record_for_state(
            self.state_abbr)[CommonFields.POPULATION]
        self._beds_data = beds_data.get_record_for_state(self.state_abbr)
def build_fips_data_frame():
    """Return the raw FIPS population dataframe from the local dataset."""
    from libs.datasets import FIPSPopulation

    dataset = FIPSPopulation.local()
    return dataset.data