def __init__(self, fips, N_samples, t_list,
                 I_initial=1, suppression_policy=None):
        """Store the run configuration and look up population and bed counts.

        A ``fips`` of length 5 is treated as a county; any other length is
        treated as a state identifier that ``us.states.lookup`` can resolve.

        :param fips: county or state identifier (only its length is
            inspected here to pick the aggregation level).
        :param N_samples: stored unchanged on the instance.
        :param t_list: stored unchanged on the instance.
        :param I_initial: stored unchanged on the instance (default 1).
        :param suppression_policy: stored unchanged on the instance.
        """
        # The datasets are kept in module-level globals so they are loaded
        # once instead of once per county, which is comparatively expensive.
        global beds_data, population_data
        if not (beds_data and population_data):
            beds_data = DHBeds.local().beds()
            population_data = FIPSPopulation.local().population()

        self.fips = fips
        is_county = len(self.fips) == 5
        self.agg_level = AggregationLevel.COUNTY if is_county else AggregationLevel.STATE
        self.t_list = t_list
        self.N_samples = N_samples
        self.I_initial = I_initial
        self.suppression_policy = suppression_policy

        if self.agg_level is not AggregationLevel.COUNTY:
            # State level: resolve the abbreviation directly from ``fips``.
            self.state_abbr = us.states.lookup(fips).abbr
            self.population = population_data.get_state_level('USA', state=self.state_abbr)
            # A falsy lookup result (e.g. None) is replaced by 0.
            self.beds = beds_data.get_state_level(self.state_abbr) or 0
            self.icu_beds = beds_data.get_state_level(self.state_abbr, column='icu_beds') or 0
        else:
            # County level: the state is found through the county metadata.
            metadata_by_fips = load_data.load_county_metadata().set_index('fips')
            self.county_metadata = metadata_by_fips.loc[fips].to_dict()
            state_info = us.states.lookup(self.county_metadata['state'])
            self.state_abbr = state_info.abbr
            self.population = population_data.get_county_level('USA', state=self.state_abbr, fips=self.fips)
            # TODO: Some counties do not have hospitals. Likely need to go to HRR level..
            # A falsy lookup result (e.g. None) is replaced by 0.
            self.beds = beds_data.get_county_level(self.state_abbr, fips=self.fips) or 0
            self.icu_beds = beds_data.get_county_level(self.state_abbr, fips=self.fips, column='icu_beds') or 0
Пример #2
0
def get_usa_by_county_with_projection_df(input_dir, intervention_type):
    """Build the county result table by joining cases, projections and interventions.

    The county frame is inner-joined with the county projections (on FIPS),
    then with the state/fips lookup, then with the state interventions. The
    result is renamed via ``OUTPUT_COLUMN_REMAP_TO_RESULT_DATA``, restricted to
    ``RESULT_DATA_COLUMNS_COUNTIES`` and has missing values replaced by
    ``NULL_VALUE``.

    :param input_dir: directory handed to ``get_county_projections_df``.
    :param intervention_type: intervention type handed to
        ``get_county_projections_df``.
    :return: the county DataFrame, with its index named ``OBJECTID``.
    :raises Exception: if the most frequent "Combined Key" does not occur
        exactly once.
    """
    county_cases = _get_usa_by_county_df()
    fips_frame = FIPSPopulation.local().data  # supplies the fips -> state mapping
    interventions_df = _get_interventions_df()
    projections_df = get_county_projections_df(
        input_dir, intervention_type, interventions_df)

    with_projections = county_cases.merge(
        projections_df,
        left_on="State/County FIPS Code",
        right_on="FIPS",
        how="inner",
    )
    with_states = with_projections.merge(
        fips_frame[["state", "fips"]],
        left_on="FIPS",
        right_on="fips",
        how="inner",
    )
    counties_decorated = with_states.merge(
        interventions_df,
        left_on="state",
        right_on="state",
        how="inner",
    )

    renamed = counties_decorated.rename(
        columns=OUTPUT_COLUMN_REMAP_TO_RESULT_DATA)
    counties = pd.DataFrame(
        renamed, columns=RESULT_DATA_COLUMNS_COUNTIES).fillna(NULL_VALUE)
    counties.index.name = "OBJECTID"

    # "Combined Key" must be unique: the largest occurrence count has to be 1.
    max_key_count = counties["Combined Key"].value_counts().max()
    if max_key_count != 1:
        raise Exception(
            f"counties['Combined Key'].value_counts().max() = {max_key_count}, at input_dir {input_dir}."
        )
    return counties
Пример #3
0
def build_county_summary(min_date, country="USA", state=None, output_dir=OUTPUT_DIR):
    """Builds county summary json files.

    For every state present in the county-level timeseries, writes
    ``<output_dir>/county_summaries/<state>.summary.json`` containing the
    list of county FIPS codes that have a truthy population, a truthy bed
    count and a non-zero case sum.

    :param min_date: passed as ``after`` when subsetting the timeseries.
    :param country: country filter for the timeseries subset.
    :param state: optional state filter for the timeseries subset.
    :param output_dir: base directory; a ``county_summaries`` subdirectory
        is created beneath it.
    """
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(
        AggregationLevel.COUNTY, after=min_date, country=country, state=state
    )

    output_dir = pathlib.Path(output_dir) / "county_summaries"
    _logger.info(f"Outputting to {output_dir}")
    if not output_dir.exists():
        _logger.info(f"{output_dir} does not exist, creating")
    # exist_ok avoids failing if the directory appears between the check
    # above and the creation here.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Group counties by state. Distinct loop names are used so the ``country``
    # and ``state`` parameters are not overwritten, and each county keeps the
    # country it was reported under instead of whichever key came last.
    counties_by_state = defaultdict(list)
    for key_country, key_state, _county, key_fips in timeseries.county_keys():
        counties_by_state[key_state].append((key_country, key_fips))

    for state_name, counties in counties_by_state.items():
        counties_with_data = []
        for county_country, fips in counties:
            cases = timeseries.get_data(
                state=state_name, country=county_country, fips=fips
            )
            beds = beds_data.get_county_level(state_name, fips=fips)
            population = population_data.get_county_level(
                county_country, state_name, fips=fips
            )
            if population and beds and sum(cases.cases):
                counties_with_data.append(fips)

        output_path = output_dir / f"{state_name}.summary.json"
        output_path.write_text(
            json.dumps({"counties_with_data": counties_with_data}, indent=2)
        )
Пример #4
0
def run_state_level_forecast(
    min_date, max_date, country="USA", state=None, output_dir=OUTPUT_DIR
):
    """Schedule a forecast for every state in the timeseries on a worker pool.

    :param min_date: start date; used for the legacy dataset and as the
        ``after`` bound of the timeseries subset.
    :param max_date: forwarded to ``forecast_each_state``.
    :param country: country filter for the timeseries subset.
    :param state: optional state filter. When it is falsy (a full run) an
        existing output directory is first renamed to a timestamped backup.
    :param output_dir: directory forwarded to ``forecast_each_state``;
        created if missing.
    """
    # DH Beds dataset does not have all counties, so using the legacy state
    # level bed data.
    legacy_dataset = LegacyJHUDataset(min_date)
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(
        AggregationLevel.STATE, after=min_date, country=country, state=state
    )

    # Honour the caller-supplied directory; previously the module-level
    # OUTPUT_DIR was always used and the parameter was silently ignored.
    output_dir = pathlib.Path(output_dir)
    if output_dir.exists() and not state:
        # Keep previous full-run results by moving them aside.
        backup = output_dir.name + "." + str(int(time.time()))
        output_dir.rename(output_dir.parent / backup)

    output_dir.mkdir(parents=True, exist_ok=True)

    pool = get_pool()
    # NOTE(review): the objects returned by apply_async are discarded, so if
    # get_pool() returns a multiprocessing-style pool, worker exceptions will
    # go unnoticed — confirm this best-effort behavior is intended.
    for state_name in timeseries.states:
        args = (
            country,
            state_name,
            timeseries,
            legacy_dataset,
            population_data,
            min_date,
            max_date,
            output_dir,
        )
        pool.apply_async(forecast_each_state, args=args)

    pool.close()
    pool.join()
Пример #5
0
    def __init__(
        self,
        state,
        output_interval_days=4,
        run_mode="can-before",
        output_dir=None,
        jhu_dataset=None,
        cds_dataset=None,
        include_imputed=False,
    ):
        """Store settings and load the county, state and whitelist frames.

        :param state: state identifier resolvable by ``us.states.lookup``.
        :param output_interval_days: stored unchanged on the instance.
        :param run_mode: value converted with ``RunMode(...)``.
        :param output_dir: stored unchanged on the instance.
        :param jhu_dataset: dataset to use; when falsy ``JHUDataset.local()``
            is loaded instead.
        :param cds_dataset: dataset to use; when falsy ``CDSDataset.local()``
            is loaded instead.
        :param include_imputed: stored unchanged on the instance.
        """
        # Plain pass-through settings.
        self.state = state
        self.output_interval_days = output_interval_days
        self.include_imputed = include_imputed
        self.output_dir = output_dir

        # Derived values.
        self.run_mode = RunMode(run_mode)
        self.state_abbreviation = us.states.lookup(state).abbr
        self.population_data = FIPSPopulation.local().population()

        # Fall back to the locally stored datasets when none are injected.
        self.jhu_local = jhu_dataset if jhu_dataset else JHUDataset.local()
        self.cds_dataset = cds_dataset if cds_dataset else CDSDataset.local()

        # Normalise the "date" column of the county frame.
        county_frame = build_aggregate_county_data_frame(self.jhu_local, self.cds_dataset)
        county_frame["date"] = county_frame["date"].dt.normalize()
        self.county_timeseries = county_frame

        # Same normalisation for the state frame (modified in place).
        state_frame = self.jhu_local.timeseries().state_data
        state_frame["date"] = state_frame["date"].dt.normalize()
        self.state_timeseries = state_frame

        # Keep only whitelist rows whose "inference_ok" compares equal to True.
        whitelist = load_data.load_whitelist()
        self.df_whitelist = whitelist[whitelist["inference_ok"] == True]
def get_county_projections_df(input_dir, initial_intervention_type,
                              state_interventions_df):
    """
    Collect per-county projection data from the model output files.

    For every row of the FIPS population table the intervention type is
    resolved for the row's state, the file
    ``<state>.<fips>.<intervention_type>.json`` inside ``input_dir`` is handed
    to ``_calculate_projection_data``, and truthy results are gathered.

    Returns a DataFrame with columns CALCULATED_PROJECTION_HEADERS_COUNTIES.
    Raises Exception when more than 2000 counties produced no data.
    """
    fips_pd = FIPSPopulation.local().data  # source of state and fips per county

    rows = []  # one list per county, turned into a DataFrame at the end
    missing_count = 0

    for _, county in fips_pd.iterrows():
        state, fips = county["state"], county["fips"]
        intervention_type = _get_intervention_type(
            initial_intervention_type, state, state_interventions_df)
        path = os.path.join(
            input_dir, f"{state}.{fips}.{intervention_type}.json")
        projection_data = _calculate_projection_data(path)
        if not projection_data:
            missing_count += 1
            continue
        rows.append([state, fips] + projection_data)

    if missing_count > 2000:
        raise Exception(
            f"Missing a majority of counties from input_dir: {input_dir}")
    print(f"Models missing for {missing_count} counties")
    return pd.DataFrame(rows, columns=CALCULATED_PROJECTION_HEADERS_COUNTIES)
def get_county_projections_df(input_dir, intervention_type):
    """
    Collect per-county projection data from the model output files.

    For every row of the FIPS population table, the file
    ``county/<state>.<fips>.<intervention_type>.json`` below ``input_dir`` is
    handed to ``_calculate_projection_data``; truthy results are gathered and
    the number of counties without data is printed.

    Returns a DataFrame with columns CALCULATED_PROJECTION_HEADERS_COUNTIES.
    """
    fips_pd = FIPSPopulation.local().data  # source of state and fips per county

    rows = []  # one list per county, turned into a DataFrame at the end
    missing_count = 0

    for _, county in fips_pd.iterrows():
        state, fips = county["state"], county["fips"]
        path = os.path.join(
            input_dir, "county", f"{state}.{fips}.{intervention_type}.json")
        projection_data = _calculate_projection_data(path)
        if not projection_data:
            missing_count += 1
            continue
        rows.append([state, fips] + projection_data)

    print(f"Models missing for {missing_count} counties")
    return pd.DataFrame(rows, columns=CALCULATED_PROJECTION_HEADERS_COUNTIES)
Пример #8
0
def get_county_projections_df(input_dir, initial_intervention_type,
                              state_interventions_df):
    """
    Compute projection data for every county in the FIPS population table.

    For each county the intervention type is resolved for its state, the
    model output path is built with ``get_file_path`` and the projection is
    computed row-wise with ``parallel_apply``.

    :param input_dir: directory passed to ``get_file_path``.
    :param initial_intervention_type: passed to ``_get_intervention_type``.
    :param state_interventions_df: passed to ``_get_intervention_type``.
    :return: the result of applying ``_calculate_projection_data`` to each
        county row.
    """
    fips_pd = FIPSPopulation.local().data  # to get the state, county & fips

    # Work on an explicit copy: adding columns to a bare column selection of
    # ``fips_pd`` triggers pandas' SettingWithCopyWarning.
    county_df = fips_pd[["state", CommonFields.FIPS]].copy()
    county_df.loc[:, "intervention_type"] = county_df.state.apply(
        lambda x: _get_intervention_type(initial_intervention_type, x,
                                         state_interventions_df))
    # NOTE(review): ``x.fips`` assumes CommonFields.FIPS == "fips" — confirm.
    county_df.loc[:, "path"] = county_df.apply(
        lambda x: get_file_path(
            input_dir, x.state, x.intervention_type, x.fips),
        axis=1)

    new_df = county_df.parallel_apply(
        lambda x: _calculate_projection_data(x.state, x.path, fips=x.fips),
        axis=1)

    # Rows with a null state column are counted as counties without a model.
    missing = new_df.isnull().sum()[CommonFields.STATE]
    # NOTE: the "missing > 2000" sanity check that raised an exception is
    # intentionally left disabled here, as in the original.
    print(f"Models missing for {missing} counties")
    return new_df
    def __init__(
        self,
        state,
        output_interval_days=4,
        run_mode="can-before",
        output_dir=None,
        jhu_dataset=None,
        cds_dataset=None,
        include_imputed=False,
    ):
        """Store settings and load the county and state timeseries frames.

        :param state: state identifier resolvable by ``us.states.lookup``.
        :param output_interval_days: stored unchanged on the instance.
        :param run_mode: value converted with ``RunMode(...)``.
        :param output_dir: stored unchanged on the instance.
        :param jhu_dataset: dataset to use; when falsy ``JHUDataset.local()``
            is loaded instead.
        :param cds_dataset: dataset to use; when falsy ``CDSDataset.local()``
            is loaded instead.
        :param include_imputed: stored unchanged on the instance.
        """
        self.output_interval_days = output_interval_days
        self.state = state
        self.run_mode = RunMode(run_mode)
        self.include_imputed = include_imputed
        self.state_abbreviation = us.states.lookup(state).abbr
        self.population_data = FIPSPopulation.local().population()
        self.output_dir = output_dir

        self.jhu_local = jhu_dataset or JHUDataset.local()
        self.cds_dataset = cds_dataset or CDSDataset.local()

        self.county_timeseries = build_aggregate_county_data_frame(
            self.jhu_local, self.cds_dataset)
        self.county_timeseries["date"] = self.county_timeseries[
            "date"].dt.normalize()

        state_timeseries = self.jhu_local.timeseries().get_subset(
            AggregationLevel.STATE)
        # Keep the whole state-level frame and normalise only its "date"
        # column, mirroring the county frame above. Previously only the
        # normalised date Series was stored, dropping every other column.
        # ``assign`` returns a new frame, so the dataset's own data is left
        # untouched.
        # NOTE(review): confirm no caller relied on this attribute being a
        # bare Series of dates.
        state_data = state_timeseries.data
        self.state_timeseries = state_data.assign(
            date=state_data["date"].dt.normalize())
Пример #10
0
def get_county_projections_df(input_dir, initial_intervention_type,
                              state_interventions_df):
    """
    Compute projection data for every county in the FIPS population table.

    For each county the intervention type is resolved for its state, the
    model output path is built with ``get_file_path`` and the projection is
    computed row-wise with ``parallel_apply``.

    :param input_dir: directory passed to ``get_file_path``.
    :param initial_intervention_type: passed to ``_get_intervention_type``.
    :param state_interventions_df: passed to ``_get_intervention_type``.
    :return: the result of applying ``_calculate_projection_data`` to each
        county row.
    :raises Exception: if more than 2000 counties have no projection.
    """
    fips_pd = FIPSPopulation.local().data  # to get the state, county & fips

    # Work on an explicit copy: adding columns to a bare column selection of
    # ``fips_pd`` triggers pandas' SettingWithCopyWarning.
    county_df = fips_pd[["state", "fips"]].copy()
    county_df.loc[:, "intervention_type"] = county_df.state.apply(
        lambda x: _get_intervention_type(initial_intervention_type, x,
                                         state_interventions_df))
    # ``.values`` strips the index so the paths are assigned positionally.
    county_df.loc[:, "path"] = county_df.apply(
        lambda x: get_file_path(
            input_dir, x.state, x.intervention_type, fips=x.fips),
        axis=1,
    ).values
    new_df = county_df.parallel_apply(
        lambda x: _calculate_projection_data(x.state, x.path, fips=x.fips),
        axis=1)

    # Rows with a null "State" column are counted as counties without a model.
    missing = new_df.isnull().sum()["State"]

    if missing > 2000:
        raise Exception(
            f"Missing a majority of counties from input_dir: {input_dir}")
    print(f"Models missing for {missing} counties")
    return new_df
Пример #11
0
def get_county_projections():
    """Summarise the county model outputs found in OUTPUT_DIR_COUNTIES.

    For each county in the FIPS population table whose
    ``<state>.<fips>.<NO_INTERVENTION>.json`` file exists, the file is loaded
    and reduced to: hospitalization and bed-shortfall figures 16 and 32 days
    from now, mean hospitalizations, mean daily new deaths, and the dates of
    peak hospitalizations and peak new deaths. The number of counties without
    a file is printed.

    :return: DataFrame with one row per county that had a model file.
    """
    fips_pd = FIPSPopulation.local().data  # to get the state, county & fips
    intervention_type = NO_INTERVENTION  # None, as requested

    # get 16 and 32 days out from now
    today = datetime.datetime.now()
    sixteen_days = today + datetime.timedelta(days=16)
    thirty_two_days = today + datetime.timedelta(days=32)

    # save results in a list of lists, converted to df later
    results = []

    missing = 0
    for _, fips_row in fips_pd.iterrows():
        state = fips_row['state']
        fips = fips_row['fips']
        file_name = f"{state}.{fips}.{intervention_type}.json"
        path = os.path.join(OUTPUT_DIR_COUNTIES, file_name)
        if not os.path.exists(path):
            missing += 1
            continue

        df = read_json_as_df(path)
        df['short_fall'] = df.apply(calc_short_fall, axis=1)

        hosp_16_days, short_fall_16_days = get_hospitals_and_shortfalls(df, sixteen_days)
        hosp_32_days, short_fall_32_days = get_hospitals_and_shortfalls(df, thirty_two_days)

        # Daily new deaths: difference between consecutive cumulative values.
        df['new_deaths'] = df.dead - df.dead.shift(1)

        mean_hospitalizations = df.all_hospitalized.mean().round(0)
        mean_deaths = df.new_deaths.mean()

        # idxmax() returns an index *label*, so the lookup must be
        # label-based (.loc); positional .iloc is only correct when the
        # index happens to be the default 0..n-1 range.
        peak_hospitalizations_date = df.loc[df.all_hospitalized.idxmax(), 'date']
        peak_deaths_date = df.loc[df.new_deaths.idxmax(), 'date']

        results.append([state, fips, hosp_16_days, hosp_32_days, short_fall_16_days, short_fall_32_days,
                        mean_hospitalizations, mean_deaths, peak_hospitalizations_date, peak_deaths_date])
    print(f'Models missing for {missing} county')

    headers = [
        'State',
        'FIPS',
        '16-day_Hospitalization_Prediction',
        '32-day_Hospitalization_Prediction',
        '16-day_Beds_Shortfall',
        '32-day_Beds_Shortfall',
        "Mean Hospitalizations",
        "Mean Deaths",
        "Peak Hospitalizations On",
        # Holds the peak new-deaths date despite the name.
        # NOTE(review): consumers may key on this exact column name —
        # verify before renaming.
        "Mean Deaths On",
    ]
    ndf = pd.DataFrame(results, columns=headers)
    return ndf
def run_county_level_forecast(
    min_date: datetime.datetime,
    max_date: datetime.datetime,
    output_dir: pathlib.Path,
    country: str = "USA",
    state: str = None,
):
    """Schedule a forecast for every county in the timeseries on a worker pool.

    :param min_date: ``after`` bound of the timeseries subset; also forwarded
        to ``forecast_each_county``.
    :param max_date: forwarded to ``forecast_each_county``.
    :param output_dir: directory forwarded to ``forecast_each_county``;
        created if missing.
    :param country: country filter for the timeseries subset.
    :param state: optional state filter for the timeseries subset.
    """
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(AggregationLevel.COUNTY,
                                       after=min_date,
                                       country=country,
                                       state=state)

    _logger.info(f"Outputting to {output_dir}")
    output_dir.mkdir(parents=True, exist_ok=True)

    # Group counties by state. Distinct loop names keep the ``country`` and
    # ``state`` parameters intact, and each county carries its own country
    # rather than whichever value the last key happened to have.
    counties_by_state = defaultdict(list)
    for key_country, key_state, key_county, key_fips in timeseries.county_keys():
        counties_by_state[key_state].append((key_country, key_county, key_fips))

    pool = get_pool()
    for state_name, counties in counties_by_state.items():
        _logger.info(f"Running county models for {state_name}")
        for county_country, county, fips in counties:
            args = (
                min_date,
                max_date,
                county_country,
                state_name,
                county,
                fips,
                timeseries,
                beds_data,
                population_data,
                output_dir,
            )

            pool.apply_async(
                forecast_each_county,
                args,
                callback=_result_callback_wrapper(
                    f"{county}, {state_name}: {fips}"),
            )

    pool.close()
    pool.join()
Пример #13
0
def run_county_level_forecast(min_date,
                              max_date,
                              country="USA",
                              state=None,
                              output_dir=OUTPUT_DIR):
    """Schedule a forecast for every county in the timeseries on a worker pool.

    Results go to ``<output_dir>/county``. On a full run (``state`` falsy) an
    existing output directory is first renamed to a timestamped backup.

    :param min_date: ``after`` bound of the timeseries subset; also forwarded
        to ``forecast_each_county``.
    :param max_date: forwarded to ``forecast_each_county``.
    :param country: country filter for the timeseries subset.
    :param state: optional state filter for the timeseries subset.
    :param output_dir: base directory; a ``county`` subdirectory is used.
    """
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(AggregationLevel.COUNTY,
                                       after=min_date,
                                       country=country,
                                       state=state)

    output_dir = pathlib.Path(output_dir) / "county"
    _logger.info(f"Outputting to {output_dir}")
    # Dont want to replace when just running the states
    if output_dir.exists() and not state:
        backup = output_dir.name + "." + str(int(time.time()))
        output_dir.rename(output_dir.parent / backup)

    output_dir.mkdir(parents=True, exist_ok=True)

    # Group counties by state. Distinct loop names keep the ``country`` and
    # ``state`` parameters intact, and each county carries its own country
    # rather than whichever value the last key happened to have.
    counties_by_state = defaultdict(list)
    for key_country, key_state, key_county, key_fips in timeseries.county_keys():
        counties_by_state[key_state].append((key_country, key_county, key_fips))

    pool = get_pool()
    for state_name, counties in counties_by_state.items():
        _logger.info(f"Running county models for {state_name}")
        for county_country, county, fips in counties:
            args = (
                min_date,
                max_date,
                county_country,
                state_name,
                county,
                fips,
                timeseries,
                beds_data,
                population_data,
                output_dir,
            )
            # For debugging, forecast_each_county(*args) can be called
            # synchronously here instead of going through the pool.
            pool.apply_async(forecast_each_county, args=args)

    pool.close()
    pool.join()
Пример #14
0
    def __init__(
        self,
        state,
        output_interval_days=4,
        run_mode="can-before",
        output_dir=None,
        include_imputed=False,
    ):
        """Store the settings and load the population dataset.

        :param state: state identifier resolvable by ``us.states.lookup``.
        :param output_interval_days: stored unchanged on the instance.
        :param run_mode: value converted with ``RunMode(...)``.
        :param output_dir: stored unchanged on the instance.
        :param include_imputed: stored unchanged on the instance.
        """
        # Plain pass-through settings.
        self.state = state
        self.output_interval_days = output_interval_days
        self.include_imputed = include_imputed
        self.output_dir = output_dir

        # Derived values.
        self.run_mode = RunMode(run_mode)
        state_info = us.states.lookup(state)
        self.state_abbreviation = state_info.abbr
        population_source = FIPSPopulation.local()
        self.population_data = population_source.population()
Пример #15
0
def get_usa_by_county_with_projection_df(input_dir):
    """Build the county result table by joining cases, projections and interventions.

    The county frame is inner-joined with the county projections (on FIPS),
    then with the state/fips lookup, then with the state interventions.
    Columns are renamed to their output names and the frame is restricted to
    ``county_output_cols`` plus the renamed columns, with missing values
    replaced by ``NULL_VALUE``.

    :param input_dir: passed to ``get_county_projections``.
    :return: the county DataFrame, with its index named ``OBJECTID``.
    :raises AssertionError: if the most frequent 'Combined Key' does not
        occur exactly once.
    """
    us_only = get_usa_by_county_df()
    fips_df = FIPSPopulation.local().data  # used to get interventions
    # used to say what state has what interventions
    interventions_df = get_interventions_df()
    projections_df = get_county_projections(input_dir)

    counties_decorated = us_only.merge(
        projections_df,
        left_on='State/County FIPS Code',
        right_on='FIPS',
        how='inner',
    ).merge(
        fips_df[['state', 'fips']],
        left_on='FIPS',
        right_on='fips',
        how='inner',
    ).merge(
        interventions_df,
        left_on='state',
        right_on='state',
        how='inner',
    )

    state_col_remap = {
        'state_x': 'Province/State',
        'intervention': 'State Intervention',
        '16-day_Hospitalization_Prediction': '16d-HSPTLZD',
        '32-day_Hospitalization_Prediction': '32d-HSPTLZD',
        '16-day_Beds_Shortfall': '16d-LACKBEDS',
        '32-day_Beds_Shortfall': '32d-LACKBEDS',
        "Mean Hospitalizations": 'MEAN-HOSP',
        "Mean Deaths": 'MEAN-DEATHS',
        "Peak Hospitalizations On": 'PEAK-HOSP',
        "Mean Deaths On": 'PEAK-DEATHS',
        "Deaths": "Current Deaths",
        "Confirmed": "Current Confirmed",
        "Recovered": "Current Recovered",
        "Active": "Current Active",
    }

    counties_remapped = counties_decorated.rename(columns=state_col_remap)

    # De-duplicate while preserving order. A plain set() would make the
    # output column order vary between runs (string hashing is randomized).
    new_cols = list(
        dict.fromkeys([*county_output_cols, *state_col_remap.values()]))
    counties = pd.DataFrame(counties_remapped, columns=new_cols)
    counties = counties.fillna(NULL_VALUE)
    counties.index.name = 'OBJECTID'

    # Unique key check. Raised explicitly (instead of via ``assert``) so it
    # still runs under ``python -O``; the exception type is unchanged.
    max_key_count = counties['Combined Key'].value_counts().max()
    if max_key_count != 1:
        raise AssertionError(
            f"counties['Combined Key'].value_counts().max() = {max_key_count}, "
            f"at input_dir {input_dir}."
        )
    return counties
Пример #16
0
def get_usa_by_county_with_projection_df(input_dir, intervention_type):
    """Build the county result table by joining cases, projections and interventions.

    The county frame is inner-joined with the projections on
    ``CommonFields.FIPS``, with the state/fips lookup on ``CommonFields.FIPS``
    and with the interventions on ``CommonFields.STATE``. The result is
    renamed via ``OUTPUT_COLUMN_REMAP_TO_RESULT_DATA``, restricted to
    ``RESULT_DATA_COLUMNS_COUNTIES`` and has missing values replaced by
    ``NULL_VALUE``.

    :param input_dir: directory handed to ``get_county_projections_df``.
    :param intervention_type: intervention type handed to
        ``get_county_projections_df``.
    :return: the county DataFrame, with its index named ``OBJECTID``.
    :raises Exception: if the most frequent "Combined Key" does not occur
        exactly once.
    """
    county_cases = _get_usa_by_county_df()
    fips_frame = FIPSPopulation.local().data  # supplies the fips -> state mapping
    interventions_df = _get_interventions_df()
    projections_df = get_county_projections_df(input_dir, intervention_type, interventions_df)

    with_projections = county_cases.merge(projections_df, on=CommonFields.FIPS, how="inner")
    state_lookup = fips_frame[[CommonFields.STATE, CommonFields.FIPS]]
    with_states = with_projections.merge(state_lookup, on=CommonFields.FIPS, how="inner")
    counties_decorated = with_states.merge(interventions_df, on=CommonFields.STATE, how="inner")

    renamed = counties_decorated.rename(columns=OUTPUT_COLUMN_REMAP_TO_RESULT_DATA)
    counties = pd.DataFrame(renamed)[RESULT_DATA_COLUMNS_COUNTIES].fillna(NULL_VALUE)
    counties.index.name = "OBJECTID"

    # "Combined Key" must be unique: the largest occurrence count has to be 1.
    combined_key_max = counties["Combined Key"].value_counts().max()
    if combined_key_max != 1:
        raise Exception(
            "counties['Combined Key'].value_counts().max() = "
            f"{combined_key_max}, at input_dir {input_dir}."
        )
    return counties
def get_usa_by_county_with_projection_df(input_dir, intervention_type):
    """Build the county result table by joining cases, projections and interventions.

    The county frame is inner-joined with the county projections (on FIPS),
    then with the state/fips lookup, then with the state interventions. The
    result is renamed via ``OUTPUT_COLUMN_REMAP_TO_RESULT_DATA``, restricted to
    ``RESULT_DATA_COLUMNS_COUNTIES`` and has missing values replaced by
    ``NULL_VALUE``.

    :param input_dir: directory handed to ``get_county_projections_df``.
    :param intervention_type: intervention type handed to
        ``get_county_projections_df``.
    :return: the county DataFrame, with its index named ``OBJECTID``.
    :raises AssertionError: if the most frequent 'Combined Key' does not
        occur exactly once.
    """
    us_only = get_usa_by_county_df()
    fips_df = FIPSPopulation.local().data  # used to get interventions
    interventions_df = get_interventions_df()  # used to say what state has what interventions
    projections_df = get_county_projections_df(input_dir, intervention_type)

    counties_decorated = us_only.merge(
        projections_df, left_on='State/County FIPS Code', right_on='FIPS', how='inner'
    ).merge(
        fips_df[['state', 'fips']], left_on='FIPS', right_on='fips', how='inner'
    ).merge(
        interventions_df, left_on='state', right_on='state', how='inner'
    )

    counties_remapped = counties_decorated.rename(columns=OUTPUT_COLUMN_REMAP_TO_RESULT_DATA)
    counties = pd.DataFrame(counties_remapped, columns=RESULT_DATA_COLUMNS_COUNTIES)
    counties = counties.fillna(NULL_VALUE)
    counties.index.name = 'OBJECTID'

    # Unique key check. Raised explicitly (instead of via ``assert``) so it
    # still runs under ``python -O``; the exception type is unchanged.
    max_key_count = counties['Combined Key'].value_counts().max()
    if max_key_count != 1:
        raise AssertionError(
            f"counties['Combined Key'].value_counts().max() = {max_key_count}, "
            f"at input_dir {input_dir}."
        )
    return counties
def run_state_level_forecast(
    min_date,
    max_date,
    output_dir,
    country="USA",
    state=None,
):
    """Schedule a forecast for every state in the timeseries on a worker pool.

    Bed data comes from ``DHBeds`` and population data from
    ``FIPSPopulation``; both are forwarded to ``forecast_each_state`` together
    with the state-level timeseries subset.

    :param min_date: ``after`` bound of the timeseries subset; also forwarded
        to ``forecast_each_state``.
    :param max_date: forwarded to ``forecast_each_state``.
    :param output_dir: directory forwarded to ``forecast_each_state``;
        created if missing.
    :param country: country filter for the timeseries subset.
    :param state: optional state filter for the timeseries subset.
    """
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    state_timeseries = JHUDataset.local().timeseries().get_subset(
        AggregationLevel.STATE,
        after=min_date,
        country=country,
        state=state,
    )

    output_dir = pathlib.Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    pool = get_pool()
    for state_name in state_timeseries.states:
        # Label handed to the result callback wrapper for this task.
        task_label = f"{state_name}, {country}"
        task_args = (
            country,
            state_name,
            state_timeseries,
            beds_data,
            population_data,
            min_date,
            max_date,
            output_dir,
        )
        pool.apply_async(
            forecast_each_state,
            task_args,
            callback=_result_callback_wrapper(task_label),
        )

    # No more tasks will be submitted; wait for the scheduled ones.
    pool.close()
    pool.join()
Пример #19
0
    def __init__(self,
                 fips,
                 N_samples,
                 t_list,
                 I_initial=1,
                 suppression_policy=None):
        """Store the run configuration and look up population and bed records.

        A ``fips`` of length 5 is treated as a county; any other length is
        treated as a state identifier that ``us.states.lookup`` can resolve.

        :param fips: county or state identifier (only its length is
            inspected here to pick the aggregation level).
        :param N_samples: stored unchanged on the instance.
        :param t_list: stored unchanged on the instance.
        :param I_initial: stored unchanged on the instance (default 1).
        :param suppression_policy: stored unchanged on the instance.
        """
        # The datasets are kept in module-level globals so they are loaded
        # once instead of once per county, which is comparatively expensive.
        global beds_data, population_data
        if not (beds_data and population_data):
            beds_data = CovidCareMapBeds.local().beds()
            population_data = FIPSPopulation.local().population()

        self.fips = fips
        is_county = len(self.fips) == 5
        self.agg_level = (
            AggregationLevel.COUNTY if is_county else AggregationLevel.STATE)
        self.t_list = t_list
        self.N_samples = N_samples
        self.I_initial = I_initial
        self.suppression_policy = suppression_policy

        if self.agg_level is not AggregationLevel.COUNTY:
            # State level: resolve the abbreviation directly from ``fips``.
            self.state_abbr = us.states.lookup(fips).abbr
            state_record = population_data.get_record_for_state(self.state_abbr)
            self.population = state_record[CommonFields.POPULATION]
            self._beds_data = beds_data.get_record_for_state(self.state_abbr)
        else:
            # County level: the state is found through the county metadata.
            metadata_by_fips = load_data.load_county_metadata().set_index('fips')
            self.county_metadata = metadata_by_fips.loc[fips].to_dict()
            state_info = us.states.lookup(self.county_metadata['state'])
            self.state_abbr = state_info.abbr
            county_record = population_data.get_record_for_fips(fips=self.fips)
            self.population = county_record[CommonFields.POPULATION]
            # TODO: Some counties do not have hospitals. Likely need to go to HRR level..
            self._beds_data = beds_data.get_record_for_fips(fips)
Пример #20
0
def build_fips_data_frame():
    """Return the ``data`` attribute of the local ``FIPSPopulation`` dataset."""
    # Imported at call time rather than at module level — presumably to avoid
    # an import cycle or import-time cost; confirm before hoisting.
    from libs.datasets import FIPSPopulation

    fips_dataset = FIPSPopulation.local()
    return fips_dataset.data