# Example #1
# 0
def create_regression_version(version_name, covariate_version,
                              covariate_draw_dict, infection_version,
                              location_set_version_id, **kwargs):
    """
    Create a regression version and cache the covariates it uses.

    Steps, in order: resolve the location ids, cache the covariates for
    those locations, create the regression version against the cached
    covariate version, and write the location ids into the directory of
    the newly created version.

    :param version_name: (str) name to give the new regression version
    :param covariate_version: (str) covariate version handed to the cache step
    :param covariate_draw_dict: (Dict[str, bool])
    :param infection_version: (str)
    :param location_set_version_id: (int)
    :param kwargs: other keyword arguments forwarded to the regression version.
    """
    base_dirs = Directories()
    loc_ids = get_locations(
        base_dirs,
        infection_version,
        location_set_version_id=location_set_version_id,
    )

    # The version is registered against the *cached* covariate version
    # returned here, not against the `covariate_version` argument.
    cached_covariate_version = cache_covariates(
        directories=base_dirs,
        covariate_version=covariate_version,
        location_ids=loc_ids,
        covariate_draw_dict=covariate_draw_dict,
    )

    version = RegressionVersion(
        version_name=version_name,
        covariate_version=cached_covariate_version,
        covariate_draw_dict=covariate_draw_dict,
        location_set_version_id=location_set_version_id,
        infection_version=infection_version,
        **kwargs
    )
    version.create_version()

    # Directories for the new version are built only after it was created.
    write_locations(
        directories=Directories(regression_version=version_name),
        location_ids=loc_ids,
    )
# Example #2
# 0
def create_forecast_version(version_name, covariate_version,
                            covariate_draw_dict, regression_version):
    """
    Create a forecast version on top of an existing regression version.
    Will cache covariates as well.

    The location ids are loaded from the regression version's directories,
    the covariates are cached for those locations, and the forecast version
    is created against the cached covariate version.

    :param version_name: (str) name to give the new forecast version
    :param covariate_version: (str) covariate version handed to the cache step
    :param covariate_draw_dict: (Dict[str, bool])
    :param regression_version: (str) which regression version to build off of
    """
    regression_dirs = Directories(regression_version=regression_version)
    loc_ids = load_locations(regression_dirs)

    # The forecast is registered against the *cached* covariate version
    # returned here, not against the `covariate_version` argument.
    cached_covariate_version = cache_covariates(
        directories=regression_dirs,
        covariate_version=covariate_version,
        location_ids=loc_ids,
        covariate_draw_dict=covariate_draw_dict,
    )

    version = ForecastVersion(
        version_name=version_name,
        covariate_version=cached_covariate_version,
        regression_version=regression_version,
        covariate_draw_dict=covariate_draw_dict,
    )
    version.create_version()
def run_beta_forecast(location_id: int, regression_version: str, forecast_version: str,
                      coefficient_version: str = None):
    """
    Forecast beta and the ODE components forward for one location, draw by draw.

    For each of ``regression_settings.n_draws`` draws this loads the beta fit
    and ODE parameters from the regression, predicts beta from the covariates
    dated on or after the last fitted date, rescales the prediction so that
    its first value equals the fitted beta on that date, runs the ODE forward
    and writes the forecasted components to the per-draw forecast file.
    After the loop, the per-draw scale factors are written to the location's
    beta scaling file.

    :param location_id: (int) location to forecast
    :param regression_version: (str) regression version to read fits/settings from
    :param forecast_version: (str) forecast version to read settings from
    :param coefficient_version: (str) accepted but not referenced in this body
    """
    log.info("Initiating SEIIR beta forecasting.")

    # ---- metadata ----
    directories = Directories(
        regression_version=regression_version,
        forecast_version=forecast_version
    )
    regression_settings = load_regression_settings(regression_version)
    forecast_settings = load_forecast_settings(forecast_version)

    runner = ModelRunner()

    infection_date_col = INFECTION_COL_DICT['COL_DATE']
    covariate_date_col = COVARIATE_COL_DICT['COL_DATE']

    # One scale factor is collected per draw and saved once at the end.
    beta_scales = []

    for draw_id in range(regression_settings.n_draws):
        print(f"On draw {draw_id}\n")

        # Fitted compartments and ODE parameters from the regression step
        fit_df = load_beta_fit(
            directories, draw_id=draw_id, location_id=location_id
        )
        ode_params = load_beta_params(directories, draw_id=draw_id)

        # NOTE(review): neither call below depends on draw_id; they look
        # loop-invariant, but are kept per-draw as in the original flow.
        _, all_covmodels_set = convert_to_covmodel(
            regression_settings.covariates,
            regression_settings.covariates_order,
        )
        all_covariates = load_covariates(
            directories,
            covariate_version=forecast_settings.covariate_version,
            location_ids=[location_id]
        )

        # The forecast starts at the last date present in the beta fit.
        fit_dates = pd.to_datetime(fit_df[infection_date_col])
        last_fit_date = fit_dates.max()
        # Keep the date in the same representation the fit file stores it in.
        current_date = fit_df.loc[fit_dates == last_fit_date, infection_date_col].iloc[0]

        covariate_dates = pd.to_datetime(all_covariates[covariate_date_col])
        future_covariates = all_covariates.loc[covariate_dates >= last_fit_date].copy()

        # Predict beta forward using this draw's regression coefficients
        coefficients = load_mr_coefficients(
            directories=directories, draw_id=draw_id
        )
        beta_forecast = runner.predict_beta_forward_prod(
            covmodel_set=all_covmodels_set,
            df_cov=future_covariates,
            df_cov_coef=coefficients,
            col_t=covariate_date_col,
            col_group=COVARIATE_COL_DICT['COL_LOC_ID']
        )

        predicted_betas = beta_forecast.beta_pred.values
        forecast_days = beta_forecast[covariate_date_col].values
        times = date_to_days(forecast_days)

        # Anchor on the fitted beta of the last fit date: a single ratio is
        # applied uniformly to every forecasted beta.
        anchor_beta = fit_df.loc[fit_df['date'] == current_date, 'beta'].iloc[0]
        scale = anchor_beta / predicted_betas[0]
        beta_scales.append(scale)
        scaled_betas = predicted_betas * scale

        # Initial conditions for the ODE come from the beta fit
        init_cond = get_ode_init_cond(
            beta_ode_fit=fit_df,
            current_date=current_date,
            location_id=location_id
        ).astype(float)
        total_population = np.sum(init_cond)
        model_specs = SiierdModelSpecs(
            alpha=ode_params['alpha'],
            sigma=ode_params['sigma'],
            gamma1=ode_params['gamma1'],
            gamma2=ode_params['gamma2'],
            N=total_population
        )

        # Run the ODE forward with the scaled betas and save the result
        components = runner.forecast(
            model_specs=model_specs,
            init_cond=init_cond,
            times=times,
            betas=scaled_betas,
            dt=regression_settings.solver_dt
        )
        components[covariate_date_col] = forecast_days
        draw_output_file = directories.location_draw_component_forecast_file(
            location_id=location_id,
            draw_id=draw_id
        )
        components.to_csv(draw_output_file)

    scaling_file = directories.location_beta_scaling_file(location_id=location_id)
    pd.DataFrame({'beta_scales': beta_scales}).to_csv(scaling_file, index=False)
    def __init__(self, directories: Directories, groups: list = None, exclude_groups: list = None,
                 col_group="loc_id", col_date='date', col_observed='observed', covariates=()):
        """
        Load per-group regression fits, component forecasts, output draws and
        covariates from disk.

        :param directories: (Directories) supplies the regression/forecast
            versions and every directory and file path read here
        :param groups: (list) group identifiers to load. The list is copied,
            so the caller's list is never modified; ``None`` means no groups.
        :param exclude_groups: (list) groups to remove from ``groups``; each
            entry must be present in ``groups``
        :param col_group: (str) stored as ``self.col_group``; also used in the
            "not found" message for missing component forecasts
        :param col_date: (str) stored as ``self.col_date``
        :param col_observed: (str) stored as ``self.col_observed``
        :param covariates: iterable of covariate names whose files are loaded
        :raises ValueError: if an excluded group is not in ``groups``, or if a
            requested covariate file does not exist
        """
        self.directories = directories
        self.col_group = col_group
        self.col_date = col_date
        self.col_observed = col_observed

        # Work on a private copy: applying the exclusions below must not
        # mutate the list object owned by the caller.
        self.groups = [] if groups is None else list(groups)

        if exclude_groups is not None:
            for exclude_group in exclude_groups:
                self.groups.remove(exclude_group)

        self.data = {
            group: {
                ODE_BETA_FIT: [],
                COEFFICIENTS_FIT: [],
                ODE_COMPONENTS_FORECAST: [],
                OUTPUT_DRAWS_CASES: None,
                OUTPUT_DRAWS_DEATHS: None,
                OUTPUT_DRAWS_REFF: None
            } for group in self.groups
        }

        self.params_for_draws = []
        self.covariates = {}

        self.regression_settings = load_regression_settings(directories.regression_version)
        self.forecast_settings = load_forecast_settings(directories.forecast_version)

        self.location_metadata = pd.read_csv(
            self.directories.get_location_metadata_file(
                self.regression_settings.location_set_version_id)
        )
        self.id2loc = self.location_metadata.set_index('location_id')[
            'location_name'].to_dict()

        def read_draw_files(draw_dir, prefix):
            # Read every "<prefix>*.csv" file of `draw_dir`, in listing order.
            return [
                pd.read_csv(os.path.join(draw_dir, filename))
                for filename in os.listdir(draw_dir)
                if filename.startswith(prefix) and filename.endswith(".csv")
            ]

        # read beta regression draws; a group without a directory is skipped
        for group in self.groups:
            regression_draws_dir = os.path.join(directories.regression_beta_fit_dir, str(group))
            if os.path.isdir(regression_draws_dir):
                # It's assumed that each file contains only the `group` group exclusively
                self.data[group][ODE_BETA_FIT].extend(
                    read_draw_files(regression_draws_dir, "fit_draw_"))

        # read components forecast; a missing directory is reported, not raised
        for group in self.groups:
            component_draws_dir = os.path.join(directories.forecast_component_draw_dir, str(group))
            if os.path.isdir(component_draws_dir):
                self.data[group][ODE_COMPONENTS_FORECAST].extend(
                    read_draw_files(component_draws_dir, "draw_"))
            else:
                error_msg = f"ODE Components forecast for the group with {col_group} = {group} is not found"
                print("Error: " + error_msg)

        # read final draws; once the output directory exists, the three files
        # of every group are read unconditionally
        output_dir = directories.forecast_output_draw_dir
        if os.path.isdir(output_dir):
            for group in self.groups:
                self.data[group][OUTPUT_DRAWS_CASES] = pd.read_csv(
                    os.path.join(output_dir, f"cases_{group}.csv"))
                self.data[group][OUTPUT_DRAWS_DEATHS] = pd.read_csv(
                    os.path.join(output_dir, f"deaths_{group}.csv"))
                self.data[group][OUTPUT_DRAWS_REFF] = pd.read_csv(
                    os.path.join(output_dir, f"reff_{group}.csv"))

        for covariate in covariates:
            covariate_file = directories.get_covariate_file(
                covariate_name=covariate,
                covariate_version=self.forecast_settings.covariate_version
            )
            if not os.path.exists(covariate_file):
                raise ValueError(f"Can't find the file for covariate {covariate}: {covariate_file} does not exist.")
            self.covariates[covariate] = pd.read_csv(covariate_file)
def run_beta_regression(draw_id: int, regression_version: str):
    """
    Fit the beta spline / ODE and the beta regression on covariates for one draw.

    Loads the settings, location data and covariates of the regression
    version, fits the beta spline / ODE, regresses beta on the covariates
    (optionally passing coefficients read from another regression version),
    and saves per-location beta fit files plus the ODE parameters of the draw.

    :param draw_id: (int) draw to process; also used as the numpy random seed
    :param regression_version: (str) regression version to read from / write to
    """
    # ---- load metadata and data ----
    directories = Directories(regression_version=regression_version,
                              forecast_version=None)
    settings = load_regression_settings(regression_version)

    location_ids = load_locations(directories)
    location_data = load_all_location_data(
        directories=directories,
        location_ids=location_ids,
        draw_id=draw_id,
    )
    covariate_data = load_covariates(
        directories,
        covariate_version=settings.covariate_version,
        location_ids=location_ids,
    )

    # Seed so that the alpha, sigma, gamma1 and gamma2 parameters are reproducible
    np.random.seed(draw_id)
    ode_inputs = process_ode_process_input(
        settings=settings,
        location_data=location_data,
    )

    # ---- beta spline + ODE ----
    runner = ModelRunner()
    runner.fit_beta_ode(ode_inputs)

    # ---- beta regression with the loaded covariates ----
    ordered_covmodel_sets, all_covmodels_set = convert_to_covmodel(
        cov_dict=settings.covariates, cov_order_list=settings.covariates_order)

    cov_date_col = COVARIATE_COL_DICT['COL_DATE']
    cov_loc_col = COVARIATE_COL_DICT['COL_LOC_ID']

    mr_data = convert_inputs_for_beta_model(
        data_cov=(covariate_data, cov_date_col, cov_loc_col),
        df_beta=runner.get_beta_ode_fit(),
        covmodel_set=all_covmodels_set,
    )

    # When a coefficient version is configured, its coefficients for this
    # draw are read and passed on to the beta regression.
    fixed_coefficients = None
    if settings.coefficient_version is not None:
        fixed_coefficients = load_mr_coefficients(
            directories=Directories(
                regression_version=settings.coefficient_version),
            draw_id=draw_id)

    # The `path` argument makes the fit save the coefficients to that file
    runner.fit_beta_regression_prod(
        ordered_covmodel_sets=ordered_covmodel_sets,
        mr_data=mr_data,
        path=directories.get_draw_coefficient_file(draw_id),
        df_cov_coef=fixed_coefficients,
        add_intercept=False,
    )

    # ---- post processing and saving ----
    # Regression-predicted betas are appended to the fits for diagnostic
    # purposes only.
    saved_coefficients = load_mr_coefficients(directories=directories,
                                              draw_id=draw_id)
    predictions = runner.predict_beta_forward_prod(
        covmodel_set=all_covmodels_set,
        df_cov=covariate_data,
        df_cov_coef=saved_coefficients,
        col_t=cov_date_col,
        col_group=cov_loc_col)
    ode_fit = runner.get_beta_ode_fit()

    kept_columns = [cov_loc_col, cov_date_col, *settings.covariates.keys(), 'beta_pred']
    regression_betas = predictions[kept_columns]

    inf_loc_col = INFECTION_COL_DICT['COL_LOC_ID']
    inf_date_col = INFECTION_COL_DICT['COL_DATE']
    beta_fit_covariates = ode_fit.merge(
        regression_betas,
        left_on=[inf_loc_col, inf_date_col],
        right_on=[cov_loc_col, cov_date_col],
        how='left',
    )

    # One beta fit (compartment) file per location for easy reading later
    for l_id in location_ids:
        in_location = beta_fit_covariates[inf_loc_col] == l_id
        location_fit = beta_fit_covariates.loc[in_location].copy()
        location_fit.to_csv(directories.get_draw_beta_fit_file(l_id, draw_id),
                            index=False)

    # Save the drawn alpha, sigma, gamma1 and gamma2 parameters
    ode_params = runner.get_beta_ode_params()
    ode_params.to_csv(directories.get_draw_beta_param_file(draw_id), index=False)