Пример #1
0
    def __init__(self, directories: Directories):
        """Load settings, location names and per-location beta scales.

        Args:
            directories: Project ``Directories`` object; supplies the
                regression version, the location metadata file, the beta
                scaling directory and the diagnostics output directory.

        Attributes set here:
            settings: Result of ``load_regression_settings``.
            location_metadata: Location metadata table read from CSV.
            id2loc: Mapping ``location_id -> location_name``.
            loc_ids: Integer ids parsed from the file names found in the
                beta scaling directory (text before the first ``_``).
            locs: Location names, in the same order as ``loc_ids``.
            scales_data: ``beta_scales`` columns of all locations stacked
                side by side, one column per entry of ``loc_ids``.
        """
        self.directories = directories

        # Settings come first: they select the location metadata file.
        self.settings = load_regression_settings(directories.regression_version)
        self.path_to_location_metadata = self.directories.get_location_metadata_file(
            self.settings.location_set_version_id)
        self.path_to_beta_scaling = self.directories.forecast_beta_scaling_dir
        self.path_to_savefig = self.directories.forecast_diagnostic_dir

        # Location metadata and the id -> name lookup derived from it.
        self.location_metadata = pd.read_csv(self.path_to_location_metadata)
        names_by_id = self.location_metadata.set_index('location_id')['location_name']
        self.id2loc = names_by_id.to_dict()

        # Every entry of the beta scaling directory contributes one location;
        # the listing order defines the order of loc_ids, locs and scales_data.
        id_tokens = []
        for entry in os.listdir(self.path_to_beta_scaling):
            id_tokens.append(entry.split('_')[0])
        self.loc_ids = np.array(id_tokens).astype(int)

        names = [self.id2loc[loc_id] for loc_id in self.loc_ids]
        self.locs = np.array(names)

        # Each location's 'beta_scales' values become one column vector.
        scale_columns = []
        for loc_id in self.loc_ids:
            scaling_file = self.directories.location_beta_scaling_file(loc_id)
            scales = pd.read_csv(scaling_file)['beta_scales'].values
            scale_columns.append(scales[:, None])
        self.scales_data = np.hstack(scale_columns)
Пример #2
0
    def __init__(self, directories: Directories):
        """Load settings, location names and the per-draw beta fit tables.

        Args:
            directories: Project ``Directories`` object; supplies the
                regression version, the location metadata file, the beta fit
                directory, the diagnostics directory and the per-draw beta
                fit file paths.

        Attributes set here:
            settings: Result of ``load_regression_settings``.
            location_metadata: Location metadata table read from CSV.
            id2loc: Mapping ``location_id -> location_name``.
            df_beta: One data frame per draw, restricted to the columns
                ``loc_id``, ``date``, ``days``, ``beta`` and ``beta_pred``
                with rows containing missing values dropped.
            loc_ids: Sorted unique ``loc_id`` values of the first draw.
            locs: Location names matching ``loc_ids``.
            num_locs: Number of locations.
        """
        self.directories = directories

        # Settings select the metadata file and give the number of draws.
        self.settings = load_regression_settings(directories.regression_version)
        self.path_to_location_metadata = self.directories.get_location_metadata_file(
            self.settings.location_set_version_id)
        self.path_to_betas_dir = self.directories.regression_beta_fit_dir
        self.path_to_savefig = self.directories.regression_diagnostic_dir

        # Location metadata and the id -> name lookup derived from it.
        self.location_metadata = pd.read_csv(self.path_to_location_metadata)
        names_by_id = self.location_metadata.set_index('location_id')['location_name']
        self.id2loc = names_by_id.to_dict()

        # Read one beta fit table per draw, keeping only complete rows of the
        # columns listed below.
        kept_columns = ['loc_id', 'date', 'days', 'beta', 'beta_pred']
        self.df_beta = []
        for draw_id in range(self.settings.n_draws):
            draw_table = pd.read_csv(self.directories.get_draw_beta_fit_file(draw_id))
            self.df_beta.append(draw_table[kept_columns].dropna())

        # The set of locations is taken from the first draw only.
        first_draw_ids = self.df_beta[0]['loc_id'].unique()
        self.loc_ids = np.sort(list(first_draw_ids))
        names = [self.id2loc[loc_id] for loc_id in self.loc_ids]
        self.locs = np.array(names)
        self.num_locs = len(self.locs)
Пример #3
0
    def __init__(self, directories: Directories):
        """Load settings, location names and the per-draw coefficient tables.

        Args:
            directories: Project ``Directories`` object; supplies the
                regression version, the location metadata file, the
                coefficient directory, the diagnostics directory and the
                per-draw coefficient file paths.

        Attributes set here:
            settings: Result of ``load_regression_settings``.
            location_metadata: Location metadata table read from CSV.
            id2loc: Mapping ``location_id -> location_name``.
            covs: Sorted covariate names from the settings, followed by
                ``'intercept'``.
            loc_ids: Sorted unique ``group_id`` values of the first draw.
            locs: Location names matching ``loc_ids``.
            num_locs: Number of locations.
            coef_data: Mapping ``covariate -> (labels, matrix)`` where the
                matrix has one row per draw and its columns are ordered by
                ascending mean coefficient; ``labels`` is reordered the same
                way.
        """
        self.directories = directories

        # Settings select the metadata file and give covariates / draw count.
        self.settings = load_regression_settings(directories.regression_version)
        self.path_to_location_metadata = self.directories.get_location_metadata_file(
            self.settings.location_set_version_id)
        self.path_to_coef_dir = self.directories.regression_coefficient_dir
        self.path_to_savefig = self.directories.regression_diagnostic_dir

        # Location metadata and the id -> name lookup derived from it.
        self.location_metadata = pd.read_csv(self.path_to_location_metadata)
        names_by_id = self.location_metadata.set_index('location_id')['location_name']
        self.id2loc = names_by_id.to_dict()

        # One coefficient table per draw.
        draw_tables = []
        for draw_id in range(self.settings.n_draws):
            draw_tables.append(
                pd.read_csv(self.directories.get_draw_coefficient_file(draw_id)))

        # Covariate names (sorted) plus the intercept column.
        sorted_covariates = np.sort(list(self.settings.covariates.keys()))
        self.covs = np.append(sorted_covariates, 'intercept')

        # The set of locations is taken from the first draw only.
        first_draw_ids = draw_tables[0]['group_id'].unique()
        self.loc_ids = np.sort(list(first_draw_ids))
        names = [self.id2loc[loc_id] for loc_id in self.loc_ids]
        self.locs = np.array(names)
        self.num_locs = len(self.locs)

        # Per covariate: stack the draws as rows, then order the columns (and
        # the labels) by the column means.
        # NOTE(review): labels come from self.locs, so this assumes the rows of
        # every draw file are ordered like the sorted group ids - TODO confirm.
        self.coef_data = {}
        for cov in self.covs:
            per_draw_rows = [table[cov].values for table in draw_tables]
            stacked = np.vstack(per_draw_rows)
            labels = self.locs.copy()
            order = np.argsort(stacked.mean(axis=0))
            self.coef_data[cov] = (labels[order], stacked[:, order])
Пример #4
0
    def __init__(self, directories: Directories,
                 groups: list = None, exclude_groups: list = None,
                 col_group="loc_id", col_date='date'
                 ):
        """Load regression fits, forecast components and output draws per group.

        Args:
            directories: Project ``Directories`` object; supplies the
                regression/forecast versions, the location metadata file and
                the input directories read below.
            groups: Identifiers of the groups to load. The list is copied, so
                the caller's list is never modified. ``None`` is treated as
                an empty list.
            exclude_groups: Identifiers to leave out of ``groups``. Entries
                that do not occur in ``groups`` are ignored.
            col_group: Name of the group column; stored on the instance and
                used in the "not found" message.
            col_date: Name of the date column; stored on the instance.

        Attributes set here:
            groups: The group identifiers that were actually loaded.
            data: ``data[group]`` holds lists of draw data frames under
                ``ODE_BETA_FIT`` and ``ODE_COMPONENTS_FORECAST`` and single
                data frames (or ``None``) under ``OUTPUT_DRAWS_CASES``,
                ``OUTPUT_DRAWS_DEATHS`` and ``OUTPUT_DRAWS_REFF``.
            params_for_draws: Left empty (see the note in the body).
            regression_settings, forecast_settings: Loaded settings objects.
            location_metadata: Location metadata table read from CSV.
            id2loc: Mapping ``location_id -> location_name``.

        Note:
            When the forecast output directory exists, the ``cases_``,
            ``deaths_`` and ``reff_`` files are read for every group without
            an existence check, so a missing file makes ``pd.read_csv`` fail.
        """
        self.directories = directories
        self.col_group = col_group
        self.col_date = col_date

        # Build a private list instead of calling ``remove`` on the argument:
        # the caller's list stays untouched, a missing ``groups`` no longer
        # crashes, and excluding an id that is absent is harmless.
        excluded = set(exclude_groups) if exclude_groups is not None else set()
        requested = groups if groups is not None else []
        self.groups = [group for group in requested if group not in excluded]

        self.data = {group: {
            ODE_BETA_FIT: [],
            COEFFICIENTS_FIT: [],
            ODE_COMPONENTS_FORECAST: [],
            OUTPUT_DRAWS_CASES: None,
            OUTPUT_DRAWS_DEATHS: None,
            OUTPUT_DRAWS_REFF: None
        } for group in self.groups}
        self.params_for_draws = []

        # Settings and the location_id -> location_name lookup.
        self.regression_settings = load_regression_settings(directories.regression_version)
        self.forecast_settings = load_forecast_settings(directories.forecast_version)
        self.location_metadata = pd.read_csv(
            self.directories.get_location_metadata_file(
                self.regression_settings.location_set_version_id)
        )
        self.id2loc = self.location_metadata.set_index('location_id')[
            'location_name'].to_dict()

        def read_draw_files(folder, prefix):
            """Read every ``<prefix>*.csv`` file located directly in ``folder``."""
            return [
                pd.read_csv(os.path.join(folder, filename))
                for filename in os.listdir(folder)
                if filename.startswith(prefix) and filename.endswith(".csv")
            ]

        # Beta regression draws: one sub-directory per group. The listing must
        # be taken from that sub-directory, because that is where the files are
        # opened from; listing the parent directory would yield names that do
        # not exist inside the group folder. Groups without a sub-directory are
        # skipped silently.
        # Each draw file is assumed to contain only rows of its own group.
        for group in self.groups:
            group_fit_dir = os.path.join(directories.regression_beta_fit_dir, str(group))
            if os.path.isdir(group_fit_dir):
                self.data[group][ODE_BETA_FIT].extend(
                    read_draw_files(group_fit_dir, "fit_draw_"))

        # Coefficient draws ("coefficients_*.csv") and parameter draws
        # ("params_draw_*.csv") are intentionally not loaded because they are
        # not used at the moment; COEFFICIENTS_FIT and params_for_draws
        # therefore stay empty.

        # ODE component forecasts: one sub-directory per group; a missing one
        # is reported but does not abort loading.
        for group in self.groups:
            group_components_dir = os.path.join(
                directories.forecast_component_draw_dir, str(group))
            if os.path.isdir(group_components_dir):
                self.data[group][ODE_COMPONENTS_FORECAST].extend(
                    read_draw_files(group_components_dir, "draw_"))
            else:
                error_msg = f"ODE Components forecast for the group with {col_group} = {group} is not found"
                print("Error: " + error_msg)

        # Final output draws: one cases/deaths/reff file per group, read only
        # when the output directory exists at all.
        output_dir = directories.forecast_output_draw_dir
        if os.path.isdir(output_dir):
            for group in self.groups:
                group_data = self.data[group]
                group_data[OUTPUT_DRAWS_CASES] = pd.read_csv(
                    os.path.join(output_dir, f"cases_{group}.csv"))
                group_data[OUTPUT_DRAWS_DEATHS] = pd.read_csv(
                    os.path.join(output_dir, f"deaths_{group}.csv"))
                group_data[OUTPUT_DRAWS_REFF] = pd.read_csv(
                    os.path.join(output_dir, f"reff_{group}.csv"))