示例#1
0
def sample_simulate_pool(main_db: Union[str, Path], index_file_pattern: str,
                         fit_type: str, n_sim: int, n_pool: int):
    """
    Fit the samples in a database in parallel by making copies of the database, fitting them
    separately, and then combining them back together in the sample table of main_db.

    Parameters
    ----------
    main_db
        Path to the main database that will be spawned.
    index_file_pattern
        File pattern for the new databases that will have index equal to the simulation number.
    fit_type
        The type of fit to run, one of "fixed" or "both".
    n_sim
        Number of simulations that will be fit.
    n_pool
        Number of pools for the multiprocessing.

    Raises
    ------
    SampleError
        If fit_type is not "fixed" or "both".
    """
    if fit_type not in ("fixed", "both"):
        raise SampleError(f"Unrecognized fit type {fit_type}.")

    fit_sample = FitSample(main_db=main_db,
                           index_file_pattern=index_file_pattern,
                           fit_type=fit_type)
    fits = dmdismod_in_parallel(dm_thread=fit_sample,
                                sims=list(range(n_sim)),
                                n_pool=n_pool)
    # Reconstruct the sample table with all n_sim fits.
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # way to stack the per-simulation frames.
    # NOTE(review): assumes dmdismod_in_parallel returns an iterable of
    # DataFrames, one per simulation -- confirm against its definition.
    samp = pd.concat(list(fits), ignore_index=True)
    d = DismodIO(path=main_db)
    d.sample = samp[['sample_index', 'var_id', 'var_value']]
示例#2
0
def main():
    """
    Takes dismod databases that have already had a fit run on them and simulates new datasets, refitting
    on all of them, then combining the results back into one database.

    With more than one pool the simulations are fit in parallel and the
    combined fits are written to the sample table of the main database;
    otherwise a single ``sample simulate`` dismod command is run on it.

    Returns:
        None

    Raises:
        RuntimeError: If the fit_var table of the main database is empty.
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    main_db = context.db_file(location_id=args.parent_location_id,
                              sex_id=args.sex_id)

    d = DismodIO(path=main_db)
    if d.fit_var.empty:
        raise RuntimeError(
            "Cannot run sample / simulate on a database without fit_var!")

    # Create n_sim simulation datasets based on the fitted parameters.
    # Each command must be its own list element: without the separating
    # comma, adjacent string literals are silently concatenated into one
    # malformed command.
    run_dismod_commands(dm_file=main_db,
                        commands=[
                            'set start_var fit_var',
                            'set truth_var fit_var',
                            'set scale_var fit_var',
                            f'simulate {args.n_sim}',
                        ])

    if args.n_pool > 1:
        # Make a pool and fit to each of the simulations (uses the __call__ method)
        # NOTE(review): main_db above is looked up with args.parent_location_id
        # but this uses args.location_id -- confirm both attributes exist on
        # the parsed args and that the difference is intended.
        fit_sample = FitSample(context=context,
                               location_id=args.location_id,
                               sex_id=args.sex_id,
                               fit_type=args.fit_type)
        p = Pool(args.n_pool)
        try:
            fits = list(p.map(fit_sample, range(args.n_sim)))
        finally:
            # Release the worker processes even if one of the fits raised.
            p.close()
            p.join()

        # Reconstruct the sample table with all n_sim fits.
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported replacement.
        sample = pd.concat(fits, ignore_index=True).rename(columns={
            'fit_var_id': 'var_id',
            'fit_var_value': 'var_value'
        })
        d.sample = sample
    else:
        # If we only have one pool that means we aren't going to run in parallel
        run_dismod_commands(dm_file=main_db,
                            commands=[f'sample simulate {args.n_sim}'])
示例#3
0
    def _process(self, db: str):
        """
        Run ``predict sample`` on a database for the single sample at self.index.

        The sample table of ``db`` is overwritten with only the rows whose
        sample_index equals self.index, re-labelled as sample_index 0 and with
        sample_id set to var_id. The predict table produced by the dismod
        command is then returned with its sample_index column set back to
        self.index.

        Parameters
        ----------
        db
            Path to the database to run the prediction on.

        Returns
        -------
        The predict table read from ``db``, with a sample_index column equal
        to self.index.
        """
        dbio = DismodIO(path=db)

        # Read the sample table once and keep only the rows for this index.
        all_samples = dbio.sample
        this_sample = all_samples.loc[all_samples.sample_index ==
                                      self.index].copy()
        this_sample['sample_index'] = 0
        this_sample['sample_id'] = this_sample['var_id']
        dbio.sample = this_sample
        # Drop the handle before the external command runs on the same file
        # (presumably to release the connection -- confirm against DismodIO).
        del dbio

        run_dismod_commands(dm_file=db, commands=['predict sample'])
        dbio = DismodIO(path=db)
        predict = dbio.predict
        predict['sample_index'] = self.index
        return predict