Example #1
def sample_simulate_pool(main_db: Union[str, Path], index_file_pattern: str,
                         fit_type: str, n_sim: int, n_pool: int):
    """
    Fit the samples in a database in parallel by making copies of the database, fitting them
    separately, and then combining them back together in the sample table of main_db.

    Parameters
    ----------
    main_db
        Path to the main database from which the per-simulation copies are spawned.
    index_file_pattern
        File pattern for the new databases that will have index equal to the simulation number.
    fit_type
        The type of fit to run, one of "fixed" or "both".
    n_sim
        Number of simulations that will be fit.
    n_pool
        Number of worker processes in the multiprocessing pool.
    """
    if fit_type not in ["fixed", "both"]:
        raise SampleError(f"Unrecognized fit type {fit_type}.")

    fit_sample = FitSample(main_db=main_db,
                           index_file_pattern=index_file_pattern,
                           fit_type=fit_type)
    fits = dmdismod_in_parallel(dm_thread=fit_sample,
                                sims=list(range(n_sim)),
                                n_pool=n_pool)
    # Reconstruct the sample table with all n_sim fits
    samp = pd.concat(fits).reset_index(drop=True)
    d = DismodIO(path=main_db)
    d.sample = samp[['sample_index', 'var_id', 'var_value']]
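# A minimal usage sketch for sample_simulate_pool; the database path and the
# counts below are hypothetical, only the signature comes from the example:
sample_simulate_pool(main_db='/path/to/dismod.db',
                     index_file_pattern='sample_{index}.db',
                     fit_type='fixed',
                     n_sim=10,
                     n_pool=4)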
Example #2
def main():
    """
    Take dismod databases that have already had a fit run on them, simulate
    new datasets, refit on all of them, and combine the results back into
    one database.
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    main_db = context.db_file(location_id=args.parent_location_id,
                              sex_id=args.sex_id)

    d = DismodIO(path=main_db)
    if d.fit_var.empty:
        raise RuntimeError(
            "Cannot run sample / simulate on a database without fit_var!")

    # Create n_sim simulation datasets based on the fitted parameters
    run_dismod_commands(dm_file=main_db,
                        commands=[
                            'set start_var fit_var',
                            'set truth_var fit_var', 'set scale_var fit_var',
                            f'simulate {args.n_sim}'
                        ])

    if args.n_pool > 1:
        # Make a pool and fit to each of the simulations (uses the __call__ method)
        fit_sample = FitSample(context=context,
                               location_id=args.parent_location_id,
                               sex_id=args.sex_id,
                               fit_type=args.fit_type)
        p = Pool(args.n_pool)
        fits = list(p.map(fit_sample, range(args.n_sim)))
        p.close()

        # Reconstruct the sample table with all n_sim fits
        sample = pd.concat(fits).reset_index(drop=True)
        sample.rename(columns={'fit_var_id': 'var_id',
                               'fit_var_value': 'var_value'},
                      inplace=True)
        d.sample = sample
    else:
        # If we only have one pool that means we aren't going to run in parallel
        run_dismod_commands(dm_file=main_db,
                            commands=[f'sample simulate {args.n_sim}'])
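# Why the comma restored above matters: adjacent Python string literals are
# fused at compile time, so the original list silently contained a single
# malformed command instead of two separate ones:
commands = ['set start_var fit_var'
            'set truth_var fit_var']
assert commands == ['set start_var fit_varset truth_var fit_var']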
Example #3
def collect(dbs, location_ids=None):
    frames = []
    i = -1
    if location_ids:
        dbs = [p for p in dbs if int(p.parts[-3]) in location_ids]
    for p in dbs:
        global db
        db = DismodIO(p)
        try:
            db.option  # probe: raises if this is not a valid dismod database
            loc, sex = map(int, p.parts[-3:-1])
            fit = (db.data_subset.merge(db.data, how='left')
                   .merge(db.fit_data_subset, left_on='data_subset_id', right_on='fit_data_subset_id')
                   .merge(db.node, how='left')
                   .merge(db.integrand, how='left'))
            cov_names = {f'x_{row.covariate_id}': row.c_covariate_name
                         for _, row in db.covariate[['covariate_id', 'c_covariate_name']].iterrows()}
            fit.rename(columns=cov_names, inplace=True)
            fit['c_parent_location_id'] = loc
            cols = (['c_parent_location_id', 'c_location_id', 'integrand_name', 'data_name',
                     'age_lower', 'age_upper', 'time_lower', 'time_upper', 'weighted_residual']
                    + list(cov_names.values()))
            frames.append(fit[cols])
            i += 1
            print(i, f'sex: {sex}, location: {loc}')
        except Exception:
            continue
    return pd.concat(frames) if frames else pd.DataFrame()
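# Hedged usage sketch for collect(); the directory layout is inferred from
# p.parts[-3:-1] above (.../{location_id}/{sex_id}/dismod.db), and the root
# path and location ids are hypothetical:
from pathlib import Path
dbs = sorted(Path('/path/to/run').glob('*/*/dismod.db'))
residuals = collect(dbs, location_ids=[101, 102])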
Example #4
def simulate(path: Union[str, Path], n_sim: int):
    """
    Simulate from a database, within a database.

    Parameters
    ----------
    path
        A path to the database object to create simulations in.
    n_sim
        Number of simulations to create.
    """
    d = DismodIO(path=path)
    try:
        if d.fit_var.empty:
            raise SampleError(
                "Cannot run sample simulate on a database without fit_var!")
    except ValueError:
        raise SampleError(
            "Cannot run sample simulate on a database without fit_var! "
            "The fit_var table does not exist yet.")

    # Create n_sim simulation datasets based on the fitted parameters
    run_dismod_commands(dm_file=path,
                        commands=[
                            'set start_var fit_var', 'set truth_var fit_var',
                            'set scale_var fit_var', f'simulate {n_sim}'
                        ])
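# Hedged usage sketch: simulate() requires a database that already contains
# a fit_var table; the path below is hypothetical:
simulate('/path/to/fitted.db', n_sim=25)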
Example #5
    def _process(self, db: str):

        dbio = DismodIO(path=db)
        n_var = len(dbio.var)

        this_sample = dbio.sample.loc[dbio.sample.sample_index ==
                                      self.index].copy()
        this_sample['sample_index'] = 0
        this_sample['sample_id'] = this_sample['var_id']
        dbio.sample = this_sample
        del dbio

        run_dismod_commands(dm_file=db, commands=['predict sample'])
        dbio = DismodIO(path=db)
        predict = dbio.predict
        predict['sample_index'] = self.index
        return predict
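# Note on the block above: the sample table is temporarily reduced to the one
# draw selected by self.index (relabelled sample_index 0, with sample_id set
# to var_id as a one-draw table requires) so that 'predict sample' runs on
# just that draw; the resulting predict table is then tagged with the
# original index before being returned.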
Example #6
def test_predict_sample_pools(mi, settings, dismod):
    alchemy = Alchemy(settings)
    predictions = predict_sample_pool(main_db=NAME,
                                      index_file_pattern='sample_{index}.db',
                                      n_pool=2,
                                      n_sim=2)
    di = DismodIO(NAME)
    assert len(predictions) == 2 * len(di.avgint)
Example #7
def test_predict_pool(mi, settings, dismod):
    alchemy = Alchemy(settings)
    predict = Predict(main_db=NAME, index_file_pattern='sample_{index}.db')
    result = predict(1)
    di = DismodIO(NAME)
    assert len(result) == len(di.avgint)
    assert all(result.sample_index == 1)
    assert all(result.columns ==
               ['predict_id', 'sample_index', 'avgint_id', 'avg_integrand'])
Example #8
def test_sample_simulate_sequence(filler, dismod):
    sample_simulate_sequence(NAME, n_sim=2, fit_type='fixed')
    di = DismodIO(NAME)
    assert len(di.sample) == 500
    assert all(di.sample.columns ==
               ['sample_id', 'sample_index', 'var_id', 'var_value'])
    assert all(di.sample.iloc[0:250].sample_index == 0)
    assert all(di.sample.iloc[250:500].sample_index == 1)
    assert all(~np.isnan(di.sample.var_value))
Example #9
def test_sample_asymptotic(filler, dismod):
    sample_asymptotic(NAME, fit_type='fixed', n_sim=3)
    di = DismodIO(NAME)
    assert len(di.sample) == 750
    assert all(di.sample.columns ==
               ['sample_id', 'sample_index', 'var_id', 'var_value'])
    assert all(di.sample.iloc[0:250].sample_index == 0)
    assert all(di.sample.iloc[250:500].sample_index == 1)
    assert all(di.sample.iloc[500:750].sample_index == 2)
    assert all(~np.isnan(di.sample.var_value))
Example #10
def test_predict_sample(mi, settings, dismod):
    alchemy = Alchemy(settings)
    fill_avgint_with_priors_grid(inputs=mi,
                                 alchemy=alchemy,
                                 settings=settings,
                                 source_db_path=NAME,
                                 child_locations=[72],
                                 child_sexes=[2])
    run_dismod_commands(dm_file=NAME, commands=['predict sample'])
    di = DismodIO(NAME)
    assert len(di.predict) == 2 * len(di.avgint)
Example #12
    def __call__(self, index=None):
        index_db = self.context.db_file(location_id=self.location_id,
                                        sex_id=self.sex_id,
                                        index=index)
        if index is not None:
            copy2(src=str(self.main_db), dst=str(index_db))
        run_dismod_commands(dm_file=index_db,
                            commands=[f'fit {self.fit_type} {index}'])
        db = DismodIO(path=index_db)
        fit = db.fit_var
        fit['sample_index'] = index
        return fit
Example #13
def check_last_command(dm_file: str, command: str):
    LOG.warning(
        "FIXME -- GMA -- Check_last_command needs to wrap the call to dmdismod, not the ODE preprocessor."
    )
    from cascade_at.dismod.api.dismod_io import DismodIO
    db = DismodIO(dm_file)
    log = db.log
    last_begin = [
        l for i, l in log.iterrows()
        if l.message_type == 'command' and l.message.startswith('begin ')
    ]
    rtn = True
    if not last_begin:
        LOG.error("ERROR: Failed to find a 'begin' command.")
        rtn = False
    else:
        last_begin = last_begin[-1]
    if rtn:
        start_cmd = [
            l for i, l in log[last_begin.log_id:].iterrows()
            if l.message_type == 'command'
            and l.message.startswith(f'begin {command}')
        ]
        if not start_cmd:
            LOG.error(
                f"ERROR: Expected 'begin {command}' but found '{last_begin.message}'."
            )
            rtn = False
        else:
            start_cmd = start_cmd[-1]
    if rtn:
        end_cmd = [
            l for i, l in log[start_cmd.log_id:].iterrows()
            if l.message_type == 'command'
            and l.message.startswith(f'end {command}')
        ]
        if not end_cmd:
            LOG.error(
                f"ERROR: Did not find end for this '{start_cmd.message}' command"
            )
            rtn = False
        for i, l in log[start_cmd.log_id:].iterrows():
            if l.message_type in ['error', 'warning']:
                LOG.info(f"DISMOD {l.message_type}: {l.message.rstrip()}")
                rtn = False
    if rtn:
        LOG.info(f"{command} OK")
    else:
        LOG.error(
            f"ERROR: {command} had errors, warnings, or failed to complete."
        )
    return rtn
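# Hedged usage sketch: verify that the most recent dismod command completed
# cleanly before trusting downstream tables (the path is hypothetical):
if not check_last_command('/path/to/dismod.db', command='fit'):
    raise RuntimeError("The last 'fit' command did not complete cleanly.")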
Example #14
    def _process(self, db: str):
        run_dismod_commands(dm_file=db,
                            commands=[f'fit {self.fit_type} {self.index}'])

        db = DismodIO(path=db)
        fit = db.fit_var
        fit['sample_index'] = self.index
        fit.rename(columns={
            'fit_var_id': 'var_id',
            'fit_var_value': 'var_value'
        },
                   inplace=True)
        return fit
Example #15
def fill_avgint_with_priors_grid(inputs: MeasurementInputs, alchemy: Alchemy,
                                 settings: SettingsConfig,
                                 source_db_path: Union[str, Path],
                                 child_locations: List[int],
                                 child_sexes: List[int]):
    """
    Fill the average integrand table with the grid that the priors are on.
    This is so that we can "predict" the prior for the next level of the cascade.

    Parameters
    ----------
    inputs
        An inputs object
    alchemy
        A grid alchemy object
    settings
        A settings configuration object
    source_db_path
        The path of the source database that has had a fit on it
    child_locations
        The child locations to predict for
    child_sexes
        The child sexes to predict for
    """

    sourceDB = DismodIO(path=source_db_path)
    rates = [r.rate for r in settings.rate]
    grids = integrand_grids(alchemy=alchemy, integrands=rates)

    posterior_grid = get_prior_avgint_grid(grids=grids,
                                           sexes=child_sexes,
                                           locations=child_locations,
                                           midpoint=False)
    posterior_grid = inputs.add_covariates_to_data(df=posterior_grid)
    posterior_grid = prep_data_avgint(df=posterior_grid,
                                      node_df=sourceDB.node,
                                      covariate_df=sourceDB.covariate)
    posterior_grid.rename(columns={'sex_id': 'c_sex_id'}, inplace=True)
    sourceDB.avgint = posterior_grid
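# Hedged usage sketch: inputs, alchemy, and settings come from elsewhere in
# the pipeline (e.g. Context.read_inputs() as in Example #17); the location
# and sex values mirror the test in Example #10, and the path is hypothetical:
fill_avgint_with_priors_grid(inputs=inputs,
                             alchemy=alchemy,
                             settings=settings,
                             source_db_path='/path/to/fitted.db',
                             child_locations=[72],
                             child_sexes=[2])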
Example #16
def mulcov_statistics(model_version_id: int, locations: List[int], sexes: List[int],
                      outfile_name: str, sample: bool = True,
                      mean: bool = True, std: bool = True,
                      quantile: Optional[List[float]] = None) -> None:
    """
    Compute statistics for the covariate multipliers.

    Parameters
    ----------
    model_version_id
        The model version ID
    locations
        A list of locations that, when used in combination with sexes, point to the databases
        to pull covariate multiplier estimates from
    sexes
        A list of sexes that, when used in combination with locations, point to the databases
        to pull covariate multiplier estimates from
    outfile_name
        A filepath specifying where to save the covariate multiplier statistics.
    sample
        Whether or not the results are stored in the sample table or the fit_var table.
    mean
        Whether or not to compute the mean
    std
        Whether or not to compute the standard deviation
    quantile
        An optional list of quantiles to compute
    """

    context = Context(model_version_id=model_version_id)
    db_files = [DismodIO(context.db_file(location_id=loc, sex_id=sex))
                for loc in locations for sex in sexes]
    LOG.info(f"There are {len(db_files)} databases that will be aggregated.")

    common_covariates = common_covariate_names(db_files)
    LOG.info(f"The common covariates in the passed databases are {common_covariates}.")

    if sample:
        table_name = 'sample'
    else:
        table_name = 'fit_var'

    LOG.info(f"Will pull from the {table_name} table from each database.")
    mulcov_estimates = get_mulcovs(
        dbs=db_files, covs=common_covariates, table=table_name
    )
    stats = compute_statistics(
        df=mulcov_estimates, mean=mean, std=std, quantile=quantile
    )
    LOG.info('Write to output file.')
    stats.to_csv(context.outputs_dir / f'{outfile_name}.csv', index=False)
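# Hedged usage sketch; the model version and location ids echo the paths in
# Example #24's docstring, and the quantiles are illustrative only:
mulcov_statistics(model_version_id=475588,
                  locations=[100], sexes=[1, 2],
                  outfile_name='mulcov_stats',
                  sample=True, mean=True, std=True,
                  quantile=[0.025, 0.975])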
Example #17
def main():
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    inputs, alchemy, settings = context.read_inputs()

    db_path = context.db_file(location_id=args.source_location,
                              sex_id=args.source_sex, make=False)
    sourceDB = DismodIO(path=db_path)

    rates = [r.rate for r in settings.rate]
    posterior_grid = get_prior_avgint_grid(settings=settings,
                                           integrands=rates,
                                           sexes=args.target_sexes,
                                           locations=args.target_locations,
                                           midpoint=False)
    posterior_grid = inputs.add_covariates_to_data(df=posterior_grid)
    posterior_grid = prep_data_avgint(df=posterior_grid,
                                      node_df=sourceDB.node,
                                      covariate_df=sourceDB.covariate)
    posterior_grid.rename(columns={'sex_id': 'c_sex_id'}, inplace=True)
    sourceDB.avgint = posterior_grid
    # dmdismod expects the database path, not the DismodIO handle
    run_dismod_commands(dm_file=db_path, commands=['predict sample'])
Example #18
def predict_sample_pool(main_db: Union[str, Path], index_file_pattern: str,
                        n_sim: int, n_pool: int):
    """
    Run predict sample in a pool by making copies of the existing database
    and splitting out the sample table into n_sim databases, running
    predict sample on each of them, and combining the results back
    into the main database.
    """
    predict = Predict(main_db=main_db, index_file_pattern=index_file_pattern)
    predictions = dmdismod_in_parallel(dm_thread=predict,
                                       sims=list(range(n_sim)),
                                       n_pool=n_pool)
    predictions = pd.concat(predictions).reset_index(drop=True)
    return predictions[['sample_index', 'avgint_id', 'avg_integrand']]
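# Hedged usage sketch, mirroring the call in test_predict_sample_pools
# (Example #6); the main database path is hypothetical:
predictions = predict_sample_pool(main_db='/path/to/dismod.db',
                                  index_file_pattern='sample_{index}.db',
                                  n_sim=2, n_pool=2)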
Example #19
def main():
    from cascade_at.dismod.api.dismod_io import DismodIO

    args = parse_args()
    path = Path(args.filename).expanduser()
    assert path.is_file(), f"The database path {path} does not exist."
    global db
    db = DismodIO(path)

    title = case_study_title(db,
                             version=args.model_version_id,
                             disease=args.disease,
                             which_fit=args.fit_type)

    data = get_fitted_data(db)
    data_integrands = sorted(
        set(data.integrand_name[~data.integrand_name.isna()].unique()) -
        set(['mtall', 'mtother']))
    no_ode_integrands = sorted(
        set(['Sincidence', 'mtexcess', 'mtother',
             'remission']).intersection(data_integrands))
    yes_ode_integrands = sorted(
        (set(data_integrands) -
         set(no_ode_integrands)).intersection(data_integrands))
    all_integrands = no_ode_integrands + yes_ode_integrands

    covariate_integrand_list = yes_ode_integrands
    predict_integrand_list = ['susceptible', 'withC']

    rate = db.rate
    integrand = db.integrand
    rate_names = rate.loc[~rate.parent_smooth_id.isna(), 'rate_name'].tolist()
    for rate_name in rate_names:
        plot_rate(db, rate_name, title=title)
    for integrand_name in all_integrands:
        plot_integrand(db, data, integrand_name, title=title)
    plot_predict(db,
                 covariate_integrand_list,
                 predict_integrand_list,
                 title=title)
Example #20
def main():
    """

    Returns:

    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    context = Context(model_version_id=args.model_version_id)
    db_files = [
        DismodIO(context.db_file(location_id=loc, sex_id=sex))
        for loc in args.locations for sex in args.sexes
    ]
    LOG.info(f"There are {len(db_files)} databases that will be aggregated.")

    common_covariates = common_covariate_names(db_files)
    LOG.info(
        f"The common covariates in the passed databases are {common_covariates}."
    )

    if args.sample:
        table_name = 'sample'
    else:
        table_name = 'fit_var'

    LOG.info(f"Will pull from the {table_name} table from each database.")
    mulcov_estimates = get_mulcovs(dbs=db_files,
                                   covs=common_covariates,
                                   table=table_name)
    mulcov_statistics = compute_statistics(df=mulcov_estimates,
                                           mean=args.mean,
                                           std=args.std,
                                           quantile=args.quantile)
    LOG.info('Write to output file.')
    mulcov_statistics.to_csv(context.outputs_dir / f'{args.outfile_name}.csv',
                             index=False)
Example #21
def create_database(file_name, age_list, time_list, integrand_table,
                    node_table, subgroup_table, weight_table, covariate_table,
                    avgint_table, data_table, prior_table, smooth_table,
                    nslist_table, rate_table, mulcov_table, option_table):
    #*# import dismod_at
    from cascade_at.dismod.api.dismod_io import DismodIO
    db = DismodIO(file_name)

    # ----------------------------------------------------------------------
    # avgint_extra_columns, data_extra_columns
    avgint_extra_columns = list()
    data_extra_columns = list()
    for row in option_table:
        if row['name'] == 'avgint_extra_columns':
            avgint_extra_columns = row['value'].split()
        if row['name'] == 'data_extra_columns':
            data_extra_columns = row['value'].split()
    # ----------------------------------------------------------------------
    # create database
    new = True
    #*# connection     = dismod_at.create_connection(file_name, new)
    # ----------------------------------------------------------------------
    # create age table
    col_name = ['age']
    col_type = ['real']
    row_list = []
    for age in age_list:
        row_list.append([age])
    tbl_name = 'age'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.age = pd.DataFrame(row_list, columns=col_name)
    # ----------------------------------------------------------------------
    # create time table
    col_name = ['time']
    col_type = ['real']
    row_list = []
    for time in time_list:
        row_list.append([time])
    tbl_name = 'time'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.time = pd.DataFrame(row_list, columns=col_name)
    # ----------------------------------------------------------------------
    # create integrand table
    col_name = ['integrand_name', 'minimum_meas_cv']
    col_type = ['text', 'real']
    row_list = []
    for i in range(len(integrand_table)):
        minimum_meas_cv = 0.0
        if 'minimum_meas_cv' in integrand_table[i]:
            minimum_meas_cv = integrand_table[i]['minimum_meas_cv']
        row = [integrand_table[i]['name'], minimum_meas_cv]
        row_list.append(row)
    tbl_name = 'integrand'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.integrand = pd.DataFrame(row_list, columns=col_name)
    #
    global_integrand_name2id = {}
    for i in range(len(row_list)):
        global_integrand_name2id[row_list[i][0]] = i
    # ----------------------------------------------------------------------
    # create density table
    col_name = ['density_name']
    col_type = ['text']
    row_list = [
        ['uniform'],
        ['gaussian'],
        ['laplace'],
        ['students'],
        ['log_gaussian'],
        ['log_laplace'],
        ['log_students'],
        ['cen_gaussian'],
        ['cen_laplace'],
        ['cen_log_gaussian'],
        ['cen_log_laplace'],
    ]
    tbl_name = 'density'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.density = pd.DataFrame(row_list, columns=col_name)
    #
    global_density_name2id = {}
    for i in range(len(row_list)):
        global_density_name2id[row_list[i][0]] = i
    # ----------------------------------------------------------------------
    # create covariate table
    col_name = ['covariate_name', 'reference', 'max_difference']
    col_type = ['text', 'real', 'real']
    row_list = []
    for i in range(len(covariate_table)):
        max_difference = None
        if 'max_difference' in covariate_table[i]:
            max_difference = covariate_table[i]['max_difference']
        row = [
            covariate_table[i]['name'], covariate_table[i]['reference'],
            max_difference
        ]
        row_list.append(row)
    tbl_name = 'covariate'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.covariate = pd.DataFrame(row_list, columns=col_name)
    #
    global_covariate_name2id = {}
    for i in range(len(covariate_table)):
        global_covariate_name2id[covariate_table[i]['name']] = i
    # ----------------------------------------------------------------------
    # create node table
    global_node_name2id = {}
    for i in range(len(node_table)):
        global_node_name2id[node_table[i]['name']] = i
    #
    col_name = ['node_name', 'parent']
    col_type = ['text', 'integer']
    row_list = []
    for i in range(len(node_table)):
        node = node_table[i]
        name = node['name']
        parent = node['parent']
        if parent == '':
            parent = None
        else:
            parent = global_node_name2id[parent]
        row_list.append([name, parent])
    tbl_name = 'node'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.node = pd.DataFrame(row_list, columns=col_name)

    # create subgroup table
    global_subgroup_name2id = {}
    global_group_name2id = {}
    group_id = 0
    group_name = subgroup_table[0]['group']
    global_group_name2id[group_name] = group_id
    for i in range(len(subgroup_table)):
        global_subgroup_name2id[subgroup_table[i]['subgroup']] = i
        if subgroup_table[i]['group'] != group_name:
            group_id = group_id + 1
            group_name = subgroup_table[i]['group']
            global_group_name2id[group_name] = group_id
    #
    col_name = ['subgroup_name', 'group_id', 'group_name']
    col_type = ['text', 'integer', 'text']
    row_list = []
    for i in range(len(subgroup_table)):
        if i == 0:
            group_id = 0
            group_name = subgroup_table[0]['group']
        elif subgroup_table[i]['group'] != group_name:
            group_id = group_id + 1
            group_name = subgroup_table[i]['group']
        subgroup_name = subgroup_table[i]['subgroup']
        row_list.append([subgroup_name, group_id, group_name])
    tbl_name = 'subgroup'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.subgroup = pd.DataFrame(row_list, columns=col_name)

    # ----------------------------------------------------------------------
    # create prior table
    col_name = [
        'prior_name', 'lower', 'upper', 'mean', 'std', 'density_id', 'eta',
        'nu'
    ]
    col_type = [
        'text', 'real', 'real', 'real', 'real', 'integer', 'real', 'real'
    ]
    row_list = []
    for i in range(len(prior_table)):
        prior = prior_table[i]
        density_id = global_density_name2id[prior['density']]
        #
        # columns that have null for default value
        for key in ['lower', 'upper', 'std', 'eta', 'nu']:
            if not key in prior:
                prior[key] = None
        #
        row = [
            prior['name'],
            prior['lower'],
            prior['upper'],
            prior['mean'],
            prior['std'],
            density_id,
            prior['eta'],
            prior['nu'],
        ]
        row_list.append(row)
    tbl_name = 'prior'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.prior = pd.DataFrame(row_list, columns=col_name)
    #
    global_prior_name2id = {}
    for i in range(len(row_list)):
        global_prior_name2id[row_list[i][0]] = i
    # ----------------------------------------------------------------------
    # create weight table
    col_name = ['weight_name', 'n_age', 'n_time']
    col_type = ['text', 'integer', 'integer']
    row_list = []
    for i in range(len(weight_table)):
        weight = weight_table[i]
        name = weight['name']
        n_age = len(weight['age_id'])
        n_time = len(weight['time_id'])
        row_list.append([name, n_age, n_time])
    tbl_name = 'weight'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.weight = pd.DataFrame(row_list, columns=col_name)
    #
    global_weight_name2id = {}
    for i in range(len(weight_table)):
        global_weight_name2id[weight_table[i]['name']] = i
    # null is used for constant weighting
    global_weight_name2id[''] = None
    # ----------------------------------------------------------------------
    # create weight_grid table
    col_name = ['weight_id', 'age_id', 'time_id', 'weight']
    col_type = ['integer', 'integer', 'integer', 'real']
    row_list = []
    for i in range(len(weight_table)):
        weight = weight_table[i]
        age_id = weight['age_id']
        time_id = weight['time_id']
        fun = weight['fun']
        for j in age_id:
            for k in time_id:
                w = fun(age_list[j], time_list[k])
                row_list.append([i, j, k, w])
    tbl_name = 'weight_grid'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.weight_grid = pd.DataFrame(row_list, columns=col_name)
    # ----------------------------------------------------------------------
    # create smooth table
    col_name = [
        'smooth_name', 'n_age', 'n_time', 'mulstd_value_prior_id',
        'mulstd_dage_prior_id', 'mulstd_dtime_prior_id'
    ]
    col_type = ['text', 'integer', 'integer', 'integer', 'integer', 'integer']
    row_list = []
    for i in range(len(smooth_table)):
        smooth = smooth_table[i]
        name = smooth['name']
        n_age = len(smooth['age_id'])
        n_time = len(smooth['time_id'])
        #
        prior_id = dict()
        for key in ['value', 'dage', 'dtime']:
            prior_id[key] = None
            mulstd_key = 'mulstd_' + key + '_prior_name'
            if mulstd_key in smooth:
                prior_name = smooth[mulstd_key]
                if prior_name != None:
                    prior_id[key] = global_prior_name2id[prior_name]
        #
        row_list.append([
            name,
            n_age,
            n_time,
            prior_id['value'],
            prior_id['dage'],
            prior_id['dtime'],
        ])
    tbl_name = 'smooth'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.smooth = pd.DataFrame(row_list, columns=col_name)
    #
    global_smooth_name2id = {}
    for i in range(len(smooth_table)):
        global_smooth_name2id[smooth_table[i]['name']] = i
    # ----------------------------------------------------------------------
    # create smooth_grid table
    col_name = [
        'smooth_id',
        'age_id',
        'time_id',
        'value_prior_id',
        'dage_prior_id',
        'dtime_prior_id',
        'const_value',
    ]
    col_type = [
        'integer',  # smooth_id
        'integer',  # age_id
        'integer',  # time_id
        'integer',  # value_prior_id
        'integer',  # dage_prior_id
        'integer',  # dtime_prior_id
        'real',  # const_value
    ]
    row_list = []
    for i in range(len(smooth_table)):
        smooth = smooth_table[i]
        age_id = smooth['age_id']
        time_id = smooth['time_id']
        fun = smooth['fun']
        max_j = 0
        for j in age_id:
            if age_list[j] > age_list[max_j]:
                max_j = j
        max_k = 0
        for k in time_id:
            if time_list[k] > time_list[max_k]:
                max_k = k
        for j in age_id:
            for k in time_id:
                (v, da, dt) = fun(age_list[j], time_list[k])
                #
                if j == max_j:
                    da = None
                elif da != None:
                    da = global_prior_name2id[da]
                #
                if k == max_k:
                    dt = None
                elif dt != None:
                    dt = global_prior_name2id[dt]
                #
                const_value = None
                if isinstance(v, float):
                    const_value = v
                    v = None
                elif v != None:
                    v = global_prior_name2id[v]
                row_list.append([i, j, k, v, da, dt, const_value])
    tbl_name = 'smooth_grid'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.smooth_grid = pd.DataFrame(row_list, columns=col_name)
    # ----------------------------------------------------------------------
    # create nslist table
    col_name = ['nslist_name']
    col_type = ['text']
    row_list = list()
    for nslist_name in nslist_table:
        row_list.append([nslist_name])
    tbl_name = 'nslist'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.nslist = pd.DataFrame(row_list, columns=col_name)
    #
    global_nslist_name2id = dict()
    for i in range(len(row_list)):
        global_nslist_name2id[row_list[i][0]] = i
    # ----------------------------------------------------------------------
    # create nslist_pair table
    col_name = ['nslist_id', 'node_id', 'smooth_id']
    col_type = ['integer', 'integer', 'integer']
    row_list = list()
    tbl_name = 'nslist_pair'
    for key in nslist_table:
        pair_list = nslist_table[key]
        nslist_id = global_nslist_name2id[key]
        for pair in pair_list:
            node_id = global_node_name2id[pair[0]]
            smooth_id = global_smooth_name2id[pair[1]]
            row_list.append([nslist_id, node_id, smooth_id])
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.nslist_pair = pd.DataFrame(row_list, columns=col_name)
    # ----------------------------------------------------------------------
    # create rate table
    col_name = [
        'rate_name', 'parent_smooth_id', 'child_smooth_id', 'child_nslist_id'
    ]
    col_type = ['text', 'integer', 'integer', 'integer']
    row_list = list()
    for rate_name in ['pini', 'iota', 'rho', 'chi', 'omega']:
        row = [rate_name, None, None, None]
        for i in range(len(rate_table)):
            rate = rate_table[i]
            if rate['name'] == rate_name:
                row = [rate_name]
                for key in ['parent_smooth', 'child_smooth', 'child_nslist']:
                    entry = None
                    if key in rate:
                        entry = rate[key]
                    if entry != None:
                        if key == 'child_nslist':
                            entry = global_nslist_name2id[entry]
                        else:
                            entry = global_smooth_name2id[entry]
                    row.append(entry)
        row_list.append(row)
    tbl_name = 'rate'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.rate = pd.DataFrame(row_list, columns=col_name)
    global_rate_name2id = {}
    for i in range(len(row_list)):
        global_rate_name2id[row_list[i][0]] = i
    # ----------------------------------------------------------------------
    # create mulcov table
    col_name = [
        'mulcov_type',
        'rate_id',
        'integrand_id',
        'covariate_id',
        'group_id',
        'group_smooth_id',
        'subgroup_smooth_id',
    ]
    col_type = [
        'text',  # mulcov_type
        'integer',  # rate_id
        'integer',  # integrand_id
        'integer',  # covariate_id
        'integer',  # group_id
        'integer',  # group_smooth_id
        'integer',  # subgroup_smooth_id
    ]
    row_list = []
    warning_printed = False
    for i in range(len(mulcov_table)):
        mulcov = mulcov_table[i]
        mulcov_type = mulcov['type']
        effected = mulcov['effected']
        covariate_id = global_covariate_name2id[mulcov['covariate']]
        #
        # rate_id and integrand_id
        if mulcov_type == 'rate_value':
            rate_id = global_rate_name2id[effected]
            integrand_id = None
        else:
            integrand_id = global_integrand_name2id[effected]
            rate_id = None
        #
        # group_id
        if 'group' in mulcov:
            group_id = global_group_name2id[mulcov['group']]
        else:
            group_id = 0
            if not warning_printed:
                msg = 'create_database Warning: '
                msg += 'group key missing in mulcov table,\n'
                msg += 'using default value; i.e., first group '
                msg += '(you should fix this).'
                print(msg)
                warning_printed = True
        #
        # group_smooth_id
        if mulcov['smooth'] == None:
            group_smooth_id = None
        else:
            group_smooth_id = global_smooth_name2id[mulcov['smooth']]
        #
        # subgroup_smooth_id
        if not 'subsmooth' in mulcov:
            subgroup_smooth_id = None
        elif mulcov['subsmooth'] == None:
            subgroup_smooth_id = None
        else:
            subgroup_smooth_id = global_smooth_name2id[mulcov['subsmooth']]
        #
        row_list.append([
            mulcov_type,
            rate_id,
            integrand_id,
            covariate_id,
            group_id,
            group_smooth_id,
            subgroup_smooth_id,
        ])
    tbl_name = 'mulcov'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.mulcov = pd.DataFrame(row_list, columns=col_name)
    # ----------------------------------------------------------------------
    # avgint table
    #
    # extra_name, extra_type
    extra_name = []
    extra_type = []
    if (len(avgint_table) > 0):
        extra_name = avgint_extra_columns
        row = avgint_table[0]
        for key in extra_name:
            if isinstance(row[key], str):
                extra_type.append('text')
            elif isinstance(row[key], int):
                extra_type.append('integer')
            elif isinstance(row[key], float):
                extra_type.append('real')
            else:
                assert False
    #
    # col_name
    col_name = extra_name + [
        'integrand_id', 'node_id', 'subgroup_id', 'weight_id', 'age_lower',
        'age_upper', 'time_lower', 'time_upper'
    ]
    for j in range(len(covariate_table)):
        col_name.append('x_%s' % j)
    #
    # col_type
    col_type = extra_type + [
        'integer',  # integrand_id
        'integer',  # node_id
        'integer',  # subgroup_id
        'integer',  # weight_id
        'real',  # age_lower
        'real',  # age_upper
        'real',  # time_lower
        'real'  # time_upper
    ]
    for j in range(len(covariate_table)):
        col_type.append('real')
    #
    # row_list
    row_list = []
    warning_printed = False
    for i in range(len(avgint_table)):
        avgint = avgint_table[i]
        #
        # subgroup column has a default value
        if 'subgroup' not in avgint:
            avgint['subgroup'] = subgroup_table[0]['subgroup']
            if not warning_printed:
                msg = 'create_database Warning: '
                msg += 'subgroup key missing in avgint table,\n'
                msg += 'using default value; i.e., first subgroup '
                msg += '(you should fix this).'
                print(msg)
                warning_printed = True
        #
        # extra columns first
        row = list()
        for name in extra_name:
            row.append(avgint[name])
        #
        avgint_id = i
        integrand_id = global_integrand_name2id[avgint['integrand']]
        node_id = global_node_name2id[avgint['node']]
        subgroup_id = global_subgroup_name2id[avgint['subgroup']]
        weight_id = global_weight_name2id[avgint['weight']]
        row = row + [
            integrand_id, node_id, subgroup_id, weight_id, avgint['age_lower'],
            avgint['age_upper'], avgint['time_lower'], avgint['time_upper']
        ]
        for j in range(len(covariate_table)):
            row.append(float(avgint[covariate_table[j]['name']]))
        row_list.append(row)

    tbl_name = 'avgint'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.avgint = (pd.DataFrame(row_list, columns=col_name).astype(
        dict(
            zip(
                col_name,
                pd.Series(col_type).replace({
                    'integer': 'int',
                    'real': 'float'
                })))))
    # ----------------------------------------------------------------------
    # create data table
    #
    #
    # extra_name, extra_type
    extra_name = []
    extra_type = []
    if (len(data_table) > 0):
        extra_name = data_extra_columns
        row = data_table[0]
        for key in extra_name:
            if isinstance(row[key], str):
                extra_type.append('text')
            elif isinstance(row[key], int):
                extra_type.append('integer')
            elif isinstance(row[key], float):
                extra_type.append('real')
            else:
                assert False
    #
    # col_name
    col_name = extra_name + [
        'integrand_id',
        'node_id',
        'subgroup_id',
        'weight_id',
        'age_lower',
        'age_upper',
        'time_lower',
        'time_upper',
        'hold_out',
        'density_id',
        'meas_value',
        'meas_std',
        'eta',
        'nu',
    ]
    for j in range(len(covariate_table)):
        col_name.append('x_%s' % j)
    #
    # col_type
    col_type = extra_type + [
        'integer',  # integrand_id
        'integer',  # node_id
        'integer',  # subgroup_id
        'integer',  # weight_id
        'real',  # age_lower
        'real',  # age_upper
        'real',  # time_lower
        'real',  # time_upper
        'integer',  # hold_out
        'integer',  # density_id
        'real',  # meas_value
        'real',  # meas_std
        'real',  # eta
        'real',  # nu
    ]
    for j in range(len(covariate_table)):
        col_type.append('real')
    row_list = []
    warning_printed = False
    for i in range(len(data_table)):
        data = data_table[i]
        #
        # extra columns first
        row = list()
        for name in extra_name:
            row.append(data[name])
        #
        # columns that have null for default value
        for key in ['meas_std', 'eta', 'nu']:
            if not key in data:
                data[key] = None
        #
        # subgroup column has a default value
        if not 'subgroup' in data:
            data['subgroup'] = subgroup_table[0]['subgroup']
            if not warning_printed:
                msg = 'create_database Warning: '
                msg += 'subgroup key missing in data table,\n'
                msg += 'using default value; i.e., first subgroup '
                msg += '(you should fix this).'
                print(msg)
                warning_printed = True
        #
        integrand_id = global_integrand_name2id[data['integrand']]
        density_id = global_density_name2id[data['density']]
        node_id = global_node_name2id[data['node']]
        subgroup_id = global_subgroup_name2id[data['subgroup']]
        weight_id = global_weight_name2id[data['weight']]
        hold_out = int(data['hold_out'])
        row = row + [
            integrand_id, node_id, subgroup_id, weight_id, data['age_lower'],
            data['age_upper'], data['time_lower'], data['time_upper'],
            hold_out, density_id, data['meas_value'], data['meas_std'],
            data['eta'], data['nu']
        ]
        for j in range(len(covariate_table)):
            row.append(float(data[covariate_table[j]['name']]))
        row_list.append(row)

    tbl_name = 'data'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    data = pd.DataFrame(row_list, columns=col_name)
    data['data_name'] = ''
    db.data = data
    # ----------------------------------------------------------------------
    # create option table
    col_name = ['option_name', 'option_value']
    col_type = ['text unique', 'text']
    row_list = []
    for row in option_table:
        name = row['name']
        value = row['value']
        row_list.append([name, value])
    tbl_name = 'option'
    #*# dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    db.option = pd.DataFrame(row_list, columns=col_name)
    # ----------------------------------------------------------------------
    # close the connection
    #*# connection.close()
    return
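# The per-table pattern repeated throughout create_database, extracted as a
# hedged helper sketch (rows_to_table is not part of the original API):
import pandas as pd

def rows_to_table(row_list, col_name, col_type):
    """Build a typed DataFrame from dismod-style column names and types,
    the same way the avgint block above does."""
    dtypes = dict(zip(col_name,
                      pd.Series(col_type).replace({'integer': 'int',
                                                   'real': 'float',
                                                   'text': 'object',
                                                   'text unique': 'object'})))
    return pd.DataFrame(row_list, columns=col_name).astype(dtypes)

# e.g. the age table: db.age = rows_to_table([[0.0], [100.0]], ['age'], ['real'])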
Example #22
def dm(tmp_path):
    return DismodIO(path=tmp_path / 'dismod.db')
Example #23
def example_db(
    file_name,
    test_config={
        'node_effects': False,
        'group_effects': False,
        'sex_effect': False,
        'use_group_mulcov': False,
        'include_group_data': False,
        'zero_sum_mulcov': False
    },
    truth={},
    prior=dict(subgroup_effects=None,
               parent_density='uniform',
               parent_std=None,
               child_density='uniform',
               child_std=None,
               subgroup_density='uniform',
               subgroup_std=1),
    node_effects=None,
    subgroup_effects=None,
    tol_fixed=1e-10,
    tol_random=1e-10,
):

    if os.path.exists(file_name):
        os.remove(file_name)

    # Note that the a, t values are not used for this example

    def fun_iota_parent(a, t):
        return ('prior_iota_parent', None, None)

    if test_config['node_effects']:

        def fun_iota_child(a, t):
            return ('prior_iota_child', None, None)

    if test_config['group_effects']:

        def fun_iota_group(a, t):
            return ('prior_iota_group', None, None)

        def fun_iota_subgroup(a, t):
            return ('prior_iota_subgroup', None, None)

    if test_config['sex_effect']:

        def fun_iota_sex(a, t):
            return ('prior_iota_sex', None, None)

    # TODO: Delete dependency with dismod_at
    # ----------------------------------------------------------------------
    # age table
    age_list = [0.0, 100.0]
    #
    # time table
    time_list = [1990.0, 2020.0]
    #
    # integrand table
    integrand_table = [{'name': 'Sincidence'}]
    #
    # node table: world -> north_america
    #             north_america -> (united_states, canada)
    if test_config['node_effects']:
        node_table = [
            {
                'name': 'p1',
                'parent': ''
            },
            {
                'name': 'c1',
                'parent': 'p1'
            },
            {
                'name': 'c2',
                'parent': 'p1'
            },
        ]
    else:
        node_table = [
            {
                'name': 'p1',
                'parent': ''
            },
        ]

    #
    # weight table:
    weight_table = list()
    #
    # covariate table
    covariate_table = [{
        'name': 'one',
        'reference': 0.0,
        'max_difference': None
    }]
    if test_config['sex_effect']:
        covariate_table.append({
            'name': 'sex',
            'reference': 0.0,
            'max_difference': None
        })
    #
    # mulcov table
    mulcov_table = []
    if test_config['group_effects']:
        mulcov_table.append({
            'covariate': 'one',
            'type': 'rate_value',
            'effected': 'iota',
            'group': 'g1',
            'smooth': ('smooth_iota_group'
                       if test_config['use_group_mulcov'] else None),
            'subsmooth': 'smooth_iota_subgroup'
        })
    if test_config['sex_effect']:
        mulcov_table.append({
            'covariate': 'sex',
            'type': 'rate_value',
            'effected': 'iota',
            'group': 'g1' if test_config['group_effects'] else 'none',
            'smooth': 'smooth_iota_sex'
        })
    #
    # avgint table:
    avgint_table = list()
    #
    # nslist_table:
    nslist_table = dict()
    # ----------------------------------------------------------------------
    # subgroup_table
    subgroup_table = [
        {
            'subgroup': 'none',
            'group': 'none'
        },
        {
            'subgroup': 's1',
            'group': 'g1'
        },
        {
            'subgroup': 's2',
            'group': 'g1'
        },
    ]
    # ----------------------------------------------------------------------
    # data table:
    data_table = list()
    # write out data
    row = {
        'density': 'gaussian',
        'weight': '',
        'hold_out': False,
        'time_lower': 2000.0,
        'time_upper': 2000.0,
        'age_lower': 50.0,
        'age_upper': 50.0,
        'integrand': 'Sincidence',
        'one': 1,
        # 'node':         'north_america',
        'node': 'p1',
        # 'eta':          1e-4,
    }
    sexes = [0, 1] if test_config['sex_effect'] else [0]
    for node, node_effect in node_effects.items():
        if (not test_config['node_effects'] and node != 'p1'): continue
        # Exclude data for the parent node
        if (test_config['node_effects'] and node == 'p1'): continue
        for sex in sexes:
            for sg, sge in subgroup_effects.items():
                if (not test_config['group_effects'] and sg != 'none'):
                    continue
                # Exclude data for the group -- if fitting both nodes and groups, omitting sg none creates Hessian errors
                if (test_config['group_effects']
                        and not test_config['include_group_data']
                        and sg == 'none'):
                    continue
                total_effect = 0
                if test_config['sex_effect']:
                    use_sex_covariate = (sg != 'none') or (
                        sg == 'none' and not test_config['group_effects'])
                    row['sex'] = sex if use_sex_covariate else -1
                    subgroups = pd.DataFrame(subgroup_table)
                    group = subgroups.loc[
                        subgroups.subgroup == sg, 'group'].squeeze(
                        ) if test_config['group_effects'] else 'g1'
                    sex_effect = sex * truth['iota_sex_true'][group]
                    total_effect += sex_effect
                if test_config['node_effects']:
                    row['node'] = node
                    total_effect += node_effect
                row['subgroup'] = sg
                sg_effect = 0
                if test_config['group_effects']:
                    if sg in ('s1', 's2'):
                        sg_effect = truth['iota_group_true'] + sge
                total_effect += sg_effect
                # print ({'sex_effect': (sex, sex_effect), 'node_effect': (node, node_effect), 'sg_effect': (sg, sg_effect), 'total_effect': total_effect})
                row['meas_value'] = truth['iota_parent_true'] * np.exp(
                    total_effect)
                row['meas_std'] = row['meas_value'] * 1e-1
                data_table.append(copy.copy(row))
    # ----------------------------------------------------------------------
    # prior_table
    prior_table = [
        { # prior_iota_parent
            'name':     'prior_iota_parent',
            'density':  prior.get('parent_density', 'uniform'),
            'mean':     prior.get('parent_mean', truth['iota_parent_true'] * .5),
            'std':      prior.get('parent_std', 0),
            'eta':      prior.get('parent_eta', None),
            'lower':    truth['iota_parent_true'] * 1e-2,
            'upper':    truth['iota_parent_true'] * 1e+2,
        },{ # prior_iota_child
            'name':     'prior_iota_child',
            'density':  prior.get('child_density', 'uniform'),
            'mean':     prior.get('child_mean', .001),
            'std':      prior.get('child_std', 0),
            'eta':      prior.get('child_eta', None),
            'lower':    -np.inf,
            'upper':    +np.inf,
        },
        { # prior_iota_group
            'name': 'prior_iota_group',
            'density': prior.get('group_density', 'uniform'),
            'mean':    prior.get('group_mean', 0.0),
            'std': prior.get('group_std', 0),
            # 'density': 'gaussian',
            # 'mean': 0.0,
            # 'std': 10.0,
        },
        { # prior_iota_subgroup
            'name': 'prior_iota_subgroup',
            'density': prior.get('subgroup_density', 'uniform'),
            'mean':    prior.get('subgroup_mean', 0.0),
            'std':     prior.get('subgroup_std', 0),
        }
    ]
    if test_config['sex_effect']:
        prior_table.append({  # prior_iota_sex
            'name': 'prior_iota_sex',
            'density': 'uniform',
            'mean': 0.0,
            'lower': -100,
            'upper': 100
        })
    # ----------------------------------------------------------------------
    # smooth table
    smooth_table = [{  # smooth_iota_parent
        'name': 'smooth_iota_parent',
        'age_id': [0],
        'time_id': [0],
        'fun': fun_iota_parent
    }]
    if test_config['node_effects']:
        smooth_table += [{  # smooth_iota_child
            'name': 'smooth_iota_child',
            'age_id': [0],
            'time_id': [0],
            'fun': fun_iota_child
        }]
    if test_config['group_effects']:
        if test_config['use_group_mulcov']:
            smooth_table += [{  # smooth_iota_group
                'name': 'smooth_iota_group',
                'age_id': [0],
                'time_id': [0],
                'fun': fun_iota_group
            }]
        smooth_table += [{  # smooth_iota_subgroup
            'name': 'smooth_iota_subgroup',
            'age_id': [0],
            'time_id': [0],
            'fun': fun_iota_subgroup
        }]
    if test_config['sex_effect']:
        smooth_table.append({  # smooth_iota_sex
            'name': 'smooth_iota_sex',
            'age_id': [0],
            'time_id': [0],
            'fun': fun_iota_sex
        })
    # ----------------------------------------------------------------------
    # rate table
    rate_table = [{
        'name': 'iota',
        'parent_smooth': 'smooth_iota_parent',
        'child_smooth': ('smooth_iota_child'
                         if test_config['node_effects'] else None),
    }]

    # ----------------------------------------------------------------------
    # option_table
    option_table = [
        # { 'name':'parent_node_name',       'value':'north_america' },
        {
            'name': 'parent_node_name',
            'value': 'p1'
        },
        {
            'name': 'print_level_fixed',
            'value': 5
        },
        # { 'name':'print_level_fixed',      'value':0               },
        {
            'name': 'quasi_fixed',
            'value': 'false'
        },
        # { 'name':'derivative_test_fixed',  'value':'second-order'   },
        # { 'name':'derivative_test_fixed',  'value':'trace-adaptive'   },
        {
            'name': 'tolerance_fixed',
            'value': tol_fixed
        },
        {
            'name': 'bound_frac_fixed',
            'value': '1e-10'
        },
        {
            'name': 'derivative_test_random',
            'value': 'second-order'
        },
        {
            'name': 'tolerance_random',
            'value': tol_random
        },
        {
            'name': 'zero_sum_mulcov_group',
            'value': ('g1' if test_config['group_effects']
                      and test_config['zero_sum_mulcov'] else None)
        },
        {
            'name': 'zero_sum_child_rate',
            'value': 'iota' if test_config['node_effects'] else None
        },
        {
            'name': 'rate_case',
            'value': 'iota_pos_rho_zero'
        },
        {
            'name': 'max_num_iter_fixed',
            'value': '1000'
        },
        {
            'name': 'max_num_iter_random',
            'value': '100'
        }
    ]
    # ----------------------------------------------------------------------

    # TODO: Change to using DismodIO instead of dismod_at.create_database
    try:
        from .create_database import create_database
    except ImportError:
        from create_database import create_database

    # create database
    #dismod_at.create_database(
    create_database(file_name, age_list, time_list, integrand_table,
                    node_table, subgroup_table, weight_table, covariate_table,
                    avgint_table, data_table, prior_table, smooth_table,
                    nslist_table, rate_table, mulcov_table, option_table)
    # ----------------------------------------------------------------------
    from cascade_at.dismod.api.dismod_io import DismodIO
    db = DismodIO(file_name)
    return db
Example #24
def dmdismod(cmd):
    """
    Example calling sequence:
    os.system('cp /Users/gma/ihme/epi/at_cascade/data/475588/dbs/100/3/dismod.db /tmp/t1_diabetes.db')
    dmdismod(f'{_dismod_cmd_} /tmp/t1_diabetes.db ODE init')
    dmdismod(f'{_dismod_cmd_} /tmp/t1_diabetes.db ODE fit --ode-hold-out-list mtexcess')
    dmdismod(f'{_dismod_cmd_} /tmp/t1_diabetes.db ODE students --ode-hold-out-list mtexcess')
    """

    help = ("An extended dmdismod command, to handle Brad's strategy of:\n"
            "  1) fit the non-ODE integrands to initialize an ODE fit,\n"
            "  2) fit the non-ODE and ODE integrands\n"
            "  3) fit to log-student data densities.")

    def parse_args(args):
        import argparse
        parser = argparse.ArgumentParser(description=help_text)

        parser.add_argument('path',
                            type=str,
                            help='Path to the Dismod_AT sqlite database')
        parser.add_argument(
            'dispatch',
            type=str,
            help=("If dispatch == 'ODE', use the ODE fitting strategy. "
                  "If missing, use standard dismod_at commands."))
        parser.add_argument(
            'option',
            type=str,
            help="For the ODE fitting strategy, one of ('init', 'fit', 'students').")
        parser.add_argument(
            "-m",
            "--max-covariate-effect",
            nargs='?',
            type=float,
            default=2,
            help=("Maximum absolute covariate effect = multiplier * (covariate - reference). "
                  "Note that exp(effect) multiplies a model value to get the model value for "
                  "this covariate value. (Noise covariate multipliers are not included.)"))
        parser.add_argument(
            "-c",
            '--mulcov-values',
            nargs='+',
            type=str,
            default=None,
            help=("Constrain covariate multipliers to the specified values, "
                  "given as space-separated triples; the third element of "
                  "each triple is parsed as a float"))
        parser.add_argument("-o",
                            "--ode-hold-out-list",
                            nargs='?',
                            type=str,
                            default=None,
                            const=None,
                            help="Integrands to hold out during the ODE fit")
        parser.add_argument("-s",
                            "--random-seed",
                            nargs='?',
                            type=int,
                            default=None,
                            help="Random seed for the random_subsampling")
        parser.add_argument("-d",
                            "--random-subsample",
                            nargs='?',
                            type=int,
                            default=1000,
                            const=None,
                            help="Number of random subsamples to fit.")
        parser.add_argument(
            "-p",
            "--save-to-path",
            nargs='?',
            type=str,
            default=None,
            const=None,
            help="Path to directory where to store the results")
        parser.add_argument(
            "-t",
            "--reference_db",
            nargs='?',
            type=str,
            default="",
            const="",
            help=("Path to a reference database; fit results are compared "
                  "against it for testing purposes."))

        args = parser.parse_args(args[1:])
        args.cmd = sys.argv[0]
        if args.mulcov_values is None:
            args.mulcov_values = []
        else:
            args.mulcov_values = [[
                a, b, float(c)
            ] for a, b, c in np.asarray(args.mulcov_values).reshape(-1, 3)]
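        # For example (hypothetical values), '--mulcov-values x_sex iota 0.5'
        # parses to [['x_sex', 'iota', 0.5]].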
        return args

    args = cmd.split()
    p_args = parse_args(args)
    print('-' * 10)
    LOG.info(cmd)
    print('-' * 10)
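    # Seed precedence: a --random-seed argument wins; otherwise fall back to
    # the database option table; otherwise leave the seed unset.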

    if p_args.random_seed:
        random_seed = p_args.random_seed
        LOG.info(
            f"Setting the subsampling random_seed to the dmdismod argument value = {random_seed}"
        )
    else:
        db = DismodIO(p_args.path)
        option = db.option
        random_seed = option.loc[option.option_name == 'random_seed',
                                 'option_value']
        if not random_seed.empty:
            random_seed = int(random_seed.iloc[0])
            LOG.info(
                f"Setting the subsampling random_seed to the database option table value = {random_seed}"
            )
        else:
            random_seed = None
            LOG.info("The subsampling random_seed is not set.")

    if p_args.option == "init":
        db = init_ode_command(
            [_dismod_cmd_] + args[1:],
            max_covariate_effect=p_args.max_covariate_effect,
            mulcov_values=p_args.mulcov_values,
            ode_hold_out_list=p_args.ode_hold_out_list,
            random_seed=random_seed,
            random_subsample=p_args.random_subsample,
            save_to_path=p_args.save_to_path,
            reference_db=p_args.reference_db)
    elif p_args.option == "fit":
        db = fit_ode_command(
            [_dismod_cmd_] + args[1:],
            ode_hold_out_list=p_args.ode_hold_out_list,
            random_seed=random_seed,
            random_subsample=p_args.random_subsample,
            save_to_path=p_args.save_to_path,
            reference_db=p_args.reference_db)
    elif p_args.option == "students":
        fit_students_command(
            [_dismod_cmd_] + args[1:],
            ode_hold_out_list=p_args.ode_hold_out_list,
            # random_seed = p_args.random_seed,
            random_seed=random_seed,
            random_subsample=p_args.random_subsample,
            save_to_path=p_args.save_to_path,
            reference_db=p_args.reference_db)
Example #25
def reference_dbs(case):
    fit_ihme_path = _CASCADE_DATA_PATH_ / case
    return fit_ihme_path, dict(
        no_ode=DismodIO(fit_ihme_path / 'no_ode/no_ode.db'),
        yes_ode=DismodIO(fit_ihme_path / 'yes_ode/yes_ode.db'),
        students=DismodIO(fit_ihme_path / 'students/students.db'))
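# Usage sketch (the case name 't1_diabetes' is hypothetical; assumes
# _CASCADE_DATA_PATH_ holds fit_ihme case directories):
#   path, refs = reference_dbs('t1_diabetes')
#   refs['no_ode']  # DismodIO handle to <case>/no_ode/no_ode.db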
Example #26
def run_test(file_name, test_config, truth_in,
             start_from_truth=False, test_asymptotic=False):
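    """
    Build-and-fit smoke test: initialize the database, set truth_var, verify
    that predicting from the truth reproduces the data, then fit fixed (and
    both, when random effects are present) and check that the weighted
    residuals are ~0. Returns (success, db).
    """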
    from cascade_at.dismod.constants import _dismod_cmd_

    gradient_error = False
    db = None  # ensure db is defined for the except/finally blocks below
    try:
        db = DismodIO(file_name)

        truth = get_truth(test_config, truth_in)

        system([ _dismod_cmd_, file_name, 'init' ])

        # Initialize the truth_var table to the correct answer.
        # The table must exist before it can be set; db.create_tables does not
        # seem to work here, so use dismod_at to create it.
        system([ _dismod_cmd_, file_name, 'set', 'truth_var', 'prior_mean'])
        truth_var = db.truth_var
        truth_var['truth_var_value'] = truth
        db.truth_var = truth_var

        check_gradients = False  # developer toggle: run dismod_at derivative tests
        if check_gradients:
            try:
                # Check dismod gradients
                gradient_error = None
                option = db.option
                system([ _dismod_cmd_, file_name, 'set', 'option', 'derivative_test_fixed', 'adaptive'])
                system([ _dismod_cmd_, file_name, 'set', 'option', 'derivative_test_random', 'second-order'])
                system([ _dismod_cmd_, file_name, 'set', 'option', 'max_num_iter_fixed', '-1'])
                system([ _dismod_cmd_, file_name, 'set', 'option', 'max_num_iter_random', '100'])
                # Optionally start the gradient check from the truth
                if False:
                    system([ _dismod_cmd_, file_name, 'set', 'start_var', 'truth_var'])
                    system([ _dismod_cmd_, file_name, 'set', 'scale_var', 'truth_var'])
                system([ _dismod_cmd_, file_name, 'fit', 'fixed'])
                system([ _dismod_cmd_, file_name, 'set', 'start_var', 'fit_var'])
                system([ _dismod_cmd_, file_name, 'fit', 'both'])
            except Exception as ex:
                print (ex)
                gradient_error = ex
                raise ex
            finally:
                db.option = option

        if start_from_truth:
            system([ _dismod_cmd_, file_name, 'set', 'start_var', 'truth_var'])
            system([ _dismod_cmd_, file_name, 'set', 'scale_var', 'truth_var'])

        # Check that prediction matches the measured data
        cols = db.avgint.columns.tolist()
        db.avgint = db.data.rename(columns={'data_id':'avgint_id'})[cols]
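        # db.avgint now mirrors db.data, so 'predict truth_var' evaluates the
        # model at exactly the measured points.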
        system([ _dismod_cmd_, file_name, 'predict', 'truth_var'])
        check = np.allclose(db.data.meas_value, db.predict.avg_integrand, atol=1e-10, rtol=1e-10)
        assert check, 'ERROR: Predict from truth does not match the data'

        #
        # Fit fixed effects
        system([ _dismod_cmd_, file_name, 'fit', 'fixed'])
        if test_asymptotic:
            system([ _dismod_cmd_, file_name, 'sample', 'asymptotic', 'fixed', '10'])
        #
        # Fit both fixed and random effects
        system([ _dismod_cmd_, file_name, 'set', 'start_var', 'fit_var'])
        system([ _dismod_cmd_, file_name, 'set', 'scale_var', 'fit_var'])
        if (test_config['group_effects'] or test_config['node_effects']):
            system([ _dismod_cmd_, file_name, 'fit', 'both'])
        else:
            print('Skipping fit both because there are no random effects.')

        check = np.allclose(db.fit_data_subset.weighted_residual, [0]*len(db.fit_data_subset),
                            atol=1e-8, rtol=1e-8)
        assert check, 'ERROR: Measured values do not match the fit result integrand values.'

        print('Tests OK -- after fit both, fit_data_subset and the measured data agree.')

        if test_asymptotic:
            system([ _dismod_cmd_, file_name, 'sample', 'asymptotic', 'both', '10'])

        # -----------------------------------------------------------------------

        if gradient_error:
            print('ERROR: Gradient check failed.')
            print(gradient_error)
            return False, db
        else:
            print('Test OK')
            return True, db

    except Exception as ex:
        print(ex)
        print('Test FAILED')
        return False, db
    finally:
        if db is not None:
            print(f'fit_var_value: {db.fit_var.fit_var_value.tolist()}')
            print(f'RMS(fit_var_value - truth): {np.sqrt(np.sum((db.fit_var.fit_var_value - db.truth_var.truth_var_value)**2))}')
            print(db.var.merge(db.fit_var, left_on='var_id', right_on='fit_var_id')
                  .drop(columns=['integrand_id', 'fit_var_id', 'residual_value', 'residual_dage',
                                 'residual_dtime', 'lagrange_value', 'lagrange_dage', 'lagrange_dtime']))
            # Root of the sum of squared weighted residuals
            print(f'RMS(weighted_residual): {np.sqrt(np.sum((db.fit_data_subset.weighted_residual)**2))}')
            print(db.data.merge(db.fit_data_subset, left_on='data_id', right_on='fit_data_subset_id')
                  .drop(columns=['fit_data_subset_id', 'integrand_id', 'weight_id', 'eta', 'nu', 'meas_std', 'avg_integrand', 'hold_out']))
Example #27
def _ode_command(args,
                 type='',
                 random_subsample=None,
                 ode_hold_out_list=None,
                 random_seed=None,
                 save_to_path=None,
                 reference_db=None,
                 max_covariate_effect=2,
                 mulcov_values=None,
                 nu=5):
    """
    1) Initialize the database for the non-ODE/ODE fitting strategy
    2) Hold out the ODE strategy related integrands
    3) Fit both on a subset of the integrands. Init step fits those corresponding directly 
       to the rates (e.g. Sincidence, chi and rho). Omega is assumed to be constrained.
       Fit and students setps fit all but the ode_hold_out_list.
    4) Restore the data table to it's original state
    """

    LOG.info(f"_ode_command: {' '.join(args)}")
    dismod, path, cmd, option = args[:4]

    db = setup_db(path, dismod=dismod, ode_hold_out_list=ode_hold_out_list)
    try:
        if reference_db and isinstance(reference_db, str):
            reference_db = DismodIO(reference_db)

        # Seed used to randomly subsample data
        if random_seed in [0, None]:
            random_seed = int(time.time())
        random.seed(random_seed)
        msg = '\nrandom_seed  = ' + str(random_seed)
        LOG.info(msg)

        # Subsample the data
        for integrand in db.integrands:
            db.random_subsample_data(integrand, max_sample=random_subsample)

        db.compress_age_time_intervals()

        if type == 'no_ode':
            db.setup_ode_fit(max_covariate_effect=max_covariate_effect,
                             mulcov_values=mulcov_values,
                             ode_hold_out_list=ode_hold_out_list)
            hold_out_integrands = db.yes_ode_integrands
        elif type in ('yes_ode', 'students'):
            hold_out_integrands = db.ode_hold_out_list
        else:
            raise ValueError(f"Unrecognized fit type {type!r}.")

        # Remove integrands appropriate to fit type
        db.hold_out_data(integrand_names=hold_out_integrands, hold_out=1)

        if type == 'no_ode':
            system(f'{db.dismod} {db.path} init')
        elif type in ('yes_ode', 'students'):
            try:
                fit_var = db.fit_var
                if fit_var.empty:
                    fit_var = None
            except ValueError:
                fit_var = None
            if fit_var is not None:
                system(f'{db.dismod} {db.path} set start_var fit_var')
            else:
                system(f'{db.dismod} {db.path} set start_var prior_mean')

        if type == 'students':
            db.set_student_likelihoods(factor_eta=1e-2, nu=nu)
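            # set_student_likelihoods presumably switches the data densities to
            # (log-)Student's-t with the given degrees of freedom nu.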

        if reference_db:
            db.check_input_tables(reference_db)

        db.fit(msg=f'fit_ode -- {cmd}_{option}')

        db.save_database(save_to_path)
        if reference_db:
            db.check_output_tables(reference_db)
        cmd = f"dismodat.py {db.path} db2csv"
        LOG.info(cmd)
        os.system(cmd)
    finally:
        db.save_database(db.path.parent /
                         f'{db.path.stem}_ODE_{type}{db.path.suffix}')
        LOG.info("Restoring the data table to its original state.")
        db.data = db.input_data
    return db
Example #28
def dm(dismod):
    return DismodIO(path=Path('dismod-init.db'))