Exemplo n.º 1
0
def fit_emp_prior(id, param_type):
    """ Fit the empirical prior of one parameter type for one model

    Loads the disease model, fits the empirical prior with
    dismod3.neg_binom_model, saves a set of diagnostic plots, then saves
    the model to json and tries to post it.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    param_type : str, one of incidence, prevalence, remission, excess-mortality
      The disease parameter to generate empirical priors for

    Returns
    -------
    The disease model object that was loaded, fitted and saved.

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    """
    dm = dismod3.load_disease_model(id)

    # run the fit; the sampler database goes under the job working directory
    import dismod3.neg_binom_model as emp_prior_model
    job_dir = dismod3.settings.JOB_WORKING_DIR % id
    pickle_name = '%s/empirical_priors/pickle/dm-%d-emp_prior-%s.pickle' % (job_dir, id, param_type)
    emp_prior_model.fit_emp_prior(dm, param_type, dbname=pickle_name)

    # one tile plot of the empirical prior per sex/year combination
    from pylab import subplot
    for sex in dismod3.settings.gbd_sexes:
        for year in dismod3.settings.gbd_years:
            keys = dismod3.utils.gbd_keys(region_list=['all'], year_list=[year],
                                          sex_list=[sex], type_list=[param_type])
            dismod3.tile_plot_disease_model(dm, keys, defaults={})
            tile_name = 'dm-%d-emp_prior-%s-%s-%s.png' % (id, param_type, sex, year)
            dm.savefig(tile_name)

    # one plot per effect type
    # TODO: put this in a separate script, which runs after all empirical priors are computed
    for effect in ('alpha', 'beta', 'gamma', 'delta'):
        dismod3.plotting.plot_empirical_prior_effects([dm], effect)
        effect_name = 'dm-%d-emp-prior-%s-%s.png' % (id, param_type, effect)
        dm.savefig(effect_name)

    # posterior predictive check: summarizes fit quality and parameter posteriors.
    # `keys` is whatever the last sex/year iteration above left behind.
    check_key = keys[0]
    dm.vars = {check_key: dm.vars}   # hack: the check plot looks vars up by key
    dismod3.plotting.plot_posterior_predicted_checks(dm, check_key)
    check_name = 'dm-%d-emp-prior-check-%s.png' % (dm.id, param_type)
    dm.savefig(check_name)
    dm.vars = dm.vars[check_key]   # undo the hack

    # saving comes last, because it removes things from the disease model
    # that the plotting functions, etc., might need
    json_name = 'dm-%d-prior-%s.json' % (id, param_type)
    dm.save(json_name)
    dismod3.try_posting_disease_model(dm, ntries=5)

    return dm
Exemplo n.º 2
0
Arquivo: hep_c.py Projeto: flaxter/gbd
def hep_c_fit(regions, prediction_years, data_year_start=-inf, data_year_end=inf, egypt_flag=False):
    """ Fit prevalence for regions and years specified

    Parameters
    ----------
    regions : list of str
      clean()-ed region names; a data row is kept when its cleaned
      'gbd_region' is in this list, and the list is also passed to
      gbd_keys as region_list
    prediction_years : list
      passed to gbd_keys as year_list
    data_year_start, data_year_end : float
      a data row is kept when its 'year_end' >= data_year_start and its
      'year_start' <= data_year_end
    egypt_flag : bool
      if true, use exactly the rows with country code 'EGY' (the region
      and year filters are not applied); otherwise rows with country
      code 'EGY' are excluded

    Returns
    -------
    The fitted disease model, after plots are saved and the model is posted.
    """
    banner = "\n***************************\nfitting %s for %s (using data from years %f to %f)"
    print(banner % (regions, prediction_years, data_year_start, data_year_end))

    ## load model to fit
    model_id = 8788
    dismod3.disease_json.create_disease_model_dir(model_id)
    dm = dismod3.fetch_disease_model(model_id)

    ## adjust the expert priors
    # TODO: construct examples of adjusting other covariates
    global_priors = dm.params["global_priors"]
    global_priors["heterogeneity"]["prevalence"] = "Very"
    global_priors["smoothness"]["prevalence"]["amount"] = "Slightly"

    # include a study-level covariate for 'bias'
    # TODO: construct additional examples of adjusting covariates
    covariates_dict = dm.get_covariates()
    covariates_dict["Study_level"]["bias"]["rate"]["value"] = 1

    ## select relevant prevalence data
    # TODO: streamline data selection functions
    if egypt_flag:
        selected = [row for row in dm.data if row["country_iso3_code"] == "EGY"]
    else:
        selected = []
        for row in dm.data:
            if dismod3.utils.clean(row["gbd_region"]) not in regions:
                continue
            # negated comparisons (rather than flipped ones) keep the
            # treatment of non-comparable values such as NaN unchanged
            if not (float(row["year_end"]) >= data_year_start):
                continue
            if not (float(row["year_start"]) <= data_year_end):
                continue
            if row["country_iso3_code"] == "EGY":
                continue
            selected.append(row)
    dm.data = selected

    ## create, fit, and save rate model
    dm.vars = {}

    keys = dismod3.utils.gbd_keys(type_list=["prevalence"], region_list=regions, year_list=prediction_years)
    # TODO: consider how to do this for models that use the complete disease model
    # TODO: consider adding hierarchical similarity priors for the male and female models
    first_key = keys[0]  # looks like 'prevalence+asia_south+1990+male'
    dm.vars[first_key] = neg_binom_model.setup(dm, first_key, dm.data)

    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.sample(iter=50000, burn=25000, thin=50, verbose=1)

    # make map object so we can compute AIC and BIC
    dm.map = mc.MAP(dm.vars)
    dm.map.fit()

    plot_defaults = {"ymax": 0.15, "alpha": 0.5}
    for key in keys:
        # every key shares the variables that were fit for the first key
        dm.vars[key] = dm.vars[first_key]
        neg_binom_model.store_mcmc_fit(dm, key, dm.vars[key])

        # check autocorrelation to confirm chain has mixed
        rate_trace = dm.vars[key]["rate_stoch"].trace()
        test_model.summarize_acorr(rate_trace)

        # generate plots of results; a fresh dict is passed on each call
        dismod3.tile_plot_disease_model(dm, [key], defaults=dict(plot_defaults))
        dm.savefig("dm-%d-posterior-%s.%f.png" % (dm.id, key, random()))

    # summarize fit quality graphically, as well as parameter posteriors
    dismod3.plotting.plot_posterior_predicted_checks(dm, first_key)
    dm.savefig("dm-%d-check-%s.%f.png" % (dm.id, first_key, random()))

    dismod3.post_disease_model(dm)
    return dm
Exemplo n.º 3
0
Arquivo: hep_c.py Projeto: flaxter/gbd
    for y in [1990, 2005]:
        for s in ["male", "female"]:
            key = "prevalence+egypt+%d+%s" % (y, s)
            prev_1 = neg_binom_model.calc_rate_trace(dm_egypt, key, dm_egypt.vars[key])
            pop_1 = neg_binom_model.population_by_age[("EGY", str(y), s)]

            key = "prevalence+north_africa_middle_east+%d+%s" % (y, s)
            prev_0 = neg_binom_model.calc_rate_trace(dm_na_me, key, dm_na_me.vars[key])
            pop_0 = neg_binom_model.regional_population(key)

            # generate population weighted average
            prev = (prev_0 * (pop_0 - pop_1) + prev_1 * pop_1) / pop_0
            neg_binom_model.store_mcmc_fit(dm_na_me, key, None, prev)

            # generate plots of results
            dismod3.tile_plot_disease_model(dm_na_me, [key], defaults={"ymax": 0.15, "alpha": 0.5})
            dm_na_me.savefig("dm-%d-posterior-na_me_w_egypt.%f.png" % (dm_na_me.id, random()))

            # save results
            dismod3.post_disease_model(dm_na_me)

    dm = hep_c_fit(
        "caribbean latin_america_tropical latin_america_andean latin_america_central latin_america_southern".split(),
        [1990, 2005],
    )
    dm = hep_c_fit(
        "sub-saharan_africa_central sub-saharan_africa_southern sub-saharan_africa_west".split(), [1990, 2005]
    )

    for (
        r
Exemplo n.º 4
0
                                       ['excess-mortality', 'excess'],
                                       ['incidence', 'incidence'],
                                       ['mrr', 'risk'],
                                       ['prevalence', 'prevalence'],
                                       ]:
                x = [0]
                y = [0]
                for age in age_mesh:
                    x.append(age)
                    y.append(measure_out.model[index_dict[(dm4_type, year, age)]])

                key = dismod3.gbd_key_for(dm3_type, r, year, sex)
                est = dismod3.utils.interpolate(x, y, dm.get_estimate_age_mesh())
                dm.set_truth(key, est)

                dismod3.tile_plot_disease_model(dm, [key], defaults={})
                try:
                    pl.savefig(dismod3.settings.JOB_WORKING_DIR % id + '/dm-%d-posterior-%s-%s-%s.png' % (id, dm3_type, sex, year))   # TODO: refactor naming into its own function
                except IOError, e:
                    print 'Warning: could not create png.  Maybe it exists already?\n%s' % e

    # save results (do this last, because it removes things from the disease model that plotting functions, etc., might need)
    dismod3.try_posting_disease_model(dm, ntries=5)

    print
    print '********************'
    print 'computation complete'
    print '********************'

def main():
    import optparse
Exemplo n.º 5
0
def fit_posterior(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model

    Loads the disease model, fits it first with method='map' and then
    with method='mcmc', saves tile plots and posterior predictive check
    plots, saves the model to json, and finally saves the country level
    posterior.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years

    Returns
    -------
    The disease model object that was loaded, fitted and saved.

    Example
    -------
    >>> import fit_posterior
    >>> fit_posterior.fit_posterior(2552, 'asia_east', 'male', '2005')
    """
    dm = dismod3.load_disease_model(id)

    # All keys for this region/sex/year.  This list is reused for fitting,
    # for the posterior predictive checks and for saving, so nothing below
    # may rebind or modify it.
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex])

    # fit the model
    job_dir = dismod3.settings.JOB_WORKING_DIR % id
    import dismod3.gbd_disease_model as model
    model.fit(dm, method='map', keys=keys, verbose=1)     ## first generate decent initial conditions
    ## then sample the posterior via MCMC
    model.fit(dm, method='mcmc', keys=keys, iter=50000, thin=25, burn=25000, verbose=1,
              dbname='%s/posterior/pickle/dm-%d-posterior-%s-%s-%s.pickle' % (job_dir, id, region, sex, year))

    # generate plots of results: one with every key, then one per output data type
    dismod3.tile_plot_disease_model(dm, keys, defaults={})
    dm.savefig('dm-%d-posterior-%s.png' % (id, '+'.join(['all', region, sex, year])))  # TODO: refactor naming into its own function (disease_json.save_image perhaps)
    for param_type in dismod3.settings.output_data_types:
        # separate name, so the full key list above is not clobbered
        type_keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex], type_list=[param_type])
        dismod3.tile_plot_disease_model(dm, type_keys, defaults={})
        dm.savefig('dm-%d-posterior-%s-%s-%s-%s.png' % (id, dismod3.utils.clean(param_type), region, sex, year))   # TODO: refactor naming into its own function

    # summarize fit quality graphically, as well as parameter posteriors;
    # keys whose vars have no (or empty) 'data' entry are skipped
    for k in keys:
        if dm.vars[k].get('data'):
            dismod3.plotting.plot_posterior_predicted_checks(dm, k)
            dm.savefig('dm-%d-check-%s.png' % (dm.id, k))

    # save results (do this last, because it removes things from the disease
    # model that plotting functions, etc., might need)
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (id, region, sex, year), keys_to_save=keys)

    # save country level posterior
    save_country_level_posterior(dm, region, year, sex)

    return dm