Exemplo n.º 1
0
def main():
    argv = sys.argv
    assert len(argv) == 2, 'usage: python generate_map_output.py model_id'

    # download the requested model
    id = int(argv[1])
    dm = dismod3.fetch_disease_model(id)

    # generate a list of dicts of requested data
    map_output_list = generate_map_output(dm)

    # dump list of dicts as csv (in memory, not on disk, hence the StringIO)
    str_io = StringIO.StringIO()
    csv_f = csv.DictWriter(str_io, output_fields)
    csv_f.writerows(map_output_list)

    # print out csv file
    str_io.seek(0)
    print str_io.read()
Exemplo n.º 2
0
Arquivo: hep_c.py Projeto: flaxter/gbd
def hep_c_fit(regions, prediction_years, data_year_start=-inf, data_year_end=inf, egypt_flag=False):
    """ Fit prevalence for regions and years specified

    Parameters
    ----------
    regions : list
      GBD region names (in dismod3.utils.clean form) whose data is pooled
    prediction_years : list
      years passed to gbd_keys to select which keys to predict
    data_year_start, data_year_end : float, optional
      only studies with [year_start, year_end] overlapping this window are kept
    egypt_flag : bool, optional
      if True, fit using only Egyptian data; otherwise Egypt is excluded

    Returns
    -------
    dm : disease model object with mcmc/map fit results stored and posted back
    """
    print "\n***************************\nfitting %s for %s (using data from years %f to %f)" % (
        regions,
        prediction_years,
        data_year_start,
        data_year_end,
    )

    ## load model to fit
    # dm = DiseaseJson(file('tests/hep_c.json').read())
    # NOTE(review): model id is hard-coded here
    id = 8788
    dismod3.disease_json.create_disease_model_dir(id)
    dm = dismod3.fetch_disease_model(id)

    ## adjust the expert priors
    dm.params["global_priors"]["heterogeneity"]["prevalence"] = "Very"
    dm.params["global_priors"]["smoothness"]["prevalence"]["amount"] = "Slightly"
    # TODO: construct examples of adjusting other covariates
    # ipdb> dm.params['global_priors'].keys()
    # [u'increasing', u'unimodal', u'level_bounds', u'y_maximum', u'note', u'level_value', u'decreasing', u'parameter_age_mesh', u'heterogeneity', u'smoothness']
    # ipdb> dm.params['global_priors']['smoothness']['prevalence']
    # {u'age_start': 0, u'amount': u'Moderately', u'age_end': 100}

    # include a study-level covariate for 'bias'
    covariates_dict = dm.get_covariates()
    covariates_dict["Study_level"]["bias"]["rate"]["value"] = 1
    # TODO: construct additional examples of adjusting covariates

    ## select relevant prevalence data
    # TODO: streamline data selection functions
    if egypt_flag:
        dm.data = [d for d in dm.data if d["country_iso3_code"] == "EGY"]
    else:
        # keep rows from the requested regions whose study period overlaps
        # the [data_year_start, data_year_end] window; Egypt is excluded
        dm.data = [
            d
            for d in dm.data
            if dismod3.utils.clean(d["gbd_region"]) in regions
            and float(d["year_end"]) >= data_year_start
            and float(d["year_start"]) <= data_year_end
            and d["country_iso3_code"] != "EGY"
        ]

    ## create, fit, and save rate model
    dm.vars = {}

    keys = dismod3.utils.gbd_keys(type_list=["prevalence"], region_list=regions, year_list=prediction_years)
    # TODO: consider how to do this for models that use the complete disease model
    # TODO: consider adding hierarchical similarity priors for the male and female models
    # only one submodel is built; it is shared across all keys below
    k0 = keys[0]  # looks like k0='prevalence+asia_south+1990+male'
    dm.vars[k0] = neg_binom_model.setup(dm, k0, dm.data)

    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.sample(iter=50000, burn=25000, thin=50, verbose=1)

    # make map object so we can compute AIC and BIC
    dm.map = mc.MAP(dm.vars)
    dm.map.fit()

    for k in keys:
        # save the results in the disease model
        # (every key points at the single fitted submodel for k0)
        dm.vars[k] = dm.vars[k0]

        neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k])

        # check autocorrelation to confirm chain has mixed
        test_model.summarize_acorr(dm.vars[k]["rate_stoch"].trace())

        # generate plots of results
        dismod3.tile_plot_disease_model(dm, [k], defaults={"ymax": 0.15, "alpha": 0.5})
        # random() suffix avoids overwriting output from earlier runs
        dm.savefig("dm-%d-posterior-%s.%f.png" % (dm.id, k, random()))

    # summarize fit quality graphically, as well as parameter posteriors
    dismod3.plotting.plot_posterior_predicted_checks(dm, k0)
    dm.savefig("dm-%d-check-%s.%f.png" % (dm.id, k0, random()))
    dismod3.post_disease_model(dm)
    return dm
Exemplo n.º 3
0
def fit(id):
    """ Download model, conduct fit, and upload results

    Parameters
    ----------
    id : int
      The model id number for the job to fit

Commandline Version:

[omak] dismod4.abie ] test/parameter.sh
[omak] dismod4.abie ] example/simulate.py 5 1 100
[omak] dismod4.abie ] /tmp/dismod4_csv test/parameter.csv measure_in.csv sfun_in.csv
dismod4_csv: Attempt to overwrite the existing file
sfun_in.csv
[omak] dismod4.abie ] /tmp/dismod4_csv test/parameter.csv measure_in.csv sfun_in.csv sfun_out.csv measure_out.csv

    """

    dm = dismod3.get_disease_model(id)
    mort = dismod3.fetch_disease_model('all-cause_mortality')
    dm.data += mort.data

    ## convert model to csv file
    column_names = 'time_lower,time_upper,age_lower,age_upper,likelihood_name,likelihood_sigma,likelihood_beta,value,integrand'.split(',')
    data_list = []

    # add all the model data to the data list
    for d in dm.data:
        row = {}
        row['time_lower'] = d['year_start']
        row['time_upper'] = d['year_end']  # TODO: determine if this should be +1

        row['age_lower'] = d['age_start']+1.
        row['age_upper'] = d['age_end']+1.  # TODO: determine if this should be +1


        row['likelihood_name'] = 'gaussian'
        row['likelihood_sigma'] = .0001  # TODO: use more accurate sigma
        row['likelihood_beta'] = 1.

        row['value'] = d['value'] / float(d.get('units', 1.))

        for dm3_type, dm4_type in [['remission data', 'remission'],
                                   ['excess-mortality data', 'excess'],
                                   ['incidence data', 'incidence'],
                                   ['mrr data', 'risk'],
                                   ['prevalence data', 'prevalence'],
                                   ['all-cause mortality data', 'all_cause'],
                                   ]:
            if d['data_type'] == dm3_type:
                row['integrand'] = dm4_type
                data_list.append(row)
                break

    # add the time/age/regions that we want to predict to the data list as well
    age_mesh = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    index_dict = {}
    for year in [1990, 2005]:
        for age in age_mesh:
            for type in ['remission', 'excess', 'incidence', 'risk', 'prevalence']:
                row = {}

                row['time_lower'] = year
                row['time_upper'] = year

                row['age_lower'] = age
                row['age_upper'] = age

                row['likelihood_name'] = 'gaussian'
                row['likelihood_sigma'] = inf
                row['likelihood_beta'] = 1.

                row['value'] = 0.

                row['integrand'] = type
                
                index_dict[(type, year, age)] = len(data_list)
                data_list.append(row)


    # save the csv file
    import csv
    fname = dismod3.settings.JOB_WORKING_DIR % id + '/measure_in.csv'

    try:
        f = open(fname, 'w')
        csv.writer(f).writerow(column_names)
        csv.DictWriter(f, column_names).writerows(data_list)
        f.close()
    except IOError, e:
        print 'Warning: could not create data csv.  Maybe it exists already?\n%s' % e
Exemplo n.º 4
0
def fit_all(id):
    """ Enqueues all jobs necessary to fit specified model
    to the cluster

    Parameters
    ----------
    id : int
      The model id number for the job to fit

    Example
    -------
    >>> import fit_all
    >>> fit_all.fit_all(2552)
    """

    # TODO: store all disease information in this dir already, so fetching is not necessary
    # download the disease model json and store it in the working dir
    print 'downloading disease model'
    dismod3.disease_json.create_disease_model_dir(id)
    dm = dismod3.fetch_disease_model(id)
    
    # get the all-cause mortality data, and merge it into the model
    mort = dismod3.fetch_disease_model('all-cause_mortality')
    dm.data += mort.data
    dm.save()

    # fit empirical priors (by pooling data from all regions)
    dir = dismod3.settings.JOB_WORKING_DIR % id  # TODO: refactor into a function
    emp_names = []
    for t in ['prevalence']:
        o = '%s/empirical_priors/stdout/%s' % (dir, t)
        e = '%s/empirical_priors/stderr/%s' % (dir, t)
        name_str = '%s-%d' %(t[0], id)
        emp_names.append(name_str)
        call_str = 'qsub -cwd -o %s -e %s ' % (o, e) \
                        + '-N %s ' % name_str \
                        + 'run_on_cluster.sh fit_emp_prior.py %d -t %s' % (id, t)
        subprocess.call(call_str, shell=True)

    # directory to save the country level posterior csv files
    temp_dir = dir + '/posterior/country_level_posterior_dm-' + str(id) + '/'
    if os.path.exists(temp_dir):
        rmtree(temp_dir)
    os.makedirs(temp_dir)

    #fit each region/year/sex individually for this model
    hold_str = '-hold_jid %s ' % ','.join(emp_names)
    post_names = []
    for ii, r in enumerate(dismod3.gbd_regions):
        for s in dismod3.gbd_sexes:
            for y in dismod3.gbd_years:
                k = '%s+%s+%s' % (clean(r), s, y)
                o = '%s/posterior/stdout/%s' % (dir, k)
                e = '%s/posterior/stderr/%s' % (dir, k)
                name_str = '%s%d%s%s%d' % (r[0], ii+1, s[0], str(y)[-1], id)
                post_names.append(name_str)
                call_str = 'qsub -cwd -o %s -e %s ' % (o,e) \
                           + hold_str \
                           + '-N %s ' % name_str \
                           + 'run_on_cluster.sh fit_posterior_prevonly.py %d -r %s -s %s -y %s' % (id, clean(r), s, y)
                subprocess.call(call_str, shell=True)

    # after all posteriors have finished running, upload disease model json
    hold_str = '-hold_jid %s ' % ','.join(post_names)
    o = '%s/upload.stdout' % dir
    e = '%s/upload.stderr' % dir
    call_str = 'qsub -cwd -o %s -e %s ' % (o,e) \
               + hold_str \
               + '-N upld-%s ' % id \
               + 'run_on_cluster.sh upload_fits.py %d' % id
    subprocess.call(call_str, shell=True)