Example no. 1
def validate_fit_consistent_model():
    # create model and priors
    vars = consistent_model.consistent_model(model, 'all', 'total', 'all', {})

    m = fit_model.fit_consistent_model(vars)

    return m
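Note: in the later examples `fit_consistent_model` is unpacked into a MAP estimate and an MCMC object and given explicit sampler settings. A minimal sketch of that calling convention, assuming the same `vars` dictionary built above (the settings shown are illustrative):

# sketch only: fit_consistent_model returns a (MAP, MCMC) pair, as the
# `dm.map, dm.mcmc = ...` calls in the later examples show
m_map, m_mcmc = fit_model.fit_consistent_model(vars,
                                               iter=10000,  # total MCMC iterations
                                               burn=5000,   # warm-up draws discarded
                                               thin=25,     # keep every 25th draw
                                               tune_interval=100)  # step-tuning cadence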
Example no. 2
def fit_simulated(dm, area, sex, year):
    # fit with MAP followed by MCMC; the commented-out call is a quick smoke test
    #dm.map, dm.mcmc = fit_model.fit_consistent_model(dm.vars, iter=101, burn=0, thin=1, tune_interval=100)
    dm.map, dm.mcmc = fit_model.fit_consistent_model(dm.vars, iter=10000, burn=5000, thin=25, tune_interval=100)

    # predict each rate type for the requested area/sex/year and store the draws
    posteriors = {}
    for t in 'i r f p rr pf'.split():
        est_k = covariate_model.predict_for(dm.model, area, sex, year, area, sex, year, 1., dm.vars[t], 0., pl.inf)
        posteriors[t] = est_k
    dm.posteriors = posteriors
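Each `est_k` stored above is an array of posterior draws (one row per saved MCMC sample). A sketch of reducing those draws to a point estimate and an uncertainty band, assuming `dm` has been fit with `fit_simulated` and assuming the draws-by-ages layout that the later examples' `pl.median(..., axis=0)` reduction implies:

# sketch only: summarize posterior draws for one rate type
est_k = dm.posteriors['p']                       # draws x ages (assumed layout)
point_est = pl.median(est_k, axis=0)             # posterior median at each age
lower, upper = pl.percentile(est_k, [2.5, 97.5], axis=0)  # illustrative 95% band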
Example no. 3
# root_area is assumed to be defined earlier (a GBD region name, as in the
# examples below)
subtree = nx.traversal.bfs_tree(model.hierarchy, root_area)
relevant_rows = [i for i, r in model.input_data.T.iteritems()
                     if r['area'] in subtree
                     and r['year_end'] >= 1997
                     and r['sex'] in ['male', 'total']
                     and r['data_type'] in ['pf', 'm']]
model.input_data = model.input_data.ix[relevant_rows]

## create and fit consistent model at gbd region level
vars = consistent_model.consistent_model(model,
                                         root_area=root_area,
                                         root_sex='male',
                                         root_year=2005,
                                         priors={})
posterior_model = fit_model.fit_consistent_model(vars,
                                                 iter=1030,
                                                 burn=500,
                                                 thin=5,
                                                 tune_interval=100)

## generate estimates
predict_area = root_area
posteriors = {}
for t in 'i r f p rr pf'.split():
    posteriors[t] = pl.median(covariate_model.predict_for(
        model.output_template, model.hierarchy, root_area, 'male', 2005,
        predict_area, 'male', 2005, vars[t]),
                              axis=0)

graphics.all_plots(model, vars, {}, posteriors)
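The row filter above iterates over the transposed DataFrame; an equivalent boolean-indexing version is sketched below (a sketch only, assuming `Series.isin` is available in the pandas version in use):

# sketch only: same filter via boolean indexing (assumes Series.isin)
d = model.input_data
keep = (d['area'].isin(list(subtree))
        & (d['year_end'] >= 1997)
        & d['sex'].isin(['male', 'total'])
        & d['data_type'].isin(['pf', 'm']))
model.input_data = d[keep]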
Example no. 4
root_area = 'latin_america_central'
subtree = nx.traversal.bfs_tree(model.hierarchy, root_area)
relevant_rows = [i for i, r in model.input_data.T.iteritems()
                     if r['area'] in subtree
                     and r['year_end'] >= 1997
                     and r['sex'] in ['male', 'total']]
model.input_data = model.input_data.ix[relevant_rows]

## create and fit consistent model at gbd region level
vars = consistent_model.consistent_model(model,
                                         root_area=root_area,
                                         root_sex='male',
                                         root_year=2005,
                                         priors={})
posterior_model = fit_model.fit_consistent_model(vars,
                                                 iter=3003,
                                                 burn=1500,
                                                 thin=10,
                                                 tune_interval=100)

## generate estimates for latin_america_central, male, 2005
predict_area = root_area
posteriors = {}
for t in 'i r f p rr pf'.split():
    posteriors[t] = pl.median(covariate_model.predict_for(
        model.output_template, model.hierarchy, root_area, 'male', 2005,
        predict_area, 'male', 2005, vars[t]),
                              axis=0)

graphics.all_plots(model, vars, {}, posteriors)
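These settings retain roughly (3003 - 1500) / 10 ≈ 150 posterior draws; the same arithmetic gives the effective sample budget for every `fit_consistent_model` call in this listing:

# saved posterior draws is roughly (iter - burn) // thin
n_draws = (3003 - 1500) // 10  # ~150 draws with the settings above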
Example no. 5
model.parameters['r']['level_value'] = dict(age_before=100, age_after=100, value=0.)

# no covariates
model.input_data = model.input_data.drop([col for col in model.input_data.columns if col.startswith('x_')], axis=1)

# create model for (europe_western, male, 2005)
root_area = 'europe_western'
subtree = nx.traversal.bfs_tree(model.hierarchy, root_area)
relevant_rows = [i for i, r in model.input_data.T.iteritems()
                     if r['area'] in subtree
                     and r['year_end'] >= 1997
                     and r['sex'] in ['male', 'total']
                     and r['data_type'] in ['pf', 'm']]
model.input_data = model.input_data.ix[relevant_rows]


## create and fit consistent model at gbd region level
vars = consistent_model.consistent_model(model, root_area=root_area, root_sex='male', root_year=2005, priors={})
posterior_model = fit_model.fit_consistent_model(vars, iter=1030, burn=500, thin=5, tune_interval=100)


## generate estimates
predict_area = root_area
posteriors = {}
for t in 'i r f p rr pf'.split():
    posteriors[t] = pl.median(covariate_model.predict_for(
        model.output_template, model.hierarchy, root_area, 'male', 2005,
        predict_area, 'male', 2005, vars[t]),
                              axis=0)

graphics.all_plots(model, vars, {}, posteriors)
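The `level_value` prior at the top of this example holds the remission rate `r` at the given value for ages before `age_before` and after `age_after`; with both set to 100 and value 0, remission is effectively pinned to zero across the age range. A sketch of the same idiom for another rate type (the parameter values here are illustrative):

# illustrative only: hold excess mortality 'f' at 0 for ages under 5
model.parameters['f']['level_value'] = dict(age_before=5,    # applies below this age
                                            age_after=100,   # and above this age
                                            value=0.)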
Example no. 6
# create model for (europe_western, male, 2005)
root_area = 'europe_western'
subtree = nx.traversal.bfs_tree(model.hierarchy, root_area)
relevant_rows = [i for i, r in model.input_data.T.iteritems()
                     if r['area'] in subtree
                     and r['year_end'] >= 1997
                     and r['sex'] in ['male', 'total']]
model.input_data = model.input_data.ix[relevant_rows]

## create and fit consistent model at gbd region level
vars = consistent_model.consistent_model(model,
                                         root_area=root_area,
                                         root_sex='male',
                                         root_year=2005,
                                         priors={})
posterior_model = fit_model.fit_consistent_model(vars,
                                                 iter=101,
                                                 burn=0,
                                                 thin=1)

## generate estimates
predict_area = root_area
posteriors = {}
for t in 'i r f p rr pf'.split():
    posteriors[t] = pl.median(covariate_model.predict_for(
        model.output_template, model.hierarchy, root_area, 'male', 2005,
        predict_area, 'male', 2005, vars[t]),
                              axis=0)

graphics.all_plots(model, vars, {}, posteriors)
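`iter=101, burn=0, thin=1` is a quick smoke-test configuration (about 100 saved draws, far too few for reliable estimates). Before trusting a longer run, mixing can be checked with the convergence helper used in the last two examples of this listing:

# sketch: inspect MCMC mixing before using the estimates
graphics.plot_convergence_diag(vars)
pl.show()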
Example no. 7

# create model for (latin_america_central, male, 2005)
root_area = "latin_america_central"
subtree = nx.traversal.bfs_tree(model.hierarchy, root_area)
relevant_rows = [
    i
    for i, r in model.input_data.T.iteritems()
    if r["area"] in subtree and r["year_end"] >= 1997 and r["sex"] in ["male", "total"]
]
model.input_data = model.input_data.ix[relevant_rows]


## create and fit consistent model at gbd region level
vars = consistent_model.consistent_model(model, root_area=root_area, root_sex="male", root_year=2005, priors={})
posterior_model = fit_model.fit_consistent_model(vars, iter=3003, burn=1500, thin=10, tune_interval=100)


## generate estimates for latin_america_central, male, 2005
predict_area = root_area
posteriors = {}
for t in "i r f p rr pf".split():
    posteriors[t] = pl.median(
        covariate_model.predict_for(
            model.output_template, model.hierarchy, root_area, "male", 2005, predict_area, "male", 2005, vars[t]
        ),
        axis=0,
    )

graphics.all_plots(model, vars, {}, posteriors)
Example no. 8
model.parameters['r']['level_value'] = dict(age_before=100, age_after=100, value=12.)
model.parameters['r']['level_bounds'] = dict(lower=0., upper=1000.)

# no covariates
model.input_data = model.input_data.drop([col for col in model.input_data.columns if col.startswith('x_')], axis=1)

# create model for (europe_western, male, 2005)
root_area = 'europe_western'
subtree = nx.traversal.bfs_tree(model.hierarchy, root_area)
relevant_rows = [i for i, r in model.input_data.T.iteritems()
                     if r['area'] in subtree
                     and r['year_end'] >= 1997
                     and r['sex'] in ['male', 'total']]
model.input_data = model.input_data.ix[relevant_rows]


## create and fit consistent model at gbd region level
vars = consistent_model.consistent_model(model, root_area=root_area, root_sex='male', root_year=2005, priors={})
posterior_model = fit_model.fit_consistent_model(vars, iter=101, burn=0, thin=1)


## generate estimates
predict_area = root_area
posteriors = {}
for t in 'i r f p rr pf'.split():
    posteriors[t] = pl.median(covariate_model.predict_for(
        model.output_template, model.hierarchy, root_area, 'male', 2005,
        predict_area, 'male', 2005, vars[t]),
                              axis=0)

graphics.all_plots(model, vars, {}, posteriors)
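`level_bounds` complements `level_value`: it constrains the rate to stay within [lower, upper] at all ages, while `level_value` pins it to a fixed value outside the [age_before, age_after] window. A sketch for another rate type (values illustrative only):

# illustrative only: keep incidence 'i' within [0, 10] at every age
model.parameters['i']['level_bounds'] = dict(lower=0., upper=10.)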
Example no. 9
def validate_consistent_re(N=500, delta_true=.15, sigma_true=[.1,.1,.1,.1,.1], 
                           true=dict(i=quadratic, f=constant, r=constant)):
    types = pl.array(['i', 'r', 'f', 'p'])

    ## generate simulated data
    model = data_simulation.simple_model(N)
    model.input_data['effective_sample_size'] = 1.
    model.input_data['value'] = 0.
    # coarse knot spacing for fast testing
    for t in types:
        model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20)

    sim = consistent_model.consistent_model(model, 'all', 'total', 'all', {})
    for t in 'irf':
        for i, k_i in enumerate(sim[t]['knots']):
            sim[t]['gamma'][i].value = pl.log(true[t](k_i))

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    data_type = types[mc.rcategorical(pl.ones(len(types), dtype=float) / float(len(types)), size=N)]


    a = pl.arange(101)
    age_weights = pl.ones_like(a)
    sum_wt = pl.cumsum(age_weights)

    p = pl.zeros(N)
    for t in types:
        mu_t = sim[t]['mu_age'].value
        sum_mu_wt = pl.cumsum(mu_t*age_weights)
    
        p_t = (sum_mu_wt[age_end] - sum_mu_wt[age_start]) / (sum_wt[age_end] - sum_wt[age_start])

        # correct cases where age_start == age_end
        i = age_start == age_end
        if pl.any(i):
            p_t[i] = mu_t[age_start[i]]

        # copy part into p
        p[data_type==t] = p_t[data_type==t]


    # add covariate shifts
    import dismod3
    import simplejson as json
    gbd_model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    model.hierarchy = gbd_model.hierarchy

    from validate_covariates import alpha_true_sim
    area_list = pl.array(['all', 'super-region_3', 'north_africa_middle_east', 'EGY', 'KWT', 'IRN', 'IRQ', 'JOR', 'SYR'])
    alpha = {}
    for t in types:
        alpha[t] = alpha_true_sim(model, area_list, sigma_true)
    print json.dumps(alpha, indent=2)

    model.input_data['area'] = area_list[mc.rcategorical(pl.ones(len(area_list)) / float(len(area_list)), N)]
    
    for i, a in model.input_data['area'].iteritems():
        t = data_type[i]
        p[i] = p[i] * pl.exp(pl.sum([alpha[t][n] for n in nx.shortest_path(model.hierarchy, 'all', a) if n in alpha]))

    n = mc.runiform(100, 10000, size=N)

    model.input_data['data_type'] = data_type
    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = n
    model.input_data['true'] = p
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true) / n

    # coarse knot spacing for fast testing
    for t in types:
        model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20)

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars = consistent_model.consistent_model(model, 'all', 'total', 'all', {})
    #model.map, model.mcmc = fit_model.fit_consistent_model(model.vars, iter=101, burn=0, thin=1, tune_interval=100)
    model.map, model.mcmc = fit_model.fit_consistent_model(model.vars, iter=10000, burn=5000, thin=25, tune_interval=100)

    graphics.plot_convergence_diag(model.vars)

    graphics.plot_fit(model, model.vars, {}, {})
    for i, t in enumerate('i r f p rr pf'.split()):
        pl.subplot(2, 3, i+1)
        # plot the truth twice: a white underlay for contrast, then red on top
        pl.plot(range(101), sim[t]['mu_age'].value, 'w-', label='Truth', linewidth=2)
        pl.plot(range(101), sim[t]['mu_age'].value, 'r-', label='Truth', linewidth=1)

    pl.show()

    model.input_data['mu_pred'] = 0.
    model.input_data['sigma_pred'] = 0.
    for t in types:
        model.input_data['mu_pred'][data_type==t] = model.vars[t]['p_pred'].stats()['mean']
        model.input_data['sigma_pred'][data_type==t] = model.vars[t]['p_pred'].stats()['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(dict(true=[delta_true for t in types if t != 'rr']))
    model.delta['mu_pred'] = [pl.exp(model.vars[t]['eta'].trace()).mean() for t in types if t != 'rr']
    model.delta['sigma_pred'] = [pl.exp(model.vars[t]['eta'].trace()).std() for t in types if t != 'rr']
    data_simulation.add_quality_metrics(model.delta)

    model.alpha = pandas.DataFrame()
    model.sigma = pandas.DataFrame()
    for t in types:
        alpha_t = pandas.DataFrame(index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
        alpha_t['true'] = pandas.Series(dict(alpha[t]))
        alpha_t['mu_pred'] = pandas.Series([n.stats()['mean'] for n in model.vars[t]['alpha']], index=model.vars[t]['U'].columns)
        alpha_t['sigma_pred'] = pandas.Series([n.stats()['standard deviation'] for n in model.vars[t]['alpha']], index=model.vars[t]['U'].columns)
        alpha_t['type'] = t
        model.alpha = model.alpha.append(alpha_t.dropna(), ignore_index=True)

        sigma_t = pandas.DataFrame(dict(true=sigma_true))
        sigma_t['mu_pred'] = [n.stats()['mean'] for n in model.vars[t]['sigma_alpha']]
        sigma_t['sigma_pred'] = [n.stats()['standard deviation'] for n in model.vars[t]['sigma_alpha']]
        model.sigma = model.sigma.append(sigma_t.dropna(), ignore_index=True)

    data_simulation.add_quality_metrics(model.alpha)
    data_simulation.add_quality_metrics(model.sigma)


    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean())

    model.mu = pandas.DataFrame()
    for t in types:
        model.mu = model.mu.append(pandas.DataFrame(dict(true=sim[t]['mu_age'].value,
                                                         mu_pred=model.vars[t]['mu_age'].stats()['mean'],
                                                         sigma_pred=model.vars[t]['mu_age'].stats()['standard deviation'])),
                                   ignore_index=True)
    data_simulation.add_quality_metrics(model.mu)
    print '\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.mu['abs_err'].mean(),
                                                                         pl.median(pl.absolute(model.mu['rel_err'].dropna())),
                                                                         model.mu['covered?'].mean())
    print


    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.add_to_results(model, 'alpha')
    data_simulation.add_to_results(model, 'sigma')
    data_simulation.finalize_results(model)

    print model.results

    return model
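A usage sketch for the validation routine above (the keyword values are illustrative; the defaults come from the signature):

# sketch: run a smaller simulation study for a quicker check
model = validate_consistent_re(N=100, delta_true=.15,
                               sigma_true=[.1, .1, .1, .1, .1])
print model.results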
Example no. 10
def validate_consistent_model_sim(N=500,
                                  delta_true=.5,
                                  true=dict(i=quadratic,
                                            f=constant,
                                            r=constant)):
    types = pl.array(['i', 'r', 'f', 'p'])

    ## generate simulated data
    model = data_simulation.simple_model(N)
    model.input_data['effective_sample_size'] = 1.
    model.input_data['value'] = 0.

    for t in types:
        model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20)

    sim = consistent_model.consistent_model(model, 'all', 'total', 'all', {})
    for t in 'irf':
        for i, k_i in enumerate(sim[t]['knots']):
            sim[t]['gamma'][i].value = pl.log(true[t](k_i))

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    data_type = types[mc.rcategorical(pl.ones(len(types), dtype=float) /
                                      float(len(types)),
                                      size=N)]

    a = pl.arange(101)
    age_weights = pl.ones_like(a)
    sum_wt = pl.cumsum(age_weights)

    p = pl.zeros(N)
    for t in types:
        mu_t = sim[t]['mu_age'].value
        sum_mu_wt = pl.cumsum(mu_t * age_weights)

        p_t = (sum_mu_wt[age_end] - sum_mu_wt[age_start]) / (sum_wt[age_end] -
                                                             sum_wt[age_start])

        # correct cases where age_start == age_end
        i = age_start == age_end
        if pl.any(i):
            p_t[i] = mu_t[age_start[i]]

        # copy part into p
        p[data_type == t] = p_t[data_type == t]
    n = mc.runiform(100, 10000, size=N)

    model.input_data['data_type'] = data_type
    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = n
    model.input_data['true'] = p
    model.input_data['value'] = mc.rnegative_binomial(n * p,
                                                      delta_true * n * p) / n

    # coarse knot spacing for fast testing
    for t in types:
        model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20)

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars = consistent_model.consistent_model(model, 'all', 'total',
                                                   'all', {})
    model.map, model.mcmc = fit_model.fit_consistent_model(model.vars,
                                                           iter=10000,
                                                           burn=5000,
                                                           thin=25,
                                                           tune_interval=100)

    graphics.plot_convergence_diag(model.vars)

    graphics.plot_fit(model, model.vars, {}, {})
    for i, t in enumerate('i r f p rr pf'.split()):
        pl.subplot(2, 3, i + 1)
        # plot the truth twice: a white underlay for contrast, then red on top
        pl.plot(a, sim[t]['mu_age'].value, 'w-', label='Truth', linewidth=2)
        pl.plot(a, sim[t]['mu_age'].value, 'r-', label='Truth', linewidth=1)

    #graphics.plot_one_type(model, model.vars['p'], {}, 'p')
    #pl.legend(fancybox=True, shadow=True, loc='upper left')

    pl.show()

    model.input_data['mu_pred'] = 0.
    model.input_data['sigma_pred'] = 0.
    for t in types:
        model.input_data['mu_pred'][
            data_type == t] = model.vars[t]['p_pred'].stats()['mean']
        model.input_data['sigma_pred'][data_type == t] = model.vars[t][
            'p_pred'].stats()['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(
        dict(true=[delta_true for t in types if t != 'rr']))
    model.delta['mu_pred'] = [
        pl.exp(model.vars[t]['eta'].trace()).mean() for t in types if t != 'rr'
    ]
    model.delta['sigma_pred'] = [
        pl.exp(model.vars[t]['eta'].trace()).std() for t in types if t != 'rr'
    ]
    data_simulation.add_quality_metrics(model.delta)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean())

    model.mu = pandas.DataFrame()
    for t in types:
        model.mu = model.mu.append(pandas.DataFrame(
            dict(true=sim[t]['mu_age'].value,
                 mu_pred=model.vars[t]['mu_age'].stats()['mean'],
                 sigma_pred=model.vars[t]['mu_age'].stats()['standard deviation'])),
                                   ignore_index=True)
    data_simulation.add_quality_metrics(model.mu)
    print '\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.mu['abs_err'].mean(),
        pl.median(pl.absolute(model.mu['rel_err'].dropna())),
        model.mu['covered?'].mean())
    print

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.finalize_results(model)

    print model.results

    return model
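Note that the noise model here differs from the previous example: the negative-binomial call scales the overdispersion parameter by n*p rather than using `delta_true` directly. A usage sketch with the defaults from the signature above:

# sketch: run the simulation study with its default truth functions
model = validate_consistent_model_sim(N=500, delta_true=.5)
print model.results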