def build_data_in(dm3, data_type, model_num):
    """Build the ``data_in`` table from the input data of ``dm3``.

    The table is created by ``empty_data_in`` on the index of
    ``dm3.input_data`` and then filled with covariates, the measurement
    columns, and the area-hierarchy columns ``m_sub``, ``m_region`` and
    ``m_super``.

    Parameters
    ----------
    dm3 : model object exposing an ``input_data`` DataFrame; it is passed
        through ``mu.create_uncertainty(dm3, "log_normal")`` first.
    data_type : data type key, forwarded to ``convert_data_type`` and
        ``mu.load_new_model``.
    model_num : model identifier, forwarded to ``mu.load_new_model``.

    Returns
    -------
    The populated ``data_in`` DataFrame.

    Raises
    ------
    IndexError
        If an area classified as a country or region has no incoming edge
        in the model hierarchy.
    """
    # find standard error and use it for standard deviation
    dm3 = mu.create_uncertainty(dm3, "log_normal")
    input_data = dm3.input_data

    # create data file
    data_in = empty_data_in(input_data.index)

    # add covariates
    # NOTE(review): ``columns=[""]`` asks for a column literally named "" from
    # the covariate frame -- kept exactly as found; confirm this is the
    # intended selection (joining all ``x_``/``z_`` columns looks more likely).
    cov = input_data.filter(like="x_")
    data_in = data_in.join(pandas.DataFrame(cov, columns=[""]))
    cov_z = input_data.filter(like="z_")
    if len(cov_z.columns) != 0:
        data_in = data_in.join(pandas.DataFrame(cov_z, columns=[""]))

    # add data
    data_in["integrand"] = convert_data_type(data_type)
    data_in["meas_value"] = input_data["value"]
    data_in["meas_stdev"] = input_data["standard_error"]
    data_in["sex"] = input_data["sex"]
    data_in["age_lower"] = input_data["age_start"]
    data_in["age_upper"] = input_data["age_end"] + 1.0
    data_in["time_lower"] = input_data["year_start"]
    data_in["time_upper"] = input_data["year_end"] + 1.0
    data_in["x_sex"] = input_data["sex"].map(dict(male=0.5, female=-0.5, total=0))

    # create data hierarchy: children of "all" are super-regions, their
    # children are regions, and the regions' children are countries
    model = mu.load_new_model(model_num, "all", data_type)
    hierarchy = model.hierarchy
    superregion = set(hierarchy.neighbors("all"))
    region = {r for sr in superregion for r in hierarchy.neighbors(sr)}
    country = {c for r in region for c in hierarchy.neighbors(r)}

    def parent_of(node):
        # Source node of the first incoming edge. ``list(...)`` keeps the
        # indexing valid whether ``in_edges`` returns a list or a view.
        return list(hierarchy.in_edges(node))[0][0]

    # create data area levels
    for i in input_data.index:
        # ``.loc`` replaces the removed ``.ix``; ``i`` is always an index label
        area = input_data.loc[i, "area"]
        if area in country:
            area_region = parent_of(area)
            data_in.loc[i, "m_sub"] = area
            data_in.loc[i, "m_region"] = area_region
            data_in.loc[i, "m_super"] = parent_of(area_region)
        elif area in region:
            data_in.loc[i, "m_region"] = area
            data_in.loc[i, "m_super"] = parent_of(area)
        elif area in superregion:
            data_in.loc[i, "m_super"] = area
    return data_in
    "seed",
    "bias_" + rate_type,
    "rmse_" + rate_type,
    "mae_" + rate_type,
    "mare_" + rate_type,
    "pc_" + rate_type,
    "time_" + rate_type,
]
output = pandas.DataFrame(pl.zeros((1, len(stats))), columns=stats)
output["seed"] = replicate
failure = []

# load new model
model = mu.load_new_model(model_num, area, data_type)
# replace invalid uncertainty with 10% of data set
model = mu.create_uncertainty(model, rate_type)
# withhold 25% of data
model.input_data, test_ix = mu.test_train(model.input_data, data_type, replicate)

try:
    # create pymc nodes for model and fit the model
    model.vars += dismod3.ism.age_specific_rate(model, data_type, area, "male", 2005, rate_type=rate_type)
    # fit the model, using a hill-climbing alg to find an initial value
    # and then sampling from the posterior with MCMC
    start = time.clock()
    dismod3.fit.fit_asr(model, data_type, iter=iter, thin=thin, burn=burn)
    elapsed = time.clock() - start
    # extract posterior predicted values for data
    pred = pandas.DataFrame(
        model.vars[data_type]["p_pred"].stats()["mean"], columns=["mean"], index=model.input_data.index
    )
# Example #3
0
import model_utilities as mu
reload(mu)

import dismod3
reload(dismod3)

# Settings shared by the example models below.
model_num = 40418
test_area = 'europe_western'
data_type = 'p'
rate_type='binom'

# example model0, to test vars and test-train
model = mu.load_new_model(model_num, test_area, data_type)
# index labels of rows whose effective sample size is NaN, recorded before
# create_uncertainty is applied
ess = model.input_data['effective_sample_size']
nan_ix = list(ess[pl.isnan(ess)].index)
model = mu.create_uncertainty(model, 'binom')
for cv in model.input_data.filter(like='x_').columns: # fill missing with 0
    # scalar fill value: fillna does not accept a list on current pandas
    model.input_data[cv] = model.input_data[cv].fillna(0)

# example model1, to test test-train
model1 = mu.load_new_model(model_num, test_area, data_type)
model1 = mu.create_uncertainty(model1, 'normal')

# example model2, to test loading and uncertainty
model2 = mu.load_new_model(model_num, test_area, data_type)
# index labels of rows whose effective sample size is NOT NaN
ess2 = model2.input_data['effective_sample_size']
non_nan_ix2 = list(ess2[~pl.isnan(ess2)].index)
# 10th percentile of the non-missing effective sample sizes
# (.loc replaces the removed .ix; the labels come from the index itself)
ten_percent = pl.percentile(model2.input_data.loc[non_nan_ix2, 'effective_sample_size'], 10.)
model2 = mu.create_uncertainty(model2, 'normal')

# find official areas of western europe
areas = [test_area]