def build_data_in(dm3, data_type, model_num):
    # find standard error and use it for standard deviation
    dm3 = mu.create_uncertainty(dm3, "log_normal")

    # create data file
    data_in = empty_data_in(dm3.input_data.index)

    # add covariates
    cov = dm3.input_data.filter(like="x_")
    data_in = data_in.join(cov)
    cov_z = dm3.input_data.filter(like="z_")
    if len(cov_z.columns) != 0:
        data_in = data_in.join(cov_z)

    # add data
    data_in["integrand"] = convert_data_type(data_type)
    data_in["meas_value"] = dm3.input_data["value"]
    data_in["meas_stdev"] = dm3.input_data["standard_error"]
    data_in["sex"] = dm3.input_data["sex"]
    data_in["age_lower"] = dm3.input_data["age_start"]
    data_in["age_upper"] = dm3.input_data["age_end"] + 1.0
    data_in["time_lower"] = dm3.input_data["year_start"]
    data_in["time_upper"] = dm3.input_data["year_end"] + 1.0
    data_in["x_sex"] = dm3.input_data["sex"].map(dict(male=0.5, female=-0.5, total=0))

    # create data hierarchy
    model = mu.load_new_model(model_num, "all", data_type)
    superregion = set(model.hierarchy.neighbors("all"))
    region = set(pl.flatten([model.hierarchy.neighbors(sr) for sr in model.hierarchy.neighbors("all")]))
    country = set(
        pl.flatten(
            [
                [model.hierarchy.neighbors(r) for r in model.hierarchy.neighbors(sr)]
                for sr in model.hierarchy.neighbors("all")
            ]
        )
    )

    # create data area levels (country, region, super-region) for each data point
    for i in dm3.input_data.index:
        if dm3.input_data.ix[i, "area"] in country:
            data_in.ix[i, "m_sub"] = dm3.input_data.ix[i, "area"]
            data_in.ix[i, "m_region"] = model.hierarchy.in_edges(dm3.input_data.ix[i, "area"])[0][0]
            data_in.ix[i, "m_super"] = model.hierarchy.in_edges(
                model.hierarchy.in_edges(dm3.input_data.ix[i, "area"])[0][0]
            )[0][0]
        elif dm3.input_data.ix[i, "area"] in region:
            data_in.ix[i, "m_region"] = dm3.input_data.ix[i, "area"]
            data_in.ix[i, "m_super"] = model.hierarchy.in_edges(dm3.input_data.ix[i, "area"])[0][0]
        elif dm3.input_data.ix[i, "area"] in superregion:
            data_in.ix[i, "m_super"] = dm3.input_data.ix[i, "area"]

    return data_in
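# A minimal usage sketch, not part of the original module: how build_data_in
# might be called to export one data type to the data_in csv format used
# downstream.  The output file name is an assumption; the model number and
# data type are taken from the test script elsewhere in this codebase.
if __name__ == "__main__":
    dm3 = mu.load_new_model(40418, "all", "p")      # load dismod3 model for all areas
    data_in = build_data_in(dm3, "p", 40418)        # convert to the data_in layout
    data_in.to_csv("data_in_p.csv", index=False)    # assumed output path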
"seed", "bias_" + rate_type, "rmse_" + rate_type, "mae_" + rate_type, "mare_" + rate_type, "pc_" + rate_type, "time_" + rate_type, ] output = pandas.DataFrame(pl.zeros((1, len(stats))), columns=stats) output["seed"] = replicate failure = [] # load new model model = mu.load_new_model(model_num, area, data_type) # replace invalid uncertainty with 10% of data set model = mu.create_uncertainty(model, rate_type) # withhold 25% of data model.input_data, test_ix = mu.test_train(model.input_data, data_type, replicate) try: # create pymc nodes for model and fit the model model.vars += dismod3.ism.age_specific_rate(model, data_type, area, "male", 2005, rate_type=rate_type) # fit the model, using a hill-climbing alg to find an initial value # and then sampling from the posterior with MCMC start = time.clock() dismod3.fit.fit_asr(model, data_type, iter=iter, thin=thin, burn=burn) elapsed = time.clock() - start # extract posterior predicted values for data pred = pandas.DataFrame( model.vars[data_type]["p_pred"].stats()["mean"], columns=["mean"], index=model.input_data.index )
import model_utilities as mu
reload(mu)
import dismod3
reload(dismod3)
import pylab as pl

model_num = 40418
test_area = 'europe_western'
data_type = 'p'
rate_type = 'binom'

# example model0, to test vars and test-train
model = mu.load_new_model(model_num, test_area, data_type)
# index of rows with missing effective sample size
nan_ix = list(model.input_data['effective_sample_size'][pl.isnan(model.input_data['effective_sample_size'])==1].index)
model = mu.create_uncertainty(model, 'binom')
for cv in list(model.input_data.filter(like='x_').columns):
    # fill missing covariate values with 0
    model.input_data[cv] = model.input_data[cv].fillna(0)

# example model1, to test test-train
model1 = mu.load_new_model(model_num, test_area, data_type)
model1 = mu.create_uncertainty(model1, 'normal')

# example model2, to test loading and uncertainty
model2 = mu.load_new_model(model_num, test_area, data_type)
# index of rows with non-missing effective sample size
non_nan_ix2 = list(model2.input_data['effective_sample_size'][pl.isnan(model2.input_data['effective_sample_size'])==0].index)
ten_percent = pl.percentile(model2.input_data.ix[non_nan_ix2, 'effective_sample_size'], 10.)
model2 = mu.create_uncertainty(model2, 'normal')

# find official areas of western europe
areas = [test_area]
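# Illustrative sanity checks, not part of the original test script, assuming
# the fill steps above behave as their comments describe (covariate NaNs set
# to 0, and missing effective sample sizes filled by create_uncertainty).
for cv in list(model.input_data.filter(like='x_').columns):
    assert not pl.isnan(model.input_data[cv]).any()   # no missing covariate values remain
assert not pl.isnan(model.input_data.ix[nan_ix, 'effective_sample_size']).any()   # assumed fill behavior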