def test_covariate_model_sim_no_hierarchy():
    """Smoke-test the mean covariate model on simulated data from a single area.

    Simulates 128 observations driven by three covariates, builds the mean
    covariate model plus a normal likelihood, and draws two MCMC samples.
    The test makes no assertions; it passes when nothing raises.
    """
    # simulate normal data
    model = data.ModelData()
    model.hierarchy, model.output_template = data_simulation.small_output()

    covariates = mc.rnormal(0., 1.**2, size=(128, 3))
    true_coefficients = [-.1, .1, .2]
    true_rate = pl.exp(pl.dot(covariates, true_coefficients))
    true_sigma = .01 * pl.ones_like(true_rate)
    observed = mc.rnormal(true_rate, 1. / true_sigma**2.)

    model.input_data = pandas.DataFrame(
        dict(value=observed,
             x_0=covariates[:, 0],
             x_1=covariates[:, 1],
             x_2=covariates[:, 2]))
    # every row belongs to the same area / sex / year
    for column, constant in [('area', 'all'),
                             ('sex', 'total'),
                             ('year_start', 2000),
                             ('year_end', 2000)]:
        model.input_data[column] = constant

    # create model and priors
    model_vars = {}
    model_vars.update(
        covariate_model.mean_covariate_model(
            'test', 1, model.input_data, {}, model, 'all', 'total', 'all'))
    model_vars.update(
        rate_model.normal_model('test', model_vars['pi'], 0., observed,
                                true_sigma))

    # fit model
    sampler = mc.MCMC(model_vars)
    sampler.sample(2)
def test_covariate_model_shift_for_root_consistency():
    """Smoke-test predict_for on a model whose reference is ('all', 'male', 1990).

    Builds the age-specific rate model twice on the same simulated data (first
    with reference sex 'total' / year 'all', then with 'male' / 1990), samples
    the second model three times, and calls predict_for for USA/male/1990.
    No assertion is made on the prediction.
    """
    # generate simulated data
    sample_count = 50
    true_sigma = .025
    ages = pl.arange(0, 100, 1)
    true_age_rate = .0001 * (ages * (100. - ages) + 100.)

    dm = data.ModelData()
    dm.input_data = data_simulation.simulated_age_intervals(
        'p', sample_count, ages, true_age_rate, true_sigma)
    dm.hierarchy, dm.output_template = data_simulation.small_output()

    # create model and priors; the first model is replaced by the second
    model_vars = ism.age_specific_rate(dm, 'p', 'all', 'total', 'all',
                                       None, None, None)
    model_vars = ism.age_specific_rate(dm, 'p', 'all', 'male', 1990,
                                       None, None, None)

    # fit model
    sampler = mc.MCMC(model_vars)
    sampler.sample(3)

    # check estimates
    pi_usa = covariate_model.predict_for(
        dm, dm.parameters['p'],
        'all', 'male', 1990,
        'USA', 'male', 1990,
        0., model_vars['p'], 0., pl.inf)
def test_data_model_lower_bound():
    """Smoke-test age_specific_rate for 'pf' with 'csmr' data as a lower bound.

    Simulates 50 'csmr' rows and 50 'pf' rows (the latter with twice the true
    rate), builds the 'pf' model with lower_bound='csmr', and draws three
    MCMC samples.  No assertions are made.
    """
    # generate simulated data
    bound_type = 'csmr'
    sample_count = 50
    true_sigma = .025
    ages = pl.arange(0, 100, 1)
    true_age_rate = .0001 * (ages * (100. - ages) + 100.)

    dm = data.ModelData()
    dm.input_data = data_simulation.simulated_age_intervals(
        bound_type, sample_count, ages, true_age_rate, true_sigma)
    pf_rows = data_simulation.simulated_age_intervals(
        'pf', sample_count, ages, true_age_rate * 2., true_sigma)
    dm.input_data = dm.input_data.append(pf_rows, ignore_index=True)
    dm.hierarchy, dm.output_template = data_simulation.small_output()

    # create model and priors
    model_vars = ism.age_specific_rate(
        dm, 'pf',
        reference_area='all',
        reference_sex='total',
        reference_year='all',
        mu_age=None,
        mu_age_parent=None,
        sigma_age_parent=None,
        lower_bound='csmr')

    # fit model
    sampler = mc.MCMC(model_vars)
    sampler.sample(3)
def test_expert_model_level_value():
    """Smoke-test level_constraints without and then with expert priors.

    For each parameter set an age pattern over ages 0..100 (knots every five
    years, smoothing .01) is combined with the level constraints and sampled
    three times.  No assertions are made.
    """
    d = data.ModelData()

    ages = pl.arange(101)

    def build_and_fit(expert_parameters):
        # assemble the age pattern plus level constraints, then sample briefly
        model_vars = {}
        model_vars.update(
            age_pattern.age_pattern('test', ages,
                                    knots=pl.arange(0, 101, 5),
                                    smoothing=.01))
        model_vars.update(
            expert_prior_model.level_constraints('test', expert_parameters,
                                                 model_vars['mu_age'], ages))
        sampler = mc.MCMC(model_vars)
        sampler.sample(3)

    # create and fit model with no priors
    build_and_fit({})

    # create and fit model with expert priors
    parameters = {}
    parameters['level_value'] = dict(value=.1, age_below=15, age_above=95)
    parameters['level_bound'] = dict(upper=.01, lower=.001)
    build_and_fit(parameters)
def test_fixed_effect_priors():
    """Check that a fixed-effect prior on 'x_sex' reaches the beta stochastic.

    Simulates 32 observations whose rate depends on sex, builds the mean
    covariate model with a TruncatedNormal prior (mu=1.) on 'x_sex', and
    asserts that the first beta's 'mu' parent equals 1.
    """
    model = data.ModelData()

    # set prior on sex
    parameters = dict(
        fixed_effects={
            'x_sex': dict(dist='TruncatedNormal', mu=1., sigma=.5,
                          lower=-10, upper=10)
        })

    # simulate normal data
    # integer sample size, matching the other simulations that call
    # mc.rcategorical with an int (a float size is not a valid array length)
    n = 32
    sex_list = pl.array(['male', 'female', 'total'])
    sex = sex_list[mc.rcategorical([.3, .3, .4], n)]
    beta_true = dict(male=-1., total=0., female=1.)
    pi_true = pl.exp([beta_true[s] for s in sex])
    sigma_true = .05
    p = mc.rnormal(pi_true, 1. / sigma_true**2.)

    model.input_data = pandas.DataFrame(dict(value=p, sex=sex))
    model.input_data['area'] = 'all'
    model.input_data['year_start'] = 2010
    # previously 'year_start' was assigned twice and 'year_end' was never set
    model.input_data['year_end'] = 2010

    # create model and priors
    model_vars = {}
    model_vars.update(
        covariate_model.mean_covariate_model(
            'test', 1, model.input_data, parameters, model,
            'all', 'total', 'all'))

    # single-argument parenthesised print behaves the same on Python 2 and 3
    print(model_vars['beta'])
    assert model_vars['beta'][0].parents['mu'] == 1.
def test_expert_model_derivative_sign():
    """Smoke-test derivative_constraints without and then with expert priors.

    The model without priors is only constructed; the model with an
    'increasing' range (15-95) and a 'decreasing' range (0-0) is constructed
    and sampled three times.  No assertions are made.
    """
    d = data.ModelData()

    ages = pl.arange(101)

    def new_age_pattern():
        # fresh age pattern with knots every five years
        return age_pattern.age_pattern('test', ages,
                                       knots=pl.arange(0, 101, 5),
                                       smoothing=.01)

    # create model with no priors
    model_vars = {}
    model_vars.update(new_age_pattern())
    model_vars.update(
        expert_prior_model.derivative_constraints(
            'test', {}, model_vars['mu_age'], ages))

    # create model with expert priors
    parameters = {}
    parameters['increasing'] = dict(age_start=15, age_end=95)
    parameters['decreasing'] = dict(age_start=0, age_end=0)

    model_vars = {}
    model_vars.update(new_age_pattern())
    # NOTE(review): this call passes the knots where the call above passes ages
    model_vars.update(
        expert_prior_model.derivative_constraints(
            'test', parameters, model_vars['mu_age'], model_vars['knots']))

    # fit model
    sampler = mc.MCMC(model_vars)
    sampler.sample(3)
def geo_info(country, disease):
    """Find a country's region from its name.

    Loads the hierarchy JSON stored for *disease*, adds its nodes and edges
    to a fresh ModelData hierarchy, and returns the source node of the first
    edge pointing at *country*.

    Args:
        country: node name to look up in the hierarchy.
        disease: identifier substituted into the hierarchy file path.

    Returns:
        The source node of the first incoming edge of *country*.

    Raises:
        IOError: if the hierarchy file cannot be opened.
        IndexError: if *country* has no incoming edge.
    """
    global_model = dm_data.ModelData()

    hierarchy_path = ('/home/j/Project/dismod/dismod_status/prod/dm-%s/hierarchy.json'
                      % (disease))
    # context manager closes the file even when JSON parsing fails
    with open(hierarchy_path) as hierarchy_file:
        hierarchy = json.load(hierarchy_file)

    global_model.hierarchy.add_nodes_from(hierarchy['nodes'])
    global_model.hierarchy.add_edges_from(hierarchy['edges'])

    # list() keeps the indexing valid whether in_edges returns a list or a
    # non-indexable view
    region = list(global_model.hierarchy.in_edges(country))[0][0]
    return region
def test_predict_for_wo_data():
    """Check predict_for on a model that was built without input data.

    Approach:
    1. Build the age-specific rate model for 'p' on the small simulated
       hierarchy and draw a single MCMC sample
    2. Case 1: give every node on the path to USA a Constant random-effect
       prior with mu=0 and predict for USA/male/1990 (result is not checked)
    3. Case 2: set the prior means to .1, .2, .3 and confirm that the
       prediction equals the mu_age trace scaled by the fixed- and
       random-effect factors
    """
    dm = data.ModelData()
    dm.hierarchy, dm.output_template = data_simulation.small_output()

    # create model and priors
    model_vars = ism.age_specific_rate(dm, 'p', 'all', 'total', 'all',
                                       None, None, None)

    # fit model
    sampler = mc.MCMC(model_vars)
    sampler.sample(1)

    def predict_usa_male_1990():
        # prediction for USA / male / 1990 relative to the ('all', 'total', 'all') reference
        return covariate_model.predict_for(
            dm, dm.parameters['p'],
            'all', 'total', 'all',
            'USA', 'male', 1990,
            0., model_vars['p'], 0., pl.inf)

    ### Prediction case 1: constant zero random effects, zero fixed effect coefficients

    # check estimates with priors on random effects
    dm.parameters['p']['random_effects'] = {}
    for node in ['USA', 'NAHI', 'super-region-1', 'all']:
        # zero out REs to see if test passes
        dm.parameters['p']['random_effects'][node] = dict(
            dist='Constant', mu=0, sigma=1.e-9)

    pred = predict_usa_male_1990()

    ### Prediction case 2: constant non-zero random effects, zero fixed effect coefficients
    # FIXME: this test was failing because PyMC is drawing from the prior of beta[0] even though I asked for NoStepper

    # check estimates with priors on random effects
    for rank, node in enumerate(['USA', 'NAHI', 'super-region-1'], 1):
        dm.parameters['p']['random_effects'][node]['mu'] = rank / 10.

    pred = predict_usa_male_1990()

    # test that the predicted value is as expected
    # beta[0] is drawn from prior, even though I set it to NoStepper, see FIXME above
    fe_usa_1990 = pl.exp(.5 * model_vars['p']['beta'][0].value)
    re_usa_1990 = pl.exp(.1 + .2 + .3)
    expected = model_vars['p']['mu_age'].trace() * fe_usa_1990 * re_usa_1990
    assert_almost_equal(pred, expected)
def test_blank_input_data():
    """Check that a freshly constructed ModelData has the expected skeleton.

    Verifies the input-data columns, the output-template columns, the
    per-data-type parameter entries, and that both the hierarchy and the
    list of nodes to fit are non-empty.
    """
    d = data.ModelData()

    input_fields = 'data_type value area sex age_start age_end year_start year_end standard_error effective_sample_size lower_ci upper_ci age_weights'.split()
    for field in input_fields:
        assert field in d.input_data.columns, 'Input data CSV should have field "%s"' % field

    template_fields = 'data_type area sex year pop'.split()
    for field in template_fields:
        assert field in d.output_template.columns, 'Output template CSV should have field "%s"' % field

    parameter_keys = 'i p r f rr X ages'.split()
    for data_type in parameter_keys:
        assert data_type in d.parameters, 'Parameter dict should have entry for "%s"' % data_type

    assert d.hierarchy.number_of_nodes() > 0, 'Hierarchy should be non-empty'

    assert len(d.nodes_to_fit) > 0, 'Nodes to fit should be non-empty'
def test_predict_for_wo_effects():
    """Check predict_for on a model built with include_covariates=False.

    Approach:
    1. Simulate five age-interval observations and build the 'p' model with
       reference ('NAHI', 'male', 2005) and no covariates
    2. Assign NoStepper to every stochastic and draw ten samples
    3. Predict for USA/male/1990 and confirm the result matches the mu_age
       trace
    """
    # generate simulated data
    sample_count = 5
    true_sigma = .025
    ages = pl.arange(0, 100, 1)
    true_age_rate = .0001 * (ages * (100. - ages) + 100.)

    dm = data.ModelData()
    dm.input_data = data_simulation.simulated_age_intervals(
        'p', sample_count, ages, true_age_rate, true_sigma)
    dm.hierarchy, dm.output_template = data_simulation.small_output()

    # create model and priors
    model_vars = ism.age_specific_rate(dm, 'p', 'NAHI', 'male', 2005,
                                       None, None, None,
                                       include_covariates=False)

    # fit model
    sampler = mc.MCMC(model_vars)
    for stoch in sampler.stochastics:
        sampler.use_step_method(mc.NoStepper, stoch)
    sampler.sample(10)

    ### Prediction case: prediction should match mu age
    pred = covariate_model.predict_for(
        dm, dm.parameters['p'],
        'NAHI', 'male', 2005,
        'USA', 'male', 1990,
        0., model_vars['p'], 0., pl.inf)

    assert_almost_equal(pred, model_vars['p']['mu_age'].trace())
def test_data_model_sim():
    """Smoke-test fitting the 'p' rate model and reusing it as an empirical prior.

    Simulates 50 age-interval observations, fits the age-specific rate model
    with three MCMC samples, predicts for USA/male/1990, and then rebuilds
    the model using the mean and standard deviation of that prediction as
    the parent age pattern.  No assertions are made.
    """
    # generate simulated data
    data_type = 'p'
    n = 50
    sigma_true = .025
    a = pl.arange(0, 100, 1)
    pi_age_true = .0001 * (a * (100. - a) + 100.)

    d = data.ModelData()
    d.input_data = data_simulation.simulated_age_intervals(
        data_type, n, a, pi_age_true, sigma_true)
    d.hierarchy, d.output_template = data_simulation.small_output()

    # create model and priors
    model_vars = ism.age_specific_rate(d, data_type,
                                       reference_area='all',
                                       reference_sex='total',
                                       reference_year='all',
                                       mu_age=None,
                                       mu_age_parent=None,
                                       sigma_age_parent=None)

    # fit model
    m = mc.MCMC(model_vars)
    m.sample(3)

    # check estimates
    # pass the parameters for this data type (previously the whole
    # d.parameters dict), as the other predict_for tests do
    pi_usa = covariate_model.predict_for(
        d, d.parameters[data_type],
        'all', 'total', 'all',
        'USA', 'male', 1990,
        0., model_vars[data_type], -pl.inf, pl.inf)

    # create model w/ emp prior
    # create model and priors
    model_vars = ism.age_specific_rate(d, data_type,
                                       reference_area='all',
                                       reference_sex='total',
                                       reference_year='all',
                                       mu_age=None,
                                       mu_age_parent=pi_usa.mean(0),
                                       sigma_age_parent=pi_usa.std(0))
def test_covariate_model_sim_w_hierarchy():
    """Check the mean covariate model on simulated data spread over three areas.

    Simulates 50 observations with random area, sex and year, fits the mean
    covariate model with a normal likelihood for two MCMC samples, and then
    asserts that sex appears as the fixed effect 'x_sex' (not as a random
    effect) and that there is exactly one beta.
    """
    sample_count = 50

    # setup hierarchy
    hierarchy, output_template = data_simulation.small_output()

    # simulate normal data (draw order: area, sex, year, value)
    area_names = pl.array(['all', 'USA', 'CAN'])
    area = area_names[mc.rcategorical([.3, .3, .4], sample_count)]

    sex_names = pl.array(['male', 'female', 'total'])
    sex = sex_names[mc.rcategorical([.3, .3, .4], sample_count)]

    year = pl.array(mc.runiform(1990, 2010, sample_count), dtype=int)

    true_area_effect = dict(all=0., USA=.1, CAN=-.2)
    true_rate = pl.exp([true_area_effect[name] for name in area])
    true_sigma = .05 * pl.ones_like(true_rate)
    observed = mc.rnormal(true_rate, 1. / true_sigma**2.)

    model = data.ModelData()
    model.input_data = pandas.DataFrame(
        dict(value=observed, area=area, sex=sex,
             year_start=year, year_end=year))
    model.hierarchy, model.output_template = hierarchy, output_template

    # create model and priors
    model_vars = {}
    model_vars.update(
        covariate_model.mean_covariate_model(
            'test', 1, model.input_data, {}, model, 'all', 'total', 'all'))
    model_vars.update(
        rate_model.normal_model('test', model_vars['pi'], 0., observed,
                                true_sigma))

    # fit model
    sampler = mc.MCMC(model_vars)
    sampler.sample(2)

    assert 'sex' not in model_vars['U']
    assert 'x_sex' in model_vars['X']
    assert len(model_vars['beta']) == 1
def test_covariate_model_dispersion():
    """Smoke-test the dispersion covariate model with a negative-binomial likelihood.

    Simulates 100 observations with one dispersion covariate 'z_0', builds
    the mean and dispersion covariate models plus the negative-binomial rate
    model, and draws two MCMC samples.  No assertions are made.
    """
    # simulate normal data
    sample_count = 100

    model = data.ModelData()
    model.hierarchy, model.output_template = data_simulation.small_output()

    # NOTE(review): the weights [.5, 5.] do not sum to one - confirm intended
    Z = mc.rcategorical([.5, 5.], sample_count)
    true_zeta = -.2

    true_rate = .1
    ess = 10000. * pl.ones(sample_count)
    true_eta = pl.log(50)
    true_delta = 50 + pl.exp(true_eta)

    true_dispersion = true_delta * pl.exp(Z * true_zeta)
    observed = mc.rnegative_binomial(true_rate * ess, true_dispersion) / ess

    model.input_data = pandas.DataFrame(dict(value=observed, z_0=Z))
    for column, constant in [('area', 'all'),
                             ('sex', 'total'),
                             ('year_start', 2000),
                             ('year_end', 2000)]:
        model.input_data[column] = constant

    # create model and priors
    model_vars = dict(mu=mc.Uninformative('mu_test', value=true_rate))
    model_vars.update(
        covariate_model.mean_covariate_model(
            'test', model_vars['mu'], model.input_data, {}, model,
            'all', 'total', 'all'))
    model_vars.update(
        covariate_model.dispersion_covariate_model(
            'test', model.input_data, .1, 10.))
    model_vars.update(
        rate_model.neg_binom_model(
            'test', model_vars['pi'], model_vars['delta'], observed, ess))

    # fit model
    sampler = mc.MCMC(model_vars)
    sampler.sample(2)
def test_random_effect_priors():
    """Check that a random-effect prior on 'USA' reaches the alpha stochastic.

    Simulates 32 observations spread over the areas 'all', 'USA' and 'CAN',
    builds the mean covariate model with a TruncatedNormal prior (mu=.1) on
    the 'USA' random effect, and asserts that the 'mu' parent of alpha[1]
    equals .1.
    """
    model = data.ModelData()

    # set prior on the USA random effect
    parameters = dict(random_effects={
        'USA': dict(dist='TruncatedNormal', mu=.1, sigma=.5,
                    lower=-10, upper=10)
    })

    # simulate normal data
    # integer sample size, matching the other simulations that call
    # mc.rcategorical with an int (a float size is not a valid array length)
    n = 32
    area_list = pl.array(['all', 'USA', 'CAN'])
    area = area_list[mc.rcategorical([.3, .3, .4], n)]
    alpha_true = dict(all=0., USA=.1, CAN=-.2)
    pi_true = pl.exp([alpha_true[a] for a in area])
    sigma_true = .05
    p = mc.rnormal(pi_true, 1. / sigma_true**2.)

    model.input_data = pandas.DataFrame(dict(value=p, area=area))
    model.input_data['sex'] = 'male'
    model.input_data['year_start'] = 2010
    model.input_data['year_end'] = 2010

    model.hierarchy.add_edge('all', 'USA')
    model.hierarchy.add_edge('all', 'CAN')

    # create model and priors
    model_vars = {}
    model_vars.update(
        covariate_model.mean_covariate_model(
            'test', 1, model.input_data, parameters, model,
            'all', 'total', 'all'))

    # single-argument parenthesised print behaves the same on Python 2 and 3
    print(model_vars['alpha'])
    print(model_vars['alpha'][1].parents['mu'])
    assert model_vars['alpha'][1].parents['mu'] == .1
def lr_cg_algorithm(day=0, t_init_hp=[], t_init_chp=[]):
    """Run the column-generation loop for one day and store the results.

    Resets the GlobVar counters and several Constants settings, builds the
    HP/CHP pricing objects and the master problem from data.ModelData(),
    computes initial proposals with all-zero marginals, and then alternates
    master and pricing solves until Constants.cg_iterations iterations are
    done or 600 seconds (as measured by time.clock()) have elapsed.  The
    results are written to Constants.path + "/cg_<day>.hdf5".

    Args:
        day: day index; Constants.t is set to day * 24 * 4.
        t_init_hp: optional sequence; when non-empty, element i is assigned
            to the t_init attribute of the i-th HP object.
        t_init_chp: same as t_init_hp, for the CHP objects.

    Returns:
        None.
    """
    # NOTE: the list defaults are only read, never mutated, in this function.
    #============ Create Optimization model ============================#
    GlobVar.overall_count = 0
    GlobVar.inner_count = 0
    GlobVar.outer_count = 0
    GlobVar.final_count = 0
    GlobVar.first_iteration = True

    Constants.plot = False
    Constants.cg_iterations = 50
    Constants.inner_loops = []
    Constants.final_max = 0
    Constants.outer_loops = 0
    Constants.outer_max = Constants.cg_iterations
    Constants.overall_max = Constants.cg_iterations
    Constants.subgradient_max = 0

    # Get data from input files
    Constants.t = day * 24 * 4
    dat = data.ModelData()

    # HP objects handle the HP pricing subproblems
    hp = []
    for i in range(len(dat.hps)):
        hp.append(object_hp.HP(dat.hps[i], dat.raw_temperature, i))
        if t_init_hp:
            hp[-1].t_init = t_init_hp[i]

    # CHP objects handle the CHP pricing subproblems
    chp = []
    for i in range(len(dat.chps)):
        chp.append(object_chp.CHP(dat.chps[i], dat.raw_temperature, i))
        if t_init_chp:
            chp[-1].t_init = t_init_chp[i]

    # Master object handles master problem
    mp = object_master.Master(len(chp), len(hp), dat.P_res)

    # Get initial feasible proposals
    for j in range(mp.number_hp):
        (mp.initial_costs_hp[j],
         mp.initial_proposals_hp[j]) = hp[j].compute_proposal(
             np.zeros([Constants.timesteps]))
    for k in range(mp.number_chp):
        (mp.initial_costs_chp[k],
         mp.initial_proposals_chp[k]) = chp[k].compute_proposal(
             np.zeros([Constants.timesteps]))

    start_time = time.clock()

    #============ Initial Column Generation ==================================================#
    while GlobVar.overall_count < Constants.cg_iterations and (
            time.clock() - start_time) < 600:
        mp.solve_master()
        mp.master_time[GlobVar.outer_count] = time.clock() - start_time

        # accumulate the pricing objectives for the Lagrangian bound update
        sum_obj_subs = 0
        for j in range(mp.number_hp):
            (mp.pricing_costs_hp[GlobVar.overall_count, j],
             mp.pricing_proposals_hp[GlobVar.overall_count, j]
             ) = hp[j].compute_proposal(
                 mp.marginals_mu_master[GlobVar.outer_count])
            sum_obj_subs += hp[j].pricing_res_obj[GlobVar.overall_count]
        for k in range(mp.number_chp):
            (mp.pricing_costs_chp[GlobVar.overall_count, k],
             mp.pricing_proposals_chp[GlobVar.overall_count, k]
             ) = chp[k].compute_proposal(
                 mp.marginals_mu_master[GlobVar.outer_count])
            sum_obj_subs += chp[k].pricing_res_obj[GlobVar.overall_count]

        mp.update_lr_bound(sum_obj_subs)

        # Get computation time
        mp.sub_time[GlobVar.overall_count] = time.clock() - start_time

        mp.update_plot_cg()

        GlobVar.outer_count += 1
        GlobVar.overall_count += 1

    # The context manager closes the HDF5 file even if a write raises
    # (previously the file stayed open on any exception before f.close()).
    with h5py.File(Constants.path + "/cg_" + str(day) + ".hdf5", "w") as f:
        for i in range(len(hp)):
            g = f.create_group("HP_" + str(i))
            g.create_dataset("x_p", data=hp[i].pricing_res_x)
            g.create_dataset("y_p", data=hp[i].pricing_res_y)
            g.create_dataset("T_p", data=hp[i].pricing_res_T)
            g.create_dataset("P_p", data=hp[i].pricing_res_P)
            g.create_dataset("z_p", data=hp[i].pricing_res_obj)

        for j in range(len(chp)):
            g = f.create_group("CHP_" + str(j))
            g.create_dataset("x_p", data=chp[j].pricing_res_x)
            g.create_dataset("y_p", data=chp[j].pricing_res_y)
            g.create_dataset("T_p", data=chp[j].pricing_res_T)
            g.create_dataset("P_p", data=chp[j].pricing_res_P)
            g.create_dataset("Q_p", data=chp[j].pricing_res_Q)
            g.create_dataset("z_p", data=chp[j].pricing_res_obj)
            g.create_dataset("c_p", data=chp[j].pricing_res_costs)

        f.create_dataset("pricing_costs_chp", data=mp.pricing_costs_chp)
        f.create_dataset("pricing_costs_hp", data=mp.pricing_costs_hp)
        f.create_dataset("pricing_proposals_chp", data=mp.pricing_proposals_chp)
        f.create_dataset("pricing_proposals_hp", data=mp.pricing_proposals_hp)
        f.create_dataset("marginals_sigma_chp", data=mp.marginals_sigma_chp)
        f.create_dataset("marginals_sigma_hp", data=mp.marginals_sigma_hp)
        f.create_dataset("marginals_mu_master", data=mp.marginals_mu_master)
        f.create_dataset("lin_obj_values", data=mp.lin_obj_values)
        f.create_dataset("res_lr_bounds_master", data=mp.res_lr_bounds_master)
        f.create_dataset("sub_time", data=mp.sub_time)
        f.create_dataset("master_time", data=mp.master_time)

        f.attrs["max_time_subs"] = Constants.max_time_subs
        f.attrs["max_time_master"] = Constants.max_time_master
        f.attrs["timesteps"] = Constants.timesteps
        f.attrs["t"] = Constants.t
        f.attrs["cg_iterations"] = Constants.cg_iterations
        f.attrs["inner_loops"] = Constants.inner_loops
        f.attrs["final_max"] = Constants.final_max
        f.attrs["outer_loops"] = Constants.outer_loops
        f.attrs["outer_max"] = Constants.outer_max
        f.attrs["overall_max"] = Constants.overall_max
        f.attrs["subgradient_max"] = Constants.subgradient_max
        f.attrs["eps"] = Constants.eps
        f.attrs["alpha"] = Constants.alpha
        f.attrs["t_bivalent"] = Constants.t_bivalent
        f.attrs["pricing_MIPGap"] = Constants.pricing_MIPGap
        f.attrs["final_MIPGap"] = Constants.final_MIPGap
        f.attrs["initial_in_master"] = Constants.initial_in_master
        f.attrs["pricing_time_limit"] = Constants.pricing_time_limit
def simple_model(N):
    """Return a ModelData whose input data has N rows prepared by initialize_input_data.

    The input data starts as an empty DataFrame indexed 0..N-1 and is then
    passed to initialize_input_data, which is defined elsewhere.
    """
    row_index = range(N)

    model = data.ModelData()
    model.input_data = pandas.DataFrame(index=row_index)
    initialize_input_data(model.input_data)

    return model
def lr_cg_algorithm(day=0, t_init_hp=[], t_init_chp=[]):
    """Run column generation followed by subgradient iterations for one day.

    Phases, in order:
    1. Reset the GlobVar counters, load data.ModelData(), build the HP/CHP
       pricing objects and the master problem, and compute initial proposals
       from mp.initial_marginals.
    2. Initial column generation: Constants.cg_iterations rounds of master
       solve plus pricing solves.
    3. Outer loop (up to Constants.outer_max): an inner subgradient loop that
       updates the multipliers (clipped to the range
       [Constants.r_el * Constants.dt, Constants.k_el * Constants.dt]) and
       re-solves the pricing problems, followed by a master solve.  Either
       loop stops early when the relative gap between the master objective
       and mp.lr_bound falls below Constants.eps or when
       Constants.pricing_time_limit is exceeded.
    4. Final proposals: Constants.final_max rounds of compute_proposal with
       the final marginals, then mp.solve_master(True).
    5. All results are written to Constants.path + "/lr_<day>.hdf5".

    Args:
        day: day index; Constants.t is set to day * 24 * 4.
        t_init_hp: optional sequence; when non-empty, element i is assigned
            to the t_init attribute of the i-th HP object.
        t_init_chp: same as t_init_hp, for the CHP objects.

    Returns:
        A pair of lists: hp[j].final_res_T[mp.final_hp[j], 95] for every HP
        and chp[j].final_res_T[mp.final_chp[j], 95] for every CHP.
    """
    # NOTE: the list defaults are only read, never mutated, in this function.
    #============ Create Optimization model ============================#
    GlobVar.overall_count = 0
    GlobVar.inner_count = 0
    GlobVar.outer_count = 0
    GlobVar.final_count = 0
    GlobVar.first_iteration = True

    # Get data from input files
    Constants.t = day * 24 * 4
    dat = data.ModelData()

    # HP objects handle the HP pricing subproblems
    hp = []
    for i in range(len(dat.hps)):
        hp.append(object_hp.HP(dat.hps[i], dat.raw_temperature, i))
        if t_init_hp:
            hp[-1].t_init = t_init_hp[i]

    # CHP objects handle the CHP pricing subproblems
    chp = []
    for i in range(len(dat.chps)):
        chp.append(object_chp.CHP(dat.chps[i], dat.raw_temperature, i))
        if t_init_chp:
            chp[-1].t_init = t_init_chp[i]

    # Master object handles master problem
    mp = object_master.Master(len(chp), len(hp), dat.P_res)

    # Get initial feasible proposals
    for j in range(mp.number_hp):
        (mp.initial_costs_hp[j], mp.initial_proposals_hp[j]) = hp[j].compute_proposal(mp.initial_marginals)
    for k in range(mp.number_chp):
        (mp.initial_costs_chp[k], mp.initial_proposals_chp[k]) = chp[k].compute_proposal(mp.initial_marginals)

    start_time = time.clock()

    #============ Initial Column Generation ==================================================#
    while GlobVar.overall_count < Constants.cg_iterations:
        mp.solve_master()
        mp.master_time[GlobVar.outer_count] = time.clock() - start_time

        sum_obj_subs = 0
        for j in range(mp.number_hp):
            (mp.pricing_costs_hp[GlobVar.overall_count, j], mp.pricing_proposals_hp[GlobVar.overall_count, j]) = hp[j].compute_proposal(mp.marginals_mu_master[GlobVar.outer_count])
            sum_obj_subs += hp[j].pricing_res_obj[GlobVar.overall_count]
        for k in range(mp.number_chp):
            (mp.pricing_costs_chp[GlobVar.overall_count, k], mp.pricing_proposals_chp[GlobVar.overall_count, k]) = chp[k].compute_proposal(mp.marginals_mu_master[GlobVar.outer_count])
            sum_obj_subs += chp[k].pricing_res_obj[GlobVar.overall_count]

        mp.update_lr_bound(sum_obj_subs)

        # Get computation time
        mp.sub_time[GlobVar.overall_count] = time.clock() - start_time

        mp.update_plot_cg()

        GlobVar.outer_count += 1
        GlobVar.overall_count += 1

    #============ Outer Loop =================================================================#
    break_outer_loop = False
    break_inner_loop = False
    while GlobVar.outer_count < Constants.outer_max:
        #============ Inner Loop ======================================================#
        GlobVar.inner_count = 0
        while GlobVar.inner_count < Constants.inner_loops[GlobVar.outer_count-Constants.cg_iterations]:
            # Compute subgradient and step size
            subgradient = mp.compute_subgradient()
            alpha = Constants.alpha
            stepsize = alpha * (mp.lin_obj_values[GlobVar.outer_count-1] - mp.lr_bound) / np.sum(np.square(subgradient))

            # Update Lagrange multipliers (shadow prices)
            for t in range(Constants.timesteps):
                # the very first subgradient step starts from the last
                # column-generation marginals; later steps continue from the
                # previous subgradient iterate
                if GlobVar.first_iteration:
                    marginal_mu_new = mp.marginals_mu_master[Constants.cg_iterations-1, t] + stepsize * subgradient[t]
                else:
                    marginal_mu_new = mp.marginals_mu_subgradient[GlobVar.overall_count-Constants.cg_iterations-1, t] + stepsize * subgradient[t]
                # clip the new multiplier to [r_el * dt, k_el * dt]
                if marginal_mu_new <= Constants.r_el * Constants.dt:
                    mp.marginals_mu_subgradient[GlobVar.overall_count-Constants.cg_iterations, t] = Constants.r_el * Constants.dt
                elif marginal_mu_new >= Constants.k_el * Constants.dt:
                    mp.marginals_mu_subgradient[GlobVar.overall_count-Constants.cg_iterations, t] = Constants.k_el * Constants.dt
                else:
                    mp.marginals_mu_subgradient[GlobVar.overall_count-Constants.cg_iterations, t] = marginal_mu_new
            GlobVar.first_iteration = False

            # Solve Pricing Problems
            sum_obj_subs = 0
            for j in range(mp.number_hp):
                (mp.pricing_costs_hp[GlobVar.overall_count, j], mp.pricing_proposals_hp[GlobVar.overall_count, j]) = hp[j].compute_proposal(mp.marginals_mu_subgradient[GlobVar.overall_count-Constants.cg_iterations])
                sum_obj_subs += hp[j].pricing_res_obj[GlobVar.overall_count]
            for k in range(mp.number_chp):
                (mp.pricing_costs_chp[GlobVar.overall_count, k], mp.pricing_proposals_chp[GlobVar.overall_count, k]) = chp[k].compute_proposal(mp.marginals_mu_subgradient[GlobVar.overall_count-Constants.cg_iterations])
                sum_obj_subs += chp[k].pricing_res_obj[GlobVar.overall_count]

            # Check Lagrangian Bound
            mp.update_lr_bound(sum_obj_subs)

            # Get computation time
            mp.sub_time[GlobVar.overall_count] = time.clock() - start_time

            GlobVar.overall_count += 1

            # stop when the relative gap is small enough or time has run out
            if (mp.lin_obj_values[GlobVar.outer_count-1] - mp.lr_bound)/mp.lin_obj_values[GlobVar.outer_count-1] < Constants.eps or (time.clock() - start_time) > Constants.pricing_time_limit:
                break_inner_loop = True
                break

            GlobVar.inner_count += 1
        #============ End Inner Loop ==================================================#
        if break_inner_loop:
            break

        if GlobVar.overall_count == Constants.random_proposals_in_master:
            GlobVar.random_proposals_in_master = False

        # Solve Master
        mp.solve_master()
        mp.master_time[GlobVar.outer_count] = time.clock() - start_time

        mp.update_plot_lr()

        if (mp.lin_obj_values[GlobVar.outer_count] - mp.lr_bound)/mp.lin_obj_values[GlobVar.outer_count] < Constants.eps or (time.clock() - start_time) > Constants.pricing_time_limit:
            break_outer_loop = True
            break

        GlobVar.outer_count += 1
    #============ End Outer Loop =============================================================#

    # pick the multipliers used for the final proposals
    if break_outer_loop:
        mp.final_marginals = mp.marginals_mu_subgradient[GlobVar.overall_count-Constants.cg_iterations-1]
    elif break_inner_loop:
        mp.final_marginals = mp.marginals_mu_subgradient[GlobVar.overall_count-Constants.cg_iterations-1]
        mp.update_plot_inner_break()
    else:
        mp.final_marginals = mp.marginals_mu_subgradient[-1]

    while GlobVar.final_count < Constants.final_max:
        for j in range(mp.number_hp):
            (mp.final_costs_hp[GlobVar.final_count, j], mp.final_proposals_hp[GlobVar.final_count, j]) = hp[j].compute_proposal(mp.final_marginals, True)
        for k in range(mp.number_chp):
            (mp.final_costs_chp[GlobVar.final_count, k], mp.final_proposals_chp[GlobVar.final_count, k]) = chp[k].compute_proposal(mp.final_marginals, True)

        mp.update_plot_final(GlobVar.overall_count)
        mp.final_time[GlobVar.final_count] = time.clock() - start_time
        GlobVar.final_count += 1
    # step back so final_count is the index of the last final iteration
    GlobVar.final_count -= 1
    mp.solve_master(True)

    # The context manager closes the HDF5 file even if a write raises
    # (previously the file stayed open on any exception before f.close()).
    with h5py.File(Constants.path + "/lr_" + str(day) + ".hdf5", "w") as f:
        for i in range(len(hp)):
            g = f.create_group("HP_"+str(i))
            g.create_dataset("x_p", data = hp[i].pricing_res_x)
            g.create_dataset("y_p", data = hp[i].pricing_res_y)
            g.create_dataset("T_p", data = hp[i].pricing_res_T)
            g.create_dataset("P_p", data = hp[i].pricing_res_P)
            g.create_dataset("z_p", data = hp[i].pricing_res_obj)
            g.create_dataset("x_f", data = hp[i].final_res_x)
            g.create_dataset("y_f", data = hp[i].final_res_y)
            g.create_dataset("T_f", data = hp[i].final_res_T)
            g.create_dataset("P_f", data = hp[i].final_res_P)
            g.create_dataset("z_f", data = hp[i].final_res_obj)

        for j in range(len(chp)):
            g = f.create_group("CHP_"+str(j))
            g.create_dataset("x_p", data = chp[j].pricing_res_x)
            g.create_dataset("y_p", data = chp[j].pricing_res_y)
            g.create_dataset("T_p", data = chp[j].pricing_res_T)
            g.create_dataset("P_p", data = chp[j].pricing_res_P)
            g.create_dataset("Q_p", data = chp[j].pricing_res_Q)
            g.create_dataset("z_p", data = chp[j].pricing_res_obj)
            g.create_dataset("c_p", data = chp[j].pricing_res_costs)
            g.create_dataset("x_f", data = chp[j].final_res_x)
            g.create_dataset("y_f", data = chp[j].final_res_y)
            g.create_dataset("T_f", data = chp[j].final_res_T)
            g.create_dataset("P_f", data = chp[j].final_res_P)
            g.create_dataset("Q_f", data = chp[j].final_res_Q)
            g.create_dataset("z_f", data = chp[j].final_res_obj)
            g.create_dataset("c_f", data = chp[j].final_res_costs)

        f.create_dataset("pricing_costs_chp", data = mp.pricing_costs_chp)
        f.create_dataset("pricing_costs_hp", data = mp.pricing_costs_hp)
        f.create_dataset("pricing_proposals_chp", data = mp.pricing_proposals_chp)
        f.create_dataset("pricing_proposals_hp", data = mp.pricing_proposals_hp)
        f.create_dataset("final_costs_chp", data = mp.final_costs_chp)
        f.create_dataset("final_costs_hp", data = mp.final_costs_hp)
        f.create_dataset("final_proposals_chp", data = mp.final_proposals_chp)
        f.create_dataset("final_proposals_hp", data = mp.final_proposals_hp)
        f.create_dataset("initial_costs_chp", data = mp.initial_costs_chp)
        f.create_dataset("initial_costs_hp", data = mp.initial_costs_hp)
        f.create_dataset("initial_proposals_chp", data = mp.initial_proposals_chp)
        f.create_dataset("initial_proposals_hp", data = mp.initial_proposals_hp)
        f.create_dataset("marginals_sigma_chp", data = mp.marginals_sigma_chp)
        f.create_dataset("marginals_sigma_hp", data = mp.marginals_sigma_hp)
        f.create_dataset("marginals_mu_master", data = mp.marginals_mu_master)
        f.create_dataset("marginals_mu_subgradient", data = mp.marginals_mu_subgradient)
        f.create_dataset("initial_marginals", data = mp.initial_marginals)
        f.create_dataset("final_marginals", data = mp.final_marginals)
        f.create_dataset("lin_obj_values", data = mp.lin_obj_values)
        f.create_dataset("int_obj_values", data = mp.int_obj_values)
        f.create_dataset("res_lr_bounds_master", data = mp.res_lr_bounds_master)
        f.create_dataset("res_lr_bounds_sugradient", data = mp.res_lr_bounds_subgradient)
        f.create_dataset("sub_time", data = mp.sub_time)
        f.create_dataset("master_time", data = mp.master_time)
        f.create_dataset("final_time", data = mp.final_time)
        f.create_dataset("final_hp", data = mp.final_hp)
        f.create_dataset("final_chp", data = mp.final_chp)

        f.attrs["max_time_subs"] = Constants.max_time_subs
        f.attrs["max_time_master"] = Constants.max_time_master
        f.attrs["timesteps"] = Constants.timesteps
        f.attrs["t"] = Constants.t
        f.attrs["cg_iterations"] = Constants.cg_iterations
        f.attrs["inner_loops"] = Constants.inner_loops
        f.attrs["final_max"] = Constants.final_max
        f.attrs["outer_loops"] = Constants.outer_loops
        f.attrs["outer_max"] = Constants.outer_max
        f.attrs["overall_max"] = Constants.overall_max
        f.attrs["subgradient_max"] = Constants.subgradient_max
        f.attrs["eps"] = Constants.eps
        f.attrs["alpha"] = Constants.alpha
        f.attrs["t_bivalent"] = Constants.t_bivalent
        f.attrs["pricing_MIPGap"] = Constants.pricing_MIPGap
        f.attrs["final_MIPGap"] = Constants.final_MIPGap
        f.attrs["initial_in_master"] = Constants.initial_in_master
        f.attrs["pricing_time_limit"] = Constants.pricing_time_limit

    return [hp[j].final_res_T[mp.final_hp[j],95] for j in range(mp.number_hp)], [chp[j].final_res_T[mp.final_chp[j],95] for j in range(mp.number_chp)]
sys.path += ['.', '..'] import pylab as pl import data import data_simulation import data_model import consistent_model import fit_model reload(consistent_model) reload(data_model) ## create model model = data.ModelData() # generate simulated data n = 50 sigma_true = .025 a = pl.arange(0, 100, 1) pi_age_true = .0001 * (a * (100. - a) + 100.) model.input_data = data_simulation.simulated_age_intervals( 'p', n, a, pi_age_true, sigma_true) model.input_data['data_type'][ -1] = 'r' # make sure that there are multiple data types in the data set def validate_fit_data_model(): vars = data_model.data_model('validation', model, 'p',
def validate_covariate_model_fe(N=100, delta_true=3, pi_true=.01,
                                beta_true=[.5, -.5, 0.], replicate=0):
    """Simulate data with known fixed effects, fit the data model, and report accuracy.

    Seeds NumPy's random generator with 1234567 + replicate, simulates N
    negative-binomial observations whose rate is pi_true * exp(X . beta_true),
    fits the 'p' data model, plots diagnostics, prints the estimates, and
    collects quality metrics for beta, delta and the input data.

    Returns the ModelData, with the collected metrics in model.results
    (a pandas DataFrame).
    """
    # set random seed for reproducibility
    mc.np.random.seed(1234567 + replicate)

    ## generate simulated data
    a = pl.arange(0, 100, 1)
    pi_age_true = pi_true * pl.ones_like(a)

    model = data.ModelData()
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    # add fixed effect to simulated data
    X = mc.rnormal(0., 1.**-2, size=(N, len(beta_true)))
    Y_true = pl.dot(X, beta_true)

    for i, _ in enumerate(beta_true):
        model.input_data['x_%d' % i] = X[:, i]
    model.input_data['true'] = pi_true * pl.exp(Y_true)

    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, N)

    ess = model.input_data['effective_sample_size']
    true_value = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(ess * true_value, delta_true) / ess

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    fitted = model.vars['p']
    model.map, model.mcmc = fit_model.fit_data_model(fitted, iter=10000, burn=5000, thin=5, tune_interval=100)

    graphics.plot_one_ppc(fitted, 'p')
    graphics.plot_convergence_diag(model.vars)

    pl.show()

    # posterior predictive summary for every data row
    model.input_data['mu_pred'] = fitted['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = fitted['p_pred'].stats()['standard deviation']
    add_quality_metrics(model.input_data)

    # fixed-effect coefficients: truth versus posterior summary
    model.beta = pandas.DataFrame(index=fitted['X'].columns)
    model.beta['true'] = 0.
    for i, coefficient in enumerate(beta_true):
        model.beta['true']['x_%d' % i] = coefficient

    model.beta['mu_pred'] = [node.stats()['mean'] for node in fitted['beta']]
    model.beta['sigma_pred'] = [node.stats()['standard deviation'] for node in fitted['beta']]
    add_quality_metrics(model.beta)

    print('\nbeta')
    print(model.beta)

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'beta')

    # over-dispersion: truth versus posterior of exp(eta)
    delta_trace = pl.exp(fitted['eta'].trace())
    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = delta_trace.mean()
    model.delta['sigma_pred'] = delta_trace.std()
    add_quality_metrics(model.delta)

    print('delta')
    print(model.delta)
    add_to_results(model, 'delta')

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean()))
    print('effect prediction MAE: %.3f, coverage: %.2f' % (
        pl.median(pl.absolute(model.beta['abs_err'].dropna())),
        model.beta.dropna()['covered?'].mean()))
    add_to_results(model, 'input_data')
    # NOTE(review): 'beta' was already added above, so it appears twice in
    # the results - confirm whether that is intended
    add_to_results(model, 'beta')

    model.results = pandas.DataFrame(model.results)
    return model
def validate_covariate_model_dispersion(N=1000, delta_true=.15, pi_true=.01,
                                        zeta_true=[.5, -.5, 0.]):
    """Fit the data model to simulated data with dispersion covariates.

    Indicator covariates z_0..z_{k-1} are drawn with mc.rbernoulli(.5),
    the row-level dispersion is delta_true * exp(Z . zeta_true), every
    row has true level pi_true, and the observed value is a draw from
    mc.rnegative_binomial(n * p, delta * n * p) divided by the row's
    effective sample size n.  The model is then fit and the estimates of
    the data, zeta, and delta are compared to the truth.

    Parameters
    ----------
    N : number of simulated rows of input data
    delta_true : baseline dispersion, also recorded as the true delta
    pi_true : true level of every simulated row
    zeta_true : dispersion covariate coefficients, one per simulated
      covariate (read only, never modified here)

    Results
    -------
    Returns the ModelData object, with input_data, vars, map, mcmc, zeta,
    delta, and results (a DataFrame) attached.

    Side effects: draws diagnostic plots, calls pl.show(), and prints the
    zeta and delta tables plus summary lines.  No random seed is set
    here.
    """
    ## generate simulated data
    model = data.ModelData()
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    # multiply by 1.0 to turn the Bernoulli draws into floats
    Z = mc.rbernoulli(.5, size=(N, len(zeta_true))) * 1.0
    delta = delta_true * pl.exp(pl.dot(Z, zeta_true))
    for i in range(len(zeta_true)):
        model.input_data['z_%d' % i] = Z[:, i]

    model.input_data['true'] = pi_true

    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, N)

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = \
        mc.rnegative_binomial(n * p, delta * n * p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p',
                                            'all', 'total', 'all',
                                            None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(
        model.vars['p'], iter=10000, burn=5000, thin=5, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)

    pl.show()

    p_pred = model.vars['p']['p_pred']
    model.input_data['mu_pred'] = p_pred.stats()['mean']
    model.input_data['sigma_pred'] = p_pred.stats()['standard deviation']
    # presumably adds the 'abs_err', 'rel_err' and 'covered?' columns that
    # are read below -- confirm against add_quality_metrics
    add_quality_metrics(model.input_data)

    model.zeta = pandas.DataFrame(index=model.vars['p']['Z'].columns)
    model.zeta['true'] = zeta_true

    zeta_node = model.vars['p']['zeta']
    model.zeta['mu_pred'] = zeta_node.stats()['mean']
    model.zeta['sigma_pred'] = zeta_node.stats()['standard deviation']
    add_quality_metrics(model.zeta)

    # single-argument print(...) behaves identically on Python 2 and 3
    print('\nzeta')
    print(model.zeta)

    delta_trace = pl.exp(model.vars['p']['eta'].trace())
    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = delta_trace.mean()
    model.delta['sigma_pred'] = delta_trace.std()
    add_quality_metrics(model.delta)

    print('delta')
    print(model.delta)

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean()))
    print('effect prediction MAE: %.3f, coverage: %.2f' % (
        pl.median(pl.absolute(model.zeta['abs_err'].dropna())),
        model.zeta.dropna()['covered?'].mean()))

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'delta')
    add_to_results(model, 'input_data')
    add_to_results(model, 'zeta')
    model.results = pandas.DataFrame(
        model.results, columns='param bias mae mare pc'.split())

    return model
def test_predict_for_w_region_as_reference():
    """Exercise predict_for on a model whose reference area is NAHI.

    Strategy:

    1. Build a model with known mu_age, covariate values, and effect
       coefficients
    2. Assign NoStepper to every stochastic of the MCMC
    3. Sample, so that the traces hold known values
    4. Call predict_for and compare the output to the expected values
    """
    # simulate a few age-interval observations
    n_rows = 5
    sigma_true = .025
    ages = pl.arange(0, 100, 1)
    pi_age_true = .0001 * (ages * (100. - ages) + 100.)

    dm = data.ModelData()
    dm.input_data = data_simulation.simulated_age_intervals(
        'p', n_rows, ages, pi_age_true, sigma_true)
    dm.hierarchy, dm.output_template = data_simulation.small_output()

    # build the model, using NAHI / male / 2005 as the reference
    model_vars = ism.age_specific_rate(dm, 'p', 'NAHI', 'male', 2005,
                                       None, None, None)

    # "fit" the model with NoStepper on every stochastic
    mcmc = mc.MCMC(model_vars)
    for stoch in mcmc.stochastics:
        mcmc.use_step_method(mc.NoStepper, stoch)
    mcmc.sample(10)

    def predict(area):
        # prediction for a male/1990 area relative to the NAHI reference
        return covariate_model.predict_for(
            dm, dm.parameters['p'],
            'NAHI', 'male', 2005,
            area, 'male', 1990,
            0., model_vars['p'], 0., pl.inf)

    re_nodes = ['USA', 'NAHI', 'super-region-1', 'all']

    ### Case 1: constant zero random effects, zero fixed effect coefficients
    # give every node its own constant prior centered at zero
    dm.parameters['p']['random_effects'] = dict(
        (node, dict(dist='Constant', mu=0, sigma=1.e-9))
        for node in re_nodes)

    pred = predict('USA')

    fe_usa_1990 = pl.exp(0.)
    re_usa_1990 = pl.exp(0.)
    expected = model_vars['p']['mu_age'].trace() * fe_usa_1990 * re_usa_1990
    assert_almost_equal(pred, expected)

    ### Case 2: constant non-zero random effects, zero fixed effect
    ### coefficients
    # mu becomes .1, .2, .3, .4 in the order of re_nodes
    for i, node in enumerate(re_nodes):
        dm.parameters['p']['random_effects'][node]['mu'] = (i + 1.) / 10.

    pred = predict('USA')

    fe_usa_1990 = pl.exp(0.)
    re_usa_1990 = pl.exp(.1)
    expected = model_vars['p']['mu_age'].trace() * fe_usa_1990 * re_usa_1990
    assert_almost_equal(pred, expected)

    ### Case 3: random effect not constant, zero fixed effect coefficients
    # seed before predicting so the draw can be replayed afterwards
    pl.np.random.seed(12345)
    pred = predict('CAN')

    # reseed and reproduce the random effect draw by hand
    pl.np.random.seed(12345)
    fe = pl.exp(0.)
    sigma_alpha_trace = model_vars['p']['sigma_alpha'][3].trace()
    re = pl.exp(mc.rnormal(0., sigma_alpha_trace**-2))
    expected = (model_vars['p']['mu_age'].trace().T * fe * re).T
    assert_almost_equal(pred.mean(0), expected.mean(0))
def test_predict_for():
    """Exercise predict_for on models with root and USA references.

    Strategy:

    1. Build a model with known mu_age, covariate values, and effect
       coefficients
    2. Assign NoStepper to every stochastic of the MCMC
    3. Sample, so that the traces hold known values
    4. Call predict_for and compare the output to the expected values

    Returns the ModelData object with the last set of model variables
    attached as its vars attribute.
    """
    def sample_without_stepping(stoch_vars):
        # attach NoStepper to every stochastic, then draw 3 samples
        mcmc = mc.MCMC(stoch_vars)
        for stoch in mcmc.stochastics:
            mcmc.use_step_method(mc.NoStepper, stoch)
        mcmc.sample(3)

    def predict_from_root(area):
        # prediction for a male/1990 area with all/total/all as reference;
        # model_vars is looked up at call time, so this uses whichever
        # model was built most recently
        return covariate_model.predict_for(
            dm, dm.parameters['p'],
            'all', 'total', 'all',
            area, 'male', 1990,
            0., model_vars['p'], 0., pl.inf)

    # simulate a few age-interval observations
    n_rows = 5
    sigma_true = .025
    ages = pl.arange(0, 100, 1)
    pi_age_true = .0001 * (ages * (100. - ages) + 100.)

    dm = data.ModelData()
    dm.input_data = data_simulation.simulated_age_intervals(
        'p', n_rows, ages, pi_age_true, sigma_true)
    dm.hierarchy, dm.output_template = data_simulation.small_output()

    # build the model with the root of the hierarchy as reference
    model_vars = ism.age_specific_rate(dm, 'p', 'all', 'total', 'all',
                                       None, None, None)
    sample_without_stepping(model_vars)

    ### Case 1: constant zero random effects, zero fixed effect coefficients
    # give every node its own constant prior centered at zero
    dm.parameters['p']['random_effects'] = dict(
        (node, dict(dist='Constant', mu=0, sigma=1.e-9))
        for node in ['USA', 'CAN', 'NAHI', 'super-region-1', 'all'])

    pred = predict_from_root('USA')

    fe_usa_1990 = 1.
    re_usa_1990 = 1.
    expected = model_vars['p']['mu_age'].trace() * fe_usa_1990 * re_usa_1990
    assert_almost_equal(pred, expected)

    ### Case 2: constant non-zero random effects, zero fixed effect
    ### coefficients
    # mu becomes .1 for USA, .2 for NAHI, .3 for super-region-1
    for i, node in enumerate(['USA', 'NAHI', 'super-region-1']):
        dm.parameters['p']['random_effects'][node]['mu'] = (i + 1.) / 10.

    pred = predict_from_root('USA')

    fe_usa_1990 = 1.
    re_usa_1990 = pl.exp(.1 + .2 + .3)
    expected = model_vars['p']['mu_age'].trace() * fe_usa_1990 * re_usa_1990
    assert_almost_equal(pred, expected)

    ### Case 3: changing the RE of the reference area must not change the
    ### results
    dm.parameters['p']['random_effects']['all']['mu'] = 1.

    pred = predict_from_root('USA')

    fe_usa_1990 = 1.
    # same as case 2: only alpha_all was changed
    re_usa_1990 = pl.exp(.1 + .2 + .3)
    expected = model_vars['p']['mu_age'].trace() * fe_usa_1990 * re_usa_1990
    assert_almost_equal(pred, expected)

    ### Case 4: prediction for CAN includes the region and super-region
    ### effects, but not the USA effect
    pred = predict_from_root('CAN')

    fe = 1.
    re = pl.exp(0. + .2 + .3)
    assert_almost_equal(pred, model_vars['p']['mu_age'].trace() * fe * re)

    # rebuild the model with USA / male / 1990 as the reference
    model_vars = ism.age_specific_rate(dm, 'p', 'USA', 'male', 1990,
                                       None, None, None)
    sample_without_stepping(model_vars)

    # predicting for the reference itself should give back mu_age
    pi_usa = covariate_model.predict_for(
        dm, dm.parameters['p'],
        'USA', 'male', 1990,
        'USA', 'male', 1990,
        0., model_vars['p'], 0., pl.inf)
    assert_almost_equal(pi_usa, model_vars['p']['mu_age'].trace())

    ### Case 5: a constant RE prior with sigma = 0 must not crash
    for node in ['USA', 'CAN']:
        dm.parameters['p']['random_effects'][node]['sigma'] = 0.

    pred = predict_from_root('NAHI')

    dm.vars = model_vars
    return dm