def get_sweep_results(sim_meta_config_files, calib_file_path, tags_data_file_path):
    """Collect the outputs of completed sweep simulations and dump them to JSON.

    For every experiment metadata file, each simulation whose
    ``output/InsetChart.json`` could be loaded is reduced to the channels
    listed in the module-level ``channels``, combined with its report data
    and the meta information read from its ``tags.json``, and stored under
    ``calib_output[sim_group_key][sim_key]``.

    Args:
        sim_meta_config_files: paths of experiment metadata JSON files; each
            one must provide the keys ``exp_id`` and ``sims``.
        calib_file_path: path the collected simulation results are written to
            (JSON).
        tags_data_file_path: path the accumulated tag data is written to
            (JSON).

    Returns:
        The file object that was used to write ``calib_file_path``. It is
        already closed when it is returned; the return value is kept only
        for backward compatibility with existing callers.
    """
    # Login to COMPs
    comps_login()

    # simulations tag data structure: accumulates sims meta information from
    # sims tags (filled in by append_tag_data)
    tag_data = {
        'ITN trajectory': [],
        'Drug coverage per round': [],
        'Temporary habitat scale': [],
        'Constant habitat scale': [],
    }

    calib_output = {}

    # iterate through experiments
    for sim_meta_config_file in sim_meta_config_files:
        with open(sim_meta_config_file) as metadata_file:
            metadata = json.load(metadata_file)

        sim_dir_map = CompsDTKOutputParser.createSimDirectoryMap(metadata['exp_id'])

        for sim_id in metadata['sims']:
            sim_dir = sim_dir_map[sim_id]

            # json2dict returns None if the path points to a non-existing
            # file, which is the case if the sim has not successfully
            # finished; only successfully completed simulations are kept
            sim_output = json2dict(os.path.join(sim_dir, 'output', 'InsetChart.json'))
            if sim_output is None:
                continue

            # delete all but the specified channels; iterate over a snapshot
            # of the keys because the dict is modified inside the loop
            sim_channels = sim_output['Channels']
            for channel in list(sim_channels):
                if channel not in channels:
                    del sim_channels[channel]

            # process specified reports; without configured reports there is
            # no reinfection data, so None is stored instead of failing on a
            # missing key
            reinfections = None
            if reports_channels is not None:
                report_channels_data = process_reports(reports_channels, sim_dir_map, sim_id)
                reinfections = report_channels_data['reinfections']

            # record sim meta information including sim tags; the file
            # content is parsed as a Python literal
            with open(os.path.join(sim_dir, 'tags.json'), 'r') as tags_file:
                sim_meta = ast.literal_eval(tags_file.read())
            append_tag_data(sim_meta, tag_data)

            # construct sim group key and sim key
            x_temp_h = sim_meta_2_temp_h(sim_meta)
            const_h = sim_meta_2_const_h(sim_meta)
            itn_level = sim_meta_2_itn_level(sim_meta)
            drug_coverage_level = sim_meta_2_drug_cov(sim_meta)

            sim_key = get_sim_key(x_temp_h, const_h, itn_level, drug_coverage_level)
            sim_group_key = get_sim_group_key(itn_level, drug_coverage_level)

            # store sim channels data
            # can add/remove data entries depending on needs
            calib_output.setdefault(sim_group_key, {})[sim_key] = {
                'prevalence': sim_channels['New Diagnostic Prevalence']['Data'],
                'reinfections': reinfections,
                'meta': sim_meta,
                'sim_id': sim_id,
            }

    print("")
    print("Writing files...")

    with open(calib_file_path, 'w') as calib_f:
        json.dump(calib_output, calib_f)
    # NOTE(review): len(calib_output) counts sim groups, not individual sims
    print(str(len(calib_output)) + ' simulation results saved to ' + calib_file_path)

    with open(tags_data_file_path, 'w') as tags_f:
        json.dump(tag_data, tags_f)
    print('Meta data tags saved to ' + tags_data_file_path)

    print("")

    return calib_f
sim_output = sim['output'] x_temp_h = float(sim['meta']['x_Temporary_Larval_Habitat']) const_h_struct = ast.literal_eval(sim['meta']['scale_larval_habitats_single']) const_h = const_h_struct[0][1][1] itn_level_struct = ast.literal_eval(sim['meta']['add_ITN_mult']) itn_level = itn_level_struct[0][1][0][0][1] drug_coverage_level_struct = ast.literal_eval(sim['meta']['add_drug_multi_campaigns']) drug_coverage_level = drug_coverage_level_struct[0][1][0]['coverage'] sim_key = get_sim_key(x_temp_h, const_h, itn_level, drug_coverage_level) sim_group_key = get_sim_group_key(itn_level, drug_coverage_level) sim_data = sim_output['Channels']['New Clinical Cases']['Data'] if not sim_group_key in cc: cc[sim_group_key] = {} cc[sim_group_key][sim_key] = sim_data del calib_data gc.collect()
def fit(self):
    """Fit the calibration models to every cluster of ``self.category``.

    For each cluster id returned by ``c2c(self.category)`` the models whose
    group key matches the cluster's ITN trajectory / drug coverage are wrapped
    in ``KaribaModel`` and fitted against the cluster's prevalence reference
    data. Clusters without prevalence reference data are skipped.

    Returns:
        tuple: ``(best_fits, all_fits)``.

        ``best_fits`` maps each cluster id with a best-fit model to a dict
        describing that model (habitat scales, coverages, sim identifiers,
        fit value, reinfection rates, prevalence points) plus the sub-dicts
        ``'fit'``, ``'mse'`` and ``'cc_penalty'``, and ``'rho'`` / ``'p_val'``
        when both are truthy.

        ``all_fits`` holds ``'min_residual'`` and ``'max_residual'`` over all
        fitted clusters and, under ``'models'``, the list of candidate models
        per fitted cluster id.
    """

    def _fit_entry(model, value):
        # Summarize a model's parameters and sim identifiers next to `value`.
        temp_h, const_h, itn_level, drug_coverage_level = get_model_params(model)
        meta = model.get_meta()
        return {
            'value': value,
            'temp_h': temp_h,
            'const_h': const_h,
            'ITN_cov': itn_level,
            'MSAT_cov': drug_coverage_level,
            'sim_id': meta['sim_id'],
            'sim_key': meta['sim_key'],
        }

    models_list_prime = calib_data_2_models_list(self.calib_data)

    best_fits = {}
    all_fits = {
        'min_residual': float('inf'),
        'max_residual': 0.0,
        'models': {},
    }

    debug_p('category ' + self.category)

    for idx, cluster_id in enumerate(c2c(self.category)):

        # every cluster works on its own copy of the model list
        models_list = copy.deepcopy(models_list_prime)

        print("Processing cluster " + cluster_id + ".")
        debug_p('Processing cluster ' + cluster_id + " in " + self.category + ".")

        itn_traj = cluster_2_itn_traj(cluster_id)
        drug_cov = cluster_2_drug_cov(cluster_id)

        # the group key only depends on the cluster, so compute it once
        # instead of once per model
        cluster_group_key = get_sim_group_key(itn_traj, drug_cov)

        # prune models to the ones matching prior data
        cluster_models = []
        for model in models_list:
            model_meta = model.get_meta()
            if model_meta['group_key'] != cluster_group_key:
                continue
            group_key = model_meta['group_key']
            sim_key = model_meta['sim_key']
            cluster_models.append(
                KaribaModel(model, self.calib_data[group_key][sim_key], cluster_id, all_fits=self.fit_terms)
            )

        # gather the reference data of every objective channel
        surv_data = {}
        all_ref_objs_found = True
        for channel_code in objectives_channel_codes:
            if channel_code == 'prevalence':
                prev_data = c2p(cluster_id)
                if prev_data:
                    surv_data[channel_code] = prev_data
                else:
                    msg = 'Prevalence objective reference data was not found!\n Skipping cluster ' + cluster_id + ' fit!'
                    print(msg)
                    all_ref_objs_found = False
            else:
                msg = "Channel objective " + channel_code + " not implemented yet!\nSetting objective reference data to None."
                warn_p(msg)
                surv_data[channel_code] = None

        # one of the reference objective channels was not found; skipping cluster fit!
        if not all_ref_objs_found:
            continue

        ref = d2f(surv_data)

        # adjust highest possible fit to account for RDT+ model in dtk not
        # reflecting reality at the upper end
        obj_prev = ref.get_obj_by_name('prevalence')
        d_points = obj_prev.get_points()
        obj_prev.set_points([min(point, rdt_max) for point in d_points])

        fitting_set = FittingSet(cluster_id, cluster_models, ref)

        if load_prevalence_mse:
            cluster_fit = Fit(fitting_set, type='mmse_distance_cached')
        else:
            cluster_fit = Fit(fitting_set)

        best_fit_model = cluster_fit.best_fit_mmse_distance()

        # track the residual range over all fitted clusters
        all_fits['min_residual'] = min(all_fits['min_residual'], cluster_fit.get_min_residual())
        all_fits['max_residual'] = max(all_fits['max_residual'], cluster_fit.get_max_residual())

        if best_fit_model:
            fit_entry = _fit_entry(best_fit_model, best_fit_model.get_fit_val())

            # mmse and clinical penalty for objective prevalence; at present
            # the penalty is just the clinical cases penalty; if reinfection
            # is considered the code needs to be adjusted
            min_mse = cluster_fit.get_min_mses()['prevalence']
            min_penalty = cluster_fit.get_min_penalties()['prevalence']

            cluster_best = {
                'habs': {
                    'const_h': fit_entry['const_h'],
                    'temp_h': fit_entry['temp_h'],
                },
                'ITN_cov': fit_entry['ITN_cov'],
                'category': self.category,
                'MSAT_cov': fit_entry['MSAT_cov'],
                'sim_id': fit_entry['sim_id'],
                'sim_key': fit_entry['sim_key'],
                'group_key': best_fit_model.get_meta()['group_key'],
                'fit_value': fit_entry['value'],
                'sim_avg_reinfection_rate': best_fit_model.get_sim_avg_reinfection_rate(),
                'ref_avg_reinfection_rate': best_fit_model.get_ref_avg_reinfection_rate(),
                'prevalence': best_fit_model.get_objective_by_name('prevalence').get_points(),
                # redundancy; to be refactored via FitEntry class
                'fit': fit_entry,
                'mse': _fit_entry(min_mse['model'], min_mse['value']),
                'cc_penalty': _fit_entry(min_penalty['model'], min_penalty['value']),
            }
            best_fits[cluster_id] = cluster_best

            rho = best_fit_model.get_rho()
            p_val = best_fit_model.get_p_val()

            # NOTE(review): truthiness also rejects a rho or p-value of
            # exactly 0 -- confirm whether "is not None" is what is meant
            if rho and p_val:
                cluster_best['rho'] = rho
                cluster_best['p_val'] = p_val
                debug_p('rho' + str(rho))
                debug_p('p_val' + str(p_val))
        else:
            msg = "something went wrong and the best fit for " + cluster_id + " could not be found."
            warn_p(msg)

        all_fits['models'][cluster_id] = cluster_models

        print(str(idx + 1) + " clusters have been processed.")
        debug_p(str(idx + 1) + " clusters have been processed in category " + self.category)

    return best_fits, all_fits