def get_sweep_results(sim_meta_config_files, calib_file_path, tags_data_file_path):
    
    # Log in to COMPS
    comps_login()

    # find total number of simulations across the given experiment meta config files
    num_sims = 0
    for sim_meta_config_file in sim_meta_config_files:
        with open(sim_meta_config_file) as metadata_file:
            metadata = json.loads(metadata_file.read())

        num_sims = num_sims + len(metadata['sims'])
    
    

    # Download simulations locally
    # sample sim meta config file (like "C:\\Users\\Mnikolov\\Zambia-raw\\dtk-scripts\\1node\\simulations\\Sinamalina_Sinazongwe_Calibration_e9979059-33f8-e411-93f9-f0921c16b9e7.json")
    #print 'Downloading simulations from experiment ' + str(sim_meta_config_files) + '...'

    
    # simulations tag data structure: accumulates sims meta information from sims tags
    tag_data = {
        'ITN trajectory': [],
        'Drug coverage per round': [],
        'Temporary habitat scale': [],
        'Constant habitat scale': []
    }
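    # each list is presumably filled per completed sim by append_tag_data below,
    # recording which sweep values were actually covered by the experiments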
    
    
    # iterate through experiments
    calib_output = {}

    # count processed sims to update progress
    count = 0
    for sim_meta_config_file in sim_meta_config_files:
        
        
        # construct experiment directory structure
        with open(sim_meta_config_file) as metadata_file:
            metadata = json.loads(metadata_file.read())
    
        output_path = metadata['sim_root']
        exp_id = metadata['exp_id']
        exp_name = metadata['exp_name']
        
        sim_dir_map  = CompsDTKOutputParser.createSimDirectoryMap(exp_id)
        
        # get all successfully completed sims in experiment
        for sim_id, sim in metadata['sims'].items():
            
            # get path to the sim timeseries channels data
            timeseries_path = os.path.join(sim_dir_map[sim_id],'output', 'InsetChart.json')

            
            # get sim timeseries channels data; json2dict returns None if timeseries_path
            # points to a non-existent file, which is the case if the sim has not finished
            # successfully (a hedged sketch of json2dict appears after this function)
            sim_output = json2dict(timeseries_path)

            # only process successfully completed simulations
            if sim_output is None:
                continue

            
            # delete all but the specified channels
            for channel in list(sim_output['Channels'].keys()):
                if channel not in channels:
                    del sim_output['Channels'][channel]
                          
            # process specified reports
            report_channels_data = {}
            if reports_channels is not None:
                report_channels_data = process_reports(reports_channels, sim_dir_map, sim_id)
            
            # record sim meta information including sim tags
            tags_path = os.path.join(sim_dir_map[sim_id], 'tags.json')
            with open(tags_path, 'r') as f:
                tags = f.read()
            sim_meta = ast.literal_eval(tags)
            append_tag_data(sim_meta, tag_data)
        
            
            # construct sim group key and sim key
            x_temp_h = sim_meta_2_temp_h(sim_meta)
            const_h = sim_meta_2_const_h(sim_meta)
            itn_level = sim_meta_2_itn_level(sim_meta)
            drug_coverage_level = sim_meta_2_drug_cov(sim_meta)
            
            sim_key = get_sim_key(x_temp_h, const_h, itn_level, drug_coverage_level)
            sim_group_key = get_sim_group_key(itn_level, drug_coverage_level)
        

            # store sim channels data  
            if sim_group_key not in calib_output:
                calib_output[sim_group_key] = {}

            calib_output[sim_group_key][sim_key] = {
                                                    # can add/remove data entries depending on needs
                                                    'prevalence': sim_output['Channels']['New Diagnostic Prevalence']['Data'],
                                                    # .get() avoids a KeyError when no reinfection report was processed
                                                    'reinfections': report_channels_data.get('reinfections'),
                                                    'meta': sim_meta,
                                                    'sim_id': sim_id
                                                    }
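            # resulting structure: calib_output[sim_group_key][sim_key] ->
            # {'prevalence': [...], 'reinfections': ..., 'meta': {...}, 'sim_id': ...}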
            # update and display processing progress
            count = count + 1
            percent_complete = 100*count/(num_sims + 0.0)
            sys.stdout.write('\r')
            sys.stdout.write('%.2f %%' % percent_complete)
            #sys.stdout.write('%d' % count)
            sys.stdout.flush()
        
    print ""
    print "Writing files..."
    
    with open(calib_file_path, 'w') as calib_f:
        json.dump(calib_output, calib_f)
        print str(len(calib_output)) + ' simulation results saved to ' + calib_file_path

    with open(tags_data_file_path, 'w') as tags_f:
        json.dump(tag_data, tags_f)
        print 'Meta data tags saved to ' + tags_data_file_path

    print ""

    # return the accumulated results rather than a closed file handle
    return calib_output
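
The snippet above assumes a json2dict helper that returns None for missing output files. A minimal sketch of such a helper (not part of the original code) could look like this:

import json
import os

def json2dict(file_path):
    # hedged sketch: parse a JSON file into a dict; return None when the file does
    # not exist, which is how unfinished sims are skipped in get_sweep_results
    if not os.path.isfile(file_path):
        return None
    with open(file_path, 'r') as json_file:
        return json.load(json_file)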
Example #2
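# Fragment: this snippet presumably runs inside a loop over sims loaded from the
# calibration results file written above (here named calib_data), extracting the
# 'New Clinical Cases' channel into a cc[sim_group_key][sim_key] structure.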
        
            sim_output = sim['output']
            
            x_temp_h = float(sim['meta']['x_Temporary_Larval_Habitat'])
            
            const_h_struct = ast.literal_eval(sim['meta']['scale_larval_habitats_single'])
            const_h = const_h_struct[0][1][1]
            
            itn_level_struct = ast.literal_eval(sim['meta']['add_ITN_mult'])
            itn_level = itn_level_struct[0][1][0][0][1]
                   
            drug_coverage_level_struct = ast.literal_eval(sim['meta']['add_drug_multi_campaigns'])
            drug_coverage_level = drug_coverage_level_struct[0][1][0]['coverage']
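            # note: the magic indices above mirror how the sweep builder serialized the
            # campaign tags; e.g. add_drug_multi_campaigns is assumed to literal_eval to a
            # nested list whose first entry holds a list of campaign dicts with a 'coverage' key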
            
            sim_key = get_sim_key(x_temp_h, const_h, itn_level, drug_coverage_level)
            sim_group_key = get_sim_group_key(itn_level, drug_coverage_level)

            sim_data = sim_output['Channels']['New Clinical Cases']['Data']
            
            
            if sim_group_key not in cc:
                cc[sim_group_key] = {}

            cc[sim_group_key][sim_key] = sim_data
            
        
        del calib_data
        gc.collect()
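
Both examples index sims via get_sim_key and get_sim_group_key. A plausible sketch of these helpers (hypothetical; the real implementation is not shown here):

def get_sim_key(x_temp_h, const_h, itn_level, drug_coverage_level):
    # hypothetical: identify a sim by all four swept parameter values
    return '_'.join(str(v) for v in (x_temp_h, const_h, itn_level, drug_coverage_level))

def get_sim_group_key(itn_level, drug_coverage_level):
    # hypothetical: sims sharing ITN and drug coverage levels form one group
    return str(itn_level) + '_' + str(drug_coverage_level)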

Example #3
    def fit(self):
        
        models_list_prime = calib_data_2_models_list(self.calib_data)
                
        best_fits = {}
        all_fits = {}
        #all_fits = {'fit':{'min_residual':float('inf')}, }

        all_fits['min_residual'] = float('inf')
        all_fits['max_residual'] = 0.0

        all_fits['models'] = {}

        debug_p('category ' + self.category)
        
        for idx,cluster_id in enumerate(c2c(self.category)):
        
            models_list = copy.deepcopy(models_list_prime)
            
            print "Processing cluster " + cluster_id + "."
            debug_p('Processing cluster ' + cluster_id + " in " + self.category + ".")
            
            itn_traj = cluster_2_itn_traj(cluster_id)
            drug_cov = cluster_2_drug_cov(cluster_id)
            
            # prune models to the ones matching prior data
            cluster_models = []
            for model in models_list:
                model_meta = model.get_meta()
                if model_meta['group_key'] == get_sim_group_key(itn_traj, drug_cov):
                    #debug_p('model id before kariba conversion ' + str(model.get_model_id()))
                    group_key = model_meta['group_key']
                    sim_key = model_meta['sim_key']

                    model = KaribaModel(model, self.calib_data[group_key][sim_key], cluster_id, all_fits = self.fit_terms)
                    
                    #model = kariba_model
                    #debug_p('model id after kariba conversion ' + str(model.get_model_id()))
                    cluster_models.append(model)
                
            surv_data = {}
            all_ref_objs_found = True
            for channel_code in objectives_channel_codes:
                if channel_code == 'prevalence':
                    prev_data = c2p(cluster_id)
                    if prev_data:
                        surv_data[channel_code] = prev_data
                    else:
                        msg = 'Prevalence objective reference data was not found!\nSkipping cluster ' + cluster_id + ' fit!'
                        print msg
                        all_ref_objs_found = False
                else:
                    msg = "Channel objective" + channel_code + " not implemented yet!\nSetting objective reference data to None."
                    warn_p(msg)
                    surv_data[channel_code] = None
            
            # one of the reference objective channels was not found; skipping cluster fit!
            if not all_ref_objs_found:
                continue
                        
            ref = d2f(surv_data)
            
            # adjust highest possible fit to account for RDT+ model in dtk not reflecting reality at the upper end
            obj_prev = ref.get_obj_by_name('prevalence')
            d_points = obj_prev.get_points()
            obj_prev.set_points([min(point, rdt_max) for point in d_points])
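            # rdt_max is presumably a module-level constant giving the maximum RDT+
            # prevalence the dtk model is trusted to reproduce; reference points above
            # it are clipped so they do not dominate the residual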
            
            
            fitting_set = FittingSet(cluster_id, cluster_models, ref)
            
            if load_prevalence_mse:
                fit = Fit(fitting_set, type = 'mmse_distance_cached')
            else:
                fit = Fit(fitting_set)
            
            best_fit_model = fit.best_fit_mmse_distance()
            
            min_residual = fit.get_min_residual()
            max_residual = fit.get_max_residual()
            
            if min_residual < all_fits['min_residual']:
                all_fits['min_residual'] = min_residual

            if max_residual > all_fits['max_residual']:
                all_fits['max_residual'] = max_residual
            
            if best_fit_model:
                temp_h, const_h, itn_level, drug_coverage_level = get_model_params(best_fit_model)
                best_fit_meta = best_fit_model.get_meta()
                best_fits[cluster_id] = {}
                best_fits[cluster_id]['habs'] = {}
                best_fits[cluster_id]['habs']['const_h'] = const_h 
                best_fits[cluster_id]['habs']['temp_h'] = temp_h
                best_fits[cluster_id]['ITN_cov'] = itn_level
                best_fits[cluster_id]['category'] = self.category
                best_fits[cluster_id]['MSAT_cov'] = drug_coverage_level
                best_fits[cluster_id]['sim_id'] = best_fit_meta['sim_id']
                best_fits[cluster_id]['sim_key'] = best_fit_meta['sim_key'] 
                best_fits[cluster_id]['group_key'] = best_fit_meta['group_key']
                best_fits[cluster_id]['fit_value'] = best_fit_model.get_fit_val()
                best_fits[cluster_id]['sim_avg_reinfection_rate'] = best_fit_model.get_sim_avg_reinfection_rate()
                best_fits[cluster_id]['ref_avg_reinfection_rate'] = best_fit_model.get_ref_avg_reinfection_rate()
                best_fits[cluster_id]['prevalence'] = best_fit_model.get_objective_by_name('prevalence').get_points()
            
                # redundancy; to be refactored via FitEntry class                
                best_fits[cluster_id]['fit'] = {}
                best_fits[cluster_id]['fit']['value'] = best_fit_model.get_fit_val()
                best_fits[cluster_id]['fit']['temp_h'] = temp_h
                best_fits[cluster_id]['fit']['const_h'] = const_h
                best_fits[cluster_id]['fit']['ITN_cov'] = itn_level
                best_fits[cluster_id]['fit']['MSAT_cov'] = drug_coverage_level
                best_fits[cluster_id]['fit']['sim_id'] = best_fit_meta['sim_id']
                best_fits[cluster_id]['fit']['sim_key'] = best_fit_meta['sim_key']
                
                
                best_fits[cluster_id]['mse'] = {}
                best_fits[cluster_id]['mse']['value'] = fit.get_min_mses()['prevalence']['value'] # get mmse for objective prevalence
                best_fit_mse_model = fit.get_min_mses()['prevalence']['model']
                temp_h, const_h, itn_level, drug_coverage_level = get_model_params(best_fit_mse_model)
                model_meta_data = best_fit_mse_model.get_meta()
                best_fits[cluster_id]['mse']['temp_h'] = temp_h
                best_fits[cluster_id]['mse']['const_h'] = const_h
                best_fits[cluster_id]['mse']['ITN_cov'] = itn_level
                best_fits[cluster_id]['mse']['MSAT_cov'] = drug_coverage_level
                best_fits[cluster_id]['mse']['sim_id'] = model_meta_data['sim_id']
                best_fits[cluster_id]['mse']['sim_key'] = model_meta_data['sim_key']
                
                best_fits[cluster_id]['cc_penalty'] = {}
                best_fits[cluster_id]['cc_penalty']['value'] = fit.get_min_penalties()['prevalence']['value'] # get clinical penalty for objective prevalence; at present this is just the clinical cases penalty; if reinfection is considered the code needs to be adjusted
                best_fit_cc_penalty_model = fit.get_min_penalties()['prevalence']['model']
                temp_h, const_h, itn_level, drug_coverage_level = get_model_params(best_fit_cc_penalty_model)
                model_meta_data = best_fit_cc_penalty_model.get_meta()
                best_fits[cluster_id]['cc_penalty']['temp_h'] = temp_h
                best_fits[cluster_id]['cc_penalty']['const_h'] = const_h
                best_fits[cluster_id]['cc_penalty']['ITN_cov'] = itn_level
                best_fits[cluster_id]['cc_penalty']['MSAT_cov'] = drug_coverage_level
                best_fits[cluster_id]['cc_penalty']['sim_id'] = model_meta_data['sim_id']
                best_fits[cluster_id]['cc_penalty']['sim_key'] = model_meta_data['sim_key']
                  
    
                rho = best_fit_model.get_rho()
                p_val = best_fit_model.get_p_val()
                
                if rho and p_val:
                    best_fits[cluster_id]['rho'] = rho
                    best_fits[cluster_id]['p_val'] = p_val
                    
                    debug_p('rho ' + str(rho))
                    debug_p('p_val ' + str(p_val))
                
                
            else:
                msg = "something went wrong and the best fit for " + cluster_id + " could not be found."
                warn_p(msg)
                
            
            all_fits['models'][cluster_id] = cluster_models
            #all_fits['models'][cluster_id] = fit.get_fitting_set_models()
            
            print str(idx+1) + " clusters have been processed."
            debug_p( str(idx+1) + " clusters have been processed in category " + self.category)
            
            '''
            if idx > 0:
                break 
            '''      
        return best_fits, all_fits
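
For context, a hypothetical caller (not part of the original code) might persist the two structures returned by fit() like this, assuming the stored values are plain Python types:

import json

def save_fit_results(fitter, best_fits_path, all_fits_path):
    # hedged sketch: run the fit and write both result dictionaries out;
    # the helper name and paths are illustrative only
    best_fits, all_fits = fitter.fit()
    with open(best_fits_path, 'w') as f:
        json.dump(best_fits, f, indent=2)
    # all_fits['models'] holds model objects and is not JSON serializable,
    # so only the residual bounds are persisted here
    with open(all_fits_path, 'w') as f:
        json.dump({'min_residual': all_fits['min_residual'],
                   'max_residual': all_fits['max_residual']}, f, indent=2)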