Exemplo n.º 1
0
def get_prevalence_based_cc(best_fits, all_fits, calib_data):
    
    weighted_ccs_by_hfca_id_file_path = os.path.join(sim_data_dir, weighted_ccs_by_hfca_id_file)
    
    hfca_ids = get_hfca_ids()
    
    debug_p('Getting clinical cases samples based on prevalence optimal regions')
    
    weighted_ccs_model_agg_by_hfca = {} 
    for hfca_id in hfca_ids:
        hfca_id = str(hfca_id)
        weighted_ccs_model_agg_by_hfca[hfca_id] = {}
        
        cluster_ids = hfca_id_2_cluster_ids(hfca_id)
        
        for cluster_id in cluster_ids:
            
            cluster_cat = get_cluster_category(cluster_id)
            
            sims_opt_region = get_prevalence_opt_region_sims(best_fits, all_fits, cluster_id)
            
            # assume sample size is always less than the size of the population!
            sample_sims_opt_region = random.sample(sims_opt_region, sample_size) 
            
            for i, (sample_group_key, sample_sim_key) in enumerate(sample_sims_opt_region): 
                      
                sample_cc_trace = calib_data[cluster_cat][sample_group_key][sample_sim_key]
                cc_cluster_weight_factor = get_cc_cluster_weight_factor(cluster_id) # accounting for health seeking behavior data
                cc_cluster_weight_factor = (cluster_2_mean_pop(cluster_id)/(calib_node_pop + 0.0)) * cc_cluster_weight_factor # accounting for real cluster population (mean across all rounds)
                ccs_model_agg, ccs_ref_agg = get_cc_model_ref_traces(sample_cc_trace, cluster_id, cc_cluster_weight_factor)
                ccs_model_agg_unweighted, ccs_ref_agg_unweighted = get_cc_model_ref_traces(sample_cc_trace, cluster_id)
                
                if not cluster_id in weighted_ccs_model_agg_by_hfca[hfca_id]:  
                    weighted_ccs_model_agg_by_hfca[hfca_id][cluster_id] = {
                                                                           'weighted':[],
                                                                           'unweighted':[]
                                                                           }
                    
                weighted_ccs_model_agg_by_hfca[hfca_id][cluster_id]['weighted'].append(ccs_model_agg)
                weighted_ccs_model_agg_by_hfca[hfca_id][cluster_id]['unweighted'].append((sample_group_key,sample_sim_key,ccs_model_agg_unweighted))

    with open(weighted_ccs_by_hfca_id_file_path, 'w') as w_ccs_f:
        json.dump(weighted_ccs_model_agg_by_hfca, w_ccs_f, indent = 3)
        
    debug_p('DONE getting clinical cases samples based on prevalence optimal regions')
    
    debug_p('Saved clinical cases samples based on prevalence optimal regions to ' + weighted_ccs_by_hfca_id_file_path)
    
    return weighted_ccs_model_agg_by_hfca
Exemplo n.º 2
0
    def plot_weighted_cc_per_hfca(self, weighted_ccs_model_agg_by_hfca, ccs_model_agg_by_hfca_cluster_id):
        
        
        clusters_processed = 0
        for hfca_id, weighted_ccs_combos in weighted_ccs_model_agg_by_hfca.iteritems():
        
            weighted_ccs_by_bin = {}
            for i in range(0, cc_num_fold_bins):
                weighted_ccs_by_bin[i] = []
                
            
            for weighted_ccs_combo in weighted_ccs_combos:
                sum_weighted_ccs = cc_num_fold_bins * [0]

                for weighted_ccs in weighted_ccs_combo:
                    sum_weighted_ccs = np.add(sum_weighted_ccs, weighted_ccs)
                    
                for i in range(0, cc_num_fold_bins):
                    weighted_ccs_by_bin[i].append(sum_weighted_ccs[i])
        
        
            per_bottom = []
            per_top = []
            per_median = []
            
            for i in range(0, cc_num_fold_bins):
                weighted_ccs_by_bin_idx = weighted_ccs_by_bin[i]
                per_bottom.append( satp(weighted_ccs_by_bin_idx, 2.5) )
                per_top.append( satp(weighted_ccs_by_bin_idx, 97.5) )
                per_median.append( satp(weighted_ccs_by_bin_idx, 50) )
                
            '''
            debug_p('length of weighted ccs_combos array ' + str(len(weighted_ccs_combos)))
            '''
            debug_p('length of bin 0 in weighted_ccs_by_bin ' + str(len(weighted_ccs_by_bin[0])))
           
            
            for cluster_id in hfca_id_2_cluster_ids(hfca_id):
                
                fig = plt.figure(cluster_id, figsize=(9.2, 4), dpi=100, facecolor='white')
                gs = gridspec.GridSpec(1, 4)
                     
                ax = plt.subplot(gs[0:4])

                x_smooth = np.linspace(0, cc_num_fold_bins-1,60)
                
                per_bottom_smooth = spline(range(0, cc_num_fold_bins),per_bottom,x_smooth)
                per_top_smooth = spline(range(0, cc_num_fold_bins),per_top,x_smooth)
                per_median_smooth = spline(range(0, cc_num_fold_bins),per_median,x_smooth)
                
                ax.plot(x_smooth, per_bottom_smooth, alpha=1, linewidth=0.5, color = 'black', linestyle=':', label = '2.5 percentile HS weighted: prevalence space samples', marker = None)
                ax.plot(x_smooth, per_top_smooth, alpha=1, linewidth=0.5, color = 'black', linestyle=':', label = '97.5 percentile HS weighted: prevalence space samples', marker = None)
                ax.plot(x_smooth, per_median_smooth, alpha=1, linewidth=2.0, color = 'magenta', linestyle='-', label = 'median HS weighted: prevalence space samples', marker = None)
                ax.fill_between(x_smooth, per_bottom_smooth, per_top_smooth, facecolor='gray', alpha=0.5, interpolate=True)
                
                cluster_cat = get_cluster_category(cluster_id)
                
                opt_group_key = self.best_fits[cluster_id]['group_key']
                
                opt_sim_key_cc = self.best_fits[cluster_id]['cc_penalty']['sim_key']
                cc_trace_opt_cc = self.calib_data[cluster_cat][opt_group_key][opt_sim_key_cc]
                
                opt_sim_key_prev = self.best_fits[cluster_id]['mse']['sim_key']
                cc_trace_opt_prev = self.calib_data[cluster_cat][opt_group_key][opt_sim_key_prev]
                
                opt_sim_key_fit = self.best_fits[cluster_id]['fit']['sim_key']
                cc_trace_opt_fit = self.calib_data[cluster_cat][opt_group_key][opt_sim_key_fit]
            
                ccs_model_agg_cc, ccs_ref_agg = get_cc_model_ref_traces(cc_trace_opt_cc, cluster_id)
                ccs_model_agg_prev, ccs_ref_agg = get_cc_model_ref_traces(cc_trace_opt_prev, cluster_id)
                ccs_model_agg_fit, ccs_ref_agg = get_cc_model_ref_traces(cc_trace_opt_fit, cluster_id)
                
                facility = hfca_id_2_facility(hfca_id)
                ax.plot(range(0, len(ccs_model_agg_cc)), ccs_model_agg_cc, alpha=1, linewidth=1, color = 'blue', label = 'Best fit: clinical cases', marker = 's')
                ax.plot(range(0, len(ccs_model_agg_prev)), ccs_model_agg_prev, alpha=1, linewidth=1, color = 'magenta', label = 'Best fit: prevalence', marker = 'o')
                ax.plot(range(0, len(ccs_model_agg_fit)), ccs_model_agg_fit, alpha=1, linewidth=1, color = 'black', label = 'Best fit: prevalence + clinical cases', marker = '*')
                ax.plot(range(0, len(ccs_ref_agg)), ccs_ref_agg, alpha=1, linewidth=2.0, linestyle = '-', color = 'red', label = 'Observed in ' + facility, marker = None)    
                
                for i,sample_ccs in enumerate(ccs_model_agg_by_hfca_cluster_id[hfca_id][cluster_id]['unweighted']):
                      
                    if i == 0:
                        ax.plot(range(0, cc_num_fold_bins), sample_ccs[2], alpha=0.5, linewidth=0.5, color = 'magenta', label = 'Opt 5-percentile samples for cluster ' + cluster_id, marker = None)
                        #ax.plot(range(0, cc_num_fold_bins), sample_ccs[2])
                    else:
                        ax.plot(range(0, cc_num_fold_bins), sample_ccs[2], alpha=0.5, linewidth=0.5, color = 'magenta', marker = None)
        
                plt.xlabel('6-week bins', fontsize=8)
                plt.ylabel('Clinical cases', fontsize=8)
                legend = plt.legend(loc=1, fontsize=8)
            
                
                plt.xlim(0,8)
                plt.title('Clinical cases timeseries', fontsize = 8, fontweight = 'bold', color = 'black')
                plt.gca().tick_params(axis='x', labelsize=8)
                plt.gca().tick_params(axis='y', labelsize=8)
                plt.tight_layout()
                output_plot_file_path = os.path.join(self.root_sweep_dir, weighted_cc_traces_plots_dir, weighted_cc_traces_base_file_name + cluster_id + '.png')
                plt.savefig(output_plot_file_path, dpi = 300, format='png')
                plt.close()
                
                clusters_processed = clusters_processed + 1
                
                debug_p('Processed weighting and plotting clinical cases for ' + str(clusters_processed) + ' clusters')