def get_colorbar_ticks(self, min_z, max_z, z): blevels = [] ticks = [min_z] current = 0.1 previous = 0.0 index = 0 self.generate_blevel_sequence(blevels, current, previous, index) for blevel in blevels: tick = min_z + blevel*min_z if tick <= max_z: ticks.append(tick) # not efficient; but there are many parts of this that aren't; # need to refactor at some point if speed is an issue # avoid premature optimization ticks.append(satp(z, sample_size_percentile)) return sorted(ticks)
def plot_calib_err_surfaces(self, err_surface_types): count = 0 min_residual = self.residuals['min'] max_residual = self.residuals['max'] for cluster_id, cluster_record in self.best_fits.iteritems(): debug_p('Plotting error surface for cluster ' + cluster_id + ' in category ' + self.category) fig_width = len(err_surface_types)*4.35 fig = plt.figure(cluster_id, figsize=(fig_width, 4), dpi=300, facecolor='white') #debug_p('error surface types length' + str(len(err_surface_types))) #debug_p('fig width' + str(fig_width)) gs = gridspec.GridSpec(1, 3) error_points = {} title_ITN = 'ITN distribution: ' title_drug_coverage = 'drug coverage: ' opt_fit = {} opt_sim_key = cluster_record['sim_key'] opt_group_key = cluster_record['group_key'] opt_const_h = cluster_record['habs']['const_h'] opt_x_temp_h = cluster_record['habs']['temp_h'] for err_surface_type in err_surface_types: opt_fit[err_surface_type] = {} opt_fit[err_surface_type]['const_h'] = cluster_record[err_surface_type]['const_h'] opt_fit[err_surface_type]['temp_h'] = cluster_record[err_surface_type]['temp_h'] opt_fit[err_surface_type]['value'] = cluster_record[err_surface_type]['value'] if err_surface_type == 'cc_penalty': opt_fit[err_surface_type]['value'] = opt_fit[err_surface_type]['value']*(math.pow(cc_weight, -1)) # opt_fit of penalties contains the weighted value; hence we reverse the weighting opt_neigh_fits = [] for sim_key,fit_entry in self.all_fits[cluster_id].iteritems(): if sim_key == 'min_terms' or sim_key == 'max_terms': continue x_temp_h = fit_entry['x_temp_h'] const_h = fit_entry['const_h'] fit_val = fit_entry['fit_val'] mse = fit_entry['fit_terms']['mse'] cc_penalty = get_cc_penalty(fit_entry) itn_level = fit_entry['itn_level'] drug_coverage_level = fit_entry['drug_cov'] group_key = fit_entry['group_key'] if group_key not in error_points: error_points[group_key] = { 'x_temp_h':[], 'const_h':[], 'fit':[], 'cc_penalty':[], 'mse':[], 'title': title_ITN + itn_level + "; " + title_drug_coverage + str(drug_coverage_level), 'itn_level':itn_level, 'drug_coverage':drug_coverage_level } error_points[group_key]['x_temp_h'].append(x_temp_h) error_points[group_key]['const_h'].append(const_h) error_points[group_key]['fit'].append(fit_val) error_points[group_key]['mse'].append(mse) error_points[group_key]['cc_penalty'].append(cc_penalty) ymax = 10 scale_int = np.array(range(1,10)) pal = cm = plt.get_cmap('nipy_spectral') cNorm = colors.Normalize(vmin=0, vmax=ymax) #scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=pal) scalarMap = b2mpl.get_map('Spectral', 'Diverging', 5).mpl_colors lgds = [] for i,(err_surface_type, err_surface_style) in enumerate(err_surface_types.iteritems()): for j,group_key in enumerate(error_points.keys()): itn_level = error_points[group_key]['itn_level'] drug_coverage = error_points[group_key]['drug_coverage'] # currently assume opt_group_key is the same for all err_surface_types if group_key == opt_group_key: #debug_p('plot at position (0, ' + str(i) + ') in grid') plt.subplot(gs[0,i]) x = error_points[group_key]['x_temp_h'] y = error_points[group_key]['const_h'] z = error_points[group_key][err_surface_type] #print len(z) res = 125 ttl = err_surface_style['title'] min_x = np.min(x) min_y = np.min(y) min_z = np.min(z) max_x = np.max(x) max_y = np.max(y) max_z = np.max(z) #f = interpolate.interp2d(x, y, z) xi = np.linspace(min_x, max_x , res) yi = np.linspace(min_y, max_y , res) zi = griddata(x,y,z,xi,yi) #xig, yig = np.meshgrid(xi, yi) #zig = f(xi,yi) #rbf = Rbf(x, y, z, epsilon=2) #zig = rbf(xig, yig) blevels = self.get_colorbar_ticks(min_z, max_z, z) num_colors = len(blevels)-1 from matplotlib.colors import BoundaryNorm cmap2 = self.custom_cmap(num_colors, mincol='DarkBlue', midcol='CornflowerBlue', maxcol='w') cmap2.set_over('0.7') # light gray bnorm = BoundaryNorm(blevels, ncolors = num_colors, clip = False) #rmse_pl = plt.contourf(xi,yi,zi,15,cmap=plt.cm.hot) #rmse_pl = plt.pcolor(xi, yi, zi, cmap=plt.get_cmap('RdYlGn_r'), vmin=0.475, vmax=0.8) #rmse_pl = plt.pcolor(xi, yi, zi, cmap=plt.get_cmap('RdYlGn_r')) #rmse_pl = plt.pcolor(xi, yi, zi, cmap=plt.get_cmap('cool'), vmin = min_residual, vmax = max_residual) #rmse_pl = plt.contourf(xi, yi, zi, cmap=plt.get_cmap('cool'), vmin = min_z, vmax = max_z, levels = blevels, norm = bnorm, extend = 'both') rmse_pl = plt.contourf(xi, yi, zi, cmap=cmap2, vmin = min_z, vmax = max_z, levels = blevels, norm = bnorm, extend = 'both') #rmse_pl = plt.contourf(xi,yi,zi,15,cmap=plt.get_cmap('paired')) #rmse_pl.cmap.set_over('black') #rmse_pl.cmap.set_under('grey') max_blevel_in_sample = 0 for blevel in blevels: if blevel <= satp(z, sample_size_percentile) and blevel > max_blevel_in_sample: max_blevel_in_sample = blevel pc = plt.contour(xi,yi,zi, levels=[max_blevel_in_sample], colors='r', linewidth=0, alpha=0.5) b_per_s = pc.collections[0].get_paths() count_labels = 0 for per in range(len(b_per_s)): b_per_s_x = b_per_s[per].vertices[:,0] b_per_s_y = b_per_s[per].vertices[:,1] if count_labels == 0: plt.fill(b_per_s_x,b_per_s_y, 'magenta', linestyle='solid', alpha=0.3, label = 'Opt 5-percentile: ' + err_surface_style['title']) count_labels = count_labels + 1 else: plt.fill(b_per_s_x,b_per_s_y, 'magenta', linestyle='solid', alpha=0.3) cb = plt.colorbar(rmse_pl, ticks = blevels, spacing='uniform') cb.set_label(ttl + ' residual', fontsize=8) cb.ax.tick_params(labelsize=8) #plt.scatter(x, y, 10, z, cmap=cmap2, vmin = min_z, vmax = max_z, norm = bnorm) level1_opt_neighs_label = False level2_opt_neighs_label = False level3_opt_neighs_label = False # plot all optimal markers on each surface for (opt_err_surface_type, opt_err_surface_style) in err_surface_types.iteritems(): plt.scatter(opt_fit[opt_err_surface_type]['temp_h'], opt_fit[opt_err_surface_type]['const_h'], c = 'red', marker = opt_err_surface_style['marker'], s = 60, facecolor='none', edgecolor='black', zorder=100, label= opt_err_surface_style['title'] + ' best fit') ''' for k,fit_val in enumerate(z): if fit_val < opt_fit[err_surface_type]['value'] + opt_fit[err_surface_type]['value']*subopt_plots_threshold: if not level1_opt_neighs_label: label = '< opt + 0.1opt' level1_opt_neighs_label = True else: label = None #plt.scatter(x[k], y[k], 10, fit_val, marker = 'd', linewidth = 0.75, color = 'green', label = label) elif fit_val < opt_fit[err_surface_type]['value'] + 2*opt_fit[err_surface_type]['value']*subopt_plots_threshold: if not level2_opt_neighs_label: label = '< opt + 0.2opt' level2_opt_neighs_label = True else: label = None #plt.scatter(x[k], y[k], 10, fit_val, marker = 'o', linewidth = 0.75, color = 'blue', label = label) elif fit_val < opt_fit[err_surface_type]['value'] + 3*opt_fit[err_surface_type]['value']*subopt_plots_threshold: if not level3_opt_neighs_label: label = '< opt + 0.3opt' level3_opt_neighs_label = True else: label = None #plt.scatter(x[k], y[k], 10, fit_val, marker = 'x', linewidth = 0.75, color = 'red', label = label) ''' #plt.title(ttl, fontsize = 8, fontweight = 'bold', color = 'white', backgroundcolor = scalarMap.to_rgba(scale_int[itn_levels_2_sbplts[itn_level]])) plt.title(ttl, fontsize = 8, fontweight = 'bold', color = 'black') plt.xlabel('All habitats scale', fontsize=8) plt.ylabel('Constant habitat scale', fontsize=8) plt.xlim(min_x+0.1, max_x+0.1) plt.ylim(min_y+0.1, max_y+0.1) #plt.ylim(0.01, 14) plt.gca().tick_params(axis='x', labelsize=8) plt.gca().tick_params(axis='y', labelsize=8) ''' count_traces = 0 # NEED TO update to new FIT_ENTRY DATA STRUCT IF REUSED for fit_entry in opt_neigh_fits: x_temp_h = fit_entry['x_temp_h'] const_h = fit_entry['const_h'] sim_key = fit_entry['sim_key'] marker = self.get_marker(sim_key, count_traces) plt.scatter(x_temp_h, const_h, c = 'black', marker = marker, s = 20, facecolor='none', zorder=100) count_traces = count_traces + 1 ''' #plt.subplot(gs[itn_levels_2_sbplts[best_fit_itn_level], 0]) #debug_p('plot optimal at position (0, ' + str(i) + ') in grid') plt.subplot(gs[0,i]) cluster_record = self.best_fits[cluster_id] opt_itn = cluster_record['ITN_cov'] opt_drug = cluster_record['MSAT_cov'] #plt.annotate(opt_fit_value, opt_x_temp_h, opt_const_h) #lgds.append(plt.legend(bbox_to_anchor=(0., 1, 1., .1), loc=2, ncol=1, borderaxespad=0., fontsize=8)) lgds.append(plt.legend(ncol=1,loc='upper center', bbox_to_anchor=(0.,-0.15), borderaxespad=0., fontsize=8, mode='expand')) plt.tight_layout() output_plot_file_path = os.path.join(self.root_sweep_dir, err_surfaces_plots_dir, err_surfaces_base_file_name + cluster_id +'.png') plt.savefig(output_plot_file_path, dpi = 300, format='png', bbox_extra_artists=lgds, bbox_inches='tight') plt.close() count = count + 1
def plot_weighted_cc_per_hfca(self, weighted_ccs_model_agg_by_hfca, ccs_model_agg_by_hfca_cluster_id): clusters_processed = 0 for hfca_id, weighted_ccs_combos in weighted_ccs_model_agg_by_hfca.iteritems(): weighted_ccs_by_bin = {} for i in range(0, cc_num_fold_bins): weighted_ccs_by_bin[i] = [] for weighted_ccs_combo in weighted_ccs_combos: sum_weighted_ccs = cc_num_fold_bins * [0] for weighted_ccs in weighted_ccs_combo: sum_weighted_ccs = np.add(sum_weighted_ccs, weighted_ccs) for i in range(0, cc_num_fold_bins): weighted_ccs_by_bin[i].append(sum_weighted_ccs[i]) per_bottom = [] per_top = [] per_median = [] for i in range(0, cc_num_fold_bins): weighted_ccs_by_bin_idx = weighted_ccs_by_bin[i] per_bottom.append( satp(weighted_ccs_by_bin_idx, 2.5) ) per_top.append( satp(weighted_ccs_by_bin_idx, 97.5) ) per_median.append( satp(weighted_ccs_by_bin_idx, 50) ) ''' debug_p('length of weighted ccs_combos array ' + str(len(weighted_ccs_combos))) ''' debug_p('length of bin 0 in weighted_ccs_by_bin ' + str(len(weighted_ccs_by_bin[0]))) for cluster_id in hfca_id_2_cluster_ids(hfca_id): fig = plt.figure(cluster_id, figsize=(9.2, 4), dpi=100, facecolor='white') gs = gridspec.GridSpec(1, 4) ax = plt.subplot(gs[0:4]) x_smooth = np.linspace(0, cc_num_fold_bins-1,60) per_bottom_smooth = spline(range(0, cc_num_fold_bins),per_bottom,x_smooth) per_top_smooth = spline(range(0, cc_num_fold_bins),per_top,x_smooth) per_median_smooth = spline(range(0, cc_num_fold_bins),per_median,x_smooth) ax.plot(x_smooth, per_bottom_smooth, alpha=1, linewidth=0.5, color = 'black', linestyle=':', label = '2.5 percentile HS weighted: prevalence space samples', marker = None) ax.plot(x_smooth, per_top_smooth, alpha=1, linewidth=0.5, color = 'black', linestyle=':', label = '97.5 percentile HS weighted: prevalence space samples', marker = None) ax.plot(x_smooth, per_median_smooth, alpha=1, linewidth=2.0, color = 'magenta', linestyle='-', label = 'median HS weighted: prevalence space samples', marker = None) ax.fill_between(x_smooth, per_bottom_smooth, per_top_smooth, facecolor='gray', alpha=0.5, interpolate=True) cluster_cat = get_cluster_category(cluster_id) opt_group_key = self.best_fits[cluster_id]['group_key'] opt_sim_key_cc = self.best_fits[cluster_id]['cc_penalty']['sim_key'] cc_trace_opt_cc = self.calib_data[cluster_cat][opt_group_key][opt_sim_key_cc] opt_sim_key_prev = self.best_fits[cluster_id]['mse']['sim_key'] cc_trace_opt_prev = self.calib_data[cluster_cat][opt_group_key][opt_sim_key_prev] opt_sim_key_fit = self.best_fits[cluster_id]['fit']['sim_key'] cc_trace_opt_fit = self.calib_data[cluster_cat][opt_group_key][opt_sim_key_fit] ccs_model_agg_cc, ccs_ref_agg = get_cc_model_ref_traces(cc_trace_opt_cc, cluster_id) ccs_model_agg_prev, ccs_ref_agg = get_cc_model_ref_traces(cc_trace_opt_prev, cluster_id) ccs_model_agg_fit, ccs_ref_agg = get_cc_model_ref_traces(cc_trace_opt_fit, cluster_id) facility = hfca_id_2_facility(hfca_id) ax.plot(range(0, len(ccs_model_agg_cc)), ccs_model_agg_cc, alpha=1, linewidth=1, color = 'blue', label = 'Best fit: clinical cases', marker = 's') ax.plot(range(0, len(ccs_model_agg_prev)), ccs_model_agg_prev, alpha=1, linewidth=1, color = 'magenta', label = 'Best fit: prevalence', marker = 'o') ax.plot(range(0, len(ccs_model_agg_fit)), ccs_model_agg_fit, alpha=1, linewidth=1, color = 'black', label = 'Best fit: prevalence + clinical cases', marker = '*') ax.plot(range(0, len(ccs_ref_agg)), ccs_ref_agg, alpha=1, linewidth=2.0, linestyle = '-', color = 'red', label = 'Observed in ' + facility, marker = None) for i,sample_ccs in enumerate(ccs_model_agg_by_hfca_cluster_id[hfca_id][cluster_id]['unweighted']): if i == 0: ax.plot(range(0, cc_num_fold_bins), sample_ccs[2], alpha=0.5, linewidth=0.5, color = 'magenta', label = 'Opt 5-percentile samples for cluster ' + cluster_id, marker = None) #ax.plot(range(0, cc_num_fold_bins), sample_ccs[2]) else: ax.plot(range(0, cc_num_fold_bins), sample_ccs[2], alpha=0.5, linewidth=0.5, color = 'magenta', marker = None) plt.xlabel('6-week bins', fontsize=8) plt.ylabel('Clinical cases', fontsize=8) legend = plt.legend(loc=1, fontsize=8) plt.xlim(0,8) plt.title('Clinical cases timeseries', fontsize = 8, fontweight = 'bold', color = 'black') plt.gca().tick_params(axis='x', labelsize=8) plt.gca().tick_params(axis='y', labelsize=8) plt.tight_layout() output_plot_file_path = os.path.join(self.root_sweep_dir, weighted_cc_traces_plots_dir, weighted_cc_traces_base_file_name + cluster_id + '.png') plt.savefig(output_plot_file_path, dpi = 300, format='png') plt.close() clusters_processed = clusters_processed + 1 debug_p('Processed weighting and plotting clinical cases for ' + str(clusters_processed) + ' clusters')
def get_prevalence_opt_region_sims(best_fits, all_fits, cluster_id): opt_region_sims = [] cluster_record = best_fits[cluster_id] opt_group_key = cluster_record['group_key'] error_points = {} for sim_key,fit_entry in all_fits[cluster_id].iteritems(): if sim_key == 'min_terms' or sim_key == 'max_terms': continue mse = fit_entry['fit_terms']['mse'] group_key = fit_entry['group_key'] if group_key not in error_points: error_points[group_key] = { 'mse':[], 'sim_key': [] } error_points[group_key]['mse'].append(mse) error_points[group_key]['sim_key'].append(sim_key) for j,group_key in enumerate(error_points.keys()): if group_key == opt_group_key: z = error_points[group_key]['mse'] sim_keys = error_points[group_key]['sim_key'] sample_space = zip(sim_keys, z) opt_region_sims = [(group_key, sim_key) for (sim_key, res) in sample_space if res <= satp(z, sample_size_percentile)] break return opt_region_sims