def make_hist_ratio_blackhole2(bin_edges, data, mc, data_err, label, suffix=None,
                               bg_est='data_driven', signal=None, mode='no_signal'):
    """Plot data against a background estimate with a data/background ratio panel.

    The figure has two vertically stacked axes sharing the x axis: a
    log-scale upper panel with the background (and optional signal) plus the
    data points, and a lower panel with the ratio of data to background.
    The figure is written as a PDF under ``../../plots/``; it is left open.

    Parameters
    ----------
    bin_edges : array
        Bin edges; must support slicing and element-wise arithmetic.
    data, mc, data_err : array
        Per-bin data values, background values and data uncertainties.
    label : str
        Inserted into the plot title and the output file name.
    suffix : str, optional
        Extra file-name suffix; ``mode`` is always appended to it.
    bg_est : str
        ``'data_driven'`` or ``'mc'`` draw the background with ``hist``; any
        other value draws it with ``fill_between_steps``. It also selects the
        title (only for ``'data_driven'``, ``'mc'`` and ``'low_ST'``) and the
        output file name.
    signal : array, optional
        Per-bin signal values. Required when ``mode`` is a signal-search mode.
    mode : str
        ``'signal_search'`` or ``'signal_search_inj'`` overlay the signal and
        use ``mc + signal`` as the ratio denominator; otherwise ``mc`` is used.

    Raises
    ------
    ValueError
        If a signal-search mode is requested without ``signal``.
    """
    with_signal = mode in ('signal_search', 'signal_search_inj')
    if with_signal and signal is None:
        # Fail before any figure is created instead of crashing on
        # ``mc + None`` halfway through the plot.
        raise ValueError("mode %r requires a 'signal' array" % (mode,))

    bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2.

    fig = plt.figure()
    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1])
    ax1 = fig.add_subplot(gs[0])
    ax2 = fig.add_subplot(gs[1], sharex=ax1)
    ax1.grid(True)
    ax2.grid(True)
    # The panels touch, so only the lower one shows x tick labels.
    plt.setp(ax1.get_xticklabels(), visible=False)
    fig.subplots_adjust(hspace=0.001)

    try:
        ax1.set_yscale("log", nonposy='clip')
    except TypeError:
        # Newer Matplotlib releases renamed the keyword to ``nonpositive``
        # and reject the old spelling.
        ax1.set_yscale("log", nonpositive='clip')

    if bg_est in ('data_driven', 'mc'):
        # Only include the signal column when one was supplied; stacking
        # ``None`` next to an array yields a ragged/object array.
        # NOTE(review): ``hist`` is defined outside this block -- confirm it
        # accepts a single-column input.
        stacked = [mc] if signal is None else [mc, signal]
        hist(np.asarray(stacked).T, bin_edges, ax=ax1, alpha=0.2)
    else:
        fill_between_steps(ax1, bin_edges, mc, 1e-4, alpha=0.2, step_where='pre',
                           linewidth=0, label='ST_mul2 excl. (normed)')

    if with_signal:
        fill_between_steps(ax1, bin_edges, mc + signal, mc, alpha=0.6, step_where='pre',
                           linewidth=0, label='Signal', color='darkgreen')

    ax1.errorbar(bin_centres, data, yerr=data_err, fmt='ok', label='data')
    ax1.legend()
    ax1.set_ylim(1e-4, ax1.get_ylim()[1])

    title_tails = {
        'data_driven': ' QCD MC and real data, binned from data',
        'mc': ' QCD MC and real data, binned from MC',
        'low_ST': ' data, bg est from ST mult_2 data',
    }
    if bg_est in title_tails:
        ax1.set_title('ST_mult ' + label + title_tails[bg_est])

    denominator = mc + signal if with_signal else mc
    ratio = data / denominator
    ratio_err = data_err / denominator

    fill_between_steps(ax2, bin_edges, ratio + ratio_err, ratio - ratio_err, alpha=0.2,
                       step_where='pre', linewidth=0, color='red')
    # Horizontal bars span each bin from its lower to its upper edge.
    ax2.errorbar(bin_centres, ratio, yerr=None,
                 xerr=[np.abs(bin_edges[0:-1] - bin_centres),
                       np.abs(bin_edges[1:] - bin_centres)],
                 fmt='ok')
    ax2.set_xlabel('ST (GeV)', fontsize=17)
    ax2.set_ylabel('Data/BG', fontsize=17)
    ax1.set_ylabel(r'N/$\Delta$x', fontsize=17)

    ax2.set_ylim(0.1, 2)
    ax2.get_yaxis().get_major_formatter().set_useOffset(False)
    ax2.axhline(1, linewidth=2, color='r')
    # Prune the top tick of the ratio panel so it does not collide with the
    # bottom tick of the upper panel.
    ax2.yaxis.set_major_locator(MaxNLocator(nbins=7, prune='upper'))

    suffix = '_'.join([suffix, mode]) if suffix else mode

    name_tail = {'data_driven': 'databin', 'mc': 'mcbin'}.get(bg_est, 'st2_bg')
    save_name = '../../plots/ST_mul' + label + '_mc_and_data_normed_' + name_tail
    if suffix:
        save_name += '_' + suffix
    save_name += '.pdf'
    plt.savefig(save_name)
# Histogram the fit-generated sample (my_rands) and the data for this
# multiplicity on one shared set of bin edges, which bayesian_blocks derives
# from the generated sample.
normed_counts_mc, bb_edges = np.histogram(my_rands, bayesian_blocks(my_rands), density=True)
normed_counts_data, _ = np.histogram(ST_dict_data[mult], bb_edges, density=True)
counts_mc, _ = np.histogram(my_rands, bb_edges)
counts_data, _ = np.histogram(ST_dict_data[mult], bb_edges)

# Scale the unit-area densities by nentries.
rescaled_counts_mc = normed_counts_mc*nentries
rescaled_counts_data = normed_counts_data*nentries
bin_centres = (bb_edges[:-1] + bb_edges[1:])/2.
# sqrt(N) uncertainties on the data: per unit bin width for the density plot,
# raw for the counts plot.
rescaled_err = np.sqrt(counts_data)/(bb_edges[1:]-bb_edges[:-1])
err = np.sqrt(counts_data)

# Figure 1: rescaled densities.
fig = plt.figure()
ax = plt.gca()
try:
    ax.set_yscale("log", nonposy='clip')
except TypeError:
    # Newer Matplotlib releases renamed the keyword to ``nonpositive`` and
    # reject the old spelling.
    ax.set_yscale("log", nonpositive='clip')
fill_between_steps(ax, bb_edges, rescaled_counts_mc, 1e-3, alpha=0.2, step_where='pre', linewidth=0, label='fit MC')
ax.errorbar(bin_centres, rescaled_counts_data, yerr=rescaled_err, fmt='ok', label='data')
ax.legend()
plt.title('MC gen from '+mult+' fit function and real data')
plt.xlabel('ST (GeV)')
plt.ylabel(r'N/$\Delta$x')
# Save before showing: with a blocking or inline backend the figure is gone
# once show() returns, and a later savefig() writes an empty page.
fig.savefig('plots/'+mult+'_fit_and_data_normed.pdf')
plt.show()

# Figure 2: raw counts per bin.
fig = plt.figure()
ax = plt.gca()
try:
    ax.set_yscale("log", nonposy='clip')
except TypeError:
    ax.set_yscale("log", nonpositive='clip')
fill_between_steps(ax, bb_edges, counts_mc, 1e-3, alpha=0.2, step_where='pre', linewidth=0, label='fit MC')
ax.errorbar(bin_centres, counts_data, yerr=err, fmt='ok', label='data')
# Histogram the fit-generated sample (my_rands) and the data for this
# multiplicity on one shared set of bin edges, which bayesian_blocks derives
# from the generated sample.
normed_counts_mc, bb_edges = np.histogram(my_rands, bayesian_blocks(my_rands), density=True)
normed_counts_data, _ = np.histogram(ST_dict_data[mult], bb_edges, density=True)
counts_mc, _ = np.histogram(my_rands, bb_edges)
counts_data, _ = np.histogram(ST_dict_data[mult], bb_edges)

# Scale the unit-area densities by nentries.
rescaled_counts_mc = normed_counts_mc * nentries
rescaled_counts_data = normed_counts_data * nentries
bin_centres = (bb_edges[:-1] + bb_edges[1:]) / 2.0
# sqrt(N) uncertainties on the data: per unit bin width for the density plot,
# raw for the counts plot.
rescaled_err = np.sqrt(counts_data) / (bb_edges[1:] - bb_edges[:-1])
err = np.sqrt(counts_data)

# Figure 1: rescaled densities.
fig = plt.figure()
ax = plt.gca()
try:
    ax.set_yscale("log", nonposy="clip")
except TypeError:
    # Newer Matplotlib releases renamed the keyword to ``nonpositive`` and
    # reject the old spelling.
    ax.set_yscale("log", nonpositive="clip")
fill_between_steps(ax, bb_edges, rescaled_counts_mc, 1e-3, alpha=0.2, step_where="pre", linewidth=0, label="fit MC")
ax.errorbar(bin_centres, rescaled_counts_data, yerr=rescaled_err, fmt="ok", label="data")
ax.legend()
plt.title("MC gen from " + mult + " fit function and real data")
plt.xlabel("ST (GeV)")
plt.ylabel(r"N/$\Delta$x")
# Save before showing: with a blocking or inline backend the figure is gone
# once show() returns, and a later savefig() writes an empty page.
fig.savefig("plots/" + mult + "_fit_and_data_normed.pdf")
plt.show()

# Figure 2: raw counts per bin.
fig = plt.figure()
ax = plt.gca()
try:
    ax.set_yscale("log", nonposy="clip")
except TypeError:
    ax.set_yscale("log", nonpositive="clip")
fill_between_steps(ax, bb_edges, counts_mc, 1e-3, alpha=0.2, step_where="pre", linewidth=0, label="fit MC")
ax.errorbar(bin_centres, counts_data, yerr=err, fmt="ok", label="data")
def plot_cases(datafile, outputfile):
    """Draw case counts over time as stacked, colour-coded steps and save the plot.

    The tab-separated input is read with pandas. Its columns are renamed to
    ``Time``, ``n1`` ... ``n<k>``, ``n_tot`` by position. Starting from
    ``n_tot``, the area under the running total is filled and the class
    columns are subtracted one at a time, from the highest-numbered class
    down, so each class shows up as a coloured band.

    Parameters
    ----------
    datafile : str or file-like
        Tab-separated table passed to ``pandas.read_csv``.
    outputfile : str
        Destination passed to ``plt.savefig``.

    Raises
    ------
    ValueError
        If the table contains no data rows.
    """
    case_data = pd.read_csv(datafile, sep="\t")
    if case_data.empty:
        raise ValueError("no data rows found in %r" % (datafile,))

    # Count columns from the header rather than from the row labelled 1, so a
    # single-row table works too.
    N_classes = len(case_data.columns) - 1
    N_rows = len(case_data.index)
    case_data.columns = ["Time"] + ["n" + str(i) for i in range(1, N_classes)] + ["n_tot"]

    y_max = case_data["n_tot"].max()
    x_max = case_data["Time"].iloc[-1]
    time_max = case_data["Time"].max()

    # The "Tableau 20" colours as 0-255 RGB triples.
    tableau20_rgb = [
        (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
        (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
        (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
        (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
        (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229),
    ]
    # Matplotlib expects RGB components in the [0, 1] range.
    tableau20 = [(r / 255., g / 255., b / 255.) for r, g, b in tableau20_rgb]

    hfont = {'fontname': 'sans-serif'}

    plt.figure(figsize=(10, 7.5))

    # Hide the frame lines and keep ticks on the bottom and left only.
    ax = plt.subplot(111)
    for side in ("top", "bottom", "right", "left"):
        ax.spines[side].set_visible(False)
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()

    # Restrict the axes to where the data is.
    plt.ylim(0, y_max)
    plt.xlim(0, N_rows)

    # Roughly ten y ticks and five x ticks, with a step of at least 1.
    y_ticks_range = range(0, int(y_max + 1), max(1, int((y_max + 1) / 10)))
    x_ticks_range = range(0, int(time_max) + 1, max(1, int(time_max / 5)))
    plt.yticks(y_ticks_range, [str(tick) for tick in y_ticks_range], fontsize=14, **hfont)
    plt.xticks(x_ticks_range, fontsize=14, **hfont)

    # Light dashed guide lines at every y tick.
    for y_tick in y_ticks_range:
        plt.plot(range(0, N_rows), [y_tick] * N_rows, "--", lw=0.5, color="black", alpha=0.3)

    # The guide lines replace the tick marks; keep only the tick labels.
    # Real booleans are used because newer Matplotlib treats the strings
    # "off"/"on" as truthy, which would turn the marks back on.
    plt.tick_params(axis="both", which="both", bottom=False, top=False,
                    labelbottom=True, left=False, right=False, labelleft=True)

    handles = []
    times = case_data["Time"].values
    remaining = case_data["n_tot"]
    for rank, class_index in enumerate(range(N_classes - 1, 0, -1)):
        colour = tableau20[rank]
        # Fill under the running total, then peel this class off it so the
        # next fill leaves the current colour visible as a band.
        fbs.fill_between_steps(times, remaining, ax=ax, facecolor=colour, color=colour,
                               interpolate=False, lw=0.01)
        remaining = remaining - case_data["n" + str(class_index)]
        # NOTE(review): the label is hard-coded as 20 - rank and the palette
        # has 20 entries; this presumably assumes 20 classes -- confirm.
        handles.append(mpatches.Patch(color=colour, label=str(20 - rank)))

    plt.legend(handles=handles, loc=2, bbox_to_anchor=(1.03, 1), frameon=False)
    plt.text(x_max * 1.01, y_max / 2, "Transmissibility", rotation=90, va="center",
             fontsize=17, **hfont)

    plt.xlabel("Week", fontsize=17, ha="center", **hfont)
    plt.ylabel("Cases", fontsize=17, ha="center", **hfont)

    # bbox_inches="tight" trims the surrounding whitespace.
    plt.savefig(outputfile, bbox_inches="tight")