def make_hist_ratio_blackhole2(bin_edges, data, mc, data_err, label, suffix = None, bg_est='data_driven', signal=None, mode='no_signal'):
    """Plot data against a background estimate, with a data/background ratio panel, and save it as a PDF.

    The figure has two stacked panels sharing the x axis: the upper one
    (log-scale y) shows the background, the optional signal and the data
    points; the lower one shows ``data / background`` with an error band.

    Parameters
    ----------
    bin_edges : array
        Bin edges; must support slicing and element-wise arithmetic
        (presumably a NumPy array -- confirm against callers).
    data, data_err : array
        Per-bin data values and their uncertainties.
    mc : array
        Per-bin background estimate.
    label : str
        Inserted into the plot title and the output file name.
    suffix : str, optional
        Extra tag for the file name; ``mode`` is always appended to it.
    bg_est : str
        ``'data_driven'`` or ``'mc'`` select the matching title/file name;
        any other value is drawn as the ``ST_mul2`` background band and saved
        under the ``st2_bg`` name (only ``'low_ST'`` gets a title).
    signal : array, optional
        Per-bin signal. Required when ``mode`` is ``'signal_search'`` or
        ``'signal_search_inj'``.
    mode : str
        In the two signal-search modes the signal is stacked on the
        background and included in the ratio denominator.

    Raises
    ------
    ValueError
        If a signal-search ``mode`` is requested without ``signal``.

    The PDF is written below ``../../plots/``; nothing is returned.
    """
    with_signal = mode in ('signal_search', 'signal_search_inj')
    if with_signal and signal is None:
        # Fail before any figure is created instead of crashing on mc + None.
        raise ValueError("signal must be provided when mode is %r" % (mode,))

    bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2.

    fig = plt.figure()
    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1])
    ax1 = fig.add_subplot(gs[0])
    ax2 = fig.add_subplot(gs[1], sharex=ax1)
    ax1.grid(True)
    ax2.grid(True)
    # The panels touch, so only the lower one keeps its x tick labels.
    plt.setp(ax1.get_xticklabels(), visible=False)
    fig.subplots_adjust(hspace=0.001)
    # NOTE(review): `nonposy` was renamed to `nonpositive` in Matplotlib 3.3;
    # switch the keyword once the supported Matplotlib version allows it.
    ax1.set_yscale("log", nonposy='clip')

    if bg_est in ('data_driven', 'mc'):
        if signal is None:
            # Without a signal there is nothing to stack next to the
            # background (np.asarray([mc, None]) is not a valid 2-D array),
            # so draw the plain background band instead.
            fill_between_steps(ax1, bin_edges, mc, 1e-4, alpha=0.2,
                               step_where='pre', linewidth=0, label='QCD MC')
        else:
            hist(np.asarray([mc, signal]).T, bin_edges, ax=ax1, alpha=0.2)
    else:
        fill_between_steps(ax1, bin_edges, mc, 1e-4, alpha=0.2,
                           step_where='pre', linewidth=0,
                           label='ST_mul2 excl. (normed)')
    if with_signal:
        # Signal is drawn as the band between background and background+signal.
        fill_between_steps(ax1, bin_edges, mc + signal, mc, alpha=0.6,
                           step_where='pre', linewidth=0, label='Signal',
                           color='darkgreen')
    ax1.errorbar(bin_centres, data, yerr=data_err, fmt='ok', label='data')
    ax1.legend()
    ax1.set_ylim(1e-4, ax1.get_ylim()[1])

    titles = {
        'data_driven': 'ST_mult ' + label + ' QCD MC and real data, binned from data',
        'mc': 'ST_mult ' + label + ' QCD MC and real data, binned from MC',
        'low_ST': 'ST_mult ' + label + ' data, bg est from ST mult_2 data',
    }
    if bg_est in titles:
        ax1.set_title(titles[bg_est])

    # Ratio panel: data over the full expectation drawn in the upper panel.
    expected = mc + signal if with_signal else mc
    ratio = data / expected
    ratio_err = data_err / expected
    fill_between_steps(ax2, bin_edges, ratio + ratio_err, ratio - ratio_err,
                       alpha=0.2, step_where='pre', linewidth=0, color='red')
    # Horizontal error bars span each bin from its centre to both edges.
    half_widths = [np.abs(bin_edges[0:-1] - bin_centres),
                   np.abs(bin_edges[1:] - bin_centres)]
    ax2.errorbar(bin_centres, ratio, yerr=None, xerr=half_widths, fmt='ok')

    ax2.set_xlabel('ST (GeV)', fontsize=17)
    ax2.set_ylabel('Data/BG', fontsize=17)
    ax1.set_ylabel(r'N/$\Delta$x', fontsize=17)
    ax2.set_ylim(0.1, 2)
    ax2.get_yaxis().get_major_formatter().set_useOffset(False)
    ax2.axhline(1, linewidth=2, color='r')
    # Prune the top tick so it does not collide with the upper panel.
    ax2.yaxis.set_major_locator(MaxNLocator(nbins=7, prune='upper'))

    suffix = '_'.join([suffix, mode]) if suffix else mode

    save_stems = {
        'data_driven': '_mc_and_data_normed_databin',
        'mc': '_mc_and_data_normed_mcbin',
    }
    save_name = ('../../plots/ST_mul' + label
                 + save_stems.get(bg_est, '_mc_and_data_normed_st2_bg'))
    if suffix:
        save_name += '_' + suffix
    save_name += '.pdf'
    # Save through the figure handle rather than the implicit current figure.
    fig.savefig(save_name)
# Histogram the fit-generated sample on the edges returned by
# bayesian_blocks(), then histogram the data on those same edges so the two
# are directly comparable bin by bin.
normed_counts_mc, bb_edges = np.histogram(my_rands, bayesian_blocks(my_rands), density=True)
normed_counts_data, _ = np.histogram(ST_dict_data[mult], bb_edges, density=True)
counts_mc, _ = np.histogram(my_rands, bb_edges)
counts_data, _ = np.histogram(ST_dict_data[mult], bb_edges)

# Densities scaled by nentries.
# NOTE(review): rescaled_err is sqrt(N)/bin-width, which matches
# rescaled_counts_data only if nentries equals the number of data entries -- confirm.
rescaled_counts_mc = normed_counts_mc * nentries
rescaled_counts_data = normed_counts_data * nentries
bin_centres = (bb_edges[:-1] + bb_edges[1:]) / 2.

rescaled_err = np.sqrt(counts_data) / (bb_edges[1:] - bb_edges[:-1])
err = np.sqrt(counts_data)

# Figure 1: width-normalised counts.
fig = plt.figure()
ax = plt.gca()
# NOTE(review): `nonposy` was renamed to `nonpositive` in Matplotlib 3.3.
ax.set_yscale("log", nonposy='clip')
fill_between_steps(ax, bb_edges, rescaled_counts_mc, 1e-3, alpha=0.2, step_where='pre', linewidth=0, label='fit MC')
ax.errorbar(bin_centres, rescaled_counts_data, yerr=rescaled_err, fmt='ok', label='data')
ax.legend()
plt.title('MC gen from ' + mult + ' fit function and real data')
plt.xlabel('ST (GeV)')
plt.ylabel(r'N/$\Delta$x')
# Save before showing: plt.show() can block and the figure may be destroyed
# when its window is closed, after which savefig would write an empty canvas.
fig.savefig('plots/' + mult + '_fit_and_data_normed.pdf')
plt.show()

# Figure 2: raw counts per bin.
fig = plt.figure()
ax = plt.gca()
ax.set_yscale("log", nonposy='clip')
fill_between_steps(ax, bb_edges, counts_mc, 1e-3, alpha=0.2, step_where='pre', linewidth=0, label='fit MC')
ax.errorbar(bin_centres, counts_data, yerr=err, fmt='ok', label='data')
# Histogram the fit-generated sample on the edges returned by
# bayesian_blocks(), then histogram the data on those same edges so the two
# are directly comparable bin by bin.
normed_counts_mc, bb_edges = np.histogram(my_rands, bayesian_blocks(my_rands), density=True)
normed_counts_data, _ = np.histogram(ST_dict_data[mult], bb_edges, density=True)
counts_mc, _ = np.histogram(my_rands, bb_edges)
counts_data, _ = np.histogram(ST_dict_data[mult], bb_edges)

# Densities scaled by nentries.
# NOTE(review): rescaled_err is sqrt(N)/bin-width, which matches
# rescaled_counts_data only if nentries equals the number of data entries -- confirm.
rescaled_counts_mc = normed_counts_mc * nentries
rescaled_counts_data = normed_counts_data * nentries
bin_centres = (bb_edges[:-1] + bb_edges[1:]) / 2.0

rescaled_err = np.sqrt(counts_data) / (bb_edges[1:] - bb_edges[:-1])
err = np.sqrt(counts_data)

# Figure 1: width-normalised counts.
fig = plt.figure()
ax = plt.gca()
# NOTE(review): `nonposy` was renamed to `nonpositive` in Matplotlib 3.3.
ax.set_yscale("log", nonposy="clip")
fill_between_steps(ax, bb_edges, rescaled_counts_mc, 1e-3, alpha=0.2, step_where="pre", linewidth=0, label="fit MC")
ax.errorbar(bin_centres, rescaled_counts_data, yerr=rescaled_err, fmt="ok", label="data")
ax.legend()
plt.title("MC gen from " + mult + " fit function and real data")
plt.xlabel("ST (GeV)")
plt.ylabel(r"N/$\Delta$x")
# Save before showing: plt.show() can block and the figure may be destroyed
# when its window is closed, after which savefig would write an empty canvas.
fig.savefig("plots/" + mult + "_fit_and_data_normed.pdf")
plt.show()

# Figure 2: raw counts per bin.
fig = plt.figure()
ax = plt.gca()
ax.set_yscale("log", nonposy="clip")
fill_between_steps(ax, bb_edges, counts_mc, 1e-3, alpha=0.2, step_where="pre", linewidth=0, label="fit MC")
ax.errorbar(bin_centres, counts_data, yerr=err, fmt="ok", label="data")
# Example #4
# 0
def plot_cases(datafile, outputfile):
    """Draw a stacked step plot of case counts per class and save it.

    ``datafile`` is read as tab-separated text. Its columns are renamed by
    position: the first becomes ``Time``, the last ``n_tot`` and the ones in
    between ``n1`` ... ``nK``. Starting from ``n_tot``, the classes are
    peeled off from ``nK`` down to ``n1``; each remaining running total is
    filled with its own colour, producing a stacked area plot.

    Parameters
    ----------
    datafile : str or file-like
        Anything accepted by ``pandas.read_csv``.
    outputfile : str or file-like
        Destination passed to ``plt.savefig``; the format follows the
        file extension.

    Legend entries are labelled with the class index (``K`` ... ``1``).
    Colours cycle through the 20-entry palette when there are more than
    20 classes.
    """
    case_data = pd.read_csv(datafile, sep="\t")
    # Everything between the first (time) and last (total) column is a class.
    n_class_cols = len(case_data.columns) - 2
    N_rows = len(case_data.index)
    case_data.columns = (
        ["Time"] + ["n" + str(i) for i in range(1, n_class_cols + 1)] + ["n_tot"]
    )
    y_max = case_data["n_tot"].max()
    time_max = case_data["Time"].max()
    x_max = case_data["Time"].iloc[-1]

    # "Tableau 20" colours, scaled from 0-255 RGB to the [0, 1] range
    # matplotlib expects.
    tableau20_rgb = [
        (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
        (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
        (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
        (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
        (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229),
    ]
    tableau20 = [(r / 255., g / 255., b / 255.) for r, g, b in tableau20_rgb]

    hfont = {'fontname': 'sans-serif'}

    plt.figure(figsize=(10, 7.5))

    # Drop the frame and keep ticks on the bottom/left only.
    ax = plt.subplot(111)
    for side in ("top", "bottom", "right", "left"):
        ax.spines[side].set_visible(False)
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()

    # Limit the axes to where the data is.
    plt.ylim(0, y_max)
    plt.xlim(0, N_rows)

    # Roughly ten y ticks and five x ticks, never with a zero step.
    y_ticks_range = range(0, int(y_max + 1), max(1, int((y_max + 1) / 10)))
    x_ticks_range = range(0, int(time_max) + 1, max(1, int(time_max / 5)))
    plt.yticks(y_ticks_range, [str(t) for t in y_ticks_range], fontsize=14, **hfont)
    plt.xticks(x_ticks_range, fontsize=14, **hfont)

    # Light horizontal guide lines at every y tick.
    for tick in y_ticks_range:
        plt.plot(range(0, N_rows), [tick] * N_rows, "--", lw=0.5,
                 color="black", alpha=0.3)

    # The guide lines replace the tick marks. Booleans are used because the
    # strings "on"/"off" are both truthy and would leave the marks visible.
    plt.tick_params(axis="both", which="both", bottom=False, top=False,
                    labelbottom=True, left=False, right=False, labelleft=True)

    # Stack from the top: fill the running total, then remove one class.
    # Uses fill_between_steps from fill_between_steps.py.
    handles = []
    times = case_data.iloc[:, 0].values
    remaining = case_data["n_tot"]
    for rank, class_idx in enumerate(range(n_class_cols, 0, -1)):
        colour = tableau20[rank % len(tableau20)]
        fbs.fill_between_steps(times, remaining, ax=ax, facecolor=colour,
                               color=colour, interpolate=False, lw=0.01)
        remaining = remaining - case_data["n" + str(class_idx)]
        # Label with the real class index (equals 20 - rank for 20 classes).
        handles.append(mpatches.Patch(color=colour, label=str(class_idx)))

    plt.legend(handles=handles, loc=2, bbox_to_anchor=(1.03, 1), frameon=False)
    # Vertical caption for the legend, placed just right of the last time value.
    plt.text(x_max * 1.01, y_max / 2, "Transmissibility", rotation=90,
             va="center", fontsize=17, **hfont)

    plt.xlabel("Week", fontsize=17, ha="center", **hfont)
    plt.ylabel("Cases", fontsize=17, ha="center", **hfont)

    # bbox_inches="tight" trims the surrounding whitespace and keeps the
    # legend, which sits outside the axes, inside the saved image.
    plt.savefig(outputfile, bbox_inches="tight")