def merge_json_files(self): """Merge json files that are created individually.""" print "\n -- Merging JSON files -- \n" names_dict = info.physicsSamples() names = names_dict['signal']+names_dict['backgrounds']+['data'] savePath = "{0}/{1}".format(info.getJsonPath(),self.p_lepton) jsonfiles = open("share/jsonfiles2plot.txt","r").readlines() # jsonfiles is the saved list of all json files made for var in self.p_varlist_nolead: print " Preparing JSON file for variable ",var pathData = info.getJsonPath()+"{0}/{1}_{2}.json".format(self.p_lepton,var,self.p_outfile) merged_data = {} for name in jsonfiles: logging.info(" Merging {0}".format(name)) json_filename = name%(var) json_filename = json_filename.rstrip('\n') if not os.path.isfile(json_filename): logging.info(" File {0} does not exist. ".format(json_filename)) continue temp_data = json.load(open(json_filename)) merged_data.update(temp_data) with open(pathData,'w') as outputfile: json.dump(merged_data, outputfile) return
def __init__(self,cfg_parser): """ Initialize the parameters from the config file. @param cfg_parser Object that parsed the configuration file """ logging.getLogger('share/systematics.log') loggingLEVEL = logging.getLogger().getEffectiveLevel() # DEBUG, INFO, ERROR, etc. logging.info("") logging.critical(" -- In tree2hist.py") logging.info(" ------------ ") logging.info(" Initializing the config file.") self.GeV = 1000. ## -- Configuration -- ## self.p_varList = cfg_parser.get('systematics','vars') # ex. share/varNames.txt self.p_rootfiles = cfg_parser.get('systematics','inputfile') # ex. share/systematics_ntuples.txt self.p_selection = cfg_parser.get('systematics','selection') # ex. pre2pre self.p_outputname = cfg_parser.get('systematics','outputname') # ex. pre self.p_makejsons = config.str2bool(cfg_parser.get('systematics','make_jsons')) # ex. True self.p_makehists = config.str2bool(cfg_parser.get('systematics','make_hists')) # ex. True self.p_lepton = cfg_parser.get('systematics','lepton') # ex. muel self.p_nEvents = cfg_parser.get('systematics','nevents') # ex. -1 ## ------------------- ## self.p_varList = open(self.p_varList,'r').readlines() # update the variable self.p_varList = [v.rstrip('\n') for v in self.p_varList] if not self.p_makejsons and not self.p_makehists: print print " You didn't specify outputs (either json, hist, or both)" print " for the systematics output." print sys.exit(1) self.json_path = info.getJsonPath()+self.p_lepton self.hist_path = self.json_path.split('json')[0]+'hists/'+self.p_lepton ## Run the program self.main() return
def ROOT2json(cfg_parser): """ For converting data in the ROOT files to .json files for plotting. Increases ease-of-use for making Data/MC plots (don't have to re-process the ROOT file just to make a single plot). @param cfg_parser Object which parsed the configuration file """ logging.getLogger('share/datamc.log') loggingLEVEL = logging.getLogger().getEffectiveLevel() # DEBUG, INFO, ERROR, etc. logging.info("") logging.critical(" -- In root2json.py") logging.info(" ------------ ") logging.info(" Initializing the config file.") ## -- Configuration -- ## p_varList = cfg_parser.get('datamc','vars') # ex. 'share/varNames.txt' p_inputfiles = cfg_parser.get('datamc','rootfiles') # ex. 'share/datamc_ntuples.txt' p_lepton = cfg_parser.get('datamc','lepton') # ex. muel (both muon & electron) p_outfile = cfg_parser.get('datamc','jsonfilename') # ex. 'elLHMedium_pre_A1_1tagin' p_nEvents = int(cfg_parser.get('datamc','nEvents')) # ex. -1 ## ------------------- ## treename = info.treename() bckg_names = info.physicsSamples()['backgrounds'] savePath = "{0}/{1}".format(info.getJsonPath(),p_lepton) if not os.path.isdir(savePath): os.makedirs(savePath) logging.info(" Set the output path: {0}".format(savePath)) ## -- Load various files of data inputfiles = info.read_txt_file(p_inputfiles) if not inputfiles: print print " ERROR: File {0} is empty (no files!) ".format(p_inputfiles) print from sys import exit exit(1) i_varList = info.read_txt_file(p_varList) varList = [] for p_var in i_varList: p_var = p_var.split('[')[0] if p_var not in varList: varList.append(p_var) ## -- Loop over input files logging.info(" inputfiles = {0}".format(inputfiles)) logged_files = {} # keeping track if a sample has been used before # the list of input files may have multiple root files # for the same sample (ttbar, wjets, etc.) 
## -- Make a simple text file that stores all of the json files we just made newfile = open("share/jsonfiles2plot.txt","w") for p in inputfiles: jsonData = config.AutoVivification() p_file = ROOT.TFile.Open(p) p_tree = p_file.Get(treename) p_tree.GetEntry(0) # just to get the mcChannelNumber name = config.getSampleName(root_tree=p_tree,dsid=p_tree.mcChannelNumber)['name'] # need different names from each file (otherwise different ttbar files # will overwrite each other!) ## -- load the new DataMC object if name not in logged_files.keys(): entry = DataMC_Type(name) logged_files[name] = entry for var in varList: entry.varVals[var] = [] entry.scaleFactors[var] = [] entry.lepCharges[var] = [] entry.lepNames[var] = [] else: entry = logged_files[name] print "\n ++ Producing json file from {0}\n".format(p) logging.info(" ++ Running {0}".format(name)) ## -- Attach the data (values,weights) to each DataMC object entry = addData.addData(entry, p_tree, varList, cfg_parser) # Get data from ROOT logging.info(" Exporting data to json format.") ## -- Log the DataMC object in the dictionary ## not sure that this is being used effectively... logged_files[name] = entry ## Save each json file now that we have looped over the file logging.info(" Saving json information.") outfile_name = '{0}/%s_{1}_{2}.json'.format(savePath,p_outfile,name) newfile.write("%s\n" % outfile_name) for var in varList: # put information in dictionaries to be saved to json jsonData[var][entry.name] = entry.varVals[var] jsonData[var][entry.name+'_weight'] = entry.scaleFactors[var] jsonData[var][entry.name+'_lepNames'] = entry.lepNames[var] jsonData[var]['LUMI'] = info.LUMI() print " Saving output to {0}".format(outfile_name%(var)) logging.info(" -- Saving output to {0}".format(outfile_name)) with open(outfile_name%(var),'w') as outputfile: json.dump(jsonData[var], outputfile) logging.info(" End root2json.py") return
def datamcplotter(self,variable):
    """Run the script that makes the figure and adjust parameters.

    Builds the Data/MC figure for one variable from the json files made by
    root2json: the background prediction, signal overlays, the data points
    (unless running blinded), uncertainty bands and -- for unblinded
    plots -- a Data/MC ratio subplot.  The figure is saved under
    self.pathSave in the format self.p_format.

    @param variable Variable name to plot; a '[i]' suffix selects the
                    i-th (pT-sorted) entry of a vector-valued branch.
    """
    self.var = variable
    self._leading = False
    # A bracketed name like 'jet_pt[0]' requests a single entry of a
    # vector branch; brackets() splits it into (name, index).
    if '[' in self.var:
        self._leading = True
        self.var,self.var_entry = brackets(self.var)

    ## -- for pre-selection, skip variables that aren't defined
    if self.var in self.notforpre and 'pre' in self.p_jsonfilename:
        return

    ## -- Set the output file
    ana_status_label = self.p_ana_status.replace(' ','_')
    outfile = "{0}{1}_{2}_{3}{4}_{5}".format(self.pathSave,self.var,self.p_jsonfilename,ana_status_label,self.p_extra_saveAs,self.timeStamp)

    ## -- Load our data!
    pathData = info.getJsonPath()+"{0}/{1}_{2}.json".format(self.p_lepton,self.var,self.p_jsonfilename)
    json_data = json.load(open(pathData))

    ## -- Initializing various bin related arrays
    self.plotBins = self.plot_keys['variables'][self.var]['bins'] # Binning for this histogram
    plotBins_array = np.asarray(self.plotBins)
    bins_mp = 0.5*(plotBins_array[:-1]+plotBins_array[1:]) # midpoint in bins
    bin_widths = plotBins_array[1:]-plotBins_array[:-1]    # bin widths
    half_bin_widths = bin_widths/2.                        # half of bin widths (xerr)

    ## -- Plot only desired signals
    ## Recommended format = 'TTS_M800','TTS_M1400',etc. (csv format)
    try:
        if int(self.p_plot_signal) < 0:
            # self.p_plot_signal = -1 (or some other negative number to mean 'ALL')
            signal = self.physics_samples['signal']
        else:
            # self.p_plot_signal = 1400 (or something to plot a single mass point)
            signal = [i for i in self.physics_samples['signal'] if self.p_plot_signal in i]
    except:
        # self.p_plot_signal = [comma separated values (works for >= 0 entries)]
        # (int() raised, so the option is a csv string rather than a number)
        signal = self.p_plot_signal.split(',')

    ## ## ## -- Setting up figure -- ## ## ##
    self.x_label = self.plot_keys['variables'][self.var]['label']
    self.x_min = self.plotBins[0] # horizontal axis scale
    self.x_max = self.plotBins[-1]
    self.y1min = 0. # set the minimum of the y-axis (for logplots, see 'plot_prediction()')

    # Sample names are recovered from the '<name>_weight' keys of the json file
    py_samples = [i for i in json_data.keys() if '_weight' in i] # just grabbing what's in the json file
    py_samples = [i.split('_weight')[0] for i in py_samples]

    ## plotting all samples in one command, need lists of everything to do that
    self.entry_values = []
    self.entry_scales = []
    self.entry_labels = []
    self.entry_colors = []

    ## plotting uncertainty bands
    fill_between_bins = np.asarray(self.plotBins) ## for plotting hatch uncertainty
    # repeat the interior bin edges so fill_between draws a step-shaped band
    fill_between_bins = [self.plotBins[0]]+list(fill_between_bins[1:-1].repeat(2))+[self.plotBins[-1]]

    if self.p_blind:
        ## No data ##
        fig, self.ax1 = plt.subplots(figsize=(10,8))
        if self.p_logplot:
            self.ax1.set_yscale('log')
        self.plot_prediction(py_samples,json_data)
        self.plot_signal(py_samples,json_data,signal)
        ## Set the axis properties of the main x-axis
        self.config_xaxis(self.ax1)
        self.get_uncertainties()
        # dividing by 0.82 enlarges the y-range, presumably to leave
        # headroom above the tallest bin for labels -- TODO confirm
        self.y1max = max(self.totpred)/0.82
        data_height = [0. for i in self.totpred] # placeholder: no data drawn when blinded
    else:
        ## Data ##
        fig = plt.figure(figsize=(10,8))
        gs = gridspec.GridSpec(2,1,height_ratios=[3,1],hspace=0.0)
        self.ax1 = fig.add_subplot(gs[0])                 # main distribution panel
        self.ax2 = fig.add_subplot(gs[1],sharex=self.ax1) # Data/MC ratio panel
        plt.setp(self.ax1.get_xticklabels(),visible=False)
        if self.p_logplot:
            self.ax1.set_yscale('log')

        data_label = 'Data'
        data_color = 'black'
        data_values = json_data['data']

        ## For vectors (e.g., jet_pt; there is >=1 jet per event)
        if type(data_values[0])==list:
            if self._leading:
                data_values = [i[self.var_entry] for i in data_values]
            else:
                data_values = list(itertools.chain(*data_values))
        else:
            ## catch possible issues here (not putting this with MC, because
            ## it should be caught here)
            if self._leading:
                try:
                    data_values = [i[self.var_entry] for i in data_values]
                except TypeError:
                    print
                    print " Cannot access a pT-sorted value for object "
                    print " that is not stored in vector."
                    print " If you're trying to access an object that "
                    print " should be stored as a vector and isn't, "
                    print " please contact the author."
                    print " If you're trying to access an object that "
                    print " isn't stored as a vector and shouldn't be, "
                    print " please fix your error. "
                    print
                    print " Continuing to next variable. "
                    print
                    return

        ## Plot the data points as 'error bars' (circles with error bars)
        ## Check if we want underflow and/or overflow
        if not self.p_underflow and not self.p_overflow:
            d_hist,bins = np.histogram(data_values,self.plotBins)
            # empty bins become NaN so no marker is drawn for them
            data_hist = np.asarray([i if i else float('NaN') for i in d_hist])
            data_err = np.sqrt(data_hist) # sqrt(N) errors on the data counts
            data_height = data_hist+data_err # for scaling the y-axis on the plot
            self.ax1.errorbar(bins_mp,data_hist,yerr=data_err,capsize=0,fmt='o', c=data_color,label=data_label,zorder=100)
        else:
            ## To get underflow/overflow in python, we need to define the histograms
            ## first with numpy, then plot them in matplotlib (after adding the
            ## underflow/overflow values to first/last bins.
            ## For data, we can use the histogram we already made (because this is
            ## not plotted with ax.hist(), but as error bars instead.
            d_hist,bins = np.histogram(data_values,self.plotBins)
            if self.p_underflow:
                underflow = self.getUnderflow(data_values,[])
                d_hist[0] += underflow
            if self.p_overflow:
                overflow = self.getOverflow(data_values,[])
                d_hist[-1] += overflow
            data_hist = np.asarray([i if i else float('NaN') for i in d_hist])
            data_err = np.sqrt(data_hist)
            data_height = data_hist+data_err # for scaling the y-axis on the plot
            ## Now make the plot. Use the numpy histogram output as the weights
            ## so that the histogram function still works
            ## Use the binning as a proxy for 'data' so that we get 1 entry per bin
            ## and the new values for weights scale the hist appropriately
            self.ax1.errorbar(bins_mp,data_hist,yerr=data_err,capsize=0,fmt='o', c=data_color,label=data_label,zorder=100)

        ## Now plot the prediction and signal samples
        self.plot_prediction(py_samples,json_data)
        self.plot_signal(py_samples,json_data,signal)
        self.get_uncertainties()

        ## ## ## Residual plotting (the Data/MC ratio) ## ## ##
        self.y2min = 0.5
        self.y2max = 1.5
        self.y1max = max([max(self.totpred), np.nanmax(data_height)])/0.82
        # Using 'np.nanmax' here because the 'data_height' array may contain
        # float('NaN') values. In that case, the built-in method 'max' doesn't
        # interpret float('NaN') as 0 (or as any number).

        ## Residual Values (data/prediction subplot)
        # repeat(2) maps per-bin values onto the step-shaped x-grid built above
        self.resid_unc['total']['up'] = list(((self.totpred+self.unc['total']['up'])/self.totpred).repeat(2))
        self.resid_unc['total']['dn'] = list(((self.totpred-self.unc['total']['dn'])/self.totpred).repeat(2))
        residual = deepcopy( data_hist / self.totpred )
        residual_err = deepcopy( data_err / self.totpred )
        self.ax2.errorbar(bins_mp,residual,yerr=residual_err,xerr=half_bin_widths,\
                          capsize=0,fmt='o',c='black',zorder=100)

        ## Simulation Uncertainties
        self.ax2.fill_between(fill_between_bins,\
                              self.resid_unc['total']['dn'],\
                              self.resid_unc['total']['up'],\
                              **self.p_hatch_args)

        ## labels, legends, and text ##
        self.ax2.axhline(y=1,ls='--',c='k',zorder=1)
        self.config_xaxis(self.ax2)

        ## Set the axis properties of the ratio y-axis
        if any('qcd' in b.lower() for b in self.background):
            y_ratio_label = "Data/Pred."
        else:
            y_ratio_label = "Data/MC"
        self.ax2.set_ylim(ymin=self.y2min,ymax=self.y2max)
        self.ax2.set_yticks(np.asarray([0.6,1.0,1.4]))
        self.ax2.set_yticklabels(self.ax2.get_yticks(),fontProperties,fontsize=self.label_size)
        self.ax2.set_ylabel(y_ratio_label,fontsize=self.label_size,ha='center',va='bottom')

    ## Set the axis properties of the main y-axis
    ## (this part runs for both the blinded and unblinded figure)
    ax1_unc_bottom = [0.] # for checking later (in case there is no uncertainty drawn)
    ax1_unc_height = [0.] # for checking later
    if self.draw_ax1_unc:
        ax1_unc_bottom = list( np.asarray( self.totpred-self.unc['total']['dn'] ).repeat(2) )
        ax1_unc_height = list( np.asarray( self.totpred+self.unc['total']['up'] ).repeat(2) )
        self.ax1.fill_between(fill_between_bins,\
                              ax1_unc_bottom,\
                              ax1_unc_height,\
                              zorder=200,\
                              **self.p_hatch_args)
        # need to cover up the uncertainty band (if it's the green band) with black line for histogram
        # if p_format=='eps':
        #     dummy_hist, d_bins, d_patches = ax1.hist(entry.var_vals, bins=plot_bins, weights = entry.scale_factors,
        #                                              histtype = 'step', bottom = bottom_edge,
        #                                              color = 'k', zorder = 999)
        self.y1max = max(self.y1max, max(ax1_unc_height)/0.82)

    ## the rest is common to both kinds of plots, so put it at the end
    ## If the bin width is 10, we don't want 10.0. If it's a float,
    ## we only want to keep the first 2 decimal points
    bin_width_ = str(min(bin_widths)).split('.')
    if len(bin_width_)>1:
        if bin_width_[1][0]=='0':
            # e.g. '10.0' -> '10'
            bin_width_ = bin_width_[0]
        else:
            if len(bin_width_[1])>1:
                bin_width_ = "{0:.2f}".format(min(bin_widths))
            else:
                bin_width_ = "{0:.1f}".format(min(bin_widths))
    else:
        bin_width_ = bin_width_[0]

    unit_tag = ''
    if self.plot_keys['variables'][self.var]['gev']:
        unit_tag = ' GeV'
    y_main_label = "Events/"+bin_width_+unit_tag
    self.ax1.yaxis.get_label().set_position((0,1))
    self.ax1.set_ylabel(y_main_label,fontsize=self.label_size,ha='right',va='bottom')

    ax1_legend = self.ax1.legend(numpoints=1, fontsize=self.leg_txtsize, ncol=2, columnspacing=0.)
    ax1_legend.draw_frame(False)

    ## The following is a hack to change the figure height based on the legend
    ## (so the plot doesn't interfere with the legend). This is difficult
    ## because the legend is drawn at the very end -- when the plot is made:
    ## Using the assumption that size 22 font is approximately 0.08 (use 0.10) units tall
    ## to get the height of the legend, and then determing what gives the larger axis
    handles,labels = self.ax1.get_legend_handles_labels()
    nrows = len(handles)/ax1_legend._ncol + len(handles)%ax1_legend._ncol # approximately number of rows in legend
    legend_height = float(self.leg_txtsize)/self.atlas_size * 0.10 * nrows
    legend_ax1ymax = max( max(ax1_unc_height), max(self.totpred), np.nanmax(data_height) )/(1-legend_height)
    text_heights = [legend_ax1ymax,self.y1max] # compare this new value
                                               # (height based on legend) with
                                               # height based on 'ATLAS' label
    self.config_yaxis(self.ax1,self.y1min,max(text_heights)) # configure the y-axis
    self.plot_text()

    print " Saving file as: {0}.{1}\n".format(outfile,self.p_format)
    plt.savefig(outfile+'.'+self.p_format,bbox_inches='tight',format=self.p_format,dpi=300)
    plt.close()
    return