def plot_fit_results(histograms, category, channel):
    '''
        Plots data against the fitted background and signal for every fit
        variable in every bin of the (global) variable.

        histograms: accessed as histograms[fit_variable][variable_bin][sample]
                    with the samples 'data', 'signal' and 'background'
        category:   used as a sub-folder of the output path
        channel:    used in the plot names and passed to get_cms_labels

        A plot is skipped when files for all requested output formats are
        already present; delete them to force the plot to be regenerated.
    '''
    global variable, b_tag_bin, output_folder
    from tools.plotting import Histogram_properties, make_data_mc_comparison_plot

    fit_variables = histograms.keys()

    for variable_bin in variable_bins_ROOT[variable]:
        path = (output_folder + str(measurement_config.centre_of_mass_energy)
                + 'TeV/' + variable + '/' + category + '/fit_results/')
        make_folder_if_not_exists(path)

        for fit_variable in fit_variables:
            plotname = channel + '_' + fit_variable + '_bin_' + variable_bin

            # check if the plots exist already
            # The previous check used 'continue' inside the loop over the
            # output formats, which only affected that inner loop, and it
            # looked for the files without the output folder.
            # NOTE(review): assumes make_data_mc_comparison_plot writes
            # save_folder + name + '.' + format - confirm
            if all(os.path.isfile(path + plotname + '.' + output_format)
                   for output_format in output_formats):
                continue

            # plot with matplotlib
            bin_histograms = histograms[fit_variable][variable_bin]
            h_data = bin_histograms['data']
            h_signal = bin_histograms['signal']
            h_background = bin_histograms['background']

            histogram_properties = Histogram_properties()
            histogram_properties.name = plotname
            histogram_properties.x_axis_title = fit_variables_latex[fit_variable]
            histogram_properties.y_axis_title = 'Events/(%s)' % get_unit_string(fit_variable)
            label, _ = get_cms_labels(channel)
            histogram_properties.title = label
            histogram_properties.x_limits = measurement_config.fit_boundaries[fit_variable]

            make_data_mc_comparison_plot(
                [h_data, h_background, h_signal],
                ['data', 'background', 'signal'],
                ['black', 'green', 'red'],
                histogram_properties,
                save_folder=path,
                save_as=output_formats,
            )
def plot_fit_results(histograms, category, channel):
    '''
        Plots data against the fitted background and signal for every bin of
        the (global) variable.

        histograms: accessed as histograms[variable_bin][sample] with the
                    samples 'data', 'signal' and 'background'
        category:   used as a sub-folder of the output path
        channel:    used in the plot name, the x-axis title and passed to
                    get_cms_labels

        A plot is skipped when files for all requested output formats are
        already present; delete them to force the plot to be regenerated.
    '''
    global variable, b_tag_bin, output_folder
    from tools.plotting import Histogram_properties, make_data_mc_comparison_plot

    for variable_bin in variable_bins_ROOT[variable]:
        path = (output_folder + str(measurement_config.centre_of_mass)
                + 'TeV/' + variable + '/' + category + '/fit_results/')
        make_folder_if_not_exists(path)
        plotname = channel + '_bin_' + variable_bin

        # check if the plots exist already
        # The previous check used 'continue' inside the loop over the output
        # formats, which only affected that inner loop, and it looked for the
        # files without the output folder.
        # NOTE(review): assumes make_data_mc_comparison_plot writes
        # save_folder + name + '.' + format - confirm
        if all(os.path.isfile(path + plotname + '.' + output_format)
               for output_format in output_formats):
            continue

        # plot with matplotlib
        h_data = histograms[variable_bin]['data']
        h_signal = histograms[variable_bin]['signal']
        h_background = histograms[variable_bin]['background']

        histogram_properties = Histogram_properties()
        histogram_properties.name = plotname
        # raw string: same text as before without relying on invalid escapes
        histogram_properties.x_axis_title = channel + r' $\left|\eta\right|$'
        histogram_properties.y_axis_title = 'events/0.2'
        histogram_properties.title = get_cms_labels(channel)

        make_data_mc_comparison_plot(
            [h_data, h_background, h_signal],
            ['data', 'background', 'signal'],
            ['black', 'green', 'red'],
            histogram_properties,
            save_folder=path,
            save_as=output_formats,
        )
def plot_central_and_systematics(channel, systematics, exclude=(), suffix='altogether'):
    '''
        Draws the central unfolded result (with systematics) together with
        the unfolded result of every systematic variation on one figure and
        saves it in all output formats.

        channel:     channel to read the results for; 'combined' results are
                     saved without the '_kv<k>' part in the file name
        systematics: list of systematic names; the position in this list
                     determines the marker colour
        exclude:     systematics that should not be drawn
        suffix:      appended to the output file name
    '''
    global variable, k_values, b_tag_bin, met_type

    plt.figure(figsize=(16, 16), dpi=200, facecolor='white')
    axes = plt.axes()
    axes.minorticks_on()

    hist_data_central = read_xsection_measurement_results('central', channel)[0]['unfolded_with_systematics']
    hist_data_central.markersize = 2  # points. Imagine, tangible units!
    hist_data_central.marker = 'o'

    plt.xlabel('$%s$ [GeV]' % variables_latex[variable], CMS.x_axis_title)
    # raw strings: same text as before without relying on invalid escapes
    plt.ylabel(r'$\frac{1}{\sigma} \frac{d\sigma}{d' + variables_latex[variable]
               + r'} \left[\mathrm{GeV}^{-1}\right]$', CMS.y_axis_title)
    plt.tick_params(**CMS.axis_label_major)
    plt.tick_params(**CMS.axis_label_minor)

    rplt.errorbar(hist_data_central, axes=axes, label='data', xerr=True)

    for systematic in sorted(systematics):
        if systematic in exclude or systematic == 'central':
            continue

        hist_data_systematic = read_xsection_measurement_results(systematic, channel)[0]['unfolded']
        hist_data_systematic.markersize = 2
        hist_data_systematic.marker = 'o'
        # colour follows the position in the unsorted input list
        colour_number = systematics.index(systematic) + 2
        if colour_number == 10:
            colour_number = 42
        hist_data_systematic.SetMarkerColor(colour_number)

        if 'PDF' in systematic:
            legend_entry = systematic.replace('Weights_', ' ')
        elif met_type in systematic:
            legend_entry = met_systematics_latex[systematic.replace(met_type, '')]
        else:
            legend_entry = measurements_latex[systematic]
        rplt.errorbar(hist_data_systematic, axes=axes, label=legend_entry, xerr=None)

    plt.legend(numpoints=1, loc='center right', prop={'size': 25}, ncol=2)
    label, channel_label = get_cms_labels(channel)
    plt.title(label, CMS.title)
    # CMS text
    # note: fontweight/weight does not change anything as we use Latex text!!!
    plt.text(0.95, 0.95, r"\textbf{CMS}", transform=axes.transAxes, fontsize=42,
             verticalalignment='top', horizontalalignment='right')
    # channel text
    axes.text(0.95, 0.90, r"\emph{%s}" % channel_label, transform=axes.transAxes, fontsize=40,
              verticalalignment='top', horizontalalignment='right')
    plt.tight_layout()

    path = output_folder + str(measurement_config.centre_of_mass_energy) + 'TeV/' + variable
    make_folder_if_not_exists(path)

    # The combined result carries no k-value in its file name, so k_values is
    # only consulted for the individual channels.
    if channel == 'combined':
        k_value_tag = ''
    else:
        k_value_tag = '_kv' + str(k_values[channel])

    for output_format in output_formats:
        filename = (path + '/normalised_xsection_' + channel + '_' + suffix
                    + k_value_tag + '.' + output_format)
        plt.savefig(filename)

    plt.close()
    gc.collect()
def make_template_plots(histograms, category, channel):
    '''
        Plots the signal, V+Jets and QCD fit templates for every bin of the
        (global) variable and saves them in all output formats.

        histograms: accessed as histograms[variable_bin][sample] with the
                    samples 'signal', 'V+Jets' and 'QCD'
        category:   used as a sub-folder of the output path
        channel:    used in the plot name and passed to get_cms_labels

        A bin is skipped when files for all requested output formats are
        already present; delete them to force the plot to be regenerated.
        Empty V+Jets or QCD templates are not drawn; a warning is printed
        instead.
    '''
    global variable, output_folder

    for variable_bin in variable_bins_ROOT[variable]:
        path = (output_folder + str(measurement_config.centre_of_mass)
                + 'TeV/' + variable + '/' + category + '/fit_templates/')
        make_folder_if_not_exists(path)
        plotname = path + channel + '_templates_bin_' + variable_bin

        # check if template plots exist already
        # The previous check used 'continue' inside the loop over the output
        # formats, which only affected that inner loop and skipped nothing.
        if all(os.path.isfile(plotname + '.' + output_format)
               for output_format in output_formats):
            continue

        # plot with matplotlib
        h_signal = histograms[variable_bin]['signal']
        h_VJets = histograms[variable_bin]['V+Jets']
        h_QCD = histograms[variable_bin]['QCD']

        h_signal.linecolor = 'red'
        h_VJets.linecolor = 'green'
        h_QCD.linecolor = 'gray'
        h_VJets.linestyle = 'dashed'
        # currently not working
        # bug report: http://trac.sagemath.org/sage_trac/ticket/13834
        h_QCD.linestyle = 'dotted'

        h_signal.linewidth = 5
        h_VJets.linewidth = 5
        h_QCD.linewidth = 5

        plt.figure(figsize=(16, 16), dpi=200, facecolor='white')
        axes = plt.axes()
        axes.minorticks_on()

        plt.xlabel(r'lepton $|\eta|$', CMS.x_axis_title)
        plt.ylabel('normalised to unit area/0.2', CMS.y_axis_title)
        plt.tick_params(**CMS.axis_label_major)
        plt.tick_params(**CMS.axis_label_minor)

        rplt.hist(h_signal, axes=axes, label='signal')
        # print(...) with a single argument behaves the same in Python 2 and 3
        if h_VJets.Integral() != 0:
            rplt.hist(h_VJets, axes=axes, label='V+Jets')
        else:
            print("WARNING: in %s bin %s, %s category, %s channel, V+Jets template is empty: not plotting." % (variable, variable_bin, category, channel))
        if h_QCD.Integral() != 0:
            rplt.hist(h_QCD, axes=axes, label='QCD')
        else:
            print("WARNING: in %s bin %s, %s category, %s channel, QCD template is empty: not plotting." % (variable, variable_bin, category, channel))
        axes.set_ylim([0, 0.2])

        plt.legend(numpoints=1, loc='upper right', prop=CMS.legend_properties)
        plt.title(get_cms_labels(channel), CMS.title)
        plt.tight_layout()

        for output_format in output_formats:
            plt.savefig(plotname + '.' + output_format)

        plt.close()
        gc.collect()
def submit(self):
    '''
        Submits all registered jobs to the local HTCondor scheduler using
        a job template (DailyPythonScripts/condor/job_template) description
        file and the 'condor_submit' command
    '''
    today = time.strftime("%d-%m-%Y")
    job_folder = 'jobs/{0}/'.format(today)
    make_folder_if_not_exists(job_folder)
    make_folder_if_not_exists(job_folder + 'logs')

    # construct jobs
    self._construct_jobs()

    # convert each job into a pickle file
    # construct a class ad for each job
    with open('condor/job_template', 'r') as template:
        job_template = template.read()
    condor_jobs = []

    # prepare DPS for submission
    self._dps_tar_directory_on_hdfs = '/TopQuarkGroup/condor_dps/{you}/{now}/'.format(
        you=getpass.getuser(),
        now=time.strftime('%d_%m_%Y_%H_%M'),
    )

    for i, job in enumerate(self.prepared_jobs):
        job_file = job_folder + 'job_{0}.pkl'.format(i)
        job_desc_file = job_folder + 'job_{0}.dsc'.format(i)

        input_files = []
        if hasattr(job, 'additional_input_files'):
            input_files.extend(job.additional_input_files)
        input_files_str = ','.join(input_files)

        # fill the placeholders of the template in the original order
        replacements = [
            ('%pkl_file%', job_file),
            ('%dir_of_dps_on_hdfs%', self._dps_tar_directory_on_hdfs),
            ('%total_memory%', str(self.request_memory)),
            ('%n_jobs_to_run%', str(self.n_jobs_to_run)),
            ('%n_jobs_to_split%', str(self.n_jobs_to_split)),
            ('%input_files%', input_files_str),
            ('%today%', today),
        ]
        job_description = job_template
        for placeholder, value in replacements:
            job_description = job_description.replace(placeholder, value)

        # pickle data is binary: text mode breaks under Python 3 and can
        # corrupt the file on platforms that translate line endings
        with open(job_file, 'wb') as jf:
            pickle.dump(job, jf)
        with open(job_desc_file, 'w+') as jdf:
            jdf.write(job_description)

        condor_jobs.append(job_desc_file)

    prepare_process = subprocess.Popen(
        ['./condor/prepare_dps.sh', self._dps_tar_directory_on_hdfs])
    prepare_process.communicate()  # wait until command completed

    # submit jobs
    for j in condor_jobs:
        p = subprocess.Popen(['condor_submit', j])
        p.communicate()  # wait until command completed
def unfold_results(results, category, channel, h_truth, h_measured, h_response, method):
    '''
        Unfolds the given results and writes the unfolding diagnostics to
        ROOT files below path_to_JSON.

        results:  list of (value, error) tuples, converted to a histogram
                  using bin_edges[variable]
        category: measurement category; for anything but 'central'
                  unfoldCfg.Hreco is set to 0 before unfolding
        channel:  used in the output path
        h_truth, h_measured, h_response: inputs for the Unfolding object
        method:   unfolding method; 'TSVDUnfold' objects are accessed
                  directly, all others via unfoldObject.Impl()

        Returns the unfolded data as list of (value, error) tuples.
    '''
    global variable, path_to_JSON
    h_data = value_error_tuplelist_to_hist(results, bin_edges[variable])
    unfolding = Unfolding(h_truth, h_measured, h_response, method=method)

    # turning off the unfolding errors for systematic samples
    if category != 'central':
        unfoldCfg.Hreco = 0

    h_unfolded_data = unfolding.unfold(h_data)

    # export the D and SV distributions
    SVD_path = (path_to_JSON + '/' + variable + '/unfolding_objects/' + channel
                + '/kv_' + str(unfoldCfg.SVD_k_value) + '/')
    make_folder_if_not_exists(SVD_path)

    uses_tsvd = method == 'TSVDUnfold'
    if uses_tsvd:
        distributions_file_name = SVD_path + method + '_SVDdistributions_' + category + '.root'
    else:
        distributions_file_name = (SVD_path + method + '_SVDdistributions_Hreco'
                                   + str(unfoldCfg.Hreco) + '_' + category + '.root')

    SVDdist = TFile(distributions_file_name, 'recreate')
    SVDdist.mkdir('SVDdist').cd()
    if uses_tsvd:
        unfolding.unfoldObject.GetD().Write()
        unfolding.unfoldObject.GetSV().Write()
    else:
        unfolding.unfoldObject.Impl().GetD().Write()
        unfolding.unfoldObject.Impl().GetSV().Write()
        # the input histograms are only stored for the non-TSVDUnfold methods
        h_truth.Write()
        h_measured.Write()
        h_response.Write()
    SVDdist.Close()

    # export the whole unfolding object if it doesn't exist
    if uses_tsvd:
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + category + '.root'
    else:
        unfolding_object_file_name = (SVD_path + method + '_unfoldingObject_Hreco'
                                      + str(unfoldCfg.Hreco) + '_' + category + '.root')

    if not os.path.isfile(unfolding_object_file_name):
        unfoldingObjectFile = TFile(unfolding_object_file_name, 'recreate')
        unfoldingObjectFile.mkdir('unfoldingObject').cd()
        if uses_tsvd:
            unfolding.unfoldObject.Write()
        else:
            unfolding.unfoldObject.Impl().Write()
        unfoldingObjectFile.Close()

    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data)
def make_plots_matplotlib(histograms, category, output_folder, histname):
    '''
        Plots the unfolded and measured distributions as markers and all
        other histograms as dashed lines, then saves the figure in all
        output formats.

        histograms:    dictionary that must contain the keys 'unfolded' and
                       'measured'; every key that contains neither of the two
                       words is drawn as a line and labelled with
                       measurements_latex_matplotlib[key]
        category:      used as a sub-folder of the output path
        output_folder: base folder for the plots
        histname:      used in the file name; the channel ('electron', 'muon'
                       or 'combined') is derived from it
    '''
    global variable, variables_latex_matplotlib, measurements_latex_matplotlib, k_value

    if 'electron' in histname:
        channel = 'electron'
    elif 'muon' in histname:
        channel = 'muon'
    else:
        channel = 'combined'

    # plot with matplotlib
    hist_data = histograms['unfolded']
    hist_measured = histograms['measured']

    hist_data.markersize = 2
    hist_measured.markersize = 2
    hist_data.marker = 'o'
    hist_measured.marker = 'o'
    hist_measured.color = 'red'

    plt.figure(figsize=(14, 10), dpi=200, facecolor='white')
    axes = plt.axes()
    axes.minorticks_on()

    plt.xlabel('$%s$ [GeV]' % variables_latex_matplotlib[variable], CMS.x_axis_title)
    # raw strings: same text as before without relying on invalid escapes
    plt.ylabel(r'$\frac{1}{\sigma} \times \frac{d\sigma}{d' + variables_latex_matplotlib[variable]
               + r'} \left[\mathrm{GeV}^{-1}\right]$', CMS.y_axis_title)
    plt.tick_params(**CMS.axis_label_major)
    plt.tick_params(**CMS.axis_label_minor)

    rplt.errorbar(hist_data, axes=axes, label='unfolded', xerr=False)
    rplt.errorbar(hist_measured, axes=axes, label='measured', xerr=False)

    # items() instead of iteritems(): works with Python 2 and 3
    for key, hist in histograms.items():
        if 'unfolded' in key or 'measured' in key:
            continue
        hist.linestyle = 'dashed'
        hist.linewidth = 2
        # setting colours
        if 'POWHEG' in key or 'matchingdown' in key:
            hist.SetLineColor(kBlue)
        elif 'MADGRAPH' in key or 'matchingup' in key:
            hist.SetLineColor(kRed + 1)
        elif 'MCATNLO' in key or 'scaleup' in key:
            hist.SetLineColor(kMagenta + 3)
        elif 'scaledown' in key:
            hist.SetLineColor(kGreen)
        rplt.hist(hist, axes=axes, label=measurements_latex_matplotlib[key])

    plt.legend(numpoints=1, loc='upper right', prop=CMS.legend_properties)
    plt.title(get_cms_labels_matplotlib(channel), CMS.title)
    plt.tight_layout()

    path = output_folder + str(measurement_config.centre_of_mass) + 'TeV/' + variable + '/' + category
    make_folder_if_not_exists(path)
    for output_format in output_formats:
        plt.savefig(path + '/' + histname + '_kv' + str(k_value) + '.' + output_format)

    # release the figure; otherwise every call leaves one more open figure
    plt.close()
def main():
    '''
        Main function for this script: parses the command line options and
        passes them on to create_unfolding_pull_data.
    '''
    set_root_defaults(msg_ignore_level=3001)

    parser = OptionParser()
    parser.add_option("-o", "--output",
                      dest="output_folder", default='data/pull_data/',
                      help="output folder for pull data files")
    parser.add_option("-n", "--n_input_mc", type=int,
                      dest="n_input_mc", default=100,
                      help="number of toy MC used for the tests")
    parser.add_option("-k", "--k_value", type=int,
                      dest="k_value", default=3,
                      help="k-value for SVD unfolding")
    parser.add_option("--tau", type='float',
                      dest="tau_value", default=-1.,
                      help="tau-value for SVD unfolding")
    parser.add_option("-m", "--method", type='string',
                      dest="method", default='RooUnfoldSvd',
                      help="unfolding method")
    parser.add_option("-f", "--file", type='string',
                      dest="file", default='data/toy_mc/unfolding_toy_mc.root',
                      help="file with toy MC")
    parser.add_option("-v", "--variable", dest="variable", default='MET',
                      help="set the variable to analyse (MET, HT, ST, MT, WPT)")
    # %default is expanded by optparse, so the help text cannot drift away
    # from the actual default again (it used to claim 8 TeV)
    parser.add_option("-s", "--centre-of-mass-energy", dest="CoM", default=13,
                      help="set the centre of mass energy for analysis. "
                           "Default = %default [TeV]",
                      type=int)
    parser.add_option("-c", "--channel", type='string',
                      dest="channel", default='combined',
                      help="channel to be analysed: electron|muon|combined")
    parser.add_option("--offset_toy_mc", type=int,
                      dest="offset_toy_mc", default=0,
                      help="offset of the toy MC used to response matrix")
    parser.add_option("--offset_toy_data", type=int,
                      dest="offset_toy_data", default=0,
                      help="offset of the toy MC used as data for unfolding")
    (options, _) = parser.parse_args()

    centre_of_mass = options.CoM
    make_folder_if_not_exists(options.output_folder)

    # set the number of toy MC for error calculation
    k_value = options.k_value
    tau_value = options.tau_value
    use_n_toy = options.n_input_mc
    offset_toy_mc = options.offset_toy_mc
    offset_toy_data = options.offset_toy_data
    method = options.method
    variable = options.variable

    # the same number of toys is used for the MC and the data input
    create_unfolding_pull_data(options.file, method, options.channel,
                               centre_of_mass, variable,
                               use_n_toy, use_n_toy,
                               options.output_folder,
                               offset_toy_mc, offset_toy_data,
                               k_value, tau_value)
def create_unfolding_pull_data(input_file_name, method, channel,
                               centre_of_mass, variable,
                               n_toy_mc, n_toy_data,
                               output_folder,
                               offset_toy_mc, offset_toy_data,
                               k_value, tau_value=-1,
                               run_matrix=None):
    '''
        Sets up all variables for check_multiple_data_multiple_unfolding

        The output is placed in
        <output_folder>/<centre_of_mass>TeV/<variable>/
            <n_toy_mc>_input_toy_mc/<n_toy_data>_input_toy_data/
            <k|tau>_value_<value>/
        A tau_value >= 0 takes precedence over k_value for the folder name
        and the message; it is rounded to one decimal there. Both values are
        passed on unchanged.
    '''
    timer = Timer()
    input_file = File(input_file_name, 'read')

    folder_template = '{path}/{centre_of_mass}TeV/{variable}/'
    folder_template += '{n_toy_mc}_input_toy_mc/{n_toy_data}_input_toy_data/'
    folder_template += '{vtype}_value_{value}/'

    msg_template = 'Producing unfolding pull data for {variable},'
    msg_template += ' {vtype}-value {value}'

    inputs = {
        'path': output_folder,
        'centre_of_mass': centre_of_mass,
        'variable': variable,
        'n_toy_mc': n_toy_mc,
        'n_toy_data': n_toy_data,
        'vtype': 'k',
        'value': k_value,
    }
    if tau_value >= 0:
        inputs['vtype'] = 'tau'
        inputs['value'] = round(tau_value, 1)

    output_folder = folder_template.format(**inputs)
    make_folder_if_not_exists(output_folder)
    print(msg_template.format(**inputs))
    print('Output folder: {0}'.format(output_folder))

    try:
        check_multiple_data_multiple_unfolding(
            input_file, method, channel, variable,
            n_toy_mc, n_toy_data,
            output_folder,
            offset_toy_mc, offset_toy_data,
            k_value, tau_value,
            run_matrix,
        )
    finally:
        # the input file used to stay open after the function returned
        # NOTE(review): assumes File is a ROOT/rootpy file with Close() - confirm
        input_file.Close()

    # single formatted string: prints the same text with and without
    # print_function (a two-argument print shows a tuple under Python 2)
    print('Runtime {0}'.format(timer.elapsed_time()))
def compare_vjets_templates(variable='MET', met_type='patType1CorrectedPFMet',
                            title='Untitled', channel='electron'):
    '''
        Compares the V+jets templates in different bins of the current
        variable with the inclusive (sum of all bins) template.

        variable: variable whose bins are compared
        met_type: inserted into the histogram path template
        title:    title of the plots
        channel:  used for the additional text on the plots
                  NOTE(review): the fit variables are always taken from
                  electron_fit_variables, independent of the channel - confirm

        All templates are normalised to unit area; templates with an integral
        of zero are left unscaled.
    '''
    global fit_variable_properties, b_tag_bin, save_as
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template(variable)

    for fit_variable in electron_fit_variables:
        all_hists = {}
        save_path = 'plots/%dTeV/fit_variables/%s/%s/' % (
            measurement_config.centre_of_mass_energy, variable, fit_variable)
        make_folder_if_not_exists(save_path + '/vjets/')

        for bin_range in variable_bins:
            params = {
                'met_type': met_type,
                'bin_range': bin_range,
                'fit_variable': fit_variable,
                'b_tag_bin': b_tag_bin,
                'variable': variable,
            }
            fit_variable_distribution = histogram_template % params
            # format: histograms['data'][qcd_fit_variable_distribution]
            histograms = get_histograms_from_files([fit_variable_distribution], histogram_files)
            prepare_histograms(histograms,
                               rebin=fit_variable_properties[fit_variable]['rebin'],
                               scale_factor=measurement_config.luminosity_scale)
            all_hists[bin_range] = histograms['V+Jets'][fit_variable_distribution]

        # create the inclusive distributions
        inclusive_hist = deepcopy(all_hists[variable_bins[0]])
        for bin_range in variable_bins[1:]:
            inclusive_hist += all_hists[bin_range]

        # normalise all histograms
        for bin_range in variable_bins:
            integral = all_hists[bin_range].Integral()
            if integral != 0:
                all_hists[bin_range].Scale(1.0 / integral)
        # same guard as for the individual bins: an empty inclusive template
        # used to raise a ZeroDivisionError here
        inclusive_integral = inclusive_hist.Integral()
        if inclusive_integral != 0:
            inclusive_hist.Scale(1.0 / inclusive_integral)

        # now compare inclusive to all bins
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = fit_variable_properties[fit_variable]['x-title']
        histogram_properties.y_axis_title = fit_variable_properties[fit_variable]['y-title']
        histogram_properties.y_axis_title = histogram_properties.y_axis_title.replace('Events', 'a.u.')
        histogram_properties.x_limits = [fit_variable_properties[fit_variable]['min'],
                                         fit_variable_properties[fit_variable]['max']]
        histogram_properties.title = title
        histogram_properties.additional_text = channel_latex[channel] + ', ' + b_tag_bins_latex[b_tag_bin]
        histogram_properties.name = variable + '_' + fit_variable + '_' + b_tag_bin + '_VJets_template_comparison'
        histogram_properties.y_max_scale = 1.5

        # items() instead of iteritems(): works with Python 2 and 3
        measurements = {bin_range + ' GeV': histogram
                        for bin_range, histogram in all_hists.items()}
        measurements = OrderedDict(sorted(measurements.items()))

        fit_var = fit_variable.replace('electron_', '')
        fit_var = fit_var.replace('muon_', '')

        # replace every histogram by its graph; both sequences follow the
        # sorted key order
        graphs = spread_x(list(measurements.values()), fit_variable_bin_edges[fit_var])
        for key, graph in zip(sorted(measurements.keys()), graphs):
            measurements[key] = graph

        compare_measurements(models={'inclusive': inclusive_hist},
                             measurements=measurements,
                             show_measurement_errors=True,
                             histogram_properties=histogram_properties,
                             save_folder=save_path + '/vjets/',
                             save_as=save_as)
def save(self):
    '''
        Saves the current matplotlib figure as <path><name>.<format> for
        every format listed in the plot properties. The folder is created
        if it does not exist.
    '''
    make_folder_if_not_exists(self._path)
    name_template = '{path}{name}.{format}'
    for output_format in self.__properties.formats:
        plt.savefig(name_template.format(
            path=self._path,
            name=self.__properties.name,
            format=output_format,
        ))
def save(self):
    '''
        Writes the current matplotlib figure once per configured format;
        the files are named <path><name>.<format>. The target folder is
        created first if necessary.
    '''
    make_folder_if_not_exists(self._path)
    for extension in self.__properties.formats:
        target = '{path}{name}.{format}'.format(
            path=self._path,
            name=self.__properties.name,
            format=extension,
        )
        plt.savefig(target)
def print_xsections(xsections, channel, toFile=True):
    '''
        Formats the cross section results as rows of a LaTeX table and
        writes them to a .tex file or prints them.

        xsections: accessed as xsections[source][bin_i] -> (value, error);
                   must contain 'central' with one entry per variable bin
        channel:   used in the heading and in the output file name
        toFile:    if True the table is written to
                   <savePath>/<variable>/normalised_xsection_result_<channel>_<met_type>_kv<k_value>.tex,
                   otherwise it is printed
    '''
    global savePath, variable, k_value, met_type, b_tag_bin
    separator = '=' * 60

    printout = '\n'
    printout += separator
    # this used to be a plain assignment, which discarded the separator above
    printout += '\n'
    printout += 'Results for %s variable, %s channel, k-value %s, met type %s, %s b-tag region\n' % (
        variable, channel, k_value, met_type, b_tag_bin)
    printout += separator
    printout += '\n'

    rows = {}
    header = 'Measurement'
    scale = 100

    bins = variable_bins_ROOT[variable]
    assert (len(bins) == len(xsections['central']))

    for bin_i, variable_bin in enumerate(bins):
        # raw strings: same text as before without relying on invalid escapes
        header += r'& $\sigma_{meas}$ %s bin %s~\GeV' % (variable, variable_bin)
        for source in categories:
            value, error = xsections[source][bin_i]
            relativeError = getRelativeError(value, error)
            text = (r' $(%.2f \pm %.2f) \cdot 10^{-2}$ ' % (value * scale, error * scale)
                    + '(%.2f' % (relativeError * 100) + r'\%)')
            if source in rows:
                rows[source].append(text)
            else:
                # first column of every row is the name of the source
                rows[source] = [translateOptions[source], text]

    header += '\\\\ \n'
    printout += header
    printout += '\\hline\n'

    # central result first, then all other sources in alphabetical order
    printout += '&'.join(rows['central'])
    printout += '\\\\ \n'

    for source in sorted(rows.keys()):
        if source == 'central':
            continue
        printout += '&'.join(rows[source])
        printout += '\\\\ \n'
    printout += '\\hline \n\n'

    make_folder_if_not_exists(savePath + '/' + variable)
    if toFile:
        file_name = (savePath + '/' + variable + '/normalised_xsection_result_' + channel
                     + '_' + met_type + '_kv' + str(k_value) + '.tex')
        with open(file_name, 'w') as output_file:
            output_file.write(printout)
    else:
        print(printout)
def check_save_folder(save_folder):
    '''
        Returns save_folder with a guaranteed trailing '/' and makes sure
        that the folder exists.
    '''
    if save_folder.endswith('/'):
        checked_folder = save_folder
    else:
        checked_folder = save_folder + '/'

    make_folder_if_not_exists(checked_folder)
    return checked_folder
def unfold_results(results, category, channel, k_value, h_truth, h_measured, h_response, h_fakes, method):
    '''
        Unfolds the given results and, if options.write_unfolding_objects is
        set, writes the unfolding diagnostics to ROOT files below
        path_to_JSON.

        results:  list of (value, error) tuples, converted to a histogram
                  using bin_edges[variable]
        category: measurement category; unfoldCfg.Hreco is set to
                  options.Hreco for 'central' and to 0 for everything else
        channel:  used in the output path
        k_value:  passed to the Unfolding object and used in the output path
        h_truth, h_measured, h_response, h_fakes: inputs for the Unfolding
                  object
        method:   unfolding method; 'TSVDUnfold' objects are accessed
                  directly, all others via unfoldObject.Impl()

        Returns the unfolded data as list of (value, error) tuples.
    '''
    global variable, path_to_JSON, options
    h_data = value_error_tuplelist_to_hist(results, bin_edges[variable])
    unfolding = Unfolding(h_truth, h_measured, h_response, h_fakes,
                          method=method, k_value=k_value)

    # turning off the unfolding errors for systematic samples
    unfoldCfg.Hreco = options.Hreco if category == 'central' else 0

    h_unfolded_data = unfolding.unfold(h_data)

    if options.write_unfolding_objects:
        uses_tsvd = method == 'TSVDUnfold'

        # export the D and SV distributions
        SVD_path = path_to_JSON + '/unfolding_objects/' + channel + '/kv_' + str(k_value) + '/'
        make_folder_if_not_exists(SVD_path)

        if uses_tsvd:
            distributions_file_name = SVD_path + method + '_SVDdistributions_' + category + '.root'
        else:
            distributions_file_name = (SVD_path + method + '_SVDdistributions_Hreco'
                                       + str(unfoldCfg.Hreco) + '_' + category + '.root')

        SVDdist = File(distributions_file_name, 'recreate')
        SVDdist.mkdir('SVDdist').cd()
        if uses_tsvd:
            unfolding.unfoldObject.GetD().Write()
            unfolding.unfoldObject.GetSV().Write()
        else:
            unfolding.unfoldObject.Impl().GetD().Write()
            unfolding.unfoldObject.Impl().GetSV().Write()
            # the input histograms are only stored for the non-TSVDUnfold methods
            h_truth.Write()
            h_measured.Write()
            h_response.Write()
        SVDdist.Close()

        # export the whole unfolding object if it doesn't exist
        if uses_tsvd:
            unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + category + '.root'
        else:
            unfolding_object_file_name = (SVD_path + method + '_unfoldingObject_Hreco'
                                          + str(unfoldCfg.Hreco) + '_' + category + '.root')

        if not os.path.isfile(unfolding_object_file_name):
            unfoldingObjectFile = File(unfolding_object_file_name, 'recreate')
            unfoldingObjectFile.mkdir('unfoldingObject').cd()
            if uses_tsvd:
                unfolding.unfoldObject.Write()
            else:
                unfolding.unfoldObject.Impl().Write()
            unfoldingObjectFile.Close()

    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data)
def plot_central_and_systematics(channel, systematics, exclude=(), suffix='altogether'):
    '''
        Draws the central unfolded result together with the unfolded result
        of every systematic variation on one ROOT canvas and saves it in all
        output formats.

        channel:     channel to read the results for
        systematics: list of systematic names; the position in this list
                     determines the marker colour
        exclude:     systematics that should not be drawn
        suffix:      appended to the output file name
    '''
    global variable, variables_latex, k_value, b_tag_bin, maximum, ttbar_generator_systematics

    canvas = Canvas(width=700, height=500)
    canvas.SetLeftMargin(0.15)
    canvas.SetBottomMargin(0.15)
    canvas.SetTopMargin(0.05)
    canvas.SetRightMargin(0.05)
    legend = plotting.create_legend(x0=0.6, y1=0.5)

    hist_data_central = read_xsection_measurement_results('central', channel)[0]['unfolded']

    x_axis = hist_data_central.GetXaxis()
    y_axis = hist_data_central.GetYaxis()
    x_axis.SetTitle(variables_latex[variable] + ' [GeV]')
    y_axis.SetTitle('#frac{1}{#sigma} #frac{d#sigma}{d' + variables_latex[variable] + '} [GeV^{-1}]')
    x_axis.SetTitleSize(0.05)
    y_axis.SetTitleSize(0.05)
    hist_data_central.SetMinimum(0)
    hist_data_central.SetMaximum(maximum[variable])
    hist_data_central.SetMarkerSize(1)
    hist_data_central.SetMarkerStyle(20)
    gStyle.SetEndErrorSize(20)
    hist_data_central.Draw('P')
    legend.AddEntry(hist_data_central, 'measured (unfolded)', 'P')

    for systematic in systematics:
        if systematic in exclude or systematic == 'central':
            continue

        hist_data_systematic = read_xsection_measurement_results(systematic, channel)[0]['unfolded']
        hist_data_systematic.SetMarkerSize(0.5)
        hist_data_systematic.SetMarkerStyle(20)
        # colour follows the position in the input list
        colour_number = systematics.index(systematic) + 1
        if colour_number == 10:
            colour_number = 42
        hist_data_systematic.SetMarkerColor(colour_number)
        hist_data_systematic.Draw('same P')
        legend.AddEntry(hist_data_systematic, systematic, 'P')

    legend.Draw()

    cms_label, channel_label = get_cms_labels(channel)
    cms_label.Draw()
    channel_label.Draw()

    canvas.Modified()
    canvas.Update()

    path = output_folder + str(measurement_config.centre_of_mass) + 'TeV/' + variable
    make_folder_if_not_exists(path)
    for output_format in output_formats:
        canvas.SaveAs(path + '/normalised_xsection_' + channel + '_' + suffix
                      + '_kv' + str(k_value) + '.' + output_format)
def main():
    '''
        Main function for this script: parses the command line options and
        passes them on to create_unfolding_pull_data.
    '''
    set_root_defaults(msg_ignore_level=3001)

    parser = OptionParser()
    parser.add_option("-o", "--output",
                      dest="output_folder", default='data/pull_data/',
                      help="output folder for pull data files")
    parser.add_option("-n", "--n_input_mc", type=int,
                      dest="n_input_mc", default=100,
                      help="number of toy MC used for the tests")
    parser.add_option("--tau", type='float',
                      dest="tau_value", default=-1.,
                      help="tau-value for SVD unfolding")
    parser.add_option("-m", "--method", type='string',
                      dest="method", default='TUnfold',
                      help="unfolding method")
    parser.add_option("-f", "--file", type='string',
                      dest="file", default='data/toy_mc/unfolding_toy_mc.root',
                      help="file with toy MC")
    parser.add_option("-v", "--variable", dest="variable", default='MET',
                      help="set the variable to analyse (defined in config/variable_binning.py)")
    # %default is expanded by optparse, so the help text cannot drift away
    # from the actual default again (it used to claim 8 TeV)
    parser.add_option("--com", "--centre-of-mass-energy", dest="CoM", default=13,
                      help="set the centre of mass energy for analysis. "
                           "Default = %default [TeV]",
                      type=int)
    parser.add_option("-c", "--channel", type='string',
                      dest="channel", default='combined',
                      help="channel to be analysed: electron|muon|combined")
    # the help text used to be a copy of the one for --channel
    parser.add_option("-s", type='string',
                      dest="sample", default='madgraph',
                      help="name of the sample to use. Default = %default")

    (options, _) = parser.parse_args()

    centre_of_mass = options.CoM
    measurement_config = XSectionConfig(centre_of_mass)
    make_folder_if_not_exists(options.output_folder)

    use_n_toy = options.n_input_mc
    method = options.method
    variable = options.variable
    sample = options.sample
    tau_value = options.tau_value

    create_unfolding_pull_data(options.file,
                               method,
                               options.channel,
                               centre_of_mass,
                               variable,
                               sample,
                               measurement_config.unfolding_central,
                               use_n_toy,
                               options.output_folder,
                               tau_value)
def tau_from_scan(unfoldingObject, regularisation_settings):
    '''
        Runs unfoldingObject.ScanTau over a variable dependent tau range,
        saves a plot of the scan result with the best point marked as
        <output_folder>/<variable>.png and returns unfoldingObject.GetTau().

        unfoldingObject:         object providing ScanTau and GetTau
        regularisation_settings: provides the attributes 'variable' and
                                 'output_folder'
    '''
    variable = regularisation_settings.variable

    # Parameters of scan
    # Number of points to scan, and min/max tau
    nScan = 1000
    tau_ranges = {
        'abs_lepton_eta': (1.E-8, 1.E-3),
        'lepton_pt': (1.E-6, 1.E-2),
        'NJets': (1.E-6, 1.E-2),
    }
    minTau, maxTau = tau_ranges.get(variable, (1.E-6, 1.E-0))

    # Plot that gets outputted by the scan
    scanResult = TSpline3()

    # Scan is performed here
    iBest = unfoldingObject.ScanTau(nScan, minTau, maxTau, scanResult,
                                    TUnfoldDensity.kEScanTauRhoSquareAvg)

    # Plot the scan result
    # Correlation as function of log tau
    canvas = TCanvas()
    scanResult.SetMarkerColor(600)
    scanResult.SetMarkerSize(1)
    scanResult.SetMarkerStyle(5)
    scanResult.Draw('LP')

    # Add point corresponding to optimum tau
    t = Double(0)
    x = Double(0)
    scanResult.GetKnot(iBest, t, x)

    bestTau = Graph(1)
    # Point indices start at 0. Setting point 1 of a one-point graph added a
    # second point and left the first one at its initial position.
    bestTau.SetPoint(0, t, x)
    bestTau.markercolor = 'red'
    bestTau.SetMarkerSize(1.25)
    bestTau.Draw('*')

    # Write to file
    output_dir = regularisation_settings.output_folder
    make_folder_if_not_exists(output_dir)
    canvas.SaveAs(output_dir + '/{0}.png'.format(variable))

    return unfoldingObject.GetTau()
def plot_central_and_systematics(channel, systematics, exclude=(), suffix='altogether'):
    '''
        Draws the central unfolded result (with systematics) together with
        the unfolded result of every systematic variation on one figure and
        saves it in all output formats.

        channel:     channel to read the results for
        systematics: list of systematic names; the position in this list
                     determines the marker colour
        exclude:     systematics that should not be drawn
        suffix:      appended to the output file name
    '''
    global variable, k_value, b_tag_bin, met_type

    plt.figure(figsize=(16, 16), dpi=200, facecolor='white')
    axes = plt.axes()
    axes.minorticks_on()

    hist_data_central = read_xsection_measurement_results('central', channel)[0]['unfolded_with_systematics']
    hist_data_central.markersize = 2  # points. Imagine, tangible units!
    hist_data_central.marker = 'o'

    plt.xlabel('$%s$ [GeV]' % variables_latex[variable], CMS.x_axis_title)
    # raw strings: same text as before without relying on invalid escapes
    plt.ylabel(r'$\frac{1}{\sigma} \frac{d\sigma}{d' + variables_latex[variable]
               + r'} \left[\mathrm{GeV}^{-1}\right]$', CMS.y_axis_title)
    plt.tick_params(**CMS.axis_label_major)
    plt.tick_params(**CMS.axis_label_minor)

    rplt.errorbar(hist_data_central, axes=axes, label='data', xerr=True)

    for systematic in sorted(systematics):
        if systematic in exclude or systematic == 'central':
            continue

        hist_data_systematic = read_xsection_measurement_results(systematic, channel)[0]['unfolded']
        hist_data_systematic.markersize = 2
        hist_data_systematic.marker = 'o'
        # colour follows the position in the unsorted input list
        colour_number = systematics.index(systematic) + 2
        if colour_number == 10:
            colour_number = 42
        hist_data_systematic.SetMarkerColor(colour_number)

        if 'PDF' in systematic:
            legend_entry = systematic.replace('Weights_', ' ')
        elif met_type in systematic:
            legend_entry = met_systematics_latex[systematic.replace(met_type, '')]
        else:
            legend_entry = measurements_latex[systematic]
        rplt.errorbar(hist_data_systematic, axes=axes, label=legend_entry, xerr=False)

    plt.legend(numpoints=1, loc='upper right', prop={'size': 25}, ncol=2)
    plt.title(get_cms_labels(channel), CMS.title)
    plt.tight_layout()

    path = output_folder + str(measurement_config.centre_of_mass) + 'TeV/' + variable
    make_folder_if_not_exists(path)
    for output_format in output_formats:
        plt.savefig(path + '/normalised_xsection_' + channel + '_' + suffix
                    + '_kv' + str(k_value) + '.' + output_format)

    plt.close()
    gc.collect()
def make_template_plots(histograms, category, channel):
    '''
        Draws the signal, V+Jets and QCD fit templates for every bin of the
        (global) variable on a ROOT canvas and saves it in all output
        formats.

        histograms: accessed as histograms[variable_bin][sample] with the
                    samples 'signal', 'V+Jets' and 'QCD'
        category:   used as a sub-folder of the output path
        channel:    used in the plot name and passed to get_cms_labels

        A bin is skipped when files for all requested output formats are
        already present; delete them to force the plot to be regenerated.
    '''
    global variable, output_folder

    for variable_bin in variable_bins_ROOT[variable]:
        path = (output_folder + str(measurement_config.centre_of_mass)
                + 'TeV/' + variable + '/' + category + '/fit_templates/')
        make_folder_if_not_exists(path)
        plotname = path + channel + '_templates_bin_' + variable_bin

        # check if template plots exist already
        # The previous check used 'continue' inside the loop over the output
        # formats, which only affected that inner loop and skipped nothing.
        if all(os.path.isfile(plotname + '.' + output_format)
               for output_format in output_formats):
            continue

        canvas = Canvas(width=700, height=500)
        canvas.SetLeftMargin(0.15)
        canvas.SetBottomMargin(0.15)
        canvas.SetTopMargin(0.05)
        canvas.SetRightMargin(0.05)
        legend = plotting.create_legend(x0=0.7, y1=0.8)

        h_signal = histograms[variable_bin]['signal']
        h_VJets = histograms[variable_bin]['V+Jets']
        h_QCD = histograms[variable_bin]['QCD']

        # the signal template is drawn first, so it defines axes and range
        h_signal.GetXaxis().SetTitle('Lepton #eta')
        h_signal.GetYaxis().SetTitle('Normalised Events')
        h_signal.GetXaxis().SetTitleSize(0.05)
        h_signal.GetYaxis().SetTitleSize(0.05)
        h_signal.SetMinimum(0)
        h_signal.SetMaximum(0.2)

        h_signal.SetLineWidth(2)
        h_VJets.SetLineWidth(2)
        h_QCD.SetLineWidth(2)
        h_signal.SetLineColor(kRed + 1)
        h_VJets.SetLineColor(kBlue)
        h_QCD.SetLineColor(kYellow)

        h_signal.Draw('hist')
        h_VJets.Draw('hist same')
        h_QCD.Draw('hist same')

        legend.AddEntry(h_signal, 'signal', 'l')
        legend.AddEntry(h_VJets, 'V+Jets', 'l')
        legend.AddEntry(h_QCD, 'QCD', 'l')
        legend.Draw()

        cms_label, channel_label = get_cms_labels(channel)
        cms_label.Draw()
        channel_label.Draw()

        canvas.Modified()
        canvas.Update()
        for output_format in output_formats:
            canvas.SaveAs(plotname + '.' + output_format)
def make_template_plots_matplotlib(histograms, category, channel):
    '''
        Plots the signal, V+Jets and QCD templates of every bin of the
        current variable with matplotlib (LaTeX text rendering enabled) and
        saves one figure per bin in each of the formats in output_formats.
    '''
    global variable, output_folder
    from matplotlib import rc
    rc('text', usetex=True)

    for variable_bin in variable_bins_ROOT[variable]:
        path = output_folder + str(measurement_config.centre_of_mass) + 'TeV/' + variable + '/' + category + '/fit_templates/'
        make_folder_if_not_exists(path)
        plotname = path + channel + '_templates_bin_' + variable_bin

        # check if template plots exist already
        # NOTE(review): the 'continue' only affects this inner loop, so
        # existing plots are always regenerated.
        for output_format in output_formats:
            if os.path.isfile(plotname + '.' + output_format):
                continue

        # plot with matplotlib
        h_signal = histograms[variable_bin]['signal']
        h_VJets = histograms[variable_bin]['V+Jets']
        h_QCD = histograms[variable_bin]['QCD']

        h_signal.linecolor = 'red'
        h_VJets.linecolor = 'green'
        h_QCD.linecolor = 'yellow'

        h_signal.linewidth = 5
        h_VJets.linewidth = 5
        h_QCD.linewidth = 5

        plt.figure(figsize=(14, 10), dpi=200, facecolor='white')
        axes = plt.axes()
        axes.minorticks_on()

        plt.xlabel(r'lepton $|\eta|$', CMS.x_axis_title)
        plt.ylabel('normalised to unit area/0.2', CMS.y_axis_title)
        plt.tick_params(**CMS.axis_label_major)
        plt.tick_params(**CMS.axis_label_minor)

        rplt.hist(h_signal, axes=axes, label='signal')
        rplt.hist(h_VJets, axes=axes, label='V+Jets')
        rplt.hist(h_QCD, axes=axes, label='QCD')
        axes.set_ylim([0,0.2])

        plt.legend(numpoints=1, loc='upper right', prop=CMS.legend_properties)
        plt.title(get_cms_labels_matplotlib(channel), CMS.title)
        plt.tight_layout()

        for output_format in output_formats:
            plt.savefig(plotname + '.' + output_format)

        # one figure is created per bin; close it so that figures do not
        # accumulate in memory over the loop
        plt.close()
def submit(self):
    '''
        Submits all registered jobs to the local HTCondor scheduler using
        a job template (DailyPythonScripts/condor/job_template) description
        file and the 'condor_submit' command

        For every prepared job a pickle file and a job description file are
        written to jobs/<dd-mm-YYYY>/.

        Raises subprocess.CalledProcessError if condor/prepare_dps.sh exits
        with a non-zero status; no job is submitted in that case.
    '''
    today = time.strftime("%d-%m-%Y")
    job_folder = 'jobs/{0}/'.format(today)
    make_folder_if_not_exists(job_folder)
    make_folder_if_not_exists(job_folder + 'logs')
    # construct jobs
    self._construct_jobs()
    # convert each job into a pickle file
    # construct a class ad for each job
    with open('condor/job_template', 'r') as template:
        job_template = template.read()

    condor_jobs = []
    for i, job in enumerate(self.prepared_jobs):
        job_file = job_folder + 'job_{0}.pkl'.format(i)
        job_desc_file = job_folder + 'job_{0}.dsc'.format(i)

        input_files = ['dps.tar']
        if hasattr(job, 'additional_input_files'):
            input_files.extend(job.additional_input_files)
        input_files_str = ','.join(input_files)

        # fill in the placeholders of the template, in the original order
        replacements = [
            ('%pkl_file%', job_file),
            ('%total_memory%', str(self.request_memory)),
            ('%n_jobs_to_run%', str(self.n_jobs_to_run)),
            ('%n_jobs_to_split%', str(self.n_jobs_to_split)),
            ('%input_files%', input_files_str),
            ('%today%', today),
        ]
        job_description = job_template
        for placeholder, value in replacements:
            job_description = job_description.replace(placeholder, value)

        # pickle data is binary: open the file in binary mode
        with open(job_file, 'wb') as jf:
            pickle.dump(job, jf)
        with open(job_desc_file, 'w+') as jdf:
            jdf.write(job_description)

        condor_jobs.append(job_desc_file)

    # prepare DPS for submission; wait for the script to finish so that the
    # jobs are not submitted while it is still running
    subprocess.check_call(['./condor/prepare_dps.sh'])

    # submit jobs
    for j in condor_jobs:
        p = subprocess.Popen(['condor_submit', j])
        p.communicate()  # wait until command completed
def create_toy_mc(input_file, sample, output_folder, n_toy, centre_of_mass, ttbar_xsection):
    '''
        Writes, for the 'combined' channel and every variable in
        bin_edges_vis, the original truth/measured/response histograms and
        n_toy toy copies of them into a newly created ROOT file.
    '''
    from tools.file_utilities import make_folder_if_not_exists
    from tools.toy_mc import generate_toy_MC_from_distribution, generate_toy_MC_from_2Ddistribution
    from tools.Unfolding import get_unfold_histogram_tuple

    make_folder_if_not_exists(output_folder)
    input_file_hists = File(input_file)
    output_file_name = get_output_file_name(output_folder, sample, n_toy, centre_of_mass)
    variable_bins = bin_edges_vis.copy()

    with root_open(output_file_name, 'recreate') as f_out:
        for channel in ['combined']:
            for variable in variable_bins:
                output_dir = f_out.mkdir(channel + '/' + variable, recurse=True)
                cd = output_dir.cd
                mkdir = output_dir.mkdir

                h_truth, h_measured, h_response, _ = get_unfold_histogram_tuple(
                    input_file_hists,
                    variable,
                    channel,
                    centre_of_mass = centre_of_mass,
                    ttbar_xsection = ttbar_xsection,
                    visiblePS = True,
                    load_fakes=False)

                # store the unmodified inputs next to the toys
                cd()
                mkdir('Original')
                cd ('Original')
                for name, original in (('truth', h_truth),
                                       ('measured', h_measured),
                                       ('response', h_response)):
                    original.Write(name)

                for toy_number in range(1, n_toy+1):
                    toy_id = 'toy_{0}'.format(toy_number)
                    mkdir(toy_id)
                    cd(toy_id)
                    # one (truth, measured, response) set of toy histograms
                    toys = [
                        ('truth', generate_toy_MC_from_distribution(h_truth)),
                        ('measured', generate_toy_MC_from_distribution(h_measured)),
                        ('response', generate_toy_MC_from_2Ddistribution(h_response)),
                    ]
                    for name, toy in toys:
                        toy.SetName(name)
                    for _, toy in toys:
                        toy.Write()
def print_xsections(xsections, channel, toFile = True, print_before_unfolding = False):
    '''
        Formats the cross-section results as rows of a LaTeX table and either
        writes them to a .tex file (toFile = True) or prints them.

        xsections: dictionary with the keys 'unfolded_with_systematics' and
                   'measured_with_systematics'; each entry holds one
                   (value, error_up, error_down) tuple per bin
        channel:   used in the header and in the output file name
        print_before_unfolding: use the measured instead of the unfolded
                   results
    '''
    global output_folder, variable, k_value, met_type, b_tag_bin

    if print_before_unfolding:
        results = xsections['measured_with_systematics']
    else:
        results = xsections['unfolded_with_systematics']

    printout = '=' * 60
    printout += '\n'
    printout += 'Results for %s variable, %s channel, k-value %s, met type %s, %s b-tag region\n' % (variable, channel, k_value, met_type, b_tag_bin)
    if print_before_unfolding:
        printout += 'BEFORE UNFOLDING\n'
    printout += '=' * 60
    printout += '\n'
    printout += '$%s$ bin & $\\sigma_{meas}$' % variables_latex[variable]
    printout += '\\\\ \n\\hline\n'

    # values are quoted in units of 10^-2
    scale = 100

    bins = variable_bins_ROOT[variable]
    # check the list that is actually printed
    assert(len(bins) == len(results))

    for bin_i, variable_bin in enumerate(bins):
        value, error_up, error_down = results[bin_i]
        relativeError_up = getRelativeError(value, error_up)
        relativeError_down = getRelativeError(value, error_down)

        printout += '%s & ' % variable_bins_latex[variable_bin]
        if error_up == error_down:
            printout += ' $(%.2f \\pm %.2f ) \\cdot 10^{-2} ' % (value * scale, error_up * scale)
            printout += '(%.2f' % (relativeError_up * 100) + '\\%)$'
        else:
            # the subscript brace is closed before the parenthesis
            printout += ' $(%.2f^{+%.2f}_{-%.2f}) \\cdot 10^{-2} ' % (value * scale, error_up * scale, error_down * scale)
            printout += '(^{+%.2f}_{-%.2f}' % (relativeError_up * 100, relativeError_down * 100) + '\\%)$'
        printout += '\\\\ \n'

    printout += '\\hline \n\n'

    if toFile:
        path = output_folder + '/' + str(measurement_config.centre_of_mass) + 'TeV/' + variable
        make_folder_if_not_exists(path)
        if print_before_unfolding:
            file_suffix = '_measured.tex'
        else:
            file_suffix = '_unfolded.tex'
        file_name = path + '/normalised_xsection_result_' + channel + '_' + met_type + '_kv' + str(k_value) + file_suffix
        # the context manager closes the file even if the write fails
        with open(file_name, 'w') as output_file:
            output_file.write(printout)
    else:
        print(printout)
def create_unfolding_pull_data(input_file_name, method, channel,
                               centre_of_mass, variable,
                               sample,
                               responseFile,
                               n_toy_data,
                               output_folder,
                               tau_value,
                               run_matrix=None):
    '''
        Prepares the inputs for check_multiple_data_multiple_unfolding, runs
        it and returns the name of the output file it reports.

        The results go to
        <output_folder>/<centre_of_mass>TeV/<variable>/<sample>/.
        run_matrix is accepted but not used here.
    '''
    set_root_defaults(msg_ignore_level=3001)
    timer = Timer()
    input_file = File(input_file_name, 'read')

    # values shared by the folder name and the log message
    inputs = dict(
        path=output_folder,
        centre_of_mass=centre_of_mass,
        variable=variable,
        sample=sample,
        value=round(tau_value,4),
    )
    folder_template = '{path}/{centre_of_mass}TeV/{variable}/{sample}/'
    msg_template = ('Producing unfolding pull data for {variable},'
                    ' tau-value {value}')

    h_response = get_response_histogram(responseFile, variable, channel)

    # from here on output_folder is the fully resolved destination
    output_folder = folder_template.format(**inputs)
    make_folder_if_not_exists(output_folder)

    print(msg_template.format(**inputs))
    print('Output folder: {0}'.format(output_folder))
    print ('Response here :',h_response)

    output_file_name = check_multiple_data_multiple_unfolding(
        input_file, method, channel, variable,
        h_response,
        n_toy_data,
        output_folder,
        tau_value,
    )
    print('Runtime', timer.elapsed_time())
    return output_file_name
def main():
    '''
        Plots every electron fit variable in every bin of each variable and
        then runs the QCD control region and V+Jets b-tag region comparisons
        for that variable.
    '''
    global measurement_config, histogram_files
    global electron_fit_variables, muon_fit_variables, fit_variable_properties
    global b_tag_bin, category, variables
    global b_tag_bin_ctl

    title_template = 'CMS Preliminary, $\mathcal{L} = %.1f$ fb$^{-1}$ at $\sqrt{s}$ = %d TeV \n %s'
    luminosity_in_fb = measurement_config.new_luminosity / 1000.
    e_title = title_template % ( luminosity_in_fb, measurement_config.centre_of_mass_energy, 'e+jets, $\geq$ 4 jets' )
    met_type = 'patType1CorrectedPFMet'

    for variable in variables:
        variable_bins = variable_bins_ROOT[variable]
        histogram_template = get_histogram_template( variable )

        for fit_variable in electron_fit_variables:
            # fit variables containing '_bl' use a different control region
            b_tag_bin_ctl = '1orMoreBtag' if '_bl' in fit_variable else '0orMoreBtag'

            save_path = 'plots/%dTeV/fit_variables/%s/%s/' % ( measurement_config.centre_of_mass_energy, variable, fit_variable )
            for sub_folder in ( '', 'qcd/', 'vjets/' ):
                make_folder_if_not_exists( save_path + sub_folder )

            for bin_range in variable_bins:
                params = dict( met_type = met_type,
                               bin_range = bin_range,
                               fit_variable = fit_variable,
                               b_tag_bin = b_tag_bin,
                               variable = variable )
                fit_variable_distribution = histogram_template % params
                # the QCD control region distribution is derived from the
                # signal region name
                qcd_fit_variable_distribution = fit_variable_distribution.replace( 'Ref selection', 'QCDConversions' ).replace( b_tag_bin, b_tag_bin_ctl )

                distributions = [fit_variable_distribution, qcd_fit_variable_distribution]
                histograms = get_histograms_from_files( distributions, histogram_files )
                plot_fit_variable( histograms, fit_variable, variable, bin_range,
                                   fit_variable_distribution, qcd_fit_variable_distribution,
                                   e_title, save_path )

        compare_qcd_control_regions( variable, met_type, e_title )
        compare_vjets_btag_regions( variable, met_type, e_title )
def compare_vjets_btag_regions( variable = 'MET', met_type = 'patType1CorrectedPFMet',
                                title = 'Untitled', channel = 'electron' ):
    '''
        Compares the V+Jets template in different b-tag bins

        For every electron fit variable and every bin of the given variable
        the V+Jets template of the b_tag_bin region and of the control region
        (b_tag_bin_ctl) are normalised to unit area and passed to
        compare_measurements. Empty templates are left unscaled.
    '''
    global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl
    b_tag_bin_ctl = '0orMoreBtag'
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template( variable )

    for fit_variable in electron_fit_variables:
        if '_bl' in fit_variable:
            b_tag_bin_ctl = '1orMoreBtag'
        else:
            b_tag_bin_ctl = '0orMoreBtag'
        save_path = 'plots/%dTeV/fit_variables/%s/%s/' % ( measurement_config.centre_of_mass_energy, variable, fit_variable )
        make_folder_if_not_exists( save_path + '/vjets/' )

        properties = fit_variable_properties[fit_variable]
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = properties['x-title']
        # templates are normalised, hence arbitrary units on the y axis
        histogram_properties.y_axis_title = properties['y-title'].replace( 'Events', 'a.u.' )
        histogram_properties.x_limits = [properties['min'], properties['max']]
        histogram_properties.title = title
        histogram_properties.additional_text = channel_latex[channel] + ', ' + b_tag_bins_latex[b_tag_bin_ctl]
        histogram_properties.y_max_scale = 1.5

        for bin_range in variable_bins:
            params = {'met_type': met_type, 'bin_range':bin_range, 'fit_variable':fit_variable, 'b_tag_bin':b_tag_bin, 'variable':variable}
            fit_variable_distribution = histogram_template % params
            fit_variable_distribution_ctl = fit_variable_distribution.replace( b_tag_bin, b_tag_bin_ctl )
            # format: histograms['V+Jets'][fit_variable_distribution]
            histograms = get_histograms_from_files( [fit_variable_distribution, fit_variable_distribution_ctl], {'V+Jets' : histogram_files['V+Jets']} )
            prepare_histograms( histograms, rebin = properties['rebin'], scale_factor = measurement_config.luminosity_scale )
            histogram_properties.name = variable + '_' + bin_range + '_' + fit_variable + '_' + b_tag_bin_ctl + '_VJets_template_comparison'

            # normalise both templates to unit area; an empty template would
            # otherwise cause a division by zero
            for distribution in ( fit_variable_distribution, fit_variable_distribution_ctl ):
                template = histograms['V+Jets'][distribution]
                integral = template.Integral()
                if integral != 0:
                    template.Scale( 1 / integral )

            compare_measurements(
                models = {'no b-tag' : histograms['V+Jets'][fit_variable_distribution_ctl]},
                measurements = {'$>=$ 2 b-tags': histograms['V+Jets'][fit_variable_distribution]},
                show_measurement_errors = True,
                histogram_properties = histogram_properties,
                save_folder = save_path + '/vjets/',
                save_as = save_as )
def plot_fit_results( histograms, category, channel ):
    '''
        Makes one data/background/signal comparison plot per fit variable
        and per bin of the current variable. The binning is chosen by the
        global phase_space ('VisiblePS' or 'FullPS').
    '''
    global variable, b_tag_bin, output_folder, phase_space
    from tools.plotting import Histogram_properties, make_data_mc_comparison_plot

    fit_variables = histograms.keys()

    # NOTE(review): any other phase_space value leaves this as None and the
    # subscript below then fails.
    variableBins = None
    if phase_space == 'VisiblePS':
        variableBins = variable_bins_visiblePS_ROOT
    elif phase_space == 'FullPS':
        variableBins = variable_bins_ROOT

    for variable_bin in variableBins[variable]:
        path = output_folder + str( measurement_config.centre_of_mass_energy ) + 'TeV/' + variable + '/' + category + '/fit_results/'
        make_folder_if_not_exists( path )

        for fit_variable in fit_variables:
            plotname = channel + '_' + fit_variable + '_bin_' + variable_bin

            # check if template plots exist already
            # NOTE(review): the 'continue' only affects this inner loop (and
            # plotname carries no folder), so plots are always regenerated.
            for output_format in output_formats:
                if os.path.isfile( plotname + '.' + output_format ):
                    continue

            # plot with matplotlib
            bin_histograms = histograms[fit_variable][variable_bin]
            h_data = bin_histograms['data']
            h_signal = bin_histograms['signal']
            h_background = bin_histograms['background']

            histogram_properties = Histogram_properties()
            histogram_properties.name = plotname
            histogram_properties.x_axis_title = fit_variables_latex[fit_variable]
            histogram_properties.y_axis_title = 'Events/(%s)' % get_unit_string(fit_variable)
            label, _ = get_cms_labels( channel )
            histogram_properties.title = label
            histogram_properties.x_limits = measurement_config.fit_boundaries[fit_variable]

            make_data_mc_comparison_plot(
                [h_data, h_background, h_signal],
                ['data', 'background', 'signal'],
                ['black', 'green', 'red'],
                histogram_properties,
                save_folder = path,
                save_as = output_formats )
def main():
    '''
        Plots every electron fit variable in each bin of every variable,
        plus an 'inclusive' plot made from the sum over all bins, and then
        runs the template comparisons for that variable.
    '''
    global measurement_config, histogram_files
    global electron_fit_variables, muon_fit_variables, fit_variable_properties
    global b_tag_bin, category, variables
    global b_tag_bin_ctl

    title_template = '$%.1f$ fb$^{-1}$(%d TeV)'
    e_title = title_template % ( measurement_config.new_luminosity / 1000., measurement_config.centre_of_mass_energy )
    met_type = 'patType1CorrectedPFMet'

    for variable in variables:
        variable_bins = variable_bins_ROOT[variable]
        histogram_template = get_histogram_template( variable )

        for fit_variable in electron_fit_variables:
            # fit variables containing '_bl' use a different control region
            b_tag_bin_ctl = '1orMoreBtag' if '_bl' in fit_variable else '0orMoreBtag'

            save_path = 'plots/%dTeV/fit_variables/%s/%s/' % ( measurement_config.centre_of_mass_energy, variable, fit_variable )
            for sub_folder in ( '', 'qcd/', 'vjets/' ):
                make_folder_if_not_exists( save_path + sub_folder )

            inclusive_histograms = {}
            inclusive_fit_distribution = ''
            inclusive_qcd_distribution = ''

            for bin_range in variable_bins:
                params = {'met_type': met_type, 'bin_range':bin_range, 'fit_variable':fit_variable, 'b_tag_bin':b_tag_bin, 'variable':variable}
                fit_variable_distribution = histogram_template % params
                qcd_fit_variable_distribution = fit_variable_distribution.replace( 'Ref selection', 'QCDConversions' )
                qcd_fit_variable_distribution = qcd_fit_variable_distribution.replace( b_tag_bin, b_tag_bin_ctl )

                histograms = get_histograms_from_files( [fit_variable_distribution, qcd_fit_variable_distribution], histogram_files )
                plot_fit_variable( histograms, fit_variable, variable, bin_range,
                                   fit_variable_distribution, qcd_fit_variable_distribution,
                                   e_title, save_path )

                # sum histograms for inclusive plots
                for sample, hist in histograms.iteritems():
                    inclusive_fit_distribution = fit_variable_distribution.replace( bin_range, "inclusive" )
                    inclusive_qcd_distribution = qcd_fit_variable_distribution.replace( bin_range, "inclusive" )
                    fit_hist = hist[fit_variable_distribution]
                    qcd_hist = hist[qcd_fit_variable_distribution]

                    if sample not in inclusive_histograms:
                        # first bin for this sample: start the sums from clones
                        inclusive_histograms[sample] = {}
                        inclusive_histograms[sample][inclusive_fit_distribution] = fit_hist.clone()
                        inclusive_histograms[sample][inclusive_qcd_distribution] = qcd_hist.clone()
                        continue

                    inclusive_histograms[sample][inclusive_fit_distribution] += fit_hist
                    inclusive_histograms[sample][inclusive_qcd_distribution] += qcd_hist

            plot_fit_variable( inclusive_histograms, fit_variable, variable, 'inclusive',
                               inclusive_fit_distribution, inclusive_qcd_distribution,
                               e_title, save_path )

        compare_qcd_control_regions( variable, met_type, e_title )
        compare_vjets_btag_regions( variable, met_type, e_title )
        compare_vjets_templates( variable, met_type, e_title )
def main():
    '''
        For each variable: plots all electron fit variables bin by bin and
        summed over the bins ('inclusive'), then compares the QCD control
        regions, the V+Jets b-tag regions and the V+Jets templates.
    '''
    global measurement_config, histogram_files
    global electron_fit_variables, muon_fit_variables, fit_variable_properties
    global b_tag_bin, category, variables
    global b_tag_bin_ctl

    title_template = '$%.1f$ fb$^{-1}$(%d TeV)'
    centre_of_mass_energy = measurement_config.centre_of_mass_energy
    e_title = title_template % (measurement_config.new_luminosity / 1000.,
                                centre_of_mass_energy)
    met_type = 'patType1CorrectedPFMet'

    for variable in variables:
        variable_bins = variable_bins_ROOT[variable]
        histogram_template = get_histogram_template(variable)

        for fit_variable in electron_fit_variables:
            if '_bl' in fit_variable:
                b_tag_bin_ctl = '1orMoreBtag'
            else:
                b_tag_bin_ctl = '0orMoreBtag'

            save_path = 'plots/%dTeV/fit_variables/%s/%s/' % (
                measurement_config.centre_of_mass_energy, variable, fit_variable)
            make_folder_if_not_exists(save_path)
            make_folder_if_not_exists(save_path + 'qcd/')
            make_folder_if_not_exists(save_path + 'vjets/')

            # per-sample running sums over all bins
            inclusive_histograms = {}
            inclusive_fit_distribution = ''
            inclusive_qcd_distribution = ''

            for bin_range in variable_bins:
                params = dict(met_type=met_type,
                              bin_range=bin_range,
                              fit_variable=fit_variable,
                              b_tag_bin=b_tag_bin,
                              variable=variable)
                fit_variable_distribution = histogram_template % params
                qcd_fit_variable_distribution = fit_variable_distribution.replace(
                    'Ref selection', 'QCDConversions').replace(
                    b_tag_bin, b_tag_bin_ctl)

                histograms = get_histograms_from_files(
                    [fit_variable_distribution, qcd_fit_variable_distribution],
                    histogram_files)
                plot_fit_variable(histograms, fit_variable, variable, bin_range,
                                  fit_variable_distribution,
                                  qcd_fit_variable_distribution, e_title, save_path)

                # sum histograms for inclusive plots
                for sample, hist in histograms.iteritems():
                    inclusive_fit_distribution = fit_variable_distribution.replace(
                        bin_range, "inclusive")
                    inclusive_qcd_distribution = qcd_fit_variable_distribution.replace(
                        bin_range, "inclusive")

                    if sample in inclusive_histograms:
                        summed = inclusive_histograms[sample]
                        summed[inclusive_fit_distribution] += hist[
                            fit_variable_distribution]
                        summed[inclusive_qcd_distribution] += hist[
                            qcd_fit_variable_distribution]
                    else:
                        # first occurrence: clone so that the per-bin
                        # histograms are not modified by later additions
                        inclusive_histograms[sample] = {}
                        inclusive_histograms[sample][
                            inclusive_fit_distribution] = hist[
                                fit_variable_distribution].clone()
                        inclusive_histograms[sample][
                            inclusive_qcd_distribution] = hist[
                                qcd_fit_variable_distribution].clone()

            plot_fit_variable(inclusive_histograms, fit_variable, variable,
                              'inclusive', inclusive_fit_distribution,
                              inclusive_qcd_distribution, e_title, save_path)

        compare_qcd_control_regions(variable, met_type, e_title)
        compare_vjets_btag_regions(variable, met_type, e_title)
        compare_vjets_templates(variable, met_type, e_title)
def plot_central_and_systematics(channel):
    '''
        Draws the central unfolded result and the unfolded result of every
        other entry in categories on one ROOT canvas and saves it as png
        and pdf below save_path/<variable>.
    '''
    global variable, translate_options, k_value, b_tag_bin, maximum, categories

    ROOT.TH1.SetDefaultSumw2(False)
    ROOT.gROOT.SetBatch(True)
    ROOT.gROOT.ProcessLine('gErrorIgnoreLevel = 1001;')
    plotting.setStyle()
    gStyle.SetTitleYOffset(1.4)
    ROOT.gROOT.ForceStyle()

    canvas = Canvas(width=700, height=500)
    canvas.SetLeftMargin(0.15)
    canvas.SetBottomMargin(0.15)
    canvas.SetTopMargin(0.05)
    canvas.SetRightMargin(0.05)
    legend = plotting.create_legend(x0=0.6, y1=0.5)

    hist_data_central = read_xsection_measurement_results('central', channel)[0]['unfolded']
    variable_title = translate_options[variable]
    hist_data_central.GetXaxis().SetTitle(variable_title + ' [GeV]')
    hist_data_central.GetYaxis().SetTitle('#frac{1}{#sigma} #frac{d#sigma}{d' + variable_title + '} [GeV^{-1}]')
    hist_data_central.GetXaxis().SetTitleSize(0.05)
    hist_data_central.GetYaxis().SetTitleSize(0.05)
    hist_data_central.SetMinimum(0)
    hist_data_central.SetMaximum(maximum[variable])
    hist_data_central.SetMarkerSize(1)
    hist_data_central.SetMarkerStyle(20)
    gStyle.SetEndErrorSize(20)
    hist_data_central.Draw('P')
    legend.AddEntry(hist_data_central, 'measured (unfolded)', 'P')

    for systematic in categories:
        if systematic == 'central':
            continue
        hist_data_systematic = read_xsection_measurement_results(systematic, channel)[0]['unfolded']
        hist_data_systematic.SetMarkerSize(0.5)
        hist_data_systematic.SetMarkerStyle(20)
        # the colour follows the position in categories; 10 is replaced by 42
        colour_number = categories.index(systematic) + 1
        if colour_number == 10:
            colour_number = 42
        hist_data_systematic.SetMarkerColor(colour_number)
        hist_data_systematic.Draw('same P')
        legend.AddEntry(hist_data_systematic, systematic, 'P')

    # the overlay of the MC generator predictions (MADGRAPH, POWHEG, MCATNLO)
    # is currently disabled
    legend.Draw()

    mytext = TPaveText(0.5, 0.97, 1, 1.01, "NDC")
    channelLabel = TPaveText(0.18, 0.97, 0.5, 1.01, "NDC")

    # label template per channel; anything else is labelled as combined
    label_templates = {
        'electron': "e, %s, %s, k_v = %s",
        'muon': "#mu, %s, %s, k_v = %s",
    }
    label_template = label_templates.get(channel, "combined, %s, %s, k_v = %s")
    channelLabel.AddText(label_template % ("#geq 4 jets", b_tag_bins_latex[b_tag_bin], k_value))
    mytext.AddText("CMS Preliminary, L = %.1f fb^{-1} at #sqrt{s} = 8 TeV" % (5.8))

    for pave_text in (mytext, channelLabel):
        pave_text.SetFillStyle(0)
        pave_text.SetBorderSize(0)
        pave_text.SetTextFont(42)
        pave_text.SetTextAlign(13)

    mytext.Draw()
    if channel != 'combination':
        channelLabel.Draw()

    canvas.Modified()
    canvas.Update()

    path = save_path + '/' + variable
    make_folder_if_not_exists(path)
    file_stem = path + '/normalised_xsection_' + channel + '_altogether_kv' + str(k_value)
    for extension in ('.png', '.pdf'):
        canvas.SaveAs(file_stem + extension)
def compare_qcd_control_regions( variable = 'MET', met_type = 'patType1CorrectedPFMet', title = 'Untitled'):
    '''
        Compares the templates from the control regions in different bins
        of the current variable

        For every electron fit variable the cleaned control-region template
        of the first max_bins (3) bins is compared to their sum (the
        'inclusive' template). All templates are normalised to unit area;
        templates with a vanishing integral are left unscaled.
    '''
    global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template( variable )

    for fit_variable in electron_fit_variables:
        all_hists = {}
        if '_bl' in fit_variable:
            b_tag_bin_ctl = '1orMoreBtag'
        else:
            b_tag_bin_ctl = '0orMoreBtag'
        # NOTE(review): the other functions in this file use
        # 'plots/%dTeV/fit_variables/...'; confirm this ordering is intended.
        save_path = 'plots/fit_variables/%dTeV/%s/%s/' % (measurement_config.centre_of_mass_energy, variable, fit_variable)
        make_folder_if_not_exists(save_path + '/qcd/')

        max_bins = 3
        for bin_range in variable_bins[0:max_bins]:
            params = {'met_type': met_type, 'bin_range':bin_range, 'fit_variable':fit_variable, 'b_tag_bin':b_tag_bin, 'variable':variable}
            fit_variable_distribution = histogram_template % params
            qcd_fit_variable_distribution = fit_variable_distribution.replace( 'Ref selection', 'QCDConversions' )
            qcd_fit_variable_distribution = qcd_fit_variable_distribution.replace( b_tag_bin, b_tag_bin_ctl )
            # format: histograms['data'][qcd_fit_variable_distribution]
            histograms = get_histograms_from_files( [qcd_fit_variable_distribution], histogram_files )
            prepare_histograms( histograms, rebin = fit_variable_properties[fit_variable]['rebin'], scale_factor = measurement_config.luminosity_scale )

            histograms_for_cleaning = {'data':histograms['data'][qcd_fit_variable_distribution],
                                       'V+Jets':histograms['V+Jets'][qcd_fit_variable_distribution],
                                       'SingleTop':histograms['SingleTop'][qcd_fit_variable_distribution],
                                       'TTJet':histograms['TTJet'][qcd_fit_variable_distribution]}
            qcd_from_data = clean_control_region( histograms_for_cleaning, subtract = ['TTJet', 'V+Jets', 'SingleTop'] )
            # clean
            all_hists[bin_range] = qcd_from_data

        # create the inclusive distributions
        inclusive_hist = deepcopy(all_hists[variable_bins[0]])
        for bin_range in variable_bins[1:max_bins]:
            inclusive_hist += all_hists[bin_range]

        # normalise all histograms
        for bin_range in variable_bins[0:max_bins]:
            integral = all_hists[bin_range].Integral()
            if integral != 0:
                all_hists[bin_range].Scale(1/integral)
        # same guard as for the single bins, to avoid a division by zero
        inclusive_integral = inclusive_hist.Integral()
        if inclusive_integral != 0:
            inclusive_hist.Scale(1/inclusive_integral)

        # now compare inclusive to all bins
        properties = fit_variable_properties[fit_variable]
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = properties['x-title']
        histogram_properties.y_axis_title = properties['y-title'].replace('Events', 'a.u.')
        histogram_properties.x_limits = [properties['min'], properties['max']]
        histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin_ctl]
        histogram_properties.name = variable + '_' + fit_variable + '_' + b_tag_bin_ctl + '_QCD_template_comparison'

        measurements = {bin_range + ' GeV': histogram for bin_range, histogram in all_hists.iteritems()}
        measurements = OrderedDict(sorted(measurements.items()))
        compare_measurements(models = {'inclusive' : inclusive_hist},
                             measurements = measurements,
                             show_measurement_errors = True,
                             histogram_properties = histogram_properties,
                             save_folder = save_path + '/qcd/',
                             save_as = save_as)
sys.exit() #set up the config according to the centre of mass energy config = XSectionConfig(options.centreOfMassEnergy) #Get the luminosity for the centre of mass energy luminosity = config.luminosities[options.centreOfMassEnergy] #Get current working directory current_working_directory = os.getcwd() #Get folder to move files to path_to_AN_folder = config.path_to_files #move log files separately first, since there is no "logs" category in categories_and_prefixes make_folder_if_not_exists(path_to_AN_folder + '/logs/') command = 'mv ' + options.pathToBATOutputFiles + '/*' + str(options.centreOfMassEnergy) + 'TeV*.log ' + path_to_AN_folder + '/logs/' if options.doNothing: print "command = ", command print "path to folder = ", path_to_AN_folder + '/logs/' elif not options.doNothing: make_folder_if_not_exists( path_to_AN_folder + "/logs" ) p = subprocess.Popen(command, shell=True) p.wait() #Now move all other BAT output files. for category in config.categories_and_prefixes.keys(): make_folder_if_not_exists(path_to_AN_folder + "/" + category) command = 'mv ' + options.pathToBATOutputFiles + '/*' + str(luminosity) + 'pb*' + config.categories_and_prefixes[category] + '.root ' + path_to_AN_folder + "/" + category
def make_template_plots( histograms, category, channel ):
    '''
        Plots the TTJet (labelled 'signal'), SingleTop, V+Jets and QCD
        templates of every fit variable in every bin of the current variable
        with matplotlib and saves them in each format in output_formats.
        Empty V+Jets or QCD templates are not drawn; a warning is printed
        instead. The binning is chosen by the global phase_space
        ('VisiblePS' or 'FullPS').
    '''
    global variable, output_folder, phase_space
    fit_variables = histograms.keys()

    # NOTE(review): any other phase_space value leaves this as None and the
    # subscript below then fails.
    variableBins = None
    if phase_space == 'VisiblePS':
        variableBins = variable_bins_visiblePS_ROOT
    elif phase_space == 'FullPS':
        variableBins = variable_bins_ROOT

    for variable_bin in variableBins[variable]:
        path = output_folder + str( measurement_config.centre_of_mass_energy ) + 'TeV/' + variable + '/' + category + '/fit_templates/'
        make_folder_if_not_exists( path )
        for fit_variable in fit_variables:
            plotname = path + channel + '_' + fit_variable + '_template_bin_' + variable_bin

            # check if template plots exist already
            # NOTE(review): the 'continue' only affects this inner loop, so
            # existing plots are always regenerated.
            for output_format in output_formats:
                if os.path.isfile( plotname + '.' + output_format ):
                    continue

            # plot with matplotlib
            h_ttjet = histograms[fit_variable][variable_bin]['TTJet']
            h_single_top = histograms[fit_variable][variable_bin]['SingleTop']
            h_VJets = histograms[fit_variable][variable_bin]['V+Jets']
            h_QCD = histograms[fit_variable][variable_bin]['QCD']

            h_ttjet.linecolor = 'red'
            h_single_top.linecolor = 'magenta'
            h_VJets.linecolor = 'green'
            h_QCD.linecolor = 'gray'
            h_VJets.linestyle = 'dashed'
            h_QCD.linestyle = 'dotted'  # currently not working
            # bug report: http://trac.sagemath.org/sage_trac/ticket/13834

            h_ttjet.linewidth = 5
            h_single_top.linewidth = 5
            h_VJets.linewidth = 5
            h_QCD.linewidth = 5

            plt.figure( figsize = ( 16, 16 ), dpi = 200, facecolor = 'white' )
            axes = plt.axes()
            # no minor ticks for NJets
            if not variable in ['NJets']:
                axes.minorticks_on()

            plt.xlabel( fit_variables_latex[fit_variable], CMS.x_axis_title )
            plt.ylabel( 'normalised to unit area/(%s)' % get_unit_string(fit_variable), CMS.y_axis_title )
            plt.tick_params( **CMS.axis_label_major )
            if not variable in ['NJets']:
                plt.tick_params( **CMS.axis_label_minor )

            rplt.hist( h_ttjet, axes = axes, label = 'signal' )
            rplt.hist( h_single_top, axes = axes, label = 'Single Top' )

            if ( h_VJets.Integral() != 0 ):
                rplt.hist( h_VJets, axes = axes, label = 'V+Jets' )
            else:
                print("WARNING: in %s bin %s, %s category, %s channel, V+Jets template is empty: not plotting." % ( variable, variable_bin, category, channel ))
            if ( h_QCD.Integral() != 0 ):
                rplt.hist( h_QCD, axes = axes, label = 'QCD' )
            else:
                print("WARNING: in %s bin %s, %s category, %s channel, QCD template is empty: not plotting." % ( variable, variable_bin, category, channel ))

            y_max = get_best_max_y([h_ttjet, h_single_top, h_VJets, h_QCD])
            axes.set_ylim( [0, y_max * 1.1] )
            axes.set_xlim( measurement_config.fit_boundaries[fit_variable] )

            plt.legend( numpoints = 1, loc = 'upper right', prop = CMS.legend_properties )
            label, channel_label = get_cms_labels( channel )
            plt.title( label, CMS.title )
            # CMS text
            # note: fontweight/weight does not change anything as we use Latex text!!!
            plt.text(0.95, 0.95, r"\textbf{CMS}", transform=axes.transAxes, fontsize=42,
                     verticalalignment='top',horizontalalignment='right')
            # channel text, placed below the CMS text: both used to share
            # the same anchor point and were drawn on top of each other
            axes.text(0.95, 0.90, r"\emph{%s}" %channel_label, transform=axes.transAxes, fontsize=40,
                      verticalalignment='top',horizontalalignment='right')
            plt.tight_layout()

            for output_format in output_formats:
                plt.savefig( plotname + '.' + output_format )

            # release the figure before the next plot is made
            plt.close()
            gc.collect()
parser.add_option( "-l", "--log-plots", dest = "log_plots", action = "store_true", help = "plots the y axis in log scale" ) ( options, args ) = parser.parse_args() measurement_config = XSectionConfig( options.CoM) centre_of_mass = measurement_config.centre_of_mass_energy luminosity = measurement_config.luminosity * measurement_config.luminosity_scale ttbar_xsection = measurement_config.ttbar_xsection load_fakes = options.load_fakes method = options.unfolding_method path_to_JSON = options.data_path plot_location = options.output_folder + '/' + str(centre_of_mass) + 'TeV/' + options.test + '/' met_type = measurement_config.translate_options[options.metType] log_plots = options.log_plots make_folder_if_not_exists( plot_location ) test = options.test input_filename_central = measurement_config.unfolding_madgraph input_filename_bias = measurement_config.unfolding_mcatnlo variables = ['MET', 'WPT', 'MT', 'ST', 'HT'] input_file = File( input_filename_central, 'read' ) input_file_bias = File( input_filename_bias, 'read' ) print 'Performing', test, 'unfolding checks at', centre_of_mass, 'TeV' for channel in ['electron', 'muon']: for variable in variables:
import json from config import XSectionConfig from config.variable_binning import bin_edges, bin_edges_vis from tools.file_utilities import make_folder_if_not_exists com = 13 fitVars = "M3_angle_bl" config = XSectionConfig( com ) make_folder_if_not_exists('config/unfolding/FullPS/') make_folder_if_not_exists('config/unfolding/VisiblePS/') for channel in config.analysis_types.keys(): for variable in bin_edges.keys(): histogramTemplate = "unfolding_%s_analyser_%s_channel" % ( variable, channel ) outputJson = { "output_folder": "plots/%sTeV/unfolding_tests/FullPS" % com, "output_format": ["png", "pdf"], "centre-of-mass energy" : com, "channel": "%s" % channel, "variable": "%s" % variable, "phaseSpace" : "FullPS", "truth" : { "file" : "%s" % config.unfolding_central, # "histogram": "%s/truth" % ( histogramTemplate ), }, "gen_vs_reco" : { "file" : "%s" % config.unfolding_central, # "histogram": "%s/response_without_fakes" % ( histogramTemplate ),
def generate_toy(n_toy, n_input_mc, config, output_folder, start_at=0, split=1):
    """
    Fill toy MC sets for the electron and muon channels from the central
    ttbar tree and write them to one ROOT file.

    :param n_toy: number of toys; passed to ``ToySet`` and used as the number
        of weights requested from ``get_mc_weight`` for every event.
    :param n_input_mc: the event loop stops once this many events were counted.
    :param config: measurement configuration; ``luminosity_scale``,
        ``ttbar_category_templates_trees`` and ``centre_of_mass_energy`` are read.
    :param output_folder: folder for the output file (created if missing).
    :param start_at: events whose running count is below this value are skipped.
    :param split: currently unused; kept so existing callers keep working.
    """
    from progressbar import Percentage, Bar, ProgressBar, ETA
    set_root_defaults()
    genWeight = '( EventWeight * {0})'.format(config.luminosity_scale)
    file_name = config.ttbar_category_templates_trees['central']
    make_folder_if_not_exists(output_folder)
    outfile = get_output_file_name(
        output_folder, n_toy, start_at, n_input_mc,
        config.centre_of_mass_energy)

    widgets = ['Progress: ', Percentage(), ' ', Bar(), ' ', ETA()]
    with root_open(file_name, 'read') as f_in, root_open(outfile, 'recreate') as f_out:
        tree = f_in.Get("TTbar_plus_X_analysis/Unfolding/Unfolding")
        n_events = tree.GetEntries()
        print("Number of entries in tree : ", n_events)
        for channel in ['electron', 'muon']:
            print('Channel :', channel)
            # compare by value: identity ('is') on a string literal only
            # works by accident of interning
            if channel == 'muon':
                gen_selection = '( isSemiLeptonicMuon == 1 )'
                gen_selection_vis = '( isSemiLeptonicMuon == 1 && passesGenEventSelection )'
            else:
                gen_selection = '( isSemiLeptonicElectron == 1 )'
                gen_selection_vis = '( isSemiLeptonicElectron == 1 && passesGenEventSelection )'

            selection = '( {0} ) * ( {1} )'.format(genWeight, gen_selection)
            selection_vis = '( {0} ) * ( {1} )'.format(genWeight, gen_selection_vis)
            weighted_entries = get_weighted_entries(tree, selection)
            weighted_entries_vis = get_weighted_entries(tree, selection_vis)
            pbar = ProgressBar(widgets=widgets, maxval=n_input_mc).start()

            toy_mc_sets = [
                ToySet(f_out, variable, channel, n_toy)
                for variable in ['MET', 'HT', 'ST', 'WPT']
            ]

            count = 0
            for event in tree:
                # n_toy weights are drawn for every event, *before* the
                # break/skip checks below.
                # NOTE(review): presumably get_mc_weight is random, so this
                # order keeps the sequence independent of start_at - confirm
                # before moving it.
                mc_weights = get_mc_weight(weighted_entries, n_toy)
                mc_weights_vis = get_mc_weight(weighted_entries_vis, n_toy)

                if count >= n_input_mc:
                    break
                count += 1
                if count < start_at:
                    continue

                # every event enters with unit weight; the per-toy weights
                # carry the variation
                weight = 1
                for toy_mc in toy_mc_sets:
                    toy_mc.fill(event, weight, mc_weights, mc_weights_vis)
                if count % 1000 == 1:
                    pbar.update(count)
                    print('Processed {0} events'.format(count))
            pbar.finish()
            for toy_mc in toy_mc_sets:
                toy_mc.write()
    print('Toy MC was saved to file:', outfile)
def print_xsections_with_uncertainties(xsections, channel, toFile=True):
    """
    Format the measured cross sections and their relative uncertainties as
    LaTeX table rows and either write them to a .tex file or print them.

    :param xsections: results per category; ``xsections['central']`` holds one
        (value, fit error) entry per bin and the whole dictionary is passed to
        ``calculateTotalUncertainty``.
    :param channel: channel name, used in the heading and the file name.
    :param toFile: if True write to
        ``<savePath>/<variable>/normalised_xsection_main_result_...tex``,
        otherwise print the table.
    """
    global savePath, variable, k_value, met_type, b_tag_bin
    separator = '=' * 60
    printout = '\n'
    printout += separator
    # '+=' (not '='): a plain assignment here would discard the opening
    # separator line
    printout += '\n'
    printout += 'Results for %s variable, %s channel, k-value %s, met type %s, %s b-tag region\n' % (
        variable, channel, k_value, met_type, b_tag_bin)
    printout += separator
    printout += '\n'
    printout += '%s bin & $\sigma_{meas}$ \\\\ \n' % variable
    printout += '\hline\n'

    # one growing table row per uncertainty source
    uncertainties = {}
    bins = variable_bins_ROOT[variable]
    assert (len(bins) == len(xsections['central']))
    # values are quoted in units of 10^-2
    scale = 100

    for bin_i, variable_bin in enumerate(bins):
        centralresult = xsections['central'][bin_i]
        uncertainty = calculateTotalUncertainty(xsections, bin_i)
        uncertainty_total_plus, uncertainty_total_minus = symmetriseErrors(
            uncertainty['Total+'][0], uncertainty['Total-'][0])
        central_measurement = centralresult[0]
        fit_error = centralresult[1]

        formatting = (variable_bins_latex[variable_bin],
                      central_measurement * scale,
                      fit_error * scale,
                      uncertainty_total_plus * scale,
                      uncertainty_total_minus * scale)
        text = '%s & $%.2f \pm %.2f (fit)^{+%.2f}_{-%.2f} (sys) \cdot 10^{-2}$\\\\ \n' % formatting
        if doSymmetricErrors:
            relativeError = getRelativeError(
                central_measurement, fit_error + uncertainty_total_plus)
            formatting = (variable_bins_latex[variable_bin],
                          central_measurement * scale,
                          fit_error * scale,
                          uncertainty_total_plus * scale)
            text = '%s & $\\left(%.2f \\pm %.2f \\text{ (fit)} \pm %.2f \\text{ (syst.)}\\right)' % formatting + '(%.2f' % (
                relativeError * 100) + '\%) \\times 10^{-2}\, \\GeV^{-1}$\\\\ \n'
        printout += text

        for source, unc_result in uncertainty.items():
            if source not in uncertainties:
                # first bin for this source: start the row with its label
                if source in metsystematics_sources:
                    uncertainties[source] = metsystematics_sources_latex[source] + ' & '
                else:
                    uncertainties[source] = source + ' & '
            relativeError = getRelativeError(centralresult[0], unc_result[0])
            uncertainties[source] += '%.2f' % (relativeError * 100) + '\% &'

    printout += '\\\\ \n'
    for source in sorted(uncertainties):
        # drop the trailing column separator before closing the row
        printout += uncertainties[source].rstrip('&') + '\\\\ \n'

    make_folder_if_not_exists(savePath + '/' + variable)
    if toFile:
        file_name = (savePath + '/' + variable
                     + '/normalised_xsection_main_result_' + channel
                     + '_' + met_type + '_kv' + str(k_value) + '.tex')
        # 'with' closes the file even if the write fails
        with open(file_name, 'w') as output_file:
            output_file.write(printout)
    else:
        print(printout)
def plot(regularisation_settings, results, use_current_k_values=False):
    """
    Draw the global correlation as a function of tau, mark the optimal tau
    and the tau closest to the best k value, and save the figure in every
    requested format.

    :param regularisation_settings: provides variable, channel,
        centre_of_mass_energy, output_folder and output_format.
    :param results: pair ``(k_results, tau_results)``.
    :param use_current_k_values: additionally annotate the k value returned
        by ``get_k_tau_set``.
    """
    variable = regularisation_settings.variable
    channel = regularisation_settings.channel
    com = regularisation_settings.centre_of_mass_energy
    output_folder = regularisation_settings.output_folder
    output_format = regularisation_settings.output_format
    measurement_config = XSectionConfig(com)

    title_template = r'global correlation for $%s$, %s channel, $\sqrt{s} = %d$ TeV'
    hp = Histogram_properties()
    hp.name = 'reg_param_from_global_correlation_%s_channel_%s' % (channel, variable)
    hp.x_axis_title = r'log($\tau$)'
    hp.y_axis_title = r'$\bar{\rho}(\tau)$'
    hp.title = title_template % (variables_latex[variable], channel, com)

    k_results, tau_results = results
    optimal_tau, minimal_rho, tau_values, rho_values = tau_results
    optimal_k, optimal_k_rho, k_values, k_tau_values, k_rho_values = k_results

    plt.figure(figsize=(16, 16), dpi=200, facecolor='white')
    plt.plot(tau_values, rho_values)
    plt.plot(k_tau_values, k_rho_values, 'ro')
    plt.title(hp.title, CMS.title)
    plt.xlabel(hp.x_axis_title, CMS.x_axis_title)
    plt.ylabel(hp.y_axis_title, CMS.y_axis_title)
    for tick_settings in (CMS.axis_label_major, CMS.axis_label_minor):
        plt.tick_params(**tick_settings)
    ax = plt.axes()

    current_k, closest_tau, _, _ = get_k_tau_set(
        measurement_config, channel, variable, results)
    current_k_rho = k_rho_values[k_values.index(current_k)]
    closest_tau_best_k = k_tau_values[k_values.index(optimal_k)]

    def white_box():
        # a fresh dict per annotation, exactly as if written inline
        return dict(boxstyle="round4", fc="w")

    def plain_arrow():
        return dict(arrowstyle="<-", connectionstyle="arc3", lw=3)

    # the overall optimum in tau
    ax.annotate(
        r"$\tau = %.2g$" % optimal_tau,
        xy=(optimal_tau, minimal_rho),
        xycoords='data',
        xytext=(optimal_tau * 0.9, minimal_rho * 1.15),
        textcoords='data',
        bbox=white_box(),
        arrowprops=dict(
            arrowstyle="fancy,head_length=0.4,head_width=0.4,tail_width=0.4",
            connectionstyle="arc3"),
        size=40,
    )

    # the best k value
    ax.annotate(
        r"$\tau(k_b = %d) = %.2g$" % (optimal_k, closest_tau_best_k),
        xy=(closest_tau_best_k, optimal_k_rho),
        xycoords='data',
        xytext=(closest_tau_best_k * 10, optimal_k_rho),
        textcoords='data',
        bbox=white_box(),
        arrowprops=plain_arrow(),
        size=40,
    )

    # optionally the k value currently in use
    if use_current_k_values:
        ax.annotate(
            r"$\tau(k_c = %d) = %.2g$" % (current_k, closest_tau),
            xy=(closest_tau, current_k_rho),
            xycoords='data',
            xytext=(closest_tau, current_k_rho * 0.9),
            textcoords='data',
            bbox=white_box(),
            arrowprops=plain_arrow(),
            size=40,
        )

    plt.xscale('log')
    make_folder_if_not_exists(output_folder)
    file_stem = output_folder + '/' + hp.name + '.'
    for extension in output_format:
        plt.savefig(file_stem + extension)
def make_template_plots(histograms, category, channel):
    """
    Draw the signal, V+Jets and QCD fit templates for every bin of the
    global ``variable`` and save each canvas as png and pdf.

    :param histograms: ``histograms[variable_bin][sample]`` with the samples
        'signal', 'V+Jets' and 'QCD'.
    :param category: measurement category; becomes part of the output path.
    :param channel: 'electron' or 'muon'; anything else is labelled 'combined'.
    """
    global variable, translate_options, b_tag_bin, save_path
    ROOT.TH1.SetDefaultSumw2(False)
    ROOT.gROOT.SetBatch(True)
    ROOT.gROOT.ProcessLine('gErrorIgnoreLevel = 1001;')
    plotting.setStyle()
    gStyle.SetTitleYOffset(1.4)
    ROOT.gROOT.ForceStyle()

    channel_symbols = {'electron': 'e', 'muon': '#mu'}

    for variable_bin in variable_bins_ROOT[variable]:
        path = save_path + '/' + variable + '/' + category + '/fit_templates/'
        make_folder_if_not_exists(path)
        # Keep the extension-less name so the pdf name is built directly;
        # str.replace('png', 'pdf') on the full path would also rewrite any
        # 'png' inside a folder name.
        plot_base = path + channel + '_templates_bin_' + variable_bin
        plotname = plot_base + '.png'
        # skip bins whose template plot exists already
        if os.path.isfile(plotname):
            continue

        canvas = Canvas(width=700, height=500)
        canvas.SetLeftMargin(0.15)
        canvas.SetBottomMargin(0.15)
        canvas.SetTopMargin(0.05)
        canvas.SetRightMargin(0.05)
        legend = plotting.create_legend(x0=0.7, y1=0.8)

        h_signal = histograms[variable_bin]['signal']
        h_VJets = histograms[variable_bin]['V+Jets']
        h_QCD = histograms[variable_bin]['QCD']

        # the first histogram drawn defines the axes
        h_signal.GetXaxis().SetTitle('Lepton #eta')
        h_signal.GetYaxis().SetTitle('Normalised Events')
        h_signal.GetXaxis().SetTitleSize(0.05)
        h_signal.GetYaxis().SetTitleSize(0.05)
        h_signal.SetMinimum(0)
        h_signal.SetMaximum(0.2)
        h_signal.SetLineWidth(2)
        h_VJets.SetLineWidth(2)
        h_QCD.SetLineWidth(2)
        h_signal.SetLineColor(kRed + 1)
        h_VJets.SetLineColor(kBlue)
        h_QCD.SetLineColor(kYellow)
        h_signal.Draw('hist')
        h_VJets.Draw('hist same')
        h_QCD.Draw('hist same')

        legend.AddEntry(h_signal, 'signal', 'l')
        legend.AddEntry(h_VJets, 'V+Jets', 'l')
        legend.AddEntry(h_QCD, 'QCD', 'l')
        legend.Draw()

        mytext = TPaveText(0.5, 0.97, 1, 1.01, "NDC")
        channelLabel = TPaveText(0.18, 0.97, 0.5, 1.01, "NDC")
        channelLabel.AddText("%s, %s, %s" % (
            channel_symbols.get(channel, 'combined'),
            "#geq 4 jets", b_tag_bins_latex[b_tag_bin]))
        # NOTE(review): luminosity and centre-of-mass energy are hard-coded
        mytext.AddText(
            "CMS Preliminary, L = %.1f fb^{-1} at #sqrt{s} = 8 TeV" % (5.8))

        for pave in (mytext, channelLabel):
            pave.SetFillStyle(0)
            pave.SetBorderSize(0)
            pave.SetTextFont(42)
            pave.SetTextAlign(13)

        mytext.Draw()
        channelLabel.Draw()

        canvas.Modified()
        canvas.Update()
        canvas.SaveAs(plotname)
        canvas.SaveAs(plot_base + '.pdf')
def plot_fit_results(histograms, category, channel):
    """
    Draw data on top of the stacked background and signal fit results for
    every bin of the global ``variable`` and save each canvas as png and pdf.

    :param histograms: ``histograms[variable_bin][sample]`` with the samples
        'data', 'signal' and 'background'.
    :param category: measurement category; becomes part of the output path.
    :param channel: 'electron' or 'muon'; anything else is labelled 'combined'.
    """
    global variable, translate_options, b_tag_bin, save_path
    ROOT.gROOT.SetBatch(True)
    ROOT.gROOT.ProcessLine('gErrorIgnoreLevel = 1001;')
    plotting.setStyle()
    gStyle.SetTitleYOffset(1.4)
    ROOT.gROOT.ForceStyle()

    channel_symbols = {'electron': 'e', 'muon': '#mu'}

    for variable_bin in variable_bins_ROOT[variable]:
        path = save_path + '/' + variable + '/' + category + '/fit_results/'
        make_folder_if_not_exists(path)
        # Keep the extension-less name so the pdf name is built directly;
        # str.replace('png', 'pdf') on the full path would also rewrite any
        # 'png' inside a folder name.
        plot_base = path + channel + '_bin_' + variable_bin
        plotname = plot_base + '.png'
        # skip bins whose plot exists already
        if os.path.isfile(plotname):
            continue

        canvas = Canvas(width=700, height=500)
        canvas.SetLeftMargin(0.15)
        canvas.SetBottomMargin(0.15)
        canvas.SetTopMargin(0.05)
        canvas.SetRightMargin(0.05)
        legend = plotting.create_legend(x0=0.7, y1=0.8)

        h_data = histograms[variable_bin]['data']
        h_signal = histograms[variable_bin]['signal']
        h_background = histograms[variable_bin]['background']

        # data is drawn first so that it defines the axes ...
        h_data.GetXaxis().SetTitle('Lepton #eta')
        h_data.GetYaxis().SetTitle('Number of Events')
        h_data.GetXaxis().SetTitleSize(0.05)
        h_data.GetYaxis().SetTitleSize(0.05)
        h_data.SetMinimum(0)
        h_data.SetMarkerSize(1)
        h_data.SetMarkerStyle(20)
        gStyle.SetEndErrorSize(20)
        h_data.Draw('P')

        h_signal.SetFillColor(kRed + 1)
        h_background.SetFillColor(kGreen - 3)
        h_signal.SetLineWidth(2)
        h_background.SetLineWidth(2)
        h_signal.SetFillStyle(1001)
        h_background.SetFillStyle(1001)

        mcStack = THStack("MC", "MC")
        mcStack.Add(h_background)
        mcStack.Add(h_signal)
        mcStack.Draw('hist same')
        # ... and once more so the points sit on top of the filled stack
        h_data.Draw('error P same')

        legend.AddEntry(h_data, 'data', 'P')
        legend.AddEntry(h_signal, 'signal', 'F')
        legend.AddEntry(h_background, 'background', 'F')
        legend.Draw()

        mytext = TPaveText(0.5, 0.97, 1, 1.01, "NDC")
        channelLabel = TPaveText(0.18, 0.97, 0.5, 1.01, "NDC")
        channelLabel.AddText("%s, %s, %s" % (
            channel_symbols.get(channel, 'combined'),
            "#geq 4 jets", b_tag_bins_latex[b_tag_bin]))
        # NOTE(review): luminosity and centre-of-mass energy are hard-coded
        mytext.AddText(
            "CMS Preliminary, L = %.1f fb^{-1} at #sqrt{s} = 8 TeV" % (5.8))

        for pave in (mytext, channelLabel):
            pave.SetFillStyle(0)
            pave.SetBorderSize(0)
            pave.SetTextFont(42)
            pave.SetTextAlign(13)

        mytext.Draw()
        channelLabel.Draw()

        canvas.Modified()
        canvas.Update()
        canvas.SaveAs(plotname)
        canvas.SaveAs(plot_base + '.pdf')
from tools.file_utilities import make_folder_if_not_exists, read_data_from_JSON make_folder_if_not_exists('hepdata') from dps.config.variable_binning import bin_edges_vis from dps.config.latex_labels import variables_latex from dps.config.xsection import XSectionConfig from dps.utils.pandas_utilities import file_to_df, matrix_from_df import os.path import numpy as np measurement_config = XSectionConfig(13) regularisations = { 'regularised': '/scratch/db0268/DPS/DPSTestingGround/DailyPythonScripts/data_X_allFixes/normalisation/background_subtraction/', 'unregularised': '/scratch/db0268/DPS/DPSTestingGround/DailyPythonScripts/data_X_allFixes_allTau0/normalisation/background_subtraction/' } normalisations = ['normalised', 'absolute'] variableHeaders = { 'MET': 'name: "{variable}", units: GEV'.format(variable=variables_latex['MET']), 'HT': 'name: "{variable}", units: GEV'.format(variable=variables_latex['HT']), 'ST': 'name: "{variable}", units: GEV'.format(variable=variables_latex['ST']), 'WPT': 'name: "{variable}", units: GEV'.format(variable=variables_latex['WPT']), 'NJets':
# Refuse job numbers beyond the end of the job list.
if options.jobNumber > (len(allJobs) - 1):
    # single pre-formatted strings print identically with the print
    # statement and the print function
    print('Job number {0} too large'.format(options.jobNumber))
    print('Total number of possible jobs : {0}'.format(len(allJobs)))
    print('Largest possible job number :  {0}'.format(len(allJobs) - 1))
    # non-zero status: a bare sys.exit() would report success to the caller
    sys.exit(1)

# Each job is (category, sample, list of input samples).
jobNumber = options.jobNumber
job = allJobs[jobNumber]
category = job[0]
sample = job[1]
input_samples = job[2]

# Make folder
make_folder_if_not_exists(path_to_AN_folder + "/" + category)

# The merged file is written below the current working directory instead of
# the hdfs location encoded in the template.
current_working_directory = os.getcwd()
output_file_hdfs = config.general_category_templates[category] % sample
output_file = output_file_hdfs.replace(
    "/hdfs/TopQuarkGroup/results/histogramfiles", current_working_directory)
input_files = [
    config.general_category_templates[category] % input_sample
    for input_sample in input_samples
]

# do not redo a merge whose output is already there
if not os.path.exists(output_file):
    merge_ROOT_files(input_files, output_file, compression=7, waitToFinish=True)
def plot_central_and_systematics( channel, systematics, exclude = [], suffix = 'altogether' ):
    """
    Overlay the central unfolded result and the result of every systematic
    variation for one channel and save the figure in all output formats.

    :param channel: channel whose measurement results are read.
    :param systematics: list of systematic names; the position in this list
        selects the marker colour.
    :param exclude: systematics that are not drawn ('central' is never drawn
        as a systematic).
    :param suffix: last part of the output file name.
    """
    global variable, b_tag_bin, met_type

    use_minor_ticks = variable not in ['NJets']
    without_units = variable in ['NJets', 'abs_lepton_eta', 'lepton_eta']

    plt.figure( figsize = ( 16, 16 ), dpi = 200, facecolor = 'white' )
    axes = plt.axes()
    if use_minor_ticks:
        axes.minorticks_on()

    central_results = read_xsection_measurement_results( 'central', channel )
    hist_data_central = central_results[0]['unfolded_with_systematics']
    hist_data_central.markersize = 2  # points. Imagine, tangible units!
    hist_data_central.marker = 'o'

    if without_units:
        x_label = '$%s$' % variables_latex[variable]
        y_label = r'$\frac{1}{\sigma} \frac{d\sigma}{d' + variables_latex[variable] + '}$'
    else:
        x_label = '$%s$ [GeV]' % variables_latex[variable]
        y_label = r'$\frac{1}{\sigma} \frac{d\sigma}{d' + variables_latex[variable] + '} \left[\mathrm{GeV}^{-1}\\right]$'
    plt.xlabel( x_label, CMS.x_axis_title )
    plt.ylabel( y_label, CMS.y_axis_title )
    plt.tick_params( **CMS.axis_label_major )
    if use_minor_ticks:
        plt.tick_params( **CMS.axis_label_minor )

    rplt.errorbar( hist_data_central, axes = axes, label = 'data', xerr = True )

    for systematic in sorted( systematics ):
        if systematic == 'central' or systematic in exclude:
            continue

        hist_data_systematic = read_xsection_measurement_results( systematic, channel )[0]['unfolded']
        hist_data_systematic.markersize = 2
        hist_data_systematic.marker = 'o'

        # colour follows the position in the caller's list; 10 is swapped for 42
        colour_number = systematics.index( systematic ) + 2
        hist_data_systematic.SetMarkerColor( 42 if colour_number == 10 else colour_number )

        # pick the legend entry, then draw once
        if 'PDF' in systematic:
            legend_label = systematic.replace( 'Weights_', ' ' )
        elif met_type in systematic:
            legend_label = measurements_latex[systematic.replace( met_type, '' )]
        else:
            legend_label = measurements_latex[systematic]
        rplt.errorbar( hist_data_systematic, axes = axes, label = legend_label, xerr = None )

    plt.legend( numpoints = 1, loc = 'center right', prop = {'size':25}, ncol = 2 )
    label, channel_label = get_cms_labels( channel )
    plt.title( label, CMS.title )
    # CMS text
    # note: fontweight/weight does not change anything as we use Latex text!!!
    plt.text( 0.95, 0.95, r"\textbf{CMS}", transform = axes.transAxes, fontsize = 42,
              verticalalignment = 'top', horizontalalignment = 'right' )
    # channel text
    axes.text( 0.95, 0.90, r"\emph{%s}" % channel_label, transform = axes.transAxes, fontsize = 40,
               verticalalignment = 'top', horizontalalignment = 'right' )
    plt.tight_layout()

    path = output_folder + str( measurement_config.centre_of_mass_energy ) + 'TeV/' + variable
    make_folder_if_not_exists( path )
    file_stem = path + '/normalised_xsection_' + channel + '_' + suffix + '.'
    for output_format in output_formats:
        plt.savefig( file_stem + output_format )

    # free the figure before the next plot is made
    plt.close()
    gc.collect()
def main():
    """
    Read the command line, then produce the per-bin and all-bin pull plots,
    the summary of the per-bin fit results and the difference plots.
    """
    parser = OptionParser(__doc__)
    parser.add_option(
        "-v", "--variable", dest="variable", default='MET',
        help="set the variable to analyse (MET, HT, ST, MT, WPT)")
    parser.add_option(
        "-s", "--centre-of-mass-energy", dest="CoM", default=8,
        help="set the centre of mass energy for analysis. Default = 8 [TeV]",
        type=int)
    parser.add_option(
        "-o", "--output", dest="output_folder",
        default='plots/unfolding_pulls',
        help="output folder for unfolding pull plots")
    parser.add_option(
        "-c", "--channel", type='string', dest="channel", default='combined',
        help="channel to be analysed: electron|muon|combined")
    parser.add_option(
        "-k", "--k_value", type='int', dest="k_value", default=-1,
        help="k-value used in SVD unfolding, only for categorisation purpose at this stage")
    parser.add_option(
        "--tau", type='float', dest="tau_value", default=-1.,
        help="tau-value for SVD unfolding")

    options, args = parser.parse_args()

    # the positional arguments are the input files
    if not args:
        print('No input files specified.')
        print('Run script with "-h" for usage')
        sys.exit(-1)
    files = args

    centre_of_mass = options.CoM
    variable = options.variable
    channel = options.channel
    config = XSectionConfig(centre_of_mass)
    k_value = get_k_value(options.k_value, config, channel, variable)
    tau_value = get_tau_value(options.tau_value, config, channel, variable)

    output_folder = '{0}/{1}TeV/{2}/{3}/'.format(
        options.output_folder, centre_of_mass, variable, channel)
    make_folder_if_not_exists(output_folder)
    output_formats = ['pdf']

    bins = array('d', bin_edges[variable])
    nbins = len(bins) - 1

    print('Producing unfolding pull plots for {0} variable, channel: {1}'.format(
        variable, channel))
    print('Using {0}'.format(get_value_title(k_value, tau_value)))
    print('Output folder: {0}'.format(output_folder))

    pulls = get_data(files, subset='pull')

    # one pull plot (and fit result) per bin
    fit_results = [
        plot_pull(pulls, centre_of_mass, channel, variable, k_value,
                  tau_value, output_folder, output_formats,
                  bin_index=bin_i, n_bins=nbins)
        for bin_i in range(nbins)
    ]
    plot_fit_results(fit_results, centre_of_mass, channel, variable, k_value,
                     tau_value, output_folder, output_formats, bins)

    # plot all bins
    plot_pull(pulls, centre_of_mass, channel, variable, k_value, tau_value,
              output_folder, output_formats)
    del pulls  # deleting to make space in memory

    difference = get_data(files, subset='difference')
    plot_difference(difference, centre_of_mass, channel, variable, k_value,
                    tau_value, output_folder, output_formats)
def make_error_plot( errorHists, bins ):
    """
    Plot the relative uncertainty per bin for every systematic source and
    for the statistical uncertainty, and save the figure as a pdf.

    Up/down pairs are reduced to one histogram per source by taking, bin by
    bin, the larger absolute value of the two.

    :param errorHists: maps a source name to a sequence of per-bin errors;
        must contain 'statistical' and, for each 'down'-type source, the
        matching 'up'-type source.
    :param bins: bin edges; the first and last edge set the x range.
    """
    global output_folder, variable
    from itertools import cycle

    no_met_variables = ['HT', 'NJets', 'lepton_pt', 'abs_lepton_eta']
    # (tag of the down variation, tag of the matching up variation),
    # in order of precedence
    down_up_tags = [('down', 'up'), ('Down', 'Up'), ('-', '+'), ('lower', 'upper')]
    one_sided_sources = ['TTJets_hadronisation', 'QCD_shape', 'TTJets_NLOgenerator']

    # For each up/down source, reduce to one set of numbers
    symmetricErrorHists = {}
    for source, hist in errorHists.items():
        # MET systematics other than JES/JER are not drawn for these variables
        if ( variable in no_met_variables
             and source in measurement_config.met_systematics
             and 'JES' not in source and 'JER' not in source ):
            continue

        matching_tags = [tags for tags in down_up_tags if tags[0] in source]
        if matching_tags:
            down_tag, up_tag = matching_tags[0]
            upHist = errorHists[source.replace( down_tag, up_tag )]
            newSource = source.replace( down_tag, '' )
            # endswith (not [-1]) so an empty name cannot raise IndexError
            if newSource.endswith( '_' ):
                newSource = newSource[:-1]
            symmetricErrorHists[newSource] = [
                max( abs( error_down ), abs( error_up ) )
                for error_down, error_up in zip( hist, upHist )
            ]
        elif any( tag in source for tag in one_sided_sources ):
            symmetricErrorHists[source] = [ abs( i ) for i in hist ]

    x_limits = [bins[0], bins[-1]]
    y_limits = [0, 0.6]

    plt.figure( figsize = ( 20, 16 ), dpi = 200, facecolor = 'white' )
    ax0 = plt.axes()
    ax0.minorticks_on()
    ax0.xaxis.labelpad = 12
    ax0.yaxis.labelpad = 12
    ax0.set_xlim( x_limits )
    plt.tick_params( **CMS.axis_label_major )
    plt.tick_params( **CMS.axis_label_minor )

    statisticalErrorHists = values_and_errors_to_hist( errorHists['statistical'], [], bins )
    for source in list( symmetricErrorHists ):
        symmetricErrorHists[source] = values_and_errors_to_hist( symmetricErrorHists[source], [], bins )

    colours = ['silver', 'r', 'tan', 'chartreuse', 'cadetblue', 'dodgerblue', 'pink', 'hotpink', 'coral', 'forestgreen', 'cyan', 'teal', 'crimson', 'darkmagenta', 'olive', 'slateblue', 'deepskyblue', 'orange', 'r' ]
    # cycle the palette: a plain zip would silently leave out every source
    # beyond the number of colours
    for source, colour in zip( symmetricErrorHists.keys(), cycle( colours ) ):
        hist = symmetricErrorHists[source]
        hist.linewidth = 4
        hist.color = colour
        rplt.hist( hist, stacked=False, axes = ax0, cmap = my_cmap, vmin = 1, label = source )

    statisticalErrorHists.linewidth = 4
    statisticalErrorHists.color = 'black'
    statisticalErrorHists.linestyle = 'dashed'
    rplt.hist( statisticalErrorHists, stacked=False, axes = ax0, cmap = my_cmap, vmin = 1, label = 'stat.' )

    ax0.set_ylim( y_limits )
    leg = plt.legend(loc=1,prop={'size':40},ncol=2)
    leg.draw_frame(False)

    x_title = variables_NonLatex[variable]
    if variable in ['HT', 'MET', 'WPT', 'ST', 'lepton_pt']:
        x_title += ' [GeV]'
    plt.xlabel( x_title, CMS.x_axis_title )
    plt.ylabel( 'Relative Uncertainty', CMS.y_axis_title)
    plt.tight_layout()

    path = output_folder + '/' + variable + '/'
    make_folder_if_not_exists(path)
    # 'channel' is read from module scope
    file_template = path + '/%s_systematics_%dTeV_%s.pdf' % (variable, measurement_config.centre_of_mass_energy, channel)
    plt.savefig(file_template)
    # release the figure; otherwise every call leaves a large figure open
    plt.close()
def main():
    """
    Scan the k values found in the input files: make pull plots for each k,
    collect the pull widths (all bins, largest and smallest single bin) and
    draw them, together with their relative spread, as a function of k.
    """
    parser = OptionParser(__doc__)
    parser.add_option(
        "-v", "--variable", dest="variable", default='MET',
        help="set the variable to analyse (MET, HT, ST, MT, WPT)")
    parser.add_option(
        "-s", "--centre-of-mass-energy", dest="CoM", default=8,
        help="set the centre of mass energy for analysis. Default = 8 [TeV]",
        type=int)
    parser.add_option("-o", "--output", dest="output_folder",
                      default='plots/unfolding_pulls',
                      help="output folder for unfolding pull plots")
    parser.add_option("-c", "--channel", type='string', dest="channel",
                      default='combined',
                      help="channel to be analysed: electron|muon|combined")

    (options, args) = parser.parse_args()
    if len(args) == 0:
        print('No input files specified.')
        print('Run script with "-h" for usage')
        sys.exit(-1)
    files = args

    centre_of_mass = options.CoM
    variable = options.variable
    channel = options.channel
    output_folder_base = options.output_folder + '/' + \
        str(centre_of_mass) + 'TeV/' + variable + '/' + channel + '/'
    make_folder_if_not_exists(output_folder_base)
    output_formats = ['pdf']

    bins = array('d', bin_edges[variable])
    nbins = len(bins) - 1

    kValues = sorted(getkValueRange(files))

    # one entry per k: [k, sigma of all bins, largest and smallest bin sigma]
    sigmaForEachK = []
    tau = -1
    for k in kValues:
        # k = 1 is skipped; compare by value, identity ('is') on an int
        # literal is implementation dependent
        if k == 1:
            continue

        output_folder = output_folder_base + '/kv' + str(k) + '/'
        make_folder_if_not_exists(output_folder)

        print(
            'Producing unfolding pull plots for {0} variable, k-value of {1}, channel: {2}.'
            .format(variable, k, channel))
        print('Output folder: {0}'.format(output_folder))

        pulls = get_data(files, subset='pull')

        maxSigma = 0
        minSigma = 100
        for bin_i in range(0, nbins):
            fitResults = plot_pull(pulls, centre_of_mass, channel, variable,
                                   k, tau, output_folder, output_formats,
                                   bin_index=bin_i, n_bins=nbins)
            if fitResults.sigma > maxSigma:
                maxSigma = fitResults.sigma
            if fitResults.sigma < minSigma:
                minSigma = fitResults.sigma

        # plot all bins
        allBinsFitResults = plot_pull(pulls, centre_of_mass, channel,
                                      variable, k, tau, output_folder,
                                      output_formats)
        allBinsSigma = allBinsFitResults.sigma

        sigmaForEachK.append([k, allBinsSigma, maxSigma, minSigma])
        print('All bins sigma :', allBinsFitResults.sigma)
        print('Max/min sigma :', maxSigma, minSigma)
        print('Spread :', maxSigma - minSigma)
        del pulls  # deleting to make space in memory
    print()

    if not sigmaForEachK:
        # nothing to draw; fail with a clear message instead of an
        # IndexError/ValueError further down
        print('No k values other than 1 found in the input files.')
        sys.exit(-1)

    # Split the rows into columns. Comprehensions are used because
    # zip(...)[i] only works where zip returns a list.
    kValues = [entry[0] for entry in sigmaForEachK]
    sigmas = [entry[1] for entry in sigmaForEachK]
    sigmaups = [entry[2] for entry in sigmaForEachK]
    sigmadowns = [entry[3] for entry in sigmaForEachK]
    kValuesup = []
    kValuesdown = []
    spread = []

    for i in range(0, len(sigmas)):
        # relative spread of the per-bin widths, from the raw max and min
        spread.append((sigmaups[i] - sigmadowns[i]) / sigmas[i])
        # then turn max/min into asymmetric errors around the all-bin sigma
        sigmaups[i] = sigmaups[i] - sigmas[i]
        sigmadowns[i] = sigmas[i] - sigmadowns[i]
        kValuesup.append(0.5)
        kValuesdown.append(0.5)
    print(spread)
    # index (not value) of the k with the smallest relative spread
    kValueChoice = spread.index(min(spread))
    print(kValueChoice)

    graph = asrootpy(
        TGraphAsymmErrors(len(sigmas), array('d', kValues), array('d', sigmas),
                          array('d', kValuesdown), array('d', kValuesup),
                          array('d', sigmadowns), array('d', sigmaups)))
    graphSpread = asrootpy(
        TGraphAsymmErrors(len(sigmas), array('d', kValues), array('d', spread),
                          array('d', kValuesdown), array('d', kValuesup),
                          array('d', sigmadowns), array('d', sigmaups)))

    # plot with matplotlib
    plt.figure(figsize=(20, 16), dpi=200, facecolor='white')

    ax0 = plt.axes()
    ax0.minorticks_on()
    ax0.grid(True, 'major', linewidth=2)
    ax0.grid(True, 'minor')
    plt.tick_params(**CMS.axis_label_major)
    plt.tick_params(**CMS.axis_label_minor)
    ax0.xaxis.set_major_formatter(FormatStrFormatter('%d'))
    ax0.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
    ax0.xaxis.labelpad = 11

    rplt.errorbar(graph, xerr=True, emptybins=True, axes=ax0, marker='o',
                  ms=15, mew=3, lw=2)
    rplt.errorbar(graphSpread, xerr=None, yerr=False, axes=ax0,
                  linestyle='-', marker='s', ms=10, mew=1, lw=2)

    for output_format in output_formats:
        print(output_folder_base)
        plt.savefig(output_folder_base + '/TESTGRAPH' + '.' + output_format)

    print(kValues)
    print(kValuesup)
    print(kValuesdown)
    print(sigmas)
    print(sigmaups)
    print(sigmadowns)
    print(sigmaForEachK)
def print_typical_systematics_table(central_values, errors, channel, toFile = True, print_before_unfolding = False):
    """
    Summarise the typical systematic uncertainties of the current variable as
    LaTeX tables, one with the median and one with the maximum over all bins.

    With ``toFile`` the tables go to ``<summary>_systematics_<com>TeV_<channel>.tex``.
    If that file exists already, a column for the current variable is appended
    to it instead of writing a new table.

    :param central_values: ``central_values['measured' | 'unfolded'][bin][0]``
        is the central value of a bin.
    :param errors: ``errors[systematic][bin]`` is the absolute error; must
        contain 'central' and every typical systematic.
    :param channel: channel name used in caption, label and file name.
    :param toFile: write to file if True, otherwise print the table.
    :param print_before_unfolding: use the measured instead of the unfolded
        values and write into the 'before_unfolding' sub-folder.
    """
    global output_folder, variable, met_type, b_tag_bin, all_measurements, phase_space, measurement_config
    bins = None
    if phase_space == 'VisiblePS':
        bins = variable_bins_visiblePS_ROOT[variable]
    elif phase_space == 'FullPS':
        bins = variable_bins_ROOT[variable]
    if print_before_unfolding:
        measurement = 'measured'
    else:
        measurement = 'unfolded'

    assert(len(bins) == len(errors['central']))
    assert(len(bins) == len(central_values[measurement]))
    typical_systematics = measurement_config.typical_systematics
    for s in typical_systematics:
        assert(s in errors)

    # per group: one combined relative error for every bin
    group_errors = {}
    for group in measurement_config.typical_systematics_summary:
        group_errors[group] = []

    for bin_i, _ in enumerate(bins):
        central_value = central_values[measurement][bin_i][0]
        uncertainties = {}
        # calculate all relative errors
        for systematic in typical_systematics:
            abs_error = errors[systematic][bin_i]
            relative_error = getRelativeError(central_value, abs_error)
            uncertainties[systematic] = relative_error
        # add errors in a group in quadrature
        for group, u_list in measurement_config.typical_systematics_summary.items():
            group_error_squared = 0
            for subgroup in u_list:
                # use the biggest of up and down
                subgroup_error = max(uncertainties[subgroup[0]], uncertainties[subgroup[1]])
                group_error_squared += pow(subgroup_error, 2)
            group_errors[group].append(math.sqrt(group_error_squared))

    summarised_typical_systematics = {}
    summarised_max_systematics = {}
    # calculate the median and the maximum
    # x 100 to be in %
    for group, u_list in group_errors.items():
        summarised_typical_systematics[group] = median(u_list)*100
        summarised_max_systematics[group] = max(u_list) * 100

    summaries = {'median':summarised_typical_systematics,'max':summarised_max_systematics}
    # 'summary_errors' rather than 'errors' so the parameter is not shadowed
    for summary, summary_errors in summaries.items():
        printout = '%% ' + '=' * 60
        printout += '\n'
        printout += '%% Typical systematics table for {0} channel, met type {1}, {2} b-tag region\n'.format(channel, met_type, b_tag_bin)
        if print_before_unfolding:
            printout += '%% BEFORE UNFOLDING\n'
        printout += '%% ' + '=' * 60
        printout += '\n'

        printout += '\\begin{table}[htbp]\n'
        printout += '\\centering\n'
        printout += '\\caption{Typical systematic uncertainties (median values) for the normalised \\ttbar cross section measurement \n'
        printout += 'at a centre-of-mass energy of {0} TeV '.format(measurement_config.centre_of_mass_energy)
        if channel == 'combined' or channel == 'combinedBeforeUnfolding':
            printout += '(combination of electron and muon channels).}\n'
        else:
            # the literal closing brace must be doubled for str.format;
            # a single '}' raises ValueError
            printout += '({0} channel).}}\n'.format(channel)
        printout += '\\label{{tab:typical_systematics_{0}TeV_{1}}}\n'.format(measurement_config.centre_of_mass_energy, channel)
        printout += '\\resizebox{\\columnwidth}{!} {\n'
        printout += '\\begin{tabular}{l' + 'r'*len(bins) + '}\n'
        printout += '\\hline\n'

        header = 'Uncertainty source '
        header += '& {0}'.format(variables_latex[variable])
        header += ' '
        printout += header
        printout += '\n\\hline\n'

        for group, ts in summary_errors.items():
            printout += group + ' (\\%) & {:.2f} \\\\ \n'.format(ts)
        printout += '\\hline \n'
        printout += '\\hline \n'
        printout += '\\end{tabular}\n'
        printout += '}\n'
        printout += '\\end{table}\n'

        if toFile:
            path = output_folder + '/'
            make_folder_if_not_exists(path)
            file_template = path + '/{0}_systematics_{1}TeV_{2}.tex'.format(summary,measurement_config.centre_of_mass_energy, channel)

            if print_before_unfolding:
                make_folder_if_not_exists(path + '/before_unfolding/')
                file_template = file_template.replace(path, path + '/before_unfolding/')

            if os.path.isfile(file_template):
                # table exists: append one column for the current variable
                with open(file_template, 'r+') as output_file:
                    lines = output_file.readlines()
                    for line_number, line in enumerate (lines):
                        if line.startswith("Uncertainty source"):
                            lines[line_number] = lines[line_number].strip() + "& " + variables_latex[variable] + "\n"
                        elif variable == "HT" and line.startswith("$E_{T}^{miss}$ uncertainties"):
                            lines[line_number] = lines[line_number].strip() + "& - \n"
                        else:
                            for group, ts in summary_errors.items():
                                if line.startswith(group):
                                    # drop the row terminator, add the value, terminate again
                                    new_line = line.replace('\\\\', '')
                                    new_line = new_line.strip()
                                    lines[line_number] = new_line + '& {:.2f} \\\\ \n'.format(ts)
                    # rewrite the file in place from the start
                    output_file.seek(0)
                    for line in lines:
                        output_file.write(line)
                    # guard against stale bytes should the content ever shrink
                    output_file.truncate()
            else:
                with open(file_template, 'w') as output_file:
                    output_file.write(printout)
        else:
            print(printout)
# Script entry point: read the command line, create the output folders and
# open the data tree before counting events.
if __name__ == '__main__':
    set_root_defaults( msg_ignore_level = 3001 )
    parser = OptionParser()
    parser.add_option("-p", "--path", dest="path", default='/hdfs/TopQuarkGroup/trigger_BLT_ntuples/', help="set path to input BLT ntuples")
    parser.add_option("-o", "--output_folder", dest="output_plots_folder", default='plots/2011/hadron_leg/', help="set path to save tables")
    (options, args) = parser.parse_args()

    input_path = options.path
    output_folder = options.output_plots_folder
    output_pickle_folder = './pickle_files/'
    # channel and centre-of-mass energy are fixed here, not read from options
    channel = 'electron'
    centre_of_mass = 7

    make_folder_if_not_exists(output_folder)
    make_folder_if_not_exists(output_pickle_folder)
    output_formats = ['pdf']

    # input file and tree with the data events
    data_histFile = input_path + '/2011/SingleElectron_2011_RunAB_had_leg.root'
    data_input_file = File(data_histFile)
    data_tree = data_input_file.Get('rootTupleTreeEPlusJets/ePlusJetsTree')

    # names of the reconstructed and trigger object collections
    reco_leptons_collection = 'selectedPatElectronsLoosePFlow'
    reco_jet_collection = 'cleanedJetsPFlowEPlusJets'
    trigger_object_lepton = 'TriggerObjectElectronIsoLeg'
    trigger_jet_collection = 'TriggerObjectHadronPFIsoLeg'

    print 'Number of events in data tree: ', data_tree.GetEntries()
    # event counter, starts at zero
    n_lepton_leg_events = 0
def main():
    '''
        Main function for this script.

        Parses the command-line options (output folder, number of toy MC,
        unfolding method and its k/tau parameters, toy MC input file,
        variable, centre-of-mass energy, channel and toy MC offsets),
        creates the output folder and hands everything over to
        create_unfolding_pull_data.
    '''
    set_root_defaults(msg_ignore_level=3001)

    parser = OptionParser()
    parser.add_option("-o", "--output",
                      dest="output_folder", default='data/pull_data/',
                      help="output folder for pull data files")
    parser.add_option("-n", "--n_input_mc", type=int,
                      dest="n_input_mc", default=100,
                      help="number of toy MC used for the tests")
    parser.add_option("-k", "--k_value", type=int,
                      dest="k_value", default=3,
                      help="k-value for SVD unfolding")
    parser.add_option("--tau", type='float',
                      dest="tau_value", default=-1.,
                      help="tau-value for SVD unfolding")
    parser.add_option("-m", "--method", type='string',
                      dest="method", default='RooUnfoldSvd',
                      help="unfolding method")
    parser.add_option("-f", "--file", type='string',
                      dest="file", default='data/toy_mc/unfolding_toy_mc.root',
                      help="file with toy MC")
    parser.add_option("-v", "--variable", dest="variable", default='MET',
                      help="set the variable to analyse (MET, HT, ST, MT, WPT)")
    # The help text used to hard-code "Default = 8" although the default
    # is 13. optparse expands %default to the actual default value, so the
    # two can no longer disagree.
    parser.add_option("-s", "--centre-of-mass-energy", dest="CoM", default=13,
                      help="set the centre of mass energy for analysis. "
                           "Default = %default [TeV]",
                      type=int)
    parser.add_option("-c", "--channel", type='string',
                      dest="channel", default='combined',
                      help="channel to be analysed: electron|muon|combined")
    parser.add_option("--offset_toy_mc", type=int,
                      dest="offset_toy_mc", default=0,
                      help="offset of the toy MC used to response matrix")
    parser.add_option("--offset_toy_data", type=int,
                      dest="offset_toy_data", default=0,
                      help="offset of the toy MC used as data for unfolding")

    (options, _) = parser.parse_args()

    centre_of_mass = options.CoM
    make_folder_if_not_exists(options.output_folder)

    # set the number of toy MC for error calculation
    k_value = options.k_value
    tau_value = options.tau_value
    use_n_toy = options.n_input_mc
    offset_toy_mc = options.offset_toy_mc
    offset_toy_data = options.offset_toy_data
    method = options.method
    variable = options.variable

    # NOTE(review): use_n_toy is deliberately passed twice, exactly as
    # before; presumably the two slots are the number of toy MC and the
    # number of toy data sets - confirm against create_unfolding_pull_data.
    create_unfolding_pull_data(options.file, method, options.channel,
                               centre_of_mass, variable,
                               use_n_toy, use_n_toy,
                               options.output_folder,
                               offset_toy_mc, offset_toy_data,
                               k_value, tau_value)
def _sum_over_samples(results, samples, bin_i):
    '''
        Add up value (index 0) and error (index 1) of all samples in one bin.

        The additions are done left to right in the order of "samples".
        Errors are added linearly.
    '''
    value = results[samples[0]][bin_i][0]
    error = results[samples[0]][bin_i][1]
    for sample in samples[1:]:
        value += results[sample][bin_i][0]
        error += results[sample][bin_i][1]
    return value, error


def _format_yield_row(title, entries, total_separator = ' & '):
    '''
        Build one LaTeX table row: the title, one "value +- error" cell per
        entry and a final cell holding the linear sums of values and errors.

        title: text of the first column
        entries: sequence of items whose index 0 is the value and index 1
                 the error
        total_separator: text placed in front of the total cell
    '''
    row = title
    total_value = 0
    total_error = 0
    for entry in entries:
        row += ' & %.1f $\\pm$ %.1f' % (entry[0], entry[1])
        total_value += entry[0]
        total_error += entry[1]
    row += total_separator + '%.1f $\\pm$ %.1f \\\\' % (total_value, total_error)
    return row


def print_fit_results_table(initial_values, fit_results, channel, toFile = True):
    '''
        Produce a LaTeX table comparing the input ("in") and fitted ("fit")
        event yields of TTJet, SingleTop, V+Jets and QCD, their sums and
        the data, for every bin of the current variable plus a total column.

        initial_values: dictionary sample -> list (one item per bin) of
                        (value, error); must also contain 'data'
        fit_results: same layout, holding the fit output
        channel: channel name, used in caption, label and file name
        toFile: if True the table is written to
                <output_folder>/<variable>/<variable>_fit_results_table_<E>TeV_<channel>.tex,
                otherwise it is printed

        Reads the globals output_folder, variable, met_type and phase_space.
        The bins are taken from the visible or full phase space tables
        depending on phase_space.
    '''
    global output_folder, variable, met_type, phase_space
    bins = None
    bins_latex = None
    if phase_space == 'VisiblePS':
        bins = variable_bins_visiblePS_ROOT[variable]
        bins_latex = variable_bins_visiblePS_latex[variable]
    elif phase_space == 'FullPS':
        bins = variable_bins_ROOT[variable]
        bins_latex = variable_bins_latex[variable]

    samples = ['TTJet', 'SingleTop', 'V+Jets', 'QCD']
    bin_indices = range(len(bins))
    com_energy = measurement_config.centre_of_mass_energy

    printout = '%% ' + '=' * 60
    printout += '\n'
    printout += '%% Fit results for %s variable, %s channel, met type %s \n' % (variable, channel, met_type)
    printout += '%% ' + '=' * 60
    printout += '\n'

    printout += '\\begin{table}[htbp]\n'
    printout += '\\centering\n'
    printout += '\\caption{Fit results for the %s variable\n' % variables_latex[variable]
    printout += 'at a centre-of-mass energy of %d TeV (%s channel).}\n' % (com_energy, channel)
    printout += '\\label{tab:%s_fit_results_%dTeV_%s}\n' % (variable, com_energy, channel)
    printout += '\\resizebox{\\columnwidth}{!} {\n'
    printout += '\\begin{tabular}{l' + 'r' * len(bins) + 'r}\n'
    printout += '\\hline\n'

    header = 'Process'
    for variable_bin in bins:
        header += ' & %s' % (bins_latex[variable_bin])
    header += '& Total \\\\'
    printout += header
    printout += '\n\\hline\n'

    # one "in" and one "fit" row per process, followed by a rule
    for sample in samples:
        in_entries = [initial_values[sample][bin_i] for bin_i in bin_indices]
        fit_entries = [fit_results[sample][bin_i] for bin_i in bin_indices]
        printout += _format_yield_row('%s in' % samples_latex[sample], in_entries)
        printout += '\n'
        printout += _format_yield_row('%s fit' % samples_latex[sample], fit_entries)
        printout += '\n\\hline\n'

    # sum of all simulated processes, before and after the fit
    sum_in_entries = [_sum_over_samples(initial_values, samples, bin_i) for bin_i in bin_indices]
    sum_fit_entries = [_sum_over_samples(fit_results, samples, bin_i) for bin_i in bin_indices]
    # the total cell of this row has always been written without a leading
    # space; kept so that the produced file does not change
    printout += _format_yield_row('Sum MC in', sum_in_entries, total_separator = '& ')
    printout += '\n'
    printout += _format_yield_row('Sum MC fit', sum_fit_entries)
    printout += '\n\\hline\n'

    data_entries = [initial_values['data'][bin_i] for bin_i in bin_indices]
    printout += _format_yield_row('Data', data_entries)
    printout += '\n\\hline\n'

    printout += '\\end{tabular}\n'
    printout += '}\n'
    printout += '\\end{table}\n'

    if toFile:
        path = output_folder + '/' + variable
        make_folder_if_not_exists(path)
        file_template = path + '/%s_fit_results_table_%dTeV_%s.tex' % (variable, com_energy, channel)
        # "with" closes the file even if the write fails
        with open(file_template, 'w') as output_file:
            output_file.write(printout)
    else:
        # single argument in parentheses: same output with the Python 2
        # print statement and the Python 3 print function
        print(printout)
def make_plots( histograms, category, output_folder, histname, show_ratio = True, show_before_unfolding = False ):
    '''
        Draw the unfolded data together with the predictions and save the
        figure in every format listed in output_formats.

        histograms: dictionary name -> histogram. Must contain 'unfolded'
                    and 'measured', and 'unfolded_with_systematics' when
                    category is 'central'. Every key containing neither
                    'unfolded' nor 'measured' is drawn as a prediction.
        category: only for 'central' the histogram with systematic
                  uncertainties is drawn
        output_folder: base folder for the plots
        histname: file name (without extension); the channel is derived
                  from it ('electron', 'muon', otherwise 'combined')
        show_ratio: add a prediction/data panel below the main plot
        show_before_unfolding: also draw the measured distribution

        Reads the globals variable and phase_space.
    '''
    global variable, phase_space

    channel = 'electron'
    if 'electron' in histname:
        channel = 'electron'
    elif 'muon' in histname:
        channel = 'muon'
    else:
        channel = 'combined'

    # plot with matplotlib
    hist_data = histograms['unfolded']
    if category == 'central':
        hist_data_with_systematics = histograms['unfolded_with_systematics']
    hist_measured = histograms['measured']

    hist_data.markersize = 2
    hist_data.marker = 'o'

    if category == 'central':
        hist_data_with_systematics.markersize = 2
        hist_data_with_systematics.marker = 'o'

    hist_measured.markersize = 2
    hist_measured.marker = 'o'
    hist_measured.color = 'red'

    plt.figure( figsize = CMS.figsize, dpi = CMS.dpi, facecolor = CMS.facecolor )
    if show_ratio:
        gs = gridspec.GridSpec( 2, 1, height_ratios = [5, 1] )
        axes = plt.subplot( gs[0] )
    else:
        axes = plt.axes()
        # without a ratio panel the main plot carries the x axis title
        if variable in ['NJets', 'abs_lepton_eta', 'lepton_eta']:
            plt.xlabel( '$%s$' % variables_latex[variable], CMS.x_axis_title )
        else:
            plt.xlabel( '$%s$ [GeV]' % variables_latex[variable], CMS.x_axis_title )

    if not variable in ['NJets']:
        axes.minorticks_on()
    if variable in ['NJets', 'abs_lepton_eta', 'lepton_eta']:
        plt.ylabel( r'$\frac{1}{\sigma}  \frac{d\sigma}{d' + variables_latex[variable] + '}$', CMS.y_axis_title )
    else:
        plt.ylabel( r'$\frac{1}{\sigma}  \frac{d\sigma}{d' + variables_latex[variable] + r'} \left[\mathrm{GeV}^{-1}\right]$', CMS.y_axis_title )
    plt.tick_params( **CMS.axis_label_major )
    if not variable in ['NJets']:
        plt.tick_params( **CMS.axis_label_minor )

    hist_data.visible = True
    if category == 'central':
        hist_data_with_systematics.visible = True
        rplt.errorbar( hist_data_with_systematics, axes = axes, label = 'do_not_show', xerr = None, capsize = 0, elinewidth = 2, zorder = len( histograms ) + 1 )
    rplt.errorbar( hist_data, axes = axes, label = 'do_not_show', xerr = None, capsize = 15, capthick = 3, elinewidth = 2, zorder = len( histograms ) + 2 )
    # this makes a nicer legend entry
    rplt.errorbar( hist_data, axes = axes, label = 'data', xerr = None, yerr = False, zorder = len( histograms ) + 3 )

    if show_before_unfolding:
        rplt.errorbar( hist_measured, axes = axes, label = 'data (before unfolding)', xerr = None, zorder = len( histograms ) )

    # dash pattern per prediction; predictions without an entry are drawn
    # with their default line style
    dashes = {}
    for key, hist in sorted( histograms.items() ):
        zorder = sorted( histograms, reverse = False ).index( key )
        # powhegPythia8 is pinned to zorder len(histograms) - 3 and all
        # other non-unfolded histograms are kept below that value
        if key == 'powhegPythia8' and zorder != len(histograms) - 3:
            zorder = len(histograms) - 3
        elif key != 'powhegPythia8' and not 'unfolded' in key:
            while zorder >= len(histograms) - 3:
                zorder = zorder - 1

        if not 'unfolded' in key and not 'measured' in key:
            hist.linewidth = 4
            # setting colours
            linestyle = None
            if 'amcatnlo_HERWIG' in key or 'massdown' in key:
                hist.SetLineColor( kBlue )
                dashes[key] = [25,5,5,5,5,5,5,5]
            elif 'madgraphMLM' in key or 'scaledown' in key:
                hist.SetLineColor( 417 )
                dashes[key] = [5,5]
            elif 'MADGRAPH_ptreweight' in key:
                hist.SetLineColor( kBlack )
            elif 'powhegPythia8' in key:
                linestyle = 'solid'
                dashes[key] = None
                hist.SetLineColor( 633 )
            elif 'massup' in key or 'amcatnlo' in key:
                hist.SetLineColor( 807 )
                dashes[key] = [20,5]
            elif 'MCATNLO' in key or 'scaleup' in key:
                hist.SetLineColor( 619 )
                dashes[key] = [5,5,10,5]

            if linestyle != None:
                hist.linestyle = linestyle
            line, h = rplt.hist( hist, axes = axes, label = measurements_latex[key], zorder = zorder )

            # .get(): not every branch above registers a dash pattern
            # (e.g. MADGRAPH_ptreweight), a plain lookup raised KeyError
            key_dashes = dashes.get( key )
            if key_dashes is not None:
                line.set_dashes( key_dashes )
                h.set_dashes( key_dashes )

    handles, labels = axes.get_legend_handles_labels()
    # making data first in the list
    data_label_index = labels.index( 'data' )
    data_handle = handles[data_label_index]
    labels.remove( 'data' )
    handles.remove( data_handle )
    labels.insert( 0, 'data' )
    handles.insert( 0, data_handle )

    # only labels listed in labelOrder end up in the legend (this drops
    # the 'do_not_show' entries), in exactly this order
    new_handles, new_labels = [], []
    zipped = dict( zip( labels, handles ) )
    labelOrder = ['data',
        measurements_latex['powhegPythia8'],
        measurements_latex['amcatnlo'],
        measurements_latex['amcatnlo_HERWIG'],
        measurements_latex['madgraphMLM'],
        measurements_latex['scaleup'],
        measurements_latex['scaledown'],
        measurements_latex['massup'],
        measurements_latex['massdown']
    ]
    for label in labelOrder:
        if label in labels:
            new_handles.append(zipped[label])
            new_labels.append(label)

    legend_location = (0.97, 0.82)
    if variable == 'MT':
        legend_location = (0.05, 0.82)
    elif variable == 'ST':
        legend_location = (0.97, 0.82)
    elif variable == 'WPT':
        legend_location = (1.0, 0.84)
    elif variable == 'abs_lepton_eta':
        legend_location = (1.0, 0.94)
    plt.legend( new_handles, new_labels, numpoints = 1, prop = CMS.legend_properties, frameon = False, bbox_to_anchor=legend_location,
                bbox_transform=plt.gcf().transFigure )
    label, channel_label = get_cms_labels( channel )
    # title
    plt.title( label,loc='right', **CMS.title )
    # CMS text
    # note: fontweight/weight does not change anything as we use Latex text!!!
    logo_location = (0.05, 0.98)
    prelim_location = (0.05, 0.92)
    channel_location = ( 0.05, 0.86)
    if variable == 'WPT':
        logo_location = (0.03, 0.98)
        prelim_location = (0.03, 0.92)
        channel_location = (0.03, 0.86)
    elif variable == 'abs_lepton_eta':
        logo_location = (0.03, 0.98)
        prelim_location = (0.03, 0.92)
        channel_location = (0.03, 0.86)
    plt.text(logo_location[0], logo_location[1], r"\textbf{CMS}", transform=axes.transAxes, fontsize=42,
        verticalalignment='top',horizontalalignment='left')
    # preliminary
    plt.text(prelim_location[0], prelim_location[1], r"\emph{Preliminary}",
                 transform=axes.transAxes, fontsize=42,
                 verticalalignment='top',horizontalalignment='left')
    # channel text
    plt.text(channel_location[0], channel_location[1], r"\emph{%s}" %channel_label, transform=axes.transAxes, fontsize=40,
        verticalalignment='top',horizontalalignment='left')

    # never show negative values and leave head room for legend and text
    ylim = axes.get_ylim()
    if ylim[0] < 0:
        axes.set_ylim( ymin = 0.)
    if variable == 'WPT':
        axes.set_ylim(ymax = ylim[1]*1.3)
    elif variable == 'abs_lepton_eta':
        axes.set_ylim(ymax = ylim[1]*1.3)
    else :
        axes.set_ylim(ymax = ylim[1]*1.2)

    if show_ratio:
        plt.setp( axes.get_xticklabels(), visible = False )
        ax1 = plt.subplot( gs[1] )
        if not variable in ['NJets']:
            ax1.minorticks_on()
        # setting the x_limits identical to the main plot
        x_limits = axes.get_xlim()
        ax1.set_xlim(x_limits)

        ax1.yaxis.set_major_locator( MultipleLocator( 0.5 ) )
        if not variable in ['NJets']:
            ax1.yaxis.set_minor_locator( MultipleLocator( 0.1 ) )

        if variable in ['NJets', 'abs_lepton_eta', 'lepton_eta']:
            plt.xlabel('$%s$' % variables_latex[variable], CMS.x_axis_title )
        else:
            plt.xlabel( '$%s$ [GeV]' % variables_latex[variable], CMS.x_axis_title )

        plt.tick_params( **CMS.axis_label_major )
        if not variable in ['NJets']:
            plt.tick_params( **CMS.axis_label_minor )
        plt.ylabel( '$\\frac{\\textrm{pred.}}{\\textrm{data}}$', CMS.y_axis_title )
        ax1.yaxis.set_label_coords(-0.115, 0.8)
        #draw a horizontal line at y=1 for data
        plt.axhline(y = 1, color = 'black', linewidth = 2)

        for key, hist in sorted( histograms.items() ):
            if not 'unfolded' in key and not 'measured' in key:
                ratio = hist.Clone()
                ratio.Divide( hist_data ) #divide by data
                line, h = rplt.hist( ratio, axes = ax1, label = 'do_not_show' )
                # same safe lookup as in the main panel
                key_dashes = dashes.get( key )
                if key_dashes is not None:
                    h.set_dashes( key_dashes )

        stat_lower = hist_data.Clone()
        stat_upper = hist_data.Clone()
        syst_lower = hist_data.Clone()
        syst_upper = hist_data.Clone()

        # plot error bands on data in the ratio plot
        stat_errors = graph_to_value_errors_tuplelist(hist_data)
        if category == 'central':
            syst_errors = graph_to_value_errors_tuplelist(hist_data_with_systematics)
        for bin_i in range( 1, hist_data.GetNbinsX() + 1 ):
            # the bands are relative uncertainties around 1
            stat_value, stat_error, _ = stat_errors[bin_i-1]
            stat_rel_error = stat_error/stat_value
            stat_lower.SetBinContent( bin_i, 1 - stat_rel_error )
            stat_upper.SetBinContent( bin_i, 1 + stat_rel_error )
            if category == 'central':
                syst_value, syst_error_down, syst_error_up  = syst_errors[bin_i-1]
                syst_rel_error_down = syst_error_down/syst_value
                syst_rel_error_up = syst_error_up/syst_value
                syst_lower.SetBinContent( bin_i, 1 - syst_rel_error_down )
                syst_upper.SetBinContent( bin_i, 1 + syst_rel_error_up )
        if category == 'central':
            rplt.fill_between( syst_lower, syst_upper, ax1,
                               color = 'yellow' )

        rplt.fill_between( stat_upper, stat_lower, ax1, color = '0.75',
                            )

        loc = 'upper left'
        # legend for ratio plot
        p_stat = mpatches.Patch(facecolor='0.75', label='Stat.', edgecolor='black' )
        p_stat_and_syst = mpatches.Patch(facecolor='yellow', label=r'Stat. $\oplus$ Syst.', edgecolor='black' )
        l1 = ax1.legend(handles = [p_stat, p_stat_and_syst], loc = loc,
                     frameon = False, prop = {'size':26}, ncol = 2)
        ax1.add_artist(l1)

        # variable specific range and tick spacing of the ratio panel
        if variable == 'MET':
            ax1.set_ylim( ymin = 0.8, ymax = 1.2 )
            ax1.yaxis.set_major_locator( MultipleLocator( 0.5 ) )
        if variable == 'MT':
            ax1.set_ylim( ymin = 0.8, ymax = 1.2 )
            ax1.yaxis.set_major_locator( MultipleLocator( 0.2 ) )
            ax1.yaxis.set_minor_locator( MultipleLocator( 0.1 ) )
        elif variable == 'HT':
            ax1.set_ylim( ymin = 0.8, ymax = 1.37 )
            ax1.yaxis.set_major_locator( MultipleLocator( 0.2 ) )
            ax1.yaxis.set_minor_locator( MultipleLocator( 0.1 ) )
        elif variable == 'ST':
            ax1.set_ylim( ymin = 0.7, ymax = 1.5 )
            ax1.yaxis.set_major_locator( MultipleLocator( 0.5 ) )
            ax1.yaxis.set_minor_locator( MultipleLocator( 0.1 ) )
        elif variable == 'WPT':
            ax1.set_ylim( ymin = 0.8, ymax = 1.2 )
            ax1.yaxis.set_major_locator( MultipleLocator( 0.5 ) )
            ax1.yaxis.set_minor_locator( MultipleLocator( 0.1 ) )
        elif variable == 'NJets':
            ax1.set_ylim( ymin = 0.7, ymax = 1.5 )
        elif variable == 'abs_lepton_eta':
            ax1.set_ylim( ymin = 0.8, ymax = 1.2 )
            ax1.yaxis.set_major_locator( MultipleLocator( 0.2 ) )
            ax1.yaxis.set_minor_locator( MultipleLocator( 0.1 ) )
        elif variable == 'lepton_pt':
            ax1.set_ylim( ymin = 0.8, ymax = 1.3 )
            ax1.yaxis.set_major_locator( MultipleLocator( 0.2 ) )
            ax1.yaxis.set_minor_locator( MultipleLocator( 0.1 ) )

    if CMS.tight_layout:
        plt.tight_layout()

    path = '{output_folder}/{centre_of_mass_energy}TeV/{phaseSpace}/{variable}/'
    path = path.format(
            output_folder = output_folder,
            centre_of_mass_energy = measurement_config.centre_of_mass_energy,
            phaseSpace = phase_space,
            variable = variable
            )
    make_folder_if_not_exists( path )
    for output_format in output_formats:
        filename = path + '/' + histname + '.' + output_format
        plt.savefig( filename )

    # drop the local references and close the figure before collecting
    del hist_data, hist_measured
    plt.close()
    gc.collect()
def print_error_table(central_values, errors, channel, toFile = True, print_before_unfolding = False):
    '''
        Produce a LaTeX table with the relative uncertainties (in per cent)
        of every source in all_measurements for each bin of the current
        variable, followed by the total statistical, total systematic and
        total uncertainty.

        central_values: dictionary with the per-bin results. Used keys are
                        '<stage>' (value, error),
                        '<stage>_with_systematics_only' and
                        '<stage>_with_systematics' (value plus two errors),
                        where <stage> is 'measured' if print_before_unfolding
                        is set and 'unfolded' otherwise
        errors: dictionary source -> list of absolute errors per bin
        channel: channel name, used in caption, label and file name
        toFile: write the table to a .tex file below
                <output_folder>/<variable>/ (sub-folder before_unfolding if
                print_before_unfolding is set); otherwise print it
        print_before_unfolding: use the measured instead of the unfolded
                                results; also skips make_error_plot

        Reads the globals output_folder, variable, met_type, b_tag_bin,
        all_measurements and phase_space.
    '''
    global output_folder, variable, met_type, b_tag_bin, all_measurements, phase_space
    bins = None
    bins_latex = None
    binEdges = None
    variable_latex = variables_latex[variable]
    if phase_space == 'VisiblePS':
        bins = variable_bins_visiblePS_ROOT[variable]
        bins_latex = variable_bins_visiblePS_latex[variable]
        binEdges = bin_edges_vis[variable]
    elif phase_space == 'FullPS':
        bins = variable_bins_ROOT[variable]
        bins_latex = variable_bins_latex[variable]
        binEdges = bin_edges_full[variable]

    # prefix of the central_values keys to read from
    stage = 'measured' if print_before_unfolding else 'unfolded'
    com_energy = measurement_config.centre_of_mass_energy

    printout = '%% ' + '=' * 60
    printout += '\n'
    printout += '%% Systematics table for %s variable, %s channel, met type %s, %s b-tag region\n' % (variable, channel, met_type, b_tag_bin)
    if print_before_unfolding:
        printout += '%% BEFORE UNFOLDING\n'
    printout += '%% ' + '=' * 60
    printout += '\n'

    printout += '\\begin{table}[htbp]\n'
    printout += '\\centering\n'
    printout += '\\caption{Systematic uncertainties for the normalised \\ttbar cross section measurement with respect to %s variable\n' % variable_latex
    printout += 'at a centre-of-mass energy of %d TeV ' % com_energy
    if channel == 'combined' or channel == "combinedBeforeUnfolding":
        printout += '(combination of electron and muon channels).}\n'
    else:
        printout += '(%s channel).}\n' % channel
    printout += '\\label{tab:%s_systematics_%dTeV_%s}\n' % (variable, com_energy, channel)
    if variable == 'MT':
        printout += '\\resizebox*{!}{\\textheight} {\n'
    else:
        printout += '\\resizebox{\\columnwidth}{!} {\n'
    printout += '\\begin{tabular}{l' + 'r' * len(bins) + '}\n'
    printout += '\\hline\n'

    header = 'Uncertainty source '
    rows = {}

    assert(len(bins) == len(errors['central']))
    assert(len(bins) == len(central_values[stage]))

    # relative errors per source, handed to make_error_plot further down
    errorHists = {}
    errorHists['statistical'] = []
    for source in all_measurements:
        errorHists[source] = []

    # for these variables the MET systematics (except JES/JER) are left out
    skip_met_systematics = variable in ('HT', 'NJets', 'lepton_pt', 'abs_lepton_eta')

    for bin_i, variable_bin in enumerate(bins):
        header += '& %s' % (bins_latex[variable_bin])
        central_value = central_values[stage][bin_i][0]

        for source in all_measurements:
            if skip_met_systematics and source in measurement_config.met_systematics and not 'JES' in source and not 'JER' in source:
                continue
            abs_error = errors[source][bin_i]
            relative_error = getRelativeError(central_value, abs_error)
            errorHists[source].append(relative_error)
            text = '%.2f' % (relative_error * 100)
            # first bin of a source creates the row (title + value), the
            # following bins only append their value
            if source in rows:
                rows[source].append(text)
            elif met_type in source:
                rows[source] = [measurements_latex[source.replace(met_type, '')] + ' (\\%)', text]
            elif source in met_systematics_latex:
                rows[source] = [met_systematics_latex[source] + ' (\\%)', text]
            else:
                rows[source] = [measurements_latex[source] + ' (\\%)', text]

    header += ' \\\\'
    printout += header
    printout += '\n\\hline\n'

    for source in sorted(rows.keys()):
        if source == 'central':
            continue
        for item in rows[source]:
            printout += item + ' & '
        # remove the separator written after the last cell
        printout = printout.rstrip('& ')
        printout += ' \\\\ \n'

    #append the total statistical error to the table
    printout += '\\hline \n'
    total_line = 'Total Stat. (\\%)'
    for bin_i, variable_bin in enumerate(bins):
        value, error = central_values[stage][bin_i]
        relativeError = getRelativeError(value, error)
        errorHists['statistical'].append(relativeError)
        total_line += ' & %.2f ' % (relativeError * 100)
    printout += total_line + '\\\\ \n'

    if not print_before_unfolding:
        make_error_plot( errorHists, binEdges )

    #append the total systematic error to the table
    total_line = 'Total Sys. (\\%)'
    for bin_i, variable_bin in enumerate(bins):
        value, error_up, error_down = central_values[stage + '_with_systematics_only'][bin_i]
        # the larger of the two errors is quoted
        error = max(error_up, error_down)
        relativeError = getRelativeError(value, error)
        total_line += ' & %.2f ' % (relativeError * 100)
    printout += total_line + '\\\\ \n'

    #append the total error to the table
    printout += '\\hline \n'
    total_line = 'Total (\\%)'
    for bin_i, variable_bin in enumerate(bins):
        value, error_up, error_down = central_values[stage + '_with_systematics'][bin_i]
        error = max(error_up, error_down)
        relativeError = getRelativeError(value, error)
        total_line += ' & %.2f ' % (relativeError * 100)
    printout += total_line + '\\\\ \n'
    printout += '\\hline \n'
    printout += '\\end{tabular}\n'
    printout += '}\n'
    printout += '\\end{table}\n'

    if toFile:
        path = output_folder + '/' + variable + '/'
        make_folder_if_not_exists(path)
        file_template = path + '/%s_systematics_%dTeV_%s.tex' % (variable, com_energy, channel)

        if print_before_unfolding:
            make_folder_if_not_exists(path + '/before_unfolding/')
            file_template = file_template.replace(path, path + '/before_unfolding/')
        # "with" closes the file even if the write fails
        with open(file_template, 'w') as output_file:
            output_file.write(printout)
    else:
        # single argument in parentheses: same output with the Python 2
        # print statement and the Python 3 print function
        print(printout)
def compare_qcd_control_regions(variable='MET', met_type='patType1CorrectedPFMet', title='Untitled', channel='electron'):
    ''' Compares the templates from the control regions in different bins
         of the current variable.

        For every fit variable in electron_fit_variables the data-driven
        QCD template (data minus TTJet, V+Jets and SingleTop in the
        QCDConversions region) is built for the first three bins of
        "variable". Each template and their sum (the inclusive template)
        are normalised to unit area and plotted together via
        compare_measurements.

        variable: variable whose bins are compared
        met_type: MET type inserted into the histogram path
        title: title of the plots
        channel: key into channel_latex for the text on the plot

        Plots are saved below
        plots/<E>TeV/fit_variables/<variable>/<fit_variable>/qcd/.
        Sets the global b_tag_bin_ctl.
    '''
    global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template(variable)

    for fit_variable in electron_fit_variables:
        all_hists = {}
        # fit variables with '_bl' in their name use the >= 1 b-tag
        # control region, all others the inclusive one
        if '_bl' in fit_variable:
            b_tag_bin_ctl = '1orMoreBtag'
        else:
            b_tag_bin_ctl = '0orMoreBtag'
        save_path = 'plots/%dTeV/fit_variables/%s/%s/' % (
            measurement_config.centre_of_mass_energy, variable, fit_variable)
        make_folder_if_not_exists(save_path + '/qcd/')

        # only the first max_bins bins of the variable are compared
        max_bins = 3
        for bin_range in variable_bins[0:max_bins]:
            params = {'met_type': met_type,
                      'bin_range': bin_range,
                      'fit_variable': fit_variable,
                      'b_tag_bin': b_tag_bin,
                      'variable': variable}
            fit_variable_distribution = histogram_template % params
            # same histogram path, but in the QCD control region and with
            # the control-region b-tag bin
            qcd_fit_variable_distribution = fit_variable_distribution.replace(
                'Ref selection', 'QCDConversions')
            qcd_fit_variable_distribution = qcd_fit_variable_distribution.replace(
                b_tag_bin, b_tag_bin_ctl)
            # format: histograms['data'][qcd_fit_variable_distribution]
            histograms = get_histograms_from_files(
                [qcd_fit_variable_distribution], histogram_files)
            prepare_histograms(
                histograms,
                rebin=fit_variable_properties[fit_variable]['rebin'],
                scale_factor=measurement_config.luminosity_scale)

            histograms_for_cleaning = {
                'data': histograms['data'][qcd_fit_variable_distribution],
                'V+Jets': histograms['V+Jets'][qcd_fit_variable_distribution],
                'SingleTop': histograms['SingleTop'][qcd_fit_variable_distribution],
                'TTJet': histograms['TTJet'][qcd_fit_variable_distribution]}
            qcd_from_data = clean_control_region(
                histograms_for_cleaning,
                subtract=['TTJet', 'V+Jets', 'SingleTop'])
            # clean
            all_hists[bin_range] = qcd_from_data

        # create the inclusive distributions (before the individual
        # templates are normalised)
        inclusive_hist = deepcopy(all_hists[variable_bins[0]])
        for bin_range in variable_bins[1:max_bins]:
            inclusive_hist += all_hists[bin_range]

        # normalise all histograms; empty ones are left untouched
        for bin_range in variable_bins[0:max_bins]:
            if not all_hists[bin_range].Integral() == 0:
                all_hists[bin_range].Scale(1 / all_hists[bin_range].Integral())
        # same protection for the inclusive template: dividing by an
        # integral of zero raised ZeroDivisionError
        inclusive_integral = inclusive_hist.Integral()
        if not inclusive_integral == 0:
            inclusive_hist.Scale(1 / inclusive_integral)

        # now compare inclusive to all bins
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = fit_variable_properties[
            fit_variable]['x-title']
        histogram_properties.y_axis_title = fit_variable_properties[
            fit_variable]['y-title']
        # the templates are normalised, so the y axis is in arbitrary units
        histogram_properties.y_axis_title = histogram_properties.y_axis_title.replace(
            'Events', 'a.u.')
        histogram_properties.x_limits = [
            fit_variable_properties[fit_variable]['min'],
            fit_variable_properties[fit_variable]['max']]
        histogram_properties.title = title
        histogram_properties.additional_text = channel_latex[
            channel] + ', ' + b_tag_bins_latex[b_tag_bin_ctl]
        histogram_properties.name = variable + '_' + fit_variable + \
            '_' + b_tag_bin_ctl + '_QCD_template_comparison'
        histogram_properties.y_max_scale = 1.5

        # one measurement per bin, labelled "<bin range> GeV" and sorted
        # by label
        measurements = dict(
            (bin_range + ' GeV', histogram)
            for bin_range, histogram in all_hists.items())
        measurements = OrderedDict(sorted(measurements.items()))
        compare_measurements(models={'inclusive': inclusive_hist},
                             measurements=measurements,
                             show_measurement_errors=True,
                             histogram_properties=histogram_properties,
                             save_folder=save_path + '/qcd/',
                             save_as=save_as)
def _unfolding_output_file_name(options, meWeight, outputFileDir, energySuffix, currentName):
    """Pick the output ROOT file path for one generator weight.

    The first matching rule wins, in this order: top eta reweighting, top pT
    reweighting, meWeight 4 (scale up), meWeight 8 (scale down), meWeight in
    9-108 (numbered generator weight), non-central sample, fine binning, and
    finally the plain asymmetric default.

    currentName is returned unchanged when a reweighting option is non-zero
    but neither +1 nor -1.
    """
    if options.applyTopEtaReweighting != 0:
        if options.applyTopEtaReweighting == 1:
            return outputFileDir + "/unfolding_TTJets_%s_asymmetric_withTopEtaReweighting_up.root" % energySuffix
        if options.applyTopEtaReweighting == -1:
            return outputFileDir + "/unfolding_TTJets_%s_asymmetric_withTopEtaReweighting_down.root" % energySuffix
        return currentName
    if options.applyTopPtReweighting != 0:
        if options.applyTopPtReweighting == 1:
            return outputFileDir + "/unfolding_TTJets_%s_asymmetric_withTopPtReweighting_up.root" % energySuffix
        if options.applyTopPtReweighting == -1:
            return outputFileDir + "/unfolding_TTJets_%s_asymmetric_withTopPtReweighting_down.root" % energySuffix
        return currentName
    if meWeight == 4:
        return outputFileDir + "/unfolding_TTJets_%s_asymmetric_scaleUpWeight.root" % (energySuffix)
    if meWeight == 8:
        return outputFileDir + "/unfolding_TTJets_%s_asymmetric_scaleDownWeight.root" % (energySuffix)
    if 9 <= meWeight <= 108:
        return outputFileDir + "/unfolding_TTJets_%s_asymmetric_generatorWeight_%i.root" % (
            energySuffix,
            meWeight,
        )
    if options.sample != "central":
        return outputFileDir + "/unfolding_TTJets_%s_%s_asymmetric.root" % (energySuffix, options.sample)
    if options.fineBinned:
        return outputFileDir + "/unfolding_TTJets_%s.root" % (energySuffix)
    return outputFileDir + "/unfolding_TTJets_%s_asymmetric.root" % energySuffix


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    if denominator == 0:
        return 0.0
    return float(numerator) / denominator


def main():
    """Fill the truth, measured, fake and response histograms used for unfolding.

    Parses the command line options, then for every requested generator
    weight: opens the input file, books the histograms for each variable and
    channel, loops over the unfolding tree to fill them, writes them to a ROOT
    file below unfolding/<energy>TeV/ and finally adds 'combined' directories
    holding the sum of the electron and muon histograms.

    NOTE(review): the block nesting was reconstructed from a source whose
    indentation had been lost. Please confirm that both root_open blocks
    belong inside the generator-weight loop and that all three extra weight
    histograms are filled only for events passing the generator selection.
    """
    parser = OptionParser()
    parser.add_option("--topPtReweighting", dest="applyTopPtReweighting", type="int", default=0)
    parser.add_option("--topEtaReweighting", dest="applyTopEtaReweighting", type="int", default=0)
    parser.add_option("-c", "--centreOfMassEnergy", dest="centreOfMassEnergy", type="int", default=13)
    parser.add_option("--generatorWeight", type="int", dest="generatorWeight", default=-1)
    parser.add_option("--nGeneratorWeights", type="int", dest="nGeneratorWeights", default=1)
    parser.add_option("-s", "--sample", dest="sample", default="central")
    parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False)
    parser.add_option("-n", action="store_true", dest="donothing", default=False)
    parser.add_option("-e", action="store_true", dest="extraHists", default=False)
    parser.add_option("-f", action="store_true", dest="fineBinned", default=False)
    (options, _) = parser.parse_args()
    measurement_config = XSectionConfig(options.centreOfMassEnergy)

    # Input file name
    file_name = "crap.root"
    if int(options.centreOfMassEnergy) == 13:
        # file_name = fileNames['13TeV'][options.sample]
        file_name = getFileName("13TeV", options.sample, measurement_config)
        # if options.generatorWeight >= 0:
        #     file_name = 'localInputFile.root'
    else:
        # NOTE: execution continues with the placeholder input file name
        print("Error: Unrecognised centre of mass energy.")

    # nGeneratorWeights = How many PDF weights do you want to run in 1 job (specified in runJobsCrab.py)
    if options.nGeneratorWeights > 1:
        generatorWeightsToRun = [options.generatorWeight + i for i in range(options.nGeneratorWeights)]
    # generatorWeights = 1 for PDF Variations
    # generatorWeights either 4 or 8 for alpha_s, renormalisation, hadronisation
    elif options.generatorWeight >= 0:
        generatorWeightsToRun = [options.generatorWeight]
    else:
        generatorWeightsToRun = [-1]

    # Output file name
    outputFileName = "crap.root"
    outputFileDir = "unfolding/%sTeV/" % options.centreOfMassEnergy
    make_folder_if_not_exists(outputFileDir)
    energySuffix = "%sTeV" % (options.centreOfMassEnergy)

    # Sample dependent choices that do not change from event to event
    metVariables = ["MET", "ST", "WPT"]
    isMetSystematic = options.sample in measurement_config.met_systematics
    treeSuffixes = {"jesup": "_JESUp", "jesdown": "_JESDown", "jerup": "_JERUp", "jerdown": "_JERDown"}
    jetSystematicIndices = {"jesup": 2, "jesdown": 3, "jerup": 0, "jerdown": 1}
    leptonWeightBranch = {
        "leptonup": "LeptonEfficiencyCorrectionUp",
        "leptondown": "LeptonEfficiencyCorrectionDown",
    }.get(options.sample, "LeptonEfficiencyCorrection")
    bjetWeightBranch = {
        "bjetup": "BJetUpWeight",
        "bjetdown": "BJetDownWeight",
        "lightjetup": "LightJetUpWeight",
        "lightjetdown": "LightJetDownWeight",
    }.get(options.sample, "BJetWeight")

    # Histogram names, grouped by the axes they are booked with
    truthHistNames = ("truth", "truthVis", "truth_parton")
    recoHistNames = (
        "measured",
        "measuredVis",
        "measured_without_fakes",
        "measuredVis_without_fakes",
        "fake",
        "fakeVis",
    )
    responseHistNames = (
        "response",
        "response_without_fakes",
        "responseVis_without_fakes",
        "response_parton",
        "response_without_fakes_parton",
    )

    for meWeight in generatorWeightsToRun:
        outputFileName = _unfolding_output_file_name(
            options, meWeight, outputFileDir, energySuffix, outputFileName
        )

        with root_open(file_name, "read") as f, root_open(outputFileName, "recreate") as out:
            # Get the tree
            treeName = "TTbar_plus_X_analysis/Unfolding/Unfolding" + treeSuffixes.get(options.sample, "")
            tree = f.Get(treeName)
            nEntries = tree.GetEntries()
            # weightTree = f.Get('TTbar_plus_X_analysis/Unfolding/GeneratorSystematicWeights')
            # if meWeight >= 0 :
            #     tree.AddFriend('TTbar_plus_X_analysis/Unfolding/GeneratorSystematicWeights')
            #     tree.SetBranchStatus('genWeight_*',1)
            #     tree.SetBranchStatus('genWeight_%i' % meWeight, 1)

            # For variables where you want bins to be symmetric about 0, use abs(variable)
            # (but also make plots for signed variable)
            allVariablesBins = bin_edges_vis.copy()
            for variable in bin_edges_vis:
                if "Rap" in variable:
                    allVariablesBins["abs_%s" % variable] = [0, bin_edges_vis[variable][-1]]

            # The same selection of variables is used for booking, filling and
            # writing, so a variable that was never booked is never looked up.
            variablesToRun = [
                variable
                for variable in allVariablesBins
                if not (options.debug and variable != "HT")
                and not (isMetSystematic and variable not in metVariables)
            ]

            recoVariableNames = {}
            genVariable_particle_names = {}
            genVariable_parton_names = {}
            # Element of the reco branch to read, per variable (None: branch is used directly)
            sysIndices = {}
            histograms = {}
            outputDirs = {}

            for variable in variablesToRun:
                outputDirs[variable] = {}
                histograms[variable] = {}

                #
                # Variable names
                #
                recoVariableName = branchNames[variable]
                sysIndex = None
                if variable in metVariables:
                    if options.sample in jetSystematicIndices:
                        recoVariableName += "_METUncertainties"
                        sysIndex = jetSystematicIndices[options.sample]
                    elif isMetSystematic:
                        recoVariableName += "_METUncertainties"
                        sysIndex = measurement_config.met_systematics[options.sample]
                sysIndices[variable] = sysIndex

                recoVariableNames[variable] = recoVariableName
                genVariable_particle_names[variable] = (
                    genBranchNames_particle[variable] if variable in genBranchNames_particle else None
                )
                genVariable_parton_names[variable] = (
                    genBranchNames_parton[variable] if variable in genBranchNames_parton else None
                )

                for channel in channels:
                    # Make dir in output file
                    outputDirName = variable + "_" + channel.outputDirName
                    outputDirs[variable][channel.channelName] = out.mkdir(outputDirName)

                    #
                    # Book histograms
                    #
                    if options.fineBinned:
                        minVar = trunc(allVariablesBins[variable][0])
                        maxVar = trunc(
                            max(
                                tree.GetMaximum(genVariable_particle_names[variable]),
                                tree.GetMaximum(recoVariableNames[variable]),
                            )
                            * 1.2
                        )
                        nBins = int(maxVar - minVar)
                        if variable in ("lepton_eta", "bjets_eta"):
                            maxVar = 2.5
                            minVar = -2.5
                            nBins = 1000
                        elif "abs" in variable and "eta" in variable:
                            maxVar = 3.0
                            minVar = 0.0
                            nBins = 1000
                        elif "Rap" in variable:
                            maxVar = 3.0
                            minVar = -3.0
                            nBins = 1000
                        elif "NJets" in variable:
                            maxVar = 20.5
                            minVar = 3.5
                            nBins = 17
                        truthAxis = (nBins, minVar, maxVar)
                        recoAxis = (nBins, minVar, maxVar)
                    else:
                        truthAxis = (allVariablesBins[variable],)
                        recoAxis = (reco_bin_edges_vis[variable],)

                    histograms[variable][channel.channelName] = {}
                    h = histograms[variable][channel.channelName]
                    # 1D histograms
                    for histName in truthHistNames:
                        h[histName] = Hist(*truthAxis, name=histName)
                    for histName in recoHistNames:
                        h[histName] = Hist(*recoAxis, name=histName)
                    # 2D histograms: reco on the first axis, truth on the second
                    for histName in responseHistNames:
                        h[histName] = Hist2D(*(recoAxis + truthAxis), name=histName)

                    # Some interesting histograms
                    h["puOffline"] = Hist(20, 0, 2, name="puWeights_offline")
                    h["eventWeightHist"] = Hist(100, -2, 2, name="eventWeightHist")
                    h["genWeightHist"] = Hist(100, -2, 2, name="genWeightHist")
                    h["offlineWeightHist"] = Hist(100, -2, 2, name="offlineWeightHist")
                    h["phaseSpaceInfoHist"] = Hist(10, 0, 1, name="phaseSpaceInfoHist")

            # Counters for studying phase space
            nVis = {c.channelName: 0 for c in channels}
            nVisNotOffline = {c.channelName: 0 for c in channels}
            nOffline = {c.channelName: 0 for c in channels}
            nOfflineNotVis = {c.channelName: 0 for c in channels}
            nFull = {c.channelName: 0 for c in channels}
            nOfflineSL = {c.channelName: 0 for c in channels}

            # Event Loop
            # for event, weight in zip(tree,weightTree):
            for n, event in enumerate(tree, 1):
                branch = event.__getattr__
                if not n % 100000:
                    print("Processing event %.0f Progress : %.2g %%" % (n, float(n) / nEntries * 100))
                # if n == 100000: break

                #
                # Weights and selection
                #

                # Pileup weight
                # Don't apply if calculating systematic
                pileupWeight = event.PUWeight
                if options.sample == "pileupSystematic":
                    pileupWeight = 1

                # Generator level weight
                genWeight = event.EventWeight * measurement_config.luminosity_scale

                # Lepton and b jet weights (nominal or shifted, depending on the sample)
                leptonWeight = getattr(event, leptonWeightBranch)
                bjetWeight = getattr(event, bjetWeightBranch)

                # Offline level weights
                offlineWeight = event.EventWeight * measurement_config.luminosity_scale
                offlineWeight *= pileupWeight
                offlineWeight *= bjetWeight
                offlineWeight *= leptonWeight

                # Generator weight
                # Scale up/down, pdf
                if meWeight >= 0:
                    meWeightValue = branch("genWeight_%i" % meWeight)
                    genWeight *= meWeightValue
                    offlineWeight *= meWeightValue

                if options.applyTopPtReweighting != 0:
                    ptWeight = calculateTopPtWeight(
                        branch("lepTopPt_parton"), branch("hadTopPt_parton"), options.applyTopPtReweighting
                    )
                    offlineWeight *= ptWeight
                    genWeight *= ptWeight

                if options.applyTopEtaReweighting != 0:
                    etaWeight = calculateTopEtaWeight(
                        branch("lepTopRap_parton"), branch("hadTopRap_parton"), options.applyTopEtaReweighting
                    )
                    offlineWeight *= etaWeight
                    genWeight *= etaWeight

                for channel in channels:
                    channelName = channel.channelName

                    # Generator and offline level selection; an unknown channel selects nothing
                    genSelection = False
                    genSelectionVis = False
                    offlineSelection = False
                    if channelName == "muPlusJets":
                        genSelection = event.isSemiLeptonicMuon == 1
                        genSelectionVis = event.isSemiLeptonicMuon == 1 and event.passesGenEventSelection == 1
                        offlineSelection = event.passSelection == 1
                    elif channelName == "ePlusJets":
                        genSelection = event.isSemiLeptonicElectron == 1
                        genSelectionVis = event.isSemiLeptonicElectron == 1 and event.passesGenEventSelection == 1
                        offlineSelection = event.passSelection == 2

                    # Fake selection
                    fakeSelection = offlineSelection and not genSelection
                    fakeSelectionVis = offlineSelection and not genSelectionVis

                    # Phase space info
                    if genSelection:
                        nFull[channelName] += genWeight
                        if offlineSelection:
                            nOfflineSL[channelName] += genWeight
                    if genSelectionVis:
                        nVis[channelName] += genWeight
                        if not offlineSelection:
                            nVisNotOffline[channelName] += genWeight
                    if offlineSelection:
                        nOffline[channelName] += offlineWeight
                        if not genSelectionVis:
                            nOfflineNotVis[channelName] += offlineWeight

                    for variable in variablesToRun:
                        #
                        # Variable to plot
                        #
                        recoVariable = branch(recoVariableNames[variable])
                        sysIndex = sysIndices[variable]
                        # Both fake selections require offlineSelection, so testing it alone is enough
                        if sysIndex is not None and offlineSelection:
                            recoVariable = recoVariable[sysIndex]
                        if "abs" in variable:
                            recoVariable = abs(recoVariable)

                        # With TUnfold, reco variable never goes in the overflow (or underflow)
                        if not options.fineBinned:
                            recoVariable = min(recoVariable, allVariablesBins[variable][-1] - 0.000001)

                        genVariable_particle = branch(genVariable_particle_names[variable])
                        if "abs" in variable:
                            genVariable_particle = abs(genVariable_particle)

                        #
                        # Fill histograms
                        #
                        if options.donothing:
                            continue
                        histogramsToFill = histograms[variable][channelName]
                        # Reco value used for events that are generated but not reconstructed
                        belowFirstBin = allVariablesBins[variable][0] - 1

                        if genSelection:
                            histogramsToFill["truth"].Fill(genVariable_particle, genWeight)
                        if genSelectionVis:
                            histogramsToFill["truthVis"].Fill(genVariable_particle, genWeight)

                        if offlineSelection:
                            histogramsToFill["measured"].Fill(recoVariable, offlineWeight)
                            histogramsToFill["measuredVis"].Fill(recoVariable, offlineWeight)
                            if genSelectionVis:
                                histogramsToFill["measuredVis_without_fakes"].Fill(recoVariable, offlineWeight)
                            if genSelection:
                                histogramsToFill["measured_without_fakes"].Fill(recoVariable, offlineWeight)
                            histogramsToFill["response"].Fill(recoVariable, genVariable_particle, offlineWeight)

                        if offlineSelection and genSelection:
                            histogramsToFill["response_without_fakes"].Fill(
                                recoVariable, genVariable_particle, offlineWeight
                            )
                        elif genSelection:
                            histogramsToFill["response_without_fakes"].Fill(
                                belowFirstBin, genVariable_particle, genWeight
                            )

                        if offlineSelection and genSelectionVis:
                            histogramsToFill["responseVis_without_fakes"].Fill(
                                recoVariable, genVariable_particle, offlineWeight
                            )
                        elif genSelectionVis:
                            histogramsToFill["responseVis_without_fakes"].Fill(
                                belowFirstBin, genVariable_particle, genWeight
                            )

                        if fakeSelection:
                            histogramsToFill["fake"].Fill(recoVariable, offlineWeight)
                        if fakeSelectionVis:
                            histogramsToFill["fakeVis"].Fill(recoVariable, offlineWeight)

                        if options.extraHists:
                            if genSelection:
                                histogramsToFill["eventWeightHist"].Fill(event.EventWeight)
                                histogramsToFill["genWeightHist"].Fill(genWeight)
                                histogramsToFill["offlineWeightHist"].Fill(offlineWeight)

            #
            # Output histograms to file
            #
            for variable in variablesToRun:
                for channel in channels:
                    channelName = channel.channelName
                    channelHists = histograms[variable][channelName]

                    # Fill phase space info; a ratio with an empty denominator is stored as 0
                    phaseSpaceInfo = channelHists["phaseSpaceInfoHist"]
                    phaseSpaceInfo.SetBinContent(1, _safe_ratio(nVisNotOffline[channelName], nVis[channelName]))
                    phaseSpaceInfo.SetBinContent(2, _safe_ratio(nOfflineNotVis[channelName], nOffline[channelName]))
                    phaseSpaceInfo.SetBinContent(3, _safe_ratio(nVis[channelName], nFull[channelName]))
                    # Selection efficiency for SL ttbar
                    phaseSpaceInfo.SetBinContent(4, _safe_ratio(nOfflineSL[channelName], nFull[channelName]))
                    # Fraction of offline that are SL
                    phaseSpaceInfo.SetBinContent(5, _safe_ratio(nOfflineSL[channelName], nOffline[channelName]))

                    outputDirs[variable][channelName].cd()
                    for hist in channelHists.values():
                        hist.Write()

        with root_open(outputFileName, "update") as out:
            # Done all channels, now combine the two channels, and output to the same file
            for path, dirs, objects in out.walk():
                if "electron" not in path:
                    continue
                outputDir = out.mkdir(path.replace("electron", "combined"))
                outputDir.cd()
                for histName in objects:
                    h_e = out.Get(path + "/" + histName)
                    h_mu = out.Get(path.replace("electron", "muon") + "/" + histName)
                    h_comb = (h_e + h_mu).Clone(histName)
                    h_comb.Write()
def compare_vjets_btag_regions(variable='MET', met_type='patType1CorrectedPFMet', title='Untitled', channel='electron'):
    ''' Compares the V+Jets template in different b-tag bins.

        For every fit variable in electron_fit_variables and every bin of the
        variable, the V+Jets histogram of the current b_tag_bin is compared
        with the one of the control b-tag bin. Both are normalised to unit
        area before being handed to compare_measurements. Plots are saved
        below plots/<energy>TeV/fit_variables/<variable>/<fit_variable>/vjets/.

        variable -- key into variable_bins_ROOT selecting the binned variable
        met_type -- value substituted for 'met_type' in the histogram path template
        title    -- plot title
        channel  -- key into channel_latex, used for the text on the plot

        Side effect: overwrites the module-level b_tag_bin_ctl.
    '''
    global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl
    b_tag_bin_ctl = '0orMoreBtag'
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template(variable)

    for fit_variable in electron_fit_variables:
        # '_bl' fit variables use the '1orMoreBtag' control bin, all others '0orMoreBtag'
        if '_bl' in fit_variable:
            b_tag_bin_ctl = '1orMoreBtag'
        else:
            b_tag_bin_ctl = '0orMoreBtag'
        save_path = 'plots/%dTeV/fit_variables/%s/%s/' % (
            measurement_config.centre_of_mass_energy, variable, fit_variable)
        make_folder_if_not_exists(save_path + '/vjets/')
        properties = fit_variable_properties[fit_variable]

        # shared by all bins of this fit variable; only the name changes per bin
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = properties['x-title']
        histogram_properties.y_axis_title = properties['y-title'].replace(
            'Events', 'a.u.')
        histogram_properties.x_limits = [properties['min'], properties['max']]
        histogram_properties.title = title
        histogram_properties.additional_text = channel_latex[
            channel] + ', ' + b_tag_bins_latex[b_tag_bin_ctl]
        histogram_properties.y_max_scale = 1.5

        for bin_range in variable_bins:
            params = {
                'met_type': met_type,
                'bin_range': bin_range,
                'fit_variable': fit_variable,
                'b_tag_bin': b_tag_bin,
                'variable': variable,
            }
            fit_variable_distribution = histogram_template % params
            fit_variable_distribution_ctl = fit_variable_distribution.replace(
                b_tag_bin, b_tag_bin_ctl)
            # format: histograms['V+Jets'][fit_variable_distribution]
            histograms = get_histograms_from_files(
                [fit_variable_distribution, fit_variable_distribution_ctl],
                {'V+Jets': histogram_files['V+Jets']})
            prepare_histograms(
                histograms,
                rebin=properties['rebin'],
                scale_factor=measurement_config.luminosity_scale)
            histogram_properties.name = variable + '_' + bin_range + '_' + fit_variable + '_' + b_tag_bin_ctl + '_VJets_template_comparison'

            signal_region_hist = histograms['V+Jets'][fit_variable_distribution]
            control_region_hist = histograms['V+Jets'][fit_variable_distribution_ctl]
            # normalise to unit area; an empty template cannot be normalised
            # and is left untouched instead of dividing by zero
            signal_integral = signal_region_hist.Integral()
            if signal_integral != 0:
                signal_region_hist.Scale(1 / signal_integral)
            # computed after the first Scale because both names may refer to
            # the same histogram when b_tag_bin equals b_tag_bin_ctl
            control_integral = control_region_hist.Integral()
            if control_integral != 0:
                control_region_hist.Scale(1 / control_integral)

            compare_measurements(
                models={'no b-tag': control_region_hist},
                measurements={'$>=$ 2 b-tags': signal_region_hist},
                show_measurement_errors=True,
                histogram_properties=histogram_properties,
                save_folder=save_path + '/vjets/',
                save_as=save_as)
def compare_vjets_templates(variable='MET', met_type='patType1CorrectedPFMet', title='Untitled', channel='electron'):
    ''' Compares the V+jets templates in different bins of the current variable.

        For every fit variable in electron_fit_variables the V+Jets histogram
        of each bin of the variable is read and prepared. Each template and
        their sum (the inclusive template) are normalised to unit area. The
        per-bin templates are passed through spread_x before everything is
        handed to compare_measurements. Plots are saved below
        plots/<energy>TeV/fit_variables/<variable>/<fit_variable>/vjets/.

        variable -- key into variable_bins_ROOT selecting the binned variable
        met_type -- value substituted for 'met_type' in the histogram path template
        title    -- plot title
        channel  -- key into channel_latex, used for the text on the plot
    '''
    global fit_variable_properties, b_tag_bin, save_as
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template(variable)
    max_bins = len(variable_bins)
    bins_to_compare = variable_bins[0:max_bins]

    for fit_variable in electron_fit_variables:
        all_hists = {}
        save_path = 'plots/%dTeV/fit_variables/%s/%s/' % (
            measurement_config.centre_of_mass_energy, variable, fit_variable)
        make_folder_if_not_exists(save_path + '/vjets/')
        properties = fit_variable_properties[fit_variable]

        for bin_range in bins_to_compare:
            params = {
                'met_type': met_type,
                'bin_range': bin_range,
                'fit_variable': fit_variable,
                'b_tag_bin': b_tag_bin,
                'variable': variable,
            }
            fit_variable_distribution = histogram_template % params
            # format: histograms['V+Jets'][fit_variable_distribution]
            histograms = get_histograms_from_files(
                [fit_variable_distribution], histogram_files)
            prepare_histograms(
                histograms,
                rebin=properties['rebin'],
                scale_factor=measurement_config.luminosity_scale)
            all_hists[bin_range] = histograms['V+Jets'][fit_variable_distribution]

        # create the inclusive distribution before the per-bin ones are normalised
        inclusive_hist = deepcopy(all_hists[variable_bins[0]])
        for bin_range in variable_bins[1:max_bins]:
            inclusive_hist += all_hists[bin_range]

        # normalise all histograms to unit area; an empty histogram cannot be
        # normalised and is left untouched instead of dividing by zero
        for bin_range in bins_to_compare:
            integral = all_hists[bin_range].Integral()
            if integral != 0:
                all_hists[bin_range].Scale(1 / integral)
        inclusive_integral = inclusive_hist.Integral()
        if inclusive_integral != 0:
            inclusive_hist.Scale(1 / inclusive_integral)

        # now compare inclusive to all bins
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = properties['x-title']
        histogram_properties.y_axis_title = properties['y-title'].replace(
            'Events', 'a.u.')
        histogram_properties.x_limits = [properties['min'], properties['max']]
        histogram_properties.title = title
        histogram_properties.additional_text = channel_latex[
            channel] + ', ' + b_tag_bins_latex[b_tag_bin]
        histogram_properties.name = variable + '_' + fit_variable + '_' + b_tag_bin + '_VJets_template_comparison'
        histogram_properties.y_max_scale = 1.5

        # legend entries, ordered by label
        measurements = OrderedDict(sorted(
            (bin_range + ' GeV', histogram)
            for bin_range, histogram in all_hists.items()))

        # the bin edges are stored without the lepton prefix
        fit_var = fit_variable.replace('electron_', '').replace('muon_', '')
        graphs = spread_x(list(measurements.values()),
                          fit_variable_bin_edges[fit_var])
        # replace each histogram by its graph, matching them up in label order
        for key, graph in zip(sorted(measurements.keys()), graphs):
            measurements[key] = graph

        compare_measurements(models={'inclusive': inclusive_hist},
                             measurements=measurements,
                             show_measurement_errors=True,
                             histogram_properties=histogram_properties,
                             save_folder=save_path + '/vjets/',
                             save_as=save_as)
def make_plots_ROOT(histograms, category, save_path, histname, channel):
    ''' Draws the unfolded distribution together with the other histograms
        and saves the canvas as png and pdf.

        histograms -- dict of histograms; 'unfolded' and 'measured' must be
                      present, every key containing neither word is drawn as
                      a dashed line and added to the legend
        category   -- sub-folder of the output path
        save_path  -- base folder of the output path
        histname   -- output file stem; also selects the channel label
                      ('electron', 'muon', anything else: combined)
        channel    -- passed to read_unfolded_xsections

        Output files: <save_path>/<variable>/<category>/<histname>_kv<k_value>.png / .pdf
    '''
    global variable, translateOptions, k_value, b_tag_bin, maximum

    # ROOT session and style setup
    ROOT.TH1.SetDefaultSumw2(False)
    ROOT.gROOT.SetBatch(True)
    ROOT.gROOT.ProcessLine('gErrorIgnoreLevel = 1001;')
    plotting.setStyle()
    gStyle.SetTitleYOffset(2.)
    ROOT.gROOT.ForceStyle()

    canvas = Canvas(width=700, height=500)
    canvas.SetLeftMargin(0.18)
    canvas.SetBottomMargin(0.15)
    canvas.SetTopMargin(0.05)
    canvas.SetRightMargin(0.05)
    legend = plotting.create_legend(x0=0.6, y1=0.5)

    # unfolded result: axes, range and marker
    hist_data = histograms['unfolded']
    variable_label = translate_options[variable]
    x_axis = hist_data.GetXaxis()
    x_axis.SetTitle(variable_label + ' [GeV]')
    x_axis.SetTitleSize(0.05)
    y_axis = hist_data.GetYaxis()
    y_axis.SetTitle('#frac{1}{#sigma} #frac{d#sigma}{d' + variable_label + '} [GeV^{-1}]')
    y_axis.SetTitleSize(0.05)
    hist_data.SetMinimum(0)
    hist_data.SetMaximum(maximum[variable])
    hist_data.SetMarkerSize(1)
    hist_data.SetMarkerStyle(8)

    # two copies of the points: one for the total, one for the fit error only
    plotAsym = TGraphAsymmErrors(hist_data)
    plotStatErr = TGraphAsymmErrors(hist_data)

    xsections = read_unfolded_xsections(channel)
    bins = variable_bins_ROOT[variable]
    assert (len(bins) == len(xsections['central']))

    scale = 1  # / width
    for bin_i, _ in enumerate(bins):
        fit_error = xsections['central'][bin_i][1]
        total = calculateTotalUncertainty(xsections, bin_i)
        total_plus, total_minus = symmetriseErrors(
            total['Total+'][0], total['Total-'][0])
        # the total error bar is the fit error and the total uncertainty in quadrature
        error_up = sqrt(fit_error**2 + total_plus**2) * scale
        error_down = sqrt(fit_error**2 + total_minus**2) * scale
        plotStatErr.SetPointEYhigh(bin_i, fit_error * scale)
        plotStatErr.SetPointEYlow(bin_i, fit_error * scale)
        plotAsym.SetPointEYhigh(bin_i, error_up)
        plotAsym.SetPointEYlow(bin_i, error_down)

    gStyle.SetEndErrorSize(20)
    plotAsym.SetLineWidth(2)
    plotStatErr.SetLineWidth(2)
    hist_data.Draw('P')
    plotStatErr.Draw('same P')
    plotAsym.Draw('same P Z')
    legend.AddEntry(hist_data, 'unfolded', 'P')

    # the measured histogram is styled but currently not drawn
    hist_measured = histograms['measured']
    hist_measured.SetMarkerSize(1)
    hist_measured.SetMarkerStyle(20)
    hist_measured.SetMarkerColor(2)
    #hist_measured.Draw('same P')
    #legend.AddEntry(hist_measured, 'measured', 'P')

    # everything else is drawn as a dashed line, in key order
    for key, hist in sorted(histograms.iteritems()):
        if 'unfolded' in key or 'measured' in key:
            continue
        hist.SetLineStyle(7)
        hist.SetLineWidth(2)
        # setting colours; keys matching none of the tags keep their colour
        if any(tag in key for tag in ('POWHEG', 'matchingdown')):
            hist.SetLineColor(kBlue)
        elif any(tag in key for tag in ('MADGRAPH', 'matchingup')):
            hist.SetLineColor(kRed + 1)
        elif any(tag in key for tag in ('MCATNLO', 'scaleup')):
            hist.SetLineColor(kGreen - 3)
        elif 'scaledown' in key:
            hist.SetLineColor(kMagenta + 3)
        hist.Draw('hist same')
        legend.AddEntry(hist, translate_options[key], 'l')

    legend.Draw()

    # text boxes above the frame: channel on the left, CMS label on the right
    mytext = TPaveText(0.5, 0.97, 1, 1.01, "NDC")
    channelLabel = TPaveText(0.18, 0.97, 0.5, 1.01, "NDC")
    if 'electron' in histname:
        label_format = "e, %s, %s, k = %s"
    elif 'muon' in histname:
        label_format = "#mu, %s, %s, k = %s"
    else:
        label_format = "combined, %s, %s, k = %s"
    channelLabel.AddText(
        label_format % ("#geq 4 jets", b_tag_bins_latex[b_tag_bin], k_value))
    mytext.AddText("CMS Preliminary, L = %.1f fb^{-1} at #sqrt{s} = 8 TeV" % (5.8))

    for text_box in (mytext, channelLabel):
        text_box.SetFillStyle(0)
        text_box.SetBorderSize(0)
        text_box.SetTextFont(42)
        text_box.SetTextAlign(13)
    mytext.Draw()
    channelLabel.Draw()

    canvas.Modified()
    canvas.Update()

    path = save_path + '/' + variable + '/' + category
    make_folder_if_not_exists(path)
    for extension in ('.png', '.pdf'):
        canvas.SaveAs(path + '/' + histname + '_kv' + str(k_value) + extension)