def _rename_variables(src):
    """Obfuscate the variable assigned on this line of source.

    Looks for a variable assignment in *src* (via ``patterns.re_var_assignment``)
    and, unless the name is a reserved word, replaces it with a randomly
    generated name. The mapping is remembered in the module-level
    ``variable_mapper`` so the same variable always gets the same new name.

    Parameters
    ----------
    src : str
        A chunk of source code (presumably a single line — TODO confirm
        against the caller).

    Returns
    -------
    str
        *src* with the assigned variable renamed, or unchanged if no
        assignment was found or the name is reserved.
    """
    match = patterns.re_var_assignment.search(src)
    if match and match.group(1) not in utils.reserved_words:
        var = match.group(1)
        # Reuse an existing mapping so repeated assignments stay consistent.
        name = variable_mapper.get(var) or utils.gen_random_name()
        variable_mapper[var] = name
        # BUGFIX: the old pattern r'(\w+)( =)' renamed EVERY `identifier =`
        # in src to the same new name. Target only the matched variable,
        # escaped and bounded so e.g. `foo` does not match inside `barfoo`.
        return re.sub(r'\b' + re.escape(var) + r'( =)', name + r'\1', src)
    return src
def get_dNdeta_in_classifier_bin_interval(sums_classifier_dir, event_counter, classifier_bin_interval):
    """
    Get dN/deta for a given interval of classifier bin indices

    Parameters
    ----------
    sums_classifier_dir : TList
        Sums directory of a classifier
    event_counter : Hist1D
        Event counter histogram with the classifier value on the xaxis
    classifier_bin_interval : list
        classifier value bin edges given as bin indices

    Returns
    -------
    Hist1D
    """
    bin_lo, bin_hi = classifier_bin_interval[0], classifier_bin_interval[1]
    hist_name = "eta_classifier_{0}".format(sums_classifier_dir.GetName())
    h2d = asrootpy(sums_classifier_dir.FindObject(hist_name))
    if not h2d:
        raise ValueError("Could not find histogram {0}".format(hist_name))
    # Restrict the classifier axis to the requested bins, then project onto eta
    h2d.yaxis.set_range(bin_lo, bin_hi)
    projection = asrootpy(h2d.projection_x(gen_random_name()))
    projection.title = "{0} - {1} %".format(100 * bin_lo, 100 * bin_hi)
    # scale by the number of events in this mult_interval and bin width
    n_events = float(event_counter.Integral(bin_lo, bin_hi))
    try:
        projection.Scale(1.0 / n_events, "width")
    except ZeroDivisionError:
        # If this happens, we have empty bins in dN/deta! The stats must suck!
        raise ZeroDivisionError(
            "Your statistics are terrible! Consider increasing the classifier value interval to avoid this"
        )
    return projection
def _rename_functions(src):
    """Obfuscate the function defined on this line of source.

    Searches *src* with ``patterns.re_function``; when a function name is
    found, it is replaced with a freshly generated random name and the
    old -> new mapping is recorded in the module-level ``function_mapper``.

    Parameters
    ----------
    src : str
        A chunk of source code (presumably a single line — TODO confirm
        against the caller).

    Returns
    -------
    str
        *src* with the defined function renamed, or unchanged if no
        function definition was found.
    """
    match = patterns.re_function.search(src)
    if match:
        new_name = utils.gen_random_name()
        function_mapper[match.group(0)] = new_name
        # BUGFIX: the matched text was interpolated into the pattern
        # unescaped; any regex metacharacter in it would corrupt the
        # pattern or raise. Escape it before substitution.
        return re.sub(r'(def )' + re.escape(match.group(0)), r'\1' + new_name, src)
    return src
def get_pT_distribution(results_est_dir, pids, classifier_bin_interval, normalized=False):
    """
    Parameters
    ----------
    results_est_dir : TDirectory
        Directory of a given estimator
    pids : list
        List of strings denoting requested pids
    classifier_bin_interval : tuple
        Lower and upper limit of classifier value for which the p_T
        distribution should be made. This value needs to be given as bin indices!
    normalized : Boolean
        Should the distribution be normalized to yield P(p_T)?

    Returns
    -------
    Hist1D :
        Histogram P(p_T)
    """
    # Sum the per-pid (multiplicity vs pT) histograms into one 2D histogram
    summed_mult_pt = sum(getattr(results_est_dir.mult_pt, pid) for pid in pids)
    # Limit the multiplicity axis to the requested classifier bins and
    # project onto the pT axis
    summed_mult_pt.xaxis.SetRange(*classifier_bin_interval)
    pt_dist = asrootpy(summed_mult_pt.ProjectionY())
    pt_dist.name = gen_random_name()
    event_counter = asrootpy(results_est_dir.event_counter)
    # Scale by the number of events in the interval;
    pt_dist.Scale(1.0 / event_counter.Integral(*classifier_bin_interval))
    if normalized:
        pt_dist.Scale(1.0 / pt_dist.Integral())
    return pt_dist
def get_meanpt_vs_estmult(resutlts_est_dir, pids):
    """
    Create a 1Dprofile for the given pids and the given estimator name

    NOTE(review): the parameter name `resutlts_est_dir` is a typo of
    `results_est_dir` but is kept as-is for keyword-call compatibility.
    """
    # find the mult vs pt histograms for the given pids and sum them up
    summed = sum(asrootpy(getattr(resutlts_est_dir.mult_pt, pid)) for pid in pids)
    profile = summed.ProfileX()
    profile.name = gen_random_name()
    return profile
def get_PNch_vs_estmult(sums, est):
    """
    Parameters
    ----------
    sums : TList
        Sums directory
    est : str
        Estimator name

    Returns
    -------
    Hist1D :
        Counter Histogram for Number of events with Nch in the estimator region
    """
    if not isinstance(sums, ROOT.TList):
        raise TypeError("{0} is not of type ROOT.TList".format(sums))
    # nasty hardcoded reference estimator:
    ref_est = "EtaLt05"
    # Project the (est vs ref) correlation onto the estimator axis
    return asrootpy(
        get_correlation_histogram(sums, est, ref_est).ProjectionX(gen_random_name())
    )
def get_identified_vs_mult(h3d, pdg):
    """
    Return 1D counter histogram of identified particles vs N_ch^est

    Parameters
    ----------
    h3d: Hist3D
        x: est_mult; y: pT; z: pids1
    pdg: str
        pdg code as string

    Return
    ------
    Hist1D:
        x: Nch_est y: counts
    """
    pid_bin = h3d.zaxis.find_bin(pdg)
    # find_bin returns 0 when the label is not present on the axis
    if pid_bin == 0:
        raise ValueError(
            "given pdg ({0}) does not exist in histogram".format(pdg))
    # Select only the requested pid slice, then project z-restricted
    # (y, x) down to the multiplicity axis
    h3d.zaxis.SetRange(pid_bin, pid_bin)
    counter = asrootpy(h3d.Project3D("yx").ProjectionX())
    counter.SetName(gen_random_name())
    return counter
def _add_fuzzed_code(src):
    """
    Add random function and variable declarations on empty lines.

    The functions and variables will never be referenced but will be
    obfuscated like everything else to create confusion.

    Parameters: `src` is iterated line by line and indexed — presumably a
    list of source lines including their trailing newlines (TODO confirm
    against the caller). Returns a new list of lines with random
    variables/functions spliced in.
    """
    # Pool of decoy functions shipped with the package.
    data_dir = os.path.join(os.path.dirname(__file__), '..', 'data')
    with open(data_dir + '/random_code.py', 'r') as f:
        random_code = f.read()
    # Find all functions in the file and get the code for each one of them.
    # Splitting on 'def' drops the keyword, so it is re-prefixed below.
    random_functions = random_code.split('def')
    # Remove empty strings
    random_functions = filter(None, random_functions)
    random_functions = ['def' + rf for rf in random_functions]
    # Each candidate function becomes a list of its lines.
    random_functions = [r.split('\n') for r in random_functions]
    # List comprehensions are preferred over map lambdas

    def leading_spaces(s):
        """Calculate how many spaces a line is indented with."""
        return len(s) - len(s.lstrip())

    new_src = []
    # State while scanning: inside a docstring/comment block, inside a
    # backslash-continued statement, and a running count of unclosed '('.
    in_comment_block = False
    in_multiline_declaration = False
    parenthesis_open = 0
    for idx, line in enumerate(src):
        # Add random code to the line if it does not contain anything.
        # Retain indentation.
        if '\"\"\"' in line:
            # NOTE(review): a line containing two '"""' (one-line docstring)
            # still toggles only once — presumed rare enough to accept.
            in_comment_block = not in_comment_block
        if '\\' in line:
            # Continuation only counts if the backslash ends the line.
            in_multiline_declaration = '\\' == line.strip()[-1]
        # TODO: Check if parentheses are uneven, then we are probably in
        # multiline too.
        parenthesis_open += line.count('(')
        # We do not wanna add code within a comment block. It will create
        # syntax errors. Neither in a block that is opened with parenthesis.
        if in_comment_block or in_multiline_declaration or parenthesis_open:
            parenthesis_open -= line.count(')')
            # Make sure that closing parenthesis are accounted for in next line
            new_src.append(line)
            continue
        # Fetch how much previous line is indented. NOTE(review): for
        # idx == 0 this reads src[-1], the LAST line — verify intended.
        leads = leading_spaces(src[idx - 1])
        # If previous line is beginning of a block we have to add some
        # additional indentation. Assuming it's pep8 compliant, so we use
        # 4 spaces.
        if src[idx - 1].strip() and src[idx - 1].rstrip()[-1] == ':':
            leads += 4
        # Some lines have one space in them so we do some magic to compensate
        indent = ' ' * (leads - leads % 2)
        # Randomly decide if we should add a random variable to the code. It
        # will never be referenced by any code.
        if src[idx - 1].strip() and random.random() > 0.5:
            name = utils.gen_random_name()
            num = str(random.randint(-10000, 10000))
            dec = str(random.random())
            chars = '\'' + utils.gen_random_name() + '\''
            val = random.choice([num, dec, chars])
            new_src.append(indent + name + ' = ' + val + '\n')
        # Non-empty lines are kept verbatim; only empty lines get a
        # whole decoy function inserted below.
        if line.strip():
            new_src.append(line)
            continue
        logger.info('Inserting random code on line %s', len(new_src))
        # Pick a random function to insert
        fun_lines = random.choice(random_functions)
        # Append each line of the function with correct indentation
        for fun_line in fun_lines:
            new_src.append(indent + fun_line + '\n')
    return new_src
def plot_PNch(self):
    """
    Create P(Nch) figures, binned in percentile intervals.

    For every (reference estimator, result estimator) pair, builds two
    log-y figures: P(Nch^est) for events selected in percentile bins of
    the reference estimator, and P(Nch^ref) for events selected in
    percentile bins of the estimator itself. Each figure is written to
    ``self.f_out`` and also collected in the returned list.

    Reads: self.ref_ests, self.results_post, self.considered_ests,
    self.sums, self.nch_edges, self.perc_bins, self.f_out.

    Returns
    -------
    list of Figure
    """
    log.info("Creating P(Nch_est) and P(Nch_refest) histograms")
    # mult_bin_size = 10
    figs = []
    for ref_est_name in self.ref_ests:
        for res_est_dir in get_est_dirs(self.results_post, self.considered_ests):
            est_name = res_est_dir.GetName()
            # Figure properties:
            fig_vs_estmult = Figure()
            fig_vs_refmult = Figure()
            fig_vs_estmult.plot.logy = True
            fig_vs_refmult.plot.logy = True
            fig_vs_estmult.plot.palette = 'colorblind'
            fig_vs_refmult.plot.palette = 'colorblind'
            fig_vs_estmult.legend.position = 'tr'
            fig_vs_refmult.legend.position = 'tr'
            fig_vs_estmult.xtitle = "N_{{ch}}^{{{0}}}".format(est_name)
            fig_vs_refmult.xtitle = "N_{{ch}}^{{{0}}}".format(ref_est_name)
            fig_vs_estmult.ytitle = "P(N_{{ch}}^{{{0}}})".format(est_name)
            fig_vs_refmult.ytitle = "P(N_{{ch}}^{{{0}}})".format(ref_est_name)
            # 2D correlation: x = est multiplicity, y = ref multiplicity
            # (axis roles inferred from the projections below — confirm).
            corr_hist = get_correlation_histogram(self.sums, est_name, ref_est_name)

            # logic when dealing with fixed bins given in Nch:
            # ------------------------------------------------
            # mean_nch_est = corr_hist.GetMean(1)  # mean of x axis
            # nch_max = corr_hist.xaxis.GetNbins()
            # nch_cutoff = mean_nch_est * mean_mult_cutoff_factor
            # nch_bins = [(low, low + mult_bin_size) for low in range(0, int(nch_cutoff), mult_bin_size)]
            # # a large last bin covering the rest:
            # nch_bins += [(nch_bins[-1][2], nch_max)]
            # legend_tmpl = "{} < N_{ch} < {}"

            # logic when dealing with percentile bins:
            # ----------------------------------------
            # event_counter_est = asrootpy(getattr(res_est_dir, "event_counter"))
            legend_tmpl = "{0}% - {1}%"
            fig_vs_estmult.legend.title = "Selected in {0}".format(make_estimator_title(ref_est_name))
            fig_vs_refmult.legend.title = "Selected in {0}".format(make_estimator_title(est_name))
            # WARNING: the following needs tweeking when going back to fixed N_ch bins!
            for nch_bin, perc_bin in zip(self.nch_edges[ref_est_name], self.perc_bins[ref_est_name]):
                # vs est_mult: select a ref-mult slice, project onto x
                corr_hist.xaxis.SetRange(0, 0)  # reset x axis
                corr_hist.yaxis.SetRange(nch_bin[0], nch_bin[1])
                h_vs_est = asrootpy(corr_hist.ProjectionX(gen_random_name()))
                if h_vs_est.Integral() > 0:
                    # normalize to a probability distribution
                    h_vs_est.Scale(1.0 / h_vs_est.Integral())
                    fig_vs_estmult.add_plottable(
                        h_vs_est, legend_tmpl.format(perc_bin[1] * 100, perc_bin[0] * 100))
                else:
                    log.info("No charged particles in {0}*100 percentile bin of estimator {1}. This should not happen".
                             format(perc_bin, ref_est_name))
            for nch_bin, perc_bin in zip(self.nch_edges[est_name], self.perc_bins[est_name]):
                # vs ref_mult: select an est-mult slice, project onto y
                corr_hist.yaxis.SetRange(0, 0)  # reset y axis
                corr_hist.xaxis.SetRange(*nch_bin)
                h_vs_ref = asrootpy(corr_hist.ProjectionY(gen_random_name()))
                if h_vs_ref.Integral() > 0:
                    # normalize to a probability distribution
                    h_vs_ref.Scale(1.0 / h_vs_ref.Integral())
                    fig_vs_refmult.add_plottable(
                        h_vs_ref, legend_tmpl.format(perc_bin[1] * 100, perc_bin[0] * 100))
                else:
                    log.info(
                        "No charged particles in {0}*100 percentile bin of estimator {1}. This should not happen".
                        format(perc_bin, est_name))
            # directory path inside the ROOT file (strip the "file:" prefix)
            path = res_est_dir.GetPath().split(":")[1]
            # vs est_mult
            fig_vs_estmult.save_to_root_file(self.f_out, "PNchEst_binned_in_Nch{0}".format(ref_est_name), path)
            # vs ref_mult
            fig_vs_refmult.save_to_root_file(self.f_out, "PNch{0}_binned_in_NchEst".format(ref_est_name), path)
            figs.append(fig_vs_estmult)
            figs.append(fig_vs_refmult)
    return figs
def plot_PNch(self):
    """
    Create P(Nch) figures, binned in percentile intervals.

    NOTE(review): this is a near-duplicate of another plot_PNch in this
    file; the only difference is that this one writes to ``self.f``
    instead of ``self.f_out`` — consider consolidating.

    For every (reference estimator, result estimator) pair, builds two
    log-y figures: P(Nch^est) for events selected in percentile bins of
    the reference estimator, and P(Nch^ref) for events selected in
    percentile bins of the estimator itself. Each figure is written to
    ``self.f`` and also collected in the returned list.

    Reads: self.ref_ests, self.results_post, self.considered_ests,
    self.sums, self.nch_edges, self.perc_bins, self.f.

    Returns
    -------
    list of Figure
    """
    log.info("Creating P(Nch_est) and P(Nch_refest) histograms")
    # mult_bin_size = 10
    figs = []
    for ref_est_name in self.ref_ests:
        for res_est_dir in get_est_dirs(self.results_post, self.considered_ests):
            est_name = res_est_dir.GetName()
            # Figure properties:
            fig_vs_estmult = Figure()
            fig_vs_refmult = Figure()
            fig_vs_estmult.plot.logy = True
            fig_vs_refmult.plot.logy = True
            fig_vs_estmult.plot.palette = 'colorblind'
            fig_vs_refmult.plot.palette = 'colorblind'
            fig_vs_estmult.legend.position = 'tr'
            fig_vs_refmult.legend.position = 'tr'
            fig_vs_estmult.xtitle = "N_{{ch}}^{{{0}}}".format(est_name)
            fig_vs_refmult.xtitle = "N_{{ch}}^{{{0}}}".format(ref_est_name)
            fig_vs_estmult.ytitle = "P(N_{{ch}}^{{{0}}})".format(est_name)
            fig_vs_refmult.ytitle = "P(N_{{ch}}^{{{0}}})".format(ref_est_name)
            # 2D correlation: x = est multiplicity, y = ref multiplicity
            # (axis roles inferred from the projections below — confirm).
            corr_hist = get_correlation_histogram(self.sums, est_name, ref_est_name)

            # logic when dealing with fixed bins given in Nch:
            # ------------------------------------------------
            # mean_nch_est = corr_hist.GetMean(1)  # mean of x axis
            # nch_max = corr_hist.xaxis.GetNbins()
            # nch_cutoff = mean_nch_est * mean_mult_cutoff_factor
            # nch_bins = [(low, low + mult_bin_size) for low in range(0, int(nch_cutoff), mult_bin_size)]
            # # a large last bin covering the rest:
            # nch_bins += [(nch_bins[-1][2], nch_max)]
            # legend_tmpl = "{} < N_{ch} < {}"

            # logic when dealing with percentile bins:
            # ----------------------------------------
            # event_counter_est = asrootpy(getattr(res_est_dir, "event_counter"))
            legend_tmpl = "{0}% - {1}%"
            fig_vs_estmult.legend.title = "Selected in {0}".format(make_estimator_title(ref_est_name))
            fig_vs_refmult.legend.title = "Selected in {0}".format(make_estimator_title(est_name))
            # WARNING: the following needs tweeking when going back to fixed N_ch bins!
            for nch_bin, perc_bin in zip(self.nch_edges[ref_est_name], self.perc_bins[ref_est_name]):
                # vs est_mult: select a ref-mult slice, project onto x
                corr_hist.xaxis.SetRange(0, 0)  # reset x axis
                corr_hist.yaxis.SetRange(nch_bin[0], nch_bin[1])
                h_vs_est = asrootpy(corr_hist.ProjectionX(gen_random_name()))
                if h_vs_est.Integral() > 0:
                    # normalize to a probability distribution
                    h_vs_est.Scale(1.0 / h_vs_est.Integral())
                    fig_vs_estmult.add_plottable(
                        h_vs_est, legend_tmpl.format(perc_bin[1] * 100, perc_bin[0] * 100))
                else:
                    log.info("No charged particles in {0}*100 percentile bin of estimator {1}. This should not happen".
                             format(perc_bin, ref_est_name))
            for nch_bin, perc_bin in zip(self.nch_edges[est_name], self.perc_bins[est_name]):
                # vs ref_mult: select an est-mult slice, project onto y
                corr_hist.yaxis.SetRange(0, 0)  # reset y axis
                corr_hist.xaxis.SetRange(*nch_bin)
                h_vs_ref = asrootpy(corr_hist.ProjectionY(gen_random_name()))
                if h_vs_ref.Integral() > 0:
                    # normalize to a probability distribution
                    h_vs_ref.Scale(1.0 / h_vs_ref.Integral())
                    fig_vs_refmult.add_plottable(
                        h_vs_ref, legend_tmpl.format(perc_bin[1] * 100, perc_bin[0] * 100))
                else:
                    log.info(
                        "No charged particles in {0}*100 percentile bin of estimator {1}. This should not happen".
                        format(perc_bin, est_name))
            # directory path inside the ROOT file (strip the "file:" prefix)
            path = res_est_dir.GetPath().split(":")[1]
            # vs est_mult
            fig_vs_estmult.save_to_root_file(self.f, "PNchEst_binned_in_Nch{0}".format(ref_est_name), path)
            # vs ref_mult
            fig_vs_refmult.save_to_root_file(self.f, "PNch{0}_binned_in_NchEst".format(ref_est_name), path)
            figs.append(fig_vs_estmult)
            figs.append(fig_vs_refmult)
    return figs