def test_descendant_idxs():
    """Check FormShower.descendant_idxs against a small hand-built tree.

    The expectations below encode that a childless particle is returned as
    its own descendant, and that otherwise only the childless ends of the
    shower are returned.
    """
    # index:    0   1       2    3       4   5   6          7   8   9   10
    children = [[], [2, 3], [5], [6, 5], [], [], [7, 8, 9], [], [], [], []]
    # (starting particle, expected descendants in ascending order)
    expectations = (
        (0, [0]),
        (2, [5]),
        (1, [5, 7, 8, 9]),
    )
    with TempTestDir("tst") as dir_name:
        save_path = os.path.join(dir_name, "tmp.parquet")
        eventWise = Components.EventWise(save_path)
        eventWise.append(Children=[ak.from_iter(children)])
        eventWise.selected_event = 0
        for start, expected in expectations:
            found = sorted(FormShower.descendant_idxs(eventWise, start))
            tst.assert_allclose(found, expected)
def label_parings(eventWise):
    """
    For every pair of jet inputs, label if they are from the same b quark

    Parameters
    ----------
    eventWise : EventWise
        Data set containing particle data.

    Returns
    -------
    labels : list of numpy arrays of bools
        one square array per event, with side length equal to the
        number of jet inputs in that event; element [i, j] is True
        when inputs i and j are both found among the descendants
        of at least one common entry of BQuarkIdx.

    """
    eventWise.selected_event = None
    n_events = len(eventWise.X)
    labels = []
    for event_n in range(n_events):
        eventWise.selected_event = event_n
        source_idxs = eventWise.JetInputs_SourceIdx
        n_inputs = len(source_idxs)
        same_quark = np.full((n_inputs, n_inputs), False, dtype=bool)
        for quark_idx in eventWise.BQuarkIdx:
            shower = FormShower.descendant_idxs(eventWise, quark_idx)
            in_shower = np.fromiter((idx in shower for idx in source_idxs),
                                    dtype=bool)
            as_row = np.expand_dims(in_shower, 0)
            as_column = np.expand_dims(in_shower, 1)
            # a pair is labelled when both members are in this shower;
            # labels from earlier quarks are kept
            same_quark |= as_row & as_column
        labels.append(same_quark)
    return labels
def decendants_width(eventWise, *root_idxs, only_visible=True):
    """
    Get the width of the particles showered from the specified roots.

    Parameters
    ----------
    eventWise : EventWise
        object containing particle data,
        an event must already be selected
    *root_idxs : ints
        integers specifying the locations in the eventWise
        of the root particles of the shower
    only_visible : bool
        should only particles in eventWise.JetInputs_SourceIdx
        be used to measure the width?
         (Default value = True)

    Returns
    -------
    : float
        width of the shower, as computed by `width` from the
        rapidity and phi of the selected descendants

    """
    assert eventWise.selected_event is not None
    decendants = list(FormShower.descendant_idxs(eventWise, *root_idxs))
    # select only the visible objects
    if only_visible:
        # read the column once and use a set, rather than fetching and
        # scanning the whole array again for every descendant
        visible = set(eventWise.JetInputs_SourceIdx)
        decendants = [d for d in decendants if d in visible]
    rapidity = eventWise.Rapidity
    phi = eventWise.Phi
    return width(rapidity[decendants], phi[decendants])
def calculate_roots_showers(data, bcreator=25, lcreator=23):
    """
    Add root and shower index columns for b tags, lepton tags and background.

    For each of "Is_BRoot" and "Is_lRoot" not already in data.columns,
    every event is visited; the roots come from `get_paired_tags` and the
    shower from `FormShower.descendant_idxs` of those roots. The results
    are appended as the root column and a matching column with "Root"
    replaced by "Shower". Afterwards, if "Is_BGRoot" is missing, the same
    is done with roots from `get_bg_tags`, which is given the
    concatenated b and lepton roots of the event.
    Progress is printed to stdout, and data.selected_event is left on
    the last event visited.

    Parameters
    ----------
    data : EventWise
        dataset to read from and append to
    bcreator : int
        passed to get_paired_tags along with pid 5
        (presumably the PDG id of the particle that creates
        the b quarks; confirm against get_paired_tags)
         (Default value = 25)
    lcreator : int
        passed to get_paired_tags along with pids 11, 13, 15
        (presumably the PDG id of the particle that creates
        the leptons; confirm against get_paired_tags)
         (Default value = 23)

    """
    n_events = len(data.X)
    b_root_name = "Is_BRoot"
    l_root_name = "Is_lRoot"
    bg_root_name = "Is_BGRoot"
    name_pids = [(b_root_name, 5, bcreator),
                 (l_root_name, np.array([11, 13, 15]), lcreator)]
    for name, pid, creator in name_pids:
        if name in data.columns:
            continue
        print(f"\n\n{name}\n\n")
        root_idxs, shower_idxs = [], []
        for event_n in range(n_events):
            if event_n % 10 == 0:
                print(f"\t\t{event_n/n_events:.0%} ", end="\r")
            data.selected_event = event_n
            try:
                roots = get_paired_tags(data, pid, creator)
            except Exception:
                # debugging hook; only ordinary errors should reach it, so
                # KeyboardInterrupt and SystemExit still stop the run
                jet_tools.st()
                roots = get_paired_tags(data, pid, creator)
            shower = FormShower.descendant_idxs(data, *roots)
            root_idxs.append(list(roots))
            shower_idxs.append(list(shower))
        data.append(**{
            name: root_idxs,
            name.replace("Root", "Shower"): shower_idxs
        })
    if bg_root_name not in data.columns:
        print(f"\n\n{bg_root_name}\n\n")
        root_idxs, shower_idxs = [], []
        for event_n in range(n_events):
            if event_n % 10 == 0:
                print(f"{event_n/n_events:.0%} ", end="\r")
            data.selected_event = event_n
            # the b and lepton roots of this event
            tag_idxs = np.concatenate(
                [getattr(data, name) for name, _, _ in name_pids])
            roots = get_bg_tags(data, tag_idxs)
            shower = FormShower.descendant_idxs(data, *roots)
            root_idxs.append(list(roots))
            shower_idxs.append(list(shower))
        data.append(
            **{
                bg_root_name: root_idxs,
                bg_root_name.replace("Root", "Shower"): shower_idxs
            })
def plot_decendants(ax, eventWise, tag_idx, marker, colour='r'):
    """
    Scatter the descendants of one tag that are also jet inputs.

    Parameters
    ----------
    ax : matplotlib axis (presumably; only its scatter method is used)
        axis to draw on
    eventWise : EventWise
        dataset, read for JetInputs_SourceIdx, Rapidity and Phi
    tag_idx : int
        index of the tag particle whose descendants are drawn,
        also used in the legend label
    marker : str
        marker style handed to ax.scatter
    colour : str
        edge colour of the markers, the faces are transparent
         (Default value = 'r')

    """
    shower = FormShower.descendant_idxs(eventWise, tag_idx)
    seen = shower.intersection(eventWise.JetInputs_SourceIdx)
    ordered = sorted(seen)
    rapidities = eventWise.Rapidity[ordered]
    phis = eventWise.Phi[ordered]
    hollow_style = dict(edgecolor=colour,
                        marker=marker,
                        color=(0, 0, 0, 0),
                        label=f"Index={tag_idx}")
    ax.scatter(rapidities, phis, 100, **hollow_style)
def add_detectable_fourvector(eventWise, tag_name="TagIndex", silent=False):
    """
    Add a list of detectable four vectors for the tags,
    as present in the JetInputs.
    Also add the indices themselves.

    Tags are grouped with the first unallocated tag (the seed) whenever
    they share at least one detectable descendant with it; a tag with no
    detectable descendants forms no group. Appended columns are
    DetectableTag_Leaves, UndetectableTag_Leaves, DetectableTag_Roots
    and DetectableTag_Energy/_Px/_Py/_Pz.

    Parameters
    ----------
    eventWise : EventWise
        dataset containing locations of particles and jets
    tag_name : str
        name of the column in the eventWise that contains
        the indices of the tags that we wish to use
         (Default value = "TagIndex")
    silent : bool
        passed on to add_tag_particles if the default
        tag column has to be created first
         (Default value = False)

    """
    eventWise.selected_event = None
    name = "DetectableTag"
    if "TagIndex" not in eventWise.columns:
        if tag_name == "TagIndex":
            add_tag_particles(eventWise, silent=silent)
    tag_particles = getattr(eventWise, tag_name)
    # the leaves are the bits that are detected, the roots are the tag particles
    # group roots with common leaves
    leaves, invisible, roots = [], [], []
    energy, px, py, pz = [], [], [], []
    for event_n, tag_idxs in enumerate(tag_particles):
        eventWise.selected_event = event_n
        shower_inputs = set(eventWise.JetInputs_SourceIdx)
        all_energy = eventWise.Energy
        all_px = eventWise.Px
        all_py = eventWise.Py
        all_pz = eventWise.Pz
        # split the descendants of each tag into seen and unseen
        seen_by_tag = []
        unseen_by_tag = []
        for tag in tag_idxs:
            shower = FormShower.descendant_idxs(eventWise, tag)
            seen = shower_inputs.intersection(shower)
            seen_by_tag.append(seen)
            unseen_by_tag.append(shower - seen)
        # now work out what overlaps
        event_leaves, event_invisible, event_roots = [], [], []
        event_energy, event_px, event_py, event_pz = [], [], [], []
        unallocated = np.ones(len(tag_idxs), dtype=bool)
        while unallocated.any():
            # argmax of a boolean array is the first True,
            # so this starts from the first free tag
            position = int(np.argmax(unallocated))
            unallocated[position] = False
            seed = seen_by_tag[position]
            if seed:  # otherwise this tag is undetectable
                group_with = [
                    g for g, other in enumerate(seen_by_tag)
                    if not seed.isdisjoint(other)
                ]
                unallocated[group_with] = False
                event_roots.append(tag_idxs[group_with].tolist())
                merged_seen = set()
                merged_unseen = set()
                for g in group_with:
                    merged_seen.update(seen_by_tag[g])
                    merged_unseen.update(unseen_by_tag[g])
                detectables = sorted(merged_seen)
                undetectables = sorted(merged_unseen)
                event_leaves.append(detectables)
                event_invisible.append(undetectables)
                # now find the kinematics
                event_energy.append(np.sum(all_energy[detectables]))
                event_px.append(np.sum(all_px[detectables]))
                event_py.append(np.sum(all_py[detectables]))
                event_pz.append(np.sum(all_pz[detectables]))
        leaves.append(event_leaves)
        invisible.append(event_invisible)
        roots.append(event_roots)
        energy.append(event_energy)
        px.append(event_px)
        py.append(event_py)
        pz.append(event_pz)
    params = {
        f"{name}_Leaves": ak.from_iter(leaves),
        "UndetectableTag_Leaves": ak.from_iter(invisible),
        f"{name}_Roots": ak.from_iter(roots),
        f"{name}_Energy": ak.from_iter(energy),
        f"{name}_Px": ak.from_iter(px),
        f"{name}_Py": ak.from_iter(py),
        f"{name}_Pz": ak.from_iter(pz),
    }
    eventWise.append(**params)
def add_mass_share(eventWise, jet_name, batch_length=100, silent=False, append=True):
    """
    Tagging procedure based on which jet has the largest portion of the tag's mass.

    Processing resumes after the events already stored in the
    <jet_name>_TagMass column, and stops early if a file called "stop"
    exists in the working directory.

    Parameters
    ----------
    eventWise : EventWise
        dataset containing locations of particles and jets
    jet_name : str
        The prefix of the jet variables in the eventWise
    batch_length: int
        max number of events to process
         (Default value = 100)
    silent : bool
        Should the progress be printed?
         (Default value = False)
    append : bool
        Should the results be appended to the eventWise?
         (Default value = True)

    Returns
    -------
    (if append is false)
    content: dict of awkward arrays
        content for eventWise, with keys
        <jet_name>_TagMass and <jet_name>_MTags

    """
    eventWise.selected_event = None
    name = jet_name + "_TagMass"
    tag_name = jet_name + "_MTags"
    n_events = len(getattr(eventWise, jet_name + "_Label", []))
    # masses are squared while working, and rooted again when stored
    jet_tagmass2 = list(getattr(eventWise, name, np.array([]))**2)
    jet_tags = list(getattr(eventWise, tag_name, []))
    start_point = len(jet_tagmass2)
    if start_point >= n_events:
        print("Finished")
        if append:
            return
        content = {}
        content[name] = ak.from_iter(jet_tagmass2)**0.5
        content[tag_name] = ak.from_iter(jet_tags)
        return content
    end_point = min(n_events, start_point + batch_length)
    if not silent:
        print(f" Will stop at {end_point/n_events:.1%}")
    for event_n in range(start_point, end_point):
        if event_n % 10 == 0 and not silent:
            print(f"{event_n/n_events:.1%}", end='\r', flush=True)
        if os.path.exists("stop"):
            print(f"Completed event {event_n-1}")
            break
        eventWise.selected_event = event_n
        jets_idxs = getattr(eventWise, jet_name + "_Label")
        tags_here = [[] for _ in jets_idxs]
        mass2_here = [[] for _ in jets_idxs]
        if tags_here:
            # every entry is overwritten for each tag, so no reset is needed
            this_tag = np.zeros(len(jets_idxs))
            # NOTE(review): tag_in_jet below holds positions within
            # JetInputs_SourceIdx, yet it indexes these columns; confirm
            # that they share the jet input ordering
            energies = eventWise.Energy
            pxs = eventWise.Px
            pys = eventWise.Py
            pzs = eventWise.Pz
            sourceidx = eventWise.JetInputs_SourceIdx.tolist()
            # map each source index to its position in the jet inputs;
            # setdefault keeps the first position, as list.index would
            input_position = {}
            for position, source in enumerate(sourceidx):
                input_position.setdefault(source, position)
            for tag_idx in eventWise.TagIndex:
                tag_decendants = {
                    input_position[d]
                    for d in FormShower.descendant_idxs(eventWise, tag_idx)
                    if d in input_position
                }
                for jet_n, jet_idx in enumerate(jets_idxs):
                    tag_in_jet = list(tag_decendants.intersection(jet_idx))
                    mass2 = np.sum(energies[tag_in_jet])**2 - np.sum(pxs[tag_in_jet])**2 - \
                            np.sum(pys[tag_in_jet])**2 - np.sum(pzs[tag_in_jet])**2
                    mass2_here[jet_n].append(mass2)
                    this_tag[jet_n] = mass2
                if (this_tag > 0).any():
                    # if no jet holds any of the tag's mass, then no tags
                    # otherwise the jet with the largest share gets the tag
                    tags_here[np.argmax(this_tag)].append(tag_idx)
        jet_tagmass2.append(ak.from_iter(mass2_here))
        jet_tags.append(ak.from_iter(tags_here))
    content = {}
    content[name] = ak.from_iter(jet_tagmass2)**0.5
    content[tag_name] = ak.from_iter(jet_tags)
    if append:
        eventWise.append(**content)
    else:
        return content
def add_inheritance(eventWise, jet_name, batch_length=100, silent=False, append=True):
    """
    Add the inheritance from each to the tagging particles
    Represents the portion of the energy that has been derived from the true particles
    in the rest frame of the root particle.
    The highest percentage inheritance*jet energy gets the itag.

    Processing resumes after the events already stored in the
    <jet_name>_Inheritance column, and stops early if a file called
    "stop" exists in the working directory.

    Parameters
    ----------
    eventWise : EventWise
        dataset containing locations of particles and jets
    jet_name : str
        The prefix of the jet variables in the eventWise
    batch_length: int
        max number of events to process
         (Default value = 100)
    silent : bool
        Should the progress be printed?
         (Default value = False)
    append : bool
        Should the results be appended to the eventWise?
         (Default value = True)

    Returns
    -------
    (if append is false)
    content: dict of awkward arrays
        content for eventWise, with keys
        <jet_name>_Inheritance and <jet_name>_ITags

    """
    eventWise.selected_event = None
    name = jet_name + "_Inheritance"
    tag_name = jet_name + "_ITags"
    n_events = len(getattr(eventWise, jet_name + "_Energy", []))
    jet_inhs = list(getattr(eventWise, name, []))
    jet_tags = list(getattr(eventWise, tag_name, []))
    start_point = len(jet_inhs)
    if start_point >= n_events:
        print("Finished")
        if append:
            return
        # hand back both columns, exactly as the normal exit below does
        content = {}
        content[name] = ak.from_iter(jet_inhs)
        content[tag_name] = ak.from_iter(jet_tags)
        return content
    end_point = min(n_events, start_point + batch_length)
    if not silent:
        print(f" Will stop at {end_point/n_events:.1%}")
    for event_n in range(start_point, end_point):
        if event_n % 10 == 0 and not silent:
            print(f"{event_n/n_events:.1%}", end='\r', flush=True)
        if os.path.exists("stop"):
            print(f"Completed event {event_n-1}")
            break
        eventWise.selected_event = event_n
        jets_idxs = getattr(eventWise, jet_name + "_Label")
        inhs_here = []
        tags_here = [[] for _ in jets_idxs]
        if tags_here:
            parents_idxs = getattr(eventWise, jet_name + "_Parent")
            # a parent of -1 marks the root of a jet
            roots = parents_idxs == -1
            energies = getattr(eventWise, jet_name + "_Energy")
            pxs = getattr(eventWise, jet_name + "_Px")
            pys = getattr(eventWise, jet_name + "_Py")
            pzs = getattr(eventWise, jet_name + "_Pz")
            rf_energies = get_root_rest_energies(roots, energies, pxs, pys, pzs)
            root_energies = energies[roots]
            sourceidx = eventWise.JetInputs_SourceIdx.tolist()
            # map each source index to its position in the jet inputs;
            # setdefault keeps the first position, as list.index would
            input_position = {}
            for position, source in enumerate(sourceidx):
                input_position.setdefault(source, position)
            for tag_idx in eventWise.TagIndex:
                tag_decendants = [
                    input_position[d]
                    for d in FormShower.descendant_idxs(eventWise, tag_idx)
                    if d in input_position
                ]
                tag_inhs = [
                    percent_pos(jet_idx, parent_idx, tag_decendants, energy)
                    for jet_idx, parent_idx, energy in zip(
                        jets_idxs, parents_idxs, rf_energies)
                ]
                tag_inhs = ak.from_iter(tag_inhs)
                inhs_here.append(tag_inhs)
                if np.any(np.any(tag_inhs > 0)):
                    # if all the inheritances are 0, then no tags
                    # otherwise decide who gets the tag
                    root_scores = root_energies * tag_inhs[roots]
                    tags_here[np.argmax(root_scores)].append(tag_idx)
        jet_inhs.append(ak.from_iter(inhs_here))
        jet_tags.append(ak.from_iter(tags_here))
    content = {}
    content[name] = ak.from_iter(jet_inhs)
    content[tag_name] = ak.from_iter(jet_tags)
    if append:
        eventWise.append(**content)
    else:
        return content