def transform_single_file(file_path, output_path, servicex=None, tree_name='Events'):
    """Run the generated query on one input file and publish the result.

    The query result is converted to an Arrow table. When ``output_path`` is
    truthy the table is written there as a Parquet file. When the module-level
    ``messaging`` object is truthy the table is also streamed through an
    ``ArrowWriter`` in chunks of 1000.

    Args:
        file_path: Input file passed to ``generated_transformer.run_query``.
        output_path: Destination Parquet path; a falsy value skips the file
            write.
        servicex: Unused by this function.
        tree_name: Tree name passed to ``generated_transformer.run_query``.

    Raises:
        RuntimeError: If importing/running the generated transformer, the
            Arrow conversion, or the Parquet write fails. The original
            exception is attached as ``__cause__``.
    """
    print("Transforming a single path: " + str(file_path))

    try:
        # Imported inside the try so that an import failure is reported the
        # same way as any other transform failure.
        import generated_transformer

        start_transform = time.time()
        table = generated_transformer.run_query(file_path, tree_name)
        end_transform = time.time()
        print(f'generated_transformer.py: {round(end_transform - start_transform, 2)} sec')

        start_serialization = time.time()
        # Round-trip through awkward1 before the Arrow conversion.
        # NOTE(review): the reason for the round-trip is not evident from this
        # function -- confirm it is still required.
        table_awk1 = awkward1.from_awkward0(table)
        new_table = awkward1.to_awkward0(table_awk1)
        arrow = awkward.toarrow(new_table)
        end_serialization = time.time()
        print(f'awkward Table -> Arrow: {round(end_serialization - start_serialization, 2)} sec')

        if output_path:
            writer = pq.ParquetWriter(output_path, arrow.schema)
            try:
                writer.write_table(table=arrow)
            finally:
                # Always release the file handle, even if the write fails.
                writer.close()
    except Exception as exc:
        traceback.print_tb(exc.__traceback__, limit=20, file=sys.stdout)
        print(exc)
        # str(file_path) keeps the message construction from raising a
        # TypeError (and hiding the real error) when the path is not a str.
        raise RuntimeError(
            "Failed to transform input file " + str(file_path) + ": " + str(exc)
        ) from exc

    if messaging:
        arrow_writer = ArrowWriter(file_format=args.result_format,
                                   object_store=None,
                                   messaging=messaging)
        # TODO: implement chunk size parameter
        transformer = ArrowIterator(arrow, chunk_size=1000, file_path=file_path)
        arrow_writer.write_branches_to_arrow(transformer=transformer,
                                             topic_name=args.request_id,
                                             file_id=None,
                                             request_id=args.request_id)
def __getitem__(self, key):
    """Return the array for ``key``, loading it from the tree on first access.

    A key already present in ``self._dict`` is returned directly. Otherwise,
    if the key exists in ``self._tree``, it is recorded in
    ``self._materialized``, the branch is read with ``self._branchargs``,
    passed through ``awkward1.flatten`` when ``self._flatten`` is set and the
    array's type is a ``ListType``, converted with ``awkward1.to_awkward0``,
    stored in ``self._dict`` and returned.

    Raises:
        KeyError: If ``key`` is in neither ``self._dict`` nor ``self._tree``.
    """
    # Fast path: already loaded (or explicitly stored) under this key.
    if key in self._dict:
        return self._dict[key]

    # Unknown everywhere: behave like a plain mapping.
    if key not in self._tree:
        raise KeyError(key)

    self._materialized.add(key)
    loaded = self._tree[key].array(**self._branchargs)

    # The type inspection only happens when flattening was requested.
    if self._flatten:
        if isinstance(awkward1.type(loaded).type, awkward1.types.ListType):
            loaded = awkward1.flatten(loaded)

    self._dict[key] = awkward1.to_awkward0(loaded)
    return self._dict[key]
evtShape5 = -np.ones(len(events['Tracks.fCoordinates.fX'])) for ievt in [ x for x in range(len(events['Tracks.fCoordinates.fX'])) if x > 1065 ]: if ievt % 1000 == 0: print("Processing event %d. Progress: %.2f%%" % (ievt, 100 * ievt / len(events['Tracks.fCoordinates.fX']))) if events['HT'][ievt] < 1200: continue tracks_x = events['Tracks.fCoordinates.fX'][ievt] tracks_y = events['Tracks.fCoordinates.fY'][ievt] tracks_z = events['Tracks.fCoordinates.fZ'][ievt] tracks_E = np.sqrt(tracks_x**2 + tracks_y**2 + tracks_z**2 + 0.13957**2) tracks = uproot_methods.TLorentzVectorArray.from_cartesian( ak.to_awkward0(tracks_x), ak.to_awkward0(tracks_y), ak.to_awkward0(tracks_z), ak.to_awkward0(tracks_E)) tracks_fromPV0 = events['Tracks_fromPV0'][ievt] tracks_matchedToPFCandidate = events['Tracks_matchedToPFCandidate'][ievt] tracks = tracks[(tracks.pt > 1.) & (abs(tracks.eta) < 2.5) & (ak.to_awkward0(tracks_fromPV0) >= 2) & (ak.to_awkward0(tracks_matchedToPFCandidate) > 0)] # Cluster AK15 jets jetsAK15 = suepsUtilities.makeJets(tracks, 1.5) if len(jetsAK15) > 0: isrJet = suepsUtilities.isrTagger(jetsAK15) # Boost everything to scalar's rest frame tracks_boosted = tracks.boost(-isrJet.p3 / isrJet.energy) else:
genParticles_phi = events['GenParticles.fCoordinates.fPhi'][ievt] genParticles_E = events['GenParticles.fCoordinates.fE'][ievt] genParticles_ParentId = events['GenParticles_ParentId'][ievt] genParticles_PdgId = events['GenParticles_PdgId'][ievt] genParticles_Status = events['GenParticles_Status'][ievt] crossSection = events['CrossSection'][ievt] tracks_x = events['Tracks.fCoordinates.fX'][ievt] tracks_y = events['Tracks.fCoordinates.fY'][ievt] tracks_z = events['Tracks.fCoordinates.fZ'][ievt] tracks_fromPV0 = events['Tracks_fromPV0'][ievt] tracks_matchedToPFCandidate = events['Tracks_matchedToPFCandidate'][ievt] tracks_E = np.sqrt(tracks_x**2 + tracks_y**2 + tracks_z**2 + 0.13957**2) tracks = uproot_methods.TLorentzVectorArray.from_cartesian( ak.to_awkward0(tracks_x), ak.to_awkward0(tracks_y), ak.to_awkward0(tracks_z), ak.to_awkward0(tracks_E)) # Select good tracks tracks = tracks[(tracks.pt > 1.) & (abs(tracks.eta) < 2.5) & (ak.to_awkward0(tracks_fromPV0) >= 2) & (ak.to_awkward0(tracks_matchedToPFCandidate) > 0)] genParticles = uproot_methods.TLorentzVectorArray.from_ptetaphie( ak.to_awkward0(genParticles_pt), ak.to_awkward0(genParticles_eta), ak.to_awkward0(genParticles_phi), ak.to_awkward0(genParticles_E)) # Keep only final particles genParticles_ParentId = genParticles_ParentId[ (ak.to_awkward0(genParticles_Status) == 1) & (genParticles.pt > 1) & (abs(genParticles.eta) < 2.5)] genParticles = genParticles[(ak.to_awkward0(genParticles_Status) == 1) & (genParticles.pt > 1) &
def test_toawkward0():
    """Check ``awkward1.to_awkward0`` on a range of low-level layouts.

    For each layout the converted object must be an instance of the expected
    awkward0 (or NumPy / builtin) type and its ``tolist()`` must equal the
    expected Python value. Record and tuple layouts are also checked
    element-wise on item ``2``.
    """
    convert = awkward1.to_awkward0

    def expect(layout, expected_type, expected_list):
        # Type check and content check each run their own conversion.
        assert isinstance(convert(layout), expected_type)
        assert convert(layout).tolist() == expected_list

    # Flat float array.
    expect(awkward1.from_iter([1.1, 2.2, 3.3, 4.4], highlevel=False),
           numpy.ndarray,
           [1.1, 2.2, 3.3, 4.4])

    # Regular (2, 3, 5) array built from NumPy.
    regular = awkward1.from_numpy(
        numpy.arange(2 * 3 * 5).reshape(2, 3, 5),
        highlevel=False).toRegularArray()
    expect(regular,
           awkward0.JaggedArray,
           [[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14]],
            [[15, 16, 17, 18, 19], [20, 21, 22, 23, 24],
             [25, 26, 27, 28, 29]]])

    # Jagged array from Python lists.
    expect(awkward1.from_iter([[1.1, 2.2, 3.3], [], [4.4, 5.5]],
                              highlevel=False),
           awkward0.JaggedArray,
           [[1.1, 2.2, 3.3], [], [4.4, 5.5]])

    # ListArray64 with explicit starts/stops over a shared content buffer.
    starts = awkward1.layout.Index64(
        numpy.array([4, 999, 1], dtype=numpy.int64))
    stops = awkward1.layout.Index64(
        numpy.array([7, 999, 3], dtype=numpy.int64))
    buffer = awkward1.layout.NumpyArray(
        numpy.array([3.14, 4.4, 5.5, 123, 1.1, 2.2, 3.3, 321]))
    expect(awkward1.layout.ListArray64(starts, stops, buffer),
           awkward0.JaggedArray,
           [[1.1, 2.2, 3.3], [], [4.4, 5.5]])

    # Records: single item first, then the whole array.
    layout = awkward1.from_iter(
        [{"x": 0, "y": []},
         {"x": 1.1, "y": [1]},
         {"x": 2.2, "y": [2, 2]},
         {"x": 3.3, "y": [3, 3, 3]}],
        highlevel=False)
    assert isinstance(convert(layout[2]), dict)
    assert convert(layout[2])["x"] == 2.2
    assert isinstance(convert(layout[2])["y"], numpy.ndarray)
    assert convert(layout[2])["y"].tolist() == [2, 2]
    expect(layout,
           awkward0.Table,
           [{"x": 0, "y": []},
            {"x": 1.1, "y": [1]},
            {"x": 2.2, "y": [2, 2]},
            {"x": 3.3, "y": [3, 3, 3]}])

    # Tuples: whole array first, then a single item.
    layout = awkward1.from_iter(
        [(0, []), (1.1, [1]), (2.2, [2, 2]), (3.3, [3, 3, 3])],
        highlevel=False)
    expect(layout,
           awkward0.Table,
           [(0, []), (1.1, [1]), (2.2, [2, 2]), (3.3, [3, 3, 3])])
    assert isinstance(convert(layout[2]), tuple)
    assert convert(layout[2])[0] == 2.2
    assert convert(layout[2])[1].tolist() == [2, 2]

    # Heterogeneous values: numbers mixed with lists.
    expect(awkward1.from_iter(
               [0.0, [], 1.1, [1], 2.2, [2, 2], 3.3, [3, 3, 3]],
               highlevel=False),
           awkward0.UnionArray,
           [0.0, [], 1.1, [1], 2.2, [2, 2], 3.3, [3, 3, 3]])

    # Missing values.
    expect(awkward1.from_iter([1.1, 2.2, None, None, 3.3, None, 4.4],
                              highlevel=False),
           awkward0.IndexedMaskedArray,
           [1.1, 2.2, None, None, 3.3, None, 4.4])

    # IndexedArray64 picking entries out of a content array.
    content = awkward1.layout.NumpyArray(
        numpy.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]))
    index = awkward1.layout.Index64(
        numpy.array([3, 2, 2, 5, 0], dtype=numpy.int64))
    expect(awkward1.layout.IndexedArray64(index, content),
           awkward0.IndexedArray,
           [3.3, 2.2, 2.2, 5.5, 0.0])
# Per-event loop: build the selected track collection and the AK4 jet
# collection for every event that passes the HT requirement.
for ievt in range(N_events):
    # Progress report once every 1000 events.
    if not ievt % 1000:
        print("Processing event %d. Progress: %.2f%%" %
              (ievt, 100 * ievt / N_events))

    # Events with HT below 1200 are skipped entirely.
    if events['HT'][ievt] < 1200:
        continue

    # Track columns for this event.
    tracks_x = events['Tracks.fCoordinates.fX'][ievt]
    tracks_y = events['Tracks.fCoordinates.fY'][ievt]
    tracks_z = events['Tracks.fCoordinates.fZ'][ievt]
    tracks_fromPV0 = events['Tracks_fromPV0'][ievt]
    tracks_matchedToPFCandidate = events['Tracks_matchedToPFCandidate'][ievt]

    # Energy from the three Cartesian components plus a fixed mass term.
    # NOTE(review): 0.13957 is presumably the charged-pion mass in GeV --
    # confirm.
    tracks_E = np.sqrt(tracks_x**2 + tracks_y**2 + tracks_z**2 + 0.13957**2)

    # uproot_methods is fed awkward0 arrays, so convert each component.
    tracks = uproot_methods.TLorentzVectorArray.from_cartesian(
        *map(ak.to_awkward0, (tracks_x, tracks_y, tracks_z, tracks_E)))

    # Select good tracks
    kinematic_mask = (tracks.pt > 1.) & (abs(tracks.eta) < 2.5)
    pv_mask = ak.to_awkward0(tracks_fromPV0) >= 2
    pf_match_mask = ak.to_awkward0(tracks_matchedToPFCandidate) > 0
    tracks = tracks[kinematic_mask & pv_mask & pf_match_mask]

    # Get AK4 jets
    jets_pt = events['Jets.fCoordinates.fPt'][ievt]
    jets_eta = events['Jets.fCoordinates.fEta'][ievt]
    jets_phi = events['Jets.fCoordinates.fPhi'][ievt]
    jets_e = events['Jets.fCoordinates.fE'][ievt]
    jets = uproot_methods.TLorentzVectorArray.from_ptetaphie(
        *map(ak.to_awkward0, (jets_pt, jets_eta, jets_phi, jets_e)))
for ievt in range((options.part-1)*N_events,options.part*N_events): ievt_normalized = ievt - (options.part-1)*N_events if ievt%1000 == 0: print("Processing event %d. Progress: %.2f%%"%(ievt,100*ievt_normalized/N_events)) if events['HT'][ievt] < 1200: continue tracks_x = events['Tracks.fCoordinates.fX'][ievt] tracks_y = events['Tracks.fCoordinates.fY'][ievt] tracks_z = events['Tracks.fCoordinates.fZ'][ievt] tracks_fromPV0 = events['Tracks_fromPV0'][ievt] tracks_matchedToPFCandidate = events['Tracks_matchedToPFCandidate'][ievt] tracks_E = np.sqrt(tracks_x**2+tracks_y**2+tracks_z**2+0.13957**2) tracks = uproot_methods.TLorentzVectorArray.from_cartesian(ak.to_awkward0(tracks_x), ak.to_awkward0(tracks_y), ak.to_awkward0(tracks_z), ak.to_awkward0(tracks_E)) # Select good tracks tracks = tracks[(tracks.pt > 1.) & (abs(tracks.eta) < 2.5) & (ak.to_awkward0(tracks_fromPV0) >= 2) & (ak.to_awkward0(tracks_matchedToPFCandidate) > 0)] # Get AK4 jets jets_pt = events['Jets.fCoordinates.fPt'][ievt] jets_eta = events['Jets.fCoordinates.fEta'][ievt] jets_phi = events['Jets.fCoordinates.fPhi'][ievt] jets_e = events['Jets.fCoordinates.fE'][ievt] jets = uproot_methods.TLorentzVectorArray.from_ptetaphie(ak.to_awkward0(jets_pt),