def transform_single_file(file_path,
                          output_path,
                          servicex=None,
                          tree_name='Events'):
    """Transform one input file and optionally persist / publish the result.

    The file is handed to ``generated_transformer.run_query``; the returned
    table is round-tripped through ``awkward1`` and converted to an Arrow
    table. If ``output_path`` is truthy the Arrow table is written there as
    Parquet. If the module-level ``messaging`` object is truthy the Arrow
    table is additionally streamed in chunks through an ``ArrowWriter``
    configured from the module-level ``args``.

    Args:
        file_path: Input file handed to ``run_query``. May be any object
            with a meaningful ``str()`` (e.g. ``str`` or ``pathlib.Path``).
        output_path: Destination of the Parquet file; skipped when falsy.
        servicex: Not used by this function; kept for caller compatibility.
        tree_name: Tree name forwarded to ``run_query``.

    Raises:
        RuntimeError: If importing/running the transformer, the Arrow
            conversion, or the Parquet write fails. The original exception
            is attached as ``__cause__``.
    """
    # Stringify once: the error path below used to concatenate the raw
    # ``file_path`` and blew up with TypeError for non-str paths, hiding the
    # real failure.
    file_label = str(file_path)
    print("Transforming a single path: " + file_label)

    try:
        # Imported lazily, inside the try, so a missing/broken generated
        # module is reported through the same RuntimeError as other failures.
        import generated_transformer
        start_transform = time.time()
        table = generated_transformer.run_query(file_path, tree_name)
        end_transform = time.time()
        print(
            f'generated_transformer.py: {round(end_transform - start_transform, 2)} sec'
        )

        start_serialization = time.time()
        # NOTE(review): awkward0 -> awkward1 -> awkward0 round trip kept as
        # in the original; presumably it normalises the table before the
        # Arrow conversion — confirm before removing.
        table_awk1 = awkward1.from_awkward0(table)
        new_table = awkward1.to_awkward0(table_awk1)
        arrow = awkward.toarrow(new_table)
        end_serialization = time.time()
        print(
            f'awkward Table -> Arrow: {round(end_serialization - start_serialization, 2)} sec'
        )

        if output_path:
            # Context manager guarantees the writer is closed even when
            # write_table raises.
            with pq.ParquetWriter(output_path, arrow.schema) as writer:
                writer.write_table(table=arrow)

    except Exception as exc:
        # Keep the traceback on stdout, as callers/log scrapers may rely on it.
        traceback.print_tb(exc.__traceback__, limit=20, file=sys.stdout)
        print(exc)

        raise RuntimeError("Failed to transform input file " + file_label +
                           ": " + str(exc)) from exc

    if messaging:
        arrow_writer = ArrowWriter(file_format=args.result_format,
                                   object_store=None,
                                   messaging=messaging)

        # TODO: implement chunk size parameter
        transformer = ArrowIterator(arrow,
                                    chunk_size=1000,
                                    file_path=file_path)
        arrow_writer.write_branches_to_arrow(transformer=transformer,
                                             topic_name=args.request_id,
                                             file_id=None,
                                             request_id=args.request_id)
Пример #2
0
 def __getitem__(self, key):
     if key in self._dict:
         return self._dict[key]
     elif key in self._tree:
         self._materialized.add(key)
         array = self._tree[key].array(**self._branchargs)
         if self._flatten and isinstance(
                 awkward1.type(array).type, awkward1.types.ListType):
             array = awkward1.flatten(array)
         array = awkward1.to_awkward0(array)
         self._dict[key] = array
         return self._dict[key]
     else:
         raise KeyError(key)
Пример #3
0
# One entry per event, initialised to -1.
# NOTE(review): presumably filled in later in the loop — not visible in this excerpt.
evtShape5 = -np.ones(len(events['Tracks.fCoordinates.fX']))

# Only events with index greater than 1065 are visited.
for ievt in [
        x for x in range(len(events['Tracks.fCoordinates.fX'])) if x > 1065
]:
    # Progress report every 1000th event index.
    if ievt % 1000 == 0:
        print("Processing event %d. Progress: %.2f%%" %
              (ievt, 100 * ievt / len(events['Tracks.fCoordinates.fX'])))
    # Skip events whose HT is below 1200.
    if events['HT'][ievt] < 1200:
        continue
    tracks_x = events['Tracks.fCoordinates.fX'][ievt]
    tracks_y = events['Tracks.fCoordinates.fY'][ievt]
    tracks_z = events['Tracks.fCoordinates.fZ'][ievt]
    # Energy from the three momentum components plus a fixed mass term.
    # NOTE(review): 0.13957 is presumably the charged-pion mass in GeV — confirm.
    tracks_E = np.sqrt(tracks_x**2 + tracks_y**2 + tracks_z**2 + 0.13957**2)
    # Inputs are converted to awkward0 before building the Lorentz vectors.
    tracks = uproot_methods.TLorentzVectorArray.from_cartesian(
        ak.to_awkward0(tracks_x), ak.to_awkward0(tracks_y),
        ak.to_awkward0(tracks_z), ak.to_awkward0(tracks_E))

    tracks_fromPV0 = events['Tracks_fromPV0'][ievt]
    tracks_matchedToPFCandidate = events['Tracks_matchedToPFCandidate'][ievt]
    # Keep tracks with pt > 1, |eta| < 2.5, fromPV0 >= 2 and a matched
    # PF candidate flag > 0.
    tracks = tracks[(tracks.pt > 1.) & (abs(tracks.eta) < 2.5) &
                    (ak.to_awkward0(tracks_fromPV0) >= 2) &
                    (ak.to_awkward0(tracks_matchedToPFCandidate) > 0)]

    # Cluster AK15 jets
    jetsAK15 = suepsUtilities.makeJets(tracks, 1.5)
    if len(jetsAK15) > 0:
        isrJet = suepsUtilities.isrTagger(jetsAK15)
        # Boost everything to scalar's rest frame
        tracks_boosted = tracks.boost(-isrJet.p3 / isrJet.energy)
    else:
        # NOTE(review): the body of this branch is cut off in this excerpt.
Пример #4
0
    genParticles_phi = events['GenParticles.fCoordinates.fPhi'][ievt]
    genParticles_E = events['GenParticles.fCoordinates.fE'][ievt]
    genParticles_ParentId = events['GenParticles_ParentId'][ievt]
    genParticles_PdgId = events['GenParticles_PdgId'][ievt]
    genParticles_Status = events['GenParticles_Status'][ievt]
    crossSection = events['CrossSection'][ievt]

    tracks_x = events['Tracks.fCoordinates.fX'][ievt]
    tracks_y = events['Tracks.fCoordinates.fY'][ievt]
    tracks_z = events['Tracks.fCoordinates.fZ'][ievt]
    tracks_fromPV0 = events['Tracks_fromPV0'][ievt]
    tracks_matchedToPFCandidate = events['Tracks_matchedToPFCandidate'][ievt]

    tracks_E = np.sqrt(tracks_x**2 + tracks_y**2 + tracks_z**2 + 0.13957**2)
    tracks = uproot_methods.TLorentzVectorArray.from_cartesian(
        ak.to_awkward0(tracks_x), ak.to_awkward0(tracks_y),
        ak.to_awkward0(tracks_z), ak.to_awkward0(tracks_E))
    # Select good tracks
    tracks = tracks[(tracks.pt > 1.) & (abs(tracks.eta) < 2.5) &
                    (ak.to_awkward0(tracks_fromPV0) >= 2) &
                    (ak.to_awkward0(tracks_matchedToPFCandidate) > 0)]

    genParticles = uproot_methods.TLorentzVectorArray.from_ptetaphie(
        ak.to_awkward0(genParticles_pt), ak.to_awkward0(genParticles_eta),
        ak.to_awkward0(genParticles_phi), ak.to_awkward0(genParticles_E))
    # Keep only final particles
    genParticles_ParentId = genParticles_ParentId[
        (ak.to_awkward0(genParticles_Status) == 1) & (genParticles.pt > 1) &
        (abs(genParticles.eta) < 2.5)]
    genParticles = genParticles[(ak.to_awkward0(genParticles_Status) == 1)
                                & (genParticles.pt > 1) &
Пример #5
0
def test_toawkward0():
    """Check ``awkward1.to_awkward0`` on a series of awkward1 layouts.

    For each layout the converted object's awkward0 (or NumPy / builtin)
    type and its ``tolist()`` contents are verified. Covered inputs: flat
    numbers, a regular 2x3x5 array, jagged lists (built via ``from_iter``
    and via an explicit ``ListArray64``), records, tuples, a union of
    numbers and lists, an option type, and an ``IndexedArray64``.
    """

    def check(layout, expected_type, expected_list):
        # Convert, then verify both the container type and the contents.
        converted = awkward1.to_awkward0(layout)
        assert isinstance(converted, expected_type)
        assert converted.tolist() == expected_list

    # Flat numbers become a plain NumPy array.
    flat = awkward1.from_iter([1.1, 2.2, 3.3, 4.4], highlevel=False)
    check(flat, numpy.ndarray, [1.1, 2.2, 3.3, 4.4])

    # Regular-sized array.
    grid = numpy.arange(2 * 3 * 5).reshape(2, 3, 5)
    regular = awkward1.from_numpy(grid, highlevel=False).toRegularArray()
    check(regular, awkward0.JaggedArray, grid.tolist())

    # Variable-length lists built from Python data.
    jagged_expected = [[1.1, 2.2, 3.3], [], [4.4, 5.5]]
    jagged = awkward1.from_iter(jagged_expected, highlevel=False)
    check(jagged, awkward0.JaggedArray, jagged_expected)

    # The same lists expressed through explicit starts/stops into a buffer.
    starts = awkward1.layout.Index64(
        numpy.array([4, 999, 1], dtype=numpy.int64))
    stops = awkward1.layout.Index64(
        numpy.array([7, 999, 3], dtype=numpy.int64))
    buffer = awkward1.layout.NumpyArray(
        numpy.array([3.14, 4.4, 5.5, 123, 1.1, 2.2, 3.3, 321]))
    list_array = awkward1.layout.ListArray64(starts, stops, buffer)
    check(list_array, awkward0.JaggedArray, jagged_expected)

    # Records: a single record first, then the whole array.
    record_data = [
        {"x": 0, "y": []},
        {"x": 1.1, "y": [1]},
        {"x": 2.2, "y": [2, 2]},
        {"x": 3.3, "y": [3, 3, 3]},
    ]
    records = awkward1.from_iter(record_data, highlevel=False)
    one_record = awkward1.to_awkward0(records[2])
    assert isinstance(one_record, dict)
    assert one_record["x"] == 2.2
    assert isinstance(one_record["y"], numpy.ndarray)
    assert one_record["y"].tolist() == [2, 2]
    check(records, awkward0.Table, record_data)

    # Tuples: the whole array first, then a single tuple.
    tuple_data = [(0, []), (1.1, [1]), (2.2, [2, 2]), (3.3, [3, 3, 3])]
    tuples = awkward1.from_iter(tuple_data, highlevel=False)
    check(tuples, awkward0.Table, tuple_data)
    one_tuple = awkward1.to_awkward0(tuples[2])
    assert isinstance(one_tuple, tuple)
    assert one_tuple[0] == 2.2
    assert one_tuple[1].tolist() == [2, 2]

    # Mixed numbers and lists.
    union_data = [0.0, [], 1.1, [1], 2.2, [2, 2], 3.3, [3, 3, 3]]
    union = awkward1.from_iter(union_data, highlevel=False)
    check(union, awkward0.UnionArray, union_data)

    # Missing values.
    option_data = [1.1, 2.2, None, None, 3.3, None, 4.4]
    option = awkward1.from_iter(option_data, highlevel=False)
    check(option, awkward0.IndexedMaskedArray, option_data)

    # Explicit index into a content buffer.
    content = awkward1.layout.NumpyArray(
        numpy.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]))
    index = awkward1.layout.Index64(
        numpy.array([3, 2, 2, 5, 0], dtype=numpy.int64))
    indexed = awkward1.layout.IndexedArray64(index, content)
    check(indexed, awkward0.IndexedArray, [3.3, 2.2, 2.2, 5.5, 0.0])
Пример #6
0
# Per-event loop over the first N_events entries of `events`.
for ievt in range(N_events):
    # Progress report every 1000th event.
    if ievt % 1000 == 0:
        print("Processing event %d. Progress: %.2f%%" %
              (ievt, 100 * ievt / N_events))
    # Skip events whose HT is below 1200.
    if events['HT'][ievt] < 1200:
        continue

    tracks_x = events['Tracks.fCoordinates.fX'][ievt]
    tracks_y = events['Tracks.fCoordinates.fY'][ievt]
    tracks_z = events['Tracks.fCoordinates.fZ'][ievt]
    tracks_fromPV0 = events['Tracks_fromPV0'][ievt]
    tracks_matchedToPFCandidate = events['Tracks_matchedToPFCandidate'][ievt]

    # Energy from the three momentum components plus a fixed mass term.
    # NOTE(review): 0.13957 is presumably the charged-pion mass in GeV — confirm.
    tracks_E = np.sqrt(tracks_x**2 + tracks_y**2 + tracks_z**2 + 0.13957**2)
    # Inputs are converted to awkward0 before building the Lorentz vectors.
    tracks = uproot_methods.TLorentzVectorArray.from_cartesian(
        ak.to_awkward0(tracks_x), ak.to_awkward0(tracks_y),
        ak.to_awkward0(tracks_z), ak.to_awkward0(tracks_E))
    # Select good tracks
    # (pt > 1, |eta| < 2.5, fromPV0 >= 2, matched PF candidate flag > 0)
    tracks = tracks[(tracks.pt > 1.) & (abs(tracks.eta) < 2.5) &
                    (ak.to_awkward0(tracks_fromPV0) >= 2) &
                    (ak.to_awkward0(tracks_matchedToPFCandidate) > 0)]

    # Get AK4 jets
    jets_pt = events['Jets.fCoordinates.fPt'][ievt]
    jets_eta = events['Jets.fCoordinates.fEta'][ievt]
    jets_phi = events['Jets.fCoordinates.fPhi'][ievt]
    jets_e = events['Jets.fCoordinates.fE'][ievt]
    # Jet four-vectors built from (pt, eta, phi, E) after awkward0 conversion.
    jets = uproot_methods.TLorentzVectorArray.from_ptetaphie(
        ak.to_awkward0(jets_pt), ak.to_awkward0(jets_eta),
        ak.to_awkward0(jets_phi), ak.to_awkward0(jets_e))
Пример #7
0
for ievt in range((options.part-1)*N_events,options.part*N_events):
    ievt_normalized = ievt - (options.part-1)*N_events
    if ievt%1000 == 0:
        print("Processing event %d. Progress: %.2f%%"%(ievt,100*ievt_normalized/N_events))
    if events['HT'][ievt] < 1200:
        continue

    tracks_x = events['Tracks.fCoordinates.fX'][ievt]
    tracks_y = events['Tracks.fCoordinates.fY'][ievt]
    tracks_z = events['Tracks.fCoordinates.fZ'][ievt]
    tracks_fromPV0 = events['Tracks_fromPV0'][ievt]
    tracks_matchedToPFCandidate = events['Tracks_matchedToPFCandidate'][ievt]

    tracks_E = np.sqrt(tracks_x**2+tracks_y**2+tracks_z**2+0.13957**2)
    tracks = uproot_methods.TLorentzVectorArray.from_cartesian(ak.to_awkward0(tracks_x),
                                                               ak.to_awkward0(tracks_y),
                                                               ak.to_awkward0(tracks_z),
                                                               ak.to_awkward0(tracks_E))
    # Select good tracks
    tracks = tracks[(tracks.pt > 1.) &
                    (abs(tracks.eta) < 2.5) &
                    (ak.to_awkward0(tracks_fromPV0) >= 2) &
                    (ak.to_awkward0(tracks_matchedToPFCandidate) > 0)]

    # Get AK4 jets
    jets_pt = events['Jets.fCoordinates.fPt'][ievt]
    jets_eta = events['Jets.fCoordinates.fEta'][ievt]
    jets_phi = events['Jets.fCoordinates.fPhi'][ievt]
    jets_e = events['Jets.fCoordinates.fE'][ievt]
    jets = uproot_methods.TLorentzVectorArray.from_ptetaphie(ak.to_awkward0(jets_pt),