Example #1
def load_dataset(numpy_lib, num_iter=1):
    print("loading dataset")
    download_if_not_exists(
        "data/nanoaod_test.root",
        "https://jpata.web.cern.ch/jpata/opendata_files/DY2JetsToLL-merged/1.root",
    )
    datastructures = {
        "Muon": [
            ("Muon_pt", "float32"),
            ("Muon_eta", "float32"),
            ("Muon_phi", "float32"),
            ("Muon_mass", "float32"),
            ("Muon_charge", "int32"),
            ("Muon_pfRelIso03_all", "float32"),
            ("Muon_tightId", "bool"),
        ],
        "Electron": [
            ("Electron_pt", "float32"),
            ("Electron_eta", "float32"),
            ("Electron_phi", "float32"),
            ("Electron_mass", "float32"),
            ("Electron_charge", "int32"),
            ("Electron_pfRelIso03_all", "float32"),
            ("Electron_pfId", "bool"),
        ],
        "Jet": [
            ("Jet_pt", "float32"),
            ("Jet_eta", "float32"),
            ("Jet_phi", "float32"),
            ("Jet_mass", "float32"),
            ("Jet_btag", "float32"),
            ("Jet_puId", "bool"),
        ],
        "EventVariables": [
            ("HLT_IsoMu24", "bool"),
            ("MET_pt", "float32"),
            ("MET_phi", "float32"),
            ("MET_sumet", "float32"),
            ("MET_significance", "float32"),
            ("MET_CovXX", "float32"),
            ("MET_CovXY", "float32"),
            ("MET_CovYY", "float32"),
        ],
    }
    dataset = Dataset(
        "nanoaod",
        num_iter * ["./data/nanoaod_test.root"],
        datastructures,
        treename="Events",
        datapath="",
    )

    dataset.load_root(verbose=True)
    dataset.merge_inplace(verbose=True)
    print("dataset has {0} events, {1:.2f} MB".format(
        dataset.numevents(),
        dataset.memsize() / 1000 / 1000))
    dataset.move_to_device(numpy_lib, verbose=True)
    return dataset
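
A minimal usage sketch for the loader above; this is an illustration under
assumptions, not part of the original example: choose_backend and its import
path follow the hepaccelerate README, and the CPU backend is selected.

# Sketch only: with use_cuda=True, choose_backend would return cupy and the
# matching GPU kernel backend instead of plain numpy.
from hepaccelerate.utils import choose_backend

numpy_lib, ha = choose_backend(use_cuda=False)
dataset = load_dataset(numpy_lib, num_iter=1)
print("loaded {0} events".format(dataset.numevents()))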
Example #2
    @classmethod
    def setUpClass(self):
        self.NUMPY_LIB, self.ha = choose_backend(use_cuda=USE_CUPY)

        import hmumu_utils
        hmumu_utils.NUMPY_LIB = self.NUMPY_LIB
        hmumu_utils.ha = self.ha

        download_if_not_exists(
            "data/myNanoProdMc2016_NANO.root",
            "https://jpata.web.cern.ch/jpata/hmm/test_files/myNanoProdMc2016_NANO.root"
        )

        # Load a simple sync dataset
        self.datastructures = create_datastructure("vbf_sync",
                                                   True,
                                                   "2016",
                                                   do_fsr=True)
        self.dataset = Dataset("vbf_sync", ["data/myNanoProdMc2016_NANO.root"],
                               self.datastructures,
                               datapath="",
                               treename="Events",
                               is_mc=True)
        self.dataset.num_chunk = 0
        self.dataset.era = "2016"
        self.dataset.load_root()

        self.dataset.numpy_lib = self.NUMPY_LIB
        self.dataset.move_to_device(self.NUMPY_LIB)

        # Disable everything that requires ROOT, which is not easily available in Travis CI tests
        from pars import analysis_parameters
        self.analysis_parameters = analysis_parameters
        self.analysis_parameters["baseline"][
            "do_rochester_corrections"] = False
        self.analysis_parameters["baseline"]["do_lepton_sf"] = False
        self.analysis_parameters["baseline"]["save_dnn_vars"] = False
        self.analysis_parameters["baseline"]["do_bdt_ucsd"] = False
        self.analysis_parameters["baseline"]["do_bdt_pisa"] = False
        self.analysis_parameters["baseline"]["do_factorized_jec"] = False
        self.analysis_parameters["baseline"]["do_jec"] = {"2016:": False}
        self.analysis_parameters["baseline"]["do_jer"] = {"2016": True}

        from argparse import Namespace
        self.cmdline_args = Namespace(use_cuda=USE_CUPY,
                                      datapath=".",
                                      do_fsr=False,
                                      nthreads=1,
                                      async_data=False,
                                      do_sync=False,
                                      out="test_out")

        from analysis_hmumu import AnalysisCorrections
        self.analysis_corrections = AnalysisCorrections(
            self.cmdline_args, True)
Example #3
def load_dataset(num_iter=1):
    datastructures = {
        "Muon": [
            ("Muon_Px", "float32"),
            ("Muon_Py", "float32"),
            ("Muon_Pz", "float32"),
            ("Muon_E", "float32"),
            ("Muon_Charge", "int32"),
            ("Muon_Iso", "float32")
        ],
        "Jet": [
            ("Jet_Px", "float32"),
            ("Jet_Py", "float32"),
            ("Jet_Pz", "float32"),
            ("Jet_E", "float32"),
            ("Jet_btag", "float32"),
            ("Jet_ID", "bool")
        ],
        "EventVariables": [
            ("NPrimaryVertices", "int32"),
            ("triggerIsoMu24", "bool"),
            ("EventWeight", "float32")
        ]
    }
    dataset = Dataset("HZZ", num_iter * ["data/HZZ.root"], datastructures,
                      treename="events", datapath="")
    assert dataset.filenames[0] == "data/HZZ.root"
    assert len(dataset.filenames) == num_iter
    # nothing has been read from disk yet: structs and event variables
    # are only filled by load_root()
    assert len(dataset.structs["Jet"]) == 0
    assert len(dataset.eventvars) == 0
    return dataset
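
# Hypothetical follow-up helper (not in the original excerpt), assuming, as in
# the other examples here, that load_root() fills one jagged struct and one
# eventvars entry per input file:
def check_lazy_loading(num_iter=2):
    dataset = load_dataset(num_iter=num_iter)
    dataset.load_root()
    assert len(dataset.structs["Jet"]) == num_iter
    assert len(dataset.eventvars) == num_iter
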
class TestAnalysisSmall(unittest.TestCase):
    @classmethod
    def setUpClass(self):
        self.NUMPY_LIB, self.ha = choose_backend(use_cuda=USE_CUPY)

        import hmumu_utils
        hmumu_utils.NUMPY_LIB = self.NUMPY_LIB
        hmumu_utils.ha = self.ha

        download_if_not_exists(
            "data/myNanoProdMc2016_NANO.root",
            "https://jpata.web.cern.ch/jpata/hmm/test_files/myNanoProdMc2016_NANO.root"
        )

        # Load a simple sync dataset
        self.datastructures = create_datastructure("vbf_sync", True, "2016", do_fsr=True)
        self.dataset = Dataset(
            "vbf_sync",
            ["data/myNanoProdMc2016_NANO.root"],
            self.datastructures,
            datapath="",
            treename="Events",
            is_mc=True)
        self.dataset.num_chunk = 0
        self.dataset.era = "2016"
        self.dataset.load_root()

        self.dataset.numpy_lib = self.NUMPY_LIB
        self.dataset.move_to_device(self.NUMPY_LIB)
        
        # Disable everything that requires ROOT, which is not easily available in Travis CI tests
        from pars import analysis_parameters
        self.analysis_parameters = analysis_parameters
        self.analysis_parameters["baseline"]["do_rochester_corrections"] = False
        self.analysis_parameters["baseline"]["do_lepton_sf"] = False
        self.analysis_parameters["baseline"]["save_dnn_vars"] = False
        self.analysis_parameters["baseline"]["do_bdt_ucsd"] = False
        self.analysis_parameters["baseline"]["do_bdt_pisa"] = False
        self.analysis_parameters["baseline"]["do_factorized_jec"] = False
        self.analysis_parameters["baseline"]["do_jec"] = True
        self.analysis_parameters["baseline"]["do_jer"] = {"2016": True}
        
        from argparse import Namespace
        self.cmdline_args = Namespace(use_cuda=USE_CUPY,
                                      datapath=".",
                                      do_fsr=False,
                                      nthreads=1,
                                      async_data=False,
                                      do_sync=False,
                                      out="test_out")
        
        from analysis_hmumu import AnalysisCorrections
        self.analysis_corrections = AnalysisCorrections(self.cmdline_args, True)

    def setUp(self):
        pass

    def test_dnn(self):
        import keras
        dnn_model = keras.models.load_model("data/DNN27vars_sig_vbf_ggh_bkg_dyvbf_dy105To160_ewk105To160_split_60_40_mod10_191008.h5")
        inp = np.zeros((1000,26), dtype=np.float32)
        out = dnn_model.predict(inp) 
        print(np.mean(out))

    def testDataset(self):
        nev = self.dataset.numevents()
        print("Loaded dataset from {0} with {1} events".format(self.dataset.filenames[0], nev))
        assert nev > 0

    def test_get_genpt(self):
        from hmumu_utils import get_genpt_cpu, get_genpt_cuda
        NUMPY_LIB = self.NUMPY_LIB

        muons = self.dataset.structs["Muon"][0]
        genpart = self.dataset.structs["GenPart"][0]
        muons_genpt = NUMPY_LIB.zeros(muons.numobjects(), dtype=NUMPY_LIB.float32)
        if USE_CUPY:
            get_genpt_cuda[32,1024](muons.offsets, muons.genPartIdx, genpart.offsets, genpart.pt, muons_genpt)
            cuda.synchronize()
        else:
            get_genpt_cpu(muons.offsets, muons.genPartIdx, genpart.offsets, genpart.pt, muons_genpt)
        muons_genpt = NUMPY_LIB.asnumpy(muons_genpt)
        self.assertAlmostEqual(NUMPY_LIB.sum(muons_genpt), 250438.765625)
        self.assertListEqual(list(muons_genpt[:10]), [16.875, 53.125, 50.5, 0.0, 153.5, 32.5, 53.75, 53.125, 55.125, 22.6875])

    def test_fix_muon_fsrphoton_index(self):
        from hmumu_utils import fix_muon_fsrphoton_index
        NUMPY_LIB = self.NUMPY_LIB
       
        analysis_parameters = self.analysis_parameters
 
        muons = self.dataset.structs["Muon"][0]
        fsrphotons = self.dataset.structs["FsrPhoton"][0]
        
        out_muons_fsrPhotonIdx = np.zeros_like(NUMPY_LIB.asnumpy(muons.fsrPhotonIdx))

        mu_pt = NUMPY_LIB.asnumpy(muons.pt)
        mu_eta = NUMPY_LIB.asnumpy(muons.eta)
        mu_phi = NUMPY_LIB.asnumpy(muons.phi)
        mu_mass = NUMPY_LIB.asnumpy(muons.mass)
        mu_iso = NUMPY_LIB.asnumpy(muons.pfRelIso04_all)

        fix_muon_fsrphoton_index(
            mu_pt, mu_eta, mu_phi, mu_mass,
            NUMPY_LIB.asnumpy(fsrphotons.offsets),
            NUMPY_LIB.asnumpy(muons.offsets),
            NUMPY_LIB.asnumpy(fsrphotons.dROverEt2),
            NUMPY_LIB.asnumpy(fsrphotons.relIso03),
            NUMPY_LIB.asnumpy(fsrphotons.pt),
            NUMPY_LIB.asnumpy(fsrphotons.muonIdx),
            NUMPY_LIB.asnumpy(muons.fsrPhotonIdx),
            out_muons_fsrPhotonIdx, 
            analysis_parameters["baseline"]["fsr_dROverEt2"], 
            analysis_parameters["baseline"]["fsr_relIso03"], 
            analysis_parameters["baseline"]["pt_fsr_over_mu_e"]
        )

    def test_analyze_function(self):
        import hmumu_utils
        from hmumu_utils import analyze_data, load_puhist_target
        from analysis_hmumu import JetMetCorrections, BTagWeights
        from coffea.lookup_tools import extractor
        NUMPY_LIB = self.NUMPY_LIB
        hmumu_utils.NUMPY_LIB = self.NUMPY_LIB
        hmumu_utils.ha = self.ha

        analysis_parameters = self.analysis_parameters

        puid_maps = "data/puidSF/PUIDMaps.root"
        puid_extractor = extractor()
        puid_extractor.add_weight_sets(["* * {0}".format(puid_maps)])
        puid_extractor.finalize()
       
        random_seed = 0 

        ret = analyze_data(
            self.dataset, self.analysis_corrections,
            analysis_parameters["baseline"], "baseline", random_seed, do_fsr=True, use_cuda=False)
        h = ret["hist__dimuon_invmass_z_peak_cat5__M_mmjj"]
        
        nev_zpeak_nominal = np.sum(h["nominal"].contents)

        if not USE_CUPY:
            self.assertAlmostEqual(nev_zpeak_nominal, 0.0034586303, places=4)
        
        self.assertTrue("Total__up" in h.keys())
        self.assertTrue("Total__down" in h.keys())
        self.assertTrue("jerB1__up" in h.keys())
        self.assertTrue("jerB1__down" in h.keys())
        self.assertTrue("jerB2__up" in h.keys())
        self.assertTrue("jerB2__down" in h.keys())
        self.assertTrue("jerF1__up" in h.keys())
        self.assertTrue("jerF1__down" in h.keys())
        self.assertTrue("jerF2__up" in h.keys())
        self.assertTrue("jerF2__down" in h.keys())
        self.assertTrue("jerEC1__up" in h.keys())
        self.assertTrue("jerEC1__down" in h.keys())
        self.assertTrue("jerEC2__up" in h.keys())
        self.assertTrue("jerEC2__down" in h.keys())
Example #5
    # Predefine which branches to read from the TTree and how they are grouped into objects.
    # This will be verified against the actual ROOT TTree when it is loaded.
    datastructures = {
        "Muon": [("Muon_Px", "float32"), ("Muon_Py", "float32"),
                 ("Muon_Pz", "float32"), ("Muon_E", "float32"),
                 ("Muon_Charge", "int32"), ("Muon_Iso", "float32")],
        "Jet": [("Jet_Px", "float32"), ("Jet_Py", "float32"),
                ("Jet_Pz", "float32"), ("Jet_E", "float32"),
                ("Jet_btag", "float32"), ("Jet_ID", "bool")],
        "EventVariables": [("NPrimaryVertices", "int32"),
                           ("triggerIsoMu24", "bool"),
                           ("EventWeight", "float32")]
    }

    # Define a dataset, given the data structure and a list of filenames
    dataset = Dataset("HZZ", [filename], datastructures, treename="events")

    # Load the ROOT files
    dataset.load_root(verbose=True)

    # Merge arrays across files into one big array
    dataset.merge_inplace(verbose=True)

    # Move to GPU if CUDA was specified
    dataset.move_to_device(nplib, verbose=True)

    # Process the data and save the output as a JSON file
    results = dataset.analyze(analyze_data_function,
                              verbose=True,
                              parameters={"muons_ptcut": 30.0})
    results.save_json("out.json")
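
The analyze_data_function passed to dataset.analyze above is user-defined. A
minimal sketch under the conventions these examples suggest (the function
receives the per-chunk data dict plus the parameters keyword and returns a
Results mapping; the Results import path follows the hepaccelerate README, and
the pt computation from the stored Cartesian components is illustrative):

import numpy as np
from hepaccelerate.utils import Results

def analyze_data_function(data, parameters):
    ret = Results()
    muons = data["Muon"]
    # transverse momentum from the stored Cartesian components
    muon_pt = np.sqrt(muons.Px**2 + muons.Py**2)
    # count muons passing the configurable pt threshold
    ret["num_muons_passing_ptcut"] = float(np.sum(muon_pt > parameters["muons_ptcut"]))
    return ret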
Example #6
    # Optionally load a single input file instead of the glob patterns below:
    # filename = "data/data_A.4lep.root"
    if not os.path.isdir("data/atlas"):
        os.makedirs("data/atlas")

    walltime_t0 = time.time()
    for ds, fn_pattern, is_mc in datasets:
        filenames = glob.glob(fn_pattern)
        print(filenames)
        if len(filenames) == 0:
            raise Exception(
                "Could not find any filenames for dataset={0}: fn_pattern={1}"
                .format(ds, fn_pattern))

        # Define a dataset, given the data structure and a list of filenames
        dataset = Dataset(ds, filenames, datastructures, treename="mini")
        # Load the ROOT files
        dataset.load_root(verbose=True)

        # Merge arrays across files into one big array
        dataset.merge_inplace(verbose=True)

        # Move to GPU if CUDA was specified
        dataset.move_to_device(nplib, verbose=True)

        # Process the data and save the output as a JSON file
        results = dataset.analyze(
            analyze_data_function,
            verbose=True,
            parameters={
                "lep_ptcut": 10000.0,  #MeV units
Example #7
        ("Muon_Py", "float32"),
    ],
    "Jet": [
        ("Jet_E", "float32"),
        ("Jet_btag", "float32"),
    ],
    "EventVariables": [
        ("NPrimaryVertices", "int32"),
        ("triggerIsoMu24", "bool"),
        ("EventWeight", "float32"),
    ],
}

# Define the dataset across the files
dataset = Dataset("HZZ", ["data/HZZ.root"],
                  datastructures,
                  treename="events",
                  datapath="")

# Load the data to memory
dataset.load_root()

# Jets in the first file
ifile = 0
jets = dataset.structs["Jet"][ifile]

# common offset array for jets
jets_offsets = jets.offsets
print(jets_offsets)

# data arrays
jets_energy = jets.E
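
# Illustrative sketch (not in the original example): the offsets array encodes
# the jagged event -> jet mapping, so event i owns the slice
# content[offsets[i]:offsets[i+1]] of each per-jet data array.
import numpy as np

jets_per_event = np.diff(jets_offsets)  # number of jets in each event
first_event_energies = jets_energy[jets_offsets[0]:jets_offsets[1]]
print(jets_per_event[:5], first_event_energies)
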
class TestAnalysisSmall(unittest.TestCase):
    @classmethod
    def setUpClass(self):
        self.NUMPY_LIB, self.ha = choose_backend(use_cuda=USE_CUPY)

        import hmumu_utils
        hmumu_utils.NUMPY_LIB = self.NUMPY_LIB
        hmumu_utils.ha = self.ha

        download_if_not_exists(
            "data/myNanoProdMc2016_NANO.root",
            "https://jpata.web.cern.ch/jpata/hmm/test_files/myNanoProdMc2016_NANO.root"
        )

        # Load a simple sync dataset
        self.datastructures = create_datastructure("vbf_sync", True, "2016", do_fsr=True)
        self.dataset = Dataset(
            "vbf_sync",
            ["data/myNanoProdMc2016_NANO.root"],
            self.datastructures,
            datapath="",
            treename="Events",
            is_mc=True)
        self.dataset.num_chunk = 0
        self.dataset.era = "2016"
        self.dataset.load_root()

        self.dataset.numpy_lib = self.NUMPY_LIB
        self.dataset.move_to_device(self.NUMPY_LIB)
        
        # Disable everything that requires ROOT, which is not easily available in Travis CI tests
        from pars import analysis_parameters
        self.analysis_parameters = analysis_parameters
        self.analysis_parameters["baseline"]["do_rochester_corrections"] = False
        self.analysis_parameters["baseline"]["do_lepton_sf"] = False
        self.analysis_parameters["baseline"]["save_dnn_vars"] = False
        self.analysis_parameters["baseline"]["do_bdt_ucsd"] = False
        self.analysis_parameters["baseline"]["do_bdt_pisa"] = False
        self.analysis_parameters["baseline"]["do_factorized_jec"] = False
        self.analysis_parameters["baseline"]["do_jec"] = True
        self.analysis_parameters["baseline"]["do_jer"] = {"2016": True}
        
        from argparse import Namespace
        self.cmdline_args = Namespace(use_cuda=USE_CUPY,
                                      datapath=".",
                                      do_fsr=False,
                                      nthreads=1,
                                      async_data=False,
                                      do_sync=False,
                                      out="test_out")
        
        if os.path.isfile("tests/hmm/libhmm.so"):
            from analysis_hmumu import AnalysisCorrections
            self.analysis_corrections = AnalysisCorrections(self.cmdline_args, True)
        else:
            print("Could not load analysis corrections with ROOT, skipping this in further tests")
            self.analysis_corrections = None

    def setUp(self):
        pass

    def testDataset(self):
        nev = self.dataset.numevents()
        print("Loaded dataset from {0} with {1} events".format(self.dataset.filenames[0], nev))
        assert nev > 0

    def test_get_genpt(self):
        from hmumu_utils import get_genpt_cpu, get_genpt_cuda
        NUMPY_LIB = self.NUMPY_LIB

        muons = self.dataset.structs["Muon"][0]
        genpart = self.dataset.structs["GenPart"][0]
        muons_genpt = NUMPY_LIB.zeros(muons.numobjects(), dtype=NUMPY_LIB.float32)
        if USE_CUPY:
            get_genpt_cuda[32,1024](muons.offsets, muons.genPartIdx, genpart.offsets, genpart.pt, muons_genpt)
            cuda.synchronize()
        else:
            get_genpt_cpu(muons.offsets, muons.genPartIdx, genpart.offsets, genpart.pt, muons_genpt)
        muons_genpt = NUMPY_LIB.asnumpy(muons_genpt)
        self.assertAlmostEqual(NUMPY_LIB.sum(muons_genpt), 11943932)
        self.assertListEqual(list(muons_genpt[:10]), [105.0, 30.4375, 0.0, 0.0, 140.5, 28.625, 102.75, 41.25, 120.5, 80.5])

    def test_fix_muon_fsrphoton_index(self):
        from hmumu_utils import fix_muon_fsrphoton_index
        NUMPY_LIB = self.NUMPY_LIB
        
        muons = self.dataset.structs["Muon"][0]
        fsrphotons = self.dataset.structs["FsrPhoton"][0]
        
        out_muons_fsrPhotonIdx = np.zeros_like(NUMPY_LIB.asnumpy(muons.fsrPhotonIdx))
        fix_muon_fsrphoton_index(
            NUMPY_LIB.asnumpy(fsrphotons.offsets),
            NUMPY_LIB.asnumpy(muons.offsets),
            NUMPY_LIB.asnumpy(fsrphotons.dROverEt2),
            NUMPY_LIB.asnumpy(fsrphotons.muonIdx),
            NUMPY_LIB.asnumpy(muons.fsrPhotonIdx),
            out_muons_fsrPhotonIdx
        )

    def test_analyze_function(self):
        import hmumu_utils
        from hmumu_utils import analyze_data, load_puhist_target
        from analysis_hmumu import JetMetCorrections
        from coffea.lookup_tools import extractor
        NUMPY_LIB = self.NUMPY_LIB
        hmumu_utils.NUMPY_LIB = self.NUMPY_LIB
        hmumu_utils.ha = self.ha

        analysis_parameters = self.analysis_parameters

        puid_maps = "data/puidSF/PUIDMaps.root"
        puid_extractor = extractor()
        puid_extractor.add_weight_sets(["* * {0}".format(puid_maps)])
        puid_extractor.finalize()
        
        kwargs = {
            "pu_corrections": {"2016": load_puhist_target("data/pileup/RunII_2016_data.root")},
            "puidreweighting": puid_extractor.make_evaluator(),
            "jetmet_corrections": {
                "2016": {
                    "Summer16_07Aug2017_V11":
                        JetMetCorrections(
                        jec_tag="Summer16_07Aug2017_V11_MC",
                        jec_tag_data={
                            "RunB": "Summer16_07Aug2017BCD_V11_DATA",
                            "RunC": "Summer16_07Aug2017BCD_V11_DATA",
                            "RunD": "Summer16_07Aug2017BCD_V11_DATA",
                            "RunE": "Summer16_07Aug2017EF_V11_DATA",
                            "RunF": "Summer16_07Aug2017EF_V11_DATA",
                            "RunG": "Summer16_07Aug2017GH_V11_DATA",
                            "RunH": "Summer16_07Aug2017GH_V11_DATA",
                        },
                        jer_tag="Summer16_25nsV1_MC",
                        jmr_vals=[1.0, 1.2, 0.8],
                        do_factorized_jec=True),
                },
            },
            "do_fsr": True
        }

        ret = self.dataset.analyze(
            analyze_data,
            use_cuda=USE_CUPY,
            parameter_set_name="baseline",
            parameters=analysis_parameters["baseline"],
            dataset_era=self.dataset.era,
            dataset_name=self.dataset.name,
            dataset_num_chunk=self.dataset.num_chunk,
            is_mc=self.dataset.is_mc,
            **kwargs
        )
        h = ret["hist__dimuon_invmass_z_peak_cat5__M_mmjj"]
        
        nev_zpeak_nominal = np.sum(h["nominal"].contents)

        if not USE_CUPY:
            self.assertAlmostEqual(nev_zpeak_nominal, 0.012528435, places=4)
        
        self.assertTrue("Total__up" in h.keys())
        self.assertTrue("Total__down" in h.keys())
        self.assertTrue("jer__up" in h.keys())
        self.assertTrue("jer__down" in h.keys())