示例#1
0
    def save(self, filename_out, shuffle=True):
        """
        Write observable definitions, observable values, and event weights to a
        MadMiner HDF5 file. The parameter, benchmark, and morphing setup is
        copied over from the file provided at initialization; nuisance
        benchmarks found in the input events are added on top.

        Parameters
        ----------
        filename_out : str
            Destination path for the output file.

        shuffle : bool, optional
            If True, events are shuffled before saving. This matters when the
            file combines multiple distinct samples (e.g. signal and
            background). Default value: True.

        Returns
        -------
            None

        """

        # Nothing to write if no events have been analysed yet.
        if self.observations is None or self.weights is None:
            logger.warning("No events to save!")
            return

        logger.debug(
            "Loading HDF5 data from %s and saving file to %s", self.filename, filename_out
        )

        # One nuisance benchmark per weight name found in the events.
        benchmark_names = list(self.weights.keys())
        logger.debug("Weight names: %s", benchmark_names)

        save_nuisance_setup(
            file_name=filename_out,
            file_override=True,
            nuisance_benchmarks=benchmark_names,
            nuisance_parameters=self.nuisance_parameters,
            reference_benchmark=self.reference_benchmark,
            copy_from_path=self.filename,
        )

        # Event-level payload: observable definitions, values, and weights.
        save_events(
            file_name=filename_out,
            file_override=True,
            observables=self.observables,
            observations=self.observations,
            weights=self.weights,
            sampling_benchmarks=self.events_sampling_benchmark_ids,
            num_signal_events=self.signal_events_per_benchmark,
            num_background_events=self.background_events,
        )

        # Shuffle in place by combining the output file with itself.
        if shuffle:
            combine_and_shuffle([filename_out], filename_out)
    'delta_phi_zz',
    '(lep1ZZ+lep2ZZ).deltaphi(lep3ZZ+lep4ZZ) * (-1. + 2.*float((lep1ZZ+lep2ZZ).eta > (lep3ZZ+lep4ZZ).eta))',
    required=False,
    default=float('nan'),
    #default=0,
)

# Jet multiplicity.
delphes.add_observable('n_jets', 'len(j)', required=True)

# Missing transverse energy.
delphes.add_observable('met', 'met.pt', required=True)

# Event selection cuts, applied in order.
for cut_expression in ['isZZcand == 1', 'n_jets >= 2', 'm_jj > 700.']:
    delphes.add_cut(cut_expression)

# Evaluate all observables and apply the cuts.
delphes.analyse_delphes_samples()

# Persist the analysed events.
delphes.save('/data_CMS/cms/cortinovis/ewdim6/data_ew_1M_az/delphes_data.h5')

# Shuffle so later train/test splits see no inherent event ordering.
combine_and_shuffle(
    ['/data_CMS/cms/cortinovis/ewdim6/data_ew_1M_az/delphes_data.h5'],
    '/data_CMS/cms/cortinovis/ewdim6/data_ew_1M_az/delphes_data_shuffled.h5')
# Run this from a terminal in an environment with MadMiner installed.
from __future__ import absolute_import, division, print_function, unicode_literals

import logging
from madminer.sampling import combine_and_shuffle



# MadMiner output at DEBUG verbosity.
logging.basicConfig(
    format='%(asctime)-5.5s %(name)-20.20s %(levelname)-7.7s %(message)s',
    datefmt='%H:%M',
    level=logging.DEBUG,
)

# Quieten every non-MadMiner logger (e.g. matplotlib).
for logger_name in logging.Logger.manager.loggerDict:
    if "madminer" not in logger_name:
        logging.getLogger(logger_name).setLevel(logging.WARNING)


mg_dir = '/home/software/MG5_aMC_v2_6_2/'

# One HDF5 file per run: delphes_data1.h5 ... delphes_data100.h5.
delphesDatasetList = ['data/delphes_data{}.h5'.format(run) for run in range(1, 101)]

combine_and_shuffle(
    delphesDatasetList,
    'data/delphes_data_shuffledBig.h5',
)
示例#4
0
# Requested training methods, normalised to plain strings.
# (A list comprehension replaces map(lambda ...): same elements, but the
# result is reusable rather than a one-shot iterator.)
methods = inputs['methods']
print(methods)
methods = [str(method) for method in methods]

test_split = float(inputs['test_split'])  # training-test split fraction

# Read the number of theory parameters from the MadMiner HDF5 file.
# Context manager guarantees the file handle is released (the original left
# it open). NOTE(review): no later code visible here reuses `hf` — confirm
# nothing downstream reads from the handle after this point.
with h5py.File(h5_file, 'r') as hf:
    parameters = len(hf['parameters']['names'])

# Optionally shuffle events first; removes any inherent sample ordering.
if inputs['shuffle']:
    h5shuffle_file = '/home/data/madminer_example_shuffled.h5'

    combine_and_shuffle([h5_file], h5shuffle_file)

    sampler = SampleAugmenter(h5shuffle_file, include_nuisance_parameters=nuisance)
else:
    sampler = SampleAugmenter(h5_file, include_nuisance_parameters=nuisance)
for method in methods:
    print('sampling from method ', method)

    for i in range(n_trainsamples):

        # creates training samples
示例#5
0
# We can also add cuts, again as parse-able strings. Besides the objects
# discussed above, they may reference the observables defined earlier.

# In[335]:

for cut in ('isZZcand == 1', 'pt_j1 > 20.', 'n_jets >= 2'):
    delphes.add_cut(cut)

# ## 4. Analyse events and store data

# `analyse_delphes_samples` evaluates every observable on the Delphes
# file(s) generated before and keeps only the events passing all cuts.

# In[336]:

delphes.analyse_delphes_samples()

# In[337]:

delphes.save('data_hel_hw_hb/delphes_data.h5')

# ## 6. Combine and shuffle different samples

# Several small samples produced with the same setup (identical benchmark
# points / morphing basis) can be merged here to reduce disk usage.
#
# Shuffling is good practice even for a single sample: events may carry an
# inherent ordering (e.g. from sampling different hypotheses) that would
# otherwise bias a later train/test split.

# In[340]:

combine_and_shuffle(['data_hel_hw_hb/delphes_data.h5'],
                    'data_hel_hw_hb/delphes_data_shuffled.h5')
示例#6
0
# Evaluate observables and apply the cuts on the Delphes samples.
delphes.analyse_delphes_samples()

# In[238]:

delphes.save('data_ew_wphi2/delphes_data.h5')

# ## 6. Combine and shuffle different samples

# Samples generated with the same setup (benchmark points / morphing basis)
# can be merged here; shuffling also removes any inherent event ordering
# that could bias a later train/test split.

# In[245]:

combine_and_shuffle(['data_ew_wphi2/delphes_data.h5'],
                    'data_ew_wphi2/delphes_data_shuffled.h5')

# In[ ]:

# (Removed dead commented-out snippet that opened the HDF5 output with h5py
# and printed how many entries of samples/observations were non-zero.)
示例#7
0

# Event selection cuts, applied in order.
for selection in ('isZZcand == 1', 'n_jets >= 2'):
    delphes.add_cut(selection)


# ## 4. Analyse events and store data

# `analyse_delphes_samples` computes the observables from the Delphes
# file(s) generated before and checks which events pass the cuts.

# In[237]:


delphes.analyse_delphes_samples()


# In[238]:


delphes.save('data_sme_hw_hbox/delphes_data.h5')


# Merge and shuffle so later train/test splits see no inherent ordering.
combine_and_shuffle(
    ['data_sme_hw_hbox/delphes_data.h5'],
    'data_sme_hw_hbox/delphes_data_shuffled.h5'
)




# MadMiner output at DEBUG verbosity.
logging.basicConfig(
    format='%(asctime)-5.5s %(name)-20.20s %(levelname)-7.7s %(message)s',
    datefmt='%H:%M',
    level=logging.DEBUG,
)

# Quieten every non-MadMiner logger (e.g. matplotlib).
for logger_name in logging.Logger.manager.loggerDict:
    if "madminer" not in logger_name:
        logging.getLogger(logger_name).setLevel(logging.WARNING)
        
        
        
mg_dir = '/home/software/MG5_aMC_v2_6_2/'


path = "./data/"
# Collect data/delphes_dataN.h5 for run numbers of one to four digits
# (each "?" matches exactly one character).
delphesDatasetList = []
for digits in range(1, 5):
    delphesDatasetList.extend(glob.glob(path + "delphes_data" + "?" * digits + ".h5"))

combine_and_shuffle(
    delphesDatasetList,
    'data/delphes_data_shuffled.h5',
    k_factors=0.00029507, # specific to 1k events in run card and suboptimal simulating!!!
)
print ("Files combined: ",len(delphesDatasetList))
示例#9
0
# We can also add cuts, again as parse-able strings. Besides the objects
# discussed above, they may reference the observables defined earlier.

# In[335]:

for cut in ('isZZcand == 1', 'n_jets >= 2', 'pt_j1 > 20.'):
    delphes.add_cut(cut)

# ## 4. Analyse events and store data

# `analyse_delphes_samples` computes all observables from the Delphes
# file(s) generated before and checks which events pass the cuts.

# In[336]:

delphes.analyse_delphes_samples()

# In[337]:

delphes.save('data_dim6_3/delphes_data.h5')

# ## 6. Combine and shuffle different samples

# Small samples generated with the same setup (same benchmark points /
# morphing basis) can be combined here to reduce disk usage.
#
# Shuffling is good practice even for a single sample: an inherent event
# ordering (e.g. from sampling different hypotheses) could otherwise leak
# into the train/test split.

# In[340]:

combine_and_shuffle(['data_dim6_3/delphes_data.h5'],
                    'data_dim6_3/delphes_data_shuffled.h5')