Пример #1
0
def analyze_losses(run_n, SM_sample_id, BSM_sample_ids, plot_suffix=''):
    """Read the result samples of one run and run both loss analyses on them.

    Parameters
    ----------
    run_n : run number handed to ``ex.Experiment``.
    SM_sample_id : id of the SM sample; it is placed first in the sample list.
    BSM_sample_ids : list of BSM sample ids, appended after the SM sample
        (must be a list, since it is concatenated with ``+``).
    plot_suffix : string forwarded unchanged to both ``alo`` analysis calls.
    """
    experiment = ex.Experiment(run_n=run_n).setup(fig_dir=True)
    # no data-sample argument ('default'): only result files are read here
    paths = sf.SamplePathFactory(experiment)

    all_samples = [SM_sample_id] + BSM_sample_ids

    def read_result(sample_id):
        # one JetSample per sample id, loaded from that sample's result path
        return js.JetSample.from_input_file(sample_id, paths.result_path(sample_id))

    # keep the samples in list order: SM first, then the BSM samples
    data = OrderedDict((sample_id, read_result(sample_id)) for sample_id in all_samples)

    analysis_args = (experiment, data, all_samples, plot_suffix)
    alo.analyze_losses(*analysis_args)
    alo.analyze_loss_strategies(*analysis_args)
# presumably the file extension for saved figures; not used in the visible excerpt
fig_format = '.png'

# ids of the loss strategies, one list per loss type
# (total / reco + KL / KL, as indicated by the variable names)
strategy_ids_total_loss = ['s1', 's2', 's3', 's4', 's5']
strategy_ids_reco_kl_loss = ['rk5', 'rk5_1', 'rk5_01']
strategy_ids_kl_loss = ['kl1', 'kl2', 'kl3', 'kl4', 'kl5']

# choose the background sample and the list of signal samples to compare
BG_sample = samp.BG_SR_sample
SIG_samples = samp.SIG_samples_na
# paired element-wise with SIG_samples via zip() in the ROC section below
mass_centers = [1500, 2500, 3500, 4500]
# '<BG_sample>_vs_narrow_sig' when SIG_samples equals samp.SIG_samples_na,
# '<BG_sample>_vs_broad_sig' otherwise
plot_name_suffix = BG_sample + '_vs_' + (
    'narrow' if SIG_samples == samp.SIG_samples_na else 'broad') + '_sig'

# set up analysis outputs (run_n is defined outside this excerpt)
experiment = ex.Experiment(run_n).setup(model_analysis_dir=True)
# sample paths from sdfr.path_dict; the '$run$' entry is handed to update_base_path
# (presumably a placeholder that is replaced by the run directory - confirm)
paths = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path(
    {'$run$': experiment.run_dir})
print('Running analysis on experiment {}, plotting results to {}'.format(
    run_n, experiment.model_analysis_dir))
# read in data for every sample id in samp.all_samples
data = sf.read_inputs_to_jet_sample_dict_from_dir(samp.all_samples, paths)

# *****************************************
#					ROC
# *****************************************
if 'roc' in do_analyses:

    # for each signal
    for SIG_sample, mass_center in zip(SIG_samples, mass_centers):
        # for each type of loss strategy
Пример #3
0
# record type bundling the runtime parameters of this script
Parameters = recordtype(
    'Parameters',
    'run_n, qcd_sample_id, qcd_ext_sample_id, qcd_train_sample_id, qcd_test_sample_id, strategy_id, epochs, read_n'
)
# concrete parameter values for this run
params = Parameters(run_n=113,
                    qcd_sample_id='qcdSigReco',
                    qcd_ext_sample_id='qcdSigExtReco',
                    qcd_train_sample_id='qcdSigAllTrainReco',
                    qcd_test_sample_id='qcdSigAllTestReco',
                    strategy_id='rk5_05',
                    epochs=100,
                    read_n=None)

# set directories for saving and loading with extra envelope subdir for qr models
experiment = ex.Experiment(run_n=params.run_n).setup(model_dir_qr=True,
                                                     analysis_dir_qr=True)
# redirect the QR model directory to its 'envelope' subdirectory and create it
experiment.model_dir_qr = os.path.join(experiment.model_dir_qr, 'envelope')
pathlib.Path(experiment.model_dir_qr).mkdir(parents=True, exist_ok=True)
# NOTE(review): the result directory is a hard-coded absolute, user-specific path,
# unlike model_dir_qr which is derived from the experiment
result_dir = '/eos/user/k/kiwoznia/data/QR_results/analysis/run_' + str(
    params.run_n) + '/envelope'
pathlib.Path(result_dir).mkdir(parents=True, exist_ok=True)

#****************************************#
#           read in qcd data
#****************************************#
# here '$run$' is set to the string 'run_<run_n>' rather than experiment.run_dir
paths = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path(
    {'$run$': 'run_' + str(params.run_n)})

# combined qcd dataset; the helper receives the full params record and the paths
data_qcd_all = dapr.merge_qcd_base_and_ext_datasets(params, paths)
print('qcd all: min mjj = {}, max mjj = {}'.format(np.min(data_qcd_all['mJJ']),
                                                   np.max(
Пример #4
0
import pofah.util.sample_factory as sf
import pofah.path_constants.sample_dict_file_parts_input as sdi 
import pofah.path_constants.sample_dict_file_parts_reco as sdr 


# ********************************************************
#               runtime params
# ********************************************************

# samples to process; the commented-out lines are alternative selections
#test_samples = ['GtoWW15na', 'GtoWW15br', 'GtoWW25na', 'GtoWW25br', 'GtoWW35na', 'GtoWW35br', 'GtoWW45na', 'GtoWW45br']
test_samples = ['GtoWW35na', 'GtoWW35br']
#test_samples = ['qcdSig']

# run number; selects the experiment and is also passed to the model below
run_n = 101

experiment = ex.Experiment(run_n=run_n)

# ********************************************
#               load model
# ********************************************

# construct the model for this run from the experiment's model directory, then load it
vae = VAE_3D(run=run_n, model_dir=experiment.model_dir)
vae.load( )

# input paths use sdi.path_dict as is; result paths are extended with the run directory
input_paths = sf.SamplePathDirFactory(sdi.path_dict)
result_paths = sf.SamplePathDirFactory(sdr.path_dict).extend_base_path(experiment.run_dir)

for sample_id in test_samples:

    # ********************************************
    #               read test data (events)
Пример #5
0
        description='run mjj spectrum analysis with QR cuts applied')
    # optional command line argument -x: signal cross section as float, default 100.
    parser.add_argument('-x',
                        dest='sig_xsec',
                        type=float,
                        default=100.,
                        help='signal injection cross section')
    args = parser.parse_args()

    # fixed run number and the samples to plot
    run = 113
    sample_ids = ['qcdSigAllTestReco', 'GtoWW35brReco']
    # one plot_mjj_spectrum call per sample and quantile
    quantiles = [0.1, 0.3, 0.5, 0.7, 0.9, 0.99]
    # quantiles = [0.9]
    # not referenced in the visible part of this block
    mjj_key = 'mJJ'
    # values for the path placeholders; the signal name is the second entry of
    # sample_ids and the cross section is truncated to an integer
    param_dict = {
        '$run$': str(run),
        '$sig_name$': sample_ids[1],
        '$sig_xsec$': str(int(args.sig_xsec))
    }

    input_paths = sf.SamplePathDirFactory(sdfs.path_dict).update_base_path(
        param_dict)  # in selection paths new format with run_x, sig_x, ...
    # output directory for the plots, taken from the experiment set up with analysis_dir_qr=True
    fig_dir = exp.Experiment(
        run_n=run,
        param_dict=param_dict).setup(analysis_dir_qr=True).analysis_dir_qr_mjj

    for sample_id in sample_ids:
        for quantile in quantiles:
            # NOTE(review): the sample file is re-read for every quantile although the
            # arguments of from_input_file do not depend on the quantile; the read could
            # move to the outer loop if plot_mjj_spectrum leaves the sample unmodified - confirm
            sample = js.JetSample.from_input_file(
                sample_id, input_paths.sample_file_path(sample_id))
            plot_mjj_spectrum(sample, quantile, fig_dir)
# signal samples to analyze
SIG_samples = [
    'GtoWW15naReco', 'GtoWW25naReco', 'GtoWW35naReco', 'GtoWW45naReco'
]
#SIG_samples = ['GtoWW25naReco', 'qcdSigAllReco']
# one mass value per entry of SIG_samples (same length, same order)
mass_centers = [1500, 2500, 3500, 4500]
# BG_sample and BG_SR_sample are defined outside this excerpt
all_samples = [BG_sample, BG_SR_sample] + SIG_samples

# ids of the loss strategies, grouped by loss type as indicated by the variable names
strategy_ids_total_loss = ['s1', 's2', 's3', 's4', 's5']
strategy_ids_kl_loss = ['kl1', 'kl2', 'kl3', 'kl4', 'kl5']
strategy = lost.loss_strategy_dict['s5']  # L1 & L2 > LT

# run numbers of the models involved
run_model101 = 101
run_model502 = 502
run_model701 = 701

experiment101 = ex.Experiment(run_model101)
experiment502 = ex.Experiment(run_model502)
experiment701 = ex.Experiment(run_model701)
# experiment for the comparison output; only the run directories of runs 101 and 502
# are passed in (run 701 is not part of param_dict)
experiment_result = ex.Experiment(param_dict={
    '$run1$': experiment101.run_dir,
    '$run2$': experiment502.run_dir
}).setup(model_comparison_dir=True)

# read run 101 data
# NOTE(review): the names paths102 / data102 carry a different run number than the
# experiment actually used (experiment101)
paths102 = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path(
    {'$run$': experiment101.run_dir})
data102 = sf.read_inputs_to_jet_sample_dict_from_dir(all_samples, paths102)

# set up paths for run 502 data
# NOTE(review): the name paths501 carries a different run number than the
# experiment actually used (experiment502)
paths501 = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path(
    {'$run$': experiment502.run_dir})
# run numbers of the two models whose results are read below
run_n_model1 = 46
run_n_model2 = 49

SM_sample = 'qcdSideReco'
BSM_samples = [
    'GtoWW15naReco', 'GtoWW15brReco', 'GtoWW25naReco', 'GtoWW25brReco',
    'GtoWW35naReco', 'GtoWW35brReco', 'GtoWW45naReco', 'GtoWW45brReco'
]
# ids of the loss strategies; looked up in ls.loss_strategies in the loop below
strategies = ['s1', 's2', 's3', 's4', 's5', 'k1', 'k2']

all_sample_ids = [SM_sample] + BSM_samples
# one entry per BSM sample, in the same order; each mass appears twice because
# every mass point has an 'na' and a 'br' sample
mass_centers = [1500, 1500, 2500, 2500, 3500, 3500, 4500, 4500]

# read JET IMAGE VAE model results
experiment = ex.Experiment(run_n_model1)
data_img_vae = sf.read_results_to_jet_sample_dict(all_sample_ids,
                                                  experiment,
                                                  mode='img-local')

# read 3D LOSS VAE model results
# `experiment` is rebound here: from this point on it refers to run_n_model2
# NOTE(review): this read also uses mode='img-local', same as the image model - confirm
experiment = ex.Experiment(run_n_model2).setup(fig_dir=True)
data_3d_vae = sf.read_results_to_jet_sample_dict(all_sample_ids,
                                                 experiment,
                                                 mode='img-local')

for s in strategies:

    strategy = ls.loss_strategies[s]

    for BSM_sample, mass_center in zip(BSM_samples, mass_centers):
Пример #8
0
        int(quantile * 100)) + date_str


# read in qcd signal region sample
run_n = 101
SM_sample = 'qcdSigAllReco'
#BSM_samples = ['GtoWW15naReco', 'GtoWW15brReco', 'GtoWW25naReco', 'GtoWW25brReco','GtoWW35naReco', 'GtoWW35brReco', 'GtoWW45naReco', 'GtoWW45brReco']
BSM_samples = [
    'GtoWW15naReco', 'GtoWW25naReco', 'GtoWW35naReco', 'GtoWW45naReco'
]
all_samples = [SM_sample] + BSM_samples
# feature names; not referenced in the visible part of this block
mjj_key = 'mJJ'
reco_loss_j1_key = 'j1RecoLoss'
# share passed to the train/test split of the qcd sample below
QR_train_share = 0.3

experiment = ex.Experiment(run_n)
paths = sf.SamplePathDirFactory(sd.path_dict).update_base_path(
    {'$run$': experiment.run_dir})

# read all samples (SM + BSM) into a dictionary keyed by sample id
data = sf.read_inputs_to_jet_sample_dict_from_dir(all_samples, paths)

# define quantiles and loss-strategy for discrimination
quantiles = [0.01, 0.05, 0.1, 0.3, 0.5, 0.7, 0.9]  # several quantiles, not only 5%
# NOTE(review): the trailing comment is taken over unchanged; confirm it describes 'rk5'
strategy = lost.loss_strategy_dict['rk5']  # L1 & L2 > LT
qcd_sig_sample = data[SM_sample]
# split qcd sample into training and testing
qcd_train, qcd_test = js.split_jet_sample_train_test(qcd_sig_sample,
                                                     QR_train_share)
# update data dictionary: the SM entry now holds only the test part
data[SM_sample] = qcd_test
# prints the features of the full (unsplit) qcd sample
print(qcd_sig_sample.features())
Пример #9
0
from vae.vae_highres_model import VAE_HR
import vae.losses as lo
import pofah.util.input_data_reader as idr
import pofah.util.sample_factory as sf
import pofah.jet_sample as js
import pofah.util.experiment as ex


# ********************************************************
#               runtime params
# ********************************************************

# run number and the data sample identifier passed to the path factory
run_n = 4
data_sample = 'img-local-54'

experiment = ex.Experiment(run_n).setup(result_dir=True)
paths = sf.SamplePathFactory(experiment,data_sample)

# ********************************************
#               load model
# ********************************************

# NOTE(review): the imports visible above bring in VAE_HR, not VAE - confirm that
# VAE is imported in the part of the file outside this excerpt
vae = VAE(run=run_n, model_dir=experiment.model_dir)
vae.load()

# ********************************************
#               read test data (images)
# ********************************************

# samples to process; the commented-out line lists the full set
#sample_ids = ['qcdSide', 'qcdSig', 'GtoWW15na', 'GtoWW15br', 'GtoWW25na', 'GtoWW25br', 'GtoWW35na', 'GtoWW35br', 'GtoWW45na', 'GtoWW45br']
sample_ids = ['GtoWW25br', 'GtoWW35na']
Пример #10
0
                    kernel_sz=(1, 3),
                    kernel_ini_n=12,
                    beta=0.01,
                    epochs=400,
                    train_total_n=int(10e6),
                    valid_total_n=int(1e6),
                    gen_part_n=int(5e5),
                    batch_n=256,
                    z_sz=12,
                    activation='elu',
                    initializer='he_uniform',
                    learning_rate=0.001,
                    max_lr_decay=8,
                    lambda_reg=0.0)  # 'L1L2'

# experiment for the configured run, set up with model_dir=True and fig_dir=True
experiment = expe.Experiment(params.run_n).setup(model_dir=True, fig_dir=True)
# input sample paths taken from sdi.path_dict without any base-path update
paths = safa.SamplePathDirFactory(sdi.path_dict)

# ********************************************************
#       prepare training (generator) and validation data
# ********************************************************

# train (generator)
print('>>> Preparing training dataset generator')
# generator over the 'qcdSide' sample directory; part size and maximum sample count
# come from params, and the entries of cuts.global_cuts are passed as keyword arguments
data_train_generator = dage.DataGenerator(
    path=paths.sample_dir_path('qcdSide'),
    sample_part_n=params.gen_part_n,
    sample_max_n=params.train_total_n,
    **cuts.global_cuts)  # generate 10 M jet samples
train_ds = tf.data.Dataset.from_generator(
    data_train_generator,
Пример #11
0
import pofah.phase_space.cut_constants as cuts
import training as train


# ********************************************************
#               runtime params
# ********************************************************

# samples to process; the commented-out lines are alternative selections
# test_samples = ['qcdSig', 'qcdSigExt', 'GtoWW15na', 'GtoWW15br', 'GtoWW25na', 'GtoWW25br', 'GtoWW35na', 'GtoWW35br', 'GtoWW45na', 'GtoWW45br']
#test_samples = ['qcdSig', 'GtoWW35na']
test_samples = ['qcdSideExt']

run_n = 113
# sideband cuts when 'qcdSideExt' is among the test samples, signal region cuts otherwise
# NOTE(review): this rebinds `cuts`, the alias of the imported cut_constants module;
# after this line the module is no longer reachable through that name
cuts = cuts.sideband_cuts if 'qcdSideExt' in test_samples else cuts.signalregion_cuts #{}

experiment = ex.Experiment(run_n=run_n).setup(model_dir=True)
# batch size: 4096*16 = 65536
batch_n = 4096*16
	
# ********************************************
#               load model
# ********************************************

# load the saved model from the 'best_so_far' subdirectory of the experiment's model directory
vae = VAEparticle.from_saved_model(path=os.path.join(experiment.model_dir, 'best_so_far'))
print('beta factor: ', vae.beta)
# loss function object; it is only bound here, not called in the visible excerpt
loss_fn = losses.threeD_loss


# input paths use sdi.path_dict as is; result paths get the '$run$' entry set to the run directory
input_paths = sf.SamplePathDirFactory(sdi.path_dict)
result_paths = sf.SamplePathDirFactory(sdr.path_dict).update_base_path({'$run$': experiment.run_dir})

for sample_id in test_samples:
Пример #12
0
from collections import OrderedDict
import os

import analysis.analysis_roc as ar
import discriminator.loss_strategy as ls
import pofah.sample_dict as sd
import pofah.jet_sample as js
import pofah.util.experiment as ex


# ids of the loss strategies that appear in the legend
strategies = ['s1', 's2', 's3', 's4', 's5']

# one legend entry (title_str) per strategy id, in the order of `strategies`
# NOTE(review): the ROC loop below iterates over all ls.loss_strategies.values(),
# not over `strategies` - confirm that both have the same entries and order
legend = [ls.loss_strategies[s].title_str for s in strategies]

run_n = 45
experiment = ex.Experiment(run_n).setup(fig_dir=True)

SM_sample = 'qcdSideReco'
BSM_samples = ['qcdSigReco', 'GtoWW15naReco', 'GtoWW15brReco', 'GtoWW25naReco', 'GtoWW25brReco','GtoWW35naReco', 'GtoWW35brReco', 'GtoWW45naReco', 'GtoWW45brReco']

all_samples = [SM_sample] + BSM_samples


# read each sample from '<result_dir>/<file name>.h5', keeping the order of all_samples
data = OrderedDict()
for sample_id in all_samples:
    data[sample_id] = js.JetSample.from_input_file(sample_id, os.path.join(experiment.result_dir, sd.file_names[sample_id]+'.h5'))

# plot standard ROC for all strategies
for BSM_sample in BSM_samples:

    neg_class_losses = [strategy( data[SM_sample] ) for strategy in ls.loss_strategies.values()]
Пример #13
0
import dadrah.selection.discriminator as disc
import pofah.path_constants.sample_dict_file_parts_reco as sdfr

#****************************************#
#			set runtime params
#****************************************#
# immutable record of the runtime parameters of this script
Parameters = namedtuple('Parameters', 'run_n, sample_id, quantile, strategy')
params = Parameters(run_n=101,
                    sample_id='qcdSigBisReco',
                    quantile=0.1,
                    strategy=lost.loss_strategy_dict['s5'])

#****************************************#
#           read in data
#****************************************#
experiment = ex.Experiment(params.run_n)
paths = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path(
    {'$run$': experiment.run_dir})
# read the sample from its directory (from_input_dir, not from a single file)
sample = js.JetSample.from_input_dir(params.sample_id,
                                     paths.sample_dir_path(params.sample_id))

#****************************************#
#           load quantile regression
#****************************************#
discriminator = disc.QRDiscriminator(quantile=params.quantile,
                                     loss_strategy=params.strategy)
# NOTE(review): the model file is a hard-coded path relative to the current working
# directory and is not derived from the experiment - confirm this is intended
discriminator.load('./my_new_model.h5')

#****************************************#
#		apply quantile regression
#****************************************#