def analyze_losses(run_n, SM_sample_id, BSM_sample_ids, plot_suffix=''):
    """Read the result samples of one run and run the loss analyses on them.

    An experiment is set up for ``run_n`` (with ``fig_dir=True``), the result
    file of every requested sample is read into a ``JetSample`` and the
    samples are handed to ``alo.analyze_losses`` and
    ``alo.analyze_loss_strategies``.

    Args:
        run_n: Run number, forwarded to ``ex.Experiment``.
        SM_sample_id: Id of the SM sample; it is placed first in the sample
            list.
        BSM_sample_ids: List of BSM sample ids. It is concatenated to a list
            with ``+``, so it has to be a list itself.
        plot_suffix: Forwarded unchanged to both ``alo`` analysis functions.

    Returns:
        None.
    """
    experiment = ex.Experiment(run_n=run_n).setup(fig_dir=True)
    # No data sample is passed: only results are read, so the factory's
    # 'default' data sample is used.
    result_paths = sf.SamplePathFactory(experiment)

    sample_ids = [SM_sample_id] + BSM_sample_ids
    # Ordered mapping sample id -> JetSample, filled in the order of sample_ids.
    samples = OrderedDict(
        (sid, js.JetSample.from_input_file(sid, result_paths.result_path(sid)))
        for sid in sample_ids
    )

    alo.analyze_losses(experiment, samples, sample_ids, plot_suffix)
    alo.analyze_loss_strategies(experiment, samples, sample_ids, plot_suffix)
fig_format = '.png' # loss strategies strategy_ids_total_loss = ['s1', 's2', 's3', 's4', 's5'] strategy_ids_reco_kl_loss = ['rk5', 'rk5_1', 'rk5_01'] strategy_ids_kl_loss = ['kl1', 'kl2', 'kl3', 'kl4', 'kl5'] # set background sample to use BG_sample = samp.BG_SR_sample SIG_samples = samp.SIG_samples_na mass_centers = [1500, 2500, 3500, 4500] plot_name_suffix = BG_sample + '_vs_' + ( 'narrow' if SIG_samples == samp.SIG_samples_na else 'broad') + '_sig' # set up analysis outputs experiment = ex.Experiment(run_n).setup(model_analysis_dir=True) paths = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path( {'$run$': experiment.run_dir}) print('Running analysis on experiment {}, plotting results to {}'.format( run_n, experiment.model_analysis_dir)) # read in data data = sf.read_inputs_to_jet_sample_dict_from_dir(samp.all_samples, paths) # ***************************************** # ROC # ***************************************** if 'roc' in do_analyses: # for each signal for SIG_sample, mass_center in zip(SIG_samples, mass_centers): # for each type of loss strategy
Parameters = recordtype( 'Parameters', 'run_n, qcd_sample_id, qcd_ext_sample_id, qcd_train_sample_id, qcd_test_sample_id, strategy_id, epochs, read_n' ) params = Parameters(run_n=113, qcd_sample_id='qcdSigReco', qcd_ext_sample_id='qcdSigExtReco', qcd_train_sample_id='qcdSigAllTrainReco', qcd_test_sample_id='qcdSigAllTestReco', strategy_id='rk5_05', epochs=100, read_n=None) # set directories for saving and loading with extra envelope subdir for qr models experiment = ex.Experiment(run_n=params.run_n).setup(model_dir_qr=True, analysis_dir_qr=True) experiment.model_dir_qr = os.path.join(experiment.model_dir_qr, 'envelope') pathlib.Path(experiment.model_dir_qr).mkdir(parents=True, exist_ok=True) result_dir = '/eos/user/k/kiwoznia/data/QR_results/analysis/run_' + str( params.run_n) + '/envelope' pathlib.Path(result_dir).mkdir(parents=True, exist_ok=True) #****************************************# # read in qcd data #****************************************# paths = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path( {'$run$': 'run_' + str(params.run_n)}) data_qcd_all = dapr.merge_qcd_base_and_ext_datasets(params, paths) print('qcd all: min mjj = {}, max mjj = {}'.format(np.min(data_qcd_all['mJJ']), np.max(
import pofah.util.sample_factory as sf import pofah.path_constants.sample_dict_file_parts_input as sdi import pofah.path_constants.sample_dict_file_parts_reco as sdr # ******************************************************** # runtime params # ******************************************************** #test_samples = ['GtoWW15na', 'GtoWW15br', 'GtoWW25na', 'GtoWW25br', 'GtoWW35na', 'GtoWW35br', 'GtoWW45na', 'GtoWW45br'] test_samples = ['GtoWW35na', 'GtoWW35br'] #test_samples = ['qcdSig'] run_n = 101 experiment = ex.Experiment(run_n=run_n) # ******************************************** # load model # ******************************************** vae = VAE_3D(run=run_n, model_dir=experiment.model_dir) vae.load( ) input_paths = sf.SamplePathDirFactory(sdi.path_dict) result_paths = sf.SamplePathDirFactory(sdr.path_dict).extend_base_path(experiment.run_dir) for sample_id in test_samples: # ******************************************** # read test data (events)
description='run mjj spectrum analysis with QR cuts applied') parser.add_argument('-x', dest='sig_xsec', type=float, default=100., help='signal injection cross section') args = parser.parse_args() run = 113 sample_ids = ['qcdSigAllTestReco', 'GtoWW35brReco'] quantiles = [0.1, 0.3, 0.5, 0.7, 0.9, 0.99] # quantiles = [0.9] mjj_key = 'mJJ' param_dict = { '$run$': str(run), '$sig_name$': sample_ids[1], '$sig_xsec$': str(int(args.sig_xsec)) } input_paths = sf.SamplePathDirFactory(sdfs.path_dict).update_base_path( param_dict) # in selection paths new format with run_x, sig_x, ... fig_dir = exp.Experiment( run_n=run, param_dict=param_dict).setup(analysis_dir_qr=True).analysis_dir_qr_mjj for sample_id in sample_ids: for quantile in quantiles: sample = js.JetSample.from_input_file( sample_id, input_paths.sample_file_path(sample_id)) plot_mjj_spectrum(sample, quantile, fig_dir)
# Signal samples and their mass centers (two lists of equal length).
SIG_samples = [
    'GtoWW15naReco',
    'GtoWW25naReco',
    'GtoWW35naReco',
    'GtoWW45naReco',
]
# alternative selection, kept from the original script:
#SIG_samples = ['GtoWW25naReco', 'qcdSigAllReco']
mass_centers = [1500, 2500, 3500, 4500]

# BG_sample and BG_SR_sample are defined earlier in the script.
all_samples = [BG_sample, BG_SR_sample] + SIG_samples

# ids of the loss strategies, grouped by kind of loss
strategy_ids_total_loss = ['s1', 's2', 's3', 's4', 's5']
strategy_ids_kl_loss = ['kl1', 'kl2', 'kl3', 'kl4', 'kl5']
strategy = lost.loss_strategy_dict['s5']  # L1 & L2 > LT

# one experiment per model run that enters the comparison
run_model101 = 101
run_model502 = 502
run_model701 = 701
experiment101, experiment502, experiment701 = map(
    ex.Experiment, (run_model101, run_model502, run_model701))

# experiment for the comparison output, parameterized by the run
# directories of the two compared models
experiment_result = ex.Experiment(
    param_dict={
        '$run1$': experiment101.run_dir,
        '$run2$': experiment502.run_dir,
    }
).setup(model_comparison_dir=True)

# Read the data of experiment101.
# NOTE(review): the variable names (and the original comment) say run 102,
# but the run directory used here belongs to run_model101 = 101 - confirm
# which run is intended before renaming anything.
paths102 = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path(
    {'$run$': experiment101.run_dir})
data102 = sf.read_inputs_to_jet_sample_dict_from_dir(all_samples, paths102)

# Paths of experiment502.
# NOTE(review): same mismatch as above - the name says 501, the run
# directory used is that of run_model502 = 502.
paths501 = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path(
    {'$run$': experiment502.run_dir})
run_n_model1 = 46 run_n_model2 = 49 SM_sample = 'qcdSideReco' BSM_samples = [ 'GtoWW15naReco', 'GtoWW15brReco', 'GtoWW25naReco', 'GtoWW25brReco', 'GtoWW35naReco', 'GtoWW35brReco', 'GtoWW45naReco', 'GtoWW45brReco' ] strategies = ['s1', 's2', 's3', 's4', 's5', 'k1', 'k2'] all_sample_ids = [SM_sample] + BSM_samples mass_centers = [1500, 1500, 2500, 2500, 3500, 3500, 4500, 4500] # read JET IMAGE VAE model results experiment = ex.Experiment(run_n_model1) data_img_vae = sf.read_results_to_jet_sample_dict(all_sample_ids, experiment, mode='img-local') # read 3D LOSS VAE model results experiment = ex.Experiment(run_n_model2).setup(fig_dir=True) data_3d_vae = sf.read_results_to_jet_sample_dict(all_sample_ids, experiment, mode='img-local') for s in strategies: strategy = ls.loss_strategies[s] for BSM_sample, mass_center in zip(BSM_samples, mass_centers):
int(quantile * 100)) + date_str # read in qcd signal region sample run_n = 101 SM_sample = 'qcdSigAllReco' #BSM_samples = ['GtoWW15naReco', 'GtoWW15brReco', 'GtoWW25naReco', 'GtoWW25brReco','GtoWW35naReco', 'GtoWW35brReco', 'GtoWW45naReco', 'GtoWW45brReco'] BSM_samples = [ 'GtoWW15naReco', 'GtoWW25naReco', 'GtoWW35naReco', 'GtoWW45naReco' ] all_samples = [SM_sample] + BSM_samples mjj_key = 'mJJ' reco_loss_j1_key = 'j1RecoLoss' QR_train_share = 0.3 experiment = ex.Experiment(run_n) paths = sf.SamplePathDirFactory(sd.path_dict).update_base_path( {'$run$': experiment.run_dir}) data = sf.read_inputs_to_jet_sample_dict_from_dir(all_samples, paths) # define quantile and loss-strategy for discimination quantiles = [0.01, 0.05, 0.1, 0.3, 0.5, 0.7, 0.9] # 5% strategy = lost.loss_strategy_dict['rk5'] # L1 & L2 > LT qcd_sig_sample = data[SM_sample] #split qcd sample into training and testing qcd_train, qcd_test = js.split_jet_sample_train_test(qcd_sig_sample, QR_train_share) # update data_dictionary data[SM_sample] = qcd_test print(qcd_sig_sample.features())
from vae.vae_highres_model import VAE_HR
import vae.losses as lo
import pofah.util.input_data_reader as idr
import pofah.util.sample_factory as sf
import pofah.jet_sample as js
import pofah.util.experiment as ex


# ********************************************************
#                   runtime parameters
# ********************************************************

run_n = 4
data_sample = 'img-local-54'

# experiment of this run, set up with result_dir=True
experiment = ex.Experiment(run_n).setup(result_dir=True)
paths = sf.SamplePathFactory(experiment, data_sample)


# ********************************************
#                 load model
# ********************************************

# NOTE(review): only VAE_HR is imported in the visible part of this file;
# make sure the name VAE is imported further up, otherwise this line raises
# a NameError.
vae = VAE(run=run_n, model_dir=experiment.model_dir)
vae.load()


# ********************************************
#           read test data (images)
# ********************************************

# full list of sample ids, kept from the original script:
#sample_ids = ['qcdSide', 'qcdSig', 'GtoWW15na', 'GtoWW15br', 'GtoWW25na', 'GtoWW25br', 'GtoWW35na', 'GtoWW35br', 'GtoWW45na', 'GtoWW45br']
sample_ids = ['GtoWW25br', 'GtoWW35na']
kernel_sz=(1, 3), kernel_ini_n=12, beta=0.01, epochs=400, train_total_n=int(10e6), valid_total_n=int(1e6), gen_part_n=int(5e5), batch_n=256, z_sz=12, activation='elu', initializer='he_uniform', learning_rate=0.001, max_lr_decay=8, lambda_reg=0.0) # 'L1L2' experiment = expe.Experiment(params.run_n).setup(model_dir=True, fig_dir=True) paths = safa.SamplePathDirFactory(sdi.path_dict) # ******************************************************** # prepare training (generator) and validation data # ******************************************************** # train (generator) print('>>> Preparing training dataset generator') data_train_generator = dage.DataGenerator( path=paths.sample_dir_path('qcdSide'), sample_part_n=params.gen_part_n, sample_max_n=params.train_total_n, **cuts.global_cuts) # generate 10 M jet samples train_ds = tf.data.Dataset.from_generator( data_train_generator,
import pofah.phase_space.cut_constants as cuts import training as train # ******************************************************** # runtime params # ******************************************************** # test_samples = ['qcdSig', 'qcdSigExt', 'GtoWW15na', 'GtoWW15br', 'GtoWW25na', 'GtoWW25br', 'GtoWW35na', 'GtoWW35br', 'GtoWW45na', 'GtoWW45br'] #test_samples = ['qcdSig', 'GtoWW35na'] test_samples = ['qcdSideExt'] run_n = 113 cuts = cuts.sideband_cuts if 'qcdSideExt' in test_samples else cuts.signalregion_cuts #{} experiment = ex.Experiment(run_n=run_n).setup(model_dir=True) batch_n = 4096*16 # ******************************************** # load model # ******************************************** vae = VAEparticle.from_saved_model(path=os.path.join(experiment.model_dir, 'best_so_far')) print('beta factor: ', vae.beta) loss_fn = losses.threeD_loss input_paths = sf.SamplePathDirFactory(sdi.path_dict) result_paths = sf.SamplePathDirFactory(sdr.path_dict).update_base_path({'$run$': experiment.run_dir}) for sample_id in test_samples:
from collections import OrderedDict
import os

import analysis.analysis_roc as ar
import discriminator.loss_strategy as ls
import pofah.sample_dict as sd
import pofah.jet_sample as js
import pofah.util.experiment as ex

# ids of the loss strategies; legend holds the title_str of each of them
strategies = ['s1', 's2', 's3', 's4', 's5']
legend = [ls.loss_strategies[s].title_str for s in strategies]

run_n = 45
experiment = ex.Experiment(run_n).setup(fig_dir=True)

SM_sample = 'qcdSideReco'
BSM_samples = [
    'qcdSigReco',
    'GtoWW15naReco', 'GtoWW15brReco',
    'GtoWW25naReco', 'GtoWW25brReco',
    'GtoWW35naReco', 'GtoWW35brReco',
    'GtoWW45naReco', 'GtoWW45brReco',
]
all_samples = [SM_sample] + BSM_samples

# Read one JetSample per sample id from '<result_dir>/<file name>.h5',
# keeping the order of all_samples.
data = OrderedDict()
for sample_id in all_samples:
    data[sample_id] = js.JetSample.from_input_file(
        sample_id,
        os.path.join(experiment.result_dir, sd.file_names[sample_id] + '.h5'),
    )

# plot standard ROC for all strategies
for BSM_sample in BSM_samples:
    # NOTE(review): this applies every strategy registered in
    # ls.loss_strategies, while legend only covers the ids in `strategies`;
    # the two disagree if the registry holds more entries. The value also
    # does not depend on BSM_sample.
    neg_class_losses = [strategy(data[SM_sample]) for strategy in ls.loss_strategies.values()]
import dadrah.selection.discriminator as disc
import pofah.path_constants.sample_dict_file_parts_reco as sdfr

#****************************************#
#           runtime parameters
#****************************************#

# namedtuple, lost, ex, sf and js are imported earlier in the file.
Parameters = namedtuple(
    'Parameters', ['run_n', 'sample_id', 'quantile', 'strategy'])
params = Parameters(
    run_n=101,
    sample_id='qcdSigBisReco',
    quantile=0.1,
    strategy=lost.loss_strategy_dict['s5'],
)

#****************************************#
#           read in data
#****************************************#

experiment = ex.Experiment(params.run_n)
# the '$run$' placeholder of the path dictionary is filled with the run directory
paths = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path(
    {'$run$': experiment.run_dir})
sample = js.JetSample.from_input_dir(
    params.sample_id, paths.sample_dir_path(params.sample_id))

#****************************************#
#       load quantile regression
#****************************************#

discriminator = disc.QRDiscriminator(
    quantile=params.quantile, loss_strategy=params.strategy)
# NOTE(review): the model path is relative to the current working directory.
discriminator.load('./my_new_model.h5')

#****************************************#
#       apply quantile regression
#****************************************#