def sub_test_store(self, readWrite):
    """Check that arrays stored in a TrainData come back unchanged.

    Two feature arrays, two truth arrays and one weight array are stored.
    When ``readWrite`` is true the object is additionally written to disk
    and read back into a fresh TrainData before the checks are made.

    :param readWrite: if true, round-trip the data through a file first
    """
    filename = "testfile.tdjctd"

    td = TrainData()
    # Creation order is kept as in the original test in case the helper
    # depends on call order.
    x = self.createSimpleArray('int32')
    y = self.createSimpleArray('float32')
    w = self.createSimpleArray('int32')
    x_orig = x.copy()

    x2 = self.createSimpleArray('float32')
    y2 = self.createSimpleArray('float32')
    _ = self.createSimpleArray('int32')
    x2_orig = x2.copy()
    y_orig = y.copy()

    td._store([x, x2], [y, y2], [w])

    if readWrite:
        try:
            td.writeToFile(filename)
            td = TrainData()
            td.readFromFile(filename)
        finally:
            # Remove the scratch file even if the round trip failed, and do
            # it without spawning a shell so the test is portable.
            if os.path.exists(filename):
                os.remove(filename)

    shapes = td.getNumpyFeatureShapes()
    self.assertEqual([[3, 5, 6], [1], [3, 5, 6], [1]], shapes, "shapes")

    self.assertEqual(2, td.nFeatureArrays())
    self.assertEqual(2, td.nTruthArrays())
    self.assertEqual(1, td.nWeightArrays())

    f = td.transferFeatureListToNumpy(False)
    t = td.transferTruthListToNumpy(False)
    # The weights are transferred as in the original test, but their
    # content is not compared.
    td.transferWeightListToNumpy(False)

    # The transferred lists are consumed as consecutive pairs; the second
    # entry of each pair is cast to int64 before rebuilding a SimpleArray.
    xnew = SimpleArray(f[0], np.array(f[1], dtype='int64'))
    self.assertEqual(x_orig, xnew)

    xnew = SimpleArray(f[2], np.array(f[3], dtype='int64'))
    self.assertEqual(x2_orig, xnew)

    ynew = SimpleArray(t[0], np.array(t[1], dtype='int64'))
    self.assertEqual(y_orig, ynew)
def test_TrainDataRead(self):
    """Read a previously written TrainData file and compare to reference arrays.

    The first feature array stored in ``trainData_previous.djctd`` must equal
    the SimpleArray built from ``np_arr.npy`` and ``np_rs.npy``.
    """
    print('TestCompatibility TrainData')

    stored = TrainData()
    stored.readFromFile('trainData_previous.djctd')
    self.assertEqual(stored.nFeatureArrays(), 1)

    # Reference array built from the two .npy files next to the test.
    reference_data = np.load("np_arr.npy")
    reference_splits = np.load("np_rs.npy")
    expected = SimpleArray(reference_data, reference_splits)

    # Rebuild a SimpleArray from the first two entries of the feature list.
    feature_list = stored.transferFeatureListToNumpy(False)
    loaded_data = feature_list[0]
    loaded_splits = np.array(feature_list[1], dtype='int64')
    actual = SimpleArray(loaded_data, loaded_splits)

    self.assertEqual(actual, expected)
def test_AddToFile(self):
    """Check that ``addToFile`` matches an in-memory ``append``.

    A TrainData is written to a file and then added to the same file. A
    second TrainData, built from copies of the same arrays, has the first
    one appended. Reading the file back must give an object equal to it.
    """
    print('TestTrainData: AddToFile')
    filename = "testfile.tdjctd"

    td = TrainData()
    x = self.createSimpleArray('int32')
    y = self.createSimpleArray('float32')
    w = self.createSimpleArray('int32')
    xo, yo, wo = x.copy(), y.copy(), w.copy()

    x2 = self.createSimpleArray('float32')
    y2 = self.createSimpleArray('float32')
    _ = self.createSimpleArray('int32')
    x2o, y2o = x2.copy(), y2.copy()

    td._store([x, x2], [y, y2], [w])

    try:
        td.writeToFile(filename)
        td.addToFile(filename)

        # Build the in-memory reference from the copies taken above.
        td2 = TrainData()
        td2._store([xo, x2o], [yo, y2o], [wo])
        td2.append(td)

        td.readFromFile(filename)
    finally:
        # Always clean up the scratch file, without relying on a shell.
        if os.path.exists(filename):
            os.remove(filename)

    self.assertEqual(td, td2)
def __init__(
        self,
        samplefile,
        function_to_apply=None,  # needs to be function(counter,[model_input], [predict_output], [truth])
        after_n_batches=50,
        batchsize=10,
        on_epoch_end=False,
        use_event=0,
        decay_function=None,
        offset=0):
    """Set up the callback and load the sample it operates on.

    :param samplefile: path of the TrainData file read with ``readFromFile``
    :param function_to_apply: callable stored for later use; per the inline
        note it needs the form
        ``function(counter, [model_input], [predict_output], [truth])``
    :param after_n_batches: stored as ``self.after_n_batches``; reset to 0
        when ``on_epoch_end`` is set
    :param batchsize: batch size handed to the data generator
    :param on_epoch_end: stored as ``self.run_on_epoch_end``
    :param use_event: if >= 0, the sample is skimmed with this value
    :param decay_function: stored as ``self.decay_function``
    :param offset: initial value of ``self.call_counter``
    """
    super(PredictCallback, self).__init__()
    self.samplefile = samplefile
    self.function_to_apply = function_to_apply
    self.counter = 0
    self.call_counter = offset
    self.decay_function = decay_function

    self.after_n_batches = after_n_batches
    self.run_on_epoch_end = on_epoch_end

    # The two trigger modes are mutually exclusive; epoch end wins.
    if self.run_on_epoch_end and self.after_n_batches >= 0:
        print(
            'PredictCallback: can only be used on epoch end OR after n batches, falling back to epoch end'
        )
        self.after_n_batches = 0

    td = TrainData()
    td.readFromFile(samplefile)
    if use_event >= 0:
        td.skim(use_event)

    # NOTE(review): the source read "self.batchsize = 1 = td", which is not
    # valid Python. It is restored here as two assignments, with the loaded
    # sample kept on the instance as self.td -- confirm the attribute name
    # against the methods that consume it.
    self.batchsize = 1
    self.td = td

    self.gen = trainDataGenerator()
    # NOTE(review): the generator receives the `batchsize` argument while
    # self.batchsize is fixed to 1 above -- confirm this is intended.
    self.gen.setBatchSize(batchsize)
    self.gen.setSkipTooLargeBatches(False)
import matplotlib.pyplot as plt import matplotlib.image as mpimg import matplotlib.patches as patches import math from numba import jit from inference import collect_condensates, make_inference_dict parser = ArgumentParser('make plots') parser.add_argument('inputFile') args = parser.parse_args() #use traindata as data storage td = TrainData() td.readFromFile(args.inputFile) td.x = td.transferFeatureListToNumpy() data = make_inference_dict(td.x[0], td.x[1], td.x[2]) betaselection = collect_condensates(data, 0.1, 0.8) #0.2/2.0 print('betaselection', betaselection.shape) def makeRectangle(size, pos, edgecolor='y'): return patches.Rectangle([pos[0] - size[0] / 2., pos[1] - size[1] / 2.], size[0], size[1], linewidth=1,
# Event selection option; the value stays a string and is converted below.
parser.add_argument("-e", help="event number ", default="0")
args = parser.parse_args()

# Remaining imports happen after argument parsing, as in the original script.
import DeepJetCore
from keras.models import load_model
from DeepJetCore.compiled.c_trainDataGenerator import trainDataGenerator
from DeepJetCore.evaluation import predict_from_TrainData
from DeepJetCore.customObjects import get_custom_objects
from DeepJetCore.TrainData import TrainData
import matplotlib.pyplot as plt
from ragged_plotting_tools import make_cluster_coordinates_plot, make_original_truth_shower_plot
from index_dicts import create_index_dict, create_feature_dict

# Load the input file and skim it with the requested event number.
event_index = int(args.e)
td = TrainData()
td.readFromFile(args.i)
td.skim(event_index)

# Load the model with the project's custom objects and run the prediction.
model = load_model(
    args.inputModel,
    custom_objects=get_custom_objects(),
)
predicted = predict_from_TrainData(model, td, batchsize=100000)
pred = predicted[0]

# Take the first two entries of the feature list and the first truth entry.
feature_arrays = td.transferFeatureListToNumpy()
rs = feature_arrays[1]
feat = feature_arrays[0]
truth_arrays = td.transferTruthListToNumpy()
truth = truth_arrays[0]
args = parser.parse_args()

# Accumulators defined before the per-file loop.
allparticles, all_ev_prop = [], []
names = ""

# args.inputFile is a text file listing one TrainData file per line.
with open(args.inputFile) as file:
    for inputfile in file:
        inputfile = inputfile.replace('\n', '')
        if not inputfile:
            # skip empty lines
            continue
        print('inputfile', inputfile)

        td = TrainData()
        td.readFromFile(inputfile)
        indata = td.transferFeatureListToNumpy()
        pred = indata[0]
        feat = indata[1]
        truth = indata[2]
        del td

        d = make_particle_inference_dict(pred, feat, truth)

        # collect_condensates output is B x V x 1 per the original note;
        # the trailing axis is squeezed away.
        condensates = collect_condensates(d, 0.1, 0.8)
        condensate_mask = np.squeeze(condensates, axis=2)

        pred_E = d['f_E'] * d['p_E_corr']
        pred_pos = d['f_pos'] + d['p_pos_offs']
        # Calorimeter energy is not supported by the data format.
        calo_energy = None

        # Events are looped over explicitly from here on.
        nevents = pred.shape[0]
# Script fragment: read one TrainData file, feed it through a
# TrainDataGenerator and pull the row splits out of each batch.
from DeepJetCore.TrainData import TrainData
from DeepJetCore.dataPipeline import TrainDataGenerator
from LayersRagged import RaggedConstructTensor
import index_dicts
import tensorflow as tf
import os

# Select the device by PCI bus order and expose only device "4".
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "4"

# Hard-coded input sample.
td = TrainData()
td.readFromFile(
    '/eos/cms/store/cmst3/group/hgcal/CMG_studies/pepr/50_part_with_noise_Jul2020/converted/HGCalML_data/50_part_with_noise_Jul2020/988_windowntup.djctd'
)

# Generator configured with a batch size of 100000 and with skipping of
# too-large batches switched off; the sample is attached as its buffer.
gen = TrainDataGenerator()
gen.setBatchSize(100000)
gen.setSkipTooLargeBatches(False)
gen.setBuffer(td)

with tf.device('/CPU:0'):
    ragged_constructor = RaggedConstructTensor()

# NOTE(review): the original indentation was lost; the loop is assumed to sit
# outside the tf.device block -- confirm.
# NOTE(review): feedNumpyData() is called anew on every iteration -- confirm
# that this resumes the stream rather than restarting it.
while True:
    feat, truth = next(
        gen.feedNumpyData())  # this is [ [features],[truth],[None] ]
    # Stop once the generator reports its last batch (that batch is not
    # processed further here).
    if gen.lastBatch():
        break
    # First column of the second feature entry.
    row_splits = feat[1][:, 0]
import numpy as np
from DeepJetCore.TrainData import TrainData
from argparse import ArgumentParser
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.patches as patches
import math
from numba import jit
from inference import collect_condensates, make_inference_dict

# Load the test sample (a prediction file under ../results_partial was used
# as an alternative input at some point).
td = TrainData()
td.readFromFile("../data/test_data/9.djctd")

# Transfer features, truth and weights; keep them on td and as short names.
x = td.x = td.transferFeatureListToNumpy()
y = td.y = td.transferTruthListToNumpy()
z = td.z = td.transferWeightListToNumpy()

# Print the number of feature arrays and the shapes of the first three.
print(len(x))
for idx in (0, 1, 2):
    print(x[idx].shape)

# The first three feature arrays are the inputs of the inference dict.
data = make_inference_dict(x[0], x[1], x[2])
# Script fragment: open a TrainData file and start a 3D figure for each of
# the first (at most ten) events.
from argparse import ArgumentParser
from plotting_tools import plotevent
import numpy as np

parser = ArgumentParser('Make some plots')
parser.add_argument('inputFile')
args = parser.parse_args()

infile = str(args.inputFile)

from DeepJetCore.TrainData import TrainData
import matplotlib.pyplot as plt

td = TrainData()
td.readFromFile(infile)
# Only the first feature array and the first truth array are used.
feat = td.transferFeatureListToNumpy()[0]
truth = td.transferTruthListToNumpy()[0]
nevents = min(len(feat), 10)

for e in range(nevents):
    print('true energy', truth[e])
    # Sum of channel 0 over the three inner axes of this event.
    print('reco sum ', np.sum(feat[e, :, :, :, 0]))
    fig = plt.figure()
    # Figure.gca() no longer accepts keyword arguments (removed in
    # Matplotlib 3.6); add_subplot creates the 3D axes on the new figure.
    ax = fig.add_subplot(111, projection='3d')
    ax.set_xlabel("x [idx]")
    ax.set_zlabel("y [idx]")
def __init__(self, samplefile, accumulate_after_batches=5, plot_after_batches=50,
             batchsize=10, beta_threshold=0.6, distance_threshold=0.6,
             iou_threshold=0.1, n_windows_for_plots=5,
             n_windows_for_scalar_metrics=5000000, outputdir=None, publish=None,
             n_ccoords=None, n_average_over_samples=5,
             ):
    """
    :param samplefile: the file to pick validation data from
    :param accumulate_after_batches: run performance metrics after n batches (a good value is 5)
    :param plot_after_batches: update and upload plots after n batches
    :param batchsize: batch size
    :param beta_threshold: beta threshold for running prediction on obc
    :param distance_threshold: distance threshold for running prediction on obc
    :param iou_threshold: iou threshold to use to match both for obc and for ticl
    :param n_windows_for_plots: how many windows to average to do running performance plots
    :param n_windows_for_scalar_metrics: the maximum windows to store data for scalar performance metrics as a function of iteration
    :param outputdir: the output directory where to store results
    :param publish: where to publish, could be ssh'able path
    :param n_ccoords: n coords for plots
    :param n_average_over_samples: average scalar metrics over samples
    """
    super(plotRunningPerformanceMetrics, self).__init__()
    self.samplefile = samplefile
    self.counter = 0
    self.call_counter = 0
    self.decay_function = None

    self.outputdir = outputdir
    # The attribute is spelled "n_ccords" (sic); the name is kept because
    # code outside this constructor may read it.
    self.n_ccords = n_ccoords
    self.publish = publish

    self.accumulate_after_batches = accumulate_after_batches
    self.plot_after_batches = plot_after_batches
    # Epoch-end mode is always off here, so the "epoch end OR after n
    # batches" fallback from the original could never run and was dropped.
    self.run_on_epoch_end = False

    td = TrainData()
    td.readFromFile(samplefile)

    # NOTE(review): the source read "self.batchsize = batchsize = td", which
    # stored the TrainData object as the batch size and passed it to
    # setBatchSize(). It is restored as two assignments, with the sample kept
    # as self.td -- confirm the attribute name against its users.
    self.batchsize = batchsize
    self.td = td

    self.gen = TrainDataGenerator()
    self.gen.setBatchSize(self.batchsize)
    self.gen.setSkipTooLargeBatches(False)
    self.gen.setBuffer(td)

    self.n_batches = self.gen.getNBatches()

    with tf.device('/CPU:0'):
        self.ragged_constructor = RaggedConstructTensor()

    self.window_id = 0
    self.window_analysis_dicts = []

    self.n_windows_for_plots = n_windows_for_plots
    self.n_windows_for_scalar_metrics = n_windows_for_scalar_metrics
    self.beta_threshold = beta_threshold
    self.distance_threshold = distance_threshold
    self.iou_threshold = iou_threshold

    # One independent, initially empty list per tracked quantity.
    self.scalar_metrics = {
        key: []
        for key in (
            'efficiency',
            'efficiency_ticl',
            'fake_rate',
            'fake_rate_ticl',
            'var_response',
            'var_response_ticl',
            'iteration',
        )
    }

    self.n_average_over_samples = n_average_over_samples
    self.plot_process = None