def getBinIdxLimits(fileDescr):
  """Query rucio for fileJuicer output datasets derived from fileDescr and
  return the inclusive bin-index limits as ([0, maxEtIdx], [0, maxEtaIdx]).

  Aborts via mainLogger.fatal when no '_et<N>_eta<M>' dataset is found.
  """
  from rucio.client import DIDClient
  scope, dataset = extract_scope(fileDescr)
  # Wildcard search for every dataset spawned from the base container name.
  didPattern = {'name': appendToFileName(dataset, '*')}
  binMatcher = re.compile(r'.*_et(\d+)_eta(\d+)')
  maxEt, maxEta = -1, -1
  for did in DIDClient().list_dids(scope, didPattern):
    match = binMatcher.match(did)
    if not match:
      continue
    # Keep the largest bin indices seen over all matching datasets.
    maxEt = max(maxEt, int(match.group(1)))
    maxEta = max(maxEta, int(match.group(2)))
  if -1 in (maxEt, maxEta):
    mainLogger.fatal( "Couldn't automatically retrieve et/eta bins using rucio, have you uploaded fileJuicer output files?", RuntimeError)
  return [0, maxEt], [0, maxEta]
# Script body: load each input tuning file, read its et/eta bin indices from
# the 'SP' benchmark entries and re-save it with 'et<N>_eta<M>' appended to
# the output name (taken from the file's parent directory).
parser.add_argument('-d', '--data', action='store', dest='data', required=True, nargs='+', help="The input tuning files.")
import sys, os
# With no command-line arguments, print usage and bail out.
if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(1)
args = parser.parse_args()
# Take all files
paths = csvStr2List(args.data)
paths = expandFolders(paths)
from RingerCore import load, save, appendToFileName
for f in paths:
    ff = load(f)
    # Pick the bin indices off the 'SP' keys. NOTE(review): when several
    # 'SP' keys exist the last one visited wins — assumes all entries of a
    # file share the same etBinIdx/etaBinIdx; confirm with the producer.
    for k in ff.keys():
        if 'SP' in k:
            etBin = ff[k]['etBinIdx']
            etaBin = ff[k]['etaBinIdx']
    print 'etBin = ', etBin, ', etaBin = ', etaBin
    # Output base name is the parent directory of the input file path.
    outname = f.split('/')[len(f.split('/')) - 2]
    cOutputName = appendToFileName(outname, ('et%d_eta%d') % (etBin, etaBin))
    save(ff, cOutputName, compress=True)
def create_weights(self, discrList, filename):
    """Export FastNet discriminator weights to an optional pickle and to a
    ROOT file ('<filename>.root', TTree 'discriminators' under 'tuning',
    plus scalar flags under 'metadata').

    :param discrList: iterable of tuned-model dicts providing 'etBin',
        'etaBin' (numpy arrays) and a 'discriminator' dict with 'nodes',
        'bias' and 'weights'.
    :param filename: base output file name; '.root' is appended for ROOT.
    """
    self._logger.info('Getting weights...')
    from TuningTools import TuningToolCores
    modelDict = {}
    modelDict['__version__'] = self._version
    modelDict['__type__'] = 'Fex'
    modelDict['__core__'] = TuningToolCores.FastNet
    # Flags describing which extra input variables the tuning used.
    modelDict['metadata'] = {
        'UseEtaVar': self._useEtaVar,
        'UseLumiVar': self._useLumiVar,
    }
    modelDict['tuning'] = []

    def tolist(a):
        # Pass lists through unchanged; otherwise assume a numpy array.
        if isinstance(a, list):
            return a
        else:
            return a.tolist()

    for model in discrList:
        #etBinIdx = model['etBinIdx']
        #etaBinIdx = model['etaBinIdx']
        ## Discriminator configuration
        discrData = {}
        discrData['etBin'] = model['etBin'].tolist()
        # Remap the lowest et bin [0, 20] to [15, 20]; presumably the tuning
        # covers 15 GeV upwards even when binned from 0 — TODO confirm.
        if discrData['etBin'][0] == 0 and discrData['etBin'][1] == 20.0:
            discrData['etBin'] = [15.0, 20.0]
        discrData['etaBin'] = model['etaBin'].tolist()
        discrData['nodes'] = tolist(model['discriminator']['nodes'])
        discrData['bias'] = tolist(model['discriminator']['bias'])
        discrData['weights'] = tolist(model['discriminator']['weights'])
        modelDict['tuning'].append({'discriminator': discrData})

    if self._toPickle:
        self._logger.info('Export weights to pickle format...')
        # Pickle export flags the keras core instead of FastNet.
        modelDict['__version__'] = self._version
        modelDict['__core__'] = TuningToolCores.keras
        from RingerCore import save
        save(modelDict, filename)

    from ROOT import TFile, TTree
    from ROOT import std
    self._logger.info('Export weights to root format...')
    ### Create the discriminator root object
    fdiscr = TFile(appendToFileName(filename, '.root', separator=''), 'recreate')
    self.__createRootParameter('int', '__version__', self._version).Write()
    self.__createRootParameter('int', '__core__', TuningToolCores.FastNet).Write()
    fdiscr.mkdir('tuning')
    fdiscr.cd('tuning')
    tdiscr = TTree('discriminators', '')
    # Branch descriptors are (type, name, buffer) triples; bind one
    # std::vector buffer per branch.
    for idx, b in enumerate(self._discrBranches):
        b[2] = std.vector(b[0])()
        tdiscr.Branch(b[1], 'vector<%s>' % b[0], b[2])
    # One tree entry per tuned discriminator.
    for t in modelDict['tuning']:
        for idx, b in enumerate(self._discrBranches):
            self.__attachToVector(t['discriminator'][b[1]], b[2])
        tdiscr.Fill()
    tdiscr.Write()
    ### Create the thresholds root object
    fdiscr.mkdir('metadata')
    fdiscr.cd('metadata')
    for key, value in modelDict['metadata'].iteritems():
        self._logger.info('Saving metadata %s as %s', key, value)
        self.__createRootParameter('int' if type(value) is int else 'bool', key, value).Write()
    fdiscr.Close()
if args.expert_networks: for i in range(len(args.expert_networks)): expertNetworksStr += '%EXPERTPATH_' + str(i) + ' ' else: expertNetworksStr = None refStr = subsetStr = None if args.get_job_submission_option('debug') is not None: args.set_job_submission_option('nFiles', 10) # Build secondary datasets input dataDS_SecondaryDatasets = [] for idx, dataDS in enumerate(args.dataDS): dataDS = dataDS if not args.multi_files else appendToFileName( args.dataDS[idx], '_et%d_eta%d' % (args.et_bins[0], args.eta_bins[0])) # dummy et/eta dataDS_SecondaryDatasets.append( SecondaryDataset(key="DATA_%d" % idx, nFilesPerJob=1, container=dataDS, reusable=True)) # Build secondary datasets expert neural networks configurations expertPaths_SecondaryDatasets = [] if args.expert_networks: for idx, expertPath in enumerate(args.expert_networks): expertPath = expertPath if not args.multi_files else appendToFileName( args.expert_networks[idx], '_et%d_eta%d' % (args.et_bins[0], args.eta_bins[0])) expertPaths_SecondaryDatasets.append(
printf = mainLogger.info ) except TypeError: args.binFilters = csvStr2List( args.binFilters ) args.inputFiles = select( args.inputFiles, args.binFilters ) if len(args.binFilters) is 1: args.inputFiles = [args.inputFiles] else: args.inputFiles = [args.inputFiles] import re searchFormat = re.compile(r'.*\.(tar.gz|tgz|pic)(\.[0-9]*)?$') for idx, fileCollection in enumerate(args.inputFiles): m = searchFormat.match( fileCollection[0] ) # Treat output file name: try: cOutputName = appendToFileName( args.outputFile, args.binFilters[idx] ) except (IndexError, TypeError,): if len(args.inputFiles) > 1: cOutputName = appendToFileName( args.outputFile, str(idx) ) else: cOutputName = args.outputFile cOutputName = ensureExtension( cOutputName, 'tgz|tar.gz' ) if m: file_format = m.group(1) wantedFormat = re.compile(r'.*\.' + file_format + r'(\.[0-9]*)?$') isSame = [ bool(wantedFormat.match(filename)) for filename in fileCollection ] from RingerCore.util import cat_files_py if all(isSame): if file_format in ("tgz", "tar.gz"): cat_files_py( fileCollection, cOutputName, args.writeMethod, mainLogger ) elif file_format == "pic":
def create_thresholds(self, discrList, filename):
    """Export FastNet hypothesis thresholds to an optional pickle and to a
    ROOT file ('<filename>.root', TTree 'thresholds' under 'tuning', plus
    scalar flags under 'metadata').

    :param discrList: iterable of tuned-model dicts providing 'etBin',
        'etaBin' (numpy arrays) and 'threshold'.
    :param filename: base output file name; '.root' is appended for ROOT.
    """
    self._logger.info('Getting thresholds...')
    from TuningTools import TuningToolCores
    modelDict = {}
    modelDict['__version__'] = self._version
    modelDict['__type__'] = 'Hypo'
    modelDict['__core__'] = TuningToolCores.FastNet
    # Flags consumed by the online hypothesis configuration.
    modelDict['metadata'] = {
        'UseNoActivationFunctionInTheLastLayer': self._removeOutputTansigTF,
        'DoPileupCorrection': self._doPileupCorrection,
        'LumiCut': self._maxPileupLinearCorrectionValue,
    }
    modelDict['tuning'] = []
    for model in discrList:
        thresData = {}
        #etBinIdx = model['etBinIdx']
        #etaBinIdx = model['etaBinIdx']
        thresData['etBin'] = model['etBin'].tolist()
        thresData['etaBin'] = model['etaBin'].tolist()
        thresData['thresholds'] = model['threshold']
        modelDict['tuning'].append(thresData)

    if self._toPickle:
        self._logger.info('Export Thresholds to pickle format...')
        # Pickle export flags the keras core instead of FastNet.
        modelDict['__version__'] = self._version
        modelDict['__core__'] = TuningToolCores.keras
        from RingerCore import save
        save(modelDict, filename)

    from ROOT import TFile, TTree
    from ROOT import std
    self._logger.info('Export Thresholds to root format...')
    ### Create the thresholds root object
    fthres = TFile(appendToFileName(filename, '.root', separator=''), 'recreate')
    self.__createRootParameter('int', '__version__', self._version).Write()
    self.__createRootParameter('int', '__core__', TuningToolCores.FastNet).Write()
    fthres.mkdir('tuning')
    fthres.cd('tuning')
    tthres = TTree('thresholds', '')
    # Branch descriptors are (type, name, buffer) triples; bind one
    # std::vector buffer per branch.
    for idx, b in enumerate(self._thresBranches):
        b[2] = std.vector(b[0])()
        tthres.Branch(b[1], 'vector<%s>' % b[0], b[2])
    # One tree entry per et/eta bin.
    for t in modelDict['tuning']:
        for idx, b in enumerate(self._thresBranches):
            self.__attachToVector(t[b[1]], b[2])
        tthres.Fill()
    tthres.Write()
    fthres.mkdir('metadata')
    fthres.cd('metadata')
    for key, value in modelDict['metadata'].iteritems():
        self._logger.info('Saving metadata %s as %s', key, value)
        self.__createRootParameter('int' if type(value) is int else 'bool', key, value).Write()
    fthres.Close()
# Grid-submission setup: build the runTuning.py command placeholders and
# register the secondary datasets (data, pre-processing, cross-validation,
# and optionally reference/subset containers) with the job options.
# NOTE(review): this fatal presumably sits under a guard from the preceding
# (not shown) context — confirm before reordering anything here.
mainLogger.fatal( "Currently runGRIDtuning cannot automatically retrieve et bins available.", NotImplementedError)
setrootcore = 'source ./setrootcore.sh'
# Multi-thread mode needs at least two cores to make sense.
if args.multi_thread and args.cores < 2:
    mainLogger.fatal("Requested multi-thread job and the number of requested cores are lower than 2.")
setrootcore_opts = '--grid --ncpus={CORES} --no-color;'.format( CORES = args.cores if args.multi_thread else 1 )
if args.multi_thread:
    args.set_job_submission_option('nCore', args.cores)
# Placeholders expanded by the grid middleware at job run time.
tuningJob = '\$ROOTCOREBIN/user_scripts/TuningTools/standalone/runTuning.py'
dataStr, configStr, ppStr, crossFileStr = '%DATA', '%IN', '%PP', '%CROSSVAL'
refStr = subsetStr = None
# Debug submissions are capped to a small number of files.
if args.get_job_submission_option('debug') is not None:
    args.set_job_submission_option('nFiles', 10)
# Fix secondaryDSs string:
dataDS = args.dataDS[0] if not args.multi_files else appendToFileName(args.dataDS[0],'_et%d_eta%d' % (args.et_bins[0], args.eta_bins[0]))
args.append_to_job_submission_option( 'secondaryDSs'
                                    , SecondaryDatasetCollection (
                                        [ SecondaryDataset( key = "DATA", nFilesPerJob = 1, container = dataDS, reusable = True)
                                        , SecondaryDataset( key = "PP", nFilesPerJob = 1, container = args.ppFileDS[0], reusable = True)
                                        , SecondaryDataset( key = "CROSSVAL", nFilesPerJob = 1, container = args.crossValidDS[0], reusable = True)
                                        , ] ) )
# Optional reference-benchmark container.
if not args.refDS is None:
    args.append_to_job_submission_option( 'secondaryDSs', SecondaryDataset( key = "REF", nFilesPerJob = 1, container = args.refDS[0], reusable = True) )
    refStr = '%REF'
# Optional subset container.
if not args.subsetDS is None:
    args.append_to_job_submission_option( 'secondaryDSs', SecondaryDataset( key = "SUBSET", nFilesPerJob = 1, container = args.subsetDS[0], reusable = True) )
    subsetStr = '%SUBSET'
except (TypeError, AttributeError): args.binFilters = csvStr2List(args.binFilters) args.inputFiles = select(args.inputFiles, args.binFilters) if len(args.binFilters) is 1: args.inputFiles = [args.inputFiles] else: args.inputFiles = [args.inputFiles] import re searchFormat = re.compile(r'.*\.(tar.gz|tgz|pic)(\.[0-9]*)?$') for idx, fileCollection in enumerate(args.inputFiles): m = searchFormat.match(fileCollection[0]) # Treat output file name: try: cOutputName = appendToFileName(args.outputFile, args.binFilters[idx]) except ( IndexError, TypeError, ): if len(args.inputFiles) > 1: cOutputName = appendToFileName(args.outputFile, str(idx)) else: cOutputName = args.outputFile cOutputName = ensureExtension(cOutputName, 'tgz|tar.gz') if m: file_format = m.group(1) wantedFormat = re.compile(r'.*\.' + file_format + r'(\.[0-9]*)?$') isSame = [ bool(wantedFormat.match(filename)) for filename in fileCollection ]
# Script body: split the input calo file per (et, eta) bin and per calo
# layer, applying a RingerLayerSegmentation pre-processing to every
# 'Patterns' entry before saving each slice to its own compressed file.
if args.outputPath is None:
    args.outputPath = os.path.dirname(args.inputFile)
if not os.path.isdir( args.outputPath ):
    mkdir_p( args.outputPath )
f = load(args.inputFile)
# Copy all metada information
baseDict = { k : f[k] for k in f.keys() if not '_etBin_' in k and not '_etaBin_' in k }
nEtBins = f['nEtBins'].item()
nEtaBins = f['nEtaBins'].item()
# Walk every (et, eta) bin combination with a progress bar.
for etIdx, etaIdx in progressbar( product(xrange(nEtBins), xrange(nEtaBins))
                                , nEtBins*nEtaBins
                                , logger = mainLogger
                                , prefix = 'Juicing file '):
    # Bin-specific entries plus the shared metadata.
    binDict= {k:f[k] for k in f.keys() if 'etBin_%d_etaBin_%d'%(etIdx,etaIdx) in k}
    binDict.update(baseDict)
    from copy import deepcopy
    for layer in caloLayers:
        pp=PreProcChain([RingerLayerSegmentation(layer=layer)])
        # Deep copy so each layer's segmentation starts from the raw bin data.
        tmpBinDict = deepcopy(binDict)
        for key in binDict.keys():
            if 'Patterns' in key:
                tmpBinDict[key] = pp(binDict[key])
        # Output name encodes both the calo layer and the bin indices.
        outFile = os.path.join( args.outputPath, os.path.basename( appendToFileName(args.inputFile.replace('calo','calo'+RingerLayer.tostring(layer)), 'et%d_eta%d' % (etIdx, etaIdx) ) ) )
        save(tmpBinDict, outFile, protocol = 'savez_compressed' )
def create_weights(self, discrList, filename):
    """Export tuned discriminator weights, keyed per (et, eta) bin, to an
    optional python map file and to a ROOT file ('<filename>.root', TTree
    'discriminators' under 'tuning', scalar flags under 'metadata').

    :param discrList: iterable of tuned-model dicts; each entry must provide
        'etBinIdx', 'etaBinIdx', 'etBin', 'etaBin', 'muBin' (numpy arrays)
        and a 'discriminator' dict with 'nodes', 'bias' and 'weights'.
    :param filename: base output file name; '.py'/'.root' are appended.
    """
    self._logger.info('Getting weights...')
    from TuningTools import TuningToolCores
    modelDict = {}
    modelDict['__version__'] = self._version
    modelDict['__type__'] = 'Fex'
    modelDict['__core__'] = TuningToolCores.FastNet
    # Flags describing which extra input variables the tuning used.
    modelDict['metadata'] = {
        'UseEtaVar': self._useEtaVar,
        'UseLumiVar': self._useLumiVar,
    }
    modelDict['tuning'] = {}

    def tolist(a):
        # Pass lists through unchanged; otherwise assume a numpy array.
        if isinstance(a, list):
            return a
        else:
            return a.tolist()

    for model in discrList:
        etBinIdx = model['etBinIdx']
        etaBinIdx = model['etaBinIdx']
        ## Discriminator configuration
        discrData = {}
        discrData['etBin'] = model['etBin'].tolist()
        discrData['etaBin'] = model['etaBin'].tolist()
        discrData['muBin'] = model['muBin'].tolist()
        discrData['nodes'] = tolist(model['discriminator']['nodes'])
        discrData['bias'] = tolist(model['discriminator']['bias'])
        discrData['weights'] = tolist(model['discriminator']['weights'])
        modelDict['tuning']['et{}_eta{}'.format(etBinIdx, etaBinIdx)] = {
            'discriminator': discrData
        }

    if self._toPython:
        self._logger.info('Export weights to python file')
        # BUG FIX: the handle was previously opened and never closed, so
        # buffered content could be lost; the context manager guarantees
        # flush + close even when a write raises.
        with open(appendToFileName(filename, '.py', separator=''), 'w') as pyfile:
            pyfile.write('def SignaturesMap():\n')
            pyfile.write(' s=dict()\n')
            for key in modelDict.keys():
                pyfile.write(' s["%s"]=%s\n' % (key, modelDict[key]))
            pyfile.write(' return s\n')

    from ROOT import TFile, TTree
    from ROOT import std
    self._logger.info('Export weights to root format...')
    ### Create the discriminator root object
    fdiscr = TFile(appendToFileName(filename, '.root', separator=''), 'recreate')
    self.__createRootParameter('int', '__version__', self._version).Write()
    self.__createRootParameter('int', '__core__', TuningToolCores.FastNet).Write()
    fdiscr.mkdir('tuning')
    fdiscr.cd('tuning')
    tdiscr = TTree('discriminators', '')
    # Branch descriptors are (type, name, buffer) triples; bind one
    # std::vector buffer per branch.
    for idx, b in enumerate(self._discrBranches):
        b[2] = std.vector(b[0])()
        tdiscr.Branch(b[1], 'vector<%s>' % b[0], b[2])
    # One tree entry per bin, iterated in sorted key order so the entry
    # order in the output file is reproducible.
    for key in sorted(modelDict['tuning'].keys()):
        for idx, b in enumerate(self._discrBranches):
            self.__attachToVector(modelDict['tuning'][key]['discriminator'][b[1]], b[2])
        tdiscr.Fill()
    tdiscr.Write()
    ### Create the thresholds root object
    fdiscr.mkdir('metadata')
    fdiscr.cd('metadata')
    for key, value in modelDict['metadata'].iteritems():
        self._logger.info('Saving metadata %s as %s', key, value)
        self.__createRootParameter('int' if type(value) is int else 'bool', key, value).Write()
    fdiscr.Close()
def create_thresholds(self, discrList, filename):
    """Export hypothesis thresholds, keyed per (et, eta) bin, to an optional
    python map file and to a ROOT file ('<filename>.root', TTree
    'thresholds' under 'tuning', scalar flags under 'metadata').

    :param discrList: iterable of tuned-model dicts; each entry must provide
        'etBinIdx', 'etaBinIdx', 'etBin', 'etaBin', 'muBin' (numpy arrays)
        and 'thresholds'.
    :param filename: base output file name; '.py'/'.root' are appended.
    """
    self._logger.info('Getting thresholds...')
    from TuningTools import TuningToolCores
    modelDict = {}
    modelDict['__version__'] = self._version
    modelDict['__type__'] = 'Hypo'
    modelDict['__core__'] = TuningToolCores.FastNet
    # Flags consumed by the online hypothesis configuration.
    modelDict['metadata'] = {
        'UseNoActivationFunctionInTheLastLayer': self._removeOutputTansigTF,
        'DoPileupCorrection': self._doPileupCorrection,
        'LumiCut': self._maxPileupLinearCorrectionValue,
    }
    modelDict['tuning'] = {}
    for model in discrList:
        thresData = {}
        etBinIdx = model['etBinIdx']
        etaBinIdx = model['etaBinIdx']
        thresData['etBin'] = model['etBin'].tolist()
        thresData['etaBin'] = model['etaBin'].tolist()
        thresData['muBin'] = model['muBin'].tolist()
        thresData['thresholds'] = model['thresholds']
        modelDict['tuning']['et{}_eta{}'.format(etBinIdx, etaBinIdx)] = thresData

    if self._toPython:
        self._logger.info('Export thresholds to python file')
        # BUG FIX: the handle was previously opened and never closed, so
        # buffered content could be lost; the context manager guarantees
        # flush + close even when a write raises.
        with open(appendToFileName(filename, '.py', separator=''), 'w') as pyfile:
            pyfile.write('def ThresholdsMap():\n')
            pyfile.write(' s=dict()\n')
            for key in modelDict.keys():
                pyfile.write(' s["%s"]=%s\n' % (key, modelDict[key]))
            pyfile.write(' return s\n')

    from ROOT import TFile, TTree
    from ROOT import std
    self._logger.info('Export weights to root format...')
    ### Create the thresholds root object
    fthres = TFile(appendToFileName(filename, '.root', separator=''), 'recreate')
    self.__createRootParameter('int', '__version__', self._version).Write()
    self.__createRootParameter('int', '__core__', TuningToolCores.FastNet).Write()
    fthres.mkdir('tuning')
    fthres.cd('tuning')
    tthres = TTree('thresholds', '')
    # Branch descriptors are (type, name, buffer) triples; bind one
    # std::vector buffer per branch.
    for idx, b in enumerate(self._thresBranches):
        b[2] = std.vector(b[0])()
        tthres.Branch(b[1], 'vector<%s>' % b[0], b[2])
    # One tree entry per bin, iterated in sorted key order so the entry
    # order in the output file is reproducible.
    for key in sorted(modelDict['tuning'].keys()):
        for idx, b in enumerate(self._thresBranches):
            self.__attachToVector(modelDict['tuning'][key][b[1]], b[2])
        tthres.Fill()
    tthres.Write()
    fthres.mkdir('metadata')
    fthres.cd('metadata')
    for key, value in modelDict['metadata'].iteritems():
        self._logger.info('Saving metadata %s as %s', key, value)
        self.__createRootParameter('int' if type(value) is int else 'bool', key, value).Write()
    fthres.Close()
dest='inputFile', required=True, help="File to Juice!") import sys, os if len(sys.argv) == 1: parser.print_help() sys.exit(1) args = parser.parse_args() from RingerCore import load, save from RingerCore import changeExtension, ensureExtension, appendToFileName import numpy as np f = load(args.inputFile) # Copy all metada information baseDict = { k: f[k] for k in f.keys() if not '_etBin_' in k and not '_etaBin_' in k } for etIdx in xrange(f['nEtBins'].item()): for etaIdx in xrange(f['nEtaBins'].item()): binDict = { k: f[k] for k in f.keys() if 'etBin_%d_etaBin_%d' % (etIdx, etaIdx) in k } binDict.update(baseDict) mainLogger.info('Saving Et: %d Eta: %d', etIdx, etaIdx) outFile = appendToFileName(args.inputFile, 'et%d_eta%d' % (etIdx, etaIdx)) save(binDict, outFile, protocol='savez_compressed')
def create_weights(self, discrList, filename):
    """Export keras-tuned discriminators to pickle (raw keras model/weights)
    or to a ROOT file with the weights flattened into FastNet-style arrays.

    :param discrList: iterable of tuned-model dicts providing 'etBinIdx',
        'etaBinIdx', 'etBin', 'etaBin', 'muBin' and a 'discriminator' dict
        holding a keras model-json structure plus its weight arrays.
    :param filename: base output file name; '.root' is appended for ROOT.
    """
    self._logger.info('Getting weights...')
    from TuningTools import TuningToolCores
    modelDict = {}
    modelDict['__version__'] = self._version
    modelDict['__type__'] = 'Fex'
    modelDict['__core__'] = TuningToolCores.FastNet
    # Flags describing how the discriminator inputs were built.
    modelDict['metadata'] = {
        'UseEtaVar': self._useEtaVar,
        'UseLumiVar': self._useLumiVar,
        'UseCaloRings': True,
        'UseTrack': False,
        'UseShowerShape': False,
        'RemoveOutputTansigTF': self._removeOutputTansigTF,
    }
    modelDict['tuning'] = []

    def tolist(a):
        # Lists and tuples pass through unchanged; otherwise assume numpy.
        if isinstance(a, list):
            return a
        elif isinstance(a, tuple):
            return a
        else:
            return a.tolist()

    from keras.models import model_from_json
    from keras.layers import Conv2D, Dense, Activation
    import json
    from copy import deepcopy
    for model in discrList:
        etBinIdx = model['etBinIdx']
        etaBinIdx = model['etaBinIdx']
        ## Discriminator configuration
        discrData = {}
        discrData['etBin'] = tolist(model['etBin'])
        discrData['etaBin'] = tolist(model['etaBin'])
        discrData['muBin'] = tolist(model['muBin'])
        if self._toPickle:
            # Pickle path keeps the raw keras model description and weights.
            discrData['model'] = deepcopy(model['discriminator']['model'])
            discrData['weights'] = deepcopy(
                model['discriminator']['weights'])
        else:
            # Rebuild the keras model so layer objects can be introspected.
            keras_model = model_from_json(
                json.dumps(model['discriminator']['model'],
                           separators=(',', ':')))
            keras_model.set_weights(model['discriminator']['weights'])
            ### Extract and reshape Keras weights
            dense_weights = []
            dense_bias = []
            dense_nodes = []
            dense_tfnames = []
            conv_kernel = []
            conv_kernel_i = []
            conv_kernel_j = []
            conv_bias = []
            conv_tfnames = []
            conv_nodes = []
            useConvLayer = False
            ### Loop over layers
            for idx, obj in enumerate(keras_model.layers):
                # Matching config entry from the serialized model; assumes
                # config order matches keras_model.layers — TODO confirm.
                dobj = model['discriminator']['model']['config'][idx][
                    'config']
                if type(obj) is Conv2D:
                    useConvLayer = True
                    conv_nodes.append(dobj['filters'])
                    conv_tfnames.append(str(dobj['activation']))
                    w, b = obj.get_weights()
                    # Flatten each kernel; the double transpose reorders the
                    # keras (ki, kj, in, out) layout per (out, in) filter —
                    # NOTE(review): verify against the ROOT-side consumer.
                    for wn in w.T:
                        for wu in wn:
                            conv_kernel.extend(
                                wu.T.reshape(
                                    (dobj['kernel_size'][0] *
                                     dobj['kernel_size'][1], )).tolist())
                    conv_bias.extend(b.tolist())
                    conv_kernel_i.append(dobj['kernel_size'][0])
                    conv_kernel_j.append(dobj['kernel_size'][1])
                elif type(obj) is Dense:
                    dense_nodes.append(dobj['units'])
                    dense_tfnames.append(str(dobj['activation']))
                    w, b = obj.get_weights()
                    # Column-major flattening (Fortran order).
                    dense_weights.extend(w.reshape(-1, order='F'))
                    dense_bias.extend(b.reshape(-1, order='F'))
                # TODO: Need to implement something smart to treat this case
                elif type(obj) is Activation:
                    # A standalone Activation layer overrides the transfer
                    # function recorded for the preceding Dense layer.
                    dense_tfnames.pop()
                    dense_tfnames.append(str(dobj['activation']))
                else:
                    continue
            discrData['dense_nodes'] = tolist(dense_nodes)
            discrData['dense_bias'] = tolist(dense_bias)
            discrData['dense_weights'] = tolist(dense_weights)
            discrData['dense_tfnames'] = tolist(dense_tfnames)
            discrData['useConvLayer'] = [useConvLayer]
            # Convolutional neural network
            if useConvLayer:
                discrData['conv_nodes'] = tolist(conv_nodes)
                discrData['conv_tfnames'] = tolist(conv_tfnames)
                discrData['conv_kernel_i'] = tolist(conv_kernel_i)
                discrData['conv_kernel_j'] = tolist(conv_kernel_j)
                discrData['conv_kernel'] = tolist(conv_kernel)
                discrData['conv_bias'] = tolist(conv_bias)
                # Input image size taken from the first layer's batch shape.
                discrData['conv_input_i'] = [
                    model['discriminator']['model']['config'][0]['config']
                    ['batch_input_shape'][1]
                ]
                discrData['conv_input_j'] = [
                    model['discriminator']['model']['config'][0]['config']
                    ['batch_input_shape'][2]
                ]
                # Output size after the (valid, stride-1) convolutions:
                # each kernel shrinks a dimension by (kernel - 1) —
                # presumably no padding/stride options are used; confirm.
                i = discrData['conv_input_i'][0] - (sum(conv_kernel_i) -
                                                    len(conv_kernel_i))
                j = discrData['conv_input_j'][0] - (sum(conv_kernel_j) -
                                                    len(conv_kernel_j))
                input_layer = i * j * discrData['conv_nodes'][-1]
                # Prepend the flattened conv output as the first dense layer.
                discrData['dense_nodes'] = [input_layer
                                            ] + discrData['dense_nodes']
        ### Attach
        modelDict['tuning'].append({'discriminator': discrData})

    if self._toPickle:
        self._logger.info('Export weights to pickle format...')
        # Pickle export flags the keras core instead of FastNet.
        modelDict['__version__'] = self._version
        modelDict['__core__'] = TuningToolCores.keras
        from RingerCore import save
        save(modelDict, filename)

    from ROOT import TFile, TTree
    from ROOT import std
    self._logger.info('Export weights to root format...')
    ### Create the discriminator root object
    fdiscr = TFile(appendToFileName(filename, '.root', separator=''), 'recreate')
    self.__createRootParameter('int', '__version__', self._version).Write()
    self.__createRootParameter('int', '__core__', TuningToolCores.FastNet).Write()
    fdiscr.mkdir('tuning')
    fdiscr.cd('tuning')
    tdiscr = TTree('discriminators', '')
    # Branch descriptors are (type, name, buffer) triples; bind one
    # std::vector buffer per branch.
    for idx, b in enumerate(self._discrBranches):
        b[2] = std.vector(b[0])()
        tdiscr.Branch(b[1], 'vector<%s>' % b[0], b[2])
    # One tree entry per tuned discriminator.
    for t in modelDict['tuning']:
        for idx, b in enumerate(self._discrBranches):
            self.__attachToVector(t['discriminator'][b[1]], b[2])
        tdiscr.Fill()
    tdiscr.Write()
    ### Create the thresholds root object
    fdiscr.mkdir('metadata')
    fdiscr.cd('metadata')
    for key, value in modelDict['metadata'].iteritems():
        self._logger.info('Saving metadata %s as %s', key, value)
        self.__createRootParameter('int' if type(value) is int else 'bool', key, value).Write()
    fdiscr.Close()