# Fragment (starts inside a `try:` body that is not visible here): split `files` per entry of `jobFilters`, log how many files each filter matched, report rucio failures through mainLogger.fatal, configure the merge command, then start looping over the (files, count, filter) triples.
# NOTE(review): `except X, e` is Python-2-only syntax; `except X as e` is accepted by Python 2.6+ and Python 3.
# NOTE(review): 'merge.tunedDiscr.tgz'.format(...) has no replacement fields, so its USER_SCOPE / MERGING_JOBID keyword arguments are ignored.
jobFileCollection = select( files, jobFilters ) nFilesCollection = [len(l) for l in jobFileCollection] mainLogger.info("A total of %r files were found.", nFilesCollection ) except DataIdentifierNotFound, e: mainLogger.fatal("Could not retrieve number of files on informed data DID. Rucio error:\n%s" % str(e)) except ImportError, e: mainLogger.fatal("rucio environment was not set, please set rucio and try again. Full error:\n%s" % str(e)) args.setMergeExec("""source ./setrootcore.sh --grid; {fileMerging} -i %IN -o %OUT {OUTPUT_LEVEL} """.format( fileMerging = r"\\\$ROOTCOREBIN/user_scripts/TuningTools/standalone/fileMerging.py" , OUTPUT_LEVEL = conditionalOption("--output-level", args.output_level ) if args.output_level is not LoggingLevel.INFO else '', ) ) startBin = True for jobFiles, nFiles, jobFilter in zip(jobFileCollection, nFilesCollection, jobFilters): #output_file = '{USER_SCOPE}.{MERGING_JOBID}.merge._000001.tunedDiscrXYZ.tgz'.format( # USER_SCOPE = user_scope, # MERGING_JOBID = jobFilter) output_file = 'merge.tunedDiscr.tgz'.format( USER_SCOPE = user_scope, MERGING_JOBID = jobFilter) if startBin: if args.grid_outTarBall is None: args.grid_outTarBall = 'workspace.tar'
args.set_job_submission_option('match', '"' + jobFilter + '"') # Set execute: args.setExec("""source ./setrootcore.sh --grid --no-color; {tuningJob} -d @input.csv {REF_PERF} {OPERATION} {DO_MONITORING} {DO_MATLAB} {DO_COMPRESS} {DEBUG} {OUTPUT_LEVEL} """.format( tuningJob= "\$ROOTCOREBIN/user_scripts/TuningTools/standalone/crossValStatAnalysis.py", REF_PERF=conditionalOption("--refFile", refStr), OPERATION=conditionalOption("--operation", args.operation), DO_MONITORING=conditionalOption("--doMonitoring", args.doMonitoring) if args.doMonitoring is not NotSet else '', DO_MATLAB=conditionalOption("--doMatlab", args.doMatlab) if args.doMatlab is not NotSet else '', DO_COMPRESS=conditionalOption("--doCompress", args.doCompress), OUTPUT_LEVEL=conditionalOption("--output-level", args.output_level) if args.output_level is not LoggingLevel.INFO else '', DEBUG="--test" if (args.get_job_submission_option('debug') is not None) or args.test else '', )) # And run args.run() if args.get_job_submission_option('debug') is not None: break
# Fragment (starts inside a `try:` body that is not visible here and ends on an unfinished `if`): split `files` per entry of `jobFilters`, log how many files each filter matched, report rucio failures through mainLogger.fatal, configure the merge command, then start looping over the (files, count, filter) triples; on the first pass 'outTarBall' defaults to 'workspace.tar' when neither 'outTarBall' nor 'inTarBall' is set.
# NOTE(review): `except X, e` is Python-2-only syntax; `except X as e` is accepted by Python 2.6+ and Python 3.
jobFileCollection = select( files, jobFilters ) nFilesCollection = [len(l) for l in jobFileCollection] mainLogger.info("A total of %r files were found.", nFilesCollection ) except DataIdentifierNotFound, e: mainLogger.fatal("Could not retrieve number of files on informed data DID. Rucio error:\n%s" % str(e)) except ImportError, e: mainLogger.fatal("rucio environment was not set, please set rucio and try again. Full error:\n%s" % str(e)) args.setMergeExec("""source ./setrootcore.sh --grid --no-color; {fileMerging} -i %IN -o %OUT {OUTPUT_LEVEL} """.format( fileMerging = r"\\\$ROOTCOREBIN/user_scripts/TuningTools/standalone/fileMerging.py" , OUTPUT_LEVEL = conditionalOption("--output-level", args.output_level ) if args.output_level is not LoggingLevel.INFO else '', ) ) startBin = True for jobFiles, nFiles, jobFilter in zip(jobFileCollection, nFilesCollection, jobFilters): #output_file = '{USER_SCOPE}.{MERGING_JOBID}.merge._000001.tunedDiscrXYZ.tgz'.format( # USER_SCOPE = user_scope, # MERGING_JOBID = jobFilter) if startBin: if args.get_job_submission_option('outTarBall') is None and not args.get_job_submission_option('inTarBall'): args.set_job_submission_option('outTarBall', 'workspace.tar') startBin = False else: if args.get_job_submission_option('outTarBall') is not None:
{ET_BINS} {ETA_BINS} {OUTPUT_LEVEL} {CORE} {PROJECTTAG} {RINGERCORETAG} {TUNINGTOOLTAG} {DEVELOPMENT} """.format( setrootcore = setrootcore, setrootcore_opts = setrootcore_opts, tuningJob = tuningJob, DATA = dataStr, CONFIG = configStr, PP = ppStr, CROSS = crossFileStr, SUBSET = conditionalOption("--clusterFile", subsetStr ) , EXPERTNETWORKS = conditionalOption("--expert-networks",expertNetworksStr ) , REF = conditionalOption("--refFile", refStr ) , OUTPUTDIR = conditionalOption("--outputDir", _outputDir ) , COMPRESS = conditionalOption("--compress", args.compress ) , SHOW_EVO = conditionalOption("--show-evo", args.show_evo ) , MAX_FAIL = conditionalOption("--max-fail", args.max_fail ) , EPOCHS = conditionalOption("--epochs", args.epochs ) , DO_PERF = conditionalOption("--do-perf", args.do_perf ) , BATCH_SIZE = conditionalOption("--batch-size", args.batch_size ) , BATCH_METHOD = conditionalOption("--batch-method", args.batch_method ) , ALGORITHM_NAME = conditionalOption("--algorithm-name", args.algorithm_name ) , NETWORK_ARCH = conditionalOption("--network-arch", args.network_arch ) , COST_FUNCTION = conditionalOption("--cost-function", args.cost_function ) , SHUFFLE = conditionalOption("--shuffle", args.shuffle ) , SEED = conditionalOption("--seed", args.seed ) ,
# Fragment of a submission loop: build the command for job file `f` (prefixed by a `bsub` invocation on queue args.queue unless args.local is set), log it, collapse runs of spaces, strip backslashes and newlines, run it with os.system, then sleep for args.pause seconds.
# NOTE(review): the command goes through the shell via os.system and its exit status is not checked.
break exec_str = """\ env -i {bsub} \\ {bsub_script} \\ --jobConfig {jobFile} \\ {ppFile} \\ {crossValidFile} \\ --datasetPlace {data} \\ --output {output} \\ --outputPlace {outputPlace} """.format(bsub = "bsub -q {queue} -u \"\" -J pyTrain -n 8 -R \"span[ptile=8]\"".format(queue = args.queue) if not args.local \ else "", bsub_script = os.path.expandvars("$ROOTCOREBIN/user_scripts/TuningTools/run_on_grid/bsub_script.sh"), data = args.data, jobFile = f, ppFile = conditionalOption('--ppFile', args.ppFile), crossValidFile = conditionalOption('--crossValidFile', args.crossValidFile), output = args.output, outputPlace = args.outputPlace, ) logger.info("Executing following command:\n%s", exec_str) import re exec_str = re.sub(' +',' ',exec_str) exec_str = re.sub('\\\\','',exec_str) # FIXME We should be able to do this only in one line... exec_str = re.sub('\n','',exec_str) #logger.info("Command without spaces:\n%s", exec_str) os.system(exec_str) import time time.sleep(args.pause)
args.grid_nFilesPerJob = nFiles #args.grid_maxNFilesPerJob = nFiles args.grid_match = '"' + jobFilter + '"' # Set execute: args.setExec("""source ./setrootcore.sh --grid; {tuningJob} -d @input.csv {REF_PERF} {OPERATION} {DO_MONITORING} {DO_MATLAB} {DO_COMPRESS} {DEBUG} {OUTPUT_LEVEL} """.format( tuningJob = "\$ROOTCOREBIN/user_scripts/TuningTools/standalone/crossValStatAnalysis.py" , REF_PERF = conditionalOption("--refFile", refPerfArg ) , OPERATION = conditionalOption("--operation", args.operation ) , DO_MONITORING = conditionalOption("--doMonitoring", args._doMonitoring ) if args._doMonitoring is not NotSet else '', DO_MATLAB = conditionalOption("--doMatlab", args.doMatlab ) if args.doMatlab is not NotSet else '', DO_COMPRESS = conditionalOption("--doCompress", args._doCompress ) , OUTPUT_LEVEL = conditionalOption("--output-level", args.output_level ) if args.output_level is not LoggingLevel.INFO else '', DEBUG = "--test" if ( args.gridExpand_debug != "--skipScout" ) or args.test else '', ) ) # And run args.run_cmd() # FIXME We should want something more sofisticated if args.gridExpand_debug != '--skipScout': break # Finished running all bins
# Fragment of a submission loop: build the command for job file `f` (prefixed by a `bsub` invocation on queue args.queue unless args.local is set), log it, collapse runs of spaces, strip backslashes and newlines, run it with os.system, then sleep for args.pause seconds.
# NOTE(review): the command goes through the shell via os.system and its exit status is not checked.
break exec_str = """\ env -i {bsub} \\ {bsub_script} \\ --jobConfig {jobFile} \\ {ppFile} \\ {crossValidFile} \\ --datasetPlace {data} \\ --output {output} \\ --outputPlace {outputPlace} """.format(bsub = "bsub -q {queue} -u \"\" -J pyTrain -n 8 -R \"span[ptile=8]\"".format(queue = args.queue) if not args.local \ else "", bsub_script = os.path.expandvars("$ROOTCOREBIN/user_scripts/TuningTools/run_on_grid/bsub_script.sh"), data = args.data, jobFile = f, ppFile = conditionalOption('--ppFile', args.ppFile), crossValidFile = conditionalOption('--crossValidFile', args.crossValidFile), output = args.output, outputPlace = args.outputPlace, ) logger.info("Executing following command:\n%s", exec_str) import re exec_str = re.sub(' +', ' ', exec_str) exec_str = re.sub( '\\\\', '', exec_str) # FIXME We should be able to do this only in one line... exec_str = re.sub('\n', '', exec_str) #logger.info("Command without spaces:\n%s", exec_str) os.system(exec_str) import time time.sleep(args.pause)
help=argparse.SUPPRESS) emptyArgumentsPrintHelp(parser) args = parser.parse_args(namespace=TuningToolGridNamespace('prun')) mainLogger = Logger.getModuleLogger(__name__, args.output_level) printArgs(args, mainLogger.debug) import os.path user_scope = 'user.%s' % os.path.expandvars('$USER') # hold the output args.grid_outputs = args.grid_outputs.replace('*', args.trigger.replace('HLT_', '')) mainLogger.info(('Hold the output with name %s') % (args.grid_outputs)) args.setExec("""source ./setrootcore.sh --grid --no-color; {full2SlimJob} --inputFiles %IN {TRIGGER_LIST} {PATH} {TREENAME} {OUTPUT_FILE} """.format( full2SlimJob= "\$ROOTCOREBIN/user_scripts/TuningTools/standalone/full2Slim.py", TRIGGER_LIST=conditionalOption("--trigger", args.trigger), PATH=conditionalOption("--basepath", args.basepath), TREENAME=conditionalOption("--treename", args.treename), OUTPUT_FILE=conditionalOption("-o", args.grid_outputs))) # And run args.run_cmd()
{CROSS_VALID_METHOD} {CROSS_VALID_SHUFFLE} {REDO_DECISION_MAKING} {THRES_ET_BINS} {THRES_ETA_BINS} {DECISION_MAKING_METHOD} {PILEUP_REF} {PILEUP_LIMITS} {MAX_CORR} """.format( setrootcore=setrootcore, setrootcore_opts=setrootcore_opts, crossValStatAnalysis=crossValStatAnalysis, TUNED_DATA=tunedDataStr, BIN_FILTER=binFilterStr, OUTPUT_FILE_BASE=conditionalOption("--outputFileBase", args.outputFileBase), REF_FILE=conditionalOption("--refFile", refStr), OPERATION=conditionalOption("--operation", args.operation), DO_MONITORING=conditionalOption("--doMonitoring", args.doMonitoring) if args.doMonitoring is not NotSet else '', DO_MATLAB=conditionalOption("--doMatlab", args.doMatlab) if args.doMatlab is not NotSet else '', TMP_DIR=conditionalOption("--tmpDir", args.tmpFolder), EPSILON=conditionalOption("--epsilon", args.epsilon), AUC_EPSILON=conditionalOption("--AUC-epsilon", args.AUC_epsilon), ROC_METHOD=conditionalOption("--roc-method", args.roc_method), MODEL_METHOD=conditionalOption("--model-method", args.model_method), INIT_MODEL_METHOD=conditionalOption("--init-model-method", args.init_model_method), EXPAND_OP=conditionalOption("--expandOP", args.expandOP), ALWAYS_USE_SP_NETWORK=conditionalOption("--always-use-SP-network",
# Fragment (starts inside an `if` body that is not visible here and ends inside nested `if`s): wrap args.eta_bins either in MatlabLoopingBounds (also enabling grid_allowTaskDuplication) or in a single-element Holder, create the module logger, configure the merge command, then iterate over every (etBin, etaBin) pair from itertools.product; on the first pass with a non-None bin, grid_outTarBall defaults to 'workspace.tar' when it is None and grid_inTarBall is falsy.
args.eta_bins = MatlabLoopingBounds(args.eta_bins) args.grid_allowTaskDuplication = True else: args.eta_bins = Holder([ args.eta_bins ]) mainLogger = Logger.getModuleLogger( __name__, args.output_level ) printArgs( args, mainLogger.debug ) args.setMergeExec("""source ./setrootcore.sh --grid; {fileMerging} -i %IN -o %OUT {OUTPUT_LEVEL} """.format( fileMerging = r"\\\$ROOTCOREBIN/user_scripts/TuningTools/standalone/fileMerging.py" , OUTPUT_LEVEL = conditionalOption("--output-level", args.output_level ) if args.output_level is not LoggingLevel.INFO else '', ) ) # Prepare to run from itertools import product startBin = True for etBin, etaBin in product( args.et_bins(), args.eta_bins() ): # When running multiple bins, dump workspace to a file and re-use it: if etBin is not None or etaBin is not None: if startBin: if args.grid_outTarBall is None and not args.grid_inTarBall: args.grid_outTarBall = 'workspace.tar' startBin = False
# Final part of the full2Slim grid submission script: log the parsed
# arguments, resolve the output name, configure the job command and submit.
# Dump the parsed arguments through the debug channel of the module logger.
printArgs(args, mainLogger.debug)
import os.path
# Scope string built from the $USER environment variable.
user_scope = 'user.%s' % os.path.expandvars('$USER')
# hold the output
# Every '*' in the output pattern is replaced by the trigger name with each
# 'HLT_' occurrence removed.
args.grid_outputs = args.grid_outputs.replace('*', args.trigger.replace('HLT_', ''))
# The logger interpolates the argument itself; the emitted text is unchanged.
mainLogger.info('Hold the output with name %s', args.grid_outputs)
# The job path is a raw string so the backslash in front of `$` is kept
# verbatim without relying on Python tolerating the unrecognized escape `\$`.
args.setExec("""source ./setrootcore.sh --grid; {full2SlimJob} --inputFiles %IN {TRIGGER_LIST} {PATH} {TREENAME} {OUTPUT_FILE} """.format(
    full2SlimJob=r"\$ROOTCOREBIN/user_scripts/TuningTools/standalone/full2Slim.py",
    TRIGGER_LIST=conditionalOption("--trigger", args.trigger),
    PATH=conditionalOption("--basepath", args.basepath),
    TREENAME=conditionalOption("--treename", args.treename),
    OUTPUT_FILE=conditionalOption("-o", args.grid_outputs),
))
# And run
args.run_cmd()