if __name__ == "__main__": sys.stdout = os.fdopen(sys.stdout.fileno(), "w", 0) ROOT.gSystem.Load("libFWCoreFWLite.so") AutoLibraryLoader.enable() parser = OptionParser() (options, args) = parser.parse_args() if len(args) != 1: print "Usage: copyAnaDataLocal.py localDir" print " usefull for making local copy of data (e.g. nfs to /tmp/)" sys.exit(0) target = args[0] treeFilesAndNormalizations = getTreeFilesAndNormalizations() anaDefFile = os.environ["SmallXAnaDefFile"] mod_dir, filename = os.path.split(anaDefFile) mod, ext = os.path.splitext(filename) f, filename, desc = imp.find_module(mod, [mod_dir]) mod = imp.load_module(mod, f, filename, desc) localBasePathPAT = mod.PATbasePATH localBasePathTrees = mod.TTreeBasePATH targetPAT = target + "/PAT/" targetTrees = target + "/trees/" print "Copy to:" print targetPAT
def runAll(cls, treeName, outFile, sampleList = None, \ maxFilesMC=None, maxFilesData=None, maxNevents = -1, \ slaveParameters = None, nWorkers=None, usePickle=False, useProofOFile = False, verbosity=1): if slaveParameters == None: # When default param is used reset contents on every call to runAll slaveParameters = {} cwd = os.getcwd()+"/" treeFilesAndNormalizations = getTreeFilesAndNormalizations(maxFilesMC=maxFilesMC, maxFilesData=maxFilesData, samplesToProcess=sampleList, usePickle=usePickle) if sampleList == None: todo = treeFilesAndNormalizations.keys() # run them all else: todo = sampleList slaveParameters["useProofOFile"] = useProofOFile if not useProofOFile: of = ROOT.TFile(outFile,"RECREATE") if not of: print "Cannot create outfile:", outFile sys.exit() of.Close() # so we dont mess with file opens during proof ana slaveParameters["outFile"] = outFile if maxNevents == None: maxNevents = -1 # extra security, run on all events skipped = [] sampleListFullInfo = CommonFSQFramework.Core.Util.getAnaDefinition("sam") sampleCnt = 0 for t in todo: sampleCnt += 1 print "#"*60 print "Next sample:", t, "("+str(sampleCnt)+"/"+str(len(todo))+")" print "#"*60 if len(treeFilesAndNormalizations[t]["files"])==0: print "Skipping, empty filelist for",t skipped.append(t) continue dataset = ROOT.TDSet( 'TTree', 'data', treeName) # the last name is the directory name inside the root file for file in treeFilesAndNormalizations[t]["files"]: dataset.Add(file) slaveParameters["datasetName"] = t slaveParameters["isData"] = sampleListFullInfo[t]["isData"] slaveParameters["normalizationFactor"] = treeFilesAndNormalizations[t]["normFactor"] ROOT.TProof.AddEnvVar("PATH2",ROOT.gSystem.Getenv("PYTHONPATH")+":"+os.getcwd()) #ROOT.gSystem.Setenv("TMFDatasetName", t) supportedTypes = set(["int", "str", "float", "bool"]) variablesToFetch = "" coma = "" variablesToSetInProof = {} for p in slaveParameters: encodedName = cls.encodeEnvString(p) # Check if parameter is supported. 
Adding another type is easy - see # getVariables method paramType = slaveParameters[p].__class__.__name__ if paramType not in supportedTypes: raise Exception("Parameter of type "+paramType \ + " is not of currently supported types: " + ", ".join(supportedTypes) ) ROOT.gSystem.Setenv(encodedName, str(slaveParameters[p])+";;;"+paramType) variablesToSetInProof[encodedName] = str(slaveParameters[p])+";;;"+paramType variablesToFetch += coma + p coma = "," ROOT.gSystem.Setenv(cls.encodeEnvString("VariablesToFetch"), variablesToFetch) variablesToSetInProof[cls.encodeEnvString("VariablesToFetch")] = variablesToFetch proofConnectionString = None if "proofConnectionString" in os.environ: proofConnectionString = os.environ["proofConnectionString"] print "Found proof environment. Will try to connect to", proofConnectionString if not proofConnectionString: if nWorkers == None: proof = ROOT.TProof.Open('') else: proof = ROOT.TProof.Open('workers='+str(nWorkers)) else: proof = ROOT.TProof.Open(proofConnectionString) proof.Exec( 'gSystem->Setenv("PYTHONPATH",gSystem->Getenv("PATH2"));') # for some reason cannot use method below for python path proof.Exec( 'gSystem->Setenv("PATH", "'+ROOT.gSystem.Getenv("PATH") + '");') for v in variablesToSetInProof: # if you get better implemenation (GetParameter?) 
mail me proof.Exec('gSystem->Setenv("'+v+'","'+variablesToSetInProof[v]+'");') print dataset.Process( 'TPySelector', cls.__name__, maxNevents) # with parameter to limit on number of events try: print "Logs saved to:" logs = proof.GetManager().GetSessionLogs().GetListOfLogs() for l in logs: print l.GetTitle() if len(logs) > 1: with open(logs[1].GetTitle(),"r") as f: if verbosity > 1: print "Printing output of first worker node:" for l in f: if "error" in l.lower() or "exception" in l.lower(): print bcolors.ERROR + l.rstrip('\n') + bcolors.ENDC elif "warn" in l.lower(): print bcolors.WARNING + l.rstrip('\n') + bcolors.ENDC else: print l, elif verbosity == 1: if any("error" in l.lower() for l in f) or any("exception" in l.lower() for l in f) or any("warning" in l.lower() for l in f): print "Error/Warning found in log file. Printing first log of worker node:" for l in f: if "error" in l.lower() or "exception" in l.lower(): print bcolors.ERROR + l.rstrip('\n') + bcolors.ENDC elif "warn" in l.lower(): print bcolors.WARNING + l.rstrip('\n') + bcolors.ENDC else: print l, else: #logs has not enough entries print "Cannot print log file of first worker node. 
Only" + len(logs) + "log(s) available" except: print "Cannot get lognames" curPath = ROOT.gDirectory.GetPath() if useProofOFile: bigFileName = outFile.replace(".root","")+"_"+t+".root" of = ROOT.TFile(bigFileName,"UPDATE") else: of = ROOT.TFile(outFile,"UPDATE") # Write norm value and other info saveDir = of.Get(t) if not saveDir: print "Cannot get directory from plot file" continue saveDir.cd() norm = treeFilesAndNormalizations[t]["normFactor"] hist = ROOT.TH1D("norm", "norm", 1,0,1) hist.SetBinContent(1, norm) #saveDir.WriteObject(hist, hist.GetName()) hist.Write(hist.GetName()) of.Close() ROOT.gDirectory.cd(curPath) # clean environment for v in variablesToSetInProof: #command = 'gSystem->Unsetenv("'+v+'");' #print command proof.Exec('gSystem->Unsetenv("'+v+'");') if len(skipped)>0: print "Note: following samples were skipped:" for sk in skipped: print " ",sk print "Analyzed:" done = set(todo)-set(skipped) for t in done: print t if useProofOFile: ''' partFiles = [] for t in done: partFiles.append(outFile.replace(".root","")+"_"+t+".root") print "Running hadd" os.system("hadd -f " + outFile + " " + " ".join(partFiles)) ''' # // note: calling hadd directly is problematic, when there are RooUnfold objects inside # this way RooUnfold library is allready loaded, so objects get merged properly merger = ROOT.TFileMerger( False, False); merger.OutputFile(outFile, True, 1) for t in done: merger.AddFile(outFile.replace(".root","")+"_"+t+".root") status = merger.Merge() print "Merge status: ", status
def main(): parser = OptionParser() #parser.add_option("-s", "--sample", action="store", type="string", dest="sample", help="sample name" ) #parser.add_option("-l", "--listSamples", action="store", type="string", dest="list", help="listAllSamples" ) (options, args) = parser.parse_args() anaVersion = getVariant() print " Current active skim: ", anaVersion anaDef = getAnaDefinition("sam") directlyFromRootfile = False if len(args) != 1 or (not args[0].endswith(".root") and args[0] not in anaDef): print "Usage: printTTree.py sampleName" print " - or -" print "Usage: printTTree.py rootfile" print "Avaliable samples:" for t in anaDef: print " ", t sys.exit(0) if args[0].endswith(".root"): print "Will print structure of given file" directlyFromRootfile = True if not directlyFromRootfile: sample= args[0] treeFilesAndNormalizations = getTreeFilesAndNormalizations(maxFilesMC=1, maxFilesData=1, quiet = True, samplesToProcess=[sample,]) if not treeFilesAndNormalizations[sample]["files"]: print "No files found for sample", sample, "- exiting" sys.exit(0) filename = treeFilesAndNormalizations[sample]["files"][0] else: filename = args[0] rootfile = ROOT.TFile.Open(filename, "read") todo = [(rootfile, 0), ] for t in todo: indent = t[1] print " "*indent, t[0].GetName() lst = t[0].GetListOfKeys() for l in lst: #print l.GetName() #continue #print "Going through", l.GetName(), l.ClassName() current = l.ReadObj() #current = rootfile.Get(l.GetName()) if not current: continue if "TDirectory" in current.ClassName(): todo.append( (current, indent+2) ) else: print " "*(indent+2), current.GetName(), current.GetTitle(), "/"+current.ClassName()+"/" #print " "*(indent+2), current.GetTitle(), "/"+current.ClassName()+"/" if current.ClassName() == "TTree": branches = current.GetListOfBranches() branchesNames = [] for b in branches: branchesNames.append(b.GetName()) for b in sorted(branchesNames): print " "*(indent+4), b
def main(): parser = OptionParser() #parser.add_option("-s", "--sample", action="store", type="string", dest="sample", help="sample name" ) #parser.add_option("-l", "--listSamples", action="store", type="string", dest="list", help="listAllSamples" ) (options, args) = parser.parse_args() anaVersion = getVariant() print " Current active skim: ", anaVersion anaDef = getAnaDefinition("sam") directlyFromRootfile = False if len(args) != 1 or (not args[0].endswith(".root") and args[0] not in anaDef): print "Usage: printTTree.py sampleName" print " - or -" print "Usage: printTTree.py rootfile" print "Avaliable samples:" for t in anaDef: print " ", t sys.exit(0) if args[0].endswith(".root"): print "Will print structure of given file" directlyFromRootfile = True if not directlyFromRootfile: sample = args[0] treeFilesAndNormalizations = getTreeFilesAndNormalizations( maxFilesMC=1, maxFilesData=1, quiet=True, samplesToProcess=[ sample, ]) if not treeFilesAndNormalizations[sample]["files"]: print "No files found for sample", sample, "- exiting" sys.exit(0) filename = treeFilesAndNormalizations[sample]["files"][0] else: filename = args[0] rootfile = ROOT.TFile.Open(filename, "read") todo = [ (rootfile, 0), ] for t in todo: indent = t[1] print " " * indent, t[0].GetName() lst = t[0].GetListOfKeys() for l in lst: #print l.GetName() #continue #print "Going through", l.GetName(), l.ClassName() current = l.ReadObj() #current = rootfile.Get(l.GetName()) if not current: continue if "TDirectory" in current.ClassName(): todo.append((current, indent + 2)) else: print " " * (indent + 2), current.GetName(), current.GetTitle( ), "/" + current.ClassName() + "/" #print " "*(indent+2), current.GetTitle(), "/"+current.ClassName()+"/" if current.ClassName() == "TTree": branches = current.GetListOfBranches() branchesNames = [] for b in branches: branchesNames.append(b.GetName()) for b in sorted(branchesNames): print " " * (indent + 4), b
def main(): filesToMerge = 10 parser = OptionParser() parser.add_option("-f", "--force", action="store_true", dest="force", help="run even if target dir is present" ) (options, args) = parser.parse_args() anaDef = getAnaDefinition("sam") if len(args) != 1 or args[0] not in anaDef: print "Usage: printTTree.py sampleName" print "Avaliable samples:" for t in anaDef: print " ", t sys.exit(1) sample= args[0] treeFilesAndNormalizations = getTreeFilesAndNormalizations(maxFilesMC=None, maxFilesData=None, quiet = True, samplesToProcess=[sample,], usePickle=False, donotvalidate=True) if not treeFilesAndNormalizations[sample]["files"]: print "No files found for sample", sample, "- exiting" sys.exit(1) indirName = os.path.dirname(anaDef[sample]["pathSE"]) if not indirName.startswith("srm://"): print "Dont know how to process: ", indirName odirName = indirName+"_merged/" odirExists = subprocess.call(["lcg-ls", odirName], stdout=subprocess.PIPE, stderr=subprocess.PIPE)==0 if odirExists and not options.force: print "output directory seems to allready exist", odirName sys.exit(1) if not odirExists: odirCreated = subprocess.call(["srmmkdir", odirName], stdout=subprocess.PIPE, stderr=subprocess.PIPE)==0 if not odirCreated: print "cannot create output directory", odirName sys.exit(1) aTodo = [] todos = [] for f in treeFilesAndNormalizations[sample]["files"]: aTodo.append(f) if len(aTodo)==filesToMerge: todos.append(aTodo) aTodo = [] if aTodo: todos.append(aTodo) cnt = 0 for t in todos: cnt += 1 print "Doing", cnt, len(todos),"...", goodFiles = validateRootFiles(t, quiet=True)["fileList"] # be extra careful here - oname will be removed!! 
onamebase = "trees_"+str(cnt)+"_1_TMF.root" onameForCopy = odirName+"/"+onamebase command = ["lcg-ls", onameForCopy] if runQuiet(command)==0: print "Looks like file is allready present, skipping" continue oname = "/tmp/"+onamebase command = ["hadd", oname] command.extend(goodFiles) print "Hadd...", ret=runQuiet(command) if ret!=0: print "Problem with hadd, ofile", cnt runQuiet(["rm", oname]) continue command = ["lcg-cp", "file:"+oname, onameForCopy] print "lcg-cp...", ret=runQuiet(command) if ret!=0: print "Problem with lcg-cp, ofile", cnt runQuiet(["rm", oname]) runQuiet(["srmrm", onameForCopy]) continue print "rm local file...", runQuiet(["rm", oname]) print "done"
# Set up the input chain and the output file for the skim.

# Passed as maxFilesMC below; presumably None means "no limit" - confirm in
# getTreeFilesAndNormalizations.
maxFiles = None

# Bare string literal (not assigned to anything) listing sample names.
'''
Neutrino_Pt-2to20_gun_10GeV_Pu20to50
QCD_Pt-15to3000_Tune4C_Flat_13TeV_pythia8_5GeV_Pu0to10
QCD_Pt-15to3000_Tune4C_Flat_13TeV_pythia8_5GeV_Pu20to50
Neutrino_Pt-2to20_gun_5GeV_Pu20to50
QCD_Pt-15to3000_Tune4C_Flat_13TeV_pythia8_10GeV_Pu20to50
QCD_Pt-15to3000_Tune4C_Flat_13TeV_pythia8_10GeV_Pu0to10
'''

#samplesToProcess = ["QCD_Pt-15to3000_Tune4C_Flat_13TeV_pythia8_5GeV_Pu0to10"]
#samplesToProcess = ["QCD_Pt-15to3000_Tune4C_Flat_13TeV_pythia8_10GeV_Pu0to10"]
samplesToProcess = ["QCD_Pt-15to3000_Tune4C_Flat_13TeV_pythia8"]

samples2files = getTreeFilesAndNormalizations(
    maxFilesMC=maxFiles, samplesToProcess=samplesToProcess)
# Exactly one sample is supported; anything else is not implemented yet.
if len(samples2files) != 1:
    raise Exception("TODO")

# Chain all files of the sample into one "MNTriggerAnaNew/data" tree.
mychain = ROOT.TChain("MNTriggerAnaNew/data")
for s in samples2files:
    flist = samples2files[s]["files"]
    for f in flist:
        mychain.Add(f)
# NOTE(review): for a TChain, GetEntriesFast may report a placeholder value
# until the chain has been loaded - confirm whether GetEntries() is intended.
entries = mychain.GetEntriesFast()

# Output file is named after the processed sample and overwritten if present.
skimfile = ROOT.TFile(samplesToProcess[0] + ".root", "recreate")
initialized = False
cnt = 0
def runAll(cls, treeName, outFile, sampleList = None, \ maxFilesMC=None, maxFilesData=None, \ slaveParameters = None, nWorkers=None, usePickle=False, useProofOFile = False): if slaveParameters == None: # When default param is used reset contents on every call to runAll slaveParameters = {} cwd = os.getcwd() + "/" treeFilesAndNormalizations = getTreeFilesAndNormalizations( maxFilesMC=maxFilesMC, maxFilesData=maxFilesData, samplesToProcess=sampleList, usePickle=usePickle) if sampleList == None: todo = treeFilesAndNormalizations.keys() # run them all else: todo = sampleList slaveParameters["useProofOFile"] = useProofOFile if not useProofOFile: of = ROOT.TFile(outFile, "RECREATE") if not of: print "Cannot create outfile:", outFile sys.exit() of.Close() # so we dont mess with file opens during proof ana slaveParameters["outFile"] = outFile skipped = [] sampleListFullInfo = CommonFSQFramework.Core.Util.getAnaDefinition( "sam") sampleCnt = 0 for t in todo: sampleCnt += 1 print "#" * 60 print "Next sample:", t, "(" + str(sampleCnt) + "/" + str( len(todo)) + ")" print "#" * 60 if len(treeFilesAndNormalizations[t]["files"]) == 0: print "Skipping, empty filelist for", t skipped.append(t) continue dataset = ROOT.TDSet( 'TTree', 'data', treeName ) # the last name is the directory name inside the root file for file in treeFilesAndNormalizations[t]["files"]: dataset.Add(file) slaveParameters["datasetName"] = t slaveParameters["isData"] = sampleListFullInfo[t]["isData"] slaveParameters[ "normalizationFactor"] = treeFilesAndNormalizations[t][ "normFactor"] ROOT.TProof.AddEnvVar( "PATH2", ROOT.gSystem.Getenv("PYTHONPATH") + ":" + os.getcwd()) #ROOT.gSystem.Setenv("TMFDatasetName", t) supportedTypes = set(["int", "str", "float", "bool"]) variablesToFetch = "" coma = "" variablesToSetInProof = {} for p in slaveParameters: encodedName = cls.encodeEnvString(p) # Check if parameter is supported. 
Adding another type is easy - see # getVariables method paramType = slaveParameters[p].__class__.__name__ if paramType not in supportedTypes: raise Exception("Parameter of type "+paramType \ + " is not of currently supported types: " + ", ".join(supportedTypes) ) ROOT.gSystem.Setenv( encodedName, str(slaveParameters[p]) + ";;;" + paramType) variablesToSetInProof[encodedName] = str( slaveParameters[p]) + ";;;" + paramType variablesToFetch += coma + p coma = "," ROOT.gSystem.Setenv(cls.encodeEnvString("VariablesToFetch"), variablesToFetch) variablesToSetInProof[cls.encodeEnvString( "VariablesToFetch")] = variablesToFetch proofConnectionString = None if "proofConnectionString" in os.environ: proofConnectionString = os.environ["proofConnectionString"] print "Found proof environment. Will try to connect to", proofConnectionString if not proofConnectionString: if nWorkers == None: proof = ROOT.TProof.Open('') else: proof = ROOT.TProof.Open('workers=' + str(nWorkers)) else: proof = ROOT.TProof.Open(proofConnectionString) proof.Exec( 'gSystem->Setenv("PYTHONPATH",gSystem->Getenv("PATH2"));' ) # for some reason cannot use method below for python path proof.Exec('gSystem->Setenv("PATH", "' + ROOT.gSystem.Getenv("PATH") + '");') for v in variablesToSetInProof: # if you get better implemenation (GetParameter?) 
mail me proof.Exec('gSystem->Setenv("' + v + '","' + variablesToSetInProof[v] + '");') print dataset.Process('TPySelector', cls.__name__) try: print "Logs saved to:" logs = proof.GetManager().GetSessionLogs().GetListOfLogs() for l in logs: print l.GetTitle() except: print "Cannot get lognames" curPath = ROOT.gDirectory.GetPath() if useProofOFile: bigFileName = outFile.replace(".root", "") + "_" + t + ".root" of = ROOT.TFile(bigFileName, "UPDATE") else: of = ROOT.TFile(outFile, "UPDATE") # Write norm value and other info saveDir = of.Get(t) if not saveDir: print "Cannot get directory from plot file" continue saveDir.cd() norm = treeFilesAndNormalizations[t]["normFactor"] hist = ROOT.TH1D("norm", "norm", 1, 0, 1) hist.SetBinContent(1, norm) #saveDir.WriteObject(hist, hist.GetName()) hist.Write(hist.GetName()) of.Close() ROOT.gDirectory.cd(curPath) # clean environment for v in variablesToSetInProof: #command = 'gSystem->Unsetenv("'+v+'");' #print command proof.Exec('gSystem->Unsetenv("' + v + '");') if len(skipped) > 0: print "Note: following samples were skipped:" for sk in skipped: print " ", sk print "Analyzed:" done = set(todo) - set(skipped) for t in done: print t if useProofOFile: ''' partFiles = [] for t in done: partFiles.append(outFile.replace(".root","")+"_"+t+".root") print "Running hadd" os.system("hadd -f " + outFile + " " + " ".join(partFiles)) ''' # // note: calling hadd directly is problematic, when there are RooUnfold objects inside # this way RooUnfold library is allready loaded, so objects get merged properly merger = ROOT.TFileMerger(False, False) merger.OutputFile(outFile, True, 1) for t in done: merger.AddFile( outFile.replace(".root", "") + "_" + t + ".root") status = merger.Merge() print "Merge status: ", status
if __name__ == "__main__": sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0) ROOT.gSystem.Load("libFWCoreFWLite.so") AutoLibraryLoader.enable() parser = OptionParser() (options, args) = parser.parse_args() if len(args) != 1: print "Usage: copyAnaDataLocal.py localDir" print " usefull for making local copy of data (e.g. nfs to /tmp/)" sys.exit(0) target = args[0] treeFilesAndNormalizations = getTreeFilesAndNormalizations() anaDefFile = os.environ["SmallXAnaDefFile"] mod_dir, filename = os.path.split(anaDefFile) mod, ext = os.path.splitext(filename) f, filename, desc = imp.find_module(mod, [mod_dir]) mod = imp.load_module(mod, f, filename, desc) localBasePathPAT = mod.PATbasePATH localBasePathTrees = mod.TTreeBasePATH targetPAT = target + "/PAT/" targetTrees = target + "/trees/" print "Copy to:" print targetPAT
''' Neutrino_Pt-2to20_gun_10GeV_Pu20to50 QCD_Pt-15to3000_Tune4C_Flat_13TeV_pythia8_5GeV_Pu0to10 QCD_Pt-15to3000_Tune4C_Flat_13TeV_pythia8_5GeV_Pu20to50 Neutrino_Pt-2to20_gun_5GeV_Pu20to50 QCD_Pt-15to3000_Tune4C_Flat_13TeV_pythia8_10GeV_Pu20to50 QCD_Pt-15to3000_Tune4C_Flat_13TeV_pythia8_10GeV_Pu0to10 ''' #samplesToProcess = ["QCD_Pt-15to3000_Tune4C_Flat_13TeV_pythia8_5GeV_Pu0to10"] #samplesToProcess = ["QCD_Pt-15to3000_Tune4C_Flat_13TeV_pythia8_10GeV_Pu0to10"] samplesToProcess = ["QCD_Pt-15to3000_Tune4C_Flat_13TeV_pythia8"] samples2files = getTreeFilesAndNormalizations(maxFilesMC=maxFiles, samplesToProcess=samplesToProcess) if len(samples2files) != 1: raise Exception("TODO") mychain = ROOT.TChain("MNTriggerAnaNew/data") for s in samples2files: flist = samples2files[s]["files"] for f in flist: mychain.Add(f) entries = mychain.GetEntriesFast() skimfile = ROOT.TFile(samplesToProcess[0]+".root","recreate") initialized = False cnt = 0