示例#1
0
def main():
    """Check job output per year and channel and resubmit bad or missing chunks.

    For every sample directory under ``output_<year>/`` that passes the
    ``args.samples`` / ``args.veto`` / ``args.type`` filters, the input file
    list is split into chunks of ``nFilesPerJob`` files and compared with the
    ``*_<channel>.root`` output files found on disk:

    * an output file that is a zombie, or lacks the 'tree' or 'cutflow' key,
      marks its chunk as bad;
    * a chunk index without any output file is marked as missing.

    One job is written to ``joblist/joblist<sample>_<channel>_retry.txt`` for
    each such chunk.  The jobs are submitted directly when ``args.force`` is
    set, otherwise after an interactive confirmation ('y' submits, 'force'
    submits and stops asking, 'quit' exits).

    Reads the module-level ``args``; returns nothing.
    """
    channels = args.channels
    years = args.years
    batchSystem = 'psibatch_runner.sh'
    # Output files are expected to end in "_<chunk>_<channel>.root".
    chunkpattern = re.compile(r".*_(\d+)_[a-z]+\.root")
    # A directory whose name starts with one of these counts as data.
    datasets = ['SingleMuon', 'SingleElectron', 'Tau']
    # Diboson (WW, WZ, ZZ) have very large files and acceptance, and the
    # jet-binned DY and WJ files need to be run separately because of a bug
    # affecting LHE_Njets.
    singlefilesamples = [
        'WW', 'WZ', 'ZZ', 'DY', 'WJ', 'W1J', 'W2J', 'W3J', 'W4J', 'Single',
        'Tau'
    ]
    boldwarn = bcolors.BOLD + bcolors.WARNING
    boldfail = bcolors.BOLD + bcolors.FAIL
    boldok = bcolors.BOLD + bcolors.OKBLUE
    endc = bcolors.ENDC

    for year in years:

        # GET LIST
        samplelist = []
        outdir = "output_%s/" % (year)
        for directory in sorted(os.listdir(outdir)):
            if not os.path.isdir(outdir + directory):
                continue
            if args.samples and not matchSampleToPattern(
                    directory, args.samples):
                continue
            # A sample is vetoed when it DOES match a veto pattern.
            if args.veto and matchSampleToPattern(directory, args.veto):
                continue
            isdata = any(s in directory[:len(s) + 2] for s in datasets)
            if args.type == 'mc' and isdata:
                continue
            if args.type == 'data' and not isdata:
                continue
            samplelist.append(directory)
        if not samplelist:
            print("No samples found in %s!" % (outdir))
        if args.verbose:
            print(samplelist)

        # RESUBMIT samples
        for channel in channels:
            print(header(year, channel))

            for directory in samplelist:
                outdir = "output_%s/%s" % (year, directory)
                outfilelist = glob.glob(outdir + '/*_' + channel + '.root')
                nFilesPerJob = args.nFilesPerJob
                jobName = getSampleShortName(directory)[1]
                if not outfilelist:
                    continue

                # GET INPUT FILES
                if 'LQ' in directory:
                    infiles = getFileListPNFS(directory)
                else:
                    infiles = getFileListDAS('/' +
                                             directory.replace('__', '/'))

                # NFILESPERJOBS CHECKS
                if nFilesPerJob > 1 and any(vv in jobName[:8]
                                            for vv in singlefilesamples):
                    print(boldwarn +
                          "[WN] setting number of files per job from %s to 1 for %s"
                          % (nFilesPerJob, jobName) + endc)
                    nFilesPerJob = 1

                infilelists = list(split_seq(infiles, nFilesPerJob))
                nInputChunks = len(infilelists)

                badchunks = []
                # Every chunk is missing until an output file for it is found.
                misschunks = list(range(nInputChunks))
                jobList = 'joblist/joblist%s_%s_retry.txt' % (directory,
                                                              channel)
                with open(jobList, 'w') as jobslog:
                    for filename in outfilelist:
                        match = chunkpattern.search(filename)
                        if not match:
                            print(boldfail +
                                  '[NG] did not recognize output file %s !' %
                                  (filename) + endc)
                            exit(1)
                        chunk = int(match.group(1))
                        if chunk in misschunks:
                            misschunks.remove(chunk)
                        elif chunk >= nInputChunks:
                            print(boldfail +
                                  '[WN] %s: found chunk %s >= total number of chunks %s ! Please make sure you have chosen the correct number of files per job (-n=%s) !'
                                  % (filename, chunk, nInputChunks,
                                     nFilesPerJob) + endc)
                            # There is no input chunk with this index, so
                            # there is nothing that could be resubmitted.
                            continue
                        else:
                            # Second output file for a chunk already seen.
                            print(boldfail +
                                  '[WN] %s: found weird chunk %s ! Please check if there is any overcounting !'
                                  % (filename, chunk) + endc)

                        # An output file is good when it opens cleanly and
                        # contains both expected keys.
                        rootfile = TFile(filename, 'READ')
                        isgood = (not rootfile.IsZombie()
                                  and rootfile.GetListOfKeys().Contains('tree')
                                  and rootfile.GetListOfKeys().Contains(
                                      'cutflow'))
                        rootfile.Close()
                        if isgood:
                            continue
                        createJobs(jobslog,
                                   infilelists[chunk],
                                   outdir,
                                   directory,
                                   chunk,
                                   channel,
                                   year=year)
                        badchunks.append(chunk)

                    # BAD CHUNKS
                    if badchunks:
                        badchunks.sort()
                        chunktext = ('chunks ' if len(badchunks) > 1 else
                                     'chunk ') + ', '.join(
                                         str(ch) for ch in badchunks)
                        print(boldwarn +
                              '[NG] %s, %d/%d failed! Resubmitting %s...' %
                              (directory, len(badchunks), len(outfilelist),
                               chunktext) + endc)

                    # MISSING CHUNKS
                    if misschunks:
                        chunktext = ('chunks ' if len(misschunks) > 1 else
                                     'chunk ') + ', '.join(
                                         str(i) for i in misschunks)
                        print(boldwarn +
                              "[WN] %s missing %d/%d files ! Resubmitting %s..."
                              % (directory, len(misschunks), len(outfilelist),
                                 chunktext) + endc)
                        for chunk in misschunks:
                            createJobs(jobslog,
                                       infilelists[chunk],
                                       outdir,
                                       directory,
                                       chunk,
                                       channel,
                                       year=year)

                # RESUBMIT
                jobName += "_%s_%s" % (channel, year)
                nChunks = len(badchunks) + len(misschunks)
                if nChunks == 0:
                    print(boldok + '[OK] ' + directory + endc)
                elif args.force:
                    submitJobs(jobName, jobList, nChunks, outdir, batchSystem)
                else:
                    submit = raw_input(
                        "Do you also want to submit %d jobs to the batch system? [y/n] "
                        % (nChunks))
                    if submit.lower() == 'force':
                        # Submit now and stop asking for the remaining samples.
                        submit = 'y'
                        args.force = True
                    if submit.lower() == 'quit':
                        exit(0)
                    if submit.lower() == 'y':
                        submitJobs(jobName, jobList, nChunks, outdir,
                                   batchSystem)
                    else:
                        print("Not submitting jobs")
                print("")
示例#2
0
def main():
    """Create job lists for all samples and assemble the task start script.

    For every year the sample list ``samples_<year>.cfg`` is read and filtered
    with ``args.samples`` / ``args.vetoes`` / ``args.type``.  For every
    channel and matching sample a job list
    ``joblist/joblist_<sample>_<channel><tag>.txt`` is written with one job
    per chunk of input files, and the jobs are handed to ``submitJobs_gc``
    (directly with ``args.force``, otherwise after an interactive
    confirmation: 'y' submits, 'force' submits and stops asking, 'quit'
    exits).

    Finally ``while.sh`` is written to ``args.workdir`` from the
    ``while_temp.sh`` template, with one ``go.py`` command per task collected
    over all years and channels.

    Reads the module-level ``args``; returns nothing.
    """
    channels = args.channels
    years = args.years
    tes = args.tes
    ltf = args.ltf
    jtf = args.jtf
    Zmass = args.Zmass
    prefetch = args.prefetch
    batchscript = 'submit_SGE.sh'
    tag = args.tag
    # A sample whose name starts with one of these counts as data.
    datasets = ['SingleMuon', 'SingleElectron', 'Tau', 'EGamma']

    # Build the tag appended to job and file names from the requested shifts.
    if tag and tag[0] != '_':
        tag = '_' + tag
    if tes != 1.:
        tag += "_TES%.3f" % (tes)
    if ltf != 1.:
        tag += "_LTF%.3f" % (ltf)
    if jtf != 1.:
        tag += "_JTF%.3f" % (jtf)
    if Zmass:
        tag += "_Zmass"
    tag = tag.replace('.', 'p')

    # Collected over ALL years, because the start script is written once
    # after the year loop.
    tasks = []

    for year in years:

        # READ SAMPLES
        directories = []
        samplelist = "samples_%s.cfg" % (year)
        with open(samplelist, 'r') as cfgfile:
            for line in cfgfile:
                # Keep the first whitespace-separated token, without a
                # trailing slash.
                line = line.strip().split(' ')[0].rstrip('/')
                if '#' in line[:2]:
                    continue
                if line == '':
                    continue
                if line.count('/') < 2:
                    continue
                sample = '/'.join(line.split('/')[-3:])
                if args.samples and not matchSampleToPattern(
                        sample, args.samples):
                    continue
                if args.vetoes and matchSampleToPattern(sample, args.vetoes):
                    continue
                isdata = any(s in sample[:len(s) + 2] for s in datasets)
                if args.type == 'mc' and isdata:
                    continue
                if args.type == 'data' and not isdata:
                    continue
                directories.append(line)
        if args.testrun:
            directories = directories[:1]
        blacklist = getBlackList("filelist/blacklist.txt")

        for channel in channels:
            print(header(year, channel, tag))

            # SUBMIT SAMPLES
            for directory in directories:

                if args.verbose:
                    print("\ndirectory = %s" % directory)

                # FILTER: run each data stream only in its matching channels
                if 'SingleMuon' in directory and channel not in [
                        'mutau', 'mumu', 'elemu'
                ]:
                    continue
                if ('SingleElectron' in directory
                        or 'EGamma' in directory) and channel != 'eletau':
                    continue
                if 'Tau' in directory[:5] and channel != 'tautau':
                    continue
                if 'LQ3' in directory[:5] and channel not in [
                        'mutau', 'eletau', 'tautau'
                ]:
                    continue

                # GET SKIMMED
                if args.useSkim:
                    # A truthy result replaces the sample directory.
                    skimmed = isSkimmed(directory, year)
                    if skimmed:
                        directory = skimmed
                        if args.verbose:
                            print("skimmed: %s" % directory)

                print(bcolors.BOLD + bcolors.OKGREEN + directory +
                      bcolors.ENDC)

                # FILE LIST
                files = []
                if not args.useDAS:
                    files = getFileListLocal(directory, blacklist=blacklist)
                if not files:
                    if not args.useDAS:
                        print("Getting file list from DAS/PNFS...")
                    files = getFileList(directory, blacklist=blacklist)
                    if files:
                        saveFileListLocal(directory,
                                          files,
                                          blacklist=blacklist)
                if not files:
                    print(bcolors.BOLD + bcolors.WARNING +
                          "Warning! EMPTY filelist for " + directory +
                          bcolors.ENDC)
                    continue
                elif args.verbose:
                    print("FILELIST = " + files[0])
                    for filename in files[1:]:
                        print("           " + filename)
                if args.testrun:
                    files = files[:1]

                # JOB LIST
                sample = '__'.join(directory.split('/')[-3:])
                ensureDirectory('joblist')
                jobList = 'joblist/joblist_%s_%s%s.txt' % (sample, channel,
                                                           tag)
                print("Creating job file %s..." % (jobList))
                jobName = getSampleShortName(directory)[1]
                jobName += "_%s_%s" % (channel, year) + tag
                # NOTE(review): "$PWD" is passed on literally (Python does not
                # expand it), so the call below creates a directory named
                # "$PWD" - confirm this is intended.
                outdir = "$PWD"
                ensureDirectory(outdir + '/logs/')

                # NFILESPERJOBS
                nFilesPerJob = args.nFilesPerJob
                if nFilesPerJob < 1:
                    # No explicit request: use the first matching per-sample
                    # default, else the global default.
                    for default, patterns in nFilesPerJob_defaults:
                        if matchSampleToPattern(sample, patterns):
                            nFilesPerJob = default
                            break
                    else:
                        nFilesPerJob = nFilesPerJob_default
                if args.verbose:
                    print("nFilesPerJob = %s" % nFilesPerJob)
                filelists = chunkify(files, nFilesPerJob)

                # CREATE JOBS
                with open(jobList, 'w') as jobs:
                    checkExistingFiles(outdir, channel, len(filelists))
                    for ichunk, chunkfiles in enumerate(filelists):
                        createJobs(jobs,
                                   chunkfiles,
                                   outdir,
                                   sample,
                                   ichunk,
                                   channel,
                                   year=year,
                                   tes=tes,
                                   ltf=ltf,
                                   jtf=jtf,
                                   Zmass=Zmass,
                                   tag=tag,
                                   prefetch=prefetch)
                nChunks = len(filelists)

                # SUBMIT
                if args.force:
                    submitJobs_gc(jobName, jobList, nChunks, outdir,
                                  batchscript, args.workdir, year, channel)
                else:
                    submit = raw_input(
                        "Do you also want to submit %d jobs to the batch system? [y/n] "
                        % (nChunks))
                    if submit.lower() == 'force':
                        # Submit now and stop asking for the remaining samples.
                        submit = 'y'
                        args.force = True
                    if submit.lower() == 'quit':
                        exit(0)
                    if submit.lower() == 'y':
                        submitJobs_gc(jobName, jobList, nChunks, outdir,
                                      batchscript, args.workdir, year,
                                      channel)
                    else:
                        print("Not submitting jobs")
                print("")
                tasks.append("_".join([jobName, str(year), channel]))

    # Assemble while script to start every gc task.
    template = os.environ[
        "CMSSW_BASE"] + "/src/NanoTreeProducer/while_temp.sh"
    with open(template, "r") as tempfile:
        while_temp = tempfile.read()
    # NOTE(review): args.workdir is concatenated without a separator here, so
    # it presumably has to end in '/' - confirm against submitJobs_gc.
    task_list = [
        "go.py {}gc_conf/{}.conf".format(args.workdir, task) for task in tasks
    ]
    while_temp = while_temp.format(TASK_COMMANDS="\n".join(task_list))
    with open(os.path.join(args.workdir, "while.sh"), "w") as out:
        out.write(while_temp)
    print('Submit samples with: "bash {}"'.format(
        os.path.join(args.workdir, "while.sh")))
示例#3
0
def main():
    """Check job output per year and channel and resubmit bad or missing chunks.

    For every sample directory under
    ``/work/pbaertsc/heavy_resonance/output_<year>/`` that passes the
    ``args.samples`` / ``args.vetos`` / ``args.type`` filters, the input file
    list (one file per job) is compared with the ``*_<channel><tag>.root``
    output files found on disk:

    * an output file that is a zombie, or lacks the 'tree', 'pileup' or
      'Events' key, marks its chunk as bad;
    * a chunk index without any output file is marked as missing.

    One job per such chunk is written to a ``..._retry.txt`` job list.  The
    jobs are submitted directly when ``args.force`` is set, otherwise after
    an interactive confirmation ('y' submits, 'force' submits and stops
    asking, 'quit' exits).

    Reads the module-level ``args``; returns nothing.
    """
    channels = args.channels
    years = args.years
    tes = args.tes
    ltf = args.ltf
    jtf = args.jtf
    batchSystem = 'slurm_runner.sh'
    # Output files end in "_<chunk>_<channel>.root", optionally with one
    # shift suffix such as "_TES1p030" before the extension.
    chunkpattern = re.compile(r".*_(\d+)_[a-z]+(?:_[A-Z]+\dp\d+)?\.root")
    # A directory whose name starts with one of these counts as data.
    datasets = [
        'SingleMuon', 'SingleElectron', 'EGamma', 'MET', 'SinglePhoton'
    ]
    boldwarn = bcolors.BOLD + bcolors.WARNING
    boldfail = bcolors.BOLD + bcolors.FAIL
    boldok = bcolors.BOLD + bcolors.OKBLUE
    endc = bcolors.ENDC

    # Build the tag appended to job and file names from the requested shifts.
    tag = ""
    if tes != 1.:
        tag += "_TES%.3f" % (tes)
    if ltf != 1.:
        tag += "_LTF%.3f" % (ltf)
    if jtf != 1.:
        tag += "_JTF%.3f" % (jtf)
    tag = tag.replace('.', 'p')

    for year in years:

        # GET LIST
        samplelist = []
        outdir = "/work/pbaertsc/heavy_resonance/output_%s/" % (year)
        for directory in sorted(os.listdir(outdir)):
            if not os.path.isdir(outdir + directory):
                continue
            if args.samples and not matchSampleToPattern(
                    directory, args.samples):
                continue
            if args.vetos and matchSampleToPattern(directory, args.vetos):
                continue
            isdata = any(s in directory[:len(s) + 2] for s in datasets)
            if args.type == 'mc' and isdata:
                continue
            if args.type == 'data' and not isdata:
                continue
            samplelist.append(directory)
        if not samplelist:
            print("No samples found in %s!" % (outdir))
        if args.verbose:
            print(samplelist)

        # RESUBMIT samples
        for channel in channels:
            print(header(year, channel, tag))

            for directory in samplelist:
                outdir = "/work/pbaertsc/heavy_resonance/output_%s/%s" % (
                    year, directory)
                # Samples without any output file are not skipped: all of
                # their chunks then count as missing.
                outfilelist = glob.glob("%s/*_%s%s.root" %
                                        (outdir, channel, tag))
                jobName = getSampleShortName(directory, year)[1]
                jobName += "_%s_%s" % (channel, year) + tag

                # FILE LIST
                infiles = []
                if not args.useDAS:
                    infiles = getFileListLocal(directory)
                if not infiles:
                    if not args.useDAS:
                        print("Getting file list from DAS...")
                    infiles = getFileListDAS('/' + directory)
                    if infiles:
                        saveFileListLocal(directory, infiles)
                if not infiles:
                    print(boldwarn + "Warning!!! FILELIST empty" + endc)
                    continue
                elif args.verbose:
                    print("FILELIST = " + infiles[0])
                    for infile in infiles[1:]:
                        print("           " + infile)

                # NFILESPERJOBS
                # Always one input file per job here.
                nFilesPerJob = 1
                if args.verbose:
                    print("nFilesPerJob = %s" % nFilesPerJob)
                infilelists = list(split_seq(infiles, nFilesPerJob))
                nInputChunks = len(infilelists)

                # JOB LIST
                badchunks = []
                # Every chunk is missing until an output file for it is found.
                misschunks = list(range(nInputChunks))
                jobList = '/work/pbaertsc/heavy_resonance/NanoTreeProducer/joblist/joblist_%s_%s%s_retry.txt' % (
                    directory, channel, tag)
                with open(jobList, 'w') as jobslog:
                    for filename in outfilelist:
                        match = chunkpattern.search(filename)
                        if not match:
                            print(boldfail +
                                  '[NG] did not recognize output file %s !' %
                                  (filename) + endc)
                            exit(1)
                        chunk = int(match.group(1))
                        if chunk in misschunks:
                            misschunks.remove(chunk)
                        elif chunk >= nInputChunks:
                            print(boldfail +
                                  '[WN] %s: found chunk %s >= total number of chunks %s ! Please make sure you have chosen the correct number of files per job (-n=%s), check DAS, or resubmit everything!'
                                  % (filename, chunk, nInputChunks,
                                     nFilesPerJob) + endc)
                            # There is no input chunk with this index, so
                            # there is nothing that could be resubmitted.
                            continue
                        else:
                            # Second output file for a chunk already seen.
                            print(boldfail +
                                  '[WN] %s: found weird chunk %s ! Please check if there is any overcounting !'
                                  % (filename, chunk) + endc)

                        # An output file is good when it opens cleanly and
                        # contains all three expected keys.
                        rootfile = TFile(filename, 'READ')
                        isgood = (not rootfile.IsZombie() and
                                  (rootfile.GetListOfKeys().Contains('tree')
                                   and rootfile.GetListOfKeys().Contains(
                                       'pileup')
                                   and rootfile.GetListOfKeys().Contains(
                                       'Events')))
                        rootfile.Close()
                        if isgood:
                            continue
                        createJobs(jobslog,
                                   infilelists[chunk],
                                   outdir,
                                   directory,
                                   chunk,
                                   channel,
                                   year=year,
                                   tes=tes,
                                   ltf=ltf,
                                   jtf=jtf)
                        badchunks.append(chunk)

                    # BAD CHUNKS
                    if badchunks:
                        badchunks.sort()
                        chunktext = ('chunks ' if len(badchunks) > 1 else
                                     'chunk ') + ', '.join(
                                         str(ch) for ch in badchunks)
                        print(boldwarn +
                              '[NG] %s, %d/%d failed! Resubmitting %s...' %
                              (directory, len(badchunks), len(outfilelist),
                               chunktext) + endc)

                    # MISSING CHUNKS
                    if misschunks:
                        chunktext = ('chunks ' if len(misschunks) > 1 else
                                     'chunk ') + ', '.join(
                                         str(i) for i in misschunks)
                        print(boldwarn +
                              "[WN] %s missing %d/%d files ! Resubmitting %s..."
                              % (directory, len(misschunks), len(outfilelist),
                                 chunktext) + endc)
                        for chunk in misschunks:
                            createJobs(jobslog,
                                       infilelists[chunk],
                                       outdir,
                                       directory,
                                       chunk,
                                       channel,
                                       year=year,
                                       tes=tes,
                                       ltf=ltf,
                                       jtf=jtf)

                # RESUBMIT
                nChunks = len(badchunks) + len(misschunks)
                if nChunks == 0:
                    print(boldok + '[OK] ' + directory + endc)
                elif args.force:
                    submitJobs(jobName, jobList, nChunks, outdir, batchSystem)
                else:
                    submit = raw_input(
                        "Do you also want to submit %d jobs to the batch system? [y/n] "
                        % (nChunks))
                    if submit.lower() == 'force':
                        # Submit now and stop asking for the remaining samples.
                        submit = 'y'
                        args.force = True
                    if submit.lower() == 'quit':
                        exit(0)
                    if submit.lower() == 'y':
                        submitJobs(jobName, jobList, nChunks, outdir,
                                   batchSystem)
                    else:
                        print("Not submitting jobs")
                print("")
def main():
    """Create and submit skim jobs for every sample, channel and year.

    For every year the sample list ``samples_<year>.cfg`` is read and filtered
    with ``args.samples`` / ``args.vetos`` / ``args.type``.  For every channel
    and matching sample the input file list is looked up (local cache first
    unless ``args.useDAS`` is set, then PNFS or DAS), a job list
    ``joblist_skim/joblist_<name>_<channel><tag>.txt`` is written with one
    job per input file, and the jobs are handed to ``submitJobs`` (directly
    with ``args.force``, otherwise after an interactive confirmation: 'y'
    submits, 'force' submits and stops asking, 'quit' exits).

    Reads the module-level ``args``; returns nothing.
    """
    channels = args.channels
    years = args.years
    tes = args.tes
    ltf = args.ltf
    jtf = args.jtf
    batchSystem = 'psibatch_runner.sh'
    # A sample whose path starts with one of these counts as data.
    datasets = ['SingleMuon', 'SingleElectron', 'EGamma', 'MET']

    # Build the tag appended to job and file names from the requested shifts.
    tag = ""
    if tes != 1.:
        tag += "_TES%.3f" % (tes)
    if ltf != 1.:
        tag += "_LTF%.3f" % (ltf)
    if jtf != 1.:
        tag += "_JTF%.3f" % (jtf)
    tag = tag.replace('.', 'p')

    for year in years:

        # READ SAMPLES
        directories = []
        samplelist = "samples_%s.cfg" % (year)
        with open(samplelist, 'r') as cfgfile:
            for line in cfgfile:
                # Keep the first whitespace-separated token, without a
                # trailing slash.
                line = line.strip().split(' ')[0].rstrip('/')
                if '#' in line[:2]:
                    continue
                if line == '':
                    continue
                if args.samples and not matchSampleToPattern(
                        line, args.samples):
                    continue
                if args.vetos and matchSampleToPattern(line, args.vetos):
                    continue
                isdata = any(s in line[:len(s) + 2] for s in datasets)
                if args.type == 'mc' and isdata:
                    continue
                if args.type == 'data' and not isdata:
                    continue
                directories.append(line)

        for channel in channels:
            print(header(year, channel, tag))

            # SUBMIT SAMPLES
            for directory in directories:

                if args.verbose:
                    print("\ndirectory = %s" % directory)

                print(bcolors.BOLD + bcolors.OKGREEN + directory +
                      bcolors.ENDC)

                # FILE LIST
                files = []
                # The sample name is the last three path components joined by
                # '__'.  Explicit indexing keeps the IndexError for paths with
                # fewer than three components.
                parts = directory.split('/')
                name = '__'.join([parts[-3], parts[-2], parts[-1]])
                if not args.useDAS:
                    files = getFileListLocal(directory)
                if not files:
                    if not args.useDAS:
                        print("Getting file list from DAS...")
                    if 'pnfs' in directory:
                        files = getFileListPNFS(directory)
                    else:
                        files = getFileListDAS(directory)
                    if files:
                        saveFileListLocal(name, files)
                if not files:
                    print(bcolors.BOLD + bcolors.WARNING +
                          "Warning!!! FILELIST empty" + bcolors.ENDC)
                    continue
                elif args.verbose:
                    print("FILELIST = " + files[0])
                    for filename in files[1:]:
                        print("           " + filename)

                # JOB LIST
                ensureDirectory('joblist_skim')
                jobList = 'joblist_skim/joblist_%s_%s%s.txt' % (name, channel,
                                                                tag)
                print("Creating job file %s..." % (jobList))
                jobName = getSampleShortName(directory, year)[1]
                jobName += "_%s_%s" % (channel, year) + tag
                outdir = ensureDirectory(
                    "/work/pbaertsc/heavy_resonance/output_skim_%s/%s" %
                    (year, name))
                ensureDirectory(outdir + '/logs/')

                # NFILESPERJOBS
                # Always one input file per job here; the former per-sample
                # default lookup could never run and has been dropped.
                nFilesPerJob = 1
                if args.verbose:
                    print("nFilesPerJob = %s" % nFilesPerJob)
                filelists = list(split_seq(files, nFilesPerJob))

                # CREATE JOBS
                with open(jobList, 'w') as jobs:
                    checkExistingFiles(outdir, channel, len(filelists))
                    for ichunk, chunkfiles in enumerate(filelists):
                        createJobs(jobs,
                                   chunkfiles,
                                   outdir,
                                   name,
                                   ichunk,
                                   channel,
                                   year=year,
                                   tes=tes,
                                   ltf=ltf,
                                   jtf=jtf)
                nChunks = len(filelists)

                # SUBMIT
                if args.force:
                    submitJobs(jobName, jobList, nChunks, outdir, batchSystem)
                else:
                    submit = raw_input(
                        "Do you also want to submit %d jobs to the batch system? [y/n] "
                        % (nChunks))
                    if submit.lower() == 'force':
                        # Submit now and stop asking for the remaining samples.
                        submit = 'y'
                        args.force = True
                    if submit.lower() == 'quit':
                        exit(0)
                    if submit.lower() == 'y':
                        submitJobs(jobName, jobList, nChunks, outdir,
                                   batchSystem)
                    else:
                        print("Not submitting jobs")
                print("")
示例#5
0
def main(args):
    """Check skimmed samples and resubmit skim jobs for files that need it.

    For every year the sample directories below the samples directory
    (``args.outdir`` or a per-year default) are collected and filtered with
    ``args.samples`` / ``args.vetos`` / ``args.type``.  For each sample the
    skimmed files found on disk and the input file list are passed to
    ``checkFiles``, which returns the event count and the files to resubmit.
    If there are any, a job list ``joblist/joblist_<sample>_skim<tag>_retry.txt``
    is written and handed to ``submitSkimJobs`` (directly with ``args.force``,
    otherwise after an interactive confirmation: 'y' submits, 'force' submits
    and stops asking, 'quit' exits).

    Args:
        args: parsed command-line options; the attributes read here are
            years, tag, checkEvents, outdir, instance, samples, vetos, type,
            verbose, useLocal, removeBadFiles, removeBuggedFiles, force,
            nFilesPerJob and prefetch.
    """
    years = args.years
    tag = args.tag
    checkEvents = args.checkEvents
    outbasedir = "/scratch/ineuteli"
    batchscript = 'submit_SGE.sh'
    # Prefix put in front of every file path found on disk.
    director = "root://t3dcachedb.psi.ch:1094/"  #"root://xrootd-cms.infn.it/"
    # A sample whose name starts with one of these counts as data.
    datasets = ['SingleMuon', 'SingleElectron', 'Tau', 'EGamma']
    boldwarn = bcolors.BOLD + bcolors.WARNING
    endc = bcolors.ENDC

    for year in years:
        samplesdir = args.outdir or (
            "/pnfs/psi.ch/cms/trivcat/store/user/ineuteli/samples/NANOAOD_%s"
            % (year))

        # GET LIST
        samplelist = []
        if args.instance is None:
            directories = (glob.glob(samplesdir + "/*/*/NANOAOD*") +
                           glob.glob(samplesdir + "/*/*/USER*"))
        else:
            directories = glob.glob(samplesdir + "/*/*/" + args.instance)
        for directory in sorted(directories):
            sample = '/'.join(directory.split('/')[-3:])
            if args.samples and not matchSampleToPattern(sample, args.samples):
                continue
            if args.vetos and matchSampleToPattern(sample, args.vetos):
                continue
            isdata = any(s in sample[:len(s) + 2] for s in datasets)
            if args.type == 'mc' and isdata:
                continue
            if args.type == 'data' and not isdata:
                continue
            if not os.path.isdir(directory):
                continue
            samplelist.append(directory)
        if not samplelist:
            print("No samples found in %s!" % (samplesdir))
        if args.verbose:
            print('samplelist = %s\n' % (samplelist))

        # CHECK samples
        print(header(year, tag))
        for directory in samplelist:

            sample = '__'.join(directory.split('/')[-3:])
            filepattern = '%s/*_skim%s*.root' % (directory, tag)
            if args.verbose:
                print("directory  = %s" % (directory))
                print("filelist   = %s" % (filepattern))
                print("sample     = %s" % (sample))

            # FILE LIST ON SE
            filelist = [
                director + path
                for path in sorted(glob.glob(filepattern), key=naturalSort)
            ]
            if not filelist:
                print(boldwarn + "[WN] %s empty filelist" % directory + endc)
            elif args.verbose:
                print("filelist   = %s" % (filelist[0]))
                for filename in filelist[1:]:
                    print("             " + filename)

            # FILE LIST ON DAS
            infilelist = []
            if args.useLocal:
                infilelist = getFileListLocal(sample)
            if not infilelist:
                if args.useLocal:
                    print("Getting file list from DAS/PNFS...")
                infilelist = getFileListDAS(sample)
            if not infilelist:
                print(boldwarn + "Warning! EMPTY filelist for " + directory +
                      endc)
            elif args.verbose:
                print("infilelist = %s" % (infilelist[0]))
                for filename in infilelist[1:]:
                    print("             " + filename)

            # FILE LIST FOR RESUBMISSION
            nevents, resubmitfiles = checkFiles(
                filelist,
                infilelist,
                directory,
                clean=args.removeBadFiles,
                force=args.force,
                cleanBug=args.removeBuggedFiles,
                checkEvents=checkEvents)
            nResubmit = len(resubmitfiles)
            if nResubmit == 0:
                print(bcolors.BOLD + bcolors.OKGREEN +
                      '[OK] %s is complete ! ' % sample + endc)
            elif nResubmit > len(infilelist):
                print(bcolors.BOLD + bcolors.FAIL +
                      'WARNING! %s has more output files %d than %d input files from DAS!'
                      % (sample, nResubmit, len(infilelist)) + endc)
            else:
                print(boldwarn +
                      '[WN] %d / %d of %s need to be resubmitted...' %
                      (nResubmit, len(infilelist), sample) + endc)

            # The event-count comparison is skipped for LQ3 samples.
            if checkEvents and 'LQ3' not in directory:
                compareEventsToDAS(nevents, sample, treename='Events')
            if nResubmit == 0:
                print("")
                continue

            # JOB LIST
            ensureDirectory('joblist')
            jobList = 'joblist/joblist_%s_skim%s_retry.txt' % (sample, tag)
            print("Creating job file %s..." % (jobList))
            jobName = getSampleShortName(directory)[1]
            jobName += "_%s_skim" % (year) + tag
            outdir = "%s/output_%s/%s" % (outbasedir, year, sample)
            logdir = ensureDirectory("skim_logs_%s/%s" % (year, sample))

            # NFILESPERJOBS
            nFilesPerJob = args.nFilesPerJob
            if nFilesPerJob < 1:
                # No explicit request: use the first matching per-sample
                # default, else one file per job.
                for default, patterns in nFilesPerJob_defaults:
                    if matchSampleToPattern(directory, patterns):
                        nFilesPerJob = default
                        break
                else:
                    nFilesPerJob = 1  # default
            if args.verbose:
                print("nFilesPerJob = %s" % nFilesPerJob)
            filelists = chunkify(resubmitfiles, nFilesPerJob)

            # CREATE JOBS
            nChunks = 0
            with open(jobList, 'w') as jobs:
                for chunkfiles in filelists:
                    createSkimJobs(jobs,
                                   year,
                                   sample,
                                   chunkfiles,
                                   outdir,
                                   prefetch=args.prefetch)
                    nChunks += 1

            # SUBMIT
            if args.force:
                submitSkimJobs(jobName, jobList, nChunks, logdir, batchscript)
            else:
                submit = raw_input(
                    "Do you also want to submit %d jobs to the batch system? [y/n] "
                    % (nChunks))
                if submit.lower() == 'force':
                    # Submit now and stop asking for the remaining samples.
                    submit = 'y'
                    args.force = True
                if submit.lower() == 'quit':
                    exit(0)
                if submit.lower() == 'y':
                    submitSkimJobs(jobName, jobList, nChunks, logdir,
                                   batchscript)
                else:
                    print("Not submitting jobs")
            print("")
示例#6
0
def main(args):

    years = args.years
    channels = args.channels
    njobs = args.njobs

    getFilesOfRunningJobs()
    exit(0)

    if args.running:
        getSubmittedJobs()
        return

    for year in years:
        indir = "output_%s/" % (year)
        os.chdir(indir)

        # GET LIST
        samplelist = []
        for directory in sorted(os.listdir('./')):
            if not os.path.isdir(directory): continue
            if args.samples and not matchSampleToPattern(
                    directory, args.samples):
                continue
            if args.veto and matchSampleToPattern(directory, args.veto):
                continue
            if args.type == 'mc' and any(
                    s in directory[:len(s) + 2]
                    for s in ['SingleMuon', 'SingleElectron', 'Tau']):
                continue
            if args.type == 'data' and not any(
                    s in directory[:len(s) + 2]
                    for s in ['SingleMuon', 'SingleElectron', 'Tau']):
                continue
            samplelist.append(directory)
        if not samplelist:
            print "No samples found in %s!" % (indir)
        if args.verbose:
            print 'samplelist = %s\n' % (samplelist)

        # CHECK samples
        for channel in channels:
            print header(year, channel)

            for directory in samplelist:
                print ">>> %s" % (directory)

                infiles = "%s/logs/*%s_%d*.o*.*" % (directory, channel, year)
                filelist = glob.glob(infiles)
                if not filelist:
                    continue

                jobids = []
                for filename in filelist:
                    jobid, taskid = getJobID(filename)
                    if jobid not in jobids:
                        jobids.append(jobid)
                jobids.sort(reverse=True)
                jobids_max = jobids[:njobs]

                jobs = {id: [] for id in jobids_max}
                stuck = {id: [] for id in jobids_max}
                failed = {id: [] for id in jobids_max}
                running = {id: [] for id in jobids_max}
                done = {id: [] for id in jobids_max}
                for filename in filelist:
                    if not any(".o%d." % (id) in filename
                               for id in jobids_max):
                        continue
                    job = Job(filename)
                    jobs[job.jobid].append(job)
                    if job.stuck:
                        stuck[job.jobid].append(job)
                    if job.running:
                        running[job.jobid].append(job)
                    if job.failed:
                        failed[job.jobid].append(job)
                    if job.done:
                        done[job.jobid].append(job)

                for jobid, joblist in sorted(jobs.iteritems()):
                    ntot = len(joblist)
                    jobs[jobid].sort()
                    stuck[jobid].sort()
                    failed[jobid].sort()
                    running[jobid].sort()
                    done[jobid].sort()
                    print ">>>   %d" % (jobid)
                    if running[jobid]:
                        print ">>>     running: %4d /%4d, %12s" % (len(
                            running[jobid]), ntot, average(running[jobid]))
                    if failed[jobid]:
                        print ">>>     failed:  %4d /%4d" % (
                            len(failed[jobid]), ntot
                        )  #+ ', '.join([str(j) for j in failed[jobid]])
                    if stuck[jobid]:
                        print ">>>     stuck:   %4d /%4d" % (len(
                            stuck[jobid]), ntot)
                    print ">>>     done:    %4d /%4d, %12s" % (
                        len(done[jobid]), ntot,
                        average(done[jobid]) if done[jobid] else ""
                    )  #+ ', '.join([str(j) for j in done[jobid]])

                print ">>>"
        os.chdir('..')
示例#7
0
def main():

    channels = args.channels
    years = args.years
    tes = args.tes
    batchSystem = 'psibatch_runner.sh'

    for year in years:

        # READ SAMPLES
        directories = []
        samplelist = "samples_%s.cfg" % (year)
        for line in open(samplelist, 'r'):
            line = line.rstrip().lstrip().split(' ')[0]
            if line[:2].count('#') > 0: continue
            if line == '': continue
            if args.samples and not matchSampleToPattern(line, args.samples):
                continue
            if args.veto and matchSampleToPattern(line, args.veto): continue
            if args.type == 'mc' and any(
                    s in line[:len(s) + 2]
                    for s in ['SingleMuon', 'SingleElectron', 'Tau']):
                continue
            if args.type == 'data' and not any(
                    s in line[:len(s) + 2]
                    for s in ['SingleMuon', 'SingleElectron', 'Tau']):
                continue
            directories.append(line)
        #print directories

        for channel in channels:
            print header(year, channel)

            # SUBMIT SAMPLES
            for directory in directories:

                if args.verbose:
                    print "\ndirectory =", directory

                # FILTER
                if 'SingleMuon' in directory and channel not in [
                        'mutau', 'mumu'
                ]:
                    continue
                if 'SingleElectron' in directory and channel != 'eletau':
                    continue
                if 'Tau' in directory[:5] and channel != 'tautau': continue
                if 'LQ3' in directory[:5] and channel not in [
                        'mutau', 'eletau', 'tautau'
                ]:
                    continue

                print bcolors.BOLD + bcolors.OKGREEN + directory + bcolors.ENDC
                files = None
                name = None

                if 'pnfs' in directory:
                    name = directory.split('/')[8].replace(
                        '/', '') + '__' + directory.split('/')[9].replace(
                            '/', '') + '__' + directory.split('/')[10].replace(
                                '/', '')
                    #files = getFileListPNFS(directory)
                    files = getFileListPNFS(name)
                else:
                    files = getFileListDAS(directory)
                    name = directory.split('/')[1].replace(
                        '/', '') + '__' + directory.split('/')[2].replace(
                            '/', '') + '__' + directory.split('/')[3].replace(
                                '/', '')

                if not files:
                    print bcolors.BOLD + bcolors.WARNING + "Warning!!! FILELIST empty" + bcolors.ENDC
                    continue
                elif args.verbose:
                    print "FILELIST = " + files[0]
                    for file in files[1:]:
                        print "           " + file

                # JOBLIST
                ensureDirectory('joblist')
                jobList = 'joblist/joblist%s_%s.txt' % (name, channel)
                print "Creating job file %s..." % (jobList)
                jobName = getSampleShortName(directory)[1]
                jobs = open(jobList, 'w')
                nFilesPerJob = args.nFilesPerJob
                outdir = ensureDirectory("output_%s/%s" % (year, name))

                # NFILESPERJOBS CHECKS
                # Diboson (WW, WZ, ZZ) have very large files and acceptance,
                # and the jet-binned DY and WJ files need to be run separately because of a bug affecting LHE_Njets
                if nFilesPerJob > 1 and any(vv in jobName[:8] for vv in [
                        'WW', 'WZ', 'ZZ', 'DY', 'WJ', 'W1J', 'W2J', 'W3J',
                        'W4J', 'Single', 'Tau'
                ]):
                    print bcolors.BOLD + bcolors.WARNING + "[WN] setting number of files per job from %s to 1 for %s" % (
                        nFilesPerJob, jobName) + bcolors.ENDC
                    nFilesPerJob = 1

                try:
                    os.stat(outdir)
                except:
                    os.mkdir(outdir)
                try:
                    os.stat(outdir + '/logs/')
                except:
                    os.mkdir(outdir + '/logs/')

                # CREATE JOBS
                nChunks = 0
                filelists = list(split_seq(files, nFilesPerJob))
                checkExistingFiles(outdir, channel, len(filelists))
                #filelists = list(split_seq(files,1))
                for file in filelists:
                    #print "FILES = ",f
                    createJobs(jobs,
                               file,
                               outdir,
                               name,
                               nChunks,
                               channel,
                               year=year)
                    nChunks = nChunks + 1
                jobs.close()

                # SUBMIT
                jobName += "_%s_%s" % (channel, year)
                if args.force:
                    submitJobs(jobName, jobList, nChunks, outdir, batchSystem)
                else:
                    submit = raw_input(
                        "Do you also want to submit %d jobs to the batch system? [y/n] "
                        % (nChunks))
                    if submit.lower() == 'force':
                        submit = 'y'
                        args.force = True
                    if submit.lower() == 'quit':
                        exit(0)
                    if submit.lower() == 'y':
                        submitJobs(jobName, jobList, nChunks, outdir,
                                   batchSystem)
                    else:
                        print "Not submitting jobs"
                print
示例#8
0
def main():

    channels = args.channels
    years = args.years
    tes = args.tes
    ltf = args.ltf
    jtf = args.jtf
    Zmass = args.Zmass
    prefetch = args.prefetch
    batchSystem = 'submit_SGE.sh'
    tag = args.tag

    if tag and tag[0] != '_': tag = '_' + tag
    if tes != 1.: tag += "_TES%.3f" % (tes)
    if ltf != 1.: tag += "_LTF%.3f" % (ltf)
    if jtf != 1.: tag += "_JTF%.3f" % (jtf)
    if Zmass: tag += "_Zmass"
    tag = tag.replace('.', 'p')
    chunkpattern = re.compile(r".*_(\d+)_[a-z]+%s\.root" % tag)

    for year in years:

        # GET LIST
        samplelist = []
        outdir = "output_%s/" % (year)
        for directory in sorted(os.listdir(outdir)):
            if not os.path.isdir(outdir + directory): continue
            if args.samples and not matchSampleToPattern(
                    directory, args.samples):
                continue
            if args.vetoes and matchSampleToPattern(directory, args.vetoes):
                continue
            if args.type == 'mc' and any(
                    s in directory[:len(s) + 2]
                    for s in ['SingleMuon', 'SingleElectron', 'Tau', 'EGamma'
                              ]):
                continue
            if args.type == 'data' and not any(
                    s in directory[:len(s) + 2]
                    for s in ['SingleMuon', 'SingleElectron', 'Tau', 'EGamma'
                              ]):
                continue
            samplelist.append(directory)
        if not samplelist:
            print "No samples found in %s!" % (outdir)
        if args.verbose:
            print samplelist
        blacklist = getBlackList("filelist/blacklist.txt")

        # RESUBMIT samples
        for channel in channels:
            print header(year, channel, tag)

            for directory in samplelist:
                #if directory.find('W4JetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8__ytakahas-NanoTest_20180507_W4JetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8-a7a5b67d3e3590e4899e147be08660be__USER')==-1: continue
                outdir = "output_%s/%s" % (year, directory)
                outfilelist = glob.glob("%s/*_%s%s.root" %
                                        (outdir, channel, tag))
                nFilesPerJob = args.nFilesPerJob
                jobName = getSampleShortName(directory)[1]
                jobName += "_%s_%s" % (channel, year) + tag
                if not outfilelist: continue

                # GET SKIMMED
                if args.useSkim and isSkimmed(directory, year):
                    directory = isSkimmed(directory, year)

                # FILE LIST
                infiles = []
                if not args.useDAS:
                    infiles = getFileListLocal(directory)
                if not infiles:
                    if not args.useDAS:
                        print "Getting file list from DAS..."
                    infiles = getFileList(directory)
                    if infiles:
                        saveFileListLocal(directory, infiles)
                if not infiles:
                    print bcolors.BOLD + bcolors.WARNING + "Warning! EMPTY filelist for " + directory + bcolors.ENDC
                    continue
                elif args.verbose:
                    print "FILELIST = " + infiles[0]
                    for file in infiles[1:]:
                        print "           " + file

                # NFILESPERJOBS
                sample = '__'.join(directory.split('/')[-3:])
                nFilesPerJob = args.nFilesPerJob
                if nFilesPerJob < 1:
                    for default, patterns in nFilesPerJob_defaults:
                        if matchSampleToPattern(sample, patterns):
                            nFilesPerJob = default
                            break
                    else:
                        nFilesPerJob = nFilesPerJob_default
                if args.verbose:
                    print "nFilesPerJob = %s" % nFilesPerJob
                infilelists = chunkify(infiles, nFilesPerJob)

                # JOB LIST
                badchunks = []
                misschunks = range(0, len(infilelists))
                jobList = 'joblist/joblist_%s_%s%s_retry.txt' % (sample,
                                                                 channel, tag)
                with open(jobList, 'w') as jobslog:
                    for filename in outfilelist:
                        match = chunkpattern.search(filename)
                        if match:
                            chunk = int(match.group(1))
                        else:
                            print bcolors.BOLD + bcolors.FAIL + '[NG] did not recognize output file %s !' % (
                                filename) + bcolors.ENDC
                            exit(1)
                        if chunk in misschunks:
                            misschunks.remove(chunk)
                        elif chunk >= len(infilelists):
                            print bcolors.BOLD + bcolors.FAIL + '[WN] %s: found chunk %s >= total number of chunks %s ! Please make sure you have chosen the correct number of files per job (-n=%s), check DAS, or resubmit everything!' % (
                                filename, chunk, len(infilelists),
                                nFilesPerJob) + bcolors.ENDC
                        else:
                            print bcolors.BOLD + bcolors.FAIL + '[WN] %s: found weird chunk %s ! Please check if there is any overcounting !' % (
                                filename, chunk,
                                len(infilelists)) + bcolors.ENDC
                        file = TFile(filename, 'READ')
                        if not file.IsZombie() and file.GetListOfKeys(
                        ).Contains('tree') and file.GetListOfKeys().Contains(
                                'cutflow'):
                            continue
                        infiles = infilelists[chunk]
                        for filename in blacklist:
                            if filename in infiles:
                                print ">>> removing blacklisted %s" % filename
                                infiles.remove(filename)
                        createJobs(jobslog,
                                   infiles,
                                   outdir,
                                   sample,
                                   chunk,
                                   channel,
                                   year=year,
                                   tes=tes,
                                   ltf=ltf,
                                   jtf=jtf,
                                   Zmass=Zmass,
                                   tag=tag,
                                   prefetch=prefetch)
                        badchunks.append(chunk)

                    # BAD CHUNKS
                    if len(badchunks) > 0:
                        badchunks.sort()
                        chunktext = ('chunks ' if len(badchunks) > 1 else
                                     'chunk ') + ', '.join(
                                         str(ch) for ch in badchunks)
                        print bcolors.BOLD + bcolors.WARNING + '[NG] %s, %d/%d jobs failed!\n     Resubmitting %s...' % (
                            directory, len(badchunks), len(outfilelist),
                            chunktext) + bcolors.ENDC

                    # MISSING CHUNKS
                    if len(misschunks) > 0:
                        chunktext = ('chunks ' if len(misschunks) > 1 else
                                     'chunk ') + ', '.join(
                                         str(i) for i in misschunks)
                        print bcolors.BOLD + bcolors.WARNING + "[WN] %s missing %d/%d files !\n     Resubmitting %s..." % (
                            directory, len(misschunks), len(outfilelist),
                            chunktext) + bcolors.ENDC
                        for chunk in misschunks:
                            infiles = infilelists[chunk]
                            createJobs(jobslog,
                                       infiles,
                                       outdir,
                                       sample,
                                       chunk,
                                       channel,
                                       year=year,
                                       tes=tes,
                                       ltf=ltf,
                                       jtf=jtf,
                                       Zmass=Zmass,
                                       tag=tag,
                                       prefetch=prefetch)

                # RESUBMIT
                nChunks = len(badchunks) + len(misschunks)
                if nChunks == 0:
                    print bcolors.BOLD + bcolors.OKGREEN + '[OK] %s' % directory + bcolors.ENDC
                elif args.force:
                    submitJobs(jobName, jobList, nChunks, outdir, batchSystem)
                else:
                    submit = raw_input(
                        "Do you also want to submit %d jobs to the batch system? [y/n] "
                        % (nChunks))
                    if submit.lower() == 'force':
                        submit = 'y'
                        args.force = True
                    if submit.lower() == 'quit':
                        exit(0)
                    if submit.lower() == 'y':
                        submitJobs(jobName, jobList, nChunks, outdir,
                                   batchSystem)
                    else:
                        print "Not submitting jobs"
                print
def main():

    years = args.years
    outbasedir = "/scratch/ineuteli"
    batchscript = 'submit_SGE.sh'
    tag = ""

    for year in years:

        # READ SAMPLES
        directories = []
        samplelist = "samples_%s.cfg" % (year)
        with open(samplelist, 'r') as file:
            for line in file:
                line = line.rstrip().lstrip().split(' ')[0].rstrip('/')
                if line[:2].count('#') > 0: continue
                if line == '': continue
                if '/pnfs/' in line: continue
                if args.samples and not matchSampleToPattern(
                        line, args.samples):
                    continue
                if args.vetos and matchSampleToPattern(line, args.vetos):
                    continue
                if args.type == 'mc' and any(
                        s in line[:len(s) + 2] for s in
                    ['SingleMuon', 'SingleElectron', 'Tau', 'EGamma']):
                    continue
                if args.type == 'data' and not any(
                        s in line[:len(s) + 2] for s in
                    ['SingleMuon', 'SingleElectron', 'Tau', 'EGamma']):
                    continue
                directories.append(line)
        if args.testrun:
            directories = directories[:1]
        blacklist = []  #getBlackList("filelist/blacklist.txt")

        print header(year, tag)

        # SUBMIT SAMPLES
        for directory in directories:

            if args.verbose:
                print "\ndirectory =", directory
            print bcolors.BOLD + bcolors.OKGREEN + directory + bcolors.ENDC

            # FILE LIST
            files = []
            name = '__'.join(directory.split('/')[-3:])
            sample = '/' + name.replace('__', '/')
            ###if not args.useDAS:
            ###    files = getFileListLocal(directory,blacklist=blacklist)
            ###if not files:
            ###if not args.useDAS:
            ###  print "Getting file list from DAS..."
            files = getFileListDAS(directory, blacklist=blacklist)
            ###  if files:
            ###    saveFileListLocal(name,files,blacklist=blacklist)
            if not files:
                print bcolors.BOLD + bcolors.WARNING + "Warning! EMPTY filelist for " + directory + bcolors.ENDC
                continue
            elif args.verbose:
                print "FILELIST = " + files[0]
                for file in files[1:]:
                    print "           " + file
            if args.testrun:
                files = files[:1]

            # JOB LIST
            ensureDirectory('joblist')
            jobList = 'joblist/joblist_%s_skim%s.txt' % (name, tag)
            print "Creating job file %s..." % (jobList)
            jobName = getSampleShortName(directory)[1]
            jobName += "_%s_skim" % (year) + tag
            outdir = "%s/output_%s/%s" % (outbasedir, year, name)
            logdir = ensureDirectory("skim_logs_%s/%s" % (year, name))

            # NFILESPERJOBS
            nFilesPerJob = args.nFilesPerJob
            if nFilesPerJob < 1:
                for default, patterns in nFilesPerJob_defaults:
                    if matchSampleToPattern(directory, patterns):
                        nFilesPerJob = default
                        break
                else:
                    nFilesPerJob = nFilesPerJob_default  # default
            if args.verbose:
                print "nFilesPerJob = %s" % nFilesPerJob
            filelists = chunkify(files, nFilesPerJob)

            # CREATE JOBS
            with open(jobList, 'w') as jobs:
                nChunks = 0
                for filelist in filelists:
                    createSkimJobs(jobs,
                                   year,
                                   name,
                                   filelist,
                                   outdir,
                                   prefetch=args.prefetch)
                    nChunks = nChunks + 1

            # SUBMIT
            if args.force:
                submitSkimJobs(jobName, jobList, nChunks, logdir, batchscript)
            else:
                submit = raw_input(
                    "Do you also want to submit %d jobs to the batch system? [y/n] "
                    % (nChunks))
                if submit.lower() == 'force':
                    submit = 'y'
                    args.force = True
                if submit.lower() == 'quit':
                    exit(0)
                if submit.lower() == 'y':
                    submitSkimJobs(jobName, jobList, nChunks, logdir,
                                   batchscript)
                else:
                    print "Not submitting jobs"
            print