def createDataDatasets(self):
   """Fill ``self.dataDatasets`` with one config-snippet string per dataset.

   For every entry ``d`` of ``self.datasets`` (a mapping read through the
   keys ``'json'``, ``'dataset'``, ``'label'`` and ``'goldenJson'``) the
   lumi list is taken from the file ``d['json']`` when it exists; otherwise
   it is fetched through ``dasTools.myDasClient`` and written to that path.
   The resulting text is stored under ``self.dataDatasets[d['label']]``.

   Raises whatever the DAS client or ``LumiList`` raise; indexing the run
   list fails if the dataset has no runs.
   """
   self.dataDatasets = {}
   for d in self.datasets:
     if not os.path.isfile(d['json']):
       # sys argv fix: the DAS client is created with an empty sys.argv
       # (presumably because it parses the command line -- TODO confirm).
       # try/finally guarantees the caller's argv comes back even when the
       # query or the write fails.
       oldSsArgv = sys.argv
       sys.argv = []
       try:
         dasC = dasTools.myDasClient()
         dasC.limit = 0
         dsLumiList = dasC.getJsonOfDataset(d["dataset"])
         dsLumiList.writeJSON(d['json'])
       finally:
         sys.argv = oldSsArgv
     else:
       # Close the cached JSON file deterministically instead of leaking
       # the handle.
       with open(d['json']) as jsonFile:
         dsLumiList = LumiList(compactList=json.load(jsonFile))
     dsRuns = dsLumiList.getRuns()
     # The value is python source text; it is stored as a string here and
     # must stay byte-for-byte as written.
     self.dataDatasets[d['label']] = (
       '{ \n '
       '\t"xSec":None\n'
       '\t,"localFile":None\n'
       '\t,"datasetName":"' + d["dataset"] + '"\n'
       '\t,"label":"Data_' + d['label'] + '"\n'
       '\t,"datasetJSON":"' + d['json'] + '"\n'
       '\t,"crabConfig":{\n'
       '\t\t"CMSSW":{"lumis_per_job":5\n'
       '\t\t\t,"lumi_mask": os.getenv("CMSSW_BASE") + "/' + d['goldenJson'].lstrip('/') + '"\n'
       '\t\t\t,"total_number_of_lumis" : -1}\n'
       '\t\t}\n'
       '\t,"color":0\n'
       '\t,"runRange":"' + str(dsRuns[0]) + "-" + str(dsRuns[-1]) + '"\n'
       '\t}\n')
示例#2
0
def getRuns(name=None, bfield=None, bunchSpacing=None):
    """Return the runs of all run periods that match the given filters.

    A filter left at ``None`` matches every run period; otherwise the run
    period's attribute of the same name must compare equal to it.  The lumi
    lists of the matching periods are combined and the combined list's
    ``getRuns()`` result is returned.
    """
    ll = LumiList()
    for rp in runPeriods:
        if name is not None and rp.name != name:
            continue
        if bfield is not None and rp.bfield != bfield:
            continue
        if bunchSpacing is not None and rp.bunchSpacing != bunchSpacing:
            continue
        # Build the period's lumi list once; the original built it twice
        # and discarded the first copy.
        ll += LumiListForRunPeriod(rp)
    return ll.getRuns()
示例#3
0
def getRuns(name=None,bfield=None,bunchSpacing=None):
	"""Return the runs of all run periods that match the given filters.

	A filter left at ``None`` matches every run period; otherwise the run
	period's attribute of the same name must compare equal to it.  The lumi
	lists of the matching periods are combined and the combined list's
	``getRuns()`` result is returned.
	"""
	ll = LumiList()
	for rp in runPeriods:
		if name is not None and rp.name != name:
			continue
		if bfield is not None and rp.bfield != bfield:
			continue
		if bunchSpacing is not None and rp.bunchSpacing != bunchSpacing:
			continue
		# Build the period's lumi list once; the original built it twice
		# and discarded the first copy.
		ll += LumiListForRunPeriod(rp)
	return ll.getRuns()
示例#4
0
def LumiListForRunPeriod(rp, MIN_LUMIS=0):
	"""Load ``rp.json`` and keep only the runs belonging to run period *rp*.

	A run is kept when its number lies in ``[rp.firstRun, rp.lastRun]`` and
	it has more than ``MIN_LUMIS`` lumi sections in the JSON.  Returns the
	filtered ``LumiList``.
	"""
	lumi_list = LumiList(filename = rp.json)

	# Run numbers (as ints) that fall inside the period's run window.
	in_period = []
	for run_number in map(int, lumi_list.getRuns()):
		if run_number >= rp.firstRun and run_number <= rp.lastRun:
			in_period.append(run_number)

	# Number of lumi sections listed for each run.
	lumis_per_run = defaultdict(int)
	for run_number, _lumi in lumi_list.getLumis():
		lumis_per_run[run_number] += 1

	kept_runs = []
	for run_number in in_period:
		if lumis_per_run[run_number] > MIN_LUMIS:
			kept_runs.append(run_number)
	lumi_list.selectRuns(kept_runs)
	return lumi_list
示例#5
0
def LumiListForRunPeriod(rp, MIN_LUMIS=0):
    """Load ``rp.json`` and keep only the runs belonging to run period *rp*.

    A run is kept when its number lies in ``[rp.firstRun, rp.lastRun]`` and
    it has more than ``MIN_LUMIS`` lumi sections in the JSON.  Returns the
    filtered ``LumiList``.
    """
    lumi_list = LumiList(filename=rp.json)

    # Run numbers (as ints) that fall inside the period's run window.
    in_period = []
    for run_number in map(int, lumi_list.getRuns()):
        if run_number >= rp.firstRun and run_number <= rp.lastRun:
            in_period.append(run_number)

    # Number of lumi sections listed for each run.
    lumis_per_run = defaultdict(int)
    for run_number, _lumi in lumi_list.getLumis():
        lumis_per_run[run_number] += 1

    kept_runs = []
    for run_number in in_period:
        if lumis_per_run[run_number] > MIN_LUMIS:
            kept_runs.append(run_number)
    lumi_list.selectRuns(kept_runs)
    return lumi_list
示例#6
0
def shortenJson(jsonFile,minRun=0,maxRun=-1,output=None,debug=False):
  """Drop the runs outside ``[minRun, maxRun]`` from a lumi JSON.

  jsonFile -- a ``LumiList`` (it is deep-copied, the caller's object is not
              modified) or a file name passed to ``LumiList(filename=...)``.
  minRun   -- runs with a smaller number are removed.
  maxRun   -- runs with a larger number are removed; values <= 0 disable
              the upper bound.
  output   -- if truthy, the result is written with ``writeJSON(output)``
              and ``None`` is returned; otherwise the ``LumiList`` is
              returned.
  debug    -- if true, print the list of removed runs.
  """
  from copy import deepcopy
  if isinstance(jsonFile, LumiList):
    runList = deepcopy(jsonFile)
  else:
    runList = LumiList(filename=jsonFile)  # Read in first  JSON file
  # One pass, each run listed at most once (the original could append a run
  # twice when it violated both bounds).
  runsToRemove = [
    run for run in runList.getRuns()
    if int(run) < minRun or (maxRun > 0 and int(run) > maxRun)
  ]
  if debug:
    # Single pre-formatted argument: prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(" runsToRemove  %s" % (runsToRemove,))
  runList.removeRuns(runsToRemove)
  if output:
    runList.writeJSON(output)
    return None
  return runList
示例#7
0
            raise RuntimeError, "Trying to do a DAS query while in a LXBatch job (env variable LSB_JOBID defined)\nquery was: %s" % dbs
        if 'X509_USER_PROXY' in os.environ:
            dbs += " --key {0} --cert {0}".format(
                os.environ['X509_USER_PROXY'])
        logger.info('DAS query\t: %s', dbs)
        return os.popen(dbs)

    dbs = 'das_client --query="run dataset=%s instance=prod/%s" --limit %i' % (
        prompt.heppy.dataset, 'global', 0)
    prompt_runs = [int(r) for r in _dasPopen(dbs).readlines()]
    dbs = 'das_client --query="run dataset=%s instance=prod/%s" --limit %i' % (
        rereco.heppy.dataset, 'global', 0)
    rereco_runs = [int(r) for r in _dasPopen(dbs).readlines()]

    runs = []
    for str_run in lumiList.getRuns():
        run = int(str_run)
        if run in prompt_runs and run in rereco_runs:
            runs.append(run)

    print "Now running %i jobs: %r" % (len(runs), runs)

    import subprocess

    def wrapper(run_):
        subprocess.call([
            "python", "jetTreeMaker.py", ("--era=%s" % args.era),
            ("--run=%i" % run_)
        ])

    from multiprocessing import Pool
示例#8
0
#!/usr/bin/env python

import os, re, sys
from collections import defaultdict
from itertools import combinations
from FWCore.PythonUtilities.LumiList import LumiList

# Should rewrite not to hit the db for every cfg, but just get the HLT
# key for each run and then only get cfgs for unique keys.

dcsonly_ll = LumiList(
    "/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions12/8TeV/DCSOnly/json_DCSONLY.txt"
)  # JMTBAD use DCSOnly_ll from goodlumis once sorted
runs = sorted(int(run) for run in dcsonly_ll.getRuns())
cmd = "edmConfigFromDB --cff --runNumber %i --noedsources --noes --noservices --nomodules"

path_re = re.compile(r"(HLT_Mu40_eta2p1_v\d+)")
prescaled_path_re = re.compile(r"(HLT_Mu15_eta2p1_v\d+)")

paths_and_filters = defaultdict(list)

for run in runs:
    print "run:", run,
    sys.stdout.flush()
    path = prescaled_path = filter = prescaled_filter = None
    for line in os.popen(cmd % run):
        if "cms.Path" not in line:
            continue
        filt = line.split(" + ")[-2]  # JMTBAD fragile
        mo = path_re.search(line)
        if mo is not None:
示例#9
0
    # required parameters
    (options, args) = parser.parse_args()
    if len(args) != 1:
        raise RuntimeError("Must provide exactly one input file")

    if options.min and options.max and options.min > options.max:
        raise RuntimeError(
            "Minimum value (%d) is greater than maximum value (%d)" %
            (options.min, options.max))

    commaRE = re.compile(r',')
    runsToRemove = []
    for chunk in options.runs:
        runs = commaRE.split(chunk)
        runsToRemove.extend(runs)

    alphaList = LumiList(filename=args[0])  # Read in first  JSON file
    allRuns = alphaList.getRuns()
    for run in allRuns:
        if options.min and int(run) < options.min:
            runsToRemove.append(run)
        if options.max and int(run) > options.max:
            runsToRemove.append(run)

    alphaList.removeRuns(runsToRemove)

    if options.output:
        alphaList.writeJSON(options.output)
    else:
        print(alphaList)
示例#10
0
    def __lumiSelectionSnippet( self, jsonPath = None, firstRun = None, lastRun = None ):
        """Build the lumi-selection text for a run range and/or a JSON file.

        Without ``jsonPath`` the runs from ``self.__getRunList()`` are
        filtered by ``firstRun``/``lastRun`` and every selected run
        contributes a ``run:1-run:max`` range.  With ``jsonPath`` the file is
        read as a ``LumiList`` and the runs outside the range are removed; if
        reading raises ``ValueError`` but the file contains
        ``process.source.lumisToProcess``, its (optionally range-restricted)
        text is used as the snippet instead.

        Side effects: ``self.__firstusedrun`` and ``self.__lastusedrun`` are
        updated, except when no selection is requested and
        ``self.__inputMagneticField`` is set.

        Returns the snippet text ("" when no selection is requested).
        Raises ``AllInOneError`` when the selection is empty or ``jsonPath``
        is neither readable as a lumi JSON nor a lumi-selection snippet.
        """
        lumiSecExtend = ""
        if firstRun or lastRun or jsonPath:
            if not jsonPath:
                selectedRunList = self.__getRunList()
                if firstRun:
                    selectedRunList = [ run for run in selectedRunList
                                        if self.__findInJson(run, "run_number") >= firstRun ]
                if lastRun:
                    selectedRunList = [ run for run in selectedRunList
                                        if self.__findInJson(run, "run_number") <= lastRun ]
                lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-"
                             + str( self.__findInJson(run, "run_number") ) + ":max"
                             for run in selectedRunList ]
                splitLumiList = list( self.__chunks( lumiList, 255 ) )
            else:
                theLumiList = None
                try:
                    theLumiList = LumiList ( filename = jsonPath )
                except ValueError:
                    # Not a lumi JSON; the fallback below checks whether it
                    # is a cff snippet instead.
                    pass

                if theLumiList is not None:
                    allRuns = theLumiList.getRuns()
                    runsToRemove = []
                    for run in allRuns:
                        if firstRun and int( run ) < firstRun:
                            runsToRemove.append( run )
                        if lastRun and int( run ) > lastRun:
                            runsToRemove.append( run )
                    theLumiList.removeRuns( runsToRemove )
                    splitLumiList = list( self.__chunks(
                        theLumiList.getCMSSWString().split(','), 255 ) )
                    if not (splitLumiList and splitLumiList[0] and splitLumiList[0][0]):
                        splitLumiList = None
                else:
                    with open(jsonPath) as f:
                        jsoncontents = f.read()
                        if "process.source.lumisToProcess" in jsoncontents:
                            msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet.  Trying to use it" % jsonPath
                            if firstRun or lastRun:
                                msg += ("\n  (after applying firstRun and/or lastRun)")
                            msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
                            msg += "\nCheck your config file to make sure that it worked properly."
                            # Parenthesised single argument: identical output
                            # with the Python 2 statement and the Python 3
                            # function.
                            print(msg)

                            runlist = self.__getRunList()
                            if firstRun or lastRun:
                                # Start from -1; the substitution callback is
                                # presumably what records the runs actually
                                # used -- TODO confirm in
                                # getForceRunRangeFunction.
                                self.__firstusedrun = -1
                                self.__lastusedrun = -1
                                jsoncontents = re.sub(r"\d+:(\d+|max)(-\d+:(\d+|max))?", self.getForceRunRangeFunction(firstRun, lastRun), jsoncontents)
                                jsoncontents = (jsoncontents.replace("'',\n","").replace("''\n","")
                                                            .replace('"",\n','').replace('""\n',''))
                                self.__firstusedrun = max(self.__firstusedrun, int(self.__findInJson(runlist[0],"run_number")))
                                self.__lastusedrun = min(self.__lastusedrun, int(self.__findInJson(runlist[-1],"run_number")))
                                if self.__lastusedrun < self.__firstusedrun:
                                    # Empty range: falls through to the
                                    # "without any runs" error below.
                                    jsoncontents = None
                            else:
                                self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
                                self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))
                            lumiSecExtend = jsoncontents
                            splitLumiList = None
                        else:
                            raise AllInOneError("%s is not a valid json file!" % jsonPath)

            if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
                lumiSecStr = [ "',\n'".join( lumis )
                               for lumis in splitLumiList ]
                lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )"
                               for lumis in lumiSecStr ]
                lumiSecExtend = "\n".join( lumiSecStr )
                runlist = self.__getRunList()
                self.__firstusedrun = max(int(splitLumiList[0][0].split(":")[0]), int(self.__findInJson(runlist[0],"run_number")))
                self.__lastusedrun = min(int(splitLumiList[-1][-1].split(":")[0]), int(self.__findInJson(runlist[-1],"run_number")))
            elif lumiSecExtend:
                # The cff-snippet text was accepted above; nothing to build.
                pass
            else:
                msg = "You are trying to run a validation without any runs!  Check that:"
                if firstRun or lastRun:
                    msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
                if jsonPath:
                    msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
                if (firstRun or lastRun) and jsonPath:
                    msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
                raise AllInOneError(msg)

        else:
            if self.__inputMagneticField is not None:
                pass  #never need self.__firstusedrun or self.__lastusedrun
            else:
                # Fetch the run list once and reuse it (the original fetched
                # it three times and ignored the first result).
                runlist = self.__getRunList()
                self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
                self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))

        return lumiSecExtend
示例#11
0
 def __createSnippet( self, jsonPath = None, begin = None, end = None,
                      firstRun = None, lastRun = None, repMap = None,
                      crab = False ):
     """Render the dataset source snippet for the requested selection.

     jsonPath          -- optional lumi JSON file.
     begin / end       -- time limits, converted with
                          ``self.convertTimeToRun``; must not be combined
                          with the matching ``firstRun`` / ``lastRun``.
     firstRun / lastRun -- run-number limits (converted with ``int``).
     repMap            -- mapping used to fill the template.  It is updated
                          in place with the keys ``files``, ``json``,
                          ``lumiStr``, ``goodLumiSecStr`` and
                          ``lumiSecExtend``; ``None`` now means "start from
                          an empty mapping" (it used to raise TypeError).
     crab              -- if true, no file list is generated and
                          ``self.__dummy_source_template`` is used instead
                          of ``self.__source_template``.

     Returns the filled template text.  Raises ``AllInOneError`` for
     ambiguous or inverted limits.
     """
     if firstRun:
         firstRun = int( firstRun )
     if lastRun:
         lastRun = int( lastRun )
     if ( begin and firstRun ) or ( end and lastRun ):
         msg = ( "The Usage of "
                 + "'begin' & 'firstRun' " * int( bool( begin and
                                                        firstRun ) )
                 + "and " * int( bool( ( begin and firstRun ) and
                                      ( end and lastRun ) ) )
                 + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
                 + "is ambigous." )
         raise AllInOneError( msg )
     if begin or end:
         ( firstRun, lastRun ) = self.convertTimeToRun(
             begin = begin, end = end, firstRun = firstRun,
             lastRun = lastRun )
     if ( firstRun and lastRun ) and ( firstRun > lastRun ):
         msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
                 "chosen is greater than the upper time/runrange limit "
                 "('end'/'lastRun').")
         raise AllInOneError( msg )
     goodLumiSecStr = ""
     lumiStr = ""
     lumiSecExtend = ""
     if firstRun or lastRun:
         goodLumiSecStr = ( "lumiSecs = cms.untracked."
                            "VLuminosityBlockRange()\n" )
         lumiStr = "                    lumisToProcess = lumiSecs,\n"
         if not jsonPath:
             selectedRunList = self.__getRunList()
             if firstRun:
                 selectedRunList = [ run for run in selectedRunList
                                     if run["run_number"] >= firstRun ]
             if lastRun:
                 selectedRunList = [ run for run in selectedRunList
                                     if run["run_number"] <= lastRun ]
             lumiList = [ str( run["run_number"] ) + ":1-"
                          + str( run["run_number"] ) + ":max"
                          for run in selectedRunList ]
             splitLumiList = list( self.__chunks( lumiList, 255 ) )
         else:
             theLumiList = LumiList ( filename = jsonPath )
             allRuns = theLumiList.getRuns()
             runsToRemove = []
             for run in allRuns:
                 if firstRun and int( run ) < firstRun:
                     runsToRemove.append( run )
                 if lastRun and int( run ) > lastRun:
                     runsToRemove.append( run )
             theLumiList.removeRuns( runsToRemove )
             splitLumiList = list( self.__chunks(
                 theLumiList.getCMSSWString().split(','), 255 ) )
         # Guard every level before indexing: an empty selection may give no
         # chunks at all (IndexError in the original).  It is then treated
         # like the empty-string case, i.e. nothing is added to
         # lumiSecExtend.
         if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
             lumiSecStr = [ "',\n'".join( lumis )
                            for lumis in splitLumiList ]
             lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )"
                            for lumis in lumiSecStr ]
             lumiSecExtend = "\n".join( lumiSecStr )
     elif jsonPath:
         # JSON without run limits: the generated config reads the file
         # itself ('%(json)s' is filled from the map below).
         goodLumiSecStr = ( "goodLumiSecs = LumiList.LumiList(filename"
                            "= '%(json)s').getCMSSWString().split(',')\n"
                            "lumiSecs = cms.untracked"
                            ".VLuminosityBlockRange()\n"
                            )
         lumiStr = "                    lumisToProcess = lumiSecs,\n"
         lumiSecExtend = "lumiSecs.extend(goodLumiSecs)\n"
     if crab:
         files = ""
     else:
         splitFileList = list( self.__chunks( self.fileList(), 255 ) )
         fileStr = [ "',\n'".join( files ) for files in splitFileList ]
         fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )"
                     for files in fileStr ]
         files = "\n".join( fileStr )
     # A caller-supplied map is still updated in place, as before.
     theMap = repMap if repMap is not None else {}
     theMap["files"] = files
     theMap["json"] = jsonPath
     theMap["lumiStr"] = lumiStr
     theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
     theMap["lumiSecExtend"] = lumiSecExtend
     if crab:
         dataset_snippet = self.__dummy_source_template%( theMap )
     else:
         dataset_snippet = self.__source_template%( theMap )
     return dataset_snippet
示例#12
0
    def run(self, filecacheurl=None):
        """
        Override run() for JobType.

        Fetches the task's ``sandbox.tar.gz`` from its web directory into
        ``self.workdir``, uploads it to the file cache at ``filecacheurl``
        and assembles the job arguments: output files, job architecture and
        software from the task, plus the optional input dataset, user input
        files and run/lumi selection from ``self.config.Data``.

        Returns a ``(sandboxFilename, configArguments)`` tuple.

        Raises ``CachefileNotFoundException`` when the upload result has no
        ``hashkey``, and ``ConfigurationException`` when ``Data.lumiMask``
        cannot be loaded or ``Data.runRange`` is malformed, selects nothing
        from the lumi mask, or (without a lumi mask) covers more than 50000
        runs.
        """

        taskDict, webdir = self.getTaskDict()
        addoutputfiles = literal_eval(getColumn(taskDict, 'tm_outfiles'))
        tfileoutfiles = literal_eval(getColumn(taskDict, 'tm_tfile_outfiles'))
        edmoutfiles = literal_eval(getColumn(taskDict, 'tm_edm_outfiles'))
        jobarch = getColumn(taskDict, 'tm_job_arch')
        jobsw = getColumn(taskDict, 'tm_job_sw')

        sandboxFilename = os.path.join(self.workdir, 'sandbox.tar.gz')
        curlGetFileFromURL(webdir + '/sandbox.tar.gz', sandboxFilename,
                           self.proxyfilename)

        configArguments = {
            'addoutputfiles': addoutputfiles,
            'tfileoutfiles': tfileoutfiles,
            'edmoutfiles': edmoutfiles,
            'jobarch': jobarch,
            'jobsw': jobsw,
        }

        # Maybe the user wants to change the dataset
        if getattr(self.config.Data, 'inputDataset', None):
            configArguments['inputdata'] = self.config.Data.inputDataset

        ufc = CRABClient.Emulator.getEmulator('ufc')({
            'endpoint': filecacheurl,
            "pycurl": True
        })
        result = ufc.upload(sandboxFilename,
                            excludeList=NEW_USER_SANDBOX_EXCLUSIONS)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload source files: %s" %
                              str(result))
            raise CachefileNotFoundException

        configArguments['cacheurl'] = filecacheurl
        configArguments['cachefilename'] = "%s.tar.gz" % str(result['hashkey'])

        # Upload list of user-defined input files to process as the primary input
        userFilesList = getattr(self.config.Data, 'userInputFiles', None)
        if userFilesList:
            self.logger.debug(
                "Attaching list of user-specified primary input files.")
            # Strip whitespace and drop blank entries.  str.strip() replaces
            # the Python-2-only string.strip function.
            userFilesList = [name.strip() for name in userFilesList]
            userFilesList = [name for name in userFilesList if name]
            if len(userFilesList) != len(set(userFilesList)):
                msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
                msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
                msg += " Duplicated entries will be removed."
                self.logger.warning(msg)
            configArguments['userfiles'] = set(userFilesList)
            configArguments['primarydataset'] = getattr(
                self.config.Data, 'outputPrimaryDataset', 'CRAB_UserFiles')

        lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
        lumi_list = None
        if lumi_mask_name:
            self.logger.debug("Attaching lumi mask %s to the request" %
                              (lumi_mask_name))
            try:
                lumi_list = getLumiList(lumi_mask_name, logger=self.logger)
            except ValueError as ex:
                msg = "%sError%s:" % (colors.RED, colors.NORMAL)
                msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name,
                                                              ex)
                raise ConfigurationException(msg)
        run_ranges = getattr(self.config.Data, 'runRange', None)
        if run_ranges:
            # Raw string: same pattern as before, without relying on
            # unrecognised escape sequences in a normal string literal.
            run_ranges_is_valid = re.match(r'^\d+((?!(-\d+-))(\,|\-)\d+)*$',
                                           run_ranges)
            if run_ranges_is_valid:
                run_list = getRunList(run_ranges)
                if lumi_list:
                    lumi_list.selectRuns(run_list)
                    if not lumi_list:
                        msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
                        raise ConfigurationException(msg)
                else:
                    if len(run_list) > 50000:
                        msg = "CRAB configuration parameter Data.runRange includes %s runs." % str(
                            len(run_list))
                        msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                        raise ConfigurationException(msg)
                    lumi_list = LumiList(runs=run_list)
            else:
                msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
                raise ConfigurationException(msg)
        if lumi_list:
            configArguments['runs'] = lumi_list.getRuns()
            ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
            lumi_mask = lumi_list.getCompactList()
            # Flatten the [first, last] pairs directly; this gives the same
            # text as the old reduce()/str()/slice chain without needing the
            # Python-2-only builtin reduce.
            configArguments['lumis'] = [
                ','.join(
                    str(lumi) for lumi_range in lumi_mask[run]
                    for lumi in lumi_range)
                for run in configArguments['runs']
            ]

        configArguments['jobtype'] = 'Analysis'

        return sandboxFilename, configArguments
示例#13
0
from JMTucker.Tools.general import from_pickle, to_pickle

# Make sure the scratch directory used by the rest of this script exists.
os.system('mkdir -p prescales_temp')


def popen(cmd):
    """Run *cmd* through the shell and return everything it printed.

    stderr is merged into stdout, so the returned value holds both streams.
    The call blocks until the command has finished.
    """
    process = subprocess.Popen(cmd,
                               shell=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    captured, _unused = process.communicate()
    return captured


# Lumi list read from the certification JSON in the scratch directory
# (the file must already be there; nothing in view downloads it).
ll = LumiList(
    'prescales_temp/Cert_190456-208686_8TeV_PromptReco_Collisions12_JSON.txt')
ll_compact = ll.getCompactList()
# Run numbers converted to int and sorted ascending.
runs = [int(i) for i in ll.getRuns()]
runs.sort()


def dump_lumibyls(runs):
    """Dump the per-lumi-section luminosity of every run to a CSV file.

    For each run number in *runs* the file
    ``prescales_temp/lumibyls/<run>.csv`` is produced by running
    ``lumiCalc2.py lumibyls`` through ``popen``; runs whose file already
    exists are skipped.  A progress line is printed for every run.
    """
    total = len(runs)
    for i, run in enumerate(runs):
        out_fn = 'prescales_temp/lumibyls/%i.csv' % run
        already = os.path.isfile(out_fn)
        # Single pre-formatted argument: same output under the Python 2
        # print statement and the Python 3 print function.
        print('run %i (%i/%i)%s' % (run, i + 1, total,
                                    ' (skipping since already dumped)'
                                    if already else ''))
        if already:
            continue
        popen('lumiCalc2.py lumibyls -r %i -o %s' % (run, out_fn))
0
    parser.add_option ('--output', dest='output', type='string',
                       help='Save output to file OUTPUT')
    # required parameters
    (options, args) = parser.parse_args()
    if len (args) != 1:
        raise RuntimeError, "Must provide exactly one input file"

    if options.min and options.max and options.min > options.max:
        raise RuntimeError, "Minimum value (%d) is greater than maximum value (%d)" % (options.min, options.max)

    commaRE = re.compile (r',')
    runsToRemove = []
    for chunk in options.runs:
        runs = commaRE.split (chunk)
        runsToRemove.extend (runs)

    alphaList = LumiList (filename = args[0]) # Read in first JSON file
    allRuns = alphaList.getRuns()
    for run in allRuns:
        if options.min and int(run) < options.min:
            runsToRemove.append (run)
        if options.max and int(run) > options.max:
            runsToRemove.append (run)

    alphaList.removeRuns (runsToRemove)

    if options.output:
        alphaList.writeJSON (options.output)
    else:
        print alphaList
示例#15
0
    def __lumiSelectionSnippet(self,
                               jsonPath=None,
                               firstRun=None,
                               lastRun=None):
        """Build the lumi-selection text for a run range and/or a JSON file.

        Without ``jsonPath`` the runs from ``self.__getRunList()`` are
        filtered by ``firstRun``/``lastRun`` and every selected run
        contributes a ``run:1-run:max`` range.  With ``jsonPath`` the file is
        read as a ``LumiList`` and the runs outside the range are removed; if
        reading raises ``ValueError`` but the file contains
        ``process.source.lumisToProcess``, its (optionally range-restricted)
        text is used as the snippet instead.

        Side effects: ``self.__firstusedrun`` and ``self.__lastusedrun`` are
        updated.

        Returns the snippet text ("" when no selection is requested).
        Raises ``AllInOneError`` when the selection is empty or ``jsonPath``
        is neither readable as a lumi JSON nor a lumi-selection snippet.
        """
        lumiSecExtend = ""
        if firstRun or lastRun or jsonPath:
            if not jsonPath:
                selectedRunList = self.__getRunList()
                if firstRun:
                    selectedRunList = [ run for run in selectedRunList
                                        if self.__findInJson(run, "run_number") >= firstRun ]
                if lastRun:
                    selectedRunList = [ run for run in selectedRunList
                                        if self.__findInJson(run, "run_number") <= lastRun ]
                lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-"
                             + str( self.__findInJson(run, "run_number") ) + ":max"
                             for run in selectedRunList ]
                splitLumiList = list(self.__chunks(lumiList, 255))
            else:
                theLumiList = None
                try:
                    theLumiList = LumiList(filename=jsonPath)
                except ValueError:
                    # Not a lumi JSON; the fallback below checks whether it
                    # is a cff snippet instead.
                    pass

                if theLumiList is not None:
                    allRuns = theLumiList.getRuns()
                    runsToRemove = []
                    for run in allRuns:
                        if firstRun and int(run) < firstRun:
                            runsToRemove.append(run)
                        if lastRun and int(run) > lastRun:
                            runsToRemove.append(run)
                    theLumiList.removeRuns(runsToRemove)
                    splitLumiList = list(
                        self.__chunks(theLumiList.getCMSSWString().split(','),
                                      255))
                    if not (splitLumiList and splitLumiList[0]
                            and splitLumiList[0][0]):
                        splitLumiList = None
                else:
                    with open(jsonPath) as f:
                        jsoncontents = f.read()
                        if "process.source.lumisToProcess" in jsoncontents:
                            msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet.  Trying to use it" % jsonPath
                            if firstRun or lastRun:
                                msg += (
                                    "\n  (after applying firstRun and/or lastRun)"
                                )
                            msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
                            msg += "\nCheck your config file to make sure that it worked properly."
                            # Parenthesised single argument: identical output
                            # with the Python 2 statement and the Python 3
                            # function.
                            print(msg)

                            runlist = self.__getRunList()
                            if firstRun or lastRun:
                                # Start from -1; the substitution callback is
                                # presumably what records the runs actually
                                # used -- TODO confirm in
                                # getForceRunRangeFunction.
                                self.__firstusedrun = -1
                                self.__lastusedrun = -1
                                jsoncontents = re.sub(
                                    r"\d+:(\d+|max)(-\d+:(\d+|max))?",
                                    self.getForceRunRangeFunction(
                                        firstRun, lastRun), jsoncontents)
                                jsoncontents = (jsoncontents.replace(
                                    "'',\n", "").replace("''\n", "").replace(
                                        '"",\n', '').replace('""\n', ''))
                                self.__firstusedrun = max(
                                    self.__firstusedrun,
                                    int(
                                        self.__findInJson(
                                            runlist[0], "run_number")))
                                self.__lastusedrun = min(
                                    self.__lastusedrun,
                                    int(
                                        self.__findInJson(
                                            runlist[-1], "run_number")))
                                if self.__lastusedrun < self.__firstusedrun:
                                    # Empty range: falls through to the
                                    # "without any runs" error below.
                                    jsoncontents = None
                            else:
                                self.__firstusedrun = int(
                                    self.__findInJson(runlist[0],
                                                      "run_number"))
                                self.__lastusedrun = int(
                                    self.__findInJson(runlist[-1],
                                                      "run_number"))
                            lumiSecExtend = jsoncontents
                            splitLumiList = None
                        else:
                            raise AllInOneError(
                                "%s is not a valid json file!" % jsonPath)

            if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
                lumiSecStr = [ "',\n'".join( lumis )
                               for lumis in splitLumiList ]
                lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )"
                               for lumis in lumiSecStr ]
                lumiSecExtend = "\n".join(lumiSecStr)
                runlist = self.__getRunList()
                self.__firstusedrun = max(
                    int(splitLumiList[0][0].split(":")[0]),
                    int(self.__findInJson(runlist[0], "run_number")))
                self.__lastusedrun = min(
                    int(splitLumiList[-1][-1].split(":")[0]),
                    int(self.__findInJson(runlist[-1], "run_number")))
            elif lumiSecExtend:
                # The cff-snippet text was accepted above; nothing to build.
                pass
            else:
                msg = "You are trying to run a validation without any runs!  Check that:"
                if firstRun or lastRun:
                    msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
                if jsonPath:
                    msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
                if (firstRun or lastRun) and jsonPath:
                    msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
                raise AllInOneError(msg)

        else:
            # Fetch the run list once and reuse it (the original fetched it
            # three times and ignored the first result).
            runlist = self.__getRunList()
            self.__firstusedrun = int(
                self.__findInJson(runlist[0], "run_number"))
            self.__lastusedrun = int(
                self.__findInJson(runlist[-1], "run_number"))

        return lumiSecExtend
示例#16
0
#!/usr/bin/env python

import os, re, sys
from collections import defaultdict
from itertools import combinations
from FWCore.PythonUtilities.LumiList import LumiList

# Should rewrite not to hit the db for every cfg, but just get the HLT
# key for each run and then only get cfgs for unique keys.

# Load the DCS-only certification JSON into a LumiList.
dcsonly_ll = LumiList(
    '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions12/8TeV/DCSOnly/json_DCSONLY.txt'
)  # JMTBAD use DCSOnly_ll from goodlumis once sorted
# Run numbers contained in that list, converted to int and sorted ascending.
runs = sorted(int(run) for run in dcsonly_ll.getRuns())
# Shell command template; the %i placeholder is filled with a run number below.
cmd = 'edmConfigFromDB --cff --runNumber %i --noedsources --noes --noservices --nomodules'

# Patterns that extract the versioned trigger path names from a config line.
path_re = re.compile(r'(HLT_Mu40_eta2p1_v\d+)')
prescaled_path_re = re.compile(r'(HLT_Mu15_eta2p1_v\d+)')

# NOTE(review): not filled within the visible part of this script.
paths_and_filters = defaultdict(list)

for run in runs:
    # Python 2 print statement; the trailing comma suppresses the newline so
    # that further output for this run can follow on the same line.
    print 'run:', run,
    sys.stdout.flush()
    # NOTE(review): the name `filter` shadows the builtin of the same name.
    path = prescaled_path = filter = prescaled_filter = None
    # Iterate over the output lines of the command for this run.
    for line in os.popen(cmd % run):
        # Only lines that mention cms.Path are of interest.
        if 'cms.Path' not in line:
            continue
        # Second-to-last ' + '-separated token of the line.
        filt = line.split(' + ')[-2]  # JMTBAD fragile
        mo = path_re.search(line)
        if mo is not None:
示例#17
0
文件: dataset.py 项目: DesyTau/cmssw
    def __createSnippet( self, jsonPath = None, begin = None, end = None,
                         firstRun = None, lastRun = None, repMap = None,
                         crab = False, parent = False ):
        """
        Build the source snippet for this dataset and return it as a string.

        The snippet is produced by filling `self.__source_template` (or
        `self.__dummy_source_template` when `crab` is true) with the keys
        "files", "json", "lumiStr", "goodLumiSecStr" and "lumiSecExtend",
        which are written into `repMap`.

        Arguments:
        - `jsonPath`: path to a JSON lumi mask.  A file that is not valid JSON
                      but contains "process.source.lumisToProcess" is used
                      verbatim as lumi selection.
        - `begin`, `end`: time limits, converted to run numbers through
                          `convertTimeToRun`.  Must not be combined with the
                          corresponding `firstRun`/`lastRun`.
        - `firstRun`, `lastRun`: inclusive run range limits.
        - `repMap`: replacement map; it is updated in place.  A new, empty map
                    is used if none is given.
        - `crab`: if true, no file list is written.
        - `parent`: if true, the parent files are appended as secondary files.

        As a side effect `self.__firstusedrun` and `self.__lastusedrun` are
        set to the run range that is actually used.

        Raises `AllInOneError` for ambiguous or inverted ranges, for
        selections on predefined datasets, for a `jsonPath` that is neither
        JSON nor a lumi selection snippet, and for selections without runs.
        """
        if firstRun:
            firstRun = int( firstRun )
        if lastRun:
            lastRun = int( lastRun )
        if ( begin and firstRun ) or ( end and lastRun ):
            msg = ( "The Usage of "
                    + "'begin' & 'firstRun' " * int( bool( begin and
                                                           firstRun ) )
                    + "and " * int( bool( ( begin and firstRun ) and
                                         ( end and lastRun ) ) )
                    + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
                    + "is ambigous." )
            raise AllInOneError( msg )
        if begin or end:
            ( firstRun, lastRun ) = self.convertTimeToRun(
                begin = begin, end = end, firstRun = firstRun,
                lastRun = lastRun )
        if ( firstRun and lastRun ) and ( firstRun > lastRun ):
            msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
                    "chosen is greater than the upper time/runrange limit "
                    "('end'/'lastRun').")
            raise AllInOneError( msg )
        if self.predefined() and (jsonPath or begin or end or firstRun or lastRun):
            # the separating blank between the two literals used to be missing
            msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun' "
                    "only work for official datasets, not predefined _cff.py files" )
            raise AllInOneError( msg )
        goodLumiSecStr = ""
        lumiStr = ""
        lumiSecExtend = ""
        if firstRun or lastRun or jsonPath:
            goodLumiSecStr = ( "lumiSecs = cms.untracked."
                               "VLuminosityBlockRange()\n" )
            lumiStr = "                    lumisToProcess = lumiSecs,\n"
            splitLumiList = None
            if not jsonPath:
                # no lumi mask: select whole runs within [firstRun, lastRun]
                selectedRunList = self.__getRunList()
                if firstRun:
                    selectedRunList = [ run for run in selectedRunList \
                                        if self.__findInJson(run, "run_number") >= firstRun ]
                if lastRun:
                    selectedRunList = [ run for run in selectedRunList \
                                        if self.__findInJson(run, "run_number") <= lastRun ]
                lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
                             + str( self.__findInJson(run, "run_number") ) + ":max" \
                             for run in selectedRunList ]
                splitLumiList = list( self.__chunks( lumiList, 255 ) )
            else:
                theLumiList = None
                try:
                    theLumiList = LumiList ( filename = jsonPath )
                except ValueError:
                    # not a JSON file; it may still be a lumi selection
                    # snippet, which is checked below
                    pass

                if theLumiList is not None:
                    # drop all runs of the mask outside of the requested range
                    runsToRemove = [ run for run in theLumiList.getRuns()
                                     if ( firstRun and int( run ) < firstRun )
                                     or ( lastRun and int( run ) > lastRun ) ]
                    theLumiList.removeRuns( runsToRemove )
                    splitLumiList = list( self.__chunks(
                        theLumiList.getCMSSWString().split(','), 255 ) )
                else:
                    with open(jsonPath) as f:
                        jsoncontents = f.read()
                    if "process.source.lumisToProcess" not in jsoncontents:
                        # previously this case ended in an UnboundLocalError
                        # because splitLumiList was never assigned
                        raise AllInOneError(
                            "%s is not a valid json file!" % jsonPath)

                    msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet.  Trying to use it" % jsonPath
                    if firstRun or lastRun:
                        msg += ("\n  (after applying firstRun and/or lastRun)")
                    msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
                    msg += "\nCheck your config file to make sure that it worked properly."
                    # single-argument call: same output as the print statement
                    print(msg)

                    runlist = self.__getRunList()
                    if firstRun or lastRun:
                        # the used-run markers are reset before the
                        # substitution and combined with the dataset's run
                        # range only afterwards
                        self.__firstusedrun = -1
                        self.__lastusedrun = -1
                        jsoncontents = re.sub(r"\d+:(\d+|max)-\d+:(\d+|max)", self.getForceRunRangeFunction(firstRun, lastRun), jsoncontents)
                        self.__firstusedrun = max(self.__firstusedrun, int(self.__findInJson(runlist[0],"run_number")))
                        self.__lastusedrun = min(self.__lastusedrun, int(self.__findInJson(runlist[-1],"run_number")))
                    else:
                        self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
                        self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))
                    lumiSecExtend = jsoncontents
                    splitLumiList = [[""]]

            if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
                lumiSecStr = [ "',\n'".join( lumis ) \
                               for lumis in splitLumiList ]
                lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                               for lumis in lumiSecStr ]
                lumiSecExtend = "\n".join( lumiSecStr )
                runlist = self.__getRunList()
                self.__firstusedrun = max(int(splitLumiList[0][0].split(":")[0]), int(self.__findInJson(runlist[0],"run_number")))
                self.__lastusedrun = min(int(splitLumiList[-1][-1].split(":")[0]), int(self.__findInJson(runlist[-1],"run_number")))
            elif lumiSecExtend:
                # lumi selection snippet: its contents are used as they are
                pass
            else:
                # nothing was selected; fail with an explanation instead of
                # an IndexError or a silently empty lumi selection
                msg = "You are trying to run a validation without any runs!  Check that:"
                if firstRun or lastRun:
                    msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
                if jsonPath:
                    msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
                if (firstRun or lastRun) and jsonPath:
                    msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
                raise AllInOneError(msg)
        else:
            # no restriction at all: the full run range of the dataset is used
            runlist = self.__getRunList()
            self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
            self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))

        if crab:
            files = ""
        else:
            # the file names are written in chunks of at most 255 entries
            splitFileList = list( self.__chunks( self.fileList(), 255 ) )
            fileStr = [ "',\n'".join( files ) for files in splitFileList ]
            fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
                        for files in fileStr ]
            files = "\n".join( fileStr )

            if parent:
                splitParentFileList = list( self.__chunks( self.fileList(parent = True), 255 ) )
                parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ]
                parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
                            for parentFiles in parentFileStr ]
                parentFiles = "\n".join( parentFileStr )
                files += "\n\n" + parentFiles


        # a given repMap is still updated in place; only the None default is
        # replaced, because it cannot take item assignments
        theMap = repMap if repMap is not None else {}
        theMap["files"] = files
        theMap["json"] = jsonPath
        theMap["lumiStr"] = lumiStr
        theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
        theMap["lumiSecExtend"] = lumiSecExtend
        if crab:
            dataset_snippet = self.__dummy_source_template%( theMap )
        else:
            dataset_snippet = self.__source_template%( theMap )
        return dataset_snippet
示例#18
0
import re, os, subprocess
from pprint import pprint
from collections import defaultdict
from FWCore.PythonUtilities.LumiList import LumiList
from RecoLuminosity.LumiDB import sessionManager, lumiCalcAPI, revisionDML
from JMTucker.Tools.general import from_pickle, to_pickle

# Create the working directory used by the paths below; with -p an already
# existing directory is not an error.
os.system('mkdir -p prescales_temp')

def popen(cmd):
    """Run *cmd* through the shell and return everything it printed.

    stderr is merged into stdout, so the returned value holds both streams.
    The call blocks until the command has finished.
    """
    process = subprocess.Popen(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    output, _ = process.communicate()
    return output

# Lumi list read from the certification JSON in the working directory.
ll = LumiList('prescales_temp/Cert_190456-208686_8TeV_PromptReco_Collisions12_JSON.txt')
ll_compact = ll.getCompactList()
# Run numbers of that list as ints, in ascending order.
runs = sorted(int(i) for i in ll.getRuns())

def dump_lumibyls(runs):
    """Dump the per-lumisection luminosity of every run in *runs* to CSV.

    For each run `lumiCalc2.py lumibyls` is invoked through `popen`, writing
    to prescales_temp/lumibyls/<run>.csv.  Runs whose CSV file already exists
    are skipped, so an interrupted dump can simply be restarted.  One
    progress line is printed per run.
    """
    total = len(runs)
    for i, run in enumerate(runs):
        out_fn = 'prescales_temp/lumibyls/%i.csv' % run
        already = os.path.isfile(out_fn)
        # A single pre-formatted argument prints identically with the
        # Python 2 print statement and the Python 3 print function.
        print('run %i (%i/%i)%s' % (run, i+1, total, ' (skipping since already dumped)' if already else ''))
        if already:
            continue
        popen('lumiCalc2.py lumibyls -r %i -o %s' % (run, out_fn))

def parse_lumibyls(run):
    d = defaultdict(dict)
    for line in open('prescales_temp/lumibyls/%i.csv' % run):
示例#19
0
文件: dataset.py 项目: yjcho10/cmssw
 def __createSnippet(self,
                     jsonPath=None,
                     begin=None,
                     end=None,
                     firstRun=None,
                     lastRun=None,
                     repMap=None,
                     crab=False):
     """
     Build the source snippet for this dataset and return it as a string.

     The snippet is produced by filling `self.__source_template` (or
     `self.__dummy_source_template` when `crab` is true) with the keys
     "files", "json", "lumiStr", "goodLumiSecStr" and "lumiSecExtend",
     which are written into `repMap`.

     Arguments:
     - `jsonPath`: path to a JSON lumi mask.  Together with a run range the
                   mask is evaluated here; without one, the snippet reads
                   the file itself through LumiList.
     - `begin`, `end`: time limits, converted to run numbers through
                       `convertTimeToRun`.  Must not be combined with the
                       corresponding `firstRun`/`lastRun`.
     - `firstRun`, `lastRun`: inclusive run range limits.
     - `repMap`: replacement map; it is updated in place.  A new, empty map
                 is used if none is given.
     - `crab`: if true, no file list is written.

     Raises `AllInOneError` for ambiguous or inverted ranges and for a run
     range that selects nothing.
     """
     if firstRun:
         firstRun = int(firstRun)
     if lastRun:
         lastRun = int(lastRun)
     if (begin and firstRun) or (end and lastRun):
         msg = (
             "The Usage of " +
             "'begin' & 'firstRun' " * int(bool(begin and firstRun)) +
             "and " * int(bool(
                 (begin and firstRun) and (end and lastRun))) +
             "'end' & 'lastRun' " * int(bool(end and lastRun)) +
             "is ambigous.")
         raise AllInOneError(msg)
     if begin or end:
         (firstRun, lastRun) = self.convertTimeToRun(begin=begin,
                                                     end=end,
                                                     firstRun=firstRun,
                                                     lastRun=lastRun)
     if (firstRun and lastRun) and (firstRun > lastRun):
         msg = ("The lower time/runrange limit ('begin'/'firstRun') "
                "chosen is greater than the upper time/runrange limit "
                "('end'/'lastRun').")
         raise AllInOneError(msg)
     goodLumiSecStr = ""
     lumiStr = ""
     lumiSecExtend = ""
     if firstRun or lastRun:
         goodLumiSecStr = ("lumiSecs = cms.untracked."
                           "VLuminosityBlockRange()\n")
         lumiStr = "                    lumisToProcess = lumiSecs,\n"
         if not jsonPath:
             # no lumi mask: select whole runs within [firstRun, lastRun]
             selectedRunList = self.__getRunList()
             if firstRun:
                 selectedRunList = [ run for run in selectedRunList \
                                     if run["run_number"] >= firstRun ]
             if lastRun:
                 selectedRunList = [ run for run in selectedRunList \
                                     if run["run_number"] <= lastRun ]
             lumiList = [ str( run["run_number"] ) + ":1-" \
                          + str( run["run_number"] ) + ":max" \
                          for run in selectedRunList ]
             splitLumiList = list(self.__chunks(lumiList, 255))
         else:
             theLumiList = LumiList(filename=jsonPath)
             # drop all runs of the mask outside of the requested range
             runsToRemove = [run for run in theLumiList.getRuns()
                             if (firstRun and int(run) < firstRun)
                             or (lastRun and int(run) > lastRun)]
             theLumiList.removeRuns(runsToRemove)
             splitLumiList = list(
                 self.__chunks(theLumiList.getCMSSWString().split(','),
                               255))
         if not splitLumiList:
             # Indexing splitLumiList[0][0] used to fail with an IndexError
             # here; report the empty selection instead.
             raise AllInOneError(
                 "No runs of this dataset lie within the requested "
                 "firstRun/begin - lastRun/end range.")
         if splitLumiList[0] and splitLumiList[0][0]:
             lumiSecStr = [ "',\n'".join( lumis ) \
                            for lumis in splitLumiList ]
             lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                            for lumis in lumiSecStr ]
             lumiSecExtend = "\n".join(lumiSecStr)
     elif jsonPath:
         # only a lumi mask: the snippet itself loads it through LumiList
         goodLumiSecStr = ("goodLumiSecs = LumiList.LumiList(filename"
                           "= '%(json)s').getCMSSWString().split(',')\n"
                           "lumiSecs = cms.untracked"
                           ".VLuminosityBlockRange()\n")
         lumiStr = "                    lumisToProcess = lumiSecs,\n"
         lumiSecExtend = "lumiSecs.extend(goodLumiSecs)\n"
     if crab:
         files = ""
     else:
         # the file names are written in chunks of at most 255 entries
         splitFileList = list(self.__chunks(self.fileList(), 255))
         fileStr = ["',\n'".join(files) for files in splitFileList]
         fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
                     for files in fileStr ]
         files = "\n".join(fileStr)
     # a given repMap is still updated in place; only the None default is
     # replaced, because it cannot take item assignments
     theMap = repMap if repMap is not None else {}
     theMap["files"] = files
     theMap["json"] = jsonPath
     theMap["lumiStr"] = lumiStr
     # "json" has to be set before this line: the template refers to it
     theMap["goodLumiSecStr"] = goodLumiSecStr % (theMap)
     theMap["lumiSecExtend"] = lumiSecExtend
     if crab:
         dataset_snippet = self.__dummy_source_template % (theMap)
     else:
         dataset_snippet = self.__source_template % (theMap)
     return dataset_snippet
示例#20
0
    def run(
        self,
        filecacheurl=None,
    ):  # pylint: disable=arguments-differ
        """
        Override run() for JobType.

        Prepares the CMSSW configuration, builds and uploads the input
        sandbox and the debug files tarball, and collects the arguments
        describing the task.

        :param filecacheurl: URL handed to the tarball upload and stored as
            'cacheurl' in the returned arguments.
        :return: tuple (tarFilename, configArguments), i.e. the path of the
            sandbox tarball and the dictionary of task arguments.
        :raises ConfigurationException: for an invalid CRAB configuration
            (conflicting Data parameters, invalid pset, unreadable lumi mask,
            malformed or non-overlapping run range).
        :raises ClientException: if publication is requested with more than
            one EDM output file, or if the sandbox upload fails.
        :raises EnvironmentException: if no unique tarball name is available.
        """
        configArguments = {
            'addoutputfiles': [],
            'tfileoutfiles': [],
            'edmoutfiles': [],
        }

        if getattr(self.config.Data, 'useParent', False) and getattr(
                self.config.Data, 'secondaryInputDataset', None):
            msg = "Invalid CRAB configuration: Parameters Data.useParent and Data.secondaryInputDataset cannot be used together."
            raise ConfigurationException(msg)

        # Get SCRAM environment
        scram = ScramEnvironment(logger=self.logger)

        configArguments.update({
            'jobarch': scram.getScramArch(),
            'jobsw': scram.getCmsswVersion()
        })

        # Build tarball
        if self.workdir:
            tarUUID = str(uuid.uuid4())
            self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
            if len(tarUUID):
                tarFilename = os.path.join(self.workdir,
                                           tarUUID + 'default.tgz')
                debugTarFilename = os.path.join(self.workdir, 'debugFiles.tgz')
                cfgOutputName = os.path.join(self.workdir, BOOTSTRAP_CFGFILE)
            else:
                raise EnvironmentException(
                    'Problem with uuidgen while preparing for Sandbox upload.')
        else:
            ## mkstemp returns an open file descriptor together with the path.
            ## Only the path is needed here, so close the descriptor right away
            ## instead of leaking it. debugTarFilename is set in this branch as
            ## well, since it is needed below for the debug files upload.
            tarFd, tarFilename = tempfile.mkstemp(suffix='.tgz')
            os.close(tarFd)
            debugTarFd, debugTarFilename = tempfile.mkstemp(
                suffix='_debugFiles.tgz')
            os.close(debugTarFd)
            cfgFd, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')
            os.close(cfgFd)

        if getattr(self.config.Data, 'inputDataset', None):
            configArguments['inputdata'] = self.config.Data.inputDataset

        ## Create CMSSW config.
        self.logger.debug("self.config: %s" % (self.config))
        self.logger.debug("self.config.JobType.psetName: %s" %
                          (self.config.JobType.psetName))
        ## The loading of a CMSSW pset in the CMSSWConfig constructor is not idempotent
        ## in the sense that a second loading of the same pset may not produce the same
        ## result. Therefore there is a cache in CMSSWConfig to avoid loading any CMSSW
        ## pset twice. However, some "complicated" psets seem to evade the caching.
        ## Thus, to be safe, keep the CMSSWConfig instance in a class variable, so that
        ## it can be reused later if wanted (for example, in PrivateMC when checking if
        ## the pset has an LHE source) instead of having to load the pset again.
        ## As for what does "complicated" psets mean, Daniel Riley said that there are
        ## some psets where one module modifies the configuration from another module.
        self.cmsswCfg = CMSSWConfig(config=self.config,
                                    logger=self.logger,
                                    userConfig=self.config.JobType.psetName)

        ## If there is a CMSSW pset, do a basic validation of it.
        if not bootstrapDone() and self.config.JobType.psetName:
            valid, msg = self.cmsswCfg.validateConfig()
            if not valid:
                raise ConfigurationException(msg)

        ## We need to put the pickled CMSSW configuration in the right place.
        ## Here, we determine if the bootstrap script already run and prepared everything
        ## for us. In such case we move the file, otherwise we pickle.dump the pset
        if not bootstrapDone():
            # Write out CMSSW config
            self.cmsswCfg.writeFile(cfgOutputName)
        else:
            # Move the pickled and the configuration files created by the bootstrap script
            self.moveCfgFile(cfgOutputName)

        ## Interrogate the CMSSW pset for output files (only output files produced by
        ## PoolOutputModule or TFileService are identified automatically). Do this
        ## automatic detection even if JobType.disableAutomaticOutputCollection = True,
        ## so that we can still classify the output files in EDM, TFile and additional
        ## output files in the Task DB (and the job ad).
        ## TODO: Do we really need this classification at all? cmscp and PostJob read
        ## the FJR to know if an output file is EDM, TFile or other.
        edmfiles, tfiles = self.cmsswCfg.outputFiles()
        ## If JobType.disableAutomaticOutputCollection = True, ignore the EDM and TFile
        ## output files that are not listed in JobType.outputFiles.
        if getattr(
                self.config.JobType, 'disableAutomaticOutputCollection',
                getParamDefaultValue(
                    'JobType.disableAutomaticOutputCollection')):
            outputFiles = [
                re.sub(r'^file:', '', f)
                for f in getattr(self.config.JobType, 'outputFiles', [])
            ]
            edmfiles = [f for f in edmfiles if f in outputFiles]
            tfiles = [f for f in tfiles if f in outputFiles]
        ## Get the list of additional output files that have to be collected as given
        ## in JobType.outputFiles, but remove duplicates listed already as EDM files or
        ## TFiles.
        addoutputFiles = [
            re.sub(r'^file:', '', f)
            for f in getattr(self.config.JobType, 'outputFiles', [])
            if re.sub(r'^file:', '', f) not in edmfiles + tfiles
        ]
        outputWarn = "The following user output files (not listed as PoolOuputModule or TFileService in the CMSSW PSet) will be collected: %s" % ", ".join(
            ["'{0}'".format(x) for x in addoutputFiles])
        self.logger.debug(
            "The following EDM output files will be collected: %s" % edmfiles)
        self.logger.debug(
            "The following TFile output files will be collected: %s" % tfiles)
        if getattr(self.config.Data, 'publication',
                   False) and len(edmfiles) > 1:
            self.logger.error(
                "The input PSet produces multiple EDM output files: %s",
                edmfiles)
            self.logger.error(
                "But current CRAB version can't publish more than one dataset per task"
            )
            self.logger.error(
                "Either disable publication or submit multiple times with only one output at a time"
            )
            msg = "Submission refused"
            raise ClientException(msg)
        if addoutputFiles:
            self.logger.warning(outputWarn)
        else:
            self.logger.debug(outputWarn)
        configArguments['edmoutfiles'] = edmfiles
        configArguments['tfileoutfiles'] = tfiles
        configArguments['addoutputfiles'].extend(addoutputFiles)
        ## Give warning message in case no output file was detected in the CMSSW pset
        ## nor was any specified in the CRAB configuration.
        if not configArguments['edmoutfiles'] and not configArguments[
                'tfileoutfiles'] and not configArguments['addoutputfiles']:
            msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
            if getattr(
                    self.config.JobType, 'disableAutomaticOutputCollection',
                    getParamDefaultValue(
                        'JobType.disableAutomaticOutputCollection')):
                msg += " Automatic detection of output files in the CMSSW configuration is disabled from the CRAB configuration"
                msg += " and no output file was explicitly specified in the CRAB configuration."
            else:
                msg += " CRAB could not detect any output file in the CMSSW configuration"
                msg += " nor was any explicitly specified in the CRAB configuration."
            msg += " Hence CRAB will not collect any output file from this task."
            self.logger.warning(msg)

        ## UserTarball calls ScramEnvironment which can raise EnvironmentException.
        ## Since ScramEnvironment is already called above and the exception is not
        ## handled, we are sure that if we reached this point it will not raise EnvironmentException.
        ## But otherwise we should take this into account.
        with UserTarball(name=tarFilename,
                         logger=self.logger,
                         config=self.config,
                         crabserver=self.crabserver,
                         s3tester=self.s3tester) as tb:
            inputFiles = [
                re.sub(r'^file:', '', f)
                for f in getattr(self.config.JobType, 'inputFiles', [])
            ]
            tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
            try:
                uploadResult = tb.upload(filecacheurl=filecacheurl)
            except HTTPException as hte:
                if 'X-Error-Info' in hte.headers:
                    reason = hte.headers['X-Error-Info']
                    reason_re = re.compile(
                        r'\AFile size is ([0-9]*)B\. This is bigger than the maximum allowed size of ([0-9]*)B\.$'
                    )
                    re_match = reason_re.match(reason)
                    if re_match:
                        ISBSize = int(re_match.group(1))
                        ISBSizeLimit = int(re_match.group(2))
                        reason = "%sError%s:" % (colors.RED, colors.NORMAL)
                        ## Floor division keeps the sizes whole MB values,
                        ## independently of the Python version in use.
                        reason += " Input sandbox size is ~%sMB. This is bigger than the maximum allowed size of %sMB." % (
                            ISBSize // 1024 // 1024,
                            ISBSizeLimit // 1024 // 1024)
                        reason += tb.printSortedContent()
                        raise ClientException(reason)
                ## Any other HTTP error is passed on unchanged (bare raise
                ## keeps the original traceback).
                raise
            except Exception as e:
                msg = (
                    "Impossible to upload the sandbox tarball.\nError message: %s.\n"
                    "More details can be found in %s" %
                    (e, self.logger.logfile))
                raise ClientException(msg)

        # upload debug files
        debugFilesUploadResult = None
        with UserTarball(name=debugTarFilename,
                         logger=self.logger,
                         config=self.config,
                         crabserver=self.crabserver,
                         s3tester=self.s3tester) as dtb:
            dtb.addMonFiles()
            try:
                debugFilesUploadResult = dtb.upload(filecacheurl=filecacheurl)
            except Exception as e:
                ## A failed debug files upload is only logged; it does not
                ## stop the submission.
                msg = (
                    "Problem uploading debug_files.tar.gz.\nError message: %s.\n"
                    "More details can be found in %s" %
                    (e, self.logger.logfile))
                LOGGERS['CRAB3'].exception(
                    msg)  #the traceback is only printed into the logfile

        configArguments['cacheurl'] = filecacheurl
        configArguments['cachefilename'] = "%s.tar.gz" % uploadResult
        if debugFilesUploadResult is not None:
            configArguments[
                'debugfilename'] = "%s.tar.gz" % debugFilesUploadResult
        self.logger.debug("Result uploading input files: %(cachefilename)s " %
                          configArguments)

        # Upload list of user-defined input files to process as the primary input
        userFilesList = getattr(self.config.Data, 'userInputFiles', None)
        if userFilesList:
            self.logger.debug(
                "Attaching list of user-specified primary input files.")
            userFilesList = [f.strip() for f in userFilesList]
            userFilesList = [f for f in userFilesList if f]
            if len(userFilesList) != len(set(userFilesList)):
                msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
                msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
                msg += " Duplicated entries will be removed."
                self.logger.warning(msg)
            configArguments['userfiles'] = set(userFilesList)
            configArguments['primarydataset'] = getattr(
                self.config.Data, 'outputPrimaryDataset', 'CRAB_UserFiles')

        lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
        lumi_list = None
        if lumi_mask_name:
            self.logger.debug("Attaching lumi mask %s to the request" %
                              (lumi_mask_name))
            try:
                lumi_list = getLumiList(lumi_mask_name, logger=self.logger)
            except ValueError as ex:
                msg = "%sError%s:" % (colors.RED, colors.NORMAL)
                msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name,
                                                              ex)
                raise ConfigurationException(msg)
        run_ranges = getattr(self.config.Data, 'runRange', None)
        if run_ranges:
            run_ranges_is_valid = re.match(r'^\d+((?!(-\d+-))(\,|\-)\d+)*$',
                                           run_ranges)
            if run_ranges_is_valid:
                run_list = getRunList(run_ranges)
                if lumi_list:
                    lumi_list.selectRuns(run_list)
                    if not lumi_list:
                        msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
                        raise ConfigurationException(msg)
                else:
                    if len(run_list) > 50000:
                        msg = "CRAB configuration parameter Data.runRange includes %s runs." % str(
                            len(run_list))
                        msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                        raise ConfigurationException(msg)
                    lumi_list = LumiList(runs=run_list)
            else:
                msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
                raise ConfigurationException(msg)
        if lumi_list:
            configArguments['runs'] = lumi_list.getRuns()
            ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
            lumi_mask = lumi_list.getCompactList()
            configArguments['lumis'] = [
                ','.join(
                    str(lumi) for lumi_range in lumi_mask[run]
                    for lumi in lumi_range)
                for run in configArguments['runs']
            ]

        configArguments['jobtype'] = 'Analysis'

        return tarFilename, configArguments
示例#21
0
    def __createSnippet(self,
                        jsonPath=None,
                        begin=None,
                        end=None,
                        firstRun=None,
                        lastRun=None,
                        repMap=None,
                        crab=False,
                        parent=False):
        if firstRun:
            firstRun = int(firstRun)
        if lastRun:
            lastRun = int(lastRun)
        if (begin and firstRun) or (end and lastRun):
            msg = (
                "The Usage of " +
                "'begin' & 'firstRun' " * int(bool(begin and firstRun)) +
                "and " * int(bool(
                    (begin and firstRun) and (end and lastRun))) +
                "'end' & 'lastRun' " * int(bool(end and lastRun)) +
                "is ambigous.")
            raise AllInOneError(msg)
        if begin or end:
            (firstRun, lastRun) = self.convertTimeToRun(begin=begin,
                                                        end=end,
                                                        firstRun=firstRun,
                                                        lastRun=lastRun)
        if (firstRun and lastRun) and (firstRun > lastRun):
            msg = ("The lower time/runrange limit ('begin'/'firstRun') "
                   "chosen is greater than the upper time/runrange limit "
                   "('end'/'lastRun').")
            raise AllInOneError(msg)
        if self.predefined() and (jsonPath or begin or end or firstRun
                                  or lastRun):
            msg = (
                "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun'"
                "only work for official datasets, not predefined _cff.py files"
            )
            raise AllInOneError(msg)
        goodLumiSecStr = ""
        lumiStr = ""
        lumiSecExtend = ""
        if firstRun or lastRun or jsonPath:
            goodLumiSecStr = ("lumiSecs = cms.untracked."
                              "VLuminosityBlockRange()\n")
            lumiStr = "                    lumisToProcess = lumiSecs,\n"
            if not jsonPath:
                selectedRunList = self.__getRunList()
                if firstRun:
                    selectedRunList = [ run for run in selectedRunList \
                                        if self.__findInJson(run, "run_number") >= firstRun ]
                if lastRun:
                    selectedRunList = [ run for run in selectedRunList \
                                        if self.__findInJson(run, "run_number") <= lastRun ]
                lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
                             + str( self.__findInJson(run, "run_number") ) + ":max" \
                             for run in selectedRunList ]
                splitLumiList = list(self.__chunks(lumiList, 255))
            else:
                theLumiList = None
                try:
                    theLumiList = LumiList(filename=jsonPath)
                except ValueError:
                    pass

                if theLumiList is not None:
                    allRuns = theLumiList.getRuns()
                    runsToRemove = []
                    for run in allRuns:
                        if firstRun and int(run) < firstRun:
                            runsToRemove.append(run)
                        if lastRun and int(run) > lastRun:
                            runsToRemove.append(run)
                    theLumiList.removeRuns(runsToRemove)
                    splitLumiList = list(
                        self.__chunks(theLumiList.getCMSSWString().split(','),
                                      255))
                else:
                    with open(jsonPath) as f:
                        jsoncontents = f.read()
                        if "process.source.lumisToProcess" in jsoncontents:
                            msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet.  Trying to use it" % jsonPath
                            if firstRun or lastRun:
                                msg += (
                                    "\n  (after applying firstRun and/or lastRun)"
                                )
                            msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
                            msg += "\nCheck your config file to make sure that it worked properly."
                            print msg

                            runlist = self.__getRunList()
                            if firstRun or lastRun:
                                self.__firstusedrun = -1
                                self.__lastusedrun = -1
                                jsoncontents = re.sub(
                                    "\d+:(\d+|max)-\d+:(\d+|max)",
                                    self.getForceRunRangeFunction(
                                        firstRun, lastRun), jsoncontents)
                                self.__firstusedrun = max(
                                    self.__firstusedrun,
                                    int(
                                        self.__findInJson(
                                            runlist[0], "run_number")))
                                self.__lastusedrun = min(
                                    self.__lastusedrun,
                                    int(
                                        self.__findInJson(
                                            runlist[-1], "run_number")))
                            else:
                                self.__firstusedrun = int(
                                    self.__findInJson(runlist[0],
                                                      "run_number"))
                                self.__lastusedrun = int(
                                    self.__findInJson(runlist[-1],
                                                      "run_number"))
                            lumiSecExtend = jsoncontents
                            splitLumiList = [[""]]

            if splitLumiList and splitLumiList[0]:
                if splitLumiList[0][0]:
                    lumiSecStr = [ "',\n'".join( lumis ) \
                                   for lumis in splitLumiList ]
                    lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                                   for lumis in lumiSecStr ]
                    lumiSecExtend = "\n".join(lumiSecStr)
                    runlist = self.__getRunList()
                    self.__firstusedrun = max(
                        int(splitLumiList[0][0].split(":")[0]),
                        int(self.__findInJson(runlist[0], "run_number")))
                    self.__lastusedrun = min(
                        int(splitLumiList[-1][-1].split(":")[0]),
                        int(self.__findInJson(runlist[-1], "run_number")))
            else:
                msg = "You are trying to run a validation without any runs!  Check that:"
                if firstRun or lastRun:
                    msg += "\n - firstRun and lastRun are correct for this dataset, and there are runs in between containing data"
                if jsonPath:
                    msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
                if (firstRun or lastRun) and jsonPath:
                    msg += "\n - firstRun and lastRun are consistent with your JSON file"
                if begin:
                    msg = msg.replace("firstRun", "begin")
                if end:
                    msg = msg.replace("lastRun", "end")
                raise AllInOneError(msg)

        else:
            runlist = self.__getRunList()
            self.__firstusedrun = int(
                self.__findInJson(self.__getRunList()[0], "run_number"))
            self.__lastusedrun = int(
                self.__findInJson(self.__getRunList()[-1], "run_number"))

        if crab:
            files = ""
        else:
            splitFileList = list(self.__chunks(self.fileList(), 255))
            fileStr = ["',\n'".join(files) for files in splitFileList]
            fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
                        for files in fileStr ]
            files = "\n".join(fileStr)

            if parent:
                splitParentFileList = list(
                    self.__chunks(self.fileList(parent=True), 255))
                parentFileStr = [
                    "',\n'".join(parentFiles)
                    for parentFiles in splitParentFileList
                ]
                parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
                            for parentFiles in parentFileStr ]
                parentFiles = "\n".join(parentFileStr)
                files += "\n\n" + parentFiles

        theMap = repMap
        theMap["files"] = files
        theMap["json"] = jsonPath
        theMap["lumiStr"] = lumiStr
        theMap["goodLumiSecStr"] = goodLumiSecStr % (theMap)
        theMap["lumiSecExtend"] = lumiSecExtend
        if crab:
            dataset_snippet = self.__dummy_source_template % (theMap)
        else:
            dataset_snippet = self.__source_template % (theMap)
        return dataset_snippet
示例#22
0
    def _dasPopen(dbs):
        """Run a DAS command line through the shell and return its output pipe.

        Args:
            dbs: the complete das_client command line, as one string.

        Returns:
            The file-like object returned by ``os.popen`` for the command.

        Raises:
            RuntimeError: if the ``LSB_JOBID`` environment variable is defined,
                i.e. the query is being attempted from inside an LXBatch job.
        """
        if 'LSB_JOBID' in os.environ:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise RuntimeError("Trying to do a DAS query while in a LXBatch job (env variable LSB_JOBID defined)\nquery was: %s" % dbs)
        if 'X509_USER_PROXY' in os.environ:
            # The same proxy file is passed as both --key and --cert.
            dbs += " --key {0} --cert {0}".format(os.environ['X509_USER_PROXY'])
        logger.info('DAS query\t: %s',  dbs)
        return os.popen(dbs)

    # One template for both DAS queries; only the dataset name differs.
    # NOTE(review): "--limit 0" presumably means "no limit" -- confirm against das_client.
    run_query = 'das_client --query="run dataset=%s instance=prod/%s" --limit %i'

    # Each non-blank output line is expected to hold one run number; blank
    # lines are skipped so they do not make int() fail.
    dbs = run_query % (prompt.heppy.dataset, 'global', 0)
    prompt_runs = [int(r) for r in _dasPopen(dbs).readlines() if r.strip()]
    dbs = run_query % (rereco.heppy.dataset, 'global', 0)
    rereco_runs = [int(r) for r in _dasPopen(dbs).readlines() if r.strip()]

    # Intersect once so the per-run membership test is a set lookup instead
    # of two list scans. The order of lumiList.getRuns() is preserved.
    common_runs = set(prompt_runs) & set(rereco_runs)
    runs = [run for run in map(int, lumiList.getRuns()) if run in common_runs]

    # Single parenthesised argument: prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print("Now running %i jobs: %r" % (len(runs), runs))

    import subprocess
    def wrapper( run_ ):
        """Run jetTreeMaker.py for one run in a child Python process.

        Args:
            run_: run number, formatted with ``%i`` into the ``--run`` option.

        Returns:
            The child's exit status as reported by ``subprocess.call``, so a
            failed job is visible to whoever collects the return values.
        """
        # The era comes from the enclosing script's parsed arguments (args.era).
        command = ["python", "jetTreeMaker.py", "--era=%s" % args.era, "--run=%i" % run_]
        return subprocess.call(command)

    # Fan the selected runs out over a pool of 10 worker processes.
    from multiprocessing import Pool
    pool = Pool( 10 )
    # map() blocks until every run has been handled; results holds whatever
    # wrapper returned for each entry of runs, in the same order.
    results = pool.map(wrapper, runs)
    # close() stops further task submission to the pool.
    # NOTE(review): no pool.join() is visible in this part of the file -- confirm
    # whether one follows.
    pool.close()