Example #1
 def mergeLumis(inputdata, lumimask):
     """
     Computes the processed lumis, merges if needed and returns the compacted list.
     """
     mergedlumis = LumiList()
     doublelumis = LumiList()
     for report in inputdata:
         doublelumis = mergedlumis & LumiList(runsAndLumis=report)
         mergedlumis = mergedlumis | LumiList(runsAndLumis=report)
     return mergedlumis.getCompactList(), (LumiList(compactList=lumimask) - mergedlumis).getCompactList(), doublelumis.getCompactList()
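A minimal usage sketch for the helper above; the per-job reports and the lumi mask are hypothetical, and LumiList is assumed to be the CMS LumiList class (e.g. WMCore.DataStructs.LumiList or FWCore.PythonUtilities.LumiList):

    # Hypothetical per-job reports in the {run: [lumi, ...]} form accepted by runsAndLumis
    inputdata = [
        {'1': [1, 2, 3]},
        {'1': [3, 4], '2': [10]},
    ]
    # Hypothetical lumi mask in compact form: {run: [[first, last], ...]}
    lumimask = {'1': [[1, 10]], '2': [[1, 20]]}
    processed, missing, doubles = mergeLumis(inputdata, lumimask)
    # processed -> {'1': [[1, 4]], '2': [[10, 10]]}
    # missing   -> {'1': [[5, 10]], '2': [[1, 9], [11, 20]]}
    # doubles   -> {'1': [[3, 3]]}   (lumi 3 of run 1 appears in both reports)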
Example #2
 def mergeLumis(inputdata, lumimask):
     """
     Computes the processed lumis, merges if needed and returns the compacted list.
     """
     mergedlumis = LumiList()
     doublelumis = LumiList()
     for report in inputdata:
         doublelumis = mergedlumis & LumiList(runsAndLumis=report)
         mergedlumis = mergedlumis | LumiList(runsAndLumis=report)
         if doublelumis:
             self.logger.info("Warning: double run-lumis processed %s" % doublelumis)
     return mergedlumis.getCompactList(), (LumiList(compactList=lumimask) - mergedlumis).getCompactList()
Example #3
    def getInputRunLumi(self, file):
        import xml.dom.minidom

        dom = xml.dom.minidom.parse(file)
        ll = []
        totalLumiFilename = None  # stays None if no input run/lumi info is found

        for elem in dom.getElementsByTagName("Job"):
            nJob = int(elem.getAttribute("JobID"))
            lumis = elem.getAttribute('Lumis')
            #lumis = '193752:1'
            #lumis = '193752:1-193752:5,193774:1-193774:5,193775:1'
            if lumis:
                tmp=str.split(str(lumis), ",")
                #print "tmp = ", tmp
            else:
                msg = "The summary file inputLumiSummaryOfTask.json about input run and lumi isn't created"
                common.logger.info(msg)
                return
                

            #tmp = [193752:1-193752:5] [193774:1-193774:5]
            for entry in tmp:
                run_lumi=str.split(entry, "-")
                # run_lumi = [193752:1] [193752:5] 
                if len(run_lumi) == 0: pass
                if len(run_lumi) == 1:
                    lumi = str.split(run_lumi[0],":")[1]
                    run = str.split(run_lumi[0],":")[0]
                    ll.append((run,int(lumi)))
    
                if len(run_lumi) == 2:
                    lumi_max = str.split(run_lumi[1],":")[1]
                    lumi_min = str.split(run_lumi[0],":")[1]
                    run = str.split(run_lumi[1],":")[0]
                    for count in range(int(lumi_min),int(lumi_max) + 1): 
                        ll.append((run,count))
        if len(ll):
            lumiList = LumiList(lumis = ll)
            compactList = lumiList.getCompactList()

            totalLumiFilename = self.fjrDirectory + 'inputLumiSummaryOfTask.json'
            totalLumiSummary = open(totalLumiFilename, 'w')
            json.dump(compactList, totalLumiSummary)
            totalLumiSummary.write('\n')
            totalLumiSummary.close()
            msg = "Summary file of input run and lumi to be analize with this task: %s\n" %totalLumiFilename
            common.logger.info(msg)
        else:    
            msg = "The summary file inputLumiSummaryOfTask.json about input run and lumi isn't created"
            common.logger.info(msg)
        return totalLumiFilename 
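For reference, a standalone sketch of the Lumis attribute format this parser expects; the arguments.xml layout and the run/lumi numbers below are hypothetical:

    import xml.dom.minidom

    # Hypothetical arguments.xml fragment: each Job carries a comma-separated
    # Lumis attribute of run:lumi points and run:lumi-run:lumi ranges
    xml_text = """<arguments>
      <Job JobID="1" Lumis="193752:1-193752:5,193774:1"/>
      <Job JobID="2" Lumis="193775:1"/>
    </arguments>"""
    dom = xml.dom.minidom.parseString(xml_text)
    for elem in dom.getElementsByTagName("Job"):
        print("%s %s" % (elem.getAttribute("JobID"), elem.getAttribute("Lumis")))
    # 1 193752:1-193752:5,193774:1
    # 2 193775:1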
Example #4
 def mergeLumis(inputdata, lumimask):
     """
     Computes the processed lumis, merges if needed and returns the compacted list.
     """
     mergedlumis = LumiList()
     doublelumis = LumiList()
     for report in inputdata:
         doublelumis = mergedlumis & LumiList(runsAndLumis=report)
         mergedlumis = mergedlumis | LumiList(runsAndLumis=report)
         if doublelumis:
             self.logger.info("Warning: double run-lumis processed %s" %
                              doublelumis)
     return mergedlumis.getCompactList(), (LumiList(compactList=lumimask) -
                                           mergedlumis).getCompactList()
Example #5
 def mergeLumis(inputdata):
     """
     Computes the processed lumis, merges if needed and returns the compacted list.
     """
     mergedLumis = set()
     #merge the lumis from single files
     for reports in inputdata.values():
         for report in reports:
             for run, lumis in literal_eval(report['runlumi']).items():
                 if isinstance(run, bytes):
                     run = run.decode(encoding='UTF-8')
                 for lumi in lumis:
                     mergedLumis.add(
                         (run, int(lumi)))  #lumi is str, but need int
     mergedLumis = LumiList(lumis=mergedLumis)
     return mergedLumis.getCompactList()
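A usage sketch for the variant above; the job-report structure is hypothetical but follows the shape the function expects (each report carries a 'runlumi' string that literal_eval turns into a {run: [lumi, ...]} dict):

    inputdata = {
        '1': [{'runlumi': "{'273158': ['1', '2', '3']}"}],
        '2': [{'runlumi': "{'273158': ['3', '4']}"}],
    }
    print(mergeLumis(inputdata))
    # expected: {'273158': [[1, 4]]}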
Example #6
 def getDuplicateLumis(lumisDict):
     """
     Get the run-lumis appearing more than once in the input
     dictionary of runs and lumis, which is assumed to have
     the following format:
         {
         '1': [1,2,3,4,6,7,8,9,10],
         '2': [1,4,5,20]
         }
     """
     doubleLumis = set()
     for run, lumis in lumisDict.items():
         seen = set()
         doubleLumis.update(
             set((run, lumi) for lumi in lumis
                 if (run, lumi) in seen or seen.add((run, lumi))))
     doubleLumis = LumiList(lumis=doubleLumis)
     return doubleLumis.getCompactList()
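A quick check of the helper above with a hypothetical input in which one lumi repeats:

    lumisDict = {'1': [1, 2, 4, 4], '2': [7]}
    print(getDuplicateLumis(lumisDict))
    # expected: {'1': [[4, 4]]}   (only the repeated run-lumi survives)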
Example #7
 def subtractLumis(lumisA, lumisB):
     result = LumiList(compactList=lumisA) - LumiList(compactList=lumisB)
     return result.getCompactList()
Example #8
 def intersectLumis(lumisA, lumisB):
     result = LumiList(compactList=lumisA) & LumiList(compactList=lumisB)
     return result.getCompactList()
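Both helpers above take and return compact lists ({run: [[first, last], ...]}); a hypothetical round trip:

    a = {'1': [[1, 10]]}
    b = {'1': [[4, 6]]}
    print(subtractLumis(a, b))   # expected: {'1': [[1, 3], [7, 10]]}
    print(intersectLumis(a, b))  # expected: {'1': [[4, 6]]}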
Example #9
    for aRaw in data:
        run=aRaw['run'][0]['run_number']
        run_lumis=aRaw['lumi'][0]['number']
        if(len(run_lumis)):
#        for r in aRaw['run']:
#            run_2=r['run_number']
#        for l in aRaw['lumi']:
#            lumis_2=l['number']
            #print run, run_lumis
            #lumis.append((run,run_lumis))
            lumis[run]=run_lumis
#    print lumis

    ll = LumiList(compactList=lumis)
#print "ll", ll
    runrange = sorted(int(x) for x in ll.getCompactList().keys())
    dcs_ll = LumiList('/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions16/13TeV/DCSOnly/json_DCSONLY.txt') # JMTBAD import from goodlumis

    #print "dcs_ll", dcs_ll
    dcs_runrange = sorted(int(x) for x in dcs_ll.getCompactList().keys())

    dcs_ll.removeRuns(xrange(dcs_runrange[0], runrange[0]))
    dcs_ll.removeRuns(xrange(runrange[-1]+1, dcs_runrange[-1]))

    ok = LumiList(compactList={})  # from lumiSummary.json

    print 'run range for', latest_dataset, ':', runrange[0], runrange[-1]
    print 'these lumis are in the DCS-only JSON but not (yet) in', latest_dataset
Example #10
def add_merged_sample(NAME, type, AnaUrl, FWUrl, samples, comment):
    # samples is a simple dict containing three keys: 'process', 'dataset_id', 'sample_id'
    dbstore = DbStore()
    sample = None

    # check that source dataset exist
    # Skip: should exist, the check has been done before calling this function

    # check that there is no existing entry
    update = False
    localpath = ''
    nevents = 0
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)

    # collecting contents
    sample.nevents_processed = 0
    sample.nevents = 0
    sample.normalization = 1
    sample.event_weight_sum = 0
    extras_event_weight_sum = {}
    dataset_nevents = 0
    processed_lumi = LumiList()
    for i, s in enumerate(samples):
        if i == 0:
            sample.source_dataset_id = s['dataset_id']
            sample.source_sample_id = s['sample_id']
        results = dbstore.find(Sample, Sample.sample_id == s['sample_id'])
        # Should exist, the check has been done before calling this function
        sample.nevents_processed += results[0].nevents_processed
        sample.nevents += results[0].nevents
        sample.event_weight_sum += results[0].event_weight_sum
        extra_sumw = results[0].extras_event_weight_sum
        if extra_sumw is not None:
            extra_sumw = json.loads(extra_sumw)
            for key in extra_sumw:
                try:
                    extras_event_weight_sum[key] += extra_sumw[key]
                except KeyError:
                    extras_event_weight_sum[key] = extra_sumw[key]
        tmp_processed_lumi = results[0].processed_lumi
        if tmp_processed_lumi is not None:
            tmp_processed_lumi = json.loads( tmp_processed_lumi )
            processed_lumi = processed_lumi | LumiList(compactList = tmp_processed_lumi)
        # Get info from file table
        results = dbstore.find(File, File.sample_id == s['sample_id'])
        for lfn, pfn, event_weight_sum, file_extras_event_weight_sum, nevents in list(results.values(File.lfn, File.pfn, File.event_weight_sum, File.extras_event_weight_sum, File.nevents)):
            f = File(lfn, pfn, event_weight_sum, file_extras_event_weight_sum, nevents)
            sample.files.add(f)
        # Get info from parent datasets
        results = dbstore.find(Dataset, Dataset.dataset_id == s['dataset_id'])
        dataset_nevents +=  results[0].nevents
    if len(extras_event_weight_sum) > 0:
        sample.extras_event_weight_sum = unicode(json.dumps(extras_event_weight_sum))
    if len(processed_lumi.getCompactList()) > 0:
        sample.processed_lumi = unicode(json.dumps(processed_lumi.getCompactList()))
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl) #NB: limited to 255 characters, but so far so good
    if sample.nevents_processed != dataset_nevents:
        sample.user_comment = unicode("Sample was not fully processed, only " + str(sample.nevents_processed) + "/" + str(dataset_nevents) + " events were processed. " + comment)
    else:
        sample.user_comment = unicode(comment)
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()

        print sample

        dbstore.commit()
        return

    else:
        sample.luminosity = sample.getLuminosity()
        print("Sample updated")
        print(sample)

        dbstore.commit()
        return

    # rollback
    dbstore.rollback()
Example #11
    def run(self):
        """
        The main method of the class: report status of a task
        """
        common.logger.debug( "Reporter::run() called")
        task = common._db.getTask()

        msg= "--------------------\n"
        msg +=  "Dataset: %s\n"%str(task['dataset'])
        if self.cfg_params.has_key('USER.copy_data') and int(self.cfg_params['USER.copy_data'])==1:
            msg+=  "Remote output :\n"
            ## TODO: SL should come from jobDB!
            from PhEDExDatasvcInfo import PhEDExDatasvcInfo

            stageout = PhEDExDatasvcInfo(self.cfg_params)
            endpoint, lfn, SE, SE_PATH, user = stageout.getEndpoint()
            #print endpoint, lfn, SE, SE_PATH, user

            msg+=  "SE: %s %s  srmPath: %s\n"%(self.cfg_params['USER.storage_element'],SE,endpoint)

        else:
            msg += "Local output: %s\n" % task['outputDirectory']
        #print task
        possible_status = [ 'Created',
                            'Undefined',
                            'Submitting',
                            'Submitted',
                            'NotSubmitted',
                            'Waiting',
                            'Ready',
                            'Scheduled',
                            'Running',
                            'Done',
                            'Killing',
                            'Killed',
                            'Aborted',
                            'Unknown',
                            'Done (Failed)',
                            'Cleared',
                            'Retrieved'
                            ]
        eventsRead=0
        eventsRequired=0
        filesRead=0
        filesRequired=0
        lumis = []
        for job in task.getJobs():
            if (job.runningJob['applicationReturnCode']!=0 or job.runningJob['wrapperReturnCode']!=0): continue
            # get FJR filename
            fjr = self.fjrDirectory + job['outputFiles'][-1]

            jobReport = readJobReport(fjr)
            if len(jobReport) > 0:
                inputFiles = jobReport[0].inputFiles
                for inputFile in inputFiles:
                    # Accumulate the list of lum sections run over
                    for run in inputFile.runs.keys():
                        for lumi in inputFile.runs[run]:
                            lumis.append((run, lumi))
                    filesRead+=1
                    eventsRead+=int(inputFile['EventsRead'])
                #print jobReport[0].inputFiles,'\n'
            else:
                pass
                #print 'no FJR available for job #%s'%job['jobId']
            #print "--------------------------"

        # Compact and write the list of successful lumis

        lumiList = LumiList(lumis = lumis)
        compactList = lumiList.getCompactList()

        lumiFilename = task['outputDirectory'] + 'lumiSummary.json'
        lumiSummary = open(lumiFilename, 'w')
        json.dump(compactList, lumiSummary)
        lumiSummary.write('\n')
        lumiSummary.close()

        msg += "Total Events read: %s\n" % eventsRead
        msg += "Total Files read: %s\n" % filesRead
        msg += "Total Jobs : %s\n" % len(task.getJobs())
        msg += "Luminosity section summary file: %s\n" % lumiFilename
        list_ID={}

        # TEMPORARY by Fabio, to be removed
        # avoid clashes between glite_slc5 and glite schedulers when a server is used
        # otherwise, -report with a server requires a local scheduler
        if self.cfg_params.get('CRAB.server_name', None) is None:
            common.logger.debug( "Reporter updating task status")
            task = common.scheduler.queryEverything(task['id'])

        for st in possible_status:
            list_ID = common._db.queryAttrRunJob({'statusScheduler':st},'jobId')
            if (len(list_ID)>0):
                msg+=  "   # Jobs: %s:%s\n"%(str(st),len(list_ID))
            pass
        msg+=  "\n----------------------------\n"
        common.logger.info(msg)


        file = common.work_space.shareDir() + 'arguments.xml'
        #print "file = ", file
        
        ### starting from the arguments.xml file, a json file containing the run:lumi
        ### that should be analyzed with the task
        inputRunLumiFileName = self.getInputRunLumi(file)

        
        ### missing lumi to analyze: starting from lumimask or from argument file
        ### calculate the difference with report.json
        ### if a lumimask is used in the crab.cfg
        if (self.cfg_params.get('CMSSW.lumi_mask')): 
            lumimask=self.cfg_params.get('CMSSW.lumi_mask')
            #print "lumimask = ", lumimask 
            self.compareJsonFile(lumimask)
        ### without lumimask    
        elif (inputRunLumiFileName):
            self.compareJsonFile(inputRunLumiFileName)
        else:
            common.logger.info("No json file to compare")
        return
Example #12
from RecoLuminosity.LumiDB import sessionManager, lumiCalcAPI, revisionDML
from JMTucker.Tools.general import from_pickle, to_pickle

os.system('mkdir -p prescales_temp')


def popen(cmd):
    return subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            shell=True).communicate()[0]


ll = LumiList(
    'prescales_temp/Cert_190456-208686_8TeV_PromptReco_Collisions12_JSON.txt')
ll_compact = ll.getCompactList()
runs = [int(i) for i in ll.getRuns()]
runs.sort()


def dump_lumibyls(runs):
    l = float(len(runs))
    for i, run in enumerate(runs):
        out_fn = 'prescales_temp/lumibyls/%i.csv' % run
        already = os.path.isfile(out_fn)
        print 'run %i (%i/%i)%s' % (run, i + 1, l,
                                    ' (skipping since already dumped)'
                                    if already else '')
        if already:
            continue
        popen('lumiCalc2.py lumibyls -r %i -o %s' % (run, out_fn))
Example #13
    for j in inputJSONFiles:
        impLumis = impLumis | LumiList(filename=j)

if impLumis is not None:
    if args.union:
        edmLumis = edmLumis | impLumis
    if args.subtract:
        edmLumis = edmLumis - impLumis
    if args.intersect:
        edmLumis = edmLumis & impLumis

reclumiData = None
dellumiData = None
if lumiCalc is not None:
    print "Accessing LumiDB... can take a while..."
    dellumiData = lumiCalc.deliveredLumiForRange(edmLumis.getCompactList())
    reclumiData = lumiCalc.recordedLumiForRange(edmLumis.getCompactList())
    totalRec = 0.0
    totalDel = 0.0
    for dpr in dellumiData:
        if dpr[2] != 'N/A':
            totalDel += float(dpr[2])
    for dpr in reclumiData:
        totalRec += lumiCalc.calculateTotalRecorded(dpr[2])
    print "Delivered Luminosity: ", totalDel
    print "Recorded Luminosity: ", totalRec

if args.outputJSON:
    edmLumis.writeJSON(outputJSON)

if args.printJSON:
Example #14
    for j in inputJSONFiles:
        impLumis = impLumis | LumiList(filename=j)

if impLumis is not None:
    if args.union:
        edmLumis = edmLumis | impLumis
    if args.subtract:
        edmLumis = edmLumis - impLumis
    if args.intersect:
        edmLumis = edmLumis & impLumis

reclumiData=None
dellumiData=None
if lumiCalc is not None:
    print "Accessing LumiDB... can take a while..."
    dellumiData=lumiCalc.deliveredLumiForRange(edmLumis.getCompactList())
    reclumiData=lumiCalc.recordedLumiForRange(edmLumis.getCompactList())    
    totalRec = 0.0
    totalDel = 0.0
    for dpr in dellumiData:
        if dpr[2] != 'N/A':
            totalDel += float(dpr[2])
    for dpr in reclumiData:
        totalRec += lumiCalc.calculateTotalRecorded(dpr[2])
    print "Delivered Luminosity: ",totalDel
    print "Recorded Luminosity: ",totalRec

if args.outputJSON:
    edmLumis.writeJSON(outputJSON)

if args.printJSON:
Example #15
sys.exit(1)

import re, os, subprocess
from pprint import pprint
from collections import defaultdict
from FWCore.PythonUtilities.LumiList import LumiList
from RecoLuminosity.LumiDB import sessionManager, lumiCalcAPI, revisionDML
from JMTucker.Tools.general import from_pickle, to_pickle

os.system('mkdir -p prescales_temp')

def popen(cmd):
    return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True).communicate()[0]

ll = LumiList('prescales_temp/Cert_190456-208686_8TeV_PromptReco_Collisions12_JSON.txt')
ll_compact = ll.getCompactList()
runs = [int(i) for i in ll.getRuns()]
runs.sort()

def dump_lumibyls(runs):
    l = float(len(runs))
    for i,run in enumerate(runs):
        out_fn = 'prescales_temp/lumibyls/%i.csv' % run
        already = os.path.isfile(out_fn)
        print 'run %i (%i/%i)%s' % (run, i+1, l, ' (skipping since already dumped)' if already else '')
        if already:
            continue
        popen('lumiCalc2.py lumibyls -r %i -o %s' % (run, out_fn))

def parse_lumibyls(run):
    d = defaultdict(dict)
Example #16
    def run(
        self,
        filecacheurl=None,
    ):  # pylint: disable=arguments-differ
        """
        Override run() for JobType
        """
        configArguments = {
            'addoutputfiles': [],
            'tfileoutfiles': [],
            'edmoutfiles': [],
        }

        if getattr(self.config.Data, 'useParent', False) and getattr(
                self.config.Data, 'secondaryInputDataset', None):
            msg = "Invalid CRAB configuration: Parameters Data.useParent and Data.secondaryInputDataset cannot be used together."
            raise ConfigurationException(msg)

        # Get SCRAM environment
        scram = ScramEnvironment(logger=self.logger)

        configArguments.update({
            'jobarch': scram.getScramArch(),
            'jobsw': scram.getCmsswVersion()
        })

        # Build tarball
        if self.workdir:
            tarUUID = str(uuid.uuid4())
            self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
            if len(tarUUID):
                tarFilename = os.path.join(self.workdir,
                                           tarUUID + 'default.tgz')
                debugTarFilename = os.path.join(self.workdir, 'debugFiles.tgz')
                cfgOutputName = os.path.join(self.workdir, BOOTSTRAP_CFGFILE)
            else:
                raise EnvironmentException(
                    'Problem with uuidgen while preparing for Sandbox upload.')
        else:
            _, tarFilename = tempfile.mkstemp(suffix='.tgz')
            _, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')

        if getattr(self.config.Data, 'inputDataset', None):
            configArguments['inputdata'] = self.config.Data.inputDataset

        ## Create CMSSW config.
        self.logger.debug("self.config: %s" % (self.config))
        self.logger.debug("self.config.JobType.psetName: %s" %
                          (self.config.JobType.psetName))
        ## The loading of a CMSSW pset in the CMSSWConfig constructor is not idempotent
        ## in the sense that a second loading of the same pset may not produce the same
        ## result. Therefore there is a cache in CMSSWConfig to avoid loading any CMSSW
        ## pset twice. However, some "complicated" psets seem to evade the caching.
        ## Thus, to be safe, keep the CMSSWConfig instance in a class variable, so that
        ## it can be reused later if wanted (for example, in PrivateMC when checking if
        ## the pset has an LHE source) instead of having to load the pset again.
        ## As for what does "complicated" psets mean, Daniel Riley said that there are
        ## some psets where one module modifies the configuration from another module.
        self.cmsswCfg = CMSSWConfig(config=self.config,
                                    logger=self.logger,
                                    userConfig=self.config.JobType.psetName)

        ## If there is a CMSSW pset, do a basic validation of it.
        if not bootstrapDone() and self.config.JobType.psetName:
            valid, msg = self.cmsswCfg.validateConfig()
            if not valid:
                raise ConfigurationException(msg)

        ## We need to put the pickled CMSSW configuration in the right place.
        ## Here, we determine if the bootstrap script already run and prepared everything
        ## for us. In such case we move the file, otherwise we pickle.dump the pset
        if not bootstrapDone():
            # Write out CMSSW config
            self.cmsswCfg.writeFile(cfgOutputName)
        else:
            # Move the pickled and the configuration files created by the bootstrap script
            self.moveCfgFile(cfgOutputName)

        ## Interrogate the CMSSW pset for output files (only output files produced by
        ## PoolOutputModule or TFileService are identified automatically). Do this
        ## automatic detection even if JobType.disableAutomaticOutputCollection = True,
        ## so that we can still classify the output files in EDM, TFile and additional
        ## output files in the Task DB (and the job ad).
        ## TODO: Do we really need this classification at all? cmscp and PostJob read
        ## the FJR to know if an output file is EDM, TFile or other.
        edmfiles, tfiles = self.cmsswCfg.outputFiles()
        ## If JobType.disableAutomaticOutputCollection = True, ignore the EDM and TFile
        ## output files that are not listed in JobType.outputFiles.
        if getattr(
                self.config.JobType, 'disableAutomaticOutputCollection',
                getParamDefaultValue(
                    'JobType.disableAutomaticOutputCollection')):
            outputFiles = [
                re.sub(r'^file:', '', f)
                for f in getattr(self.config.JobType, 'outputFiles', [])
            ]
            edmfiles = [f for f in edmfiles if f in outputFiles]
            tfiles = [f for f in tfiles if f in outputFiles]
        ## Get the list of additional output files that have to be collected as given
        ## in JobType.outputFiles, but remove duplicates listed already as EDM files or
        ## TFiles.
        addoutputFiles = [
            re.sub(r'^file:', '', f)
            for f in getattr(self.config.JobType, 'outputFiles', [])
            if re.sub(r'^file:', '', f) not in edmfiles + tfiles
        ]
        outputWarn = "The following user output files (not listed as PoolOuputModule or TFileService in the CMSSW PSet) will be collected: %s" % ", ".join(
            ["'{0}'".format(x) for x in addoutputFiles])
        self.logger.debug(
            "The following EDM output files will be collected: %s" % edmfiles)
        self.logger.debug(
            "The following TFile output files will be collected: %s" % tfiles)
        if getattr(self.config.Data, 'publication',
                   False) and len(edmfiles) > 1:
            self.logger.error(
                "The input PSet produces multiple EDM output files: %s",
                edmfiles)
            self.logger.error(
                "But current CRAB version can't publish more than one dataset per task"
            )
            self.logger.error(
                "Either disable publication or submit multiple times with only one output at a time"
            )
            msg = "Submission refused"
            raise ClientException(msg)
        if addoutputFiles:
            self.logger.warning(outputWarn)
        else:
            self.logger.debug(outputWarn)
        configArguments['edmoutfiles'] = edmfiles
        configArguments['tfileoutfiles'] = tfiles
        configArguments['addoutputfiles'].extend(addoutputFiles)
        ## Give warning message in case no output file was detected in the CMSSW pset
        ## nor was any specified in the CRAB configuration.
        if not configArguments['edmoutfiles'] and not configArguments[
                'tfileoutfiles'] and not configArguments['addoutputfiles']:
            msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
            if getattr(
                    self.config.JobType, 'disableAutomaticOutputCollection',
                    getParamDefaultValue(
                        'JobType.disableAutomaticOutputCollection')):
                msg += " Automatic detection of output files in the CMSSW configuration is disabled from the CRAB configuration"
                msg += " and no output file was explicitly specified in the CRAB configuration."
            else:
                msg += " CRAB could not detect any output file in the CMSSW configuration"
                msg += " nor was any explicitly specified in the CRAB configuration."
            msg += " Hence CRAB will not collect any output file from this task."
            self.logger.warning(msg)

        ## UserTarball calls ScramEnvironment which can raise EnvironmentException.
        ## Since ScramEnvironment is already called above and the exception is not
        ## handled, we are sure that if we reached this point it will not raise EnvironmentException.
        ## But otherwise we should take this into account.
        with UserTarball(name=tarFilename,
                         logger=self.logger,
                         config=self.config,
                         crabserver=self.crabserver,
                         s3tester=self.s3tester) as tb:
            inputFiles = [
                re.sub(r'^file:', '', f)
                for f in getattr(self.config.JobType, 'inputFiles', [])
            ]
            tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
            try:
                uploadResult = tb.upload(filecacheurl=filecacheurl)
            except HTTPException as hte:
                if 'X-Error-Info' in hte.headers:
                    reason = hte.headers['X-Error-Info']
                    reason_re = re.compile(
                        r'\AFile size is ([0-9]*)B\. This is bigger than the maximum allowed size of ([0-9]*)B\.$'
                    )
                    re_match = reason_re.match(reason)
                    if re_match:
                        ISBSize = int(re_match.group(1))
                        ISBSizeLimit = int(re_match.group(2))
                        reason = "%sError%s:" % (colors.RED, colors.NORMAL)
                        reason += " Input sandbox size is ~%sMB. This is bigger than the maximum allowed size of %sMB." % (
                            ISBSize / 1024 / 1024, ISBSizeLimit / 1024 / 1024)
                        reason += tb.printSortedContent()
                        raise ClientException(reason)
                raise hte
            except Exception as e:
                msg = (
                    "Impossible to upload the sandbox tarball.\nError message: %s.\n"
                    "More details can be found in %s" %
                    (e, self.logger.logfile))
                raise ClientException(msg)

        # upload debug files
        debugFilesUploadResult = None
        with UserTarball(name=debugTarFilename,
                         logger=self.logger,
                         config=self.config,
                         crabserver=self.crabserver,
                         s3tester=self.s3tester) as dtb:
            dtb.addMonFiles()
            try:
                debugFilesUploadResult = dtb.upload(filecacheurl=filecacheurl)
            except Exception as e:
                msg = (
                    "Problem uploading debug_files.tar.gz.\nError message: %s.\n"
                    "More details can be found in %s" %
                    (e, self.logger.logfile))
                LOGGERS['CRAB3'].exception(
                    msg)  #the traceback is only printed into the logfile

        configArguments['cacheurl'] = filecacheurl
        configArguments['cachefilename'] = "%s.tar.gz" % uploadResult
        if debugFilesUploadResult is not None:
            configArguments[
                'debugfilename'] = "%s.tar.gz" % debugFilesUploadResult
        self.logger.debug("Result uploading input files: %(cachefilename)s " %
                          configArguments)

        # Upload list of user-defined input files to process as the primary input
        userFilesList = getattr(self.config.Data, 'userInputFiles', None)
        if userFilesList:
            self.logger.debug(
                "Attaching list of user-specified primary input files.")
            userFilesList = [f.strip() for f in userFilesList]
            userFilesList = [f for f in userFilesList if f]
            if len(userFilesList) != len(set(userFilesList)):
                msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
                msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
                msg += " Duplicated entries will be removed."
                self.logger.warning(msg)
            configArguments['userfiles'] = set(userFilesList)
            configArguments['primarydataset'] = getattr(
                self.config.Data, 'outputPrimaryDataset', 'CRAB_UserFiles')

        lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
        lumi_list = None
        if lumi_mask_name:
            self.logger.debug("Attaching lumi mask %s to the request" %
                              (lumi_mask_name))
            try:
                lumi_list = getLumiList(lumi_mask_name, logger=self.logger)
            except ValueError as ex:
                msg = "%sError%s:" % (colors.RED, colors.NORMAL)
                msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name,
                                                              ex)
                raise ConfigurationException(msg)
        run_ranges = getattr(self.config.Data, 'runRange', None)
        if run_ranges:
            run_ranges_is_valid = re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$',
                                           run_ranges)
            if run_ranges_is_valid:
                run_list = getRunList(run_ranges)
                if lumi_list:
                    lumi_list.selectRuns(run_list)
                    if not lumi_list:
                        msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
                        raise ConfigurationException(msg)
                else:
                    if len(run_list) > 50000:
                        msg = "CRAB configuration parameter Data.runRange includes %s runs." % str(
                            len(run_list))
                        msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                        raise ConfigurationException(msg)
                    lumi_list = LumiList(runs=run_list)
            else:
                msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
                raise ConfigurationException(msg)
        if lumi_list:
            configArguments['runs'] = lumi_list.getRuns()
            ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
            lumi_mask = lumi_list.getCompactList()
            configArguments['lumis'] = [
                str(reduce(lambda x, y: x + y,
                           lumi_mask[run]))[1:-1].replace(' ', '')
                for run in configArguments['runs']
            ]

        configArguments['jobtype'] = 'Analysis'

        return tarFilename, configArguments
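The lumi encoding described in the comment above ([[1,2],[5,5]] ==> '1,2,5,5') can be reproduced in isolation; this sketch uses a hypothetical run number:

    from functools import reduce

    lumi_mask = {'123456': [[1, 2], [5, 5]]}
    runs = list(lumi_mask.keys())
    lumis = [str(reduce(lambda x, y: x + y, lumi_mask[run]))[1:-1].replace(' ', '')
             for run in runs]
    print(lumis)  # ['1,2,5,5']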
Example #17
        if not good_lumis.contains(int(run)): continue
        run_hlt_data = hlt_data[run]
        hlt_menu = run_hlt_data["hlt_menu"]
        trig_mode = run_hlt_data["trig_mode"]

        l1_ps_tbl = l1_ps_data[trig_mode]
        hlt_ps_tbl = hlt_ps_data[hlt_menu]
        hlt_ps_dict = make_hlt_ps_dict(hlt_ps_tbl)
        
        if trig_mode not in lowest_l1_prescales:
            lowest_l1_prescales[trig_mode] = L1PreScaleCache(get_nr_ps_col(l1_ps_tbl))
        runs_lowest_l1_ps = lowest_l1_prescales[trig_mode]
        

        good_lumis_compact = good_lumis.getCompactList()[run]
        good_lumis_unpacked = uncompact_list(good_lumis_compact)
        ps_cols = run_hlt_data['ps_cols']


        for lumi in good_lumis_unpacked:
            lowest_l1_ps_cache = {}
            ps_col = get_ps_col(ps_cols,lumi)

            for line in hlt_ps_tbl:
                hlt_pathname = get_pathname_from_ps_tbl(line[1]) 
                if hlt_pathname.find("HLT_")==0 and (hlt_pathname.find("Ele")!=-1 or hlt_pathname.find("Pho")!=-1 or hlt_pathname.find("pho")!=-1 or hlt_pathname.find("SC")!=-1):
                   # if hlt_pathname.find("HLT_Mu8_DiEle12_CaloIdL_TrackIdL_")!=0: continue
                    if hlt_pathname not in hlt_paths:
                        hlt_paths[hlt_pathname]=HLTPath(hlt_pathname)
                    hlt_path = hlt_paths[hlt_pathname]
Example #18
    def run(self, filecacheurl=None):
        """
        Override run() for JobType
        """

        taskDict, webdir = self.getTaskDict()
        addoutputfiles = literal_eval(getColumn(taskDict, 'tm_outfiles'))
        tfileoutfiles = literal_eval(getColumn(taskDict, 'tm_tfile_outfiles'))
        edmoutfiles = literal_eval(getColumn(taskDict, 'tm_edm_outfiles'))
        jobarch = getColumn(taskDict, 'tm_job_arch')
        jobsw = getColumn(taskDict, 'tm_job_sw')

        sandboxFilename = os.path.join(self.workdir, 'sandbox.tar.gz')
        curlGetFileFromURL(webdir + '/sandbox.tar.gz', sandboxFilename,
                           self.proxyfilename)

        configArguments = {
            'addoutputfiles': addoutputfiles,
            'tfileoutfiles': tfileoutfiles,
            'edmoutfiles': edmoutfiles,
            'jobarch': jobarch,
            'jobsw': jobsw,
        }

        # Maybe the user wants to change the dataset
        if getattr(self.config.Data, 'inputDataset', None):
            configArguments['inputdata'] = self.config.Data.inputDataset

        ufc = CRABClient.Emulator.getEmulator('ufc')({
            'endpoint': filecacheurl,
            "pycurl": True
        })
        result = ufc.upload(sandboxFilename,
                            excludeList=NEW_USER_SANDBOX_EXCLUSIONS)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload source files: %s" %
                              str(result))
            raise CachefileNotFoundException

        configArguments['cacheurl'] = filecacheurl
        configArguments['cachefilename'] = "%s.tar.gz" % str(result['hashkey'])

        # Upload list of user-defined input files to process as the primary input
        userFilesList = getattr(self.config.Data, 'userInputFiles', None)
        if userFilesList:
            self.logger.debug(
                "Attaching list of user-specified primary input files.")
            userFilesList = map(string.strip, userFilesList)
            userFilesList = [file for file in userFilesList if file]
            if len(userFilesList) != len(set(userFilesList)):
                msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
                msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
                msg += " Duplicated entries will be removed."
                self.logger.warning(msg)
            configArguments['userfiles'] = set(userFilesList)
            configArguments['primarydataset'] = getattr(
                self.config.Data, 'outputPrimaryDataset', 'CRAB_UserFiles')

        lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
        lumi_list = None
        if lumi_mask_name:
            self.logger.debug("Attaching lumi mask %s to the request" %
                              (lumi_mask_name))
            try:
                lumi_list = getLumiList(lumi_mask_name, logger=self.logger)
            except ValueError as ex:
                msg = "%sError%s:" % (colors.RED, colors.NORMAL)
                msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name,
                                                              ex)
                raise ConfigurationException(msg)
        run_ranges = getattr(self.config.Data, 'runRange', None)
        if run_ranges:
            run_ranges_is_valid = re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$',
                                           run_ranges)
            if run_ranges_is_valid:
                run_list = getRunList(run_ranges)
                if lumi_list:
                    lumi_list.selectRuns(run_list)
                    if not lumi_list:
                        msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
                        raise ConfigurationException(msg)
                else:
                    if len(run_list) > 50000:
                        msg = "CRAB configuration parameter Data.runRange includes %s runs." % str(
                            len(run_list))
                        msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
                        raise ConfigurationException(msg)
                    lumi_list = LumiList(runs=run_list)
            else:
                msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
                raise ConfigurationException(msg)
        if lumi_list:
            configArguments['runs'] = lumi_list.getRuns()
            ## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
            lumi_mask = lumi_list.getCompactList()
            configArguments['lumis'] = [
                str(reduce(lambda x, y: x + y,
                           lumi_mask[run]))[1:-1].replace(' ', '')
                for run in configArguments['runs']
            ]

        configArguments['jobtype'] = 'Analysis'

        return sandboxFilename, configArguments
Example #19
    def run(self):
        """
        The main method of the class: report status of a task
        """
        common.logger.debug("Reporter::run() called")
        task = common._db.getTask()

        msg = "--------------------\n"
        msg += "Dataset: %s\n" % str(task['dataset'])
        if self.cfg_params.has_key('USER.copy_data') and int(
                self.cfg_params['USER.copy_data']) == 1:
            msg += "Remote output :\n"
            ## TODO: SL should come from jobDB!
            from PhEDExDatasvcInfo import PhEDExDatasvcInfo

            stageout = PhEDExDatasvcInfo(self.cfg_params)
            endpoint, lfn, SE, SE_PATH, user = stageout.getEndpoint()
            #print endpoint, lfn, SE, SE_PATH, user

            msg += "SE: %s %s  srmPath: %s\n" % (
                self.cfg_params['USER.storage_element'], SE, endpoint)

        else:
            msg += "Local output: %s\n" % task['outputDirectory']
        #print task
        possible_status = [
            'Created', 'Undefined', 'Submitting', 'Submitted', 'NotSubmitted',
            'Waiting', 'Ready', 'Scheduled', 'Running', 'Done', 'Killing',
            'Killed', 'Aborted', 'Unknown', 'Done (Failed)', 'Cleared',
            'Retrieved'
        ]
        eventsRead = 0
        eventsRequired = 0
        filesRead = 0
        filesRequired = 0
        lumis = []
        for job in task.getJobs():
            if (job.runningJob['applicationReturnCode'] != 0
                    or job.runningJob['wrapperReturnCode'] != 0):
                continue
            # get FJR filename
            fjr = self.fjrDirectory + job['outputFiles'][-1]

            jobReport = readJobReport(fjr)
            if len(jobReport) > 0:
                inputFiles = jobReport[0].inputFiles
                for inputFile in inputFiles:
                    # Accumulate the list of lum sections run over
                    for run in inputFile.runs.keys():
                        for lumi in inputFile.runs[run]:
                            lumis.append((run, lumi))
                    filesRead += 1
                    eventsRead += int(inputFile['EventsRead'])
                #print jobReport[0].inputFiles,'\n'
            else:
                pass
                #print 'no FJR available for job #%s'%job['jobId']
            #print "--------------------------"

        # Compact and write the list of successful lumis

        lumiList = LumiList(lumis=lumis)
        compactList = lumiList.getCompactList()

        lumiFilename = task['outputDirectory'] + 'lumiSummary.json'
        lumiSummary = open(lumiFilename, 'w')
        json.dump(compactList, lumiSummary)
        lumiSummary.write('\n')
        lumiSummary.close()

        msg += "Total Events read: %s\n" % eventsRead
        msg += "Total Files read: %s\n" % filesRead
        msg += "Total Jobs : %s\n" % len(task.getJobs())
        msg += "Luminosity section summary file: %s\n" % lumiFilename
        list_ID = {}

        # TEMPORARY by Fabio, to be removed
        # avoid clashes between glite_slc5 and glite schedulers when a server is used
        # otherwise, -report with a server requires a local scheduler
        if self.cfg_params.get('CRAB.server_name', None) is None:
            common.logger.debug("Reporter updating task status")
            task = common.scheduler.queryEverything(task['id'])

        for st in possible_status:
            list_ID = common._db.queryAttrRunJob({'statusScheduler': st},
                                                 'jobId')
            if (len(list_ID) > 0):
                msg += "   # Jobs: %s:%s\n" % (str(st), len(list_ID))
            pass
        msg += "\n----------------------------\n"
        common.logger.info(msg)

        file = common.work_space.shareDir() + 'arguments.xml'
        #print "file = ", file

        ### starting from the arguments.xml file, a json file containing the run:lumi
        ### that should be analyzed with the task
        inputRunLumiFileName = self.getInputRunLumi(file)

        ### missing lumi to analyze: starting from lumimask or from argument file
        ### calculate the difference with report.json
        ### if a lumimask is used in the crab.cfg
        if (self.cfg_params.get('CMSSW.lumi_mask')):
            lumimask = self.cfg_params.get('CMSSW.lumi_mask')
            #print "lumimask = ", lumimask
            self.compareJsonFile(lumimask)
        ### without lumimask
        elif (inputRunLumiFileName):
            self.compareJsonFile(inputRunLumiFileName)
        else:
            common.logger.info("No json file to compare")
        return


def add_merged_sample(samples, name, comment, store):

    # Retrieve the sample from the database if it already exists. Otherwise, create a new
    # sample
    update = False
    sample = store.find(Sample, Sample.name == unicode(name)).one()
    if not sample:
        sample = Sample(unicode(name), unicode(''), unicode('NTUPLES'), 0)
        store.add(sample)
    else:
        update = True
        sample.removeFiles(store)

    store.flush()

    # Set as parent dataset of the merged sample the parent dataset
    # of the first sample
    sample.source_dataset_id = samples[0].source_dataset_id

    # Reset sample content
    sample.nevents_processed = 0
    sample.nevents = 0
    sample.normalization = 1
    sample.event_weight_sum = 0
    extras_event_weight_sum = {}
    dataset_nevents = 0
    processed_lumi = LumiList()

    for i, s in enumerate(samples):
        sample.derived_samples.add(s)

        sample.nevents_processed += s.nevents_processed
        sample.nevents += s.nevents
        sample.event_weight_sum += s.event_weight_sum
        extra_sumw = s.extras_event_weight_sum
        if extra_sumw:
            extra_sumw = json.loads(extra_sumw)
            for key in extra_sumw:
                if key in extras_event_weight_sum:
                    extras_event_weight_sum[key] += extra_sumw[key]
                else:
                    extras_event_weight_sum[key] = extra_sumw[key]

        if s.processed_lumi is not None:
            sample_processed_lumi = json.loads(s.processed_lumi)
            processed_lumi = processed_lumi | LumiList(
                compactList=sample_processed_lumi)

        for f in s.files:
            sample.files.add(f)

        # Get info from parent datasets
        dataset_nevents += s.source_dataset.nevents

    if len(extras_event_weight_sum) > 0:
        sample.extras_event_weight_sum = unicode(
            json.dumps(extras_event_weight_sum))

    if len(processed_lumi.getCompactList()) > 0:
        sample.processed_lumi = unicode(
            json.dumps(processed_lumi.getCompactList()))

    sample.code_version = samples[0].code_version

    if sample.nevents_processed != dataset_nevents:
        sample.user_comment = unicode("Sample was not fully processed, only " +
                                      str(sample.nevents_processed) + "/" +
                                      str(dataset_nevents) +
                                      " events were processed. " + comment)
    else:
        sample.user_comment = unicode(comment)

    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    sample.luminosity = sample.getLuminosity()

    print("")
    print("Merged sample %s:" % ("updated" if update else "created"))
    print(sample)

    store.commit()
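A small sketch of the processed-lumi merging step used in both add_merged_sample variants above; the two JSON-style compact lists are hypothetical:

    import json

    processed_lumi = LumiList()
    for blob in ['{"273158": [[1, 5]]}', '{"273158": [[4, 9]]}']:
        processed_lumi = processed_lumi | LumiList(compactList=json.loads(blob))
    print(json.dumps(processed_lumi.getCompactList()))
    # expected: {"273158": [[1, 9]]}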