Example #1
    def save(self):
        """
        _save_

        Pickle the data object to a file in the AlertHandler working directory

        """
        output = None
        try:
            try:
                config = os.environ.get("PRODAGENT_CONFIG", None)
                if config == None:
                    msg = "No ProdAgent Config file provided\n"
                    raise RuntimeError, msg

                cfgObject = ProdAgentConfiguration()
                cfgObject.loadFromFile(config)
                alertHandlerConfig = cfgObject.get("AlertHandler")
                workingDir = alertHandlerConfig["ComponentDir"]

                dir = os.path.join(os.path.expandvars(workingDir), "Alerts")

                if not os.path.exists(dir):
                    os.makedirs(dir)
                self.FileName = os.path.join(dir, "alert-%s.dat" % makeUUID())
                output = open(self.FileName, "wb")
                pickle.dump(self, output)
            except Exception, ex:
                # to do: Exception handling
                print ex
                raise RuntimeError, str(ex)
        finally:
            if output:
                output.close()
        return
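
A minimal usage sketch for this save() method, assuming it is defined on an Alert-style class and that PRODAGENT_CONFIG points at a valid ProdAgent configuration file (both the class name and the path below are assumptions, not taken from the snippet):

    import os

    os.environ["PRODAGENT_CONFIG"] = "/path/to/ProdAgentConfig.xml"  # assumed config location
    alert = Alert()                # hypothetical class that defines save()
    alert.save()                   # pickles the alert into <ComponentDir>/Alerts/alert-<uuid>.dat
    print(alert.FileName)          # the file name chosen by save()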
Example #2
    def save(self):
        """
        _save_

        Pickle the data object to a file in the AlertHandler working directory

        """
        output = None
        try:
            try:
                config = os.environ.get("PRODAGENT_CONFIG", None)
                if config == None:
                    msg = "No ProdAgent Config file provided\n"
                    raise RuntimeError, msg

                cfgObject = ProdAgentConfiguration()
                cfgObject.loadFromFile(config)
                alertHandlerConfig = cfgObject.get("AlertHandler")
                workingDir = alertHandlerConfig['ComponentDir']

                dir = os.path.join(os.path.expandvars(workingDir), 'Alerts')

                if not os.path.exists(dir):
                    os.makedirs(dir)
                self.FileName = os.path.join(dir, "alert-%s.dat" % makeUUID())
                output = open(self.FileName, 'wb')
                pickle.dump(self, output)
            except Exception, ex:
                # to do: Exception handling
                print ex
                raise RuntimeError, str(ex)
        finally:
            if output:
                output.close()
        return
Example #3
def createCleanupJobSpec(workflowSpec, site, *lfns):
    """
    _createCleanupJob_

    Create a Cleanup JobSpec definition, using the cleanup
    workflow template, site name and the list of LFNs to be
    removed

    """

    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (workflowSpec.workflowName(), makeUUID())
    jobSpec.setJobName(jobName)
    jobSpec.setJobType("CleanUp")

    jobSpec.addWhitelistSite(site)

    lfnList = ""
    for lfn in lfns:
        lfnList += "%s\n" % lfn

    jobSpec.payload.configuration = lfnList

    return jobSpec
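
A hedged sketch of how createCleanupJobSpec might be driven; workflowSpec is assumed to be a cleanup WorkflowSpec prepared elsewhere, and the site name and LFNs below are illustrative values only:

    lfnsToRemove = [
        "/store/unmerged/example/file1.root",
        "/store/unmerged/example/file2.root",
    ]
    cleanupSpec = createCleanupJobSpec(workflowSpec, "SomeSE.example.org", *lfnsToRemove)
    # the LFNs end up newline-separated in the job spec payload configuration
    print(cleanupSpec.payload.configuration)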
Example #4
def newConvJobID():
    """
    _newConvJobID_

    Return a new conversion job ID

    """
    return "Conversion-%s" % makeUUID()
Example #5
def newRepackMergeJobID(run):
    """
    _newRepackMergeJobID_

    Return a new repacker merge job ID associated with the given run

    """
    return "RepackerMerge-Run%s-%s" %(run, makeUUID())
Example #6
def newRepackJobID(run):
    """
    _newRepackJobID_

    Return a new repacker job ID associated with the given run

    """
    
    return "Repacker-Run%s-%s" %(run, makeUUID())
Example #7
def createLogCollectorJobSpec(workflowSpec, originalWf, site, lfnBase,
                              stageOutParams, *lfns):
    """
    createLogCollectorJobSpec

    Create a LogArchive JobSpec definition, using the LogArchive
    workflow template, the site name and the list of log LFNs to be
    collected

    """

    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (workflowSpec.workflowName(), makeUUID())
    jobSpec.setJobName(jobName)
    jobSpec.setJobType("LogCollect")

    jobSpec.addWhitelistSite(site)

    confNode = IMProvNode("LogCollectorConfig")

    # add site and workflow to collect
    confNode.addNode(IMProvNode("wf", originalWf))
    confNode.addNode(IMProvNode("se", site))
    confNode.addNode(IMProvNode("lfnBase", lfnBase))

    # add logs to collect
    logNode = IMProvNode("LogsToCollect")
    for lfn in lfns:
        logNode.addNode(IMProvNode("lfn", lfn))

    confNode.addNode(logNode)

    # stageout
    if stageOutParams:
        stageOutNode = IMProvNode("Override")
        #    WorkflowTools.addStageOutOverride(confNode, stageOutParams['command'],
        #                                      stageOutParams['option'],
        #                                      stageOutParams['se-name'],
        #                                      stageOutParams['lfnPrefix'])

        stageOutNode.addNode(IMProvNode("command", stageOutParams['command']))
        stageOutNode.addNode(IMProvNode("option", stageOutParams['option']))
        stageOutNode.addNode(IMProvNode("se-name", stageOutParams['se-name']))
        stageOutNode.addNode(
            IMProvNode("lfn-prefix", stageOutParams['lfnPrefix']))
        confNode.addNode(stageOutNode)

    #jobSpec.payload.configuration = logNode.makeDOMElement().toprettyxml()
    jobSpec.payload.configuration = confNode.makeDOMElement().toprettyxml()

    return jobSpec
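
A sketch of the stageOutParams mapping this function expects; the keys come from the code above, while every value, the workflowSpec object and the LFNs are placeholders:

    stageOutParams = {
        "command": "srmcp",               # placeholder stage-out command
        "option": "",                     # placeholder option string
        "se-name": "se.example.org",      # placeholder storage element
        "lfnPrefix": "/store/logs/prod",  # placeholder LFN prefix
    }
    logSpec = createLogCollectorJobSpec(
        workflowSpec, "OriginalWorkflowName", "se.example.org",
        "/store/logs/prod/lfnBase", stageOutParams,
        "/store/logs/prod/lfnBase/log1.tgz",
        "/store/logs/prod/lfnBase/log2.tgz",
    )
    # the generated <LogCollectorConfig> XML becomes the payload configuration
    print(logSpec.payload.configuration)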
Example #8
def createLogCollectorJobSpec(workflowSpec, originalWf, site, lfnBase, stageOutParams, *lfns):
    """
    createLogCollectorJobSpec

    Create a LogArchive JobSpec definition, using the LogArchive
    workflow template, the site name and the list of log LFNs to be
    collected

    """

    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (workflowSpec.workflowName(), makeUUID())
    jobSpec.setJobName(jobName)
    jobSpec.setJobType("LogCollect")

    jobSpec.addWhitelistSite(site)

    confNode = IMProvNode("LogCollectorConfig")

    # add site and workflow to collect
    confNode.addNode(IMProvNode("wf", originalWf))
    confNode.addNode(IMProvNode("se", site))
    confNode.addNode(IMProvNode("lfnBase", lfnBase))

    # add logs to collect
    logNode = IMProvNode("LogsToCollect")
    for lfn in lfns:
        logNode.addNode(IMProvNode("lfn", lfn))

    confNode.addNode(logNode)

    # stageout
    if stageOutParams:
        stageOutNode = IMProvNode("Override")
        #    WorkflowTools.addStageOutOverride(confNode, stageOutParams['command'],
        #                                      stageOutParams['option'],
        #                                      stageOutParams['se-name'],
        #                                      stageOutParams['lfnPrefix'])

        stageOutNode.addNode(IMProvNode("command", stageOutParams["command"]))
        stageOutNode.addNode(IMProvNode("option", stageOutParams["option"]))
        stageOutNode.addNode(IMProvNode("se-name", stageOutParams["se-name"]))
        stageOutNode.addNode(IMProvNode("lfn-prefix", stageOutParams["lfnPrefix"]))
        confNode.addNode(stageOutNode)

    # jobSpec.payload.configuration = logNode.makeDOMElement().toprettyxml()
    jobSpec.payload.configuration = confNode.makeDOMElement().toprettyxml()

    return jobSpec
Example #9
    def __init__(self, t0astFile=None, **args):
        """
        ___init___

        Initialize all attributes.
        If a T0ASTFile is passed as a parameter, populate the Block
        instance by extracting the necessary information from the
        given T0ASTFile.
        """
        dict.__init__(self)
        
        self.setdefault("BLOCK_ID", None)
        self.setdefault("STATUS", "Active")
        self.setdefault("MIGRATE_STATUS", "NotMigrated")    
        
        if t0astFile != None:
            # full path used in dbs
            self.setdefault("BLOCK_NAME", "/%s/%s/%s#%s" % 
                            (t0astFile["PRIMARY_DATASET"], 
                             t0astFile["PROCESSED_DATASET"],
                             t0astFile["DATA_TIER"],
                             makeUUID())
                            )
            self.setdefault("RUN_ID", t0astFile.getRunID())
            self.setdefault("DATASET_ID", t0astFile["DATASET_ID"])
            self.setdefault("DATASET_PATH_ID", t0astFile.getDatasetPathID())
            self.setdefault("DATA_TIER", t0astFile["DATA_TIER"])
            self.setdefault("BLOCKSIZE", t0astFile["FILESIZE"])
            self.setdefault("FILECOUNT", 1)
        else:
    
            self.setdefault("BLOCK_NAME", None)
            self.setdefault("RUN_ID", None)
            self.setdefault("DATASET_ID", None)
            self.setdefault("DATASET_PATH_ID", None)
            self.setdefault("DATA_TIER", None)
            self.setdefault("BLOCKSIZE", 0) # byte
            self.setdefault("FILECOUNT", 0)
        
        self.update(args)
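
Because this class subclasses dict and only uses setdefault(), keyword arguments passed to the constructor win over the defaults; a minimal sketch of both construction paths, assuming the class is named Block as in the docstring:

    # without a T0ASTFile everything falls back to the defaults
    block = Block()
    print(block["STATUS"])      # "Active"
    print(block["BLOCKSIZE"])   # 0

    # keyword arguments override the defaults via self.update(args)
    block = Block(STATUS="Closed", FILECOUNT=10)
    print(block["STATUS"])      # "Closed"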
Example #10
def createCleanupJobSpec(workflowSpec, site, *lfns):
    """
    _createCleanupJob_

    Create a Cleanup JobSpec definition, using the cleanup
    workflow template, site name and the list of LFNs to be
    removed

    """

    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (workflowSpec.workflowName(), makeUUID())
    jobSpec.setJobName(jobName)
    jobSpec.setJobType("CleanUp")

    jobSpec.addWhitelistSite(site)

    lfnList = ""
    for lfn in lfns:
        lfnList += "%s\n" % lfn

    jobSpec.payload.configuration = lfnList

    return jobSpec
Example #11
    def makeWorkflow(self):
        """
        _makeWorkflow_

        Call this method to create the workflow spec instance when
        done

        """
        self._Validate()

        #  //
        # // Add Stage Out node
        #//
        self.saveOutputFor.append(self.cmsRunNode.name)
        WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1",
                                      *self.saveOutputFor)
        WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

        #  //
        # // Input Dataset?
        #//
        if self.inputDataset['IsUsed']:
            inputDataset = self.cmsRunNodes[0].addInputDataset(
                self.inputDataset['Primary'], self.inputDataset['Processed'])
            inputDataset["DataTier"] = self.inputDataset['DataTier']
            for keyname in [
                    'SplitType',
                    'SplitSize',
                    'OnlySites',
                    'OnlyBlocks',
                    'OnlyClosedBlocks',
            ]:
                if self.inputDataset[keyname] != None:
                    self.workflow.parameters[keyname] = self.inputDataset[
                        keyname]

        #  //
        # // Pileup Datasets?
        #//
        for pileupDataset in self.pileupDatasets:
            puDataset = self.cmsRunNodes[0].addPileupDataset(
                pileupDataset['Primary'], pileupDataset['DataTier'],
                pileupDataset['Processed'])
            puDataset['FilesPerJob'] = pileupDataset['FilesPerJob']
            if pileupDataset['TargetModule'] is not None:
                puDataset['TargetModule'] = pileupDataset['TargetModule']

        #  //
        # // Extract dataset info from cfg
        #//
        datasets = {}
        datasetsToForward = {}
        for cmsRunNode, config in zip(self.cmsRunNodes, self.configurations):

            # Ignore nodes that don't save any output. But keep input dataset
            # in case we need to forward it.
            if cmsRunNode.name not in self.saveOutputFor:
                # Store parent dataset in case we need to forward it.
                if self.inputDataset['IsUsed'] and \
                                            cmsRunNode == self.cmsRunNodes[0]:
                    datasetsToForward[cmsRunNode.name] = \
                                            self.inputDataset['DatasetName']
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        # If the previous cmsRunNode stages out, pull down the
                        # dataset it produced.
                        if not inputLink["AppearStandalone"]:
                            # TODO: Won't work if more than one InputLink exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasets['%s:%s' % (inputLink['InputNode'],
                                inputLink['OutputModule'])]
                        # If the previous cmsRunNode does not stage out, then
                        # use its parent.
                        else:
                            # TODO: Won't work if more than one InputLink exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasetsToForward[inputLink['InputNode']]
                continue

            for outModName in config.outputModules.keys():
                moduleInstance = config.getOutputModule(outModName)
                dataTier = moduleInstance['dataTier']
                filterName = moduleInstance["filterName"]
                primaryName = DatasetConventions.primaryDatasetName(
                    PhysicsChannel=self.channel, )

                if self.useProperNamingConventions:
                    if self.processingString and filterName:
                        processingString = "_".join(
                            (self.processingString, filterName))
                    elif self.processingString:
                        processingString = self.processingString
                    elif filterName:
                        processingString = filterName
                    else:
                        processingString = None
                    processedName = DatasetConventions.properProcessedDatasetName(
                        AcquisitionEra=self.acquisitionEra,
                        ProcessingString=processingString,
                        ProcessingVersion=self.processingVersion,
                        Unmerged=True)
                elif self.acquisitionEra == None:
                    processedName = DatasetConventions.processedDatasetName(
                        Version=cmsRunNode.application['Version'],
                        Label=self.label,
                        Group=self.group,
                        FilterName=filterName,
                        RequestId=self.requestId,
                        Unmerged=True)
                else:
                    processedName = DatasetConventions.csa08ProcessedDatasetName(
                        AcquisitionEra=self.acquisitionEra,
                        Conditions=self.workflow.parameters['Conditions'],
                        ProcessingVersion=self.workflow.
                        parameters['ProcessingVersion'],
                        FilterName=filterName,
                        Unmerged=True)

                dataTier = DatasetConventions.checkDataTier(dataTier)

                moduleInstance['primaryDataset'] = primaryName
                moduleInstance['processedDataset'] = processedName

                outDS = cmsRunNode.addOutputDataset(primaryName, processedName,
                                                    outModName)

                outDS['Status'] = self.outputDatasetStatus
                outDS['DataTier'] = dataTier
                outDS["ApplicationName"] = \
                                         cmsRunNode.application["Executable"]
                outDS["ApplicationFamily"] = outModName
                outDS["PhysicsGroup"] = self.group

                # check for input dataset for first node
                if self.inputDataset[
                        'IsUsed'] and cmsRunNode == self.cmsRunNodes[0]:
                    outDS['ParentDataset'] = self.inputDataset['DatasetName']
                # check for staged out intermediates
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        if not inputLink["AppearStandalone"]:
                            # TODO: Won't work if more than one InputLink exists
                            outDS['ParentDataset'] = datasets[
                                '%s:%s' % (inputLink['InputNode'],
                                           inputLink['OutputModule'])]
                        elif datasetsToForward.get(
                                inputLink['InputNode']) is not None:
                            outDS['ParentDataset'] = \
                                    datasetsToForward[inputLink['InputNode']]

                if self.options['FakeHash']:
                    guid = makeUUID()
                    outDS['PSetHash'] = "hash=%s;guid=%s" % \
                            (self.psetHashes[cmsRunNode.name], guid)
                else:
                    outDS['PSetHash'] = self.psetHashes[cmsRunNode.name]

                # record output in case used as input to a later node
                datasets['%s:%s' % (cmsRunNode.name, outModName)] = \
                                "/%s/%s/%s" % ( outDS['PrimaryDataset'],
                                                  outDS['ProcessedDataset'],
                                                  outDS['DataTier'])

        # optionally remap sibling relationships to parent-child (i.e. HLTDEBUG)
        remapParentageForWorkflow(self.workflow)
        WorkflowTools.generateFilenames(self.workflow)

        return self.workflow
Example #12
    def createSuccessReport(self, jobSpecLoaded, workerNodeInfo, reportFilePath):
        """
        _createSuccessReport_

        Create a job report representing the successful completion
        of a job.

        The jobSpecLoaded parameter is a reference to an instance
        of the JobSpec class that has been initialized with the
        job spec that we are generating a report for.

        """
        jobSpecPayload, newReport = \
                self.__fwkJobReportCommon(jobSpecLoaded, workerNodeInfo)
        newReport.exitCode = 0
        newReport.status = "Success"

        if "jobId" in jobSpecLoaded.parameters.keys():
            newReport.jobSpecId = jobSpecLoaded.parameters["jobId"]

        # Create a list of datasets from the JobSpec,
        # then associate files with these later on
        datasets = getOutputDatasetDetails(jobSpecPayload)
        datasets.extend(getSizeBasedMergeDatasetsFromNode(jobSpecPayload))
        outModules = jobSpecPayload.cfgInterface.outputModules

        inputFiles = jobSpecPayload.cfgInterface.inputFiles

        for dataset in datasets:
            modName = dataset.get('OutputModuleName', None)

            if outModules.has_key(modName):
                dataset['LFNBase'] = outModules[modName].get('LFNBase', None)
                self.setDefaultForNoneValue('LFNBase', dataset['LFNBase'])
                dataset['MergedLFNBase'] = \
                                outModules[modName].get('MergedLFNBase', None)

        datasetMap = {}
        for dataset in datasets:
            datasetMap[dataset['OutputModuleName']] = dataset

        for outName, outMod in \
                jobSpecPayload.cfgInterface.outputModules.items():

            theFile = newReport.newFile()
            guid = makeUUID()
            
            if outMod.has_key("LFNBase"):
                theFile['LFN'] = "%s/%s.root" % (outMod['LFNBase'], guid)
            else:
                theFile['LFN'] = "/some/madeup/path/%s.root" % guid
                
            self.setDefaultForNoneValue('LFNBase', theFile['LFN'])
            theFile['PFN'] ="fakefile:%s" % theFile['LFN']
            theFile['GUID'] = guid
            theFile['MergedBySize'] = choice(["True", "False"])
            theFile['ModuleLabel'] = outName
            # size is in bytes (minimum 4 MB, maximum 4 GB)
            theFile['Size'] = 4000000 * randrange(1, 1000)
            runNum = jobSpecLoaded.parameters["RunNumber"]
            # need to get lumi
            lumiList = jobSpecLoaded.parameters.get("LumiSections", [])
            theFile.runs[runNum] = RunInfo(runNum, *lumiList)
            # if maxEvents['output'] is not set, fall back to maxEvents['input']
            totalEvent = jobSpecPayload.cfgInterface.maxEvents['output']
            if totalEvent == None:
                totalEvent = jobSpecPayload.cfgInterface.maxEvents['input']

            # if neither input nor output is set, log an error and use the default (100)
            totalEvent = self.setDefaultForNoneValue(
                                           "maxEvent['input' and 'output']",
                                            totalEvent,
                                            100)
            
            try:
                totalEvent = int(totalEvent)
            except ValueError, ex:
                logging.error("totalEvent is not a number. \n%s" % ex)

            # event count should be >= 0
            # totalEvent of -1 means process all events
            if totalEvent < 0:
                totalEvent = 200
                
            if (random() > self.avgEventProcessingRate):
                # Gauss distribution of totalEvent.
                meanEvent = int(totalEvent * 0.7)
                stdDev = totalEvent * 0.15
                tempTotalEvent = int(gauss(meanEvent,stdDev))
                if tempTotalEvent <= 0 :
                    totalEvent = 1
                elif tempTotalEvent >= totalEvent:
                    totalEvent = totalEvent - 1
                else:
                    totalEvent = tempTotalEvent

            #logging.debug("---------- Total Event ----------: %s \n" % totalEvent)
            theFile['TotalEvents'] = totalEvent

            theFile['SEName'] = workerNodeInfo['se-name']
            theFile['CEname'] = workerNodeInfo['ce-name']
            theFile['Catalog'] = outMod['catalog']
            theFile['Stream'] = outMod['stream']
            theFile['OutputModuleClass'] = "PoolOutputModule"

            theFile.addChecksum("cksum", randrange(1000000, 10000000))
            theFile.branches.extend(["fakeBranch_%d-%s.Rec" % (num, guid)
                                  for num in range(randrange(5,20))])
            #theFile.load(theFile.save())
            theFile["BranchHash"] = randrange(2000000, 30000000)
            [ theFile.addInputFile("fakefile:%s" % x , "%s" % x )
              for x in inputFiles ]

            if datasetMap.has_key(outName):
                datasetForFile = theFile.newDataset()
                datasetForFile.update(datasetMap[outName])
Example #13
    def __call__(self, *fileList):

        jobSpec = self.spec.createJobSpec()

        jobId = "%s-%s" % (self.spec.workflowName(), self.count)

        jobSpec.setJobName(jobId)
        jobSpec.setJobType("Merge")

        jobSpec.addWhitelistSite("storage.element.edu")

        # get PSet
        cfg = jobSpec.payload.cfgInterface

        # set output module
        #print jobSpec.payload

        # set output file name

        prim = self.dataset['PrimaryDataset']
        tier = self.dataset['DataTier']
        lastBit = self.dataset['ProcessedDataset']

        acqEra = None
        #if .has_key("AcquisitionEra"):
        acqEra = jobSpec.parameters.get("AcquisitionEra", None)

        # compute LFN group based on merge jobs counter
        group = str(self.count // 1000).zfill(4)
        jobSpec.parameters['RunNumber'] = self.spec.workflowRunNumber()
        remainingBits = lastBit
        if acqEra != None:
            thingtoStrip = "%s_" % acqEra
            mypieces = lastBit.split(thingtoStrip, 1)
            if len(mypieces) > 1:
                remainingBits = mypieces[1].split("-unmerged", 1)[0]
            else:
                remainingBits = lastBit

        outModule = cfg.outputModules['Merged']
        lfnBase = outModule['LFNBase']
        extendedlfnBase = os.path.join(lfnBase, prim, tier, remainingBits,
                                       group)
        baseFileName = "%s.root" % makeUUID()

        outModule['fileName'] = baseFileName
        outModule['logicalFileName'] = os.path.join(extendedlfnBase,
                                                    baseFileName)

        # set output catalog
        outModule['catalog'] = "%s-merge.xml" % jobId

        # set input module

        # get input file names (expects a trivial catalog on site)
        cfg.inputFiles = ["%s" % fileName for fileName in fileList]

        # target file name
        mergeJobSpecFile = "%s/%s-spec.xml" % (
            self.dir, jobId)

        # save job specification
        jobSpec.save(mergeJobSpecFile)
        self.count += 1
        return jobSpec
Example #14
    def makeWorkflow(self):
        """
        _makeWorkflow_

        Call this method to create the workflow spec instance when
        done

        """
        self._Validate()
        
        #  //
        # // Add Stage Out node
        #//
        self.saveOutputFor.append(self.cmsRunNode.name)
        WorkflowTools.addStageOutNode(self.cmsRunNode,
                        "stageOut1", *self.saveOutputFor)
        WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

        #  //
        # // Input Dataset?
        #//
        if self.inputDataset['IsUsed']:
            inputDataset = self.cmsRunNodes[0].addInputDataset(
                self.inputDataset['Primary'],
                self.inputDataset['Processed']
                )
            inputDataset["DataTier"] = self.inputDataset['DataTier']
            for keyname in [
                'SplitType',
                'SplitSize',
                'OnlySites',
                'OnlyBlocks',
                'OnlyClosedBlocks',
                ]:
                if self.inputDataset[keyname] != None:
                    self.workflow.parameters[keyname] = self.inputDataset[keyname]
                    
            
        #  //
        # // Pileup Datasets?
        #//
        for pileupDataset in self.pileupDatasets:
            puDataset = self.cmsRunNodes[0].addPileupDataset(
                pileupDataset['Primary'],
                pileupDataset['DataTier'],
                pileupDataset['Processed'])
            puDataset['FilesPerJob'] = pileupDataset['FilesPerJob']
            if pileupDataset['TargetModule'] is not None:
                puDataset['TargetModule'] = pileupDataset['TargetModule']
            
        
        #  //
        # // Extract dataset info from cfg
        #//
        datasets = {}
        datasetsToForward = {}
        for cmsRunNode, config in zip(self.cmsRunNodes, self.configurations):
            
            # Ignore nodes that don't save any output. But keep input dataset
            # in case we need to forward it.
            if cmsRunNode.name not in self.saveOutputFor:
                # Store parent dataset in case we need to forward it.
                if self.inputDataset['IsUsed'] and \
                                            cmsRunNode == self.cmsRunNodes[0]:
                    datasetsToForward[cmsRunNode.name] = \
                                            self.inputDataset['DatasetName']
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        # If the previous cmsRunNode stages out, pull down the
                        # dataset it produced.
                        if not inputLink["AppearStandalone"]:
                            # TODO: Won't work if more than one InputLink exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasets['%s:%s' % (inputLink['InputNode'],
                                inputLink['OutputModule'])]
                        # If the previous cmsRunNode does not stage out, then
                        # use its parent.
                        else:
                            # TODO: Won't work if more than one InputLink exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasetsToForward[inputLink['InputNode']]
                continue
            
            for outModName in config.outputModules.keys():
                moduleInstance = config.getOutputModule(outModName)
                dataTier = moduleInstance['dataTier']
                filterName = moduleInstance["filterName"]
                primaryName = DatasetConventions.primaryDatasetName(
                                        PhysicsChannel = self.channel,
                                        )

                if self.useProperNamingConventions:
                    if self.processingString and filterName:
                        processingString = "_".join((self.processingString, filterName))
                    elif self.processingString:
                        processingString = self.processingString
                    elif filterName:
                        processingString = filterName
                    else:
                        processingString = None
                    processedName = DatasetConventions.properProcessedDatasetName(
                        AcquisitionEra = self.acquisitionEra,
                        ProcessingString = processingString,
                        ProcessingVersion = self.processingVersion,
                        Unmerged = True
                        )
                elif self.acquisitionEra == None:
                    processedName = DatasetConventions.processedDatasetName(
                        Version = cmsRunNode.application['Version'],
                        Label = self.label,
                        Group = self.group,
                        FilterName = filterName,
                        RequestId = self.requestId,
                        Unmerged = True
                        )
                else:
                    processedName = DatasetConventions.csa08ProcessedDatasetName(
                        AcquisitionEra = self.acquisitionEra,
                        Conditions = self.workflow.parameters['Conditions'],
                        ProcessingVersion = self.workflow.parameters['ProcessingVersion'],
                        FilterName = filterName,
                        Unmerged = True
                        )
                  
                dataTier = DatasetConventions.checkDataTier(dataTier)

                moduleInstance['primaryDataset'] = primaryName
                moduleInstance['processedDataset'] = processedName
    
                outDS = cmsRunNode.addOutputDataset(primaryName, 
                                                         processedName,
                                                         outModName)

                outDS['Status'] = self.outputDatasetStatus                
                outDS['DataTier'] = dataTier
                outDS["ApplicationName"] = \
                                         cmsRunNode.application["Executable"]
                outDS["ApplicationFamily"] = outModName
                outDS["PhysicsGroup"] = self.group
    
                # check for input dataset for first node
                if self.inputDataset['IsUsed'] and cmsRunNode == self.cmsRunNodes[0]:
                    outDS['ParentDataset'] = self.inputDataset['DatasetName']
                # check for staged out intermediates
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        if not inputLink["AppearStandalone"]:
                            # TODO: Won't work if more than one InputLink exists
                            outDS['ParentDataset'] = datasets['%s:%s' % (inputLink['InputNode'],
                                                                    inputLink['OutputModule'])]
                        elif datasetsToForward.get(
                                inputLink['InputNode']) is not None:
                            outDS['ParentDataset'] = \
                                    datasetsToForward[inputLink['InputNode']]

                if self.options['FakeHash']:
                    guid = makeUUID()
                    outDS['PSetHash'] = "hash=%s;guid=%s" % \
                            (self.psetHashes[cmsRunNode.name], guid)
                else:
                    outDS['PSetHash'] = self.psetHashes[cmsRunNode.name]

                # record output in case used as input to a later node
                datasets['%s:%s' % (cmsRunNode.name, outModName)] = \
                                "/%s/%s/%s" % ( outDS['PrimaryDataset'],
                                                  outDS['ProcessedDataset'],
                                                  outDS['DataTier'])

        # optionally remap sibling relationships to parent-child (i.e. HLTDEBUG)
        remapParentageForWorkflow(self.workflow)
        WorkflowTools.generateFilenames(self.workflow)

        return self.workflow
Example #15
    def createSuccessReport(self, jobSpecLoaded, workerNodeInfo, 
                            reportFilePath):
        """
        _createSuccessReport_

        Create a job report representing the successful completion
        of a job.

        The jobSpecLoaded parameter is a reference to an instance
        of the JobSpec class that has been initialized with the
        job spec that we are generating a report for.

        """
        jobSpecPayload, newReport = \
                self.__fwkJobReportCommon(jobSpecLoaded, workerNodeInfo)
        newReport.exitCode = 0
        newReport.status = "Success"
        
        # parse newReport.jobSpecId (it should contain a job name of the form
        # "Repack-Run%s-%s", "RepackMerge-Run%s-%s" or "PromptReco-Run%s-%s")
        
        specIDParts = newReport.jobSpecId.split('-')
        
        tier0JobType = None
        if len(specIDParts) != 3:
            logging.debug("JobReport jobSpecID not in correct format for tier 0: %s" %
                          newReport.jobSpecId)
        else:
            # Job type should be one of "Repack", "RepackMerge", "PromptReco"
            tier0JobType = specIDParts[0].strip()
        
         
        if "jobId" in jobSpecLoaded.parameters.keys():
            newReport.jobSpecId = jobSpecLoaded.parameters["jobId"]

        # Create a list of datasets from the JobSpec,
        # then associate files with these later on
        datasets = getOutputDatasetDetails(jobSpecPayload)
        datasets.extend(getSizeBasedMergeDatasetsFromNode(jobSpecPayload))
        outModules = jobSpecPayload.cfgInterface.outputModules

        inputFiles = jobSpecPayload.cfgInterface.inputFiles

        for dataset in datasets:
            modName = dataset.get('OutputModuleName', None)

            if outModules.has_key(modName):
                dataset['LFNBase'] = outModules[modName].get('LFNBase', None)
                self.setDefaultForNoneValue('LFNBase', dataset['LFNBase'])
                dataset['MergedLFNBase'] = \
                                outModules[modName].get('MergedLFNBase', None)

        datasetMap = {}
        for dataset in datasets:
            datasetMap[dataset['OutputModuleName']] = dataset

        for outName, outMod in \
                jobSpecPayload.cfgInterface.outputModules.items():

            theFile = newReport.newFile()
            guid = makeUUID()
            
            theFile['GUID'] = guid
            theFile['ModuleLabel'] = outName
            runNum = jobSpecLoaded.parameters["RunNumber"]
            # need to get lumi
            lumiList = jobSpecLoaded.parameters.get("LumiSections", [])
            theFile.runs[runNum] = RunInfo(runNum, *lumiList)
            # if maxEvents['output'] is not set, fall back to maxEvents['input']
            totalEvent = jobSpecPayload.cfgInterface.maxEvents['output']
            if totalEvent == None:
                totalEvent = jobSpecPayload.cfgInterface.maxEvents['input']

            # if neither input nor output is set, log an error and use the default (100)
            totalEvent = self.setDefaultForNoneValue(
                                           "maxEvent['input' and 'output']",
                                            totalEvent,
                                            100)

            try:
                totalEvent = int(totalEvent)
            except ValueError, ex:
                logging.error("totalEvent is not a number. \n%s" % ex)

            if (random() > self.avgEventProcessingRate):
                # Gauss distribution of totalEvent.
                meanEvent = int(totalEvent * 0.7)
                stdDev = totalEvent * 0.15
                tempTotalEvent = int(gauss(meanEvent,stdDev))
                if tempTotalEvent <= 0 :
                    totalEvent = 1
                elif tempTotalEvent >= totalEvent:
                    totalEvent = totalEvent - 1
                else:
                    totalEvent = tempTotalEvent

            #logging.debug("---------- Total Event ----------: %s \n" % totalEvent)
            theFile['TotalEvents'] = totalEvent

            theFile['SEName'] = workerNodeInfo['se-name']
            theFile['CEname'] = workerNodeInfo['ce-name']
            theFile['Catalog'] = outMod['catalog']
            theFile['Stream'] = outMod['stream']
            theFile['OutputModuleClass'] = "PoolOutputModule"

            theFile.addChecksum("cksum", randrange(1000000, 10000000))
            theFile.branches.extend(["fakeBranch_%d-%s.Rec" % (num, guid)
                                  for num in range(randrange(5,20))])
            #theFile.load(theFile.save())
            theFile["BranchHash"] = randrange(2000000, 30000000)
            [ theFile.addInputFile("fakefile:%s" % x , "%s" % x )
              for x in inputFiles ]

            
            if datasetMap.has_key(outName):
                datasetForFile = theFile.newDataset()
                datasetForFile.update(datasetMap[outName])

            # default file size in bytes (minimum 4 MB, maximum 4 GB);
            # it is overridden below according to the tier-0 job type
            theFile['Size'] = 4000000 * randrange(1, 1000) #random size
            theFile['MergedBySize'] = choice(["True", "False"])
            # setting up default LFN
            if outMod.has_key("LFNBase"):
                theFile['LFN'] = "%s%s.root" % (outMod['LFNBase'], guid)
            else:
                theFile['LFN'] = "/some/madeup/path/%s.root" % guid
                
            self.setDefaultForNoneValue('LFNBase', theFile['LFN'])
            
            if tier0JobType == "Repack":
                # parse dataset name set the size according to the threshold
                if len(theFile.dataset) == 0:
                    continue

                datasetNameParts = theFile.dataset[0]["PrimaryDataset"].split('_')
                # need to add sanity check
                if self.thresholdForMerge > int(datasetNameParts[2]):
                    theFile['Size'] = 500000000 #(500 MB)
                    theFile['MergedBySize'] = "False"
                else :
                    theFile['Size'] = 4000000000  #(4 GB)
                    theFile['MergedBySize'] = "True"
                    # override LFN for the merged file
                    theFile['LFN'] = "%s%s.root" % (outMod['MergedLFNBase'], guid)
                    
            elif tier0JobType == "RepackMerge":
                theFile['Size'] = 4000000000  #(4 GB)
                theFile['MergedBySize'] = "True"
                    
            elif tier0JobType == "PromptReco": 
                theFile['Size'] = 2000000000  #(2 GB)
            else :
                theFile['Size'] = 4000000 * randrange(1, 1000) #random size        
            
            theFile['PFN'] ="fakefile:%s" % theFile['LFN']