# Example 1
    def getRunConfig(self, runNumber):
        """
        _getRunConfig_

        Return the RunConfig instance for the given run number, building the
        shared cache lazily on first use.
        """
        if not self.runConfigCache:
            # Construct the cache once; every later call reuses it.
            cache = Cache(promptSkimming = True)
            cache.t0astDBConn = self.t0astDBConn
            cache.configCache = os.path.join(self.config.PromptSkimScheduler.workloadCache,
                                             "RunConfig")
            self.runConfigCache = cache

        return self.runConfigCache.getRunConfig(runNumber)
# Example 2
class PromptSkimPoller(BaseWorkerThread):
    def __init__(self, config):
        """
        Keep a reference to the component configuration and initialize the
        underlying worker thread machinery.
        """
        BaseWorkerThread.__init__(self)
        self.config = config
    
    def setup(self, parameters = None):
        """
        _setup_

        Initialize workload bookkeeping, the DAO factory, job splitting
        parameters, the T0AST connection and the WorkQueue used to inject
        skim workflows.
        """
        logging.info("PromptSkimScheduler Component Started")

        # Shorthand for the component's own configuration section.
        skimSchedulerConfig = self.config.PromptSkimScheduler

        # Workload related parameters
        self.promptSkimFactory = PromptSkimWorkloadFactory()
        self.workloads = {}
        self.workloadCache = skimSchedulerConfig.workloadCache
        if not os.path.exists(self.workloadCache):
            os.makedirs(self.workloadCache)

        self.daoFactory = DAOFactory(package = "WMCore.WMBS", logger = logging,
                                     dbinterface = threading.currentThread().dbi)
        self.runConfigCache = None

        # Scram arch and path to cmssw needed to generate workflows.
        self.scramArch = skimSchedulerConfig.scramArch
        self.cmsPath = skimSchedulerConfig.cmsPath
        self.initCommand = skimSchedulerConfig.initCommand

        # Job splitting parameters
        self.minMergeSize = skimSchedulerConfig.minMergeSize
        self.maxMergeEvents = skimSchedulerConfig.maxMergeEvents
        self.maxMergeSize = skimSchedulerConfig.maxMergeSize
        self.maxMergeFiles = skimSchedulerConfig.maxMergeFiles

        self.t0astDBConn = None
        self.connectT0AST()

        self.workQueue = WorkQueue(CouchUrl = self.config.JobStateMachine.couchurl,
                                   CacheDir = os.path.join(self.config.General.workDir,
                                                           "WorkQueueCacheDir"))
        return

    def connectT0AST(self):
        """
        _connectT0AST_

        Build a T0AST DB connection object and open the connection.
        """
        t0astDB = Tier0DB(connectionParameters = {},
                          manageGlobal = False,
                          initConnection = False)
        t0astDB.dbFactory = DBFactory(logging,
                                      self.config.PromptSkimScheduler.t0astURL)
        t0astDB.connect()
        self.t0astDBConn = t0astDB
        return

    def algorithm(self, parameters = None):
        """
        _algorithm_

        One polling cycle: look for transfered blocks and complete runs.
        """
        # Delegate the real work so the thread hook stays trivial.
        self.pollForTransferedBlocks()
        return

    def getRunConfig(self, runNumber):
        """
        _getRunConfig_

        Return the RunConfig instance for the given run number, building the
        shared cache lazily on first use.
        """
        if not self.runConfigCache:
            # Construct the cache once; every later call reuses it.
            cache = Cache(promptSkimming = True)
            cache.t0astDBConn = self.t0astDBConn
            cache.configCache = os.path.join(self.config.PromptSkimScheduler.workloadCache,
                                             "RunConfig")
            self.runConfigCache = cache

        return self.runConfigCache.getRunConfig(runNumber)
    
    def createWorkloadsForBlock(self, acquisitionEra, skimConfig, blockInfo):
        """
        _createWorkloadsForBlock_

        Check to see if we've already created skimming workloads for the
        run/dataset that the block belongs to.  If a workload already exists
        just whitelist the new block and queue it; otherwise create a new
        workload and install it into WMBS.

        NOTE(review): the acquisitionEra argument is unused here (the era is
        taken from the RunConfig); it is kept for interface compatibility.
        """
        (datasetPath, guid) = blockInfo["BLOCK_NAME"].split("#", 1)
        (primary, processed, tier) = datasetPath[1:].split("/", 3)
        workloadName = "Run%s-%s-%s-%s" % (blockInfo["RUN_ID"], primary,
                                           processed, skimConfig.SkimName)
        # One spec file per block, named after the block GUID.
        specPath = os.path.join(self.workloadCache, workloadName, "%s.pkl" % guid)

        # Reuse an existing workload for this run/skim if we have one.
        # (dict.has_key is Python-2-only; use .get() instead.)
        existingWorkload = self.workloads.get(blockInfo["RUN_ID"], {}).get(skimConfig.SkimName)
        if existingWorkload is not None:
            existingWorkload.setBlockWhitelist(blockInfo["BLOCK_NAME"])
            existingWorkload.setSpecUrl(specPath)
            existingWorkload.save(specPath)
            self.workQueue.queueWork(specPath, team = "PromptSkimming",
                                     request = workloadName)
            return

        runConfig = self.getRunConfig(blockInfo["RUN_ID"])
        configFile = runConfig.retrieveConfigFromURL(skimConfig.ConfigURL)

        # Two-file-read skims need the parent files in the job input.
        includeParents = bool(skimConfig.TwoFileRead)

        wfParams = {"AcquisitionEra": runConfig.getAcquisitionEra(),
                    "Requestor": "CMSPromptSkimming",
                    "CustodialSite": skimConfig.SiteName,
                    "BlockName": blockInfo["BLOCK_NAME"],
                    "InputDataset": datasetPath,
                    "CMSSWVersion": skimConfig.CMSSWVersion,
                    "ScramArch": self.scramArch,
                    "InitCommand": self.initCommand,
                    "CouchURL": self.config.JobStateMachine.couchurl,
                    "CouchDBName": self.config.JobStateMachine.configCacheDBName,
                    "ProcessingVersion": skimConfig.ProcessingVersion,
                    "GlobalTag": skimConfig.GlobalTag,
                    "CmsPath": self.cmsPath,
                    "SkimConfig": configFile,
                    "UnmergedLFNBase": "/store/unmerged",
                    "MergedLFNBase": "/store/data",
                    "MinMergeSize": self.minMergeSize,
                    "MaxMergeSize": self.maxMergeSize,
                    "MaxMergeEvents": self.maxMergeEvents,
                    "StdJobSplitAlgo": "FileBased",
                    "StdJobSplitArgs": {"files_per_job": 1,
                                        "include_parents": includeParents},
                    "ValidStatus": "VALID"}

        workload = self.promptSkimFactory(workloadName, wfParams)
        workload.setOwner("CMSDataOps")

        workloadDir = os.path.join(self.workloadCache, workloadName)
        if not os.path.exists(workloadDir):
            os.makedirs(workloadDir)

        workload.setSpecUrl(specPath)
        workload.save(specPath)

        self.workQueue.queueWork(specPath, team = "PromptSkimming", request = workloadName)

        # Remember the workload so later blocks of the same run/skim reuse it.
        self.workloads.setdefault(blockInfo["RUN_ID"], {})[skimConfig.SkimName] = workload
        return

    def pollForTransferedBlocks(self):
        """
        _pollForTransferedBlocks_

        Poll T0AST for any blocks that have been migrated to DBS and generate
        skims for them.  Mark the blocks as "Skimmed" once any skims have been
        injected into the Tier1 WMBS.
        """
        logging.info("pollForTransferedBlocks(): Running...")

        skimmableBlocks = ListBlock.listBlockInfoByStatus(self.t0astDBConn,
                                                          "Exported", "Migrated")

        # Lazy %-style args so formatting only happens if the level is enabled.
        logging.info("pollForTransferedBlocks(): Found %s blocks.", len(skimmableBlocks))
        logging.info("pollForTransferedBlocks(): %s", skimmableBlocks)

        for skimmableBlock in skimmableBlocks:
            logging.info("pollForTransferedBlocks(): Skimmable: %s",
                         skimmableBlock["BLOCK_ID"])
            runConfig = self.getRunConfig(int(skimmableBlock["RUN_ID"]))

            skims = runConfig.getSkimConfiguration(skimmableBlock["PRIMARY_ID"],
                                                   skimmableBlock["TIER_ID"])

            if skims is None:
                # No skims configured: mark the block done straight away.
                InsertBlock.updateBlockStatusByID(self.t0astDBConn,
                                                  skimmableBlock, "Skimmed")
                self.t0astDBConn.commit()
                logging.info("No skims for block %s", skimmableBlock["BLOCK_ID"])
                continue

            # Two-file-read skims can only run once the parent block has been
            # exported as well.
            insertParents = any(skimConfig.TwoFileRead for skimConfig in skims)
            if insertParents:
                if not ListBlock.isParentBlockExported(self.t0astDBConn,
                                                       skimmableBlock["BLOCK_ID"]):
                    logging.info("Block %s has unexported parents.",
                                 skimmableBlock["BLOCK_ID"])
                    continue

            myThread = threading.currentThread()
            myThread.transaction.begin()

            for skimConfig in skims:
                try:
                    self.createWorkloadsForBlock(runConfig.getAcquisitionEra(),
                                                 skimConfig, skimmableBlock)
                except Exception as ex:
                    # Log at error level; the old code buried failures at info.
                    logging.error("Error making workflows: %s", str(ex))
                    logging.error("Traceback: %s", traceback.format_exc())
                    self.t0astDBConn.rollback()
                    myThread.transaction.rollback()
                    break
            else:
                # for/else: only runs if every skim was created without error.
                InsertBlock.updateBlockStatusByID(self.t0astDBConn, skimmableBlock,
                                                  "Skimmed")
                self.t0astDBConn.commit()
                myThread.transaction.commit()

        self.t0astDBConn.commit()
        return