Пример #1
0
    def setupNextSteps(self, task, origArgs):
        """
        _setupNextSteps_

        Modify the step one task to include N more CMSSW steps and
        chain the output of each parent step into the step consuming it.

        :param task: task the extra cmsRun steps get attached to; it is
            expected to already hold the "cmsRun1" step
        :param origArgs: request arguments with one "StepN" dictionary per
            step, for N from 1 to self.stepChain
        :return: None
        """
        configCacheUrl = self.configCacheUrl or self.couchURL
        # StepName given in the request -> (request key, cmsRun step name)
        stepMapping = {}
        stepMapping.setdefault(origArgs['Step1']['StepName'], ('Step1', 'cmsRun1'))

        # Remains None when the chain has no step beyond Step1, in which case
        # there is no child step to close out after the loop
        childCmsswStepHelper = None
        for i in range(2, self.stepChain + 1):
            currentStepNumber = "Step%d" % i
            currentCmsRun = "cmsRun%d" % i
            stepMapping.setdefault(origArgs[currentStepNumber]['StepName'], (currentStepNumber, currentCmsRun))
            # Shallow copy, so that the keys set below do not end up in origArgs
            taskConf = dict(origArgs[currentStepNumber])

            # An InputStep that was not registered yet yields None here and
            # fails with a TypeError
            parentStepNumber, parentCmsRun = stepMapping.get(taskConf['InputStep'])
            parentCmsswStep = task.getStep(parentCmsRun)
            parentCmsswStepHelper = parentCmsswStep.getTypeHelper()

            # Set default values for the task parameters
            self.modifyTaskConfiguration(taskConf, False, 'InputDataset' not in taskConf)
            globalTag = taskConf.get("GlobalTag", self.globalTag)
            frameworkVersion = taskConf.get("CMSSWVersion", self.frameworkVersion)
            scramArch = taskConf.get("ScramArch", self.scramArch)

            # The new step is created underneath its parent step
            childCmssw = parentCmsswStep.addTopStep(currentCmsRun)
            childCmssw.setStepType("CMSSW")
            template = StepFactory.getStepTemplate("CMSSW")
            template(childCmssw.data)

            childCmsswStepHelper = childCmssw.getTypeHelper()
            childCmsswStepHelper.setGlobalTag(globalTag)
            childCmsswStepHelper.setupChainedProcessing(parentCmsRun, taskConf['InputFromOutputModule'])
            childCmsswStepHelper.cmsswSetup(frameworkVersion, softwareEnvironment="", scramArch=scramArch)
            childCmsswStepHelper.setConfigCache(configCacheUrl, taskConf['ConfigCacheID'], self.couchDBName)

            # Pileup check
            taskConf["PileupConfig"] = parsePileupConfig(taskConf["MCPileup"], taskConf["DataPileup"])
            if taskConf["PileupConfig"]:
                self.setupPileup(task, taskConf['PileupConfig'])

            # Handling the output modules
            parentKeepOutput = strToBool(origArgs[parentStepNumber].get('KeepOutput', True))
            parentCmsswStepHelper.keepOutput(parentKeepOutput)
            childKeepOutput = strToBool(taskConf.get('KeepOutput', True))
            childCmsswStepHelper.keepOutput(childKeepOutput)
            self.setupOutputModules(task, taskConf["ConfigCacheID"], currentCmsRun, childKeepOutput,
                                    taskConf['StepName'])

        # Closing out the task configuration. The last step output must be saved/merged
        if childCmsswStepHelper is not None:
            childCmsswStepHelper.keepOutput(True)

        return
Пример #2
0
    def validateSchema(self, schema):
        """
        _validateSchema_

        Go over each step and make sure it matches validation parameters.

        For every "StepN" entry (N from 1 to schema['StepChain']) this checks
        that the entry exists, that it is a dictionary, that it passes the
        generic task validation, that its ConfigCacheID (when set) exists and,
        when the step keeps its output, that none of its output modules was
        already used by an earlier step. At the end, the last step is not
        allowed to drop its output.

        :param schema: request dictionary; the keys read here are 'StepChain',
            'CouchURL', 'CouchDBName', the optional 'ConfigCacheUrl' and the
            'StepN' entries
        :return: None, problems are reported via self.raiseValidationException
        """
        # Output module names of all the steps validated so far
        outputMods = set()
        numSteps = schema['StepChain']
        couchUrl = schema.get("ConfigCacheUrl", None) or schema["CouchURL"]
        # Stays None when there is no step at all, so the final check can be skipped
        lastStep = None
        for i in range(1, numSteps + 1):
            stepName = "Step%s" % i
            if stepName not in schema:
                msg = "No Step%s entry present in the request" % i
                self.raiseValidationException(msg=msg)

            step = schema[stepName]
            # We can't handle non-dictionary steps
            if not isinstance(step, dict):
                msg = "Non-dictionary input for step in StepChain.\n"
                msg += "Could be an indicator of JSON error.\n"
                self.raiseValidationException(msg=msg)

            # Generic step parameter validation
            isFirstStep = i == 1
            self.validateTask(
                step,
                self.getTaskArguments(isFirstStep, isFirstStep
                                      and 'InputDataset' not in step))

            # Validate the existence of the configCache
            if step["ConfigCacheID"]:
                self.validateConfigCacheExists(
                    configID=step['ConfigCacheID'],
                    couchURL=couchUrl,
                    couchDBName=schema["CouchDBName"],
                    getOutputModules=True)

            # keeping different outputs with the same output module is not allowed
            if strToBool(step.get("KeepOutput", True)):
                configOutput = self.determineOutputModules(
                    configDoc=step["ConfigCacheID"],
                    couchURL=couchUrl,
                    couchDBName=schema["CouchDBName"])
                for outputModuleName in configOutput.keys():
                    if outputModuleName in outputMods:
                        msg = "StepChain does not support KeepOutput sharing the same output module."
                        msg += "\n%s re-using outputModule: %s" % (
                            stepName, outputModuleName)
                        self.raiseValidationException(msg=msg)
                    outputMods.add(outputModuleName)

            lastStep = step

        if lastStep is not None and 'KeepOutput' in lastStep and not strToBool(
                lastStep['KeepOutput']):
            msg = "Dropping the output of the last step is prohibited.\n"
            msg += "Set the 'KeepOutput' value to True and try again."
            self.raiseValidationException(msg=msg)
Пример #3
0
    def validateSchema(self, schema):
        """
        _validateSchema_

        Go over each step and make sure it matches validation parameters.

        Every "StepN" entry, for N from 1 to schema['StepChain'], must be
        present, must be a dictionary and must pass the generic task
        validation. A ConfigCacheID that is set has to exist, and a step
        keeping its output may not re-use an output module name of an earlier
        step. The last step must keep its output.

        :param schema: request dictionary; 'StepChain', 'CouchURL', 'CouchDBName',
            the optional 'ConfigCacheUrl' and the 'StepN' entries are read
        :return: None, problems are reported via self.raiseValidationException
        """
        # Names of the output modules seen in the steps validated so far
        outputMods = set()
        numSteps = schema['StepChain']
        couchUrl = schema.get("ConfigCacheUrl", None) or schema["CouchURL"]
        # Only gets a value once a step went through the loop; the closing
        # check is skipped when the request carries no step at all
        lastStep = None
        for i in range(1, numSteps + 1):
            stepName = "Step%s" % i
            if stepName not in schema:
                msg = "No Step%s entry present in the request" % i
                self.raiseValidationException(msg=msg)

            step = schema[stepName]
            # We can't handle non-dictionary steps
            if not isinstance(step, dict):
                msg = "Non-dictionary input for step in StepChain.\n"
                msg += "Could be an indicator of JSON error.\n"
                self.raiseValidationException(msg=msg)

            # Generic step parameter validation
            self.validateTask(step, self.getTaskArguments(i == 1, i == 1 and 'InputDataset' not in step))

            # Validate the existence of the configCache
            if step["ConfigCacheID"]:
                self.validateConfigCacheExists(configID=step['ConfigCacheID'],
                                               couchURL=couchUrl,
                                               couchDBName=schema["CouchDBName"],
                                               getOutputModules=True)

            # keeping different outputs with the same output module is not allowed
            if strToBool(step.get("KeepOutput", True)):
                configOutput = self.determineOutputModules(configDoc=step["ConfigCacheID"],
                                                           couchURL=couchUrl,
                                                           couchDBName=schema["CouchDBName"])
                for outputModuleName in configOutput.keys():
                    if outputModuleName in outputMods:
                        msg = "StepChain does not support KeepOutput sharing the same output module."
                        msg += "\n%s re-using outputModule: %s" % (stepName, outputModuleName)
                        self.raiseValidationException(msg=msg)
                    outputMods.add(outputModuleName)

            lastStep = step

        if lastStep is not None and 'KeepOutput' in lastStep and not strToBool(lastStep['KeepOutput']):
            msg = "Dropping the output of the last step is prohibited.\n"
            msg += "Set the 'KeepOutput' value to True and try again."
            self.raiseValidationException(msg=msg)
Пример #4
0
    def setupGeneratorTask(self, task, taskConf):
        """
        _setupGeneratorTask_

        Configure the given task as the initial generator task: it is set up
        as a "Production" processing task and, unless KeepOutput is switched
        off in taskConf, one merge task is added per output module.
        """
        cacheDocID = taskConf['ConfigCacheID']
        algoName = taskConf["SplittingAlgo"]
        algoParams = taskConf["SplittingArguments"]

        # Keyword arguments handed over to setupProcessingTask
        processingArgs = {
            'couchURL': self.couchURL,
            'couchDBName': self.couchDBName,
            'configDoc': cacheDocID,
            'splitAlgo': algoName,
            'configCacheUrl': self.configCacheUrl,
            'splitArgs': algoParams,
            'seeding': taskConf['Seeding'],
            'totalEvents': taskConf['RequestNumEvents'],
            'timePerEvent': self.timePerEvent,
            'memoryReq': taskConf.get('Memory', None),
            'sizePerEvent': self.sizePerEvent,
            'cmsswVersion': taskConf.get("CMSSWVersion", None),
            'scramArch': taskConf.get("ScramArch", None),
            'globalTag': taskConf.get("GlobalTag", None),
            'taskConf': taskConf,
        }
        outMods = self.setupProcessingTask(task, "Production", **processingArgs)

        # The output modules exist already, only the merge tasks are missing;
        # nothing is merged when the output is not kept
        if not strToBool(taskConf.get('KeepOutput', True)):
            return

        for moduleName in outMods.keys():
            self.addMergeTask(task, self.splittingAlgo, moduleName, "cmsRun1")

        return
Пример #5
0
    def setupGeneratorTask(self, task, taskConf):
        """
        _setupGeneratorTask_

        Set up the initial generator task as a "Production" processing task
        and create a merge task for each of its output modules, provided that
        taskConf does not disable KeepOutput.
        """
        docID = taskConf['ConfigCacheID']
        jobSplitAlgo = taskConf["SplittingAlgo"]
        jobSplitArgs = taskConf["SplittingArguments"]

        # Settings passed by keyword to setupProcessingTask
        productionSettings = dict(
            couchURL=self.couchURL,
            couchDBName=self.couchDBName,
            configDoc=docID,
            splitAlgo=jobSplitAlgo,
            configCacheUrl=self.configCacheUrl,
            splitArgs=jobSplitArgs,
            seeding=taskConf['Seeding'],
            totalEvents=taskConf['RequestNumEvents'],
            timePerEvent=self.timePerEvent,
            memoryReq=taskConf.get('Memory', None),
            sizePerEvent=self.sizePerEvent,
            cmsswVersion=taskConf.get("CMSSWVersion", None),
            scramArch=taskConf.get("ScramArch", None),
            globalTag=taskConf.get("GlobalTag", None),
            taskConf=taskConf)
        outMods = self.setupProcessingTask(task, "Production", **productionSettings)

        # outputModules were added already, we just want to create merge tasks here
        keepOutput = strToBool(taskConf.get('KeepOutput', True))
        if keepOutput:
            for modName in outMods.keys():
                self.addMergeTask(task, self.splittingAlgo, modName, "cmsRun1")

        return
Пример #6
0
    def setupTask(self, task, taskConf):
        """
        _setupTask_

        Configure the given task as a "Processing" task reading the
        InputDataset found in taskConf. A lumi mask and a pileup setup are
        applied when available, and one merge task per output module is
        created unless KeepOutput is disabled.
        """
        cacheDocID = taskConf["ConfigCacheID"]
        algoName = taskConf["SplittingAlgo"]
        algoParams = taskConf["SplittingArguments"]
        self.inputDataset = taskConf["InputDataset"]
        # Fall back to the primary dataset taken from the input dataset name
        # (the text between the leading character and the next "/")
        if not self.inputPrimaryDataset:
            datasetNoLeadChar = self.inputDataset[1:]
            self.inputPrimaryDataset = datasetNoLeadChar.split("/")[0]

        # Keyword arguments handed over to setupProcessingTask
        processingArgs = {
            'inputDataset': self.inputDataset,
            'couchURL': self.couchURL,
            'couchDBName': self.couchDBName,
            'configDoc': cacheDocID,
            'splitAlgo': algoName,
            'configCacheUrl': self.configCacheUrl,
            'splitArgs': algoParams,
            'timePerEvent': self.timePerEvent,
            'memoryReq': taskConf.get('Memory', None),
            'sizePerEvent': self.sizePerEvent,
            'cmsswVersion': taskConf.get("CMSSWVersion", None),
            'scramArch': taskConf.get("ScramArch", None),
            'globalTag': taskConf.get("GlobalTag", None),
            'taskConf': taskConf,
        }
        outMods = self.setupProcessingTask(task, "Processing", **processingArgs)

        # A LumiList given for this task wins over the workload level one
        lumiMask = taskConf.get("LumiList", self.workload.lumiList)
        if lumiMask:
            task.setLumiMask(lumiMask)

        pileupConfig = taskConf["PileupConfig"]
        if pileupConfig:
            self.setupPileup(task, taskConf['PileupConfig'])

        # The output modules exist already, only the merge tasks are missing;
        # nothing is merged when the output is not kept
        if not strToBool(taskConf.get('KeepOutput', True)):
            return

        for moduleName in outMods.keys():
            self.addMergeTask(task, self.splittingAlgo, moduleName, "cmsRun1")

        return
Пример #7
0
    def setupTask(self, task, taskConf):
        """
        _setupTask_

        Build a "Processing" task through setupProcessingTask, using the
        InputDataset of taskConf as its input. Afterwards the lumi mask and
        the pileup are set when defined, and the merge tasks are added for
        every output module as long as KeepOutput is not disabled.
        """
        docID = taskConf["ConfigCacheID"]
        jobSplitAlgo = taskConf["SplittingAlgo"]
        jobSplitArgs = taskConf["SplittingArguments"]
        self.inputDataset = taskConf["InputDataset"]
        # Use PD from the inputDataset if not provided in the task itself
        if not self.inputPrimaryDataset:
            self.inputPrimaryDataset = self.inputDataset[1:].partition("/")[0]

        # Settings passed by keyword to setupProcessingTask
        processingSettings = dict(
            inputDataset=self.inputDataset,
            couchURL=self.couchURL,
            couchDBName=self.couchDBName,
            configDoc=docID,
            splitAlgo=jobSplitAlgo,
            configCacheUrl=self.configCacheUrl,
            splitArgs=jobSplitArgs,
            timePerEvent=self.timePerEvent,
            memoryReq=taskConf.get('Memory', None),
            sizePerEvent=self.sizePerEvent,
            cmsswVersion=taskConf.get("CMSSWVersion", None),
            scramArch=taskConf.get("ScramArch", None),
            globalTag=taskConf.get("GlobalTag", None),
            taskConf=taskConf)
        outMods = self.setupProcessingTask(task, "Processing", **processingSettings)

        # The workload level lumi list is only the default value
        lumiMask = taskConf.get("LumiList", self.workload.lumiList)
        if lumiMask:
            task.setLumiMask(lumiMask)

        if taskConf["PileupConfig"]:
            self.setupPileup(task, taskConf['PileupConfig'])

        # outputModules were added already, we just want to create merge tasks here
        keepOutput = strToBool(taskConf.get('KeepOutput', True))
        if keepOutput:
            for modName in outMods.keys():
                self.addMergeTask(task, self.splittingAlgo, modName, "cmsRun1")

        return
Пример #8
0
    def assignWorkload(self, requestName, kwargs):
        """
        Make all the necessary changes in the Workload to reflect the new assignment.

        The workload of the request is loaded, the assignment parameters are
        validated and applied to it, the workload is saved and, at the end,
        the assigned values are written to the request document in Couch.

        :param requestName: name of the request being assigned
        :param kwargs: dictionary with the assignment parameters. "MergedLFNBase",
            "UnmergedLFNBase" and "Teams" are read unconditionally; the other
            keys are optional and fall back to a default value
        :raises cherrypy.HTTPError: with status 400 when the site white and
            black lists overlap, an output dataset name is bad or one of the
            subscription parameters is invalid
        """
        request = GetRequest.getRequestByName(requestName)
        helper = Utilities.loadWorkload(request)

        # Validate the different parts of the processed dataset
        processedDatasetParts = {
            "AcquisitionEra": helper.getAcquisitionEra(),
            "ProcessingString": helper.getProcessingString(),
            "ProcessingVersion": helper.getProcessingVersion(),
        }
        for field, origValue in processedDatasetParts.items():
            if field in kwargs and isinstance(kwargs[field], dict):
                for value in kwargs[field].values():
                    self.validate(value, field)
            else:
                # The field name is passed along here too, just like in the
                # dictionary branch above
                self.validate(kwargs.get(field, origValue), field)

        # Set white list and black list
        whiteList = kwargs.get("SiteWhitelist", [])
        blackList = kwargs.get("SiteBlacklist", [])
        if not isinstance(whiteList, list):
            whiteList = [whiteList]
        if not isinstance(blackList, list):
            blackList = [blackList]
        helper.setSiteWildcardsLists(siteWhitelist=whiteList, siteBlacklist=blackList, wildcardDict=self.wildcardSites)
        res = set(whiteList) & set(blackList)
        if res:
            raise cherrypy.HTTPError(400, "White and blacklist the same site is not allowed %s" % list(res))
        # Set AcquisitionEra, ProcessingString and ProcessingVersion
        # which could be json encoded dicts
        if "AcquisitionEra" in kwargs:
            helper.setAcquisitionEra(kwargs["AcquisitionEra"])
        if "ProcessingString" in kwargs:
            helper.setProcessingString(kwargs["ProcessingString"])
        if "ProcessingVersion" in kwargs:
            helper.setProcessingVersion(kwargs["ProcessingVersion"])

        # Now verify the output datasets
        datatier = []
        outputDatasets = helper.listOutputDatasets()
        for dataset in outputDatasets:
            # The third and fourth "/"-separated tokens are used as the
            # processed dataset and the datatier respectively
            tokens = dataset.split("/")
            procds = tokens[2]
            datatier.append(tokens[3])
            try:
                WMCore.Lexicon.procdataset(procds)
            except AssertionError as ex:
                raise cherrypy.HTTPError(400, "Bad output dataset name, check the processed dataset.\n %s" % str(ex))

        # Verify whether the output datatiers are available in DBS
        self.validateDatatier(datatier, dbsUrl=helper.getDbsUrl())

        # FIXME not validated
        helper.setLFNBase(kwargs["MergedLFNBase"], kwargs["UnmergedLFNBase"])
        helper.setMergeParameters(
            int(kwargs.get("MinMergeSize", 2147483648)),
            int(kwargs.get("MaxMergeSize", 4294967296)),
            int(kwargs.get("MaxMergeEvents", 50000)),
        )
        helper.setupPerformanceMonitoring(
            kwargs.get("MaxRSS", None),
            kwargs.get("MaxVSize", None),
            kwargs.get("SoftTimeout", None),
            kwargs.get("GracePeriod", None),
        )

        # Check whether we should check location for the data
        helper.setTrustLocationFlag(
            inputFlag=strToBool(kwargs.get("TrustSitelists", False)),
            pileupFlag=strToBool(kwargs.get("TrustPUSitelists", False)),
        )
        helper.setAllowOpportunistic(allowOpport=strToBool(kwargs.get("AllowOpportunistic", False)))

        # Set phedex subscription information
        custodialList = kwargs.get("CustodialSites", [])
        nonCustodialList = kwargs.get("NonCustodialSites", [])
        autoApproveList = kwargs.get("AutoApproveSubscriptionSites", [])
        for site in autoApproveList:
            if site.endswith("_MSS"):
                raise cherrypy.HTTPError(400, "Auto-approval to MSS endpoint not allowed %s" % autoApproveList)
        subscriptionPriority = kwargs.get("SubscriptionPriority", "Low")
        if subscriptionPriority not in ["Low", "Normal", "High"]:
            raise cherrypy.HTTPError(400, "Invalid subscription priority %s" % subscriptionPriority)
        custodialType = kwargs.get("CustodialSubType", "Replica")
        if custodialType not in ["Move", "Replica"]:
            raise cherrypy.HTTPError(400, "Invalid custodial subscription type %s" % custodialType)
        nonCustodialType = kwargs.get("NonCustodialSubType", "Replica")
        if nonCustodialType not in ["Move", "Replica"]:
            raise cherrypy.HTTPError(400, "Invalid noncustodial subscription type %s" % nonCustodialType)
        if "CustodialGroup" in kwargs and not isinstance(kwargs["CustodialGroup"], basestring):
            raise cherrypy.HTTPError(400, "Invalid CustodialGroup format %s" % kwargs["CustodialGroup"])
        if "NonCustodialGroup" in kwargs and not isinstance(kwargs["NonCustodialGroup"], basestring):
            raise cherrypy.HTTPError(400, "Invalid NonCustodialGroup format %s" % kwargs["NonCustodialGroup"])
        if "DeleteFromSource" in kwargs and not isinstance(kwargs["DeleteFromSource"], bool):
            raise cherrypy.HTTPError(400, "Invalid DeleteFromSource format %s" % kwargs["DeleteFromSource"])

        helper.setSubscriptionInformationWildCards(
            wildcardDict=self.wildcardSites,
            custodialSites=custodialList,
            nonCustodialSites=nonCustodialList,
            autoApproveSites=autoApproveList,
            custodialSubType=custodialType,
            nonCustodialSubType=nonCustodialType,
            custodialGroup=kwargs.get("CustodialGroup", "DataOps"),
            nonCustodialGroup=kwargs.get("NonCustodialGroup", "DataOps"),
            priority=subscriptionPriority,
            deleteFromSource=kwargs.get("DeleteFromSource", False),
        )

        # Block closing information, the values currently set in the workload
        # are the defaults
        blockCloseMaxWaitTime = int(kwargs.get("BlockCloseMaxWaitTime", helper.getBlockCloseMaxWaitTime()))
        blockCloseMaxFiles = int(kwargs.get("BlockCloseMaxFiles", helper.getBlockCloseMaxFiles()))
        blockCloseMaxEvents = int(kwargs.get("BlockCloseMaxEvents", helper.getBlockCloseMaxEvents()))
        blockCloseMaxSize = int(kwargs.get("BlockCloseMaxSize", helper.getBlockCloseMaxSize()))
        helper.setBlockCloseSettings(blockCloseMaxWaitTime, blockCloseMaxFiles, blockCloseMaxEvents, blockCloseMaxSize)

        helper.setMemoryAndCores(kwargs.get("Memory"), kwargs.get("Multicore"))
        helper.setDashboardActivity(kwargs.get("Dashboard", ""))
        helper.setTaskProperties(kwargs)

        Utilities.saveWorkload(helper, request["RequestWorkflow"], self.wmstatWriteURL)

        # update AcquisitionEra in the Couch document (#4380)
        # request object returned above from Oracle doesn't have information Couch
        # database
        reqDetails = Utilities.requestDetails(request["RequestName"])
        couchDb = Database(reqDetails["CouchWorkloadDBName"], reqDetails["CouchURL"])
        couchDb.updateDocument(
            request["RequestName"],
            "ReqMgr",
            "updaterequest",
            fields={
                "AcquisitionEra": reqDetails["AcquisitionEra"],
                "ProcessingVersion": reqDetails["ProcessingVersion"],
                "CustodialSites": custodialList,
                "NonCustodialSites": nonCustodialList,
                "AutoApproveSubscriptionSites": autoApproveList,
                "SubscriptionPriority": subscriptionPriority,
                "CustodialSubType": custodialType,
                "NonCustodialSubType": nonCustodialType,
                "CustodialGroup": kwargs.get("CustodialGroup", "DataOps"),
                "NonCustodialGroup": kwargs.get("NonCustodialGroup", "DataOps"),
                "DeleteFromSource": kwargs.get("DeleteFromSource", False),
                "Teams": kwargs["Teams"],
                "OutputDatasets": outputDatasets,
                "SiteWhitelist": whiteList,
                "SiteBlacklist": blackList,
                "MergedLFNBase": kwargs["MergedLFNBase"],
                "UnmergedLFNBase": kwargs["UnmergedLFNBase"],
                "Dashboard": kwargs.get("Dashboard", ""),
                "TrustSitelists": kwargs.get("TrustSitelists", False),
                "TrustPUSitelists": kwargs.get("TrustPUSitelists", False),
                "AllowOpportunistic": kwargs.get("AllowOpportunistic", False),
            },
            useBody=True,
        )
Пример #9
0
    def assignWorkload(self, requestName, kwargs):
        """
        Apply the assignment parameters to the workload of a request.

        The workload is loaded and the arguments are validated. Every
        assignment setting found in kwargs (or its default) is then pushed
        into the workload, the workload is saved and the request document in
        Couch is updated with the assigned values.

        The failures detected in this method itself are raised as
        cherrypy.HTTPError with status 400.
        """
        reqInfo = GetRequest.getRequestByName(requestName)
        workload = Utilities.loadWorkload(reqInfo)

        try:
            workload.validateArgumentForAssignment(kwargs)
        except WMSpecFactoryException as ex:
            raise cherrypy.HTTPError(400, str(ex.message()))
        except Exception:
            raise cherrypy.HTTPError(400, "Unhandled error: %s" % traceback.format_exc())

        # Validate the different parts of the processed dataset
        datasetFields = ("AcquisitionEra", "ProcessingString", "ProcessingVersion")
        processedDatasetParts = dict((name, kwargs.get(name)) for name in datasetFields)
        for field, fallback in processedDatasetParts.iteritems():
            candidate = kwargs.get(field, fallback)
            if isinstance(candidate, dict):
                # Dictionary input: every value is validated on its own
                for entry in candidate.values():
                    self.validate(entry, field)
            else:
                self.validate(candidate, field)

        # Set white list and black list, a single value is wrapped into a list
        allowedSites = kwargs.get("SiteWhitelist", [])
        bannedSites = kwargs.get("SiteBlacklist", [])
        allowedSites = allowedSites if isinstance(allowedSites, list) else [allowedSites]
        bannedSites = bannedSites if isinstance(bannedSites, list) else [bannedSites]
        workload.setSiteWildcardsLists(siteWhitelist=allowedSites, siteBlacklist=bannedSites,
                                       wildcardDict=self.wildcardSites)
        inBothLists = set(allowedSites) & set(bannedSites)
        if inBothLists:
            raise cherrypy.HTTPError(400, "White and blacklist the same site is not allowed %s" % list(inBothLists))

        workload.setAcquisitionEra(kwargs.get("AcquisitionEra", None))
        workload.setProcessingString(kwargs.get("ProcessingString", None))
        workload.setProcessingVersion(kwargs.get("ProcessingVersion", None))

        # Now verify the output datasets
        tiers = []
        producedDatasets = workload.listOutputDatasets()
        for datasetName in producedDatasets:
            # Third token is checked as processed dataset, fourth one is the datatier
            pieces = datasetName.split("/")
            processedName = pieces[2]
            tiers.append(pieces[3])
            try:
                WMCore.Lexicon.procdataset(processedName)
            except AssertionError as ex:
                errorMsg = "Bad output dataset name, check the processed dataset.\n %s" % str(ex)
                raise cherrypy.HTTPError(400, errorMsg)
        # Verify whether the output datatiers are available in DBS
        self.validateDatatier(tiers, dbsUrl=workload.getDbsUrl())

        # FIXME not validated
        workload.setLFNBase(kwargs["MergedLFNBase"], kwargs["UnmergedLFNBase"])
        minMergeSize = int(kwargs.get("MinMergeSize", 2147483648))
        maxMergeSize = int(kwargs.get("MaxMergeSize", 4294967296))
        maxMergeEvents = int(kwargs.get("MaxMergeEvents", 50000))
        workload.setMergeParameters(minMergeSize, maxMergeSize, maxMergeEvents)

        monitoringKeys = ("MaxRSS", "MaxVSize", "SoftTimeout", "GracePeriod")
        workload.setupPerformanceMonitoring(*[kwargs.get(key, None) for key in monitoringKeys])

        # Check whether we should check location for the data
        trustInput = strToBool(kwargs.get("TrustSitelists", False))
        trustPileup = strToBool(kwargs.get("TrustPUSitelists", False))
        workload.setTrustLocationFlag(inputFlag=trustInput, pileupFlag=trustPileup)
        workload.setAllowOpportunistic(allowOpport=strToBool(kwargs.get("AllowOpportunistic", False)))

        # Set phedex subscription information
        custodialSites = kwargs.get("CustodialSites", [])
        nonCustodialSites = kwargs.get("NonCustodialSites", [])
        autoApproveSites = kwargs.get("AutoApproveSubscriptionSites", [])
        priority = kwargs.get("SubscriptionPriority", "Low")
        custodialSubType = kwargs.get("CustodialSubType", "Replica")
        nonCustodialSubType = kwargs.get("NonCustodialSubType", "Replica")

        subscriptionArgs = {
            "wildcardDict": self.wildcardSites,
            "custodialSites": custodialSites,
            "nonCustodialSites": nonCustodialSites,
            "autoApproveSites": autoApproveSites,
            "custodialSubType": custodialSubType,
            "nonCustodialSubType": nonCustodialSubType,
            "custodialGroup": kwargs.get("CustodialGroup", "DataOps"),
            "nonCustodialGroup": kwargs.get("NonCustodialGroup", "DataOps"),
            "priority": priority,
            "deleteFromSource": kwargs.get("DeleteFromSource", False),
        }
        workload.setSubscriptionInformationWildCards(**subscriptionArgs)

        # Block closing information, the values currently set in the workload
        # are the defaults
        workload.setBlockCloseSettings(
            int(kwargs.get("BlockCloseMaxWaitTime", workload.getBlockCloseMaxWaitTime())),
            int(kwargs.get("BlockCloseMaxFiles", workload.getBlockCloseMaxFiles())),
            int(kwargs.get("BlockCloseMaxEvents", workload.getBlockCloseMaxEvents())),
            int(kwargs.get("BlockCloseMaxSize", workload.getBlockCloseMaxSize())))

        workload.setMemory(kwargs.get("Memory"))
        workload.setCores(kwargs.get("Multicore"))
        workload.setDashboardActivity(kwargs.get("Dashboard", ""))
        workload.setTaskProperties(kwargs)

        Utilities.saveWorkload(workload, reqInfo['RequestWorkflow'], self.wmstatWriteURL)

        # update AcquisitionEra in the Couch document (#4380)
        # request object returned above from Oracle doesn't have information Couch
        # database
        details = Utilities.requestDetails(reqInfo["RequestName"])
        couchDb = Database(details["CouchWorkloadDBName"], details["CouchURL"])
        # kwargs is read again on purpose: it was handed to setTaskProperties
        # above, so the values are taken as they are at this point
        docFields = {
            "AcquisitionEra": details["AcquisitionEra"],
            "ProcessingVersion": details["ProcessingVersion"],
            "CustodialSites": custodialSites,
            "NonCustodialSites": nonCustodialSites,
            "AutoApproveSubscriptionSites": autoApproveSites,
            "SubscriptionPriority": priority,
            "CustodialSubType": custodialSubType,
            "NonCustodialSubType": nonCustodialSubType,
            "CustodialGroup": kwargs.get("CustodialGroup", "DataOps"),
            "NonCustodialGroup": kwargs.get("NonCustodialGroup", "DataOps"),
            "DeleteFromSource": kwargs.get("DeleteFromSource", False),
            "Teams": kwargs["Teams"],
            "OutputDatasets": producedDatasets,
            "SiteWhitelist": allowedSites,
            "SiteBlacklist": bannedSites,
            "MergedLFNBase": kwargs["MergedLFNBase"],
            "UnmergedLFNBase": kwargs["UnmergedLFNBase"],
            "Dashboard": kwargs.get("Dashboard", ""),
            "TrustSitelists": kwargs.get("TrustSitelists", False),
            "TrustPUSitelists": kwargs.get("TrustPUSitelists", False),
            "AllowOpportunistic": kwargs.get("AllowOpportunistic", False),
        }
        couchDb.updateDocument(reqInfo["RequestName"], "ReqMgr", "updaterequest",
                               fields=docFields, useBody=True)
Пример #10
0
    def assignWorkload(self, requestName, kwargs):
        """
        Make all the necessary changes in the Workload to reflect the new assignment.

        The workload of the named request is loaded, the assignment arguments
        are validated and applied to the workload helper, the workload is
        saved, and a subset of the values is then written to the request's
        Couch document.

        :param requestName: name of the request being assigned.
        :param kwargs: dictionary with the assignment arguments.
            "AcquisitionEra", "ProcessingVersion", "MergedLFNBase",
            "UnmergedLFNBase" and "Teams" are read unconditionally (a missing
            key raises KeyError); every other key falls back to a default.
        :raises cherrypy.HTTPError: with status 400 when a site is both
            white- and blacklisted, when the processed dataset part of an
            output dataset is rejected by WMCore.Lexicon.procdataset, when an
            auto-approve site ends with '_MSS', or when the subscription
            priority / subscription types are not among the accepted values.
        """
        request = GetRequest.getRequestByName(requestName)
        helper = Utilities.loadWorkload(request)

        # Validate the different parts of the processed dataset
        processedDatasetParts = ["AcquisitionEra", "ProcessingVersion"]
        if kwargs.get("ProcessingString", None):
            processedDatasetParts.append("ProcessingString")
        for field in processedDatasetParts:
            # isinstance() instead of an exact type comparison, so that dict
            # subclasses are also validated value by value.
            if isinstance(kwargs[field], dict):
                for value in kwargs[field].values():
                    self.validate(value, field)
            else:
                self.validate(kwargs[field], field)

        # Set white list and black list; a single value is wrapped in a list
        whiteList = kwargs.get("SiteWhitelist", [])
        blackList = kwargs.get("SiteBlacklist", [])
        if not isinstance(whiteList, list):
            whiteList = [whiteList]
        if not isinstance(blackList, list):
            blackList = [blackList]
        helper.setSiteWildcardsLists(siteWhitelist=whiteList,
                                     siteBlacklist=blackList,
                                     wildcardDict=self.wildcardSites)
        # A site must not show up in both lists
        res = set(whiteList) & set(blackList)
        if res:
            raise cherrypy.HTTPError(
                400, "White and blacklist the same site is not allowed %s" %
                list(res))
        # Set ProcessingVersion and AcquisitionEra, which could be json encoded dicts
        helper.setProcessingVersion(kwargs["ProcessingVersion"])
        helper.setAcquisitionEra(kwargs["AcquisitionEra"])
        helper.setProcessingString(kwargs.get("ProcessingString", None))

        # Now verify the output datasets. The name is split on "/": index 2
        # is used as the processed dataset and index 3 as the datatier.
        datatier = []
        outputDatasets = helper.listOutputDatasets()
        for dataset in outputDatasets:
            tokens = dataset.split("/")
            procds = tokens[2]
            datatier.append(tokens[3])
            try:
                WMCore.Lexicon.procdataset(procds)
            except AssertionError as ex:
                raise cherrypy.HTTPError(
                    400,
                    "Bad output dataset name, check the processed dataset.\n %s"
                    % str(ex))

        # Verify whether the output datatiers are available in DBS
        self.validateDatatier(datatier, dbsUrl=helper.getDbsUrl())

        # FIXME not validated
        helper.setLFNBase(kwargs["MergedLFNBase"], kwargs["UnmergedLFNBase"])
        helper.setMergeParameters(int(kwargs.get("MinMergeSize", 2147483648)),
                                  int(kwargs.get("MaxMergeSize", 4294967296)),
                                  int(kwargs.get("MaxMergeEvents", 50000)))
        helper.setupPerformanceMonitoring(kwargs.get("MaxRSS", None),
                                          kwargs.get("MaxVSize", None),
                                          kwargs.get("SoftTimeout", None),
                                          kwargs.get("GracePeriod", None))

        # Check whether we should check location for the data; the flag is
        # only set on the workload when the option evaluates to true.
        useAAA = strToBool(kwargs.get("useSiteListAsLocation", False))
        if useAAA:
            helper.setLocationDataSourceFlag(flag=useAAA)

        # Set phedex subscription information
        custodialList = kwargs.get("CustodialSites", [])
        nonCustodialList = kwargs.get("NonCustodialSites", [])
        autoApproveList = kwargs.get("AutoApproveSubscriptionSites", [])
        for site in autoApproveList:
            if site.endswith('_MSS'):
                raise cherrypy.HTTPError(
                    400, "Auto-approval to MSS endpoint not allowed %s" %
                    autoApproveList)
        subscriptionPriority = kwargs.get("SubscriptionPriority", "Low")
        if subscriptionPriority not in ["Low", "Normal", "High"]:
            raise cherrypy.HTTPError(
                400, "Invalid subscription priority %s" % subscriptionPriority)
        custodialType = kwargs.get("CustodialSubType", "Replica")
        if custodialType not in ["Move", "Replica"]:
            raise cherrypy.HTTPError(
                400, "Invalid custodial subscription type %s" % custodialType)
        nonCustodialType = kwargs.get("NonCustodialSubType", "Replica")
        if nonCustodialType not in ["Move", "Replica"]:
            raise cherrypy.HTTPError(
                400,
                "Invalid noncustodial subscription type %s" % nonCustodialType)

        helper.setSubscriptionInformationWildCards(
            wildcardDict=self.wildcardSites,
            custodialSites=custodialList,
            nonCustodialSites=nonCustodialList,
            autoApproveSites=autoApproveList,
            custodialSubType=custodialType,
            nonCustodialSubType=nonCustodialType,
            priority=subscriptionPriority)

        # Block closing information; values missing from kwargs default to
        # what the workload currently reports.
        blockCloseMaxWaitTime = int(
            kwargs.get("BlockCloseMaxWaitTime",
                       helper.getBlockCloseMaxWaitTime()))
        blockCloseMaxFiles = int(
            kwargs.get("BlockCloseMaxFiles", helper.getBlockCloseMaxFiles()))
        blockCloseMaxEvents = int(
            kwargs.get("BlockCloseMaxEvents", helper.getBlockCloseMaxEvents()))
        blockCloseMaxSize = int(
            kwargs.get("BlockCloseMaxSize", helper.getBlockCloseMaxSize()))

        helper.setBlockCloseSettings(blockCloseMaxWaitTime, blockCloseMaxFiles,
                                     blockCloseMaxEvents, blockCloseMaxSize)

        helper.setDashboardActivity(kwargs.get("Dashboard", ""))
        # Set Task properties if they exist
        # TODO: need to define the task format (maybe kwargs["tasks"]?)
        helper.setTaskProperties(kwargs)

        Utilities.saveWorkload(helper, request['RequestWorkflow'],
                               self.wmstatWriteURL)

        # Update AcquisitionEra in the Couch document (#4380).
        # The request object returned above from Oracle doesn't have the
        # information stored in the Couch database, so the request details
        # are fetched separately.
        reqDetails = Utilities.requestDetails(request["RequestName"])
        couchDb = Database(reqDetails["CouchWorkloadDBName"],
                           reqDetails["CouchURL"])
        couchDb.updateDocument(request["RequestName"],
                               "ReqMgr",
                               "updaterequest",
                               fields={
                                   "AcquisitionEra":
                                   reqDetails["AcquisitionEra"],
                                   "ProcessingVersion":
                                   reqDetails["ProcessingVersion"],
                                   "CustodialSites":
                                   custodialList,
                                   "NonCustodialSites":
                                   nonCustodialList,
                                   "AutoApproveSubscriptionSites":
                                   autoApproveList,
                                   "SubscriptionPriority":
                                   subscriptionPriority,
                                   "CustodialSubType":
                                   custodialType,
                                   "NonCustodialSubType":
                                   nonCustodialType,
                                   "Teams":
                                   kwargs["Teams"],
                                   "OutputDatasets":
                                   outputDatasets,
                                   "SiteWhitelist":
                                   whiteList,
                                   "SiteBlacklist":
                                   blackList
                               },
                               useBody=True)
Пример #11
0
    def setupNextSteps(self, task, origArgs):
        """
        _setupNextSteps_

        Modify the step one task to include N more CMSSW steps and
        chain the output between all of the steps.

        For every step number from 2 up to self.stepChain a new "cmsRunN"
        CMSSW step is added under the step named by that step's 'InputStep',
        configured from the step arguments (falling back to the values stored
        on self), and its pileup and output modules are set up.

        :param task: task object that already holds the "cmsRun1" step.
        :param origArgs: request arguments; must contain one "StepN"
            dictionary per step, each of them with a 'StepName'.
        :raises TypeError: when a step's 'InputStep' does not name a step
            that has been registered so far (the mapping lookup yields None).
        """
        configCacheUrl = self.configCacheUrl or self.couchURL
        # Maps a StepName to its ("StepN", "cmsRunN") pair
        stepMapping = {}
        stepMapping.setdefault(origArgs['Step1']['StepName'],
                               ('Step1', 'cmsRun1'))

        # Remains None when there is no step beyond Step1, in which case
        # nothing has to be closed out after the loop.
        childCmsswStepHelper = None

        for i in range(2, self.stepChain + 1):
            currentStepNumber = "Step%d" % i
            currentCmsRun = "cmsRun%d" % i
            stepMapping.setdefault(origArgs[currentStepNumber]['StepName'],
                                   (currentStepNumber, currentCmsRun))
            # Work on a shallow copy so the defaults filled in below do not
            # leak into origArgs; dict() behaves the same on Python 2 and 3.
            taskConf = dict(origArgs[currentStepNumber])

            parentStepNumber, parentCmsRun = stepMapping.get(taskConf['InputStep'])
            parentCmsswStep = task.getStep(parentCmsRun)
            parentCmsswStepHelper = parentCmsswStep.getTypeHelper()

            # Set default values for the task parameters
            self.modifyTaskConfiguration(taskConf, False,
                                         'InputDataset' not in taskConf)
            globalTag = taskConf.get("GlobalTag", self.globalTag)
            frameworkVersion = taskConf.get("CMSSWVersion",
                                            self.frameworkVersion)
            scramArch = taskConf.get("ScramArch", self.scramArch)

            childCmssw = parentCmsswStep.addTopStep(currentCmsRun)
            childCmssw.setStepType("CMSSW")
            template = StepFactory.getStepTemplate("CMSSW")
            template(childCmssw.data)

            childCmsswStepHelper = childCmssw.getTypeHelper()
            childCmsswStepHelper.setGlobalTag(globalTag)
            childCmsswStepHelper.setupChainedProcessing(
                parentCmsRun, taskConf['InputFromOutputModule'])
            childCmsswStepHelper.cmsswSetup(frameworkVersion,
                                            softwareEnvironment="",
                                            scramArch=scramArch)
            childCmsswStepHelper.setConfigCache(configCacheUrl,
                                                taskConf['ConfigCacheID'],
                                                self.couchDBName)

            # Pileup check
            taskConf["PileupConfig"] = parsePileupConfig(
                taskConf["MCPileup"], taskConf["DataPileup"])
            if taskConf["PileupConfig"]:
                self.setupPileup(task, taskConf['PileupConfig'])

            # Handling the output modules. The parent's setting is read from
            # the original (unmodified) arguments of the parent step.
            parentKeepOutput = strToBool(origArgs[parentStepNumber].get(
                'KeepOutput', True))
            parentCmsswStepHelper.keepOutput(parentKeepOutput)
            childKeepOutput = strToBool(taskConf.get('KeepOutput', True))
            childCmsswStepHelper.keepOutput(childKeepOutput)
            self.setupOutputModules(task, taskConf["ConfigCacheID"],
                                    currentCmsRun, childKeepOutput,
                                    taskConf['StepName'])

        # Closing out the task configuration. The last step output must be saved/merged
        if childCmsswStepHelper is not None:
            childCmsswStepHelper.keepOutput(True)

        return
Пример #12
0
    def assignWorkload(self, requestName, kwargs):
        """
        Make all the necessary changes in the Workload to reflect the new assignment.

        The workload of the named request is loaded, the assignment arguments
        are validated and applied to the workload helper, the workload is
        saved, and a subset of the values is then written to the request's
        Couch document.

        :param requestName: name of the request being assigned.
        :param kwargs: dictionary with the assignment arguments.
            "AcquisitionEra", "ProcessingVersion", "MergedLFNBase",
            "UnmergedLFNBase" and "Teams" are read unconditionally (a missing
            key raises KeyError); every other key falls back to a default.
        :raises cherrypy.HTTPError: with status 400 when a site is both
            white- and blacklisted, when the processed dataset part of an
            output dataset is rejected by WMCore.Lexicon.procdataset, when an
            auto-approve site ends with '_MSS', or when the subscription
            priority / subscription types are not among the accepted values.
        """
        request = GetRequest.getRequestByName(requestName)
        helper = Utilities.loadWorkload(request)

        # Validate the different parts of the processed dataset
        processedDatasetParts = ["AcquisitionEra", "ProcessingVersion"]
        if kwargs.get("ProcessingString", None):
            processedDatasetParts.append("ProcessingString")
        for field in processedDatasetParts:
            # isinstance() instead of an exact type comparison, so that dict
            # subclasses are also validated value by value.
            if isinstance(kwargs[field], dict):
                for value in kwargs[field].values():
                    self.validate(value, field)
            else:
                self.validate(kwargs[field], field)

        # Set white list and black list; a single value is wrapped in a list
        whiteList = kwargs.get("SiteWhitelist", [])
        blackList = kwargs.get("SiteBlacklist", [])
        if not isinstance(whiteList, list):
            whiteList = [whiteList]
        if not isinstance(blackList, list):
            blackList = [blackList]
        helper.setSiteWildcardsLists(siteWhitelist=whiteList, siteBlacklist=blackList,
                                     wildcardDict=self.wildcardSites)
        # A site must not show up in both lists
        res = set(whiteList) & set(blackList)
        if res:
            raise cherrypy.HTTPError(400, "White and blacklist the same site is not allowed %s" % list(res))
        # Set ProcessingVersion and AcquisitionEra, which could be json encoded dicts
        helper.setProcessingVersion(kwargs["ProcessingVersion"])
        helper.setAcquisitionEra(kwargs["AcquisitionEra"])
        helper.setProcessingString(kwargs.get("ProcessingString", None))

        # Now verify the output datasets. The name is split on "/" and
        # index 2 is used as the processed dataset.
        outputDatasets = helper.listOutputDatasets()
        for dataset in outputDatasets:
            tokens = dataset.split("/")
            procds = tokens[2]
            try:
                WMCore.Lexicon.procdataset(procds)
            except AssertionError as ex:
                raise cherrypy.HTTPError(400,
                                         "Bad output dataset name, check the processed dataset.\n %s" %
                                         str(ex))

        # FIXME not validated
        helper.setLFNBase(kwargs["MergedLFNBase"], kwargs["UnmergedLFNBase"])
        helper.setMergeParameters(int(kwargs.get("MinMergeSize", 2147483648)),
                                  int(kwargs.get("MaxMergeSize", 4294967296)),
                                  int(kwargs.get("MaxMergeEvents", 50000)))
        helper.setupPerformanceMonitoring(kwargs.get("MaxRSS", None),
                                          kwargs.get("MaxVSize", None),
                                          kwargs.get("SoftTimeout", None),
                                          kwargs.get("GracePeriod", None))

        # Check whether we should check location for the data; the flag is
        # only set on the workload when the option evaluates to true.
        useAAA = strToBool(kwargs.get("useSiteListAsLocation", False))
        if useAAA:
            helper.setLocationDataSourceFlag(flag=useAAA)

        # Set phedex subscription information
        custodialList = kwargs.get("CustodialSites", [])
        nonCustodialList = kwargs.get("NonCustodialSites", [])
        autoApproveList = kwargs.get("AutoApproveSubscriptionSites", [])
        for site in autoApproveList:
            if site.endswith('_MSS'):
                raise cherrypy.HTTPError(400, "Auto-approval to MSS endpoint not allowed %s" % autoApproveList)
        subscriptionPriority = kwargs.get("SubscriptionPriority", "Low")
        if subscriptionPriority not in ["Low", "Normal", "High"]:
            raise cherrypy.HTTPError(400, "Invalid subscription priority %s" % subscriptionPriority)
        custodialType = kwargs.get("CustodialSubType", "Replica")
        if custodialType not in ["Move", "Replica"]:
            raise cherrypy.HTTPError(400, "Invalid custodial subscription type %s" % custodialType)
        nonCustodialType = kwargs.get("NonCustodialSubType", "Replica")
        if nonCustodialType not in ["Move", "Replica"]:
            raise cherrypy.HTTPError(400, "Invalid noncustodial subscription type %s" % nonCustodialType)

        helper.setSubscriptionInformationWildCards(wildcardDict=self.wildcardSites,
                                                   custodialSites=custodialList,
                                                   nonCustodialSites=nonCustodialList,
                                                   autoApproveSites=autoApproveList,
                                                   custodialSubType=custodialType,
                                                   nonCustodialSubType=nonCustodialType,
                                                   priority=subscriptionPriority)

        # Block closing information; values missing from kwargs default to
        # what the workload currently reports.
        blockCloseMaxWaitTime = int(kwargs.get("BlockCloseMaxWaitTime", helper.getBlockCloseMaxWaitTime()))
        blockCloseMaxFiles = int(kwargs.get("BlockCloseMaxFiles", helper.getBlockCloseMaxFiles()))
        blockCloseMaxEvents = int(kwargs.get("BlockCloseMaxEvents", helper.getBlockCloseMaxEvents()))
        blockCloseMaxSize = int(kwargs.get("BlockCloseMaxSize", helper.getBlockCloseMaxSize()))

        helper.setBlockCloseSettings(blockCloseMaxWaitTime, blockCloseMaxFiles,
                                     blockCloseMaxEvents, blockCloseMaxSize)

        helper.setDashboardActivity(kwargs.get("Dashboard", ""))
        # Set Task properties if they exist
        # TODO: need to define the task format (maybe kwargs["tasks"]?)
        helper.setTaskProperties(kwargs)

        Utilities.saveWorkload(helper, request['RequestWorkflow'], self.wmstatWriteURL)

        # Update AcquisitionEra in the Couch document (#4380).
        # The request object returned above from Oracle doesn't have the
        # information stored in the Couch database, so the request details
        # are fetched separately.
        reqDetails = Utilities.requestDetails(request["RequestName"])
        couchDb = Database(reqDetails["CouchWorkloadDBName"], reqDetails["CouchURL"])
        couchDb.updateDocument(request["RequestName"], "ReqMgr", "updaterequest",
                               fields={"AcquisitionEra": reqDetails["AcquisitionEra"],
                                       "ProcessingVersion": reqDetails["ProcessingVersion"],
                                       "CustodialSites": custodialList,
                                       "NonCustodialSites": nonCustodialList,
                                       "AutoApproveSubscriptionSites": autoApproveList,
                                       "SubscriptionPriority": subscriptionPriority,
                                       "CustodialSubType": custodialType,
                                       "NonCustodialSubType": nonCustodialType,
                                       "Teams": kwargs["Teams"],
                                       "OutputDatasets": outputDatasets,
                                       "SiteWhitelist": whiteList,
                                       "SiteBlacklist": blackList},
                               useBody=True)