示例#1
0
    def _parseConfigTemplate(self, templatePath, cfg=None):
        """Load a system's ConfigTemplate.cfg and merge it into ``cfg`` under ``/<system>``.

        :param str templatePath: path to the folder containing a ConfigTemplate.cfg file
        :param CFG cfg: cfg to merge with the systems config, a new CFG is used when None
        :returns: S_OK wrapping the resulting CFG, or S_ERROR if the template file does not exist
        """
        if cfg is None:
            cfg = CFG()

        # the system name is the folder name without a trailing "System" (matched case-insensitively)
        system = os.path.basename(templatePath.rstrip("/"))
        if system.lower().endswith('system'):
            system = system[:-len('System')]

        # when a selection of systems is configured, anything outside it is left out
        if self.systems and system not in self.systems:
            return S_OK(cfg)

        cfgFile = os.path.join(templatePath, 'ConfigTemplate.cfg')
        if not os.path.exists(cfgFile):
            return S_ERROR("File not found: %s" % cfgFile)

        templateCfg = CFG()
        templateCfg.loadFromFile(cfgFile)

        # wrap the template content in a section named after the system before merging
        sectionCfg = CFG()
        sectionCfg.createNewSection("/%s" % system, contents=templateCfg)

        return S_OK(cfg.mergeWith(sectionCfg))
示例#2
0
    def parseConfigTemplate(self, templatePath, cfg):
        """Parse the ConfigTemplate.cfg files.

        The template content is added to ``cfg`` as a new section ``/Systems/<system>``, where
        ``<system>`` is the folder name without a trailing "System".

        :param str templatePath: path to the folder containing a ConfigTemplate.cfg file
        :param CFG cfg: cfg to merge with the systems config, modified in place
        :returns: S_OK(cfg) on success; S_ERROR if the file is missing or cannot be loaded
        """
        system = os.path.split(templatePath.rstrip("/"))[1]
        if system.lower().endswith("system"):
            system = system[: -len("System")]

        templatePath = os.path.join(templatePath, "ConfigTemplate.cfg")
        if not os.path.exists(templatePath):
            return S_ERROR("File not found: %s" % templatePath)

        loadCfg = CFG()
        try:
            loadCfg.loadFromFile(templatePath)
        except ValueError as err:
            LOG.error("Failed loading file %r: %r", templatePath, err)
            # remember the failure so the overall run reports a non-zero status
            self.retVal = 1
            # carry the reason in the result so callers can report it
            return S_ERROR("Failed loading file %r: %r" % (templatePath, err))
        cfg.createNewSection("/Systems/%s" % system, contents=loadCfg)

        return S_OK(cfg)
示例#3
0
def checkAgentOptions(getOptionMock, systemName, agentName, agentLocation, ignoreOptions=None):
    """Ensure that all the agent options are properly documented.

    Two directions are checked: every option in the ConfigTemplate must have been requested
    through the mock with the documented default, and every two-argument call made on the mock
    must correspond to an option in the ConfigTemplate with a matching default.

    :param getOptionMock: Mock object for agentmodule.get_amOption function
    :param str systemName: name of the **System** (only used for logging here)
    :param str agentName: name of the **Agent**
    :param str agentLocation: folder of the agent module, the ConfigTemplate.cfg is expected one level above
    :param list ignoreOptions: list of options to ignore, not modified by this function
    :raises AssertionError: if an option is undocumented or its default does not match
    """
    # Build a fresh list instead of extending the argument, so the caller's list is never mutated.
    # The added options can always be set, see the AgentModule for all of them.
    ignoreOptions = list(
        set(ignoreOptions or [])
        | {"PollingTime", "Status", "Enabled", "MaxCycles", "LogOutputs", "ControlDirectory", "shifterProxy"}
    )
    config = CFG()

    LOG.info("Testing %s/%s, ignoring options %s", systemName, agentName, ignoreOptions)

    # expect the ConfigTemplate one level above the agent module
    configFilePath = os.path.join(agentLocation, "..", "ConfigTemplate.cfg")
    config.loadFromFile(configFilePath)
    optionsDict = config.getAsDict("Agents/%s" % agentName)
    outDict = {}
    _parseOption(outDict, optionsDict)
    optionsDict = outDict
    LOG.info("Calls: %s", pformat(getOptionMock.call_args_list))
    LOG.info("Options found in ConfigTemplate: %s ", list(optionsDict.keys()))

    # check that values in ConfigTemplate are used
    for option, value in optionsDict.items():
        # substring match: an ignored name anywhere in the option path skips it
        if any(ignoreOp in option for ignoreOp in ignoreOptions):
            LOG.info("From Agent: ignoring option %r with value %r, (%s)", option, value, type(value))
            continue
        LOG.info("Looking for call to option %r with value %r, (%s)", option, value, type(value))
        if not isinstance(value, bool) and not value:  # empty string, list, dict ...
            # any "empty" default is accepted for an empty template value
            assert any(call(option, null) in getOptionMock.call_args_list for null in ({}, set(), [], "", 0, None))
        else:
            assert (
                call(option, value) in getOptionMock.call_args_list
                or call(option, [value]) in getOptionMock.call_args_list
            )

    # check that options used in the agent are in the ConfigTemplates
    for opCall in getOptionMock.call_args_list:
        optionArguments = opCall[0]
        # only calls with exactly (name, default) positional arguments are checked
        if len(optionArguments) != 2:
            continue
        optionName = optionArguments[0]
        optionValue = optionArguments[1]
        if optionName in ignoreOptions:
            LOG.info("From Template: ignoring option %r with %r", optionName, optionValue)
            continue
        LOG.info("Checking Template option %r with %r", optionName, optionValue)
        assert optionName in optionsDict
        if not optionsDict[optionName]:
            assert not optionValue
            continue
        assert optionsDict[optionName] == optionValue or [optionsDict[optionName]] == optionValue
示例#4
0
 def loadFile(self, fileName):
     """Load a cfg file and merge its content into the local configuration.

     :param str fileName: path of the cfg file to load
     :returns: result of ``mergeWithLocal`` on success, S_ERROR if reading raised IOError
     """
     try:
         parsedCFG = CFG()
         parsedCFG.loadFromFile(fileName)
     except IOError:
         # the CFG object, as left by the failed load, is still merged into localCFG
         self.localCFG = self.localCFG.mergeWith(parsedCFG)
         return S_ERROR("Can't load a cfg file '%s'" % fileName)
     else:
         return self.mergeWithLocal(parsedCFG)
示例#5
0
    def updateCompleteDiracCFG(self):
        """Merge the main dirac.cfg with the systems configuration and write it out as an rst page.

        :returns: 1 if the main cfg file is missing, otherwise ``self.retVal``
        """
        baseCfgPath = self.config.cfg_baseFile
        emptyCfg = CFG()

        if not os.path.exists(baseCfgPath):
            LOG.error("Failed to find Main Dirac cfg at %r", baseCfgPath)
            return 1

        self.prepareDiracCFG()

        LOG.info("Extracting default configuration from %r", baseCfgPath)
        baseCfg = CFG()
        baseCfg.loadFromFile(baseCfgPath)

        # main cfg first, then the systems sections on top of it
        fullCfg = baseCfg.mergeWith(emptyCfg)
        fullCfg = fullCfg.mergeWith(self.getSystemsCFG())

        outputPath = self.config.cfg_targetFile
        LOG.info("Writing output to %r", outputPath)

        header = textwrap.dedent(
            """
                                .. _full_configuration_example:

                                ==========================
                                Full Configuration Example
                                ==========================

                                .. This file is created by docs/Tools/UpdateDiracCFG.py

                                Below is a complete example configuration with anotations for some sections::

                                """
        )
        # ".html" directly followed by an alphanumeric character (positive look ahead)
        htmlTarget = re.compile(r"\.html(?=[a-zA-Z0-9])")

        with open(outputPath, "w") as rst:
            rst.write(header)
            # indent the cfg text so it is part of the literal block
            indented = ["  " + line for line in str(fullCfg).splitlines(True)]
            # fix the links, add back the # for targets
            rst.write(htmlTarget.sub(".html#", "".join(indented)))
        return self.retVal
示例#6
0
    def _updateConfiguration(self, key, value, path="/LocalSite"):
        """Update local configuration to be used by submitted job wrappers.

        The cfg file is ``./<extraOptions>`` when ``self.extraOptions`` is set, otherwise
        ``<rootPath>/etc/dirac.cfg``. It is loaded, modified and written back in place.

        :param str key: name of the option to set
        :param value: value of the option
        :param str path: section in which to set the option, missing sections are created
        """
        cfgFile = (
            os.path.join(".", self.extraOptions)
            if self.extraOptions
            else os.path.join(rootPath, "etc", "dirac.cfg")
        )
        localCfg = CFG()
        localCfg.loadFromFile(cfgFile)

        # walk down the path level by level, creating every section that does not exist yet
        section = "/"
        for level in path.split("/")[1:]:
            section = os.path.join(section, level)
            if not localCfg.isSection(section):
                localCfg.createNewSection(section)

        localCfg.setOption("%s/%s" % (section, key), value)
        localCfg.writeToFile(cfgFile)
示例#7
0
def getComputingElementDefaults(ceName="",
                                ceType="",
                                cfg=None,
                                currentSectionPath=""):
    """
    Return a CFG with one section per CE, filled with defaults for the CE type.

    CEs come from the ComputingElements section of the ``cfg`` file (if given) and from
    ``ceName``/``ceType``. Options missing in a CE section are taken from the defaults
    of its CEType. An empty CFG is returned when loading ``cfg`` fails.
    """
    cesCfg = CFG()
    if cfg:
        try:
            cesCfg.loadFromFile(cfg)
            cesPath = cfgInstallPath("ComputingElements")
            if cesCfg.isSection(cesPath):
                # descend to the ComputingElements section
                for level in cfgPathToList(cesPath):
                    cesCfg = cesCfg[level]
        except Exception:
            return CFG()

    # Overwrite the cfg with Command line arguments
    if ceName:
        if not cesCfg.isSection(ceName):
            cesCfg.createNewSection(ceName)
        if currentSectionPath:
            # Add Options from Command Line
            for optName, optValue in __getExtraOptions(currentSectionPath).items():
                cesCfg[ceName].setOption(optName, optValue)  # pylint: disable=no-member
        if ceType:
            cesCfg[ceName].setOption("CEType", ceType)  # pylint: disable=no-member

    # Load the per-type defaults from the default ComputingElements section
    ceDefaults = __gConfigDefaults(cfgPath(defaultSection("ComputingElements")))
    for sectionName in cesCfg.listSections():
        ceSection = cesCfg[sectionName]
        if "CEType" not in ceSection:
            continue
        sectionType = ceSection["CEType"]
        if sectionType not in ceDefaults:
            continue
        typeDefaults = ceDefaults[sectionType]
        # only fill in options the CE section does not define itself
        for option in typeDefaults.listOptions():  # pylint: disable=no-member
            if option not in ceSection:
                ceSection.setOption(option, typeDefaults[option])  # pylint: disable=unsubscriptable-object

    return cesCfg
示例#8
0
class JobRepository(object):
    """Record of jobs kept in a CFG with a ``Jobs`` section and persisted to a file."""

    def __init__(self, repository=None):
        """Load the repository file if it exists and write it back once.

        :param str repository: path of the repository file. When empty, ``.dirac.repo.rep``
                               in $HOME is used, or in the current directory if HOME is unset.
        """
        self.location = repository
        if not self.location:
            if "HOME" in os.environ:
                self.location = '%s/.dirac.repo.rep' % os.environ['HOME']
            else:
                self.location = '%s/.dirac.repo.rep' % os.getcwd()
        self.repo = CFG()
        if os.path.exists(self.location):
            self.repo.loadFromFile(self.location)
            if not self.repo.existsKey('Jobs'):
                self.repo.createNewSection('Jobs')
        else:
            self.repo.createNewSection('Jobs')
        # OK reflects whether the initial write to disk succeeded
        self.OK = True
        written = self._writeRepository(self.location)
        if not written:
            self.OK = False

    def isOK(self):
        """Return whether the repository could be written at construction time."""
        return self.OK

    def readRepository(self):
        """Return S_OK with the ``Jobs`` section as a dictionary."""
        return S_OK(self.repo.getAsDict('Jobs'))

    def writeRepository(self, alternativePath=None):
        """Write the repository to its location, or to ``alternativePath`` when given.

        :returns: S_OK(destination) or S_ERROR if writing failed
        """
        destination = self.location
        if alternativePath:
            destination = alternativePath
        written = self._writeRepository(destination)
        if not written:
            return S_ERROR("Failed to write repository")
        return S_OK(destination)

    def resetRepository(self, jobIDs=None):
        """Reset the given jobs (all jobs when none are given) to the 'Submitted' state.

        :param list jobIDs: job IDs to reset, every job in the repository if empty or None
        :returns: S_OK()
        """
        if not jobIDs:
            jobs = self.readRepository()['Value']
            jobIDs = list(jobs)
        paramDict = {'State': 'Submitted', 'Retrieved': 0, 'OutputData': 0}
        for jobID in jobIDs:
            self._writeJob(jobID, paramDict, True)
        self._writeRepository(self.location)
        return S_OK()

    def _writeRepository(self, path):
        """Write the CFG to a temporary file and move it to ``path``.

        :returns: True on success; the falsy result of ``writeToFile`` or False on failure
        """
        handle, tmpName = tempfile.mkstemp()
        # Only the file name is needed: close the descriptor right away so it
        # cannot leak if writing raises.
        os.close(handle)
        written = self.repo.writeToFile(tmpName)
        if not written:
            if os.path.exists(tmpName):
                os.remove(tmpName)
            return written
        if os.path.exists(path):
            gLogger.debug("Replacing %s" % path)
        try:
            shutil.move(tmpName, path)
            return True
        except Exception as x:
            gLogger.error("Failed to overwrite repository.", x)
            # the temporary file is kept on purpose as a backup
            gLogger.info(
                "If your repository is corrupted a backup can be found %s" %
                tmpName)
            return False

    def appendToRepository(self, repoLocation):
        """Merge this repository on top of the one stored at ``repoLocation`` and write the result.

        :returns: S_OK() or S_ERROR if ``repoLocation`` does not exist
        """
        if not os.path.exists(repoLocation):
            gLogger.error("Secondary repository does not exist", repoLocation)
            return S_ERROR("Secondary repository does not exist")
        self.repo = CFG().loadFromFile(repoLocation).mergeWith(self.repo)
        self._writeRepository(self.location)
        return S_OK()

    def addJob(self,
               jobID,
               state='Submitted',
               retrieved=0,
               outputData=0,
               update=False):
        """Record a job with its state and a timestamp, then write the repository.

        :param bool update: allow overwriting the options of an already existing job
        :returns: S_OK(jobID)
        """
        paramDict = {
            'State': state,
            'Time': self._getTime(),
            'Retrieved': int(retrieved),
            'OutputData': outputData
        }
        self._writeJob(jobID, paramDict, update)
        self._writeRepository(self.location)
        return S_OK(jobID)

    def updateJob(self, jobID, paramDict):
        """Update the options of an existing job; unknown jobs are silently skipped.

        ``paramDict`` gets a 'Time' entry added in place.
        """
        if self._existsJob(jobID):
            paramDict['Time'] = self._getTime()
            self._writeJob(jobID, paramDict, True)
            self._writeRepository(self.location)
        return S_OK()

    def updateJobs(self, jobDict):
        """Update several existing jobs from ``{jobID: paramDict}`` and write the repository once."""
        for jobID, paramDict in jobDict.items():
            if self._existsJob(jobID):
                paramDict['Time'] = self._getTime()
                self._writeJob(jobID, paramDict, True)
        self._writeRepository(self.location)
        return S_OK()

    def _getTime(self):
        """Return ``time.ctime()`` with spaces replaced by underscores."""
        runtime = time.ctime()
        return runtime.replace(" ", "_")

    def _writeJob(self, jobID, paramDict, update):
        """Set the options of a job in the CFG (in memory only).

        :returns: S_OK() or S_ERROR if the job exists and ``update`` is false
        """
        jobID = str(jobID)
        jobExists = self._existsJob(jobID)
        if jobExists and (not update):
            gLogger.warn("Job exists and not overwriting")
            return S_ERROR("Job exists and not overwriting")
        if not jobExists:
            self.repo.createNewSection('Jobs/%s' % jobID)
        for key, value in paramDict.items():
            self.repo.setOption('Jobs/%s/%s' % (jobID, key), value)
        return S_OK()

    def removeJob(self, jobID):
        """Delete a job from the repository, writing to disk only if something was deleted."""
        res = self.repo['Jobs'].deleteKey(str(jobID))  # pylint: disable=no-member
        if res:
            self._writeRepository(self.location)
        return S_OK()

    def existsJob(self, jobID):
        """Return S_OK(bool) telling whether the job is in the repository."""
        return S_OK(self._existsJob(jobID))

    def _existsJob(self, jobID):
        """Return whether ``Jobs/<jobID>`` is a section of the CFG."""
        return self.repo.isSection('Jobs/%s' % jobID)

    def getLocation(self):
        """Return S_OK with the path of the repository file."""
        return S_OK(self.location)

    def getSize(self):
        """Return S_OK with the number of entries in the ``Jobs`` section."""
        return S_OK(len(self.repo.getAsDict('Jobs')))
示例#9
0
 def mergeFromFile(self, filename):
     """Load ``filename`` into a new CFG and merge it into ``self.cfgData``.

     :param str filename: path of the cfg file to merge
     """
     loadedCfg = CFG()
     loadedCfg.loadFromFile(filename)
     merged = self.cfgData.mergeWith(loadedCfg)
     self.cfgData = merged
示例#10
0
class ConfigurationData(object):
    """Container for the local, remote and merged configuration, with reader/writer bookkeeping.

    Readers wrap their access in ``dangerZoneStart``/``dangerZoneEnd`` (a counter of running
    readers); writers use ``lock``/``unlock`` (an event that holds back new readers).
    All of this is disabled when the environment variable DIRAC_FEWER_CFG_LOCKS is truthy.
    """

    def __init__(self, loadDefaultCFG=True):
        """Create empty CFGs and optionally load ``<rootPath>/etc/dirac.cfg`` into the local one.

        :param bool loadDefaultCFG: load the default dirac.cfg file, a failure only logs a warning
        """
        envVar = os.environ.get("DIRAC_FEWER_CFG_LOCKS", "no").lower()
        self.__locksEnabled = envVar not in ("y", "yes", "t", "true", "on",
                                             "1")
        if self.__locksEnabled:
            lr = LockRing()
            self.threadingEvent = lr.getEvent()
            self.threadingEvent.set()
            self.threadingLock = lr.getLock()
            self.runningThreadsNumber = 0

        self.__compressedConfigurationData = None
        self.configurationPath = "/DIRAC/Configuration"
        self.backupsDir = os.path.join(DIRAC.rootPath, "etc", "csbackup")
        self._isService = False
        self.localCFG = CFG()
        self.remoteCFG = CFG()
        self.mergedCFG = CFG()
        self.remoteServerList = []
        if loadDefaultCFG:
            defaultCFGFile = os.path.join(DIRAC.rootPath, "etc", "dirac.cfg")
            gLogger.debug("dirac.cfg should be at", "%s" % defaultCFGFile)
            retVal = self.loadFile(defaultCFGFile)
            if not retVal["OK"]:
                gLogger.warn("Can't load %s file" % defaultCFGFile)
        self.sync()

    def getBackupDir(self):
        """Return the directory where configuration backups are stored."""
        return self.backupsDir

    def sync(self):
        """Rebuild the merged CFG and the server list, and drop the cached compressed data."""
        gLogger.debug("Updating configuration internals")
        self.mergedCFG = self.remoteCFG.mergeWith(self.localCFG)
        self.remoteServerList = []
        # servers from the local CFG come first, then the ones from the remote CFG
        localServers = self.extractOptionFromCFG("%s/Servers" %
                                                 self.configurationPath,
                                                 self.localCFG,
                                                 disableDangerZones=True)
        if localServers:
            self.remoteServerList.extend(List.fromChar(localServers, ","))
        remoteServers = self.extractOptionFromCFG("%s/Servers" %
                                                  self.configurationPath,
                                                  self.remoteCFG,
                                                  disableDangerZones=True)
        if remoteServers:
            self.remoteServerList.extend(List.fromChar(remoteServers, ","))
        self.remoteServerList = List.uniqueElements(self.remoteServerList)
        self.__compressedConfigurationData = None

    def loadFile(self, fileName):
        """Load a cfg file and merge it into the local CFG.

        :returns: result of ``mergeWithLocal``, or S_ERROR if reading raised IOError
        """
        try:
            fileCFG = CFG()
            fileCFG.loadFromFile(fileName)
        except IOError:
            self.localCFG = self.localCFG.mergeWith(fileCFG)
            return S_ERROR("Can't load a cfg file '%s'" % fileName)
        return self.mergeWithLocal(fileCFG)

    def mergeWithLocal(self, extraCFG):
        """Merge ``extraCFG`` into the local CFG under the write lock, then sync.

        :returns: S_OK() or S_ERROR if merging raised
        """
        self.lock()
        try:
            self.localCFG = self.localCFG.mergeWith(extraCFG)
            self.unlock()
            gLogger.debug("CFG merged")
        except Exception as e:
            self.unlock()
            return S_ERROR("Cannot merge with new cfg: %s" % str(e))
        self.sync()
        return S_OK()

    def loadRemoteCFGFromCompressedMem(self, data):
        """Decompress zlib ``data`` and load it as the remote CFG."""
        if six.PY3 and isinstance(data, str):
            data = data.encode(errors="surrogateescape")
        sUncompressedData = zlib.decompress(data).decode()
        self.loadRemoteCFGFromMem(sUncompressedData)

    def loadRemoteCFGFromMem(self, data):
        """Load the remote CFG from a text buffer under the write lock, then sync.

        An exception raised while loading is propagated, but the lock is always released.
        """
        self.lock()
        try:
            self.remoteCFG.loadFromBuffer(data)
        finally:
            # always release: a cleared event would hold back every reader
            self.unlock()
        self.sync()

    def loadConfigurationData(self, fileName=False):
        """Load the remote CFG from a file, by default ``<name>.cfg`` in ``<rootPath>/etc``.

        Relative file names are resolved against ``<rootPath>/etc``. Errors are only printed.
        """
        name = self.getName()
        self.lock()
        try:
            if not fileName:
                fileName = "%s.cfg" % name
            if fileName[0] != "/":
                fileName = os.path.join(DIRAC.rootPath, "etc", fileName)
            self.remoteCFG.loadFromFile(fileName)
        except Exception as e:
            print(e)
        self.unlock()
        self.sync()

    def getCommentFromCFG(self, path, cfg=False):
        """Return the comment of the entry at ``path`` (merged CFG by default), None on any error."""
        if not cfg:
            cfg = self.mergedCFG
        self.dangerZoneStart()
        try:
            levelList = [
                level.strip() for level in path.split("/")
                if level.strip() != ""
            ]
            for section in levelList[:-1]:
                cfg = cfg[section]
            return self.dangerZoneEnd(cfg.getComment(levelList[-1]))
        except Exception:
            pass
        return self.dangerZoneEnd(None)

    def getSectionsFromCFG(self, path, cfg=False, ordered=False):
        """Return the sections below ``path`` (merged CFG by default), None on any error."""
        if not cfg:
            cfg = self.mergedCFG
        self.dangerZoneStart()
        try:
            levelList = [
                level.strip() for level in path.split("/")
                if level.strip() != ""
            ]
            for section in levelList:
                cfg = cfg[section]
            return self.dangerZoneEnd(cfg.listSections(ordered))
        except Exception:
            pass
        return self.dangerZoneEnd(None)

    def getOptionsFromCFG(self, path, cfg=False, ordered=False):
        """Return the options below ``path`` (merged CFG by default), None on any error."""
        if not cfg:
            cfg = self.mergedCFG
        self.dangerZoneStart()
        try:
            levelList = [
                level.strip() for level in path.split("/")
                if level.strip() != ""
            ]
            for section in levelList:
                cfg = cfg[section]
            return self.dangerZoneEnd(cfg.listOptions(ordered))
        except Exception:
            pass
        return self.dangerZoneEnd(None)

    def extractOptionFromCFG(self, path, cfg=False, disableDangerZones=False):
        """Return the value of the option at ``path``, or None if it is missing or on any error.

        :param str path: slash separated path, the last element is the option name
        :param cfg: CFG to read from, the merged CFG when falsy
        :param bool disableDangerZones: do not register as a reader while extracting
        """
        if not cfg:
            cfg = self.mergedCFG
        if not disableDangerZones:
            self.dangerZoneStart()
        value = None
        try:
            levelList = [
                level.strip() for level in path.split("/")
                if level.strip() != ""
            ]
            for section in levelList[:-1]:
                cfg = cfg[section]
            if levelList[-1] in cfg.listOptions():
                value = cfg[levelList[-1]]
        except Exception:
            value = None
        finally:
            # leave the danger zone only if it was entered, so the reader
            # counter stays balanced on every path
            if not disableDangerZones:
                self.dangerZoneEnd()
        return value

    def setOptionInCFG(self, path, value, cfg=False, disableDangerZones=False):
        """Set the option at ``path`` (local CFG by default), creating missing sections, then sync."""
        if not cfg:
            cfg = self.localCFG
        if not disableDangerZones:
            self.dangerZoneStart()
        try:
            levelList = [
                level.strip() for level in path.split("/")
                if level.strip() != ""
            ]
            for section in levelList[:-1]:
                if section not in cfg.listSections():
                    cfg.createNewSection(section)
                cfg = cfg[section]
            cfg.setOption(levelList[-1], value)
        finally:
            if not disableDangerZones:
                self.dangerZoneEnd()
        self.sync()

    def deleteOptionInCFG(self, path, cfg=False):
        """Delete the entry at ``path`` (local CFG by default) and sync.

        Returns early, without syncing, when a section of the path does not exist.
        """
        if not cfg:
            cfg = self.localCFG
        self.dangerZoneStart()
        try:
            levelList = [
                level.strip() for level in path.split("/")
                if level.strip() != ""
            ]
            for section in levelList[:-1]:
                if section not in cfg.listSections():
                    return
                cfg = cfg[section]
            cfg.deleteKey(levelList[-1])
        finally:
            self.dangerZoneEnd()
        self.sync()

    def generateNewVersion(self):
        """Set the version of the remote CFG to the current time string."""
        self.setVersion(Time.toString())
        self.sync()
        gLogger.info("Generated new version %s" % self.getVersion())

    def setVersion(self, version, cfg=False):
        """Set the ``Version`` option of the configuration section (remote CFG by default)."""
        if not cfg:
            cfg = self.remoteCFG
        self.setOptionInCFG("%s/Version" % self.configurationPath, version,
                            cfg)

    def getVersion(self, cfg=False):
        """Return the ``Version`` option (remote CFG by default), "0" when unset."""
        if not cfg:
            cfg = self.remoteCFG
        value = self.extractOptionFromCFG(
            "%s/Version" % self.configurationPath, cfg)
        if value:
            return value
        return "0"

    def getName(self):
        """Return the ``Name`` option of the configuration section from the merged CFG."""
        return self.extractOptionFromCFG("%s/Name" % self.configurationPath,
                                         self.mergedCFG)

    def exportName(self):
        """Copy the configuration name into the remote CFG."""
        return self.setOptionInCFG("%s/Name" % self.configurationPath,
                                   self.getName(), self.remoteCFG)

    def getRefreshTime(self):
        """Return the ``RefreshTime`` option as int, 300 if unset or invalid."""
        try:
            return int(
                self.extractOptionFromCFG(
                    "%s/RefreshTime" % self.configurationPath, self.mergedCFG))
        except Exception:
            return 300

    def getPropagationTime(self):
        """Return the ``PropagationTime`` option as int, 300 if unset or invalid."""
        try:
            return int(
                self.extractOptionFromCFG(
                    "%s/PropagationTime" % self.configurationPath,
                    self.mergedCFG))
        except Exception:
            return 300

    def getSlavesGraceTime(self):
        """Return the ``SlavesGraceTime`` option as int, 600 if unset or invalid."""
        try:
            return int(
                self.extractOptionFromCFG(
                    "%s/SlavesGraceTime" % self.configurationPath,
                    self.mergedCFG))
        except Exception:
            return 600

    def mergingEnabled(self):
        """Return True if ``EnableAutoMerge`` is yes/true/y, False otherwise or when unset."""
        try:
            val = self.extractOptionFromCFG(
                "%s/EnableAutoMerge" % self.configurationPath, self.mergedCFG)
            return val.lower() in ("yes", "true", "y")
        except Exception:
            return False

    def getAutoPublish(self):
        """Return False only if the local ``AutoPublish`` option is no/false/n."""
        value = self.extractOptionFromCFG(
            "%s/AutoPublish" % self.configurationPath, self.localCFG)
        if value and value.lower() in ("no", "false", "n"):
            return False
        else:
            return True

    def getAutoSlaveSync(self):
        """Return False only if the local ``AutoSlaveSync`` option is no/false/n."""
        value = self.extractOptionFromCFG(
            "%s/AutoSlaveSync" % self.configurationPath, self.localCFG)
        if value and value.lower() in ("no", "false", "n"):
            return False
        else:
            return True

    def getServers(self):
        """Return a copy of the server list built by ``sync``."""
        return list(self.remoteServerList)

    def getConfigurationGateway(self):
        """Return the ``/DIRAC/Gateway`` option from the local CFG."""
        return self.extractOptionFromCFG("/DIRAC/Gateway", self.localCFG)

    def setServers(self, sServers):
        """Set the ``Servers`` option in the remote CFG."""
        self.setOptionInCFG("%s/Servers" % self.configurationPath, sServers,
                            self.remoteCFG)
        self.sync()

    def deleteLocalOption(self, optionPath):
        """Delete an option from the local CFG."""
        self.deleteOptionInCFG(optionPath, self.localCFG)

    def getMasterServer(self):
        """Return the ``MasterServer`` option from the remote CFG."""
        return self.extractOptionFromCFG(
            "%s/MasterServer" % self.configurationPath, self.remoteCFG)

    def setMasterServer(self, sURL):
        """Set the ``MasterServer`` option in the remote CFG."""
        self.setOptionInCFG("%s/MasterServer" % self.configurationPath, sURL,
                            self.remoteCFG)
        self.sync()

    def getCompressedData(self):
        """Return the zlib-compressed remote CFG, cached until the next ``sync``."""
        if self.__compressedConfigurationData is None:
            self.__compressedConfigurationData = zlib.compress(
                str(self.remoteCFG).encode(), 9)
        return self.__compressedConfigurationData

    def isMaster(self):
        """Return True if the local ``Master`` option is yes/true/y."""
        value = self.extractOptionFromCFG("%s/Master" % self.configurationPath,
                                          self.localCFG)
        if value and value.lower() in ("yes", "true", "y"):
            return True
        else:
            return False

    def getServicesPath(self):
        """Return the fixed path "/Services"."""
        return "/Services"

    def setAsService(self):
        """Flag this configuration as belonging to a service."""
        self._isService = True

    def isService(self):
        """Return whether ``setAsService`` was called."""
        return self._isService

    def useServerCertificate(self):
        """Return True if ``/DIRAC/Security/UseServerCertificate`` is y/yes/true."""
        value = self.extractOptionFromCFG(
            "/DIRAC/Security/UseServerCertificate")
        if value and value.lower() in ("y", "yes", "true"):
            return True
        return False

    def skipCACheck(self):
        """Return True if ``/DIRAC/Security/SkipCAChecks`` is y/yes/true."""
        value = self.extractOptionFromCFG("/DIRAC/Security/SkipCAChecks")
        if value and value.lower() in ("y", "yes", "true"):
            return True
        return False

    def dumpLocalCFGToFile(self, fileName):
        """Write the local CFG to ``fileName``.

        :returns: S_OK() or S_ERROR on IOError
        """
        try:
            with open(fileName, "w") as fd:
                fd.write(str(self.localCFG))
            gLogger.verbose("Configuration file dumped", "'%s'" % fileName)
        except IOError:
            gLogger.error("Can't dump cfg file", "'%s'" % fileName)
            return S_ERROR("Can't dump cfg file '%s'" % fileName)
        return S_OK()

    def getRemoteCFG(self):
        """Return the remote CFG object itself (not a copy)."""
        return self.remoteCFG

    def getMergedCFGAsString(self):
        """Return the merged CFG as text."""
        return str(self.mergedCFG)

    def dumpRemoteCFGToFile(self, fileName):
        """Write the remote CFG to ``fileName``; I/O errors are propagated."""
        with open(fileName, "w") as fd:
            fd.write(str(self.remoteCFG))

    def __backupCurrentConfiguration(self, backupName):
        """Zip the on-disk configuration file into ``<backupDir>/<year>/<month>/``.

        Failures are logged and not propagated.
        """
        configurationFilename = "%s.cfg" % self.getName()
        configurationFile = os.path.join(DIRAC.rootPath, "etc",
                                         configurationFilename)
        today = Time.date()
        backupPath = os.path.join(self.getBackupDir(), str(today.year),
                                  "%02d" % today.month)
        mkDir(backupPath)
        backupFile = os.path.join(
            backupPath,
            configurationFilename.replace(".cfg", ".%s.zip" % backupName))
        if os.path.isfile(configurationFile):
            gLogger.info("Making a backup of configuration in %s" % backupFile)
            try:
                with zipfile.ZipFile(backupFile, "w",
                                     zipfile.ZIP_DEFLATED) as zf:
                    zf.write(
                        configurationFile, "%s.backup.%s" %
                        (os.path.split(configurationFile)[1], backupName))
            except Exception:
                gLogger.exception()
                gLogger.error("Cannot backup configuration data file",
                              "file %s" % backupFile)
        else:
            gLogger.warn("CS data file does not exist", configurationFile)

    def writeRemoteConfigurationToDisk(self, backupName=False):
        """Write the remote CFG to ``<rootPath>/etc/<name>.cfg`` and optionally back it up.

        :param backupName: when truthy, a zip backup with this name is made after writing
        :returns: S_OK() or S_ERROR if the file could not be written
        """
        configurationFile = os.path.join(DIRAC.rootPath, "etc",
                                         "%s.cfg" % self.getName())
        try:
            with open(configurationFile, "w") as fd:
                fd.write(str(self.remoteCFG))
        except Exception as e:
            gLogger.fatal(
                "Cannot write new configuration to disk!",
                "file %s exception %s" % (configurationFile, repr(e)))
            return S_ERROR("Can't write cs file %s!: %s" %
                           (configurationFile, repr(e).replace(",)", ")")))
        if backupName:
            self.__backupCurrentConfiguration(backupName)
        return S_OK()

    def setRemoteCFG(self, cfg, disableSync=False):
        """Replace the remote CFG with a clone of ``cfg``, syncing unless ``disableSync``."""
        self.remoteCFG = cfg.clone()
        if not disableSync:
            self.sync()

    def lock(self):
        """
        Locks Event to prevent further threads from reading.
        Stops current thread until no other thread is accessing.
        PRIVATE USE
        """
        if not self.__locksEnabled:
            return
        self.threadingEvent.clear()
        # poll until every reader has left its danger zone
        while self.runningThreadsNumber > 0:
            time.sleep(0.1)

    def unlock(self):
        """
        Unlocks Event.
        PRIVATE USE
        """
        if not self.__locksEnabled:
            return
        self.threadingEvent.set()

    def dangerZoneStart(self):
        """
        Start of danger zone. This danger zone may be or may not be a mutual exclusion zone.
        Counter is maintained to know how many threads are inside and be able to enable and disable mutual exclusion.
        PRIVATE USE
        """
        if not self.__locksEnabled:
            return
        # wait while a writer holds the event cleared
        self.threadingEvent.wait()
        self.threadingLock.acquire()
        self.runningThreadsNumber += 1
        try:
            self.threadingLock.release()
        except thread.error:
            pass

    def dangerZoneEnd(self, returnValue=None):
        """
        End of danger zone.
        Returns ``returnValue`` unchanged so it can wrap a return expression.
        PRIVATE USE
        """
        if not self.__locksEnabled:
            return returnValue
        self.threadingLock.acquire()
        self.runningThreadsNumber -= 1
        try:
            self.threadingLock.release()
        except thread.error:
            pass
        return returnValue
示例#11
0
def checkAgentOptions(getOptionMock,
                      systemName,
                      agentName,
                      ignoreOptions=None,
                      extension='DIRAC'):
    """Ensure that all the agent options are properly documented.

    The options under ``Agents/<agentName>`` in the system's
    ``ConfigTemplate.cfg`` are compared, in both directions, with the
    two-argument calls recorded in ``getOptionMock.call_args_list``.

    :param getOptionMock: Mock object whose ``call_args_list`` holds the
                          ``(optionName, defaultValue)`` calls made by the agent
    :param str systemName: name of the **System**
    :param str agentName: name of the **Agent**
    :param list ignoreOptions: options to ignore; the passed object is not modified
    :param str extension: name of the DIRAC **Extension** where the Agent comes from
    :raises AssertionError: if a template option is not requested with its
                            template value, or a requested option is missing
                            from, or differs from, the template
    """
    # Build a fresh collection instead of extending the caller's list in place.
    # The added options can be set on any agent, see the AgentModule for all of them
    ignoreOptions = list(
        set(ignoreOptions or ()) | {
            'PollingTime', 'Status', 'Enabled', 'MaxCycles', 'LogOutputs',
            'ControlDirectory', 'shifterProxy'
        })
    config = CFG()

    LOG.info("Testing %s/%s, ignoring options %s", systemName, agentName,
             ignoreOptions)

    # get the location where DIRAC is in from basefolder/DIRAC/__init__.py
    configFilePath = os.path.join(
        os.path.dirname(os.path.dirname(DIRAC.__file__)), extension,
        systemName, 'ConfigTemplate.cfg')
    config.loadFromFile(configFilePath)
    optionsDict = config.getAsDict('Agents/%s' % agentName)
    outDict = {}
    _parseOption(outDict, optionsDict)
    optionsDict = outDict
    LOG.info("Calls: %s", pformat(getOptionMock.call_args_list))
    LOG.info("Options found in ConfigTemplate: %s ", list(optionsDict.keys()))

    # check that values in ConfigTemplate are used
    for option, value in optionsDict.items():
        # substring match: an ignored name anywhere in the option path skips it
        if any(ignoreOp in option for ignoreOp in ignoreOptions):
            LOG.info("From Agent: ignoring option %r with value %r, (%s)",
                     option, value, type(value))
            continue
        LOG.info("Looking for call to option %r with value %r, (%s)", option,
                 value, type(value))
        if not isinstance(value,
                          bool) and not value:  # empty string, list, dict ...
            # an empty template value matches any "empty" default in the agent
            assert any(
                call(option, null) in getOptionMock.call_args_list
                for null in ({}, set(), [], '', 0, None)), \
                "Option %r is empty in ConfigTemplate but is not requested " \
                "with an empty default" % (option,)
        else:
            # the agent may ask for the value itself or wrapped in a list
            assert call(option, value) in getOptionMock.call_args_list or \
                call(option, [value]) in getOptionMock.call_args_list, \
                "Option %r with value %r from ConfigTemplate is not " \
                "requested by the agent" % (option, value)

    # check that options used in the agent are in the ConfigTemplates
    for opCall in getOptionMock.call_args_list:
        optionArguments = opCall[0]
        # only calls with exactly (name, default) positional arguments are checked
        if len(optionArguments) != 2:
            continue
        optionName = optionArguments[0]
        optionValue = optionArguments[1]
        if optionName in ignoreOptions:
            LOG.info("From Template: ignoring option %r with %r", optionName,
                     optionValue)
            continue
        LOG.info("Checking Template option %r with %r", optionName,
                 optionValue)
        assert optionName in optionsDict, \
            "Option %r is requested by the agent but missing from " \
            "ConfigTemplate" % (optionName,)
        if not optionsDict[optionName]:
            assert not optionValue, \
                "Option %r is empty in ConfigTemplate but the agent default " \
                "is %r" % (optionName, optionValue)
            continue
        assert optionsDict[optionName] == optionValue or [
            optionsDict[optionName]
        ] == optionValue, \
            "Option %r: ConfigTemplate value %r differs from agent default " \
            "%r" % (optionName, optionsDict[optionName], optionValue)
示例#12
0
            os.path.expandvars("$WORKSPACE") +
            "/PilotInstallDIR/etc/dirac.cfg"):
        localConfigFile = os.path.expandvars(
            "$WORKSPACE") + "/PilotInstallDIR/etc/dirac.cfg"
    elif os.path.isfile(
            os.path.expandvars("$WORKSPACE") +
            "/ServerInstallDIR/etc/dirac.cfg"):
        localConfigFile = os.path.expandvars(
            "$WORKSPACE") + "/ServerInstallDIR/etc/dirac.cfg"
    elif os.path.isfile("./etc/dirac.cfg"):
        localConfigFile = "./etc/dirac.cfg"
    else:
        print("Local CFG file not found")
        exit(2)

# Load the selected local configuration file and apply the fixed settings
localCfg.loadFromFile(localConfigFile)
if not localCfg.isSection("/LocalSite"):
    localCfg.createNewSection("/LocalSite")
localCfg.setOption("/LocalSite/CPUTimeLeft", 5000)
localCfg.setOption("/DIRAC/Security/UseServerCertificate", False)

if not sMod:
    # Keep a given setup; otherwise use the configured one, then a fixed default
    setup = setup or gConfig.getValue("/DIRAC/Setup") or "dirac-JenkinsSetup"

    # Make sure /Operations and its per-setup subsection exist
    for sectionPath in ("/Operations", "/Operations/%s" % setup):
        if not localCfg.isSection(sectionPath):
            localCfg.createNewSection(sectionPath)
示例#13
0
    def execute(self):
        """Run one JobAgent cycle: try to match a job and submit it to the CE.

        The cycle goes through these steps, returning early on failure:

        1. Finish if the file ``/var/lib/dirac_drain`` exists.
        2. Ask the computing element for available slots.
        3. Once ``self.jobCount`` is non-zero, recompute the CPU work left
           (filling mode only) and write it to the local cfg file.
        4. Request a job from the Matcher for each CE description in turn,
           until one request succeeds.
        5. Validate the Matcher answer and the JDL parameters.
        6. Set up the proxy, check/install software, submit the job and
           commit the job report (a failover request is sent if the commit fails).

        :returns: ``S_OK`` with a message when the cycle completes, when there are
                  no free slots but jobs are running, or when no job is matched and
                  the failed-match count is within its limit; ``S_ERROR`` on a pilot
                  version mismatch; the ``getDescription()`` result as-is when it is
                  not OK; otherwise whatever ``self.__finish`` or
                  ``self._rescheduleFailedJob`` return.
        """

        # Temporary mechanism to pass a shutdown message to the agent
        if os.path.exists('/var/lib/dirac_drain'):
            return self.__finish('Node is being drained by an operator')

        # Check if we can match jobs at all
        self.log.verbose('Job Agent execution loop')
        result = self.computingElement.available()
        if not result['OK']:
            self.log.info('Resource is not available', result['Message'])
            return self.__finish('CE Not Available')

        ceInfoDict = result['CEInfoDict']
        runningJobs = ceInfoDict.get("RunningJobs")
        availableSlots = result['Value']

        if not availableSlots:
            # No free slot: keep cycling only while jobs are still running
            if runningJobs:
                self.log.info('No available slots',
                              ': %d running jobs' % runningJobs)
                return S_OK('Job Agent cycle complete with %d running jobs' %
                            runningJobs)
            self.log.info(
                'CE is not available (and there are no running jobs)')
            return self.__finish('CE Not Available')

        if self.jobCount:
            # Only call timeLeft utility after a job has been picked up
            # NOTE: this message is logged before self.fillingMode is checked
            self.log.info('Attempting to check CPU time left for filling mode')
            if self.fillingMode:
                self.timeLeft = self.computeCPUWorkLeft()
                self.log.info('normalized CPU units remaining in slot',
                              self.timeLeft)
                if self.timeLeft <= self.minimumTimeLeft:
                    return self.__finish('No more time left')
                # Need to update the Configuration so that the new value is published in the next matching request
                result = self.computingElement.setCPUTimeLeft(
                    cpuTimeLeft=self.timeLeft)
                if not result['OK']:
                    return self.__finish(result['Message'])

                # Update local configuration to be used by submitted job wrappers
                localCfg = CFG()
                # self.extraOptions, when set, is used as the cfg file path relative to '.'
                if self.extraOptions:
                    localConfigFile = os.path.join('.', self.extraOptions)
                else:
                    localConfigFile = os.path.join(rootPath, "etc",
                                                   "dirac.cfg")
                localCfg.loadFromFile(localConfigFile)
                if not localCfg.isSection('/LocalSite'):
                    localCfg.createNewSection('/LocalSite')
                localCfg.setOption('/LocalSite/CPUTimeLeft', self.timeLeft)
                localCfg.writeToFile(localConfigFile)

            else:
                return self.__finish('Filling Mode is Disabled')

        # if we are here we assume that a job can be matched
        result = self.computingElement.getDescription()
        if not result['OK']:
            return result

        # We can have several prioritized job retrieval strategies
        # NOTE(review): if result['Value'] is neither a dict nor a list, ceDictList
        # is never bound and the loop below raises NameError - confirm that
        # getDescription() can only return these two types.
        if isinstance(result['Value'], dict):
            ceDictList = [result['Value']]
        elif isinstance(result['Value'], list):
            # This is the case for Pool ComputingElement, and parameter 'MultiProcessorStrategy'
            ceDictList = result['Value']

        # Try each CE description in order and stop at the first successful match
        for ceDict in ceDictList:

            # Add pilot information
            gridCE = gConfig.getValue('LocalSite/GridCE', 'Unknown')
            if gridCE != 'Unknown':
                ceDict['GridCE'] = gridCE
            if 'PilotReference' not in ceDict:
                ceDict['PilotReference'] = str(self.pilotReference)
            ceDict['PilotBenchmark'] = self.cpuFactor
            ceDict['PilotInfoReportedFlag'] = self.pilotInfoReportedFlag

            # Add possible job requirements
            result = gConfig.getOptionsDict('/AgentJobRequirements')
            if result['OK']:
                requirementsDict = result['Value']
                ceDict.update(requirementsDict)
                self.log.info('Requirements:', requirementsDict)

            self.log.verbose('CE dict', ceDict)

            # here finally calling the matcher
            start = time.time()
            jobRequest = MatcherClient().requestJob(ceDict)
            matchTime = time.time() - start
            self.log.info('MatcherTime', '= %.2f (s)' % (matchTime))
            if jobRequest['OK']:
                break

        # NOTE(review): jobRequest, matchTime and ceDict are bound only inside the
        # loop above; an empty ceDictList would leave them undefined here.
        self.stopAfterFailedMatches = self.am_getOption(
            'StopAfterFailedMatches', self.stopAfterFailedMatches)

        if not jobRequest['OK']:

            # if we don't match a job, independently from the reason,
            # we wait a bit longer before trying again
            self.am_setOption("PollingTime",
                              int(self.am_getOption("PollingTime") * 1.5))

            # Every branch below except the pilot version mismatch counts a failed
            # match and finishes once the count exceeds stopAfterFailedMatches
            if re.search('No match found', jobRequest['Message']):
                self.log.notice('Job request OK, but no match found',
                                ': %s' % (jobRequest['Message']))
                self.matchFailedCount += 1
                if self.matchFailedCount > self.stopAfterFailedMatches:
                    return self.__finish(
                        'Nothing to do for more than %d cycles' %
                        self.stopAfterFailedMatches)
                return S_OK(jobRequest['Message'])
            elif jobRequest['Message'].find("seconds timeout") != -1:
                self.log.error('Timeout while requesting job',
                               jobRequest['Message'])
                self.matchFailedCount += 1
                if self.matchFailedCount > self.stopAfterFailedMatches:
                    return self.__finish(
                        'Nothing to do for more than %d cycles' %
                        self.stopAfterFailedMatches)
                return S_OK(jobRequest['Message'])
            elif jobRequest['Message'].find(
                    "Pilot version does not match") != -1:
                # The only matching failure that is returned as S_ERROR
                errorMsg = 'Pilot version does not match the production version'
                self.log.error(errorMsg,
                               jobRequest['Message'].replace(errorMsg, ''))
                return S_ERROR(jobRequest['Message'])
            else:
                self.log.notice('Failed to get jobs',
                                ': %s' % (jobRequest['Message']))
                self.matchFailedCount += 1
                if self.matchFailedCount > self.stopAfterFailedMatches:
                    return self.__finish(
                        'Nothing to do for more than %d cycles' %
                        self.stopAfterFailedMatches)
                return S_OK(jobRequest['Message'])

        # Reset the Counter
        self.matchFailedCount = 0

        # If we are here it is because we matched a job
        matcherInfo = jobRequest['Value']
        if not self.pilotInfoReportedFlag:
            # Check the flag after the first access to the Matcher
            self.pilotInfoReportedFlag = matcherInfo.get(
                'PilotInfoReportedFlag', False)
        jobID = matcherInfo['JobID']
        jobReport = JobReport(jobID, 'JobAgent@%s' % self.siteName)
        # These keys must be present and non-empty in the Matcher answer
        matcherParams = ['JDL', 'DN', 'Group']
        for param in matcherParams:
            if param not in matcherInfo:
                jobReport.setJobStatus(status='Failed',
                                       minor='Matcher did not return %s' %
                                       (param))
                return self.__finish('Matcher Failed')
            elif not matcherInfo[param]:
                jobReport.setJobStatus(status='Failed',
                                       minor='Matcher returned null %s' %
                                       (param))
                return self.__finish('Matcher Failed')
            else:
                self.log.verbose('Matcher returned',
                                 '%s = %s ' % (param, matcherInfo[param]))

        jobJDL = matcherInfo['JDL']
        jobGroup = matcherInfo['Group']
        ownerDN = matcherInfo['DN']

        # Everything else returned by the Matcher is passed on as optimizer parameters
        optimizerParams = {}
        for key in matcherInfo:
            if key not in matcherParams:
                optimizerParams[key] = matcherInfo[key]

        parameters = self._getJDLParameters(jobJDL)
        if not parameters['OK']:
            jobReport.setJobStatus(status='Failed',
                                   minor='Could Not Extract JDL Parameters')
            self.log.warn('Could Not Extract JDL Parameters',
                          parameters['Message'])
            return self.__finish('JDL Problem')

        params = parameters['Value']
        if 'JobID' not in params:
            msg = 'Job has not JobID defined in JDL parameters'
            jobReport.setJobStatus(status='Failed', minor=msg)
            self.log.warn(msg)
            return self.__finish('JDL Problem')
        else:
            # From here on the JobID from the JDL replaces the one from the Matcher
            jobID = params['JobID']

        if 'JobType' not in params:
            self.log.warn('Job has no JobType defined in JDL parameters')
            jobType = 'Unknown'
        else:
            jobType = params['JobType']

        # A missing CPUTime only produces a warning; the job is still processed
        if 'CPUTime' not in params:
            self.log.warn(
                'Job has no CPU requirement defined in JDL parameters')

        # Job requirements for determining the number of processors
        # the minimum number of processors requested
        processors = int(
            params.get('NumberOfProcessors',
                       int(params.get('MinNumberOfProcessors', 1))))
        # the maximum number of processors allowed to the payload
        maxNumberOfProcessors = int(params.get('MaxNumberOfProcessors', 0))
        # need or not the whole node for the job
        wholeNode = 'WholeNode' in params
        mpTag = 'MultiProcessor' in params.get('Tags', [])

        # For dirac-jobexec payloads, append the extra options to the arguments
        if self.extraOptions and 'dirac-jobexec' in params.get(
                'Executable', '').strip():
            params['Arguments'] = (params.get('Arguments', '') + ' ' +
                                   self.extraOptions).strip()
            params['ExtraOptions'] = self.extraOptions

        self.log.verbose('Job request successful: \n', jobRequest['Value'])
        self.log.info(
            'Received', 'JobID=%s, JobType=%s, OwnerDN=%s, JobGroup=%s' %
            (jobID, jobType, ownerDN, jobGroup))
        self.jobCount += 1
        # Any exception raised below is logged and the job is passed to
        # self._rescheduleFailedJob
        try:
            jobReport.setJobParameter(par_name='MatcherServiceTime',
                                      par_value=str(matchTime),
                                      sendFlag=False)

            if 'BOINC_JOB_ID' in os.environ:
                # Report BOINC environment
                for thisp in ('BoincUserID', 'BoincHostID',
                              'BoincHostPlatform', 'BoincHostName'):
                    jobReport.setJobParameter(par_name=thisp,
                                              par_value=gConfig.getValue(
                                                  '/LocalSite/%s' % thisp,
                                                  'Unknown'),
                                              sendFlag=False)

            jobReport.setJobStatus(status='Matched',
                                   minor='Job Received by Agent',
                                   sendFlag=False)
            result_setupProxy = self._setupProxy(ownerDN, jobGroup)
            if not result_setupProxy['OK']:
                return self._rescheduleFailedJob(jobID,
                                                 result_setupProxy['Message'],
                                                 self.stopOnApplicationFailure)
            proxyChain = result_setupProxy.get('Value')

            # Save the job jdl for external monitoring
            self.__saveJobJDLRequest(jobID, jobJDL)

            # ceDict is the CE description used for the last Matcher request above
            software = self._checkInstallSoftware(jobID, params, ceDict,
                                                  jobReport)
            if not software['OK']:
                self.log.error('Failed to install software for job',
                               '%s' % (jobID))
                errorMsg = software['Message']
                if not errorMsg:
                    errorMsg = 'Failed software installation'
                return self._rescheduleFailedJob(jobID, errorMsg,
                                                 self.stopOnApplicationFailure)

            self.log.debug('Before self._submitJob() (%sCE)' % (self.ceName))
            result_submitJob = self._submitJob(
                jobID=jobID,
                jobParams=params,
                resourceParams=ceDict,
                optimizerParams=optimizerParams,
                proxyChain=proxyChain,
                jobReport=jobReport,
                processors=processors,
                wholeNode=wholeNode,
                maxNumberOfProcessors=maxNumberOfProcessors,
                mpTag=mpTag)

            # Committing the JobReport before evaluating the result of job submission
            res = jobReport.commit()
            if not res['OK']:
                # The commit failed: wrap the report in a failover Request instead
                resFD = jobReport.generateForwardDISET()
                if not resFD['OK']:
                    self.log.error("Error generating ForwardDISET operation",
                                   resFD['Message'])
                else:
                    # Here we create the Request.
                    op = resFD['Value']
                    request = Request()
                    requestName = 'jobAgent_%s' % jobID
                    request.RequestName = requestName.replace('"', '')
                    request.JobID = jobID
                    request.SourceComponent = "JobAgent_%s" % jobID
                    request.addOperation(op)
                    # This might fail, but only a message would be printed.
                    self._sendFailoverRequest(request)

            if not result_submitJob['OK']:
                return self.__finish(result_submitJob['Message'])
            elif 'PayloadFailed' in result_submitJob:
                # Do not keep running and do not overwrite the Payload error
                message = 'Payload execution failed with error code %s' % result_submitJob[
                    'PayloadFailed']
                if self.stopOnApplicationFailure:
                    return self.__finish(message,
                                         self.stopOnApplicationFailure)
                else:
                    self.log.info(message)

            self.log.debug('After %sCE submitJob()' % (self.ceName))
        except Exception as subExcept:  # pylint: disable=broad-except
            self.log.exception("Exception in submission",
                               "",
                               lException=subExcept,
                               lExcInfo=True)
            return self._rescheduleFailedJob(
                jobID, 'Job processing failed with exception',
                self.stopOnApplicationFailure)

        return S_OK('Job Agent cycle complete')