def execute(self):
    """Run one cycle of the Stalled Job Agent.

    Re-reads the agent options, converts the stalled/failed thresholds
    (expressed in watchdog cycles) into seconds and runs the individual
    checks one after the other. A check that fails is only logged; the
    following checks are still executed.

    :return: S_OK with a completion message, or S_ERROR if the
             WorkloadManagement system instance cannot be obtained
    """
    self.log.verbose('Waking up Stalled Job Agent')

    instance = getSystemInstance('WorkloadManagement')
    if not instance:
        return S_ERROR('Can not get the WorkloadManagement system instance')
    jobWrapperPath = cfgPath('Systems', 'WorkloadManagement', instance, 'JobWrapper')

    stalledCycles = self.am_getOption('StalledTimeHours', 2)
    failedCycles = self.am_getOption('FailedTimeHours', 6)

    # Refresh the thresholds kept on the agent instance
    self.stalledJobsToleranceTime = self.am_getOption('StalledJobsToleranceTime', 0)
    self.matchedTime = self.am_getOption('MatchedTime', self.matchedTime)
    self.rescheduledTime = self.am_getOption('RescheduledTime', self.rescheduledTime)
    self.completedTime = self.am_getOption('CompletedTime', self.completedTime)

    self.log.verbose('StalledTime = %s cycles' % (stalledCycles))
    self.log.verbose('FailedTime = %s cycles' % (failedCycles))

    # The cycle length is the configured checking time, but never below the minimum
    checkingTime = gConfig.getValue(cfgPath(jobWrapperPath, 'CheckingTime'), 30 * 60)
    minCheckingTime = gConfig.getValue(cfgPath(jobWrapperPath, 'MinCheckingTime'), 20 * 60)
    cycleLength = max(checkingTime, minCheckingTime)

    # Add half cycle to avoid race conditions
    stalledSeconds = cycleLength * (stalledCycles + 0.5)
    failedSeconds = cycleLength * (failedCycles + 0.5)

    def logIfFailed(outcome, message):
        """Log *message* together with the error of a failed result structure."""
        if not outcome['OK']:
            self.log.error(message, outcome['Message'])

    logIfFailed(self.__markStalledJobs(stalledSeconds), 'Failed to detect stalled jobs')

    # Note, jobs will be revived automatically during the heartbeat signal phase and
    # subsequent status changes will result in jobs not being selected by the
    # stalled job agent.

    logIfFailed(self.__failStalledJobs(failedSeconds), 'Failed to process stalled jobs')
    logIfFailed(self.__failCompletedJobs(), 'Failed to process completed jobs')
    logIfFailed(self.__kickStuckJobs(), 'Failed to kick stuck jobs')

    return S_OK('Stalled Job Agent cycle complete')
def initialize(self):
    """Prepare the agent: databases, option-derived thresholds and the thread pool.

    The stalled/failed thresholds are configured in watchdog cycles and are
    stored on the instance in seconds (``self.stalledTime`` / ``self.failedTime``).

    :return: S_OK() on success, S_ERROR if the WorkloadManagement system
             instance cannot be obtained
    """
    self.jobDB = JobDB()
    self.logDB = JobLoggingDB()

    # getting parameters
    enabled = self.am_getOption("Enable", True)
    if not enabled:
        self.log.info("Stalled Job Agent running in disabled mode")

    instance = getSystemInstance("WorkloadManagement")
    if not instance:
        return S_ERROR("Can not get the WorkloadManagement system instance")

    self.stalledJobsTolerantSites = self.am_getOption("StalledJobsTolerantSites", [])
    self.stalledJobsToleranceTime = self.am_getOption("StalledJobsToleranceTime", 0)
    self.stalledJobsToRescheduleSites = self.am_getOption("StalledJobsToRescheduleSites", [])
    self.submittingTime = self.am_getOption("SubmittingTime", self.submittingTime)
    self.matchedTime = self.am_getOption("MatchedTime", self.matchedTime)
    self.rescheduledTime = self.am_getOption("RescheduledTime", self.rescheduledTime)

    jobWrapperPath = cfgPath("Systems", "WorkloadManagement", instance, "JobWrapper")

    failedCycles = self.am_getOption("FailedTimeHours", 6)

    # The cycle length is the configured checking time, but never below the minimum
    checkingTime = gConfig.getValue(cfgPath(jobWrapperPath, "CheckingTime"), 30 * 60)
    minCheckingTime = gConfig.getValue(cfgPath(jobWrapperPath, "MinCheckingTime"), 20 * 60)
    cycleLength = max(checkingTime, minCheckingTime)

    stalledCycles = self.am_getOption("StalledTimeHours", 2)

    # Add half cycle to avoid race conditions
    self.log.verbose("", "StalledTime = %s cycles" % (stalledCycles))
    self.stalledTime = int(cycleLength * (stalledCycles + 0.5))
    self.log.verbose("", "FailedTime = %s cycles" % (failedCycles))
    self.failedTime = int(cycleLength * (failedCycles + 0.5))

    self.minorStalledStatuses = (
        JobMinorStatus.STALLED_PILOT_NOT_RUNNING,
        "Stalling for more than %d sec" % self.failedTime,
    )

    # setting up the threading
    threadCount = self.am_getOption("MaxNumberOfThreads", 15)
    self.log.verbose("Multithreaded with %d threads" % threadCount)
    self.threadPoolExecutor = concurrent.futures.ThreadPoolExecutor(max_workers=threadCount)

    return S_OK()
def getShifterProxy(shifterType, fileName=False):
    """
    This method returns a shifter's proxy

    :param shifterType: ProductionManager / DataManager...
    :param fileName: optional path of the file the proxy is downloaded to; when
                     given, its parent directory is created if it is missing
    :return: S_OK with a dict holding 'DN', 'username', 'group', 'chain' and
             'proxyFile', or the failed result of the lookup/download that broke
    """
    if fileName:
        proxyDir = os.path.dirname(fileName)
        # A bare file name has no directory part, so there is nothing to create
        if proxyDir and not os.path.isdir(proxyDir):
            try:
                os.makedirs(proxyDir)
            except OSError as excp:
                # Still best effort (the directory may have been created concurrently),
                # but a real failure is reported instead of being silently dropped
                if not os.path.isdir(proxyDir):
                    gLogger.warn("Could not create directory for shifter proxy",
                                 "%s: %s" % (proxyDir, excp))

    opsHelper = Operations()
    userName = opsHelper.getValue(cfgPath('Shifter', shifterType, 'User'), '')
    if not userName:
        return S_ERROR("No shifter User defined for %s" % shifterType)

    result = CS.getDNForUsername(userName)
    if not result['OK']:
        return result
    # Only the first DN returned for the user is used
    userDN = result['Value'][0]

    result = CS.findDefaultGroupForDN(userDN)
    if not result['OK']:
        return result
    defaultGroup = result['Value']
    # An explicitly configured shifter group wins over the DN's default group
    userGroup = opsHelper.getValue(cfgPath('Shifter', shifterType, 'Group'), defaultGroup)

    vomsAttr = CS.getVOMSAttributeForGroup(userGroup)
    if vomsAttr:
        gLogger.info("Getting VOMS [%s] proxy for shifter %s@%s (%s)" % (vomsAttr, userName, userGroup, userDN))
        result = gProxyManager.downloadVOMSProxyToFile(userDN, userGroup,
                                                       filePath=fileName,
                                                       requiredTimeLeft=86400,
                                                       cacheTime=86400)
    else:
        gLogger.info("Getting proxy for shifter %s@%s (%s)" % (userName, userGroup, userDN))
        result = gProxyManager.downloadProxyToFile(userDN, userGroup,
                                                   filePath=fileName,
                                                   requiredTimeLeft=86400,
                                                   cacheTime=86400)
    if not result['OK']:
        return result

    chain = result['chain']
    fileName = result['Value']
    return S_OK({
        'DN': userDN,
        'username': userName,
        'group': userGroup,
        'chain': chain,
        'proxyFile': fileName
    })
def execute(self):
    """Run one cycle of the Stalled Job Agent.

    Re-reads the agent options, converts the stalled/failed thresholds
    (expressed in watchdog cycles) into seconds and then runs every check
    in a fixed order. A failing check is logged and does not stop the cycle.

    :return: S_OK with a completion message, or S_ERROR if the
             WorkloadManagement system instance cannot be obtained
    """
    self.log.verbose('Waking up Stalled Job Agent')

    instance = getSystemInstance('WorkloadManagement')
    if not instance:
        return S_ERROR('Can not get the WorkloadManagement system instance')
    jobWrapperPath = cfgPath('Systems', 'WorkloadManagement', instance, 'JobWrapper')

    stalledCycles = self.am_getOption('StalledTimeHours', 2)
    failedCycles = self.am_getOption('FailedTimeHours', 6)

    # Refresh the thresholds kept on the agent instance
    self.stalledJobsToleranceTime = self.am_getOption('StalledJobsToleranceTime', 0)
    self.submittingTime = self.am_getOption('SubmittingTime', self.submittingTime)
    self.matchedTime = self.am_getOption('MatchedTime', self.matchedTime)
    self.rescheduledTime = self.am_getOption('RescheduledTime', self.rescheduledTime)
    self.completedTime = self.am_getOption('CompletedTime', self.completedTime)

    self.log.verbose('StalledTime = %s cycles' % (stalledCycles))
    self.log.verbose('FailedTime = %s cycles' % (failedCycles))

    # The cycle length is the configured checking time, but never below the minimum
    checkingTime = gConfig.getValue(cfgPath(jobWrapperPath, 'CheckingTime'), 30 * 60)
    minCheckingTime = gConfig.getValue(cfgPath(jobWrapperPath, 'MinCheckingTime'), 20 * 60)
    cycleLength = max(checkingTime, minCheckingTime)

    # Add half cycle to avoid race conditions
    stalledSeconds = cycleLength * (stalledCycles + 0.5)
    failedSeconds = cycleLength * (failedCycles + 0.5)

    # Note, jobs will be revived automatically during the heartbeat signal phase and
    # subsequent status changes will result in jobs not being selected by the
    # stalled job agent.
    # Each entry is (deferred check, message logged when the check fails);
    # the lambdas keep the checks lazy so they run strictly one after the other.
    checks = (
        (lambda: self.__markStalledJobs(stalledSeconds), 'Failed to detect stalled jobs'),
        (lambda: self.__failStalledJobs(failedSeconds), 'Failed to process stalled jobs'),
        (lambda: self.__failCompletedJobs(), 'Failed to process completed jobs'),
        (lambda: self.__failSubmittingJobs(), 'Failed to process jobs being submitted'),
        (lambda: self.__kickStuckJobs(), 'Failed to kick stuck jobs'),
    )
    for runCheck, failureMessage in checks:
        outcome = runCheck()
        if not outcome['OK']:
            self.log.error(failureMessage, outcome['Message'])

    return S_OK('Stalled Job Agent cycle complete')
def getShifterProxy(shifterType, fileName=False):
    """Download the proxy of the shifter configured for the given role.

    :param str shifterType: ProductionManager / DataManager...
    :param str fileName: file name; when given, its parent directory is
                         created through ``mkDir`` before the download

    :return: S_OK(dict)/S_ERROR() -- the dict holds "DN", "username",
             "group", "chain" and "proxyFile"
    """
    if fileName:
        mkDir(os.path.dirname(fileName))

    ops = Operations()
    shifterUser = ops.getValue(cfgPath("Shifter", shifterType, "User"), "")
    if not shifterUser:
        return S_ERROR("No shifter User defined for %s" % shifterType)

    dnLookup = Registry.getDNForUsername(shifterUser)
    if not dnLookup["OK"]:
        return dnLookup
    # Only the first DN returned for the user is used
    shifterDN = dnLookup["Value"][0]

    groupLookup = Registry.findDefaultGroupForDN(shifterDN)
    if not groupLookup["OK"]:
        return groupLookup
    # An explicitly configured shifter group wins over the DN's default group
    shifterGroup = ops.getValue(cfgPath("Shifter", shifterType, "Group"), groupLookup["Value"])

    # Pick the download call depending on whether the group has a VOMS attribute
    vomsAttr = Registry.getVOMSAttributeForGroup(shifterGroup)
    if vomsAttr:
        gLogger.info(
            "Getting VOMS [%s] proxy for shifter %s@%s (%s)" % (vomsAttr, shifterUser, shifterGroup, shifterDN)
        )
        download = gProxyManager.downloadVOMSProxyToFile
    else:
        gLogger.info("Getting proxy for shifter %s@%s (%s)" % (shifterUser, shifterGroup, shifterDN))
        download = gProxyManager.downloadProxyToFile

    downloaded = download(shifterDN, shifterGroup, filePath=fileName, requiredTimeLeft=86400, cacheTime=86400)
    if not downloaded["OK"]:
        return downloaded

    proxyInfo = {
        "DN": shifterDN,
        "username": shifterUser,
        "group": shifterGroup,
        "chain": downloaded["chain"],
        "proxyFile": downloaded["Value"],
    }
    return S_OK(proxyInfo)
def getShifterProxy(shifterType, fileName=False):
    """
    This method returns a shifter's proxy

    :param shifterType: ProductionManager / DataManager...
    :param fileName: optional path of the file the proxy is downloaded to; when
                     given, its parent directory is created if it is missing
    :return: S_OK with a dict holding 'DN', 'username', 'group', 'chain' and
             'proxyFile', or the failed result of the lookup/download that broke
    """
    if fileName:
        proxyDir = os.path.dirname(fileName)
        # A bare file name has no directory part, so there is nothing to create
        if proxyDir and not os.path.isdir(proxyDir):
            try:
                os.makedirs(proxyDir)
            except OSError as excp:
                # Still best effort (the directory may have been created concurrently),
                # but a real failure is reported instead of being silently dropped
                if not os.path.isdir(proxyDir):
                    gLogger.warn("Could not create directory for shifter proxy",
                                 "%s: %s" % (proxyDir, excp))

    opsHelper = Operations()
    userName = opsHelper.getValue(cfgPath('Shifter', shifterType, 'User'), '')
    if not userName:
        return S_ERROR("No shifter User defined for %s" % shifterType)

    result = CS.getDNForUsername(userName)
    if not result['OK']:
        return result
    # Only the first DN returned for the user is used
    userDN = result['Value'][0]

    result = CS.findDefaultGroupForDN(userDN)
    if not result['OK']:
        return result
    defaultGroup = result['Value']
    # An explicitly configured shifter group wins over the DN's default group
    userGroup = opsHelper.getValue(cfgPath('Shifter', shifterType, 'Group'), defaultGroup)

    vomsAttr = CS.getVOMSAttributeForGroup(userGroup)
    if vomsAttr:
        gLogger.info("Getting VOMS [%s] proxy for shifter %s@%s (%s)" % (vomsAttr, userName, userGroup, userDN))
        result = gProxyManager.downloadVOMSProxyToFile(userDN, userGroup,
                                                       filePath=fileName,
                                                       requiredTimeLeft=86400,
                                                       cacheTime=86400)
    else:
        gLogger.info("Getting proxy for shifter %s@%s (%s)" % (userName, userGroup, userDN))
        result = gProxyManager.downloadProxyToFile(userDN, userGroup,
                                                   filePath=fileName,
                                                   requiredTimeLeft=86400,
                                                   cacheTime=86400)
    if not result['OK']:
        return result

    chain = result['chain']
    fileName = result['Value']
    return S_OK({'DN': userDN,
                 'username': userName,
                 'group': userGroup,
                 'chain': chain,
                 'proxyFile': fileName})
def execute(self):
    """Run one cycle of the Stalled Job Agent.

    Re-reads the agent options, converts the stalled/failed thresholds
    (expressed in watchdog cycles) into seconds and runs the stalled,
    failed, completed and stuck job checks in that order. A check that
    fails is only logged.

    :return: S_OK with a completion message, or S_ERROR if the
             WorkloadManagement system instance cannot be obtained
    """
    self.log.verbose("Waking up Stalled Job Agent")

    instance = getSystemInstance("WorkloadManagement")
    if not instance:
        return S_ERROR("Can not get the WorkloadManagement system instance")
    jobWrapperPath = cfgPath("Systems", "WorkloadManagement", instance, "JobWrapper")

    stalledCycles = self.am_getOption("StalledTimeHours", 2)
    failedCycles = self.am_getOption("FailedTimeHours", 6)

    # Refresh the thresholds kept on the agent instance
    self.matchedTime = self.am_getOption("MatchedTime", self.matchedTime)
    self.rescheduledTime = self.am_getOption("RescheduledTime", self.rescheduledTime)
    self.completedTime = self.am_getOption("CompletedTime", self.completedTime)

    self.log.verbose("StalledTime = %s cycles" % (stalledCycles))
    self.log.verbose("FailedTime = %s cycles" % (failedCycles))

    # The cycle length is the configured checking time, but never below the minimum
    cycleLength = gConfig.getValue(cfgPath(jobWrapperPath, "CheckingTime"), 30 * 60)
    minCycleLength = gConfig.getValue(cfgPath(jobWrapperPath, "MinCheckingTime"), 20 * 60)
    cycleLength = max(cycleLength, minCycleLength)

    # Add half cycle to avoid race conditions
    stalledSeconds = cycleLength * (stalledCycles + 0.5)
    failedSeconds = cycleLength * (failedCycles + 0.5)

    markOutcome = self.__markStalledJobs(stalledSeconds)
    if not markOutcome["OK"]:
        self.log.error("Failed to detect stalled jobs", markOutcome["Message"])

    # Note, jobs will be revived automatically during the heartbeat signal phase and
    # subsequent status changes will result in jobs not being selected by the
    # stalled job agent.

    failOutcome = self.__failStalledJobs(failedSeconds)
    if not failOutcome["OK"]:
        self.log.error("Failed to process stalled jobs", failOutcome["Message"])

    completedOutcome = self.__failCompletedJobs()
    if not completedOutcome["OK"]:
        self.log.error("Failed to process completed jobs", completedOutcome["Message"])

    kickOutcome = self.__kickStuckJobs()
    if not kickOutcome["OK"]:
        self.log.error("Failed to kick stuck jobs", kickOutcome["Message"])

    return S_OK("Stalled Job Agent cycle complete")
def __getOwnerGroupDN(self, shifterType):
    """Resolve the DN, group and user name configured for a BOINC shifter.

    The user and (optional) group are read from the ``BoincShifter/<shifterType>``
    section of the Operations configuration; the group falls back to the
    default group of the user's DN.

    NOTE(review): the return type is not uniform. On success a plain tuple is
    returned, while on failure the S_ERROR(...) value or the failed lookup
    result is returned as is -- callers have to tell the two apart.

    :param shifterType: name of the shifter role to look up
    :return: tuple (userDN, userGroup, userName) on success, otherwise the
             failed result structure
    """
    ops = Operations()

    shifterUser = ops.getValue(cfgPath('BoincShifter', shifterType, 'User'), '')
    if not shifterUser:
        return S_ERROR("No shifter User defined for %s" % shifterType)

    dnLookup = CS.getDNForUsername(shifterUser)
    if not dnLookup['OK']:
        return dnLookup
    # Only the first DN returned for the user is used
    shifterDN = dnLookup['Value'][0]

    groupLookup = CS.findDefaultGroupForDN(shifterDN)
    if not groupLookup['OK']:
        return groupLookup

    # An explicitly configured group wins over the DN's default group
    shifterGroup = ops.getValue(cfgPath('BoincShifter', shifterType, 'Group'), groupLookup['Value'])

    return shifterDN, shifterGroup, shifterUser
def getShifterProxy(shifterType, fileName=False):
    """
    This method returns a shifter's proxy

    :param shifterType: ProductionManager / DataManager...
    :param fileName: optional path of the file the proxy is downloaded to; when
                     given, its parent directory is created if it is missing
    :return: S_OK with a dict holding "DN", "username", "group", "chain" and
             "proxyFile", or the failed result of the lookup/download that broke
    """
    if fileName:
        proxyDir = os.path.dirname(fileName)
        # A bare file name has no directory part, so there is nothing to create
        if proxyDir and not os.path.isdir(proxyDir):
            try:
                os.makedirs(proxyDir)
            except OSError as excp:
                # Still best effort (the directory may have been created concurrently),
                # but a real failure is reported instead of being silently dropped
                if not os.path.isdir(proxyDir):
                    gLogger.warn("Could not create directory for shifter proxy", "%s: %s" % (proxyDir, excp))

    opsHelper = Operations()
    userName = opsHelper.getValue(cfgPath("Shifter", shifterType, "User"), "")
    if not userName:
        return S_ERROR("No shifter User defined for %s" % shifterType)

    result = CS.getDNForUsername(userName)
    if not result["OK"]:
        return result
    # Only the first DN returned for the user is used
    userDN = result["Value"][0]

    result = CS.findDefaultGroupForDN(userDN)
    if not result["OK"]:
        return result
    defaultGroup = result["Value"]
    # An explicitly configured shifter group wins over the DN's default group
    userGroup = opsHelper.getValue(cfgPath("Shifter", shifterType, "Group"), defaultGroup)

    vomsAttr = CS.getVOMSAttributeForGroup(userGroup)
    if vomsAttr:
        gLogger.info("Getting VOMS [%s] proxy for shifter %s@%s (%s)" % (vomsAttr, userName, userGroup, userDN))
        result = gProxyManager.downloadVOMSProxyToFile(
            userDN, userGroup, filePath=fileName, requiredTimeLeft=1200, cacheTime=4 * 43200
        )
    else:
        gLogger.info("Getting proxy for shifter %s@%s (%s)" % (userName, userGroup, userDN))
        result = gProxyManager.downloadProxyToFile(
            userDN, userGroup, filePath=fileName, requiredTimeLeft=1200, cacheTime=4 * 43200
        )
    if not result["OK"]:
        return result

    chain = result["chain"]
    fileName = result["Value"]
    return S_OK({"DN": userDN, "username": userName, "group": userGroup, "chain": chain, "proxyFile": fileName})
def main():
    """Print the configuration options of a Computing Element.

    The CE name is taken from the command line, the site hosting it is looked
    up and the options of the CE section under that site are printed.
    The script exits with code 1 when the site lookup fails, when the CE is
    not part of the returned mapping, or when the options cannot be read.
    """
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument("CE: Name of the CE")
    Script.parseCommandLine(ignoreErrors=True)
    # parseCommandLine show help when mandatory arguments are not specified or incorrect argument
    ce = Script.getPositionalArgs(group=True)

    from DIRAC.ConfigurationSystem.Client.Helpers import cfgPath
    from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getCESiteMapping

    res = getCESiteMapping(ce)
    if not res["OK"]:
        gLogger.error(res["Message"])
        Dexit(1)

    # A successful lookup does not guarantee that the CE is in the mapping:
    # report it cleanly instead of failing with a KeyError
    site = res["Value"].get(ce)
    if not site:
        gLogger.error("No site found for CE %s" % ce)
        Dexit(1)

    # The first component of the site name is used as the grid section name
    res = gConfig.getOptionsDict(cfgPath("Resources", "Sites", site.split(".")[0], site, "CEs", ce))
    if not res["OK"]:
        gLogger.error(res["Message"])
        Dexit(1)
    gLogger.notice(res["Value"])
def main():
    """Print the configuration options of a Computing Element.

    The CE name is the first positional argument; help is shown (exit code 1)
    when it is missing. The site hosting the CE is looked up and the options
    of the CE section under that site are printed. The script exits with
    code 1 when the site lookup fails, when the CE is not part of the
    returned mapping, or when the options cannot be read.
    """
    Script.parseCommandLine(ignoreErrors=True)
    args = Script.getPositionalArgs()

    from DIRAC.ConfigurationSystem.Client.Helpers import cfgPath
    from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getCESiteMapping

    if len(args) < 1:
        Script.showHelp(exitCode=1)
    ceName = args[0]

    res = getCESiteMapping(ceName)
    if not res['OK']:
        gLogger.error(res['Message'])
        Dexit(1)

    # A successful lookup does not guarantee that the CE is in the mapping:
    # report it cleanly instead of failing with a KeyError
    site = res['Value'].get(ceName)
    if not site:
        gLogger.error('No site found for CE %s' % ceName)
        Dexit(1)

    # The first component of the site name is used as the grid section name
    res = gConfig.getOptionsDict(
        cfgPath('Resources', 'Sites', site.split('.')[0], site, 'CEs', ceName))
    if not res['OK']:
        gLogger.error(res['Message'])
        Dexit(1)
    gLogger.notice(res['Value'])
def runDiracConfigure(params):
    """Configure a DIRAC installation from command line switches and existing settings.

    The function registers the command line switches, completes the values not
    given on the command line from the installation section of the
    configuration, fills the local configuration (security flags, site, CE,
    local SE, gateway), optionally downloads CAs/CRLs, dumps the local cfg
    file and finally writes the vomsdir/vomses files for every VO.

    :param params: object holding the switch values and their setter callbacks
                   (``setSetup``, ``setServer``, ... and the matching attributes)
    :return: 0 on success, 1 when no proxy is available although one is needed,
             when the VOMS server information cannot be obtained or when a
             vomsdir/vomses file could not be generated
    """
    Script.registerSwitch("S:", "Setup=", "Set <setup> as DIRAC setup", params.setSetup)
    Script.registerSwitch("e:", "Extensions=", "Set <extensions> as DIRAC extensions", params.setExtensions)
    Script.registerSwitch("C:", "ConfigurationServer=", "Set <server> as DIRAC configuration server", params.setServer)
    Script.registerSwitch("I", "IncludeAllServers", "include all Configuration Servers", params.setAllServers)
    Script.registerSwitch("n:", "SiteName=", "Set <sitename> as DIRAC Site Name", params.setSiteName)
    Script.registerSwitch("N:", "CEName=", "Determiner <sitename> from <cename>", params.setCEName)
    Script.registerSwitch("V:", "VO=", "Set the VO name", params.setVO)

    Script.registerSwitch("W:", "gateway=", "Configure <gateway> as DIRAC Gateway for the site", params.setGateway)

    Script.registerSwitch("U", "UseServerCertificate", "Configure to use Server Certificate", params.setServerCert)
    Script.registerSwitch("H", "SkipCAChecks", "Configure to skip check of CAs", params.setSkipCAChecks)
    Script.registerSwitch("D", "SkipCADownload", "Configure to skip download of CAs", params.setSkipCADownload)
    Script.registerSwitch(
        "M", "SkipVOMSDownload", "Configure to skip download of VOMS info", params.setSkipVOMSDownload
    )

    Script.registerSwitch("v", "UseVersionsDir", "Use versions directory", params.setUseVersionsDir)

    Script.registerSwitch("A:", "Architecture=", "Configure /Architecture=<architecture>", params.setArchitecture)
    Script.registerSwitch("L:", "LocalSE=", "Configure LocalSite/LocalSE=<localse>", params.setLocalSE)

    Script.registerSwitch(
        "F",
        "ForceUpdate",
        "Force Update of cfg file (i.e. dirac.cfg) (otherwise nothing happens if dirac.cfg already exists)",
        params.forceUpdate,
    )

    Script.registerSwitch("O:", "output=", "output configuration file", params.setOutput)

    Script.parseCommandLine(ignoreErrors=True)

    # Every value not given on the command line is taken from the installation
    # section of the configuration, when it is defined there
    if not params.logLevel:
        params.logLevel = DIRAC.gConfig.getValue(cfgInstallPath("LogLevel"), "")
        if params.logLevel:
            DIRAC.gLogger.setLevel(params.logLevel)
    else:
        DIRAC.gConfig.setOptionValue(cfgInstallPath("LogLevel"), params.logLevel)

    if not params.gatewayServer:
        newGatewayServer = DIRAC.gConfig.getValue(cfgInstallPath("Gateway"), "")
        if newGatewayServer:
            params.setGateway(newGatewayServer)

    if not params.configurationServer:
        newConfigurationServer = DIRAC.gConfig.getValue(cfgInstallPath("ConfigurationServer"), "")
        if newConfigurationServer:
            params.setServer(newConfigurationServer)

    if not params.includeAllServers:
        newIncludeAllServer = DIRAC.gConfig.getValue(cfgInstallPath("IncludeAllServers"), False)
        if newIncludeAllServer:
            params.setAllServers(True)

    if not params.setup:
        newSetup = DIRAC.gConfig.getValue(cfgInstallPath("Setup"), "")
        if newSetup:
            params.setSetup(newSetup)

    if not params.siteName:
        newSiteName = DIRAC.gConfig.getValue(cfgInstallPath("SiteName"), "")
        if newSiteName:
            params.setSiteName(newSiteName)

    if not params.ceName:
        newCEName = DIRAC.gConfig.getValue(cfgInstallPath("CEName"), "")
        if newCEName:
            params.setCEName(newCEName)

    if not params.useServerCert:
        newUserServerCert = DIRAC.gConfig.getValue(cfgInstallPath("UseServerCertificate"), False)
        if newUserServerCert:
            params.setServerCert(newUserServerCert)

    if not params.skipCAChecks:
        newSkipCAChecks = DIRAC.gConfig.getValue(cfgInstallPath("SkipCAChecks"), False)
        if newSkipCAChecks:
            params.setSkipCAChecks(newSkipCAChecks)

    if not params.skipCADownload:
        newSkipCADownload = DIRAC.gConfig.getValue(cfgInstallPath("SkipCADownload"), False)
        if newSkipCADownload:
            params.setSkipCADownload(newSkipCADownload)

    if not params.useVersionsDir:
        newUseVersionsDir = DIRAC.gConfig.getValue(cfgInstallPath("UseVersionsDir"), False)
        if newUseVersionsDir:
            params.setUseVersionsDir(newUseVersionsDir)
            # Set proper Defaults in configuration (even if they will be properly overwrite by gComponentInstaller
            instancePath = os.path.dirname(os.path.dirname(DIRAC.rootPath))
            rootPath = os.path.join(instancePath, "pro")
            DIRAC.gConfig.setOptionValue(cfgInstallPath("InstancePath"), instancePath)
            DIRAC.gConfig.setOptionValue(cfgInstallPath("RootPath"), rootPath)

    if not params.architecture:
        newArchitecture = DIRAC.gConfig.getValue(cfgInstallPath("Architecture"), "")
        if newArchitecture:
            params.setArchitecture(newArchitecture)

    if not params.vo:
        newVO = DIRAC.gConfig.getValue(cfgInstallPath("VirtualOrganization"), "")
        if newVO:
            params.setVO(newVO)

    if not params.extensions:
        newExtensions = DIRAC.gConfig.getValue(cfgInstallPath("Extensions"), "")
        if newExtensions:
            params.setExtensions(newExtensions)

    DIRAC.gLogger.notice("Executing: %s " % (" ".join(sys.argv)))
    DIRAC.gLogger.notice('Checking DIRAC installation at "%s"' % DIRAC.rootPath)

    if params.update:
        if params.outputFile:
            DIRAC.gLogger.notice("Will update the output file %s" % params.outputFile)
        else:
            DIRAC.gLogger.notice("Will update %s" % DIRAC.gConfig.diracConfigFilePath)

    if params.setup:
        DIRAC.gLogger.verbose("/DIRAC/Setup =", params.setup)
    if params.vo:
        DIRAC.gLogger.verbose("/DIRAC/VirtualOrganization =", params.vo)
    if params.configurationServer:
        DIRAC.gLogger.verbose("/DIRAC/Configuration/Servers =", params.configurationServer)

    if params.siteName:
        DIRAC.gLogger.verbose("/LocalSite/Site =", params.siteName)
    if params.architecture:
        DIRAC.gLogger.verbose("/LocalSite/Architecture =", params.architecture)
    if params.localSE:
        DIRAC.gLogger.verbose("/LocalSite/localSE =", params.localSE)

    if not params.useServerCert:
        DIRAC.gLogger.verbose("/DIRAC/Security/UseServerCertificate =", "no")
        # Being sure it was not there before
        Script.localCfg.deleteOption("/DIRAC/Security/UseServerCertificate")
        Script.localCfg.addDefaultEntry("/DIRAC/Security/UseServerCertificate", "no")
    else:
        DIRAC.gLogger.verbose("/DIRAC/Security/UseServerCertificate =", "yes")
        # Being sure it was not there before
        Script.localCfg.deleteOption("/DIRAC/Security/UseServerCertificate")
        Script.localCfg.addDefaultEntry("/DIRAC/Security/UseServerCertificate", "yes")

    host = DIRAC.gConfig.getValue(cfgInstallPath("Host"), "")
    if host:
        DIRAC.gConfig.setOptionValue(cfgPath("DIRAC", "Hostname"), host)

    if params.skipCAChecks:
        DIRAC.gLogger.verbose("/DIRAC/Security/SkipCAChecks =", "yes")
        # Being sure it was not there before
        Script.localCfg.deleteOption("/DIRAC/Security/SkipCAChecks")
        Script.localCfg.addDefaultEntry("/DIRAC/Security/SkipCAChecks", "yes")
    else:
        # Necessary to allow initial download of CA's
        if not params.skipCADownload:
            DIRAC.gConfig.setOptionValue("/DIRAC/Security/SkipCAChecks", "yes")

    if not params.skipCADownload:
        Script.enableCS()
        try:
            dirName = os.path.join(DIRAC.rootPath, "etc", "grid-security", "certificates")
            mkDir(dirName)
        except Exception:
            DIRAC.gLogger.exception()
            DIRAC.gLogger.fatal("Fail to create directory:", dirName)
            DIRAC.exit(-1)
        try:
            bdc = BundleDeliveryClient()
            result = bdc.syncCAs()
            if result["OK"]:
                result = bdc.syncCRLs()
            # Report a failed synchronisation instead of dropping the result
            if not result["OK"]:
                DIRAC.gLogger.error("Failed to sync CAs and CRLs: %s" % result["Message"])
        except Exception as e:
            DIRAC.gLogger.error("Failed to sync CAs and CRLs: %s" % str(e))

        Script.localCfg.deleteOption("/DIRAC/Security/SkipCAChecks")

    if params.ceName or params.siteName:
        # This is used in the pilot context, we should have a proxy, or a certificate, and access to CS
        if params.useServerCert:
            # Being sure it was not there before
            Script.localCfg.deleteOption("/DIRAC/Security/UseServerCertificate")
            Script.localCfg.addDefaultEntry("/DIRAC/Security/UseServerCertificate", "yes")
        Script.enableCS()
        # Get the site resource section
        gridSections = DIRAC.gConfig.getSections("/Resources/Sites/")
        if not gridSections["OK"]:
            DIRAC.gLogger.warn("Could not get grid sections list")
            grids = []
        else:
            grids = gridSections["Value"]
        # try to get siteName from ceName or Local SE from siteName using Remote Configuration
        for grid in grids:
            siteSections = DIRAC.gConfig.getSections("/Resources/Sites/%s/" % grid)
            if not siteSections["OK"]:
                DIRAC.gLogger.warn("Could not get %s site list" % grid)
                sites = []
            else:
                sites = siteSections["Value"]

            if not params.siteName:
                if params.ceName:
                    for site in sites:
                        res = DIRAC.gConfig.getSections("/Resources/Sites/%s/%s/CEs/" % (grid, site), [])
                        if not res["OK"]:
                            DIRAC.gLogger.warn("Could not get %s CEs list" % site)
                            # A failed lookup carries no "Value": go on with the next site
                            continue
                        if params.ceName in res["Value"]:
                            params.siteName = site
                            break
            if params.siteName:
                DIRAC.gLogger.notice("Setting /LocalSite/Site = %s" % params.siteName)
                Script.localCfg.addDefaultEntry("/LocalSite/Site", params.siteName)
                DIRAC.__siteName = False
                if params.ceName:
                    DIRAC.gLogger.notice("Setting /LocalSite/GridCE = %s" % params.ceName)
                    Script.localCfg.addDefaultEntry("/LocalSite/GridCE", params.ceName)

                if not params.localSE and params.siteName in sites:
                    # Keep the lookup result in a local so that params.localSE is
                    # only ever set to the final comma-separated string
                    seResult = getSEsForSite(params.siteName)
                    if seResult["OK"] and seResult["Value"]:
                        params.localSE = ",".join(seResult["Value"])
                        DIRAC.gLogger.notice("Setting /LocalSite/LocalSE =", params.localSE)
                        Script.localCfg.addDefaultEntry("/LocalSite/LocalSE", params.localSE)
                    break

    if params.gatewayServer:
        DIRAC.gLogger.verbose("/DIRAC/Gateways/%s =" % DIRAC.siteName(), params.gatewayServer)
        Script.localCfg.addDefaultEntry("/DIRAC/Gateways/%s" % DIRAC.siteName(), params.gatewayServer)

    # Create the local cfg if it is not yet there
    if not params.outputFile:
        params.outputFile = DIRAC.gConfig.diracConfigFilePath
    params.outputFile = os.path.abspath(params.outputFile)
    if not os.path.exists(params.outputFile):
        configDir = os.path.dirname(params.outputFile)
        mkDir(configDir)
        params.update = True
        DIRAC.gConfig.dumpLocalCFGToFile(params.outputFile)

    if params.includeAllServers:
        # We need user proxy or server certificate to continue in order to get all the CS URLs
        if not params.useServerCert:
            Script.enableCS()
            result = getProxyInfo()
            if not result["OK"]:
                DIRAC.gLogger.notice("Configuration is not completed because no user proxy is available")
                DIRAC.gLogger.notice("Create one using dirac-proxy-init and execute again with -F option")
                return 1
        else:
            Script.localCfg.deleteOption("/DIRAC/Security/UseServerCertificate")
            # When using Server Certs CA's will be checked, the flag only disables initial download
            # this will be replaced by the use of SkipCADownload
            Script.localCfg.addDefaultEntry("/DIRAC/Security/UseServerCertificate", "yes")
            Script.enableCS()

        DIRAC.gConfig.setOptionValue("/DIRAC/Configuration/Servers", ",".join(DIRAC.gConfig.getServersList()))
        DIRAC.gLogger.verbose("/DIRAC/Configuration/Servers =", ",".join(DIRAC.gConfig.getServersList()))

    if params.useServerCert:
        # always removing before dumping
        Script.localCfg.deleteOption("/DIRAC/Security/UseServerCertificate")
        Script.localCfg.deleteOption("/DIRAC/Security/SkipCAChecks")
        Script.localCfg.deleteOption("/DIRAC/Security/SkipVOMSDownload")

    if params.update:
        DIRAC.gConfig.dumpLocalCFGToFile(params.outputFile)

    # ## LAST PART: do the vomsdir/vomses magic

    # This has to be done for all VOs in the installation

    if params.skipVOMSDownload:
        return 0

    result = Registry.getVOMSServerInfo()
    if not result["OK"]:
        return 1

    # Holds the last error met; a non-empty value makes the function return 1
    error = ""
    vomsDict = result["Value"]
    for vo in vomsDict:
        voName = vomsDict[vo]["VOMSName"]
        vomsDirPath = os.path.join(DIRAC.rootPath, "etc", "grid-security", "vomsdir", voName)
        vomsesDirPath = os.path.join(DIRAC.rootPath, "etc", "grid-security", "vomses")
        for path in (vomsDirPath, vomsesDirPath):
            mkDir(path)
        vomsesLines = []
        for vomsHost in vomsDict[vo].get("Servers", {}):
            hostFilePath = os.path.join(vomsDirPath, "%s.lsc" % vomsHost)
            try:
                DN = vomsDict[vo]["Servers"][vomsHost]["DN"]
                CA = vomsDict[vo]["Servers"][vomsHost]["CA"]
                port = vomsDict[vo]["Servers"][vomsHost]["Port"]
                if not DN or not CA or not port:
                    DIRAC.gLogger.error("DN = %s" % DN)
                    DIRAC.gLogger.error("CA = %s" % CA)
                    DIRAC.gLogger.error("Port = %s" % port)
                    DIRAC.gLogger.error("Missing Parameter for %s" % vomsHost)
                    continue
                with open(hostFilePath, "wt") as fd:
                    fd.write("%s\n%s\n" % (DN, CA))
                vomsesLines.append('"%s" "%s" "%s" "%s" "%s" "24"' % (voName, vomsHost, port, DN, voName))
                DIRAC.gLogger.notice("Created vomsdir file %s" % hostFilePath)
            except Exception:
                DIRAC.gLogger.exception("Could not generate vomsdir file for host", vomsHost)
                error = "Could not generate vomsdir file for VO %s, host %s" % (voName, vomsHost)
        try:
            vomsesFilePath = os.path.join(vomsesDirPath, voName)
            with open(vomsesFilePath, "wt") as fd:
                fd.write("%s\n" % "\n".join(vomsesLines))
            DIRAC.gLogger.notice("Created vomses file %s" % vomsesFilePath)
        except Exception:
            DIRAC.gLogger.exception("Could not generate vomses file")
            error = "Could not generate vomses file for VO %s" % voName

    if params.useServerCert:
        Script.localCfg.deleteOption("/DIRAC/Security/UseServerCertificate")
        # When using Server Certs CA's will be checked, the flag only disables initial download
        # this will be replaced by the use of SkipCADownload
        Script.localCfg.deleteOption("/DIRAC/Security/SkipCAChecks")

    if error:
        return 1

    return 0
if not useServerCert: DIRAC.gLogger.verbose('/DIRAC/Security/UseServerCertificate =', 'no') #Being sure it was not there before Script.localCfg.deleteOption('/DIRAC/Security/UseServerCertificate') Script.localCfg.addDefaultEntry('/DIRAC/Security/UseServerCertificate', 'no') else: DIRAC.gLogger.verbose('/DIRAC/Security/UseServerCertificate =', 'yes') #Being sure it was not there before Script.localCfg.deleteOption('/DIRAC/Security/UseServerCertificate') Script.localCfg.addDefaultEntry('/DIRAC/Security/UseServerCertificate', 'yes') host = DIRAC.gConfig.getValue(cfgInstallPath("Host"), "") if host: DIRAC.gConfig.setOptionValue(cfgPath("DIRAC", "Hostname"), host) if skipCAChecks: DIRAC.gLogger.verbose('/DIRAC/Security/SkipCAChecks =', 'yes') #Being sure it was not there before Script.localCfg.deleteOption('/DIRAC/Security/SkipCAChecks') Script.localCfg.addDefaultEntry('/DIRAC/Security/SkipCAChecks', 'yes') else: # Necessary to allow initial download of CA's if not skipCADownload: DIRAC.gConfig.setOptionValue('/DIRAC/Security/SkipCAChecks', 'yes') if not skipCADownload: Script.enableCS() try: dirName = os.path.join(DIRAC.rootPath, 'etc', 'grid-security', 'certificates')
' %s [option|cfgfile] ... CE ...' % Script.scriptName, 'Arguments:', ' CE: Name of the CE' ] ) ) Script.parseCommandLine( ignoreErrors = True ) args = Script.getPositionalArgs() if len( args ) < 1: Script.showHelp() from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin diracAdmin = DiracAdmin() exitCode = 0 errorList = [] gridCfgPath = cfgPath( 'Resources', 'Sites', grid ) result = DIRAC.gConfig.getSections( gridCfgPath ) if not result['OK']: print 'Could not get DIRAC site list' DIRAC.exit( 2 ) sites = result['Value'] for site in sites: result = diracAdmin.getCSDict( cfgPath( gridCfgPath, site ) ) if result['OK']: ces = result['Value'].get( 'CE', [] ) for ce in args: if ce in ces: print '%s: %s' % ( ce, site )
def execute(self):
    """Run one agent cycle: queue candidate jobs and handle stuck ones.

    Options are re-read on every call. Jobs selected in steps 1-3 are not
    processed here: a ``"<jobID>:<operation>"`` string is pushed on
    ``self.jobsQueue`` for each of them. Steps 4 and 5 are run inline.

    A failure of any individual selection or step is logged and does not
    abort the cycle.

    :return: S_ERROR if the WorkloadManagement system instance cannot be
             resolved, S_OK() otherwise.
    """
    self.log.debug('Waking up Stalled Job Agent')

    # getting parameters
    wms_instance = getSystemInstance('WorkloadManagement')
    if not wms_instance:
        return S_ERROR('Can not get the WorkloadManagement system instance')
    wrapperSection = cfgPath('Systems', 'WorkloadManagement', wms_instance, 'JobWrapper')

    stalledTime = self.am_getOption('StalledTimeHours', 2)
    failedTime = self.am_getOption('FailedTimeHours', 6)
    self.stalledJobsTolerantSites = self.am_getOption('StalledJobsTolerantSites', [])
    self.stalledJobsToleranceTime = self.am_getOption('StalledJobsToleranceTime', 0)
    self.submittingTime = self.am_getOption('SubmittingTime', self.submittingTime)
    self.matchedTime = self.am_getOption('MatchedTime', self.matchedTime)
    self.rescheduledTime = self.am_getOption('RescheduledTime', self.rescheduledTime)

    self.log.verbose('', 'StalledTime = %s cycles' % (stalledTime))
    self.log.verbose('', 'FailedTime = %s cycles' % (failedTime))

    # The watchdog cycle is the configured CheckingTime, but never less than
    # MinCheckingTime.
    watchdogCycle = gConfig.getValue(cfgPath(wrapperSection, 'CheckingTime'), 30 * 60)
    watchdogCycle = max(watchdogCycle,
                        gConfig.getValue(cfgPath(wrapperSection, 'MinCheckingTime'), 20 * 60))

    # Add half cycle to avoid race conditions
    self.stalledTime = int(watchdogCycle * (stalledTime + 0.5))
    self.failedTime = int(watchdogCycle * (failedTime + 0.5))

    self.minorStalledStatuses = (
        JobMinorStatus.STALLED_PILOT_NOT_RUNNING,
        'Stalling for more than %d sec' % self.failedTime)

    # Now we are getting what's going to be checked

    # 1) Queueing the jobs that might be marked Stalled
    # This is the minimum time we wait for declaring a job Stalled, therefore it is safe
    checkTime = dateTime() - self.stalledTime * second
    checkedStatuses = [JobStatus.RUNNING, JobStatus.COMPLETING]
    # Only get jobs whose HeartBeat is older than the stalledTime
    result = self.jobDB.selectJobs({'Status': checkedStatuses},
                                   older=checkTime, timeStamp='HeartBeatTime')
    if not result['OK']:
        self.log.error("Issue selecting %s jobs" % ' & '.join(checkedStatuses), result['Message'])
    # 'Value' is only read on success: the error branch above reads
    # 'Message', and looking up 'Value' on a failed result would raise.
    elif result['Value']:
        jobs = sorted(result['Value'])
        self.log.info('%s jobs will be checked for being stalled' % ' & '.join(checkedStatuses),
                      '(n=%d, heartbeat before %s)' % (len(jobs), str(checkTime)))
        for job in jobs:
            self.jobsQueue.put('%s:_markStalledJobs' % job)

    # 2) Queueing the Stalled jobs that might be marked Failed
    result = self.jobDB.selectJobs({'Status': JobStatus.STALLED})
    if not result['OK']:
        self.log.error("Issue selecting Stalled jobs", result['Message'])
    elif result['Value']:
        jobs = sorted(result['Value'])
        self.log.info('Jobs Stalled will be checked for failure', '(n=%d)' % len(jobs))
        for job in jobs:
            self.jobsQueue.put('%s:_failStalledJobs' % job)

    # 3) Send accounting
    for minor in self.minorStalledStatuses:
        result = self.jobDB.selectJobs({'Status': JobStatus.FAILED,
                                        'MinorStatus': minor,
                                        'AccountedFlag': 'False'})
        if not result['OK']:
            self.log.error("Issue selecting jobs for accounting", result['Message'])
        elif result['Value']:
            jobs = result['Value']
            self.log.info('Stalled jobs will be Accounted', '(n=%d)' % (len(jobs)))
            for job in jobs:
                # NOTE(review): this tag uses a double underscore while the two
                # tags above use a single one; the queue consumer is not visible
                # here - confirm it resolves this name as intended.
                self.jobsQueue.put('%s:__sendAccounting' % job)

    # From here on we don't use the threads

    # 4) Fail submitting jobs
    result = self._failSubmittingJobs()
    if not result['OK']:
        self.log.error('Failed to process jobs being submitted', result['Message'])

    # 5) Kick stuck jobs
    result = self._kickStuckJobs()
    if not result['OK']:
        self.log.error('Failed to kick stuck jobs', result['Message'])

    return S_OK()
# Usage text: second line of the module docstring followed by the call syntax.
Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1],
    'Usage:',
    ' %s [option|cfgfile] ... CE ...' % Script.scriptName,
    'Arguments:',
    ' CE: Name of the CE'
]))
Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

# At least one CE name is required.
if len(args) < 1:
    Script.showHelp()

exitCode = 0
errorList = []

# NOTE(review): `grid` is not defined in this part of the script; presumably it
# is set earlier in the file - confirm.
gridCfgPath = cfgPath('Resources', 'Sites', grid)
result = DIRAC.gConfig.getSections(gridCfgPath)
if not result['OK']:
    print('Could not get DIRAC site list')
    DIRAC.exit(2)
sites = result['Value']

# Print "<ce>: <site>" for every requested CE listed in a site's CE option.
for site in sites:
    result = DIRAC.gConfig.getOptionsDict(cfgPath(gridCfgPath, site))
    if not result['OK']:
        continue
    ces = result['Value'].get('CE', [])
    # Option values come back as comma-separated strings; split them so the
    # membership test below compares whole CE names instead of substrings.
    if isinstance(ces, str):
        ces = [name.strip() for name in ces.split(',') if name.strip()]
    for ce in args:
        if ce in ces:
            print('%s: %s' % (ce, site))
DIRAC.gLogger.verbose( '/LocalSite/localSE =', localSE ) if not useServerCert: DIRAC.gLogger.verbose( '/DIRAC/Security/UseServerCertificate =', 'no' ) #Being sure it was not there before Script.localCfg.deleteOption( '/DIRAC/Security/UseServerCertificate' ) Script.localCfg.addDefaultEntry( '/DIRAC/Security/UseServerCertificate', 'no' ) else: DIRAC.gLogger.verbose( '/DIRAC/Security/UseServerCertificate =', 'yes' ) #Being sure it was not there before Script.localCfg.deleteOption( '/DIRAC/Security/UseServerCertificate' ) Script.localCfg.addDefaultEntry( '/DIRAC/Security/UseServerCertificate', 'yes' ) host = DIRAC.gConfig.getValue( cfgInstallPath( "Host" ), "" ) if host: DIRAC.gConfig.setOptionValue( cfgPath( "DIRAC", "Hostname" ), host ) if skipCAChecks: DIRAC.gLogger.verbose( '/DIRAC/Security/SkipCAChecks =', 'yes' ) #Being sure it was not there before Script.localCfg.deleteOption( '/DIRAC/Security/SkipCAChecks' ) Script.localCfg.addDefaultEntry( '/DIRAC/Security/SkipCAChecks', 'yes' ) else: # Necessary to allow initial download of CA's if not skipCADownload: DIRAC.gConfig.setOptionValue( '/DIRAC/Security/SkipCAChecks', 'yes' ) if not skipCADownload: Script.enableCS() try: dirName = os.path.join( DIRAC.rootPath, 'etc', 'grid-security', 'certificates' ) mkDir(dirName)
# Usage text: second line of the module docstring followed by the call syntax.
Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1],
    'Usage:',
    ' %s [option|cfgfile] ... CE ...' % Script.scriptName,
    'Arguments:',
    ' CE: Name of the CE'
]))
Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

# At least one CE name is required.
if len(args) < 1:
    Script.showHelp()

exitCode = 0
errorList = []

# NOTE(review): `grid` is not defined in this part of the script; presumably it
# is set earlier in the file - confirm.
gridCfgPath = cfgPath('Resources', 'Sites', grid)
result = DIRAC.gConfig.getSections(gridCfgPath)
if not result['OK']:
    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and also parses on Python 3.
    print('Could not get DIRAC site list')
    DIRAC.exit(2)
sites = result['Value']

# Print "<ce>: <site>" for every requested CE listed in a site's CE option.
for site in sites:
    result = DIRAC.gConfig.getOptionsDict(cfgPath(gridCfgPath, site))
    if not result['OK']:
        continue
    ces = result['Value'].get('CE', [])
    # Option values come back as comma-separated strings; split them so the
    # membership test below compares whole CE names instead of substrings.
    if isinstance(ces, str):
        ces = [name.strip() for name in ces.split(',') if name.strip()]
    for ce in args:
        if ce in ces:
            print('%s: %s' % (ce, site))
def main():
    """Configure the local DIRAC installation from switches and install defaults.

    The sequence is:

    1. register the command line switches and parse the command line;
    2. for every setting not given on the command line, fall back to the
       value found under the install section (``cfgInstallPath``);
    3. set the security options and, unless skipped, sync CAs and CRLs;
    4. when a CE or site name is known, resolve the site name and local SE
       from the configuration;
    5. dump the local configuration to the output file;
    6. unless skipped, write the vomsdir ``.lsc`` and vomses files for every
       VO returned by ``Registry.getVOMSServerInfo()``.

    The function always terminates the process: exit code 0 on success,
    1 when no proxy is available for ``includeAllServers``, when the VOMS
    server information cannot be retrieved, or when a VOMS file could not be
    generated; ``DIRAC.exit(-1)`` when the certificates directory cannot be
    created.
    """
    global logLevel
    global setup
    global configurationServer
    global includeAllServers
    global gatewayServer
    global siteName
    global useServerCert
    global skipCAChecks
    global skipCADownload
    global useVersionsDir
    global architecture
    global localSE
    global ceName
    global vo
    global update
    global outputFile
    global skipVOMSDownload
    global extensions

    Script.disableCS()

    Script.registerSwitch("S:", "Setup=", "Set <setup> as DIRAC setup", setSetup)
    Script.registerSwitch("e:", "Extensions=", "Set <extensions> as DIRAC extensions", setExtensions)
    Script.registerSwitch("C:", "ConfigurationServer=", "Set <server> as DIRAC configuration server", setServer)
    Script.registerSwitch("I", "IncludeAllServers", "include all Configuration Servers", setAllServers)
    Script.registerSwitch("n:", "SiteName=", "Set <sitename> as DIRAC Site Name", setSiteName)
    Script.registerSwitch("N:", "CEName=", "Determiner <sitename> from <cename>", setCEName)
    Script.registerSwitch("V:", "VO=", "Set the VO name", setVO)

    Script.registerSwitch("W:", "gateway=", "Configure <gateway> as DIRAC Gateway for the site", setGateway)

    Script.registerSwitch("U", "UseServerCertificate", "Configure to use Server Certificate", setServerCert)
    Script.registerSwitch("H", "SkipCAChecks", "Configure to skip check of CAs", setSkipCAChecks)
    Script.registerSwitch("D", "SkipCADownload", "Configure to skip download of CAs", setSkipCADownload)
    Script.registerSwitch("M", "SkipVOMSDownload", "Configure to skip download of VOMS info", setSkipVOMSDownload)

    Script.registerSwitch("v", "UseVersionsDir", "Use versions directory", setUseVersionsDir)

    Script.registerSwitch("A:", "Architecture=", "Configure /Architecture=<architecture>", setArchitecture)
    Script.registerSwitch("L:", "LocalSE=", "Configure LocalSite/LocalSE=<localse>", setLocalSE)

    Script.registerSwitch(
        "F",
        "ForceUpdate",
        "Force Update of cfg file (i.e. dirac.cfg) (otherwise nothing happens if dirac.cfg already exists)",
        forceUpdate)

    Script.registerSwitch("O:", "output=", "output configuration file", setOutput)

    Script.setUsageMessage('\n'.join([
        __doc__.split('\n')[1],
        '\nUsage:',
        ' %s [options] ...\n' % Script.scriptName
    ]))

    Script.parseCommandLine(ignoreErrors=True)
    args = Script.getExtraCLICFGFiles()

    # For each setting not given on the command line, fall back to the value
    # stored under the install section, applying it through the same setter
    # the corresponding switch uses.
    if not logLevel:
        logLevel = DIRAC.gConfig.getValue(cfgInstallPath('LogLevel'), '')
        if logLevel:
            DIRAC.gLogger.setLevel(logLevel)
    else:
        DIRAC.gConfig.setOptionValue(cfgInstallPath('LogLevel'), logLevel)

    if not gatewayServer:
        newGatewayServer = DIRAC.gConfig.getValue(cfgInstallPath('Gateway'), '')
        if newGatewayServer:
            setGateway(newGatewayServer)

    if not configurationServer:
        newConfigurationServer = DIRAC.gConfig.getValue(
            cfgInstallPath('ConfigurationServer'), '')
        if newConfigurationServer:
            setServer(newConfigurationServer)

    if not includeAllServers:
        newIncludeAllServer = DIRAC.gConfig.getValue(
            cfgInstallPath('IncludeAllServers'), False)
        if newIncludeAllServer:
            setAllServers(True)

    if not setup:
        newSetup = DIRAC.gConfig.getValue(cfgInstallPath('Setup'), '')
        if newSetup:
            setSetup(newSetup)

    if not siteName:
        newSiteName = DIRAC.gConfig.getValue(cfgInstallPath('SiteName'), '')
        if newSiteName:
            setSiteName(newSiteName)

    if not ceName:
        newCEName = DIRAC.gConfig.getValue(cfgInstallPath('CEName'), '')
        if newCEName:
            setCEName(newCEName)

    if not useServerCert:
        newUserServerCert = DIRAC.gConfig.getValue(
            cfgInstallPath('UseServerCertificate'), False)
        if newUserServerCert:
            setServerCert(newUserServerCert)

    if not skipCAChecks:
        newSkipCAChecks = DIRAC.gConfig.getValue(
            cfgInstallPath('SkipCAChecks'), False)
        if newSkipCAChecks:
            setSkipCAChecks(newSkipCAChecks)

    if not skipCADownload:
        newSkipCADownload = DIRAC.gConfig.getValue(
            cfgInstallPath('SkipCADownload'), False)
        if newSkipCADownload:
            setSkipCADownload(newSkipCADownload)

    if not useVersionsDir:
        newUseVersionsDir = DIRAC.gConfig.getValue(
            cfgInstallPath('UseVersionsDir'), False)
        if newUseVersionsDir:
            setUseVersionsDir(newUseVersionsDir)
            # Set proper Defaults in configuration (even if they will be properly overwrite by gComponentInstaller
            instancePath = os.path.dirname(os.path.dirname(DIRAC.rootPath))
            rootPath = os.path.join(instancePath, 'pro')
            DIRAC.gConfig.setOptionValue(cfgInstallPath('InstancePath'), instancePath)
            DIRAC.gConfig.setOptionValue(cfgInstallPath('RootPath'), rootPath)

    if not architecture:
        newArchitecture = DIRAC.gConfig.getValue(
            cfgInstallPath('Architecture'), '')
        if newArchitecture:
            setArchitecture(newArchitecture)

    if not vo:
        newVO = DIRAC.gConfig.getValue(cfgInstallPath('VirtualOrganization'), '')
        if newVO:
            setVO(newVO)

    if not extensions:
        newExtensions = DIRAC.gConfig.getValue(cfgInstallPath('Extensions'), '')
        if newExtensions:
            setExtensions(newExtensions)

    DIRAC.gLogger.notice('Executing: %s ' % (' '.join(sys.argv)))
    DIRAC.gLogger.notice('Checking DIRAC installation at "%s"' % DIRAC.rootPath)

    if update:
        if outputFile:
            DIRAC.gLogger.notice('Will update the output file %s' % outputFile)
        else:
            DIRAC.gLogger.notice('Will update %s' % DIRAC.gConfig.diracConfigFilePath)

    if setup:
        DIRAC.gLogger.verbose('/DIRAC/Setup =', setup)
    if vo:
        DIRAC.gLogger.verbose('/DIRAC/VirtualOrganization =', vo)
    if configurationServer:
        DIRAC.gLogger.verbose('/DIRAC/Configuration/Servers =', configurationServer)

    if siteName:
        DIRAC.gLogger.verbose('/LocalSite/Site =', siteName)
    if architecture:
        DIRAC.gLogger.verbose('/LocalSite/Architecture =', architecture)
    if localSE:
        DIRAC.gLogger.verbose('/LocalSite/localSE =', localSE)

    if not useServerCert:
        DIRAC.gLogger.verbose('/DIRAC/Security/UseServerCertificate =', 'no')
        # Being sure it was not there before
        Script.localCfg.deleteOption('/DIRAC/Security/UseServerCertificate')
        Script.localCfg.addDefaultEntry('/DIRAC/Security/UseServerCertificate', 'no')
    else:
        DIRAC.gLogger.verbose('/DIRAC/Security/UseServerCertificate =', 'yes')
        # Being sure it was not there before
        Script.localCfg.deleteOption('/DIRAC/Security/UseServerCertificate')
        Script.localCfg.addDefaultEntry('/DIRAC/Security/UseServerCertificate', 'yes')

    host = DIRAC.gConfig.getValue(cfgInstallPath("Host"), "")
    if host:
        DIRAC.gConfig.setOptionValue(cfgPath("DIRAC", "Hostname"), host)

    if skipCAChecks:
        DIRAC.gLogger.verbose('/DIRAC/Security/SkipCAChecks =', 'yes')
        # Being sure it was not there before
        Script.localCfg.deleteOption('/DIRAC/Security/SkipCAChecks')
        Script.localCfg.addDefaultEntry('/DIRAC/Security/SkipCAChecks', 'yes')
    else:
        # Necessary to allow initial download of CA's
        if not skipCADownload:
            DIRAC.gConfig.setOptionValue('/DIRAC/Security/SkipCAChecks', 'yes')

    if not skipCADownload:
        Script.enableCS()
        try:
            dirName = os.path.join(DIRAC.rootPath, 'etc', 'grid-security', 'certificates')
            mkDir(dirName)
        except BaseException:
            DIRAC.gLogger.exception()
            DIRAC.gLogger.fatal('Fail to create directory:', dirName)
            DIRAC.exit(-1)
        # A failed sync is logged but does not stop the configuration.
        try:
            bdc = BundleDeliveryClient()
            result = bdc.syncCAs()
            if result['OK']:
                result = bdc.syncCRLs()
        except Exception as e:
            DIRAC.gLogger.error('Failed to sync CAs and CRLs: %s' % str(e))

        if not skipCAChecks:
            Script.localCfg.deleteOption('/DIRAC/Security/SkipCAChecks')

    if ceName or siteName:
        # This is used in the pilot context, we should have a proxy, or a certificate, and access to CS
        if useServerCert:
            # Being sure it was not there before
            Script.localCfg.deleteOption('/DIRAC/Security/UseServerCertificate')
            Script.localCfg.addDefaultEntry('/DIRAC/Security/UseServerCertificate', 'yes')
        Script.enableCS()
        # Get the site resource section
        gridSections = DIRAC.gConfig.getSections('/Resources/Sites/')
        if not gridSections['OK']:
            DIRAC.gLogger.warn('Could not get grid sections list')
            grids = []
        else:
            grids = gridSections['Value']
        # try to get siteName from ceName or Local SE from siteName using Remote Configuration
        for grid in grids:
            siteSections = DIRAC.gConfig.getSections('/Resources/Sites/%s/' % grid)
            if not siteSections['OK']:
                DIRAC.gLogger.warn('Could not get %s site list' % grid)
                sites = []
            else:
                sites = siteSections['Value']

            if not siteName:
                if ceName:
                    for site in sites:
                        res = DIRAC.gConfig.getSections(
                            '/Resources/Sites/%s/%s/CEs/' % (grid, site), [])
                        if not res['OK']:
                            DIRAC.gLogger.warn('Could not get %s CEs list' % site)
                            # Nothing to search for this site: reading 'Value'
                            # from a failed result would raise.
                            continue
                        if ceName in res['Value']:
                            siteName = site
                            break
            if siteName:
                DIRAC.gLogger.notice('Setting /LocalSite/Site = %s' % siteName)
                Script.localCfg.addDefaultEntry('/LocalSite/Site', siteName)
                DIRAC.__siteName = False
                if ceName:
                    DIRAC.gLogger.notice('Setting /LocalSite/GridCE = %s' % ceName)
                    Script.localCfg.addDefaultEntry('/LocalSite/GridCE', ceName)

                if not localSE and siteName in sites:
                    # Keep the lookup result in its own variable so that the
                    # global localSE only ever holds the final string.
                    seResult = getSEsForSite(siteName)
                    if seResult['OK'] and seResult['Value']:
                        localSE = ','.join(seResult['Value'])
                        DIRAC.gLogger.notice('Setting /LocalSite/LocalSE =', localSE)
                        Script.localCfg.addDefaultEntry('/LocalSite/LocalSE', localSE)
                    # The grid holding the site has been handled; stop scanning.
                    break

    if gatewayServer:
        DIRAC.gLogger.verbose('/DIRAC/Gateways/%s =' % DIRAC.siteName(), gatewayServer)
        Script.localCfg.addDefaultEntry('/DIRAC/Gateways/%s' % DIRAC.siteName(), gatewayServer)

    # Create the local cfg if it is not yet there
    if not outputFile:
        outputFile = DIRAC.gConfig.diracConfigFilePath
    outputFile = os.path.abspath(outputFile)
    if not os.path.exists(outputFile):
        configDir = os.path.dirname(outputFile)
        mkDir(configDir)
        update = True
        DIRAC.gConfig.dumpLocalCFGToFile(outputFile)

    if includeAllServers:
        # We need user proxy or server certificate to continue in order to get all the CS URLs
        if not useServerCert:
            Script.enableCS()
            result = getProxyInfo()
            if not result['OK']:
                DIRAC.gLogger.notice(
                    'Configuration is not completed because no user proxy is available')
                DIRAC.gLogger.notice(
                    'Create one using dirac-proxy-init and execute again with -F option')
                sys.exit(1)
        else:
            Script.localCfg.deleteOption('/DIRAC/Security/UseServerCertificate')
            # When using Server Certs CA's will be checked, the flag only disables initial download
            # this will be replaced by the use of SkipCADownload
            Script.localCfg.addDefaultEntry('/DIRAC/Security/UseServerCertificate', 'yes')
            Script.enableCS()

        DIRAC.gConfig.setOptionValue('/DIRAC/Configuration/Servers',
                                     ','.join(DIRAC.gConfig.getServersList()))
        DIRAC.gLogger.verbose('/DIRAC/Configuration/Servers =',
                              ','.join(DIRAC.gConfig.getServersList()))

    if useServerCert:
        # always removing before dumping
        Script.localCfg.deleteOption('/DIRAC/Security/UseServerCertificate')
        Script.localCfg.deleteOption('/DIRAC/Security/SkipCAChecks')
        Script.localCfg.deleteOption('/DIRAC/Security/SkipVOMSDownload')

    if update:
        DIRAC.gConfig.dumpLocalCFGToFile(outputFile)

    # ## LAST PART: do the vomsdir/vomses magic

    # This has to be done for all VOs in the installation

    if skipVOMSDownload:
        # We stop here
        sys.exit(0)

    result = Registry.getVOMSServerInfo()
    if not result['OK']:
        sys.exit(1)

    error = ''
    vomsDict = result['Value']
    # The loop variable is deliberately not called `vo`: that name is declared
    # global above and would be rebound by the loop.
    for voKey in vomsDict:
        voName = vomsDict[voKey]['VOMSName']
        vomsDirPath = os.path.join(DIRAC.rootPath, 'etc', 'grid-security', 'vomsdir', voName)
        vomsesDirPath = os.path.join(DIRAC.rootPath, 'etc', 'grid-security', 'vomses')
        for path in (vomsDirPath, vomsesDirPath):
            mkDir(path)
        vomsesLines = []
        for vomsHost in vomsDict[voKey].get('Servers', {}):
            hostFilePath = os.path.join(vomsDirPath, "%s.lsc" % vomsHost)
            try:
                DN = vomsDict[voKey]['Servers'][vomsHost]['DN']
                CA = vomsDict[voKey]['Servers'][vomsHost]['CA']
                port = vomsDict[voKey]['Servers'][vomsHost]['Port']
                if not DN or not CA or not port:
                    DIRAC.gLogger.error('DN = %s' % DN)
                    DIRAC.gLogger.error('CA = %s' % CA)
                    DIRAC.gLogger.error('Port = %s' % port)
                    DIRAC.gLogger.error('Missing Parameter for %s' % vomsHost)
                    continue
                with open(hostFilePath, "wt") as fd:
                    fd.write("%s\n%s\n" % (DN, CA))
                vomsesLines.append('"%s" "%s" "%s" "%s" "%s" "24"' % (voName, vomsHost, port, DN, voName))
                DIRAC.gLogger.notice("Created vomsdir file %s" % hostFilePath)
            except Exception:
                # Remember the failure but carry on with the remaining hosts.
                DIRAC.gLogger.exception(
                    "Could not generate vomsdir file for host", vomsHost)
                error = "Could not generate vomsdir file for VO %s, host %s" % (
                    voName, vomsHost)
        try:
            vomsesFilePath = os.path.join(vomsesDirPath, voName)
            with open(vomsesFilePath, "wt") as fd:
                fd.write("%s\n" % "\n".join(vomsesLines))
            DIRAC.gLogger.notice("Created vomses file %s" % vomsesFilePath)
        except Exception:
            DIRAC.gLogger.exception("Could not generate vomses file")
            error = "Could not generate vomses file for VO %s" % voName

    if useServerCert:
        Script.localCfg.deleteOption('/DIRAC/Security/UseServerCertificate')
        # When using Server Certs CA's will be checked, the flag only disables initial download
        # this will be replaced by the use of SkipCADownload
        Script.localCfg.deleteOption('/DIRAC/Security/SkipCAChecks')

    if error:
        sys.exit(1)

    sys.exit(0)
from DIRAC import gConfig, gLogger, exit as Dexit
from DIRAC.Core.Base import Script
from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getCESiteMapping
from DIRAC.ConfigurationSystem.Client.Helpers import cfgPath

# Usage text: second line of the module docstring followed by the call syntax.
Script.setUsageMessage('\n'.join([
    __doc__.split('\n')[1],
    'Usage:',
    ' %s [option|cfgfile] ... CE ...' % Script.scriptName,
    'Arguments:',
    ' CE: Name of the CE'
]))
Script.parseCommandLine(ignoreErrors=True)
args = Script.getPositionalArgs()

# At least one CE name is required; only the first one is used below.
if len(args) < 1:
    Script.showHelp()

# Resolve the site the CE belongs to.
res = getCESiteMapping(args[0])
if not res['OK']:
    gLogger.error(res['Message'])
    Dexit(1)
# A successful call may still return a mapping without the requested CE;
# report that cleanly instead of failing on the lookup.
if args[0] not in res['Value']:
    gLogger.error('CE %s not found in the CE to site mapping' % args[0])
    Dexit(1)
site = res['Value'][args[0]]

# The grid name is the part of the site name before the first dot.
res = gConfig.getOptionsDict(
    cfgPath('Resources', 'Sites', site.split('.')[0], site, 'CEs', args[0]))
if not res['OK']:
    gLogger.error(res['Message'])
    Dexit(1)
gLogger.notice(res['Value'])